Add a Maven property to check certain external links

Links to the GitHub master branch are checked against the existing local files.
This commit is contained in:
Kristian Scheibe
2019-01-01 19:55:08 +01:00
parent 16c404f0ed
commit 8c54d73f8e
3 changed files with 167 additions and 46 deletions

View File

@ -18,29 +18,43 @@
<maven.compiler.target>1.${java.version}</maven.compiler.target>
</properties>
<build>
<plugins>
<!-- Runs a Java main class inside the Maven build via the exec plugin's "java" goal. -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.6.0</version>
<executions>
<!-- Single execution, bound to the "package" phase. -->
<execution>
<goals>
<goal>java</goal>
</goals>
<phase>package</phase>
</execution>
</executions>
<!-- Plugin-level configuration: applies to every execution of this plugin. -->
<configuration>
<mainClass>net.sourceforge.pmd.docs.GenerateRuleDocsCmd</mainClass>
<arguments>
<!-- The module's base directory is handed to the main class as its only argument. -->
<argument>${project.basedir}</argument>
</arguments>
</configuration>
</plugin>
</plugins>
</build>
<build>
<plugins>
<!-- Runs two Java main classes inside the Maven build via the exec plugin's "java" goal. -->
<!-- Each execution carries its own configuration so that they can use different main classes. -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.6.0</version>
<executions>
<!-- Execution 1: runs GenerateRuleDocsCmd in the "package" phase. -->
<execution>
<id>generate-rule-docs</id>
<goals>
<goal>java</goal>
</goals>
<phase>package</phase>
<configuration>
<mainClass>net.sourceforge.pmd.docs.GenerateRuleDocsCmd</mainClass>
<arguments>
<!-- The module's base directory is handed to the main class as its only argument. -->
<argument>${project.basedir}</argument>
</arguments>
</configuration>
</execution>
<!-- Execution 2: runs DeadLinksChecker in the "verify" phase, i.e. after "package". -->
<execution>
<id>check-dead-links</id>
<goals>
<goal>java</goal>
</goals>
<phase>verify</phase>
<configuration>
<mainClass>net.sourceforge.pmd.docs.DeadLinksChecker</mainClass>
<arguments>
<!-- Same single argument as above: the module's base directory. -->
<argument>${project.basedir}</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>

View File

@ -6,13 +6,20 @@ package net.sourceforge.pmd.docs;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -24,6 +31,9 @@ import org.apache.commons.io.IOUtils;
*/
public class DeadLinksChecker {
// Name of the JVM system property that switches on the checking of external links.
private static final String CHECK_EXTERNAL_LINKS_PROPERTY = "pmd.doc.checkExternalLinks";
// Evaluated once when the class is initialized. Boolean.parseBoolean only yields true for the
// value "true" (ignoring case), so external links are not checked when the property is unset.
private static final boolean CHECK_EXTERNAL_LINKS = Boolean.parseBoolean(System.getProperty(CHECK_EXTERNAL_LINKS_PROPERTY));
// Markdown-Link: something in []'s followed by something in ()'s
private static final Pattern LOCAL_LINK_PATTERN = Pattern.compile("\\[.*?\\]\\((.*?)\\)");
@ -36,7 +46,29 @@ public class DeadLinksChecker {
"^pmd_userdocs_cli_reference\\.html.*" // anchors in the CLI reference are a plain HTML include
);
public void checkDeadLinks(Path pagesDirectory) {
// A link starting with this prefix is actually pointing to a file in the pmd project.
// The part after the prefix is resolved against the root directory and checked on disk.
private static final String LOCAL_FILE_PREFIX = "https://github.com/pmd/pmd/blob/master/";
// Don't check links to PMD bugs/issues/pull-requests (performance optimization).
// A link is skipped when it starts with one of these prefixes.
private static final List<String> IGNORED_URL_PREFIXES = Collections.unmodifiableList(Arrays.asList(
"https://github.com/pmd/pmd/issues/",
"https://github.com/pmd/pmd/pull/",
"https://sourceforge.net/p/pmd/bugs/"
));
// Prevent checking the same link multiple times: maps a URL to the HTTP response code
// obtained for it (599 is stored when the request failed with an IOException).
private final Map<String, Integer> linkResultCache = new HashMap<>();
/**
 * Command line entry point: runs the dead link check.
 *
 * @param args {@code args[0]} must be a directory; its parent directory is used as the
 *             root directory that is handed to {@link #checkDeadLinks(Path)}
 * @throws IOException if the root directory cannot be resolved to a real path
 * @throws IllegalArgumentException if no directory argument was given
 */
public static void main(String[] args) throws IOException {
    // fail with a readable message instead of a bare ArrayIndexOutOfBoundsException
    if (args == null || args.length == 0) {
        throw new IllegalArgumentException("missing argument: expected a base directory as first argument");
    }

    // the root directory is the parent of the given base directory
    final Path rootDirectory = Paths.get(args[0]).resolve("..").toRealPath();

    DeadLinksChecker deadLinksChecker = new DeadLinksChecker();
    deadLinksChecker.checkDeadLinks(rootDirectory);
}
public void checkDeadLinks(Path rootDirectory) {
final Path pagesDirectory = rootDirectory.resolve("docs/pages");
if (!Files.isDirectory(pagesDirectory)) {
System.err.println("can't check for dead links, didn't find \"pages\" directory at: " + pagesDirectory);
System.exit(1);
@ -49,9 +81,14 @@ public class DeadLinksChecker {
// make a list of all valid link targets
final Set<String> htmlPages = extractLinkTargets(mdFiles);
// buffer the report to not have it broken up by error messages while checking links
final List<String> deadLinksReport = new ArrayList<>();
// scan all .md-files for dead local links
Path errorFile = null;
int scannedFiles = 0;
int foundExternalLinks = 0;
int checkedExternalLinks = 0;
for (Path mdFile : mdFiles) {
final String pageContent = fileToString(mdFile);
scannedFiles++;
@ -64,42 +101,82 @@ public class DeadLinksChecker {
final Matcher matcher = LOCAL_LINK_PATTERN.matcher(line);
linkCheck:
while (matcher.find()) {
String linkTarget = matcher.group(1);
linkTarget = linkTarget.replaceAll("^/+", ""); // remove the leading "/"
boolean linkOk;
// ignore http/https links
if (linkTarget.startsWith("http://") || linkTarget.startsWith("https://")) {
continue;
if (linkTarget.startsWith(LOCAL_FILE_PREFIX)) {
String localLinkPart = linkTarget.substring(LOCAL_FILE_PREFIX.length());
if (localLinkPart.contains("#")) {
localLinkPart = localLinkPart.substring(0, localLinkPart.indexOf('#'));
}
final Path localFile = rootDirectory.resolve(localLinkPart);
linkOk = Files.isRegularFile(localFile);
if (!linkOk) {
System.err.println("local file not found: " + localFile);
System.err.println(" linked by: " + linkTarget);
}
} else if (linkTarget.startsWith("http://") || linkTarget.startsWith("https://")) {
foundExternalLinks++;
for (String ignoredUrlPrefix : IGNORED_URL_PREFIXES) {
if (linkTarget.startsWith(ignoredUrlPrefix)) {
System.out.println("not checking link: " + linkTarget);
continue linkCheck;
}
}
if (!CHECK_EXTERNAL_LINKS) {
System.out.println("ignoring check of external url: " + linkTarget);
continue;
}
checkedExternalLinks++;
linkOk = checkExternalLink(linkTarget);
} else {
// ignore local anchors
if (linkTarget.startsWith("#")) {
continue;
}
// ignore some pages where automatic link detection doesn't work
if (EXCLUDED_LINK_TARGETS.matcher(linkTarget).matches()) {
continue;
}
linkOk = linkTarget.isEmpty() || htmlPages.contains(linkTarget);
}
// ignore local anchors
if (linkTarget.startsWith("#")) {
continue;
}
// ignore some pages where automatic link detection doesn't work
if (EXCLUDED_LINK_TARGETS.matcher(linkTarget).matches()) {
continue;
}
if (!linkTarget.isEmpty() && !htmlPages.contains(linkTarget)) {
if (!linkOk) {
if (errorFile == null) {
System.err.println("Found dead link(s):");
deadLinksReport.add("Found dead link(s):");
}
if (!mdFile.equals(errorFile)) {
System.err.println(mdFile);
deadLinksReport.add(String.valueOf(mdFile));
errorFile = mdFile;
}
System.err.printf("%8d: %s%n", lineNo, matcher.group());
deadLinksReport.add(String.format("%8d: %s", lineNo, matcher.group()));
}
}
}
}
if (errorFile != null) {
System.out.println("Scanned " + scannedFiles + " files for dead links.");
System.out.println(" Found " + foundExternalLinks + " external links, " + checkedExternalLinks + " of those where checked.");
if (!CHECK_EXTERNAL_LINKS) {
System.out.println("External links weren't checked, set -D" + CHECK_EXTERNAL_LINKS_PROPERTY + "=true to enable it.");
}
if (!deadLinksReport.isEmpty()) {
for (String line : deadLinksReport) {
System.err.println(line);
}
throw new AssertionError("dead links detected");
} else {
System.out.println("Scanned " + scannedFiles + " files for dead links - no errors found!");
System.out.println("no errors found!");
}
}
@ -155,4 +232,37 @@ public class DeadLinksChecker {
}
}
/**
 * Requests the given URL with an HTTP GET and reports whether it is reachable.
 *
 * <p>The response code of every request is remembered in {@code linkResultCache}, so a URL
 * is requested at most once per checker instance. A request that fails with an
 * {@link IOException} is remembered with the pseudo response code 599.
 *
 * @param url the http/https URL to check
 * @return {@code true} if the response code is below 400 (success or redirection),
 *         {@code false} for any other response code or if the request failed
 */
private boolean checkExternalLink(String url) {
    System.out.println("checking url: " + url + " ...");

    // single lookup: only non-null Integer values are ever stored in the cache
    final Integer cachedResponseCode = linkResultCache.get(url);
    if (cachedResponseCode != null) {
        System.out.println("response: HTTP " + cachedResponseCode + " (CACHED)");
        return cachedResponseCode < 400;
    }

    HttpURLConnection httpURLConnection = null;
    try {
        httpURLConnection = (HttpURLConnection) new URL(url).openConnection();
        httpURLConnection.setRequestMethod("GET");
        httpURLConnection.setConnectTimeout(5000);
        httpURLConnection.setReadTimeout(15000);
        httpURLConnection.connect();
        final int responseCode = httpURLConnection.getResponseCode();

        String response = "HTTP " + responseCode;
        final String location = httpURLConnection.getHeaderField("Location");
        if (location != null) {
            response += ", Location: " + location;
        }

        System.out.println("response: " + response);
        linkResultCache.put(url, responseCode);

        // success (HTTP 2xx) or redirection (HTTP 3xx)
        return responseCode < 400;
    } catch (IOException ex) {
        System.out.println("response: " + ex.getClass().getName() + " " + ex.getMessage());
        linkResultCache.put(url, 599);
        return false;
    } finally {
        // the response body is never read, so release the connection explicitly
        if (httpURLConnection != null) {
            httpURLConnection.disconnect();
        }
    }
}
}

View File

@ -41,9 +41,6 @@ public final class GenerateRuleDocsCmd {
generator.generate(registeredRuleSets, additionalRulesets);
System.out.println("Generated docs in " + (System.currentTimeMillis() - start) + " ms");
DeadLinksChecker deadLinksChecker = new DeadLinksChecker();
deadLinksChecker.checkDeadLinks(output.resolve("docs/pages"));
}
public static List<String> findAdditionalRulesets(Path basePath) {