From 8c54d73f8e528413c46c12162d71bc0eb027df27 Mon Sep 17 00:00:00 2001 From: Kristian Scheibe Date: Tue, 1 Jan 2019 19:55:08 +0100 Subject: [PATCH] Add a Maven property to check certain external links. Check links to the GitHub master branch using the existing local files. --- pmd-doc/pom.xml | 60 ++++--- .../pmd/docs/DeadLinksChecker.java | 150 +++++++++++++++--- .../pmd/docs/GenerateRuleDocsCmd.java | 3 - 3 files changed, 167 insertions(+), 46 deletions(-) diff --git a/pmd-doc/pom.xml b/pmd-doc/pom.xml index 2b5aa775a7..0f56881346 100644 --- a/pmd-doc/pom.xml +++ b/pmd-doc/pom.xml @@ -18,29 +18,43 @@ 1.${java.version} - - - - org.codehaus.mojo - exec-maven-plugin - 1.6.0 - - - - java - - package - - - - net.sourceforge.pmd.docs.GenerateRuleDocsCmd - - ${project.basedir} - - - - - + + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + generate-rule-docs + + java + + package + + net.sourceforge.pmd.docs.GenerateRuleDocsCmd + + ${project.basedir} + + + + + check-dead-links + + java + + verify + + net.sourceforge.pmd.docs.DeadLinksChecker + + ${project.basedir} + + + + + + + diff --git a/pmd-doc/src/main/java/net/sourceforge/pmd/docs/DeadLinksChecker.java b/pmd-doc/src/main/java/net/sourceforge/pmd/docs/DeadLinksChecker.java index b0383887c5..9d2be20a94 100644 --- a/pmd-doc/src/main/java/net/sourceforge/pmd/docs/DeadLinksChecker.java +++ b/pmd-doc/src/main/java/net/sourceforge/pmd/docs/DeadLinksChecker.java @@ -6,13 +6,20 @@ package net.sourceforge.pmd.docs; import java.io.IOException; import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.URL; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import 
java.util.regex.Pattern; @@ -24,6 +31,9 @@ import org.apache.commons.io.IOUtils; */ public class DeadLinksChecker { + private static final String CHECK_EXTERNAL_LINKS_PROPERTY = "pmd.doc.checkExternalLinks"; + private static final boolean CHECK_EXTERNAL_LINKS = Boolean.parseBoolean(System.getProperty(CHECK_EXTERNAL_LINKS_PROPERTY)); + // Markdown-Link: something in []'s followed by something in ()'s private static final Pattern LOCAL_LINK_PATTERN = Pattern.compile("\\[.*?\\]\\((.*?)\\)"); @@ -36,7 +46,29 @@ public class DeadLinksChecker { "^pmd_userdocs_cli_reference\\.html.*" // anchors in the CLI reference are a plain HTML include ); - public void checkDeadLinks(Path pagesDirectory) { + // the link is actually pointing to a file in the pmd project + private static final String LOCAL_FILE_PREFIX = "https://github.com/pmd/pmd/blob/master/"; + + // don't check links to PMD bugs/issues/pull-requests (performance optimization) + private static final List IGNORED_URL_PREFIXES = Collections.unmodifiableList(Arrays.asList( + "https://github.com/pmd/pmd/issues/", + "https://github.com/pmd/pmd/pull/", + "https://sourceforge.net/p/pmd/bugs/" + )); + + // prevent checking the same link multiple times + private final Map linkResultCache = new HashMap<>(); + + public static void main(String[] args) throws IOException { + final Path rootDirectory = Paths.get(args[0]).resolve("..").toRealPath(); + + DeadLinksChecker deadLinksChecker = new DeadLinksChecker(); + deadLinksChecker.checkDeadLinks(rootDirectory); + } + + public void checkDeadLinks(Path rootDirectory) { + final Path pagesDirectory = rootDirectory.resolve("docs/pages"); + if (!Files.isDirectory(pagesDirectory)) { System.err.println("can't check for dead links, didn't find \"pages\" directory at: " + pagesDirectory); System.exit(1); @@ -49,9 +81,14 @@ public class DeadLinksChecker { // make a list of all valid link targets final Set htmlPages = extractLinkTargets(mdFiles); + // buffer the report to not have it broken up 
by error messages while checking links + final List deadLinksReport = new ArrayList<>(); + // scan all .md-files for dead local links Path errorFile = null; int scannedFiles = 0; + int foundExternalLinks = 0; + int checkedExternalLinks = 0; for (Path mdFile : mdFiles) { final String pageContent = fileToString(mdFile); scannedFiles++; @@ -64,42 +101,82 @@ public class DeadLinksChecker { final Matcher matcher = LOCAL_LINK_PATTERN.matcher(line); + linkCheck: while (matcher.find()) { String linkTarget = matcher.group(1); linkTarget = linkTarget.replaceAll("^/+", ""); // remove the leading "/" + boolean linkOk; - // ignore http/https links - if (linkTarget.startsWith("http://") || linkTarget.startsWith("https://")) { - continue; + if (linkTarget.startsWith(LOCAL_FILE_PREFIX)) { + String localLinkPart = linkTarget.substring(LOCAL_FILE_PREFIX.length()); + if (localLinkPart.contains("#")) { + localLinkPart = localLinkPart.substring(0, localLinkPart.indexOf('#')); + } + + final Path localFile = rootDirectory.resolve(localLinkPart); + linkOk = Files.isRegularFile(localFile); + if (!linkOk) { + System.err.println("local file not found: " + localFile); + System.err.println(" linked by: " + linkTarget); + } + + } else if (linkTarget.startsWith("http://") || linkTarget.startsWith("https://")) { + foundExternalLinks++; + for (String ignoredUrlPrefix : IGNORED_URL_PREFIXES) { + if (linkTarget.startsWith(ignoredUrlPrefix)) { + System.out.println("not checking link: " + linkTarget); + continue linkCheck; + } + } + if (!CHECK_EXTERNAL_LINKS) { + System.out.println("ignoring check of external url: " + linkTarget); + continue; + } + + checkedExternalLinks++; + linkOk = checkExternalLink(linkTarget); + + } else { + // ignore local anchors + if (linkTarget.startsWith("#")) { + continue; + } + + // ignore some pages where automatic link detection doesn't work + if (EXCLUDED_LINK_TARGETS.matcher(linkTarget).matches()) { + continue; + } + + linkOk = linkTarget.isEmpty() || 
htmlPages.contains(linkTarget); } - // ignore local anchors - if (linkTarget.startsWith("#")) { - continue; - } - - // ignore some pages where automatic link detection doesn't work - if (EXCLUDED_LINK_TARGETS.matcher(linkTarget).matches()) { - continue; - } - - if (!linkTarget.isEmpty() && !htmlPages.contains(linkTarget)) { + if (!linkOk) { if (errorFile == null) { - System.err.println("Found dead link(s):"); + deadLinksReport.add("Found dead link(s):"); } if (!mdFile.equals(errorFile)) { - System.err.println(mdFile); + deadLinksReport.add(String.valueOf(mdFile)); errorFile = mdFile; } - System.err.printf("%8d: %s%n", lineNo, matcher.group()); + deadLinksReport.add(String.format("%8d: %s", lineNo, matcher.group())); } } } } - if (errorFile != null) { + System.out.println("Scanned " + scannedFiles + " files for dead links."); + System.out.println(" Found " + foundExternalLinks + " external links, " + checkedExternalLinks + " of those where checked."); + + if (!CHECK_EXTERNAL_LINKS) { + System.out.println("External links weren't checked, set -D" + CHECK_EXTERNAL_LINKS_PROPERTY + "=true to enable it."); + } + + if (!deadLinksReport.isEmpty()) { + for (String line : deadLinksReport) { + System.err.println(line); + } throw new AssertionError("dead links detected"); } else { - System.out.println("Scanned " + scannedFiles + " files for dead links - no errors found!"); + System.out.println("no errors found!"); } } @@ -155,4 +232,37 @@ public class DeadLinksChecker { } } + private boolean checkExternalLink(String url) { + System.out.println("checking url: " + url + " ..."); + if (linkResultCache.containsKey(url)) { + System.out.println("response: HTTP " + linkResultCache.get(url) + " (CACHED)"); + return linkResultCache.get(url) < 400; + } + + try { + final HttpURLConnection httpURLConnection = (HttpURLConnection) new URL(url).openConnection(); + httpURLConnection.setRequestMethod("GET"); + httpURLConnection.setConnectTimeout(5000); + 
httpURLConnection.setReadTimeout(15000); + httpURLConnection.connect(); + final int responseCode = httpURLConnection.getResponseCode(); + + String response = "HTTP " + responseCode; + if (httpURLConnection.getHeaderField("Location") != null) { + response += ", Location: " + httpURLConnection.getHeaderField("Location"); + } + + System.out.println("response: " + response); + linkResultCache.put(url, responseCode); + + // success (HTTP 2xx) or redirection (HTTP 3xx) + return responseCode < 400; + + } catch (IOException ex) { + System.out.println("response: " + ex.getClass().getName() + " " + ex.getMessage()); + linkResultCache.put(url, 599); + return false; + } + } + } diff --git a/pmd-doc/src/main/java/net/sourceforge/pmd/docs/GenerateRuleDocsCmd.java b/pmd-doc/src/main/java/net/sourceforge/pmd/docs/GenerateRuleDocsCmd.java index 2fa4cdab61..e58f1d72a7 100644 --- a/pmd-doc/src/main/java/net/sourceforge/pmd/docs/GenerateRuleDocsCmd.java +++ b/pmd-doc/src/main/java/net/sourceforge/pmd/docs/GenerateRuleDocsCmd.java @@ -41,9 +41,6 @@ public final class GenerateRuleDocsCmd { generator.generate(registeredRuleSets, additionalRulesets); System.out.println("Generated docs in " + (System.currentTimeMillis() - start) + " ms"); - - DeadLinksChecker deadLinksChecker = new DeadLinksChecker(); - deadLinksChecker.checkDeadLinks(output.resolve("docs/pages")); } public static List findAdditionalRulesets(Path basePath) {