diff --git a/docs/pages/release_notes.md b/docs/pages/release_notes.md index 912c4ce1c5..a20a35fba2 100644 --- a/docs/pages/release_notes.md +++ b/docs/pages/release_notes.md @@ -23,6 +23,7 @@ This is a {{ site.pmd.release_type }} release. ### 🐛 Fixed Issues * core * [#494](https://github.com/pmd/pmd/issues/494): \[core] Adopt JApiCmp to enforce control over API changes + * [#4942](https://github.com/pmd/pmd/issues/4942): \[core] CPD: `--skip-duplicate-files` has no effect (7.0.0 regression) * cli * [#4791](https://github.com/pmd/pmd/issues/4791): \[cli] Could not find or load main class * [#4913](https://github.com/pmd/pmd/issues/4913): \[cli] cpd-gui closes immediately diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/internal/util/FileCollectionUtil.java b/pmd-core/src/main/java/net/sourceforge/pmd/internal/util/FileCollectionUtil.java index 09ac693b50..baad664954 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/internal/util/FileCollectionUtil.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/internal/util/FileCollectionUtil.java @@ -6,16 +6,20 @@ package net.sourceforge.pmd.internal.util; import java.io.IOException; import java.io.Reader; +import java.io.UncheckedIOException; import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.sql.SQLException; +import java.util.HashSet; import java.util.List; +import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import net.sourceforge.pmd.AbstractConfiguration; +import net.sourceforge.pmd.cpd.CPDConfiguration; import net.sourceforge.pmd.lang.document.FileCollector; import net.sourceforge.pmd.lang.document.FileId; import net.sourceforge.pmd.lang.document.InternalApiBridge; @@ -36,6 +40,34 @@ public final class FileCollectionUtil { } + public static void collectFiles(CPDConfiguration cpdConfiguration, FileCollector collector) { + if (cpdConfiguration.isSkipDuplicates()) { + final Set alreadyAddedFileNamesWithSize = new HashSet<>(); + collector.setAddFileFilter(path -> { + if (!Files.isRegularFile(path)) { + // file is not a simple file, maybe inside a ZIP archive + // don't filter these out + LOG.debug("Path {} is not a regular file, skipping addFileFilter", path); + return true; + } + + try { + String signature = path.getFileName() + "_" + Files.size(path); + if (!alreadyAddedFileNamesWithSize.add(signature)) { + LOG.info("Skipping {} since it appears to be a duplicate file and --skip-duplicate-files is set", + path); + return false; + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + return true; + }); + } + + collectFiles((AbstractConfiguration) cpdConfiguration, collector); + } + public static void collectFiles(AbstractConfiguration configuration, FileCollector collector) { if (configuration.getSourceEncoding() != null) { collector.setCharset(configuration.getSourceEncoding()); @@ -77,7 +109,7 @@ public final class FileCollectionUtil { try { addRoot(collector, rootLocation); } catch (IOException e) { - collector.getReporter().errorEx("Error collecting " + rootLocation, e); + collector.getReporter().errorEx("Error collecting {0}", new Object[]{ rootLocation }, e); } } } @@ -85,7 +117,7 @@ public final class FileCollectionUtil { public static void collectFileList(FileCollector collector, Path fileList) { LOG.debug("Reading file list {}.", fileList); if (!Files.exists(fileList)) { - collector.getReporter().error("No such file {}", fileList); + collector.getReporter().error("No such file {0}", fileList); return; } @@ -93,7 +125,7 @@ public final class FileCollectionUtil { try { filePaths = FileUtil.readFilelistEntries(fileList); } catch (IOException e) { - collector.getReporter().errorEx("Error reading {}", new Object[] { fileList }, e); + collector.getReporter().errorEx("Error reading {0}", new Object[] { fileList }, e); return; } collectFiles(collector, filePaths); @@ -135,7 +167,7 @@ public final class FileCollectionUtil { String source = IOUtil.readToString(sourceCode); collector.addSourceFile(FileId.fromPathLikeString(falseFilePath), source); } catch (SQLException ex) { - collector.getReporter().warnEx("Cannot get SourceCode for {} - skipping ...", + collector.getReporter().warnEx("Cannot get SourceCode for {0} - skipping ...", new Object[] { falseFilePath }, ex); } @@ -143,7 +175,7 @@ public final class FileCollectionUtil { } catch (ClassNotFoundException e) { collector.getReporter().errorEx("Cannot get files from DB - probably missing database JDBC driver", e); } catch (Exception e) { - collector.getReporter().errorEx("Cannot get files from DB - ''{}''", new Object[] { uri }, e); + collector.getReporter().errorEx("Cannot get files from DB - ''{0}''", new Object[] { uri }, e); } } } diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileCollector.java b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileCollector.java index fcbbdf1de1..4b3f73fec8 100644 --- a/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileCollector.java +++ b/pmd-core/src/main/java/net/sourceforge/pmd/lang/document/FileCollector.java @@ -16,6 +16,7 @@ import java.nio.file.FileVisitOption; import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.nio.file.ProviderNotFoundException; import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; @@ -29,6 +30,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.function.Predicate; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,6 +62,7 @@ public final class FileCollector implements AutoCloseable { private final FileId outerFsPath; private boolean closed; private boolean recursive = true; + private Predicate addFileFilter = file -> true; // construction @@ -206,6 +209,12 @@ public final class FileCollector implements AutoCloseable { private boolean addFileImpl(TextFile textFile) { LOG.trace("Adding file {} (lang: {}) ", textFile.getFileId().getAbsolutePath(), textFile.getLanguageVersion().getTerseName()); + + if (!addFileFilter.test(Paths.get(textFile.getFileId().getAbsolutePath()))) { + LOG.trace("File was skipped due to addFileFilter..."); + return false; + } + if (allFilesToProcess.add(textFile)) { return true; } @@ -377,6 +386,17 @@ public final class FileCollector implements AutoCloseable { this.charset = Objects.requireNonNull(charset); } + /** + * Sets an additional filter that is being called before adding the + * file to the list. + * + * @param addFileFilter the filter should return {@code true} if the file + * should be collected and analyzed. + * @throws NullPointerException if {@code addFileFilter} is {@code null}. + */ + public void setAddFileFilter(Predicate addFileFilter) { + this.addFileFilter = Objects.requireNonNull(addFileFilter); + } // filtering diff --git a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CpdAnalysisTest.java b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CpdAnalysisTest.java index bffa3fa499..a60a74cfd2 100644 --- a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CpdAnalysisTest.java +++ b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/CpdAnalysisTest.java @@ -8,18 +8,25 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; -import static org.junit.jupiter.api.Assumptions.assumeTrue; import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Arrays; import java.util.List; -import org.apache.commons.lang3.SystemUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledOnOs; +import org.junit.jupiter.api.condition.OS; +import org.junit.jupiter.api.io.TempDir; import net.sourceforge.pmd.lang.DummyLanguageModule; +import net.sourceforge.pmd.lang.document.FileId; +import net.sourceforge.pmd.lang.document.TextFile; /** * Unit test for {@link CpdAnalysis} @@ -29,14 +36,13 @@ class CpdAnalysisTest { private static final String BASE_TEST_RESOURCE_PATH = "src/test/resources/net/sourceforge/pmd/cpd/files/"; private static final String TARGET_TEST_RESOURCE_PATH = "target/classes/net/sourceforge/pmd/cpd/files/"; + @TempDir + private Path tempDir; - // Symlinks are not well supported under Windows - so the tests are - // simply executed only on linux. - private boolean canTestSymLinks = SystemUtils.IS_OS_UNIX; - CPDConfiguration config = new CPDConfiguration(); + private CPDConfiguration config = new CPDConfiguration(); @BeforeEach - void setup() throws Exception { + void setup() { config.setOnlyRecognizeLanguage(DummyLanguageModule.getInstance()); config.setMinimumTileSize(10); } @@ -49,8 +55,6 @@ class CpdAnalysisTest { * any error */ private void prepareSymLinks() throws Exception { - assumeTrue(canTestSymLinks, "Skipping unit tests with symlinks."); - Runtime runtime = Runtime.getRuntime(); if (!new File(TARGET_TEST_RESOURCE_PATH, "symlink-for-real-file.txt").exists()) { runtime.exec(new String[] { "ln", "-s", BASE_TEST_RESOURCE_PATH + "real-file.txt", @@ -70,6 +74,7 @@ class CpdAnalysisTest { * any error */ @Test + @EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows void testFileSectionWithBrokenSymlinks() throws Exception { prepareSymLinks(); @@ -91,6 +96,7 @@ class CpdAnalysisTest { * any error */ @Test + @EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows void testFileAddedAsSymlinkAndReal() throws Exception { prepareSymLinks(); @@ -109,6 +115,7 @@ class CpdAnalysisTest { * A file should be not be added via a sym link. */ @Test + @EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows void testNoFileAddedAsSymlink() throws Exception { prepareSymLinks(); @@ -180,6 +187,29 @@ class CpdAnalysisTest { } } + @Test + void duplicatedFilesShouldBeSkipped() throws IOException { + String filename = "file1.dummy"; + Path aFile1 = Files.createDirectory(tempDir.resolve("a")).resolve(filename).toAbsolutePath(); + Path bFile1 = Files.createDirectory(tempDir.resolve("b")).resolve(filename).toAbsolutePath(); + + Files.write(aFile1, "Same content".getBytes(StandardCharsets.UTF_8)); + Files.write(bFile1, "Same content".getBytes(StandardCharsets.UTF_8)); + + config.setSkipDuplicates(true); + config.setInputPathList(Arrays.asList(tempDir)); + try (CpdAnalysis cpd = CpdAnalysis.create(config)) { + List collectedFiles = cpd.files().getCollectedFiles(); + collectedFiles.stream().map(TextFile::getFileId).map(FileId::getAbsolutePath).forEach(System.out::println); + assertEquals(1, collectedFiles.size()); + + // the order of directory traversal is most likely not defined, so either one + // of the two files might be added, but not both + String collectedFile = collectedFiles.get(0).getFileId().getAbsolutePath(); + assertTrue(collectedFile.equals(aFile1.toString()) || collectedFile.equals(bFile1.toString())); + } + } + /** * Simple listener that fails, if too many files were added and not skipped. */