[core] Add a filter possibility to FileCollector

Fixes #4942
This commit is contained in:
Andreas Dangel
2024-04-12 12:44:55 +02:00
parent c472fdf48c
commit 2bddfd2b45
4 changed files with 97 additions and 14 deletions

View File

@ -23,6 +23,7 @@ This is a {{ site.pmd.release_type }} release.
### 🐛 Fixed Issues
* core
* [#494](https://github.com/pmd/pmd/issues/494): \[core] Adopt JApiCmp to enforce control over API changes
* [#4942](https://github.com/pmd/pmd/issues/4942): \[core] CPD: `--skip-duplicate-files` has no effect (7.0.0 regression)
* cli
* [#4791](https://github.com/pmd/pmd/issues/4791): \[cli] Could not find or load main class
* [#4913](https://github.com/pmd/pmd/issues/4913): \[cli] cpd-gui closes immediately

View File

@ -6,16 +6,20 @@ package net.sourceforge.pmd.internal.util;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import net.sourceforge.pmd.AbstractConfiguration;
import net.sourceforge.pmd.cpd.CPDConfiguration;
import net.sourceforge.pmd.lang.document.FileCollector;
import net.sourceforge.pmd.lang.document.FileId;
import net.sourceforge.pmd.lang.document.InternalApiBridge;
@ -36,6 +40,34 @@ public final class FileCollectionUtil {
}
public static void collectFiles(CPDConfiguration cpdConfiguration, FileCollector collector) {
if (cpdConfiguration.isSkipDuplicates()) {
final Set<String> alreadyAddedFileNamesWithSize = new HashSet<>();
collector.setAddFileFilter(path -> {
if (!Files.isRegularFile(path)) {
// file is not a simple file, maybe inside a ZIP archive
// don't filter these out
LOG.debug("Path {} is not a regular file, skipping addFileFilter", path);
return true;
}
try {
String signature = path.getFileName() + "_" + Files.size(path);
if (!alreadyAddedFileNamesWithSize.add(signature)) {
LOG.info("Skipping {} since it appears to be a duplicate file and --skip-duplicate-files is set",
path);
return false;
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
return true;
});
}
collectFiles((AbstractConfiguration) cpdConfiguration, collector);
}
public static void collectFiles(AbstractConfiguration configuration, FileCollector collector) {
if (configuration.getSourceEncoding() != null) {
collector.setCharset(configuration.getSourceEncoding());
@ -77,7 +109,7 @@ public final class FileCollectionUtil {
try {
addRoot(collector, rootLocation);
} catch (IOException e) {
collector.getReporter().errorEx("Error collecting " + rootLocation, e);
collector.getReporter().errorEx("Error collecting {0}", new Object[]{ rootLocation }, e);
}
}
}
@ -85,7 +117,7 @@ public final class FileCollectionUtil {
public static void collectFileList(FileCollector collector, Path fileList) {
LOG.debug("Reading file list {}.", fileList);
if (!Files.exists(fileList)) {
collector.getReporter().error("No such file {}", fileList);
collector.getReporter().error("No such file {0}", fileList);
return;
}
@ -93,7 +125,7 @@ public final class FileCollectionUtil {
try {
filePaths = FileUtil.readFilelistEntries(fileList);
} catch (IOException e) {
collector.getReporter().errorEx("Error reading {}", new Object[] { fileList }, e);
collector.getReporter().errorEx("Error reading {0}", new Object[] { fileList }, e);
return;
}
collectFiles(collector, filePaths);
@ -135,7 +167,7 @@ public final class FileCollectionUtil {
String source = IOUtil.readToString(sourceCode);
collector.addSourceFile(FileId.fromPathLikeString(falseFilePath), source);
} catch (SQLException ex) {
collector.getReporter().warnEx("Cannot get SourceCode for {} - skipping ...",
collector.getReporter().warnEx("Cannot get SourceCode for {0} - skipping ...",
new Object[] { falseFilePath },
ex);
}
@ -143,7 +175,7 @@ public final class FileCollectionUtil {
} catch (ClassNotFoundException e) {
collector.getReporter().errorEx("Cannot get files from DB - probably missing database JDBC driver", e);
} catch (Exception e) {
collector.getReporter().errorEx("Cannot get files from DB - ''{}''", new Object[] { uri }, e);
collector.getReporter().errorEx("Cannot get files from DB - ''{0}''", new Object[] { uri }, e);
}
}
}

View File

@ -16,6 +16,7 @@ import java.nio.file.FileVisitOption;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.ProviderNotFoundException;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
@ -29,6 +30,7 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -60,6 +62,7 @@ public final class FileCollector implements AutoCloseable {
private final FileId outerFsPath;
private boolean closed;
private boolean recursive = true;
private Predicate<Path> addFileFilter = file -> true;
// construction
@ -206,6 +209,12 @@ public final class FileCollector implements AutoCloseable {
private boolean addFileImpl(TextFile textFile) {
LOG.trace("Adding file {} (lang: {}) ", textFile.getFileId().getAbsolutePath(), textFile.getLanguageVersion().getTerseName());
if (!addFileFilter.test(Paths.get(textFile.getFileId().getAbsolutePath()))) {
LOG.trace("File was skipped due to addFileFilter...");
return false;
}
if (allFilesToProcess.add(textFile)) {
return true;
}
@ -377,6 +386,17 @@ public final class FileCollector implements AutoCloseable {
this.charset = Objects.requireNonNull(charset);
}
/**
* Sets an additional filter that is being called before adding the
* file to the list.
*
* @param addFileFilter the filter should return {@code true} if the file
* should be collected and analyzed.
* @throws NullPointerException if {@code addFileFilter} is {@code null}.
*/
public void setAddFileFilter(Predicate<Path> addFileFilter) {
this.addFileFilter = Objects.requireNonNull(addFileFilter);
}
// filtering

View File

@ -8,18 +8,25 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assumptions.assumeTrue;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.SystemUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledOnOs;
import org.junit.jupiter.api.condition.OS;
import org.junit.jupiter.api.io.TempDir;
import net.sourceforge.pmd.lang.DummyLanguageModule;
import net.sourceforge.pmd.lang.document.FileId;
import net.sourceforge.pmd.lang.document.TextFile;
/**
* Unit test for {@link CpdAnalysis}
@ -29,14 +36,13 @@ class CpdAnalysisTest {
private static final String BASE_TEST_RESOURCE_PATH = "src/test/resources/net/sourceforge/pmd/cpd/files/";
private static final String TARGET_TEST_RESOURCE_PATH = "target/classes/net/sourceforge/pmd/cpd/files/";
@TempDir
private Path tempDir;
// Symlinks are not well supported under Windows - so the tests are
// simply executed only on linux.
private boolean canTestSymLinks = SystemUtils.IS_OS_UNIX;
CPDConfiguration config = new CPDConfiguration();
private CPDConfiguration config = new CPDConfiguration();
@BeforeEach
void setup() throws Exception {
void setup() {
config.setOnlyRecognizeLanguage(DummyLanguageModule.getInstance());
config.setMinimumTileSize(10);
}
@ -49,8 +55,6 @@ class CpdAnalysisTest {
* any error
*/
private void prepareSymLinks() throws Exception {
assumeTrue(canTestSymLinks, "Skipping unit tests with symlinks.");
Runtime runtime = Runtime.getRuntime();
if (!new File(TARGET_TEST_RESOURCE_PATH, "symlink-for-real-file.txt").exists()) {
runtime.exec(new String[] { "ln", "-s", BASE_TEST_RESOURCE_PATH + "real-file.txt",
@ -70,6 +74,7 @@ class CpdAnalysisTest {
* any error
*/
@Test
@EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows
void testFileSectionWithBrokenSymlinks() throws Exception {
prepareSymLinks();
@ -91,6 +96,7 @@ class CpdAnalysisTest {
* any error
*/
@Test
@EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows
void testFileAddedAsSymlinkAndReal() throws Exception {
prepareSymLinks();
@ -109,6 +115,7 @@ class CpdAnalysisTest {
* A file should be not be added via a sym link.
*/
@Test
@EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows
void testNoFileAddedAsSymlink() throws Exception {
prepareSymLinks();
@ -180,6 +187,29 @@ class CpdAnalysisTest {
}
}
@Test
void duplicatedFilesShouldBeSkipped() throws IOException {
String filename = "file1.dummy";
Path aFile1 = Files.createDirectory(tempDir.resolve("a")).resolve(filename).toAbsolutePath();
Path bFile1 = Files.createDirectory(tempDir.resolve("b")).resolve(filename).toAbsolutePath();
Files.write(aFile1, "Same content".getBytes(StandardCharsets.UTF_8));
Files.write(bFile1, "Same content".getBytes(StandardCharsets.UTF_8));
config.setSkipDuplicates(true);
config.setInputPathList(Arrays.asList(tempDir));
try (CpdAnalysis cpd = CpdAnalysis.create(config)) {
List<TextFile> collectedFiles = cpd.files().getCollectedFiles();
collectedFiles.stream().map(TextFile::getFileId).map(FileId::getAbsolutePath).forEach(System.out::println);
assertEquals(1, collectedFiles.size());
// the order of directory traversal is most likely not defined, so either one
// of the two files might be added, but not both
String collectedFile = collectedFiles.get(0).getFileId().getAbsolutePath();
assertTrue(collectedFile.equals(aFile1.toString()) || collectedFile.equals(bFile1.toString()));
}
}
/**
* Simple listener that fails, if too many files were added and not skipped.
*/