diff --git a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java
index 19e4fb2eb6..ba78977ce7 100644
--- a/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java
+++ b/pmd-core/src/main/java/net/sourceforge/pmd/cpd/SourceCode.java
@@ -15,7 +15,9 @@ import java.util.List;
 
 import net.sourceforge.pmd.PMD;
 
+import org.apache.commons.io.ByteOrderMark;
 import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.input.BOMInputStream;
 
 public class SourceCode {
 
@@ -68,7 +70,35 @@ public class SourceCode {
 
         @Override
         public Reader getReader() throws Exception {
-            return new InputStreamReader(new FileInputStream(file), encoding);
+            // The BOMInputStream skips a leading BOM so it is not read as content.
+            BOMInputStream inputStream = new BOMInputStream(new FileInputStream(file),
+                    ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);
+            boolean handedOver = false;
+            try {
+                // A BOM identifies the real encoding, so it overrides the configured one.
+                if (inputStream.hasBOM()) {
+                    encoding = inputStream.getBOMCharsetName();
+                }
+                Reader reader = new InputStreamReader(inputStream, encoding);
+                handedOver = true;
+                return reader;
+            } finally {
+                // Close the file if BOM detection or the charset lookup failed;
+                // otherwise the caller owns the reader and must close it.
+                if (!handedOver) {
+                    IOUtils.closeQuietly(inputStream);
+                }
+            }
+        }
+
+        /**
+         * Returns the encoding used to read the file. After {@link #getReader()}
+         * has been called this is the encoding given by the file's BOM, if it has one.
+         *
+         * @return the name of the encoding
+         */
+        public String getEncoding() {
+            return encoding;
         }
 
         @Override
diff --git a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/SourceCodeTest.java b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/SourceCodeTest.java
index a3758ccc90..62cbe12785 100644
--- a/pmd-core/src/test/java/net/sourceforge/pmd/cpd/SourceCodeTest.java
+++ b/pmd-core/src/test/java/net/sourceforge/pmd/cpd/SourceCodeTest.java
@@ -5,14 +5,18 @@ package net.sourceforge.pmd.cpd;
 
 import static org.junit.Assert.assertEquals;
 
+import java.io.File;
 import java.util.ArrayList;
 
 import net.sourceforge.pmd.PMD;
+import net.sourceforge.pmd.cpd.SourceCode.FileCodeLoader;
 
 import org.junit.Test;
 
 public class SourceCodeTest {
-    
+    private static final String BASE_RESOURCE_PATH =
+            "src/test/resources/net/sourceforge/pmd/cpd/files/";
+
     private static final String SAMPLE_CODE =
         "Line 1\n" +
         "Line 2\n" +
@@ -36,4 +40,24 @@ public class SourceCodeTest {
         assertEquals("Line 2", sourceCode.getSlice(2, 2));
         assertEquals("Line 1" + PMD.EOL + "Line 2", sourceCode.getSlice(1, 2));
     }
+
+    @Test
+    public void testEncodingDetectionFromBOM() throws Exception {
+        FileCodeLoader loader =
+                new SourceCode.FileCodeLoader(new File(BASE_RESOURCE_PATH + "file_with_utf8_bom.java"), "ISO-8859-1");
+
+        // The encoding detection is done when the reader is created; close it so the file handle is released
+        loader.getReader().close();
+        assertEquals("UTF-8", loader.getEncoding());
+    }
+
+    @Test
+    public void testEncodingIsNotChangedWhenThereIsNoBOM() throws Exception {
+        FileCodeLoader loader =
+                new SourceCode.FileCodeLoader(new File(BASE_RESOURCE_PATH + "file_with_ISO-8859-1_encoding.java"), "ISO-8859-1");
+
+        // The encoding detection is done when the reader is created; close it so the file handle is released
+        loader.getReader().close();
+        assertEquals("ISO-8859-1", loader.getEncoding());
+    }
 }
diff --git a/pmd-core/src/test/resources/net/sourceforge/pmd/cpd/files/file_with_ISO-8859-1_encoding.java b/pmd-core/src/test/resources/net/sourceforge/pmd/cpd/files/file_with_ISO-8859-1_encoding.java
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/pmd-core/src/test/resources/net/sourceforge/pmd/cpd/files/file_with_utf8_bom.java b/pmd-core/src/test/resources/net/sourceforge/pmd/cpd/files/file_with_utf8_bom.java
new file mode 100644
index 0000000000..5f282702bb
--- /dev/null
+++ b/pmd-core/src/test/resources/net/sourceforge/pmd/cpd/files/file_with_utf8_bom.java
@@ -0,0 +1 @@
+﻿
\ No newline at end of file
diff --git a/pmd-java8/pom.xml b/pmd-java8/pom.xml
index 1fc2151c74..04147945e2 100644
--- a/pmd-java8/pom.xml
+++ b/pmd-java8/pom.xml
@@ -7,7 +7,7 @@
 net.sourceforge.pmd
 pmd
-5.3.6-SNAPSHOT
+5.3.7-SNAPSHOT