Merge branch 'detect_file_encoding_using_UTF_BOM' of https://github.com/tiobe/pmd into pr-31
This commit is contained in:
@ -15,7 +15,9 @@ import java.util.List;
|
|||||||
|
|
||||||
import net.sourceforge.pmd.PMD;
|
import net.sourceforge.pmd.PMD;
|
||||||
|
|
||||||
|
import org.apache.commons.io.ByteOrderMark;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.io.input.BOMInputStream;
|
||||||
|
|
||||||
public class SourceCode {
|
public class SourceCode {
|
||||||
|
|
||||||
@ -68,7 +70,18 @@ public class SourceCode {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Reader getReader() throws Exception {
|
public Reader getReader() throws Exception {
|
||||||
return new InputStreamReader(new FileInputStream(file), encoding);
|
BOMInputStream inputStream =
|
||||||
|
new BOMInputStream(new FileInputStream(file),
|
||||||
|
ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);
|
||||||
|
|
||||||
|
if (inputStream.hasBOM()) {
|
||||||
|
encoding = inputStream.getBOMCharsetName();
|
||||||
|
}
|
||||||
|
return new InputStreamReader(inputStream, encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEncoding() {
|
||||||
|
return encoding;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -5,14 +5,18 @@ package net.sourceforge.pmd.cpd;
|
|||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
import net.sourceforge.pmd.PMD;
|
import net.sourceforge.pmd.PMD;
|
||||||
|
import net.sourceforge.pmd.cpd.SourceCode.FileCodeLoader;
|
||||||
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class SourceCodeTest {
|
public class SourceCodeTest {
|
||||||
|
private static final String BASE_RESOURCE_PATH =
|
||||||
|
"src/test/resources/net/sourceforge/pmd/cpd/files/";
|
||||||
|
|
||||||
private static final String SAMPLE_CODE =
|
private static final String SAMPLE_CODE =
|
||||||
"Line 1\n" +
|
"Line 1\n" +
|
||||||
"Line 2\n" +
|
"Line 2\n" +
|
||||||
@ -36,4 +40,24 @@ public class SourceCodeTest {
|
|||||||
assertEquals("Line 2", sourceCode.getSlice(2, 2));
|
assertEquals("Line 2", sourceCode.getSlice(2, 2));
|
||||||
assertEquals("Line 1" + PMD.EOL + "Line 2", sourceCode.getSlice(1, 2));
|
assertEquals("Line 1" + PMD.EOL + "Line 2", sourceCode.getSlice(1, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEncodingDetectionFromBOM() throws Exception {
|
||||||
|
FileCodeLoader loader =
|
||||||
|
new SourceCode.FileCodeLoader(new File(BASE_RESOURCE_PATH + "file_with_utf8_bom.java"), "ISO-8859-1");
|
||||||
|
|
||||||
|
//The encoding detection is done when the reader is created
|
||||||
|
loader.getReader();
|
||||||
|
assertEquals("UTF-8", loader.getEncoding());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEncodingIsNotChangedWhenThereIsNoBOM() throws Exception {
|
||||||
|
FileCodeLoader loader =
|
||||||
|
new SourceCode.FileCodeLoader(new File(BASE_RESOURCE_PATH + "file_with_ISO-8859-1_encoding.java"), "ISO-8859-1");
|
||||||
|
|
||||||
|
//The encoding detection is done when the reader is created
|
||||||
|
loader.getReader();
|
||||||
|
assertEquals("ISO-8859-1", loader.getEncoding());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
|
@ -7,7 +7,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>net.sourceforge.pmd</groupId>
|
<groupId>net.sourceforge.pmd</groupId>
|
||||||
<artifactId>pmd</artifactId>
|
<artifactId>pmd</artifactId>
|
||||||
<version>5.3.6-SNAPSHOT</version>
|
<version>5.3.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
|
Reference in New Issue
Block a user