diff --git a/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java b/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java index 9fe761c348..f4301169f8 100644 --- a/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java +++ b/pmd-jsp/src/main/java/net/sourceforge/pmd/cpd/JSPTokenizer.java @@ -3,27 +3,41 @@ */ package net.sourceforge.pmd.cpd; +import java.io.Reader; import java.io.StringReader; +import org.apache.commons.io.IOUtils; + import net.sourceforge.pmd.lang.LanguageRegistry; import net.sourceforge.pmd.lang.LanguageVersionHandler; import net.sourceforge.pmd.lang.TokenManager; import net.sourceforge.pmd.lang.jsp.JspLanguageModule; import net.sourceforge.pmd.lang.jsp.ast.Token; +import net.sourceforge.pmd.util.IOUtil; public class JSPTokenizer implements Tokenizer { - public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { - StringBuilder buffer = sourceCode.getCodeBuffer(); - LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JspLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler(); - TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions()) - .getTokenManager(sourceCode.getFileName(), new StringReader(buffer.toString())); - Token currentToken = (Token) tokenMgr.getNextToken(); - while (currentToken.image.length() > 0) { - tokenEntries.add(new TokenEntry(String.valueOf(currentToken.kind), sourceCode.getFileName(), - currentToken.beginLine)); - currentToken = (Token) tokenMgr.getNextToken(); + public void tokenize(SourceCode sourceCode, Tokens tokenEntries) { + StringBuilder buffer = sourceCode.getCodeBuffer(); + LanguageVersionHandler languageVersionHandler = + LanguageRegistry.getLanguage(JspLanguageModule.NAME).getDefaultVersion().getLanguageVersionHandler(); + Reader reader = null; + + try { + reader = new StringReader(buffer.toString()); + reader = IOUtil.skipBOM(reader); + TokenManager tokenMgr = 
languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions()) + .getTokenManager(sourceCode.getFileName(), reader); + Token currentToken = (Token) tokenMgr.getNextToken(); + + while (currentToken.image.length() > 0) { + tokenEntries.add(new TokenEntry(String.valueOf(currentToken.kind), sourceCode.getFileName(), + currentToken.beginLine)); + currentToken = (Token) tokenMgr.getNextToken(); + } + } finally { + IOUtils.closeQuietly(reader); + } + tokenEntries.add(TokenEntry.getEOF()); } - tokenEntries.add(TokenEntry.getEOF()); - } } diff --git a/src/site/markdown/overview/changelog.md b/src/site/markdown/overview/changelog.md index bda43e573d..287c4abd5b 100644 --- a/src/site/markdown/overview/changelog.md +++ b/src/site/markdown/overview/changelog.md @@ -16,6 +16,7 @@ **Pull Requests:** * [#27](https://github.com/adangel/pmd/pull/27): Added support for Raw String Literals (C++11). +* [#29](https://github.com/adangel/pmd/pull/29): Added support for files with UTF-8 BOM to JSP tokenizer. * [#79](https://github.com/pmd/pmd/pull/79): do not flag public static void main(String[]) as UseVarargs; ignore @Override for UseVarargs * [#80](https://github.com/pmd/pmd/pull/80): Update mvn-plugin.md * [#83](https://github.com/pmd/pmd/pull/83): Adds new Code Climate-compliant JSON renderer