Javascript tokenizer now ignores comment tokens.
This commit is contained in:
776
pmd-javascript/etc/grammar/es5.jj
Normal file
776
pmd-javascript/etc/grammar/es5.jj
Normal file
File diff suppressed because it is too large
Load Diff
@ -32,6 +32,47 @@
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-antrun-plugin</artifactId>
|
||||
<inherited>true</inherited>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>generate-sources</id>
|
||||
<phase>generate-sources</phase>
|
||||
<configuration>
|
||||
<target>
|
||||
<ant antfile="src/main/ant/alljavacc.xml">
|
||||
<property name="target" value="${project.build.directory}/generated-sources/javacc" />
|
||||
<property name="javacc.jar" value="${settings.localRepository}/net/java/dev/javacc/javacc/${javacc.version}/javacc-${javacc.version}.jar" />
|
||||
</ant>
|
||||
</target>
|
||||
</configuration>
|
||||
<goals>
|
||||
<goal>run</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>build-helper-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>add-javacc-generated-sources</id>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<sources>
|
||||
<source>${project.build.directory}/generated-sources/javacc</source>
|
||||
</sources>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>net.sourceforge.pmd</groupId>
|
||||
<artifactId>pmd-build</artifactId>
|
||||
|
44
pmd-javascript/src/main/ant/alljavacc.xml
Normal file
44
pmd-javascript/src/main/ant/alljavacc.xml
Normal file
@ -0,0 +1,44 @@
|
||||
<project name="pmd" default="alljavacc" basedir="../../">
|
||||
|
||||
<property name="javacc-home.path" value="target/lib" />
|
||||
|
||||
<target name="alljavacc"
|
||||
description="Generates all JavaCC aspects within PMD"
|
||||
depends="checkUpToDate,init,es5javacc,cleanup" />
|
||||
|
||||
<target name="checkUpToDate">
|
||||
<uptodate property="javaccBuildNotRequired" targetfile="${target}/last-generated-timestamp">
|
||||
<srcfiles dir="etc/grammar" includes="*.jj*"/>
|
||||
</uptodate>
|
||||
<echo message="up to date check: javaccBuildNotRequired=${javaccBuildNotRequired}"/>
|
||||
</target>
|
||||
|
||||
<target name="init" unless="javaccBuildNotRequired">
|
||||
<mkdir dir="${javacc-home.path}" />
|
||||
<copy file="${javacc.jar}" tofile="${javacc-home.path}/javacc.jar" />
|
||||
|
||||
<mkdir dir="${target}"/>
|
||||
<touch file="${target}/last-generated-timestamp"/>
|
||||
</target>
|
||||
|
||||
<target name="cleanup">
|
||||
<delete dir="${javacc-home.path}" />
|
||||
</target>
|
||||
|
||||
<target name="es5javacc" description="Generates the Ecmascript 5 grammar" unless="javaccBuildNotRequired">
|
||||
<delete dir="${target}/net/sourceforge/pmd/lang/ecmascript5/ast" />
|
||||
<mkdir dir="${target}/net/sourceforge/pmd/lang/ecmascript5/ast" />
|
||||
<!-- Ensure generated using CharStream interface -->
|
||||
<javacc static="false"
|
||||
usercharstream="true"
|
||||
target="etc/grammar/es5.jj"
|
||||
outputdirectory="${target}/net/sourceforge/pmd/lang/ecmascript5/ast"
|
||||
javacchome="${javacc-home.path}" />
|
||||
<replace file="${target}/net/sourceforge/pmd/lang/ecmascript5/ast/Ecmascript5ParserTokenManager.java"
|
||||
token="class Ecmascript5ParserTokenManager"
|
||||
value="class Ecmascript5ParserTokenManager extends net.sourceforge.pmd.lang.ast.AbstractTokenManager" />
|
||||
<delete file="${target}/net/sourceforge/pmd/lang/ecmascript5/ast/CharStream.java" />
|
||||
<delete file="${target}/net/sourceforge/pmd/lang/ecmascript5/ast/ParseException.java" />
|
||||
<delete file="${target}/net/sourceforge/pmd/lang/ecmascript5/ast/TokenMgrError.java" />
|
||||
</target>
|
||||
</project>
|
@ -3,25 +3,56 @@
|
||||
*/
|
||||
package net.sourceforge.pmd.cpd;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
public class EcmascriptTokenizer extends AbstractTokenizer {
|
||||
public EcmascriptTokenizer() {
|
||||
// setting markers for "string" in javascript
|
||||
this.stringToken = new ArrayList<String>();
|
||||
this.stringToken.add( "\'" );
|
||||
this.stringToken.add( "\"" );
|
||||
import net.sourceforge.pmd.lang.LanguageRegistry;
|
||||
import net.sourceforge.pmd.lang.LanguageVersionHandler;
|
||||
import net.sourceforge.pmd.lang.TokenManager;
|
||||
import net.sourceforge.pmd.lang.ast.TokenMgrError;
|
||||
import net.sourceforge.pmd.lang.ecmascript.EcmascriptLanguageModule;
|
||||
import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5ParserConstants;
|
||||
import net.sourceforge.pmd.lang.ecmascript5.ast.Token;
|
||||
|
||||
// setting markers for 'ignorable character' in javascript
|
||||
this.ignorableCharacter = new ArrayList<String>();
|
||||
this.ignorableCharacter.add( ";" );
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
// setting markers for 'ignorable string' in javascript
|
||||
this.ignorableStmt = new ArrayList<String>();
|
||||
/**
|
||||
* The Ecmascript Tokenizer
|
||||
*/
|
||||
public class EcmascriptTokenizer implements Tokenizer {
|
||||
|
||||
// strings do indeed span multiple lines in javascript
|
||||
this.spanMultipleLinesString = true;
|
||||
// the lines do end with backslashes
|
||||
this.spanMultipleLinesLineContinuationCharacter = '\\';
|
||||
/**
 * Tokenizes the given source with the token manager supplied by the default
 * version of the language registered under {@code EcmascriptLanguageModule.NAME},
 * and appends one {@code TokenEntry} per token, followed by an EOF entry.
 *
 * If the token manager throws a {@code TokenMgrError}, the stack trace and a
 * "Skipping" message are printed to standard error and an EOF entry is still
 * appended; entries added before the error are not removed.
 *
 * @param sourceCode the source whose code buffer and file name are read
 * @param tokenEntries the collection that receives the token entries
 */
@Override
|
||||
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
|
||||
StringBuilder buffer = sourceCode.getCodeBuffer();
|
||||
// Declared outside the try block so that the finally clause can close it.
Reader reader = null;
|
||||
try {
|
||||
LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(EcmascriptLanguageModule.NAME)
|
||||
.getDefaultVersion().getLanguageVersionHandler();
|
||||
reader = new StringReader(buffer.toString());
|
||||
TokenManager tokenManager = languageVersionHandler.getParser(
|
||||
languageVersionHandler.getDefaultParserOptions()).getTokenManager(sourceCode.getFileName(), reader);
|
||||
Token currentToken = (Token) tokenManager.getNextToken();
|
||||
// The loop stops at the first token with an empty image; presumably this is
// the end-of-input token - TODO confirm against the generated token manager.
while (currentToken.image.length() > 0) {
|
||||
// Each entry records the (possibly normalized) image, the file name and the token's begin line.
tokenEntries.add(new TokenEntry(getTokenImage(currentToken), sourceCode.getFileName(), currentToken.beginLine));
|
||||
currentToken = (Token) tokenManager.getNextToken();
|
||||
}
|
||||
tokenEntries.add(TokenEntry.getEOF());
|
||||
// NOTE(review): this message is written to standard error for every file that
// tokenizes successfully; confirm whether that output is intended.
System.err.println("Added " + sourceCode.getFileName());
|
||||
} catch (TokenMgrError err) {
|
||||
// NOTE(review): the error is reported via printStackTrace() and System.err only.
err.printStackTrace();
|
||||
System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error");
|
||||
// An EOF entry is appended on the failure path as well.
tokenEntries.add(TokenEntry.getEOF());
|
||||
} finally {
|
||||
IOUtils.closeQuietly(reader);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the text to use for the given token.
 *
 * For tokens of kind STRING_LITERAL or UNTERMINATED_STRING_LITERAL, every
 * backslash that is directly followed by a line terminator (CR LF, CR or LF)
 * and is not itself preceded by another backslash is removed together with
 * that line terminator. All other tokens are returned with their image unchanged.
 *
 * @param token the token whose image is requested
 * @return the token image, with line continuations removed for string literals
 */
private String getTokenImage(Token token) {
|
||||
//Remove line continuation characters from string literals
|
||||
if (token.kind == Ecmascript5ParserConstants.STRING_LITERAL ||
|
||||
token.kind == Ecmascript5ParserConstants.UNTERMINATED_STRING_LITERAL) {
|
||||
// Regex: negative lookbehind for a backslash, then a backslash, then one line terminator.
return token.image.replaceAll("(?<!\\\\)\\\\(\\r\\n|\\r|\\n)", "");
|
||||
}
|
||||
return token.image;
|
||||
}
|
||||
}
|
@ -11,6 +11,7 @@ import net.sourceforge.pmd.lang.ParserOptions;
|
||||
import net.sourceforge.pmd.lang.TokenManager;
|
||||
import net.sourceforge.pmd.lang.ast.Node;
|
||||
import net.sourceforge.pmd.lang.ast.ParseException;
|
||||
import net.sourceforge.pmd.lang.ecmascript5.Ecmascript5TokenManager;
|
||||
|
||||
/**
|
||||
* Adapter for the EcmascriptParser.
|
||||
@ -25,7 +26,7 @@ public class Ecmascript3Parser extends AbstractParser {
|
||||
|
||||
@Override
|
||||
public TokenManager createTokenManager(Reader source) {
|
||||
return null;
|
||||
return new Ecmascript5TokenManager(source);
|
||||
}
|
||||
|
||||
public boolean canParse() {
|
||||
|
@ -0,0 +1,34 @@
|
||||
/**
|
||||
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
|
||||
*/
|
||||
package net.sourceforge.pmd.lang.ecmascript5;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
import net.sourceforge.pmd.lang.TokenManager;
|
||||
import net.sourceforge.pmd.lang.ast.SimpleCharStream;
|
||||
import net.sourceforge.pmd.lang.ecmascript5.ast.Ecmascript5ParserTokenManager;
|
||||
|
||||
/**
|
||||
* Ecmascript 5 Token Manager implementation.
|
||||
*/
|
||||
// Wraps a generated Ecmascript5ParserTokenManager and exposes it through the TokenManager interface.
public class Ecmascript5TokenManager implements TokenManager {
|
||||
// The generated token manager that all calls are delegated to.
private final Ecmascript5ParserTokenManager tokenManager;
|
||||
|
||||
/**
|
||||
* Creates a new Ecmascript 5 Token Manager from the given source code.
|
||||
* @param source the source code
|
||||
*/
|
||||
public Ecmascript5TokenManager(Reader source) {
|
||||
// The reader is wrapped in a SimpleCharStream before being handed to the generated token manager.
tokenManager = new Ecmascript5ParserTokenManager(new SimpleCharStream(source));
|
||||
}
|
||||
|
||||
/**
 * Returns the next token produced by the wrapped token manager.
 * NOTE(review): unlike setFileName, this method carries no Override annotation
 * although it presumably implements TokenManager - confirm and align.
 *
 * @return the next token, typed as Object
 */
public Object getNextToken() {
|
||||
return tokenManager.getNextToken();
|
||||
}
|
||||
|
||||
/**
 * Passes the file name on to Ecmascript5ParserTokenManager.
 * NOTE(review): the call is made on the class rather than on the wrapped
 * instance, so the file name is presumably shared across instances - confirm.
 *
 * @param fileName the file name to set
 */
@Override
|
||||
public void setFileName(String fileName) {
|
||||
Ecmascript5ParserTokenManager.setFileName(fileName);
|
||||
}
|
||||
}
|
@ -21,7 +21,7 @@ public class EcmascriptTokenizerTest {
|
||||
SourceCode sourceCode = new SourceCode( new SourceCode.StringCodeLoader( getCode1() ) );
|
||||
Tokens tokens = new Tokens();
|
||||
tokenizer.tokenize( sourceCode, tokens );
|
||||
assertEquals( 22, tokens.size() );
|
||||
assertEquals( 40, tokens.size() );
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -30,7 +30,7 @@ public class EcmascriptTokenizerTest {
|
||||
SourceCode sourceCode = new SourceCode( new SourceCode.StringCodeLoader( getCode2() ) );
|
||||
Tokens tokens = new Tokens();
|
||||
t.tokenize( sourceCode, tokens );
|
||||
assertEquals( 22, tokens.size() );
|
||||
assertEquals( 45, tokens.size() );
|
||||
}
|
||||
|
||||
/**
|
||||
@ -47,14 +47,50 @@ public class EcmascriptTokenizerTest {
|
||||
+ "continues2\";\n") );
|
||||
Tokens tokens = new Tokens();
|
||||
t.tokenize(sourceCode, tokens);
|
||||
assertEquals(11, tokens.size());
|
||||
List<TokenEntry> list = tokens.getTokens();
|
||||
assertEquals("var", list.get(0).getIdentifier(), list.get(5).getIdentifier());
|
||||
assertEquals("s", list.get(1).getIdentifier(), list.get(6).getIdentifier());
|
||||
assertEquals("=", list.get(2).getIdentifier(), list.get(7).getIdentifier());
|
||||
assertEquals("\"a string continues\"", list.get(3).toString());
|
||||
assertEquals("\"a string continues2\"", list.get(8).toString());
|
||||
assertFalse(list.get(3).getIdentifier() == list.get(8).getIdentifier());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIgnoreSingleLineComments() throws IOException {
|
||||
Tokenizer t = new EcmascriptTokenizer();
|
||||
SourceCode sourceCode = new SourceCode( new SourceCode.StringCodeLoader(
|
||||
"//This is a single line comment\n"
|
||||
+ "var i = 0;\n\n"
|
||||
+ "//This is another comment\n"
|
||||
+ "i++;") );
|
||||
Tokens tokens = new Tokens();
|
||||
t.tokenize(sourceCode, tokens);
|
||||
assertEquals(9, tokens.size());
|
||||
List<TokenEntry> list = tokens.getTokens();
|
||||
assertEquals("var", list.get(0).getIdentifier(), list.get(4).getIdentifier());
|
||||
assertEquals("s", list.get(1).getIdentifier(), list.get(5).getIdentifier());
|
||||
assertEquals("=", list.get(2).getIdentifier(), list.get(6).getIdentifier());
|
||||
assertEquals("\"a string continues\"", list.get(3).toString());
|
||||
assertEquals("\"a string continues2\"", list.get(7).toString());
|
||||
assertFalse(list.get(3).getIdentifier() == list.get(7).getIdentifier());
|
||||
assertEquals("var", list.get(0).toString());
|
||||
assertEquals("++", list.get(6).toString());
|
||||
}
|
||||
|
||||
/**
 * Tokenizes a snippet in which two multi-line comments surround the statements
 * "var i = 0;" and "i++;", then checks the total number of token entries and
 * the text of the entries at index 0 and index 6.
 */
@Test
|
||||
public void testIgnoreMultiLineComments() throws IOException {
|
||||
Tokenizer t = new EcmascriptTokenizer();
|
||||
SourceCode sourceCode = new SourceCode( new SourceCode.StringCodeLoader(
|
||||
"/* This is a multi line comment\n"
|
||||
+ " * \n"
|
||||
+ " */ \n"
|
||||
+ "var i = 0;\n\n"
|
||||
+ "/* This is another multi line comment\n"
|
||||
+ " * second line \n"
|
||||
+ " * third line */\n"
|
||||
+ "i++;") );
|
||||
Tokens tokens = new Tokens();
|
||||
t.tokenize(sourceCode, tokens);
|
||||
// Presumably the nine entries are: var, i, =, 0, ;, i, ++, ; and the EOF entry - TODO confirm.
assertEquals(9, tokens.size());
|
||||
List<TokenEntry> list = tokens.getTokens();
|
||||
// The first entry being "var" shows that the leading comment produced no entry.
assertEquals("var", list.get(0).toString());
|
||||
assertEquals("++", list.get(6).toString());
|
||||
}
|
||||
|
||||
// no semi-colons
|
||||
|
Reference in New Issue
Block a user