Merge remote-tracking branch 'tiobe/master' into tiobe-master

This commit is contained in:
Andreas Dangel
2012-11-20 22:53:47 +01:00
2 changed files with 235 additions and 0 deletions

View File

@@ -0,0 +1,10 @@
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
/**
 * Language descriptor for C#.
 * <p>
 * Passes a new {@link CsTokenizer} and the <code>.cs</code> file extension to
 * the {@link AbstractLanguage} constructor.
 */
public class CsLanguage extends AbstractLanguage {

    /**
     * Creates the C# language descriptor, backed by a fresh
     * {@link CsTokenizer} and registered for the ".cs" extension.
     */
    public CsLanguage() {
        super(new CsTokenizer(), ".cs");
    }

}

View File

@@ -0,0 +1,225 @@
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.io.IOException;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
/**
* This class does a best-guess try-anything tokenization.
*
* @author jheintz
*/
/**
 * Tokenizer for C# source code.
 * <p>
 * This is a hand-written, best-effort lexer rather than a full C# grammar. It
 * walks the source one character at a time and emits one {@link TokenEntry}
 * for each operator, string/character literal, comment, identifier, number, or
 * any other single non-whitespace character. Whitespace produces no tokens;
 * line feeds only advance the line counter.
 *
 * @author jheintz
 */
public class CsTokenizer implements Tokenizer {

    /**
     * Splits the given source into tokens and appends them to
     * <code>tokenEntries</code>, followed by the EOF marker.
     * <p>
     * Each token is recorded with the file name of <code>sourceCode</code> and
     * the (1-based) line on which the token starts. An {@link IOException}
     * raised while reading is printed and ends tokenization early; the EOF
     * marker is still appended.
     *
     * @param sourceCode   the source to tokenize
     * @param tokenEntries the collector that receives the tokens
     */
    public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
        BufferedReader reader = new BufferedReader(new CharArrayReader(sourceCode.getCodeBuffer().toString().toCharArray()));
        try {
            // Invariant for the main loop: 'ic' always holds the next character
            // that has not been turned into a token yet, or -1 at end of input.
            int ic = reader.read();
            int line = 1;
            int startLine;
            char c;
            StringBuilder b;
            while (ic != -1) {
                c = (char) ic;
                switch (c) {
                // new line
                case '\n':
                    line++;
                    ic = reader.read();
                    break;

                // white space
                case ' ':
                case '\t':
                case '\r':
                    ic = reader.read();
                    break;

                // < << <= <<= > >> >= >>=
                case '<':
                case '>':
                    ic = reader.read();
                    if (ic == '=') {
                        tokenEntries.add(new TokenEntry(String.valueOf(c) + "=", sourceCode.getFileName(), line));
                        ic = reader.read();
                    } else if (ic == c) {
                        ic = reader.read();
                        if (ic == '=') {
                            tokenEntries.add(new TokenEntry(String.valueOf(c) + String.valueOf(c) + "=", sourceCode.getFileName(), line));
                            ic = reader.read();
                        } else {
                            tokenEntries.add(new TokenEntry(String.valueOf(c) + String.valueOf(c), sourceCode.getFileName(), line));
                        }
                    } else {
                        tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
                    }
                    break;

                // = == & &= && | |= || + += ++ - -= --
                case '=':
                case '&':
                case '|':
                case '+':
                case '-':
                    ic = reader.read();
                    if (ic == '=' || ic == c) {
                        tokenEntries.add(new TokenEntry(String.valueOf(c) + String.valueOf((char) ic), sourceCode.getFileName(), line));
                        ic = reader.read();
                    } else {
                        tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
                    }
                    break;

                // ! != * *= % %= ^ ^= ~ ~=
                case '!':
                case '*':
                case '%':
                case '^':
                case '~':
                    ic = reader.read();
                    if (ic == '=') {
                        tokenEntries.add(new TokenEntry(String.valueOf(c) + "=", sourceCode.getFileName(), line));
                        ic = reader.read();
                    } else {
                        tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
                    }
                    break;

                // strings & chars
                case '"':
                case '\'':
                    // The literal is reported on the line where it starts, but
                    // line feeds inside it must still advance the counter so
                    // that later tokens keep correct line numbers.
                    startLine = line;
                    b = new StringBuilder();
                    b.append(c);
                    while ((ic = reader.read()) != c) {
                        if (ic == -1) {
                            break;
                        }
                        b.append((char) ic);
                        if (ic == '\n') {
                            line++;
                        } else if (ic == '\\') {
                            // Copy the escaped character verbatim so an escaped
                            // quote does not terminate the literal.
                            int escaped = reader.read();
                            if (escaped == -1) {
                                ic = -1;
                                break;
                            }
                            b.append((char) escaped);
                            if (escaped == '\n') {
                                line++;
                            }
                        }
                    }
                    if (ic != -1) {
                        // closing quote; omitted when the literal is unterminated
                        b.append((char) ic);
                    }
                    tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), startLine));
                    if (ic != -1) {
                        ic = reader.read();
                    }
                    // Must not fall through into the '/' case below: that case
                    // reads one more character and would drop the one just read.
                    break;

                // / /= /*...*/ //...
                case '/':
                    switch (c = (char) (ic = reader.read())) {
                    case '*':
                        startLine = line;
                        // true when the previously read character was '*'
                        boolean starSeen = false;
                        b = new StringBuilder("/*");
                        while ((ic = reader.read()) != -1) {
                            c = (char) ic;
                            b.append(c);
                            if (c == '\n') {
                                line++;
                            }
                            if (starSeen && c == '/') {
                                break;
                            }
                            starSeen = c == '*';
                        }
                        tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), startLine));
                        if (ic != -1) {
                            // Step past the closing '/', otherwise the main loop
                            // would lex it again as a division operator.
                            ic = reader.read();
                        }
                        break;

                    case '/':
                        b = new StringBuilder("//");
                        // Stops on the line feed without consuming it, so the
                        // main loop counts the line.
                        while ((ic = reader.read()) != '\n') {
                            if (ic == -1) {
                                break;
                            }
                            b.append((char) ic);
                        }
                        tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
                        break;

                    case '=':
                        tokenEntries.add(new TokenEntry("/=", sourceCode.getFileName(), line));
                        ic = reader.read();
                        break;

                    default:
                        // Plain division; 'ic' already holds the next character.
                        tokenEntries.add(new TokenEntry("/", sourceCode.getFileName(), line));
                        break;
                    }
                    break;

                default:
                    // [a-zA-Z_][a-zA-Z_0-9]*
                    if (Character.isJavaIdentifierStart(c)) {
                        b = new StringBuilder();
                        do {
                            b.append(c);
                            c = (char) (ic = reader.read());
                        } while (Character.isJavaIdentifierPart(c));
                        tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
                    }
                    // numbers
                    else if (Character.isDigit(c) || c == '.') {
                        b = new StringBuilder();
                        do {
                            b.append(c);
                            if (c == 'e' || c == 'E') {
                                // exponent: accept a digit or a minus sign right after it
                                c = (char) (ic = reader.read());
                                if ("1234567890-".indexOf(c) == -1) {
                                    break;
                                }
                                b.append(c);
                            }
                            c = (char) (ic = reader.read());
                        } while ("1234567890.iIlLfFdDsSuUeExX".indexOf(c) != -1);
                        tokenEntries.add(new TokenEntry(b.toString(), sourceCode.getFileName(), line));
                    }
                    // anything else
                    else {
                        tokenEntries.add(new TokenEntry(String.valueOf(c), sourceCode.getFileName(), line));
                        ic = reader.read();
                    }
                    break;
                }
            }
        } catch (IOException e) {
            // Best effort: report the failure and still emit the EOF marker
            // below for whatever was tokenized so far.
            e.printStackTrace();
        } finally {
            try {
                reader.close();
            } catch (IOException ignored) {
                // nothing useful can be done if closing fails
            }
        }
        tokenEntries.add(TokenEntry.getEOF());
    }
}