Add CaseChangingCharStream for t-sql, downgrade antlr

This commit is contained in:
Andreas Dangel
2023-02-16 18:55:47 +01:00
parent c9b19b680a
commit d4d7f4b75b
4 changed files with 94 additions and 5 deletions

View File

@ -27,7 +27,7 @@ lexer grammar TSqlLexer;
// Basic keywords (from https://msdn.microsoft.com/en-us/library/ms189822.aspx) // Basic keywords (from https://msdn.microsoft.com/en-us/library/ms189822.aspx)
options { caseInsensitive = true; } /*options { caseInsensitive = true; }*/
ADD: 'ADD'; ADD: 'ADD';
ALL: 'ALL'; ALL: 'ALL';
@ -1215,7 +1215,7 @@ LOCAL_ID: '@' ([A-Z_$@#0-9] | FullWidthLetter)*;
TEMP_ID: '#' ([A-Z_$@#0-9] | FullWidthLetter)*; TEMP_ID: '#' ([A-Z_$@#0-9] | FullWidthLetter)*;
DECIMAL: DEC_DIGIT+; DECIMAL: DEC_DIGIT+;
ID: ( [A-Z_#] | FullWidthLetter) ( [A-Z_#$@0-9] | FullWidthLetter )*; ID: ( [A-Z_#] | FullWidthLetter) ( [A-Z_#$@0-9] | FullWidthLetter )*;
STRING options { caseInsensitive=false; } : 'N'? '\'' (~'\'' | '\'\'')* '\''; STRING /*options { caseInsensitive=false; }*/ : 'N'? '\'' (~'\'' | '\'\'')* '\'';
BINARY: '0' 'X' HEX_DIGIT*; BINARY: '0' 'X' HEX_DIGIT*;
FLOAT: DEC_DOT_DEC; FLOAT: DEC_DOT_DEC;
REAL: (DECIMAL | DEC_DOT_DEC) ('E' [+-]? DEC_DIGIT+); REAL: (DECIMAL | DEC_DOT_DEC) ('E' [+-]? DEC_DIGIT+);
@ -1265,7 +1265,7 @@ fragment HEX_DIGIT: [0-9A-F];
fragment DEC_DIGIT: [0-9]; fragment DEC_DIGIT: [0-9];
fragment FullWidthLetter options { caseInsensitive=false; } fragment FullWidthLetter /*options { caseInsensitive=false; }*/
: '\u00c0'..'\u00d6' : '\u00c0'..'\u00d6'
| '\u00d8'..'\u00f6' | '\u00d8'..'\u00f6'
| '\u00f8'..'\u00ff' | '\u00f8'..'\u00ff'
@ -1280,4 +1280,4 @@ fragment FullWidthLetter options { caseInsensitive=false; }
| '\uff00'..'\ufff0' | '\uff00'..'\ufff0'
// | '\u10000'..'\u1F9FF' //not support four bytes chars // | '\u10000'..'\u1F9FF' //not support four bytes chars
// | '\u20000'..'\u2FA1F' // | '\u20000'..'\u2FA1F'
; ;

View File

@ -0,0 +1,88 @@
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.tsql.cpd;
// Copied from: https://github.com/antlr/antlr4/blob/4.9.1/doc/resources/CaseChangingCharStream.java
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.misc.Interval;
/**
 * A {@link CharStream} decorator that enables case-insensitive lexing.
 *
 * <p>The lexer looks ahead through {@link #LA(int)}, which reports every
 * symbol folded to a single case (upper or lower, chosen at construction
 * time). Grammar literals therefore only need to be written in that one
 * case, e.g. {@code 'BEGIN'} when upper-casing is selected.
 *
 * <p>{@link #getText(Interval)} is passed straight to the wrapped stream,
 * so the text obtained from this stream keeps its original spelling: the
 * input {@code BeGiN} can match a rule {@code 'BEGIN'} while its text is
 * still reported as {@code BeGiN}.
 */
class CaseChangingCharStream implements CharStream {

    /** The underlying stream that all operations are forwarded to. */
    final CharStream stream;

    /** {@code true} to fold look-ahead symbols to upper case, {@code false} for lower case. */
    final boolean upper;

    /**
     * Wraps the given stream so that look-ahead symbols are seen in a single case.
     *
     * @param stream the stream to decorate
     * @param upper  {@code true} to present symbols in upper case,
     *               {@code false} to present them in lower case
     */
    CaseChangingCharStream(CharStream stream, boolean upper) {
        this.stream = stream;
        this.upper = upper;
    }

    /**
     * Returns the look-ahead symbol at offset {@code i}, converted to the configured case.
     *
     * <p>Non-positive values reported by the wrapped stream (for example an
     * end-of-input marker) are handed back unchanged instead of being case-converted.
     */
    @Override
    public int LA(int i) {
        final int symbol = stream.LA(i);
        if (symbol <= 0) {
            return symbol;
        }
        return upper ? Character.toUpperCase(symbol) : Character.toLowerCase(symbol);
    }

    /** Returns the text of the wrapped stream for the interval, without any case conversion. */
    @Override
    public String getText(Interval interval) {
        return stream.getText(interval);
    }

    // ---- Everything below is plain delegation to the wrapped stream. ----

    @Override
    public void consume() {
        stream.consume();
    }

    @Override
    public int index() {
        return stream.index();
    }

    @Override
    public void seek(int index) {
        stream.seek(index);
    }

    @Override
    public int mark() {
        return stream.mark();
    }

    @Override
    public void release(int marker) {
        stream.release(marker);
    }

    @Override
    public int size() {
        return stream.size();
    }

    @Override
    public String getSourceName() {
        return stream.getSourceName();
    }
}

View File

@ -16,6 +16,7 @@ public class TSqlTokenizer extends AntlrTokenizer {
@Override @Override
protected AntlrTokenManager getLexerForSource(SourceCode sourceCode) { protected AntlrTokenManager getLexerForSource(SourceCode sourceCode) {
CharStream charStream = AntlrTokenizer.getCharStreamFromSourceCode(sourceCode); CharStream charStream = AntlrTokenizer.getCharStreamFromSourceCode(sourceCode);
charStream = new CaseChangingCharStream(charStream, true);
return new AntlrTokenManager(new TSqlLexer(charStream), sourceCode.getFileName()); return new AntlrTokenManager(new TSqlLexer(charStream), sourceCode.getFileName());
} }
} }

View File

@ -98,7 +98,7 @@
<pmd.plugin.version>3.20.0</pmd.plugin.version> <pmd.plugin.version>3.20.0</pmd.plugin.version>
<ant.version>1.10.13</ant.version> <ant.version>1.10.13</ant.version>
<javadoc.plugin.version>3.4.1</javadoc.plugin.version> <javadoc.plugin.version>3.4.1</javadoc.plugin.version>
<antlr.version>4.11.1</antlr.version> <antlr.version>4.7.2</antlr.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>