package expr

import (
	"fmt"
	"strings"
	"unicode"
)

// TokenType identifies the lexical category of a Token.
type TokenType int

const (
	// TokenKeyword marks a keyword token.
	TokenKeyword TokenType = iota
	// TokenField marks a field token.
	TokenField
	// TokenOperator marks an operator token.
	TokenOperator
	// TokenNumber marks a numeric literal token.
	TokenNumber
	// TokenString marks a string literal token.
	TokenString
	// TokenLeftParen marks a left parenthesis token.
	TokenLeftParen
	// TokenRightParen marks a right parenthesis token.
	TokenRightParen
	// TokenComma marks a comma token.
	TokenComma
)

// Token pairs a token's category with its text.
type Token struct {
	// Type is the token's category.
	Type TokenType
	// Value is the token's text.
	Value string
}

// tokenize splits expr into a flat list of token strings.
//
// Whitespace separates tokens and is dropped. The scanner recognizes, in
// this order of precedence:
//   - string literals in single or double quotes (a backslash skips the
//     following byte); the token keeps its quotes,
//   - backtick-quoted identifiers; the token keeps its backticks,
//   - numbers: digits with an optional fraction and exponent, optionally
//     starting with '.' or with a '-' sign when an operand is expected,
//   - two-character symbolic operators, then single-character operators,
//     '(', ')', ',' and a standalone '.',
//   - identifiers and keywords: a letter, '_' or '$' followed by letters,
//     digits, '_', '.' or '$'.
//
// It returns an error for an empty (or all-whitespace) expression, an
// unterminated string literal or backtick identifier, or any other byte.
func tokenize(expr string) ([]string, error) {
	if len(strings.TrimSpace(expr)) == 0 {
		return nil, fmt.Errorf("empty expression")
	}

	var tokens []string
	for i := 0; i < len(expr); {
		ch := expr[i]
		start := i

		switch {
		case unicode.IsSpace(rune(ch)):
			i++
			continue

		case ch == '\'' || ch == '"':
			end := scanQuotedLiteral(expr, i)
			if end < 0 {
				return nil, fmt.Errorf("unterminated string literal")
			}
			i = end

		case ch == '`':
			closing := strings.IndexByte(expr[i+1:], '`')
			if closing < 0 {
				return nil, fmt.Errorf("unterminated backtick identifier")
			}
			i += closing + 2 // opening backtick + content + closing backtick

		case startsNumberLiteral(expr, i, tokens):
			i = scanNumberLiteral(expr, i)

		// Only symbolic operators are matched here. Word operators such as
		// OR and IS must go through the identifier branch, otherwise an
		// identifier like "order" would be split into "or" and "der".
		case !isLetter(ch) && i+1 < len(expr) && isOperator(expr[i:i+2]):
			i += 2

		case isOperator(expr[i:i+1]) || ch == '(' || ch == ')' || ch == ',' || ch == '.':
			i++

		case isLetter(ch) || ch == '_' || ch == '$':
			for i < len(expr) && (isLetter(expr[i]) || isDigit(expr[i]) ||
				expr[i] == '_' || expr[i] == '.' || expr[i] == '$') {
				i++
			}

		default:
			return nil, fmt.Errorf("unexpected character '%c' at position %d", ch, i)
		}

		tokens = append(tokens, expr[start:i])
	}

	return tokens, nil
}

// scanQuotedLiteral returns the index just past the closing quote of the
// string literal that opens at expr[start], or -1 if it is unterminated.
func scanQuotedLiteral(expr string, start int) int {
	quote := expr[start]
	i := start + 1
	for i < len(expr) && expr[i] != quote {
		if expr[i] == '\\' && i+1 < len(expr) {
			i += 2 // skip the backslash and the escaped byte
		} else {
			i++
		}
	}
	if i >= len(expr) {
		return -1
	}
	return i + 1
}

// startsNumberLiteral reports whether a numeric literal begins at expr[i],
// given the tokens produced so far.
func startsNumberLiteral(expr string, i int, tokens []string) bool {
	ch := expr[i]
	if isDigit(ch) {
		return true
	}
	if i+1 >= len(expr) || !isDigit(expr[i+1]) {
		return false
	}
	switch ch {
	case '-':
		// A '-' is a sign only where an operand is expected; after an
		// operand it is subtraction, so "a-1" matches "a - 1".
		return len(tokens) == 0 || minusSignAllowedAfter(tokens[len(tokens)-1])
	case '.':
		// A leading '.' starts a number only when it does not continue a
		// preceding digit run or another '.'.
		return i == 0 || (!isDigit(expr[i-1]) && expr[i-1] != '.')
	}
	return false
}

// minusSignAllowedAfter reports whether the token prev leaves the
// expression waiting for an operand, so that a following '-' is a sign.
func minusSignAllowedAfter(prev string) bool {
	if prev == "(" || prev == "," || isOperator(prev) {
		return true
	}
	// NOTE(review): keywords assumed to introduce a value; the parser's
	// actual keyword set is not visible here — confirm and extend if needed.
	for _, kw := range [...]string{"CASE", "WHEN", "THEN", "ELSE", "IN", "BETWEEN"} {
		if strings.EqualFold(prev, kw) {
			return true
		}
	}
	return false
}

// scanNumberLiteral returns the index just past the numeric literal that
// begins at expr[start]. The fraction and the exponent are consumed only
// when at least one digit follows the '.' or the exponent marker.
func scanNumberLiteral(expr string, start int) int {
	i := start
	if expr[i] == '-' {
		i++
	}
	for i < len(expr) && isDigit(expr[i]) {
		i++
	}

	if i+1 < len(expr) && expr[i] == '.' && isDigit(expr[i+1]) {
		i++
		for i < len(expr) && isDigit(expr[i]) {
			i++
		}
	}

	if i < len(expr) && (expr[i] == 'e' || expr[i] == 'E') {
		j := i + 1
		if j < len(expr) && (expr[j] == '+' || expr[j] == '-') {
			j++
		}
		// Without exponent digits the 'e' belongs to whatever follows.
		if j < len(expr) && isDigit(expr[j]) {
			for j < len(expr) && isDigit(expr[j]) {
				j++
			}
			i = j
		}
	}
	return i
}

// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch byte) bool {
	return ch >= '0' && ch <= '9'
}

// isLetter reports whether ch is an ASCII letter.
func isLetter(ch byte) bool {
	return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
}

// isNumber reports whether s is a complete numeric literal: an optional
// '-', a mantissa with at least one digit and at most one '.', and an
// optional exponent ('e' or 'E', optional sign, one or more digits).
// Nothing may follow the literal.
func isNumber(s string) bool {
	i := 0
	if i < len(s) && s[i] == '-' {
		i++
	}

	mantissaDigits := 0
	seenDot := false
scan:
	for ; i < len(s); i++ {
		switch c := s[i]; {
		case isDigit(c):
			mantissaDigits++
		case c == '.' && !seenDot:
			seenDot = true
		default:
			break scan
		}
	}
	if mantissaDigits == 0 {
		return false
	}
	if i == len(s) {
		return true
	}

	if s[i] != 'e' && s[i] != 'E' {
		return false
	}
	i++
	if i < len(s) && (s[i] == '+' || s[i] == '-') {
		i++
	}
	expStart := i
	for i < len(s) && isDigit(s[i]) {
		i++
	}
	// The exponent needs digits and must end the string.
	return i > expStart && i == len(s)
}

// isIdentifier reports whether s is a plain identifier: a letter or
// underscore followed by letters, digits or underscores. This is narrower
// than what tokenize accepts, which also allows '$' and '.'.
func isIdentifier(s string) bool {
	if len(s) == 0 {
		return false
	}
	if !isLetter(s[0]) && s[0] != '_' {
		return false
	}
	for i := 1; i < len(s); i++ {
		if !isLetter(s[i]) && !isDigit(s[i]) && s[i] != '_' {
			return false
		}
	}
	return true
}

// isOperator reports whether s is an operator, compared case-insensitively.
func isOperator(s string) bool {
	operators := []string{
		"+", "-", "*", "/", "%", "^",
		"=", "==", "!=", "<>", ">", "<", ">=", "<=",
		"AND", "OR", "NOT", "LIKE", "IS",
	}
	for _, op := range operators {
		if strings.EqualFold(s, op) {
			return true
		}
	}
	return false
}

// isComparisonOperator reports whether op is a comparison operator,
// compared case-insensitively.
func isComparisonOperator(op string) bool {
	comparisonOps := []string{"==", "=", "!=", "<>", ">", "<", ">=", "<=", "LIKE", "IS"}
	for _, compOp := range comparisonOps {
		if strings.EqualFold(op, compOp) {
			return true
		}
	}
	return false
}

// isStringLiteral reports whether s is wrapped in a matching pair of single
// or double quotes.
func isStringLiteral(s string) bool {
	if len(s) < 2 {
		return false
	}
	first, last := s[0], s[len(s)-1]
	return (first == '\'' && last == '\'') || (first == '"' && last == '"')
}