package rsql

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestNewLexer checks that NewLexer returns a non-nil lexer that keeps the
// given input and reports line 1, column 1 as its initial position.
func TestNewLexer(t *testing.T) {
	input := "SELECT * FROM table"
	lexer := NewLexer(input)

	if lexer == nil {
		t.Fatal("Expected lexer to be created, got nil")
	}

	if lexer.input != input {
		t.Errorf("Expected input %s, got %s", input, lexer.input)
	}

	if lexer.line != 1 {
		t.Errorf("Expected line to be 1, got %d", lexer.line)
	}

	if lexer.column != 1 {
		t.Errorf("Expected column to be 1, got %d", lexer.column)
	}
}

// TestLexerBasicTokens checks that each SQL keyword is lexed into its
// dedicated token type and that the stream is terminated by TokenEOF.
func TestLexerBasicTokens(t *testing.T) {
	tests := []struct {
		input    string
		expected []TokenType
	}{
		{"SELECT", []TokenType{TokenSELECT, TokenEOF}},
		{"FROM", []TokenType{TokenFROM, TokenEOF}},
		{"WHERE", []TokenType{TokenWHERE, TokenEOF}},
		{"GROUP BY", []TokenType{TokenGROUP, TokenBY, TokenEOF}},
		{"ORDER", []TokenType{TokenOrder, TokenEOF}},
		{"DISTINCT", []TokenType{TokenDISTINCT, TokenEOF}},
		{"LIMIT", []TokenType{TokenLIMIT, TokenEOF}},
		{"HAVING", []TokenType{TokenHAVING, TokenEOF}},
		{"AS", []TokenType{TokenAS, TokenEOF}},
		{"AND", []TokenType{TokenAND, TokenEOF}},
		{"OR", []TokenType{TokenOR, TokenEOF}},
		{"LIKE", []TokenType{TokenLIKE, TokenEOF}},
		{"IS", []TokenType{TokenIS, TokenEOF}},
		{"NULL", []TokenType{TokenNULL, TokenEOF}},
		{"NOT", []TokenType{TokenNOT, TokenEOF}},
	}

	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			lexer := NewLexer(test.input)
			for i, expectedType := range test.expected {
				token := lexer.NextToken()
				if token.Type != expectedType {
					t.Errorf("Token %d: expected %v, got %v", i, expectedType, token.Type)
				}
			}
		})
	}
}

// TestQuotedIdentifiers checks lexing of backtick-quoted identifiers,
// including ones containing special characters or spaces, and that an
// unterminated backtick identifier is reported through the error recovery.
func TestQuotedIdentifiers(t *testing.T) {
	// Plain backtick-quoted identifier; the token value keeps the backticks.
	t.Run("基本反引号标识符", func(t *testing.T) {
		lexer := NewLexer("`deviceId`")
		token := lexer.NextToken()
		assert.Equal(t, TokenQuotedIdent, token.Type)
		assert.Equal(t, "`deviceId`", token.Value)
	})

	// Backtick-quoted identifier containing a special character ('-').
	t.Run("包含特殊字符的反引号标识符", func(t *testing.T) {
		lexer := NewLexer("`device-id`")
		token := lexer.NextToken()
		assert.Equal(t, TokenQuotedIdent, token.Type)
		assert.Equal(t, "`device-id`", token.Value)
	})

	// Backtick-quoted identifier containing a space.
	t.Run("包含空格的反引号标识符", func(t *testing.T) {
		lexer := NewLexer("`device id`")
		token := lexer.NextToken()
		assert.Equal(t, TokenQuotedIdent, token.Type)
		assert.Equal(t, "`device id`", token.Value)
	})

	// Unterminated backtick-quoted identifier: a token is still produced and
	// exactly one ErrorTypeUnterminatedString error is expected.
	t.Run("未闭合的反引号标识符", func(t *testing.T) {
		lexer := NewLexer("`deviceId")
		errorRecovery := NewErrorRecovery(nil)
		lexer.SetErrorRecovery(errorRecovery)

		token := lexer.NextToken()
		assert.Equal(t, TokenQuotedIdent, token.Type)
		assert.True(t, errorRecovery.HasErrors())

		errs := errorRecovery.GetErrors()
		// assert.Equal does not stop the test on failure, so only index into
		// errs once the length check has passed; otherwise an empty result
		// would panic instead of producing a readable failure.
		if assert.Equal(t, 1, len(errs)) {
			assert.Equal(t, ErrorTypeUnterminatedString, errs[0].Type)
		}
	})
}

// TestStringLiterals checks lexing of single- and double-quoted string
// literals, and that an unterminated string is reported through the error
// recovery.
func TestStringLiterals(t *testing.T) {
	// Single-quoted string; the token value keeps the quotes.
	t.Run("单引号字符串", func(t *testing.T) {
		lexer := NewLexer("'hello world'")
		token := lexer.NextToken()
		assert.Equal(t, TokenString, token.Type)
		assert.Equal(t, "'hello world'", token.Value)
	})

	// Double-quoted string; the token value keeps the quotes.
	t.Run("双引号字符串", func(t *testing.T) {
		lexer := NewLexer(`"hello world"`)
		token := lexer.NextToken()
		assert.Equal(t, TokenString, token.Type)
		assert.Equal(t, `"hello world"`, token.Value)
	})

	// Unterminated string: a token is still produced and exactly one
	// ErrorTypeUnterminatedString error is expected.
	t.Run("未闭合的字符串", func(t *testing.T) {
		lexer := NewLexer("'hello world")
		errorRecovery := NewErrorRecovery(nil)
		lexer.SetErrorRecovery(errorRecovery)

		token := lexer.NextToken()
		assert.Equal(t, TokenString, token.Type)
		assert.True(t, errorRecovery.HasErrors())

		errs := errorRecovery.GetErrors()
		// Guard the index access: assert.Equal reports but does not abort.
		if assert.Equal(t, 1, len(errs)) {
			assert.Equal(t, ErrorTypeUnterminatedString, errs[0].Type)
		}
	})
}

// TestLexerErrorHandling feeds malformed inputs to the lexer and expects the
// attached error recovery to have recorded at least one error after all
// tokens up to TokenEOF have been consumed. It also checks GetPosition on a
// fresh lexer and the line/column stamped on tokens of a multi-line input.
func TestLexerErrorHandling(t *testing.T) {
	tests := []struct {
		name  string
		input string
	}{
		{"InvalidCharacter", "SELECT * FROM table WHERE id # 5"},
		{"UnterminatedString", "SELECT * FROM table WHERE name = 'test"},
		{"UnterminatedQuotedIdent", "SELECT `field FROM table"},
		{"InvalidNumber", "SELECT * FROM table WHERE value = 123.456.789"},
		{"InvalidOperator", "SELECT * FROM table WHERE a !! b"},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			lexer := NewLexer(test.input)
			errorRecovery := NewErrorRecovery(nil)
			lexer.SetErrorRecovery(errorRecovery)

			// Consume every token until EOF so all errors get recorded.
			for {
				token := lexer.NextToken()
				if token.Type == TokenEOF {
					break
				}
			}

			// At least one error must have been reported.
			if !errorRecovery.HasErrors() {
				t.Errorf("Expected errors for input: %s", test.input)
			}
		})
	}

	// Position reported by a freshly created lexer.
	lexer := NewLexer("SELECT * FROM table")
	pos, line, column := lexer.GetPosition()
	if pos < 0 || line < 1 || column < 0 {
		t.Errorf("Invalid position: pos=%d, line=%d, column=%d", pos, line, column)
	}

	// Position tracking across newlines and leading spaces.
	lexer = NewLexer("SELECT\n  *\nFROM\n  table")

	// SELECT
	token := lexer.NextToken()
	if token.Line != 1 || token.Column != 1 {
		t.Errorf("Expected token at line 1, column 1, got line %d, column %d", token.Line, token.Column)
	}

	// *
	token = lexer.NextToken()
	if token.Line != 2 || token.Column != 3 {
		t.Errorf("Expected token at line 2, column 3, got line %d, column %d", token.Line, token.Column)
	}

	// FROM
	token = lexer.NextToken()
	if token.Line != 3 || token.Column != 1 {
		t.Errorf("Expected token at line 3, column 1, got line %d, column %d", token.Line, token.Column)
	}

	// table
	token = lexer.NextToken()
	if token.Line != 4 || token.Column != 3 {
		t.Errorf("Expected token at line 4, column 3, got line %d, column %d", token.Line, token.Column)
	}
}

// TestLexerOperators checks the token types produced for comparison,
// arithmetic and punctuation operators. Note that "<>" is expected to lex as
// two separate tokens (TokenLT followed by TokenGT).
func TestLexerOperators(t *testing.T) {
	tests := []struct {
		input    string
		expected []TokenType
	}{
		{"=", []TokenType{TokenEQ, TokenEOF}},
		{"!=", []TokenType{TokenNE, TokenEOF}},
		{"<>", []TokenType{TokenLT, TokenGT, TokenEOF}},
		{"<", []TokenType{TokenLT, TokenEOF}},
		{"<=", []TokenType{TokenLE, TokenEOF}},
		{">", []TokenType{TokenGT, TokenEOF}},
		{">=", []TokenType{TokenGE, TokenEOF}},
		{"+", []TokenType{TokenPlus, TokenEOF}},
		{"-", []TokenType{TokenMinus, TokenEOF}},
		{"*", []TokenType{TokenAsterisk, TokenEOF}},
		{"/", []TokenType{TokenSlash, TokenEOF}},
		{"(", []TokenType{TokenLParen, TokenEOF}},
		{")", []TokenType{TokenRParen, TokenEOF}},
		{",", []TokenType{TokenComma, TokenEOF}},
	}

	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			lexer := NewLexer(test.input)
			for i, expectedType := range test.expected {
				token := lexer.NextToken()
				if token.Type != expectedType {
					t.Errorf("Token %d: expected %v, got %v", i, expectedType, token.Type)
				}
			}
		})
	}
}

// TestLexerNumbers checks that integer and decimal literals are lexed as a
// single TokenNumber whose value equals the source text.
func TestLexerNumbers(t *testing.T) {
	tests := []struct {
		input    string
		expected string
	}{
		{"123", "123"},
		{"123.456", "123.456"},
		{"0", "0"},
		{"0.0", "0.0"},
		{"1000000", "1000000"},
	}

	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			lexer := NewLexer(test.input)
			token := lexer.NextToken()
			if token.Type != TokenNumber {
				t.Errorf("Expected TokenNumber, got %v", token.Type)
			}
			if token.Value != test.expected {
				t.Errorf("Expected value %s, got %s", test.expected, token.Value)
			}
		})
	}
}

// TestLexerIdentifiers checks that unquoted identifiers (with underscores,
// digits and mixed case) are lexed as TokenIdent with their text preserved.
func TestLexerIdentifiers(t *testing.T) {
	tests := []struct {
		input    string
		expected string
	}{
		{"table", "table"},
		{"field_name", "field_name"},
		{"table123", "table123"},
		{"_private", "_private"},
		{"CamelCase", "CamelCase"},
		{"deviceId", "deviceId"},
	}

	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			lexer := NewLexer(test.input)
			token := lexer.NextToken()
			if token.Type != TokenIdent {
				t.Errorf("Expected TokenIdent, got %v", token.Type)
			}
			if token.Value != test.expected {
				t.Errorf("Expected value %s, got %s", test.expected, token.Value)
			}
		})
	}
}

// TestTokenTypes checks, for each keyword, both the token type and that the
// token value equals the keyword text as written in the input.
func TestTokenTypes(t *testing.T) {
	// Keyword tokens.
	keywordTests := []struct {
		input    string
		expected TokenType
	}{
		{"SELECT", TokenSELECT},
		{"FROM", TokenFROM},
		{"WHERE", TokenWHERE},
		{"GROUP", TokenGROUP},
		{"BY", TokenBY},
		{"HAVING", TokenHAVING},
		{"ORDER", TokenOrder},
		{"LIMIT", TokenLIMIT},
		{"AND", TokenAND},
		{"OR", TokenOR},
		{"NOT", TokenNOT},
		{"AS", TokenAS},
		{"DISTINCT", TokenDISTINCT},
	}

	for _, test := range keywordTests {
		t.Run(test.input, func(t *testing.T) {
			lexer := NewLexer(test.input)
			token := lexer.NextToken()
			if token.Type != test.expected {
				t.Errorf("Expected token type %v for %s, got %v", test.expected, test.input, token.Type)
			}
			if token.Value != test.input {
				t.Errorf("Expected token value %s, got %s", test.input, token.Value)
			}
		})
	}
}

// TestLexerWhitespace checks that spaces, tabs, newlines and mixtures of them
// between tokens do not change the resulting token type sequence.
func TestLexerWhitespace(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected []TokenType
	}{
		{
			name:     "Spaces",
			input:    "SELECT   *   FROM   table",
			expected: []TokenType{TokenSELECT, TokenAsterisk, TokenFROM, TokenIdent, TokenEOF},
		},
		{
			name:     "Tabs",
			input:    "SELECT\t*\tFROM\ttable",
			expected: []TokenType{TokenSELECT, TokenAsterisk, TokenFROM, TokenIdent, TokenEOF},
		},
		{
			name:     "Newlines",
			input:    "SELECT\n*\nFROM\ntable",
			expected: []TokenType{TokenSELECT, TokenAsterisk, TokenFROM, TokenIdent, TokenEOF},
		},
		{
			name:     "Mixed whitespace",
			input:    "SELECT \t\n * \t\n FROM \t\n table",
			expected: []TokenType{TokenSELECT, TokenAsterisk, TokenFROM, TokenIdent, TokenEOF},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			lexer := NewLexer(test.input)
			for i, expectedType := range test.expected {
				token := lexer.NextToken()
				if token.Type != expectedType {
					t.Errorf("Token %d: expected %v, got %v", i, expectedType, token.Type)
				}
			}
		})
	}
}

// TestLexerComplexTokens checks both token types and token values for short
// multi-token inputs: a function call, a comparison and a quoted string.
func TestLexerComplexTokens(t *testing.T) {
	// want is one expected token; an empty value means "do not check the
	// value" (used for TokenEOF).
	type want struct {
		typ   TokenType
		value string
	}

	tests := []struct {
		name     string
		input    string
		expected []want
	}{
		{
			name:  "Function call",
			input: "COUNT(*)",
			expected: []want{
				{TokenIdent, "COUNT"},
				{TokenLParen, "("},
				{TokenAsterisk, "*"},
				{TokenRParen, ")"},
				{TokenEOF, ""},
			},
		},
		{
			name:  "Comparison",
			input: "age >= 18",
			expected: []want{
				{TokenIdent, "age"},
				{TokenGE, ">="},
				{TokenNumber, "18"},
				{TokenEOF, ""},
			},
		},
		{
			name:  "String with quotes",
			input: "name = 'John Doe'",
			expected: []want{
				{TokenIdent, "name"},
				{TokenEQ, "="},
				{TokenString, "'John Doe'"},
				{TokenEOF, ""},
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			lexer := NewLexer(test.input)
			for i, expected := range test.expected {
				token := lexer.NextToken()
				if token.Type != expected.typ {
					t.Errorf("Token %d: expected type %v, got %v", i, expected.typ, token.Type)
				}
				if expected.value != "" && token.Value != expected.value {
					t.Errorf("Token %d: expected value %s, got %s", i, expected.value, token.Value)
				}
			}
		})
	}
}