diff --git a/README.md b/README.md index c4bc8b7..7057569 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ [![Go Report](https://goreportcard.com/badge/github.com/rulego/streamsql)](https://goreportcard.com/report/github.com/rulego/streamsql) [![CI](https://github.com/rulego/streamsql/actions/workflows/ci.yml/badge.svg)](https://github.com/rulego/streamsql/actions/workflows/ci.yml) [![RELEASE](https://github.com/rulego/streamsql/actions/workflows/release.yml/badge.svg)](https://github.com/rulego/streamsql/actions/workflows/release.yml) +[![codecov](https://codecov.io/gh/rulego/streamsql/graph/badge.svg?token=1CK1O5J1BI)](https://codecov.io/gh/rulego/streamsql) English| [简体中文](README_ZH.md) @@ -111,6 +112,7 @@ func main() { // Process data one by one, each will output results immediately for _, data := range sensorData { ssql.Emit(data) + //changedData,err:=ssql.EmitSync(data) //Synchronize to obtain processing results time.Sleep(100 * time.Millisecond) // Simulate real-time data arrival } diff --git a/README_ZH.md b/README_ZH.md index 18e5730..eb27876 100644 --- a/README_ZH.md +++ b/README_ZH.md @@ -3,6 +3,7 @@ [![Go Report](https://goreportcard.com/badge/github.com/rulego/streamsql)](https://goreportcard.com/report/github.com/rulego/streamsql) [![CI](https://github.com/rulego/streamsql/actions/workflows/ci.yml/badge.svg)](https://github.com/rulego/streamsql/actions/workflows/ci.yml) [![RELEASE](https://github.com/rulego/streamsql/actions/workflows/release.yml/badge.svg)](https://github.com/rulego/streamsql/actions/workflows/release.yml) +[![codecov](https://codecov.io/gh/rulego/streamsql/graph/badge.svg?token=1CK1O5J1BI)](https://codecov.io/gh/rulego/streamsql) [English](README.md)| 简体中文 @@ -114,6 +115,7 @@ func main() { // 逐条处理数据,每条都会立即输出结果 for _, data := range sensorData { ssql.Emit(data) + //changedData,err:=ssql.EmitSync(data) //同步获得处理结果 time.Sleep(100 * time.Millisecond) // 模拟实时数据到达 } diff --git a/aggregator/builtin.go b/aggregator/builtin.go index 
46dd21f..c66f9da 100644 --- a/aggregator/builtin.go +++ b/aggregator/builtin.go @@ -4,12 +4,10 @@ import ( "github.com/rulego/streamsql/functions" ) -// 为了向后兼容,重新导出functions模块中的类型和函数 - -// AggregateType 聚合类型,重新导出functions.AggregateType +// AggregateType aggregate type, re-exports functions.AggregateType type AggregateType = functions.AggregateType -// 重新导出所有聚合类型常量 +// Re-export all aggregate type constants const ( Sum = functions.Sum Count = functions.Count @@ -28,29 +26,29 @@ const ( Deduplicate = functions.Deduplicate Var = functions.Var VarS = functions.VarS - // 分析函数 + // Analytical functions Lag = functions.Lag Latest = functions.Latest ChangedCol = functions.ChangedCol HadChanged = functions.HadChanged - // 表达式聚合器,用于处理自定义函数 + // Expression aggregator for handling custom functions Expression = functions.Expression ) -// AggregatorFunction 聚合器函数接口,重新导出functions.LegacyAggregatorFunction +// AggregatorFunction aggregator function interface, re-exports functions.LegacyAggregatorFunction type AggregatorFunction = functions.LegacyAggregatorFunction -// ContextAggregator 支持context机制的聚合器接口,重新导出functions.ContextAggregator +// ContextAggregator aggregator interface supporting context mechanism, re-exports functions.ContextAggregator type ContextAggregator = functions.ContextAggregator -// Register 添加自定义聚合器到全局注册表,重新导出functions.RegisterLegacyAggregator +// Register adds custom aggregator to global registry, re-exports functions.RegisterLegacyAggregator func Register(name string, constructor func() AggregatorFunction) { functions.RegisterLegacyAggregator(name, constructor) } -// CreateBuiltinAggregator 创建内置聚合器,重新导出functions.CreateLegacyAggregator +// CreateBuiltinAggregator creates built-in aggregator, re-exports functions.CreateLegacyAggregator func CreateBuiltinAggregator(aggType AggregateType) AggregatorFunction { - // 特殊处理expression类型 + // Special handling for expression type if aggType == "expression" { return &ExpressionAggregatorWrapper{ function: 
functions.NewExpressionAggregatorFunction(), @@ -60,7 +58,7 @@ func CreateBuiltinAggregator(aggType AggregateType) AggregatorFunction { return functions.CreateLegacyAggregator(aggType) } -// ExpressionAggregatorWrapper 包装表达式聚合器,使其兼容LegacyAggregatorFunction接口 +// ExpressionAggregatorWrapper wraps expression aggregator to make it compatible with LegacyAggregatorFunction interface type ExpressionAggregatorWrapper struct { function *functions.ExpressionAggregatorFunction } diff --git a/aggregator/doc.go b/aggregator/doc.go new file mode 100644 index 0000000..d4aa4d4 --- /dev/null +++ b/aggregator/doc.go @@ -0,0 +1,165 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Package aggregator provides data aggregation functionality for StreamSQL. + +This package implements group-based aggregation operations for stream processing, +supporting various aggregation functions and expression evaluation. It provides +thread-safe aggregation with support for custom expressions and built-in functions. 
+ +# Core Features + +• Group Aggregation - Group data by specified fields and apply aggregation functions +• Built-in Functions - Support for Sum, Count, Avg, Max, Min, and more +• Expression Support - Custom expression evaluation within aggregations +• Thread Safety - Concurrent aggregation operations with proper synchronization +• Type Flexibility - Automatic type conversion and validation +• Performance Optimized - Efficient memory usage and processing + +# Aggregation Types + +Supported aggregation functions (re-exported from functions package): + + // Mathematical aggregations + Sum, Count, Avg, Max, Min + StdDev, StdDevS, Var, VarS + Median, Percentile + + // Collection aggregations + Collect, LastValue, MergeAgg + Deduplicate + + // Window aggregations + WindowStart, WindowEnd + + // Analytical functions + Lag, Latest, ChangedCol, HadChanged + + // Custom expressions + Expression + +# Core Interfaces + +Main aggregation interfaces: + + type Aggregator interface { + Add(data interface{}) error + Put(key string, val interface{}) error + GetResults() ([]map[string]interface{}, error) + Reset() + RegisterExpression(field, expression string, fields []string, evaluator func(data interface{}) (interface{}, error)) + } + + type AggregatorFunction interface { + New() AggregatorFunction + Add(value interface{}) + Result() interface{} + } + +# Aggregation Configuration + +Field configuration for aggregations: + + type AggregationField struct { + InputField string // Source field name + AggregateType AggregateType // Aggregation function type + OutputAlias string // Result field alias + } + +# Usage Examples + +Basic group aggregation: + + // Define aggregation fields + aggFields := []AggregationField{ + {InputField: "temperature", AggregateType: Avg, OutputAlias: "avg_temp"}, + {InputField: "humidity", AggregateType: Max, OutputAlias: "max_humidity"}, + {InputField: "*", AggregateType: Count, OutputAlias: "record_count"}, // count(*): counts every record in the group; Count on a non-numeric field makes Add return a conversion error + } + + // Create group aggregator +
aggregator := NewGroupAggregator([]string{"location"}, aggFields) + + // Add data + data := map[string]interface{}{ + "location": "room1", + "temperature": 25.5, + "humidity": 60, + "device_id": "sensor001", + } + aggregator.Add(data) + + // Get results + results, err := aggregator.GetResults() + +Expression-based aggregation: + + // Register custom expression + aggregator.RegisterExpression( + "comfort_index", + "temperature * 0.7 + humidity * 0.3", + []string{"temperature", "humidity"}, + func(data interface{}) (interface{}, error) { + // Custom evaluation logic + return evaluateComfortIndex(data) + }, + ) + +Multiple group aggregation: + + // Group by multiple fields + aggregator := NewGroupAggregator( + []string{"location", "device_type"}, + aggFields, + ) + + // Results will be grouped by both location and device_type + results, err := aggregator.GetResults() + +# Built-in Aggregators + +Create built-in aggregation functions: + + // Create specific aggregator + sumAgg := CreateBuiltinAggregator(Sum) + avgAgg := CreateBuiltinAggregator(Avg) + countAgg := CreateBuiltinAggregator(Count) + + // Use aggregator + sumAgg.Add(10) + sumAgg.Add(20) + result := sumAgg.Result() // returns 30 + +# Custom Aggregators + +Register custom aggregation functions: + + Register("custom_avg", func() AggregatorFunction { + return &CustomAvgAggregator{} + }) + +# Integration + +Integrates with other StreamSQL components: + +• Functions package - Built-in aggregation function implementations +• Stream package - Real-time data aggregation in streams +• Window package - Window-based aggregation operations +• Types package - Data type definitions and conversions +• RSQL package - SQL GROUP BY and aggregation parsing +*/ +package aggregator \ No newline at end of file diff --git a/aggregator/group_aggregator.go b/aggregator/group_aggregator.go index ce6109c..cb021b4 100644 --- a/aggregator/group_aggregator.go +++ b/aggregator/group_aggregator.go @@ -11,20 +11,21 @@ import ( 
"github.com/rulego/streamsql/utils/fieldpath" ) +// Aggregator aggregator interface type Aggregator interface { Add(data interface{}) error Put(key string, val interface{}) error GetResults() ([]map[string]interface{}, error) Reset() - // RegisterExpression 注册表达式计算器 + // RegisterExpression registers expression evaluator RegisterExpression(field, expression string, fields []string, evaluator func(data interface{}) (interface{}, error)) } -// AggregationField 定义单个聚合字段的配置 +// AggregationField defines configuration for a single aggregation field type AggregationField struct { - InputField string // 输入字段名(如 "temperature") - AggregateType AggregateType // 聚合类型(如 Sum, Avg) - OutputAlias string // 输出别名(如 "temp_sum") + InputField string // Input field name (e.g., "temperature") + AggregateType AggregateType // Aggregation type (e.g., Sum, Avg) + OutputAlias string // Output alias (e.g., "temp_sum") } type GroupAggregator struct { @@ -34,26 +35,26 @@ type GroupAggregator struct { groups map[string]map[string]AggregatorFunction mu sync.RWMutex context map[string]interface{} - // 表达式计算器 + // Expression evaluators expressions map[string]*ExpressionEvaluator } -// ExpressionEvaluator 包装表达式计算功能 +// ExpressionEvaluator wraps expression evaluation functionality type ExpressionEvaluator struct { - Expression string // 完整表达式 - Field string // 主字段名 - Fields []string // 表达式中引用的所有字段 + Expression string // Complete expression + Field string // Primary field name + Fields []string // All fields referenced in expression evaluateFunc func(data interface{}) (interface{}, error) } -// NewGroupAggregator 创建新的分组聚合器 +// NewGroupAggregator creates a new group aggregator func NewGroupAggregator(groupFields []string, aggregationFields []AggregationField) *GroupAggregator { aggregators := make(map[string]AggregatorFunction) - // 为每个聚合字段创建聚合器 + // Create aggregator for each aggregation field for i := range aggregationFields { if aggregationFields[i].OutputAlias == "" { - // 如果没有指定别名,使用输入字段名 + // If 
no alias specified, use input field name aggregationFields[i].OutputAlias = aggregationFields[i].InputField } aggregators[aggregationFields[i].OutputAlias] = CreateBuiltinAggregator(aggregationFields[i].AggregateType) @@ -68,7 +69,7 @@ func NewGroupAggregator(groupFields []string, aggregationFields []AggregationFie } } -// RegisterExpression 注册表达式计算器 +// RegisterExpression registers expression evaluator func (ga *GroupAggregator) RegisterExpression(field, expression string, fields []string, evaluator func(data interface{}) (interface{}, error)) { ga.mu.Lock() defer ga.mu.Unlock() @@ -91,26 +92,26 @@ func (ga *GroupAggregator) Put(key string, val interface{}) error { return nil } -// isNumericAggregator 检查聚合器是否需要数值类型输入 +// isNumericAggregator checks if aggregator requires numeric type input func (ga *GroupAggregator) isNumericAggregator(aggType AggregateType) bool { - // 通过functions模块动态检查函数类型 + // Dynamically check function type through functions module if fn, exists := functions.Get(string(aggType)); exists { switch fn.GetType() { case functions.TypeMath: - // 数学函数通常需要数值输入 + // Math functions usually require numeric input return true case functions.TypeAggregation: - // 检查是否是数值聚合函数 + // Check if it's a numeric aggregation function switch string(aggType) { case functions.SumStr, functions.AvgStr, functions.MinStr, functions.MaxStr, functions.CountStr, functions.StdDevStr, functions.MedianStr, functions.PercentileStr, functions.VarStr, functions.VarSStr, functions.StdDevSStr: return true case functions.CollectStr, functions.MergeAggStr, functions.DeduplicateStr, functions.LastValueStr: - // 这些函数可以处理任意类型 + // These functions can handle any type return false default: - // 对于未知的聚合函数,尝试检查函数名称模式 + // For unknown aggregation functions, try to check function name patterns funcName := string(aggType) if strings.Contains(funcName, functions.SumStr) || strings.Contains(funcName, functions.AvgStr) || strings.Contains(funcName, functions.MinStr) || strings.Contains(funcName, 
functions.MaxStr) || @@ -120,15 +121,15 @@ func (ga *GroupAggregator) isNumericAggregator(aggType AggregateType) bool { return false } case functions.TypeAnalytical: - // 分析函数通常可以处理任意类型 + // Analytical functions can usually handle any type return false default: - // 其他类型的函数,保守起见认为不需要数值转换 + // For other types of functions, conservatively assume no numeric conversion needed return false } } - // 如果函数不存在,根据名称模式判断 + // If function doesn't exist, judge by name pattern funcName := string(aggType) if strings.Contains(funcName, functions.SumStr) || strings.Contains(funcName, functions.AvgStr) || strings.Contains(funcName, functions.MinStr) || strings.Contains(funcName, functions.MaxStr) || @@ -160,11 +161,11 @@ func (ga *GroupAggregator) Add(data interface{}) error { var fieldVal interface{} var found bool - // 检查是否是嵌套字段 + // Check if it's a nested field if fieldpath.IsNestedField(field) { fieldVal, found = fieldpath.GetNestedField(data, field) } else { - // 原有的字段访问逻辑 + // Original field access logic var f reflect.Value if v.Kind() == reflect.Map { keyVal := reflect.ValueOf(field) @@ -198,21 +199,21 @@ func (ga *GroupAggregator) Add(data interface{}) error { ga.groups[key] = make(map[string]AggregatorFunction) } - // 为每个字段创建聚合器实例 + // Create aggregator instances for each field for outputAlias, agg := range ga.aggregators { if _, exists := ga.groups[key][outputAlias]; !exists { ga.groups[key][outputAlias] = agg.New() } } - // 处理每个聚合字段 + // Process each aggregation field for _, aggField := range ga.aggregationFields { outputAlias := aggField.OutputAlias if outputAlias == "" { outputAlias = aggField.InputField } - // 检查是否有表达式计算器 + // Check if there's an expression evaluator if expr, hasExpr := ga.expressions[outputAlias]; hasExpr { result, err := expr.evaluateFunc(data) if err != nil { @@ -227,23 +228,23 @@ func (ga *GroupAggregator) Add(data interface{}) error { inputField := aggField.InputField - // 特殊处理count(*)的情况 + // Special handling for count(*) case if inputField == 
"*" { - // 对于count(*),直接添加1,不需要获取具体字段值 + // For count(*), directly add 1 without getting specific field value if groupAgg, exists := ga.groups[key][outputAlias]; exists { groupAgg.Add(1) } continue } - // 获取字段值 - 支持嵌套字段 + // Get field value - supports nested fields var fieldVal interface{} var found bool if fieldpath.IsNestedField(inputField) { fieldVal, found = fieldpath.GetNestedField(data, inputField) } else { - // 原有的字段访问逻辑 + // Original field access logic var f reflect.Value if v.Kind() == reflect.Map { keyVal := reflect.ValueOf(inputField) @@ -259,7 +260,7 @@ func (ga *GroupAggregator) Add(data interface{}) error { } if !found { - // 尝试从context中获取 + // Try to get from context if ga.context != nil { if groupAgg, exists := ga.groups[key][outputAlias]; exists { if contextAgg, ok := groupAgg.(ContextAggregator); ok { @@ -275,9 +276,9 @@ func (ga *GroupAggregator) Add(data interface{}) error { aggType := aggField.AggregateType - // 动态检查是否需要数值转换 + // Dynamically check if numeric conversion is needed if ga.isNumericAggregator(aggType) { - // 对于数值聚合函数,尝试转换为数值类型 + // For numeric aggregation functions, try to convert to numeric type if numVal, err := cast.ToFloat64E(fieldVal); err == nil { if groupAgg, exists := ga.groups[key][outputAlias]; exists { groupAgg.Add(numVal) @@ -286,7 +287,7 @@ func (ga *GroupAggregator) Add(data interface{}) error { return fmt.Errorf("cannot convert field %s value %v to numeric type for aggregator %s", inputField, fieldVal, aggType) } } else { - // 对于非数值聚合函数,直接传递原始值 + // For non-numeric aggregation functions, pass original value directly if groupAgg, exists := ga.groups[key][outputAlias]; exists { groupAgg.Add(fieldVal) } diff --git a/aggregator/group_aggregator_test.go b/aggregator/group_aggregator_test.go index 21e9978..431198e 100644 --- a/aggregator/group_aggregator_test.go +++ b/aggregator/group_aggregator_test.go @@ -129,14 +129,14 @@ func TestGroupAggregator_MultipleAggregators(t *testing.T) { } func TestGroupAggregator_NoAlias(t 
*testing.T) { - // 测试没有指定别名的情况,应该使用输入字段名作为输出字段名 + // Test case where no alias is specified, should use input field name as output field name agg := NewGroupAggregator( []string{"Device"}, []AggregationField{ { InputField: "temperature", AggregateType: Sum, - // OutputAlias 留空,应该使用 InputField + // OutputAlias left empty, should use InputField }, }, ) diff --git a/condition/condition.go b/condition/condition.go index 0b5a9a2..9d61c1c 100644 --- a/condition/condition.go +++ b/condition/condition.go @@ -16,7 +16,7 @@ type ExprCondition struct { } func NewExprCondition(expression string) (Condition, error) { - // 添加自定义字符串函数支持(startsWith、endsWith、contains是内置操作符) + // Add custom string function support (startsWith, endsWith, contains are built-in operators) options := []expr.Option{ expr.Function("like_match", func(params ...any) (any, error) { if len(params) != 2 { @@ -60,22 +60,22 @@ func (ec *ExprCondition) Evaluate(env interface{}) bool { return result.(bool) } -// matchesLikePattern 实现LIKE模式匹配 -// 支持%(匹配任意字符序列)和_(匹配单个字符) +// matchesLikePattern implements LIKE pattern matching +// Supports % (matches any character sequence) and _ (matches single character) func matchesLikePattern(text, pattern string) bool { return likeMatch(text, pattern, 0, 0) } -// likeMatch 递归实现LIKE匹配算法 +// likeMatch recursively implements LIKE matching algorithm func likeMatch(text, pattern string, textIndex, patternIndex int) bool { - // 如果模式已经匹配完成 + // If pattern has been fully matched if patternIndex >= len(pattern) { - return textIndex >= len(text) // 文本也应该匹配完成 + return textIndex >= len(text) // Text should also be fully matched } - // 如果文本已经结束,但模式还有非%字符,则不匹配 + // If text has ended but pattern still has non-% characters, no match if textIndex >= len(text) { - // 检查剩余的模式是否都是% + // Check if remaining pattern characters are all % for i := patternIndex; i < len(pattern); i++ { if pattern[i] != '%' { return false @@ -84,16 +84,16 @@ func likeMatch(text, pattern string, textIndex, patternIndex int) 
bool { return true } - // 处理当前模式字符 + // Process current pattern character patternChar := pattern[patternIndex] if patternChar == '%' { - // %可以匹配0个或多个字符 - // 尝试匹配0个字符(跳过%) + // % can match 0 or more characters + // Try matching 0 characters (skip %) if likeMatch(text, pattern, textIndex, patternIndex+1) { return true } - // 尝试匹配1个或多个字符 + // Try matching 1 or more characters for i := textIndex; i < len(text); i++ { if likeMatch(text, pattern, i+1, patternIndex+1) { return true @@ -101,10 +101,10 @@ func likeMatch(text, pattern string, textIndex, patternIndex int) bool { } return false } else if patternChar == '_' { - // _匹配恰好一个字符 + // _ matches exactly one character return likeMatch(text, pattern, textIndex+1, patternIndex+1) } else { - // 普通字符必须精确匹配 + // Regular characters must match exactly if text[textIndex] == patternChar { return likeMatch(text, pattern, textIndex+1, patternIndex+1) } diff --git a/condition/doc.go b/condition/doc.go new file mode 100644 index 0000000..5263c96 --- /dev/null +++ b/condition/doc.go @@ -0,0 +1,111 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Package condition provides condition evaluation functionality for StreamSQL. + +This package implements condition evaluation using the expr-lang library, +supporting complex boolean expressions for filtering and conditional logic. 
+It provides custom functions for SQL-like operations including LIKE pattern +matching and NULL checking. + +# Core Features + +• Boolean Expression Evaluation - Evaluate complex boolean conditions +• LIKE Pattern Matching - SQL-style pattern matching with % and _ wildcards +• NULL Checking - Support for IS NULL and IS NOT NULL operations +• Custom Functions - Extended function library for SQL compatibility +• Type Safety - Automatic type conversion and validation +• Performance Optimized - Compiled expressions for fast evaluation + +# Condition Interface + +Unified interface for condition evaluation: + + type Condition interface { + Evaluate(env interface{}) bool + } + +# Custom Functions + +Built-in SQL-compatible functions: + + // LIKE pattern matching + like_match(text, pattern) - SQL LIKE operation with % and _ wildcards + + // NULL checking + is_null(value) - Check if value is NULL + is_not_null(value) - Check if value is not NULL + +# Usage Examples + +Basic condition evaluation: + + condition, err := NewExprCondition("age >= 18 && status == 'active'") + if err != nil { + log.Fatal(err) + } + + data := map[string]interface{}{ + "age": 25, + "status": "active", + } + + result := condition.Evaluate(data) // returns true + +LIKE pattern matching: + + condition, err := NewExprCondition("like_match(name, 'John%')") + data := map[string]interface{}{"name": "John Smith"} + result := condition.Evaluate(data) // returns true + +NULL checking: + + condition, err := NewExprCondition("is_not_null(email)") + data := map[string]interface{}{"email": "user@example.com"} + result := condition.Evaluate(data) // returns true + +Complex conditions: + + condition, err := NewExprCondition(` + age >= 18 && + like_match(email, '%@company.com') && + is_not_null(department) + `) + +# Pattern Matching + +LIKE pattern matching supports: + + % - Matches any sequence of characters (including empty) + _ - Matches exactly one character + +Examples: + + 'John%' matches 'John', 'John
Smith', 'Johnny' + 'J_hn' matches 'John' but not 'Johan' + '%@gmail.com' matches any email ending with @gmail.com + +# Integration + +Integrates with other StreamSQL components: + +• Stream package - Data filtering and conditional processing +• RSQL package - WHERE and HAVING clause evaluation +• Types package - Data type handling and conversion +• Expr package - Expression parsing and evaluation +*/ +package condition \ No newline at end of file diff --git a/doc.go b/doc.go index 4721f96..a0d6510 100644 --- a/doc.go +++ b/doc.go @@ -15,23 +15,24 @@ */ /* -Package streamsql 是一个轻量级的、基于 SQL 的物联网边缘流处理引擎。 +Package streamsql is a lightweight, SQL-based IoT edge stream processing engine. -StreamSQL 提供了高效的无界数据流处理和分析能力,支持多种窗口类型、聚合函数、 -自定义函数,以及与 RuleGo 生态的无缝集成。 +StreamSQL provides efficient unbounded data stream processing and analysis capabilities, +supporting multiple window types, aggregate functions, custom functions, and seamless +integration with the RuleGo ecosystem. -# 核心特性 +# Core Features -• 轻量级设计 - 纯内存操作,无外部依赖 -• SQL语法支持 - 使用熟悉的SQL语法处理流数据 -• 多种窗口类型 - 滑动窗口、滚动窗口、计数窗口、会话窗口 -• 丰富的聚合函数 - MAX, MIN, AVG, SUM, STDDEV, MEDIAN, PERCENTILE等 -• 插件式自定义函数 - 运行时动态注册,支持8种函数类型 -• RuleGo生态集成 - 利用RuleGo组件扩展输入输出源 +• Lightweight design - Pure in-memory operations, no external dependencies +• SQL syntax support - Process stream data using familiar SQL syntax +• Multiple window types - Sliding, tumbling, counting, and session windows +• Rich aggregate functions - MAX, MIN, AVG, SUM, STDDEV, MEDIAN, PERCENTILE, etc. 
+• Plugin-based custom functions - Runtime dynamic registration, supports 8 function types +• RuleGo ecosystem integration - Extend input/output sources using RuleGo components -# 入门示例 +# Getting Started -基本的流数据处理: +Basic stream data processing: package main @@ -43,10 +44,10 @@ StreamSQL 提供了高效的无界数据流处理和分析能力,支持多种 ) func main() { - // 创建StreamSQL实例 + // Create StreamSQL instance ssql := streamsql.New() - // 定义SQL查询 - 每5秒按设备ID分组计算温度平均值 + // Define SQL query - Calculate temperature average by device ID every 5 seconds sql := `SELECT deviceId, AVG(temperature) as avg_temp, MIN(humidity) as min_humidity, @@ -56,18 +57,18 @@ StreamSQL 提供了高效的无界数据流处理和分析能力,支持多种 WHERE deviceId != 'device3' GROUP BY deviceId, TumblingWindow('5s')` - // 执行SQL,创建流处理任务 + // Execute SQL, create stream processing task err := ssql.Execute(sql) if err != nil { panic(err) } - // 添加结果处理回调 - ssql.AddSink(func(result interface{}) { - fmt.Printf("聚合结果: %v\n", result) + // Add result processing callback + ssql.AddSink(func(result []map[string]interface{}) { + fmt.Printf("Aggregation result: %v\n", result) }) - // 模拟发送流数据 + // Simulate sending stream data go func() { ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() @@ -75,7 +76,7 @@ StreamSQL 提供了高效的无界数据流处理和分析能力,支持多种 for { select { case <-ticker.C: - // 生成随机设备数据 + // Generate random device data data := map[string]interface{}{ "deviceId": fmt.Sprintf("device%d", rand.Intn(3)+1), "temperature": 20.0 + rand.Float64()*10, @@ -86,36 +87,36 @@ StreamSQL 提供了高效的无界数据流处理和分析能力,支持多种 } }() - // 运行30秒 + // Run for 30 seconds time.Sleep(30 * time.Second) } -# 窗口函数 +# Window Functions -StreamSQL 支持多种窗口类型: +StreamSQL supports multiple window types: - // 滚动窗口 - 每5秒一个独立窗口 + // Tumbling window - Independent window every 5 seconds SELECT AVG(temperature) FROM stream GROUP BY TumblingWindow('5s') - // 滑动窗口 - 窗口大小30秒,每10秒滑动一次 + // Sliding window - 30-second window size, slides every 10 seconds SELECT MAX(temperature) FROM stream GROUP BY SlidingWindow('30s', '10s') - 
// 计数窗口 - 每100条记录一个窗口 + // Counting window - One window per 100 records SELECT COUNT(*) FROM stream GROUP BY CountingWindow(100) - // 会话窗口 - 超时5分钟自动关闭会话 + // Session window - Automatically closes session after 5-minute timeout SELECT user_id, COUNT(*) FROM stream GROUP BY user_id, SessionWindow('5m') -# 自定义函数 +# Custom Functions -StreamSQL 支持插件式自定义函数,运行时动态注册: +StreamSQL supports plugin-based custom functions with runtime dynamic registration: - // 注册温度转换函数 + // Register temperature conversion function functions.RegisterCustomFunction( "fahrenheit_to_celsius", functions.TypeConversion, - "温度转换", - "华氏度转摄氏度", + "Temperature conversion", + "Fahrenheit to Celsius", 1, 1, func(ctx *functions.FunctionContext, args []interface{}) (interface{}, error) { f, _ := functions.ConvertToFloat64(args[0]) @@ -123,55 +124,55 @@ StreamSQL 支持插件式自定义函数,运行时动态注册: }, ) - // 立即在SQL中使用 + // Use immediately in SQL sql := `SELECT deviceId, AVG(fahrenheit_to_celsius(temperature)) as avg_celsius FROM stream GROUP BY deviceId, TumblingWindow('5s')` -支持的自定义函数类型: -• TypeMath - 数学计算函数 -• TypeString - 字符串处理函数 -• TypeConversion - 类型转换函数 -• TypeDateTime - 时间日期函数 -• TypeAggregation - 聚合函数 -• TypeAnalytical - 分析函数 -• TypeWindow - 窗口函数 -• TypeCustom - 通用自定义函数 +Supported custom function types: +• TypeMath - Mathematical calculation functions +• TypeString - String processing functions +• TypeConversion - Type conversion functions +• TypeDateTime - Date and time functions +• TypeAggregation - Aggregate functions +• TypeAnalytical - Analytical functions +• TypeWindow - Window functions +• TypeCustom - General custom functions -# 日志配置 +# Log Configuration -StreamSQL 提供灵活的日志配置选项: +StreamSQL provides flexible log configuration options: - // 设置日志级别 + // Set log level ssql := streamsql.New(streamsql.WithLogLevel(logger.DEBUG)) - // 输出到文件 + // Output to file logFile, _ := os.OpenFile("app.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) ssql := streamsql.New(streamsql.WithLogOutput(logFile, logger.INFO)) - // 
禁用日志(生产环境) + // Disable logging (production environment) ssql := streamsql.New(streamsql.WithDiscardLog()) -# 与RuleGo集成 +# RuleGo Integration -StreamSQL提供了与RuleGo规则引擎的深度集成,通过两个专用组件实现流式数据处理: +StreamSQL provides deep integration with the RuleGo rule engine through two dedicated components for stream data processing: -• streamTransform (x/streamTransform) - 流转换器,处理非聚合SQL查询 -• streamAggregator (x/streamAggregator) - 流聚合器,处理聚合SQL查询 +• streamTransform (x/streamTransform) - Stream transformer, handles non-aggregation SQL queries +• streamAggregator (x/streamAggregator) - Stream aggregator, handles aggregation SQL queries -基本集成示例: +Basic integration example: package main import ( "github.com/rulego/rulego" "github.com/rulego/rulego/api/types" - // 注册StreamSQL组件 + // Register StreamSQL components _ "github.com/rulego/rulego-components/external/streamsql" ) func main() { - // 规则链配置 + // Rule chain configuration ruleChainJson := `{ "ruleChain": {"id": "rule01"}, "metadata": { @@ -196,10 +197,10 @@ StreamSQL提供了与RuleGo规则引擎的深度集成,通过两个专用组 } }` - // 创建规则引擎 + // Create rule engine ruleEngine, _ := rulego.New("rule01", []byte(ruleChainJson)) - // 发送数据 + // Send data data := `{"deviceId":"sensor01","temperature":25.5}` msg := types.NewMsg(0, "TELEMETRY", types.JSON, types.NewMetadata(), data) ruleEngine.OnMsg(msg) diff --git a/expr/doc.go b/expr/doc.go new file mode 100644 index 0000000..7d20a9b --- /dev/null +++ b/expr/doc.go @@ -0,0 +1,117 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Package expr provides expression parsing and evaluation capabilities for StreamSQL. + +This package implements a comprehensive expression engine that supports mathematical operations, +logical comparisons, function calls, field references, and complex CASE expressions. +It serves as the foundation for WHERE clauses, HAVING clauses, and computed fields in SQL queries. + +# Core Features + +• Mathematical Operations - Supports arithmetic operators (+, -, *, /, %, ^) with proper precedence +• Logical Operations - Boolean logic with AND, OR operators and comparison operators (=, !=, <, >, <=, >=, LIKE) +• Function Integration - Seamless integration with the functions package for built-in and custom functions +• Field References - Dynamic field access with dot notation support for nested data structures +• CASE Expressions - Full support for both simple and searched CASE expressions +• Type Safety - Automatic type conversion and validation during expression evaluation +• Fallback Support - Integration with expr-lang/expr library for complex expressions + +# Expression Types + +The package supports various expression node types: + + // Basic types + TypeNumber - Numeric constants (integers and floats) + TypeString - String literals with proper escaping + TypeField - Field references (e.g., "temperature", "device.id") + TypeOperator - Binary and unary operators + TypeFunction - Function calls with argument validation + TypeParenthesis - Grouped expressions for precedence control + TypeCase - CASE expressions for conditional logic + +# Usage Examples + +Basic mathematical expression: + + expr, err := NewExpression("temperature * 1.8 + 32") + if err != nil { + log.Fatal(err) + } + result, err := expr.Evaluate(data) + +Logical expression with field references: + + expr, err := NewExpression("temperature > 25 AND humidity < 60") + result, err := 
expr.Evaluate(data) + +Function call expression: + + expr, err := NewExpression("UPPER(device_name) LIKE 'SENSOR%'") + result, err := expr.Evaluate(data) + +CASE expression for conditional logic: + + expr, err := NewExpression(` + CASE + WHEN temperature > 30 THEN 'hot' + WHEN temperature > 20 THEN 'warm' + ELSE 'cold' + END + `) + result, err := expr.Evaluate(data) + +# Operator Precedence + +The expression parser follows standard mathematical precedence rules: + + 1. Parentheses (highest) + 2. Power (^) + 3. Multiplication, Division, Modulo (*, /, %) + 4. Addition, Subtraction (+, -) + 5. Comparison (>, <, >=, <=, LIKE, IS) + 6. Equality (=, ==, !=, <>) + 7. Logical AND + 8. Logical OR (lowest) + +# Error Handling + +The package provides comprehensive error handling with detailed error messages: + +• Syntax validation during expression creation +• Type checking during evaluation +• Function argument validation +• Graceful fallback to expr-lang for unsupported expressions + +# Performance Considerations + +• Expressions are parsed once and can be evaluated multiple times +• Built-in operator optimization for common mathematical operations +• Lazy evaluation for logical operators (short-circuiting) +• Efficient field access caching for repeated evaluations +• Automatic fallback to optimized expr-lang library when needed + +# Integration + +This package integrates seamlessly with other StreamSQL components: + +• Functions package - For built-in and custom function execution +• Types package - For data type definitions and conversions +• Stream package - For real-time expression evaluation in data streams +• RSQL package - For SQL parsing and expression extraction +*/ +package expr \ No newline at end of file diff --git a/expr/expression.go b/expr/expression.go index dbf2dde..41b7d68 100644 --- a/expr/expression.go +++ b/expr/expression.go @@ -10,18 +10,18 @@ import ( "github.com/rulego/streamsql/utils/fieldpath" ) -// 表达式类型 +// Expression types const ( - TypeNumber 
= "number" // 数字常量 - TypeField = "field" // 字段引用 - TypeOperator = "operator" // 运算符 - TypeFunction = "function" // 函数调用 - TypeParenthesis = "parenthesis" // 括号 - TypeCase = "case" // CASE表达式 - TypeString = "string" // 字符串常量 + TypeNumber = "number" // Number constant + TypeField = "field" // Field reference + TypeOperator = "operator" // Operator + TypeFunction = "function" // Function call + TypeParenthesis = "parenthesis" // Parenthesis + TypeCase = "case" // CASE expression + TypeString = "string" // String constant ) -// 操作符优先级 +// Operator precedence var operatorPrecedence = map[string]int{ "OR": 1, "AND": 2, @@ -29,47 +29,47 @@ var operatorPrecedence = map[string]int{ ">": 4, "<": 4, ">=": 4, "<=": 4, "LIKE": 4, "IS": 4, "+": 5, "-": 5, "*": 6, "/": 6, "%": 6, - "^": 7, // 幂运算 + "^": 7, // Power operation } -// CASE表达式的WHEN子句 +// WhenClause represents a WHEN clause in CASE expression type WhenClause struct { - Condition *ExprNode // WHEN条件 - Result *ExprNode // THEN结果 + Condition *ExprNode // WHEN condition + Result *ExprNode // THEN result } -// 表达式节点 +// ExprNode represents an expression node type ExprNode struct { Type string Value string Left *ExprNode Right *ExprNode - Args []*ExprNode // 用于函数调用的参数 + Args []*ExprNode // Arguments for function calls - // CASE表达式专用字段 - CaseExpr *ExprNode // CASE后面的表达式(简单CASE) - WhenClauses []WhenClause // WHEN子句列表 - ElseExpr *ExprNode // ELSE表达式 + // Fields specific to CASE expressions + CaseExpr *ExprNode // Expression after CASE (simple CASE) + WhenClauses []WhenClause // List of WHEN clauses + ElseExpr *ExprNode // ELSE expression } -// Expression 表示一个可计算的表达式 +// Expression represents a computable expression type Expression struct { Root *ExprNode - useExprLang bool // 是否使用expr-lang/expr - exprLangExpression string // expr-lang表达式字符串 + useExprLang bool // Whether to use expr-lang/expr + exprLangExpression string // expr-lang expression string } -// NewExpression 创建一个新的表达式 +// NewExpression creates a new expression func 
NewExpression(exprStr string) (*Expression, error) { - // 进行基本的语法验证 + // Perform basic syntax validation if err := validateBasicSyntax(exprStr); err != nil { return nil, err } - // 首先尝试使用自定义解析器 + // First try using custom parser tokens, err := tokenize(exprStr) if err != nil { - // 如果自定义解析失败,标记为使用expr-lang + // If custom parsing fails, mark to use expr-lang return &Expression{ Root: nil, useExprLang: true, @@ -79,7 +79,7 @@ func NewExpression(exprStr string) (*Expression, error) { root, err := parseExpression(tokens) if err != nil { - // 如果自定义解析失败,标记为使用expr-lang + // If custom parsing fails, mark to use expr-lang return &Expression{ Root: nil, useExprLang: true, @@ -93,9 +93,9 @@ func NewExpression(exprStr string) (*Expression, error) { }, nil } -// validateBasicSyntax 进行基本的语法验证 +// validateBasicSyntax performs basic syntax validation func validateBasicSyntax(exprStr string) error { - // 检查空表达式 + // Check empty expression trimmed := strings.TrimSpace(exprStr) if trimmed == "" { return fmt.Errorf("empty expression") @@ -133,10 +133,10 @@ func validateBasicSyntax(exprStr string) error { return nil } -// checkConsecutiveOperators 检查连续运算符 +// checkConsecutiveOperators checks for consecutive operators func checkConsecutiveOperators(expr string) error { - // 简化的连续运算符检查:查找明显的双运算符模式 - // 但要允许比较运算符后跟负数的情况 + // Simplified consecutive operator check: look for obvious double operator patterns + // But allow comparison operators followed by negative numbers operators := []string{"+", "-", "*", "/", "%", "^", "==", "!=", ">=", "<=", ">", "<"} comparisonOps := []string{"==", "!=", ">=", "<=", ">", "<"} @@ -209,34 +209,34 @@ func checkConsecutiveOperators(expr string) error { return nil } -// isValidChar 检查字符是否有效 +// isValidChar checks if a character is valid func isValidChar(ch rune) bool { - // 字母和数字 + // Letters and digits if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') { return true } - // 特殊字符 + // Special characters switch ch { - case ' ', 
'\t', '\n', '\r': // 空白字符 + case ' ', '\t', '\n', '\r': // Whitespace characters return true - case '+', '-', '*', '/', '%', '^': // 算术运算符 + case '+', '-', '*', '/', '%', '^': // Arithmetic operators return true - case '(', ')', ',': // 括号和逗号 + case '(', ')', ',': // Parentheses and comma return true - case '>', '<', '=', '!': // 比较运算符 + case '>', '<', '=', '!': // Comparison operators return true - case '\'', '"': // 引号 + case '\'', '"': // Quotes return true - case '.', '_': // 点和下划线 + case '.', '_': // Dot and underscore return true - case '$': // 美元符号(用于JSON路径等) + case '$': // Dollar sign (for JSON paths etc.) return true default: return false } } -// Evaluate 计算表达式的值 +// Evaluate calculates the value of the expression func (e *Expression) Evaluate(data map[string]interface{}) (float64, error) { if e.useExprLang { return e.evaluateWithExprLang(data) @@ -244,16 +244,16 @@ func (e *Expression) Evaluate(data map[string]interface{}) (float64, error) { return evaluateNode(e.Root, data) } -// evaluateWithExprLang 使用expr-lang/expr评估表达式 +// evaluateWithExprLang evaluates expression using expr-lang/expr func (e *Expression) evaluateWithExprLang(data map[string]interface{}) (float64, error) { - // 使用桥接器评估表达式 + // Use bridge to evaluate expression bridge := functions.GetExprBridge() result, err := bridge.EvaluateExpression(e.exprLangExpression, data) if err != nil { return 0, err } - // 尝试转换结果为float64 + // Try to convert result to float64 switch v := result.(type) { case float64: return v, nil @@ -275,11 +275,11 @@ func (e *Expression) evaluateWithExprLang(data map[string]interface{}) (float64, } } -// GetFields 获取表达式中引用的所有字段 +// GetFields gets all fields referenced in the expression func (e *Expression) GetFields() []string { if e.useExprLang { - // 对于expr-lang表达式,需要解析字段引用 - // 这里简化处理,实际应该使用AST分析 + // For expr-lang expressions, need to parse field references + // Simplified handling here, should use AST analysis in practice return 
extractFieldsFromExprLang(e.exprLangExpression) } @@ -293,13 +293,13 @@ func (e *Expression) GetFields() []string { return result } -// extractFieldsFromExprLang 从expr-lang表达式中提取字段引用(简化版本) +// extractFieldsFromExprLang extracts field references from expr-lang expression (simplified version) func extractFieldsFromExprLang(expression string) []string { - // 这是一个简化的实现,实际应该使用AST解析 - // 暂时使用正则表达式或简单的字符串解析 + // This is a simplified implementation, should use AST parsing in practice + // Temporarily use regex or simple string parsing fields := make(map[string]bool) - // 简单的字段提取:查找标识符模式,支持点号分隔的嵌套字段 + // Simple field extraction: find identifier patterns, support dot-separated nested fields tokens := strings.FieldsFunc(expression, func(c rune) bool { return !(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') && !(c >= '0' && c <= '9') && c != '_' && c != '.' }) @@ -317,13 +317,13 @@ func extractFieldsFromExprLang(expression string) []string { return result } -// isValidFieldIdentifier 检查是否是有效的字段标识符(支持点号分隔的嵌套字段) +// isValidFieldIdentifier checks if it's a valid field identifier (supports dot-separated nested fields) func isValidFieldIdentifier(s string) bool { if len(s) == 0 { return false } - // 分割点号分隔的字段 + // Split dot-separated fields parts := strings.Split(s, ".") for _, part := range parts { if !isIdentifier(part) { @@ -334,13 +334,13 @@ func isValidFieldIdentifier(s string) bool { return true } -// isFunctionOrKeyword 检查是否是函数名或关键字 +// isFunctionOrKeyword checks if it's a function name or keyword func isFunctionOrKeyword(token string) bool { - // 检查是否是已知函数或关键字 + // Check if it's a known function or keyword keywords := []string{ "and", "or", "not", "true", "false", "nil", "null", "is", "if", "else", "then", "in", "contains", "matches", - // CASE表达式关键字 + // CASE expression keywords "case", "when", "then", "else", "end", } @@ -350,13 +350,13 @@ func isFunctionOrKeyword(token string) bool { } } - // 检查是否是注册的函数 + // Check if it's a registered function bridge := 
functions.GetExprBridge() _, exists, _ := bridge.ResolveFunction(token) return exists } -// collectFields 收集表达式中所有字段 +// collectFields collects all fields in the expression func collectFields(node *ExprNode, fields map[string]bool) { if node == nil { return @@ -366,20 +366,20 @@ func collectFields(node *ExprNode, fields map[string]bool) { fields[node.Value] = true } - // 处理CASE表达式的字段收集 + // Handle field collection for CASE expressions if node.Type == TypeCase { - // 收集CASE表达式本身的字段 + // Collect fields from CASE expression itself if node.CaseExpr != nil { collectFields(node.CaseExpr, fields) } - // 收集所有WHEN子句中的字段 + // Collect fields from all WHEN clauses for _, whenClause := range node.WhenClauses { collectFields(whenClause.Condition, fields) collectFields(whenClause.Result, fields) } - // 收集ELSE表达式中的字段 + // Collect fields from ELSE expression if node.ElseExpr != nil { collectFields(node.ElseExpr, fields) } @@ -395,7 +395,7 @@ func collectFields(node *ExprNode, fields map[string]bool) { } } -// evaluateNode 计算节点的值 +// evaluateNode calculates the value of a node func evaluateNode(node *ExprNode, data map[string]interface{}) (float64, error) { if node == nil { return 0, fmt.Errorf("null expression node") @@ -406,54 +406,54 @@ func evaluateNode(node *ExprNode, data map[string]interface{}) (float64, error) return strconv.ParseFloat(node.Value, 64) case TypeString: - // 处理字符串类型,去掉引号并尝试转换为数字 - // 如果无法转换,返回错误(因为这个函数返回float64) + // Handle string type, remove quotes and try to convert to number + // If conversion fails, return error (since this function returns float64) value := node.Value if len(value) >= 2 && (value[0] == '\'' || value[0] == '"') { - value = value[1 : len(value)-1] // 去掉引号 + value = value[1 : len(value)-1] // Remove quotes } - // 尝试转换为数字 + // Try to convert to number if f, err := strconv.ParseFloat(value, 64); err == nil { return f, nil } - // 对于字符串比较,我们需要返回一个哈希值或者错误 - // 这里简化处理,将字符串转换为其长度(作为临时解决方案) + // For string comparison, we need to return a hash value 
or error + // Simplified handling here, convert string to its length (as temporary solution) return float64(len(value)), nil case TypeField: - // 处理反引号标识符,去除反引号 + // Handle backtick identifiers, remove backticks fieldName := node.Value if len(fieldName) >= 2 && fieldName[0] == '`' && fieldName[len(fieldName)-1] == '`' { - fieldName = fieldName[1 : len(fieldName)-1] // 去掉反引号 + fieldName = fieldName[1 : len(fieldName)-1] // Remove backticks } - // 支持嵌套字段访问 + // Support nested field access if fieldpath.IsNestedField(fieldName) { if val, found := fieldpath.GetNestedField(data, fieldName); found { - // 尝试转换为float64 + // Try to convert to float64 if floatVal, err := convertToFloat(val); err == nil { return floatVal, nil } - // 如果不能转换为数字,返回错误 + // If cannot convert to number, return error return 0, fmt.Errorf("field '%s' value cannot be converted to number: %v", fieldName, val) } } else { - // 原有的简单字段访问 + // Original simple field access if val, found := data[fieldName]; found { - // 尝试转换为float64 + // Try to convert to float64 if floatVal, err := convertToFloat(val); err == nil { return floatVal, nil } - // 如果不能转换为数字,返回错误 + // If cannot convert to number, return error return 0, fmt.Errorf("field '%s' value cannot be converted to number: %v", fieldName, val) } } return 0, fmt.Errorf("field '%s' not found", fieldName) case TypeOperator: - // 计算左右子表达式的值 + // Calculate values of left and right sub-expressions left, err := evaluateNode(node.Left, data) if err != nil { return 0, err @@ -464,7 +464,7 @@ func evaluateNode(node *ExprNode, data map[string]interface{}) (float64, error) return 0, err } - // 执行运算 + // Perform operation switch node.Value { case "+": return left + right, nil @@ -489,13 +489,13 @@ func evaluateNode(node *ExprNode, data map[string]interface{}) (float64, error) } case TypeFunction: - // 首先检查是否是新的函数注册系统中的函数 + // First check if it's a function in the new function registration system fn, exists := functions.Get(node.Value) if exists { - // 计算所有参数,但保持原始类型 + // 
Calculate all arguments but keep original types args := make([]interface{}, len(node.Args)) for i, arg := range node.Args { - // 使用evaluateNodeValue获取原始类型的值 + // Use evaluateNodeValue to get original type values val, err := evaluateNodeValue(arg, data) if err != nil { return 0, err @@ -503,18 +503,18 @@ func evaluateNode(node *ExprNode, data map[string]interface{}) (float64, error) args[i] = val } - // 创建函数执行上下文 + // Create function execution context ctx := &functions.FunctionContext{ Data: data, } - // 执行函数 + // Execute function result, err := fn.Execute(ctx, args) if err != nil { return 0, err } - // 转换结果为 float64 + // Convert result to float64 switch r := result.(type) { case float64: return r, nil @@ -527,13 +527,13 @@ func evaluateNode(node *ExprNode, data map[string]interface{}) (float64, error) case int64: return float64(r), nil case string: - // 对于字符串结果,尝试转换为数字,如果失败则返回字符串长度 + // For string results, try to convert to number, if failed return string length if f, err := strconv.ParseFloat(r, 64); err == nil { return f, nil } return float64(len(r)), nil case bool: - // 布尔值转换:true=1, false=0 + // Boolean conversion: true=1, false=0 if r { return 1.0, nil } @@ -543,18 +543,18 @@ func evaluateNode(node *ExprNode, data map[string]interface{}) (float64, error) } } - // 回退到内置函数处理(保持向后兼容) + // Fall back to built-in function handling (maintain backward compatibility) return evaluateBuiltinFunction(node, data) case TypeCase: - // 处理CASE表达式 + // Handle CASE expression return evaluateCaseExpression(node, data) } return 0, fmt.Errorf("unknown node type: %s", node.Type) } -// evaluateBuiltinFunction 处理内置函数(向后兼容) +// evaluateBuiltinFunction handles built-in functions (backward compatibility) func evaluateBuiltinFunction(node *ExprNode, data map[string]interface{}) (float64, error) { switch strings.ToLower(node.Value) { case "abs": @@ -645,28 +645,28 @@ func evaluateBuiltinFunction(node *ExprNode, data map[string]interface{}) (float } } -// evaluateCaseExpression 计算CASE表达式 
+// evaluateCaseExpression evaluates CASE expression func evaluateCaseExpression(node *ExprNode, data map[string]interface{}) (float64, error) { if node.Type != TypeCase { return 0, fmt.Errorf("node is not a CASE expression") } - // 处理简单CASE表达式 (CASE expr WHEN value1 THEN result1 ...) + // Handle simple CASE expression (CASE expr WHEN value1 THEN result1 ...) if node.CaseExpr != nil { - // 计算CASE后面的表达式值 + // Calculate the value of expression after CASE caseValue, err := evaluateNodeValue(node.CaseExpr, data) if err != nil { return 0, err } - // 遍历WHEN子句,查找匹配的值 + // Iterate through WHEN clauses to find matching values for _, whenClause := range node.WhenClauses { conditionValue, err := evaluateNodeValue(whenClause.Condition, data) if err != nil { return 0, err } - // 比较值是否相等 + // Compare if values are equal isEqual, err := compareValues(caseValue, conditionValue, "==") if err != nil { return 0, err @@ -677,54 +677,54 @@ func evaluateCaseExpression(node *ExprNode, data map[string]interface{}) (float6 } } } else { - // 处理搜索CASE表达式 (CASE WHEN condition1 THEN result1 ...) + // Handle searched CASE expression (CASE WHEN condition1 THEN result1 ...) 
for _, whenClause := range node.WhenClauses { - // 评估WHEN条件,这里需要特殊处理布尔表达式 + // Evaluate WHEN condition, need special handling for boolean expressions conditionResult, err := evaluateBooleanCondition(whenClause.Condition, data) if err != nil { return 0, err } - // 如果条件为真,返回对应的结果 + // If condition is true, return corresponding result if conditionResult { return evaluateNode(whenClause.Result, data) } } } - // 如果没有匹配的WHEN子句,执行ELSE子句 + // If no WHEN clause matches, execute ELSE clause if node.ElseExpr != nil { return evaluateNode(node.ElseExpr, data) } - // 如果没有ELSE子句,SQL标准是返回NULL,这里返回0 + // If no ELSE clause, SQL standard returns NULL, here return 0 return 0, nil } -// evaluateBooleanCondition 计算布尔条件表达式 +// evaluateBooleanCondition evaluates boolean condition expression func evaluateBooleanCondition(node *ExprNode, data map[string]interface{}) (bool, error) { if node == nil { return false, fmt.Errorf("null condition expression") } - // 处理逻辑运算符(实现短路求值) + // Handle logical operators (implement short-circuit evaluation) if node.Type == TypeOperator && (node.Value == "AND" || node.Value == "OR") { leftBool, err := evaluateBooleanCondition(node.Left, data) if err != nil { return false, err } - // 短路求值:对于AND,如果左边为false,立即返回false + // Short-circuit evaluation: for AND, if left is false, return false immediately if node.Value == "AND" && !leftBool { return false, nil } - // 短路求值:对于OR,如果左边为true,立即返回true + // Short-circuit evaluation: for OR, if left is true, return true immediately if node.Value == "OR" && leftBool { return true, nil } - // 只有在需要时才评估右边的表达式 + // Only evaluate right expression when needed rightBool, err := evaluateBooleanCondition(node.Right, data) if err != nil { return false, err @@ -738,12 +738,12 @@ func evaluateBooleanCondition(node *ExprNode, data map[string]interface{}) (bool } } - // 处理IS NULL和IS NOT NULL特殊情况 + // Handle IS NULL and IS NOT NULL special cases if node.Type == TypeOperator && node.Value == "IS" { return evaluateIsCondition(node, data) } - 
// 处理比较运算符 + // Handle comparison operators if node.Type == TypeOperator { leftValue, err := evaluateNodeValue(node.Left, data) if err != nil { @@ -758,36 +758,36 @@ func evaluateBooleanCondition(node *ExprNode, data map[string]interface{}) (bool return compareValues(leftValue, rightValue, node.Value) } - // 对于其他表达式,计算其数值并转换为布尔值 + // For other expressions, calculate numeric value and convert to boolean result, err := evaluateNode(node, data) if err != nil { return false, err } - // 非零值为真,零值为假 + // Non-zero values are true, zero values are false return result != 0, nil } -// evaluateIsCondition 处理IS NULL和IS NOT NULL条件 +// evaluateIsCondition handles IS NULL and IS NOT NULL conditions func evaluateIsCondition(node *ExprNode, data map[string]interface{}) (bool, error) { if node == nil || node.Left == nil || node.Right == nil { return false, fmt.Errorf("invalid IS condition") } - // 获取左侧值 + // Get left side value leftValue, err := evaluateNodeValue(node.Left, data) if err != nil { - // 如果字段不存在,认为是null + // If field doesn't exist, consider it as null leftValue = nil } - // 检查右侧是否是NULL或NOT NULL + // Check if right side is NULL or NOT NULL if node.Right.Type == TypeField && strings.ToUpper(node.Right.Value) == "NULL" { // IS NULL return leftValue == nil, nil } - // 检查是否是IS NOT NULL + // Check if it's IS NOT NULL if node.Right.Type == TypeOperator && node.Right.Value == "NOT" && node.Right.Right != nil && node.Right.Right.Type == TypeField && strings.ToUpper(node.Right.Right.Value) == "NULL" { @@ -795,7 +795,7 @@ func evaluateIsCondition(node *ExprNode, data map[string]interface{}) (bool, err return leftValue != nil, nil } - // 其他IS比较(如IS TRUE, IS FALSE等,暂不支持) + // Other IS comparisons (like IS TRUE, IS FALSE etc., not supported yet) rightValue, err := evaluateNodeValue(node.Right, data) if err != nil { return false, err @@ -804,7 +804,7 @@ func evaluateIsCondition(node *ExprNode, data map[string]interface{}) (bool, err return compareValues(leftValue, rightValue, "==") } 
-// evaluateNodeValue 计算节点值,返回interface{}以支持不同类型 +// evaluateNodeValue calculates node value, returns interface{} to support different types func evaluateNodeValue(node *ExprNode, data map[string]interface{}) (interface{}, error) { if node == nil { return nil, fmt.Errorf("null expression node") @@ -815,7 +815,7 @@ func evaluateNodeValue(node *ExprNode, data map[string]interface{}) (interface{} return strconv.ParseFloat(node.Value, 64) case TypeString: - // 去掉引号 + // Remove quotes value := node.Value if len(value) >= 2 && (value[0] == '\'' || value[0] == '"') { value = value[1 : len(value)-1] @@ -823,19 +823,19 @@ func evaluateNodeValue(node *ExprNode, data map[string]interface{}) (interface{} return value, nil case TypeField: - // 处理反引号标识符,去除反引号 + // Handle backtick identifiers, remove backticks fieldName := node.Value if len(fieldName) >= 2 && fieldName[0] == '`' && fieldName[len(fieldName)-1] == '`' { - fieldName = fieldName[1 : len(fieldName)-1] // 去掉反引号 + fieldName = fieldName[1 : len(fieldName)-1] // Remove backticks } - // 支持嵌套字段访问 + // Support nested field access if fieldpath.IsNestedField(fieldName) { if val, found := fieldpath.GetNestedField(data, fieldName); found { return val, nil } } else { - // 原有的简单字段访问 + // Original simple field access if val, found := data[fieldName]; found { return val, nil } @@ -843,14 +843,14 @@ func evaluateNodeValue(node *ExprNode, data map[string]interface{}) (interface{} return nil, fmt.Errorf("field '%s' not found", fieldName) default: - // 对于其他类型,回退到数值计算 + // For other types, fall back to numeric calculation return evaluateNode(node, data) } } -// compareValues 比较两个值 +// compareValues compares two values func compareValues(left, right interface{}, operator string) (bool, error) { - // 尝试字符串比较 + // Try string comparison leftStr, leftIsStr := left.(string) rightStr, rightIsStr := right.(string) @@ -875,7 +875,7 @@ func compareValues(left, right interface{}, operator string) (bool, error) { } } - // 转换为数值进行比较 + // Convert to 
numeric values for comparison leftNum, err1 := convertToFloat(left) rightNum, err2 := convertToFloat(right) @@ -901,22 +901,22 @@ func compareValues(left, right interface{}, operator string) (bool, error) { } } -// matchesLikePattern 实现LIKE模式匹配 -// 支持%(匹配任意字符序列)和_(匹配单个字符) +// matchesLikePattern implements LIKE pattern matching +// Supports % (matches any character sequence) and _ (matches single character) func matchesLikePattern(text, pattern string) bool { return likeMatch(text, pattern, 0, 0) } -// likeMatch 递归实现LIKE匹配算法 +// likeMatch recursively implements LIKE matching algorithm func likeMatch(text, pattern string, textIndex, patternIndex int) bool { - // 如果模式已经匹配完成 + // If pattern matching is complete if patternIndex >= len(pattern) { - return textIndex >= len(text) // 文本也应该匹配完成 + return textIndex >= len(text) // Text should also be completely matched } - // 如果文本已经结束,但模式还有非%字符,则不匹配 + // If text has ended but pattern still has non-% characters, no match if textIndex >= len(text) { - // 检查剩余的模式是否都是% + // Check if remaining pattern consists only of % for i := patternIndex; i < len(pattern); i++ { if pattern[i] != '%' { return false @@ -927,12 +927,12 @@ func likeMatch(text, pattern string, textIndex, patternIndex int) bool { switch pattern[patternIndex] { case '%': - // %可以匹配0个或多个字符 - // 尝试匹配0个字符(跳过%) + // % can match 0 or more characters + // Try matching 0 characters (skip %) if likeMatch(text, pattern, textIndex, patternIndex+1) { return true } - // 尝试匹配1个或多个字符 + // Try matching 1 or more characters for i := textIndex; i < len(text); i++ { if likeMatch(text, pattern, i+1, patternIndex+1) { return true @@ -941,11 +941,11 @@ func likeMatch(text, pattern string, textIndex, patternIndex int) bool { return false case '_': - // _匹配任意单个字符 + // _ matches any single character return likeMatch(text, pattern, textIndex+1, patternIndex+1) default: - // 普通字符必须精确匹配 + // Regular characters must match exactly if text[textIndex] == pattern[patternIndex] { return 
likeMatch(text, pattern, textIndex+1, patternIndex+1) } @@ -953,7 +953,7 @@ func likeMatch(text, pattern string, textIndex, patternIndex int) bool { } } -// convertToFloat 将值转换为float64 +// convertToFloat converts value to float64 func convertToFloat(val interface{}) (float64, error) { switch v := val.(type) { case float64: @@ -973,7 +973,7 @@ func convertToFloat(val interface{}) (float64, error) { } } -// tokenize 将表达式字符串转换为token列表 +// tokenize converts expression string to token list func tokenize(expr string) ([]string, error) { expr = strings.TrimSpace(expr) if expr == "" { @@ -986,13 +986,13 @@ func tokenize(expr string) ([]string, error) { for i < len(expr) { ch := expr[i] - // 跳过空白字符 + // Skip whitespace characters if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' { i++ continue } - // 处理数字 + // Handle numbers if isDigit(ch) || (ch == '.' && i+1 < len(expr) && isDigit(expr[i+1])) { start := i hasDot := ch == '.' @@ -1009,37 +1009,37 @@ func tokenize(expr string) ([]string, error) { continue } - // 处理运算符和括号 + // Handle operators and parentheses if ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == '%' || ch == '^' || ch == '(' || ch == ')' || ch == ',' { - // 特殊处理负号:如果是负号且前面是运算符、括号或开始位置,则可能是负数 + // Special handling for minus sign: if it's minus and preceded by operator, parenthesis or start position, it might be negative number if ch == '-' { - // 检查是否可能是负数的开始 - canBeNegativeNumber := i == 0 || // 表达式开始 - len(tokens) == 0 // tokens为空时也可能是负数开始 + // Check if it could be the start of a negative number + canBeNegativeNumber := i == 0 || // Expression start + len(tokens) == 0 // When tokens is empty, it could also be negative number start - // 只有当tokens不为空时才检查前一个token + // Only check previous token when tokens is not empty if len(tokens) > 0 { prevToken := tokens[len(tokens)-1] canBeNegativeNumber = canBeNegativeNumber || - prevToken == "(" || // 左括号后 - prevToken == "," || // 逗号后(函数参数) - isOperator(prevToken) || // 运算符后 - isComparisonOperator(prevToken) 
|| // 比较运算符后 - strings.ToUpper(prevToken) == "THEN" || // THEN后 - strings.ToUpper(prevToken) == "ELSE" || // ELSE后 - strings.ToUpper(prevToken) == "WHEN" || // WHEN后 - strings.ToUpper(prevToken) == "AND" || // AND后 - strings.ToUpper(prevToken) == "OR" // OR后 + prevToken == "(" || // After left parenthesis + prevToken == "," || // After comma (function parameter) + isOperator(prevToken) || // After operator + isComparisonOperator(prevToken) || // After comparison operator + strings.ToUpper(prevToken) == "THEN" || // After THEN + strings.ToUpper(prevToken) == "ELSE" || // After ELSE + strings.ToUpper(prevToken) == "WHEN" || // After WHEN + strings.ToUpper(prevToken) == "AND" || // After AND + strings.ToUpper(prevToken) == "OR" // After OR } if canBeNegativeNumber && i+1 < len(expr) && isDigit(expr[i+1]) { - // 这是一个负数,解析整个数字 + // This is a negative number, parse the entire number start := i - i++ // 跳过负号 + i++ // Skip minus sign - // 解析数字部分 + // Parse numeric part for i < len(expr) && (isDigit(expr[i]) || expr[i] == '.') { i++ } @@ -1054,12 +1054,12 @@ func tokenize(expr string) ([]string, error) { continue } - // 处理比较运算符 + // Handle comparison operators if ch == '>' || ch == '<' || ch == '=' || ch == '!' 
{ start := i i++ - // 处理双字符运算符 + // Handle two-character operators if i < len(expr) { switch ch { case '>': @@ -1093,21 +1093,21 @@ func tokenize(expr string) ([]string, error) { } } - // 单字符运算符 + // Single character operator tokens = append(tokens, expr[start:i]) continue } - // 处理字符串字面量(单引号和双引号) + // Handle string literals (single and double quotes) if ch == '\'' || ch == '"' { quote := ch start := i - i++ // 跳过开始引号 + i++ // Skip opening quote - // 寻找结束引号 + // Find closing quote for i < len(expr) && expr[i] != quote { if expr[i] == '\\' && i+1 < len(expr) { - i += 2 // 跳过转义字符 + i += 2 // Skip escape character } else { i++ } @@ -1117,17 +1117,17 @@ func tokenize(expr string) ([]string, error) { return nil, fmt.Errorf("unterminated string literal starting at position %d", start) } - i++ // 跳过结束引号 + i++ // Skip closing quote tokens = append(tokens, expr[start:i]) continue } - // 处理反引号标识符 + // Handle backtick identifiers if ch == '`' { start := i - i++ // 跳过开始反引号 + i++ // Skip opening backtick - // 寻找结束反引号 + // Find closing backtick for i < len(expr) && expr[i] != '`' { i++ } @@ -1136,12 +1136,12 @@ func tokenize(expr string) ([]string, error) { return nil, fmt.Errorf("unterminated quoted identifier starting at position %d", start) } - i++ // 跳过结束反引号 + i++ // Skip closing backtick tokens = append(tokens, expr[start:i]) continue } - // 处理标识符(字段名或函数名) + // Handle identifiers (field names or function names) if isLetter(ch) { start := i i++ @@ -1153,20 +1153,20 @@ func tokenize(expr string) ([]string, error) { continue } - // 未知字符 + // Unknown character return nil, fmt.Errorf("unexpected character: %c at position %d", ch, i) } return tokens, nil } -// parseExpression 解析表达式 +// parseExpression parses expression func parseExpression(tokens []string) (*ExprNode, error) { if len(tokens) == 0 { return nil, fmt.Errorf("empty token list") } - // 使用Shunting-yard算法处理运算符优先级 + // Use Shunting-yard algorithm to handle operator precedence output := make([]*ExprNode, 0) operators := 
make([]string, 0) @@ -1174,7 +1174,7 @@ func parseExpression(tokens []string) (*ExprNode, error) { for i < len(tokens) { token := tokens[i] - // 处理数字 + // Handle numbers if isNumber(token) { output = append(output, &ExprNode{ Type: TypeNumber, @@ -1184,7 +1184,7 @@ func parseExpression(tokens []string) (*ExprNode, error) { continue } - // 处理字符串字面量 + // Handle string literals if isStringLiteral(token) { output = append(output, &ExprNode{ Type: TypeString, @@ -1194,12 +1194,12 @@ func parseExpression(tokens []string) (*ExprNode, error) { continue } - // 处理字段名或函数调用 + // Handle field names or function calls if isIdentifier(token) { - // 检查是否是逻辑运算符关键字 + // Check if it's a logical operator keyword upperToken := strings.ToUpper(token) if upperToken == "AND" || upperToken == "OR" || upperToken == "NOT" || upperToken == "LIKE" { - // 处理逻辑运算符 + // Handle logical operators for len(operators) > 0 && operators[len(operators)-1] != "(" && operatorPrecedence[operators[len(operators)-1]] >= operatorPrecedence[upperToken] { op := operators[len(operators)-1] @@ -1226,9 +1226,9 @@ func parseExpression(tokens []string) (*ExprNode, error) { continue } - // 特殊处理IS运算符,需要检查后续的NOT NULL组合 + // Special handling for IS operator, need to check subsequent NOT NULL combination if upperToken == "IS" { - // 处理待处理的运算符 + // Handle pending operators for len(operators) > 0 && operators[len(operators)-1] != "(" && operatorPrecedence[operators[len(operators)-1]] >= operatorPrecedence["IS"] { op := operators[len(operators)-1] @@ -1250,11 +1250,11 @@ func parseExpression(tokens []string) (*ExprNode, error) { }) } - // 检查是否是IS NOT NULL模式 + // Check if it's IS NOT NULL pattern if i+2 < len(tokens) && strings.ToUpper(tokens[i+1]) == "NOT" && strings.ToUpper(tokens[i+2]) == "NULL" { - // 这是IS NOT NULL,创建特殊的右侧节点结构 + // This is IS NOT NULL, create special right-side node structure notNullNode := &ExprNode{ Type: TypeOperator, Value: "NOT", @@ -1266,10 +1266,10 @@ func parseExpression(tokens []string) 
(*ExprNode, error) { operators = append(operators, "IS") output = append(output, notNullNode) - i += 3 // 跳过IS NOT NULL三个token + i += 3 // Skip three tokens: IS NOT NULL continue } else if i+1 < len(tokens) && strings.ToUpper(tokens[i+1]) == "NULL" { - // 这是IS NULL,创建NULL节点 + // This is IS NULL, create NULL node nullNode := &ExprNode{ Type: TypeField, Value: "NULL", @@ -1277,17 +1277,17 @@ func parseExpression(tokens []string) (*ExprNode, error) { operators = append(operators, "IS") output = append(output, nullNode) - i += 2 // 跳过IS NULL两个token + i += 2 // Skip two tokens: IS NULL continue } else { - // 普通的IS运算符 + // Regular IS operator operators = append(operators, "IS") i++ continue } } - // 检查是否是CASE表达式 + // Check if it's CASE expression if strings.ToUpper(token) == "CASE" { caseNode, newIndex, err := parseCaseExpression(tokens, i) if err != nil { @@ -1298,12 +1298,12 @@ func parseExpression(tokens []string) (*ExprNode, error) { continue } - // 检查下一个token是否是左括号,如果是则为函数调用 + // Check if next token is left parenthesis, if so it's a function call if i+1 < len(tokens) && tokens[i+1] == "(" { funcName := token - i += 2 // 跳过函数名和左括号 + i += 2 // Skip function name and left parenthesis - // 解析函数参数 + // Parse function arguments args, newIndex, err := parseFunctionArgs(tokens, i) if err != nil { return nil, err @@ -1319,7 +1319,7 @@ func parseExpression(tokens []string) (*ExprNode, error) { continue } - // 普通字段 + // Regular field output = append(output, &ExprNode{ Type: TypeField, Value: token, @@ -1328,14 +1328,14 @@ func parseExpression(tokens []string) (*ExprNode, error) { continue } - // 处理左括号 + // Handle left parenthesis if token == "(" { operators = append(operators, token) i++ continue } - // 处理右括号 + // Handle right parenthesis if token == ")" { for len(operators) > 0 && operators[len(operators)-1] != "(" { op := operators[len(operators)-1] @@ -1361,12 +1361,12 @@ func parseExpression(tokens []string) (*ExprNode, error) { return nil, fmt.Errorf("mismatched 
parentheses") } - operators = operators[:len(operators)-1] // 弹出左括号 + operators = operators[:len(operators)-1] // Pop left parenthesis i++ continue } - // 处理运算符 + // Handle operators if isOperator(token) { for len(operators) > 0 && operators[len(operators)-1] != "(" && operatorPrecedence[operators[len(operators)-1]] >= operatorPrecedence[token] { @@ -1394,7 +1394,7 @@ func parseExpression(tokens []string) (*ExprNode, error) { continue } - // 处理逗号(在函数参数列表中处理) + // Handle comma (processed in function argument list) if token == "," { i++ continue @@ -1403,7 +1403,7 @@ func parseExpression(tokens []string) (*ExprNode, error) { return nil, fmt.Errorf("unexpected token: %s", token) } - // 处理剩余的运算符 + // Handle remaining operators for len(operators) > 0 { op := operators[len(operators)-1] operators = operators[:len(operators)-1] @@ -1435,18 +1435,18 @@ func parseExpression(tokens []string) (*ExprNode, error) { return output[0], nil } -// parseFunctionArgs 解析函数参数 +// parseFunctionArgs parses function arguments func parseFunctionArgs(tokens []string, startIndex int) ([]*ExprNode, int, error) { args := make([]*ExprNode, 0) i := startIndex - // 处理空参数列表 + // Handle empty argument list if i < len(tokens) && tokens[i] == ")" { return args, i + 1, nil } for i < len(tokens) { - // 解析参数表达式 + // Parse argument expression argTokens := make([]string, 0) parenthesesCount := 0 @@ -1495,7 +1495,7 @@ func parseFunctionArgs(tokens []string, startIndex int) ([]*ExprNode, int, error return nil, 0, fmt.Errorf("unexpected end of tokens in function arguments") } -// parseCaseExpression 解析CASE表达式 +// parseCaseExpression parses CASE expression func parseCaseExpression(tokens []string, startIndex int) (*ExprNode, int, error) { if startIndex >= len(tokens) || strings.ToUpper(tokens[startIndex]) != "CASE" { return nil, startIndex, fmt.Errorf("expected CASE keyword") diff --git a/expr/expression_test.go b/expr/expression_test.go index 34e520d..7cf45ca 100644 --- a/expr/expression_test.go +++ 
b/expr/expression_test.go @@ -14,7 +14,7 @@ func TestExpressionEvaluation(t *testing.T) { expected float64 hasError bool }{ - // 基本运算测试 + // Basic arithmetic tests {"Simple Addition", "a + b", map[string]interface{}{"a": 5, "b": 3}, 8, false}, {"Simple Subtraction", "a - b", map[string]interface{}{"a": 5, "b": 3}, 2, false}, {"Simple Multiplication", "a * b", map[string]interface{}{"a": 5, "b": 3}, 15, false}, @@ -22,24 +22,24 @@ func TestExpressionEvaluation(t *testing.T) { {"Modulo", "a % b", map[string]interface{}{"a": 7, "b": 4}, 3, false}, {"Power", "a ^ b", map[string]interface{}{"a": 2, "b": 3}, 8, false}, - // 复合表达式测试 + // Compound expression tests {"Complex Expression", "a + b * c", map[string]interface{}{"a": 5, "b": 3, "c": 2}, 11, false}, {"Complex Expression With Parentheses", "(a + b) * c", map[string]interface{}{"a": 5, "b": 3, "c": 2}, 16, false}, {"Multiple Operations", "a + b * c - d / e", map[string]interface{}{"a": 5, "b": 3, "c": 2, "d": 8, "e": 4}, 9, false}, - // 函数调用测试 + // Function call tests {"Abs Function", "abs(a - b)", map[string]interface{}{"a": 3, "b": 5}, 2, false}, {"Sqrt Function", "sqrt(a)", map[string]interface{}{"a": 16}, 4, false}, {"Round Function", "round(a)", map[string]interface{}{"a": 3.7}, 4, false}, - // 转换测试 + // Conversion tests {"String to Number", "a + b", map[string]interface{}{"a": "5", "b": 3}, 8, false}, - // 复杂表达式测试 + // Complex expression tests {"Temperature Conversion", "temperature * 1.8 + 32", map[string]interface{}{"temperature": 25}, 77, false}, {"Complex Math", "sqrt(abs(a * b - c / d))", map[string]interface{}{"a": 10, "b": 2, "c": 5, "d": 1}, 3.872983346207417, false}, - // 错误测试 + // Error tests {"Division by Zero", "a / b", map[string]interface{}{"a": 5, "b": 0}, 0, true}, {"Missing Field", "a + b", map[string]interface{}{"a": 5}, 0, true}, {"Invalid Function", "unknown(a)", map[string]interface{}{"a": 5}, 0, true}, @@ -61,7 +61,7 @@ func TestExpressionEvaluation(t *testing.T) { } } -// 
TestCaseExpressionParsing 测试CASE表达式的解析功能 +// TestCaseExpressionParsing tests CASE expression parsing functionality func TestCaseExpressionParsing(t *testing.T) { tests := []struct { name string @@ -71,105 +71,105 @@ func TestCaseExpressionParsing(t *testing.T) { wantErr bool }{ { - name: "简单的搜索CASE表达式", + name: "Simple search CASE expression", exprStr: "CASE WHEN temperature > 30 THEN 1 ELSE 0 END", data: map[string]interface{}{"temperature": 35.0}, expected: 1.0, wantErr: false, }, { - name: "简单CASE表达式 - 值匹配", + name: "Simple CASE expression - value matching", exprStr: "CASE status WHEN 'active' THEN 1 WHEN 'inactive' THEN 0 ELSE -1 END", data: map[string]interface{}{"status": "active"}, expected: 1.0, wantErr: false, }, { - name: "CASE表达式 - ELSE分支", + name: "CASE expression - ELSE branch", exprStr: "CASE WHEN temperature > 50 THEN 1 ELSE 0 END", data: map[string]interface{}{"temperature": 25.5}, expected: 0.0, wantErr: false, }, { - name: "复杂搜索CASE表达式", + name: "Complex search CASE expression", exprStr: "CASE WHEN temperature > 30 THEN 'HOT' WHEN temperature > 20 THEN 'WARM' ELSE 'COLD' END", data: map[string]interface{}{"temperature": 25.0}, - expected: 4.0, // 字符串"WARM"的长度 + expected: 4.0, // Length of string "WARM" wantErr: false, }, { - name: "数值比较的简单CASE", + name: "Simple CASE with numeric comparison", exprStr: "CASE temperature WHEN 25 THEN 1 WHEN 30 THEN 2 ELSE 0 END", data: map[string]interface{}{"temperature": 30.0}, expected: 2.0, wantErr: false, }, { - name: "布尔值CASE表达式", + name: "Boolean CASE expression", exprStr: "CASE WHEN temperature > 25 AND humidity > 50 THEN 1 ELSE 0 END", data: map[string]interface{}{"temperature": 30.0, "humidity": 60.0}, expected: 1.0, wantErr: false, }, { - name: "多条件CASE表达式_AND", + name: "Multi-condition CASE expression with AND", exprStr: "CASE WHEN temperature > 30 AND humidity < 60 THEN 1 WHEN temperature > 20 THEN 2 ELSE 0 END", data: map[string]interface{}{"temperature": 35.0, "humidity": 50.0}, expected: 1.0, wantErr: 
false, }, { - name: "多条件CASE表达式_OR", + name: "Multi-condition CASE expression with OR", exprStr: "CASE WHEN temperature > 40 OR humidity > 80 THEN 1 ELSE 0 END", data: map[string]interface{}{"temperature": 25.0, "humidity": 85.0}, expected: 1.0, wantErr: false, }, { - name: "函数调用在CASE中_ABS", + name: "Function call in CASE - ABS", exprStr: "CASE WHEN ABS(temperature) > 30 THEN 1 ELSE 0 END", data: map[string]interface{}{"temperature": -35.0}, expected: 1.0, wantErr: false, }, { - name: "函数调用在CASE中_ROUND", + name: "Function call in CASE - ROUND", exprStr: "CASE WHEN ROUND(temperature) = 25 THEN 1 ELSE 0 END", data: map[string]interface{}{"temperature": 24.7}, expected: 1.0, wantErr: false, }, { - name: "复杂条件组合", + name: "Complex condition combination", exprStr: "CASE WHEN temperature > 30 AND (humidity > 60 OR pressure < 1000) THEN 1 ELSE 0 END", data: map[string]interface{}{"temperature": 35.0, "humidity": 55.0, "pressure": 950.0}, expected: 1.0, wantErr: false, }, { - name: "CASE中的算术表达式", + name: "Arithmetic expression in CASE", exprStr: "CASE WHEN temperature * 1.8 + 32 > 100 THEN 1 ELSE 0 END", data: map[string]interface{}{"temperature": 40.0}, // 40*1.8+32 = 104 expected: 1.0, wantErr: false, }, { - name: "字符串函数在CASE中", + name: "String function in CASE", exprStr: "CASE WHEN LENGTH(device_name) > 5 THEN 1 ELSE 0 END", data: map[string]interface{}{"device_name": "sensor123"}, - expected: 1.0, // LENGTH函数正常工作,"sensor123"长度为9 > 5,返回1 + expected: 1.0, // LENGTH function works normally, "sensor123" length is 9 > 5, returns 1 wantErr: false, }, { - name: "简单CASE与函数", + name: "Simple CASE with function", exprStr: "CASE ABS(temperature) WHEN 30 THEN 1 WHEN 25 THEN 2 ELSE 0 END", data: map[string]interface{}{"temperature": -30.0}, expected: 1.0, wantErr: false, }, { - name: "CASE结果中的函数", + name: "Function in CASE result", exprStr: "CASE WHEN temperature > 30 THEN ABS(temperature) ELSE ROUND(temperature) END", data: map[string]interface{}{"temperature": 35.5}, expected: 
35.5, @@ -188,7 +188,7 @@ func TestCaseExpressionParsing(t *testing.T) { assert.NoError(t, err, "Expression creation should not fail") assert.NotNil(t, expression, "Expression should not be nil") - // 测试表达式计算 + // Test expression evaluation result, err := expression.Evaluate(tt.data) if tt.wantErr { assert.Error(t, err) diff --git a/functions/builtin.go b/functions/builtin.go index 9f71288..66b06b2 100644 --- a/functions/builtin.go +++ b/functions/builtin.go @@ -102,7 +102,7 @@ func registerBuiltinFunctions() { _ = Register(NewChangedColFunction()) _ = Register(NewHadChangedFunction()) - // 注册窗口函数 + // Window functions _ = Register(NewWindowStartFunction()) _ = Register(NewWindowEndFunction()) diff --git a/functions/doc.go b/functions/doc.go new file mode 100644 index 0000000..514d07a --- /dev/null +++ b/functions/doc.go @@ -0,0 +1,184 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Package functions provides a comprehensive function registry and execution framework for StreamSQL. + +This package implements a unified function management system that supports built-in functions, +custom user-defined functions, and specialized aggregation and analytical functions. +It serves as the central hub for all function-related operations in SQL expressions and stream processing. 
+ +# Core Features + +• Unified Function Registry - Centralized registration and management of all function types +• Plugin Architecture - Runtime registration of custom functions without code modification +• Type System - Comprehensive function categorization and type validation +• Aggregation Support - Specialized interfaces for incremental aggregation functions +• Analytical Functions - Advanced analytical functions with state management +• Performance Optimization - Efficient function dispatch and execution +• Automatic Adaptation - Seamless integration between function types and aggregator modules + +# Function Types + +The package supports eight distinct function categories: + + TypeMath - Mathematical functions (SIN, COS, SQRT, ABS, etc.) + TypeString - String manipulation functions (UPPER, LOWER, SUBSTRING, etc.) + TypeConversion - Type conversion functions (CAST, CONVERT, TO_NUMBER, etc.) + TypeDateTime - Date and time functions (NOW, DATE_FORMAT, EXTRACT, etc.) + TypeAggregation - Aggregate functions (SUM, AVG, COUNT, MAX, MIN, etc.) + TypeAnalytical - Analytical functions (ROW_NUMBER, RANK, LAG, LEAD, etc.) + TypeWindow - Window functions (TUMBLING_WINDOW, SLIDING_WINDOW, etc.) 
+ TypeCustom - User-defined custom functions + +# Built-in Functions + +Extensive collection of built-in functions across all categories: + + // Mathematical functions + ABS(x) - Absolute value + SQRT(x) - Square root + POWER(x, y) - Power operation + ROUND(x, d) - Round to decimal places + + // String functions + UPPER(str) - Convert to uppercase + LOWER(str) - Convert to lowercase + LENGTH(str) - String length + SUBSTRING(str, start, len) - Extract substring + + // Aggregation functions + SUM(field) - Sum of values + AVG(field) - Average of values + COUNT(*) - Count of records + MAX(field) - Maximum value + MIN(field) - Minimum value + +# Custom Function Registration + +Simple API for registering custom functions: + + // Register a simple custom function + RegisterCustomFunction( + "fahrenheit_to_celsius", + TypeConversion, + "Temperature conversion", + "Convert Fahrenheit to Celsius", + 1, 1, // min and max arguments + func(ctx *FunctionContext, args []interface{}) (interface{}, error) { + f := args[0].(float64) + return (f - 32) * 5 / 9, nil + }, + ) + + // Register an aggregation function + type CustomSumFunction struct { + *BaseFunction + sum float64 + } + + func (f *CustomSumFunction) Add(value interface{}) { + if v, ok := value.(float64); ok { + f.sum += v + } + } + + func (f *CustomSumFunction) Result() interface{} { + return f.sum + } + +# Function Interfaces + +The package defines several interfaces for different function types: + + // Basic function interface + type Function interface { + GetName() string + GetType() FunctionType + Execute(ctx *FunctionContext, args []interface{}) (interface{}, error) + } + + // Aggregation function interface + type AggregatorFunction interface { + Function + New() AggregatorFunction + Add(value interface{}) + Result() interface{} + Reset() + Clone() AggregatorFunction + } + + // Analytical function interface + type AnalyticalFunction interface { + AggregatorFunction + } + +# Adapter System + +Automatic adaptation 
between function types and aggregator modules: + + // AggregatorAdapter - Adapts functions to aggregator interface + type AggregatorAdapter struct { + function AggregatorFunction + } + + // AnalyticalAdapter - Adapts analytical functions + type AnalyticalAdapter struct { + function AnalyticalFunction + } + +# Performance Features + +• Function Caching - Efficient function lookup and caching +• Lazy Initialization - Functions are initialized only when needed +• Batch Processing - Optimized batch execution for aggregation functions +• Memory Management - Automatic cleanup and resource management +• Type Optimization - Specialized execution paths for common data types + +# Usage Examples + +Basic function usage in SQL: + + SELECT UPPER(device_name), ROUND(temperature, 2) + FROM stream + WHERE ABS(temperature - 25) > 5 + +Aggregation functions with windows: + + SELECT device_id, + AVG(temperature) as avg_temp, + STDDEV(temperature) as temp_variance + FROM stream + GROUP BY device_id, TumblingWindow('5s') + +Custom function in expressions: + + SELECT device_id, + fahrenheit_to_celsius(temperature) as temp_celsius + FROM stream + WHERE fahrenheit_to_celsius(temperature) > 30 + +# Integration + +Seamless integration with other StreamSQL components: + +• Expr package - Function execution in expressions +• Aggregator package - Automatic function adaptation +• RSQL package - Function parsing and validation +• Stream package - Real-time function execution +• Types package - Function context and data type support +*/ +package functions \ No newline at end of file diff --git a/logger/logger.go b/logger/logger.go index 4f00def..396a211 100644 --- a/logger/logger.go +++ b/logger/logger.go @@ -14,8 +14,8 @@ * limitations under the License. */ -// Package logger 提供StreamSQL的日志记录功能。 -// 支持不同日志级别和可配置的日志输出后端。 +// Package logger provides logging functionality for StreamSQL. +// Supports different log levels and configurable log output backends. 
package logger import ( @@ -26,23 +26,23 @@ import ( "time" ) -// Level 定义日志级别 +// Level defines log levels type Level int const ( - // DEBUG 调试级别,显示详细的调试信息 + // DEBUG debug level, displays detailed debug information DEBUG Level = iota - // INFO 信息级别,显示一般信息 + // INFO info level, displays general information INFO - // WARN 警告级别,显示警告信息 + // WARN warning level, displays warning information WARN - // ERROR 错误级别,仅显示错误信息 + // ERROR error level, only displays error information ERROR - // OFF 关闭日志 + // OFF disables logging OFF ) -// String 返回日志级别的字符串表示 +// String returns string representation of log level func (l Level) String() string { switch l { case DEBUG: @@ -60,38 +60,38 @@ func (l Level) String() string { } } -// Logger 接口定义了日志记录的基本方法 +// Logger interface defines basic methods for logging type Logger interface { - // Debug 记录调试级别的日志 + // Debug records debug level logs Debug(format string, args ...interface{}) - // Info 记录信息级别的日志 + // Info records info level logs Info(format string, args ...interface{}) - // Warn 记录警告级别的日志 + // Warn records warning level logs Warn(format string, args ...interface{}) - // Error 记录错误级别的日志 + // Error records error level logs Error(format string, args ...interface{}) - // SetLevel 设置日志级别 + // SetLevel sets the log level SetLevel(level Level) } -// defaultLogger 是默认的日志实现 +// defaultLogger is the default log implementation type defaultLogger struct { level Level logger *log.Logger } -// NewLogger 创建一个新的日志记录器 -// 参数: -// - level: 日志级别 -// - output: 输出目标,如os.Stdout、os.Stderr或文件 +// NewLogger creates a new logger +// Parameters: +// - level: log level +// - output: output destination, such as os.Stdout, os.Stderr, or file // -// 返回值: -// - Logger: 日志记录器实例 +// Returns: +// - Logger: logger instance // -// 示例: +// Example: // // logger := NewLogger(INFO, os.Stdout) -// logger.Info("应用程序启动") +// logger.Info("Application started") func NewLogger(level Level, output io.Writer) Logger { return &defaultLogger{ level: level, @@ -132,7 +132,7 @@ func 
(l *defaultLogger) SetLevel(level Level) { l.level = level } -// log 内部日志记录方法,格式化输出日志信息 +// log internal logging method, formats and outputs log information func (l *defaultLogger) log(level Level, format string, args ...interface{}) { if l.level == OFF { return @@ -144,11 +144,11 @@ func (l *defaultLogger) log(level Level, format string, args ...interface{}) { l.logger.Println(logLine) } -// discardLogger 是一个丢弃所有日志输出的记录器 +// discardLogger is a logger that discards all log output type discardLogger struct{} -// NewDiscardLogger 创建一个丢弃所有日志的记录器 -// 用于在不需要日志输出的场景中使用 +// NewDiscardLogger creates a logger that discards all logs +// Used in scenarios where log output is not needed func NewDiscardLogger() Logger { return &discardLogger{} } @@ -159,37 +159,37 @@ func (d *discardLogger) Warn(format string, args ...interface{}) {} func (d *discardLogger) Error(format string, args ...interface{}) {} func (d *discardLogger) SetLevel(level Level) {} -// 全局默认日志记录器 +// Global default logger var defaultInstance Logger = NewLogger(INFO, os.Stdout) -// SetDefault 设置全局默认日志记录器 +// SetDefault sets the global default logger func SetDefault(logger Logger) { defaultInstance = logger } -// GetDefault 获取全局默认日志记录器 +// GetDefault gets the global default logger func GetDefault() Logger { return defaultInstance } // 便捷的全局日志方法 -// Debug 使用默认日志记录器记录调试信息 +// Debug uses the default logger to record debug information func Debug(format string, args ...interface{}) { defaultInstance.Debug(format, args...) } -// Info 使用默认日志记录器记录信息 +// Info uses the default logger to record information func Info(format string, args ...interface{}) { defaultInstance.Info(format, args...) } -// Warn 使用默认日志记录器记录警告 +// Warn uses the default logger to record warnings func Warn(format string, args ...interface{}) { defaultInstance.Warn(format, args...) } -// Error 使用默认日志记录器记录错误 +// Error uses the default logger to record errors func Error(format string, args ...interface{}) { defaultInstance.Error(format, args...) 
} diff --git a/options.go b/options.go index 9881b27..1dd3408 100644 --- a/options.go +++ b/options.go @@ -23,48 +23,48 @@ import ( "github.com/rulego/streamsql/types" ) -// Option 定义StreamSQL的配置选项类型 +// Option defines the configuration option type for StreamSQL type Option func(*Streamsql) -// WithLogLevel 设置日志级别 +// WithLogLevel sets the log level func WithLogLevel(level logger.Level) Option { return func(s *Streamsql) { logger.GetDefault().SetLevel(level) } } -// WithDiscardLog 禁用日志输出 +// WithDiscardLog disables log output func WithDiscardLog() Option { return func(s *Streamsql) { logger.SetDefault(logger.NewDiscardLogger()) } } -// WithHighPerformance 使用高性能配置 -// 适用于需要最大吞吐量的场景 +// WithHighPerformance uses high-performance configuration +// Suitable for scenarios requiring maximum throughput func WithHighPerformance() Option { return func(s *Streamsql) { s.performanceMode = "high_performance" } } -// WithLowLatency 使用低延迟配置 -// 适用于实时交互应用,最小化延迟 +// WithLowLatency uses low-latency configuration +// Suitable for real-time interactive applications, minimizing latency func WithLowLatency() Option { return func(s *Streamsql) { s.performanceMode = "low_latency" } } -// WithZeroDataLoss 使用零数据丢失配置 -// 适用于关键业务数据,保证数据不丢失 +// WithZeroDataLoss uses zero data loss configuration +// Suitable for critical business data, ensuring no data loss func WithZeroDataLoss() Option { return func(s *Streamsql) { s.performanceMode = "zero_data_loss" } } -// WithCustomPerformance 使用自定义性能配置 +// WithCustomPerformance uses custom performance configuration func WithCustomPerformance(config types.PerformanceConfig) Option { return func(s *Streamsql) { s.performanceMode = "custom" @@ -72,7 +72,7 @@ func WithCustomPerformance(config types.PerformanceConfig) Option { } } -// WithPersistence 使用持久化配置预设 +// WithPersistence uses persistence configuration preset func WithPersistence() Option { return func(s *Streamsql) { s.performanceMode = "custom" @@ -81,7 +81,7 @@ func WithPersistence() Option { } } 
-// WithCustomPersistence 使用自定义持久化配置 +// WithCustomPersistence uses custom persistence configuration func WithCustomPersistence(dataDir string, maxFileSize int64, flushInterval time.Duration) Option { return func(s *Streamsql) { s.performanceMode = "custom" @@ -98,7 +98,7 @@ func WithCustomPersistence(dataDir string, maxFileSize int64, flushInterval time } } -// WithBufferSizes 设置自定义缓冲区大小 +// WithBufferSizes sets custom buffer sizes func WithBufferSizes(dataChannelSize, resultChannelSize, windowOutputSize int) Option { return func(s *Streamsql) { s.performanceMode = "custom" @@ -110,7 +110,7 @@ func WithBufferSizes(dataChannelSize, resultChannelSize, windowOutputSize int) O } } -// WithOverflowStrategy 设置溢出策略 +// WithOverflowStrategy sets the overflow strategy func WithOverflowStrategy(strategy string, blockTimeout time.Duration) Option { return func(s *Streamsql) { s.performanceMode = "custom" @@ -122,7 +122,7 @@ func WithOverflowStrategy(strategy string, blockTimeout time.Duration) Option { } } -// WithWorkerConfig 设置工作池配置 +// WithWorkerConfig sets the worker pool configuration func WithWorkerConfig(sinkPoolSize, sinkWorkerCount, maxRetryRoutines int) Option { return func(s *Streamsql) { s.performanceMode = "custom" @@ -134,7 +134,7 @@ func WithWorkerConfig(sinkPoolSize, sinkWorkerCount, maxRetryRoutines int) Optio } } -// WithMonitoring 启用详细监控 +// WithMonitoring enables detailed monitoring func WithMonitoring(updateInterval time.Duration, enableDetailedStats bool) Option { return func(s *Streamsql) { s.performanceMode = "custom" diff --git a/rsql/ast.go b/rsql/ast.go index 9a16984..18fc05f 100644 --- a/rsql/ast.go +++ b/rsql/ast.go @@ -17,7 +17,7 @@ import ( type SelectStatement struct { Fields []Field Distinct bool - SelectAll bool // 新增:标识是否是SELECT *查询 + SelectAll bool // Flag to indicate if this is a SELECT * query Source string Condition string Window WindowDefinition @@ -39,7 +39,7 @@ type WindowDefinition struct { TimeUnit time.Duration } -// 
ToStreamConfig 将AST转换为Stream配置 +// ToStreamConfig converts AST to Stream configuration func (s *SelectStatement) ToStreamConfig() (*types.Config, string, error) { if s.Source == "" { return nil, "", fmt.Errorf("missing FROM clause") @@ -59,14 +59,14 @@ func (s *SelectStatement) ToStreamConfig() (*types.Config, string, error) { params, err := parseWindowParamsWithType(s.Window.Params, windowType) if err != nil { - return nil, "", fmt.Errorf("解析窗口参数失败: %w", err) + return nil, "", fmt.Errorf("failed to parse window parameters: %w", err) } - // 检查是否需要窗口处理 + // Check if window processing is needed needWindow := s.Window.Type != "" var simpleFields []string - // 检查是否有聚合函数 + // Check if there are aggregation functions hasAggregation := false for _, field := range s.Fields { if isAggregationFunction(field.Expression) { @@ -75,53 +75,53 @@ func (s *SelectStatement) ToStreamConfig() (*types.Config, string, error) { } } - // 如果没有指定窗口但有聚合函数,默认使用滚动窗口 + // If no window is specified but has aggregation functions, use tumbling window by default if !needWindow && hasAggregation { needWindow = true windowType = window.TypeTumbling params = map[string]interface{}{ - "size": 10 * time.Second, // 默认10秒窗口 + "size": 10 * time.Second, // Default 10-second window } } - // 处理 SessionWindow 的特殊配置 + // Handle special configuration for SessionWindow var groupByKey string if windowType == window.TypeSession && len(s.GroupBy) > 0 { - // 对于会话窗口,使用第一个 GROUP BY 字段作为会话键 + // For session window, use the first GROUP BY field as session key groupByKey = s.GroupBy[0] } - // 如果没有聚合函数,收集简单字段 + // If no aggregation functions, collect simple fields if !hasAggregation { - // 如果是SELECT *查询,设置特殊标记 + // If SELECT * query, set special marker if s.SelectAll { simpleFields = append(simpleFields, "*") } else { for _, field := range s.Fields { fieldName := field.Expression if field.Alias != "" { - // 如果有别名,用别名作为字段名 + // If has alias, use alias as field name simpleFields = append(simpleFields, 
fieldName+":"+field.Alias) } else { - // 对于没有别名的字段,检查是否为字符串字面量 + // For fields without alias, check if it's a string literal _, n, _, _ := ParseAggregateTypeWithExpression(fieldName) if n != "" { - // 如果是字符串字面量,使用解析出的字段名(去掉引号) + // If string literal, use parsed field name (remove quotes) simpleFields = append(simpleFields, n) } else { - // 否则使用原始表达式 + // Otherwise use original expression simpleFields = append(simpleFields, fieldName) } } } } - logger.Debug("收集简单字段: %v", simpleFields) + logger.Debug("Collected simple fields: %v", simpleFields) } - // 构建字段映射和表达式信息 + // Build field mapping and expression information aggs, fields, expressions := buildSelectFieldsWithExpressions(s.Fields) - // 提取字段顺序信息 + // Extract field order information fieldOrder := extractFieldOrder(s.Fields) // 构建Stream配置 @@ -148,65 +148,65 @@ func (s *SelectStatement) ToStreamConfig() (*types.Config, string, error) { return &config, s.Condition, nil } -// 判断表达式是否是聚合函数 +// Check if expression is an aggregation function func isAggregationFunction(expr string) bool { - // 提取函数名 + // Extract function name funcName := extractFunctionName(expr) if funcName == "" { return false } - // 检查是否是注册的函数 + // Check if it's a registered function if fn, exists := functions.Get(funcName); exists { // 根据函数类型判断是否需要聚合处理 switch fn.GetType() { case functions.TypeAggregation: - // 聚合函数需要聚合处理 + // Aggregation function needs aggregation processing return true case functions.TypeAnalytical: - // 分析函数也需要聚合处理(状态管理) + // Analytical function also needs aggregation processing (state management) return true case functions.TypeWindow: - // 窗口函数需要聚合处理 + // Window function needs aggregation processing return true default: - // 其他类型的函数(字符串、转换等)不需要聚合处理 + // Other types of functions (string, conversion, etc.) 
don't need aggregation processing return false } } - // 对于未注册的函数,检查是否是expr-lang内置函数 - // 这些函数通过ExprBridge处理,不需要聚合模式 + // For unregistered functions, check if it's expr-lang built-in function + // These functions are handled through ExprBridge, don't need aggregation mode bridge := functions.GetExprBridge() if bridge.IsExprLangFunction(funcName) { return false } - // 如果不是注册的函数也不是expr-lang函数,但包含括号,保守起见认为可能是聚合函数 + // If not registered function and not expr-lang function, but contains parentheses, conservatively assume it might be aggregation function if strings.Contains(expr, "(") && strings.Contains(expr, ")") { return true } return false } -// extractFieldOrder 从Fields切片中提取字段的原始顺序 -// 返回按SELECT语句中出现顺序排列的字段名列表 +// extractFieldOrder extracts original order of fields from Fields slice +// Returns field names list in order of appearance in SELECT statement func extractFieldOrder(fields []Field) []string { var fieldOrder []string for _, field := range fields { - // 如果有别名,使用别名作为字段名 + // If has alias, use alias as field name if field.Alias != "" { fieldOrder = append(fieldOrder, field.Alias) } else { - // 没有别名时,尝试解析表达式获取字段名 + // Without alias, try to parse expression to get field name _, fieldName, _, _ := ParseAggregateTypeWithExpression(field.Expression) if fieldName != "" { - // 如果解析出字段名(如字符串字面量),使用解析出的名称 + // If parsed field name (like string literal), use parsed name fieldOrder = append(fieldOrder, fieldName) } else { - // 否则使用原始表达式作为字段名 + // Otherwise use original expression as field name fieldOrder = append(fieldOrder, field.Expression) } } @@ -217,7 +217,7 @@ func extractFieldOrder(fields []Field) []string { func extractGroupFields(s *SelectStatement) []string { var fields []string for _, f := range s.GroupBy { - if !strings.Contains(f, "(") { // 排除聚合函数 + if !strings.Contains(f, "(") { // Exclude aggregation functions fields = append(fields, f) } } @@ -232,19 +232,19 @@ func buildSelectFields(fields []Field) (aggMap map[string]aggregator.AggregateTy if alias := 
f.Alias; alias != "" { t, n, _, _ := ParseAggregateTypeWithExpression(f.Expression) if t != "" { - // 使用别名作为聚合器的key,而不是字段名 + // Use alias as key for aggregator, not field name selectFields[alias] = t - // 字段映射:输出字段名(别名) -> 输入字段名(保持与buildSelectFieldsWithExpressions一致) + // Field mapping: output field name(alias) -> input field name (consistent with buildSelectFieldsWithExpressions) if n != "" { fieldMap[alias] = n } else { - // 如果没有提取到字段名,使用别名本身 + // If no field name extracted, use alias itself fieldMap[alias] = alias } } } else { - // 没有别名的情况,使用表达式本身作为字段名 + // Without alias, use expression itself as field name t, n, _, _ := ParseAggregateTypeWithExpression(f.Expression) if t != "" && n != "" { selectFields[n] = t @@ -255,23 +255,23 @@ func buildSelectFields(fields []Field) (aggMap map[string]aggregator.AggregateTy return selectFields, fieldMap } -// 解析聚合函数,并返回表达式信息 +// Parse aggregation function and return expression information func ParseAggregateTypeWithExpression(exprStr string) (aggType aggregator.AggregateType, name string, expression string, allFields []string) { - // 特殊处理 CASE 表达式 + // Special handling for CASE expressions if strings.HasPrefix(strings.ToUpper(strings.TrimSpace(exprStr)), "CASE") { - // CASE 表达式作为特殊的表达式处理 + // CASE expressions are handled as special expressions if parsedExpr, err := expr.NewExpression(exprStr); err == nil { allFields = parsedExpr.GetFields() } return "expression", "", exprStr, allFields } - // 检查是否是嵌套函数 + // Check if it's nested functions if hasNestedFunctions(exprStr) { - // 嵌套函数情况,提取所有函数 + // Nested function case, extract all functions funcs := extractAllFunctions(exprStr) - // 查找聚合函数 + // Find aggregation functions var aggregationFunc string for _, funcName := range funcs { if fn, exists := functions.Get(funcName); exists { @@ -284,13 +284,13 @@ func ParseAggregateTypeWithExpression(exprStr string) (aggType aggregator.Aggreg } if aggregationFunc != "" { - // 有聚合函数的嵌套表达式,整个表达式作为expression处理 + // Nested expression with 
aggregation function, handle entire expression as expression if parsedExpr, err := expr.NewExpression(exprStr); err == nil { allFields = parsedExpr.GetFields() } return aggregator.AggregateType(aggregationFunc), "", exprStr, allFields } else { - // 没有聚合函数的嵌套表达式,作为普通表达式处理 + // Nested expression without aggregation function, handle as regular expression if parsedExpr, err := expr.NewExpression(exprStr); err == nil { allFields = parsedExpr.GetFields() } @@ -298,8 +298,8 @@ func ParseAggregateTypeWithExpression(exprStr string) (aggType aggregator.Aggreg } } - // 单一函数的原有逻辑 - // 提取函数名 + // Original logic for single function + // Extract function name funcName := extractFunctionName(exprStr) if funcName == "" { // 检查是否是字符串字面量 @@ -324,33 +324,33 @@ func ParseAggregateTypeWithExpression(exprStr string) (aggType aggregator.Aggreg return "", "", "", nil } - // 检查是否是注册的函数 + // Check if it's a registered function fn, exists := functions.Get(funcName) if !exists { return "", "", "", nil } - // 提取函数参数和表达式信息 + // Extract function parameters and expression information name, expression, allFields = extractAggFieldWithExpression(exprStr, funcName) - // 根据函数类型决定聚合类型 + // Determine aggregation type based on function type switch fn.GetType() { case functions.TypeAggregation: - // 聚合函数:使用函数名作为聚合类型 + // Aggregation function: use function name as aggregation type return aggregator.AggregateType(funcName), name, expression, allFields case functions.TypeAnalytical: - // 分析函数:使用函数名作为聚合类型 + // Analytical function: use function name as aggregation type return aggregator.AggregateType(funcName), name, expression, allFields case functions.TypeWindow: - // 窗口函数:使用函数名作为聚合类型 + // Window function: use function name as aggregation type return aggregator.AggregateType(funcName), name, expression, allFields case functions.TypeString, functions.TypeConversion, functions.TypeCustom, functions.TypeMath: - // 字符串函数、转换函数、自定义函数、数学函数:在聚合查询中作为表达式处理 - // 使用 "expression" 作为特殊的聚合类型,表示这是一个表达式计算 - // 
对于这些函数,应该保存完整的函数调用作为表达式,而不是只保存参数部分 + // String, conversion, custom, math functions: handle as expressions in aggregation queries + // Use "expression" as special aggregation type, indicating this is an expression calculation + // For these functions, should save complete function call as expression, not just parameter part fullExpression := exprStr if parsedExpr, err := expr.NewExpression(fullExpression); err == nil { allFields = parsedExpr.GetFields() @@ -364,18 +364,18 @@ func ParseAggregateTypeWithExpression(exprStr string) (aggType aggregator.Aggreg } } -// extractFunctionName 从表达式中提取函数名 +// extractFunctionName extracts function name from expression func extractFunctionName(expr string) string { - // 查找第一个左括号 + // Find first left parenthesis parenIndex := strings.Index(expr, "(") if parenIndex == -1 { return "" } - // 提取函数名部分 + // Extract function name part funcName := strings.TrimSpace(expr[:parenIndex]) - // 如果函数名包含其他运算符或空格,说明不是简单的函数调用 + // If function name contains other operators or spaces, it's not a simple function call if strings.ContainsAny(funcName, " +-*/=<>!&|") { return "" } @@ -383,21 +383,21 @@ func extractFunctionName(expr string) string { return funcName } -// 提取表达式中的所有函数名 +// Extract all function names from expression func extractAllFunctions(expr string) []string { var funcNames []string // 简单的函数名匹配 i := 0 for i < len(expr) { - // 查找函数名模式 + // Find function name pattern start := i for i < len(expr) && (expr[i] >= 'a' && expr[i] <= 'z' || expr[i] >= 'A' && expr[i] <= 'Z' || expr[i] == '_') { i++ } if i < len(expr) && expr[i] == '(' && i > start { - // 找到可能的函数名 + // Found possible function name funcName := expr[start:i] if _, exists := functions.Get(funcName); exists { funcNames = append(funcNames, funcName) @@ -412,13 +412,13 @@ func extractAllFunctions(expr string) []string { return funcNames } -// 检查表达式是否包含嵌套函数 +// Check if expression contains nested functions func hasNestedFunctions(expr string) bool { funcs := extractAllFunctions(expr) 
return len(funcs) > 1 } -// 提取聚合函数字段,并解析表达式信息 +// Extract aggregation function fields and parse expression information func extractAggFieldWithExpression(exprStr string, funcName string) (fieldName string, expression string, allFields []string) { start := strings.Index(strings.ToLower(exprStr), strings.ToLower(funcName)+"(") if start < 0 { @@ -434,12 +434,12 @@ func extractAggFieldWithExpression(exprStr string, funcName string) (fieldName s // 提取括号内的表达式 fieldExpr := strings.TrimSpace(exprStr[start:end]) - // 特殊处理count(*)的情况 + // Special handling for count(*) case if strings.ToLower(funcName) == "count" && fieldExpr == "*" { return "*", "", nil } - // 检查是否是简单字段名(只包含字母、数字、下划线) + // Check if it's a simple field name (only letters, numbers, underscores) isSimpleField := true for _, char := range fieldExpr { if !((char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z') || @@ -449,22 +449,22 @@ func extractAggFieldWithExpression(exprStr string, funcName string) (fieldName s } } - // 如果是简单字段,直接返回字段名,不创建表达式 + // If simple field, return field name directly, don't create expression if isSimpleField { return fieldExpr, "", nil } - // 对于复杂表达式,包括多参数函数调用 + // For complex expressions, including multi-parameter function calls expression = fieldExpr - // 对于CONCAT等字符串函数,直接保存完整表达式 + // For string functions like CONCAT, save complete expression directly if strings.ToLower(funcName) == "concat" { - // 智能解析CONCAT函数的参数来提取字段名 + // Intelligently parse CONCAT function parameters to extract field names var fields []string params := parseSmartParameters(fieldExpr) for _, param := range params { param = strings.TrimSpace(param) - // 如果参数不是字符串常量(不被引号包围),则认为是字段名 + // If parameter is not string constant (not surrounded by quotes), consider it as field name if !((strings.HasPrefix(param, "'") && strings.HasSuffix(param, "'")) || (strings.HasPrefix(param, "\"") && strings.HasSuffix(param, "\""))) { if isIdentifier(param) { @@ -476,7 +476,7 @@ func extractAggFieldWithExpression(exprStr string, 
funcName string) (fieldName s // 对于CONCAT函数,保存完整的函数调用作为表达式 return fields[0], funcName + "(" + fieldExpr + ")", fields } - // 如果没有找到字段,返回空字段名但保留表达式 + // If no field found, return empty field name but keep expression return "", funcName + "(" + fieldExpr + ")", nil } @@ -496,36 +496,36 @@ func extractAggFieldWithExpression(exprStr string, funcName string) (fieldName s } } if len(fields) > 0 { - // 对于多参数函数,使用所有参数字段,主字段名为第一个参数 + // For multi-parameter functions, use all parameter fields, main field name is first parameter return fields[0], expression, fields } } - // 如果还是解析失败,尝试使用简单方法提取 + // If still fails to parse, try simple extraction method fieldName = extractSimpleField(fieldExpr) return fieldName, expression, []string{fieldName} } - // 获取表达式中引用的所有字段 + // Get all fields referenced in expression allFields = parsedExpr.GetFields() - // 如果只有一个字段,直接返回 + // If only one field, return directly if len(allFields) == 1 { return allFields[0], expression, allFields } - // 如果有多个字段,使用第一个字段名作为主字段 + // If multiple fields, use first field name as main field if len(allFields) > 0 { - // 记录完整表达式和所有字段 + // Record complete expression and all fields logger.Debug("复杂表达式 '%s' 包含多个字段: %v", fieldExpr, allFields) return allFields[0], expression, allFields } - // 如果没有字段(纯常量表达式),返回整个表达式作为字段名 + // If no fields (pure constant expression), return entire expression as field name return fieldExpr, expression, nil } -// parseSmartParameters 智能解析函数参数,正确处理引号内的逗号 +// parseSmartParameters intelligently parses function parameters, correctly handles commas within quotes func parseSmartParameters(paramsStr string) []string { var params []string var current strings.Builder @@ -564,16 +564,16 @@ func parseSmartParameters(paramsStr string) []string { return params } -// isIdentifier 检查字符串是否是有效的标识符 +// isIdentifier checks if string is a valid identifier func isIdentifier(s string) bool { if len(s) == 0 { return false } - // 第一个字符必须是字母或下划线 + // First character must be letter or underscore if !((s[0] >= 'a' && 
s[0] <= 'z') || (s[0] >= 'A' && s[0] <= 'Z') || s[0] == '_') { return false } - // 其余字符必须是字母、数字或下划线 + // Remaining characters must be letters, numbers, or underscores for i := 1; i < len(s); i++ { char := s[i] if !((char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z') || @@ -584,9 +584,9 @@ func isIdentifier(s string) bool { return true } -// 提取简单字段(向后兼容) +// extractSimpleField for backward compatibility func extractSimpleField(fieldExpr string) string { - // 如果包含运算符,提取第一个操作数作为字段名 + // If contains operators, extract first operand as field name for _, op := range []string{"/", "*", "+", "-"} { if opIndex := strings.Index(fieldExpr, op); opIndex > 0 { return strings.TrimSpace(fieldExpr[:opIndex]) @@ -604,14 +604,14 @@ func parseWindowParamsWithType(params []interface{}, windowType string) (map[str var key string for index, v := range params { if windowType == window.TypeSession { - // SessionWindow 的第一个参数是 timeout + // First parameter for SessionWindow is timeout if index == 0 { key = "timeout" } else { key = fmt.Sprintf("param%d", index) } } else { - // 其他窗口类型的参数 + // Parameters for other window types if index == 0 { key = "size" } else if index == 1 { @@ -627,7 +627,7 @@ func parseWindowParamsWithType(params []interface{}, windowType string) (map[str } result[key] = dur } else { - return nil, fmt.Errorf("%s参数必须为字符串格式(如'5s')", s) + return nil, fmt.Errorf("%s parameter must be string format (like '5s')", s) } } @@ -650,7 +650,7 @@ func parseAggregateExpression(expr string) string { return "" } -// 解析包括表达式在内的字段信息 +// Parse field information including expressions func buildSelectFieldsWithExpressions(fields []Field) ( aggMap map[string]aggregator.AggregateType, fieldMap map[string]string, @@ -664,14 +664,14 @@ func buildSelectFieldsWithExpressions(fields []Field) ( if alias := f.Alias; alias != "" { t, n, expression, allFields := ParseAggregateTypeWithExpression(f.Expression) if t != "" { - // 使用别名作为键,这样每个聚合函数都有唯一的键 + // Use alias as key so each aggregation 
function has unique key selectFields[alias] = t - // 字段映射:输出字段名 -> 输入字段名(直接为聚合器准备正确的映射) + // Field mapping: output field name -> input field name (prepare correct mapping for aggregator) if n != "" { fieldMap[alias] = n } else { - // 如果没有提取到字段名,使用别名本身 + // If no field name extracted, use alias itself fieldMap[alias] = alias } @@ -685,10 +685,10 @@ func buildSelectFieldsWithExpressions(fields []Field) ( } } } else { - // 没有别名的情况,使用表达式本身作为字段名 + // Without alias, use expression itself as field name t, n, expression, allFields := ParseAggregateTypeWithExpression(f.Expression) if t != "" && n != "" { - // 对于字符串字面量,使用解析出的字段名(去掉引号)作为键 + // For string literals, use parsed field name (remove quotes) as key selectFields[n] = t fieldMap[n] = n diff --git a/rsql/doc.go b/rsql/doc.go new file mode 100644 index 0000000..f56ebdf --- /dev/null +++ b/rsql/doc.go @@ -0,0 +1,214 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Package rsql provides SQL parsing and analysis capabilities for StreamSQL. + +This package implements a comprehensive SQL parser specifically designed for stream processing, +supporting standard SQL syntax with extensions for window functions and streaming operations. +It transforms SQL queries into executable stream processing configurations. 
+ +# Core Features + +• Complete SQL Parser - Full support for SELECT, FROM, WHERE, GROUP BY, HAVING, ORDER BY, LIMIT +• Window Function Support - Native parsing of tumbling, sliding, counting, and session windows +• Expression Analysis - Deep parsing of complex expressions, functions, and field references +• Error Recovery - Advanced error detection and recovery with detailed error reporting +• Function Validation - Integration with function registry for syntax and semantic validation +• AST Generation - Abstract Syntax Tree generation for query optimization +• Stream-Specific Extensions - Custom syntax for streaming operations and window management + +# Supported SQL Syntax + +Standard SQL clauses with streaming extensions: + + // Basic SELECT statement + SELECT field1, field2, AGG_FUNC(field3) + FROM stream + WHERE condition + GROUP BY field1, WindowFunction('params') + HAVING aggregate_condition + ORDER BY field1 ASC, field2 DESC + LIMIT 100 + + // Window functions + TumblingWindow('5s') - Non-overlapping time windows + SlidingWindow('30s', '10s') - Overlapping time windows + CountingWindow(100) - Count-based windows + SessionWindow('5m') - Session-based windows + +# Lexical Analysis + +Advanced tokenization with comprehensive token types: + + // Token types + TOKEN_SELECT, TOKEN_FROM, TOKEN_WHERE - SQL keywords + TOKEN_IDENTIFIER, TOKEN_STRING - Identifiers and literals + TOKEN_NUMBER, TOKEN_FLOAT - Numeric literals + TOKEN_OPERATOR, TOKEN_COMPARISON - Operators + TOKEN_FUNCTION, TOKEN_WINDOW - Function calls + TOKEN_LPAREN, TOKEN_RPAREN - Parentheses + TOKEN_COMMA, TOKEN_SEMICOLON - Delimiters + +# Parser Architecture + +Recursive descent parser with error recovery: + + type Parser struct { + lexer *Lexer + errorRecovery *ErrorRecovery + currentToken Token + input string + } + + // Main parsing entry point + func (p *Parser) Parse() (*SelectStatement, error) + + // Clause-specific parsers + func (p *Parser) parseSelect(stmt *SelectStatement) error + func (p 
*Parser) parseFrom(stmt *SelectStatement) error + func (p *Parser) parseWhere(stmt *SelectStatement) error + func (p *Parser) parseGroupBy(stmt *SelectStatement) error + +# Error Handling + +Comprehensive error detection and recovery: + + // Error types + type ParseError struct { + Message string + Position int + Line int + Column int + Context string + ErrorType ErrorType + } + + // Error recovery strategies + type ErrorRecovery struct { + errors []*ParseError + parser *Parser + strategies []RecoveryStrategy + } + +# Function Validation + +Integration with function registry for validation: + + // Function validator + type FunctionValidator struct { + functionRegistry map[string]FunctionInfo + } + + // Validation methods + func (fv *FunctionValidator) ValidateFunction(name string, args []Expression) error + func (fv *FunctionValidator) ValidateAggregateFunction(name string, context AggregateContext) error + func (fv *FunctionValidator) ValidateWindowFunction(name string, params []Parameter) error + +# AST Structure + +StreamSQL AST representation: + + type SelectStatement struct { + Fields []Field + Distinct bool + SelectAll bool + Source string + Condition string + Window WindowDefinition + GroupBy []string + Limit int + Having string + } + + type Field struct { + Expression string + Alias string + AggType string + } + + type WindowDefinition struct { + Type string + Params []interface{} + TsProp string + TimeUnit time.Duration + } + +# Usage Examples + +Basic SQL parsing: + + parser := NewParser("SELECT AVG(temperature) FROM stream WHERE device_id = 'sensor1'") + stmt, err := parser.Parse() + if err != nil { + log.Fatal(err) + } + + // Convert to stream configuration + config, condition, err := stmt.ToStreamConfig() + +Window function parsing: + + sql := `SELECT device_id, AVG(temperature) + FROM stream + GROUP BY device_id, TumblingWindow('5s')` + config, condition, err := Parse(sql) + +Complex query with multiple clauses: + + sql := `SELECT device_id, + 
AVG(temperature) as avg_temp, + MAX(humidity) as max_humidity + FROM stream + WHERE device_id LIKE 'sensor%' + GROUP BY device_id, SlidingWindow('1m', '30s') + HAVING avg_temp > 25 + ORDER BY avg_temp DESC + LIMIT 10` + config, condition, err := Parse(sql) + +# Configuration Generation + +Transformation from AST to stream processing configuration: + + type Config struct { + WindowConfig WindowConfig + GroupFields []string + SelectFields map[string]AggregateType + FieldAlias map[string]string + SimpleFields []string + FieldExpressions map[string]FieldExpression + FieldOrder []string + Where string + Having string + NeedWindow bool + Distinct bool + Limit int + } + + + +# Integration + +Seamless integration with other StreamSQL components: + +• Functions package - Function validation and registry integration +• Expr package - Expression parsing and evaluation +• Types package - Configuration and data type definitions +• Stream package - Configuration application and execution +• Window package - Window function parsing and configuration +*/ +package rsql \ No newline at end of file diff --git a/rsql/error.go b/rsql/error.go index ce93299..7b41ab9 100644 --- a/rsql/error.go +++ b/rsql/error.go @@ -5,7 +5,7 @@ import ( "strings" ) -// ErrorType 定义错误类型 +// ErrorType defines error types type ErrorType int const ( @@ -22,7 +22,7 @@ const ( ErrorTypeUnknownFunction ) -// ParseError 增强的解析错误结构 +// ParseError enhanced parsing error structure type ParseError struct { Type ErrorType Message string @@ -36,36 +36,36 @@ type ParseError struct { Recoverable bool } -// Error 实现 error 接口 +// Error implements the error interface func (e *ParseError) Error() string { var builder strings.Builder - // 基本错误信息 + // Basic error information builder.WriteString(fmt.Sprintf("[%s] %s", e.getErrorTypeName(), e.Message)) - // 位置信息 + // Position information if e.Line > 0 && e.Column > 0 { builder.WriteString(fmt.Sprintf(" at line %d, column %d", e.Line, e.Column)) } else if e.Position >= 0 { 
builder.WriteString(fmt.Sprintf(" at position %d", e.Position)) } - // 当前token信息 + // Current token information if e.Token != "" { builder.WriteString(fmt.Sprintf(" (found '%s')", e.Token)) } - // 期望的token + // Expected token if len(e.Expected) > 0 { builder.WriteString(fmt.Sprintf(", expected: %s", strings.Join(e.Expected, ", "))) } - // 上下文信息 + // Context information if e.Context != "" { builder.WriteString(fmt.Sprintf("\nContext: %s", e.Context)) } - // 建议 + // Suggestions if len(e.Suggestions) > 0 { builder.WriteString(fmt.Sprintf("\nSuggestions: %s", strings.Join(e.Suggestions, "; "))) } @@ -73,7 +73,7 @@ func (e *ParseError) Error() string { return builder.String() } -// getErrorTypeName 获取错误类型名称 +// getErrorTypeName gets error type name func (e *ParseError) getErrorTypeName() string { switch e.Type { case ErrorTypeSyntax: @@ -103,18 +103,18 @@ func (e *ParseError) getErrorTypeName() string { } } -// IsRecoverable 检查错误是否可恢复 +// IsRecoverable checks if error is recoverable func (e *ParseError) IsRecoverable() bool { return e.Recoverable } -// ErrorRecovery 错误恢复策略 +// ErrorRecovery error recovery strategy type ErrorRecovery struct { parser *Parser errors []*ParseError } -// NewErrorRecovery 创建错误恢复实例 +// NewErrorRecovery creates error recovery instance func NewErrorRecovery(parser *Parser) *ErrorRecovery { return &ErrorRecovery{ parser: parser, @@ -122,46 +122,46 @@ func NewErrorRecovery(parser *Parser) *ErrorRecovery { } } -// AddError 添加错误 +// AddError adds an error func (er *ErrorRecovery) AddError(err *ParseError) { er.errors = append(er.errors, err) } -// GetErrors 获取所有错误 +// GetErrors gets all errors func (er *ErrorRecovery) GetErrors() []*ParseError { return er.errors } -// HasErrors 检查是否有错误 +// HasErrors checks if there are errors func (er *ErrorRecovery) HasErrors() bool { return len(er.errors) > 0 } -// RecoverFromError 从错误中恢复 +// RecoverFromError recovers from error func (er *ErrorRecovery) RecoverFromError(errorType ErrorType) bool { switch errorType 
{ case ErrorTypeUnexpectedToken: - // 跳过当前token,尝试继续解析 + // Skip current token and continue parsing er.parser.lexer.NextToken() return true case ErrorTypeMissingToken: - // 插入默认token或跳过 + // Insert default token or skip return true case ErrorTypeInvalidExpression: - // 跳到下一个逗号或关键字 + // Jump to next comma or keyword return er.skipToNextDelimiter() case ErrorTypeSyntax: - // 语法错误也尝试恢复,继续解析 + // Syntax errors also attempt recovery and continue parsing return true case ErrorTypeUnknownKeyword: - // 未知关键字错误也尝试恢复 + // Unknown keyword errors also attempt recovery return true default: return false } } -// skipToNextDelimiter 跳到下一个分隔符 +// skipToNextDelimiter jumps to next delimiter func (er *ErrorRecovery) skipToNextDelimiter() bool { maxSkip := 10 skipped := 0 @@ -180,7 +180,7 @@ func (er *ErrorRecovery) skipToNextDelimiter() bool { return false } -// CreateSyntaxError 创建语法错误 +// CreateSyntaxError creates syntax error func CreateSyntaxError(message string, position int, token string, expected []string) *ParseError { line, column := calculateLineColumn(position) return &ParseError{ @@ -196,7 +196,7 @@ func CreateSyntaxError(message string, position int, token string, expected []st } } -// CreateLexicalError 创建词法错误 +// CreateLexicalError creates lexical error func CreateLexicalError(message string, position int, char byte) *ParseError { line, column := calculateLineColumn(position) return &ParseError{ @@ -211,7 +211,7 @@ func CreateLexicalError(message string, position int, char byte) *ParseError { } } -// CreateLexicalErrorWithPosition 创建词法错误(带准确位置信息) +// CreateLexicalErrorWithPosition creates lexical error with accurate position func CreateLexicalErrorWithPosition(message string, position int, line int, column int, char byte) *ParseError { return &ParseError{ Type: ErrorTypeLexical, @@ -225,7 +225,7 @@ func CreateLexicalErrorWithPosition(message string, position int, line int, colu } } -// CreateUnexpectedTokenError 创建意外token错误 +// CreateUnexpectedTokenError creates 
unexpected token error func CreateUnexpectedTokenError(found string, expected []string, position int) *ParseError { line, column := calculateLineColumn(position) return &ParseError{ @@ -241,7 +241,7 @@ func CreateUnexpectedTokenError(found string, expected []string, position int) * } } -// CreateMissingTokenError 创建缺失token错误 +// CreateMissingTokenError creates missing token error func CreateMissingTokenError(expected string, position int) *ParseError { line, column := calculateLineColumn(position) return &ParseError{ @@ -256,7 +256,7 @@ func CreateMissingTokenError(expected string, position int) *ParseError { } } -// CreateUnknownFunctionError 创建未知函数错误 +// CreateUnknownFunctionError creates unknown function error func CreateUnknownFunctionError(functionName string, position int) *ParseError { line, column := calculateLineColumn(position) return &ParseError{ @@ -271,17 +271,17 @@ func CreateUnknownFunctionError(functionName string, position int) *ParseError { } } -// calculateLineColumn 计算行列号 -// 注意:这是一个简化的实现,实际的行列号应该由lexer提供 +// calculateLineColumn calculates line and column numbers +// Note: This is a simplified implementation, actual line/column should be provided by lexer func calculateLineColumn(position int) (int, int) { - // 简化实现,实际应该基于输入文本计算 - // 这里返回基于位置的估算值 + // Simplified implementation, should be calculated based on input text + // Returns estimated value based on position line := position/50 + 1 // 假设平均每行50个字符 column := position%50 + 1 return line, column } -// generateSuggestions 生成建议 +// generateSuggestions generates suggestions func generateSuggestions(found string, expected []string) []string { suggestions := make([]string, 0) @@ -289,7 +289,7 @@ func generateSuggestions(found string, expected []string) []string { suggestions = append(suggestions, fmt.Sprintf("Try using '%s' instead of '%s'", expected[0], found)) } - // 基于常见错误模式生成建议 + // Generate suggestions based on common error patterns switch strings.ToUpper(found) { case "SELCT": suggestions = 
append(suggestions, "Did you mean 'SELECT'?") @@ -306,11 +306,11 @@ func generateSuggestions(found string, expected []string) []string { return suggestions } -// generateFunctionSuggestions 生成函数建议 +// generateFunctionSuggestions generates function suggestions func generateFunctionSuggestions(functionName string) []string { suggestions := make([]string, 0) - // 基于常见函数名拼写错误生成建议 + // Generate suggestions based on common function name misspellings funcLower := strings.ToLower(functionName) switch { case strings.Contains(funcLower, "coun"): @@ -345,7 +345,7 @@ func generateFunctionSuggestions(functionName string) []string { suggestions = append(suggestions, "Did you mean 'CEILING' function?") } - // 通用建议 + // Generic suggestions suggestions = append(suggestions, "Check if the function name is spelled correctly") suggestions = append(suggestions, "Confirm that the function is registered or is a built-in function") suggestions = append(suggestions, "View the list of available functions") @@ -353,7 +353,7 @@ func generateFunctionSuggestions(functionName string) []string { return suggestions } -// FormatErrorContext 格式化错误上下文 +// FormatErrorContext formats error context func FormatErrorContext(input string, position int, contextLength int) string { if position < 0 || position >= len(input) { return "" diff --git a/rsql/function_validator.go b/rsql/function_validator.go index bcddc30..2c91f02 100644 --- a/rsql/function_validator.go +++ b/rsql/function_validator.go @@ -7,33 +7,33 @@ import ( "github.com/rulego/streamsql/functions" ) -// FunctionValidator 函数验证器 +// FunctionValidator validates SQL functions in expressions type FunctionValidator struct { errorRecovery *ErrorRecovery } -// NewFunctionValidator 创建函数验证器 +// NewFunctionValidator creates a new function validator func NewFunctionValidator(errorRecovery *ErrorRecovery) *FunctionValidator { return &FunctionValidator{ errorRecovery: errorRecovery, } } -// ValidateExpression 验证表达式中的函数 +// ValidateExpression validates 
functions within expressions func (fv *FunctionValidator) ValidateExpression(expression string, position int) { functionCalls := fv.extractFunctionCalls(expression) for _, funcCall := range functionCalls { funcName := funcCall.Name - // 检查函数是否在注册表中 + // Check if function exists in registry if _, exists := functions.Get(funcName); !exists { - // 检查是否是内置函数 + // Check if it's a built-in function if !fv.isBuiltinFunction(funcName) { - // 检查是否是expr-lang函数 + // Check if it's an expr-lang function bridge := functions.GetExprBridge() if !bridge.IsExprLangFunction(funcName) { - // 创建未知函数错误 + // Create unknown function error err := CreateUnknownFunctionError(funcName, position+funcCall.Position) fv.errorRecovery.AddError(err) } @@ -42,29 +42,29 @@ func (fv *FunctionValidator) ValidateExpression(expression string, position int) } } -// FunctionCall 函数调用信息 +// FunctionCall contains function call information type FunctionCall struct { Name string Position int } -// extractFunctionCalls 从表达式中提取函数调用 +// extractFunctionCalls extracts function calls from expressions func (fv *FunctionValidator) extractFunctionCalls(expression string) []FunctionCall { var functionCalls []FunctionCall - // 使用正则表达式匹配函数调用模式: identifier( + // Use regex to match function call patterns: identifier( funcPattern := regexp.MustCompile(`([a-zA-Z_][a-zA-Z0-9_]*)\s*\(`) matches := funcPattern.FindAllStringSubmatchIndex(expression, -1) for _, match := range matches { - // match[0] 是整个匹配的开始位置 - // match[1] 是整个匹配的结束位置 - // match[2] 是第一个捕获组(函数名)的开始位置 - // match[3] 是第一个捕获组(函数名)的结束位置 + // match[0] is the start position of entire match + // match[1] is the end position of entire match + // match[2] is the start position of first capture group (function name) + // match[3] is the end position of first capture group (function name) funcName := expression[match[2]:match[3]] position := match[2] - // 过滤掉关键字(如 CASE、IF 等) + // Filter out keywords (like CASE, IF, etc.) 
if !fv.isKeyword(funcName) { functionCalls = append(functionCalls, FunctionCall{ Name: funcName, @@ -76,7 +76,7 @@ func (fv *FunctionValidator) extractFunctionCalls(expression string) []FunctionC return functionCalls } -// isBuiltinFunction 检查是否是内置函数 +// isBuiltinFunction checks if it's a built-in function func (fv *FunctionValidator) isBuiltinFunction(funcName string) bool { builtinFunctions := []string{ "abs", "sqrt", "sin", "cos", "tan", "floor", "ceil", "round", @@ -92,7 +92,7 @@ func (fv *FunctionValidator) isBuiltinFunction(funcName string) bool { return false } -// isKeyword 检查是否是SQL关键字 +// isKeyword checks if it's an SQL keyword func (fv *FunctionValidator) isKeyword(word string) bool { keywords := []string{ "SELECT", "FROM", "WHERE", "GROUP", "BY", "HAVING", "ORDER", diff --git a/rsql/lexer.go b/rsql/lexer.go index cf7f9c8..bd84a3a 100644 --- a/rsql/lexer.go +++ b/rsql/lexer.go @@ -242,8 +242,8 @@ func (l *Lexer) peekChar() byte { func (l *Lexer) readIdentifier() string { position := l.pos - // 只处理基本标识符和点号(用于嵌套字段访问) - // 数组索引(方括号)应该由解析器处理,而不是词法分析器 + // Only handle basic identifiers and dots (for nested field access) + // Array indexing (brackets) should be handled by the parser, not the lexer for isLetter(l.ch) || isDigit(l.ch) || l.ch == '.' 
{ l.readChar() } @@ -254,16 +254,16 @@ func (l *Lexer) readPreviousIdentifier() string { // 保存当前位置 endPos := l.pos - // 向前移动直到找到非字母字符或到达输入开始 + // Move backward until finding a non-letter character or reaching the input start startPos := endPos - 1 for startPos >= 0 && isLetter(l.input[startPos]) { startPos-- } - // 调整到第一个字母字符的位置 + // Adjust to the position of the first letter character startPos++ - // 如果找到有效的标识符,返回它 + // If a valid identifier is found, return it if startPos < endPos { return l.input[startPos:endPos] } @@ -280,8 +280,8 @@ func (l *Lexer) readNumber() string { } func (l *Lexer) readString() string { - quoteChar := l.ch // 记录引号类型(单引号或双引号) - startPos := l.pos // 记录开始位置(包含引号) + quoteChar := l.ch // Record the quote type (single or double quote) + startPos := l.pos // Record the start position (including the quote) l.readChar() // 跳过开头引号 for l.ch != quoteChar && l.ch != 0 { @@ -292,7 +292,7 @@ func (l *Lexer) readString() string { l.readChar() // 跳过结尾引号 } - // 返回包含引号的完整字符串 + // Return the complete string including quotes return l.input[startPos:l.pos] } @@ -363,7 +363,7 @@ func (l *Lexer) lookupIdent(ident string) Token { case "END": return Token{Type: TokenEND, Value: ident} default: - // 检查是否是常见的拼写错误 + // Check for common typos if l.errorRecovery != nil { l.checkForTypos(ident, upperIdent) } @@ -416,7 +416,7 @@ func (l *Lexer) readStringToken(pos, line, column int) Token { } if l.ch == 0 { - // 未闭合的字符串 + // Unterminated string if l.errorRecovery != nil { err := &ParseError{ Type: ErrorTypeUnterminatedString, @@ -452,7 +452,7 @@ func (l *Lexer) readQuotedIdentToken(pos, line, column int) Token { } if l.ch == 0 { - // 未闭合的反引号标识符 + // Unterminated backtick identifier if l.errorRecovery != nil { err := &ParseError{ Type: ErrorTypeUnterminatedString, diff --git a/rsql/parser.go b/rsql/parser.go index 47b7e53..38ba2d3 100644 --- a/rsql/parser.go +++ b/rsql/parser.go @@ -191,13 +191,13 @@ func (p *Parser) createCombinedError() error { } func (p *Parser) 
parseSelect(stmt *SelectStatement) error { - // 验证第一个token是否为SELECT + // Validate if first token is SELECT firstToken := p.lexer.NextToken() if firstToken.Type != TokenSELECT { - // 如果不是SELECT,检查是否是拼写错误 + // If not SELECT, check for typos if firstToken.Type == TokenIdent { - // 这里的错误已经由lexer的checkForTypos处理了 - // 我们继续解析,假设用户想要SELECT + // The error here has been handled by lexer's checkForTypos + // Continue parsing, assuming user meant SELECT } else { return CreateSyntaxError( fmt.Sprintf("Expected SELECT, got %s", firstToken.Value), @@ -237,7 +237,7 @@ func (p *Parser) parseSelect(stmt *SelectStatement) error { for { fieldCount++ - // 安全检查:防止无限循环 + // Safety check: prevent infinite loops if fieldCount > maxFields { return errors.New("select field list parsing exceeded maximum fields, possible syntax error") } @@ -369,7 +369,7 @@ func (p *Parser) parseWhere(stmt *SelectStatement) error { return nil } - // 设置最大次数限制,防止无限循环 + // Set max iterations limit to prevent infinite loops maxIterations := 100 iterations := 0 @@ -410,16 +410,16 @@ func (p *Parser) parseWhere(stmt *SelectStatement) error { case TokenNOT: conditions = append(conditions, "NOT") default: - // 处理字符串值的引号 - if len(conditions) > 0 && conditions[len(conditions)-1] == "'" { - conditions[len(conditions)-1] = conditions[len(conditions)-1] + tok.Value - } else { - conditions = append(conditions, tok.Value) - } + // Handle string value quotes + if len(conditions) > 0 && conditions[len(conditions)-1] == "'" { + conditions[len(conditions)-1] = conditions[len(conditions)-1] + tok.Value + } else { + conditions = append(conditions, tok.Value) + } } } - // 验证WHERE条件中的函数 + // Validate functions in WHERE condition whereCondition := strings.Join(conditions, " ") if whereCondition != "" { validator := NewFunctionValidator(p.errorRecovery) @@ -454,7 +454,7 @@ func (p *Parser) parseWindowFunction(stmt *SelectStatement, winType string) erro continue } //valTok := p.lexer.NextToken() - // 处理引号包裹的值 + // Handle quoted values 
if strings.HasPrefix(valTok.Value, "'") && strings.HasSuffix(valTok.Value, "'") { valTok.Value = strings.Trim(valTok.Value, "'") } @@ -611,7 +611,7 @@ func (p *Parser) parseWith(stmt *SelectStatement) error { if strings.HasPrefix(next.Value, "'") && strings.HasSuffix(next.Value, "'") { next.Value = strings.Trim(next.Value, "'") } - // 检查Window是否已初始化,如果未初始化则创建新的WindowDefinition + // Check if Window is initialized; if not, create new WindowDefinition if stmt.Window.Type == "" { stmt.Window = WindowDefinition{ TsProp: next.Value, @@ -643,7 +643,7 @@ func (p *Parser) parseWith(stmt *SelectStatement) error { default: } - // 检查Window是否已初始化,如果未初始化则创建新的WindowDefinition + // Check if Window is initialized; if not, create new WindowDefinition if stmt.Window.Type == "" { stmt.Window = WindowDefinition{ TimeUnit: timeUnit, @@ -862,7 +862,7 @@ func (p *Parser) parseHaving(stmt *SelectStatement) error { case TokenNOT: conditions = append(conditions, "NOT") default: - // 处理字符串值的引号 + // Handle string value quotes if len(conditions) > 0 && conditions[len(conditions)-1] == "'" { conditions[len(conditions)-1] = conditions[len(conditions)-1] + tok.Value } else { @@ -871,7 +871,7 @@ func (p *Parser) parseHaving(stmt *SelectStatement) error { } } - // 验证HAVING条件中的函数 + // Validate functions in HAVING condition havingCondition := strings.Join(conditions, " ") if havingCondition != "" { validator := NewFunctionValidator(p.errorRecovery) diff --git a/stream/doc.go b/stream/doc.go new file mode 100644 index 0000000..6b7f012 --- /dev/null +++ b/stream/doc.go @@ -0,0 +1,281 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Package stream provides the core stream processing engine for StreamSQL. + +This package implements the central stream processing pipeline that orchestrates data flow, +window management, aggregation, filtering, and result generation. It serves as the execution +engine that brings together all StreamSQL components into a cohesive streaming system. + +# Core Features + +• Real-time Stream Processing - High-throughput, low-latency data stream processing +• Window Management - Integration with all window types (tumbling, sliding, counting, session) +• Aggregation Engine - Efficient aggregation with incremental computation +• Filtering Pipeline - Multi-stage filtering with WHERE and HAVING clause support +• Performance Modes - Configurable performance profiles for different use cases +• Metrics and Monitoring - Comprehensive performance metrics and health monitoring +• Persistence Support - Optional data persistence for reliability and recovery +• Backpressure Handling - Intelligent backpressure management and overflow strategies + +# Stream Architecture + +The stream processing pipeline consists of several key components: + + type Stream struct { + dataChan chan map[string]interface{} // Input data channel + filter condition.Condition // WHERE clause filter + Window window.Window // Window manager + aggregator aggregator.Aggregator // Aggregation engine + config types.Config // Stream configuration + sinks []func([]map[string]interface{}) // Result processors + resultChan chan []map[string]interface{} // Result channel + persistenceManager 
*PersistenceManager // Data persistence + dataStrategy DataProcessingStrategy // Data processing strategy + } + +# Performance Modes + +Configurable performance profiles for different scenarios: + + // High Performance Mode + // - Optimized for maximum throughput + // - Larger buffer sizes + // - Batch processing optimization + stream := NewStreamWithHighPerformance(config) + + // Low Latency Mode + // - Optimized for minimal processing delay + // - Smaller buffer sizes + // - Immediate processing + stream := NewStreamWithLowLatency(config) + + // Zero Data Loss Mode + // - Guaranteed data persistence + // - Synchronous processing + // - Enhanced error recovery + stream := NewStreamWithZeroDataLoss(config) + + // Custom Performance Mode + // - User-defined performance parameters + customConfig := &PerformanceConfig{ + BufferSize: 1000, + BatchSize: 50, + FlushInterval: time.Second, + WorkerPoolSize: 4, + } + stream := NewStreamWithCustomPerformance(config, *customConfig) + +# Data Processing Pipeline + +Multi-stage processing pipeline with optimized data flow: + + 1. Data Ingestion + ├── Input validation and type checking + ├── Timestamp extraction and normalization + └── Initial data transformation + + 2. Filtering (WHERE clause) + ├── Field-based filtering + ├── Expression evaluation + └── Early data rejection + + 3. Window Processing + ├── Window assignment + ├── Data buffering + └── Window trigger management + + 4. Aggregation + ├── Group-by processing + ├── Aggregate function execution + └── Incremental computation + + 5. Post-Aggregation Filtering (HAVING clause) + ├── Aggregate result filtering + ├── Complex condition evaluation + └── Final result validation + + 6. 
Result Generation + ├── Field projection + ├── Alias application + └── Output formatting + +# Window Integration + +Seamless integration with all window types: + + // Tumbling Windows - Non-overlapping time-based windows + config.WindowConfig = WindowConfig{ + Type: "tumbling", + Params: map[string]interface{}{ + "size": "5s", + }, + } + + // Sliding Windows - Overlapping time-based windows + config.WindowConfig = WindowConfig{ + Type: "sliding", + Params: map[string]interface{}{ + "size": "30s", + "slide": "10s", + }, + } + + // Counting Windows - Count-based windows + config.WindowConfig = WindowConfig{ + Type: "counting", + Params: map[string]interface{}{ + "count": 100, + }, + } + + // Session Windows - Activity-based windows + config.WindowConfig = WindowConfig{ + Type: "session", + Params: map[string]interface{}{ + "timeout": "5m", + "groupBy": "user_id", + }, + } + +# Metrics and Monitoring + +Comprehensive performance monitoring: + + type MetricsManager struct { + processedCount int64 // Total processed records + filteredCount int64 // Filtered out records + aggregatedCount int64 // Aggregated records + errorCount int64 // Processing errors + processingTime time.Duration // Average processing time + throughput float64 // Records per second + memoryUsage int64 // Memory consumption + bufferUtilization float64 // Buffer usage percentage + } + + // Get basic statistics + stats := stream.GetStats() + fmt.Printf("Processed: %d, Errors: %d\n", stats["processed"], stats["errors"]) + + // Get detailed performance metrics + detailed := stream.GetDetailedStats() + fmt.Printf("Throughput: %.2f records/sec\n", detailed["throughput"]) + fmt.Printf("Memory Usage: %d bytes\n", detailed["memory_usage"]) + +# Persistence and Reliability + +Optional data persistence for enhanced reliability: + + type PersistenceManager struct { + enabled bool + storageType string // "memory", "file", "database" + batchSize int // Persistence batch size + flushInterval time.Duration // 
Automatic flush interval + recoveryMode string // Recovery strategy + } + + // Enable persistence + stream.EnablePersistence(PersistenceConfig{ + StorageType: "file", + BatchSize: 100, + FlushInterval: 5 * time.Second, + RecoveryMode: "automatic", + }) + +# Backpressure Management + +Intelligent handling of system overload: + + // Overflow strategies + const ( + OverflowStrategyDrop = "drop" // Drop oldest data + OverflowStrategyBlock = "block" // Block new data + OverflowStrategySpill = "spill" // Spill to disk + OverflowStrategyCompress = "compress" // Compress data + ) + + // Configure backpressure handling + config.PerformanceConfig.OverflowStrategy = OverflowStrategySpill + config.PerformanceConfig.BufferSize = 10000 + config.PerformanceConfig.HighWaterMark = 0.8 + +# Usage Examples + +Basic stream processing: + + // Create stream with default configuration + stream, err := NewStream(config) + if err != nil { + log.Fatal(err) + } + + // Register result handler + stream.AddSink(func(results []map[string]interface{}) { + fmt.Printf("Results: %v\n", results) + }) + + // Start processing + stream.Start() + + // Send data + stream.Emit(map[string]interface{}{ + "device_id": "sensor001", + "temperature": 25.5, + "timestamp": time.Now(), + }) + +High-performance stream processing: + + // Create high-performance stream + stream, err := NewStreamWithHighPerformance(config) + + // Configure for maximum throughput + stream.SetBufferSize(50000) + stream.SetBatchSize(1000) + stream.SetWorkerPoolSize(8) + + // Enable metrics monitoring + stream.EnableMetrics(true) + + // Process data in batches + for _, batch := range dataBatches { + stream.EmitBatch(batch) + } + +Synchronous processing for non-aggregation queries: + + // Process single record synchronously + result, err := stream.ProcessSync(data) + if err != nil { + log.Printf("Processing error: %v", err) + } else if result != nil { + fmt.Printf("Immediate result: %v\n", result) + } + +# Integration + +Central integration 
point for all StreamSQL components: + +• RSQL package - Configuration parsing and application +• Window package - Window lifecycle management +• Aggregator package - Aggregation execution +• Functions package - Function execution in expressions +• Condition package - Filter condition evaluation +• Types package - Data type handling and configuration +• Logger package - Comprehensive logging and debugging +*/ +package stream \ No newline at end of file diff --git a/stream/handler_data.go b/stream/handler_data.go index a2b119f..bffa023 100644 --- a/stream/handler_data.go +++ b/stream/handler_data.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package stream import ( diff --git a/stream/handler_result.go b/stream/handler_result.go index 47de456..6c16fa6 100644 --- a/stream/handler_result.go +++ b/stream/handler_result.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + package stream import ( @@ -27,17 +43,17 @@ func (s *Stream) startSinkWorkerPool(workerCount int) { go func(workerID int) { for { select { - case task := <-s.sinkWorkerPool: - // Execute sink task - func() { - defer func() { - // Enhanced error recovery to prevent single worker crash - if r := recover(); r != nil { - logger.Error("Sink worker %d panic recovered: %v", workerID, r) - } + case task := <-s.sinkWorkerPool: + // Execute sink task + func() { + defer func() { + // Enhanced error recovery to prevent single worker crash + if r := recover(); r != nil { + logger.Error("Sink worker %d panic recovered: %v", workerID, r) + } + }() + task() }() - task() - }() case <-s.done: return } diff --git a/stream/manager_metrics.go b/stream/manager_metrics.go index c39dd68..87835f6 100644 --- a/stream/manager_metrics.go +++ b/stream/manager_metrics.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package stream import ( diff --git a/stream/metrics.go b/stream/metrics.go index 0494e14..468e95e 100644 --- a/stream/metrics.go +++ b/stream/metrics.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package stream import ( @@ -141,4 +157,4 @@ func (sc *StatsCollector) GetDetailedStats(basicStats map[string]int64) map[stri DropRate: dropRate, PerformanceLevel: AssessPerformanceLevel(dataUsage, dropRate), } -} \ No newline at end of file +} diff --git a/stream/persistence.go b/stream/persistence.go index e3d0eb8..3c5103f 100644 --- a/stream/persistence.go +++ b/stream/persistence.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package stream import ( diff --git a/stream/processor_data.go b/stream/processor_data.go index 8bce5f2..a0269ea 100644 --- a/stream/processor_data.go +++ b/stream/processor_data.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package stream import ( @@ -92,12 +108,12 @@ func (dp *DataProcessor) registerExpressionCalculator(field string, fieldExpr ty currentFieldExpr.Expression, currentFieldExpr.Fields, func(data interface{}) (interface{}, error) { - // Ensure data is map[string]interface{} type - if dataMap, ok := data.(map[string]interface{}); ok { - return dp.evaluateExpressionForAggregation(currentFieldExpr, dataMap) - } - return nil, fmt.Errorf("unsupported data type: %T, expected map[string]interface{}", data) - }, + // Ensure data is map[string]interface{} type + if dataMap, ok := data.(map[string]interface{}); ok { + return dp.evaluateExpressionForAggregation(currentFieldExpr, dataMap) + } + return nil, fmt.Errorf("unsupported data type: %T, expected map[string]interface{}", data) + }, ) } diff --git a/stream/strategy.go b/stream/strategy.go index 7040a27..999dfe3 100644 --- a/stream/strategy.go +++ b/stream/strategy.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package stream import ( diff --git a/stream/stream.go b/stream/stream.go index bb1098b..d5d0d7d 100644 --- a/stream/stream.go +++ b/stream/stream.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package stream import ( diff --git a/stream/stream_factory.go b/stream/stream_factory.go index d3115a8..f69f894 100644 --- a/stream/stream_factory.go +++ b/stream/stream_factory.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package stream import ( diff --git a/streamsql.go b/streamsql.go index 7c46814..4f3417b 100644 --- a/streamsql.go +++ b/streamsql.go @@ -25,10 +25,10 @@ import ( "github.com/rulego/streamsql/utils/table" ) -// Streamsql 是StreamSQL流处理引擎的主要接口。 -// 它封装了SQL解析、流处理、窗口管理等核心功能。 +// Streamsql is the main interface for the StreamSQL streaming engine. 
+// It encapsulates core functionality including SQL parsing, stream processing, and window management. // -// 使用示例: +// Usage example: // // ssql := streamsql.New() // err := ssql.Execute("SELECT AVG(temperature) FROM stream GROUP BY TumblingWindow('5s')") @@ -36,42 +36,39 @@ import ( type Streamsql struct { stream *stream.Stream - // 性能配置模式 + // Performance configuration mode performanceMode string // "default", "high_performance", "low_latency", "zero_data_loss", "custom" customConfig *types.PerformanceConfig - // 新增:同步处理模式配置 - enableSyncMode bool // 是否启用同步模式(用于非聚合查询) - - // 保存原始SELECT字段顺序,用于表格输出时保持字段顺序 + // Save original SELECT field order to maintain field order for table output fieldOrder []string } -// New 创建一个新的StreamSQL实例。 -// 支持通过可选的Option参数进行配置。 +// New creates a new StreamSQL instance. +// Supports configuration through optional Option parameters. // -// 参数: -// - options: 可变长度的配置选项,用于自定义StreamSQL行为 +// Parameters: +// - options: Variable configuration options for customizing StreamSQL behavior // -// 返回值: -// - *Streamsql: 新创建的StreamSQL实例 +// Returns: +// - *Streamsql: Newly created StreamSQL instance // -// 示例: +// Examples: // -// // 创建默认实例 +// // Create default instance // ssql := streamsql.New() // -// // 创建高性能实例 +// // Create high performance instance // ssql := streamsql.New(streamsql.WithHighPerformance()) // -// // 创建零数据丢失实例 +// // Create zero data loss instance // ssql := streamsql.New(streamsql.WithZeroDataLoss()) func New(options ...Option) *Streamsql { s := &Streamsql{ - performanceMode: "default", // 默认使用标准性能配置 + performanceMode: "default", // Default to standard performance configuration } - // 应用所有配置选项 + // Apply all configuration options for _, option := range options { option(s) } @@ -79,39 +76,39 @@ func New(options ...Option) *Streamsql { return s } -// Execute 解析并执行SQL查询,创建对应的流处理管道。 -// 这是StreamSQL的核心方法,负责将SQL转换为实际的流处理逻辑。 +// Execute parses and executes SQL queries, creating corresponding stream processing pipelines. 
+// This is the core method of StreamSQL, responsible for converting SQL into actual stream processing logic. // -// 支持的SQL语法: -// - SELECT 子句: 选择字段和聚合函数 -// - FROM 子句: 指定数据源(通常为'stream') -// - WHERE 子句: 数据过滤条件 -// - GROUP BY 子句: 分组字段和窗口函数 -// - HAVING 子句: 聚合结果过滤 -// - LIMIT 子句: 限制结果数量 -// - DISTINCT: 结果去重 +// Supported SQL syntax: +// - SELECT clause: Select fields and aggregate functions +// - FROM clause: Specify data source (usually 'stream') +// - WHERE clause: Data filtering conditions +// - GROUP BY clause: Grouping fields and window functions +// - HAVING clause: Aggregate result filtering +// - LIMIT clause: Limit result count +// - DISTINCT: Result deduplication // -// 窗口函数: -// - TumblingWindow('5s'): 滚动窗口 -// - SlidingWindow('30s', '10s'): 滑动窗口 -// - CountingWindow(100): 计数窗口 -// - SessionWindow('5m'): 会话窗口 +// Window functions: +// - TumblingWindow('5s'): Tumbling window +// - SlidingWindow('30s', '10s'): Sliding window +// - CountingWindow(100): Counting window +// - SessionWindow('5m'): Session window // -// 参数: -// - sql: 要执行的SQL查询语句 +// Parameters: +// - sql: SQL query statement to execute // -// 返回值: -// - error: 如果SQL解析或执行失败,返回相应错误 +// Returns: +// - error: Returns error if SQL parsing or execution fails // -// 示例: +// Examples: // -// // 基本聚合查询 +// // Basic aggregation query // err := ssql.Execute("SELECT deviceId, AVG(temperature) FROM stream GROUP BY deviceId, TumblingWindow('5s')") // -// // 带过滤条件的查询 +// // Query with filtering conditions // err := ssql.Execute("SELECT * FROM stream WHERE temperature > 30") // -// // 复杂的窗口聚合 +// // Complex window aggregation // err := ssql.Execute(` // SELECT deviceId, // AVG(temperature) as avg_temp, @@ -123,16 +120,16 @@ func New(options ...Option) *Streamsql { // LIMIT 100 // `) func (s *Streamsql) Execute(sql string) error { - // 解析SQL语句 + // Parse SQL statement config, condition, err := rsql.Parse(sql) if err != nil { - return fmt.Errorf("SQL解析失败: %w", err) + return fmt.Errorf("SQL parsing failed: %w", 
err) } - // 从解析结果中获取字段顺序信息 + // Get field order information from parsing result s.fieldOrder = config.FieldOrder - // 根据性能模式创建流处理器 + // Create stream processor based on performance mode var streamInstance *stream.Stream switch s.performanceMode { @@ -153,30 +150,30 @@ func (s *Streamsql) Execute(sql string) error { } if err != nil { - return fmt.Errorf("创建流处理器失败: %w", err) + return fmt.Errorf("failed to create stream processor: %w", err) } s.stream = streamInstance - // 注册过滤条件 + // Register filter condition if err = s.stream.RegisterFilter(condition); err != nil { - return fmt.Errorf("注册过滤条件失败: %w", err) + return fmt.Errorf("failed to register filter condition: %w", err) } - // 启动流处理 + // Start stream processing s.stream.Start() return nil } -// Emit 添加数据到流处理管道。 -// 接受类型安全的map[string]interface{}格式数据。 +// Emit adds data to the stream processing pipeline. +// Accepts type-safe map[string]interface{} format data. // -// 参数: -// - data: 要添加的数据,必须是map[string]interface{}类型 +// Parameters: +// - data: Data to add, must be map[string]interface{} type // -// 示例: +// Examples: // -// // 添加设备数据 +// // Add device data // ssql.Emit(map[string]interface{}{ // "deviceId": "sensor001", // "temperature": 25.5, @@ -184,7 +181,7 @@ func (s *Streamsql) Execute(sql string) error { // "timestamp": time.Now(), // }) // -// // 添加用户行为数据 +// // Add user behavior data // ssql.Emit(map[string]interface{}{ // "userId": "user123", // "action": "click", @@ -196,43 +193,43 @@ func (s *Streamsql) Emit(data map[string]interface{}) { } } -// EmitSync 同步处理数据,立即返回处理结果。 -// 仅适用于非聚合查询,聚合查询会返回错误。 -// 接受类型安全的map[string]interface{}格式数据。 +// EmitSync processes data synchronously, returning results immediately. +// Only applicable for non-aggregation queries, aggregation queries will return an error. +// Accepts type-safe map[string]interface{} format data. 
// -// 参数: -// - data: 要处理的数据,必须是map[string]interface{}类型 +// Parameters: +// - data: Data to process, must be map[string]interface{} type // -// 返回值: -// - map[string]interface{}: 处理后的结果数据,如果不匹配过滤条件返回nil -// - error: 处理错误 +// Returns: +// - map[string]interface{}: Processed result data, returns nil if filter conditions don't match +// - error: Processing error // -// 示例: +// Examples: // // result, err := ssql.EmitSync(map[string]interface{}{ // "deviceId": "sensor001", // "temperature": 25.5, // }) // if err != nil { -// log.Printf("处理错误: %v", err) +// log.Printf("processing error: %v", err) // } else if result != nil { -// // 立即使用处理结果(result是map[string]interface{}类型) -// fmt.Printf("处理结果: %v\n", result) +// // Use processed result immediately (result is map[string]interface{} type) +// fmt.Printf("Processing result: %v\n", result) // } func (s *Streamsql) EmitSync(data map[string]interface{}) (map[string]interface{}, error) { if s.stream == nil { - return nil, fmt.Errorf("stream未初始化") + return nil, fmt.Errorf("stream not initialized") } - // 检查是否为非聚合查询 + // Check if it's a non-aggregation query if s.stream.IsAggregationQuery() { - return nil, fmt.Errorf("同步模式仅支持非聚合查询,聚合查询请使用Emit()方法") + return nil, fmt.Errorf("synchronous mode only supports non-aggregation queries, use Emit() method for aggregation queries") } return s.stream.ProcessSync(data) } -// IsAggregationQuery 检查当前查询是否为聚合查询 +// IsAggregationQuery checks if the current query is an aggregation query func (s *Streamsql) IsAggregationQuery() bool { if s.stream == nil { return false @@ -240,36 +237,36 @@ func (s *Streamsql) IsAggregationQuery() bool { return s.stream.IsAggregationQuery() } -// Stream 返回底层的流处理器实例。 -// 通过此方法可以访问更底层的流处理功能。 +// Stream returns the underlying stream processor instance. +// Provides access to lower-level stream processing functionality. 
// -// 返回值: -// - *stream.Stream: 底层流处理器实例,如果未执行SQL则返回nil +// Returns: +// - *stream.Stream: Underlying stream processor instance, returns nil if SQL not executed // -// 常用场景: -// - 添加结果处理回调 -// - 获取结果通道 -// - 手动控制流处理生命周期 +// Common use cases: +// - Add result processing callbacks +// - Get result channel +// - Manually control stream processing lifecycle // -// 示例: +// Examples: // -// // 添加结果处理回调 +// // Add result processing callback // ssql.Stream().AddSink(func(results []map[string]interface{}) { -// fmt.Printf("处理结果: %v\n", results) +// fmt.Printf("Processing results: %v\n", results) // }) // -// // 获取结果通道 +// // Get result channel // resultChan := ssql.Stream().GetResultsChan() // go func() { // for result := range resultChan { -// // 处理结果 +// // Process result // } // }() func (s *Streamsql) Stream() *stream.Stream { return s.stream } -// GetStats 获取流处理统计信息 +// GetStats returns stream processing statistics func (s *Streamsql) GetStats() map[string]int64 { if s.stream != nil { return s.stream.GetStats() @@ -277,7 +274,7 @@ func (s *Streamsql) GetStats() map[string]int64 { return make(map[string]int64) } -// GetDetailedStats 获取详细的性能统计信息 +// GetDetailedStats returns detailed performance statistics func (s *Streamsql) GetDetailedStats() map[string]interface{} { if s.stream != nil { return s.stream.GetDetailedStats() @@ -285,40 +282,40 @@ func (s *Streamsql) GetDetailedStats() map[string]interface{} { return make(map[string]interface{}) } -// Stop 停止流处理器,释放相关资源。 -// 调用此方法后,流处理器将停止接收和处理新数据。 +// Stop stops the stream processor and releases related resources. +// After calling this method, the stream processor will stop receiving and processing new data. // -// 建议在应用程序退出前调用此方法进行清理: +// Recommended to call this method for cleanup before application exit: // // defer ssql.Stop() // -// 注意: 停止后的StreamSQL实例不能重新启动,需要创建新实例。 +// Note: StreamSQL instance cannot be restarted after stopping, create a new instance. 
func (s *Streamsql) Stop() { if s.stream != nil { s.stream.Stop() } } -// AddSink 直接添加结果处理回调函数。 -// 这是对 Stream().AddSink() 的便捷封装,使API调用更简洁。 +// AddSink directly adds result processing callback functions. +// Convenience wrapper for Stream().AddSink() for cleaner API calls. // -// 参数: -// - sink: 结果处理函数,接收[]map[string]interface{}类型的结果数据 +// Parameters: +// - sink: Result processing function, receives []map[string]interface{} type result data // -// 示例: +// Examples: // -// // 直接添加结果处理 +// // Directly add result processing // ssql.AddSink(func(results []map[string]interface{}) { -// fmt.Printf("处理结果: %v\n", results) +// fmt.Printf("Processing results: %v\n", results) // }) // -// // 添加多个处理器 +// // Add multiple processors // ssql.AddSink(func(results []map[string]interface{}) { -// // 保存到数据库 +// // Save to database // saveToDatabase(results) // }) // ssql.AddSink(func(results []map[string]interface{}) { -// // 发送到消息队列 +// // Send to message queue // sendToQueue(results) // }) func (s *Streamsql) AddSink(sink func([]map[string]interface{})) { @@ -327,20 +324,20 @@ func (s *Streamsql) AddSink(sink func([]map[string]interface{})) { } } -// PrintTable 以表格形式打印结果到控制台,类似数据库输出格式。 -// 首先显示列名,然后逐行显示数据。 +// PrintTable prints results to console in table format, similar to database output. +// Displays column names first, then data rows. 
// -// 支持的数据格式: -// - []map[string]interface{}: 多行记录 -// - map[string]interface{}: 单行记录 -// - 其他类型: 直接打印 +// Supported data formats: +// - []map[string]interface{}: Multiple rows +// - map[string]interface{}: Single row +// - Other types: Direct print // -// 示例: +// Example: // -// // 表格式打印结果 +// // Print results in table format // ssql.PrintTable() // -// // 输出格式: +// // Output format: // // +--------+----------+ // // | device | max_temp | // // +--------+----------+ @@ -353,37 +350,37 @@ func (s *Streamsql) PrintTable() { }) } -// printTableFormat 格式化打印表格数据 -// 参数: -// - results: []map[string]interface{}类型的结果数据 +// printTableFormat formats and prints table data +// Parameters: +// - results: Result data of type []map[string]interface{} func (s *Streamsql) printTableFormat(results []map[string]interface{}) { table.FormatTableData(results, s.fieldOrder) } -// ToChannel 返回结果通道,用于异步获取处理结果。 -// 通过此通道可以以非阻塞方式获取流处理结果。 +// ToChannel returns result channel for asynchronous result retrieval. +// Provides non-blocking access to stream processing results. 
// -// 返回值: -// - <-chan interface{}: 只读的结果通道,如果未执行SQL则返回nil +// Returns: +// - <-chan []map[string]interface{}: Read-only result channel, returns nil if SQL not executed // -// 示例: +// Example: // -// // 获取结果通道 +// // Get result channel // resultChan := ssql.ToChannel() // if resultChan != nil { // go func() { // for result := range resultChan { -// fmt.Printf("异步结果: %v\n", result) +// fmt.Printf("Async result: %v\n", result) // } // }() // } -// ToChannel 将查询结果转换为通道输出 -// 返回一个只读通道,用于接收查询结果 +// ToChannel converts query results to channel output +// Returns a read-only channel for receiving query results // -// 注意: -// - 必须有消费者持续从通道读取数据,否则可能导致流处理阻塞 -// - 返回的通道传输批量结果数据 +// Notes: +// - Consumer must continuously read from channel to prevent stream processing blocking +// - Channel transmits batch result data func (s *Streamsql) ToChannel() <-chan []map[string]interface{} { if s.stream != nil { return s.stream.GetResultsChan() diff --git a/streamsql_sync_sink_test.go b/streamsql_sync_sink_test.go index 9db4bd2..680cfb8 100644 --- a/streamsql_sync_sink_test.go +++ b/streamsql_sync_sink_test.go @@ -157,7 +157,7 @@ func TestEmitSyncWithAddSink(t *testing.T) { assert.Error(t, err) assert.Nil(t, result) - assert.Contains(t, err.Error(), "同步模式仅支持非聚合查询") + assert.Contains(t, err.Error(), "synchronous mode only supports non-aggregation queries, use Emit() method for aggregation queries") }) t.Run("多个AddSink回调都被触发", func(t *testing.T) { diff --git a/types/config.go b/types/config.go index 6c7230b..62458e6 100644 --- a/types/config.go +++ b/types/config.go @@ -6,48 +6,48 @@ import ( "github.com/rulego/streamsql/aggregator" ) -// Config 流处理配置 +// Config stream processing configuration type Config struct { - // SQL 处理相关配置 + // SQL processing related configuration WindowConfig WindowConfig `json:"windowConfig"` GroupFields []string `json:"groupFields"` SelectFields map[string]aggregator.AggregateType `json:"selectFields"` FieldAlias map[string]string `json:"fieldAlias"` SimpleFields []string 
`json:"simpleFields"` FieldExpressions map[string]FieldExpression `json:"fieldExpressions"` - FieldOrder []string `json:"fieldOrder"` // SELECT语句中字段的原始顺序 + FieldOrder []string `json:"fieldOrder"` // Original order of fields in SELECT statement Where string `json:"where"` Having string `json:"having"` - // 功能开关 + // Feature switches NeedWindow bool `json:"needWindow"` Distinct bool `json:"distinct"` - // 结果控制 + // Result control Limit int `json:"limit"` Projections []Projection `json:"projections"` - // 性能配置 + // Performance configuration PerformanceConfig PerformanceConfig `json:"performanceConfig"` } -// WindowConfig 窗口配置 +// WindowConfig window configuration type WindowConfig struct { Type string `json:"type"` Params map[string]interface{} `json:"params"` TsProp string `json:"tsProp"` TimeUnit time.Duration `json:"timeUnit"` - GroupByKey string `json:"groupByKey"` // 会话窗口分组键 + GroupByKey string `json:"groupByKey"` // Session window grouping key } -// FieldExpression 字段表达式配置 +// FieldExpression field expression configuration type FieldExpression struct { - Field string `json:"field"` // 原始字段名 - Expression string `json:"expression"` // 完整表达式 - Fields []string `json:"fields"` // 表达式中引用的所有字段 + Field string `json:"field"` // original field name + Expression string `json:"expression"` // complete expression + Fields []string `json:"fields"` // all fields referenced in expression } -// ProjectionSourceType 投影来源类型 +// ProjectionSourceType projection source type type ProjectionSourceType int const ( @@ -56,95 +56,95 @@ const ( SourceWindowProperty // For window_start, window_end ) -// Projection SELECT列表中的投影配置 +// Projection projection configuration in SELECT list type Projection struct { - OutputName string `json:"outputName"` // 输出字段名 - SourceType ProjectionSourceType `json:"sourceType"` // 数据来源类型 - InputName string `json:"inputName"` // 输入字段名 + OutputName string `json:"outputName"` // output field name + SourceType ProjectionSourceType `json:"sourceType"` // data 
source type + InputName string `json:"inputName"` // input field name } -// PerformanceConfig 性能配置 +// PerformanceConfig performance configuration type PerformanceConfig struct { - BufferConfig BufferConfig `json:"bufferConfig"` // 缓冲区配置 - OverflowConfig OverflowConfig `json:"overflowConfig"` // 溢出策略配置 - WorkerConfig WorkerConfig `json:"workerConfig"` // 工作池配置 - MonitoringConfig MonitoringConfig `json:"monitoringConfig"` // 监控配置 + BufferConfig BufferConfig `json:"bufferConfig"` // buffer configuration + OverflowConfig OverflowConfig `json:"overflowConfig"` // overflow strategy configuration + WorkerConfig WorkerConfig `json:"workerConfig"` // worker pool configuration + MonitoringConfig MonitoringConfig `json:"monitoringConfig"` // monitoring configuration } -// BufferConfig 缓冲区配置 +// BufferConfig buffer configuration type BufferConfig struct { - DataChannelSize int `json:"dataChannelSize"` // 数据输入缓冲区大小 - ResultChannelSize int `json:"resultChannelSize"` // 结果输出缓冲区大小 - WindowOutputSize int `json:"windowOutputSize"` // 窗口输出缓冲区大小 - EnableDynamicResize bool `json:"enableDynamicResize"` // 是否启用动态缓冲区调整 - MaxBufferSize int `json:"maxBufferSize"` // 最大缓冲区大小 - UsageThreshold float64 `json:"usageThreshold"` // 缓冲区使用率阈值 + DataChannelSize int `json:"dataChannelSize"` // Data input buffer size + ResultChannelSize int `json:"resultChannelSize"` // Result output buffer size + WindowOutputSize int `json:"windowOutputSize"` // Window output buffer size + EnableDynamicResize bool `json:"enableDynamicResize"` // Enable dynamic buffer resizing + MaxBufferSize int `json:"maxBufferSize"` // Maximum buffer size + UsageThreshold float64 `json:"usageThreshold"` // Buffer usage threshold } -// OverflowConfig 溢出策略配置 +// OverflowConfig overflow strategy configuration type OverflowConfig struct { - Strategy string `json:"strategy"` // 溢出策略: "drop", "block", "expand", "persist" - BlockTimeout time.Duration `json:"blockTimeout"` // 阻塞超时时间 - AllowDataLoss bool `json:"allowDataLoss"` // 是否允许数据丢失 - 
PersistenceConfig *PersistenceConfig `json:"persistenceConfig"` // 持久化配置 - ExpansionConfig ExpansionConfig `json:"expansionConfig"` // 扩容配置 + Strategy string `json:"strategy"` // Overflow strategy: "drop", "block", "expand", "persist" + BlockTimeout time.Duration `json:"blockTimeout"` // Block timeout duration + AllowDataLoss bool `json:"allowDataLoss"` // Allow data loss + PersistenceConfig *PersistenceConfig `json:"persistenceConfig"` // Persistence configuration + ExpansionConfig ExpansionConfig `json:"expansionConfig"` // Expansion configuration } -// PersistenceConfig 持久化配置 +// PersistenceConfig persistence configuration type PersistenceConfig struct { - DataDir string `json:"dataDir"` // 持久化数据目录 - MaxFileSize int64 `json:"maxFileSize"` // 最大文件大小 - FlushInterval time.Duration `json:"flushInterval"` // 刷新间隔 - MaxRetries int `json:"maxRetries"` // 最大重试次数 - RetryInterval time.Duration `json:"retryInterval"` // 重试间隔 + DataDir string `json:"dataDir"` // Persistence data directory + MaxFileSize int64 `json:"maxFileSize"` // Maximum file size + FlushInterval time.Duration `json:"flushInterval"` // Flush interval + MaxRetries int `json:"maxRetries"` // Maximum retry count + RetryInterval time.Duration `json:"retryInterval"` // Retry interval } -// ExpansionConfig 扩容配置 +// ExpansionConfig expansion configuration type ExpansionConfig struct { - GrowthFactor float64 `json:"growthFactor"` // 扩容因子 - MinIncrement int `json:"minIncrement"` // 最小扩容增量 - TriggerThreshold float64 `json:"triggerThreshold"` // 扩容触发阈值 - ExpansionTimeout time.Duration `json:"expansionTimeout"` // 扩容超时时间 + GrowthFactor float64 `json:"growthFactor"` // Growth factor + MinIncrement int `json:"minIncrement"` // Minimum expansion increment + TriggerThreshold float64 `json:"triggerThreshold"` // Expansion trigger threshold + ExpansionTimeout time.Duration `json:"expansionTimeout"` // Expansion timeout duration } -// WorkerConfig 工作池配置 +// WorkerConfig worker pool configuration type WorkerConfig struct { - 
SinkPoolSize int `json:"sinkPoolSize"` // Sink工作池大小 - SinkWorkerCount int `json:"sinkWorkerCount"` // Sink工作线程数 - MaxRetryRoutines int `json:"maxRetryRoutines"` // 最大重试协程数 + SinkPoolSize int `json:"sinkPoolSize"` // Sink pool size + SinkWorkerCount int `json:"sinkWorkerCount"` // Sink worker count + MaxRetryRoutines int `json:"maxRetryRoutines"` // Maximum retry routines } -// MonitoringConfig 监控配置 +// MonitoringConfig monitoring configuration type MonitoringConfig struct { - EnableMonitoring bool `json:"enableMonitoring"` // 是否启用性能监控 - StatsUpdateInterval time.Duration `json:"statsUpdateInterval"` // 统计信息更新间隔 - EnableDetailedStats bool `json:"enableDetailedStats"` // 是否启用详细统计 - WarningThresholds WarningThresholds `json:"warningThresholds"` // 性能警告阈值 + EnableMonitoring bool `json:"enableMonitoring"` // Enable performance monitoring + StatsUpdateInterval time.Duration `json:"statsUpdateInterval"` // Statistics update interval + EnableDetailedStats bool `json:"enableDetailedStats"` // Enable detailed statistics + WarningThresholds WarningThresholds `json:"warningThresholds"` // Performance warning thresholds } -// WarningThresholds 性能警告阈值 +// WarningThresholds performance warning thresholds type WarningThresholds struct { - DropRateWarning float64 `json:"dropRateWarning"` // 丢弃率警告阈值 - DropRateCritical float64 `json:"dropRateCritical"` // 丢弃率严重阈值 - BufferUsageWarning float64 `json:"bufferUsageWarning"` // 缓冲区使用率警告阈值 - BufferUsageCritical float64 `json:"bufferUsageCritical"` // 缓冲区使用率严重阈值 + DropRateWarning float64 `json:"dropRateWarning"` // Drop rate warning threshold + DropRateCritical float64 `json:"dropRateCritical"` // Drop rate critical threshold + BufferUsageWarning float64 `json:"bufferUsageWarning"` // Buffer usage warning threshold + BufferUsageCritical float64 `json:"bufferUsageCritical"` // Buffer usage critical threshold } -// NewConfig 创建默认配置 +// NewConfig creates default configuration func NewConfig() Config { return Config{ PerformanceConfig: 
DefaultPerformanceConfig(), } } -// NewConfigWithPerformance 创建带性能配置的Config +// NewConfigWithPerformance creates Config with performance configuration func NewConfigWithPerformance(perfConfig PerformanceConfig) Config { return Config{ PerformanceConfig: perfConfig, } } -// DefaultPerformanceConfig 默认性能配置 +// DefaultPerformanceConfig default performance configuration func DefaultPerformanceConfig() PerformanceConfig { return PerformanceConfig{ BufferConfig: BufferConfig{ @@ -185,7 +185,7 @@ func DefaultPerformanceConfig() PerformanceConfig { } } -// HighPerformanceConfig 高性能配置预设 +// HighPerformanceConfig high performance configuration preset func HighPerformanceConfig() PerformanceConfig { config := DefaultPerformanceConfig() config.BufferConfig.DataChannelSize = 50000 @@ -197,7 +197,7 @@ func HighPerformanceConfig() PerformanceConfig { return config } -// LowLatencyConfig 低延迟配置预设 +// LowLatencyConfig low latency configuration preset func LowLatencyConfig() PerformanceConfig { config := DefaultPerformanceConfig() config.BufferConfig.DataChannelSize = 1000 @@ -209,7 +209,7 @@ func LowLatencyConfig() PerformanceConfig { return config } -// ZeroDataLossConfig 零数据丢失配置预设 +// ZeroDataLossConfig zero data loss configuration preset func ZeroDataLossConfig() PerformanceConfig { config := DefaultPerformanceConfig() config.BufferConfig.DataChannelSize = 20000 @@ -218,11 +218,11 @@ func ZeroDataLossConfig() PerformanceConfig { config.BufferConfig.EnableDynamicResize = true config.OverflowConfig.Strategy = "block" config.OverflowConfig.AllowDataLoss = false - config.OverflowConfig.BlockTimeout = 0 // 无超时,永久阻塞 + config.OverflowConfig.BlockTimeout = 0 // no timeout, permanent blocking return config } -// PersistencePerformanceConfig 持久化配置预设 +// PersistencePerformanceConfig persistence configuration preset func PersistencePerformanceConfig() PerformanceConfig { config := DefaultPerformanceConfig() config.OverflowConfig.Strategy = "persist" diff --git a/types/doc.go b/types/doc.go 
new file mode 100644 index 0000000..9034eae --- /dev/null +++ b/types/doc.go @@ -0,0 +1,280 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Package types provides core type definitions and data structures for StreamSQL. + +This package defines fundamental data types, configuration structures, and interfaces +used throughout the StreamSQL stream processing pipeline. It ensures type safety +and provides a unified API for data manipulation across components. 
+ +# Core Features + +• Data Types - Core data structures for stream processing +• Configuration Management - Centralized configuration structures +• Type Safety - Strong typing with validation +• Serialization Support - JSON serialization support +• Cross-Component Compatibility - Shared types across packages + +# Configuration Structures + +Core configuration types: + + type Config struct { + WindowConfig WindowConfig // Window settings + GroupFields []string // GROUP BY fields + SelectFields map[string]aggregator.AggregateType // SELECT aggregations + FieldAlias map[string]string // Field aliases + SimpleFields []string // Non-aggregated fields + FieldExpressions map[string]FieldExpression // Computed expressions + Where string // WHERE clause + Having string // HAVING clause + NeedWindow bool // Window requirement + Distinct bool // DISTINCT flag + Limit int // LIMIT clause + PerformanceConfig PerformanceConfig // Performance settings + } + +# Window Configuration + +Unified configuration for all window types: + + type WindowConfig struct { + Type string // Window type + Params map[string]interface{} // Parameters + TsProp string // Timestamp property + TimeUnit time.Duration // Time unit + GroupByKey string // Grouping key + } + + // Example configurations + // Tumbling window + windowConfig := WindowConfig{ + Type: "tumbling", + Params: map[string]interface{}{ + "size": "5s", + }, + TsProp: "timestamp", + } + + // Sliding window + windowConfig := WindowConfig{ + Type: "sliding", + Params: map[string]interface{}{ + "size": "30s", + "slide": "10s", + }, + TsProp: "timestamp", + } + + // Counting window + windowConfig := WindowConfig{ + Type: "counting", + Params: map[string]interface{}{ + "count": 100, + }, + } + + // Session window + windowConfig := WindowConfig{ + Type: "session", + Params: map[string]interface{}{ + "timeout": "5m", + }, + GroupByKey: "user_id", + } + +# Performance Configuration + +Comprehensive performance tuning options: + + type 
PerformanceConfig struct { + // Buffer management + BufferSize int // Input buffer size + BatchSize int // Processing batch size + FlushInterval time.Duration // Automatic flush interval + HighWaterMark float64 // Buffer high water mark (0.0-1.0) + LowWaterMark float64 // Buffer low water mark (0.0-1.0) + + // Worker pool configuration + WorkerPoolSize int // Number of worker goroutines + MaxWorkers int // Maximum worker limit + WorkerIdleTime time.Duration // Worker idle timeout + + // Overflow handling + OverflowStrategy string // "drop", "block", "spill", "compress" + SpillDirectory string // Directory for spill files + CompressionLevel int // Compression level (1-9) + + // Memory management + MaxMemoryUsage int64 // Maximum memory usage in bytes + GCInterval time.Duration // Garbage collection interval + MemoryThreshold float64 // Memory usage threshold + + // Monitoring + MetricsEnabled bool // Enable metrics collection + MetricsInterval time.Duration // Metrics collection interval + HealthCheckPort int // Health check HTTP port + + // Persistence + PersistenceEnabled bool // Enable data persistence + PersistenceType string // "memory", "file", "database" + PersistencePath string // Persistence storage path + RecoveryEnabled bool // Enable automatic recovery + } + +# Field Management + +Advanced field handling and expression support: + + type FieldExpression struct { + Field string // Field name + Expression string // Expression + Fields []string // Referenced fields + } + + type Projection struct { + SourceType ProjectionSourceType // Source type (field, expression, aggregate) + Source string // Source identifier + Alias string // Output alias + DataType string // Expected data type + } + + type ProjectionSourceType string + + const ( + ProjectionSourceField ProjectionSourceType = "field" // Direct field reference + ProjectionSourceExpression ProjectionSourceType = "expression" // Computed expression + ProjectionSourceAggregate ProjectionSourceType = 
"aggregate" // Aggregate function + ProjectionSourceConstant ProjectionSourceType = "constant" // Constant value + ) + +# Data Row Representation + +Type-safe data row structures for stream processing: + + type Row struct { + Data map[string]interface{} // Row data + Timestamp time.Time // Row timestamp + Metadata map[string]interface{} // Additional metadata + GroupKey string // Grouping key for aggregation + WindowID string // Window identifier + } + + // Row creation and manipulation + func NewRow(data map[string]interface{}) *Row + func (r *Row) GetValue(field string) interface{} + func (r *Row) SetValue(field string, value interface{}) + func (r *Row) HasField(field string) bool + func (r *Row) Clone() *Row + +# Time Management + +Time-based data structures for window processing: + + type TimeSlot struct { + Start *time.Time // Slot start time + End *time.Time // Slot end time + Duration time.Duration // Slot duration + ID string // Unique slot identifier + } + + // Time slot operations + func NewTimeSlot(start, end *time.Time) *TimeSlot + func (ts *TimeSlot) Contains(timestamp time.Time) bool + func (ts *TimeSlot) Overlaps(other *TimeSlot) bool + func (ts *TimeSlot) String() string + +# Configuration Presets + +Pre-defined configuration templates for common use cases: + + // High Performance Configuration + func NewHighPerformanceConfig() *PerformanceConfig { + return &PerformanceConfig{ + BufferSize: 50000, + BatchSize: 1000, + WorkerPoolSize: 8, + FlushInterval: 100 * time.Millisecond, + OverflowStrategy: "spill", + MetricsEnabled: true, + } + } + + // Low Latency Configuration + func NewLowLatencyConfig() *PerformanceConfig { + return &PerformanceConfig{ + BufferSize: 1000, + BatchSize: 10, + WorkerPoolSize: 2, + FlushInterval: 10 * time.Millisecond, + OverflowStrategy: "drop", + MetricsEnabled: false, + } + } + + // Zero Data Loss Configuration + func NewZeroDataLossConfig() *PerformanceConfig { + return &PerformanceConfig{ + BufferSize: 
10000, + BatchSize: 100, + WorkerPoolSize: 4, + FlushInterval: time.Second, + OverflowStrategy: "block", + PersistenceEnabled: true, + RecoveryEnabled: true, + MetricsEnabled: true, + } + } + +# Usage Examples + +Basic configuration: + + config := &Config{ + WindowConfig: WindowConfig{ + Type: "tumbling", + Params: map[string]interface{}{"size": "5s"}, + }, + GroupFields: []string{"device_id"}, + SelectFields: map[string]aggregator.AggregateType{ + "temperature": aggregator.AggregateTypeAvg, + }, + NeedWindow: true, + } + +Data row operations: + + row := NewRow(map[string]interface{}{ + "device_id": "sensor001", + "temperature": 25.5, + }) + + deviceID := row.GetValue("device_id").(string) + row.SetValue("processed", true) + +# Integration + +Integrates with other StreamSQL components: + +• Stream Package - Core data types for stream processing +• Window Package - WindowConfig for window configurations +• Aggregator Package - AggregateType definitions +• Condition Package - Data structures for clause evaluation +• Functions Package - Type definitions for functions +• RSQL Package - Config structures for query execution +*/ +package types \ No newline at end of file diff --git a/types/row.go b/types/row.go index 030057b..ed4b162 100644 --- a/types/row.go +++ b/types/row.go @@ -30,7 +30,7 @@ type Row struct { Slot *TimeSlot } -// GetTimestamp 获取时间戳 +// GetTimestamp gets timestamp func (r *Row) GetTimestamp() time.Time { return r.Timestamp } diff --git a/types/timeslot.go b/types/timeslot.go index 743d3fa..5d445de 100644 --- a/types/timeslot.go +++ b/types/timeslot.go @@ -32,14 +32,14 @@ func NewTimeSlot(start, end *time.Time) *TimeSlot { } } -// Hash 生成槽位的哈希值 +// Hash generates slot hash value func (ts TimeSlot) Hash() uint64 { - // 将开始时间和结束时间转换为 Unix 时间戳(纳秒级) + // Convert start and end times to Unix timestamps (nanoseconds) startNano := ts.Start.UnixNano() endNano := ts.End.UnixNano() - // 使用简单但高效的哈希算法 - // 将两个时间戳组合成一个唯一的哈希值 + // Use simple but efficient hash 
algorithm + // Combine two timestamps into unique hash value hash := uint64(startNano) hash = (hash << 32) | (hash >> 32) hash = hash ^ uint64(endNano) @@ -47,7 +47,7 @@ func (ts TimeSlot) Hash() uint64 { return hash } -// Contains 检查给定时间是否在槽位范围内 +// Contains checks if given time is within slot range func (ts TimeSlot) Contains(t time.Time) bool { return (t.Equal(*ts.Start) || t.After(*ts.Start)) && t.Before(*ts.End) diff --git a/window/counting_window.go b/window/counting_window.go index 9d49dcb..53ccc04 100644 --- a/window/counting_window.go +++ b/window/counting_window.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package window import ( diff --git a/window/doc.go b/window/doc.go new file mode 100644 index 0000000..cfa1685 --- /dev/null +++ b/window/doc.go @@ -0,0 +1,228 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* +Package window provides windowing functionality for StreamSQL stream processing. + +This package implements various types of windows for aggregating streaming data +over time intervals or record counts. It supports tumbling, sliding, counting, +and session windows with efficient memory management and concurrent processing. + +# Core Features + +• Multiple Window Types - Tumbling, Sliding, Counting, and Session windows +• Time Management - Time-based window boundaries and event time processing +• Trigger Mechanisms - Triggering based on time, count, or custom conditions +• Memory Efficiency - Optimized data structures and memory management +• Concurrent Processing - Thread-safe operations +• Late Data Handling - Configurable policies for late-arriving data + +# Window Types + +Four distinct window types for different stream processing scenarios: + +• Tumbling Windows - Non-overlapping, fixed-size time windows +• Sliding Windows - Overlapping time windows with configurable slide interval +• Counting Windows - Count-based windows that trigger after N records +• Session Windows - Activity-based windows with configurable timeout + +# Window Interface + +All window types implement a unified Window interface: + + type Window interface { + Add(row types.Row) error // Add data to window + Reset() error // Reset window state + Start() error // Start window processing + OutputChan() <-chan []types.Row // Get output channel + SetCallback(func([]types.Row)) // Set callback function + Trigger() error // Manual trigger + } + +# Tumbling Windows + +Non-overlapping time-based windows: + + // Create tumbling window + config := types.WindowConfig{ + Type: "tumbling", + Params: map[string]interface{}{ + "size": "5s", // 5-second windows + }, + TsProp: "timestamp", + } + window, err := NewTumblingWindow(config) + + // Window characteristics: + // - Fixed size (e.g., 5 seconds) + // - No overlap between windows + // - Triggers at regular intervals + // - Memory efficient + 
// - Suitable for periodic aggregations + + // Example timeline: + // Window 1: [00:00 - 00:05) + // Window 2: [00:05 - 00:10) + // Window 3: [00:10 - 00:15) + +# Sliding Windows + +Overlapping time-based windows with configurable slide interval: + + // Create sliding window + config := types.WindowConfig{ + Type: "sliding", + Params: map[string]interface{}{ + "size": "30s", // 30-second window size + "slide": "10s", // 10-second slide interval + }, + TsProp: "timestamp", + } + window, err := NewSlidingWindow(config) + + // Window characteristics: + // - Fixed size with configurable slide + // - Overlapping windows + // - More frequent updates + // - Higher memory usage + // - Suitable for smooth trend analysis + + // Example timeline (30s window, 10s slide): + // Window 1: [00:00 - 00:30) + // Window 2: [00:10 - 00:40) + // Window 3: [00:20 - 00:50) + +# Counting Windows + +Count-based windows that trigger after a specified number of records: + + // Create counting window + config := types.WindowConfig{ + Type: "counting", + Params: map[string]interface{}{ + "count": 100, // Trigger every 100 records + }, + } + window, err := NewCountingWindow(config) + + // Window characteristics: + // - Fixed record count + // - Time-independent + // - Predictable memory usage + // - Suitable for batch processing + // - Handles variable data rates + + // Example: + // Window 1: Records 1-100 + // Window 2: Records 101-200 + // Window 3: Records 201-300 + +# Session Windows + +Activity-based windows with configurable session timeout: + + // Create session window + config := types.WindowConfig{ + Type: "session", + Params: map[string]interface{}{ + "timeout": "5m", // 5-minute session timeout + }, + GroupByKey: "user_id", // Group sessions by user + } + window, err := NewSessionWindow(config) + + // Window characteristics: + // - Variable window size + // - Activity-based triggers + // - Per-group session tracking + // - Automatic session expiration + // - Suitable for user 
behavior analysis + + // Example (5-minute timeout): + // User A: [10:00 - 10:15) - 15-minute session + // User B: [10:05 - 10:08) - 3-minute session + // User A: [10:20 - 10:25) - New 5-minute session + +# Window Factory + +Centralized window creation: + + func CreateWindow(config types.WindowConfig) (Window, error) + +# Time Management + +Time handling for window operations: + + func GetTimestamp(data interface{}, timeField string) (time.Time, error) + + type TimeSlot struct { + Start time.Time + End time.Time + Duration time.Duration + } + +# Performance Features + +• Memory Management - Efficient buffer management and garbage collection +• Concurrency - Thread-safe operations with minimal locking +• Time Efficiency - Optimized timestamp processing and timer management + +# Usage Examples + +Basic tumbling window: + + config := types.WindowConfig{ + Type: "tumbling", + Params: map[string]interface{}{"size": "10s"}, + TsProp: "timestamp", + } + window, err := CreateWindow(config) + window.SetCallback(func(results []types.Row) { + fmt.Printf("Window results: %d records\n", len(results)) + }) + window.Start() + +Sliding window: + + config := types.WindowConfig{ + Type: "sliding", + Params: map[string]interface{}{ + "size": "1m", + "slide": "10s", + }, + TsProp: "event_time", + } + window, err := NewSlidingWindow(config) + +Session window: + + config := types.WindowConfig{ + Type: "session", + Params: map[string]interface{}{"timeout": "30m"}, + GroupByKey: "user_id", + } + window, err := NewSessionWindow(config) + +# Integration + +Integrates with other StreamSQL components: + +• Stream package - Stream processing and data flow +• RSQL package - SQL-based window definitions +• Functions package - Aggregation functions for window results +• Types package - Shared data types and configuration +*/ +package window \ No newline at end of file diff --git a/window/factory.go b/window/factory.go index fc2f649..323ec13 100644 --- a/window/factory.go +++ b/window/factory.go 
@@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package window import ( diff --git a/window/session_window.go b/window/session_window.go index 517c877..7194fac 100644 --- a/window/session_window.go +++ b/window/session_window.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package window import ( diff --git a/window/sliding_window.go b/window/sliding_window.go index 21ae38a..67ae00a 100644 --- a/window/sliding_window.go +++ b/window/sliding_window.go @@ -1,3 +1,19 @@ +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package window import ( diff --git a/window/tumbling_window.go b/window/tumbling_window.go index 113fc88..2b86560 100644 --- a/window/tumbling_window.go +++ b/window/tumbling_window.go @@ -1,4 +1,19 @@ -// Package window provides window operation implementations, including Tumbling Window +/* + * Copyright 2025 The RuleGo Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package window import (