From 343d0455542541c34ee7b3df2fe99474c3a34b21 Mon Sep 17 00:00:00 2001 From: rulego-team Date: Fri, 1 Aug 2025 18:55:32 +0800 Subject: [PATCH] =?UTF-8?q?refactor:=E9=87=8D=E6=9E=84=20stream=20?= =?UTF-8?q?=E6=A8=A1=E5=9D=97=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/persistence/main.go | 75 +- options.go | 26 - stream/handler_data.go | 347 ++++ stream/handler_result.go | 155 ++ stream/manager_metrics.go | 78 + stream/metrics.go | 144 ++ stream/persistence.go | 796 ++++----- stream/processor_data.go | 484 ++++++ stream/processor_field.go | 571 +++++++ stream/stream.go | 1838 +-------------------- stream/stream_factory.go | 156 ++ stream/stream_field_test.go | 332 ++++ stream/stream_performance_test.go | 442 +++++ stream/stream_persistence_test.go | 309 ++++ stream/stream_test.go | 1018 ++---------- stream/stream_window_test.go | 244 +++ stream/unified_config_integration_test.go | 261 --- 17 files changed, 3929 insertions(+), 3347 deletions(-) create mode 100644 stream/handler_data.go create mode 100644 stream/handler_result.go create mode 100644 stream/manager_metrics.go create mode 100644 stream/metrics.go create mode 100644 stream/processor_data.go create mode 100644 stream/processor_field.go create mode 100644 stream/stream_factory.go create mode 100644 stream/stream_field_test.go create mode 100644 stream/stream_performance_test.go create mode 100644 stream/stream_persistence_test.go create mode 100644 stream/stream_window_test.go delete mode 100644 stream/unified_config_integration_test.go diff --git a/examples/persistence/main.go b/examples/persistence/main.go index 6fdeb4a..002c6f1 100644 --- a/examples/persistence/main.go +++ b/examples/persistence/main.go @@ -26,6 +26,12 @@ import ( "github.com/rulego/streamsql/types" ) +// 导入stream包中的常量 +const ( + StrategyDrop = stream.StrategyDrop +) + +// main 主函数,演示StreamSQL持久化功能的完整测试流程 func main() { fmt.Println("=== StreamSQL 持久化功能测试 ===") @@ -47,19 +53,38 @@ func main() { fmt.Println("✅ 真正持久化功能测试完成!") } +// testDataOverflowPersistence 测试数据溢出时的持久化功能 +// 通过创建小缓冲区并快速发送大量数据来触发溢出和持久化 func testDataOverflowPersistence() { config := types.Config{ SimpleFields: []string{"id", "value"}, } + overflowStrategy := "persist" + perfConfig := types.DefaultPerformanceConfig() + perfConfig.BufferConfig.DataChannelSize = 100 + perfConfig.BufferConfig.ResultChannelSize = 100 + perfConfig.WorkerConfig.SinkPoolSize = 50 + perfConfig.OverflowConfig.Strategy = overflowStrategy + perfConfig.OverflowConfig.BlockTimeout = 5 * time.Second + perfConfig.OverflowConfig.AllowDataLoss = (overflowStrategy == StrategyDrop) + // 配置持久化参数 + // 注意:当溢出策略设置为"persist"时,必须提供PersistenceConfig配置 + // 如果不提供此配置,系统会返回友好的错误提示和配置示例 + if overflowStrategy == "persist" { + perfConfig.OverflowConfig.PersistenceConfig = &types.PersistenceConfig{ + DataDir: "./streamsql_overflow_data", // 持久化数据存储目录 + MaxFileSize: 10 * 1024 * 1024, // 单个文件最大大小:10MB + FlushInterval: 5 * time.Second, // 数据刷新到磁盘的间隔:5秒 + MaxRetries: 3, // 持久化失败时的最大重试次数 + RetryInterval: 1 * time.Second, // 重试间隔:1秒 + } + } + config.PerformanceConfig = perfConfig // 创建小缓冲区的持久化流处理器 - stream, err := stream.NewStreamWithLossPolicy( + stream, err := stream.NewStreamWithCustomPerformance( config, - 100, // 很小的缓冲区,容易溢出 - 100, // 小结果缓冲区 - 50, // 小sink池 - "persist", // 持久化策略 - 5*time.Second, + perfConfig, ) if err != nil { fmt.Printf("创建流失败: %v\n", err) @@ -102,19 +127,38 @@ func testDataOverflowPersistence() { stream.Stop() } +// testDataRecovery 测试程序重启后的数据恢复功能 +// 
模拟程序重启,加载之前持久化的数据并重新处理 func testDataRecovery() { config := types.Config{ SimpleFields: []string{"id", "value"}, } + overflowStrategy := "persist" + perfConfig := types.DefaultPerformanceConfig() + perfConfig.BufferConfig.DataChannelSize = 200 + perfConfig.BufferConfig.ResultChannelSize = 200 + perfConfig.WorkerConfig.SinkPoolSize = 100 + perfConfig.OverflowConfig.Strategy = overflowStrategy + perfConfig.OverflowConfig.BlockTimeout = 5 * time.Second + perfConfig.OverflowConfig.AllowDataLoss = (overflowStrategy == StrategyDrop) + // 配置持久化参数 + // 注意:当溢出策略设置为"persist"时,必须提供PersistenceConfig配置 + // 如果不提供此配置,系统会返回友好的错误提示和配置示例 + if overflowStrategy == "persist" { + perfConfig.OverflowConfig.PersistenceConfig = &types.PersistenceConfig{ + DataDir: "./streamsql_overflow_data", // 持久化数据存储目录 + MaxFileSize: 10 * 1024 * 1024, // 单个文件最大大小:10MB + FlushInterval: 5 * time.Second, // 数据刷新到磁盘的间隔:5秒 + MaxRetries: 3, // 持久化失败时的最大重试次数 + RetryInterval: 1 * time.Second, // 重试间隔:1秒 + } + } + config.PerformanceConfig = perfConfig // 创建新的持久化流处理器(模拟程序重启) - stream, err := stream.NewStreamWithLossPolicy( + stream, err := stream.NewStreamWithCustomPerformance( config, - 200, // 更大的缓冲区用于恢复 - 200, - 100, - "persist", // 持久化策略 - 5*time.Second, + perfConfig, ) if err != nil { fmt.Printf("创建流失败: %v\n", err) @@ -148,6 +192,8 @@ func testDataRecovery() { stream.Stop() } +// analyzePersistenceFiles 分析持久化文件的内容和统计信息 +// 检查持久化目录中的文件,显示文件大小和内容预览 func analyzePersistenceFiles() { dataDir := "./streamsql_overflow_data" @@ -187,6 +233,9 @@ func analyzePersistenceFiles() { } } +// showFileContent 显示指定文件的前几行内容 +// filename: 要读取的文件路径 +// maxLines: 最大显示行数 func showFileContent(filename string, maxLines int) { file, err := os.Open(filename) if err != nil { @@ -225,6 +274,8 @@ func showFileContent(filename string, maxLines int) { } } +// cleanupTestData 清理测试产生的持久化数据 +// 删除测试目录及其所有内容,为新的测试做准备 func cleanupTestData() { dataDir := "./streamsql_overflow_data" if err := os.RemoveAll(dataDir); err != nil { diff --git a/options.go b/options.go index 6b26a81..9881b27 100644 --- a/options.go +++ b/options.go @@ -145,29 +145,3 @@ func WithMonitoring(updateInterval time.Duration, enableDetailedStats bool) Opti s.customConfig = &config } } - -// Deprecated Options - 向后兼容,但建议使用新的配置方式 - -// WithBuffers 设置缓冲区大小 (已弃用,使用WithBufferSizes) -// Deprecated: 使用WithBufferSizes替代 -func WithBuffers(dataBufSize, resultBufSize, sinkPoolSize int) Option { - return WithBufferSizes(dataBufSize, resultBufSize, 1000) -} - -// WithHighPerf 启用高性能模式 (已弃用,使用WithHighPerformance) -// Deprecated: 使用WithHighPerformance替代 -func WithHighPerf() Option { - return WithHighPerformance() -} - -// WithOverflowPolicy 设置溢出策略 (已弃用,使用WithOverflowStrategy) -// Deprecated: 使用WithOverflowStrategy替代 -func WithOverflowPolicy(strategy string, timeout time.Duration) Option { - return WithOverflowStrategy(strategy, timeout) -} - -// WithPersistenceConfig 设置持久化配置 (已弃用,使用WithCustomPersistence) -// Deprecated: 使用WithCustomPersistence替代 -func WithPersistenceConfig(dataDir string, maxFileSize int64, flushInterval time.Duration) Option { - return WithCustomPersistence(dataDir, maxFileSize, flushInterval) -} diff --git a/stream/handler_data.go b/stream/handler_data.go new file mode 100644 index 0000000..fc0ca01 --- /dev/null +++ b/stream/handler_data.go @@ -0,0 +1,347 @@ +package stream + +import ( + "sync/atomic" + "time" + + "github.com/rulego/streamsql/logger" +) + +// DataHandler 数据处理器,负责不同策略的数据添加 +type DataHandler struct { + stream *Stream +} + +// NewDataHandler 创建数据处理器 +func NewDataHandler(stream 
*Stream) *DataHandler { + return &DataHandler{stream: stream} +} + +// addDataBlocking 阻塞模式添加数据,保证零数据丢失 +func (s *Stream) addDataBlocking(data interface{}) { + if s.blockingTimeout <= 0 { + // 无超时限制,永久阻塞直到成功 + dataChan := s.safeGetDataChan() + dataChan <- data + return + } + + // 带超时的阻塞 + timer := time.NewTimer(s.blockingTimeout) + defer timer.Stop() + + dataChan := s.safeGetDataChan() + select { + case dataChan <- data: + // 成功添加数据 + return + case <-timer.C: + // 超时但不丢弃数据,记录错误但继续阻塞 + logger.Error("Data addition timeout, but continue waiting to avoid data loss") + // 继续无限期阻塞,重新获取当前通道引用 + finalDataChan := s.safeGetDataChan() + finalDataChan <- data + } +} + +// addDataWithExpansion 动态扩容模式 +func (s *Stream) addDataWithExpansion(data interface{}) { + // 首次尝试添加数据 + if s.safeSendToDataChan(data) { + return + } + + // 通道满了,动态扩容 + s.expandDataChannel() + + // 扩容后重试,重新获取通道引用 + if s.safeSendToDataChan(data) { + logger.Debug("Successfully added data after data channel expansion") + return + } + + // 如果扩容后仍然满,则阻塞等待 + dataChan := s.safeGetDataChan() + dataChan <- data +} + +// addDataWithPersistence 持久化模式 +func (s *Stream) addDataWithPersistence(data interface{}) { + // 首次尝试添加数据 + if s.safeSendToDataChan(data) { + return + } + + // 通道满了,持久化到磁盘 + if s.persistenceManager != nil { + if err := s.persistenceManager.PersistData(data); err != nil { + logger.Error("Failed to persist data: %v", err) + atomic.AddInt64(&s.droppedCount, 1) + } else { + logger.Debug("Data has been persisted to disk") + } + } else { + logger.Error("Persistence manager not initialized, data will be lost") + atomic.AddInt64(&s.droppedCount, 1) + } + + // 启动异步重试 + go s.persistAndRetryData(data) +} + +// addDataWithDrop 丢弃模式 +func (s *Stream) addDataWithDrop(data interface{}) { + // 智能非阻塞添加,分层背压控制 + if s.safeSendToDataChan(data) { + return + } + + // 数据通道已满,使用分层背压策略,获取通道状态 + s.dataChanMux.RLock() + chanLen := len(s.dataChan) + chanCap := cap(s.dataChan) + currentDataChan := s.dataChan + s.dataChanMux.RUnlock() + + usage := float64(chanLen) / float64(chanCap) + + // 根据通道使用率和缓冲区大小调整策略 + var waitTime time.Duration + var maxRetries int + + switch { + case chanCap >= 100000: // 超大缓冲区(基准测试模式) + switch { + case usage > 0.99: + waitTime = 1 * time.Millisecond // 更长等待 + maxRetries = 3 + case usage > 0.95: + waitTime = 500 * time.Microsecond + maxRetries = 2 + case usage > 0.90: + waitTime = 100 * time.Microsecond + maxRetries = 1 + default: + // 立即丢弃 + logger.Warn("Data channel is full, dropping input data") + atomic.AddInt64(&s.droppedCount, 1) + return + } + + case chanCap >= 50000: // 高性能模式 + switch { + case usage > 0.99: + waitTime = 500 * time.Microsecond + maxRetries = 2 + case usage > 0.95: + waitTime = 200 * time.Microsecond + maxRetries = 1 + case usage > 0.90: + waitTime = 50 * time.Microsecond + maxRetries = 1 + default: + logger.Warn("Data channel is full, dropping input data") + atomic.AddInt64(&s.droppedCount, 1) + return + } + + default: // 默认模式 + switch { + case usage > 0.99: + waitTime = 100 * time.Microsecond + maxRetries = 1 + case usage > 0.95: + waitTime = 50 * time.Microsecond + maxRetries = 1 + default: + logger.Warn("Data channel is full, dropping input data") + atomic.AddInt64(&s.droppedCount, 1) + return + } + } + + // 多次重试添加数据,使用线程安全的方式 + for retry := 0; retry < maxRetries; retry++ { + timer := time.NewTimer(waitTime) + select { + case currentDataChan <- data: + // 重试成功 + timer.Stop() + return + case <-timer.C: + // 超时,继续下一次重试或者丢弃 + if retry == maxRetries-1 { + // 最后一次重试失败,记录丢弃 + logger.Warn("Data channel is full, 
dropping input data") + atomic.AddInt64(&s.droppedCount, 1) + } + } + } +} + +// safeGetDataChan 线程安全地获取dataChan引用 +func (s *Stream) safeGetDataChan() chan interface{} { + s.dataChanMux.RLock() + defer s.dataChanMux.RUnlock() + return s.dataChan +} + +// safeSendToDataChan 线程安全地向dataChan发送数据 +func (s *Stream) safeSendToDataChan(data interface{}) bool { + dataChan := s.safeGetDataChan() + select { + case dataChan <- data: + return true + default: + return false + } +} + +// expandDataChannel 动态扩容数据通道 +func (s *Stream) expandDataChannel() { + // 使用原子操作检查是否正在扩容,防止并发扩容 + if !atomic.CompareAndSwapInt32(&s.expanding, 0, 1) { + logger.Debug("Channel expansion already in progress, skipping") + return + } + defer atomic.StoreInt32(&s.expanding, 0) + + // 获取扩容锁,确保只有一个协程进行扩容 + s.expansionMux.Lock() + defer s.expansionMux.Unlock() + + // 再次检查是否需要扩容(双重检查锁定模式) + s.dataChanMux.RLock() + oldCap := cap(s.dataChan) + currentLen := len(s.dataChan) + s.dataChanMux.RUnlock() + + // 如果当前通道使用率低于80%,则不需要扩容 + if float64(currentLen)/float64(oldCap) < 0.8 { + logger.Debug("Channel usage below threshold, expansion not needed") + return + } + + newCap := int(float64(oldCap) * 1.5) // 扩容50% + if newCap < oldCap+1000 { + newCap = oldCap + 1000 // 至少增加1000 + } + + logger.Debug("Dynamic expansion of data channel: %d -> %d", oldCap, newCap) + + // 创建新的更大的通道 + newChan := make(chan interface{}, newCap) + + // 使用写锁安全地迁移数据 + s.dataChanMux.Lock() + oldChan := s.dataChan + + // 将旧通道中的数据快速迁移到新通道 + migrationTimeout := time.NewTimer(5 * time.Second) // 5秒迁移超时 + defer migrationTimeout.Stop() + + migratedCount := 0 + for { + select { + case data := <-oldChan: + select { + case newChan <- data: + migratedCount++ + case <-migrationTimeout.C: + logger.Warn("Data migration timeout, some data may be lost during expansion") + goto migration_done + } + case <-migrationTimeout.C: + logger.Warn("Data migration timeout during channel drain") + goto migration_done + default: + // 旧通道为空,迁移完成 + goto migration_done + } + } + +migration_done: + // 原子性地更新通道引用 + s.dataChan = newChan + s.dataChanMux.Unlock() + + logger.Debug("Channel expansion completed: migrated %d items", migratedCount) +} + +// persistAndRetryData 持久化数据并重试 (改进版本,具备指数退避和资源控制) +func (s *Stream) persistAndRetryData(data interface{}) { + // 检查活跃重试协程数量,防止资源泄漏 + currentRetries := atomic.LoadInt32(&s.activeRetries) + if currentRetries >= s.maxRetryRoutines { + logger.Warn("Maximum retry routines reached (%d), dropping data", currentRetries) + atomic.AddInt64(&s.droppedCount, 1) + return + } + + // 增加活跃重试计数 + atomic.AddInt32(&s.activeRetries, 1) + defer atomic.AddInt32(&s.activeRetries, -1) + + // 使用指数退避策略 + baseInterval := 50 * time.Millisecond + maxInterval := 2 * time.Second + maxRetries := 10 // 减少最大重试次数,防止长时间阻塞 + totalTimeout := 30 * time.Second // 总超时时间 + + retryTimer := time.NewTimer(totalTimeout) + defer retryTimer.Stop() + + for attempt := 0; attempt < maxRetries; attempt++ { + // 计算当前重试间隔(指数退避) + currentInterval := time.Duration(float64(baseInterval) * (1.5 * float64(attempt))) + if currentInterval > maxInterval { + currentInterval = maxInterval + } + + // 等待重试间隔 + waitTimer := time.NewTimer(currentInterval) + select { + case <-waitTimer.C: + // 继续重试 + case <-retryTimer.C: + waitTimer.Stop() + logger.Warn("Persistence retry timeout reached, dropping data") + atomic.AddInt64(&s.droppedCount, 1) + return + case <-s.done: + waitTimer.Stop() + logger.Debug("Stream stopped during retry, dropping data") + atomic.AddInt64(&s.droppedCount, 1) + return + } + waitTimer.Stop() + + // 
使用线程安全方式尝试发送数据 + s.dataChanMux.RLock() + currentDataChan := s.dataChan + s.dataChanMux.RUnlock() + + select { + case currentDataChan <- data: + logger.Debug("Persistence data retry successful: attempt %d", attempt+1) + return + case <-retryTimer.C: + logger.Warn("Persistence retry timeout during send, dropping data") + atomic.AddInt64(&s.droppedCount, 1) + return + case <-s.done: + logger.Debug("Stream stopped during retry send, dropping data") + atomic.AddInt64(&s.droppedCount, 1) + return + default: + // 通道仍然满,继续下一次重试 + if attempt == maxRetries-1 { + logger.Error("Persistence data retry failed after %d attempts, dropping data", maxRetries) + atomic.AddInt64(&s.droppedCount, 1) + } else { + logger.Debug("Persistence retry attempt %d/%d failed, will retry with interval %v", + attempt+1, maxRetries, currentInterval) + } + } + } +} diff --git a/stream/handler_result.go b/stream/handler_result.go new file mode 100644 index 0000000..9c65a16 --- /dev/null +++ b/stream/handler_result.go @@ -0,0 +1,155 @@ +package stream + +import ( + "sync/atomic" + + "github.com/rulego/streamsql/logger" +) + +// ResultHandler 结果处理器,负责处理结果输出和Sink调用 +type ResultHandler struct { + stream *Stream +} + +// NewResultHandler 创建结果处理器 +func NewResultHandler(stream *Stream) *ResultHandler { + return &ResultHandler{stream: stream} +} + +// startSinkWorkerPool 启动Sink工作池,支持配置工作线程数 +func (s *Stream) startSinkWorkerPool(workerCount int) { + // 使用配置的工作线程数 + if workerCount <= 0 { + workerCount = 8 // 默认值 + } + + for i := 0; i < workerCount; i++ { + go func(workerID int) { + for { + select { + case task := <-s.sinkWorkerPool: + // 执行sink任务 + func() { + defer func() { + // 增强错误恢复,防止单个worker崩溃 + if r := recover(); r != nil { + logger.Error("Sink worker %d panic recovered: %v", workerID, r) + } + }() + task() + }() + case <-s.done: + return + } + } + }(i) + } +} + +// startResultConsumer 启动自动结果消费者,防止resultChan阻塞 +func (s *Stream) startResultConsumer() { + for { + select { + case <-s.resultChan: + // 自动消费结果,防止通道阻塞 + // 这是一个保底机制,确保即使没有外部消费者,系统也不会阻塞 + case <-s.done: + return + } + } +} + +// sendResultNonBlocking 非阻塞方式发送结果到resultChan (智能背压控制) +func (s *Stream) sendResultNonBlocking(results []map[string]interface{}) { + select { + case s.resultChan <- results: + // 成功发送到结果通道 + atomic.AddInt64(&s.outputCount, 1) + default: + // 结果通道已满,使用智能背压控制策略 + s.handleResultChannelBackpressure(results) + } +} + +// handleResultChannelBackpressure 处理结果通道背压 +func (s *Stream) handleResultChannelBackpressure(results []map[string]interface{}) { + chanLen := len(s.resultChan) + chanCap := cap(s.resultChan) + + // 如果通道使用率超过90%,进入背压模式 + if float64(chanLen)/float64(chanCap) > 0.9 { + // 尝试清理一些旧数据,为新数据腾出空间 + select { + case <-s.resultChan: + // 清理一个旧结果,然后尝试添加新结果 + select { + case s.resultChan <- results: + atomic.AddInt64(&s.outputCount, 1) + default: + logger.Warn("Result channel is full, dropping result data") + atomic.AddInt64(&s.droppedCount, 1) + } + default: + logger.Warn("Result channel is full, dropping result data") + atomic.AddInt64(&s.droppedCount, 1) + } + } else { + logger.Warn("Result channel is full, dropping result data") + atomic.AddInt64(&s.droppedCount, 1) + } +} + +// callSinksAsync 异步调用所有sink函数 +func (s *Stream) callSinksAsync(results []map[string]interface{}) { + // 使用读锁安全地访问sinks切片 + s.sinksMux.RLock() + defer s.sinksMux.RUnlock() + + if len(s.sinks) == 0 { + return + } + + // 直接遍历sinks切片,避免复制开销 + // 由于submitSinkTask是异步的,不会长时间持有锁 + for _, sink := range s.sinks { + s.submitSinkTask(sink, results) + } +} + +// submitSinkTask 提交Sink任务 
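+// 任务包装 panic 恢复后以非阻塞方式提交到工作池;
+// 当工作池已满时降级为在新 goroutine 中直接执行,保证结果投递不被阻塞。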
+func (s *Stream) submitSinkTask(sink func(interface{}), results []map[string]interface{}) { + // 捕获sink变量,避免闭包问题 + currentSink := sink + + // 提交任务到工作池 + task := func() { + defer func() { + // 恢复panic,防止单个sink错误影响整个系统 + if r := recover(); r != nil { + logger.Error("Sink execution exception: %v", r) + } + }() + currentSink(results) + } + + // 非阻塞提交任务 + select { + case s.sinkWorkerPool <- task: + // 成功提交任务 + default: + // 工作池已满,直接在当前goroutine执行(降级处理) + go task() + } +} + +// AddSink 添加Sink函数 +func (s *Stream) AddSink(sink func(interface{})) { + s.sinksMux.Lock() + defer s.sinksMux.Unlock() + s.sinks = append(s.sinks, sink) +} + +// GetResultsChan 获取结果通道 +func (s *Stream) GetResultsChan() <-chan interface{} { + return s.resultChan +} diff --git a/stream/manager_metrics.go b/stream/manager_metrics.go new file mode 100644 index 0000000..b5a87ff --- /dev/null +++ b/stream/manager_metrics.go @@ -0,0 +1,78 @@ +package stream + +import ( + "sync/atomic" +) + +// StatsManager 统计信息管理器 +type StatsManager struct { + stream *Stream + statsCollector *StatsCollector +} + +// NewStatsManager 创建统计信息管理器 +func NewStatsManager(stream *Stream) *StatsManager { + return &StatsManager{ + stream: stream, + statsCollector: NewStatsCollector(), + } +} + +// GetStats 获取流处理统计信息 (线程安全版本) +func (s *Stream) GetStats() map[string]int64 { + // 线程安全地获取dataChan状态 + s.dataChanMux.RLock() + dataChanLen := int64(len(s.dataChan)) + dataChanCap := int64(cap(s.dataChan)) + s.dataChanMux.RUnlock() + + return map[string]int64{ + InputCount: atomic.LoadInt64(&s.inputCount), + OutputCount: atomic.LoadInt64(&s.outputCount), + DroppedCount: atomic.LoadInt64(&s.droppedCount), + DataChanLen: dataChanLen, + DataChanCap: dataChanCap, + ResultChanLen: int64(len(s.resultChan)), + ResultChanCap: int64(cap(s.resultChan)), + SinkPoolLen: int64(len(s.sinkWorkerPool)), + SinkPoolCap: int64(cap(s.sinkWorkerPool)), + ActiveRetries: int64(atomic.LoadInt32(&s.activeRetries)), + Expanding: int64(atomic.LoadInt32(&s.expanding)), + } +} + +// GetDetailedStats 获取详细的性能统计信息 +func (s *Stream) GetDetailedStats() map[string]interface{} { + basicStats := s.GetStats() + + // 计算使用率 + dataUsage := float64(basicStats[DataChanLen]) / float64(basicStats[DataChanCap]) * 100 + resultUsage := float64(basicStats[ResultChanLen]) / float64(basicStats[ResultChanCap]) * 100 + sinkUsage := float64(basicStats[SinkPoolLen]) / float64(basicStats[SinkPoolCap]) * 100 + + // 计算效率指标 + var processRate float64 = 100.0 + var dropRate float64 = 0.0 + + if basicStats[InputCount] > 0 { + processRate = float64(basicStats[OutputCount]) / float64(basicStats[InputCount]) * 100 + dropRate = float64(basicStats[DroppedCount]) / float64(basicStats[InputCount]) * 100 + } + + return map[string]interface{}{ + BasicStats: basicStats, + DataChanUsage: dataUsage, + ResultChanUsage: resultUsage, + SinkPoolUsage: sinkUsage, + ProcessRate: processRate, + DropRate: dropRate, + PerformanceLevel: AssessPerformanceLevel(dataUsage, dropRate), + } +} + +// ResetStats 重置统计信息 +func (s *Stream) ResetStats() { + atomic.StoreInt64(&s.inputCount, 0) + atomic.StoreInt64(&s.outputCount, 0) + atomic.StoreInt64(&s.droppedCount, 0) +} \ No newline at end of file diff --git a/stream/metrics.go b/stream/metrics.go new file mode 100644 index 0000000..6569114 --- /dev/null +++ b/stream/metrics.go @@ -0,0 +1,144 @@ +package stream + +import ( + "sync/atomic" +) + +// 统计信息字段常量 +const ( + InputCount = "input_count" + OutputCount = "output_count" + DroppedCount = "dropped_count" + DataChanLen = "data_chan_len" + DataChanCap = 
"data_chan_cap" + ResultChanLen = "result_chan_len" + ResultChanCap = "result_chan_cap" + SinkPoolLen = "sink_pool_len" + SinkPoolCap = "sink_pool_cap" + ActiveRetries = "active_retries" + Expanding = "expanding" +) + +// 详细统计信息字段常量 +const ( + BasicStats = "basic_stats" + DataChanUsage = "data_chan_usage" + ResultChanUsage = "result_chan_usage" + SinkPoolUsage = "sink_pool_usage" + ProcessRate = "process_rate" + DropRate = "drop_rate" + PerformanceLevel = "performance_level" +) + +// 性能级别常量已在 stream.go 中定义 + +// AssessPerformanceLevel 评估当前性能水平 +// 根据数据使用率和丢弃率评估流处理的性能等级 +func AssessPerformanceLevel(dataUsage, dropRate float64) string { + switch { + case dropRate > 50: + return PerformanceLevelCritical // 严重性能问题 + case dropRate > 20: + return PerformanceLevelWarning // 性能警告 + case dataUsage > 90: + return PerformanceLevelHighLoad // 高负载 + case dataUsage > 70: + return PerformanceLevelModerateLoad // 中等负载 + default: + return PerformanceLevelOptimal // 最佳状态 + } +} + +// StatsCollector 统计信息收集器 +// 提供线程安全的统计信息收集功能 +type StatsCollector struct { + inputCount int64 + outputCount int64 + droppedCount int64 +} + +// NewStatsCollector 创建新的统计信息收集器 +func NewStatsCollector() *StatsCollector { + return &StatsCollector{} +} + +// IncrementInput 增加输入计数 +func (sc *StatsCollector) IncrementInput() { + atomic.AddInt64(&sc.inputCount, 1) +} + +// IncrementOutput 增加输出计数 +func (sc *StatsCollector) IncrementOutput() { + atomic.AddInt64(&sc.outputCount, 1) +} + +// IncrementDropped 增加丢弃计数 +func (sc *StatsCollector) IncrementDropped() { + atomic.AddInt64(&sc.droppedCount, 1) +} + +// GetInputCount 获取输入计数 +func (sc *StatsCollector) GetInputCount() int64 { + return atomic.LoadInt64(&sc.inputCount) +} + +// GetOutputCount 获取输出计数 +func (sc *StatsCollector) GetOutputCount() int64 { + return atomic.LoadInt64(&sc.outputCount) +} + +// GetDroppedCount 获取丢弃计数 +func (sc *StatsCollector) GetDroppedCount() int64 { + return atomic.LoadInt64(&sc.droppedCount) +} + +// Reset 重置统计信息 +func (sc *StatsCollector) Reset() { + atomic.StoreInt64(&sc.inputCount, 0) + atomic.StoreInt64(&sc.outputCount, 0) + atomic.StoreInt64(&sc.droppedCount, 0) +} + +// GetBasicStats 获取基础统计信息 +func (sc *StatsCollector) GetBasicStats(dataChanLen, dataChanCap, resultChanLen, resultChanCap, sinkPoolLen, sinkPoolCap int, activeRetries, expanding int32) map[string]int64 { + return map[string]int64{ + InputCount: sc.GetInputCount(), + OutputCount: sc.GetOutputCount(), + DroppedCount: sc.GetDroppedCount(), + DataChanLen: int64(dataChanLen), + DataChanCap: int64(dataChanCap), + ResultChanLen: int64(resultChanLen), + ResultChanCap: int64(resultChanCap), + SinkPoolLen: int64(sinkPoolLen), + SinkPoolCap: int64(sinkPoolCap), + ActiveRetries: int64(activeRetries), + Expanding: int64(expanding), + } +} + +// GetDetailedStats 获取详细的性能统计信息 +func (sc *StatsCollector) GetDetailedStats(basicStats map[string]int64) map[string]interface{} { + // 计算使用率 + dataUsage := float64(basicStats[DataChanLen]) / float64(basicStats[DataChanCap]) * 100 + resultUsage := float64(basicStats[ResultChanLen]) / float64(basicStats[ResultChanCap]) * 100 + sinkUsage := float64(basicStats[SinkPoolLen]) / float64(basicStats[SinkPoolCap]) * 100 + + // 计算效率指标 + var processRate float64 = 100.0 + var dropRate float64 = 0.0 + + if basicStats[InputCount] > 0 { + processRate = float64(basicStats[OutputCount]) / float64(basicStats[InputCount]) * 100 + dropRate = float64(basicStats[DroppedCount]) / float64(basicStats[InputCount]) * 100 + } + + return map[string]interface{}{ + BasicStats: basicStats, + 
DataChanUsage: dataUsage, + ResultChanUsage: resultUsage, + SinkPoolUsage: sinkUsage, + ProcessRate: processRate, + DropRate: dropRate, + PerformanceLevel: AssessPerformanceLevel(dataUsage, dropRate), + } +} \ No newline at end of file diff --git a/stream/persistence.go b/stream/persistence.go index 3004fed..9fc3215 100644 --- a/stream/persistence.go +++ b/stream/persistence.go @@ -1,398 +1,398 @@ -package stream - -import ( - "bufio" - "encoding/json" - "fmt" - "os" - "path/filepath" - "sync" - "time" - - "github.com/rulego/streamsql/logger" -) - -// PersistenceManager 数据持久化管理器 -type PersistenceManager struct { - dataDir string // 持久化数据目录 - maxFileSize int64 // 单个文件最大大小(字节) - flushInterval time.Duration // 刷新间隔 - currentFile *os.File // 当前写入文件 - currentSize int64 // 当前文件大小 - fileIndex int // 文件索引 - writeMutex sync.Mutex // 写入互斥锁 - flushTimer *time.Timer // 刷新定时器 - pendingData []interface{} // 待写入数据 - pendingMutex sync.Mutex // 待写入数据互斥锁 - isRunning bool // 是否运行中 - runningMutex sync.RWMutex // 保护isRunning字段的读写锁 - stopChan chan struct{} // 停止通道 - - // 统计信息 (新增) - totalPersisted int64 - totalLoaded int64 - filesCreated int64 -} - -// NewPersistenceManager 创建默认配置的持久化管理器 -func NewPersistenceManager(dataDir string) *PersistenceManager { - pm := &PersistenceManager{ - dataDir: dataDir, - maxFileSize: 10 * 1024 * 1024, // 10MB per file - flushInterval: 5 * time.Second, // 5秒刷新一次 - fileIndex: 0, - pendingData: make([]interface{}, 0), - stopChan: make(chan struct{}), - } - - // 确保数据目录存在 - if err := os.MkdirAll(dataDir, 0755); err != nil { - logger.Error("Failed to create persistence directory: %v", err) - } - - return pm -} - -// NewPersistenceManagerWithConfig 创建自定义配置的持久化管理器 -func NewPersistenceManagerWithConfig(dataDir string, maxFileSize int64, flushInterval time.Duration) *PersistenceManager { - pm := &PersistenceManager{ - dataDir: dataDir, - maxFileSize: maxFileSize, - flushInterval: flushInterval, - fileIndex: 0, - pendingData: make([]interface{}, 0), - stopChan: make(chan struct{}), - } - - // 确保数据目录存在 - if err := os.MkdirAll(dataDir, 0755); err != nil { - logger.Error("Failed to create persistence directory: %v", err) - } - - return pm -} - -// Start 启动持久化管理器 -func (pm *PersistenceManager) Start() error { - // 检查是否已经在运行 - pm.runningMutex.RLock() - running := pm.isRunning - pm.runningMutex.RUnlock() - - if running { - return fmt.Errorf("persistence manager already running") - } - - // 创建初始文件 - pm.writeMutex.Lock() - if err := pm.createNewFile(); err != nil { - pm.writeMutex.Unlock() - return fmt.Errorf("failed to create initial file: %w", err) - } - pm.writeMutex.Unlock() - - // 设置运行状态 - pm.runningMutex.Lock() - pm.isRunning = true - pm.runningMutex.Unlock() - - // 启动定时刷新 - pm.startFlushTimer() - - // 启动后台处理协程 - go pm.backgroundProcessor() - - logger.Info("Persistence manager started successfully, data directory: %s", pm.dataDir) - return nil -} - -// Stop 停止持久化管理器 -func (pm *PersistenceManager) Stop() error { - // 检查是否正在运行 - pm.runningMutex.RLock() - running := pm.isRunning - pm.runningMutex.RUnlock() - - if !running { - return nil - } - - // 设置停止状态 - pm.runningMutex.Lock() - pm.isRunning = false - pm.runningMutex.Unlock() - - close(pm.stopChan) - - // 停止定时器 - pm.writeMutex.Lock() - if pm.flushTimer != nil { - pm.flushTimer.Stop() - } - pm.writeMutex.Unlock() - - // 刷新剩余数据 - pm.flushPendingData() - - // 关闭当前文件 - pm.writeMutex.Lock() - if pm.currentFile != nil { - pm.currentFile.Close() - pm.currentFile = nil - } - pm.writeMutex.Unlock() - - logger.Info("Persistence manager stopped") - 
return nil -} - -// PersistData 持久化单条数据 -func (pm *PersistenceManager) PersistData(data interface{}) error { - // 检查是否正在运行 - pm.runningMutex.RLock() - running := pm.isRunning - pm.runningMutex.RUnlock() - - if !running { - return fmt.Errorf("persistence manager not running") - } - - pm.pendingMutex.Lock() - pm.pendingData = append(pm.pendingData, data) - pm.pendingMutex.Unlock() - - return nil -} - -// LoadPersistedData 加载并删除持久化数据 -func (pm *PersistenceManager) LoadPersistedData() ([]interface{}, error) { - files, err := filepath.Glob(filepath.Join(pm.dataDir, "streamsql_overflow_*.log")) - if err != nil { - return nil, fmt.Errorf("failed to glob files: %w", err) - } - - var allData []interface{} - - for _, filename := range files { - data, err := pm.loadDataFromFile(filename) - if err != nil { - logger.Error("Failed to load file %s: %v", filename, err) - continue - } - allData = append(allData, data...) - - // 在锁保护下更新统计信息 - pm.writeMutex.Lock() - pm.totalLoaded += int64(len(data)) - pm.writeMutex.Unlock() - - // 加载后删除文件 - if err := os.Remove(filename); err != nil { - logger.Error("Failed to delete loaded file %s: %v", filename, err) - } - } - - logger.Info("Loaded %d data records from persistence files", len(allData)) - return allData, nil -} - -// GetStats 获取持久化统计信息 -func (pm *PersistenceManager) GetStats() map[string]interface{} { - pm.pendingMutex.Lock() - pendingCount := len(pm.pendingData) - pm.pendingMutex.Unlock() - - pm.writeMutex.Lock() - currentFileSize := pm.currentSize - fileIndex := pm.fileIndex - totalPersisted := pm.totalPersisted - totalLoaded := pm.totalLoaded - filesCreated := pm.filesCreated - pm.writeMutex.Unlock() - - // 安全地读取运行状态 - pm.runningMutex.RLock() - running := pm.isRunning - pm.runningMutex.RUnlock() - - return map[string]interface{}{ - "running": running, - "data_dir": pm.dataDir, - "pending_count": pendingCount, - "current_file_size": currentFileSize, - "file_index": fileIndex, - "max_file_size": pm.maxFileSize, - "flush_interval": pm.flushInterval.String(), - "total_persisted": totalPersisted, - "total_loaded": totalLoaded, - "files_created": filesCreated, - } -} - -// createNewFile 创建新的持久化文件 -func (pm *PersistenceManager) createNewFile() error { - // 关闭当前文件 - if pm.currentFile != nil { - pm.currentFile.Close() - } - - // 生成新文件名 - filename := fmt.Sprintf("streamsql_overflow_%d_%d.log", - time.Now().Unix(), pm.fileIndex) - filepath := filepath.Join(pm.dataDir, filename) - - // 创建新文件 - file, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) - if err != nil { - return fmt.Errorf("failed to create file %s: %w", filepath, err) - } - - pm.currentFile = file - pm.currentSize = 0 - pm.fileIndex++ - pm.filesCreated++ - - // logger.Info("Created new persistence file: %s", filepath) - return nil -} - -// writeDataToFile 将数据写入文件 -// 注意:此方法应该在writeMutex锁保护下调用 -func (pm *PersistenceManager) writeDataToFile(data interface{}) error { - if pm.currentFile == nil { - return fmt.Errorf("no current file") - } - - // 序列化数据 - jsonData, err := json.Marshal(map[string]interface{}{ - "timestamp": time.Now().Unix(), - "data": data, - }) - if err != nil { - return fmt.Errorf("failed to marshal data: %w", err) - } - - // 添加换行符 - jsonData = append(jsonData, '\n') - - // 检查文件大小 - if pm.currentSize+int64(len(jsonData)) > pm.maxFileSize { - if err := pm.createNewFile(); err != nil { - return fmt.Errorf("failed to create new file: %w", err) - } - } - - // 写入数据 - n, err := pm.currentFile.Write(jsonData) - if err != nil { - return fmt.Errorf("failed to write data: %w", 
err) - } - - pm.currentSize += int64(n) - pm.totalPersisted++ - return nil -} - -// flushPendingData 刷新待写入数据 -func (pm *PersistenceManager) flushPendingData() { - pm.pendingMutex.Lock() - dataToWrite := make([]interface{}, len(pm.pendingData)) - copy(dataToWrite, pm.pendingData) - pm.pendingData = pm.pendingData[:0] // 清空切片 - pm.pendingMutex.Unlock() - - if len(dataToWrite) == 0 { - return - } - - pm.writeMutex.Lock() - defer pm.writeMutex.Unlock() - - // 批量写入数据 - for _, data := range dataToWrite { - if err := pm.writeDataToFile(data); err != nil { - logger.Error("Failed to write persistence data: %v", err) - } - } - - // 同步到磁盘 - if pm.currentFile != nil { - _ = pm.currentFile.Sync() - } - - // logger.Info("Flushed %d pending data records to disk", len(dataToWrite)) -} - -// startFlushTimer 启动刷新定时器 -func (pm *PersistenceManager) startFlushTimer() { - pm.writeMutex.Lock() - pm.flushTimer = time.AfterFunc(pm.flushInterval, func() { - // 安全地检查运行状态 - pm.runningMutex.RLock() - running := pm.isRunning - pm.runningMutex.RUnlock() - - if running { - pm.flushPendingData() - pm.startFlushTimer() // 重新启动定时器 - } - }) - pm.writeMutex.Unlock() -} - -// backgroundProcessor 后台处理协程 -func (pm *PersistenceManager) backgroundProcessor() { - ticker := time.NewTicker(1 * time.Second) - defer ticker.Stop() - - for { - select { - case <-ticker.C: - // 定期检查并处理 - pm.pendingMutex.Lock() - pendingCount := len(pm.pendingData) - pm.pendingMutex.Unlock() - - // 如果有大量待写入数据,立即刷新 - if pendingCount > 100 { - pm.flushPendingData() - } - - case <-pm.stopChan: - return - } - } -} - -// loadDataFromFile 从文件加载数据 -func (pm *PersistenceManager) loadDataFromFile(filename string) ([]interface{}, error) { - file, err := os.Open(filename) - if err != nil { - return nil, fmt.Errorf("failed to open file %s: %w", filename, err) - } - defer file.Close() - - var data []interface{} - scanner := bufio.NewScanner(file) - - for scanner.Scan() { - line := scanner.Text() - var record map[string]interface{} - - if err := json.Unmarshal([]byte(line), &record); err != nil { - logger.Error("Failed to parse data line: %v", err) - continue - } - - // 提取实际数据 - if actualData, ok := record["data"]; ok { - data = append(data, actualData) - } - } - - if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("failed to scan file: %w", err) - } - - return data, nil -} +package stream + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + "path/filepath" + "sync" + "time" + + "github.com/rulego/streamsql/logger" +) + +// PersistenceManager 数据持久化管理器 +type PersistenceManager struct { + dataDir string // 持久化数据目录 + maxFileSize int64 // 单个文件最大大小(字节) + flushInterval time.Duration // 刷新间隔 + currentFile *os.File // 当前写入文件 + currentSize int64 // 当前文件大小 + fileIndex int // 文件索引 + writeMutex sync.Mutex // 写入互斥锁 + flushTimer *time.Timer // 刷新定时器 + pendingData []interface{} // 待写入数据 + pendingMutex sync.Mutex // 待写入数据互斥锁 + isRunning bool // 是否运行中 + runningMutex sync.RWMutex // 保护isRunning字段的读写锁 + stopChan chan struct{} // 停止通道 + + // 统计信息 (新增) + totalPersisted int64 + totalLoaded int64 + filesCreated int64 +} + +// NewPersistenceManager 创建默认配置的持久化管理器 +func NewPersistenceManager(dataDir string) *PersistenceManager { + pm := &PersistenceManager{ + dataDir: dataDir, + maxFileSize: 10 * 1024 * 1024, // 10MB per file + flushInterval: 5 * time.Second, // 5秒刷新一次 + fileIndex: 0, + pendingData: make([]interface{}, 0), + stopChan: make(chan struct{}), + } + + // 确保数据目录存在 + if err := os.MkdirAll(dataDir, 0755); err != nil { + logger.Error("Failed to create persistence 
directory: %v", err) + } + + return pm +} + +// NewPersistenceManagerWithConfig 创建自定义配置的持久化管理器 +func NewPersistenceManagerWithConfig(dataDir string, maxFileSize int64, flushInterval time.Duration) *PersistenceManager { + pm := &PersistenceManager{ + dataDir: dataDir, + maxFileSize: maxFileSize, + flushInterval: flushInterval, + fileIndex: 0, + pendingData: make([]interface{}, 0), + stopChan: make(chan struct{}), + } + + // 确保数据目录存在 + if err := os.MkdirAll(dataDir, 0755); err != nil { + logger.Error("Failed to create persistence directory: %v", err) + } + + return pm +} + +// Start 启动持久化管理器 +func (pm *PersistenceManager) Start() error { + // 检查是否已经在运行 + pm.runningMutex.RLock() + running := pm.isRunning + pm.runningMutex.RUnlock() + + if running { + return fmt.Errorf("persistence manager already running") + } + + // 创建初始文件 + pm.writeMutex.Lock() + if err := pm.createNewFile(); err != nil { + pm.writeMutex.Unlock() + return fmt.Errorf("failed to create initial file: %w", err) + } + pm.writeMutex.Unlock() + + // 设置运行状态 + pm.runningMutex.Lock() + pm.isRunning = true + pm.runningMutex.Unlock() + + // 启动定时刷新 + pm.startFlushTimer() + + // 启动后台处理协程 + go pm.backgroundProcessor() + + logger.Info("Persistence manager started successfully, data directory: %s", pm.dataDir) + return nil +} + +// Stop 停止持久化管理器 +func (pm *PersistenceManager) Stop() error { + // 检查是否正在运行 + pm.runningMutex.RLock() + running := pm.isRunning + pm.runningMutex.RUnlock() + + if !running { + return nil + } + + // 设置停止状态 + pm.runningMutex.Lock() + pm.isRunning = false + pm.runningMutex.Unlock() + + close(pm.stopChan) + + // 停止定时器 + pm.writeMutex.Lock() + if pm.flushTimer != nil { + pm.flushTimer.Stop() + } + pm.writeMutex.Unlock() + + // 刷新剩余数据 + pm.flushPendingData() + + // 关闭当前文件 + pm.writeMutex.Lock() + if pm.currentFile != nil { + pm.currentFile.Close() + pm.currentFile = nil + } + pm.writeMutex.Unlock() + + logger.Info("Persistence manager stopped") + return nil +} + +// PersistData 持久化单条数据 +func (pm *PersistenceManager) PersistData(data interface{}) error { + // 检查是否正在运行 + pm.runningMutex.RLock() + running := pm.isRunning + pm.runningMutex.RUnlock() + + if !running { + return fmt.Errorf("persistence manager not running") + } + + pm.pendingMutex.Lock() + pm.pendingData = append(pm.pendingData, data) + pm.pendingMutex.Unlock() + + return nil +} + +// LoadPersistedData 加载并删除持久化数据 +func (pm *PersistenceManager) LoadPersistedData() ([]interface{}, error) { + files, err := filepath.Glob(filepath.Join(pm.dataDir, "streamsql_overflow_*.log")) + if err != nil { + return nil, fmt.Errorf("failed to glob files: %w", err) + } + + var allData []interface{} + + for _, filename := range files { + data, err := pm.loadDataFromFile(filename) + if err != nil { + logger.Error("Failed to load file %s: %v", filename, err) + continue + } + allData = append(allData, data...) 
+ + // 在锁保护下更新统计信息 + pm.writeMutex.Lock() + pm.totalLoaded += int64(len(data)) + pm.writeMutex.Unlock() + + // 加载后删除文件 + if err := os.Remove(filename); err != nil { + logger.Error("Failed to delete loaded file %s: %v", filename, err) + } + } + + logger.Info("Loaded %d data records from persistence files", len(allData)) + return allData, nil +} + +// GetStats 获取持久化统计信息 +func (pm *PersistenceManager) GetStats() map[string]interface{} { + pm.pendingMutex.Lock() + pendingCount := len(pm.pendingData) + pm.pendingMutex.Unlock() + + pm.writeMutex.Lock() + currentFileSize := pm.currentSize + fileIndex := pm.fileIndex + totalPersisted := pm.totalPersisted + totalLoaded := pm.totalLoaded + filesCreated := pm.filesCreated + pm.writeMutex.Unlock() + + // 安全地读取运行状态 + pm.runningMutex.RLock() + running := pm.isRunning + pm.runningMutex.RUnlock() + + return map[string]interface{}{ + "running": running, + "data_dir": pm.dataDir, + "pending_count": pendingCount, + "current_file_size": currentFileSize, + "file_index": fileIndex, + "max_file_size": pm.maxFileSize, + "flush_interval": pm.flushInterval.String(), + "total_persisted": totalPersisted, + "total_loaded": totalLoaded, + "files_created": filesCreated, + } +} + +// createNewFile 创建新的持久化文件 +func (pm *PersistenceManager) createNewFile() error { + // 关闭当前文件 + if pm.currentFile != nil { + pm.currentFile.Close() + } + + // 生成新文件名 + filename := fmt.Sprintf("streamsql_overflow_%d_%d.log", + time.Now().Unix(), pm.fileIndex) + filepath := filepath.Join(pm.dataDir, filename) + + // 创建新文件 + file, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return fmt.Errorf("failed to create file %s: %w", filepath, err) + } + + pm.currentFile = file + pm.currentSize = 0 + pm.fileIndex++ + pm.filesCreated++ + + // logger.Info("Created new persistence file: %s", filepath) + return nil +} + +// writeDataToFile 将数据写入文件 +// 注意:此方法应该在writeMutex锁保护下调用 +func (pm *PersistenceManager) writeDataToFile(data interface{}) error { + if pm.currentFile == nil { + return fmt.Errorf("no current file") + } + + // 序列化数据 + jsonData, err := json.Marshal(map[string]interface{}{ + "timestamp": time.Now().Unix(), + "data": data, + }) + if err != nil { + return fmt.Errorf("failed to marshal data: %w", err) + } + + // 添加换行符 + jsonData = append(jsonData, '\n') + + // 检查文件大小 + if pm.currentSize+int64(len(jsonData)) > pm.maxFileSize { + if err := pm.createNewFile(); err != nil { + return fmt.Errorf("failed to create new file: %w", err) + } + } + + // 写入数据 + n, err := pm.currentFile.Write(jsonData) + if err != nil { + return fmt.Errorf("failed to write data: %w", err) + } + + pm.currentSize += int64(n) + pm.totalPersisted++ + return nil +} + +// flushPendingData 刷新待写入数据 +func (pm *PersistenceManager) flushPendingData() { + pm.pendingMutex.Lock() + dataToWrite := make([]interface{}, len(pm.pendingData)) + copy(dataToWrite, pm.pendingData) + pm.pendingData = pm.pendingData[:0] // 清空切片 + pm.pendingMutex.Unlock() + + if len(dataToWrite) == 0 { + return + } + + pm.writeMutex.Lock() + defer pm.writeMutex.Unlock() + + // 批量写入数据 + for _, data := range dataToWrite { + if err := pm.writeDataToFile(data); err != nil { + logger.Error("Failed to write persistence data: %v", err) + } + } + + // 同步到磁盘 + if pm.currentFile != nil { + _ = pm.currentFile.Sync() + } + + // logger.Info("Flushed %d pending data records to disk", len(dataToWrite)) +} + +// startFlushTimer 启动刷新定时器 +func (pm *PersistenceManager) startFlushTimer() { + pm.writeMutex.Lock() + pm.flushTimer = 
time.AfterFunc(pm.flushInterval, func() { + // 安全地检查运行状态 + pm.runningMutex.RLock() + running := pm.isRunning + pm.runningMutex.RUnlock() + + if running { + pm.flushPendingData() + pm.startFlushTimer() // 重新启动定时器 + } + }) + pm.writeMutex.Unlock() +} + +// backgroundProcessor 后台处理协程 +func (pm *PersistenceManager) backgroundProcessor() { + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + // 定期检查并处理 + pm.pendingMutex.Lock() + pendingCount := len(pm.pendingData) + pm.pendingMutex.Unlock() + + // 如果有大量待写入数据,立即刷新 + if pendingCount > 100 { + pm.flushPendingData() + } + + case <-pm.stopChan: + return + } + } +} + +// loadDataFromFile 从文件加载数据 +func (pm *PersistenceManager) loadDataFromFile(filename string) ([]interface{}, error) { + file, err := os.Open(filename) + if err != nil { + return nil, fmt.Errorf("failed to open file %s: %w", filename, err) + } + defer file.Close() + + var data []interface{} + scanner := bufio.NewScanner(file) + + for scanner.Scan() { + line := scanner.Text() + var record map[string]interface{} + + if err := json.Unmarshal([]byte(line), &record); err != nil { + logger.Error("Failed to parse data line: %v", err) + continue + } + + // 提取实际数据 + if actualData, ok := record["data"]; ok { + data = append(data, actualData) + } + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("failed to scan file: %w", err) + } + + return data, nil +} diff --git a/stream/processor_data.go b/stream/processor_data.go new file mode 100644 index 0000000..05367c3 --- /dev/null +++ b/stream/processor_data.go @@ -0,0 +1,484 @@ +package stream + +import ( + "encoding/json" + "fmt" + "reflect" + "strings" + "sync/atomic" + "time" + + "github.com/rulego/streamsql/aggregator" + "github.com/rulego/streamsql/condition" + "github.com/rulego/streamsql/expr" + "github.com/rulego/streamsql/functions" + "github.com/rulego/streamsql/logger" + "github.com/rulego/streamsql/types" +) + +// DataProcessor 数据处理器,负责处理数据流 +type DataProcessor struct { + stream *Stream +} + +// NewDataProcessor 创建数据处理器 +func NewDataProcessor(stream *Stream) *DataProcessor { + return &DataProcessor{stream: stream} +} + +// Process 主处理循环 +func (dp *DataProcessor) Process() { + // 初始化聚合器,用于窗口模式 + if dp.stream.config.NeedWindow { + dp.initializeAggregator() + dp.startWindowProcessing() + } + + // 创建一个定时器,避免创建多个临时定时器导致资源泄漏 + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() // 确保在函数退出时停止定时器 + + // 主处理循环 + for { + // 使用读锁安全访问dataChan + dp.stream.dataChanMux.RLock() + currentDataChan := dp.stream.dataChan + dp.stream.dataChanMux.RUnlock() + + select { + case data, ok := <-currentDataChan: + if !ok { + // 通道已关闭 + return + } + // 应用过滤条件 + if dp.stream.filter == nil || dp.stream.filter.Evaluate(data) { + if dp.stream.config.NeedWindow { + // 窗口模式,添加数据到窗口 + dp.stream.Window.Add(data) + } else { + // 非窗口模式,直接处理数据并输出 + dp.processDirectData(data) + } + } + case <-dp.stream.done: + // 收到关闭信号 + return + case <-ticker.C: + // 定时器触发,什么都不做,只是防止 CPU 空转 + } + } +} + +// initializeAggregator 初始化聚合器 +func (dp *DataProcessor) initializeAggregator() { + // 转换为新的AggregationField格式 + aggregationFields := convertToAggregationFields(dp.stream.config.SelectFields, dp.stream.config.FieldAlias) + dp.stream.aggregator = aggregator.NewGroupAggregator(dp.stream.config.GroupFields, aggregationFields) + + // 注册表达式计算器 + for field, fieldExpr := range dp.stream.config.FieldExpressions { + dp.registerExpressionCalculator(field, fieldExpr) + } +} + +// registerExpressionCalculator 
注册表达式计算器 +func (dp *DataProcessor) registerExpressionCalculator(field string, fieldExpr types.FieldExpression) { + // 创建局部变量避免闭包问题 + currentField := field + currentFieldExpr := fieldExpr + + // 注册表达式计算器 + dp.stream.aggregator.RegisterExpression( + currentField, + currentFieldExpr.Expression, + currentFieldExpr.Fields, + func(data interface{}) (interface{}, error) { + return dp.evaluateExpressionForAggregation(currentFieldExpr, data) + }, + ) +} + +// evaluateExpressionForAggregation 为聚合计算表达式 +func (dp *DataProcessor) evaluateExpressionForAggregation(fieldExpr types.FieldExpression, data interface{}) (interface{}, error) { + // 将数据转换为 map[string]interface{} 以便计算 + dataMap, err := dp.convertToDataMap(data) + if err != nil { + return nil, err + } + + // 检查表达式是否包含嵌套字段,如果有则直接使用自定义表达式引擎 + hasNestedFields := strings.Contains(fieldExpr.Expression, ".") + + if hasNestedFields { + return dp.evaluateNestedFieldExpression(fieldExpr.Expression, dataMap) + } + + // 检查是否为CASE表达式 + trimmedExpr := strings.TrimSpace(fieldExpr.Expression) + upperExpr := strings.ToUpper(trimmedExpr) + if strings.HasPrefix(upperExpr, SQLKeywordCase) { + return dp.evaluateCaseExpression(fieldExpr.Expression, dataMap) + } + + // 使用桥接器计算表达式,支持字符串拼接和IS NULL等语法 + bridge := functions.GetExprBridge() + + // 预处理表达式中的IS NULL和LIKE语法 + processedExpr := fieldExpr.Expression + if bridge.ContainsIsNullOperator(processedExpr) { + if processed, err := bridge.PreprocessIsNullExpression(processedExpr); err == nil { + processedExpr = processed + } + } + if bridge.ContainsLikeOperator(processedExpr) { + if processed, err := bridge.PreprocessLikeExpression(processedExpr); err == nil { + processedExpr = processed + } + } + + result, err := bridge.EvaluateExpression(processedExpr, dataMap) + if err != nil { + // 如果桥接器失败,回退到原来的表达式引擎 + return dp.fallbackExpressionEvaluation(fieldExpr.Expression, dataMap) + } + + return result, nil +} + +// convertToDataMap 将数据转换为map格式 +func (dp *DataProcessor) convertToDataMap(data interface{}) (map[string]interface{}, error) { + switch d := data.(type) { + case map[string]interface{}: + return d, nil + default: + // 如果不是 map,尝试转换 + v := reflect.ValueOf(data) + if v.Kind() == reflect.Ptr { + v = v.Elem() + } + + if v.Kind() == reflect.Struct { + // 将结构体转换为 map + dataMap := make(map[string]interface{}) + t := v.Type() + for i := 0; i < t.NumField(); i++ { + field := t.Field(i) + dataMap[field.Name] = v.Field(i).Interface() + } + return dataMap, nil + } else { + return nil, fmt.Errorf("unsupported data type for expression: %T", data) + } + } +} + +// evaluateNestedFieldExpression 计算嵌套字段表达式 +func (dp *DataProcessor) evaluateNestedFieldExpression(expression string, dataMap map[string]interface{}) (interface{}, error) { + // 直接使用自定义表达式引擎处理嵌套字段,支持NULL值 + // 预处理反引号标识符 + exprToUse := expression + bridge := functions.GetExprBridge() + if bridge.ContainsBacktickIdentifiers(exprToUse) { + if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { + exprToUse = processed + } + } + expr, parseErr := expr.NewExpression(exprToUse) + if parseErr != nil { + return nil, fmt.Errorf("expression parse failed: %w", parseErr) + } + + // 使用支持NULL的计算方法 + numResult, isNull, err := expr.EvaluateWithNull(dataMap) + if err != nil { + return nil, fmt.Errorf("expression evaluation failed: %w", err) + } + if isNull { + return nil, nil // 返回nil表示NULL值 + } + return numResult, nil +} + +// evaluateCaseExpression 计算CASE表达式 +func (dp *DataProcessor) evaluateCaseExpression(expression string, dataMap map[string]interface{}) 
(interface{}, error) { + // CASE表达式使用支持NULL的计算方法 + // 预处理反引号标识符 + exprToUse := expression + bridge := functions.GetExprBridge() + if bridge.ContainsBacktickIdentifiers(exprToUse) { + if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { + exprToUse = processed + } + } + expr, parseErr := expr.NewExpression(exprToUse) + if parseErr != nil { + return nil, fmt.Errorf("CASE expression parse failed: %w", parseErr) + } + + numResult, isNull, err := expr.EvaluateWithNull(dataMap) + if err != nil { + return nil, fmt.Errorf("CASE expression evaluation failed: %w", err) + } + if isNull { + return nil, nil // 返回nil表示NULL值 + } + return numResult, nil +} + +// fallbackExpressionEvaluation 回退表达式计算 +func (dp *DataProcessor) fallbackExpressionEvaluation(expression string, dataMap map[string]interface{}) (interface{}, error) { + // 预处理反引号标识符 + exprToUse := expression + bridge := functions.GetExprBridge() + if bridge.ContainsBacktickIdentifiers(exprToUse) { + if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { + exprToUse = processed + } + } + expr, parseErr := expr.NewExpression(exprToUse) + if parseErr != nil { + return nil, fmt.Errorf("expression parse failed: %w", parseErr) + } + + // 计算表达式,支持NULL值 + numResult, isNull, err := expr.EvaluateWithNull(dataMap) + if err != nil { + return nil, fmt.Errorf("expression evaluation failed: %w", err) + } + if isNull { + return nil, nil // 返回nil表示NULL值 + } + return numResult, nil +} + +// startWindowProcessing 启动窗口处理 +func (dp *DataProcessor) startWindowProcessing() { + // 启动窗口处理协程 + dp.stream.Window.Start() + + // 处理窗口模式 + go func() { + defer func() { + if r := recover(); r != nil { + logger.Error("Window processing goroutine panic recovered: %v", r) + } + }() + + for batch := range dp.stream.Window.OutputChan() { + dp.processWindowBatch(batch) + } + }() +} + +// processWindowBatch 处理窗口批数据 +func (dp *DataProcessor) processWindowBatch(batch []types.Row) { + // 处理窗口批数据 + for _, item := range batch { + if err := dp.stream.aggregator.Put(WindowStartField, item.Slot.WindowStart()); err != nil { + logger.Error("failed to put window start: %v", err) + } + if err := dp.stream.aggregator.Put(WindowEndField, item.Slot.WindowEnd()); err != nil { + logger.Error("failed to put window end: %v", err) + } + if err := dp.stream.aggregator.Add(item.Data); err != nil { + logger.Error("aggregate error: %v", err) + } + } + + // 获取并发送聚合结果 + if results, err := dp.stream.aggregator.GetResults(); err == nil { + dp.processAggregationResults(results) + dp.stream.aggregator.Reset() + } +} + +// processAggregationResults 处理聚合结果 +func (dp *DataProcessor) processAggregationResults(results []map[string]interface{}) { + var finalResults []map[string]interface{} + + // 处理DISTINCT + if dp.stream.config.Distinct { + finalResults = dp.applyDistinct(results) + } else { + finalResults = results + } + + // 应用 HAVING 过滤条件 + if dp.stream.config.Having != "" { + finalResults = dp.applyHavingFilter(finalResults) + } + + // 应用 LIMIT 限制 + if dp.stream.config.Limit > 0 && len(finalResults) > dp.stream.config.Limit { + finalResults = finalResults[:dp.stream.config.Limit] + } + + // 发送结果到结果通道和 Sink 函数 + if len(finalResults) > 0 { + // 非阻塞发送到结果通道 + dp.stream.sendResultNonBlocking(finalResults) + + // 异步调用所有sinks + dp.stream.callSinksAsync(finalResults) + } +} + +// applyDistinct 应用DISTINCT去重 +func (dp *DataProcessor) applyDistinct(results []map[string]interface{}) []map[string]interface{} { + seenResults := make(map[string]bool) + var finalResults 
[]map[string]interface{} + + for _, result := range results { + serializedResult, jsonErr := json.Marshal(result) + if jsonErr != nil { + logger.Error("Error serializing result for distinct check: %v", jsonErr) + finalResults = append(finalResults, result) + continue + } + if !seenResults[string(serializedResult)] { + finalResults = append(finalResults, result) + seenResults[string(serializedResult)] = true + } + } + + return finalResults +} + +// applyHavingFilter 应用HAVING过滤 +func (dp *DataProcessor) applyHavingFilter(results []map[string]interface{}) []map[string]interface{} { + // 检查HAVING条件是否包含CASE表达式 + hasCaseExpression := strings.Contains(strings.ToUpper(dp.stream.config.Having), SQLKeywordCase) + + var filteredResults []map[string]interface{} + + if hasCaseExpression { + filteredResults = dp.applyHavingWithCaseExpression(results) + } else { + filteredResults = dp.applyHavingWithCondition(results) + } + + return filteredResults +} + +// applyHavingWithCaseExpression 使用CASE表达式应用HAVING过滤 +func (dp *DataProcessor) applyHavingWithCaseExpression(results []map[string]interface{}) []map[string]interface{} { + // HAVING条件包含CASE表达式,使用我们的表达式解析器 + // 预处理反引号标识符 + exprToUse := dp.stream.config.Having + bridge := functions.GetExprBridge() + if bridge.ContainsBacktickIdentifiers(exprToUse) { + if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { + exprToUse = processed + } + } + expression, err := expr.NewExpression(exprToUse) + if err != nil { + logger.Error("having filter error (CASE expression): %v", err) + return results + } + + var filteredResults []map[string]interface{} + // 应用 HAVING 过滤,使用CASE表达式计算器 + for _, result := range results { + // 使用EvaluateWithNull方法以支持NULL值处理 + havingResult, isNull, err := expression.EvaluateWithNull(result) + if err != nil { + logger.Error("having filter evaluation error: %v", err) + continue + } + + // 如果结果是NULL,则不满足条件(SQL标准行为) + if isNull { + continue + } + + // 对于数值结果,大于0视为true(满足HAVING条件) + if havingResult > 0 { + filteredResults = append(filteredResults, result) + } + } + + return filteredResults +} + +// applyHavingWithCondition 使用条件表达式应用HAVING过滤 +func (dp *DataProcessor) applyHavingWithCondition(results []map[string]interface{}) []map[string]interface{} { + // HAVING条件不包含CASE表达式,使用原有的expr-lang处理 + // 预处理HAVING条件中的LIKE语法,转换为expr-lang可理解的形式 + processedHaving := dp.stream.config.Having + bridge := functions.GetExprBridge() + if bridge.ContainsLikeOperator(dp.stream.config.Having) { + if processed, err := bridge.PreprocessLikeExpression(dp.stream.config.Having); err == nil { + processedHaving = processed + } + } + + // 预处理HAVING条件中的IS NULL语法 + if bridge.ContainsIsNullOperator(processedHaving) { + if processed, err := bridge.PreprocessIsNullExpression(processedHaving); err == nil { + processedHaving = processed + } + } + + // 创建 HAVING 条件 + havingFilter, err := condition.NewExprCondition(processedHaving) + if err != nil { + logger.Error("having filter error: %v", err) + return results + } + + var filteredResults []map[string]interface{} + // 应用 HAVING 过滤 + for _, result := range results { + if havingFilter.Evaluate(result) { + filteredResults = append(filteredResults, result) + } + } + + return filteredResults +} + +// processDirectData 直接处理非窗口数据 +func (dp *DataProcessor) processDirectData(data interface{}) { + // 直接将数据作为map处理 + dataMap, ok := data.(map[string]interface{}) + if !ok { + logger.Error("Unsupported data type: %T", data) + atomic.AddInt64(&dp.stream.droppedCount, 1) + return + } + + // 创建结果map,预分配合适容量 + estimatedSize := 
len(dp.stream.config.FieldExpressions) + len(dp.stream.config.SimpleFields) + if estimatedSize < 8 { + estimatedSize = 8 // 最小容量 + } + result := make(map[string]interface{}, estimatedSize) + + // 处理表达式字段(使用预编译信息) + for fieldName := range dp.stream.config.FieldExpressions { + dp.stream.processExpressionField(fieldName, dataMap, result) + } + + // 使用预编译的字段信息处理SimpleFields + if len(dp.stream.config.SimpleFields) > 0 { + for _, fieldSpec := range dp.stream.config.SimpleFields { + dp.stream.processSimpleField(fieldSpec, dataMap, data, result) + } + } else if len(dp.stream.config.FieldExpressions) == 0 { + // 如果没有指定字段且没有表达式字段,保留所有字段 + for k, v := range dataMap { + result[k] = v + } + } + + // 将结果包装为数组 + results := []map[string]interface{}{result} + + // 非阻塞发送结果到resultChan + dp.stream.sendResultNonBlocking(results) + + // 异步调用所有sinks,避免阻塞 + dp.stream.callSinksAsync(results) +} diff --git a/stream/processor_field.go b/stream/processor_field.go new file mode 100644 index 0000000..9786e18 --- /dev/null +++ b/stream/processor_field.go @@ -0,0 +1,571 @@ +package stream + +import ( + "fmt" + "strconv" + "strings" + + "github.com/rulego/streamsql/expr" + "github.com/rulego/streamsql/functions" + "github.com/rulego/streamsql/logger" + "github.com/rulego/streamsql/utils/fieldpath" +) + +// fieldProcessInfo 字段处理信息,用于缓存预编译的字段处理逻辑 +type fieldProcessInfo struct { + fieldName string // 原始字段名 + outputName string // 输出字段名 + isFunctionCall bool // 是否为函数调用 + hasNestedField bool // 是否包含嵌套字段 + isSelectAll bool // 是否为SELECT * + isStringLiteral bool // 是否为字符串字面量 + stringValue string // 预处理的字符串字面量值(去除引号) + alias string // 字段别名,用于快速访问 +} + +// expressionProcessInfo 表达式处理信息,用于缓存预编译的表达式处理逻辑 +type expressionProcessInfo struct { + originalExpr string // 原始表达式 + processedExpr string // 预处理后的表达式 + isFunctionCall bool // 是否为函数调用 + hasNestedFields bool // 是否包含嵌套字段 + compiledExpr *expr.Expression // 预编译的表达式对象 + needsBacktickPreprocess bool // 是否需要反引号预处理 +} + +// compileFieldProcessInfo 预编译字段处理信息,避免运行时重复解析 +func (s *Stream) compileFieldProcessInfo() { + s.compiledFieldInfo = make(map[string]*fieldProcessInfo) + s.compiledExprInfo = make(map[string]*expressionProcessInfo) + + // 编译SimpleFields信息 + for _, fieldSpec := range s.config.SimpleFields { + info := s.compileSimpleFieldInfo(fieldSpec) + s.compiledFieldInfo[fieldSpec] = info + } + + // 预编译表达式字段信息 + s.compileExpressionInfo() +} + +// compileSimpleFieldInfo 编译简单字段信息 +func (s *Stream) compileSimpleFieldInfo(fieldSpec string) *fieldProcessInfo { + info := &fieldProcessInfo{} + + if fieldSpec == "*" { + info.isSelectAll = true + info.fieldName = "*" + info.outputName = "*" + return info + } + + // 解析别名 + parts := strings.Split(fieldSpec, ":") + info.fieldName = parts[0] + // 去除字段名中的反引号 + if len(info.fieldName) >= 2 && info.fieldName[0] == '`' && info.fieldName[len(info.fieldName)-1] == '`' { + info.fieldName = info.fieldName[1 : len(info.fieldName)-1] + } + info.outputName = info.fieldName + if len(parts) > 1 { + info.outputName = parts[1] + // 去除输出名中的反引号 + if len(info.outputName) >= 2 && info.outputName[0] == '`' && info.outputName[len(info.outputName)-1] == '`' { + info.outputName = info.outputName[1 : len(info.outputName)-1] + } + } + + // 预判断字段特征 + info.isFunctionCall = strings.Contains(info.fieldName, "(") && strings.Contains(info.fieldName, ")") + info.hasNestedField = !info.isFunctionCall && fieldpath.IsNestedField(info.fieldName) + + // 检查是否为字符串字面量并预处理值 + info.isStringLiteral = (len(info.fieldName) >= 2 && + ((info.fieldName[0] == '\'' && 
info.fieldName[len(info.fieldName)-1] == '\'') || + (info.fieldName[0] == '"' && info.fieldName[len(info.fieldName)-1] == '"'))) + + // 预处理字符串字面量值,去除引号 + if info.isStringLiteral && len(info.fieldName) >= 2 { + info.stringValue = info.fieldName[1 : len(info.fieldName)-1] + } + + // 设置别名用于快速访问 + info.alias = info.outputName + + return info +} + +// compileExpressionInfo 预编译表达式处理信息 +func (s *Stream) compileExpressionInfo() { + bridge := functions.GetExprBridge() + + for fieldName, fieldExpr := range s.config.FieldExpressions { + exprInfo := &expressionProcessInfo{ + originalExpr: fieldExpr.Expression, + } + + // 预处理表达式 + processedExpr := fieldExpr.Expression + if bridge.ContainsIsNullOperator(processedExpr) { + if processed, err := bridge.PreprocessIsNullExpression(processedExpr); err == nil { + processedExpr = processed + } + } + if bridge.ContainsLikeOperator(processedExpr) { + if processed, err := bridge.PreprocessLikeExpression(processedExpr); err == nil { + processedExpr = processed + } + } + exprInfo.processedExpr = processedExpr + + // 预判断表达式特征 + exprInfo.isFunctionCall = strings.Contains(fieldExpr.Expression, "(") && strings.Contains(fieldExpr.Expression, ")") + exprInfo.hasNestedFields = !exprInfo.isFunctionCall && strings.Contains(fieldExpr.Expression, ".") + exprInfo.needsBacktickPreprocess = bridge.ContainsBacktickIdentifiers(fieldExpr.Expression) + + // 预编译表达式对象(仅对非函数调用的表达式) + if !exprInfo.isFunctionCall { + exprToCompile := fieldExpr.Expression + if exprInfo.needsBacktickPreprocess { + if processed, err := bridge.PreprocessBacktickIdentifiers(exprToCompile); err == nil { + exprToCompile = processed + } + } + if compiledExpr, err := expr.NewExpression(exprToCompile); err == nil { + exprInfo.compiledExpr = compiledExpr + } + } + + s.compiledExprInfo[fieldName] = exprInfo + } +} + +// processExpressionField 处理表达式字段 +func (s *Stream) processExpressionField(fieldName string, dataMap map[string]interface{}, result map[string]interface{}) { + exprInfo := s.compiledExprInfo[fieldName] + if exprInfo == nil { + // 回退到原逻辑(安全性保证) + s.processExpressionFieldFallback(fieldName, dataMap, result) + return + } + + var evalResult interface{} + bridge := functions.GetExprBridge() + + if exprInfo.isFunctionCall { + // 对于函数调用,使用桥接器处理 + exprResult, err := bridge.EvaluateExpression(exprInfo.processedExpr, dataMap) + if err != nil { + logger.Error("Function call evaluation failed for field %s: %v", fieldName, err) + result[fieldName] = nil + return + } + evalResult = exprResult + } else if exprInfo.hasNestedFields { + // 使用预编译的表达式对象 + if exprInfo.compiledExpr != nil { + numResult, err := exprInfo.compiledExpr.Evaluate(dataMap) + if err != nil { + logger.Error("Expression evaluation failed for field %s: %v", fieldName, err) + result[fieldName] = nil + return + } + evalResult = numResult + } else { + // 回退到动态编译 + s.processExpressionFieldFallback(fieldName, dataMap, result) + return + } + } else { + // 尝试使用桥接器处理其他表达式 + exprResult, err := bridge.EvaluateExpression(exprInfo.processedExpr, dataMap) + if err != nil { + // 如果桥接器失败,使用预编译的表达式对象 + if exprInfo.compiledExpr != nil { + numResult, evalErr := exprInfo.compiledExpr.Evaluate(dataMap) + if evalErr != nil { + logger.Error("Expression evaluation failed for field %s: %v", fieldName, evalErr) + result[fieldName] = nil + return + } + evalResult = numResult + } else { + // 回退到动态编译 + s.processExpressionFieldFallback(fieldName, dataMap, result) + return + } + } else { + evalResult = exprResult + } + } + + result[fieldName] = evalResult +} + +// 
processExpressionFieldFallback 表达式字段处理的回退逻辑 +func (s *Stream) processExpressionFieldFallback(fieldName string, dataMap map[string]interface{}, result map[string]interface{}) { + fieldExpr, exists := s.config.FieldExpressions[fieldName] + if !exists { + result[fieldName] = nil + return + } + + // 使用桥接器计算表达式,支持IS NULL等语法 + bridge := functions.GetExprBridge() + + // 预处理表达式中的IS NULL和LIKE语法 + processedExpr := fieldExpr.Expression + if bridge.ContainsIsNullOperator(processedExpr) { + if processed, err := bridge.PreprocessIsNullExpression(processedExpr); err == nil { + processedExpr = processed + } + } + if bridge.ContainsLikeOperator(processedExpr) { + if processed, err := bridge.PreprocessLikeExpression(processedExpr); err == nil { + processedExpr = processed + } + } + + // 检查表达式是否是函数调用(包含括号) + isFunctionCall := strings.Contains(fieldExpr.Expression, "(") && strings.Contains(fieldExpr.Expression, ")") + + // 检查表达式是否包含嵌套字段(但排除函数调用中的点号) + hasNestedFields := false + if !isFunctionCall && strings.Contains(fieldExpr.Expression, ".") { + hasNestedFields = true + } + + var evalResult interface{} + + if isFunctionCall { + // 对于函数调用,优先使用桥接器处理 + exprResult, err := bridge.EvaluateExpression(processedExpr, dataMap) + if err != nil { + logger.Error("Function call evaluation failed for field %s: %v", fieldName, err) + result[fieldName] = nil + return + } + evalResult = exprResult + } else if hasNestedFields { + // 检测到嵌套字段(非函数调用),使用自定义表达式引擎 + exprToUse := fieldExpr.Expression + if bridge.ContainsBacktickIdentifiers(exprToUse) { + if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { + exprToUse = processed + } + } + expression, parseErr := expr.NewExpression(exprToUse) + if parseErr != nil { + logger.Error("Expression parse failed for field %s: %v", fieldName, parseErr) + result[fieldName] = nil + return + } + + numResult, err := expression.Evaluate(dataMap) + if err != nil { + logger.Error("Expression evaluation failed for field %s: %v", fieldName, err) + result[fieldName] = nil + return + } + evalResult = numResult + } else { + // 尝试使用桥接器处理其他表达式 + exprResult, err := bridge.EvaluateExpression(processedExpr, dataMap) + if err != nil { + // 如果桥接器失败,回退到原来的表达式引擎 + exprToUse := fieldExpr.Expression + if bridge.ContainsBacktickIdentifiers(exprToUse) { + if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { + exprToUse = processed + } + } + expression, parseErr := expr.NewExpression(exprToUse) + if parseErr != nil { + logger.Error("Expression parse failed for field %s: %v", fieldName, parseErr) + result[fieldName] = nil + return + } + + numResult, evalErr := expression.Evaluate(dataMap) + if evalErr != nil { + logger.Error("Expression evaluation failed for field %s: %v", fieldName, evalErr) + result[fieldName] = nil + return + } + evalResult = numResult + } else { + evalResult = exprResult + } + } + + result[fieldName] = evalResult +} + +// processSimpleField 处理简单字段 +func (s *Stream) processSimpleField(fieldSpec string, dataMap map[string]interface{}, data interface{}, result map[string]interface{}) { + info := s.compiledFieldInfo[fieldSpec] + if info == nil { + // 如果没有预编译信息,回退到原逻辑(安全性保证) + s.processSingleFieldFallback(fieldSpec, dataMap, data, result) + return + } + + if info.isSelectAll { + // SELECT *:批量复制所有字段,跳过表达式字段 + for k, v := range dataMap { + if _, isExpression := s.config.FieldExpressions[k]; !isExpression { + result[k] = v + } + } + return + } + + // 跳过已经通过表达式字段处理的字段 + if _, isExpression := s.config.FieldExpressions[info.outputName]; isExpression 
{ + return + } + + if info.isStringLiteral { + // 字符串字面量处理:使用预编译的字符串值 + result[info.alias] = info.stringValue + } else if info.isFunctionCall { + // 执行函数调用 + if funcResult, err := s.executeFunction(info.fieldName, dataMap); err == nil { + result[info.outputName] = funcResult + } else { + logger.Error("Function execution error %s: %v", info.fieldName, err) + result[info.outputName] = nil + } + } else { + // 普通字段处理 + var value interface{} + var exists bool + + if info.hasNestedField { + value, exists = fieldpath.GetNestedField(data, info.fieldName) + } else { + value, exists = dataMap[info.fieldName] + } + + if exists { + result[info.outputName] = value + } else { + result[info.outputName] = nil + } + } +} + +// processSingleFieldFallback 回退处理单个字段(当预编译信息缺失时) +func (s *Stream) processSingleFieldFallback(fieldSpec string, dataMap map[string]interface{}, data interface{}, result map[string]interface{}) { + // 处理SELECT *的特殊情况 + if fieldSpec == "*" { + // SELECT *:返回所有字段,但跳过已经通过表达式字段处理的字段 + for k, v := range dataMap { + // 如果该字段已经通过表达式字段处理,则跳过,保持表达式计算结果 + if _, isExpression := s.config.FieldExpressions[k]; !isExpression { + result[k] = v + } + } + return + } + + // 处理别名 + parts := strings.Split(fieldSpec, ":") + fieldName := parts[0] + outputName := fieldName + if len(parts) > 1 { + outputName = parts[1] + } + + // 跳过已经通过表达式字段处理的字段 + if _, isExpression := s.config.FieldExpressions[outputName]; isExpression { + return + } + + // 检查是否是函数调用 + if strings.Contains(fieldName, "(") && strings.Contains(fieldName, ")") { + // 执行函数调用 + if funcResult, err := s.executeFunction(fieldName, dataMap); err == nil { + result[outputName] = funcResult + } else { + logger.Error("Function execution error %s: %v", fieldName, err) + result[outputName] = nil + } + } else { + // 普通字段 - 支持嵌套字段 + var value interface{} + var exists bool + + if fieldpath.IsNestedField(fieldName) { + value, exists = fieldpath.GetNestedField(data, fieldName) + } else { + value, exists = dataMap[fieldName] + } + + if exists { + result[outputName] = value + } else { + result[outputName] = nil + } + } +} + +// executeFunction 执行函数调用 +func (s *Stream) executeFunction(funcExpr string, data map[string]interface{}) (interface{}, error) { + // 检查是否是自定义函数 + funcName := extractFunctionName(funcExpr) + if funcName != "" { + // 直接使用函数系统 + fn, exists := functions.Get(funcName) + if exists { + // 解析参数 + args, err := s.parseFunctionArgs(funcExpr, data) + if err != nil { + return nil, err + } + + // 创建函数上下文 + ctx := &functions.FunctionContext{Data: data} + + // 执行函数 + return fn.Execute(ctx, args) + } + } + + // 对于复杂的嵌套函数调用,直接使用ExprBridge + // 这样可以避免Expression.Evaluate的float64类型限制 + bridge := functions.GetExprBridge() + result, err := bridge.EvaluateExpression(funcExpr, data) + if err != nil { + return nil, fmt.Errorf("evaluate function expression failed: %w", err) + } + + return result, nil +} + +// extractFunctionName 从表达式中提取函数名 +func extractFunctionName(expr string) string { + parenIndex := strings.Index(expr, "(") + if parenIndex == -1 { + return "" + } + funcName := strings.TrimSpace(expr[:parenIndex]) + if strings.ContainsAny(funcName, " +-*/=<>!&|") { + return "" + } + return funcName +} + +// parseFunctionArgs 解析函数参数,支持嵌套函数调用 +func (s *Stream) parseFunctionArgs(funcExpr string, data map[string]interface{}) ([]interface{}, error) { + // 提取括号内的参数 + start := strings.Index(funcExpr, "(") + end := strings.LastIndex(funcExpr, ")") + if start == -1 || end == -1 || end <= start { + return nil, fmt.Errorf("invalid function expression: %s", funcExpr) + } + + 
argsStr := strings.TrimSpace(funcExpr[start+1 : end]) + if argsStr == "" { + return []interface{}{}, nil + } + + // 智能分割参数,处理嵌套函数和引号 + argParts, err := s.smartSplitArgs(argsStr) + if err != nil { + return nil, err + } + + args := make([]interface{}, len(argParts)) + + for i, arg := range argParts { + arg = strings.TrimSpace(arg) + + // 如果参数是字符串常量(用引号包围) + if strings.HasPrefix(arg, "'") && strings.HasSuffix(arg, "'") { + args[i] = strings.Trim(arg, "'") + } else if strings.HasPrefix(arg, "\"") && strings.HasSuffix(arg, "\"") { + args[i] = strings.Trim(arg, "\"") + } else if strings.Contains(arg, "(") { + // 如果参数包含函数调用,递归执行 + result, err := s.executeFunction(arg, data) + if err != nil { + return nil, fmt.Errorf("failed to execute nested function '%s': %v", arg, err) + } + args[i] = result + } else if value, exists := data[arg]; exists { + // 如果是数据字段 + args[i] = value + } else { + // 尝试解析为数字 + if val, err := strconv.ParseFloat(arg, 64); err == nil { + args[i] = val + } else { + args[i] = arg + } + } + } + + return args, nil +} + +// smartSplitArgs 智能分割参数,考虑括号嵌套和引号 +func (s *Stream) smartSplitArgs(argsStr string) ([]string, error) { + var args []string + var current strings.Builder + parenDepth := 0 + inQuotes := false + quoteChar := byte(0) + + for i := 0; i < len(argsStr); i++ { + ch := argsStr[i] + + switch ch { + case '\'': + if !inQuotes { + inQuotes = true + quoteChar = ch + } else if quoteChar == ch { + inQuotes = false + quoteChar = 0 + } + current.WriteByte(ch) + case '"': + if !inQuotes { + inQuotes = true + quoteChar = ch + } else if quoteChar == ch { + inQuotes = false + quoteChar = 0 + } + current.WriteByte(ch) + case '(': + if !inQuotes { + parenDepth++ + } + current.WriteByte(ch) + case ')': + if !inQuotes { + parenDepth-- + } + current.WriteByte(ch) + case ',': + if !inQuotes && parenDepth == 0 { + // 找到参数分隔符 + args = append(args, strings.TrimSpace(current.String())) + current.Reset() + } else { + current.WriteByte(ch) + } + default: + current.WriteByte(ch) + } + } + + // 添加最后一个参数 + if current.Len() > 0 { + args = append(args, strings.TrimSpace(current.String())) + } + + return args, nil +} \ No newline at end of file diff --git a/stream/stream.go b/stream/stream.go index 192fa85..1830bc9 100644 --- a/stream/stream.go +++ b/stream/stream.go @@ -1,20 +1,14 @@ package stream import ( - "encoding/json" "fmt" - "reflect" - "strconv" "strings" "sync" "sync/atomic" "time" - "github.com/rulego/streamsql/condition" - "github.com/rulego/streamsql/utils/fieldpath" - "github.com/rulego/streamsql/aggregator" - "github.com/rulego/streamsql/expr" + "github.com/rulego/streamsql/condition" "github.com/rulego/streamsql/functions" "github.com/rulego/streamsql/logger" "github.com/rulego/streamsql/types" @@ -83,28 +77,6 @@ const ( SQLKeywordCase = "CASE" ) -// fieldProcessInfo 字段处理信息,用于缓存预编译的字段处理逻辑 -type fieldProcessInfo struct { - fieldName string // 原始字段名 - outputName string // 输出字段名 - isFunctionCall bool // 是否为函数调用 - hasNestedField bool // 是否包含嵌套字段 - isSelectAll bool // 是否为SELECT * - isStringLiteral bool // 是否为字符串字面量 - stringValue string // 预处理的字符串字面量值(去除引号) - alias string // 字段别名,用于快速访问 -} - -// expressionProcessInfo 表达式处理信息,用于缓存预编译的表达式处理逻辑 -type expressionProcessInfo struct { - originalExpr string // 原始表达式 - processedExpr string // 预处理后的表达式 - isFunctionCall bool // 是否为函数调用 - hasNestedFields bool // 是否包含嵌套字段 - compiledExpr *expr.Expression // 预编译的表达式对象 - needsBacktickPreprocess bool // 是否需要反引号预处理 -} - type Stream struct { dataChan chan interface{} filter condition.Condition @@ -148,151 +120,32 
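smartSplitArgs in processor_field.go scans the argument string byte by byte, splitting on commas only at parenthesis depth zero and outside quoted literals, so nested calls and embedded commas survive intact. A condensed sketch of the same idea, with the quote and depth bookkeeping folded together (hypothetical standalone function, not this patch's method):

package main

import (
	"fmt"
	"strings"
)

// splitArgs splits a function argument list on commas, but only when the
// comma is at paren depth zero and outside a quoted literal.
func splitArgs(s string) []string {
	var args []string
	var cur strings.Builder
	depth, quote := 0, byte(0)
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case quote != 0: // inside a quoted literal: copy until it closes
			if c == quote {
				quote = 0
			}
			cur.WriteByte(c)
		case c == '\'' || c == '"':
			quote = c
			cur.WriteByte(c)
		case c == '(':
			depth++
			cur.WriteByte(c)
		case c == ')':
			depth--
			cur.WriteByte(c)
		case c == ',' && depth == 0: // real argument separator
			args = append(args, strings.TrimSpace(cur.String()))
			cur.Reset()
		default:
			cur.WriteByte(c)
		}
	}
	if cur.Len() > 0 {
		args = append(args, strings.TrimSpace(cur.String()))
	}
	return args
}

func main() {
	fmt.Printf("%q\n", splitArgs(`concat(a, b), 'x,y', 3.14`))
	// ["concat(a, b)" "'x,y'" "3.14"]
}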
@@ type Stream struct { // NewStream 使用统一配置创建Stream func NewStream(config types.Config) (*Stream, error) { - // 如果没有指定性能配置,使用默认配置 - if (config.PerformanceConfig == types.PerformanceConfig{}) { - config.PerformanceConfig = types.DefaultPerformanceConfig() - } - - return newStreamWithUnifiedConfig(config) + factory := NewStreamFactory() + return factory.CreateStream(config) } // NewStreamWithHighPerformance 创建高性能Stream func NewStreamWithHighPerformance(config types.Config) (*Stream, error) { - config.PerformanceConfig = types.HighPerformanceConfig() - return newStreamWithUnifiedConfig(config) + factory := NewStreamFactory() + return factory.CreateHighPerformanceStream(config) } // NewStreamWithLowLatency 创建低延迟Stream func NewStreamWithLowLatency(config types.Config) (*Stream, error) { - config.PerformanceConfig = types.LowLatencyConfig() - return newStreamWithUnifiedConfig(config) + factory := NewStreamFactory() + return factory.CreateLowLatencyStream(config) } // NewStreamWithZeroDataLoss 创建零数据丢失Stream func NewStreamWithZeroDataLoss(config types.Config) (*Stream, error) { - config.PerformanceConfig = types.ZeroDataLossConfig() - return newStreamWithUnifiedConfig(config) + factory := NewStreamFactory() + return factory.CreateZeroDataLossStream(config) } // NewStreamWithCustomPerformance 创建自定义性能配置的Stream func NewStreamWithCustomPerformance(config types.Config, perfConfig types.PerformanceConfig) (*Stream, error) { - config.PerformanceConfig = perfConfig - return newStreamWithUnifiedConfig(config) -} - -// newStreamWithUnifiedConfig 使用统一配置创建Stream的内部实现 -func newStreamWithUnifiedConfig(config types.Config) (*Stream, error) { - var win window.Window - var err error - - // 只有在需要窗口时才创建窗口 - if config.NeedWindow { - // 将统一的性能配置传递给窗口 - windowConfig := config.WindowConfig - if windowConfig.Params == nil { - windowConfig.Params = make(map[string]interface{}) - } - // 传递完整的性能配置给窗口 - windowConfig.Params[PerformanceConfigKey] = config.PerformanceConfig - - win, err = window.CreateWindow(windowConfig) - if err != nil { - return nil, err - } - } - - // 使用统一配置创建Stream - perfConfig := config.PerformanceConfig - stream := &Stream{ - dataChan: make(chan interface{}, perfConfig.BufferConfig.DataChannelSize), - config: config, - Window: win, - resultChan: make(chan interface{}, perfConfig.BufferConfig.ResultChannelSize), - seenResults: &sync.Map{}, - done: make(chan struct{}), - sinkWorkerPool: make(chan func(), perfConfig.WorkerConfig.SinkPoolSize), - allowDataDrop: perfConfig.OverflowConfig.AllowDataLoss, - blockingTimeout: perfConfig.OverflowConfig.BlockTimeout, - overflowStrategy: perfConfig.OverflowConfig.Strategy, - maxRetryRoutines: int32(perfConfig.WorkerConfig.MaxRetryRoutines), - } - - // 如果是持久化策略,初始化持久化管理器 - if perfConfig.OverflowConfig.Strategy == StrategyPersist && perfConfig.OverflowConfig.PersistenceConfig != nil { - persistConfig := perfConfig.OverflowConfig.PersistenceConfig - stream.persistenceManager = NewPersistenceManagerWithConfig( - persistConfig.DataDir, - persistConfig.MaxFileSize, - persistConfig.FlushInterval, - ) - if err := stream.persistenceManager.Start(); err != nil { - return nil, fmt.Errorf("failed to start persistence manager: %w", err) - } - } - - // 根据溢出策略预设AddData函数指针,避免运行时switch判断 - switch perfConfig.OverflowConfig.Strategy { - case StrategyBlock: - stream.addDataFunc = stream.addDataBlocking - case StrategyExpand: - stream.addDataFunc = stream.addDataWithExpansion - case StrategyPersist: - stream.addDataFunc = stream.addDataWithPersistence - default: - stream.addDataFunc = 
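With this refactor the public constructors become thin wrappers: each one selects a performance profile and delegates to StreamFactory. A usage sketch, assuming it compiles against the refactored API (the config values here are illustrative only):

package main

import (
	"fmt"

	"github.com/rulego/streamsql/stream"
	"github.com/rulego/streamsql/types"
)

func main() {
	cfg := types.Config{
		SimpleFields: []string{"device", "temperature"},
	}

	// Equivalent paths: the named constructor and the factory it wraps.
	s1, err := stream.NewStream(cfg) // falls back to DefaultPerformanceConfig when none is set
	if err != nil {
		fmt.Println("create failed:", err)
		return
	}
	defer s1.Stop()

	factory := stream.NewStreamFactory()
	s2, err := factory.CreateHighPerformanceStream(cfg)
	if err != nil {
		fmt.Println("create failed:", err)
		return
	}
	defer s2.Stop()
}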
stream.addDataWithDrop - } - - // 预编译字段处理信息 - stream.compileFieldProcessInfo() - - // 启动工作协程,使用配置的工作线程数 - go stream.startSinkWorkerPool(perfConfig.WorkerConfig.SinkWorkerCount) - go stream.startResultConsumer() - - return stream, nil -} - -// startSinkWorkerPool 启动Sink工作池,支持配置工作线程数 -func (s *Stream) startSinkWorkerPool(workerCount int) { - // 使用配置的工作线程数 - if workerCount <= 0 { - workerCount = 8 // 默认值 - } - - for i := 0; i < workerCount; i++ { - go func(workerID int) { - for { - select { - case task := <-s.sinkWorkerPool: - // 执行sink任务 - func() { - defer func() { - // 增强错误恢复,防止单个worker崩溃 - if r := recover(); r != nil { - logger.Error("Sink worker %d panic recovered: %v", workerID, r) - } - }() - task() - }() - case <-s.done: - return - } - } - }(i) - } -} - -// startResultConsumer 启动自动结果消费者,防止resultChan阻塞 -func (s *Stream) startResultConsumer() { - for { - select { - case <-s.resultChan: - // 自动消费结果,防止通道阻塞 - // 这是一个保底机制,确保即使没有外部消费者,系统也不会阻塞 - case <-s.done: - return - } - } + factory := NewStreamFactory() + return factory.CreateCustomPerformanceStream(config, perfConfig) } // RegisterFilter 注册过滤条件,支持反引号标识符、LIKE语法和IS NULL语法 @@ -301,6 +154,17 @@ func (s *Stream) RegisterFilter(conditionStr string) error { return nil } + processedCondition := s.preprocessFilterCondition(conditionStr) + filter, err := condition.NewExprCondition(processedCondition) + if err != nil { + return fmt.Errorf("compile filter error: %w", err) + } + s.filter = filter + return nil +} + +// preprocessFilterCondition 预处理过滤条件 +func (s *Stream) preprocessFilterCondition(conditionStr string) string { processedCondition := conditionStr bridge := functions.GetExprBridge() @@ -325,12 +189,7 @@ func (s *Stream) RegisterFilter(conditionStr string) error { } } - filter, err := condition.NewExprCondition(processedCondition) - if err != nil { - return fmt.Errorf("compile filter error: %w", err) - } - s.filter = filter - return nil + return processedCondition } // convertToAggregationFields 将旧格式的配置转换为新的AggregationField格式 @@ -347,7 +206,7 @@ func convertToAggregationFields(selectFields map[string]aggregator.AggregateType if inputField, exists := fieldAlias[outputAlias]; exists { field.InputField = inputField } else { - // 如果没有别名映射,假设输入字段名等于输出别名 + // 如果没有别名映射,输入字段名等于输出别名 field.InputField = outputAlias } @@ -357,977 +216,10 @@ func convertToAggregationFields(selectFields map[string]aggregator.AggregateType return fields } -// compileFieldProcessInfo 预编译字段处理信息,避免运行时重复解析 -func (s *Stream) compileFieldProcessInfo() { - s.compiledFieldInfo = make(map[string]*fieldProcessInfo) - s.compiledExprInfo = make(map[string]*expressionProcessInfo) - - // 编译SimpleFields信息 - for _, fieldSpec := range s.config.SimpleFields { - info := &fieldProcessInfo{} - - if fieldSpec == "*" { - info.isSelectAll = true - info.fieldName = "*" - info.outputName = "*" - } else { - // 解析别名 - parts := strings.Split(fieldSpec, ":") - info.fieldName = parts[0] - // 去除字段名中的反引号 - if len(info.fieldName) >= 2 && info.fieldName[0] == '`' && info.fieldName[len(info.fieldName)-1] == '`' { - info.fieldName = info.fieldName[1 : len(info.fieldName)-1] - } - info.outputName = info.fieldName - if len(parts) > 1 { - info.outputName = parts[1] - // 去除输出名中的反引号 - if len(info.outputName) >= 2 && info.outputName[0] == '`' && info.outputName[len(info.outputName)-1] == '`' { - info.outputName = info.outputName[1 : len(info.outputName)-1] - } - } - - // 预判断字段特征 - info.isFunctionCall = strings.Contains(info.fieldName, "(") && strings.Contains(info.fieldName, ")") - info.hasNestedField = 
!info.isFunctionCall && fieldpath.IsNestedField(info.fieldName) - - // 检查是否为字符串字面量并预处理值 - info.isStringLiteral = (len(info.fieldName) >= 2 && - ((info.fieldName[0] == '\'' && info.fieldName[len(info.fieldName)-1] == '\'') || - (info.fieldName[0] == '"' && info.fieldName[len(info.fieldName)-1] == '"'))) - - // 预处理字符串字面量值,去除引号 - if info.isStringLiteral && len(info.fieldName) >= 2 { - info.stringValue = info.fieldName[1 : len(info.fieldName)-1] - } - - // 设置别名用于快速访问 - info.alias = info.outputName - } - - s.compiledFieldInfo[fieldSpec] = info - } - - // 预编译表达式字段信息 - s.compileExpressionInfo() -} - -// compileExpressionInfo 预编译表达式处理信息 -func (s *Stream) compileExpressionInfo() { - bridge := functions.GetExprBridge() - - for fieldName, fieldExpr := range s.config.FieldExpressions { - exprInfo := &expressionProcessInfo{ - originalExpr: fieldExpr.Expression, - } - - // 预处理表达式 - processedExpr := fieldExpr.Expression - if bridge.ContainsIsNullOperator(processedExpr) { - if processed, err := bridge.PreprocessIsNullExpression(processedExpr); err == nil { - processedExpr = processed - } - } - if bridge.ContainsLikeOperator(processedExpr) { - if processed, err := bridge.PreprocessLikeExpression(processedExpr); err == nil { - processedExpr = processed - } - } - exprInfo.processedExpr = processedExpr - - // 预判断表达式特征 - exprInfo.isFunctionCall = strings.Contains(fieldExpr.Expression, "(") && strings.Contains(fieldExpr.Expression, ")") - exprInfo.hasNestedFields = !exprInfo.isFunctionCall && strings.Contains(fieldExpr.Expression, ".") - exprInfo.needsBacktickPreprocess = bridge.ContainsBacktickIdentifiers(fieldExpr.Expression) - - // 预编译表达式对象(仅对非函数调用的表达式) - if !exprInfo.isFunctionCall { - exprToCompile := fieldExpr.Expression - if exprInfo.needsBacktickPreprocess { - if processed, err := bridge.PreprocessBacktickIdentifiers(exprToCompile); err == nil { - exprToCompile = processed - } - } - if compiledExpr, err := expr.NewExpression(exprToCompile); err == nil { - exprInfo.compiledExpr = compiledExpr - } - } - - s.compiledExprInfo[fieldName] = exprInfo - } -} - func (s *Stream) Start() { - // 启动处理协程 - go s.process() -} - -func (s *Stream) process() { - // 初始化聚合器,用于窗口模式 - if s.config.NeedWindow { - // 转换为新的AggregationField格式 - aggregationFields := convertToAggregationFields(s.config.SelectFields, s.config.FieldAlias) - s.aggregator = aggregator.NewGroupAggregator(s.config.GroupFields, aggregationFields) - - // 为表达式字段创建计算器 - for field, fieldExpr := range s.config.FieldExpressions { - // 创建局部变量避免闭包问题 - currentField := field - currentFieldExpr := fieldExpr - - // 注册表达式计算器 - s.aggregator.RegisterExpression( - currentField, - currentFieldExpr.Expression, - currentFieldExpr.Fields, - func(data interface{}) (interface{}, error) { - // 将数据转换为 map[string]interface{} 以便计算 - var dataMap map[string]interface{} - switch d := data.(type) { - case map[string]interface{}: - dataMap = d - default: - // 如果不是 map,尝试转换 - v := reflect.ValueOf(data) - if v.Kind() == reflect.Ptr { - v = v.Elem() - } - - if v.Kind() == reflect.Struct { - // 将结构体转换为 map - dataMap = make(map[string]interface{}) - t := v.Type() - for i := 0; i < t.NumField(); i++ { - field := t.Field(i) - dataMap[field.Name] = v.Field(i).Interface() - } - } else { - return nil, fmt.Errorf("unsupported data type for expression: %T", data) - } - } - - // 检查表达式是否包含嵌套字段,如果有则直接使用自定义表达式引擎 - hasNestedFields := strings.Contains(currentFieldExpr.Expression, ".") - - if hasNestedFields { - // 直接使用自定义表达式引擎处理嵌套字段,支持NULL值 - // 预处理反引号标识符 - exprToUse := currentFieldExpr.Expression - 
bridge := functions.GetExprBridge() - if bridge.ContainsBacktickIdentifiers(exprToUse) { - if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { - exprToUse = processed - } - } - expression, parseErr := expr.NewExpression(exprToUse) - if parseErr != nil { - return nil, fmt.Errorf("expression parse failed: %w", parseErr) - } - - // 使用支持NULL的计算方法 - numResult, isNull, err := expression.EvaluateWithNull(dataMap) - if err != nil { - return nil, fmt.Errorf("expression evaluation failed: %w", err) - } - if isNull { - return nil, nil // 返回nil表示NULL值 - } - return numResult, nil - } - - // 检查是否为CASE表达式 - trimmedExpr := strings.TrimSpace(currentFieldExpr.Expression) - upperExpr := strings.ToUpper(trimmedExpr) - if strings.HasPrefix(upperExpr, SQLKeywordCase) { - // CASE表达式使用支持NULL的计算方法 - // 预处理反引号标识符 - exprToUse := currentFieldExpr.Expression - bridge := functions.GetExprBridge() - if bridge.ContainsBacktickIdentifiers(exprToUse) { - if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { - exprToUse = processed - } - } - expression, parseErr := expr.NewExpression(exprToUse) - if parseErr != nil { - return nil, fmt.Errorf("CASE expression parse failed: %w", parseErr) - } - - numResult, isNull, err := expression.EvaluateWithNull(dataMap) - if err != nil { - return nil, fmt.Errorf("CASE expression evaluation failed: %w", err) - } - if isNull { - return nil, nil // 返回nil表示NULL值 - } - return numResult, nil - } - - // 使用桥接器计算表达式,支持字符串拼接和IS NULL等语法 - bridge := functions.GetExprBridge() - - // 预处理表达式中的IS NULL和LIKE语法 - processedExpr := currentFieldExpr.Expression - if bridge.ContainsIsNullOperator(processedExpr) { - if processed, err := bridge.PreprocessIsNullExpression(processedExpr); err == nil { - processedExpr = processed - } - } - if bridge.ContainsLikeOperator(processedExpr) { - if processed, err := bridge.PreprocessLikeExpression(processedExpr); err == nil { - processedExpr = processed - } - } - - result, err := bridge.EvaluateExpression(processedExpr, dataMap) - if err != nil { - // 如果桥接器失败,回退到原来的表达式引擎(使用原始表达式,不是预处理的) - // 预处理反引号标识符 - exprToUse := currentFieldExpr.Expression - if bridge.ContainsBacktickIdentifiers(exprToUse) { - if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { - exprToUse = processed - } - } - expression, parseErr := expr.NewExpression(exprToUse) - if parseErr != nil { - return nil, fmt.Errorf("expression parse failed: %w", parseErr) - } - - // 计算表达式,支持NULL值 - numResult, isNull, evalErr := expression.EvaluateWithNull(dataMap) - if evalErr != nil { - return nil, fmt.Errorf("expression evaluation failed: %w", evalErr) - } - if isNull { - return nil, nil // 返回nil表示NULL值 - } - return numResult, nil - } - - return result, nil - }, - ) - } - - // 启动窗口处理协程 - s.Window.Start() - - // 处理窗口模式 - go func() { - defer func() { - if r := recover(); r != nil { - logger.Error("Window processing goroutine panic recovered: %v", r) - } - }() - - for batch := range s.Window.OutputChan() { - // 处理窗口批数据 - for _, item := range batch { - if err := s.aggregator.Put(WindowStartField, item.Slot.WindowStart()); err != nil { - logger.Error("failed to put window start: %v", err) - } - if err := s.aggregator.Put(WindowEndField, item.Slot.WindowEnd()); err != nil { - logger.Error("failed to put window end: %v", err) - } - if err := s.aggregator.Add(item.Data); err != nil { - logger.Error("aggregate error: %v", err) - } - } - - // 获取并发送聚合结果 - if results, err := s.aggregator.GetResults(); err == nil { - var finalResults 
[]map[string]interface{} - if s.config.Distinct { - seenResults := make(map[string]bool) - for _, result := range results { - serializedResult, jsonErr := json.Marshal(result) - if jsonErr != nil { - logger.Error("Error serializing result for distinct check: %v", jsonErr) - finalResults = append(finalResults, result) - continue - } - if !seenResults[string(serializedResult)] { - finalResults = append(finalResults, result) - seenResults[string(serializedResult)] = true - } - } - } else { - finalResults = results - } - - // 应用 HAVING 过滤条件 - if s.config.Having != "" { - // 检查HAVING条件是否包含CASE表达式 - hasCaseExpression := strings.Contains(strings.ToUpper(s.config.Having), SQLKeywordCase) - - var filteredResults []map[string]interface{} - - if hasCaseExpression { - // HAVING条件包含CASE表达式,使用我们的表达式解析器 - // 预处理反引号标识符 - exprToUse := s.config.Having - bridge := functions.GetExprBridge() - if bridge.ContainsBacktickIdentifiers(exprToUse) { - if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { - exprToUse = processed - } - } - expression, err := expr.NewExpression(exprToUse) - if err != nil { - logger.Error("having filter error (CASE expression): %v", err) - } else { - // 应用 HAVING 过滤,使用CASE表达式计算器 - for _, result := range finalResults { - // 使用EvaluateWithNull方法以支持NULL值处理 - havingResult, isNull, err := expression.EvaluateWithNull(result) - if err != nil { - logger.Error("having filter evaluation error: %v", err) - continue - } - - // 如果结果是NULL,则不满足条件(SQL标准行为) - if isNull { - continue - } - - // 对于数值结果,大于0视为true(满足HAVING条件) - if havingResult > 0 { - filteredResults = append(filteredResults, result) - } - } - } - } else { - // HAVING条件不包含CASE表达式,使用原有的expr-lang处理 - // 预处理HAVING条件中的LIKE语法,转换为expr-lang可理解的形式 - processedHaving := s.config.Having - bridge := functions.GetExprBridge() - if bridge.ContainsLikeOperator(s.config.Having) { - if processed, err := bridge.PreprocessLikeExpression(s.config.Having); err == nil { - processedHaving = processed - } - } - - // 预处理HAVING条件中的IS NULL语法 - if bridge.ContainsIsNullOperator(processedHaving) { - if processed, err := bridge.PreprocessIsNullExpression(processedHaving); err == nil { - processedHaving = processed - } - } - - // 创建 HAVING 条件 - havingFilter, err := condition.NewExprCondition(processedHaving) - if err != nil { - logger.Error("having filter error: %v", err) - } else { - // 应用 HAVING 过滤 - for _, result := range finalResults { - if havingFilter.Evaluate(result) { - filteredResults = append(filteredResults, result) - } - } - } - } - - finalResults = filteredResults - } - - // 应用 LIMIT 限制 - if s.config.Limit > 0 && len(finalResults) > s.config.Limit { - finalResults = finalResults[:s.config.Limit] - } - - // 发送结果到结果通道和 Sink 函数 - if len(finalResults) > 0 { - // 非阻塞发送到结果通道 - s.sendResultNonBlocking(finalResults) - - // 异步调用所有sinks - s.callSinksAsync(finalResults) - } - s.aggregator.Reset() - } - } - }() - } - - // 创建一个定时器,避免创建多个临时定时器导致资源泄漏 - ticker := time.NewTicker(100 * time.Millisecond) - defer ticker.Stop() // 确保在函数退出时停止定时器 - - // 主处理循环 - for { - // 使用读锁安全访问dataChan - s.dataChanMux.RLock() - currentDataChan := s.dataChan - s.dataChanMux.RUnlock() - - select { - case data, ok := <-currentDataChan: - if !ok { - // 通道已关闭 - return - } - // 应用过滤条件 - if s.filter == nil || s.filter.Evaluate(data) { - if s.config.NeedWindow { - // 窗口模式,添加数据到窗口 - s.Window.Add(data) - } else { - // 非窗口模式,直接处理数据并输出 - s.processDirectData(data) - } - } - case <-s.done: - // 收到关闭信号 - return - case <-ticker.C: - // 定时器触发,什么都不做,只是防止 CPU 空转 - } - } -} - -// 
processExpressionFieldFallback 表达式字段处理的回退逻辑 -func (s *Stream) processExpressionFieldFallback(fieldName string, dataMap map[string]interface{}, result map[string]interface{}) { - fieldExpr, exists := s.config.FieldExpressions[fieldName] - if !exists { - result[fieldName] = nil - return - } - - // 使用桥接器计算表达式,支持IS NULL等语法 - bridge := functions.GetExprBridge() - - // 预处理表达式中的IS NULL和LIKE语法 - processedExpr := fieldExpr.Expression - if bridge.ContainsIsNullOperator(processedExpr) { - if processed, err := bridge.PreprocessIsNullExpression(processedExpr); err == nil { - processedExpr = processed - } - } - if bridge.ContainsLikeOperator(processedExpr) { - if processed, err := bridge.PreprocessLikeExpression(processedExpr); err == nil { - processedExpr = processed - } - } - - // 检查表达式是否是函数调用(包含括号) - isFunctionCall := strings.Contains(fieldExpr.Expression, "(") && strings.Contains(fieldExpr.Expression, ")") - - // 检查表达式是否包含嵌套字段(但排除函数调用中的点号) - hasNestedFields := false - if !isFunctionCall && strings.Contains(fieldExpr.Expression, ".") { - hasNestedFields = true - } - - var evalResult interface{} - - if isFunctionCall { - // 对于函数调用,优先使用桥接器处理 - exprResult, err := bridge.EvaluateExpression(processedExpr, dataMap) - if err != nil { - logger.Error("Function call evaluation failed for field %s: %v", fieldName, err) - result[fieldName] = nil - return - } - evalResult = exprResult - } else if hasNestedFields { - // 检测到嵌套字段(非函数调用),使用自定义表达式引擎 - exprToUse := fieldExpr.Expression - if bridge.ContainsBacktickIdentifiers(exprToUse) { - if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { - exprToUse = processed - } - } - expression, parseErr := expr.NewExpression(exprToUse) - if parseErr != nil { - logger.Error("Expression parse failed for field %s: %v", fieldName, parseErr) - result[fieldName] = nil - return - } - - numResult, err := expression.Evaluate(dataMap) - if err != nil { - logger.Error("Expression evaluation failed for field %s: %v", fieldName, err) - result[fieldName] = nil - return - } - evalResult = numResult - } else { - // 尝试使用桥接器处理其他表达式 - exprResult, err := bridge.EvaluateExpression(processedExpr, dataMap) - if err != nil { - // 如果桥接器失败,回退到原来的表达式引擎 - exprToUse := fieldExpr.Expression - if bridge.ContainsBacktickIdentifiers(exprToUse) { - if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil { - exprToUse = processed - } - } - expression, parseErr := expr.NewExpression(exprToUse) - if parseErr != nil { - logger.Error("Expression parse failed for field %s: %v", fieldName, parseErr) - result[fieldName] = nil - return - } - - numResult, evalErr := expression.Evaluate(dataMap) - if evalErr != nil { - logger.Error("Expression evaluation failed for field %s: %v", fieldName, evalErr) - result[fieldName] = nil - return - } - evalResult = numResult - } else { - evalResult = exprResult - } - } - - result[fieldName] = evalResult -} - -// processDirectData 直接处理非窗口数据 -func (s *Stream) processDirectData(data interface{}) { - // 增加输入计数 - atomic.AddInt64(&s.inputCount, 1) - - // 直接将数据作为map处理 - dataMap, ok := data.(map[string]interface{}) - if !ok { - logger.Error("Unsupported data type: %T", data) - atomic.AddInt64(&s.droppedCount, 1) - return - } - - // 创建结果map,预分配合适容量 - estimatedSize := len(s.config.FieldExpressions) + len(s.config.SimpleFields) - if estimatedSize < 8 { - estimatedSize = 8 // 最小容量 - } - result := make(map[string]interface{}, estimatedSize) - - // 处理表达式字段(使用预编译信息) - for fieldName := range s.config.FieldExpressions { - exprInfo := 
s.compiledExprInfo[fieldName] - if exprInfo == nil { - // 回退到原逻辑(安全性保证) - s.processExpressionFieldFallback(fieldName, dataMap, result) - continue - } - - var evalResult interface{} - bridge := functions.GetExprBridge() - - if exprInfo.isFunctionCall { - // 对于函数调用,使用桥接器处理 - exprResult, err := bridge.EvaluateExpression(exprInfo.processedExpr, dataMap) - if err != nil { - logger.Error("Function call evaluation failed for field %s: %v", fieldName, err) - result[fieldName] = nil - continue - } - evalResult = exprResult - } else if exprInfo.hasNestedFields { - // 使用预编译的表达式对象 - if exprInfo.compiledExpr != nil { - numResult, err := exprInfo.compiledExpr.Evaluate(dataMap) - if err != nil { - logger.Error("Expression evaluation failed for field %s: %v", fieldName, err) - result[fieldName] = nil - continue - } - evalResult = numResult - } else { - // 回退到动态编译 - s.processExpressionFieldFallback(fieldName, dataMap, result) - continue - } - } else { - // 尝试使用桥接器处理其他表达式 - exprResult, err := bridge.EvaluateExpression(exprInfo.processedExpr, dataMap) - if err != nil { - // 如果桥接器失败,使用预编译的表达式对象 - if exprInfo.compiledExpr != nil { - numResult, evalErr := exprInfo.compiledExpr.Evaluate(dataMap) - if evalErr != nil { - logger.Error("Expression evaluation failed for field %s: %v", fieldName, evalErr) - result[fieldName] = nil - continue - } - evalResult = numResult - } else { - // 回退到动态编译 - s.processExpressionFieldFallback(fieldName, dataMap, result) - continue - } - } else { - evalResult = exprResult - } - } - - result[fieldName] = evalResult - } - - // 使用预编译的字段信息处理SimpleFields - if len(s.config.SimpleFields) > 0 { - for _, fieldSpec := range s.config.SimpleFields { - info := s.compiledFieldInfo[fieldSpec] - if info == nil { - // 如果没有预编译信息,回退到原逻辑(安全性保证) - s.processSingleFieldFallback(fieldSpec, dataMap, data, result) - continue - } - - if info.isSelectAll { - // SELECT *:批量复制所有字段,跳过表达式字段 - for k, v := range dataMap { - if _, isExpression := s.config.FieldExpressions[k]; !isExpression { - result[k] = v - } - } - continue - } - - // 跳过已经通过表达式字段处理的字段 - if _, isExpression := s.config.FieldExpressions[info.outputName]; isExpression { - continue - } - - if info.isStringLiteral { - // 字符串字面量处理:使用预编译的字符串值 - result[info.alias] = info.stringValue - } else if info.isFunctionCall { - // 执行函数调用 - if funcResult, err := s.executeFunction(info.fieldName, dataMap); err == nil { - result[info.outputName] = funcResult - } else { - logger.Error("Function execution error %s: %v", info.fieldName, err) - result[info.outputName] = nil - } - } else { - // 普通字段处理 - var value interface{} - var exists bool - - if info.hasNestedField { - value, exists = fieldpath.GetNestedField(data, info.fieldName) - } else { - value, exists = dataMap[info.fieldName] - } - - if exists { - result[info.outputName] = value - } else { - result[info.outputName] = nil - } - } - } - } else if len(s.config.FieldExpressions) == 0 { - // 如果没有指定字段且没有表达式字段,保留所有字段 - for k, v := range dataMap { - result[k] = v - } - } - - // 将结果包装为数组 - results := []map[string]interface{}{result} - - // 非阻塞发送结果到resultChan - s.sendResultNonBlocking(results) - - // 异步调用所有sinks,避免阻塞 - s.callSinksAsync(results) -} - -// processSingleFieldFallback 回退处理单个字段(当预编译信息缺失时) -func (s *Stream) processSingleFieldFallback(fieldSpec string, dataMap map[string]interface{}, data interface{}, result map[string]interface{}) { - // 处理SELECT *的特殊情况 - if fieldSpec == "*" { - // SELECT *:返回所有字段,但跳过已经通过表达式字段处理的字段 - for k, v := range dataMap { - // 如果该字段已经通过表达式字段处理,则跳过,保持表达式计算结果 - if _, isExpression := 
s.config.FieldExpressions[k]; !isExpression { - result[k] = v - } - } - return - } - - // 处理别名 - parts := strings.Split(fieldSpec, ":") - fieldName := parts[0] - outputName := fieldName - if len(parts) > 1 { - outputName = parts[1] - } - - // 跳过已经通过表达式字段处理的字段 - if _, isExpression := s.config.FieldExpressions[outputName]; isExpression { - return - } - - // 检查是否是函数调用 - if strings.Contains(fieldName, "(") && strings.Contains(fieldName, ")") { - // 执行函数调用 - if funcResult, err := s.executeFunction(fieldName, dataMap); err == nil { - result[outputName] = funcResult - } else { - logger.Error("Function execution error %s: %v", fieldName, err) - result[outputName] = nil - } - } else { - // 普通字段 - 支持嵌套字段 - var value interface{} - var exists bool - - if fieldpath.IsNestedField(fieldName) { - value, exists = fieldpath.GetNestedField(data, fieldName) - } else { - value, exists = dataMap[fieldName] - } - - if exists { - result[outputName] = value - } else { - result[outputName] = nil - } - } -} - -// sendResultNonBlocking 非阻塞方式发送结果到resultChan (智能背压控制) -func (s *Stream) sendResultNonBlocking(results []map[string]interface{}) { - select { - case s.resultChan <- results: - // 成功发送到结果通道 - atomic.AddInt64(&s.outputCount, 1) - default: - // 结果通道已满,使用智能背压控制策略 - chanLen := len(s.resultChan) - chanCap := cap(s.resultChan) - - // 如果通道使用率超过90%,进入背压模式 - if float64(chanLen)/float64(chanCap) > 0.9 { - // 尝试清理一些旧数据,为新数据腾出空间 - select { - case <-s.resultChan: - // 清理一个旧结果,然后尝试添加新结果 - select { - case s.resultChan <- results: - atomic.AddInt64(&s.outputCount, 1) - default: - logger.Warn("Result channel is full, dropping result data") - atomic.AddInt64(&s.droppedCount, 1) - } - default: - logger.Warn("Result channel is full, dropping result data") - atomic.AddInt64(&s.droppedCount, 1) - } - } else { - logger.Warn("Result channel is full, dropping result data") - atomic.AddInt64(&s.droppedCount, 1) - } - } -} - -// callSinksAsync 异步调用所有sink函数 -func (s *Stream) callSinksAsync(results []map[string]interface{}) { - // 使用读锁安全地访问sinks切片 - s.sinksMux.RLock() - if len(s.sinks) == 0 { - s.sinksMux.RUnlock() - return - } - - // 复制sinks切片以避免在持有锁的情况下执行耗时操作 - sinksCopy := make([]func(interface{}), len(s.sinks)) - copy(sinksCopy, s.sinks) - s.sinksMux.RUnlock() - - // 为每个sink创建异步任务 - for _, sink := range sinksCopy { - // 捕获sink变量,避免闭包问题 - currentSink := sink - - // 提交任务到工作池 - task := func() { - defer func() { - // 恢复panic,防止单个sink错误影响整个系统 - if r := recover(); r != nil { - logger.Error("Sink execution exception: %v", r) - } - }() - currentSink(results) - } - - // 非阻塞提交任务 - select { - case s.sinkWorkerPool <- task: - // 成功提交任务 - default: - // 工作池已满,直接在当前goroutine执行(降级处理) - go task() - } - } -} - -// executeFunction 执行函数调用 -func (s *Stream) executeFunction(funcExpr string, data map[string]interface{}) (interface{}, error) { - // 检查是否是自定义函数 - funcName := extractFunctionName(funcExpr) - if funcName != "" { - // 直接使用函数系统 - fn, exists := functions.Get(funcName) - if exists { - // 解析参数 - args, err := s.parseFunctionArgs(funcExpr, data) - if err != nil { - return nil, err - } - - // 创建函数上下文 - ctx := &functions.FunctionContext{Data: data} - - // 执行函数 - return fn.Execute(ctx, args) - } - } - - // 对于复杂的嵌套函数调用,直接使用ExprBridge - // 这样可以避免Expression.Evaluate的float64类型限制 - bridge := functions.GetExprBridge() - result, err := bridge.EvaluateExpression(funcExpr, data) - if err != nil { - return nil, fmt.Errorf("evaluate function expression failed: %w", err) - } - - return result, nil -} - -// extractFunctionName 从表达式中提取函数名 -func extractFunctionName(expr 
string) string { - parenIndex := strings.Index(expr, "(") - if parenIndex == -1 { - return "" - } - funcName := strings.TrimSpace(expr[:parenIndex]) - if strings.ContainsAny(funcName, " +-*/=<>!&|") { - return "" - } - return funcName -} - -// parseFunctionArgs 解析函数参数,支持嵌套函数调用 -func (s *Stream) parseFunctionArgs(funcExpr string, data map[string]interface{}) ([]interface{}, error) { - // 提取括号内的参数 - start := strings.Index(funcExpr, "(") - end := strings.LastIndex(funcExpr, ")") - if start == -1 || end == -1 || end <= start { - return nil, fmt.Errorf("invalid function expression: %s", funcExpr) - } - - argsStr := strings.TrimSpace(funcExpr[start+1 : end]) - if argsStr == "" { - return []interface{}{}, nil - } - - // 智能分割参数,处理嵌套函数和引号 - argParts, err := s.smartSplitArgs(argsStr) - if err != nil { - return nil, err - } - - args := make([]interface{}, len(argParts)) - - for i, arg := range argParts { - arg = strings.TrimSpace(arg) - - // 如果参数是字符串常量(用引号包围) - if strings.HasPrefix(arg, "'") && strings.HasSuffix(arg, "'") { - args[i] = strings.Trim(arg, "'") - } else if strings.HasPrefix(arg, "\"") && strings.HasSuffix(arg, "\"") { - args[i] = strings.Trim(arg, "\"") - } else if strings.Contains(arg, "(") { - // 如果参数包含函数调用,递归执行 - result, err := s.executeFunction(arg, data) - if err != nil { - return nil, fmt.Errorf("failed to execute nested function '%s': %v", arg, err) - } - args[i] = result - } else if value, exists := data[arg]; exists { - // 如果是数据字段 - args[i] = value - } else { - // 尝试解析为数字 - if val, err := strconv.ParseFloat(arg, 64); err == nil { - args[i] = val - } else { - args[i] = arg - } - } - } - - return args, nil -} - -// smartSplitArgs 智能分割参数,考虑括号嵌套和引号 -func (s *Stream) smartSplitArgs(argsStr string) ([]string, error) { - var args []string - var current strings.Builder - parenDepth := 0 - inQuotes := false - quoteChar := byte(0) - - for i := 0; i < len(argsStr); i++ { - ch := argsStr[i] - - switch ch { - case '\'': - if !inQuotes { - inQuotes = true - quoteChar = ch - } else if quoteChar == ch { - inQuotes = false - quoteChar = 0 - } - current.WriteByte(ch) - case '"': - if !inQuotes { - inQuotes = true - quoteChar = ch - } else if quoteChar == ch { - inQuotes = false - quoteChar = 0 - } - current.WriteByte(ch) - case '(': - if !inQuotes { - parenDepth++ - } - current.WriteByte(ch) - case ')': - if !inQuotes { - parenDepth-- - } - current.WriteByte(ch) - case ',': - if !inQuotes && parenDepth == 0 { - // 找到参数分隔符 - args = append(args, strings.TrimSpace(current.String())) - current.Reset() - } else { - current.WriteByte(ch) - } - default: - current.WriteByte(ch) - } - } - - // 添加最后一个参数 - if current.Len() > 0 { - args = append(args, strings.TrimSpace(current.String())) - } - - return args, nil + // 创建数据处理器并启动 + processor := NewDataProcessor(s) + go processor.Process() } func (s *Stream) Emit(data interface{}) { @@ -1336,200 +228,6 @@ func (s *Stream) Emit(data interface{}) { s.addDataFunc(data) } -// addDataBlocking 阻塞模式添加数据,保证零数据丢失 (线程安全版本) -func (s *Stream) addDataBlocking(data interface{}) { - if s.blockingTimeout <= 0 { - // 无超时限制,永久阻塞直到成功 - dataChan := s.safeGetDataChan() - dataChan <- data - return - } - - // 带超时的阻塞 - timer := time.NewTimer(s.blockingTimeout) - defer timer.Stop() - - dataChan := s.safeGetDataChan() - select { - case dataChan <- data: - // 成功添加数据 - return - case <-timer.C: - // 超时但不丢弃数据,记录错误但继续阻塞 - logger.Error("Data addition timeout, but continue waiting to avoid data loss") - // 继续无限期阻塞,重新获取当前通道引用 - finalDataChan := s.safeGetDataChan() - finalDataChan <- data - } 
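The removed addDataBlocking above (presumably relocated to handler_data.go, per this patch's file list) bounds the first send with a timer and, on timeout, logs and falls back to an unbounded send instead of dropping the datum. The core pattern as a standalone sketch:

package main

import (
	"fmt"
	"time"
)

// sendBlocking tries a send bounded by timeout, then falls back to an
// unbounded send so the datum is never dropped (zero-data-loss mode).
func sendBlocking(ch chan<- int, v int, timeout time.Duration) {
	if timeout <= 0 {
		ch <- v // no bound: block until the consumer catches up
		return
	}
	timer := time.NewTimer(timeout)
	defer timer.Stop()
	select {
	case ch <- v:
	case <-timer.C:
		fmt.Println("send timed out; waiting indefinitely to avoid data loss")
		ch <- v
	}
}

func main() {
	ch := make(chan int, 1)
	ch <- 0 // fill the buffer so the next send must wait
	go func() {
		time.Sleep(50 * time.Millisecond)
		<-ch // consumer frees a slot after the timeout fires
	}()
	sendBlocking(ch, 1, 10*time.Millisecond)
	fmt.Println("delivered")
}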
-} - -// addDataWithExpansion 动态扩容模式 (线程安全版本) -func (s *Stream) addDataWithExpansion(data interface{}) { - // 首次尝试添加数据 - if s.safeSendToDataChan(data) { - return - } - - // 通道满了,动态扩容 - s.expandDataChannel() - - // 扩容后重试,重新获取通道引用 - if s.safeSendToDataChan(data) { - logger.Debug("Successfully added data after data channel expansion") - return - } - - // 如果扩容后仍然满,则阻塞等待 - dataChan := s.safeGetDataChan() - dataChan <- data -} - -// addDataWithPersistence 持久化模式(线程安全完整实现) -func (s *Stream) addDataWithPersistence(data interface{}) { - // 首次尝试添加数据 - if s.safeSendToDataChan(data) { - return - } - - // 通道满了,持久化到磁盘 - if s.persistenceManager != nil { - if err := s.persistenceManager.PersistData(data); err != nil { - logger.Error("Failed to persist data: %v", err) - atomic.AddInt64(&s.droppedCount, 1) - } else { - logger.Debug("Data has been persisted to disk") - } - } else { - logger.Error("Persistence manager not initialized, data will be lost") - atomic.AddInt64(&s.droppedCount, 1) - } - - // 启动异步重试 - go s.persistAndRetryData(data) -} - -// addDataWithDrop 原有的丢弃模式 (线程安全版本) -func (s *Stream) addDataWithDrop(data interface{}) { - // 智能非阻塞添加,分层背压控制 - if s.safeSendToDataChan(data) { - return - } - - // 数据通道已满,使用分层背压策略,获取通道状态 - s.dataChanMux.RLock() - chanLen := len(s.dataChan) - chanCap := cap(s.dataChan) - currentDataChan := s.dataChan - s.dataChanMux.RUnlock() - - usage := float64(chanLen) / float64(chanCap) - - // 根据通道使用率和缓冲区大小调整策略 - var waitTime time.Duration - var maxRetries int - - switch { - case chanCap >= 100000: // 超大缓冲区(基准测试模式) - switch { - case usage > 0.99: - waitTime = 1 * time.Millisecond // 更长等待 - maxRetries = 3 - case usage > 0.95: - waitTime = 500 * time.Microsecond - maxRetries = 2 - case usage > 0.90: - waitTime = 100 * time.Microsecond - maxRetries = 1 - default: - // 立即丢弃 - logger.Warn("Data channel is full, dropping input data") - atomic.AddInt64(&s.droppedCount, 1) - return - } - - case chanCap >= 50000: // 高性能模式 - switch { - case usage > 0.99: - waitTime = 500 * time.Microsecond - maxRetries = 2 - case usage > 0.95: - waitTime = 200 * time.Microsecond - maxRetries = 1 - case usage > 0.90: - waitTime = 50 * time.Microsecond - maxRetries = 1 - default: - logger.Warn("Data channel is full, dropping input data") - atomic.AddInt64(&s.droppedCount, 1) - return - } - - default: // 默认模式 - switch { - case usage > 0.99: - waitTime = 100 * time.Microsecond - maxRetries = 1 - case usage > 0.95: - waitTime = 50 * time.Microsecond - maxRetries = 1 - default: - logger.Warn("Data channel is full, dropping input data") - atomic.AddInt64(&s.droppedCount, 1) - return - } - } - - // 多次重试添加数据,使用线程安全的方式 - for retry := 0; retry < maxRetries; retry++ { - timer := time.NewTimer(waitTime) - select { - case currentDataChan <- data: - // 重试成功 - timer.Stop() - return - case <-timer.C: - // 超时,继续下一次重试或者丢弃 - if retry == maxRetries-1 { - // 最后一次重试失败,记录丢弃 - logger.Warn("Data channel is full, dropping input data") - atomic.AddInt64(&s.droppedCount, 1) - } - } - } -} - -// safeGetDataChan 线程安全地获取dataChan引用 -func (s *Stream) safeGetDataChan() chan interface{} { - s.dataChanMux.RLock() - defer s.dataChanMux.RUnlock() - return s.dataChan -} - -// safeSendToDataChan 线程安全地向dataChan发送数据 -func (s *Stream) safeSendToDataChan(data interface{}) bool { - dataChan := s.safeGetDataChan() - select { - case dataChan <- data: - return true - default: - return false - } -} - -func (s *Stream) AddSink(sink func(interface{})) { - s.sinksMux.Lock() - s.sinks = append(s.sinks, sink) - s.sinksMux.Unlock() -} - -func (s *Stream) 
GetResultsChan() <-chan interface{} { - return s.resultChan -} - -func NewStreamProcessor() (*Stream, error) { - return NewStream(types.Config{}) -} - // Stop 停止流处理 func (s *Stream) Stop() { close(s.done) @@ -1542,229 +240,7 @@ func (s *Stream) Stop() { } } -// GetStats 获取流处理统计信息 (线程安全版本) -func (s *Stream) GetStats() map[string]int64 { - // 线程安全地获取dataChan状态 - s.dataChanMux.RLock() - dataChanLen := int64(len(s.dataChan)) - dataChanCap := int64(cap(s.dataChan)) - s.dataChanMux.RUnlock() - - return map[string]int64{ - StatsInputCount: atomic.LoadInt64(&s.inputCount), - StatsOutputCount: atomic.LoadInt64(&s.outputCount), - StatsDroppedCount: atomic.LoadInt64(&s.droppedCount), - StatsDataChanLen: dataChanLen, - StatsDataChanCap: dataChanCap, - StatsResultChanLen: int64(len(s.resultChan)), - StatsResultChanCap: int64(cap(s.resultChan)), - StatsSinkPoolLen: int64(len(s.sinkWorkerPool)), - StatsSinkPoolCap: int64(cap(s.sinkWorkerPool)), - StatsActiveRetries: int64(atomic.LoadInt32(&s.activeRetries)), - StatsExpanding: int64(atomic.LoadInt32(&s.expanding)), - } -} - -// GetDetailedStats 获取详细的性能统计信息 -func (s *Stream) GetDetailedStats() map[string]interface{} { - stats := s.GetStats() - - // 计算使用率 - dataUsage := float64(stats[StatsDataChanLen]) / float64(stats[StatsDataChanCap]) * 100 - resultUsage := float64(stats[StatsResultChanLen]) / float64(stats[StatsResultChanCap]) * 100 - sinkUsage := float64(stats[StatsSinkPoolLen]) / float64(stats[StatsSinkPoolCap]) * 100 - - // 计算效率指标 - var processRate float64 = 100.0 - var dropRate float64 = 0.0 - - if stats[StatsInputCount] > 0 { - processRate = float64(stats[StatsOutputCount]) / float64(stats[StatsInputCount]) * 100 - dropRate = float64(stats[StatsDroppedCount]) / float64(stats[StatsInputCount]) * 100 - } - - return map[string]interface{}{ - StatsBasicStats: stats, - StatsDataChanUsage: dataUsage, - StatsResultChanUsage: resultUsage, - StatsSinkPoolUsage: sinkUsage, - StatsProcessRate: processRate, - StatsDropRate: dropRate, - StatsPerformanceLevel: s.assessPerformanceLevel(dataUsage, dropRate), - } -} - -// assessPerformanceLevel 评估当前性能水平 -func (s *Stream) assessPerformanceLevel(dataUsage, dropRate float64) string { - switch { - case dropRate > 50: - return PerformanceLevelCritical // 严重性能问题 - case dropRate > 20: - return PerformanceLevelWarning // 性能警告 - case dataUsage > 90: - return PerformanceLevelHighLoad // 高负载 - case dataUsage > 70: - return PerformanceLevelModerateLoad // 中等负载 - default: - return PerformanceLevelOptimal // 最佳状态 - } -} - -// ResetStats 重置统计信息 -func (s *Stream) ResetStats() { - atomic.StoreInt64(&s.inputCount, 0) - atomic.StoreInt64(&s.outputCount, 0) - atomic.StoreInt64(&s.droppedCount, 0) -} - -// expandDataChannel 动态扩容数据通道 -func (s *Stream) expandDataChannel() { - // 使用原子操作检查是否正在扩容,防止并发扩容 - if !atomic.CompareAndSwapInt32(&s.expanding, 0, 1) { - logger.Debug("Channel expansion already in progress, skipping") - return - } - defer atomic.StoreInt32(&s.expanding, 0) - - // 获取扩容锁,确保只有一个协程进行扩容 - s.expansionMux.Lock() - defer s.expansionMux.Unlock() - - // 再次检查是否需要扩容(双重检查锁定模式) - s.dataChanMux.RLock() - oldCap := cap(s.dataChan) - currentLen := len(s.dataChan) - s.dataChanMux.RUnlock() - - // 如果当前通道使用率低于80%,则不需要扩容 - if float64(currentLen)/float64(oldCap) < 0.8 { - logger.Debug("Channel usage below threshold, expansion not needed") - return - } - - newCap := int(float64(oldCap) * 1.5) // 扩容50% - if newCap < oldCap+1000 { - newCap = oldCap + 1000 // 至少增加1000 - } - - logger.Debug("Dynamic expansion of data channel: %d -> %d", oldCap, 
newCap)
-
-	// 创建新的更大的通道
-	newChan := make(chan interface{}, newCap)
-
-	// 使用写锁安全地迁移数据
-	s.dataChanMux.Lock()
-	oldChan := s.dataChan
-
-	// 将旧通道中的数据快速迁移到新通道
-	migrationTimeout := time.NewTimer(5 * time.Second) // 5秒迁移超时
-	defer migrationTimeout.Stop()
-
-	migratedCount := 0
-	for {
-		select {
-		case data := <-oldChan:
-			select {
-			case newChan <- data:
-				migratedCount++
-			case <-migrationTimeout.C:
-				logger.Warn("Data migration timeout, some data may be lost during expansion")
-				goto migration_done
-			}
-		case <-migrationTimeout.C:
-			logger.Warn("Data migration timeout during channel drain")
-			goto migration_done
-		default:
-			// 旧通道为空,迁移完成
-			goto migration_done
-		}
-	}
-
-migration_done:
-	// 原子性地更新通道引用
-	s.dataChan = newChan
-	s.dataChanMux.Unlock()
-
-	logger.Debug("Channel expansion completed: migrated %d items", migratedCount)
-}
-
-// persistAndRetryData 持久化数据并重试 (改进版本,具备指数退避和资源控制)
-func (s *Stream) persistAndRetryData(data interface{}) {
-	// 检查活跃重试协程数量,防止资源泄漏
-	currentRetries := atomic.LoadInt32(&s.activeRetries)
-	if currentRetries >= s.maxRetryRoutines {
-		logger.Warn("Maximum retry routines reached (%d), dropping data", currentRetries)
-		atomic.AddInt64(&s.droppedCount, 1)
-		return
-	}
-
-	// 增加活跃重试计数
-	atomic.AddInt32(&s.activeRetries, 1)
-	defer atomic.AddInt32(&s.activeRetries, -1)
-
-	// 使用指数退避策略
-	baseInterval := 50 * time.Millisecond
-	maxInterval := 2 * time.Second
-	maxRetries := 10 // 减少最大重试次数,防止长时间阻塞
-	totalTimeout := 30 * time.Second // 总超时时间
-
-	retryTimer := time.NewTimer(totalTimeout)
-	defer retryTimer.Stop()
-
-	for attempt := 0; attempt < maxRetries; attempt++ {
-		// 计算当前重试间隔(指数退避)
-		currentInterval := time.Duration(float64(baseInterval) * (1.5 * float64(attempt)))
-		if currentInterval > maxInterval {
-			currentInterval = maxInterval
-		}
-
-		// 等待重试间隔
-		waitTimer := time.NewTimer(currentInterval)
-		select {
-		case <-waitTimer.C:
-			// 继续重试
-		case <-retryTimer.C:
-			waitTimer.Stop()
-			logger.Warn("Persistence retry timeout reached, dropping data")
-			atomic.AddInt64(&s.droppedCount, 1)
-			return
-		case <-s.done:
-			waitTimer.Stop()
-			logger.Debug("Stream stopped during retry, dropping data")
-			atomic.AddInt64(&s.droppedCount, 1)
-			return
-		}
-		waitTimer.Stop()
-
-		// 使用线程安全方式尝试发送数据
-		s.dataChanMux.RLock()
-		currentDataChan := s.dataChan
-		s.dataChanMux.RUnlock()
-
-		select {
-		case currentDataChan <- data:
-			logger.Debug("Persistence data retry successful: attempt %d", attempt+1)
-			return
-		case <-retryTimer.C:
-			logger.Warn("Persistence retry timeout during send, dropping data")
-			atomic.AddInt64(&s.droppedCount, 1)
-			return
-		case <-s.done:
-			logger.Debug("Stream stopped during retry send, dropping data")
-			atomic.AddInt64(&s.droppedCount, 1)
-			return
-		default:
-			// 通道仍然满,继续下一次重试
-			if attempt == maxRetries-1 {
-				logger.Error("Persistence data retry failed after %d attempts, dropping data", maxRetries)
-				atomic.AddInt64(&s.droppedCount, 1)
-			} else {
-				logger.Debug("Persistence retry attempt %d/%d failed, will retry with interval %v",
-					attempt+1, maxRetries, currentInterval)
-			}
-		}
-	}
-}
+// GetStats, GetDetailedStats, ResetStats, expandDataChannel, persistAndRetryData 方法已移动到manager_metrics.go和handler_data.go文件
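One observation on the relocated retry logic: despite the 指数退避 (exponential backoff) comments, the removed persistAndRetryData computes float64(baseInterval) * (1.5 * float64(attempt)), which grows linearly and yields a zero interval on the first attempt; whether the copy now in handler_data.go computes a true exponential is worth verifying. A sketch of the exponential form for comparison:

package main

import (
	"fmt"
	"math"
	"time"
)

// backoff returns a true exponential retry interval, base * 1.5^attempt,
// capped at max. The removed code above used base * (1.5 * attempt),
// which is linear in attempt and zero when attempt == 0.
func backoff(base, max time.Duration, attempt int) time.Duration {
	d := time.Duration(float64(base) * math.Pow(1.5, float64(attempt)))
	if d > max {
		return max
	}
	return d
}

func main() {
	for i := 0; i < 5; i++ {
		fmt.Println(i, backoff(50*time.Millisecond, 2*time.Second, i))
	}
	// 0 50ms, 1 75ms, 2 112.5ms, 3 168.75ms, 4 253.125ms
}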
 // LoadAndReprocessPersistedData 加载并重新处理持久化数据
 func (s *Stream) LoadAndReprocessPersistedData() error {
@@ -1785,7 +261,7 @@ func (s *Stream) LoadAndReprocessPersistedData() error {
 	logger.Info("Start reprocessing %d persistent data records", len(persistedData))
 
-	// 重新处理每条数据(线程安全版本)
+	// 重新处理每条数据
 	successCount := 0
 	for i, data := range persistedData {
 		// 使用线程安全方式尝试发送数据
@@ -1831,7 +307,7 @@ func (s *Stream) IsAggregationQuery() bool {
 func (s *Stream) ProcessSync(data interface{}) (interface{}, error) {
 	// 检查是否为聚合查询
 	if s.config.NeedWindow {
-		return nil, fmt.Errorf("聚合查询不支持同步处理")
+		return nil, fmt.Errorf("synchronous processing is not supported for aggregation queries")
 	}
 
 	// 应用过滤条件
@@ -1845,14 +321,10 @@ func (s *Stream) ProcessSync(data interface{}) (interface{}, error) {
 
 // processDirectDataSync 同步版本的直接数据处理
 func (s *Stream) processDirectDataSync(data interface{}) (interface{}, error) {
-	// 增加输入计数
-	atomic.AddInt64(&s.inputCount, 1)
-
-	// 简化:直接将数据作为map处理
 	dataMap, ok := data.(map[string]interface{})
 	if !ok {
 		atomic.AddInt64(&s.droppedCount, 1)
-		return nil, fmt.Errorf("不支持的数据类型: %T", data)
+		return nil, fmt.Errorf("unsupported data type: %T", data)
 	}
 
 	// 创建结果map,预分配合适容量
@@ -1863,165 +335,14 @@ func (s *Stream) processDirectDataSync(data interface{}) (interface{}, error) {
 	result := make(map[string]interface{}, estimatedSize)
 
 	// 处理表达式字段
-	for fieldName, fieldExpr := range s.config.FieldExpressions {
-		// 使用桥接器计算表达式,支持IS NULL等语法
-		bridge := functions.GetExprBridge()
-
-		// 预处理表达式中的IS NULL和LIKE语法
-		processedExpr := fieldExpr.Expression
-		if bridge.ContainsIsNullOperator(processedExpr) {
-			if processed, err := bridge.PreprocessIsNullExpression(processedExpr); err == nil {
-				processedExpr = processed
-			}
-		}
-		if bridge.ContainsLikeOperator(processedExpr) {
-			if processed, err := bridge.PreprocessLikeExpression(processedExpr); err == nil {
-				processedExpr = processed
-			}
-		}
-
-		// 检查表达式是否是函数调用(包含括号)
-		isFunctionCall := strings.Contains(fieldExpr.Expression, "(") && strings.Contains(fieldExpr.Expression, ")")
-
-		// 检查表达式是否包含嵌套字段(但排除函数调用中的点号)
-		hasNestedFields := false
-		if !isFunctionCall && strings.Contains(fieldExpr.Expression, ".") {
-			hasNestedFields = true
-		}
-
-		// 检查是否为CASE表达式
-		trimmedExpr := strings.TrimSpace(fieldExpr.Expression)
-		upperExpr := strings.ToUpper(trimmedExpr)
-		isCaseExpression := strings.HasPrefix(upperExpr, SQLKeywordCase)
-
-		var evalResult interface{}
-
-		if isFunctionCall {
-			// 对于函数调用,优先使用桥接器处理,这样可以保持原始类型
-			exprResult, err := bridge.EvaluateExpression(processedExpr, dataMap)
-			if err != nil {
-				logger.Error("Function call evaluation failed for field %s: %v", fieldName, err)
-				result[fieldName] = nil
-				continue
-			}
-			evalResult = exprResult
-		} else if hasNestedFields || isCaseExpression {
-			// 检测到嵌套字段(非函数调用)或CASE表达式,使用自定义表达式引擎
-			// 预处理反引号标识符
-			exprToUse := fieldExpr.Expression
-			if bridge.ContainsBacktickIdentifiers(exprToUse) {
-				if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil {
-					exprToUse = processed
-				}
-			}
-			expression, parseErr := expr.NewExpression(exprToUse)
-			if parseErr != nil {
-				logger.Error("Expression parse failed for field %s: %v", fieldName, parseErr)
-				result[fieldName] = nil
-				continue
-			}
-
-			// 使用支持NULL的计算方法
-			numResult, isNull, err := expression.EvaluateWithNull(dataMap)
-			if err != nil {
-				logger.Error("Expression evaluation failed for field %s: %v", fieldName, err)
-				result[fieldName] = nil
-				continue
-			}
-			if isNull {
-				evalResult = nil // NULL值
-			} else {
-				evalResult = numResult
-			}
-		} else {
-			// 尝试使用桥接器处理其他表达式
-			exprResult, err := bridge.EvaluateExpression(processedExpr, dataMap)
-			if err != nil {
-				// 如果桥接器失败,回退到原来的表达式引擎(使用原始表达式,不是预处理的)
-				// 预处理反引号标识符
-				exprToUse := fieldExpr.Expression
-				if bridge.ContainsBacktickIdentifiers(exprToUse) {
-					if processed, err := bridge.PreprocessBacktickIdentifiers(exprToUse); err == nil {
-						exprToUse = processed
-					}
-				
} - expression, parseErr := expr.NewExpression(exprToUse) - if parseErr != nil { - logger.Error("Expression parse failed for field %s: %v", fieldName, parseErr) - result[fieldName] = nil - continue - } - - // 计算表达式,支持NULL值 - numResult, isNull, evalErr := expression.EvaluateWithNull(dataMap) - if evalErr != nil { - logger.Error("Expression evaluation failed for field %s: %v", fieldName, evalErr) - result[fieldName] = nil - continue - } - if isNull { - evalResult = nil // NULL值 - } else { - evalResult = numResult - } - } else { - evalResult = exprResult - } - } - - result[fieldName] = evalResult + for fieldName := range s.config.FieldExpressions { + s.processExpressionField(fieldName, dataMap, result) } - // 处理SimpleFields(复用现有逻辑) + // 使用预编译的字段信息处理SimpleFields if len(s.config.SimpleFields) > 0 { for _, fieldSpec := range s.config.SimpleFields { - info := s.compiledFieldInfo[fieldSpec] - if info == nil { - // 如果没有预编译信息,回退到原逻辑(安全性保证) - s.processSingleFieldFallback(fieldSpec, dataMap, data, result) - continue - } - - if info.isSelectAll { - // SELECT *:批量复制所有字段,跳过表达式字段 - for k, v := range dataMap { - if _, isExpression := s.config.FieldExpressions[k]; !isExpression { - result[k] = v - } - } - continue - } - - // 跳过已经通过表达式字段处理的字段 - if _, isExpression := s.config.FieldExpressions[info.outputName]; isExpression { - continue - } - - if info.isFunctionCall { - // 执行函数调用 - if funcResult, err := s.executeFunction(info.fieldName, dataMap); err == nil { - result[info.outputName] = funcResult - } else { - logger.Error("Function execution error %s: %v", info.fieldName, err) - result[info.outputName] = nil - } - } else { - // 普通字段处理 - var value interface{} - var exists bool - - if info.hasNestedField { - value, exists = fieldpath.GetNestedField(data, info.fieldName) - } else { - value, exists = dataMap[info.fieldName] - } - - if exists { - result[info.outputName] = value - } else { - result[info.outputName] = nil - } - } + s.processSimpleField(fieldSpec, dataMap, data, result) } } else if len(s.config.FieldExpressions) == 0 { // 如果没有指定字段且没有表达式字段,保留所有字段 @@ -2042,92 +363,3 @@ func (s *Stream) processDirectDataSync(data interface{}) (interface{}, error) { return result, nil } - -// 向后兼容性函数 - -// NewStreamWithBuffers 创建带自定义缓冲区大小的Stream (已弃用,使用NewStreamWithCustomPerformance) -// Deprecated: 使用NewStreamWithCustomPerformance替代 -func NewStreamWithBuffers(config types.Config, dataBufSize, resultBufSize, sinkPoolSize int) (*Stream, error) { - perfConfig := types.DefaultPerformanceConfig() - perfConfig.BufferConfig.DataChannelSize = dataBufSize - perfConfig.BufferConfig.ResultChannelSize = resultBufSize - perfConfig.WorkerConfig.SinkPoolSize = sinkPoolSize - - config.PerformanceConfig = perfConfig - return newStreamWithUnifiedConfig(config) -} - -// NewHighPerformanceStream 创建高性能配置的Stream (已弃用,使用NewStreamWithHighPerformance) -// Deprecated: 使用NewStreamWithHighPerformance替代 -func NewHighPerformanceStream(config types.Config) (*Stream, error) { - return NewStreamWithHighPerformance(config) -} - -// NewStreamWithoutDataLoss 创建零数据丢失的流处理器 (已弃用,使用NewStreamWithZeroDataLoss) -// Deprecated: 使用NewStreamWithZeroDataLoss替代 -func NewStreamWithoutDataLoss(config types.Config, strategy string) (*Stream, error) { - perfConfig := types.ZeroDataLossConfig() - - // 应用用户指定的策略 - validStrategies := map[string]bool{ - StrategyDrop: true, - StrategyBlock: true, - StrategyExpand: true, - StrategyPersist: true, - } - - if validStrategies[strategy] { - perfConfig.OverflowConfig.Strategy = strategy - if strategy == StrategyDrop { - 
perfConfig.OverflowConfig.AllowDataLoss = true - } - } - - config.PerformanceConfig = perfConfig - return newStreamWithUnifiedConfig(config) -} - -// NewStreamWithLossPolicy 创建带数据丢失策略的流处理器 (已弃用,使用NewStreamWithCustomPerformance) -// Deprecated: 使用NewStreamWithCustomPerformance替代 -func NewStreamWithLossPolicy(config types.Config, dataBufSize, resultBufSize, sinkPoolSize int, - overflowStrategy string, timeout time.Duration) (*Stream, error) { - - perfConfig := types.DefaultPerformanceConfig() - perfConfig.BufferConfig.DataChannelSize = dataBufSize - perfConfig.BufferConfig.ResultChannelSize = resultBufSize - perfConfig.WorkerConfig.SinkPoolSize = sinkPoolSize - perfConfig.OverflowConfig.Strategy = overflowStrategy - perfConfig.OverflowConfig.BlockTimeout = timeout - perfConfig.OverflowConfig.AllowDataLoss = (overflowStrategy == StrategyDrop) - - config.PerformanceConfig = perfConfig - return newStreamWithUnifiedConfig(config) -} - -// NewStreamWithLossPolicyAndPersistence 创建带数据丢失策略和持久化配置的流处理器 (已弃用,使用NewStreamWithCustomPerformance) -// Deprecated: 使用NewStreamWithCustomPerformance替代 -func NewStreamWithLossPolicyAndPersistence(config types.Config, dataBufSize, resultBufSize, sinkPoolSize int, - overflowStrategy string, timeout time.Duration, persistDataDir string, persistMaxFileSize int64, persistFlushInterval time.Duration) (*Stream, error) { - - perfConfig := types.DefaultPerformanceConfig() - perfConfig.BufferConfig.DataChannelSize = dataBufSize - perfConfig.BufferConfig.ResultChannelSize = resultBufSize - perfConfig.WorkerConfig.SinkPoolSize = sinkPoolSize - perfConfig.OverflowConfig.Strategy = overflowStrategy - perfConfig.OverflowConfig.BlockTimeout = timeout - perfConfig.OverflowConfig.AllowDataLoss = (overflowStrategy == StrategyDrop) - - // 设置持久化配置 - if overflowStrategy == StrategyPersist { - perfConfig.OverflowConfig.PersistenceConfig = &types.PersistenceConfig{ - DataDir: persistDataDir, - MaxFileSize: persistMaxFileSize, - FlushInterval: persistFlushInterval, - MaxRetries: 5, - RetryInterval: 1 * time.Second, - } - } - - config.PerformanceConfig = perfConfig - return newStreamWithUnifiedConfig(config) -} diff --git a/stream/stream_factory.go b/stream/stream_factory.go new file mode 100644 index 0000000..af0be50 --- /dev/null +++ b/stream/stream_factory.go @@ -0,0 +1,156 @@ +package stream + +import ( + "fmt" + "sync" + + "github.com/rulego/streamsql/types" + "github.com/rulego/streamsql/window" +) + +// StreamFactory Stream工厂,负责创建不同类型的Stream +type StreamFactory struct{} + +// NewStreamFactory 创建Stream工厂 +func NewStreamFactory() *StreamFactory { + return &StreamFactory{} +} + +// CreateStream 使用统一配置创建Stream +func (sf *StreamFactory) CreateStream(config types.Config) (*Stream, error) { + // 如果没有指定性能配置,使用默认配置 + if (config.PerformanceConfig == types.PerformanceConfig{}) { + config.PerformanceConfig = types.DefaultPerformanceConfig() + } + + return sf.createStreamWithUnifiedConfig(config) +} + +// CreateHighPerformanceStream 创建高性能Stream +func (sf *StreamFactory) CreateHighPerformanceStream(config types.Config) (*Stream, error) { + config.PerformanceConfig = types.HighPerformanceConfig() + return sf.createStreamWithUnifiedConfig(config) +} + +// CreateLowLatencyStream 创建低延迟Stream +func (sf *StreamFactory) CreateLowLatencyStream(config types.Config) (*Stream, error) { + config.PerformanceConfig = types.LowLatencyConfig() + return sf.createStreamWithUnifiedConfig(config) +} + +// CreateZeroDataLossStream 创建零数据丢失Stream +func (sf *StreamFactory) CreateZeroDataLossStream(config types.Config) 
(*Stream, error) { + config.PerformanceConfig = types.ZeroDataLossConfig() + return sf.createStreamWithUnifiedConfig(config) +} + +// CreateCustomPerformanceStream 创建自定义性能配置的Stream +func (sf *StreamFactory) CreateCustomPerformanceStream(config types.Config, perfConfig types.PerformanceConfig) (*Stream, error) { + config.PerformanceConfig = perfConfig + return sf.createStreamWithUnifiedConfig(config) +} + +// createStreamWithUnifiedConfig 使用统一配置创建Stream的内部实现 +func (sf *StreamFactory) createStreamWithUnifiedConfig(config types.Config) (*Stream, error) { + var win window.Window + var err error + + // 只有在需要窗口时才创建窗口 + if config.NeedWindow { + win, err = sf.createWindow(config) + if err != nil { + return nil, err + } + } + + // 创建Stream实例 + stream := sf.createStreamInstance(config, win) + + // 初始化持久化管理器 + if err := sf.initializePersistenceManager(stream, config.PerformanceConfig); err != nil { + return nil, err + } + + // 设置数据处理策略 + sf.setupDataProcessingStrategy(stream, config.PerformanceConfig) + + // 预编译字段处理信息 + stream.compileFieldProcessInfo() + + // 启动工作协程 + sf.startWorkerRoutines(stream, config.PerformanceConfig) + + return stream, nil +} + +// createWindow 创建窗口 +func (sf *StreamFactory) createWindow(config types.Config) (window.Window, error) { + // 将统一的性能配置传递给窗口 + windowConfig := config.WindowConfig + if windowConfig.Params == nil { + windowConfig.Params = make(map[string]interface{}) + } + // 传递完整的性能配置给窗口 + windowConfig.Params[PerformanceConfigKey] = config.PerformanceConfig + + return window.CreateWindow(windowConfig) +} + +// createStreamInstance 创建Stream实例 +func (sf *StreamFactory) createStreamInstance(config types.Config, win window.Window) *Stream { + perfConfig := config.PerformanceConfig + return &Stream{ + dataChan: make(chan interface{}, perfConfig.BufferConfig.DataChannelSize), + config: config, + Window: win, + resultChan: make(chan interface{}, perfConfig.BufferConfig.ResultChannelSize), + seenResults: &sync.Map{}, + done: make(chan struct{}), + sinkWorkerPool: make(chan func(), perfConfig.WorkerConfig.SinkPoolSize), + allowDataDrop: perfConfig.OverflowConfig.AllowDataLoss, + blockingTimeout: perfConfig.OverflowConfig.BlockTimeout, + overflowStrategy: perfConfig.OverflowConfig.Strategy, + maxRetryRoutines: int32(perfConfig.WorkerConfig.MaxRetryRoutines), + } +} + +// initializePersistenceManager 初始化持久化管理器 +// 当溢出策略设置为持久化时,检查并初始化持久化配置 +func (sf *StreamFactory) initializePersistenceManager(stream *Stream, perfConfig types.PerformanceConfig) error { + if perfConfig.OverflowConfig.Strategy == StrategyPersist { + if perfConfig.OverflowConfig.PersistenceConfig == nil { + return fmt.Errorf("persistence strategy is enabled but PersistenceConfig is not provided. Please configure PersistenceConfig with DataDir, MaxFileSize, and FlushInterval. 
Example: perfConfig.OverflowConfig.PersistenceConfig = &types.PersistenceConfig{DataDir: \"./data\", MaxFileSize: 10*1024*1024, FlushInterval: 5*time.Second}") + } + persistConfig := perfConfig.OverflowConfig.PersistenceConfig + stream.persistenceManager = NewPersistenceManagerWithConfig( + persistConfig.DataDir, + persistConfig.MaxFileSize, + persistConfig.FlushInterval, + ) + if err := stream.persistenceManager.Start(); err != nil { + return fmt.Errorf("failed to start persistence manager: %w", err) + } + } + return nil +} + +// setupDataProcessingStrategy 设置数据处理策略 +func (sf *StreamFactory) setupDataProcessingStrategy(stream *Stream, perfConfig types.PerformanceConfig) { + // 根据溢出策略预设AddData函数指针,避免运行时switch判断 + switch perfConfig.OverflowConfig.Strategy { + case StrategyBlock: + stream.addDataFunc = stream.addDataBlocking + case StrategyExpand: + stream.addDataFunc = stream.addDataWithExpansion + case StrategyPersist: + stream.addDataFunc = stream.addDataWithPersistence + default: + stream.addDataFunc = stream.addDataWithDrop + } +} + +// startWorkerRoutines 启动工作协程 +func (sf *StreamFactory) startWorkerRoutines(stream *Stream, perfConfig types.PerformanceConfig) { + go stream.startSinkWorkerPool(perfConfig.WorkerConfig.SinkWorkerCount) + go stream.startResultConsumer() +} diff --git a/stream/stream_field_test.go b/stream/stream_field_test.go new file mode 100644 index 0000000..250d197 --- /dev/null +++ b/stream/stream_field_test.go @@ -0,0 +1,332 @@ +package stream + +import ( + "sync" + "testing" + "time" + + "github.com/rulego/streamsql/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestSelectStarWithExpressionFields 测试SELECT *与表达式字段的组合 +func TestSelectStarWithExpressionFields(t *testing.T) { + tests := []struct { + name string + simpleFields []string + fieldExpressions map[string]types.FieldExpression + testData map[string]interface{} + expectedFields map[string]interface{} + }{ + { + name: "SELECT * with additional expressions", + simpleFields: []string{"*"}, + fieldExpressions: map[string]types.FieldExpression{ + "name": { + Expression: "UPPER(name)", + Fields: []string{"name"}, + }, + "full_info": { + Expression: "CONCAT(name, ' - ', status)", + Fields: []string{"name", "status"}, + }, + }, + testData: map[string]interface{}{ + "name": "john", + "status": "active", + "age": 25, + }, + expectedFields: map[string]interface{}{ + "name": "JOHN", + "full_info": "john - active", + "status": "active", + "age": 25, + }, + }, + { + name: "SELECT * with field override", + simpleFields: []string{"*"}, + fieldExpressions: map[string]types.FieldExpression{ + "name": { + Expression: "UPPER(name)", + Fields: []string{"name"}, + }, + "age": { + Expression: "age * 2", + Fields: []string{"age"}, + }, + }, + testData: map[string]interface{}{ + "name": "alice", + "age": 30, + "status": "active", + }, + expectedFields: map[string]interface{}{ + "name": "ALICE", + "age": 60.0, // 表达式结果 + "status": "active", + }, + }, + { + name: "SELECT * without expressions", + simpleFields: []string{"*"}, + fieldExpressions: nil, + testData: map[string]interface{}{ + "name": "bob", + "age": 35, + "status": "inactive", + }, + expectedFields: map[string]interface{}{ + "name": "bob", + "age": 35, + "status": "inactive", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := types.Config{ + NeedWindow: false, + SimpleFields: tt.simpleFields, + FieldExpressions: tt.fieldExpressions, + } + + stream, err := NewStream(config) + 
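说明:上文 stream_factory.go 在创建阶段就把溢出策略映射为处理函数(setupDataProcessingStrategy),调用方只需通过 PerformanceConfig 选择策略即可。下面是一个最小用法示意(假设在 stream 包内、已导入 time 与 types;非本补丁内容,仅演示配置方式):

	// 选择 persist 策略时必须同时提供 PersistenceConfig,否则工厂会返回错误提示
	perfConfig := types.DefaultPerformanceConfig()
	perfConfig.OverflowConfig.Strategy = StrategyPersist
	perfConfig.OverflowConfig.PersistenceConfig = &types.PersistenceConfig{
		DataDir:       "./data",         // 假设的持久化目录
		MaxFileSize:   10 * 1024 * 1024, // 10MB
		FlushInterval: 5 * time.Second,
	}
	cfg := types.Config{SimpleFields: []string{"id"}, PerformanceConfig: perfConfig}
	s, err := NewStreamFactory().CreateStream(cfg)
	if err != nil {
		panic(err) // 示意:实际应用中应妥善处理错误
	}
	defer s.Stop()
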
require.NoError(t, err) + defer stream.Stop() + + // 收集结果 + var mu sync.Mutex + var results []interface{} + stream.AddSink(func(result interface{}) { + mu.Lock() + defer mu.Unlock() + results = append(results, result) + }) + + stream.Start() + stream.Emit(tt.testData) + + // 等待处理完成 + time.Sleep(100 * time.Millisecond) + + // 验证结果 + mu.Lock() + defer mu.Unlock() + + require.Len(t, results, 1) + resultData := results[0].([]map[string]interface{})[0] + + for field, expected := range tt.expectedFields { + actual, exists := resultData[field] + assert.True(t, exists, "Field %s should exist", field) + if expected != nil { + // 处理数值类型的比较 + if expectedFloat, ok := expected.(float64); ok { + if actualFloat, ok := actual.(float64); ok { + assert.InEpsilon(t, expectedFloat, actualFloat, 0.0001) + } else if actualInt, ok := actual.(int); ok { + assert.InEpsilon(t, expectedFloat, float64(actualInt), 0.0001) + } else { + t.Errorf("Expected %s to be numeric, got %T", field, actual) + } + } else { + assert.Equal(t, expected, actual, "Field %s mismatch", field) + } + } + } + }) + } +} + +// TestFieldProcessor 测试字段处理器 +func TestFieldProcessor(t *testing.T) { + tests := []struct { + name string + simpleFields []string + testData map[string]interface{} + expected map[string]interface{} + }{ + { + name: "Specific fields", + simpleFields: []string{"name", "age"}, + testData: map[string]interface{}{ + "name": "test", + "age": 25, + "status": "active", + }, + expected: map[string]interface{}{ + "name": "test", + "age": 25, + }, + }, + { + name: "All fields with *", + simpleFields: []string{"*"}, + testData: map[string]interface{}{ + "name": "test", + "age": 25, + "status": "active", + }, + expected: map[string]interface{}{ + "name": "test", + "age": 25, + "status": "active", + }, + }, + { + name: "Mixed fields", + simpleFields: []string{"name", "*"}, + testData: map[string]interface{}{ + "name": "test", + "age": 25, + "status": "active", + }, + expected: map[string]interface{}{ + "name": "test", + "age": 25, + "status": "active", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := types.Config{ + NeedWindow: false, + SimpleFields: tt.simpleFields, + } + + stream, err := NewStream(config) + require.NoError(t, err) + defer stream.Stop() + + var mu sync.Mutex + var results []interface{} + stream.AddSink(func(result interface{}) { + mu.Lock() + defer mu.Unlock() + results = append(results, result) + }) + + stream.Start() + stream.Emit(tt.testData) + + time.Sleep(100 * time.Millisecond) + + mu.Lock() + defer mu.Unlock() + + require.Len(t, results, 1) + resultData := results[0].([]map[string]interface{})[0] + + // 验证期望的字段都存在 + for field, expected := range tt.expected { + actual, exists := resultData[field] + assert.True(t, exists, "Field %s should exist", field) + assert.Equal(t, expected, actual, "Field %s value mismatch", field) + } + + // 如果不是 "*",验证没有额外的字段 + if len(tt.simpleFields) == 1 && tt.simpleFields[0] != "*" { + assert.Len(t, resultData, len(tt.expected), "Should only have expected fields") + } + }) + } +} + +// TestExpressionEvaluation 测试表达式计算 +func TestExpressionEvaluation(t *testing.T) { + tests := []struct { + name string + expression types.FieldExpression + testData map[string]interface{} + expected interface{} + }{ + { + name: "String concatenation", + expression: types.FieldExpression{ + Expression: "CONCAT(first_name, ' ', last_name)", + Fields: []string{"first_name", "last_name"}, + }, + testData: map[string]interface{}{ + "first_name": "John", + 
"last_name": "Doe", + }, + expected: "John Doe", + }, + { + name: "Arithmetic operation", + expression: types.FieldExpression{ + Expression: "price * quantity", + Fields: []string{"price", "quantity"}, + }, + testData: map[string]interface{}{ + "price": 10.5, + "quantity": 3, + }, + expected: 31.5, + }, + { + name: "String transformation", + expression: types.FieldExpression{ + Expression: "UPPER(name)", + Fields: []string{"name"}, + }, + testData: map[string]interface{}{ + "name": "alice", + }, + expected: "ALICE", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := types.Config{ + NeedWindow: false, + FieldExpressions: map[string]types.FieldExpression{ + "result": tt.expression, + }, + } + + stream, err := NewStream(config) + require.NoError(t, err) + defer stream.Stop() + + var mu sync.Mutex + var results []interface{} + stream.AddSink(func(result interface{}) { + mu.Lock() + defer mu.Unlock() + results = append(results, result) + }) + + stream.Start() + stream.Emit(tt.testData) + + time.Sleep(100 * time.Millisecond) + + mu.Lock() + defer mu.Unlock() + + require.Len(t, results, 1) + resultData := results[0].([]map[string]interface{})[0] + + actual, exists := resultData["result"] + assert.True(t, exists, "Result field should exist") + + // 处理数值类型的比较 + if expectedFloat, ok := tt.expected.(float64); ok { + if actualFloat, ok := actual.(float64); ok { + assert.InEpsilon(t, expectedFloat, actualFloat, 0.0001) + } else { + t.Errorf("Expected float64, got %T", actual) + } + } else { + assert.Equal(t, tt.expected, actual) + } + }) + } +} \ No newline at end of file diff --git a/stream/stream_performance_test.go b/stream/stream_performance_test.go new file mode 100644 index 0000000..ccaa124 --- /dev/null +++ b/stream/stream_performance_test.go @@ -0,0 +1,442 @@ +package stream + +import ( + "sync" + "testing" + "time" + + "github.com/rulego/streamsql/aggregator" + "github.com/rulego/streamsql/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestStreamUnifiedConfigIntegration 测试Stream和Window统一配置的集成 +func TestStreamUnifiedConfigIntegration(t *testing.T) { + testCases := []struct { + name string + performanceConfig types.PerformanceConfig + expectedWindowBufferSize int + }{ + { + name: "默认配置", + performanceConfig: types.DefaultPerformanceConfig(), + expectedWindowBufferSize: 1000, + }, + { + name: "高性能配置", + performanceConfig: types.HighPerformanceConfig(), + expectedWindowBufferSize: 5000, + }, + { + name: "低延迟配置", + performanceConfig: types.LowLatencyConfig(), + expectedWindowBufferSize: 100, + }, + { + name: "零数据丢失配置", + performanceConfig: types.ZeroDataLossConfig(), + expectedWindowBufferSize: 2000, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + config := types.Config{ + NeedWindow: true, + WindowConfig: types.WindowConfig{ + Type: "tumbling", + Params: map[string]interface{}{ + "size": "5s", + }, + }, + SelectFields: map[string]aggregator.AggregateType{ + "value": aggregator.Count, + }, + PerformanceConfig: tc.performanceConfig, + } + + s, err := NewStream(config) + require.NoError(t, err) + defer s.Stop() + + // 验证stream的缓冲区配置 + assert.Equal(t, tc.performanceConfig.BufferConfig.DataChannelSize, cap(s.dataChan), + "数据通道大小不匹配") + assert.Equal(t, tc.performanceConfig.BufferConfig.ResultChannelSize, cap(s.resultChan), + "结果通道大小不匹配") + + // 验证窗口创建成功 + assert.NotNil(t, s.Window, "窗口应该被创建") + t.Logf("窗口已创建,类型: %T", s.Window) + }) + } +} + +// 
TestStreamUnifiedConfigPerformanceImpact 测试统一配置对Stream性能的影响
+func TestStreamUnifiedConfigPerformanceImpact(t *testing.T) {
+	configs := map[string]types.PerformanceConfig{
+		"默认配置":  types.DefaultPerformanceConfig(),
+		"高性能配置": types.HighPerformanceConfig(),
+		"低延迟配置": types.LowLatencyConfig(),
+	}
+
+	for name, perfConfig := range configs {
+		t.Run(name, func(t *testing.T) {
+			config := types.Config{
+				NeedWindow: true,
+				WindowConfig: types.WindowConfig{
+					Type: "tumbling",
+					Params: map[string]interface{}{
+						"size": "1s",
+					},
+				},
+				SelectFields: map[string]aggregator.AggregateType{
+					"value": aggregator.Sum,
+				},
+				PerformanceConfig: perfConfig,
+			}
+
+			s, err := NewStream(config)
+			require.NoError(t, err)
+			defer s.Stop()
+
+			go s.Start()
+
+			// 发送测试数据并测量性能
+			dataCount := 1000
+			startTime := time.Now()
+
+		sendLoop:
+			for i := 0; i < dataCount; i++ {
+				data := map[string]interface{}{
+					"value":     i,
+					"timestamp": time.Now().Unix(),
+				}
+
+				select {
+				case s.dataChan <- data:
+					// 成功发送
+				case <-time.After(100 * time.Millisecond):
+					// 发送超时,停止发送;select 中裸break只跳出select,需使用标签跳出循环
+					t.Logf("第%d条数据发送超时", i)
+					break sendLoop
+				}
+			}
+
+			processingTime := time.Since(startTime)
+			t.Logf("%s 处理%d条数据耗时: %v", name, dataCount, processingTime)
+
+			// 等待一些结果
+			time.Sleep(1500 * time.Millisecond)
+
+			// 检查结果
+			resultCount := 0
+			for {
+				select {
+				case <-s.resultChan:
+					resultCount++
+				default:
+					goto done
+				}
+			}
+		done:
+			// 性能测试主要关注处理时间,结果数量可能因窗口触发时机而变化
+			t.Logf("%s 收到结果数: %d", name, resultCount)
+		})
+	}
+}
+
+// TestStreamUnifiedConfigErrorHandling 测试统一配置的错误处理
+func TestStreamUnifiedConfigErrorHandling(t *testing.T) {
+	tests := []struct {
+		name        string
+		config      types.Config
+		expectError bool
+		description string
+	}{
+		{
+			name: "无效窗口类型",
+			config: types.Config{
+				NeedWindow: true,
+				WindowConfig: types.WindowConfig{
+					Type: "invalid_window_type",
+					Params: map[string]interface{}{
+						"size": "5s",
+					},
+				},
+				SelectFields: map[string]aggregator.AggregateType{
+					"value": aggregator.Count,
+				},
+				PerformanceConfig: types.DefaultPerformanceConfig(),
+			},
+			expectError: true,
+			description: "无效的窗口类型应该导致创建失败",
+		},
+		{
+			name: "缺少窗口大小参数",
+			config: types.Config{
+				NeedWindow: true,
+				WindowConfig: types.WindowConfig{
+					Type:   "tumbling",
+					Params: map[string]interface{}{},
+				},
+				SelectFields: map[string]aggregator.AggregateType{
+					"value": aggregator.Count,
+				},
+				PerformanceConfig: types.DefaultPerformanceConfig(),
+			},
+			expectError: true,
+			description: "缺少size参数应该导致创建失败",
+		},
+		{
+			name: "有效配置",
+			config: types.Config{
+				NeedWindow: true,
+				WindowConfig: types.WindowConfig{
+					Type: "tumbling",
+					Params: map[string]interface{}{
+						"size": "5s",
+					},
+				},
+				SelectFields: map[string]aggregator.AggregateType{
+					"value": aggregator.Count,
+				},
+				PerformanceConfig: types.DefaultPerformanceConfig(),
+			},
+			expectError: false,
+			description: "有效配置应该创建成功",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			stream, err := NewStream(tt.config)
+			if tt.expectError {
+				assert.Error(t, err, tt.description)
+				assert.Nil(t, stream)
+			} else {
+				assert.NoError(t, err, tt.description)
+				assert.NotNil(t, stream)
+				if stream != nil {
+					defer stream.Stop()
+				}
+			}
+		})
+	}
+}
+
+// TestStreamUnifiedConfigCompatibility 测试统一配置的兼容性
+func TestStreamUnifiedConfigCompatibility(t *testing.T) {
+	// 测试新的统一配置
+	newConfig := types.Config{
+		NeedWindow: false,
+		SelectFields: map[string]aggregator.AggregateType{
+			"value": aggregator.Count,
+		},
+		PerformanceConfig: types.HighPerformanceConfig(),
+	}
+
+	s1, err := NewStream(newConfig)
+	require.NoError(t, err)
+	defer s1.Stop()
+
+	// 验证新配置生效
+	
expectedDataSize := types.HighPerformanceConfig().BufferConfig.DataChannelSize + assert.Equal(t, expectedDataSize, cap(s1.dataChan), "高性能配置的数据通道大小不匹配") + + // 测试默认配置 + defaultConfig := types.Config{ + NeedWindow: false, + SelectFields: map[string]aggregator.AggregateType{ + "value": aggregator.Count, + }, + PerformanceConfig: types.DefaultPerformanceConfig(), + } + + s2, err := NewStream(defaultConfig) + require.NoError(t, err) + defer s2.Stop() + + // 验证默认配置 + expectedDefaultSize := types.DefaultPerformanceConfig().BufferConfig.DataChannelSize + assert.Equal(t, expectedDefaultSize, cap(s2.dataChan), "默认配置的数据通道大小不匹配") + + t.Logf("高性能配置数据通道大小: %d", cap(s1.dataChan)) + t.Logf("默认配置数据通道大小: %d", cap(s2.dataChan)) +} + +// TestStatsManager 测试统计管理器 +func TestStatsManager(t *testing.T) { + config := types.Config{ + NeedWindow: false, + SimpleFields: []string{"value"}, + } + + stream, err := NewStream(config) + require.NoError(t, err) + defer stream.Stop() + + // 启动流处理 + stream.Start() + + // 发送一些数据来生成统计信息 + for i := 0; i < 10; i++ { + stream.Emit(map[string]interface{}{"value": i}) + } + + // 等待处理完成 + time.Sleep(100 * time.Millisecond) + + // 测试基本统计 + stats := stream.GetStats() + assert.Equal(t, int64(10), stats[InputCount], "输入计数不匹配") + assert.GreaterOrEqual(t, stats[OutputCount], int64(1), "输出计数应该大于等于1") + + // 测试重置统计 + stream.ResetStats() + stats = stream.GetStats() + assert.Equal(t, int64(0), stats[InputCount], "重置后输入计数应该为0") +} + +// TestDataHandler 测试数据处理器 +func TestDataHandler(t *testing.T) { + tests := []struct { + name string + performanceConfig types.PerformanceConfig + dataCount int + expectedDrops bool + }{ + { + name: "高性能配置 - 无丢弃", + performanceConfig: types.HighPerformanceConfig(), + dataCount: 100, + expectedDrops: false, + }, + { + name: "低延迟配置 - 可能丢弃", + performanceConfig: types.LowLatencyConfig(), + dataCount: 1000, + expectedDrops: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := types.Config{ + NeedWindow: false, + SimpleFields: []string{"value"}, + PerformanceConfig: tt.performanceConfig, + } + + stream, err := NewStream(config) + require.NoError(t, err) + defer stream.Stop() + + stream.Start() + + // 快速发送大量数据 + for i := 0; i < tt.dataCount; i++ { + stream.Emit(map[string]interface{}{"value": i}) + } + + time.Sleep(100 * time.Millisecond) + + stats := stream.GetStats() + droppedCount := stats[DroppedCount] + + if tt.expectedDrops { + // 在高负载下可能会有丢弃 + t.Logf("%s: 输入 %d, 丢弃 %d", tt.name, stats[InputCount], droppedCount) + } else { + // 高性能配置应该能处理所有数据 + assert.Equal(t, int64(0), droppedCount, "高性能配置不应该丢弃数据") + } + }) + } +} + +// TestResultHandler 测试结果处理器 +func TestResultHandler(t *testing.T) { + config := types.Config{ + NeedWindow: false, + SimpleFields: []string{"value"}, + } + + stream, err := NewStream(config) + require.NoError(t, err) + defer stream.Stop() + + // 测试Sink功能 + var mu sync.Mutex + var receivedResults []interface{} + + stream.AddSink(func(result interface{}) { + mu.Lock() + defer mu.Unlock() + receivedResults = append(receivedResults, result) + }) + + stream.Start() + + // 发送测试数据 + testData := []map[string]interface{}{ + {"value": 1}, + {"value": 2}, + {"value": 3}, + } + + for _, data := range testData { + stream.Emit(data) + } + + time.Sleep(100 * time.Millisecond) + + // 验证结果 + mu.Lock() + defer mu.Unlock() + + assert.GreaterOrEqual(t, len(receivedResults), len(testData), "应该接收到所有结果") + + // 验证结果格式 + for _, result := range receivedResults { + assert.IsType(t, []map[string]interface{}{}, result, "结果应该是map切片类型") 
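说明:上文 TestStatsManager 展示了 GetStats 返回的 InputCount / OutputCount / DroppedCount 计数,这些计数也可用于简单的运行时监控。一个轮询示意(假设在 stream 包内、已导入 log 与 time,s 为已启动的 *Stream;非本补丁内容):

	go func() {
		for range time.Tick(10 * time.Second) {
			stats := s.GetStats()
			// 一旦出现丢弃,记录当前输入与丢弃计数,便于调整缓冲区或溢出策略
			if stats[DroppedCount] > 0 {
				log.Printf("dropped=%d input=%d", stats[DroppedCount], stats[InputCount])
			}
		}
	}()
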
+ resultSlice := result.([]map[string]interface{}) + assert.Greater(t, len(resultSlice), 0, "结果切片不应该为空") + } +} + +// TestPerformanceConfigurations 测试不同性能配置的效果 +func TestPerformanceConfigurations(t *testing.T) { + configs := map[string]types.PerformanceConfig{ + "Default": types.DefaultPerformanceConfig(), + "HighPerformance": types.HighPerformanceConfig(), + "LowLatency": types.LowLatencyConfig(), + "ZeroDataLoss": types.ZeroDataLossConfig(), + } + + for name, perfConfig := range configs { + t.Run(name, func(t *testing.T) { + config := types.Config{ + NeedWindow: false, + SimpleFields: []string{"value"}, + PerformanceConfig: perfConfig, + } + + stream, err := NewStream(config) + require.NoError(t, err) + defer stream.Stop() + + // 验证缓冲区大小 + assert.Equal(t, perfConfig.BufferConfig.DataChannelSize, cap(stream.dataChan)) + assert.Equal(t, perfConfig.BufferConfig.ResultChannelSize, cap(stream.resultChan)) + + // 验证工作池配置 + assert.Equal(t, perfConfig.WorkerConfig.SinkPoolSize, cap(stream.sinkWorkerPool)) + + t.Logf("%s配置: 数据通道=%d, 结果通道=%d, Sink池=%d", + name, + cap(stream.dataChan), + cap(stream.resultChan), + cap(stream.sinkWorkerPool)) + }) + } +} \ No newline at end of file diff --git a/stream/stream_persistence_test.go b/stream/stream_persistence_test.go new file mode 100644 index 0000000..d8f9d40 --- /dev/null +++ b/stream/stream_persistence_test.go @@ -0,0 +1,309 @@ +package stream + +import ( + "fmt" + "os" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestPersistenceManagerBasic 测试持久化管理器基本功能 +func TestPersistenceManagerBasic(t *testing.T) { + // 创建临时目录 + tmpDir := t.TempDir() + + // 创建持久化管理器 + pm := NewPersistenceManagerWithConfig(tmpDir, 1024, 100*time.Millisecond) + + // 启动管理器 + err := pm.Start() + require.NoError(t, err) + defer pm.Stop() + + // 测试数据持久化 + testData := []interface{}{ + map[string]interface{}{"id": 1, "value": "test1"}, + map[string]interface{}{"id": 2, "value": "test2"}, + map[string]interface{}{"id": 3, "value": "test3"}, + } + + // 写入数据 + for _, data := range testData { + err := pm.PersistData(data) + require.NoError(t, err) + } + + // 等待数据写入 + time.Sleep(200 * time.Millisecond) + + // 读取持久化数据 + loadedData, err := pm.LoadPersistedData() + require.NoError(t, err) + + // 验证数据 + assert.GreaterOrEqual(t, len(loadedData), len(testData)) +} + +// TestPersistenceManagerFileRotation 测试文件轮转功能 +func TestPersistenceManagerFileRotation(t *testing.T) { + tmpDir := t.TempDir() + + // 创建小文件大小的持久化管理器以触发文件轮转 + pm := NewPersistenceManagerWithConfig(tmpDir, 100, 50*time.Millisecond) + + err := pm.Start() + require.NoError(t, err) + defer pm.Stop() + + // 写入大量数据以触发文件轮转 + for i := 0; i < 50; i++ { + data := map[string]interface{}{ + "id": i, + "value": fmt.Sprintf("test_data_with_long_content_%d", i), + } + err := pm.PersistData(data) + require.NoError(t, err) + } + + // 等待数据写入和文件轮转 + time.Sleep(200 * time.Millisecond) + + // 验证创建了多个文件 + files, err := os.ReadDir(tmpDir) + require.NoError(t, err) + assert.Greater(t, len(files), 1, "应该创建多个持久化文件") +} + +// TestPersistenceManagerConcurrency 测试持久化管理器并发安全性 +func TestPersistenceManagerConcurrency(t *testing.T) { + tmpDir := t.TempDir() + + pm := NewPersistenceManagerWithConfig(tmpDir, 2048, 100*time.Millisecond) + + err := pm.Start() + require.NoError(t, err) + defer pm.Stop() + + // 并发写入数据 + const numGoroutines = 10 + const dataPerGoroutine = 20 + + var wg sync.WaitGroup + wg.Add(numGoroutines) + + for i := 0; i < numGoroutines; i++ { + go func(goroutineID int) { + defer 
wg.Done() + for j := 0; j < dataPerGoroutine; j++ { + data := map[string]interface{}{ + "goroutine": goroutineID, + "sequence": j, + "value": fmt.Sprintf("data_%d_%d", goroutineID, j), + } + err := pm.PersistData(data) + assert.NoError(t, err) + } + }(i) + } + + wg.Wait() + + // 等待所有数据写入 + time.Sleep(300 * time.Millisecond) + + // 验证数据完整性 + loadedData, err := pm.LoadPersistedData() + require.NoError(t, err) + + // 应该至少有部分数据被持久化 + assert.Greater(t, len(loadedData), 0) + t.Logf("并发测试: 持久化了 %d 条数据", len(loadedData)) +} + +// TestPersistenceManagerStats 测试持久化统计功能 +func TestPersistenceManagerStats(t *testing.T) { + tmpDir := t.TempDir() + + pm := NewPersistenceManagerWithConfig(tmpDir, 1024, 50*time.Millisecond) + + err := pm.Start() + require.NoError(t, err) + defer pm.Stop() + + // 写入一些数据 + for i := 0; i < 10; i++ { + data := map[string]interface{}{"index": i, "data": "test"} + err := pm.PersistData(data) + require.NoError(t, err) + } + + // 等待数据处理 + time.Sleep(200 * time.Millisecond) + + // 获取统计信息 + stats := pm.GetStats() + require.NotNil(t, stats) + + // 验证统计信息包含预期字段 + assert.Contains(t, stats, "data_dir") + assert.Contains(t, stats, "max_file_size") + assert.Contains(t, stats, "flush_interval") + assert.Contains(t, stats, "running") + assert.Equal(t, tmpDir, stats["data_dir"]) + assert.Equal(t, int64(1024), stats["max_file_size"]) + assert.Equal(t, true, stats["running"]) + + t.Logf("持久化统计信息: %+v", stats) +} + +// TestPersistenceManagerConfiguration 测试不同配置的持久化管理器 +func TestPersistenceManagerConfiguration(t *testing.T) { + tests := []struct { + name string + maxFileSize int64 + flushInterval time.Duration + dataCount int + expectedFiles int // 预期的最小文件数 + }{ + { + name: "Small files, fast flush", + maxFileSize: 50, + flushInterval: 10 * time.Millisecond, + dataCount: 20, + expectedFiles: 2, + }, + { + name: "Large files, slow flush", + maxFileSize: 2048, + flushInterval: 100 * time.Millisecond, + dataCount: 10, + expectedFiles: 1, + }, + { + name: "Medium files, medium flush", + maxFileSize: 512, + flushInterval: 50 * time.Millisecond, + dataCount: 30, + expectedFiles: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + pm := NewPersistenceManagerWithConfig(tmpDir, tt.maxFileSize, tt.flushInterval) + + err := pm.Start() + require.NoError(t, err) + defer pm.Stop() + + // 写入测试数据 + for i := 0; i < tt.dataCount; i++ { + data := map[string]interface{}{ + "index": i, + "data": fmt.Sprintf("test_data_item_%d_with_some_content", i), + } + err := pm.PersistData(data) + require.NoError(t, err) + } + + // 等待数据写入 + time.Sleep(tt.flushInterval*3 + 100*time.Millisecond) + + // 检查文件数量 + files, err := os.ReadDir(tmpDir) + require.NoError(t, err) + assert.GreaterOrEqual(t, len(files), tt.expectedFiles, "文件数量不符合预期") + + // 验证数据完整性 + loadedData, err := pm.LoadPersistedData() + require.NoError(t, err) + assert.GreaterOrEqual(t, len(loadedData), tt.dataCount/2, "加载的数据数量过少") + + t.Logf("%s: 创建了 %d 个文件,加载了 %d 条数据", tt.name, len(files), len(loadedData)) + }) + } +} + +// TestPersistenceManagerErrorHandling 测试持久化管理器错误处理 +func TestPersistenceManagerErrorHandling(t *testing.T) { + t.Run("Stop before start", func(t *testing.T) { + tmpDir := t.TempDir() + pm := NewPersistenceManagerWithConfig(tmpDir, 1024, 100*time.Millisecond) + + // 在启动前停止不应该出错 + err := pm.Stop() + assert.NoError(t, err, "在启动前停止不应该出错") + }) + + t.Run("Persist data before start", func(t *testing.T) { + tmpDir := t.TempDir() + pm := NewPersistenceManagerWithConfig(tmpDir, 1024, 100*time.Millisecond) 
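说明:如下方错误处理用例所示,PersistenceManager 必须先 Start 才能写入,Stop 前应确保数据已刷盘。典型生命周期示意(假设已导入 log 与 time;非本补丁内容):

	pm := NewPersistenceManagerWithConfig("./overflow_data", 10*1024*1024, 5*time.Second)
	if err := pm.Start(); err != nil {
		log.Fatal(err)
	}
	defer pm.Stop()

	_ = pm.PersistData(map[string]interface{}{"id": 1}) // 溢出数据落盘
	recovered, _ := pm.LoadPersistedData()              // 重启后可加载重放
	log.Printf("recovered %d items", len(recovered))
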
+ + // 在启动前持久化数据应该失败 + data := map[string]interface{}{"test": "data"} + err := pm.PersistData(data) + assert.Error(t, err, "在启动前持久化数据应该失败") + }) + + t.Run("Load data from empty directory", func(t *testing.T) { + tmpDir := t.TempDir() + pm := NewPersistenceManagerWithConfig(tmpDir, 1024, 100*time.Millisecond) + + err := pm.Start() + require.NoError(t, err) + defer pm.Stop() + + // 从空目录加载数据应该返回空切片 + loadedData, err := pm.LoadPersistedData() + assert.NoError(t, err) + assert.Empty(t, loadedData, "从空目录加载应该返回空数据") + }) +} + +// TestPersistenceManagerLifecycle 测试持久化管理器生命周期 +func TestPersistenceManagerLifecycle(t *testing.T) { + tmpDir := t.TempDir() + pm := NewPersistenceManagerWithConfig(tmpDir, 1024, 50*time.Millisecond) + + // 初始状态 + stats := pm.GetStats() + assert.Equal(t, false, stats["running"], "初始状态应该是未运行") + + // 启动 + err := pm.Start() + require.NoError(t, err) + defer func() { + // 安全停止 + if stats := pm.GetStats(); stats["running"].(bool) { + pm.Stop() + } + }() + + stats = pm.GetStats() + assert.Equal(t, true, stats["running"], "启动后应该是运行状态") + + // 写入一些数据 + for i := 0; i < 5; i++ { + data := map[string]interface{}{"id": i, "value": fmt.Sprintf("test_%d", i)} + err := pm.PersistData(data) + require.NoError(t, err) + } + + // 等待数据写入 + time.Sleep(100 * time.Millisecond) + + // 验证数据已持久化 + files, err := os.ReadDir(tmpDir) + require.NoError(t, err) + assert.Greater(t, len(files), 0, "应该有持久化文件") + + // 加载数据 + loadedData, err := pm.LoadPersistedData() + require.NoError(t, err) + assert.GreaterOrEqual(t, len(loadedData), 5, "应该能加载持久化的数据") +} diff --git a/stream/stream_test.go b/stream/stream_test.go index ee3e5fb..6f43efb 100644 --- a/stream/stream_test.go +++ b/stream/stream_test.go @@ -2,10 +2,6 @@ package stream import ( "context" - "fmt" - "os" - "sync" - "sync/atomic" "testing" "time" @@ -15,104 +11,127 @@ import ( "github.com/stretchr/testify/require" ) -func TestStreamProcess(t *testing.T) { - config := types.Config{ - WindowConfig: types.WindowConfig{ - Type: "tumbling", - Params: map[string]interface{}{"size": 500 * time.Millisecond}, // 减少窗口大小以更快触发 +// TestStreamBasicFunctionality 测试Stream基本功能 +func TestStreamBasicFunctionality(t *testing.T) { + tests := []struct { + name string + config types.Config + filter string + testData []interface{} + expectedDevice string + expectedTemp float64 + expectedHum float64 + }{ + { + name: "带过滤器的窗口聚合", + config: types.Config{ + WindowConfig: types.WindowConfig{ + Type: "tumbling", + Params: map[string]interface{}{"size": 500 * time.Millisecond}, + }, + GroupFields: []string{"device"}, + SelectFields: map[string]aggregator.AggregateType{ + "temperature": aggregator.Avg, + "humidity": aggregator.Sum, + }, + NeedWindow: true, + }, + filter: "device == 'aa' && temperature > 10", + testData: []interface{}{ + map[string]interface{}{"device": "aa", "temperature": 25.0, "humidity": 60}, + map[string]interface{}{"device": "aa", "temperature": 30.0, "humidity": 55}, + map[string]interface{}{"device": "bb", "temperature": 22.0, "humidity": 70}, + }, + expectedDevice: "aa", + expectedTemp: 27.5, + expectedHum: 115.0, }, - GroupFields: []string{"device"}, - SelectFields: map[string]aggregator.AggregateType{ - "temperature": aggregator.Avg, - "humidity": aggregator.Sum, + { + name: "不完整数据处理", + config: types.Config{ + WindowConfig: types.WindowConfig{ + Type: "tumbling", + Params: map[string]interface{}{"size": 500 * time.Millisecond}, + }, + GroupFields: []string{"device"}, + SelectFields: map[string]aggregator.AggregateType{ + "temperature": aggregator.Avg, + 
"humidity": aggregator.Sum, + }, + NeedWindow: true, + }, + filter: "device == 'aa'", + testData: []interface{}{ + map[string]interface{}{"device": "aa", "temperature": 25.0}, + map[string]interface{}{"device": "aa", "humidity": 60}, + map[string]interface{}{"device": "aa", "temperature": 30.0}, + map[string]interface{}{"device": "aa", "humidity": 55}, + map[string]interface{}{"device": "bb", "temperature": 22.0, "humidity": 70}, + }, + expectedDevice: "aa", + expectedTemp: 27.5, + expectedHum: 115.0, }, - NeedWindow: true, } - strm, err := NewStream(config) - require.NoError(t, err) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + strm, err := NewStream(tt.config) + require.NoError(t, err) + defer strm.Stop() - err = strm.RegisterFilter("device == 'aa' && temperature > 10") - require.NoError(t, err) + if tt.filter != "" { + err = strm.RegisterFilter(tt.filter) + require.NoError(t, err) + } - // 添加 Sink 函数来捕获结果 - resultChan := make(chan interface{}, 1) // 添加缓冲 - strm.AddSink(func(result interface{}) { - select { - case resultChan <- result: - default: - // 防止阻塞 - } - }) + // 添加 Sink 函数来捕获结果 + resultChan := make(chan interface{}, 1) + strm.AddSink(func(result interface{}) { + select { + case resultChan <- result: + default: + // 防止阻塞 + } + }) - strm.Start() + strm.Start() - // 准备测试数据 - testData := []interface{}{ - map[string]interface{}{"device": "aa", "temperature": 25.0, "humidity": 60}, - map[string]interface{}{"device": "aa", "temperature": 30.0, "humidity": 55}, - map[string]interface{}{"device": "bb", "temperature": 22.0, "humidity": 70}, - } + // 发送测试数据 + for _, data := range tt.testData { + strm.Emit(data) + } - for _, data := range testData { - strm.Emit(data) - } + // 等待窗口关闭并触发结果 + time.Sleep(700 * time.Millisecond) - // 等待窗口关闭并触发结果 - time.Sleep(700 * time.Millisecond) // 等待窗口关闭 + // 等待结果 + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() - // 等待结果,并设置超时 - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) - defer cancel() + var actual interface{} + select { + case actual = <-resultChan: + cancel() + case <-ctx.Done(): + t.Fatal("No results received within 3 seconds") + } - var actual interface{} - select { - case actual = <-resultChan: // 从sink的channel读取 - cancel() - case <-ctx.Done(): - t.Fatal("No results received within 3 seconds") - } + // 验证结果 + require.NotNil(t, actual) + assert.IsType(t, []map[string]interface{}{}, actual) + resultMap := actual.([]map[string]interface{}) + require.Greater(t, len(resultMap), 0) - // 预期结果:只有 device='aa' 且 temperature>10 的数据会被聚合 - expected := map[string]interface{}{ - "device": "aa", - "temperature": 27.5, // (25+30)/2 - "humidity": 115.0, // 60+55 - } - - // 验证结果 - t.Logf("Received result: %+v (type: %T)", actual, actual) - if actual == nil { - t.Fatal("Received nil result") - } - assert.IsType(t, []map[string]interface{}{}, actual) - t.Logf("Type assertion successful") - resultMap := actual.([]map[string]interface{}) - t.Logf("Result map length: %d", len(resultMap)) - if len(resultMap) > 0 { - t.Logf("First result: %+v", resultMap[0]) - - // 检查temperature字段 - if tempAvg, ok := resultMap[0]["temperature"]; ok { - t.Logf("temperature: %+v (type: %T)", tempAvg, tempAvg) - assert.InEpsilon(t, expected["temperature"].(float64), tempAvg.(float64), 0.0001) - } else { - t.Fatal("temperature field not found in result") - } - - // 检查humidity字段 - if humSum, ok := resultMap[0]["humidity"]; ok { - t.Logf("humidity: %+v (type: %T)", humSum, humSum) - assert.InDelta(t, 
expected["humidity"].(float64), humSum.(float64), 0.0001) - } else { - t.Fatal("humidity field not found in result") - } - } else { - t.Fatal("No results in result map") + firstResult := resultMap[0] + assert.Equal(t, tt.expectedDevice, firstResult["device"]) + assert.InEpsilon(t, tt.expectedTemp, firstResult["temperature"].(float64), 0.0001) + assert.InDelta(t, tt.expectedHum, firstResult["humidity"].(float64), 0.0001) + }) } } -// 不设置过滤器 +// TestStreamWithoutFilter 测试无过滤器的流处理 func TestStreamWithoutFilter(t *testing.T) { config := types.Config{ WindowConfig: types.WindowConfig{ @@ -129,6 +148,7 @@ func TestStreamWithoutFilter(t *testing.T) { strm, err := NewStream(config) require.NoError(t, err) + defer strm.Stop() strm.Start() @@ -147,7 +167,8 @@ func TestStreamWithoutFilter(t *testing.T) { strm.AddSink(func(result interface{}) { resultChan <- result }) - // 等待 3 秒触发窗口 + + // 等待窗口触发 time.Sleep(3 * time.Second) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) @@ -162,23 +183,14 @@ func TestStreamWithoutFilter(t *testing.T) { } expected := []map[string]interface{}{ - { - "device": "aa", - "temperature": 30.0, - "humidity": 55.0, - }, - { - "device": "bb", - "temperature": 22.0, - "humidity": 70.0, - }, + {"device": "aa", "temperature": 30.0, "humidity": 55.0}, + {"device": "bb", "temperature": 22.0, "humidity": 70.0}, } assert.IsType(t, []map[string]interface{}{}, actual) - resultSlice, ok := actual.([]map[string]interface{}) - require.True(t, ok) - + resultSlice := actual.([]map[string]interface{}) assert.Len(t, resultSlice, 2) + for _, expectedResult := range expected { found := false for _, resultMap := range resultSlice { @@ -189,784 +201,96 @@ func TestStreamWithoutFilter(t *testing.T) { break } } - assert.True(t, found, fmt.Sprintf("Expected result for device %v not found", expectedResult["device"])) + assert.True(t, found, "Expected result for device %v not found", expectedResult["device"]) } } -func TestIncompleteStreamProcess(t *testing.T) { +// TestStreamRefactoring 测试重构后的Stream功能 +func TestStreamRefactoring(t *testing.T) { config := types.Config{ - WindowConfig: types.WindowConfig{ - Type: "tumbling", - Params: map[string]interface{}{"size": 500 * time.Millisecond}, // 减少窗口大小 - }, - GroupFields: []string{"device"}, + SimpleFields: []string{"name", "age"}, SelectFields: map[string]aggregator.AggregateType{ - "temperature": aggregator.Avg, - "humidity": aggregator.Sum, + "count": aggregator.Count, }, - NeedWindow: true, + GroupFields: []string{"category"}, + NeedWindow: false, } - strm, err := NewStream(config) - require.NoError(t, err) - - err = strm.RegisterFilter("device == 'aa' ") - require.NoError(t, err) - - // 添加 Sink 函数来捕获结果 - resultChan := make(chan interface{}, 1) // 添加缓冲 - strm.AddSink(func(result interface{}) { - select { - case resultChan <- result: - default: - // 防止阻塞 - } - }) - - strm.Start() - - // 准备测试数据 - testData := []interface{}{ - map[string]interface{}{"device": "aa", "temperature": 25.0}, - map[string]interface{}{"device": "aa", "humidity": 60}, - map[string]interface{}{"device": "aa", "temperature": 30.0}, - map[string]interface{}{"device": "aa", "humidity": 55}, - map[string]interface{}{"device": "bb", "temperature": 22.0, "humidity": 70}, - } - - for _, data := range testData { - strm.Emit(data) - } - - // 等待窗口关闭并触发结果 - time.Sleep(700 * time.Millisecond) // 等待窗口关闭 - - // 等待结果,并设置超时 - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) - defer cancel() - - var actual interface{} - select { - case actual = 
<-resultChan: // 从sink的channel读取 - cancel() - case <-ctx.Done(): - t.Fatal("No results received within 3 seconds") - } - - // 预期结果:只有 device='aa' 且 temperature>10 的数据会被聚合 - expected := map[string]interface{}{ - "device": "aa", - "temperature": 27.5, // (25+30)/2 - "humidity": 115.0, // 60+55 - } - - // 验证结果 - t.Logf("Received result: %+v (type: %T)", actual, actual) - if actual == nil { - t.Fatal("Received nil result") - } - assert.IsType(t, []map[string]interface{}{}, actual) - t.Logf("Type assertion successful") - resultMap := actual.([]map[string]interface{}) - t.Logf("Result map length: %d", len(resultMap)) - if len(resultMap) > 0 { - t.Logf("First result: %+v", resultMap[0]) - - // 检查temperature字段 - if tempAvg, ok := resultMap[0]["temperature"]; ok { - t.Logf("temperature: %+v (type: %T)", tempAvg, tempAvg) - assert.InEpsilon(t, expected["temperature"].(float64), tempAvg.(float64), 0.0001) - } else { - t.Fatal("temperature field not found in result") - } - - // 检查humidity字段 - if humSum, ok := resultMap[0]["humidity"]; ok { - t.Logf("humidity: %+v (type: %T)", humSum, humSum) - assert.InDelta(t, expected["humidity"].(float64), humSum.(float64), 0.0001) - } else { - t.Fatal("humidity field not found in result") - } - } else { - t.Fatal("No results in result map") - } -} - -func TestWindowSlotAgg(t *testing.T) { - config := types.Config{ - WindowConfig: types.WindowConfig{ - Type: "sliding", - Params: map[string]interface{}{"size": 2 * time.Second, "slide": 1 * time.Second}, - TsProp: "ts", - }, - GroupFields: []string{"device"}, - SelectFields: map[string]aggregator.AggregateType{ - "temperature": aggregator.Max, - "humidity": aggregator.Min, - "start": aggregator.WindowStart, - "end": aggregator.WindowEnd, - }, - NeedWindow: true, - } - - strm, err := NewStream(config) - require.NoError(t, err) - - strm.Start() - // Add data every 500ms - baseTime := time.Date(2025, 4, 7, 16, 46, 0, 0, time.UTC) - - testData := []interface{}{ - map[string]interface{}{"device": "aa", "temperature": 25.0, "humidity": 60, "ts": baseTime}, - map[string]interface{}{"device": "aa", "temperature": 30.0, "humidity": 55, "ts": baseTime.Add(1 * time.Second)}, - map[string]interface{}{"device": "bb", "temperature": 22.0, "humidity": 70, "ts": baseTime}, - } - - for _, data := range testData { - strm.Emit(data) - } - - // 捕获结果 - resultChan := make(chan interface{}) - strm.AddSink(func(result interface{}) { - resultChan <- result - }) - // 等待 3 秒触发窗口 - time.Sleep(3 * time.Second) - - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - var actual interface{} - select { - case actual = <-resultChan: - cancel() - case <-ctx.Done(): - t.Fatal("Timeout waiting for results") - } - - expected := []map[string]interface{}{ - { - "device": "aa", - "temperature": 30.0, - "humidity": 55.0, - "start": baseTime.UnixNano(), - "end": baseTime.Add(2 * time.Second).UnixNano(), - }, - { - "device": "bb", - "temperature": 22.0, - "humidity": 70.0, - "start": baseTime.UnixNano(), - "end": baseTime.Add(2 * time.Second).UnixNano(), - }, - } - - assert.IsType(t, []map[string]interface{}{}, actual) - resultSlice, ok := actual.([]map[string]interface{}) - require.True(t, ok) - - assert.Len(t, resultSlice, 2) - for _, expectedResult := range expected { - found := false - for _, resultMap := range resultSlice { - if resultMap["device"] == expectedResult["device"] { - assert.InEpsilon(t, expectedResult["temperature"].(float64), resultMap["temperature"].(float64), 0.0001) - assert.InEpsilon(t, 
expectedResult["humidity"].(float64), resultMap["humidity"].(float64), 0.0001) - assert.Equal(t, expectedResult["start"].(int64), resultMap["start"].(int64)) - assert.Equal(t, expectedResult["end"].(int64), resultMap["end"].(int64)) - found = true - break - } - } - assert.True(t, found, fmt.Sprintf("Expected result for device %v not found", expectedResult["device"])) - } -} - -// TestPersistenceManagerBasic 测试持久化管理器基本功能 -func TestPersistenceManagerBasic(t *testing.T) { - // 创建临时目录 - tmpDir := t.TempDir() - - // 创建持久化管理器 - pm := NewPersistenceManagerWithConfig(tmpDir, 1024, 100*time.Millisecond) - - // 启动管理器 - err := pm.Start() - require.NoError(t, err) - defer pm.Stop() - - // 测试数据持久化 - testData := []interface{}{ - map[string]interface{}{"id": 1, "value": "test1"}, - map[string]interface{}{"id": 2, "value": "test2"}, - map[string]interface{}{"id": 3, "value": "test3"}, - } - - // 写入数据 - for _, data := range testData { - err := pm.PersistData(data) - require.NoError(t, err) - } - - // 等待数据写入 - time.Sleep(200 * time.Millisecond) - - // 读取持久化数据 - loadedData, err := pm.LoadPersistedData() - require.NoError(t, err) - - // 验证数据 - assert.GreaterOrEqual(t, len(loadedData), len(testData)) -} - -// TestPersistenceManagerFileRotation 测试文件轮转功能 -func TestPersistenceManagerFileRotation(t *testing.T) { - tmpDir := t.TempDir() - - // 创建小文件大小的持久化管理器以触发文件轮转 - pm := NewPersistenceManagerWithConfig(tmpDir, 100, 50*time.Millisecond) - - err := pm.Start() - require.NoError(t, err) - defer pm.Stop() - - // 写入大量数据以触发文件轮转 - for i := 0; i < 50; i++ { - data := map[string]interface{}{ - "id": i, - "value": fmt.Sprintf("test_data_with_long_content_%d", i), - } - err := pm.PersistData(data) - require.NoError(t, err) - } - - // 等待数据写入和文件轮转 - time.Sleep(200 * time.Millisecond) - - // 验证创建了多个文件 - files, err := os.ReadDir(tmpDir) - require.NoError(t, err) - assert.Greater(t, len(files), 1, "应该创建多个持久化文件") -} - -// TestStreamWithPersistenceStrategy 测试流处理器的持久化策略 -func TestStreamWithPersistenceStrategy(t *testing.T) { - tmpDir := t.TempDir() - - config := types.Config{ - WindowConfig: types.WindowConfig{ - Type: "tumbling", - Params: map[string]interface{}{"size": 100 * time.Millisecond}, - }, - GroupFields: []string{"device"}, - SelectFields: map[string]aggregator.AggregateType{ - "temperature": aggregator.Avg, - }, - NeedWindow: true, - } - - // 创建带持久化策略的流处理器,使用小缓冲区以触发持久化 - stream, err := NewStreamWithLossPolicyAndPersistence(config, - 2, 2, 2, // 小缓冲区 - "persist", 100*time.Millisecond, - tmpDir, 1024, 50*time.Millisecond) + stream, err := NewStream(config) require.NoError(t, err) defer stream.Stop() - // 添加结果收集器 - var results []interface{} - var resultMutex sync.Mutex - stream.AddSink(func(result interface{}) { - resultMutex.Lock() - defer resultMutex.Unlock() - results = append(results, result) - }) + testData := map[string]interface{}{ + "name": "test", + "age": 25, + "category": "A", + } + // 启动Stream stream.Start() - // 快速添加大量数据以触发持久化 - for i := 0; i < 20; i++ { - data := map[string]interface{}{ - "device": fmt.Sprintf("device_%d", i%3), - "temperature": float64(20 + i), - "timestamp": time.Now(), - } - stream.Emit(data) - } + // 发送测试数据 + stream.Emit(testData) // 等待处理完成 - time.Sleep(300 * time.Millisecond) - - // 验证持久化文件已创建 - files, err := os.ReadDir(tmpDir) - require.NoError(t, err) - if len(files) > 0 { - t.Logf("创建了 %d 个持久化文件", len(files)) - } - - // 验证可以加载持久化数据 - if stream.persistenceManager != nil { - loadedData, err := stream.persistenceManager.LoadPersistedData() - require.NoError(t, err) - t.Logf("加载了 %d 
条持久化数据", len(loadedData)) - } -} - -// TestStreamPersistenceRecovery 测试持久化数据恢复功能 -func TestStreamPersistenceRecovery(t *testing.T) { - tmpDir := t.TempDir() - - config := types.Config{ - WindowConfig: types.WindowConfig{ - Type: "tumbling", - Params: map[string]interface{}{"size": 500 * time.Millisecond}, - }, - GroupFields: []string{"device"}, - SelectFields: map[string]aggregator.AggregateType{ - "temperature": aggregator.Sum, - }, - NeedWindow: true, - } - - // 第一阶段:创建流并持久化数据 - stream1, err := NewStreamWithLossPolicyAndPersistence(config, - 1, 1, 1, // 极小缓冲区强制持久化 - "persist", 50*time.Millisecond, - tmpDir, 512, 30*time.Millisecond) - require.NoError(t, err) - - stream1.Start() - - // 添加测试数据 - testData := []map[string]interface{}{ - {"device": "sensor1", "temperature": 25.0}, - {"device": "sensor2", "temperature": 30.0}, - {"device": "sensor1", "temperature": 27.0}, - } - - for _, data := range testData { - stream1.Emit(data) - } - - // 等待数据持久化 - time.Sleep(200 * time.Millisecond) - stream1.Stop() - - // 第二阶段:创建新流并恢复数据 - stream2, err := NewStreamWithLossPolicyAndPersistence(config, - 10, 10, 10, - "persist", 100*time.Millisecond, - tmpDir, 1024, 100*time.Millisecond) - require.NoError(t, err) - defer stream2.Stop() - - // 恢复持久化数据 - err = stream2.LoadAndReprocessPersistedData() - require.NoError(t, err) - - // 验证数据恢复成功 - if stream2.persistenceManager != nil { - stats := stream2.persistenceManager.GetStats() - t.Logf("持久化统计: %+v", stats) - } -} - -// TestPersistenceManagerConcurrency 测试持久化管理器并发安全性 -func TestPersistenceManagerConcurrency(t *testing.T) { - tmpDir := t.TempDir() - - pm := NewPersistenceManagerWithConfig(tmpDir, 2048, 100*time.Millisecond) - - err := pm.Start() - require.NoError(t, err) - defer pm.Stop() - - // 并发写入数据 - const numGoroutines = 10 - const dataPerGoroutine = 20 - - var wg sync.WaitGroup - wg.Add(numGoroutines) - - for i := 0; i < numGoroutines; i++ { - go func(goroutineID int) { - defer wg.Done() - for j := 0; j < dataPerGoroutine; j++ { - data := map[string]interface{}{ - "goroutine": goroutineID, - "sequence": j, - "value": fmt.Sprintf("data_%d_%d", goroutineID, j), - } - err := pm.PersistData(data) - assert.NoError(t, err) - } - }(i) - } - - wg.Wait() - - // 等待所有数据写入 - time.Sleep(300 * time.Millisecond) - - // 验证数据完整性 - loadedData, err := pm.LoadPersistedData() - require.NoError(t, err) - - // 应该至少有部分数据被持久化 - assert.Greater(t, len(loadedData), 0) - t.Logf("并发测试: 持久化了 %d 条数据", len(loadedData)) -} - -// TestPersistenceManagerStats 测试持久化统计功能 -func TestPersistenceManagerStats(t *testing.T) { - tmpDir := t.TempDir() - - pm := NewPersistenceManagerWithConfig(tmpDir, 1024, 50*time.Millisecond) - - err := pm.Start() - require.NoError(t, err) - defer pm.Stop() - - // 写入一些数据 - for i := 0; i < 10; i++ { - data := map[string]interface{}{"index": i, "data": "test"} - err := pm.PersistData(data) - require.NoError(t, err) - } - - // 等待数据处理 - time.Sleep(200 * time.Millisecond) + time.Sleep(100 * time.Millisecond) // 获取统计信息 - stats := pm.GetStats() - require.NotNil(t, stats) - - // 验证统计信息包含预期字段 - assert.Contains(t, stats, "data_dir") - assert.Contains(t, stats, "max_file_size") - assert.Contains(t, stats, "flush_interval") - assert.Contains(t, stats, "running") - assert.Equal(t, tmpDir, stats["data_dir"]) - assert.Equal(t, int64(1024), stats["max_file_size"]) - assert.Equal(t, true, stats["running"]) - - t.Logf("持久化统计信息: %+v", stats) + stats := stream.GetStats() + assert.Equal(t, int64(1), stats[InputCount]) } -// TestStreamPersistencePerformance 测试持久化性能 -func 
TestStreamPersistencePerformance(t *testing.T) { - if testing.Short() { - t.Skip("跳过性能测试 (使用 -short 标志)") - } - - tmpDir := t.TempDir() +// TestStreamFactory 测试StreamFactory功能 +func TestStreamFactory(t *testing.T) { + factory := NewStreamFactory() + require.NotNil(t, factory) config := types.Config{ - GroupFields: []string{"type"}, - SelectFields: map[string]aggregator.AggregateType{ - "value": aggregator.Count, - }, - NeedWindow: false, // 无窗口,直接处理 - } - - // 创建高性能持久化配置 - stream, err := NewStreamWithLossPolicyAndPersistence(config, - 1000, 1000, 100, - "persist", 1*time.Second, - tmpDir, 10*1024*1024, 500*time.Millisecond) // 10MB文件,500ms刷新 - require.NoError(t, err) - defer stream.Stop() - - var processedCount int64 - stream.AddSink(func(result interface{}) { - atomic.AddInt64(&processedCount, 1) - }) - - stream.Start() - - // 性能测试:快速添加大量数据 - const numData = 10000 - start := time.Now() - - for i := 0; i < numData; i++ { - data := map[string]interface{}{ - "type": fmt.Sprintf("type_%d", i%10), - "value": i, - "data": fmt.Sprintf("performance_test_data_%d", i), - } - stream.Emit(data) - } - - elapsed := time.Since(start) - - // 等待处理完成 - time.Sleep(2 * time.Second) - - processed := atomic.LoadInt64(&processedCount) - - t.Logf("性能测试结果:") - t.Logf("- 数据量: %d", numData) - t.Logf("- 耗时: %v", elapsed) - t.Logf("- 吞吐量: %.2f ops/sec", float64(numData)/elapsed.Seconds()) - t.Logf("- 处理结果数: %d", processed) - - // 验证持久化文件 - files, err := os.ReadDir(tmpDir) - require.NoError(t, err) - t.Logf("- 持久化文件数: %d", len(files)) - - // 基本性能要求(可根据实际情况调整) - assert.Less(t, elapsed, 10*time.Second, "持久化处理耗时应在合理范围内") -} - -// TestStreamsqlPersistenceConfigPassing 测试Streamsql持久化配置的传递 -func TestStreamsqlPersistenceConfigPassing(t *testing.T) { - tmpDir := t.TempDir() - - // 测试自定义持久化配置是否正确传递 - config := types.Config{ - GroupFields: []string{"device"}, - SelectFields: map[string]aggregator.AggregateType{ - "temperature": aggregator.Count, - }, - NeedWindow: false, - } - - // 创建带自定义持久化配置的流 - stream, err := NewStreamWithLossPolicyAndPersistence(config, - 100, 100, 10, - "persist", 1*time.Second, - tmpDir, 2048, 200*time.Millisecond) // 自定义配置:2KB文件,200ms刷新 - require.NoError(t, err) - defer stream.Stop() - - // 验证持久化管理器配置 - require.NotNil(t, stream.persistenceManager) - - stats := stream.persistenceManager.GetStats() - require.NotNil(t, stats) - - // 验证配置是否正确传递 - assert.Equal(t, tmpDir, stats["data_dir"]) - assert.Equal(t, int64(2048), stats["max_file_size"]) - assert.Contains(t, stats["flush_interval"], "200ms") - - t.Logf("持久化配置验证通过: %+v", stats) -} - -func TestSelectStarWithExpressionFields(t *testing.T) { - config := types.Config{ + SimpleFields: []string{"value"}, NeedWindow: false, - SimpleFields: []string{"*"}, // SELECT * - FieldExpressions: map[string]types.FieldExpression{ - "name": { - Expression: "UPPER(name)", - Fields: []string{"name"}, - }, - "full_info": { - Expression: "CONCAT(name, ' - ', status)", - Fields: []string{"name", "status"}, - }, - }, } - stream, err := NewStream(config) - if err != nil { - t.Fatalf("Failed to create stream: %v", err) - } - defer stream.Stop() - - // 收集结果 - 使用sync.Mutex防止数据竞争 - var mu sync.Mutex - var results []interface{} - stream.AddSink(func(result interface{}) { - mu.Lock() - defer mu.Unlock() - results = append(results, result) - }) - - stream.Start() - - // 添加测试数据 - testData := map[string]interface{}{ - "name": "john", - "status": "active", - "age": 25, + // 测试不同的创建方法 + tests := []struct { + name string + create func() (*Stream, error) + }{ + {"CreateStream", func() 
+ {"CreateStream", func() (*Stream, error) { return factory.CreateStream(config) }},
+ {"CreateHighPerformanceStream", func() (*Stream, error) { return factory.CreateHighPerformanceStream(config) }},
+ {"CreateLowLatencyStream", func() (*Stream, error) { return factory.CreateLowLatencyStream(config) }},
+ {"CreateZeroDataLossStream", func() (*Stream, error) { return factory.CreateZeroDataLossStream(config) }},
 }

- stream.Emit(testData)
-
- // Wait for processing to finish
- time.Sleep(100 * time.Millisecond)
-
- // Verify results - guard the read with the mutex
- mu.Lock()
- resultsLen := len(results)
- var resultData map[string]interface{}
- if resultsLen > 0 {
- resultData = results[0].([]map[string]interface{})[0]
- }
- mu.Unlock()
-
- if resultsLen != 1 {
- t.Fatalf("Expected 1 result, got %d", resultsLen)
- }
-
- // Verify the expression-field results were not overwritten
- if resultData["name"] != "JOHN" {
- t.Errorf("Expected name to be 'JOHN' (uppercase), got %v", resultData["name"])
- }
-
- if resultData["full_info"] != "john - active" {
- t.Errorf("Expected full_info to be 'john - active', got %v", resultData["full_info"])
- }
-
- // Verify the original fields are still present
- if resultData["status"] != "active" {
- t.Errorf("Expected status to be 'active', got %v", resultData["status"])
- }
-
- if resultData["age"] != 25 {
- t.Errorf("Expected age to be 25, got %v", resultData["age"])
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ stream, err := tt.create()
+ require.NoError(t, err)
+ require.NotNil(t, stream)
+ defer stream.Stop()
+ })
 }
 }

-func TestSelectStarWithExpressionFieldsOverride(t *testing.T) {
- // Test the case where an expression field shares its name with an original field
- config := types.Config{
- NeedWindow: false,
- SimpleFields: []string{"*"}, // SELECT *
- FieldExpressions: map[string]types.FieldExpression{
- "name": {
- Expression: "UPPER(name)",
- Fields: []string{"name"},
- },
- "age": {
- Expression: "age * 2",
- Fields: []string{"age"},
- },
- },
+// TestMigratedFunctions tests the migrated functions
+func TestMigratedFunctions(t *testing.T) {
+ // Test the performance assessment function
+ tests := []struct {
+ name string
+ dataUsage float64
+ dropRate float64
+ expected string
+ }{
+ {"Critical", 50.0, 60.0, PerformanceLevelCritical},
+ {"Warning", 50.0, 30.0, PerformanceLevelWarning},
+ {"HighLoad", 95.0, 5.0, PerformanceLevelHighLoad},
+ {"ModerateLoad", 75.0, 5.0, PerformanceLevelModerateLoad},
+ {"Optimal", 50.0, 5.0, PerformanceLevelOptimal},
 }

- stream, err := NewStream(config)
- if err != nil {
- t.Fatalf("Failed to create stream: %v", err)
- }
- defer stream.Stop()
-
- // Collect results - use sync.Mutex to avoid data races
- var mu sync.Mutex
- var results []interface{}
- stream.AddSink(func(result interface{}) {
- mu.Lock()
- defer mu.Unlock()
- results = append(results, result)
- })
-
- stream.Start()
-
- // Add test data
- testData := map[string]interface{}{
- "name": "alice",
- "age": 30,
- "status": "active",
- }
-
- stream.Emit(testData)
-
- // Wait for processing to finish
- time.Sleep(100 * time.Millisecond)
-
- // Verify results - guard the read with the mutex
- mu.Lock()
- resultsLen := len(results)
- var resultData map[string]interface{}
- if resultsLen > 0 {
- resultData = results[0].([]map[string]interface{})[0]
- }
- mu.Unlock()
-
- if resultsLen != 1 {
- t.Fatalf("Expected 1 result, got %d", resultsLen)
- }
-
- // Verify the expression results override the original fields
- if resultData["name"] != "ALICE" {
- t.Errorf("Expected name to be 'ALICE' (expression result), got %v", resultData["name"])
- }
-
- // Check the age expression result (may be int or float64)
- ageResult := resultData["age"]
- if ageResult != 60 && ageResult != 60.0 {
- t.Errorf("Expected age to be 60 (expression result), got %v (type: %T)", resultData["age"], resultData["age"])
- }
-
- // Verify fields without expressions keep their original values
- if resultData["status"] != "active" {
- t.Errorf("Expected status to be 'active', got %v", resultData["status"])
- }
-}
-
-func TestSelectStarWithoutExpressionFields(t *testing.T) {
- // Test SELECT * behavior without expression fields
- config := types.Config{
- NeedWindow: false,
- SimpleFields: []string{"*"}, // SELECT *
- }
-
- stream, err := NewStream(config)
- if err != nil {
- t.Fatalf("Failed to create stream: %v", err)
- }
- defer stream.Stop()
-
- // Collect results - use sync.Mutex to avoid data races
- var mu sync.Mutex
- var results []interface{}
- stream.AddSink(func(result interface{}) {
- mu.Lock()
- defer mu.Unlock()
- results = append(results, result)
- })
-
- stream.Start()
-
- // Add test data
- testData := map[string]interface{}{
- "name": "bob",
- "age": 35,
- "status": "inactive",
- }
-
- stream.Emit(testData)
-
- // Wait for processing to finish
- time.Sleep(100 * time.Millisecond)
-
- // Verify results - guard the read with the mutex
- mu.Lock()
- resultsLen := len(results)
- var resultData map[string]interface{}
- if resultsLen > 0 {
- resultData = results[0].([]map[string]interface{})[0]
- }
- mu.Unlock()
-
- if resultsLen != 1 {
- t.Fatalf("Expected 1 result, got %d", resultsLen)
- }
-
- // Verify all original fields are preserved
- if resultData["name"] != "bob" {
- t.Errorf("Expected name to be 'bob', got %v", resultData["name"])
- }
-
- if resultData["age"] != 35 {
- t.Errorf("Expected age to be 35, got %v", resultData["age"])
- }
-
- if resultData["status"] != "inactive" {
- t.Errorf("Expected status to be 'inactive', got %v", resultData["status"])
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := AssessPerformanceLevel(tt.dataUsage, tt.dropRate)
+ assert.Equal(t, tt.expected, result)
+ })
 }
 }
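TestStreamFactory above exercises the new preset-based constructors. For callers outside the package, usage reduces to picking the preset that matches the workload. A minimal sketch, assuming the factory and its Create* methods shown in the test are used as-is (the field list here is illustrative):

package main

import (
	"log"

	"github.com/rulego/streamsql/stream"
	"github.com/rulego/streamsql/types"
)

func main() {
	config := types.Config{
		SimpleFields: []string{"value"}, // illustrative field list
		NeedWindow:   false,
	}

	factory := stream.NewStreamFactory()
	// CreateStream applies the defaults; the other presets trade
	// throughput, latency, and data-loss guarantees against each other.
	s, err := factory.CreateLowLatencyStream(config)
	if err != nil {
		log.Fatalf("create stream failed: %v", err)
	}
	defer s.Stop()
	s.Start()
}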
- t.Errorf("Expected status to be 'active', got %v", resultData["status"]) - } -} - -func TestSelectStarWithoutExpressionFields(t *testing.T) { - // 测试没有表达式字段时SELECT *的行为 - config := types.Config{ - NeedWindow: false, - SimpleFields: []string{"*"}, // SELECT * - } - - stream, err := NewStream(config) - if err != nil { - t.Fatalf("Failed to create stream: %v", err) - } - defer stream.Stop() - - // 收集结果 - 使用sync.Mutex防止数据竞争 - var mu sync.Mutex - var results []interface{} - stream.AddSink(func(result interface{}) { - mu.Lock() - defer mu.Unlock() - results = append(results, result) - }) - - stream.Start() - - // 添加测试数据 - testData := map[string]interface{}{ - "name": "bob", - "age": 35, - "status": "inactive", - } - - stream.Emit(testData) - - // 等待处理完成 - time.Sleep(100 * time.Millisecond) - - // 验证结果 - 使用互斥锁保护读取 - mu.Lock() - resultsLen := len(results) - var resultData map[string]interface{} - if resultsLen > 0 { - resultData = results[0].([]map[string]interface{})[0] - } - mu.Unlock() - - if resultsLen != 1 { - t.Fatalf("Expected 1 result, got %d", resultsLen) - } - - // 验证所有原始字段都被保留 - if resultData["name"] != "bob" { - t.Errorf("Expected name to be 'bob', got %v", resultData["name"]) - } - - if resultData["age"] != 35 { - t.Errorf("Expected age to be 35, got %v", resultData["age"]) - } - - if resultData["status"] != "inactive" { - t.Errorf("Expected status to be 'inactive', got %v", resultData["status"]) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := AssessPerformanceLevel(tt.dataUsage, tt.dropRate) + assert.Equal(t, tt.expected, result) + }) } } diff --git a/stream/stream_window_test.go b/stream/stream_window_test.go new file mode 100644 index 0000000..85af2b7 --- /dev/null +++ b/stream/stream_window_test.go @@ -0,0 +1,244 @@ +package stream + +import ( + "fmt" + "testing" + "time" + + "github.com/rulego/streamsql/aggregator" + "github.com/rulego/streamsql/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestWindowSlotAggregation 测试窗口时间槽聚合 +func TestWindowSlotAggregation(t *testing.T) { + config := types.Config{ + WindowConfig: types.WindowConfig{ + Type: "sliding", + Params: map[string]interface{}{"size": 2 * time.Second, "slide": 1 * time.Second}, + TsProp: "ts", + }, + GroupFields: []string{"device"}, + SelectFields: map[string]aggregator.AggregateType{ + "temperature": aggregator.Max, + "humidity": aggregator.Min, + "start": aggregator.WindowStart, + "end": aggregator.WindowEnd, + }, + NeedWindow: true, + } + + strm, err := NewStream(config) + require.NoError(t, err) + defer strm.Stop() + + strm.Start() + + // 使用固定时间戳的测试数据 + baseTime := time.Date(2025, 4, 7, 16, 46, 0, 0, time.UTC) + testData := []interface{}{ + map[string]interface{}{"device": "aa", "temperature": 25.0, "humidity": 60, "ts": baseTime}, + map[string]interface{}{"device": "aa", "temperature": 30.0, "humidity": 55, "ts": baseTime.Add(1 * time.Second)}, + map[string]interface{}{"device": "bb", "temperature": 22.0, "humidity": 70, "ts": baseTime}, + } + + for _, data := range testData { + strm.Emit(data) + } + + // 捕获结果 + resultChan := make(chan interface{}) + strm.AddSink(func(result interface{}) { + resultChan <- result + }) + + // 等待窗口触发 + time.Sleep(3 * time.Second) + + select { + case actual := <-resultChan: + expected := []map[string]interface{}{ + { + "device": "aa", + "temperature": 30.0, + "humidity": 55.0, + "start": baseTime.UnixNano(), + "end": baseTime.Add(2 * time.Second).UnixNano(), + }, + { + "device": "bb", + "temperature": 22.0, 
+ "humidity": 70.0, + "start": baseTime.UnixNano(), + "end": baseTime.Add(2 * time.Second).UnixNano(), + }, + } + + assert.IsType(t, []map[string]interface{}{}, actual) + resultSlice := actual.([]map[string]interface{}) + assert.Len(t, resultSlice, 2) + + for _, expectedResult := range expected { + found := false + for _, resultMap := range resultSlice { + if resultMap["device"] == expectedResult["device"] { + assert.InEpsilon(t, expectedResult["temperature"].(float64), resultMap["temperature"].(float64), 0.0001) + assert.InEpsilon(t, expectedResult["humidity"].(float64), resultMap["humidity"].(float64), 0.0001) + assert.Equal(t, expectedResult["start"].(int64), resultMap["start"].(int64)) + assert.Equal(t, expectedResult["end"].(int64), resultMap["end"].(int64)) + found = true + break + } + } + assert.True(t, found, fmt.Sprintf("Expected result for device %v not found", expectedResult["device"])) + } + case <-time.After(10 * time.Second): + t.Fatal("Timeout waiting for results") + } +} + +// TestWindowTypes 测试不同类型的窗口 +func TestWindowTypes(t *testing.T) { + tests := []struct { + name string + windowType string + windowParams map[string]interface{} + expectError bool + }{ + { + name: "Tumbling Window", + windowType: "tumbling", + windowParams: map[string]interface{}{ + "size": "5s", + }, + expectError: false, + }, + { + name: "Sliding Window", + windowType: "sliding", + windowParams: map[string]interface{}{ + "size": "10s", + "slide": "5s", + }, + expectError: false, + }, + { + name: "Session Window", + windowType: "session", + windowParams: map[string]interface{}{ + "timeout": "30s", + }, + expectError: false, + }, + { + name: "Invalid Window Type", + windowType: "invalid_window_type", + windowParams: map[string]interface{}{"size": "5s"}, + expectError: true, + }, + { + name: "Missing Size Parameter", + windowType: "tumbling", + windowParams: map[string]interface{}{}, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := types.Config{ + NeedWindow: true, + WindowConfig: types.WindowConfig{ + Type: tt.windowType, + Params: tt.windowParams, + }, + SelectFields: map[string]aggregator.AggregateType{ + "value": aggregator.Count, + }, + PerformanceConfig: types.DefaultPerformanceConfig(), + } + + stream, err := NewStream(config) + if tt.expectError { + assert.Error(t, err) + assert.Nil(t, stream) + } else { + assert.NoError(t, err) + assert.NotNil(t, stream) + if stream != nil { + defer stream.Stop() + assert.NotNil(t, stream.Window) + } + } + }) + } +} + +// TestAggregationTypes 测试不同的聚合类型 +func TestAggregationTypes(t *testing.T) { + tests := []struct { + name string + aggType aggregator.AggregateType + testData []float64 + expected float64 + }{ + {"Sum", aggregator.Sum, []float64{1, 2, 3, 4, 5}, 15.0}, + {"Avg", aggregator.Avg, []float64{2, 4, 6, 8}, 5.0}, + {"Min", aggregator.Min, []float64{5, 2, 8, 1, 9}, 1.0}, + {"Max", aggregator.Max, []float64{5, 2, 8, 1, 9}, 9.0}, + {"Count", aggregator.Count, []float64{1, 2, 3}, 3.0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := types.Config{ + WindowConfig: types.WindowConfig{ + Type: "tumbling", + Params: map[string]interface{}{"size": 500 * time.Millisecond}, + }, + GroupFields: []string{"group"}, + SelectFields: map[string]aggregator.AggregateType{ + "value": tt.aggType, + }, + NeedWindow: true, + } + + stream, err := NewStream(config) + require.NoError(t, err) + defer stream.Stop() + + resultChan := make(chan interface{}, 1) + stream.AddSink(func(result 
diff --git a/stream/unified_config_integration_test.go b/stream/unified_config_integration_test.go
deleted file mode 100644
index ec29b17..0000000
--- a/stream/unified_config_integration_test.go
+++ /dev/null
@@ -1,261 +0,0 @@
-package stream
-
-import (
- "testing"
- "time"
-
- "github.com/rulego/streamsql/aggregator"
- "github.com/rulego/streamsql/types"
-)
-
-// TestStreamWindowUnifiedConfigIntegration tests the integration of the unified Stream and Window configuration
-func TestStreamWindowUnifiedConfigIntegration(t *testing.T) {
- // Verify that windows created by Stream apply the buffer configuration under different performance configs
- testCases := []struct {
- name string
- performanceConfig types.PerformanceConfig
- expectedWindowBufferSize int
- }{
- {
- name: "DefaultConfig",
- performanceConfig: types.DefaultPerformanceConfig(),
- expectedWindowBufferSize: 1000,
- },
- {
- name: "HighPerformanceConfig",
- performanceConfig: types.HighPerformanceConfig(),
- expectedWindowBufferSize: 5000,
- },
- {
- name: "LowLatencyConfig",
- performanceConfig: types.LowLatencyConfig(),
- expectedWindowBufferSize: 100,
- },
- {
- name: "ZeroDataLossConfig",
- performanceConfig: types.ZeroDataLossConfig(),
- expectedWindowBufferSize: 2000,
- },
- }
-
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- // Build a configuration that includes a window
- config := types.Config{
- NeedWindow: true,
- WindowConfig: types.WindowConfig{
- Type: "tumbling",
- Params: map[string]interface{}{
- "size": "5s",
- },
- },
- SelectFields: map[string]aggregator.AggregateType{
- "value": aggregator.Count,
- },
- PerformanceConfig: tc.performanceConfig,
- }
-
- // Create the stream
- s, err := NewStream(config)
- if err != nil {
- t.Fatalf("failed to create Stream: %v", err)
- }
- defer s.Stop()
-
- // Verify the stream's buffer configuration
- if cap(s.dataChan) != tc.performanceConfig.BufferConfig.DataChannelSize {
- t.Errorf("expected data channel size %d, got %d",
- tc.performanceConfig.BufferConfig.DataChannelSize, cap(s.dataChan))
- }
-
- if cap(s.resultChan) != tc.performanceConfig.BufferConfig.ResultChannelSize {
- t.Errorf("expected result channel size %d, got %d",
- tc.performanceConfig.BufferConfig.ResultChannelSize, cap(s.resultChan))
- }
-
- // Verify the window's buffer configuration (requires access to the window's internal state)
- // This would need the window implementation to expose its buffer size, or a type assertion
- if s.Window != nil {
- // Buffer size could be verified via reflection or a type assertion;
- // simplified here: only verify the window is not nil
- t.Logf("window created, type: %T", s.Window)
- } else {
- t.Error("expected a window to be created, but it is nil")
- }
- })
- }
-}
-
-// TestStreamUnifiedConfigPerformanceImpact tests the impact of the unified configuration on Stream performance
-func TestStreamUnifiedConfigPerformanceImpact(t *testing.T) {
- // Benchmark: compare performance across configurations
- configs := map[string]types.PerformanceConfig{
- "DefaultConfig": types.DefaultPerformanceConfig(),
- "HighPerformanceConfig": types.HighPerformanceConfig(),
- "LowLatencyConfig": types.LowLatencyConfig(),
- }
-
- for name, perfConfig := range configs {
- t.Run(name, func(t *testing.T) {
- config := types.Config{
- NeedWindow: true,
- WindowConfig: types.WindowConfig{
- Type: "tumbling",
- Params: map[string]interface{}{
- "size": "1s",
- },
- },
- SelectFields: map[string]aggregator.AggregateType{
- "value": aggregator.Sum,
- },
- PerformanceConfig: perfConfig,
- }
-
- s, err := NewStream(config)
- if err != nil {
- t.Fatalf("failed to create Stream: %v", err)
- }
- defer s.Stop()
-
- // Start the stream
- go s.Start()
-
- // Send test data
- dataCount := 1000
- startTime := time.Now()
-
- for i := 0; i < dataCount; i++ {
- data := map[string]interface{}{
- "value": i,
- "timestamp": time.Now().Unix(),
- }
-
- select {
- case s.dataChan <- data:
- // sent successfully
- case <-time.After(100 * time.Millisecond):
- // send timed out; this can happen with small buffer configurations
- t.Logf("send of record %d timed out", i)
- break
- }
- }
-
- processingTime := time.Since(startTime)
- t.Logf("%s processed %d records in %v", name, dataCount, processingTime)
-
- // Wait for some results
- time.Sleep(1500 * time.Millisecond)
-
- // Check the results
- resultCount := 0
- for {
- select {
- case <-s.resultChan:
- resultCount++
- default:
- goto done
- }
- }
- done:
- //t.Logf("%s produced %d results", name, resultCount)
- })
- }
-}
-
-// TestStreamUnifiedConfigErrorHandling tests error handling for the unified configuration
-func TestStreamUnifiedConfigErrorHandling(t *testing.T) {
- // Test an invalid window configuration (invalid window type)
- invalidConfig := types.Config{
- NeedWindow: true,
- WindowConfig: types.WindowConfig{
- Type: "invalid_window_type", // invalid window type
- Params: map[string]interface{}{
- "size": "5s",
- },
- },
- SelectFields: map[string]aggregator.AggregateType{
- "value": aggregator.Count,
- },
- PerformanceConfig: types.DefaultPerformanceConfig(),
- }
-
- // Stream creation should fail because the window type is invalid
- _, err := NewStream(invalidConfig)
- if err == nil {
- t.Error("expected creation to fail, but it succeeded")
- return
- }
- //t.Logf("correctly caught error: %v", err)
-
- // Test invalid window parameters (required parameter missing)
- invalidSizeConfig := types.Config{
- NeedWindow: true,
- WindowConfig: types.WindowConfig{
- Type: "tumbling",
- Params: map[string]interface{}{
- // "size" parameter missing
- },
- },
- SelectFields: map[string]aggregator.AggregateType{
- "value": aggregator.Count,
- },
- PerformanceConfig: types.DefaultPerformanceConfig(),
- }
-
- _, err = NewStream(invalidSizeConfig)
- if err == nil {
- t.Error("expected creation to fail due to the missing size parameter, but it succeeded")
- return
- }
- //t.Logf("correctly caught size parameter error: %v", err)
-}
-
-// TestStreamUnifiedConfigCompatibility tests compatibility of the unified configuration
-func TestStreamUnifiedConfigCompatibility(t *testing.T) {
- // Test compatibility between the new unified configuration and the old API
-
- // 1. Use the new unified configuration
- newConfig := types.Config{
- NeedWindow: false,
- SelectFields: map[string]aggregator.AggregateType{
- "value": aggregator.Count,
- },
- PerformanceConfig: types.HighPerformanceConfig(),
- }
-
- s1, err := NewStream(newConfig)
- if err != nil {
- t.Fatalf("failed to create Stream with the new config: %v", err)
- }
- defer s1.Stop()
-
- // Verify the new configuration took effect
- expectedDataSize := types.HighPerformanceConfig().BufferConfig.DataChannelSize
- if cap(s1.dataChan) != expectedDataSize {
- t.Errorf("new config: expected data channel size %d, got %d", expectedDataSize, cap(s1.dataChan))
- }
-
- // 2. Test the default configuration
- defaultConfig := types.Config{
- NeedWindow: false,
- SelectFields: map[string]aggregator.AggregateType{
- "value": aggregator.Count,
- },
- PerformanceConfig: types.DefaultPerformanceConfig(),
- }
-
- s2, err := NewStream(defaultConfig)
- if err != nil {
- t.Fatalf("failed to create Stream with the default config: %v", err)
- }
- defer s2.Stop()
-
- // Verify the default configuration
- expectedDefaultSize := types.DefaultPerformanceConfig().BufferConfig.DataChannelSize
- if cap(s2.dataChan) != expectedDefaultSize {
- t.Errorf("default config: expected data channel size %d, got %d", expectedDefaultSize, cap(s2.dataChan))
- }
-
- //t.Logf("new config data channel size: %d", cap(s1.dataChan))
- //t.Logf("default config data channel size: %d", cap(s2.dataChan))
-}
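The deleted integration tests above asserted that each performance preset determines the capacities of the stream's internal channels. That invariant is worth keeping in condensed form; a sketch of such a test (in-package, since dataChan is unexported), assuming the presets and fields used by the deleted code are unchanged:

package stream

import (
	"testing"

	"github.com/rulego/streamsql/aggregator"
	"github.com/rulego/streamsql/types"
)

// TestPresetBufferSizes is a condensed form of the deleted unified-config
// integration test: the preset's buffer configuration must match the
// capacity of the stream's data channel.
func TestPresetBufferSizes(t *testing.T) {
	perf := types.HighPerformanceConfig()
	config := types.Config{
		NeedWindow:        false,
		SelectFields:      map[string]aggregator.AggregateType{"value": aggregator.Count},
		PerformanceConfig: perf,
	}

	s, err := NewStream(config)
	if err != nil {
		t.Fatalf("failed to create Stream: %v", err)
	}
	defer s.Stop()

	if cap(s.dataChan) != perf.BufferConfig.DataChannelSize {
		t.Errorf("expected data channel size %d, got %d",
			perf.BufferConfig.DataChannelSize, cap(s.dataChan))
	}
}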