Files
streamsql/stream/doc.go
T
2025-08-04 14:45:43 +08:00

281 lines
8.3 KiB
Go

/*
* Copyright 2025 The RuleGo Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
Package stream provides the core stream processing engine for StreamSQL.

This package implements the central stream processing pipeline that orchestrates data flow,
window management, aggregation, filtering, and result generation. It serves as the execution
engine that brings together all StreamSQL components into a cohesive streaming system.

# Core Features

  - Real-time Stream Processing - High-throughput, low-latency data stream processing
  - Window Management - Integration with all window types (tumbling, sliding, counting, session)
  - Aggregation Engine - Efficient aggregation with incremental computation
  - Filtering Pipeline - Multi-stage filtering with WHERE and HAVING clause support
  - Performance Modes - Configurable performance profiles for different use cases
  - Metrics and Monitoring - Comprehensive performance metrics and health monitoring
  - Persistence Support - Optional data persistence for reliability and recovery
  - Backpressure Handling - Intelligent backpressure management and overflow strategies

# Stream Architecture

The stream processing pipeline consists of several key components:

	type Stream struct {
		dataChan chan map[string]interface{} // Input data channel
		filter condition.Condition // WHERE clause filter
		Window window.Window // Window manager
		aggregator aggregator.Aggregator // Aggregation engine
		config types.Config // Stream configuration
		sinks []func([]map[string]interface{}) // Result processors
		resultChan chan []map[string]interface{} // Result channel
		persistenceManager *PersistenceManager // Data persistence
		dataStrategy DataProcessingStrategy // Data processing strategy
	}

# Performance Modes

Configurable performance profiles for different scenarios:

	// High Performance Mode
	// - Optimized for maximum throughput
	// - Larger buffer sizes
	// - Batch processing optimization
	stream, err := NewStreamWithHighPerformance(config)

	// Low Latency Mode
	// - Optimized for minimal processing delay
	// - Smaller buffer sizes
	// - Immediate processing
	stream, err := NewStreamWithLowLatency(config)

	// Zero Data Loss Mode
	// - Guaranteed data persistence
	// - Synchronous processing
	// - Enhanced error recovery
	stream, err := NewStreamWithZeroDataLoss(config)

	// Custom Performance Mode
	// - User-defined performance parameters
	customConfig := &PerformanceConfig{
		BufferSize: 1000,
		BatchSize: 50,
		FlushInterval: time.Second,
		WorkerPoolSize: 4,
	}
	stream, err := NewStreamWithCustomPerformance(config, *customConfig)

# Data Processing Pipeline

Multi-stage processing pipeline with optimized data flow:

 1. Data Ingestion
    ├── Input validation and type checking
    ├── Timestamp extraction and normalization
    └── Initial data transformation
 2. Filtering (WHERE clause)
    ├── Field-based filtering
    ├── Expression evaluation
    └── Early data rejection
 3. Window Processing
    ├── Window assignment
    ├── Data buffering
    └── Window trigger management
 4. Aggregation
    ├── Group-by processing
    ├── Aggregate function execution
    └── Incremental computation
 5. Post-Aggregation Filtering (HAVING clause)
    ├── Aggregate result filtering
    ├── Complex condition evaluation
    └── Final result validation
 6. Result Generation
    ├── Field projection
    ├── Alias application
    └── Output formatting

# Window Integration

Seamless integration with all window types:

	// Tumbling Windows - Non-overlapping time-based windows
	config.WindowConfig = WindowConfig{
		Type: "tumbling",
		Params: map[string]interface{}{
			"size": "5s",
		},
	}

	// Sliding Windows - Overlapping time-based windows
	config.WindowConfig = WindowConfig{
		Type: "sliding",
		Params: map[string]interface{}{
			"size": "30s",
			"slide": "10s",
		},
	}

	// Counting Windows - Count-based windows
	config.WindowConfig = WindowConfig{
		Type: "counting",
		Params: map[string]interface{}{
			"count": 100,
		},
	}

	// Session Windows - Activity-based windows
	config.WindowConfig = WindowConfig{
		Type: "session",
		Params: map[string]interface{}{
			"timeout": "5m",
			"groupBy": "user_id",
		},
	}

# Metrics and Monitoring

Comprehensive performance monitoring:

	type MetricsManager struct {
		processedCount int64 // Total processed records
		filteredCount int64 // Filtered out records
		aggregatedCount int64 // Aggregated records
		errorCount int64 // Processing errors
		processingTime time.Duration // Average processing time
		throughput float64 // Records per second
		memoryUsage int64 // Memory consumption
		bufferUtilization float64 // Buffer usage percentage
	}

	// Get basic statistics
	stats := stream.GetStats()
	fmt.Printf("Processed: %d, Errors: %d\n", stats["processed"], stats["errors"])

	// Get detailed performance metrics
	detailed := stream.GetDetailedStats()
	fmt.Printf("Throughput: %.2f records/sec\n", detailed["throughput"])
	fmt.Printf("Memory Usage: %d bytes\n", detailed["memory_usage"])

# Persistence and Reliability

Optional data persistence for enhanced reliability:

	type PersistenceManager struct {
		enabled bool
		storageType string // "memory", "file", "database"
		batchSize int // Persistence batch size
		flushInterval time.Duration // Automatic flush interval
		recoveryMode string // Recovery strategy
	}

	// Enable persistence
	stream.EnablePersistence(PersistenceConfig{
		StorageType: "file",
		BatchSize: 100,
		FlushInterval: 5 * time.Second,
		RecoveryMode: "automatic",
	})

# Backpressure Management

Intelligent handling of system overload:

	// Overflow strategies
	const (
		OverflowStrategyDrop = "drop" // Drop oldest data
		OverflowStrategyBlock = "block" // Block new data
		OverflowStrategySpill = "spill" // Spill to disk
		OverflowStrategyCompress = "compress" // Compress data
	)

	// Configure backpressure handling
	config.PerformanceConfig.OverflowStrategy = OverflowStrategySpill
	config.PerformanceConfig.BufferSize = 10000
	config.PerformanceConfig.HighWaterMark = 0.8

# Usage Examples

Basic stream processing:

	// Create stream with default configuration
	stream, err := NewStream(config)
	if err != nil {
		log.Fatal(err)
	}

	// Register result handler
	stream.AddSink(func(results []map[string]interface{}) {
		fmt.Printf("Results: %v\n", results)
	})

	// Start processing
	stream.Start()

	// Send data
	stream.Emit(map[string]interface{}{
		"device_id": "sensor001",
		"temperature": 25.5,
		"timestamp": time.Now(),
	})

High-performance stream processing:

	// Create high-performance stream
	stream, err := NewStreamWithHighPerformance(config)

	// Configure for maximum throughput
	stream.SetBufferSize(50000)
	stream.SetBatchSize(1000)
	stream.SetWorkerPoolSize(8)

	// Enable metrics monitoring
	stream.EnableMetrics(true)

	// Process data in batches
	for _, batch := range dataBatches {
		stream.EmitBatch(batch)
	}

Synchronous processing for non-aggregation queries:

	// Process single record synchronously
	result, err := stream.ProcessSync(data)
	if err != nil {
		log.Printf("Processing error: %v", err)
	} else if result != nil {
		fmt.Printf("Immediate result: %v\n", result)
	}

# Integration

Central integration point for all StreamSQL components:

  - RSQL package - Configuration parsing and application
  - Window package - Window lifecycle management
  - Aggregator package - Aggregation execution
  - Functions package - Function execution in expressions
  - Condition package - Filter condition evaluation
  - Types package - Data type handling and configuration
  - Logger package - Comprehensive logging and debugging
*/
package stream