// Source: streamsql/stream/persistence.go
/*
* Copyright 2025 The RuleGo Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package stream
import (
"bufio"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/rulego/streamsql/logger"
)
// OrderedDataItem is a single persisted datum tagged with a globally
// unique, monotonically increasing sequence number so that recovery can
// replay items in their original arrival (FIFO) order.
type OrderedDataItem struct {
	SequenceID int64                  `json:"sequence_id"` // Globally incremented sequence number assigned at persist time
	Timestamp  int64                  `json:"timestamp"`   // Reception timestamp in nanoseconds (time.Now().UnixNano())
	Data       map[string]interface{} `json:"data"`        // The actual payload being persisted
	RetryCount int                    `json:"retry_count"` // Number of times this item has been retried so far
	LastRetry  int64                  `json:"last_retry"`  // Timestamp (ns) of the most recent retry
}
// DeadLetterItem wraps an OrderedDataItem that exhausted its retry budget,
// recording when and why it was dropped into the dead letter queue.
type DeadLetterItem struct {
	OriginalData OrderedDataItem `json:"original_data"` // The item that failed permanently
	FailureTime  int64           `json:"failure_time"`  // Timestamp (ns) when the item was moved here
	Reason       string          `json:"reason"`        // Human-readable failure reason
}
// PersistenceManager persistence manager
// Solves data timing issues, ensures first-in-first-out (FIFO) processing.
// Optimized version: adds retry limits, dead letter queue and backoff strategy.
//
// Concurrency model (as implemented below): pendingData is guarded by
// pendingMutex, file state by writeMutex, isRunning by runningMutex,
// recoveryMode by recoveryMutex; counters written from multiple goroutines
// (sequenceCounter, totalLoaded, totalRecovered, totalDropped, totalRetried)
// use sync/atomic, while totalPersisted/filesCreated are mutated only under
// writeMutex.
type PersistenceManager struct {
	// Basic configuration
	dataDir       string        // Persistence data directory
	maxFileSize   int64         // Maximum size per file (bytes) before rotation
	flushInterval time.Duration // Interval between timed flushes to disk
	// Sequence number management
	sequenceCounter int64 // Global sequence counter, accessed with atomic operations
	// File management
	currentFile *os.File // Current write file
	currentSize int64    // Bytes written to the current file so far
	fileIndex   int      // Monotonic index embedded in new file names
	// Concurrency control
	writeMutex   sync.Mutex   // Protects currentFile/currentSize/fileIndex and write-side stats
	pendingMutex sync.Mutex   // Protects pendingData
	runningMutex sync.RWMutex // Read-write lock protecting isRunning field
	// Data buffering
	pendingData []OrderedDataItem // Pending data to write, sorted by sequence number at flush time
	// State management
	isRunning bool          // Whether the manager is running
	stopChan  chan struct{} // Closed to signal background goroutines to stop
	flushTimer *time.Timer  // Timer driving periodic flushes (re-armed after each fire)
	// Recovery management
	recoveryQueue chan OrderedDataItem // Queue of recovered items awaiting consumption
	recoveryMode  bool                 // Whether in recovery mode (items still queued)
	recoveryMutex sync.RWMutex         // Protects recoveryMode
	// Retry and dead letter queue management
	maxRetryCount   int                        // Maximum retry count before dead-lettering
	deadLetterQueue []DeadLetterItem           // Items that exceeded the retry limit
	deadLetterMutex sync.Mutex                 // Protects deadLetterQueue
	retryDataMap    map[int64]*OrderedDataItem // Retry data mapping (indexed by sequence number)
	retryMapMutex   sync.RWMutex               // Protects retryDataMap
	// Statistics
	totalPersisted int64 // Total persisted data count (guarded by writeMutex)
	totalLoaded    int64 // Total loaded data count (atomic)
	filesCreated   int64 // Number of files created (guarded by writeMutex)
	totalRecovered int64 // Total recovered data count (atomic)
	totalDropped   int64 // Total dropped data count, i.e. dead-lettered (atomic)
	totalRetried   int64 // Total retried data count (atomic)
}
// NewPersistenceManager creates a persistence manager with the default
// configuration (10MB per file, 2 second flush interval).
//
// Parameters:
//   - dataDir: data storage directory
//
// Returns:
//   - *PersistenceManager: persistence manager instance
func NewPersistenceManager(dataDir string) *PersistenceManager {
	// Delegate to the configurable constructor instead of duplicating the
	// whole initialization block; the defaults here match the originals
	// (10MB per file, flush every 2 seconds to preserve ordering).
	return NewPersistenceManagerWithConfig(dataDir, 10*1024*1024, 2*time.Second)
}
// NewPersistenceManagerWithConfig creates a persistence manager with custom configuration.
//
// Parameters:
//   - dataDir: data storage directory
//   - maxFileSize: maximum size per file
//   - flushInterval: flush interval
//
// Returns:
//   - *PersistenceManager: persistence manager instance
func NewPersistenceManagerWithConfig(dataDir string, maxFileSize int64, flushInterval time.Duration) *PersistenceManager {
	// Make sure the storage directory exists up front; failure is logged
	// but not fatal (matching the manager's best-effort startup behavior).
	if err := os.MkdirAll(dataDir, 0755); err != nil {
		logger.Error("Failed to create persistence directory: %v", err)
	}
	return &PersistenceManager{
		dataDir:         dataDir,
		maxFileSize:     maxFileSize,
		flushInterval:   flushInterval,
		fileIndex:       0,
		pendingData:     make([]OrderedDataItem, 0, 1000), // pre-sized pending buffer
		stopChan:        make(chan struct{}),
		recoveryQueue:   make(chan OrderedDataItem, 10000), // buffered recovery queue
		sequenceCounter: 0,
		// Retry / dead-letter configuration
		maxRetryCount:   3,                                 // default: at most 3 retries
		deadLetterQueue: make([]DeadLetterItem, 0, 1000),   // pre-sized dead letter queue
		retryDataMap:    make(map[int64]*OrderedDataItem),  // retry tracking by sequence ID
	}
}
// Start starts the persistence manager: it creates the initial file,
// launches the flush timer and background goroutines, and kicks off
// recovery of any previously persisted data.
//
// Returns:
//   - error: error during startup process
func (pm *PersistenceManager) Start() error {
	// Fix: check-and-set the running flag in a single critical section.
	// The previous read-then-write pattern (RLock check, Lock set) let two
	// concurrent Start calls both observe "not running" and both proceed.
	pm.runningMutex.Lock()
	if pm.isRunning {
		pm.runningMutex.Unlock()
		return fmt.Errorf("ordered persistence manager already running")
	}
	pm.isRunning = true
	pm.runningMutex.Unlock()
	// Reinitialize channels in case a previous Stop closed them.
	pm.stopChan = make(chan struct{})
	pm.recoveryQueue = make(chan OrderedDataItem, 10000)
	// Create the initial persistence file.
	pm.writeMutex.Lock()
	if err := pm.createNewFile(); err != nil {
		pm.writeMutex.Unlock()
		// Roll back the running flag so a later Start can retry.
		pm.runningMutex.Lock()
		pm.isRunning = false
		pm.runningMutex.Unlock()
		return fmt.Errorf("failed to create initial file: %w", err)
	}
	pm.writeMutex.Unlock()
	// Start the periodic flush timer.
	pm.startFlushTimer()
	// Start background processing goroutines.
	go pm.backgroundProcessor()
	go pm.recoveryProcessor()
	// Load and recover existing data; failure here is non-fatal.
	if err := pm.LoadAndRecoverData(); err != nil {
		logger.Error("Failed to load and recover data: %v", err)
		// Don't return error, continue running
	}
	logger.Info("Ordered persistence manager started successfully, data directory: %s", pm.dataDir)
	return nil
}
// Stop stops the persistence manager: it signals background goroutines,
// flushes any buffered data, closes the current file, and drains/closes
// the recovery queue.
//
// Returns:
//   - error: error during stop process
func (pm *PersistenceManager) Stop() error {
	// Fix: check-and-clear the running flag in a single critical section.
	// The previous read-then-write pattern allowed two concurrent Stop
	// calls to both observe "running" and run the shutdown path twice.
	pm.runningMutex.Lock()
	if !pm.isRunning {
		pm.runningMutex.Unlock()
		return nil
	}
	pm.isRunning = false
	pm.runningMutex.Unlock()
	// Close stop channel safely (it may already be closed by an earlier
	// lifecycle round).
	select {
	case <-pm.stopChan:
		// Channel already closed
	default:
		close(pm.stopChan)
	}
	// Stop the periodic flush timer.
	pm.writeMutex.Lock()
	if pm.flushTimer != nil {
		pm.flushTimer.Stop()
	}
	pm.writeMutex.Unlock()
	// Flush any data still waiting in the pending buffer.
	pm.flushPendingData()
	// Close current file with proper synchronization.
	pm.writeMutex.Lock()
	if pm.currentFile != nil {
		pm.currentFile.Close()
		pm.currentFile = nil
	}
	pm.writeMutex.Unlock()
	// Drain and close the recovery queue in the background.
	// NOTE(review): closing a channel while a producer may still send on it
	// panics; this relies on LoadAndRecoverData's sender selecting on
	// stopChan (already closed above) — confirm no other senders exist.
	go func() {
		for {
			select {
			case <-pm.recoveryQueue:
				// Continue draining buffered items.
			default:
				// Queue is empty, safe to close.
				close(pm.recoveryQueue)
				return
			}
		}
	}()
	// Give the drain goroutine a moment to finish.
	time.Sleep(100 * time.Millisecond)
	logger.Info("Ordered persistence manager stopped")
	return nil
}
// PersistData persists data ensuring timing order (compatibility method).
// It is equivalent to PersistDataWithRetryLimit with a retry count of 0,
// i.e. a first attempt rather than a retry.
// Parameters:
//   - data: data to persist, must be map[string]interface{} type
//
// Returns:
//   - error: error during persistence process
func (pm *PersistenceManager) PersistData(data map[string]interface{}) error {
	return pm.PersistDataWithRetryLimit(data, 0)
}
// PersistDataWithRetryLimit persists data with retry limit support.
// The item is stamped with a globally unique sequence number and queued
// for the next batched flush; retried items are additionally tracked in
// the retry map so their retry budget can be enforced later.
//
// Parameters:
//   - data: data to persist, must be map[string]interface{} type
//   - retryCount: current retry count
//
// Returns:
//   - error: error during persistence process
func (pm *PersistenceManager) PersistDataWithRetryLimit(data map[string]interface{}, retryCount int) error {
	// Refuse work while stopped.
	pm.runningMutex.RLock()
	active := pm.isRunning
	pm.runningMutex.RUnlock()
	if !active {
		return fmt.Errorf("ordered persistence manager not running")
	}
	// A globally unique, monotonically increasing sequence number keeps
	// arrival order reconstructible across files and restarts.
	seq := atomic.AddInt64(&pm.sequenceCounter, 1)
	entry := OrderedDataItem{
		SequenceID: seq,
		Timestamp:  time.Now().UnixNano(), // nanosecond-resolution reception time
		Data:       data,
		RetryCount: retryCount,
		LastRetry:  time.Now().UnixNano(),
	}
	// Retried items are indexed by sequence number for later lookup.
	if retryCount > 0 {
		pm.retryMapMutex.Lock()
		pm.retryDataMap[seq] = &entry
		pm.retryMapMutex.Unlock()
		atomic.AddInt64(&pm.totalRetried, 1)
	}
	// Enqueue for the next flush; actual disk writes happen in batches.
	pm.pendingMutex.Lock()
	pm.pendingData = append(pm.pendingData, entry)
	pm.pendingMutex.Unlock()
	return nil
}
// LoadAndRecoverData loads persisted data and starts ordered recovery.
// It globs the data directory for persistence files, decodes and deletes
// them, sorts all items by sequence number, enters recovery mode, and
// feeds the items into the recovery queue for consumption via
// GetRecoveryData.
// Returns:
//   - error: error during loading process
func (pm *PersistenceManager) LoadAndRecoverData() error {
	// Only load unprocessed files (exclude .processed files).
	allFiles, err := filepath.Glob(filepath.Join(pm.dataDir, "streamsql_ordered_*.log"))
	if err != nil {
		return fmt.Errorf("failed to glob files: %w", err)
	}
	// Filter out already-processed files (those with a .processed suffix).
	// NOTE(review): the glob pattern already requires a ".log" suffix, so
	// ".processed" names would not normally match; this filter appears to
	// be defensive — confirm whether ".log.processed" files can exist.
	var files []string
	for _, file := range allFiles {
		if !strings.HasSuffix(file, ".processed") {
			files = append(files, file)
		}
	}
	if len(files) == 0 {
		logger.Info("No persistence files found for recovery")
		return nil
	}
	// Collect all data items from every remaining file.
	var allItems []OrderedDataItem
	for _, filename := range files {
		items, err := pm.loadItemsFromFile(filename)
		if err != nil {
			logger.Error("Failed to load file %s: %v", filename, err)
			continue
		}
		allItems = append(allItems, items...)
		// Delete the file immediately after loading.
		// NOTE(review): the file is removed before its items are consumed
		// from the recovery queue, so a crash between here and consumption
		// loses those items — confirm this durability trade-off is intended.
		if deleteErr := os.Remove(filename); deleteErr != nil {
			logger.Error("Failed to delete file %s: %v", filename, deleteErr)
		} else {
			logger.Info("File %s processed and deleted", filename)
		}
	}
	// Sort by sequence number to guarantee replay ordering.
	sort.Slice(allItems, func(i, j int) bool {
		return allItems[i].SequenceID < allItems[j].SequenceID
	})
	// Advance the sequence counter so new data cannot collide with
	// recovered sequence numbers.
	if len(allItems) > 0 {
		lastSequenceID := allItems[len(allItems)-1].SequenceID
		atomic.StoreInt64(&pm.sequenceCounter, lastSequenceID)
	}
	// Enter recovery mode.
	pm.recoveryMutex.Lock()
	pm.recoveryMode = true
	pm.recoveryMutex.Unlock()
	// With nothing to recover, leave recovery mode again immediately.
	if len(allItems) == 0 {
		pm.recoveryMutex.Lock()
		pm.recoveryMode = false
		pm.recoveryMutex.Unlock()
		logger.Info("No data to recover, exiting recovery mode")
		return nil
	}
	// Feed the items into the recovery queue in sequence order; abort
	// early if the manager is being stopped.
	for _, item := range allItems {
		select {
		case pm.recoveryQueue <- item:
			// Item enqueued for recovery.
		case <-pm.stopChan:
			return nil
		}
	}
	logger.Info("Data recovery completed, %d items recovered in order", len(allItems))
	// Watch the recovery queue and exit recovery mode once it drains.
	go func() {
		ticker := time.NewTicker(1 * time.Millisecond)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				if len(pm.recoveryQueue) == 0 {
					pm.recoveryMutex.Lock()
					pm.recoveryMode = false
					pm.recoveryMutex.Unlock()
					return
				}
			case <-pm.stopChan:
				return
			}
		}
	}()
	atomic.AddInt64(&pm.totalLoaded, int64(len(allItems)))
	logger.Info("Started ordered recovery of %d data items", len(allItems))
	return nil
}
// IsInRecoveryMode reports whether the manager is currently replaying
// previously persisted data.
//
// Returns:
//   - bool: true while recovery mode is active
func (pm *PersistenceManager) IsInRecoveryMode() bool {
	pm.recoveryMutex.RLock()
	mode := pm.recoveryMode
	pm.recoveryMutex.RUnlock()
	return mode
}
// GetRecoveryData fetches one recovered item without blocking.
// When the queue is (or becomes) empty, recovery mode is switched off.
//
// Returns:
//   - map[string]interface{}: the recovered data, or nil when none is available
//   - bool: whether an item was obtained
func (pm *PersistenceManager) GetRecoveryData() (map[string]interface{}, bool) {
	select {
	case entry := <-pm.recoveryQueue:
		atomic.AddInt64(&pm.totalRecovered, 1)
		// Leave recovery mode as soon as the queue is fully drained.
		if len(pm.recoveryQueue) == 0 {
			pm.recoveryMutex.Lock()
			pm.recoveryMode = false
			pm.recoveryMutex.Unlock()
		}
		return entry.Data, true
	default:
		// Nothing queued: the queue is empty, exit recovery mode.
		pm.recoveryMutex.Lock()
		pm.recoveryMode = false
		pm.recoveryMutex.Unlock()
		return nil, false
	}
}
// GetStats returns a snapshot of persistence statistics.
// Each group of fields is read under its own lock (or atomically), so the
// snapshot is internally consistent per group but not across groups.
//
// Returns:
//   - map[string]interface{}: statistics keyed by metric name
func (pm *PersistenceManager) GetStats() map[string]interface{} {
	pm.pendingMutex.Lock()
	pendingCount := len(pm.pendingData)
	pm.pendingMutex.Unlock()
	pm.writeMutex.Lock()
	currentFileSize := pm.currentSize
	fileIndex := pm.fileIndex
	totalPersisted := pm.totalPersisted
	filesCreated := pm.filesCreated
	pm.writeMutex.Unlock()
	// Fix: totalLoaded is updated with atomic.AddInt64 (LoadAndRecoverData),
	// so it must be read atomically too; the previous plain read under
	// writeMutex was a data race.
	totalLoaded := atomic.LoadInt64(&pm.totalLoaded)
	pm.runningMutex.RLock()
	running := pm.isRunning
	pm.runningMutex.RUnlock()
	pm.recoveryMutex.RLock()
	recoveryMode := pm.recoveryMode
	pm.recoveryMutex.RUnlock()
	sequenceCounter := atomic.LoadInt64(&pm.sequenceCounter)
	totalRecovered := atomic.LoadInt64(&pm.totalRecovered)
	recoveryQueueLen := len(pm.recoveryQueue)
	// Dead letter queue and retry statistics.
	pm.deadLetterMutex.Lock()
	deadLetterCount := len(pm.deadLetterQueue)
	pm.deadLetterMutex.Unlock()
	pm.retryMapMutex.RLock()
	retryMapCount := len(pm.retryDataMap)
	pm.retryMapMutex.RUnlock()
	totalDropped := atomic.LoadInt64(&pm.totalDropped)
	totalRetried := atomic.LoadInt64(&pm.totalRetried)
	return map[string]interface{}{
		"running":            running,
		"recovery_mode":      recoveryMode,
		"data_dir":           pm.dataDir,
		"pending_count":      pendingCount,
		"current_file_size":  currentFileSize,
		"file_index":         fileIndex,
		"max_file_size":      pm.maxFileSize,
		"flush_interval":     pm.flushInterval.String(),
		"total_persisted":    totalPersisted,
		"total_loaded":       totalLoaded,
		"total_recovered":    totalRecovered,
		"files_created":      filesCreated,
		"sequence_counter":   sequenceCounter,
		"recovery_queue_len": recoveryQueueLen,
		"max_retry_count":    pm.maxRetryCount,
		"dead_letter_count":  deadLetterCount,
		"retry_map_count":    retryMapCount,
		"total_dropped":      totalDropped,
		"total_retried":      totalRetried,
	}
}
// createNewFile closes the current persistence file (if any) and opens a
// fresh one, resetting the size counter and bumping the file index and
// creation statistics.
// Note: must be called with writeMutex held.
//
// Returns:
//   - error: error during file creation
func (pm *PersistenceManager) createNewFile() error {
	// Close the previous file before rotating to a new one.
	if pm.currentFile != nil {
		pm.currentFile.Close()
	}
	// Build the new file name; the "ordered" prefix distinguishes these
	// files from other persistence formats.
	filename := fmt.Sprintf("streamsql_ordered_%d_%d.log",
		time.Now().Unix(), pm.fileIndex)
	// Fix: the local variable was named "filepath", shadowing the
	// path/filepath package; renamed to filePath.
	filePath := filepath.Join(pm.dataDir, filename)
	// Create/open the file in append mode.
	file, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		return fmt.Errorf("failed to create file %s: %w", filePath, err)
	}
	pm.currentFile = file
	pm.currentSize = 0
	pm.fileIndex++
	pm.filesCreated++
	return nil
}
// writeItemToFile serializes a single ordered item as one JSON line and
// appends it to the current file, rotating to a new file first when the
// size limit would be exceeded.
// Note: must be called with writeMutex held.
//
// Parameters:
//   - item: the ordered data item to write
//
// Returns:
//   - error: error during serialization or the write
func (pm *PersistenceManager) writeItemToFile(item OrderedDataItem) error {
	if pm.currentFile == nil {
		return fmt.Errorf("no current file")
	}
	// One JSON document per line.
	encoded, err := json.Marshal(item)
	if err != nil {
		return fmt.Errorf("failed to marshal item: %w", err)
	}
	encoded = append(encoded, '\n')
	// Rotate when this record would push the file past its size cap.
	if pm.currentSize+int64(len(encoded)) > pm.maxFileSize {
		if err := pm.createNewFile(); err != nil {
			return fmt.Errorf("failed to create new file: %w", err)
		}
	}
	written, err := pm.currentFile.Write(encoded)
	if err != nil {
		return fmt.Errorf("failed to write data: %w", err)
	}
	pm.currentSize += int64(written)
	pm.totalPersisted++
	return nil
}
// flushPendingData drains the pending buffer, sorts the drained items by
// sequence number, and writes them to disk in order, followed by a sync.
func (pm *PersistenceManager) flushPendingData() {
	// Copy the buffer out under the lock, then release it so producers
	// are not blocked during disk I/O.
	pm.pendingMutex.Lock()
	if len(pm.pendingData) == 0 {
		pm.pendingMutex.Unlock()
		return
	}
	batch := make([]OrderedDataItem, len(pm.pendingData))
	copy(batch, pm.pendingData)
	pm.pendingData = pm.pendingData[:0] // reset length, keep capacity
	pm.pendingMutex.Unlock()
	// Sort by sequence number so the on-disk order matches arrival order.
	sort.Slice(batch, func(a, b int) bool {
		return batch[a].SequenceID < batch[b].SequenceID
	})
	pm.writeMutex.Lock()
	defer pm.writeMutex.Unlock()
	// Write the batch in sequence order; individual failures are logged
	// and do not abort the rest of the batch.
	for _, entry := range batch {
		if err := pm.writeItemToFile(entry); err != nil {
			logger.Error("Failed to write persistence item: %v", err)
		}
	}
	// Force buffered data to disk.
	if pm.currentFile != nil {
		_ = pm.currentFile.Sync()
	}
}
// startFlushTimer arms a one-shot timer that flushes pending data after
// flushInterval and then re-arms itself for as long as the manager runs.
func (pm *PersistenceManager) startFlushTimer() {
	pm.writeMutex.Lock()
	defer pm.writeMutex.Unlock()
	pm.flushTimer = time.AfterFunc(pm.flushInterval, func() {
		// Check the running state safely before doing any work.
		pm.runningMutex.RLock()
		active := pm.isRunning
		pm.runningMutex.RUnlock()
		if !active {
			return
		}
		pm.flushPendingData()
		pm.startFlushTimer() // re-arm for the next interval
	})
}
// backgroundProcessor periodically inspects the pending buffer and
// triggers an early flush once the backlog grows past a small threshold,
// keeping write latency and ordering drift low between timed flushes.
func (pm *PersistenceManager) backgroundProcessor() {
	ticker := time.NewTicker(500 * time.Millisecond) // frequent checks
	defer ticker.Stop()
	for {
		select {
		case <-pm.stopChan:
			return
		case <-ticker.C:
			pm.pendingMutex.Lock()
			backlog := len(pm.pendingData)
			pm.pendingMutex.Unlock()
			// Flush eagerly once the backlog passes the threshold so
			// ordered data reaches disk quickly.
			if backlog > 50 { // low threshold for fast response
				pm.flushPendingData()
			}
		}
	}
}
// recoveryProcessor monitors recovery progress; the actual data handoff
// happens through GetRecoveryData. It clears recovery mode once the queue
// drains and terminates when recovery ends or the manager stops.
func (pm *PersistenceManager) recoveryProcessor() {
	ticker := time.NewTicker(100 * time.Millisecond) // frequent progress checks
	defer ticker.Stop()
	for {
		select {
		case <-pm.stopChan:
			return
		case <-ticker.C:
			pm.recoveryMutex.RLock()
			inRecovery := pm.recoveryMode
			pm.recoveryMutex.RUnlock()
			if !inRecovery {
				// Not in recovery mode: nothing left to monitor.
				return
			}
			remaining := len(pm.recoveryQueue)
			if remaining == 0 {
				// Queue drained: leave recovery mode and stop monitoring.
				pm.recoveryMutex.Lock()
				pm.recoveryMode = false
				pm.recoveryMutex.Unlock()
				logger.Debug("Recovery queue empty, exiting recovery mode")
				return
			}
			logger.Debug("Recovery in progress, %d items remaining in queue", remaining)
		}
	}
}
// loadItemsFromFile reads a persistence file line by line, decoding each
// line as one OrderedDataItem. Malformed lines are logged and skipped
// rather than aborting the load.
//
// Parameters:
//   - filename: path of the file to load
//
// Returns:
//   - []OrderedDataItem: the decoded items
//   - error: error opening or scanning the file
func (pm *PersistenceManager) loadItemsFromFile(filename string) ([]OrderedDataItem, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, fmt.Errorf("failed to open file %s: %w", filename, err)
	}
	defer f.Close()
	var items []OrderedDataItem
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		var item OrderedDataItem
		// Each line is an independent JSON document.
		if err := json.Unmarshal(scanner.Bytes(), &item); err != nil {
			logger.Error("Failed to parse data line: %v", err)
			continue
		}
		items = append(items, item)
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed to scan file: %w", err)
	}
	return items, nil
}
// RetryFailedData retries a previously failed item identified by its
// sequence number. If the item has exhausted its retry budget it is moved
// to the dead letter queue instead.
//
// Parameters:
//   - sequenceID: sequence number of the data to retry
//   - reason: failure reason (used when dead-lettering)
//
// Returns:
//   - error: error during the retry process
func (pm *PersistenceManager) RetryFailedData(sequenceID int64, reason string) error {
	pm.retryMapMutex.RLock()
	item, exists := pm.retryDataMap[sequenceID]
	pm.retryMapMutex.RUnlock()
	if !exists {
		return fmt.Errorf("data with sequence ID %d not found in retry map", sequenceID)
	}
	// Exhausted retry budget: park the item in the dead letter queue
	// (which also removes it from the retry map).
	if item.RetryCount >= pm.maxRetryCount {
		return pm.moveToDeadLetterQueue(*item, reason)
	}
	// Re-persist with an incremented retry count; this assigns a NEW
	// sequence number and (for retryCount > 0) a new retry-map entry.
	if err := pm.PersistDataWithRetryLimit(item.Data, item.RetryCount+1); err != nil {
		return err
	}
	// Fix: drop the stale entry for the OLD sequence number. Previously it
	// was never removed on a successful retry, so the retry map grew
	// without bound as items were retried.
	pm.retryMapMutex.Lock()
	delete(pm.retryDataMap, sequenceID)
	pm.retryMapMutex.Unlock()
	return nil
}
// moveToDeadLetterQueue records an item as permanently failed: it is
// appended to the dead letter queue, removed from the retry map, and
// counted in the dropped total.
//
// Parameters:
//   - item: the failed data item
//   - reason: human-readable failure reason
//
// Returns:
//   - error: always nil in the current implementation
func (pm *PersistenceManager) moveToDeadLetterQueue(item OrderedDataItem, reason string) error {
	entry := DeadLetterItem{
		OriginalData: item,
		FailureTime:  time.Now().UnixNano(),
		Reason:       reason,
	}
	pm.deadLetterMutex.Lock()
	pm.deadLetterQueue = append(pm.deadLetterQueue, entry)
	pm.deadLetterMutex.Unlock()
	// The item no longer participates in retries.
	pm.retryMapMutex.Lock()
	delete(pm.retryDataMap, item.SequenceID)
	pm.retryMapMutex.Unlock()
	atomic.AddInt64(&pm.totalDropped, 1)
	logger.Warn("Data moved to dead letter queue, sequence ID: %d, reason: %s", item.SequenceID, reason)
	return nil
}
// GetDeadLetterQueue returns the current dead letter queue contents.
//
// Returns:
//   - []DeadLetterItem: a copy of every dead-lettered item
func (pm *PersistenceManager) GetDeadLetterQueue() []DeadLetterItem {
	pm.deadLetterMutex.Lock()
	defer pm.deadLetterMutex.Unlock()
	// Hand back a snapshot so callers cannot race with internal mutation.
	snapshot := make([]DeadLetterItem, len(pm.deadLetterQueue))
	copy(snapshot, pm.deadLetterQueue)
	return snapshot
}
// ClearDeadLetterQueue empties the dead letter queue.
//
// Returns:
//   - int: number of items removed
func (pm *PersistenceManager) ClearDeadLetterQueue() int {
	pm.deadLetterMutex.Lock()
	defer pm.deadLetterMutex.Unlock()
	removed := len(pm.deadLetterQueue)
	pm.deadLetterQueue = pm.deadLetterQueue[:0] // keep capacity for reuse
	return removed
}
// SetMaxRetryCount sets the maximum retry count.
// Parameters:
//   - maxRetryCount: maximum retry count
//
// NOTE(review): this write is not synchronized, while maxRetryCount is
// read from other goroutines (e.g. RetryFailedData, ShouldRetryRecoveredData,
// GetStats); call it before Start, or add synchronization — confirm.
func (pm *PersistenceManager) SetMaxRetryCount(maxRetryCount int) {
	pm.maxRetryCount = maxRetryCount
}
// retryCountExceeded reports whether v — a retry counter stored either as a
// JSON-decoded float64 or a native int — has reached or passed limit.
// Values of any other type are treated as "not exceeded".
func retryCountExceeded(v interface{}, limit int) bool {
	switch n := v.(type) {
	case float64:
		return int(n) >= limit
	case int:
		return n >= limit
	}
	return false
}

// ShouldRetryRecoveredData reports whether a recovered datum should be
// retried, based on retry counters embedded in the data and on the retry
// map keyed by the embedded sequence number.
// Parameters:
//   - data: the recovered data
//
// Returns:
//   - bool: true when the data may be retried
func (pm *PersistenceManager) ShouldRetryRecoveredData(data map[string]interface{}) bool {
	// Both "retry" and "_retry_count" keys are accepted for compatibility;
	// previously the identical float64/int checks were duplicated per key.
	for _, key := range []string{"retry", "_retry_count"} {
		if v, exists := data[key]; exists && retryCountExceeded(v, pm.maxRetryCount) {
			return false
		}
	}
	// Fall back to the retry map via the embedded sequence number.
	if seqVal, exists := data["_sequence_id"]; exists {
		if seq, ok := seqVal.(float64); ok {
			pm.retryMapMutex.RLock()
			item, found := pm.retryDataMap[int64(seq)]
			pm.retryMapMutex.RUnlock()
			if found && item.RetryCount >= pm.maxRetryCount {
				return false
			}
		}
	}
	// No retry information found: allow a retry (likely a first failure).
	return true
}
// MoveToDeadLetterQueue moves raw data to the dead letter queue (public method).
// It wraps the data in a fresh OrderedDataItem whose retry count is set
// past the limit, marking it as permanently failed.
//
// Parameters:
//   - data: the data to move
func (pm *PersistenceManager) MoveToDeadLetterQueue(data map[string]interface{}) {
	wrapper := OrderedDataItem{
		SequenceID: atomic.AddInt64(&pm.sequenceCounter, 1),
		Timestamp:  time.Now().UnixNano(),
		Data:       data,
		RetryCount: pm.maxRetryCount + 1, // marks the retry budget as exhausted
		LastRetry:  time.Now().UnixNano(),
	}
	pm.moveToDeadLetterQueue(wrapper, "exceeded retry limit during recovery")
}
// RePersistRecoveredData re-persists recovered data with an incremented
// retry count so its retry budget keeps being tracked across failures.
// Parameters:
//   - data: the data to re-persist
//
// Returns:
//   - error: error during re-persistence
func (pm *PersistenceManager) RePersistRecoveredData(data map[string]interface{}) error {
	// Try to recover the previous retry count via the embedded sequence number.
	retryCount := 1 // default retry count when no history is found
	if sequenceIDFloat, exists := data["_sequence_id"]; exists {
		if sequenceID, ok := sequenceIDFloat.(float64); ok {
			pm.retryMapMutex.RLock()
			item, exists := pm.retryDataMap[int64(sequenceID)]
			pm.retryMapMutex.RUnlock()
			if exists {
				retryCount = item.RetryCount + 1
			}
		}
	}
	// Embed tracking metadata so later recovery passes can find it.
	// NOTE(review): this stores the CURRENT counter value, not the new
	// sequence number that PersistDataWithRetryLimit will assign (counter+1),
	// so the embedded id may not match the stored item's id — confirm intent.
	data["_sequence_id"] = atomic.LoadInt64(&pm.sequenceCounter)
	data["_retry_count"] = retryCount
	data["_last_retry"] = time.Now().UnixNano()
	return pm.PersistDataWithRetryLimit(data, retryCount)
}