fix: msg dispatcher lost data at streaming service (#42670)

issue: #41570

Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in:
Zhen Ye 2025-06-13 11:54:36 +08:00 committed by GitHub
parent 469677cb81
commit ca48603f35
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 49 additions and 31 deletions

View File

@ -487,7 +487,7 @@ func (node *QueryNode) Init() error {
node.loader = segments.NewLoader(node.ctx, node.manager, node.chunkManager) node.loader = segments.NewLoader(node.ctx, node.manager, node.chunkManager)
node.manager.SetLoader(node.loader) node.manager.SetLoader(node.loader)
if streamingutil.IsStreamingServiceEnabled() { if streamingutil.IsStreamingServiceEnabled() {
node.dispClient = msgdispatcher.NewClient(streaming.NewDelegatorMsgstreamFactory(), typeutil.QueryNodeRole, node.GetNodeID()) node.dispClient = msgdispatcher.NewClientWithIncludeSkipWhenSplit(streaming.NewDelegatorMsgstreamFactory(), typeutil.QueryNodeRole, node.GetNodeID())
} else { } else {
node.dispClient = msgdispatcher.NewClient(node.factory, typeutil.QueryNodeRole, node.GetNodeID()) node.dispClient = msgdispatcher.NewClient(node.factory, typeutil.QueryNodeRole, node.GetNodeID())
} }

View File

@ -66,6 +66,13 @@ type client struct {
managers *typeutil.ConcurrentMap[string, DispatcherManager] managers *typeutil.ConcurrentMap[string, DispatcherManager]
managerMut *lock.KeyLock[string] managerMut *lock.KeyLock[string]
factory msgstream.Factory factory msgstream.Factory
includeSkipWhenSplit bool
}
// NewClientWithIncludeSkipWhenSplit returns a dispatcher Client whose
// dispatchers use a strict comparison (EndTs < target position) instead of
// EndTs <= position when deciding which packs to skip during a split, so a
// pack whose EndTs equals the target position is still delivered (see the
// includeSkipWhenSplit branch in Dispatcher.work). Used when the streaming
// service is enabled, where every message carries a unique timetick.
func NewClientWithIncludeSkipWhenSplit(factory msgstream.Factory, role string, nodeID int64) Client {
c := NewClient(factory, role, nodeID)
c.(*client).includeSkipWhenSplit = true
return c
} }
func NewClient(factory msgstream.Factory, role string, nodeID int64) Client { func NewClient(factory msgstream.Factory, role string, nodeID int64) Client {
@ -75,6 +82,7 @@ func NewClient(factory msgstream.Factory, role string, nodeID int64) Client {
factory: factory, factory: factory,
managers: typeutil.NewConcurrentMap[string, DispatcherManager](), managers: typeutil.NewConcurrentMap[string, DispatcherManager](),
managerMut: lock.NewKeyLock[string](), managerMut: lock.NewKeyLock[string](),
includeSkipWhenSplit: false,
} }
} }
@ -91,7 +99,7 @@ func (c *client) Register(ctx context.Context, streamConfig *StreamConfig) (<-ch
var manager DispatcherManager var manager DispatcherManager
manager, ok := c.managers.Get(pchannel) manager, ok := c.managers.Get(pchannel)
if !ok { if !ok {
manager = NewDispatcherManager(pchannel, c.role, c.nodeID, c.factory) manager = NewDispatcherManager(pchannel, c.role, c.nodeID, c.factory, c.includeSkipWhenSplit)
c.managers.Insert(pchannel, manager) c.managers.Insert(pchannel, manager)
go manager.Run() go manager.Run()
} }

View File

@ -78,6 +78,8 @@ type Dispatcher struct {
targets *typeutil.ConcurrentMap[string, *target] targets *typeutil.ConcurrentMap[string, *target]
stream msgstream.MsgStream stream msgstream.MsgStream
includeSkipWhenSplit bool
} }
func NewDispatcher( func NewDispatcher(
@ -89,6 +91,7 @@ func NewDispatcher(
subPos SubPos, subPos SubPos,
includeCurrentMsg bool, includeCurrentMsg bool,
pullbackEndTs typeutil.Timestamp, pullbackEndTs typeutil.Timestamp,
includeSkipWhenSplit bool,
) (*Dispatcher, error) { ) (*Dispatcher, error) {
subName := fmt.Sprintf("%s-%d-%d", pchannel, id, time.Now().UnixNano()) subName := fmt.Sprintf("%s-%d-%d", pchannel, id, time.Now().UnixNano())
@ -252,7 +255,11 @@ func (d *Dispatcher) work() {
isReplicateChannel := strings.Contains(vchannel, paramtable.Get().CommonCfg.ReplicateMsgChannel.GetValue()) isReplicateChannel := strings.Contains(vchannel, paramtable.Get().CommonCfg.ReplicateMsgChannel.GetValue())
// The dispatcher seeks from the oldest target, // The dispatcher seeks from the oldest target,
// so for each target, msg before the target position must be filtered out. // so for each target, msg before the target position must be filtered out.
if p.EndTs <= t.pos.GetTimestamp() && !isReplicateChannel { //
// From 2.6.0, every message has a unique timetick, so we can filter out the msg by < but not <=.
if ((d.includeSkipWhenSplit && p.EndTs < t.pos.GetTimestamp()) ||
(!d.includeSkipWhenSplit && p.EndTs <= t.pos.GetTimestamp())) &&
!isReplicateChannel {
log.Info("skip msg", log.Info("skip msg",
zap.String("vchannel", vchannel), zap.String("vchannel", vchannel),
zap.Int("msgCount", len(p.Msgs)), zap.Int("msgCount", len(p.Msgs)),

View File

@ -36,7 +36,7 @@ func TestDispatcher(t *testing.T) {
ctx := context.Background() ctx := context.Background()
t.Run("test base", func(t *testing.T) { t.Run("test base", func(t *testing.T) {
d, err := NewDispatcher(ctx, newMockFactory(), time.Now().UnixNano(), "mock_pchannel_0", d, err := NewDispatcher(ctx, newMockFactory(), time.Now().UnixNano(), "mock_pchannel_0",
nil, common.SubscriptionPositionEarliest, false, 0) nil, common.SubscriptionPositionEarliest, false, 0, false)
assert.NoError(t, err) assert.NoError(t, err)
assert.NotPanics(t, func() { assert.NotPanics(t, func() {
d.Handle(start) d.Handle(start)
@ -65,7 +65,7 @@ func TestDispatcher(t *testing.T) {
}, },
} }
d, err := NewDispatcher(ctx, factory, time.Now().UnixNano(), "mock_pchannel_0", d, err := NewDispatcher(ctx, factory, time.Now().UnixNano(), "mock_pchannel_0",
nil, common.SubscriptionPositionEarliest, false, 0) nil, common.SubscriptionPositionEarliest, false, 0, false)
assert.Error(t, err) assert.Error(t, err)
assert.Nil(t, d) assert.Nil(t, d)
@ -73,7 +73,7 @@ func TestDispatcher(t *testing.T) {
t.Run("test target", func(t *testing.T) { t.Run("test target", func(t *testing.T) {
d, err := NewDispatcher(ctx, newMockFactory(), time.Now().UnixNano(), "mock_pchannel_0", d, err := NewDispatcher(ctx, newMockFactory(), time.Now().UnixNano(), "mock_pchannel_0",
nil, common.SubscriptionPositionEarliest, false, 0) nil, common.SubscriptionPositionEarliest, false, 0, false)
assert.NoError(t, err) assert.NoError(t, err)
output := make(chan *msgstream.MsgPack, 1024) output := make(chan *msgstream.MsgPack, 1024)
@ -128,7 +128,7 @@ func TestDispatcher(t *testing.T) {
func BenchmarkDispatcher_handle(b *testing.B) { func BenchmarkDispatcher_handle(b *testing.B) {
d, err := NewDispatcher(context.Background(), newMockFactory(), time.Now().UnixNano(), "mock_pchannel_0", d, err := NewDispatcher(context.Background(), newMockFactory(), time.Now().UnixNano(), "mock_pchannel_0",
nil, common.SubscriptionPositionEarliest, false, 0) nil, common.SubscriptionPositionEarliest, false, 0, false)
assert.NoError(b, err) assert.NoError(b, err)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
@ -143,7 +143,7 @@ func BenchmarkDispatcher_handle(b *testing.B) {
func TestGroupMessage(t *testing.T) { func TestGroupMessage(t *testing.T) {
d, err := NewDispatcher(context.Background(), newMockFactory(), time.Now().UnixNano(), "mock_pchannel_0", d, err := NewDispatcher(context.Background(), newMockFactory(), time.Now().UnixNano(), "mock_pchannel_0",
nil, common.SubscriptionPositionEarliest, false, 0) nil, common.SubscriptionPositionEarliest, false, 0, false)
assert.NoError(t, err) assert.NoError(t, err)
d.AddTarget(newTarget(&StreamConfig{VChannel: "mock_pchannel_0_1v0"})) d.AddTarget(newTarget(&StreamConfig{VChannel: "mock_pchannel_0_1v0"}))
d.AddTarget(newTarget(&StreamConfig{ d.AddTarget(newTarget(&StreamConfig{

View File

@ -63,9 +63,11 @@ type dispatcherManager struct {
factory msgstream.Factory factory msgstream.Factory
closeChan chan struct{} closeChan chan struct{}
closeOnce sync.Once closeOnce sync.Once
includeSkipWhenSplit bool
} }
func NewDispatcherManager(pchannel string, role string, nodeID int64, factory msgstream.Factory) DispatcherManager { func NewDispatcherManager(pchannel string, role string, nodeID int64, factory msgstream.Factory, includeSkipWhenSplit bool) DispatcherManager {
log.Info("create new dispatcherManager", zap.String("role", role), log.Info("create new dispatcherManager", zap.String("role", role),
zap.Int64("nodeID", nodeID), zap.String("pchannel", pchannel)) zap.Int64("nodeID", nodeID), zap.String("pchannel", pchannel))
c := &dispatcherManager{ c := &dispatcherManager{
@ -76,6 +78,7 @@ func NewDispatcherManager(pchannel string, role string, nodeID int64, factory ms
deputyDispatchers: make(map[int64]*Dispatcher), deputyDispatchers: make(map[int64]*Dispatcher),
factory: factory, factory: factory,
closeChan: make(chan struct{}), closeChan: make(chan struct{}),
includeSkipWhenSplit: includeSkipWhenSplit,
} }
return c return c
} }
@ -269,7 +272,7 @@ OUTER:
// TODO: add newDispatcher timeout param and init context // TODO: add newDispatcher timeout param and init context
id := c.idAllocator.Inc() id := c.idAllocator.Inc()
d, err := NewDispatcher(context.Background(), c.factory, id, c.pchannel, earliestTarget.pos, earliestTarget.subPos, includeCurrentMsg, latestTarget.pos.GetTimestamp()) d, err := NewDispatcher(context.Background(), c.factory, id, c.pchannel, earliestTarget.pos, earliestTarget.subPos, includeCurrentMsg, latestTarget.pos.GetTimestamp(), c.includeSkipWhenSplit)
if err != nil { if err != nil {
panic(err) panic(err)
} }

View File

@ -46,7 +46,7 @@ func TestManager(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
go produceTimeTick(t, ctx, producer) go produceTimeTick(t, ctx, producer)
c := NewDispatcherManager(pchannel, typeutil.ProxyRole, 1, factory) c := NewDispatcherManager(pchannel, typeutil.ProxyRole, 1, factory, false)
assert.NotNil(t, c) assert.NotNil(t, c)
go c.Run() go c.Run()
defer c.Close() defer c.Close()
@ -93,7 +93,7 @@ func TestManager(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
go produceTimeTick(t, ctx, producer) go produceTimeTick(t, ctx, producer)
c := NewDispatcherManager(pchannel, typeutil.ProxyRole, 1, factory) c := NewDispatcherManager(pchannel, typeutil.ProxyRole, 1, factory, false)
assert.NotNil(t, c) assert.NotNil(t, c)
go c.Run() go c.Run()
@ -157,7 +157,7 @@ func TestManager(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
go produceTimeTick(t, ctx, producer) go produceTimeTick(t, ctx, producer)
c := NewDispatcherManager(pchannel, typeutil.ProxyRole, 1, factory) c := NewDispatcherManager(pchannel, typeutil.ProxyRole, 1, factory, false)
assert.NotNil(t, c) assert.NotNil(t, c)
go c.Run() go c.Run()
@ -202,7 +202,7 @@ func TestManager(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
go produceTimeTick(t, ctx, producer) go produceTimeTick(t, ctx, producer)
c := NewDispatcherManager(pchannel, typeutil.ProxyRole, 1, factory) c := NewDispatcherManager(pchannel, typeutil.ProxyRole, 1, factory, false)
go c.Run() go c.Run()
defer c.Close() defer c.Close()