enhance: Maintain compatibility with the legacy FlushAll (#46564)

issue: https://github.com/milvus-io/milvus/issues/45919

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: FlushAll verification must accept both per-channel
FlushAllTss (new schema) and the legacy single FlushAllTs;
GetFlushAllState chooses the verification path based on which field is
present and treats a channel as flushed only if its channel checkpoint
timestamp >= the applicable threshold (per-channel timestamp or legacy
FlushAllTs).
- Logic removed/simplified: The previous mixed/ambiguous checks were
split into two focused
routines—verifyFlushAllStateByChannelFlushAllTs(logger, channel,
flushAllTss) and verifyFlushAllStateByLegacyFlushAllTs(logger, channel,
flushAllTs)—and GetFlushAllState now selects one path. This centralizes
compatibility logic, eliminates interleaved/duplicated checks, and
retains the outer-loop short-circuiting on the first unflushed channel.
- Why this does NOT cause data loss or regressions: Changes only affect
read-only verification paths (GetFlushAllState/GetFlushState) that
compare in-memory channel checkpoints (meta.GetChannelCheckpoint) to
provided thresholds; no writes to checkpoints or persisted state occur
and FlushAll enqueue/wait behavior is unchanged. Unit tests were added
to cover legacy FlushAllTs behavior and the new FlushAllMsgs→FlushAllTs
extraction, exercising both code paths.
- Enhancement scope and location: Adds backward-compatible support and
concrete FlushAllTs extraction from streaming FlushAllMsgs in Proxy
(internal/proxy/task_flush_all_streaming.go) and compatibility verifiers
in DataCoord (internal/datacoord/services.go), plus corresponding tests
(internal/datacoord/services_test.go, internal/proxy/*_test.go).
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
This commit is contained in:
yihao.dai 2025-12-26 18:59:20 +08:00 committed by GitHub
parent 8d12bfb436
commit 512884524b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 183 additions and 24 deletions

View File

@ -1550,35 +1550,70 @@ OUTER:
return resp, nil
}
for _, channel := range describeColRsp.GetVirtualChannelNames() {
channelCP := s.meta.GetChannelCheckpoint(channel)
pchannel := funcutil.ToPhysicalChannel(channel)
flushAllTs, ok := req.GetFlushAllTss()[pchannel]
if !ok || flushAllTs == 0 {
log.Warn("FlushAllTs not found for pchannel", zap.String("pchannel", pchannel), zap.Uint64("flushAllTs", flushAllTs))
resp.Status = merr.Status(merr.WrapErrParameterInvalidMsg("FlushAllTs not found for pchannel %s", pchannel))
if len(req.GetFlushAllTss()) > 0 {
ok, err := s.verifyFlushAllStateByChannelFlushAllTs(log, channel, req.GetFlushAllTss())
if err != nil {
resp.Status = merr.Status(err)
return resp, nil
}
if !ok {
allFlushed = false
break OUTER
}
} else if req.GetFlushAllTs() != 0 {
// For compatibility, if deprecated FlushAllTs is provided, use it to verify the flush state.
if !s.verifyFlushAllStateByLegacyFlushAllTs(log, channel, req.GetFlushAllTs()) {
allFlushed = false
break OUTER
}
} else {
resp.Status = merr.Status(merr.WrapErrParameterInvalidMsg("FlushAllTss or FlushAllTs is required"))
return resp, nil
}
if channelCP == nil || channelCP.GetTimestamp() < flushAllTs {
allFlushed = false
log.RatedInfo(10, "channel unflushed",
zap.String("vchannel", channel),
zap.Uint64("flushAllTs", flushAllTs),
zap.Uint64("channelCP", channelCP.GetTimestamp()),
)
break OUTER
}
}
}
}
if allFlushed {
log.Info("GetFlushAllState all flushed", zap.Any("flushAllTss", req.GetFlushAllTss()))
log.Info("GetFlushAllState all flushed", zap.Any("flushAllTss", req.GetFlushAllTss()), zap.Uint64("FlushAllTs", req.GetFlushAllTs()))
}
resp.Flushed = allFlushed
return resp, nil
}
func (s *Server) verifyFlushAllStateByChannelFlushAllTs(logger *log.MLogger, channel string, flushAllTss map[string]uint64) (bool, error) {
channelCP := s.meta.GetChannelCheckpoint(channel)
pchannel := funcutil.ToPhysicalChannel(channel)
flushAllTs, ok := flushAllTss[pchannel]
if !ok || flushAllTs == 0 {
logger.Warn("FlushAllTs not found for pchannel", zap.String("pchannel", pchannel), zap.Uint64("flushAllTs", flushAllTs))
return false, merr.WrapErrParameterInvalidMsg("FlushAllTs not found for pchannel %s", pchannel)
}
if channelCP == nil || channelCP.GetTimestamp() < flushAllTs {
logger.RatedInfo(10, "channel unflushed",
zap.String("vchannel", channel),
zap.Uint64("flushAllTs", flushAllTs),
zap.Uint64("channelCP", channelCP.GetTimestamp()),
)
return false, nil
}
return true, nil
}
func (s *Server) verifyFlushAllStateByLegacyFlushAllTs(logger *log.MLogger, channel string, flushAllTs uint64) bool {
channelCP := s.meta.GetChannelCheckpoint(channel)
if channelCP == nil || channelCP.GetTimestamp() < flushAllTs {
logger.RatedInfo(10, "channel unflushed",
zap.String("vchannel", channel),
zap.Uint64("flushAllTs", flushAllTs),
zap.Uint64("channelCP", channelCP.GetTimestamp()),
)
return false
}
return true
}
// Deprecated
// UpdateSegmentStatistics updates a segment's stats.
func (s *Server) UpdateSegmentStatistics(ctx context.Context, req *datapb.UpdateSegmentStatisticsRequest) (*commonpb.Status, error) {

View File

@ -2334,6 +2334,82 @@ func TestServer_GetFlushAllState(t *testing.T) {
assert.NoError(t, merr.CheckRPCCall(resp, err))
assert.False(t, resp.GetFlushed())
})
t.Run("test legacy FlushAllTs provided and flushed", func(t *testing.T) {
server := createTestGetFlushAllStateServer()
// Mock ListDatabases
mockListDatabases := mockey.Mock(mockey.GetMethod(server.broker, "ListDatabases")).Return(&milvuspb.ListDatabasesResponse{
Status: merr.Success(),
DbNames: []string{"test-db"},
}, nil).Build()
defer mockListDatabases.UnPatch()
// Mock ShowCollections
mockShowCollections := mockey.Mock(mockey.GetMethod(server.broker, "ShowCollections")).Return(&milvuspb.ShowCollectionsResponse{
Status: merr.Success(),
CollectionIds: []int64{100},
CollectionNames: []string{"collection1"},
}, nil).Build()
defer mockShowCollections.UnPatch()
// Mock DescribeCollectionInternal
mockDescribeCollection := mockey.Mock(mockey.GetMethod(server.broker, "DescribeCollectionInternal")).Return(&milvuspb.DescribeCollectionResponse{
Status: merr.Success(),
VirtualChannelNames: []string{"channel1"},
}, nil).Build()
defer mockDescribeCollection.UnPatch()
// Setup channel checkpoint with timestamp >= deprecated FlushAllTs
server.meta.channelCPs.checkpoints["channel1"] = &msgpb.MsgPosition{Timestamp: 15000}
req := &milvuspb.GetFlushAllStateRequest{
FlushAllTs: 15000, // deprecated field
}
resp, err := server.GetFlushAllState(context.Background(), req)
assert.NoError(t, merr.CheckRPCCall(resp, err))
assert.True(t, resp.GetFlushed())
})
t.Run("test legacy FlushAllTs provided and not flushed", func(t *testing.T) {
server := createTestGetFlushAllStateServer()
// Mock ListDatabases
mockListDatabases := mockey.Mock(mockey.GetMethod(server.broker, "ListDatabases")).Return(&milvuspb.ListDatabasesResponse{
Status: merr.Success(),
DbNames: []string{"test-db"},
}, nil).Build()
defer mockListDatabases.UnPatch()
// Mock ShowCollections
mockShowCollections := mockey.Mock(mockey.GetMethod(server.broker, "ShowCollections")).Return(&milvuspb.ShowCollectionsResponse{
Status: merr.Success(),
CollectionIds: []int64{100},
CollectionNames: []string{"collection1"},
}, nil).Build()
defer mockShowCollections.UnPatch()
// Mock DescribeCollectionInternal
mockDescribeCollection := mockey.Mock(mockey.GetMethod(server.broker, "DescribeCollectionInternal")).Return(&milvuspb.DescribeCollectionResponse{
Status: merr.Success(),
VirtualChannelNames: []string{"channel1"},
}, nil).Build()
defer mockDescribeCollection.UnPatch()
// Setup channel checkpoint with timestamp < deprecated FlushAllTs
server.meta.channelCPs.checkpoints["channel1"] = &msgpb.MsgPosition{Timestamp: 10000}
req := &milvuspb.GetFlushAllStateRequest{
FlushAllTs: 15000, // deprecated field
}
resp, err := server.GetFlushAllState(context.Background(), req)
assert.NoError(t, merr.CheckRPCCall(resp, err))
assert.False(t, resp.GetFlushed())
})
}
func getWatchKV(t *testing.T) kv.WatchKV {

View File

@ -4183,16 +4183,12 @@ func (node *Proxy) FlushAll(ctx context.Context, request *milvuspb.FlushAllReque
return resp, nil
}
logger.Debug(rpcEnqueued(method),
zap.Uint64("BeginTs", ft.BeginTs()),
zap.Uint64("EndTs", ft.EndTs()))
logger.Debug(rpcEnqueued(method))
if err := ft.WaitToFinish(); err != nil {
logger.Warn(
rpcFailedToWaitToFinish(method),
zap.Error(err),
zap.Uint64("BeginTs", ft.BeginTs()),
zap.Uint64("EndTs", ft.EndTs()))
zap.Error(err))
resp.Status = merr.Status(err)
return resp, nil
@ -4868,7 +4864,10 @@ func (node *Proxy) GetFlushState(ctx context.Context, req *milvuspb.GetFlushStat
func (node *Proxy) GetFlushAllState(ctx context.Context, req *milvuspb.GetFlushAllStateRequest) (*milvuspb.GetFlushAllStateResponse, error) {
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-GetFlushAllState")
defer sp.End()
log := log.Ctx(ctx).With(zap.Any("FlushAllTss", req.GetFlushAllTss()))
log := log.Ctx(ctx).With(
zap.Any("FlushAllTss", req.GetFlushAllTss()),
zap.Uint64("FlushAllTs", req.GetFlushAllTs()), // for compatibility
)
log.Debug("receive GetFlushAllState request")
var err error

View File

@ -24,6 +24,7 @@ import (
"net/http/httptest"
"testing"
"github.com/apache/pulsar-client-go/pulsar"
"github.com/bytedance/mockey"
"github.com/cockroachdb/errors"
"github.com/gin-gonic/gin"
@ -49,6 +50,8 @@ import (
"github.com/milvus-io/milvus/pkg/v2/proto/proxypb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/proto/rootcoordpb"
"github.com/milvus-io/milvus/pkg/v2/streaming/util/message"
pulsar2 "github.com/milvus-io/milvus/pkg/v2/streaming/walimpls/impls/pulsar"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
"github.com/milvus-io/milvus/pkg/v2/util/ratelimitutil"
@ -494,10 +497,24 @@ func TestProxy_FlushAll_Success(t *testing.T) {
node := createTestProxy()
defer node.sched.Close()
messageID := pulsar2.NewPulsarID(pulsar.EarliestMessageID())
msg := message.NewFlushAllMessageBuilderV2().
WithVChannel("test-vchannel").
WithHeader(&message.FlushAllMessageHeader{}).
WithBody(&message.FlushAllMessageBody{}).
MustBuildMutable().WithTimeTick(1000).
WithLastConfirmed(messageID)
milvusMsg := message.ImmutableMessageToMilvusMessage(commonpb.WALName_Pulsar.String(), msg.IntoImmutableMessage(messageID))
mixcoord := &grpcmixcoordclient.Client{}
node.mixCoord = mixcoord
mockey.Mock((*grpcmixcoordclient.Client).FlushAll).To(func(ctx context.Context, req *datapb.FlushAllRequest, opts ...grpc.CallOption) (*datapb.FlushAllResponse, error) {
return &datapb.FlushAllResponse{Status: successStatus}, nil
return &datapb.FlushAllResponse{
Status: successStatus,
FlushAllMsgs: map[string]*commonpb.ImmutableMessage{
"channel1": milvusMsg,
},
}, nil
}).Build()
resp, err := node.FlushAll(context.Background(), &milvuspb.FlushAllRequest{})

View File

@ -20,9 +20,12 @@ import (
"context"
"fmt"
"github.com/samber/lo"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/streaming/util/message"
"github.com/milvus-io/milvus/pkg/v2/util/commonpbutil"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
)
@ -39,5 +42,12 @@ func (t *flushAllTask) Execute(ctx context.Context) error {
FlushAllMsgs: resp.GetFlushAllMsgs(),
ClusterInfo: resp.GetClusterInfo(),
}
// Assign the flush all ts to the result for compatibility.
// Use the max time tick of the flush all messages as the flush all ts.
t.result.FlushAllTs = lo.MaxBy(message.MilvusMessagesToImmutableMessages(lo.Values(resp.GetFlushAllMsgs())), func(a, b message.ImmutableMessage) bool {
return a.TimeTick() > b.TimeTick()
}).TimeTick()
return nil
}

View File

@ -20,12 +20,16 @@ import (
"context"
"testing"
"github.com/apache/pulsar-client-go/pulsar"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/streaming/util/message"
pulsar2 "github.com/milvus-io/milvus/pkg/v2/streaming/walimpls/impls/pulsar"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
)
@ -39,13 +43,31 @@ func TestFlushAllTask_Success(t *testing.T) {
mixCoord: mixCoord,
}
messageID := pulsar2.NewPulsarID(pulsar.EarliestMessageID())
msg := message.NewFlushAllMessageBuilderV2().
WithVChannel("test-vchannel").
WithHeader(&message.FlushAllMessageHeader{}).
WithBody(&message.FlushAllMessageBody{}).
MustBuildMutable().WithTimeTick(1000).
WithLastConfirmed(messageID)
milvusMsg := message.ImmutableMessageToMilvusMessage(commonpb.WALName_Pulsar.String(), msg.IntoImmutableMessage(messageID))
mixCoord.EXPECT().FlushAll(mock.Anything, mock.Anything).Return(&datapb.FlushAllResponse{
Status: merr.Success(),
FlushAllMsgs: map[string]*commonpb.ImmutableMessage{
"channel1": milvusMsg,
},
ClusterInfo: &milvuspb.ClusterInfo{
ClusterId: "cluster1",
},
}, nil)
err := task.Execute(ctx)
assert.NoError(t, err)
assert.NotNil(t, task.result)
assert.True(t, len(task.result.FlushAllMsgs) > 0)
assert.True(t, task.result.FlushAllTs > 0)
assert.NotNil(t, task.result.ClusterInfo)
}
func TestFlushAllTask_Failed(t *testing.T) {