issue: #46540

Empty timeticks are only used to synchronize the clock between the different components in Milvus, so once timeticks carry LSN/MVCC semantics, most empty timeticks can be ignored. Some components still rely on empty timeticks to trigger operations such as flush and tsafe updates, so empty timeticks are only slowed down to roughly one emission every 5 seconds instead of being dropped entirely.

- Core invariant: with LSN/MVCC semantics, consumers only need (a) the first timetick that advances the latest required MVCC timestamp, to unblock MVCC-dependent waits, and (b) occasional periodic timeticks (roughly every 5s) for clock synchronization. Frequent non-persisted empty timeticks can therefore be suppressed without breaking MVCC correctness.
- Logic removed/simplified: per-message dispatch and consumption of frequent non-persisted empty timeticks is suppressed. An MVCC-aware filter, emptyTimeTickSlowdowner (internal/util/pipeline/consuming_slowdown.go), short-circuits frequent empty timeticks in the stream pipeline (internal/util/pipeline/stream_pipeline.go), and the WAL flusher rate-limits non-persisted timetick dispatch to one emission per ~5s (internal/streamingnode/server/flusher/flusherimpl/wal_flusher.go). The delegator exposes GetLatestRequiredMVCCTimeTick to drive the filter (internal/querynodev2/delegator/delegator.go).
- Why this does not introduce data loss or regressions: the slowdowner always refreshes latestRequiredMVCCTimeTick via GetLatestRequiredMVCCTimeTick, (1) never filters timeticks below latestRequiredMVCCTimeTick, so existing tsafe/flush waits stay unblocked, and (2) always lets the first timetick at or above latestRequiredMVCCTimeTick pass to notify pending MVCC waits. Separately, WAL flusher suppression applies only to non-persisted timeticks and still emits once the 5s threshold elapses, preserving the periodic clock-sync messages used by flush/tsafe.
- Enhancement summary (where it takes effect): adds GetLatestRequiredMVCCTimeTick on ShardDelegator and LastestMVCCTimeTickGetter, wires emptyTimeTickSlowdowner into NewPipelineWithStream (internal/util/pipeline), and adds WAL flusher rate limiting plus metrics (internal/streamingnode/server/flusher/flusherimpl/wal_flusher.go, pkg/metrics) to reduce CPU and dispatch overhead while keeping MVCC correctness and periodic synchronization.

Signed-off-by: chyezh <chyezh@outlook.com>
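A rough sketch of the filtering rule described above, for illustration only: the type, field, and method names below are hypothetical, and the real emptyTimeTickSlowdowner in internal/util/pipeline/consuming_slowdown.go may differ. The filter only needs the latest required MVCC timestamp (as returned by GetLatestRequiredMVCCTimeTick) and the time of the last emitted tick:

```go
package main

import (
	"fmt"
	"time"
)

// emptyTickFilter is a hypothetical stand-in for the emptyTimeTickSlowdowner
// described in the PR; names here are illustrative, not the actual implementation.
type emptyTickFilter struct {
	getLatestRequiredMVCC func() uint64 // e.g. backed by delegator.GetLatestRequiredMVCCTimeTick
	interval              time.Duration // periodic clock-sync threshold (~5s in the description)
	lastPassedTick        uint64
	lastPassedAt          time.Time
}

// shouldPass reports whether an empty timetick must still be dispatched.
func (f *emptyTickFilter) shouldPass(tick uint64, now time.Time) bool {
	required := f.getLatestRequiredMVCC()
	switch {
	case tick < required:
		// A pending tsafe/flush wait may still depend on this tick; never filter it.
		return f.pass(tick, now)
	case f.lastPassedTick < required:
		// First tick at or above the latest required MVCC timestamp:
		// pass it so pending MVCC waits get notified.
		return f.pass(tick, now)
	case now.Sub(f.lastPassedAt) >= f.interval:
		// Periodic clock-sync tick still goes through.
		return f.pass(tick, now)
	default:
		// Frequent empty tick with no waiter behind it: safe to drop.
		return false
	}
}

func (f *emptyTickFilter) pass(tick uint64, now time.Time) bool {
	f.lastPassedTick = tick
	f.lastPassedAt = now
	return true
}

func main() {
	f := &emptyTickFilter{
		getLatestRequiredMVCC: func() uint64 { return 100 },
		interval:              5 * time.Second,
	}
	now := time.Now()
	fmt.Println(f.shouldPass(90, now))                     // true: below the required MVCC timestamp
	fmt.Println(f.shouldPass(120, now))                    // true: first tick reaching the required MVCC timestamp
	fmt.Println(f.shouldPass(130, now.Add(time.Second)))   // false: frequent empty tick, suppressed
	fmt.Println(f.shouldPass(140, now.Add(6*time.Second))) // true: 5s elapsed, periodic sync
}
```

The point mirrored here is that suppression is purely an optimization: any tick a waiter could still depend on, plus one tick per interval, always passes.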
170 lines · 5.7 KiB · Go
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pipeline

import (
	"context"
	"testing"

	"github.com/samber/lo"
	"github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/suite"

	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/mocks/util/mock_segcore"
	"github.com/milvus-io/milvus/internal/querynodev2/delegator"
	"github.com/milvus-io/milvus/internal/querynodev2/segments"
	"github.com/milvus-io/milvus/pkg/v2/mq/msgdispatcher"
	"github.com/milvus-io/milvus/pkg/v2/mq/msgstream"
	"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)

type PipelineTestSuite struct {
	suite.Suite
	// data
	collectionName   string
	collectionID     int64
	partitionIDs     []int64
	channel          string
	insertSegmentIDs []int64
	deletePKs        []int64

	// mocks
	segmentManager    *segments.MockSegmentManager
	collectionManager *segments.MockCollectionManager
	delegator         *delegator.MockShardDelegator
	msgDispatcher     *msgdispatcher.MockClient
	msgChan           chan *msgstream.MsgPack
}

func (suite *PipelineTestSuite) SetupSuite() {
	suite.collectionID = 111
	suite.collectionName = "test-collection"
	suite.channel = "test-channel"
	suite.partitionIDs = []int64{11, 22}
	suite.insertSegmentIDs = []int64{1, 2, 3}
	suite.deletePKs = []int64{1, 2, 3}
	suite.msgChan = make(chan *msgstream.MsgPack, 1)
}

func (suite *PipelineTestSuite) buildMsgPack(schema *schemapb.CollectionSchema) *msgstream.MsgPack {
	msgPack := &msgstream.MsgPack{
		BeginTs: 0,
		EndTs:   1,
		Msgs:    []msgstream.TsMsg{},
	}

	for id, segmentID := range suite.insertSegmentIDs {
		insertMsg := buildInsertMsg(suite.collectionID, suite.partitionIDs[id%len(suite.partitionIDs)], segmentID, suite.channel, 1)
		insertMsg.FieldsData = genFiledDataWithSchema(schema, 1)
		msgPack.Msgs = append(msgPack.Msgs, insertMsg)
	}

	for id, pk := range suite.deletePKs {
		deleteMsg := buildDeleteMsg(suite.collectionID, suite.partitionIDs[id%len(suite.partitionIDs)], suite.channel, 1)
		deleteMsg.PrimaryKeys = genDeletePK(pk)
		msgPack.Msgs = append(msgPack.Msgs, deleteMsg)
	}
	return msgPack
}

func (suite *PipelineTestSuite) SetupTest() {
	paramtable.Init()
	// init mock
	// init manager
	suite.collectionManager = segments.NewMockCollectionManager(suite.T())
	suite.segmentManager = segments.NewMockSegmentManager(suite.T())
	// init delegator
	suite.delegator = delegator.NewMockShardDelegator(suite.T())
	// init mq dispatcher
	suite.msgDispatcher = msgdispatcher.NewMockClient(suite.T())
}

func (suite *PipelineTestSuite) TestBasic() {
	// init mock
	// mock collection manager
	schema := mock_segcore.GenTestCollectionSchema(suite.collectionName, schemapb.DataType_Int64, true)
	collection, err := segments.NewCollection(suite.collectionID, schema, mock_segcore.GenTestIndexMeta(suite.collectionID, schema), &querypb.LoadMetaInfo{
		LoadType: querypb.LoadType_LoadCollection,
	})
	suite.Require().NoError(err)
	suite.collectionManager.EXPECT().Get(suite.collectionID).Return(collection)

	// mock mq factory
	suite.msgDispatcher.EXPECT().Register(mock.Anything, mock.Anything).Return(suite.msgChan, nil)
	suite.msgDispatcher.EXPECT().Deregister(suite.channel)

	// mock delegator
	suite.delegator.EXPECT().AddExcludedSegments(mock.Anything).Maybe()
	suite.delegator.EXPECT().VerifyExcludedSegments(mock.Anything, mock.Anything).Return(true).Maybe()
	suite.delegator.EXPECT().TryCleanExcludedSegments(mock.Anything).Maybe()

	suite.delegator.EXPECT().ProcessInsert(mock.Anything).Run(
		func(insertRecords map[int64]*delegator.InsertData) {
			for segmentID := range insertRecords {
				suite.True(lo.Contains(suite.insertSegmentIDs, segmentID))
			}
		})

	suite.delegator.EXPECT().ProcessDelete(mock.Anything, mock.Anything).Run(
		func(deleteData []*delegator.DeleteData, ts uint64) {
			for _, data := range deleteData {
				for _, pk := range data.PrimaryKeys {
					suite.True(lo.Contains(suite.deletePKs, pk.GetValue().(int64)))
				}
			}
		})

	// build input msg ahead of time to set up expectations
	in := suite.buildMsgPack(schema)

	// use a channel to signal when UpdateTSafe is called (last operation in pipeline)
	done := make(chan struct{})
	suite.delegator.EXPECT().UpdateTSafe(in.EndTs).Run(func(ts uint64) {
		close(done)
	}).Return()
	suite.delegator.EXPECT().GetLatestRequiredMVCCTimeTick().Return(0).Maybe()

	// build pipeline
	manager := &segments.Manager{
		Collection: suite.collectionManager,
		Segment:    suite.segmentManager,
	}
	pipelineObj, err := NewPipeLine(collection, suite.channel, manager, suite.msgDispatcher, suite.delegator)
	suite.NoError(err)

	// Init Consumer
	err = pipelineObj.ConsumeMsgStream(context.Background(), &msgpb.MsgPosition{})
	suite.NoError(err)

	err = pipelineObj.Start()
	suite.NoError(err)
	defer pipelineObj.Close()

	// send message to pipeline
	suite.msgChan <- in

	// wait for pipeline to process the message
	<-done
}

func TestQueryNodePipeline(t *testing.T) {
	suite.Run(t, new(PipelineTestSuite))
}