// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package external import ( "context" "testing" "github.com/stretchr/testify/suite" "github.com/milvus-io/milvus/pkg/v2/proto/datapb" "github.com/milvus-io/milvus/pkg/v2/proto/indexpb" ) type UpdateExternalTaskSuite struct { suite.Suite collectionID int64 taskID int64 } func (s *UpdateExternalTaskSuite) SetupSuite() { s.collectionID = 1000 s.taskID = 1 } func (s *UpdateExternalTaskSuite) TestNewUpdateExternalTask() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, ExternalSource: "test_source", ExternalSpec: "test_spec", } task := NewUpdateExternalTask(ctx, cancel, req) s.NotNil(task) s.Equal(s.collectionID, task.req.GetCollectionID()) s.Equal(s.taskID, task.req.GetTaskID()) s.Equal(indexpb.JobState_JobStateInit, task.GetState()) s.Contains(task.Name(), "UpdateExternalTask") } func (s *UpdateExternalTaskSuite) TestTaskLifecycle() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, ExternalSource: "test_source", CurrentSegments: []*datapb.SegmentInfo{ {ID: 1, CollectionID: s.collectionID, NumOfRows: 1000}, {ID: 2, CollectionID: s.collectionID, NumOfRows: 2000}, }, } task := NewUpdateExternalTask(ctx, cancel, req) // Test OnEnqueue err := task.OnEnqueue(ctx) s.NoError(err) // Test PreExecute err = task.PreExecute(ctx) s.NoError(err) // Test Execute err = task.Execute(ctx) s.NoError(err) // Test PostExecute err = task.PostExecute(ctx) s.NoError(err) // Test GetSlot s.Equal(int64(1), task.GetSlot()) } func (s *UpdateExternalTaskSuite) TestPreExecuteWithNilRequest() { ctx, cancel := context.WithCancel(context.Background()) task := &UpdateExternalTask{ ctx: ctx, cancel: cancel, req: nil, } err := task.PreExecute(ctx) s.Error(err) } func (s *UpdateExternalTaskSuite) TestSetAndGetState() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, } task := NewUpdateExternalTask(ctx, cancel, req) task.SetState(indexpb.JobState_JobStateInProgress, "") s.Equal(indexpb.JobState_JobStateInProgress, task.GetState()) task.SetState(indexpb.JobState_JobStateFailed, "test failure") s.Equal(indexpb.JobState_JobStateFailed, task.GetState()) s.Equal("test failure", task.failReason) } func (s *UpdateExternalTaskSuite) TestReset() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, } task := NewUpdateExternalTask(ctx, cancel, req) task.Reset() s.Nil(task.ctx) s.Nil(task.cancel) s.Nil(task.req) s.Nil(task.tr) s.Nil(task.updatedSegments) } func (s *UpdateExternalTaskSuite) TestBalanceFragmentsToSegments_Empty() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, } task := NewUpdateExternalTask(ctx, cancel, req) result, err := task.balanceFragmentsToSegments(context.Background(), []Fragment{}) s.NoError(err) s.Nil(result) } func (s *UpdateExternalTaskSuite) TestBalanceFragmentsToSegments_SingleFragment() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, } task := NewUpdateExternalTask(ctx, cancel, req) fragments := []Fragment{ {FragmentID: 1, RowCount: 500}, } result, err := task.balanceFragmentsToSegments(context.Background(), fragments) s.NoError(err) s.Len(result, 1) s.Equal(int64(500), result[0].GetNumOfRows()) } func (s *UpdateExternalTaskSuite) TestBalanceFragmentsToSegments_MultipleFragments() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, } task := NewUpdateExternalTask(ctx, cancel, req) fragments := []Fragment{ {FragmentID: 1, RowCount: 300000}, {FragmentID: 2, RowCount: 400000}, {FragmentID: 3, RowCount: 500000}, {FragmentID: 4, RowCount: 600000}, {FragmentID: 5, RowCount: 200000}, } result, err := task.balanceFragmentsToSegments(context.Background(), fragments) s.NoError(err) // Verify total rows are preserved var totalRows int64 for _, seg := range result { totalRows += seg.GetNumOfRows() } s.Equal(int64(2000000), totalRows) // Verify segments are reasonably balanced if len(result) > 1 { var minRows, maxRows int64 = result[0].GetNumOfRows(), result[0].GetNumOfRows() for _, seg := range result { if seg.GetNumOfRows() < minRows { minRows = seg.GetNumOfRows() } if seg.GetNumOfRows() > maxRows { maxRows = seg.GetNumOfRows() } } // The difference between max and min should be reasonable // (less than 2x the average fragment size) avgFragmentSize := int64(2000000 / 5) s.Less(maxRows-minRows, avgFragmentSize*2) } } func (s *UpdateExternalTaskSuite) TestPreExecuteContextCanceled() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, } task := NewUpdateExternalTask(ctx, cancel, req) cancel() err := task.PreExecute(ctx) s.ErrorIs(err, context.Canceled) } func (s *UpdateExternalTaskSuite) TestExecuteContextCanceled() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, } task := NewUpdateExternalTask(ctx, cancel, req) cancel() err := task.Execute(ctx) s.ErrorIs(err, context.Canceled) } func (s *UpdateExternalTaskSuite) TestBalanceFragmentsToSegmentsContextCanceled() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, } task := NewUpdateExternalTask(ctx, cancel, req) cancel() result, err := task.balanceFragmentsToSegments(ctx, []Fragment{{FragmentID: 1, RowCount: 10}}) s.ErrorIs(err, context.Canceled) s.Nil(result) } func (s *UpdateExternalTaskSuite) TestOrganizeSegments_AllFragmentsExist() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, CurrentSegments: []*datapb.SegmentInfo{ {ID: 1, CollectionID: s.collectionID, NumOfRows: 1000}, {ID: 2, CollectionID: s.collectionID, NumOfRows: 2000}, }, } task := NewUpdateExternalTask(ctx, cancel, req) // Simulate current segment fragments mapping currentSegmentFragments := SegmentFragments{ 1: []Fragment{{FragmentID: 101, RowCount: 1000}}, 2: []Fragment{{FragmentID: 102, RowCount: 2000}}, } // New fragments contain all existing fragments newFragments := []Fragment{ {FragmentID: 101, RowCount: 1000}, {FragmentID: 102, RowCount: 2000}, } result, err := task.organizeSegments(context.Background(), currentSegmentFragments, newFragments) s.NoError(err) // Both segments should be kept s.Len(result, 2) } func (s *UpdateExternalTaskSuite) TestOrganizeSegments_FragmentRemoved() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, CurrentSegments: []*datapb.SegmentInfo{ {ID: 1, CollectionID: s.collectionID, NumOfRows: 1000}, {ID: 2, CollectionID: s.collectionID, NumOfRows: 2000}, }, } task := NewUpdateExternalTask(ctx, cancel, req) // Segment 1 has fragment 101, Segment 2 has fragments 102 and 103 currentSegmentFragments := SegmentFragments{ 1: []Fragment{{FragmentID: 101, RowCount: 1000}}, 2: []Fragment{{FragmentID: 102, RowCount: 1000}, {FragmentID: 103, RowCount: 1000}}, } // Fragment 103 is removed - segment 2 should be invalidated newFragments := []Fragment{ {FragmentID: 101, RowCount: 1000}, {FragmentID: 102, RowCount: 1000}, } result, err := task.organizeSegments(context.Background(), currentSegmentFragments, newFragments) s.NoError(err) // Segment 1 should be kept, segment 2 invalidated, fragment 102 becomes orphan // Result should have segment 1 kept + new segment for orphan fragment 102 s.GreaterOrEqual(len(result), 1) // Verify segment 1 is in the result hasSegment1 := false for _, seg := range result { if seg.GetID() == 1 { hasSegment1 = true break } } s.True(hasSegment1, "Segment 1 should be kept") } func (s *UpdateExternalTaskSuite) TestOrganizeSegments_NewFragmentsAdded() { ctx, cancel := context.WithCancel(context.Background()) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, CurrentSegments: []*datapb.SegmentInfo{ {ID: 1, CollectionID: s.collectionID, NumOfRows: 1000}, }, } task := NewUpdateExternalTask(ctx, cancel, req) currentSegmentFragments := SegmentFragments{ 1: []Fragment{{FragmentID: 101, RowCount: 1000}}, } // New fragments include existing + new ones newFragments := []Fragment{ {FragmentID: 101, RowCount: 1000}, {FragmentID: 102, RowCount: 2000}, // new {FragmentID: 103, RowCount: 3000}, // new } result, err := task.organizeSegments(context.Background(), currentSegmentFragments, newFragments) s.NoError(err) // Should have segment 1 kept + new segments for orphan fragments s.GreaterOrEqual(len(result), 2) // Verify total rows var totalRows int64 for _, seg := range result { totalRows += seg.GetNumOfRows() } s.Equal(int64(6000), totalRows) } func (s *UpdateExternalTaskSuite) TestNewSegmentRowMapping() { fragments := []Fragment{ {FragmentID: 1, RowCount: 100}, {FragmentID: 2, RowCount: 200}, {FragmentID: 3, RowCount: 150}, } mapping := NewSegmentRowMapping(1001, fragments) s.Equal(int64(1001), mapping.SegmentID) s.Equal(int64(450), mapping.TotalRows) s.Len(mapping.Ranges, 3) // Check ranges s.Equal(int64(1), mapping.Ranges[0].FragmentID) s.Equal(int64(0), mapping.Ranges[0].StartRow) s.Equal(int64(100), mapping.Ranges[0].EndRow) s.Equal(int64(2), mapping.Ranges[1].FragmentID) s.Equal(int64(100), mapping.Ranges[1].StartRow) s.Equal(int64(300), mapping.Ranges[1].EndRow) s.Equal(int64(3), mapping.Ranges[2].FragmentID) s.Equal(int64(300), mapping.Ranges[2].StartRow) s.Equal(int64(450), mapping.Ranges[2].EndRow) } func (s *UpdateExternalTaskSuite) TestGetFragmentByRowIndex() { fragments := []Fragment{ {FragmentID: 1, RowCount: 100}, {FragmentID: 2, RowCount: 200}, {FragmentID: 3, RowCount: 150}, } mapping := NewSegmentRowMapping(1001, fragments) // Test first fragment r := mapping.GetFragmentByRowIndex(0) s.NotNil(r) s.Equal(int64(1), r.FragmentID) r = mapping.GetFragmentByRowIndex(99) s.NotNil(r) s.Equal(int64(1), r.FragmentID) // Test second fragment r = mapping.GetFragmentByRowIndex(100) s.NotNil(r) s.Equal(int64(2), r.FragmentID) r = mapping.GetFragmentByRowIndex(299) s.NotNil(r) s.Equal(int64(2), r.FragmentID) // Test third fragment r = mapping.GetFragmentByRowIndex(300) s.NotNil(r) s.Equal(int64(3), r.FragmentID) r = mapping.GetFragmentByRowIndex(449) s.NotNil(r) s.Equal(int64(3), r.FragmentID) // Test out of range r = mapping.GetFragmentByRowIndex(-1) s.Nil(r) r = mapping.GetFragmentByRowIndex(450) s.Nil(r) r = mapping.GetFragmentByRowIndex(1000) s.Nil(r) } func (s *UpdateExternalTaskSuite) TestGetFragmentByRowIndex_LocalIndex() { fragments := []Fragment{ {FragmentID: 1, RowCount: 100}, {FragmentID: 2, RowCount: 200}, } mapping := NewSegmentRowMapping(1001, fragments) // Row 0 -> fragment 1, local index 0 r := mapping.GetFragmentByRowIndex(0) s.NotNil(r) s.Equal(int64(1), r.FragmentID) s.Equal(int64(0), 0-r.StartRow) // local index // Row 50 -> fragment 1, local index 50 r = mapping.GetFragmentByRowIndex(50) s.NotNil(r) s.Equal(int64(1), r.FragmentID) s.Equal(int64(50), 50-r.StartRow) // Row 100 -> fragment 2, local index 0 r = mapping.GetFragmentByRowIndex(100) s.NotNil(r) s.Equal(int64(2), r.FragmentID) s.Equal(int64(0), 100-r.StartRow) // Row 150 -> fragment 2, local index 50 r = mapping.GetFragmentByRowIndex(150) s.NotNil(r) s.Equal(int64(2), r.FragmentID) s.Equal(int64(50), 150-r.StartRow) // Row 299 -> fragment 2, local index 199 r = mapping.GetFragmentByRowIndex(299) s.NotNil(r) s.Equal(int64(2), r.FragmentID) s.Equal(int64(199), 299-r.StartRow) } func (s *UpdateExternalTaskSuite) TestSegmentRowMapping_EmptyFragments() { mapping := NewSegmentRowMapping(1001, []Fragment{}) s.Equal(int64(0), mapping.TotalRows) s.Len(mapping.Ranges, 0) r := mapping.GetFragmentByRowIndex(0) s.Nil(r) } func (s *UpdateExternalTaskSuite) TestMappingsComputedDuringOrganize() { ctx, cancel := context.WithCancel(context.Background()) // Use segment ID 100 to avoid collision with placeholder ID (1) req := &datapb.UpdateExternalCollectionRequest{ CollectionID: s.collectionID, TaskID: s.taskID, CurrentSegments: []*datapb.SegmentInfo{ {ID: 100, CollectionID: s.collectionID, NumOfRows: 1000}, }, } task := NewUpdateExternalTask(ctx, cancel, req) // Simulate current segment has fragment 101 currentSegmentFragments := SegmentFragments{ 100: []Fragment{{FragmentID: 101, RowCount: 1000}}, } // New fragments include existing + new ones newFragments := []Fragment{ {FragmentID: 101, RowCount: 1000}, {FragmentID: 102, RowCount: 500}, } _, err := task.organizeSegments(context.Background(), currentSegmentFragments, newFragments) s.NoError(err) mappings := task.GetSegmentMappings() s.Len(mappings, 2) // Check mapping for kept segment (ID=100) mapping100 := mappings[100] s.NotNil(mapping100) s.Equal(int64(1000), mapping100.TotalRows) s.Len(mapping100.Ranges, 1) s.Equal(int64(101), mapping100.Ranges[0].FragmentID) // Check mapping for new segment (ID=1, placeholder) mapping1 := mappings[1] s.NotNil(mapping1) s.Equal(int64(500), mapping1.TotalRows) s.Len(mapping1.Ranges, 1) s.Equal(int64(102), mapping1.Ranges[0].FragmentID) } func TestUpdateExternalTaskSuite(t *testing.T) { suite.Run(t, new(UpdateExternalTaskSuite)) }