milvus/internal/datacoord/file_resource_manager_test.go
aoiasd ee216877bb
enhance: support compaction with file resource in ref mode (#46399)
Add support for DataNode compaction using file resources in ref mode.
SortCompaction and StatsJobs will build text indexes, which may use file
resources.
relate: https://github.com/milvus-io/milvus/issues/43687

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: file resources (analyzer binaries/metadata) are only
fetched, downloaded and used when the node is configured in Ref mode
(fileresource.IsRefMode via CommonCfg.QNFileResourceMode /
DNFileResourceMode); Sync now carries a version and managers track
per-resource versions/resource IDs so newer resource sets win and older
entries are pruned (RefManager/SynchManager resource maps).
- Logic removed / simplified: component-specific FileResourceMode flags
and an indirection through a long-lived BinlogIO wrapper were
consolidated — file-resource mode moved to CommonCfg, Sync/Download APIs
became version- and context-aware, and compaction/index tasks accept a
ChunkManager directly (binlog IO wrapper creation inlined). This
eliminates duplicated config checks and wrapper indirection while
preserving the same chunk/IO semantics.
- Why no data loss or behavior regression: all file-resource code paths
are gated by the configured mode (default remains "sync"); when not in
ref-mode or when no resources exist, compaction and stats flows follow
existing code paths unchanged. Versioned Sync + resourceID maps ensure
newly synced sets replace older ones and RefManager prunes stale files;
GetFileResources returns an error if requested IDs are missing (prevents
silent use of wrong resources). Analyzer naming/parameter changes add
analyzer_extra_info but default-callers pass "" so existing analyzers
and index contents remain unchanged.
- New capability: DataNode compaction and StatsJobs can now build text
indexes using external file resources in Ref mode — DataCoord exposes
GetFileResources and populates CompactionPlan.file_resources;
SortCompaction/StatsTask download resources via fileresource.Manager,
produce an analyzer_extra_info JSON (storage + resource->id map) via
analyzer.BuildExtraResourceInfo, and propagate analyzer_extra_info into
BuildIndexInfo so the tantivy bindings can load custom analyzers during
text index creation.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
2026-01-06 16:31:31 +08:00

224 lines
6.8 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"context"
"testing"
"time"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"google.golang.org/grpc"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/datacoord/session"
metamock "github.com/milvus-io/milvus/internal/metastore/mocks"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/pkg/v2/proto/internalpb"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)
// FileResourceManagerSuite is the testify suite for FileResourceManager. It
// carries the manager under test plus the meta and mocks it is wired to.
type FileResourceManagerSuite struct {
	suite.Suite

	// ctx is passed to the manager and reused in mock expectations.
	ctx context.Context

	// Collaborators handed to (or reached through) the manager.
	testMeta        *meta
	mockCatalog     *metamock.DataCoordCatalog
	mockNodeManager *session.MockNodeManager
	mockDataNode    *mocks.MockDataNodeClient

	// manager is the FileResourceManager being tested.
	manager *FileResourceManager
}
// SetupSuite is the suite-level setup hook; it only initializes paramtable.
func (suite *FileResourceManagerSuite) SetupSuite() {
	paramtable.Init()
}
// SetupTest prepares a fresh context, fresh mocks, an empty meta and a started
// FileResourceManager wired to them.
func (suite *FileResourceManagerSuite) SetupTest() {
	t := suite.T()
	suite.ctx = context.Background()

	// Mocks are constructed against the current T.
	suite.mockNodeManager = session.NewMockNodeManager(t)
	suite.mockDataNode = mocks.NewMockDataNodeClient(t)
	suite.mockCatalog = metamock.NewDataCoordCatalog(t)

	// Minimal meta: only the catalog and the file-resource bookkeeping are set.
	emptyMeta := &meta{
		catalog:         suite.mockCatalog,
		resourceMeta:    map[string]*internalpb.FileResourceInfo{},
		resourceIDMap:   map[int64]*internalpb.FileResourceInfo{},
		resourceVersion: 0,
	}
	suite.testMeta = emptyMeta

	// Build the manager under test and start it.
	suite.manager = NewFileResourceManager(suite.ctx, emptyMeta, suite.mockNodeManager)
	suite.manager.Start()
}
// TearDownTest closes the manager and then asserts the expectations recorded on
// the node-manager and DataNode mocks.
func (suite *FileResourceManagerSuite) TearDownTest() {
	// Close the manager before inspecting the mocks.
	suite.manager.Close()

	t := suite.T()
	suite.mockNodeManager.AssertExpectations(t)
	suite.mockDataNode.AssertExpectations(t)
}
// TestNormal covers the notify-driven path: a resource is added to meta,
// Notify is called, and the single registered DataNode is expected to receive
// one SyncFileResource request carrying exactly that resource.
func (suite *FileResourceManagerSuite) TestNormal() {
	testResource := &internalpb.FileResourceInfo{
		Id:   1,
		Name: "test",
		Path: "/tmp/test",
	}

	suite.mockNodeManager.EXPECT().GetClientIDs().Return([]int64{1})
	suite.mockNodeManager.EXPECT().GetClient(int64(1)).Return(suite.mockDataNode, nil)

	// Capacity 1 plus Once() below means the callback's send never blocks.
	syncCh := make(chan struct{}, 1)
	suite.mockDataNode.EXPECT().SyncFileResource(mock.Anything, mock.Anything, mock.Anything).Run(func(ctx context.Context, in *internalpb.SyncFileResourceRequest, opts ...grpc.CallOption) {
		// A failed length assertion does not abort this callback, so the index
		// access must be guarded to avoid an out-of-range panic.
		if suite.Len(in.Resources, 1) {
			got := in.Resources[0]
			suite.Equal(testResource.Id, got.Id)
			suite.Equal(testResource.Name, got.Name)
			suite.Equal(testResource.Path, got.Path)
		}
		// Always signal so the test does not wait for the full timeout after an
		// assertion above has already marked it as failed.
		syncCh <- struct{}{}
	}).Return(merr.Success(), nil).Once()
	suite.mockCatalog.EXPECT().SaveFileResource(mock.Anything, mock.Anything, mock.Anything).Return(nil)

	// Adding the resource must succeed; otherwise there is nothing to sync.
	suite.Require().NoError(suite.testMeta.AddFileResource(suite.ctx, testResource))

	// Trigger a sync and wait for the mock DataNode to observe it.
	suite.manager.Notify()

	suite.Eventually(func() bool {
		select {
		case <-syncCh:
			return true
		default:
			return false
		}
	}, 2*time.Second, 100*time.Millisecond)
}
// TestSync_Success seeds meta with one resource at version 100 and expects
// sync() to send that resource list and version to the only DataNode, return
// no error, and store the version in distribution[nodeID].
func (suite *FileResourceManagerSuite) TestSync_Success() {
	var (
		nodeID  int64  = 1
		version uint64 = 100
	)
	resource := &internalpb.FileResourceInfo{
		Id:   1,
		Name: "test.file",
		Path: "/test/test.file",
	}
	resources := []*internalpb.FileResourceInfo{resource}

	// Write the state straight into meta rather than going through AddFileResource.
	suite.testMeta.resourceMeta["test.file"] = resource
	suite.testMeta.resourceVersion = version

	// The DataNode must be called with exactly this request.
	wantReq := &internalpb.SyncFileResourceRequest{
		Resources: resources,
		Version:   version,
	}
	suite.mockNodeManager.EXPECT().GetClientIDs().Return([]int64{nodeID})
	suite.mockNodeManager.EXPECT().GetClient(nodeID).Return(suite.mockDataNode, nil)
	suite.mockDataNode.EXPECT().SyncFileResource(suite.ctx, wantReq).Return(merr.Success(), nil)

	err := suite.manager.sync()

	suite.NoError(err)
	synced := suite.manager.distribution[nodeID]
	suite.Equal(version, synced)
}
// TestSync_NodeClientError makes GetClient fail for the only node and expects
// sync() to return an error while distribution[nodeID] stays at zero.
func (suite *FileResourceManagerSuite) TestSync_NodeClientError() {
	var (
		nodeID  int64  = 1
		version uint64 = 100
	)

	// Give meta a non-zero version by writing the field directly.
	suite.testMeta.resourceVersion = version

	// The node is listed, but its client cannot be obtained.
	clientErr := merr.WrapErrNodeNotFound(nodeID)
	suite.mockNodeManager.EXPECT().GetClientIDs().Return([]int64{nodeID})
	suite.mockNodeManager.EXPECT().GetClient(nodeID).Return(nil, clientErr)

	err := suite.manager.sync()

	// The failure is surfaced and nothing is recorded for the node.
	suite.Error(err)
	synced := suite.manager.distribution[nodeID]
	suite.Equal(uint64(0), synced)
}
// TestSync_SyncFileResourceError makes the DataNode's SyncFileResource call
// return a Go error and expects sync() to return an error while
// distribution[nodeID] stays at zero.
func (suite *FileResourceManagerSuite) TestSync_SyncFileResourceError() {
	var (
		nodeID  int64  = 1
		version uint64 = 100
	)

	// Give meta a non-zero version by writing the field directly.
	suite.testMeta.resourceVersion = version

	// The client is available, but the RPC itself fails.
	rpcErr := merr.WrapErrServiceInternal("sync failed")
	anyRequest := mock.AnythingOfType("*internalpb.SyncFileResourceRequest")
	suite.mockNodeManager.EXPECT().GetClientIDs().Return([]int64{nodeID})
	suite.mockNodeManager.EXPECT().GetClient(nodeID).Return(suite.mockDataNode, nil)
	suite.mockDataNode.EXPECT().SyncFileResource(suite.ctx, anyRequest).Return(nil, rpcErr)

	err := suite.manager.sync()

	// The failure is surfaced and nothing is recorded for the node.
	suite.Error(err)
	synced := suite.manager.distribution[nodeID]
	suite.Equal(uint64(0), synced)
}
// TestSync_SyncFileResourceStatusError makes SyncFileResource return a nil Go
// error together with an UnexpectedError status, and expects sync() to return
// an error while distribution[nodeID] stays at zero.
func (suite *FileResourceManagerSuite) TestSync_SyncFileResourceStatusError() {
	var (
		nodeID  int64  = 1
		version uint64 = 100
	)

	// Give meta a non-zero version by writing the field directly.
	suite.testMeta.resourceVersion = version

	// The RPC completes, but the returned status reports a failure.
	failedStatus := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_UnexpectedError,
		Reason:    "internal error",
	}
	anyRequest := mock.AnythingOfType("*internalpb.SyncFileResourceRequest")
	suite.mockNodeManager.EXPECT().GetClientIDs().Return([]int64{nodeID})
	suite.mockNodeManager.EXPECT().GetClient(nodeID).Return(suite.mockDataNode, nil)
	suite.mockDataNode.EXPECT().SyncFileResource(suite.ctx, anyRequest).Return(failedStatus, nil)

	err := suite.manager.sync()

	// The failure is surfaced and nothing is recorded for the node.
	suite.Error(err)
	synced := suite.manager.distribution[nodeID]
	suite.Equal(uint64(0), synced)
}
// TestFileResourceManagerSuite is the go test entry point that runs
// FileResourceManagerSuite.
func TestFileResourceManagerSuite(t *testing.T) {
	suite.Run(t, &FileResourceManagerSuite{})
}