mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
Signed-off-by: wayblink <anyang.wang@zilliz.com> Signed-off-by: Cai Zhang <cai.zhang@zilliz.com> Signed-off-by: chasingegg <chao.gao@zilliz.com> Co-authored-by: chasingegg <chao.gao@zilliz.com>
445 lines
14 KiB
Go
445 lines
14 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package indexnode
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/metautil"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/indexparams"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/etcdpb"
|
|
|
|
"github.com/milvus-io/milvus/internal/storage"
|
|
|
|
"github.com/apache/arrow/go/v12/arrow"
|
|
"github.com/apache/arrow/go/v12/arrow/array"
|
|
"github.com/apache/arrow/go/v12/arrow/memory"
|
|
"github.com/stretchr/testify/suite"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
|
"github.com/milvus-io/milvus-storage/go/storage/options"
|
|
"github.com/milvus-io/milvus-storage/go/storage/schema"
|
|
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
|
"github.com/milvus-io/milvus/internal/util/dependency"
|
|
"github.com/milvus-io/milvus/internal/util/typeutil"
|
|
"github.com/milvus-io/milvus/pkg/common"
|
|
"github.com/milvus-io/milvus/pkg/util/metric"
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
|
"github.com/milvus-io/milvus/pkg/util/timerecord"
|
|
)
|
|
|
|
// IndexBuildTaskSuite exercises the indexBuildTask lifecycle
// (Prepare / BuildIndex / SaveIndexFiles) over a local chunk manager.
type IndexBuildTaskSuite struct {
	suite.Suite
	// schema is the collection schema used to serialize the test insert data.
	schema       *schemapb.CollectionSchema
	collectionID int64
	partitionID  int64
	segmentID    int64
	// dataPath is the chunk-manager path the serialized insert binlog is
	// written to and later read from via CreateJobRequest.DataPaths.
	dataPath string
}
|
|
|
|
func (suite *IndexBuildTaskSuite) SetupSuite() {
|
|
paramtable.Init()
|
|
suite.collectionID = 1000
|
|
suite.partitionID = 1001
|
|
suite.segmentID = 1002
|
|
suite.dataPath = "/tmp/milvus/data/1000/1001/1002/3/1"
|
|
}
|
|
|
|
// SetupTest rebuilds the collection schema before each test: system
// row-id/timestamp fields plus pk, ts and a 1-dimensional float vector.
func (suite *IndexBuildTaskSuite) SetupTest() {
	suite.schema = &schemapb.CollectionSchema{
		Name:        "test",
		Description: "test",
		AutoID:      false,
		Fields: []*schemapb.FieldSchema{
			{FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
			{FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
			{FieldID: 100, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
			{FieldID: 101, Name: "ts", DataType: schemapb.DataType_Int64},
			// dim=1 keeps the serialized vector payload minimal for the test.
			{FieldID: 102, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}},
		},
	}
}
|
|
|
|
// serializeData encodes a fixed three-row InsertData batch (including the
// 1-dim float vector field 102) into storage blobs with the insert codec,
// so tests can write it through a chunk manager as a binlog.
func (suite *IndexBuildTaskSuite) serializeData() ([]*storage.Blob, error) {
	insertCodec := storage.NewInsertCodecWithSchema(&etcdpb.CollectionMeta{
		Schema: suite.schema,
	})
	return insertCodec.Serialize(suite.partitionID, suite.segmentID, &storage.InsertData{
		Data: map[storage.FieldID]storage.FieldData{
			0:   &storage.Int64FieldData{Data: []int64{0, 1, 2}}, // row IDs
			1:   &storage.Int64FieldData{Data: []int64{1, 2, 3}}, // timestamps
			100: &storage.Int64FieldData{Data: []int64{0, 1, 2}}, // pk
			101: &storage.Int64FieldData{Data: []int64{0, 1, 2}}, // ts
			102: &storage.FloatVectorFieldData{Data: []float32{1, 2, 3}, Dim: 1},
		},
		// NOTE(review): unkeyed composite literal for storage.BlobInfo —
		// go vet's composites check would flag this; consider naming the
		// field explicitly (field name not visible from this file).
		Infos: []storage.BlobInfo{{3}},
	})
}
|
|
|
|
// TestBuildMemoryIndex drives an indexBuildTask end-to-end for an in-memory
// FLAT index: it serializes insert data, writes it via a local chunk
// manager, then runs Prepare, BuildIndex and SaveIndexFiles in order.
func (suite *IndexBuildTaskSuite) TestBuildMemoryIndex() {
	ctx, cancel := context.WithCancel(context.Background())
	req := &indexpb.CreateJobRequest{
		BuildID:      1,
		IndexVersion: 1,
		DataPaths:    []string{suite.dataPath},
		IndexID:      0,
		IndexName:    "",
		IndexParams:  []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: metric.L2}, {Key: common.DimKey, Value: "1"}},
		TypeParams:   []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}},
		NumRows:      10,
		// Local storage keeps the test hermetic (no remote object store).
		StorageConfig: &indexpb.StorageConfig{
			RootPath:    "/tmp/milvus/data",
			StorageType: "local",
		},
		CollectionID: 1,
		PartitionID:  1,
		SegmentID:    1,
		// NOTE(review): FieldID 3 does not match the vector field (102) in
		// suite.schema, though it does match the "/3/1" suffix of
		// suite.dataPath — confirm this is intentional.
		FieldID:   3,
		FieldName: "vec",
		FieldType: schemapb.DataType_FloatVector,
	}

	cm, err := NewChunkMgrFactory().NewChunkManager(ctx, req.GetStorageConfig())
	suite.NoError(err)
	blobs, err := suite.serializeData()
	suite.NoError(err)
	// Place the serialized binlog where req.DataPaths says it will be read.
	err = cm.Write(ctx, suite.dataPath, blobs[0].Value)
	suite.NoError(err)

	t := &indexBuildTask{
		ident:               "",
		cancel:              cancel,
		ctx:                 ctx,
		cm:                  cm,
		req:                 req,
		currentIndexVersion: 0,
		BuildID:             req.GetBuildID(),
		nodeID:              1,
		ClusterID:           req.GetClusterID(),
		collectionID:        req.GetCollectionID(),
		partitionID:         req.GetPartitionID(),
		segmentID:           req.GetSegmentID(),
		fieldID:             req.GetFieldID(),
		fieldName:           req.GetFieldName(),
		fieldType:           req.GetFieldType(),
		tr:                  timerecord.NewTimeRecorder("test-indexBuildTask"),
		queueDur:            0,
		statistic:           indexpb.JobInfo{},
		node:                NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)),
	}

	// The three task phases must each succeed, in this order.
	err = t.Prepare(context.Background())
	suite.NoError(err)
	err = t.BuildIndex(context.Background())
	suite.NoError(err)
	err = t.SaveIndexFiles(context.Background())
	suite.NoError(err)
}
|
|
|
|
func (suite *IndexBuildTaskSuite) Test_BuildDISKANNIndex() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
req := &indexpb.CreateJobRequest{
|
|
BuildID: 1,
|
|
IndexVersion: 1,
|
|
DataPaths: []string{suite.dataPath},
|
|
IndexID: 0,
|
|
IndexName: "",
|
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "DISKANN"},
|
|
{Key: common.MetricTypeKey, Value: metric.L2},
|
|
{Key: indexparams.PQCodeBudgetRatioKey, Value: "0.125"},
|
|
{Key: indexparams.NumBuildThreadRatioKey, Value: "1.0"},
|
|
{Key: indexparams.SearchCacheBudgetRatioKey, Value: "0.10"},
|
|
{Key: indexparams.SearchCacheBudgetRatioKey, Value: "0.10"},
|
|
{Key: indexparams.NumLoadThreadRatioKey, Value: "8.0"},
|
|
{Key: indexparams.BeamWidthRatioKey, Value: "4.0"},
|
|
},
|
|
TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}},
|
|
NumRows: 10,
|
|
StorageConfig: &indexpb.StorageConfig{
|
|
RootPath: "/tmp/milvus/data",
|
|
StorageType: "local",
|
|
},
|
|
CollectionID: 1,
|
|
PartitionID: 1,
|
|
SegmentID: 1,
|
|
FieldID: 3,
|
|
FieldName: "vec",
|
|
FieldType: schemapb.DataType_FloatVector,
|
|
}
|
|
|
|
cm, err := NewChunkMgrFactory().NewChunkManager(ctx, req.GetStorageConfig())
|
|
suite.NoError(err)
|
|
blobs, err := suite.serializeData()
|
|
suite.NoError(err)
|
|
err = cm.Write(ctx, suite.dataPath, blobs[0].Value)
|
|
suite.NoError(err)
|
|
|
|
t := &indexBuildTask{
|
|
ident: "",
|
|
cancel: cancel,
|
|
ctx: ctx,
|
|
cm: cm,
|
|
req: req,
|
|
currentIndexVersion: 0,
|
|
BuildID: req.GetBuildID(),
|
|
nodeID: 1,
|
|
ClusterID: req.GetClusterID(),
|
|
collectionID: req.GetCollectionID(),
|
|
partitionID: req.GetPartitionID(),
|
|
segmentID: req.GetSegmentID(),
|
|
fieldID: req.GetFieldID(),
|
|
fieldName: req.GetFieldName(),
|
|
fieldType: req.GetFieldType(),
|
|
tr: timerecord.NewTimeRecorder("test-indexBuildTask"),
|
|
queueDur: 0,
|
|
statistic: indexpb.JobInfo{},
|
|
node: NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)),
|
|
}
|
|
|
|
err = t.Prepare(context.Background())
|
|
suite.NoError(err)
|
|
err = t.BuildIndex(context.Background())
|
|
suite.NoError(err)
|
|
err = t.SaveIndexFiles(context.Background())
|
|
suite.NoError(err)
|
|
}
|
|
|
|
func TestIndexBuildTask(t *testing.T) {
|
|
suite.Run(t, new(IndexBuildTaskSuite))
|
|
}
|
|
|
|
// IndexBuildTaskV2Suite exercises indexBuildTaskV2, which reads data from a
// milvus-storage Space (storage v2) instead of binlog paths.
type IndexBuildTaskV2Suite struct {
	suite.Suite
	// schema is the collection schema; arrowSchema is its Arrow conversion
	// used to build records and open the Space.
	schema      *schemapb.CollectionSchema
	arrowSchema *arrow.Schema
	// space is the storage-v2 Space populated in SetupTest.
	space *milvus_storage.Space
}
|
|
|
|
// SetupSuite initializes the param table once for the whole suite.
func (suite *IndexBuildTaskV2Suite) SetupSuite() {
	paramtable.Init()
}
|
|
|
|
// SetupTest builds a pk/ts/1-dim-vector schema, opens a milvus-storage Space
// over a per-test temp dir, and writes a single record into it so
// TestBuildIndex has data to index.
func (suite *IndexBuildTaskV2Suite) SetupTest() {
	suite.schema = &schemapb.CollectionSchema{
		Name:        "test",
		Description: "test",
		AutoID:      false,
		Fields: []*schemapb.FieldSchema{
			{FieldID: 1, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
			{FieldID: 2, Name: "ts", DataType: schemapb.DataType_Int64},
			{FieldID: 3, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}},
		},
	}

	var err error
	suite.arrowSchema, err = typeutil.ConvertToArrowSchema(suite.schema.Fields)
	suite.NoError(err)

	// The Space lives in a temp dir so each test run starts clean.
	tmpDir := suite.T().TempDir()
	opt := options.NewSpaceOptionBuilder().
		SetSchema(schema.NewSchema(
			suite.arrowSchema,
			&schema.SchemaOptions{
				PrimaryColumn: "pk",
				VectorColumn:  "vec",
				VersionColumn: "ts",
			})).
		Build()
	suite.space, err = milvus_storage.Open("file://"+tmpDir, opt)
	suite.NoError(err)

	// Build one Arrow record: pk=1, ts=1, and a 4-byte fixed-size binary
	// vector payload (one float32 element for dim=1).
	b := array.NewRecordBuilder(memory.DefaultAllocator, suite.arrowSchema)
	defer b.Release()
	b.Field(0).(*array.Int64Builder).AppendValues([]int64{1}, nil)
	b.Field(1).(*array.Int64Builder).AppendValues([]int64{1}, nil)
	fb := b.Field(2).(*array.FixedSizeBinaryBuilder)
	fb.Reserve(1)
	fb.Append([]byte{1, 2, 3, 4})

	rec := b.NewRecord()
	defer rec.Release()
	reader, err := array.NewRecordReader(suite.arrowSchema, []arrow.Record{rec})
	suite.NoError(err)
	err = suite.space.Write(reader, &options.DefaultWriteOptions)
	suite.NoError(err)
}
|
|
|
|
func (suite *IndexBuildTaskV2Suite) TestBuildIndex() {
|
|
req := &indexpb.CreateJobRequest{
|
|
BuildID: 1,
|
|
IndexVersion: 1,
|
|
IndexID: 0,
|
|
IndexName: "",
|
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: metric.L2}, {Key: common.DimKey, Value: "1"}},
|
|
TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}},
|
|
NumRows: 10,
|
|
StorageConfig: &indexpb.StorageConfig{
|
|
RootPath: "/tmp/milvus/data",
|
|
StorageType: "local",
|
|
},
|
|
CollectionID: 1,
|
|
PartitionID: 1,
|
|
SegmentID: 1,
|
|
FieldID: 3,
|
|
FieldName: "vec",
|
|
FieldType: schemapb.DataType_FloatVector,
|
|
StorePath: "file://" + suite.space.Path(),
|
|
StoreVersion: suite.space.GetCurrentVersion(),
|
|
IndexStorePath: "file://" + suite.space.Path(),
|
|
Dim: 4,
|
|
OptionalScalarFields: []*indexpb.OptionalFieldInfo{
|
|
{FieldID: 1, FieldName: "pk", FieldType: 5, DataIds: []int64{0}},
|
|
},
|
|
}
|
|
|
|
task := &indexBuildTaskV2{
|
|
indexBuildTask: &indexBuildTask{
|
|
ident: "test",
|
|
ctx: context.Background(),
|
|
BuildID: req.GetBuildID(),
|
|
ClusterID: req.GetClusterID(),
|
|
req: req,
|
|
tr: timerecord.NewTimeRecorder("test"),
|
|
node: NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)),
|
|
},
|
|
}
|
|
|
|
var err error
|
|
err = task.Prepare(context.Background())
|
|
suite.NoError(err)
|
|
err = task.BuildIndex(context.Background())
|
|
suite.NoError(err)
|
|
err = task.SaveIndexFiles(context.Background())
|
|
suite.NoError(err)
|
|
}
|
|
|
|
func TestIndexBuildTaskV2Suite(t *testing.T) {
|
|
suite.Run(t, new(IndexBuildTaskV2Suite))
|
|
}
|
|
|
|
// AnalysisTaskSuite exercises the analysisTask Prepare phase over a local
// chunk manager.
type AnalysisTaskSuite struct {
	suite.Suite
	// schema is the collection schema used to serialize the test insert data.
	schema       *schemapb.CollectionSchema
	collectionID int64
	partitionID  int64
	segmentID    int64
	// fieldID is the vector field (102 in the schema built by SetupTest).
	fieldID int64
	taskID  int64
}
|
|
|
|
func (suite *AnalysisTaskSuite) SetupSuite() {
|
|
paramtable.Init()
|
|
suite.collectionID = 1000
|
|
suite.partitionID = 1001
|
|
suite.segmentID = 1002
|
|
suite.fieldID = 102
|
|
suite.taskID = 1004
|
|
}
|
|
|
|
// SetupTest rebuilds the collection schema before each test: system
// row-id/timestamp fields plus pk, ts and a 1-dimensional float vector.
// (Same schema as IndexBuildTaskSuite.SetupTest.)
func (suite *AnalysisTaskSuite) SetupTest() {
	suite.schema = &schemapb.CollectionSchema{
		Name:        "test",
		Description: "test",
		AutoID:      false,
		Fields: []*schemapb.FieldSchema{
			{FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
			{FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
			{FieldID: 100, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
			{FieldID: 101, Name: "ts", DataType: schemapb.DataType_Int64},
			{FieldID: 102, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}},
		},
	}
}
|
|
|
|
// serializeData encodes a fixed three-row InsertData batch into storage
// blobs via the insert codec.
// NOTE(review): this duplicates IndexBuildTaskSuite.serializeData — a shared
// package-level helper would remove the copy.
func (suite *AnalysisTaskSuite) serializeData() ([]*storage.Blob, error) {
	insertCodec := storage.NewInsertCodecWithSchema(&etcdpb.CollectionMeta{
		Schema: suite.schema,
	})
	return insertCodec.Serialize(suite.partitionID, suite.segmentID, &storage.InsertData{
		Data: map[storage.FieldID]storage.FieldData{
			0:   &storage.Int64FieldData{Data: []int64{0, 1, 2}}, // row IDs
			1:   &storage.Int64FieldData{Data: []int64{1, 2, 3}}, // timestamps
			100: &storage.Int64FieldData{Data: []int64{0, 1, 2}}, // pk
			101: &storage.Int64FieldData{Data: []int64{0, 1, 2}}, // ts
			102: &storage.FloatVectorFieldData{Data: []float32{1, 2, 3}, Dim: 1},
		},
		// NOTE(review): unkeyed composite literal for storage.BlobInfo —
		// go vet's composites check would flag this; consider naming the
		// field explicitly (field name not visible from this file).
		Infos: []storage.BlobInfo{{3}},
	})
}
|
|
|
|
// TestAnalysis prepares an analysisTask over serialized insert data written
// through a local chunk manager. Only the Prepare phase is asserted; the
// build/save phases are currently disabled (see commented code at the end).
func (suite *AnalysisTaskSuite) TestAnalysis() {
	ctx, cancel := context.WithCancel(context.Background())
	req := &indexpb.AnalysisRequest{
		ClusterID:    "test",
		TaskID:       1,
		CollectionID: suite.collectionID,
		PartitionID:  suite.partitionID,
		FieldID:      suite.fieldID,
		FieldName:    "vec",
		FieldType:    schemapb.DataType_FloatVector,
		SegmentStats: map[int64]*indexpb.SegmentStats{
			suite.segmentID: {
				ID:      suite.segmentID,
				NumRows: 1024,
				// LogID 1 must match the log ID used for dataPath below.
				LogIDs: []int64{1},
			},
		},
		Version: 1,
		// Local storage keeps the test hermetic (no remote object store).
		StorageConfig: &indexpb.StorageConfig{
			RootPath:    "/tmp/milvus/data",
			StorageType: "local",
		},
		Dim: 1,
	}

	cm, err := NewChunkMgrFactory().NewChunkManager(ctx, req.GetStorageConfig())
	suite.NoError(err)
	blobs, err := suite.serializeData()
	suite.NoError(err)
	// Build the canonical insert-log path for (collection, partition,
	// segment, field, logID=1) and write the serialized binlog there.
	dataPath := metautil.BuildInsertLogPath(cm.RootPath(), suite.collectionID, suite.partitionID, suite.segmentID,
		suite.fieldID, 1)

	err = cm.Write(ctx, dataPath, blobs[0].Value)
	suite.NoError(err)

	t := &analysisTask{
		ident:    "",
		cancel:   cancel,
		ctx:      ctx,
		req:      req,
		tr:       timerecord.NewTimeRecorder("test-indexBuildTask"),
		queueDur: 0,
		node:     NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)),
	}

	err = t.Prepare(context.Background())
	suite.NoError(err)
	//err = t.BuildIndex(context.Background())
	//suite.NoError(err)
	//err = t.SaveIndexFiles(context.Background())
	//suite.NoError(err)
}
|
|
|
|
func TestAnalysisTaskSuite(t *testing.T) {
|
|
suite.Run(t, new(AnalysisTaskSuite))
|
|
}
|