From ba4cd68edb8922fe14f04cf72d296d48a8f4e9f2 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Sat, 6 Sep 2025 14:13:54 +0800 Subject: [PATCH] fix: adjust params to make CPP UT run faster (#44223) fix: https://github.com/milvus-io/milvus/issues/44224 --------- Signed-off-by: SpadeA --- internal/core/unittest/test_binlog_index.cpp | 6 +- internal/core/unittest/test_c_api.cpp | 48 +++--- .../core/unittest/test_c_stream_reduce.cpp | 2 +- internal/core/unittest/test_growing_index.cpp | 47 +++--- internal/core/unittest/test_index_c_api.cpp | 8 +- internal/core/unittest/test_index_wrapper.cpp | 22 ++- internal/core/unittest/test_indexing.cpp | 27 +-- internal/core/unittest/test_sealed.cpp | 158 ++++++------------ .../unittest/test_utils/c_api_test_utils.h | 4 +- .../test_utils/indexbuilder_test_utils.h | 8 +- .../unittest/test_vector_array_storage_v2.cpp | 12 +- 11 files changed, 158 insertions(+), 184 deletions(-) diff --git a/internal/core/unittest/test_binlog_index.cpp b/internal/core/unittest/test_binlog_index.cpp index f961a3fb01..7245292a56 100644 --- a/internal/core/unittest/test_binlog_index.cpp +++ b/internal/core/unittest/test_binlog_index.cpp @@ -117,7 +117,7 @@ class BinlogIndexTest : public ::testing::TestWithParam { {"index_type", index_type}, {"metric_type", metric_type}, {"nlist", "64"}}; - std::map type_params = {{"dim", "128"}}; + std::map type_params = {{"dim", "4"}}; FieldIndexMeta fieldIndexMeta( vec_field_id, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); @@ -166,7 +166,7 @@ class BinlogIndexTest : public ::testing::TestWithParam { std::optional dense_vec_intermin_index_type = std::nullopt; std::string index_type; size_t data_n = 5000; - size_t data_d = 128; + size_t data_d = 4; size_t topk = 10; milvus::FieldDataPtr vec_field_data = nullptr; milvus::segcore::SegmentSealedUPtr segment = nullptr; @@ -459,4 +459,4 @@ TEST_P(BinlogIndexTest, LoadBinlogWithoutIndexMeta) { EXPECT_FALSE(segment->HasIndex(vec_field_id)); EXPECT_EQ(segment->get_row_count(), data_n); EXPECT_TRUE(segment->HasFieldData(vec_field_id)); -} +} \ No newline at end of file diff --git a/internal/core/unittest/test_c_api.cpp b/internal/core/unittest/test_c_api.cpp index 1180b6601d..38de89d225 100644 --- a/internal/core/unittest/test_c_api.cpp +++ b/internal/core/unittest/test_c_api.cpp @@ -63,7 +63,7 @@ using milvus::index::VectorIndex; using milvus::segcore::LoadIndexInfo; namespace { -// const int DIM = 16; +// const int DIM = 4; const int64_t ROW_COUNT = 10 * 1000; const int64_t BIAS = 4200; @@ -130,7 +130,7 @@ get_default_index_meta() { index_name: "test-index" type_params: < key: "dim" - value: "16" + value: "4" > index_params: < key: "index_type" @@ -3006,12 +3006,13 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) { } TEST(CApiTest, Indexing_With_binary_Predicate_Range) { + auto dim = 16; // insert data to segment constexpr auto TOPK = 5; std::string schema_string = generate_collection_schema( - knowhere::metric::JACCARD, DIM); + knowhere::metric::JACCARD, dim); auto collection = NewCollection(schema_string.c_str(), knowhere::metric::JACCARD); auto schema = ((segcore::Collection*)collection)->get_schema(); @@ -3022,7 +3023,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) { auto N = ROW_COUNT; auto dataset = DataGen(schema, N); auto vec_col = dataset.get_col(FieldId(100)); - auto query_ptr = vec_col.data() + BIAS * DIM / 8; + auto query_ptr = vec_col.data() + BIAS * dim / 8; int64_t offset; PreInsert(segment, N, &offset); @@ -3081,7 +3082,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) { // create place_holder_group int num_queries = 5; auto raw_group = CreatePlaceholderGroupFromBlob( - num_queries, DIM, query_ptr); + num_queries, dim, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index @@ -3113,11 +3114,11 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) { DataType::VECTOR_BINARY, knowhere::metric::JACCARD, IndexEnum::INDEX_FAISS_BIN_IVFFLAT, - DIM, + dim, N); // gen query dataset - auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr); + auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr); auto vec_index = dynamic_cast(indexing.get()); auto search_plan = reinterpret_cast(plan); SearchInfo search_info = search_plan->plan_node_->search_info_; @@ -3189,10 +3190,11 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) { TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) { // insert data to segment constexpr auto TOPK = 5; + auto dim = 16; std::string schema_string = generate_collection_schema( - knowhere::metric::JACCARD, DIM); + knowhere::metric::JACCARD, dim); auto collection = NewCollection(schema_string.c_str(), knowhere::metric::JACCARD); auto schema = ((segcore::Collection*)collection)->get_schema(); @@ -3203,7 +3205,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) { auto N = ROW_COUNT; auto dataset = DataGen(schema, N); auto vec_col = dataset.get_col(FieldId(100)); - auto query_ptr = vec_col.data() + BIAS * DIM / 8; + auto query_ptr = vec_col.data() + BIAS * dim / 8; int64_t offset; PreInsert(segment, N, &offset); @@ -3261,7 +3263,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) { // create place_holder_group int num_queries = 5; auto raw_group = CreatePlaceholderGroupFromBlob( - num_queries, DIM, query_ptr); + num_queries, dim, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index @@ -3294,11 +3296,11 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) { DataType::VECTOR_BINARY, knowhere::metric::JACCARD, IndexEnum::INDEX_FAISS_BIN_IVFFLAT, - DIM, + dim, N); // gen query dataset - auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr); + auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr); auto vec_index = dynamic_cast(indexing.get()); auto search_plan = reinterpret_cast(plan); SearchInfo search_info = search_plan->plan_node_->search_info_; @@ -3370,10 +3372,11 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) { TEST(CApiTest, Indexing_With_binary_Predicate_Term) { // insert data to segment constexpr auto TOPK = 5; + auto dim = 16; std::string schema_string = generate_collection_schema( - knowhere::metric::JACCARD, DIM); + knowhere::metric::JACCARD, dim); auto collection = NewCollection(schema_string.c_str(), knowhere::metric::JACCARD); auto schema = ((segcore::Collection*)collection)->get_schema(); @@ -3384,7 +3387,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) { auto N = ROW_COUNT; auto dataset = DataGen(schema, N); auto vec_col = dataset.get_col(FieldId(100)); - auto query_ptr = vec_col.data() + BIAS * DIM / 8; + auto query_ptr = vec_col.data() + BIAS * dim / 8; int64_t offset; PreInsert(segment, N, &offset); @@ -3438,7 +3441,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) { int num_queries = 5; int topK = 5; auto raw_group = CreatePlaceholderGroupFromBlob( - num_queries, DIM, query_ptr); + num_queries, dim, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index @@ -3469,11 +3472,11 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) { DataType::VECTOR_BINARY, knowhere::metric::JACCARD, IndexEnum::INDEX_FAISS_BIN_IVFFLAT, - DIM, + dim, N); // gen query dataset - auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr); + auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr); auto vec_index = dynamic_cast(indexing.get()); auto search_plan = reinterpret_cast(plan); SearchInfo search_info = search_plan->plan_node_->search_info_; @@ -3568,10 +3571,11 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) { TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) { // insert data to segment constexpr auto TOPK = 5; + auto dim = 16; std::string schema_string = generate_collection_schema( - knowhere::metric::JACCARD, DIM); + knowhere::metric::JACCARD, dim); auto collection = NewCollection(schema_string.c_str(), knowhere::metric::JACCARD); auto schema = ((segcore::Collection*)collection)->get_schema(); @@ -3582,7 +3586,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) { auto N = ROW_COUNT; auto dataset = DataGen(schema, N); auto vec_col = dataset.get_col(FieldId(100)); - auto query_ptr = vec_col.data() + BIAS * DIM / 8; + auto query_ptr = vec_col.data() + BIAS * dim / 8; int64_t offset; PreInsert(segment, N, &offset); @@ -3635,7 +3639,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) { int num_queries = 5; int topK = 5; auto raw_group = CreatePlaceholderGroupFromBlob( - num_queries, DIM, query_ptr); + num_queries, dim, query_ptr); auto blob = raw_group.SerializeAsString(); // search on segment's small index @@ -3667,11 +3671,11 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) { DataType::VECTOR_BINARY, knowhere::metric::JACCARD, IndexEnum::INDEX_FAISS_BIN_IVFFLAT, - DIM, + dim, N); // gen query dataset - auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr); + auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr); auto vec_index = dynamic_cast(indexing.get()); auto search_plan = reinterpret_cast(plan); SearchInfo search_info = search_plan->plan_node_->search_info_; diff --git a/internal/core/unittest/test_c_stream_reduce.cpp b/internal/core/unittest/test_c_stream_reduce.cpp index 9e558b5297..2d3f410b77 100644 --- a/internal/core/unittest/test_c_stream_reduce.cpp +++ b/internal/core/unittest/test_c_stream_reduce.cpp @@ -154,7 +154,7 @@ TEST(CApiTest, StreamReduceGroupBY) { int N = 300; int topK = 100; int num_queries = 2; - int dim = 16; + int dim = 4; namespace schema = milvus::proto::schema; void* c_collection; diff --git a/internal/core/unittest/test_growing_index.cpp b/internal/core/unittest/test_growing_index.cpp index 19c95e833c..597ef5be97 100644 --- a/internal/core/unittest/test_growing_index.cpp +++ b/internal/core/unittest/test_growing_index.cpp @@ -156,17 +156,19 @@ INSTANTIATE_TEST_SUITE_P( "FLOAT16"))); TEST_P(GrowingIndexTest, Correctness) { + auto dim = 4; auto schema = std::make_shared(); auto pk = schema->AddDebugField("pk", DataType::INT64); auto random = schema->AddDebugField("random", DataType::DOUBLE); - auto vec = schema->AddDebugField("embeddings", data_type, 128, metric_type); + auto vec = schema->AddDebugField("embeddings", data_type, dim, metric_type); schema->set_primary_field_id(pk); std::map index_params = { {"index_type", index_type}, {"metric_type", metric_type}, {"nlist", "128"}}; - std::map type_params = {{"dim", "128"}}; + std::map type_params = { + {"dim", std::to_string(dim)}}; FieldIndexMeta fieldIndexMeta( vec, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); @@ -249,7 +251,7 @@ TEST_P(GrowingIndexTest, Correctness) { auto range_plan_str = range_query_plan_node.SerializeAsString(); int64_t per_batch = 10000; - int64_t n_batch = 20; + int64_t n_batch = 5; int64_t top_k = 5; for (int64_t i = 0; i < n_batch; i++) { auto dataset = DataGen(schema, per_batch); @@ -295,12 +297,12 @@ TEST_P(GrowingIndexTest, Correctness) { ph_group_raw = CreateSparseFloatPlaceholderGroup(num_queries); } else if (data_type == DataType::VECTOR_FLOAT16) { ph_group_raw = CreatePlaceholderGroup( - num_queries, 128, 1024); + num_queries, dim, 1024); } else if (data_type == DataType::VECTOR_BFLOAT16) { ph_group_raw = CreatePlaceholderGroup( - num_queries, 128, 1024); + num_queries, dim, 1024); } else { - ph_group_raw = CreatePlaceholderGroup(num_queries, 128, 1024); + ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024); } auto plan = milvus::query::CreateSearchPlanByExpr( @@ -338,7 +340,7 @@ TEST_P(GrowingIndexTest, Correctness) { TEST_P(GrowingIndexTest, AddWithoutBuildPool) { constexpr int N = 1024; constexpr int TOPK = 100; - constexpr int dim = 128; + constexpr int dim = 4; constexpr int add_cont = 5; milvus::index::CreateIndexInfo create_index_info; @@ -351,7 +353,7 @@ TEST_P(GrowingIndexTest, AddWithoutBuildPool) { auto schema = std::make_shared(); auto pk = schema->AddDebugField("pk", DataType::INT64); auto random = schema->AddDebugField("random", DataType::DOUBLE); - auto vec = schema->AddDebugField("embeddings", data_type, 128, metric_type); + auto vec = schema->AddDebugField("embeddings", data_type, dim, metric_type); schema->set_primary_field_id(pk); auto dataset = DataGen(schema, N); @@ -434,10 +436,11 @@ TEST_P(GrowingIndexTest, AddWithoutBuildPool) { } TEST_P(GrowingIndexTest, MissIndexMeta) { + auto dim = 4; auto schema = std::make_shared(); auto pk = schema->AddDebugField("pk", DataType::INT64); auto random = schema->AddDebugField("random", DataType::DOUBLE); - auto vec = schema->AddDebugField("embeddings", data_type, 128, metric_type); + auto vec = schema->AddDebugField("embeddings", data_type, dim, metric_type); schema->set_primary_field_id(pk); auto& config = SegcoreConfig::default_config(); @@ -447,17 +450,19 @@ TEST_P(GrowingIndexTest, MissIndexMeta) { } TEST_P(GrowingIndexTest, GetVector) { + auto dim = 4; auto schema = std::make_shared(); auto pk = schema->AddDebugField("pk", DataType::INT64); auto random = schema->AddDebugField("random", DataType::DOUBLE); - auto vec = schema->AddDebugField("embeddings", data_type, 128, metric_type); + auto vec = schema->AddDebugField("embeddings", data_type, dim, metric_type); schema->set_primary_field_id(pk); std::map index_params = { {"index_type", index_type}, {"metric_type", metric_type}, {"nlist", "128"}}; - std::map type_params = {{"dim", "128"}}; + std::map type_params = { + {"dim", std::to_string(dim)}}; FieldIndexMeta fieldIndexMeta( vec, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); @@ -473,11 +478,10 @@ TEST_P(GrowingIndexTest, GetVector) { auto segment_growing = CreateGrowingSegment(schema, metaPtr); auto segment = dynamic_cast(segment_growing.get()); + int64_t per_batch = 1000; + int64_t n_batch = 5; if (data_type == DataType::VECTOR_FLOAT) { // GetVector for VECTOR_FLOAT - int64_t per_batch = 5000; - int64_t n_batch = 20; - int64_t dim = 128; for (int64_t i = 0; i < n_batch; i++) { auto dataset = DataGen(schema, per_batch); auto fakevec = dataset.get_col(vec); @@ -497,7 +501,7 @@ TEST_P(GrowingIndexTest, GetVector) { EXPECT_TRUE(vector.size() == num_inserted * dim); for (size_t i = 0; i < num_inserted; ++i) { auto id = ids_ds->GetIds()[i]; - for (size_t j = 0; j < 128; ++j) { + for (size_t j = 0; j < dim; ++j) { EXPECT_TRUE(vector[i * dim + j] == fakevec[(id % per_batch) * dim + j]); } @@ -505,9 +509,6 @@ TEST_P(GrowingIndexTest, GetVector) { } } else if (data_type == DataType::VECTOR_FLOAT16) { // GetVector for VECTOR_FLOAT16 - int64_t per_batch = 5000; - int64_t n_batch = 20; - int64_t dim = 128; for (int64_t i = 0; i < n_batch; i++) { auto dataset = DataGen(schema, per_batch); auto fakevec = dataset.get_col(vec); @@ -525,7 +526,7 @@ TEST_P(GrowingIndexTest, GetVector) { EXPECT_TRUE(vector.size() == num_inserted * dim * sizeof(float16)); for (size_t i = 0; i < num_inserted; ++i) { auto id = ids_ds->GetIds()[i]; - for (size_t j = 0; j < 128; ++j) { + for (size_t j = 0; j < dim; ++j) { EXPECT_TRUE(reinterpret_cast( vector.data())[i * dim + j] == fakevec[(id % per_batch) * dim + j]); @@ -534,9 +535,6 @@ TEST_P(GrowingIndexTest, GetVector) { } } else if (data_type == DataType::VECTOR_BFLOAT16) { // GetVector for VECTOR_FLOAT16 - int64_t per_batch = 5000; - int64_t n_batch = 20; - int64_t dim = 128; for (int64_t i = 0; i < n_batch; i++) { auto dataset = DataGen(schema, per_batch); auto fakevec = dataset.get_col(vec); @@ -555,7 +553,7 @@ TEST_P(GrowingIndexTest, GetVector) { EXPECT_TRUE(vector.size() == num_inserted * dim * sizeof(bfloat16)); for (size_t i = 0; i < num_inserted; ++i) { auto id = ids_ds->GetIds()[i]; - for (size_t j = 0; j < 128; ++j) { + for (size_t j = 0; j < dim; ++j) { EXPECT_TRUE(reinterpret_cast( vector.data())[i * dim + j] == fakevec[(id % per_batch) * dim + j]); @@ -564,9 +562,6 @@ TEST_P(GrowingIndexTest, GetVector) { } } else if (is_sparse) { // GetVector for VECTOR_SPARSE_U32_F32 - int64_t per_batch = 5000; - int64_t n_batch = 20; - int64_t dim = 128; for (int64_t i = 0; i < n_batch; i++) { auto dataset = DataGen(schema, per_batch); auto fakevec = dataset.get_col< diff --git a/internal/core/unittest/test_index_c_api.cpp b/internal/core/unittest/test_index_c_api.cpp index c76af437a2..e65d3dadb4 100644 --- a/internal/core/unittest/test_index_c_api.cpp +++ b/internal/core/unittest/test_index_c_api.cpp @@ -51,7 +51,10 @@ TestVecIndex() { ok = google::protobuf::TextFormat::PrintToString(index_params, &index_params_str); assert(ok); - auto dataset = GenFieldData(NB, metric_type, TraitType::data_type); + auto dataset = + std::is_same_v + ? GenFieldData(NB, metric_type, TraitType::data_type, BINARY_DIM) + : GenFieldData(NB, metric_type, TraitType::data_type); CDataType dtype = TraitType::c_data_type; CIndex index; @@ -65,7 +68,8 @@ TestVecIndex() { if (std::is_same_v) { auto xb_data = dataset.template get_col(milvus::FieldId(100)); - status = BuildBinaryVecIndex(index, NB * DIM / 8, xb_data.data()); + status = + BuildBinaryVecIndex(index, NB * BINARY_DIM / 8, xb_data.data()); } else if (std::is_same_v) { auto xb_data = dataset.template get_col< knowhere::sparse::SparseRow>( diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp index f454d39475..577ef2f237 100644 --- a/internal/core/unittest/test_index_wrapper.cpp +++ b/internal/core/unittest/test_index_wrapper.cpp @@ -76,6 +76,11 @@ class IndexWrapperTest : public ::testing::TestWithParam { }; vec_field_data_type = index_to_vec_type[index_type]; + + // Set correct dimension for binary vectors + if (vec_field_data_type == DataType::VECTOR_BINARY) { + config["dim"] = std::to_string(BINARY_DIM); + } } void @@ -128,9 +133,10 @@ TEST_P(IndexWrapperTest, BuildAndQuery) { vec_field_data_type, config, file_manager_context); knowhere::DataSetPtr xb_dataset; if (vec_field_data_type == DataType::VECTOR_BINARY) { - auto dataset = GenFieldData(NB, metric_type, vec_field_data_type); + auto dataset = + GenFieldData(NB, metric_type, vec_field_data_type, BINARY_DIM); auto bin_vecs = dataset.get_col(milvus::FieldId(100)); - xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data()); + xb_dataset = knowhere::GenDataSet(NB, BINARY_DIM, bin_vecs.data()); ASSERT_NO_THROW(index->Build(xb_dataset)); } else if (vec_field_data_type == DataType::VECTOR_SPARSE_U32_F32) { auto dataset = GenFieldData(NB, metric_type, vec_field_data_type); @@ -161,7 +167,9 @@ TEST_P(IndexWrapperTest, BuildAndQuery) { vec_field_data_type, config, file_manager_context); auto vec_index = static_cast(copy_index.get()); - if (vec_field_data_type != DataType::VECTOR_SPARSE_U32_F32) { + if (vec_field_data_type == DataType::VECTOR_BINARY) { + ASSERT_EQ(vec_index->dim(), BINARY_DIM); + } else if (vec_field_data_type != DataType::VECTOR_SPARSE_U32_F32) { ASSERT_EQ(vec_index->dim(), DIM); } @@ -191,12 +199,14 @@ TEST_P(IndexWrapperTest, BuildAndQuery) { result = vec_index->Query(xq_dataset, search_info, nullptr); } else { auto nb_for_nq = NQ + query_offset; - auto dataset = - GenFieldData(nb_for_nq, metric_type, DataType::VECTOR_BINARY); + auto dataset = GenFieldData( + nb_for_nq, metric_type, DataType::VECTOR_BINARY, BINARY_DIM); auto xb_bin_data = dataset.get_col(milvus::FieldId(100)); // offset of binary vector is 8-aligned bit-wise representation. auto xq_dataset = knowhere::GenDataSet( - NQ, DIM, xb_bin_data.data() + ((DIM + 7) / 8) * query_offset); + NQ, + BINARY_DIM, + xb_bin_data.data() + ((BINARY_DIM + 7) / 8) * query_offset); result = vec_index->Query(xq_dataset, search_info, nullptr); } diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index dd85df8c69..d42b120f76 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -339,13 +339,17 @@ class IndexTest : public ::testing::TestWithParam { vec_field_data_type = milvus::DataType::VECTOR_FLOAT; } - auto dataset = GenFieldData(NB, metric_type, vec_field_data_type); + auto dataset = + is_binary + ? GenFieldData(NB, metric_type, vec_field_data_type, BINARY_DIM) + : GenFieldData(NB, metric_type, vec_field_data_type); if (is_binary) { // binary vector xb_bin_data = dataset.get_col(milvus::FieldId(100)); - xb_dataset = knowhere::GenDataSet(NB, DIM, xb_bin_data.data()); + xb_dataset = + knowhere::GenDataSet(NB, BINARY_DIM, xb_bin_data.data()); xq_dataset = knowhere::GenDataSet( - NQ, DIM, xb_bin_data.data() + DIM * query_offset); + NQ, BINARY_DIM, xb_bin_data.data() + BINARY_DIM * query_offset); } else if (is_sparse) { // sparse vector xb_sparse_data = dataset.get_col< @@ -415,7 +419,7 @@ INSTANTIATE_TEST_SUITE_P( TEST(Indexing, Iterator) { constexpr int N = 10240; constexpr int TOPK = 100; - constexpr int dim = 128; + constexpr int dim = 4; auto [raw_data, timestamps, uids] = generate_data(N); milvus::index::CreateIndexInfo create_index_info; @@ -467,6 +471,7 @@ TEST(Indexing, Iterator) { } TEST_P(IndexTest, BuildAndQuery) { + auto dim = is_binary ? BINARY_DIM : DIM; milvus::index::CreateIndexInfo create_index_info; create_index_info.index_type = index_type; create_index_info.metric_type = metric_type; @@ -506,7 +511,7 @@ TEST_P(IndexTest, BuildAndQuery) { ASSERT_NO_THROW(vec_index->Load(milvus::tracer::TraceContext{}, load_conf)); EXPECT_EQ(vec_index->Count(), NB); if (!is_sparse) { - EXPECT_EQ(vec_index->GetDim(), DIM); + EXPECT_EQ(vec_index->GetDim(), dim); } milvus::SearchInfo search_info; @@ -535,6 +540,7 @@ TEST_P(IndexTest, BuildAndQuery) { } TEST_P(IndexTest, Mmap) { + auto dim = is_binary ? BINARY_DIM : DIM; milvus::index::CreateIndexInfo create_index_info; create_index_info.index_type = index_type; create_index_info.metric_type = metric_type; @@ -577,7 +583,7 @@ TEST_P(IndexTest, Mmap) { milvus::proto::common::LoadPriority::HIGH; vec_index->Load(milvus::tracer::TraceContext{}, load_conf); EXPECT_EQ(vec_index->Count(), NB); - EXPECT_EQ(vec_index->GetDim(), is_sparse ? kTestSparseDim : DIM); + EXPECT_EQ(vec_index->GetDim(), is_sparse ? kTestSparseDim : dim); milvus::SearchInfo search_info; search_info.topk_ = K; @@ -597,6 +603,7 @@ TEST_P(IndexTest, Mmap) { } TEST_P(IndexTest, GetVector) { + auto dim = is_binary ? BINARY_DIM : DIM; milvus::index::CreateIndexInfo create_index_info; create_index_info.index_type = index_type; create_index_info.metric_type = metric_type; @@ -634,7 +641,7 @@ TEST_P(IndexTest, GetVector) { milvus::proto::common::LoadPriority::HIGH; vec_index->Load(milvus::tracer::TraceContext{}, load_conf); if (!is_sparse) { - EXPECT_EQ(vec_index->GetDim(), DIM); + EXPECT_EQ(vec_index->GetDim(), dim); } EXPECT_EQ(vec_index->Count(), NB); @@ -646,7 +653,7 @@ TEST_P(IndexTest, GetVector) { if (is_binary) { auto results = vec_index->GetVector(ids_ds); EXPECT_EQ(results.size(), xb_bin_data.size()); - const auto data_bytes = DIM / 8; + const auto data_bytes = dim / 8; for (size_t i = 0; i < NB; ++i) { auto id = ids_ds->GetIds()[i]; for (size_t j = 0; j < data_bytes; ++j) { @@ -672,8 +679,8 @@ TEST_P(IndexTest, GetVector) { ASSERT_EQ(result_vectors.size(), xb_data.size()); for (size_t i = 0; i < NB; ++i) { auto id = ids_ds->GetIds()[i]; - for (size_t j = 0; j < DIM; ++j) { - ASSERT_EQ(result_vectors[i * DIM + j], xb_data[id * DIM + j]); + for (size_t j = 0; j < dim; ++j) { + ASSERT_EQ(result_vectors[i * dim + j], xb_data[id * dim + j]); } } } diff --git a/internal/core/unittest/test_sealed.cpp b/internal/core/unittest/test_sealed.cpp index d2dac32d20..8311995a94 100644 --- a/internal/core/unittest/test_sealed.cpp +++ b/internal/core/unittest/test_sealed.cpp @@ -31,8 +31,8 @@ using namespace milvus::segcore; using milvus::segcore::LoadIndexInfo; -const int64_t ROW_COUNT = 10 * 1000; -const int64_t BIAS = 4200; +const int64_t ROW_COUNT = 2 * 1000; +const int64_t BIAS = 1000; using Param = std::string; class SealedTest : public ::testing::TestWithParam { @@ -44,7 +44,7 @@ class SealedTest : public ::testing::TestWithParam { TEST(Sealed, without_predicate) { auto schema = std::make_shared(); - auto dim = 16; + auto dim = 4; auto topK = 5; auto metric_type = knowhere::metric::L2; auto fake_id = schema->AddDebugField( @@ -68,7 +68,7 @@ TEST(Sealed, without_predicate) { auto dataset = DataGen(schema, N); auto vec_col = dataset.get_col(fake_id); - for (int64_t i = 0; i < 1000 * dim; ++i) { + for (int64_t i = 0; i < (ROW_COUNT / 2) * dim; ++i) { vec_col.push_back(0); } auto query_ptr = vec_col.data() + BIAS * dim; @@ -85,7 +85,7 @@ TEST(Sealed, without_predicate) { CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; auto ph_group_raw = - CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr); + CreatePlaceholderGroupFromBlob(num_queries, dim, query_ptr); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); Timestamp timestamp = 1000000; @@ -111,7 +111,8 @@ TEST(Sealed, without_predicate) { auto search_conf = knowhere::Json{{knowhere::indexparam::NPROBE, 10}}; - auto database = knowhere::GenDataSet(N, dim, vec_col.data() + 1000 * dim); + auto database = + knowhere::GenDataSet(N, dim, vec_col.data() + (ROW_COUNT / 2) * dim); indexing->BuildWithDataset(database, build_conf); auto vec_index = dynamic_cast(indexing.get()); @@ -155,7 +156,7 @@ TEST(Sealed, without_predicate) { TEST(Sealed, without_search_ef_less_than_limit) { auto schema = std::make_shared(); - auto dim = 16; + auto dim = 4; auto topK = 5; auto metric_type = knowhere::metric::L2; auto fake_id = schema->AddDebugField( @@ -186,7 +187,7 @@ TEST(Sealed, without_search_ef_less_than_limit) { CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; auto ph_group_raw = - CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr); + CreatePlaceholderGroupFromBlob(num_queries, dim, query_ptr); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); Timestamp timestamp = 1000000; @@ -238,7 +239,7 @@ TEST(Sealed, without_search_ef_less_than_limit) { TEST(Sealed, with_predicate) { auto schema = std::make_shared(); - auto dim = 16; + auto dim = 4; auto topK = 5; auto metric_type = knowhere::metric::L2; auto fake_id = schema->AddDebugField( @@ -256,10 +257,10 @@ TEST(Sealed, with_predicate) { lower_inclusive: true, upper_inclusive: false, lower_value: < - int64_val: 4200 + int64_val: 1000 > upper_value: < - int64_val: 4205 + int64_val: 1005 > > > @@ -290,7 +291,7 @@ TEST(Sealed, with_predicate) { CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; auto ph_group_raw = - CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr); + CreatePlaceholderGroupFromBlob(num_queries, dim, query_ptr); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); Timestamp timestamp = 1000000; @@ -353,7 +354,7 @@ TEST(Sealed, with_predicate) { TEST(Sealed, with_predicate_filter_all) { auto schema = std::make_shared(); - auto dim = 16; + auto dim = 4; auto topK = 5; // auto metric_type = MetricType::METRIC_L2; auto metric_type = knowhere::metric::L2; @@ -398,7 +399,7 @@ TEST(Sealed, with_predicate_filter_all) { CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; auto ph_group_raw = - CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr); + CreatePlaceholderGroupFromBlob(num_queries, dim, query_ptr); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); Timestamp timestamp = 1000000; @@ -482,7 +483,7 @@ TEST(Sealed, with_predicate_filter_all) { } TEST(Sealed, LoadFieldData) { - auto dim = 16; + auto dim = 4; auto topK = 5; auto N = ROW_COUNT; auto metric_type = knowhere::metric::L2; @@ -553,7 +554,7 @@ TEST(Sealed, LoadFieldData) { auto plan = CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); @@ -657,7 +658,7 @@ TEST(Sealed, LoadFieldData) { } TEST(Sealed, ClearData) { - auto dim = 16; + auto dim = 4; auto topK = 5; auto N = ROW_COUNT; auto metric_type = knowhere::metric::L2; @@ -714,7 +715,7 @@ TEST(Sealed, ClearData) { auto plan = CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); @@ -762,7 +763,7 @@ TEST(Sealed, ClearData) { } TEST(Sealed, LoadFieldDataMmap) { - auto dim = 16; + auto dim = 4; auto topK = 5; auto N = ROW_COUNT; auto metric_type = knowhere::metric::L2; @@ -819,7 +820,7 @@ TEST(Sealed, LoadFieldDataMmap) { auto plan = CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); @@ -882,7 +883,7 @@ TEST(Sealed, LoadPkScalarIndex) { } TEST(Sealed, LoadScalarIndex) { - auto dim = 16; + auto dim = 4; size_t N = ROW_COUNT; auto metric_type = knowhere::metric::L2; auto schema = std::make_shared(); @@ -931,7 +932,7 @@ TEST(Sealed, LoadScalarIndex) { auto plan = CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); @@ -985,7 +986,7 @@ TEST(Sealed, LoadScalarIndex) { } TEST(Sealed, Delete) { - auto dim = 16; + auto dim = 4; auto topK = 5; auto N = 10; auto metric_type = knowhere::metric::L2; @@ -1033,7 +1034,7 @@ TEST(Sealed, Delete) { auto plan = CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); @@ -1067,7 +1068,7 @@ TEST(Sealed, Delete) { } TEST(Sealed, OverlapDelete) { - auto dim = 16; + auto dim = 4; auto topK = 5; auto N = 10; auto metric_type = knowhere::metric::L2; @@ -1115,7 +1116,7 @@ TEST(Sealed, OverlapDelete) { auto plan = CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); @@ -1193,7 +1194,7 @@ GenQueryVecs(int N, int dim) { TEST(Sealed, BF) { auto schema = std::make_shared(); - auto dim = 128; + auto dim = 4; auto metric_type = "L2"; auto fake_id = schema->AddDebugField( "fakevec", DataType::VECTOR_FLOAT, dim, metric_type); @@ -1255,7 +1256,7 @@ TEST(Sealed, BF) { TEST(Sealed, BF_Overflow) { auto schema = std::make_shared(); - auto dim = 128; + auto dim = 4; auto metric_type = "L2"; auto fake_id = schema->AddDebugField( "fakevec", DataType::VECTOR_FLOAT, dim, metric_type); @@ -1400,7 +1401,7 @@ TEST(Sealed, RealCount) { } TEST(Sealed, GetVector) { - auto dim = 16; + auto dim = 4; auto N = ROW_COUNT; auto metric_type = knowhere::metric::L2; auto schema = std::make_shared(); @@ -1448,7 +1449,7 @@ TEST(Sealed, GetVector) { } TEST(Sealed, LoadArrayFieldData) { - auto dim = 16; + auto dim = 4; auto topK = 5; auto N = 10; auto metric_type = knowhere::metric::L2; @@ -1490,7 +1491,7 @@ TEST(Sealed, LoadArrayFieldData) { auto plan = CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); @@ -1505,7 +1506,7 @@ TEST(Sealed, LoadArrayFieldData) { } TEST(Sealed, LoadArrayFieldDataWithMMap) { - auto dim = 16; + auto dim = 4; auto topK = 5; auto N = ROW_COUNT; auto metric_type = knowhere::metric::L2; @@ -1547,7 +1548,7 @@ TEST(Sealed, LoadArrayFieldDataWithMMap) { auto plan = CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); auto num_queries = 5; - auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); + auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); @@ -1557,7 +1558,7 @@ TEST(Sealed, LoadArrayFieldDataWithMMap) { TEST(Sealed, SkipIndexSkipUnaryRange) { auto schema = std::make_shared(); - auto dim = 128; + auto dim = 4; auto metrics_type = "L2"; auto fake_vec_fid = schema->AddDebugField( "fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type); @@ -1706,7 +1707,7 @@ TEST(Sealed, SkipIndexSkipUnaryRange) { TEST(Sealed, SkipIndexSkipBinaryRange) { auto schema = std::make_shared(); - auto dim = 128; + auto dim = 4; auto metrics_type = "L2"; auto fake_vec_fid = schema->AddDebugField( "fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type); @@ -1749,7 +1750,7 @@ TEST(Sealed, SkipIndexSkipBinaryRange) { TEST(Sealed, SkipIndexSkipUnaryRangeNullable) { auto schema = std::make_shared(); - auto dim = 128; + auto dim = 4; auto metrics_type = "L2"; auto fake_vec_fid = schema->AddDebugField( "fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type); @@ -1821,7 +1822,7 @@ TEST(Sealed, SkipIndexSkipUnaryRangeNullable) { TEST(Sealed, SkipIndexSkipBinaryRangeNullable) { auto schema = std::make_shared(); - auto dim = 128; + auto dim = 4; auto metrics_type = "L2"; auto fake_vec_fid = schema->AddDebugField( "fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type); @@ -1864,7 +1865,7 @@ TEST(Sealed, SkipIndexSkipBinaryRangeNullable) { TEST(Sealed, SkipIndexSkipStringRange) { auto schema = std::make_shared(); - auto dim = 128; + auto dim = 4; auto metrics_type = "L2"; auto pk_fid = schema->AddDebugField("pk", DataType::INT64); auto string_fid = schema->AddDebugField("string_field", DataType::VARCHAR); @@ -1927,6 +1928,7 @@ TEST(Sealed, SkipIndexSkipStringRange) { TEST(Sealed, QueryAllFields) { auto schema = std::make_shared(); auto metric_type = knowhere::metric::L2; + auto dim = 4; auto bool_field = schema->AddDebugField("bool", DataType::BOOL); auto int8_field = schema->AddDebugField("int8", DataType::INT8); auto int16_field = schema->AddDebugField("int16", DataType::INT16); @@ -1949,20 +1951,21 @@ TEST(Sealed, QueryAllFields) { auto float_array_field = schema->AddDebugField("float_array", DataType::ARRAY, DataType::FLOAT); auto vec = schema->AddDebugField( - "embeddings", DataType::VECTOR_FLOAT, 128, metric_type); + "embeddings", DataType::VECTOR_FLOAT, dim, metric_type); auto float16_vec = schema->AddDebugField( - "float16_vec", DataType::VECTOR_FLOAT16, 128, metric_type); + "float16_vec", DataType::VECTOR_FLOAT16, dim, metric_type); auto bfloat16_vec = schema->AddDebugField( - "bfloat16_vec", DataType::VECTOR_BFLOAT16, 128, metric_type); + "bfloat16_vec", DataType::VECTOR_BFLOAT16, dim, metric_type); auto int8_vec = schema->AddDebugField( - "int8_vec", DataType::VECTOR_INT8, 128, metric_type); + "int8_vec", DataType::VECTOR_INT8, dim, metric_type); schema->set_primary_field_id(int64_field); std::map index_params = { {"index_type", "IVF_FLAT"}, {"metric_type", metric_type}, {"nlist", "128"}}; - std::map type_params = {{"dim", "128"}}; + std::map type_params = { + {"dim", std::to_string(dim)}}; FieldIndexMeta fieldIndexMeta( vec, std::move(index_params), std::move(type_params)); std::map filedMap = {{vec, fieldIndexMeta}}; @@ -1973,7 +1976,6 @@ TEST(Sealed, QueryAllFields) { dynamic_cast(segment_sealed.get()); int64_t dataset_size = 1000; - int64_t dim = 128; auto dataset = DataGen(schema, dataset_size); segment_sealed = CreateSealedWithFieldDataLoaded(schema, dataset); segment = dynamic_cast(segment_sealed.get()); @@ -2094,6 +2096,7 @@ TEST(Sealed, QueryAllFields) { TEST(Sealed, QueryAllNullableFields) { auto schema = std::make_shared(); auto metric_type = knowhere::metric::L2; + auto dim = 4; auto bool_field = schema->AddDebugField("bool", DataType::BOOL, true); auto int8_field = schema->AddDebugField("int8", DataType::INT8, true); auto int16_field = schema->AddDebugField("int16", DataType::INT16, true); @@ -2117,14 +2120,15 @@ TEST(Sealed, QueryAllNullableFields) { auto float_array_field = schema->AddDebugField( "float_array", DataType::ARRAY, DataType::FLOAT, true); auto vec = schema->AddDebugField( - "embeddings", DataType::VECTOR_FLOAT, 128, metric_type); + "embeddings", DataType::VECTOR_FLOAT, dim, metric_type); schema->set_primary_field_id(int64_field); std::map index_params = { {"index_type", "IVF_FLAT"}, {"metric_type", metric_type}, {"nlist", "128"}}; - std::map type_params = {{"dim", "128"}}; + std::map type_params = { + {"dim", std::to_string(dim)}}; FieldIndexMeta fieldIndexMeta( vec, std::move(index_params), std::move(type_params)); std::map filedMap = {{vec, fieldIndexMeta}}; @@ -2135,7 +2139,6 @@ TEST(Sealed, QueryAllNullableFields) { dynamic_cast(segment_sealed.get()); int64_t dataset_size = 1000; - int64_t dim = 128; auto dataset = DataGen(schema, dataset_size); segment_sealed = CreateSealedWithFieldDataLoaded(schema, dataset); segment = dynamic_cast(segment_sealed.get()); @@ -2278,9 +2281,10 @@ TEST(Sealed, SearchSortedPk) { TEST(Sealed, QueryVectorArrayAllFields) { auto schema = std::make_shared(); auto metric_type = knowhere::metric::MAX_SIM; + int64_t dim = 4; auto int64_field = schema->AddDebugField("int64", DataType::INT64); auto array_vec = schema->AddDebugVectorArrayField( - "array_vec", DataType::VECTOR_FLOAT, 128, metric_type); + "array_vec", DataType::VECTOR_FLOAT, dim, metric_type); schema->set_primary_field_id(int64_field); std::map filedMap{}; @@ -2288,7 +2292,6 @@ TEST(Sealed, QueryVectorArrayAllFields) { std::make_shared(100000, std::move(filedMap)); int64_t dataset_size = 1000; - int64_t dim = 128; auto dataset = DataGen(schema, dataset_size); auto segment_sealed = CreateSealedWithFieldDataLoaded(schema, dataset); auto segment = @@ -2336,7 +2339,7 @@ TEST(Sealed, SearchVectorArray) { int64_t index_build_id = 4000; int64_t index_version = 4000; int64_t index_id = 5000; - int64_t dim = 32; + int64_t dim = 4; auto schema = std::make_shared(); auto metric_type = knowhere::metric::MAX_SIM; @@ -2359,8 +2362,8 @@ TEST(Sealed, SearchVectorArray) { IndexMetaPtr metaPtr = std::make_shared(100000, std::move(filedMap)); - int64_t dataset_size = 1000; - auto emb_list_len = 10; + int64_t dataset_size = 100; + auto emb_list_len = 2; auto dataset = DataGen(schema, dataset_size, 42, 0, 1, emb_list_len); // create field data @@ -2493,57 +2496,6 @@ TEST(Sealed, SearchVectorArray) { std::cout << sr_parsed.dump(1) << std::endl; } - // // brute force search with iterative filter - // { - // auto [min, max] = - // std::minmax_element(int_values.begin(), int_values.end()); - // auto min_val = *min; - // auto max_val = *max; - - // auto raw_plan = fmt::format(R"(vector_anns: < - // field_id: 101 - // predicates: < - // binary_range_expr: < - // column_info: < - // field_id: 100 - // data_type: Int64 - // > - // lower_inclusive: true - // upper_inclusive: true - // lower_value: < - // int64_val: {} - // > - // upper_value: < - // int64_val: {} - // > - // > - // > - // query_info: < - // topk: 5 - // round_decimal: 3 - // metric_type: "MAX_SIM" - // hints: "iterative_filter" - // search_params: "{{\"nprobe\": 10}}" - // > - // placeholder_tag: "$0" - // >)", - // min_val, - // max_val); - // auto plan_str = translate_text_plan_to_binary_plan(raw_plan.c_str()); - // auto plan = - // CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size()); - // auto ph_group_raw = CreatePlaceholderGroupFromBlob( - // vec_num, dim, query_vec.data(), query_vec_lims); - // auto ph_group = - // ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); - // Timestamp timestamp = 1000000; - // std::vector ph_group_arr = {ph_group.get()}; - - // auto sr = sealed_segment->Search(plan.get(), ph_group.get(), timestamp); - // auto sr_parsed = SearchResultToJson(*sr); - // std::cout << sr_parsed.dump(1) << std::endl; - // } - // search with index { LoadIndexInfo load_info; diff --git a/internal/core/unittest/test_utils/c_api_test_utils.h b/internal/core/unittest/test_utils/c_api_test_utils.h index db8cfccf50..d63f3c6005 100644 --- a/internal/core/unittest/test_utils/c_api_test_utils.h +++ b/internal/core/unittest/test_utils/c_api_test_utils.h @@ -165,7 +165,7 @@ get_default_schema_config() { data_type: %1% type_params: < key: "dim" - value: "16" + value: "4" > index_params: < key: "metric_type" @@ -191,7 +191,7 @@ get_default_schema_config_nullable() { data_type: FloatVector type_params: < key: "dim" - value: "16" + value: "4" > index_params: < key: "metric_type" diff --git a/internal/core/unittest/test_utils/indexbuilder_test_utils.h b/internal/core/unittest/test_utils/indexbuilder_test_utils.h index 2952f2b967..6e4ff41b06 100644 --- a/internal/core/unittest/test_utils/indexbuilder_test_utils.h +++ b/internal/core/unittest/test_utils/indexbuilder_test_utils.h @@ -30,7 +30,9 @@ #include "storage/Types.h" #include "knowhere/comp/index_param.h" -constexpr int64_t DIM = 16; +constexpr int64_t DIM = 4; +constexpr int64_t BINARY_DIM = + 8; // Binary vectors need dim to be multiple of 8 constexpr int64_t NQ = 10; constexpr int64_t K = 4; @@ -76,13 +78,13 @@ generate_build_conf(const milvus::IndexType& index_type, } else if (index_type == knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) { return knowhere::Json{ {knowhere::meta::METRIC_TYPE, metric_type}, - {knowhere::meta::DIM, std::to_string(DIM)}, + {knowhere::meta::DIM, std::to_string(BINARY_DIM)}, {knowhere::indexparam::NLIST, "16"}, }; } else if (index_type == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP) { return knowhere::Json{ {knowhere::meta::METRIC_TYPE, metric_type}, - {knowhere::meta::DIM, std::to_string(DIM)}, + {knowhere::meta::DIM, std::to_string(BINARY_DIM)}, }; } else if (index_type == knowhere::IndexEnum::INDEX_HNSW) { return knowhere::Json{ diff --git a/internal/core/unittest/test_vector_array_storage_v2.cpp b/internal/core/unittest/test_vector_array_storage_v2.cpp index ac90523a7a..66422cc00a 100644 --- a/internal/core/unittest/test_vector_array_storage_v2.cpp +++ b/internal/core/unittest/test_vector_array_storage_v2.cpp @@ -52,7 +52,7 @@ using namespace milvus; using namespace milvus::segcore; using namespace milvus::storage; -const int64_t DIM = 32; +const int64_t DIM = 4; SchemaPtr GenVectorArrayTestSchema() { @@ -170,12 +170,12 @@ class TestVectorArrayStorageV2 : public testing::Test { arrow::default_memory_pool(), value_builder); for (int row = 0; row < test_data_count_; row++) { - // Each row contains 10 vectors of dimension DIM + // Each row contains 3 vectors of dimension DIM auto status = list_builder->Append(); EXPECT_TRUE(status.ok()); - // Generate 10 vectors for this row - auto data = generate_float_vector(10, DIM); + // Generate 3 vectors for this row + auto data = generate_float_vector(3, DIM); auto float_builder = std::static_pointer_cast( value_builder); @@ -321,8 +321,8 @@ TEST_F(TestVectorArrayStorageV2, BuildEmbListHNSWIndex) { auto vec_index = dynamic_cast(emb_list_hnsw_index.get()); - // Each row has 10 vectors, so total count should be rows * 10 - EXPECT_EQ(vec_index->Count(), test_data_count_ * chunk_num_ * 10); + // Each row has 3 vectors, so total count should be rows * 3 + EXPECT_EQ(vec_index->Count(), test_data_count_ * chunk_num_ * 3); EXPECT_EQ(vec_index->GetDim(), DIM); {