// Copyright (C) 2019-2020 Zilliz. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software distributed under the License // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License #include #include "segcore/SegmentGrowing.h" #include "segcore/SegmentGrowingImpl.h" #include "pb/schema.pb.h" #include "test_utils/DataGen.h" #include "query/Plan.h" using namespace milvus::segcore; using namespace milvus; namespace pb = milvus::proto; TEST(GrowingIndex, Correctness) { auto schema = std::make_shared(); auto pk = schema->AddDebugField("pk", DataType::INT64); auto random = schema->AddDebugField("random", DataType::DOUBLE); auto vec = schema->AddDebugField( "embeddings", DataType::VECTOR_FLOAT, 128, knowhere::metric::L2); schema->set_primary_field_id(pk); std::map index_params = { {"index_type", "IVF_FLAT"}, {"metric_type", "L2"}, {"nlist", "128"}}; std::map type_params = {{"dim", "128"}}; FieldIndexMeta fieldIndexMeta( vec, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); config.set_enable_growing_segment_index(true); std::map filedMap = {{vec, fieldIndexMeta}}; IndexMetaPtr metaPtr = std::make_shared(226985, std::move(filedMap)); auto segment = CreateGrowingSegment(schema, metaPtr); std::string dsl = R"({ "bool": { "must": [ { "vector": { "embeddings": { "metric_type": "l2", "params": { "nprobe": 16 }, "query": "$0", "topk": 5, "round_decimal":3 } } } ] } })"; int64_t per_batch = 10000; int64_t n_batch = 20; int64_t top_k = 5; for (int64_t i = 0; i < n_batch; i++) { auto dataset = DataGen(schema, per_batch); auto offset = segment->PreInsert(per_batch); auto pks = dataset.get_col(pk); segment->Insert(offset, per_batch, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); auto plan = milvus::query::CreatePlan(*schema, dsl); auto num_queries = 5; auto ph_group_raw = CreatePlaceholderGroup(num_queries, 128, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); Timestamp time = 1000000; auto sr = segment->Search(plan.get(), ph_group.get(), time); EXPECT_EQ(sr->total_nq_, num_queries); EXPECT_EQ(sr->unity_topK_, top_k); EXPECT_EQ(sr->distances_.size(), num_queries * top_k); EXPECT_EQ(sr->seg_offsets_.size(), num_queries * top_k); } } TEST(GrowingIndex, GetVector) { auto schema = std::make_shared(); auto pk = schema->AddDebugField("pk", DataType::INT64); auto random = schema->AddDebugField("random", DataType::DOUBLE); auto vec = schema->AddDebugField( "embeddings", DataType::VECTOR_FLOAT, 128, knowhere::metric::L2); schema->set_primary_field_id(pk); std::map index_params = { {"index_type", "IVF_FLAT"}, {"metric_type", "L2"}, {"nlist", "128"}}; std::map type_params = {{"dim", "128"}}; FieldIndexMeta fieldIndexMeta( vec, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); config.set_enable_growing_segment_index(true); std::map filedMap = {{vec, fieldIndexMeta}}; IndexMetaPtr metaPtr = std::make_shared(100000, std::move(filedMap)); auto segment_growing = CreateGrowingSegment(schema, metaPtr); auto segment = dynamic_cast(segment_growing.get()); int64_t per_batch = 5000; int64_t n_batch = 20; int64_t dim = 128; for (int64_t i = 0; i < n_batch; i++) { auto dataset = DataGen(schema, per_batch); auto fakevec = dataset.get_col(vec); auto offset = segment->PreInsert(per_batch); segment->Insert(offset, per_batch, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); auto num_inserted = (i + 1) * per_batch; auto ids_ds = GenRandomIds(num_inserted); auto result = segment->bulk_subscript(vec, ids_ds->GetIds(), num_inserted); auto vector = result.get()->mutable_vectors()->float_vector().data(); EXPECT_TRUE(vector.size() == num_inserted * dim); for (size_t i = 0; i < num_inserted; ++i) { auto id = ids_ds->GetIds()[i]; for (size_t j = 0; j < 128; ++j) { EXPECT_TRUE(vector[i * dim + j] == fakevec[(id % per_batch) * dim + j]); } } } }