mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
fix: adjust params to make CPP UT run faster (#44223)
fix: https://github.com/milvus-io/milvus/issues/44224 --------- Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
This commit is contained in:
parent
c71b47b52c
commit
ba4cd68edb
@ -117,7 +117,7 @@ class BinlogIndexTest : public ::testing::TestWithParam<Param> {
|
||||
{"index_type", index_type},
|
||||
{"metric_type", metric_type},
|
||||
{"nlist", "64"}};
|
||||
std::map<std::string, std::string> type_params = {{"dim", "128"}};
|
||||
std::map<std::string, std::string> type_params = {{"dim", "4"}};
|
||||
FieldIndexMeta fieldIndexMeta(
|
||||
vec_field_id, std::move(index_params), std::move(type_params));
|
||||
auto& config = SegcoreConfig::default_config();
|
||||
@ -166,7 +166,7 @@ class BinlogIndexTest : public ::testing::TestWithParam<Param> {
|
||||
std::optional<std::string> dense_vec_intermin_index_type = std::nullopt;
|
||||
std::string index_type;
|
||||
size_t data_n = 5000;
|
||||
size_t data_d = 128;
|
||||
size_t data_d = 4;
|
||||
size_t topk = 10;
|
||||
milvus::FieldDataPtr vec_field_data = nullptr;
|
||||
milvus::segcore::SegmentSealedUPtr segment = nullptr;
|
||||
|
||||
@ -63,7 +63,7 @@ using milvus::index::VectorIndex;
|
||||
using milvus::segcore::LoadIndexInfo;
|
||||
|
||||
namespace {
|
||||
// const int DIM = 16;
|
||||
// const int DIM = 4;
|
||||
const int64_t ROW_COUNT = 10 * 1000;
|
||||
const int64_t BIAS = 4200;
|
||||
|
||||
@ -130,7 +130,7 @@ get_default_index_meta() {
|
||||
index_name: "test-index"
|
||||
type_params: <
|
||||
key: "dim"
|
||||
value: "16"
|
||||
value: "4"
|
||||
>
|
||||
index_params: <
|
||||
key: "index_type"
|
||||
@ -3006,12 +3006,13 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) {
|
||||
}
|
||||
|
||||
TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
|
||||
auto dim = 16;
|
||||
// insert data to segment
|
||||
constexpr auto TOPK = 5;
|
||||
|
||||
std::string schema_string =
|
||||
generate_collection_schema<milvus::BinaryVector>(
|
||||
knowhere::metric::JACCARD, DIM);
|
||||
knowhere::metric::JACCARD, dim);
|
||||
auto collection =
|
||||
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
|
||||
auto schema = ((segcore::Collection*)collection)->get_schema();
|
||||
@ -3022,7 +3023,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
|
||||
auto N = ROW_COUNT;
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
|
||||
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
|
||||
auto query_ptr = vec_col.data() + BIAS * dim / 8;
|
||||
|
||||
int64_t offset;
|
||||
PreInsert(segment, N, &offset);
|
||||
@ -3081,7 +3082,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
|
||||
// create place_holder_group
|
||||
int num_queries = 5;
|
||||
auto raw_group = CreatePlaceholderGroupFromBlob<milvus::BinaryVector>(
|
||||
num_queries, DIM, query_ptr);
|
||||
num_queries, dim, query_ptr);
|
||||
auto blob = raw_group.SerializeAsString();
|
||||
|
||||
// search on segment's small index
|
||||
@ -3113,11 +3114,11 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
|
||||
DataType::VECTOR_BINARY,
|
||||
knowhere::metric::JACCARD,
|
||||
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
DIM,
|
||||
dim,
|
||||
N);
|
||||
|
||||
// gen query dataset
|
||||
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
|
||||
auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr);
|
||||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
@ -3189,10 +3190,11 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
|
||||
TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
|
||||
// insert data to segment
|
||||
constexpr auto TOPK = 5;
|
||||
auto dim = 16;
|
||||
|
||||
std::string schema_string =
|
||||
generate_collection_schema<milvus::BinaryVector>(
|
||||
knowhere::metric::JACCARD, DIM);
|
||||
knowhere::metric::JACCARD, dim);
|
||||
auto collection =
|
||||
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
|
||||
auto schema = ((segcore::Collection*)collection)->get_schema();
|
||||
@ -3203,7 +3205,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
|
||||
auto N = ROW_COUNT;
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
|
||||
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
|
||||
auto query_ptr = vec_col.data() + BIAS * dim / 8;
|
||||
|
||||
int64_t offset;
|
||||
PreInsert(segment, N, &offset);
|
||||
@ -3261,7 +3263,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
|
||||
// create place_holder_group
|
||||
int num_queries = 5;
|
||||
auto raw_group = CreatePlaceholderGroupFromBlob<milvus::BinaryVector>(
|
||||
num_queries, DIM, query_ptr);
|
||||
num_queries, dim, query_ptr);
|
||||
auto blob = raw_group.SerializeAsString();
|
||||
|
||||
// search on segment's small index
|
||||
@ -3294,11 +3296,11 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
|
||||
DataType::VECTOR_BINARY,
|
||||
knowhere::metric::JACCARD,
|
||||
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
DIM,
|
||||
dim,
|
||||
N);
|
||||
|
||||
// gen query dataset
|
||||
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
|
||||
auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr);
|
||||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
@ -3370,10 +3372,11 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
|
||||
TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
|
||||
// insert data to segment
|
||||
constexpr auto TOPK = 5;
|
||||
auto dim = 16;
|
||||
|
||||
std::string schema_string =
|
||||
generate_collection_schema<milvus::BinaryVector>(
|
||||
knowhere::metric::JACCARD, DIM);
|
||||
knowhere::metric::JACCARD, dim);
|
||||
auto collection =
|
||||
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
|
||||
auto schema = ((segcore::Collection*)collection)->get_schema();
|
||||
@ -3384,7 +3387,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
|
||||
auto N = ROW_COUNT;
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
|
||||
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
|
||||
auto query_ptr = vec_col.data() + BIAS * dim / 8;
|
||||
|
||||
int64_t offset;
|
||||
PreInsert(segment, N, &offset);
|
||||
@ -3438,7 +3441,7 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
|
||||
int num_queries = 5;
|
||||
int topK = 5;
|
||||
auto raw_group = CreatePlaceholderGroupFromBlob<milvus::BinaryVector>(
|
||||
num_queries, DIM, query_ptr);
|
||||
num_queries, dim, query_ptr);
|
||||
auto blob = raw_group.SerializeAsString();
|
||||
|
||||
// search on segment's small index
|
||||
@ -3469,11 +3472,11 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
|
||||
DataType::VECTOR_BINARY,
|
||||
knowhere::metric::JACCARD,
|
||||
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
DIM,
|
||||
dim,
|
||||
N);
|
||||
|
||||
// gen query dataset
|
||||
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
|
||||
auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr);
|
||||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
@ -3568,10 +3571,11 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
|
||||
TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
|
||||
// insert data to segment
|
||||
constexpr auto TOPK = 5;
|
||||
auto dim = 16;
|
||||
|
||||
std::string schema_string =
|
||||
generate_collection_schema<milvus::BinaryVector>(
|
||||
knowhere::metric::JACCARD, DIM);
|
||||
knowhere::metric::JACCARD, dim);
|
||||
auto collection =
|
||||
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
|
||||
auto schema = ((segcore::Collection*)collection)->get_schema();
|
||||
@ -3582,7 +3586,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
|
||||
auto N = ROW_COUNT;
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
|
||||
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
|
||||
auto query_ptr = vec_col.data() + BIAS * dim / 8;
|
||||
|
||||
int64_t offset;
|
||||
PreInsert(segment, N, &offset);
|
||||
@ -3635,7 +3639,7 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
|
||||
int num_queries = 5;
|
||||
int topK = 5;
|
||||
auto raw_group = CreatePlaceholderGroupFromBlob<milvus::BinaryVector>(
|
||||
num_queries, DIM, query_ptr);
|
||||
num_queries, dim, query_ptr);
|
||||
auto blob = raw_group.SerializeAsString();
|
||||
|
||||
// search on segment's small index
|
||||
@ -3667,11 +3671,11 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
|
||||
DataType::VECTOR_BINARY,
|
||||
knowhere::metric::JACCARD,
|
||||
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
|
||||
DIM,
|
||||
dim,
|
||||
N);
|
||||
|
||||
// gen query dataset
|
||||
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
|
||||
auto query_dataset = knowhere::GenDataSet(num_queries, dim, query_ptr);
|
||||
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
|
||||
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
|
||||
SearchInfo search_info = search_plan->plan_node_->search_info_;
|
||||
|
||||
@ -154,7 +154,7 @@ TEST(CApiTest, StreamReduceGroupBY) {
|
||||
int N = 300;
|
||||
int topK = 100;
|
||||
int num_queries = 2;
|
||||
int dim = 16;
|
||||
int dim = 4;
|
||||
namespace schema = milvus::proto::schema;
|
||||
|
||||
void* c_collection;
|
||||
|
||||
@ -156,17 +156,19 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
"FLOAT16")));
|
||||
|
||||
TEST_P(GrowingIndexTest, Correctness) {
|
||||
auto dim = 4;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto pk = schema->AddDebugField("pk", DataType::INT64);
|
||||
auto random = schema->AddDebugField("random", DataType::DOUBLE);
|
||||
auto vec = schema->AddDebugField("embeddings", data_type, 128, metric_type);
|
||||
auto vec = schema->AddDebugField("embeddings", data_type, dim, metric_type);
|
||||
schema->set_primary_field_id(pk);
|
||||
|
||||
std::map<std::string, std::string> index_params = {
|
||||
{"index_type", index_type},
|
||||
{"metric_type", metric_type},
|
||||
{"nlist", "128"}};
|
||||
std::map<std::string, std::string> type_params = {{"dim", "128"}};
|
||||
std::map<std::string, std::string> type_params = {
|
||||
{"dim", std::to_string(dim)}};
|
||||
FieldIndexMeta fieldIndexMeta(
|
||||
vec, std::move(index_params), std::move(type_params));
|
||||
auto& config = SegcoreConfig::default_config();
|
||||
@ -249,7 +251,7 @@ TEST_P(GrowingIndexTest, Correctness) {
|
||||
auto range_plan_str = range_query_plan_node.SerializeAsString();
|
||||
|
||||
int64_t per_batch = 10000;
|
||||
int64_t n_batch = 20;
|
||||
int64_t n_batch = 5;
|
||||
int64_t top_k = 5;
|
||||
for (int64_t i = 0; i < n_batch; i++) {
|
||||
auto dataset = DataGen(schema, per_batch);
|
||||
@ -295,12 +297,12 @@ TEST_P(GrowingIndexTest, Correctness) {
|
||||
ph_group_raw = CreateSparseFloatPlaceholderGroup(num_queries);
|
||||
} else if (data_type == DataType::VECTOR_FLOAT16) {
|
||||
ph_group_raw = CreatePlaceholderGroup<milvus::Float16Vector>(
|
||||
num_queries, 128, 1024);
|
||||
num_queries, dim, 1024);
|
||||
} else if (data_type == DataType::VECTOR_BFLOAT16) {
|
||||
ph_group_raw = CreatePlaceholderGroup<milvus::BFloat16Vector>(
|
||||
num_queries, 128, 1024);
|
||||
num_queries, dim, 1024);
|
||||
} else {
|
||||
ph_group_raw = CreatePlaceholderGroup(num_queries, 128, 1024);
|
||||
ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
|
||||
}
|
||||
|
||||
auto plan = milvus::query::CreateSearchPlanByExpr(
|
||||
@ -338,7 +340,7 @@ TEST_P(GrowingIndexTest, Correctness) {
|
||||
TEST_P(GrowingIndexTest, AddWithoutBuildPool) {
|
||||
constexpr int N = 1024;
|
||||
constexpr int TOPK = 100;
|
||||
constexpr int dim = 128;
|
||||
constexpr int dim = 4;
|
||||
constexpr int add_cont = 5;
|
||||
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
@ -351,7 +353,7 @@ TEST_P(GrowingIndexTest, AddWithoutBuildPool) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto pk = schema->AddDebugField("pk", DataType::INT64);
|
||||
auto random = schema->AddDebugField("random", DataType::DOUBLE);
|
||||
auto vec = schema->AddDebugField("embeddings", data_type, 128, metric_type);
|
||||
auto vec = schema->AddDebugField("embeddings", data_type, dim, metric_type);
|
||||
schema->set_primary_field_id(pk);
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
@ -434,10 +436,11 @@ TEST_P(GrowingIndexTest, AddWithoutBuildPool) {
|
||||
}
|
||||
|
||||
TEST_P(GrowingIndexTest, MissIndexMeta) {
|
||||
auto dim = 4;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto pk = schema->AddDebugField("pk", DataType::INT64);
|
||||
auto random = schema->AddDebugField("random", DataType::DOUBLE);
|
||||
auto vec = schema->AddDebugField("embeddings", data_type, 128, metric_type);
|
||||
auto vec = schema->AddDebugField("embeddings", data_type, dim, metric_type);
|
||||
schema->set_primary_field_id(pk);
|
||||
|
||||
auto& config = SegcoreConfig::default_config();
|
||||
@ -447,17 +450,19 @@ TEST_P(GrowingIndexTest, MissIndexMeta) {
|
||||
}
|
||||
|
||||
TEST_P(GrowingIndexTest, GetVector) {
|
||||
auto dim = 4;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto pk = schema->AddDebugField("pk", DataType::INT64);
|
||||
auto random = schema->AddDebugField("random", DataType::DOUBLE);
|
||||
auto vec = schema->AddDebugField("embeddings", data_type, 128, metric_type);
|
||||
auto vec = schema->AddDebugField("embeddings", data_type, dim, metric_type);
|
||||
schema->set_primary_field_id(pk);
|
||||
|
||||
std::map<std::string, std::string> index_params = {
|
||||
{"index_type", index_type},
|
||||
{"metric_type", metric_type},
|
||||
{"nlist", "128"}};
|
||||
std::map<std::string, std::string> type_params = {{"dim", "128"}};
|
||||
std::map<std::string, std::string> type_params = {
|
||||
{"dim", std::to_string(dim)}};
|
||||
FieldIndexMeta fieldIndexMeta(
|
||||
vec, std::move(index_params), std::move(type_params));
|
||||
auto& config = SegcoreConfig::default_config();
|
||||
@ -473,11 +478,10 @@ TEST_P(GrowingIndexTest, GetVector) {
|
||||
auto segment_growing = CreateGrowingSegment(schema, metaPtr);
|
||||
auto segment = dynamic_cast<SegmentGrowingImpl*>(segment_growing.get());
|
||||
|
||||
int64_t per_batch = 1000;
|
||||
int64_t n_batch = 5;
|
||||
if (data_type == DataType::VECTOR_FLOAT) {
|
||||
// GetVector for VECTOR_FLOAT
|
||||
int64_t per_batch = 5000;
|
||||
int64_t n_batch = 20;
|
||||
int64_t dim = 128;
|
||||
for (int64_t i = 0; i < n_batch; i++) {
|
||||
auto dataset = DataGen(schema, per_batch);
|
||||
auto fakevec = dataset.get_col<float>(vec);
|
||||
@ -497,7 +501,7 @@ TEST_P(GrowingIndexTest, GetVector) {
|
||||
EXPECT_TRUE(vector.size() == num_inserted * dim);
|
||||
for (size_t i = 0; i < num_inserted; ++i) {
|
||||
auto id = ids_ds->GetIds()[i];
|
||||
for (size_t j = 0; j < 128; ++j) {
|
||||
for (size_t j = 0; j < dim; ++j) {
|
||||
EXPECT_TRUE(vector[i * dim + j] ==
|
||||
fakevec[(id % per_batch) * dim + j]);
|
||||
}
|
||||
@ -505,9 +509,6 @@ TEST_P(GrowingIndexTest, GetVector) {
|
||||
}
|
||||
} else if (data_type == DataType::VECTOR_FLOAT16) {
|
||||
// GetVector for VECTOR_FLOAT16
|
||||
int64_t per_batch = 5000;
|
||||
int64_t n_batch = 20;
|
||||
int64_t dim = 128;
|
||||
for (int64_t i = 0; i < n_batch; i++) {
|
||||
auto dataset = DataGen(schema, per_batch);
|
||||
auto fakevec = dataset.get_col<float16>(vec);
|
||||
@ -525,7 +526,7 @@ TEST_P(GrowingIndexTest, GetVector) {
|
||||
EXPECT_TRUE(vector.size() == num_inserted * dim * sizeof(float16));
|
||||
for (size_t i = 0; i < num_inserted; ++i) {
|
||||
auto id = ids_ds->GetIds()[i];
|
||||
for (size_t j = 0; j < 128; ++j) {
|
||||
for (size_t j = 0; j < dim; ++j) {
|
||||
EXPECT_TRUE(reinterpret_cast<float16*>(
|
||||
vector.data())[i * dim + j] ==
|
||||
fakevec[(id % per_batch) * dim + j]);
|
||||
@ -534,9 +535,6 @@ TEST_P(GrowingIndexTest, GetVector) {
|
||||
}
|
||||
} else if (data_type == DataType::VECTOR_BFLOAT16) {
|
||||
// GetVector for VECTOR_BFLOAT16
|
||||
int64_t per_batch = 5000;
|
||||
int64_t n_batch = 20;
|
||||
int64_t dim = 128;
|
||||
for (int64_t i = 0; i < n_batch; i++) {
|
||||
auto dataset = DataGen(schema, per_batch);
|
||||
auto fakevec = dataset.get_col<bfloat16>(vec);
|
||||
@ -555,7 +553,7 @@ TEST_P(GrowingIndexTest, GetVector) {
|
||||
EXPECT_TRUE(vector.size() == num_inserted * dim * sizeof(bfloat16));
|
||||
for (size_t i = 0; i < num_inserted; ++i) {
|
||||
auto id = ids_ds->GetIds()[i];
|
||||
for (size_t j = 0; j < 128; ++j) {
|
||||
for (size_t j = 0; j < dim; ++j) {
|
||||
EXPECT_TRUE(reinterpret_cast<bfloat16*>(
|
||||
vector.data())[i * dim + j] ==
|
||||
fakevec[(id % per_batch) * dim + j]);
|
||||
@ -564,9 +562,6 @@ TEST_P(GrowingIndexTest, GetVector) {
|
||||
}
|
||||
} else if (is_sparse) {
|
||||
// GetVector for VECTOR_SPARSE_U32_F32
|
||||
int64_t per_batch = 5000;
|
||||
int64_t n_batch = 20;
|
||||
int64_t dim = 128;
|
||||
for (int64_t i = 0; i < n_batch; i++) {
|
||||
auto dataset = DataGen(schema, per_batch);
|
||||
auto fakevec = dataset.get_col<
|
||||
|
||||
@ -51,7 +51,10 @@ TestVecIndex() {
|
||||
ok = google::protobuf::TextFormat::PrintToString(index_params,
|
||||
&index_params_str);
|
||||
assert(ok);
|
||||
auto dataset = GenFieldData(NB, metric_type, TraitType::data_type);
|
||||
auto dataset =
|
||||
std::is_same_v<TraitType, milvus::BinaryVector>
|
||||
? GenFieldData(NB, metric_type, TraitType::data_type, BINARY_DIM)
|
||||
: GenFieldData(NB, metric_type, TraitType::data_type);
|
||||
|
||||
CDataType dtype = TraitType::c_data_type;
|
||||
CIndex index;
|
||||
@ -65,7 +68,8 @@ TestVecIndex() {
|
||||
|
||||
if (std::is_same_v<TraitType, milvus::BinaryVector>) {
|
||||
auto xb_data = dataset.template get_col<uint8_t>(milvus::FieldId(100));
|
||||
status = BuildBinaryVecIndex(index, NB * DIM / 8, xb_data.data());
|
||||
status =
|
||||
BuildBinaryVecIndex(index, NB * BINARY_DIM / 8, xb_data.data());
|
||||
} else if (std::is_same_v<TraitType, milvus::SparseFloatVector>) {
|
||||
auto xb_data = dataset.template get_col<
|
||||
knowhere::sparse::SparseRow<milvus::SparseValueType>>(
|
||||
|
||||
@ -76,6 +76,11 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
|
||||
};
|
||||
|
||||
vec_field_data_type = index_to_vec_type[index_type];
|
||||
|
||||
// Set correct dimension for binary vectors
|
||||
if (vec_field_data_type == DataType::VECTOR_BINARY) {
|
||||
config["dim"] = std::to_string(BINARY_DIM);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -128,9 +133,10 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
||||
vec_field_data_type, config, file_manager_context);
|
||||
knowhere::DataSetPtr xb_dataset;
|
||||
if (vec_field_data_type == DataType::VECTOR_BINARY) {
|
||||
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
|
||||
auto dataset =
|
||||
GenFieldData(NB, metric_type, vec_field_data_type, BINARY_DIM);
|
||||
auto bin_vecs = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data());
|
||||
xb_dataset = knowhere::GenDataSet(NB, BINARY_DIM, bin_vecs.data());
|
||||
ASSERT_NO_THROW(index->Build(xb_dataset));
|
||||
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_U32_F32) {
|
||||
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
|
||||
@ -161,7 +167,9 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
||||
vec_field_data_type, config, file_manager_context);
|
||||
auto vec_index =
|
||||
static_cast<milvus::indexbuilder::VecIndexCreator*>(copy_index.get());
|
||||
if (vec_field_data_type != DataType::VECTOR_SPARSE_U32_F32) {
|
||||
if (vec_field_data_type == DataType::VECTOR_BINARY) {
|
||||
ASSERT_EQ(vec_index->dim(), BINARY_DIM);
|
||||
} else if (vec_field_data_type != DataType::VECTOR_SPARSE_U32_F32) {
|
||||
ASSERT_EQ(vec_index->dim(), DIM);
|
||||
}
|
||||
|
||||
@ -191,12 +199,14 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
|
||||
result = vec_index->Query(xq_dataset, search_info, nullptr);
|
||||
} else {
|
||||
auto nb_for_nq = NQ + query_offset;
|
||||
auto dataset =
|
||||
GenFieldData(nb_for_nq, metric_type, DataType::VECTOR_BINARY);
|
||||
auto dataset = GenFieldData(
|
||||
nb_for_nq, metric_type, DataType::VECTOR_BINARY, BINARY_DIM);
|
||||
auto xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
// offset of binary vector is 8-aligned bit-wise representation.
|
||||
auto xq_dataset = knowhere::GenDataSet(
|
||||
NQ, DIM, xb_bin_data.data() + ((DIM + 7) / 8) * query_offset);
|
||||
NQ,
|
||||
BINARY_DIM,
|
||||
xb_bin_data.data() + ((BINARY_DIM + 7) / 8) * query_offset);
|
||||
result = vec_index->Query(xq_dataset, search_info, nullptr);
|
||||
}
|
||||
|
||||
|
||||
@ -339,13 +339,17 @@ class IndexTest : public ::testing::TestWithParam<Param> {
|
||||
vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
|
||||
}
|
||||
|
||||
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
|
||||
auto dataset =
|
||||
is_binary
|
||||
? GenFieldData(NB, metric_type, vec_field_data_type, BINARY_DIM)
|
||||
: GenFieldData(NB, metric_type, vec_field_data_type);
|
||||
if (is_binary) {
|
||||
// binary vector
|
||||
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
|
||||
xb_dataset = knowhere::GenDataSet(NB, DIM, xb_bin_data.data());
|
||||
xb_dataset =
|
||||
knowhere::GenDataSet(NB, BINARY_DIM, xb_bin_data.data());
|
||||
xq_dataset = knowhere::GenDataSet(
|
||||
NQ, DIM, xb_bin_data.data() + DIM * query_offset);
|
||||
NQ, BINARY_DIM, xb_bin_data.data() + BINARY_DIM * query_offset);
|
||||
} else if (is_sparse) {
|
||||
// sparse vector
|
||||
xb_sparse_data = dataset.get_col<
|
||||
@ -415,7 +419,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
TEST(Indexing, Iterator) {
|
||||
constexpr int N = 10240;
|
||||
constexpr int TOPK = 100;
|
||||
constexpr int dim = 128;
|
||||
constexpr int dim = 4;
|
||||
|
||||
auto [raw_data, timestamps, uids] = generate_data<dim>(N);
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
@ -467,6 +471,7 @@ TEST(Indexing, Iterator) {
|
||||
}
|
||||
|
||||
TEST_P(IndexTest, BuildAndQuery) {
|
||||
auto dim = is_binary ? BINARY_DIM : DIM;
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.index_type = index_type;
|
||||
create_index_info.metric_type = metric_type;
|
||||
@ -506,7 +511,7 @@ TEST_P(IndexTest, BuildAndQuery) {
|
||||
ASSERT_NO_THROW(vec_index->Load(milvus::tracer::TraceContext{}, load_conf));
|
||||
EXPECT_EQ(vec_index->Count(), NB);
|
||||
if (!is_sparse) {
|
||||
EXPECT_EQ(vec_index->GetDim(), DIM);
|
||||
EXPECT_EQ(vec_index->GetDim(), dim);
|
||||
}
|
||||
|
||||
milvus::SearchInfo search_info;
|
||||
@ -535,6 +540,7 @@ TEST_P(IndexTest, BuildAndQuery) {
|
||||
}
|
||||
|
||||
TEST_P(IndexTest, Mmap) {
|
||||
auto dim = is_binary ? BINARY_DIM : DIM;
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.index_type = index_type;
|
||||
create_index_info.metric_type = metric_type;
|
||||
@ -577,7 +583,7 @@ TEST_P(IndexTest, Mmap) {
|
||||
milvus::proto::common::LoadPriority::HIGH;
|
||||
vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
|
||||
EXPECT_EQ(vec_index->Count(), NB);
|
||||
EXPECT_EQ(vec_index->GetDim(), is_sparse ? kTestSparseDim : DIM);
|
||||
EXPECT_EQ(vec_index->GetDim(), is_sparse ? kTestSparseDim : dim);
|
||||
|
||||
milvus::SearchInfo search_info;
|
||||
search_info.topk_ = K;
|
||||
@ -597,6 +603,7 @@ TEST_P(IndexTest, Mmap) {
|
||||
}
|
||||
|
||||
TEST_P(IndexTest, GetVector) {
|
||||
auto dim = is_binary ? BINARY_DIM : DIM;
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.index_type = index_type;
|
||||
create_index_info.metric_type = metric_type;
|
||||
@ -634,7 +641,7 @@ TEST_P(IndexTest, GetVector) {
|
||||
milvus::proto::common::LoadPriority::HIGH;
|
||||
vec_index->Load(milvus::tracer::TraceContext{}, load_conf);
|
||||
if (!is_sparse) {
|
||||
EXPECT_EQ(vec_index->GetDim(), DIM);
|
||||
EXPECT_EQ(vec_index->GetDim(), dim);
|
||||
}
|
||||
EXPECT_EQ(vec_index->Count(), NB);
|
||||
|
||||
@ -646,7 +653,7 @@ TEST_P(IndexTest, GetVector) {
|
||||
if (is_binary) {
|
||||
auto results = vec_index->GetVector(ids_ds);
|
||||
EXPECT_EQ(results.size(), xb_bin_data.size());
|
||||
const auto data_bytes = DIM / 8;
|
||||
const auto data_bytes = dim / 8;
|
||||
for (size_t i = 0; i < NB; ++i) {
|
||||
auto id = ids_ds->GetIds()[i];
|
||||
for (size_t j = 0; j < data_bytes; ++j) {
|
||||
@ -672,8 +679,8 @@ TEST_P(IndexTest, GetVector) {
|
||||
ASSERT_EQ(result_vectors.size(), xb_data.size());
|
||||
for (size_t i = 0; i < NB; ++i) {
|
||||
auto id = ids_ds->GetIds()[i];
|
||||
for (size_t j = 0; j < DIM; ++j) {
|
||||
ASSERT_EQ(result_vectors[i * DIM + j], xb_data[id * DIM + j]);
|
||||
for (size_t j = 0; j < dim; ++j) {
|
||||
ASSERT_EQ(result_vectors[i * dim + j], xb_data[id * dim + j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -31,8 +31,8 @@ using namespace milvus::segcore;
|
||||
|
||||
using milvus::segcore::LoadIndexInfo;
|
||||
|
||||
const int64_t ROW_COUNT = 10 * 1000;
|
||||
const int64_t BIAS = 4200;
|
||||
const int64_t ROW_COUNT = 2 * 1000;
|
||||
const int64_t BIAS = 1000;
|
||||
|
||||
using Param = std::string;
|
||||
class SealedTest : public ::testing::TestWithParam<Param> {
|
||||
@ -44,7 +44,7 @@ class SealedTest : public ::testing::TestWithParam<Param> {
|
||||
|
||||
TEST(Sealed, without_predicate) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto fake_id = schema->AddDebugField(
|
||||
@ -68,7 +68,7 @@ TEST(Sealed, without_predicate) {
|
||||
|
||||
auto dataset = DataGen(schema, N);
|
||||
auto vec_col = dataset.get_col<float>(fake_id);
|
||||
for (int64_t i = 0; i < 1000 * dim; ++i) {
|
||||
for (int64_t i = 0; i < (ROW_COUNT / 2) * dim; ++i) {
|
||||
vec_col.push_back(0);
|
||||
}
|
||||
auto query_ptr = vec_col.data() + BIAS * dim;
|
||||
@ -85,7 +85,7 @@ TEST(Sealed, without_predicate) {
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw =
|
||||
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
CreatePlaceholderGroupFromBlob(num_queries, dim, query_ptr);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp timestamp = 1000000;
|
||||
@ -111,7 +111,8 @@ TEST(Sealed, without_predicate) {
|
||||
|
||||
auto search_conf = knowhere::Json{{knowhere::indexparam::NPROBE, 10}};
|
||||
|
||||
auto database = knowhere::GenDataSet(N, dim, vec_col.data() + 1000 * dim);
|
||||
auto database =
|
||||
knowhere::GenDataSet(N, dim, vec_col.data() + (ROW_COUNT / 2) * dim);
|
||||
indexing->BuildWithDataset(database, build_conf);
|
||||
|
||||
auto vec_index = dynamic_cast<milvus::index::VectorIndex*>(indexing.get());
|
||||
@ -155,7 +156,7 @@ TEST(Sealed, without_predicate) {
|
||||
|
||||
TEST(Sealed, without_search_ef_less_than_limit) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto fake_id = schema->AddDebugField(
|
||||
@ -186,7 +187,7 @@ TEST(Sealed, without_search_ef_less_than_limit) {
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw =
|
||||
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
CreatePlaceholderGroupFromBlob(num_queries, dim, query_ptr);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp timestamp = 1000000;
|
||||
@ -238,7 +239,7 @@ TEST(Sealed, without_search_ef_less_than_limit) {
|
||||
|
||||
TEST(Sealed, with_predicate) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto fake_id = schema->AddDebugField(
|
||||
@ -256,10 +257,10 @@ TEST(Sealed, with_predicate) {
|
||||
lower_inclusive: true,
|
||||
upper_inclusive: false,
|
||||
lower_value: <
|
||||
int64_val: 4200
|
||||
int64_val: 1000
|
||||
>
|
||||
upper_value: <
|
||||
int64_val: 4205
|
||||
int64_val: 1005
|
||||
>
|
||||
>
|
||||
>
|
||||
@ -290,7 +291,7 @@ TEST(Sealed, with_predicate) {
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw =
|
||||
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
CreatePlaceholderGroupFromBlob(num_queries, dim, query_ptr);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp timestamp = 1000000;
|
||||
@ -353,7 +354,7 @@ TEST(Sealed, with_predicate) {
|
||||
|
||||
TEST(Sealed, with_predicate_filter_all) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
// auto metric_type = MetricType::METRIC_L2;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
@ -398,7 +399,7 @@ TEST(Sealed, with_predicate_filter_all) {
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw =
|
||||
CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr);
|
||||
CreatePlaceholderGroupFromBlob(num_queries, dim, query_ptr);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
Timestamp timestamp = 1000000;
|
||||
@ -482,7 +483,7 @@ TEST(Sealed, with_predicate_filter_all) {
|
||||
}
|
||||
|
||||
TEST(Sealed, LoadFieldData) {
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
auto N = ROW_COUNT;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
@ -553,7 +554,7 @@ TEST(Sealed, LoadFieldData) {
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
@ -657,7 +658,7 @@ TEST(Sealed, LoadFieldData) {
|
||||
}
|
||||
|
||||
TEST(Sealed, ClearData) {
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
auto N = ROW_COUNT;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
@ -714,7 +715,7 @@ TEST(Sealed, ClearData) {
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
@ -762,7 +763,7 @@ TEST(Sealed, ClearData) {
|
||||
}
|
||||
|
||||
TEST(Sealed, LoadFieldDataMmap) {
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
auto N = ROW_COUNT;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
@ -819,7 +820,7 @@ TEST(Sealed, LoadFieldDataMmap) {
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
@ -882,7 +883,7 @@ TEST(Sealed, LoadPkScalarIndex) {
|
||||
}
|
||||
|
||||
TEST(Sealed, LoadScalarIndex) {
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
size_t N = ROW_COUNT;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
@ -931,7 +932,7 @@ TEST(Sealed, LoadScalarIndex) {
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
@ -985,7 +986,7 @@ TEST(Sealed, LoadScalarIndex) {
|
||||
}
|
||||
|
||||
TEST(Sealed, Delete) {
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
auto N = 10;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
@ -1033,7 +1034,7 @@ TEST(Sealed, Delete) {
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
@ -1067,7 +1068,7 @@ TEST(Sealed, Delete) {
|
||||
}
|
||||
|
||||
TEST(Sealed, OverlapDelete) {
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
auto N = 10;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
@ -1115,7 +1116,7 @@ TEST(Sealed, OverlapDelete) {
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
@ -1193,7 +1194,7 @@ GenQueryVecs(int N, int dim) {
|
||||
|
||||
TEST(Sealed, BF) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 128;
|
||||
auto dim = 4;
|
||||
auto metric_type = "L2";
|
||||
auto fake_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
@ -1255,7 +1256,7 @@ TEST(Sealed, BF) {
|
||||
|
||||
TEST(Sealed, BF_Overflow) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 128;
|
||||
auto dim = 4;
|
||||
auto metric_type = "L2";
|
||||
auto fake_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
@ -1400,7 +1401,7 @@ TEST(Sealed, RealCount) {
|
||||
}
|
||||
|
||||
TEST(Sealed, GetVector) {
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto N = ROW_COUNT;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
@ -1448,7 +1449,7 @@ TEST(Sealed, GetVector) {
|
||||
}
|
||||
|
||||
TEST(Sealed, LoadArrayFieldData) {
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
auto N = 10;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
@ -1490,7 +1491,7 @@ TEST(Sealed, LoadArrayFieldData) {
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
@ -1505,7 +1506,7 @@ TEST(Sealed, LoadArrayFieldData) {
|
||||
}
|
||||
|
||||
TEST(Sealed, LoadArrayFieldDataWithMMap) {
|
||||
auto dim = 16;
|
||||
auto dim = 4;
|
||||
auto topK = 5;
|
||||
auto N = ROW_COUNT;
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
@ -1547,7 +1548,7 @@ TEST(Sealed, LoadArrayFieldDataWithMMap) {
|
||||
auto plan =
|
||||
CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
auto num_queries = 5;
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
|
||||
auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
@ -1557,7 +1558,7 @@ TEST(Sealed, LoadArrayFieldDataWithMMap) {
|
||||
|
||||
TEST(Sealed, SkipIndexSkipUnaryRange) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 128;
|
||||
auto dim = 4;
|
||||
auto metrics_type = "L2";
|
||||
auto fake_vec_fid = schema->AddDebugField(
|
||||
"fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type);
|
||||
@ -1706,7 +1707,7 @@ TEST(Sealed, SkipIndexSkipUnaryRange) {
|
||||
|
||||
TEST(Sealed, SkipIndexSkipBinaryRange) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 128;
|
||||
auto dim = 4;
|
||||
auto metrics_type = "L2";
|
||||
auto fake_vec_fid = schema->AddDebugField(
|
||||
"fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type);
|
||||
@ -1749,7 +1750,7 @@ TEST(Sealed, SkipIndexSkipBinaryRange) {
|
||||
|
||||
TEST(Sealed, SkipIndexSkipUnaryRangeNullable) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 128;
|
||||
auto dim = 4;
|
||||
auto metrics_type = "L2";
|
||||
auto fake_vec_fid = schema->AddDebugField(
|
||||
"fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type);
|
||||
@ -1821,7 +1822,7 @@ TEST(Sealed, SkipIndexSkipUnaryRangeNullable) {
|
||||
|
||||
TEST(Sealed, SkipIndexSkipBinaryRangeNullable) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 128;
|
||||
auto dim = 4;
|
||||
auto metrics_type = "L2";
|
||||
auto fake_vec_fid = schema->AddDebugField(
|
||||
"fakeVec", DataType::VECTOR_FLOAT, dim, metrics_type);
|
||||
@ -1864,7 +1865,7 @@ TEST(Sealed, SkipIndexSkipBinaryRangeNullable) {
|
||||
|
||||
TEST(Sealed, SkipIndexSkipStringRange) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto dim = 128;
|
||||
auto dim = 4;
|
||||
auto metrics_type = "L2";
|
||||
auto pk_fid = schema->AddDebugField("pk", DataType::INT64);
|
||||
auto string_fid = schema->AddDebugField("string_field", DataType::VARCHAR);
|
||||
@ -1927,6 +1928,7 @@ TEST(Sealed, SkipIndexSkipStringRange) {
|
||||
TEST(Sealed, QueryAllFields) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto dim = 4;
|
||||
auto bool_field = schema->AddDebugField("bool", DataType::BOOL);
|
||||
auto int8_field = schema->AddDebugField("int8", DataType::INT8);
|
||||
auto int16_field = schema->AddDebugField("int16", DataType::INT16);
|
||||
@ -1949,20 +1951,21 @@ TEST(Sealed, QueryAllFields) {
|
||||
auto float_array_field =
|
||||
schema->AddDebugField("float_array", DataType::ARRAY, DataType::FLOAT);
|
||||
auto vec = schema->AddDebugField(
|
||||
"embeddings", DataType::VECTOR_FLOAT, 128, metric_type);
|
||||
"embeddings", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
auto float16_vec = schema->AddDebugField(
|
||||
"float16_vec", DataType::VECTOR_FLOAT16, 128, metric_type);
|
||||
"float16_vec", DataType::VECTOR_FLOAT16, dim, metric_type);
|
||||
auto bfloat16_vec = schema->AddDebugField(
|
||||
"bfloat16_vec", DataType::VECTOR_BFLOAT16, 128, metric_type);
|
||||
"bfloat16_vec", DataType::VECTOR_BFLOAT16, dim, metric_type);
|
||||
auto int8_vec = schema->AddDebugField(
|
||||
"int8_vec", DataType::VECTOR_INT8, 128, metric_type);
|
||||
"int8_vec", DataType::VECTOR_INT8, dim, metric_type);
|
||||
schema->set_primary_field_id(int64_field);
|
||||
|
||||
std::map<std::string, std::string> index_params = {
|
||||
{"index_type", "IVF_FLAT"},
|
||||
{"metric_type", metric_type},
|
||||
{"nlist", "128"}};
|
||||
std::map<std::string, std::string> type_params = {{"dim", "128"}};
|
||||
std::map<std::string, std::string> type_params = {
|
||||
{"dim", std::to_string(dim)}};
|
||||
FieldIndexMeta fieldIndexMeta(
|
||||
vec, std::move(index_params), std::move(type_params));
|
||||
std::map<FieldId, FieldIndexMeta> filedMap = {{vec, fieldIndexMeta}};
|
||||
@ -1973,7 +1976,6 @@ TEST(Sealed, QueryAllFields) {
|
||||
dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
|
||||
int64_t dataset_size = 1000;
|
||||
int64_t dim = 128;
|
||||
auto dataset = DataGen(schema, dataset_size);
|
||||
segment_sealed = CreateSealedWithFieldDataLoaded(schema, dataset);
|
||||
segment = dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
@ -2094,6 +2096,7 @@ TEST(Sealed, QueryAllFields) {
|
||||
TEST(Sealed, QueryAllNullableFields) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto metric_type = knowhere::metric::L2;
|
||||
auto dim = 4;
|
||||
auto bool_field = schema->AddDebugField("bool", DataType::BOOL, true);
|
||||
auto int8_field = schema->AddDebugField("int8", DataType::INT8, true);
|
||||
auto int16_field = schema->AddDebugField("int16", DataType::INT16, true);
|
||||
@ -2117,14 +2120,15 @@ TEST(Sealed, QueryAllNullableFields) {
|
||||
auto float_array_field = schema->AddDebugField(
|
||||
"float_array", DataType::ARRAY, DataType::FLOAT, true);
|
||||
auto vec = schema->AddDebugField(
|
||||
"embeddings", DataType::VECTOR_FLOAT, 128, metric_type);
|
||||
"embeddings", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
schema->set_primary_field_id(int64_field);
|
||||
|
||||
std::map<std::string, std::string> index_params = {
|
||||
{"index_type", "IVF_FLAT"},
|
||||
{"metric_type", metric_type},
|
||||
{"nlist", "128"}};
|
||||
std::map<std::string, std::string> type_params = {{"dim", "128"}};
|
||||
std::map<std::string, std::string> type_params = {
|
||||
{"dim", std::to_string(dim)}};
|
||||
FieldIndexMeta fieldIndexMeta(
|
||||
vec, std::move(index_params), std::move(type_params));
|
||||
std::map<FieldId, FieldIndexMeta> filedMap = {{vec, fieldIndexMeta}};
|
||||
@ -2135,7 +2139,6 @@ TEST(Sealed, QueryAllNullableFields) {
|
||||
dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
|
||||
int64_t dataset_size = 1000;
|
||||
int64_t dim = 128;
|
||||
auto dataset = DataGen(schema, dataset_size);
|
||||
segment_sealed = CreateSealedWithFieldDataLoaded(schema, dataset);
|
||||
segment = dynamic_cast<ChunkedSegmentSealedImpl*>(segment_sealed.get());
|
||||
@ -2278,9 +2281,10 @@ TEST(Sealed, SearchSortedPk) {
|
||||
TEST(Sealed, QueryVectorArrayAllFields) {
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto metric_type = knowhere::metric::MAX_SIM;
|
||||
int64_t dim = 4;
|
||||
auto int64_field = schema->AddDebugField("int64", DataType::INT64);
|
||||
auto array_vec = schema->AddDebugVectorArrayField(
|
||||
"array_vec", DataType::VECTOR_FLOAT, 128, metric_type);
|
||||
"array_vec", DataType::VECTOR_FLOAT, dim, metric_type);
|
||||
schema->set_primary_field_id(int64_field);
|
||||
|
||||
std::map<FieldId, FieldIndexMeta> filedMap{};
|
||||
@ -2288,7 +2292,6 @@ TEST(Sealed, QueryVectorArrayAllFields) {
|
||||
std::make_shared<CollectionIndexMeta>(100000, std::move(filedMap));
|
||||
|
||||
int64_t dataset_size = 1000;
|
||||
int64_t dim = 128;
|
||||
auto dataset = DataGen(schema, dataset_size);
|
||||
auto segment_sealed = CreateSealedWithFieldDataLoaded(schema, dataset);
|
||||
auto segment =
|
||||
@ -2336,7 +2339,7 @@ TEST(Sealed, SearchVectorArray) {
|
||||
int64_t index_build_id = 4000;
|
||||
int64_t index_version = 4000;
|
||||
int64_t index_id = 5000;
|
||||
int64_t dim = 32;
|
||||
int64_t dim = 4;
|
||||
|
||||
auto schema = std::make_shared<Schema>();
|
||||
auto metric_type = knowhere::metric::MAX_SIM;
|
||||
@ -2359,8 +2362,8 @@ TEST(Sealed, SearchVectorArray) {
|
||||
IndexMetaPtr metaPtr =
|
||||
std::make_shared<CollectionIndexMeta>(100000, std::move(filedMap));
|
||||
|
||||
int64_t dataset_size = 1000;
|
||||
auto emb_list_len = 10;
|
||||
int64_t dataset_size = 100;
|
||||
auto emb_list_len = 2;
|
||||
auto dataset = DataGen(schema, dataset_size, 42, 0, 1, emb_list_len);
|
||||
|
||||
// create field data
|
||||
@ -2493,57 +2496,6 @@ TEST(Sealed, SearchVectorArray) {
|
||||
std::cout << sr_parsed.dump(1) << std::endl;
|
||||
}
|
||||
|
||||
// // brute force search with iterative filter
|
||||
// {
|
||||
// auto [min, max] =
|
||||
// std::minmax_element(int_values.begin(), int_values.end());
|
||||
// auto min_val = *min;
|
||||
// auto max_val = *max;
|
||||
|
||||
// auto raw_plan = fmt::format(R"(vector_anns: <
|
||||
// field_id: 101
|
||||
// predicates: <
|
||||
// binary_range_expr: <
|
||||
// column_info: <
|
||||
// field_id: 100
|
||||
// data_type: Int64
|
||||
// >
|
||||
// lower_inclusive: true
|
||||
// upper_inclusive: true
|
||||
// lower_value: <
|
||||
// int64_val: {}
|
||||
// >
|
||||
// upper_value: <
|
||||
// int64_val: {}
|
||||
// >
|
||||
// >
|
||||
// >
|
||||
// query_info: <
|
||||
// topk: 5
|
||||
// round_decimal: 3
|
||||
// metric_type: "MAX_SIM"
|
||||
// hints: "iterative_filter"
|
||||
// search_params: "{{\"nprobe\": 10}}"
|
||||
// >
|
||||
// placeholder_tag: "$0"
|
||||
// >)",
|
||||
// min_val,
|
||||
// max_val);
|
||||
// auto plan_str = translate_text_plan_to_binary_plan(raw_plan.c_str());
|
||||
// auto plan =
|
||||
// CreateSearchPlanByExpr(schema, plan_str.data(), plan_str.size());
|
||||
// auto ph_group_raw = CreatePlaceholderGroupFromBlob<EmbListFloatVector>(
|
||||
// vec_num, dim, query_vec.data(), query_vec_lims);
|
||||
// auto ph_group =
|
||||
// ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
// Timestamp timestamp = 1000000;
|
||||
// std::vector<const PlaceholderGroup*> ph_group_arr = {ph_group.get()};
|
||||
|
||||
// auto sr = sealed_segment->Search(plan.get(), ph_group.get(), timestamp);
|
||||
// auto sr_parsed = SearchResultToJson(*sr);
|
||||
// std::cout << sr_parsed.dump(1) << std::endl;
|
||||
// }
|
||||
|
||||
// search with index
|
||||
{
|
||||
LoadIndexInfo load_info;
|
||||
|
||||
@ -165,7 +165,7 @@ get_default_schema_config() {
|
||||
data_type: %1%
|
||||
type_params: <
|
||||
key: "dim"
|
||||
value: "16"
|
||||
value: "4"
|
||||
>
|
||||
index_params: <
|
||||
key: "metric_type"
|
||||
@ -191,7 +191,7 @@ get_default_schema_config_nullable() {
|
||||
data_type: FloatVector
|
||||
type_params: <
|
||||
key: "dim"
|
||||
value: "16"
|
||||
value: "4"
|
||||
>
|
||||
index_params: <
|
||||
key: "metric_type"
|
||||
|
||||
@ -30,7 +30,9 @@
|
||||
#include "storage/Types.h"
|
||||
#include "knowhere/comp/index_param.h"
|
||||
|
||||
constexpr int64_t DIM = 16;
|
||||
constexpr int64_t DIM = 4;
|
||||
constexpr int64_t BINARY_DIM =
|
||||
8; // Binary vectors need dim to be multiple of 8
|
||||
constexpr int64_t NQ = 10;
|
||||
constexpr int64_t K = 4;
|
||||
|
||||
@ -76,13 +78,13 @@ generate_build_conf(const milvus::IndexType& index_type,
|
||||
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
|
||||
return knowhere::Json{
|
||||
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||
{knowhere::meta::DIM, std::to_string(DIM)},
|
||||
{knowhere::meta::DIM, std::to_string(BINARY_DIM)},
|
||||
{knowhere::indexparam::NLIST, "16"},
|
||||
};
|
||||
} else if (index_type == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP) {
|
||||
return knowhere::Json{
|
||||
{knowhere::meta::METRIC_TYPE, metric_type},
|
||||
{knowhere::meta::DIM, std::to_string(DIM)},
|
||||
{knowhere::meta::DIM, std::to_string(BINARY_DIM)},
|
||||
};
|
||||
} else if (index_type == knowhere::IndexEnum::INDEX_HNSW) {
|
||||
return knowhere::Json{
|
||||
|
||||
@ -52,7 +52,7 @@ using namespace milvus;
|
||||
using namespace milvus::segcore;
|
||||
using namespace milvus::storage;
|
||||
|
||||
const int64_t DIM = 32;
|
||||
const int64_t DIM = 4;
|
||||
|
||||
SchemaPtr
|
||||
GenVectorArrayTestSchema() {
|
||||
@ -170,12 +170,12 @@ class TestVectorArrayStorageV2 : public testing::Test {
|
||||
arrow::default_memory_pool(), value_builder);
|
||||
|
||||
for (int row = 0; row < test_data_count_; row++) {
|
||||
// Each row contains 10 vectors of dimension DIM
|
||||
// Each row contains 3 vectors of dimension DIM
|
||||
auto status = list_builder->Append();
|
||||
EXPECT_TRUE(status.ok());
|
||||
|
||||
// Generate 10 vectors for this row
|
||||
auto data = generate_float_vector(10, DIM);
|
||||
// Generate 3 vectors for this row
|
||||
auto data = generate_float_vector(3, DIM);
|
||||
auto float_builder =
|
||||
std::static_pointer_cast<arrow::FloatBuilder>(
|
||||
value_builder);
|
||||
@ -321,8 +321,8 @@ TEST_F(TestVectorArrayStorageV2, BuildEmbListHNSWIndex) {
|
||||
auto vec_index =
|
||||
dynamic_cast<milvus::index::VectorIndex*>(emb_list_hnsw_index.get());
|
||||
|
||||
// Each row has 10 vectors, so total count should be rows * 10
|
||||
EXPECT_EQ(vec_index->Count(), test_data_count_ * chunk_num_ * 10);
|
||||
// Each row has 3 vectors, so total count should be rows * 3
|
||||
EXPECT_EQ(vec_index->Count(), test_data_count_ * chunk_num_ * 3);
|
||||
EXPECT_EQ(vec_index->GetDim(), DIM);
|
||||
|
||||
{
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user