milvus/internal/core/unittest/test_c_api.cpp
Alexander Guzhva c4b37fb285
enhance: Custom bitset and bitsetview prototypes (#30454)
Issue: #31285 

Basically, I've replaced `FixedVector<bool>` and `boost::dynamic_bitset`
with custom bitset and bitsetview in order to reduce the memory
bandwidth & increase performance for the filtering.

This PR is for internal use only. 

Current progress (numbers are for GCC 9.5.0 on Ubuntu 22.04 LTS;
clang-17 produces better performance numbers):
Baseline:
```
[ RUN      ] CApiTest.AssembeChunkPerfTest
start test
cost: 17903us
[       OK ] CApiTest.AssembeChunkPerfTest (183 ms)

[ RUN      ] Expr.TestMultiLogicalExprsOptimization
cost: 1391us
cost: 5us
cost: 4us
cost: 4us
cost: 6us
cost: 4us
cost: 4us
cost: 4us
cost: 4us
cost: 4us
143
cost: 10us
cost: 8us
cost: 10us
cost: 8us
cost: 8us
cost: 8us
cost: 8us
cost: 8us
cost: 8us
cost: 9us
8
/home/ubuntu/zilliz/milvus4/milvus/internal/core/unittest/test_expr.cpp:1561: Failure
Expected: (cost_op) < (cost_no_op), actual: 143 vs 8
[  FAILED  ] Expr.TestMultiLogicalExprsOptimization (7 ms)
[ RUN      ] Expr.TestExprs
start test
3cost: 889us
start test
10cost: 2us
start test
20cost: 2us
start test
30cost: 2us
start test
50cost: 3us
start test
100cost: 7us
start test
200cost: 16us
[       OK ] Expr.TestExprs (9 ms)

[ RUN      ] Expr.TestUnaryBenchTest
start test type:2
 cost: 124.8us
start test type:3
 cost: 163.1us
start test type:4
 cost: 275.9us
start test type:5
 cost: 590.9us
start test type:10
 cost: 62.7us
start test type:11
 cost: 65.9us
[       OK ] Expr.TestUnaryBenchTest (1153 ms)
[ RUN      ] Expr.TestBinaryRangeBenchTest
start test type:2
 cost: 151.4us
start test type:3
 cost: 198.4us
start test type:4
 cost: 361.9us
start test type:5
 cost: 753.9us
start test type:10
 cost: 64.6us
start test type:11
 cost: 62.2us
[       OK ] Expr.TestBinaryRangeBenchTest (1151 ms)
[ RUN      ] Expr.TestLogicalUnaryBenchTest
start test type:2
 cost: 121.14us
start test type:3
 cost: 156.84us
start test type:4
 cost: 249.76us
start test type:5
 cost: 534.44us
start test type:10
 cost: 82.2us
start test type:11
 cost: 83.52us
[       OK ] Expr.TestLogicalUnaryBenchTest (1202 ms)
[ RUN      ] Expr.TestBinaryLogicalBenchTest
start test type:2
 cost: 80.64us
start test type:3
 cost: 78.22us
start test type:4
 cost: 255.76us
start test type:5
 cost: 532.04us
start test type:10
 cost: 89.26us
start test type:11
 cost: 90us
[       OK ] Expr.TestBinaryLogicalBenchTest (1198 ms)
[ RUN      ] Expr.TestBinaryArithOpEvalRangeBenchExpr
start test type:2
 cost: 401.7us
start test type:3
 cost: 420.96us
start test type:4
 cost: 418.04us
start test type:5
 cost: 470.54us
start test type:10
 cost: 250.32us
start test type:11
 cost: 850.08us
[       OK ] Expr.TestBinaryArithOpEvalRangeBenchExpr (1273 ms)
[ RUN      ] Expr.TestCompareExprBenchTest
start test type:2
 cost: 162us
start test type:3
 cost: 142us
start test type:4
 cost: 374us
start test type:5
 cost: 674us
start test type:10
 cost: 366us
start test type:11
 cost: 645us
[       OK ] Expr.TestCompareExprBenchTest (1214 ms)
[ RUN      ] Expr.TestRefactorExprs
start test
3cost: 1253us
start test
10cost: 1060us
start test
20cost: 681us
start test
30cost: 522us
start test
50cost: 511us
start test
100cost: 506us
start test
200cost: 497us
[       OK ] Expr.TestRefactorExprs (1142 ms)

```

Candidate:
```
[ RUN      ] CApiTest.AssembeChunkPerfTest
start test
cost: 6099us
[       OK ] CApiTest.AssembeChunkPerfTest (153 ms)

[ RUN      ] Expr.TestMultiLogicalExprsOptimization
cost: 42us
cost: 15us
cost: 15us
cost: 14us
cost: 15us
cost: 15us
cost: 15us
cost: 15us
cost: 15us
cost: 15us
17
cost: 41us
cost: 39us
cost: 33us
cost: 33us
cost: 33us
cost: 33us
cost: 34us
cost: 41us
cost: 34us
cost: 34us
35
[       OK ] Expr.TestMultiLogicalExprsOptimization (6 ms)
[ RUN      ] Expr.TestExprs
start test
3cost: 20us
start test
10cost: 2us
start test
20cost: 2us
start test
30cost: 2us
start test
50cost: 4us
start test
100cost: 8us
start test
200cost: 15us
[       OK ] Expr.TestExprs (8 ms)

[ RUN      ] Expr.TestUnaryBenchTest
start test type:2
 cost: 55.7us
start test type:3
 cost: 79.8us
start test type:4
 cost: 177.6us
start test type:5
 cost: 337.2us
start test type:10
 cost: 16.9us
start test type:11
 cost: 15.7us
[       OK ] Expr.TestUnaryBenchTest (1140 ms)
[ RUN      ] Expr.TestBinaryRangeBenchTest
start test type:2
 cost: 57.1us
start test type:3
 cost: 87us
start test type:4
 cost: 177.5us
start test type:5
 cost: 342.7us
start test type:10
 cost: 17.9us
start test type:11
 cost: 16.7us
[       OK ] Expr.TestBinaryRangeBenchTest (1152 ms)
[ RUN      ] Expr.TestLogicalUnaryBenchTest
start test type:2
 cost: 34.58us
start test type:3
 cost: 68.86us
start test type:4
 cost: 151.38us
start test type:5
 cost: 286.8us
start test type:10
 cost: 16.54us
start test type:11
 cost: 16.7us
[       OK ] Expr.TestLogicalUnaryBenchTest (1165 ms)
[ RUN      ] Expr.TestBinaryLogicalBenchTest
start test type:2
 cost: 20us
start test type:3
 cost: 17.1us
start test type:4
 cost: 154.12us
start test type:5
 cost: 286.1us
start test type:10
 cost: 19.6us
start test type:11
 cost: 19.24us
[       OK ] Expr.TestBinaryLogicalBenchTest (1188 ms)
[ RUN      ] Expr.TestBinaryArithOpEvalRangeBenchExpr
start test type:2
 cost: 125.7us
start test type:3
 cost: 111.34us
start test type:4
 cost: 148.02us
start test type:5
 cost: 306.7us
start test type:10
 cost: 149.3us
start test type:11
 cost: 282.94us
[       OK ] Expr.TestBinaryArithOpEvalRangeBenchExpr (1221 ms)
[ RUN      ] Expr.TestCompareExprBenchTest
start test type:2
 cost: 89us
start test type:3
 cost: 79us
start test type:4
 cost: 323us
start test type:5
 cost: 629us
start test type:10
 cost: 313us
start test type:11
 cost: 591us
[       OK ] Expr.TestCompareExprBenchTest (1228 ms)
[ RUN      ] Expr.TestRefactorExprs
start test
3cost: 874us
start test
10cost: 611us
start test
20cost: 290us
start test
30cost: 294us
start test
50cost: 272us
start test
100cost: 278us
start test
200cost: 279us
[       OK ] Expr.TestRefactorExprs (1149 ms)

```

Signed-off-by: Alexandr Guzhva <alexanderguzhva@gmail.com>
2024-03-24 21:49:07 +08:00

5275 lines
208 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include <array>
#include <boost/format.hpp>
#include <chrono>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <unordered_set>
#include "boost/container/vector.hpp"
#include "common/EasyAssert.h"
#include "common/LoadInfo.h"
#include "common/Types.h"
#include "common/type_c.h"
#include "index/IndexFactory.h"
#include "knowhere/comp/index_param.h"
#include "pb/plan.pb.h"
#include "query/ExprImpl.h"
#include "segcore/Collection.h"
#include "segcore/Reduce.h"
#include "segcore/reduce_c.h"
#include "segcore/segment_c.h"
#include "test_utils/DataGen.h"
#include "test_utils/PbHelper.h"
#include "test_utils/indexbuilder_test_utils.h"
#include "test_utils/storage_test_utils.h"
#include "query/generated/ExecExprVisitor.h"
#include "expr/ITypeExpr.h"
#include "plan/PlanNode.h"
#include "exec/expression/Expr.h"
namespace chrono = std::chrono;
using namespace milvus;
using namespace milvus::index;
using namespace milvus::segcore;
using namespace milvus::tracer;
using namespace knowhere;
using milvus::index::VectorIndex;
using milvus::segcore::LoadIndexInfo;
namespace {
// const int DIM = 16;
const int64_t ROW_COUNT = 10 * 1000;
const int64_t BIAS = 4200;
CStatus
CSearch(CSegmentInterface c_segment,
CSearchPlan c_plan,
CPlaceholderGroup c_placeholder_group,
uint64_t timestamp,
CSearchResult* result) {
return Search(
{}, c_segment, c_plan, c_placeholder_group, timestamp, result);
}
CStatus
CRetrieve(CSegmentInterface c_segment,
CRetrievePlan c_plan,
uint64_t timestamp,
CRetrieveResult* result) {
return Retrieve(
{}, c_segment, c_plan, timestamp, result, DEFAULT_MAX_OUTPUT_SIZE);
}
const char*
get_default_schema_config() {
static std::string conf = R"(name: "default-collection"
fields: <
fieldID: 100
name: "fakevec"
data_type: FloatVector
type_params: <
key: "dim"
value: "16"
>
index_params: <
key: "metric_type"
value: "L2"
>
>
fields: <
fieldID: 101
name: "age"
data_type: Int64
is_primary_key: true
>)";
static std::string fake_conf = "";
return conf.c_str();
}
const char*
get_float16_schema_config() {
static std::string conf = R"(name: "float16-collection"
fields: <
fieldID: 100
name: "fakevec"
data_type: Float16Vector
type_params: <
key: "dim"
value: "16"
>
index_params: <
key: "metric_type"
value: "L2"
>
>
fields: <
fieldID: 101
name: "age"
data_type: Int64
is_primary_key: true
>)";
static std::string fake_conf = "";
return conf.c_str();
}
const char*
get_bfloat16_schema_config() {
static std::string conf = R"(name: "bfloat16-collection"
fields: <
fieldID: 100
name: "fakevec"
data_type: BFloat16Vector
type_params: <
key: "dim"
value: "16"
>
index_params: <
key: "metric_type"
value: "L2"
>
>
fields: <
fieldID: 101
name: "age"
data_type: Int64
is_primary_key: true
>)";
static std::string fake_conf = "";
return conf.c_str();
}
const char*
get_default_index_meta() {
static std::string conf = R"(maxIndexRowCount: 1000
index_metas: <
fieldID: 100
collectionID: 1001
index_name: "test-index"
type_params: <
key: "dim"
value: "16"
>
index_params: <
key: "index_type"
value: "IVF_FLAT"
>
index_params: <
key: "metric_type"
value: "L2"
>
index_params: <
key: "nlist"
value: "128"
>
>)";
return conf.c_str();
}
auto
generate_data(int N) {
std::vector<char> raw_data;
std::vector<uint64_t> timestamps;
std::vector<int64_t> uids;
std::default_random_engine e(42);
std::normal_distribution<> dis(0.0, 1.0);
for (int i = 0; i < N; ++i) {
uids.push_back(10 * N + i);
timestamps.push_back(0);
float vec[DIM];
for (auto& x : vec) {
x = dis(e);
}
raw_data.insert(raw_data.end(),
(const char*)std::begin(vec),
(const char*)std::end(vec));
int age = e() % 100;
raw_data.insert(raw_data.end(),
(const char*)&age,
((const char*)&age) + sizeof(age));
}
return std::make_tuple(raw_data, timestamps, uids);
}
std::string
generate_max_float_query_data(int all_nq, int max_float_nq) {
assert(max_float_nq <= all_nq);
namespace ser = milvus::proto::common;
int dim = DIM;
ser::PlaceholderGroup raw_group;
auto value = raw_group.add_placeholders();
value->set_tag("$0");
value->set_type(ser::PlaceholderType::FloatVector);
for (int i = 0; i < all_nq; ++i) {
std::vector<float> vec;
if (i < max_float_nq) {
for (int d = 0; d < dim; ++d) {
vec.push_back(std::numeric_limits<float>::max());
}
} else {
for (int d = 0; d < dim; ++d) {
vec.push_back(1);
}
}
value->add_values(vec.data(), vec.size() * sizeof(float));
}
auto blob = raw_group.SerializeAsString();
return blob;
}
std::string
generate_query_data(int nq) {
namespace ser = milvus::proto::common;
std::default_random_engine e(67);
int dim = DIM;
std::normal_distribution<double> dis(0.0, 1.0);
ser::PlaceholderGroup raw_group;
auto value = raw_group.add_placeholders();
value->set_tag("$0");
value->set_type(ser::PlaceholderType::FloatVector);
for (int i = 0; i < nq; ++i) {
std::vector<float> vec;
for (int d = 0; d < dim; ++d) {
vec.push_back(dis(e));
}
value->add_values(vec.data(), vec.size() * sizeof(float));
}
auto blob = raw_group.SerializeAsString();
return blob;
}
std::string
generate_query_data_float16(int nq) {
namespace ser = milvus::proto::common;
std::default_random_engine e(67);
int dim = DIM;
std::normal_distribution<double> dis(0.0, 1.0);
ser::PlaceholderGroup raw_group;
auto value = raw_group.add_placeholders();
value->set_tag("$0");
value->set_type(ser::PlaceholderType::Float16Vector);
for (int i = 0; i < nq; ++i) {
std::vector<float16> vec;
for (int d = 0; d < dim; ++d) {
vec.push_back(float16(dis(e)));
}
value->add_values(vec.data(), vec.size() * sizeof(float16));
}
auto blob = raw_group.SerializeAsString();
return blob;
}
std::string
generate_query_data_bfloat16(int nq) {
namespace ser = milvus::proto::common;
std::default_random_engine e(67);
int dim = DIM;
std::normal_distribution<double> dis(0.0, 1.0);
ser::PlaceholderGroup raw_group;
auto value = raw_group.add_placeholders();
value->set_tag("$0");
value->set_type(ser::PlaceholderType::BFloat16Vector);
for (int i = 0; i < nq; ++i) {
std::vector<bfloat16> vec;
for (int d = 0; d < dim; ++d) {
vec.push_back(bfloat16(dis(e)));
}
value->add_values(vec.data(), vec.size() * sizeof(bfloat16));
}
auto blob = raw_group.SerializeAsString();
return blob;
}
// 创建枚举包含schema::DataType::BinaryVectorschema::DataType::FloatVector
enum VectorType {
BinaryVector = 0,
FloatVector = 1,
Float16Vector = 2,
BFloat16Vector = 3,
};
std::string
generate_collection_schema(std::string metric_type,
int dim,
VectorType vector_type) {
namespace schema = milvus::proto::schema;
schema::CollectionSchema collection_schema;
collection_schema.set_name("collection_test");
auto vec_field_schema = collection_schema.add_fields();
vec_field_schema->set_name("fakevec");
vec_field_schema->set_fieldid(100);
if (vector_type == VectorType::BinaryVector) {
vec_field_schema->set_data_type(schema::DataType::BinaryVector);
} else if (vector_type == VectorType::Float16Vector) {
vec_field_schema->set_data_type(schema::DataType::Float16Vector);
} else if (vector_type == VectorType::BFloat16Vector) {
vec_field_schema->set_data_type(schema::DataType::BFloat16Vector);
} else {
vec_field_schema->set_data_type(schema::DataType::FloatVector);
}
auto metric_type_param = vec_field_schema->add_index_params();
metric_type_param->set_key("metric_type");
metric_type_param->set_value(metric_type);
auto dim_param = vec_field_schema->add_type_params();
dim_param->set_key("dim");
dim_param->set_value(std::to_string(dim));
auto other_field_schema = collection_schema.add_fields();
other_field_schema->set_name("counter");
other_field_schema->set_fieldid(101);
other_field_schema->set_data_type(schema::DataType::Int64);
other_field_schema->set_is_primary_key(true);
auto other_field_schema2 = collection_schema.add_fields();
other_field_schema2->set_name("doubleField");
other_field_schema2->set_fieldid(102);
other_field_schema2->set_data_type(schema::DataType::Double);
std::string schema_string;
auto marshal = google::protobuf::TextFormat::PrintToString(
collection_schema, &schema_string);
assert(marshal);
return schema_string;
}
// VecIndexPtr
// generate_index(
// void* raw_data, knowhere::Config conf, int64_t dim, int64_t topK, int64_t N, knowhere::IndexType index_type) {
// auto indexing = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);
//
// auto database = knowhere::GenDataset(N, dim, raw_data);
// indexing->Train(database, conf);
// indexing->AddWithoutIds(database, conf);
// EXPECT_EQ(indexing->Count(), N);
// EXPECT_EQ(indexing->Dim(), dim);
//
// EXPECT_EQ(indexing->Count(), N);
// EXPECT_EQ(indexing->Dim(), dim);
// return indexing;
//}
//} // namespace
IndexBasePtr
generate_index(void* raw_data,
DataType field_type,
MetricType metric_type,
IndexType index_type,
int64_t dim,
int64_t N) {
auto engine_version =
knowhere::Version::GetCurrentVersion().VersionNumber();
CreateIndexInfo create_index_info{
field_type, index_type, metric_type, engine_version};
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
create_index_info, milvus::storage::FileManagerContext());
auto database = knowhere::GenDataSet(N, dim, raw_data);
auto build_config = generate_build_conf(index_type, metric_type);
indexing->BuildWithDataset(database, build_config);
auto vec_indexing = dynamic_cast<VectorIndex*>(indexing.get());
EXPECT_EQ(vec_indexing->Count(), N);
EXPECT_EQ(vec_indexing->GetDim(), dim);
return indexing;
}
} // namespace
TEST(CApiTest, CollectionTest) {
auto collection = NewCollection(get_default_schema_config());
DeleteCollection(collection);
}
TEST(CApiTest, LoadInfoTest) {
auto load_info = std::make_shared<LoadFieldDataInfo>();
auto c_load_info = reinterpret_cast<CLoadFieldDataInfo*>(load_info.get());
AppendLoadFieldInfo(c_load_info, 100, 100);
EnableMmap(c_load_info, 100, true);
EXPECT_TRUE(load_info->field_infos.at(100).enable_mmap);
}
TEST(CApiTest, SetIndexMetaTest) {
auto collection = NewCollection(get_default_schema_config());
milvus::proto::segcore::CollectionIndexMeta indexMeta;
indexMeta.ParseFromString(get_default_index_meta());
char buffer[indexMeta.ByteSizeLong()];
indexMeta.SerializeToArray(buffer, indexMeta.ByteSizeLong());
SetIndexMeta(collection, buffer, indexMeta.ByteSizeLong());
DeleteCollection(collection);
}
TEST(CApiTest, GetCollectionNameTest) {
auto collection = NewCollection(get_default_schema_config());
auto name = GetCollectionName(collection);
ASSERT_EQ(strcmp(name, "default-collection"), 0);
DeleteCollection(collection);
free((void*)(name));
}
TEST(CApiTest, SegmentTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
CSegmentInterface a_segment;
status = NewSegment(collection, Invalid, -1, &a_segment);
ASSERT_NE(status.error_code, Success);
DeleteCollection(collection);
DeleteSegment(segment);
free((char*)status.error_msg);
}
TEST(CApiTest, CPlan) {
std::string schema_string = generate_collection_schema(
knowhere::metric::JACCARD, DIM, VectorType::BinaryVector);
auto collection = NewCollection(schema_string.c_str());
// const char* dsl_string = R"(
// {
// "bool": {
// "vector": {
// "fakevec": {
// "metric_type": "L2",
// "params": {
// "nprobe": 10
// },
// "query": "$0",
// "topk": 10,
// "round_decimal": 3
// }
// }
// }
// })";
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(milvus::proto::plan::VectorType::BinaryVector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
void* plan = nullptr;
auto status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
int64_t field_id = -1;
status = GetFieldID(plan, &field_id);
ASSERT_EQ(status.error_code, Success);
auto col = static_cast<Collection*>(collection);
for (auto& [target_field_id, field_meta] :
col->get_schema()->get_fields()) {
if (field_meta.is_vector()) {
ASSERT_EQ(field_id, target_field_id.get());
}
}
ASSERT_NE(field_id, -1);
DeleteSearchPlan(plan);
DeleteCollection(collection);
}
TEST(CApiTest, CApiCPlan_float16) {
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, 16, VectorType::Float16Vector);
auto collection = NewCollection(schema_string.c_str());
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(
milvus::proto::plan::VectorType::Float16Vector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
void* plan = nullptr;
auto status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
int64_t field_id = -1;
status = GetFieldID(plan, &field_id);
ASSERT_EQ(status.error_code, Success);
auto col = static_cast<Collection*>(collection);
for (auto& [target_field_id, field_meta] :
col->get_schema()->get_fields()) {
if (field_meta.is_vector()) {
ASSERT_EQ(field_id, target_field_id.get());
}
}
ASSERT_NE(field_id, -1);
DeleteSearchPlan(plan);
DeleteCollection(collection);
}
TEST(CApiTest, CApiCPlan_bfloat16) {
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, 16, VectorType::BFloat16Vector);
auto collection = NewCollection(schema_string.c_str());
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(
milvus::proto::plan::VectorType::BFloat16Vector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
void* plan = nullptr;
auto status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
int64_t field_id = -1;
status = GetFieldID(plan, &field_id);
ASSERT_EQ(status.error_code, Success);
auto col = static_cast<Collection*>(collection);
for (auto& [target_field_id, field_meta] :
col->get_schema()->get_fields()) {
if (field_meta.is_vector()) {
ASSERT_EQ(field_id, target_field_id.get());
}
}
ASSERT_NE(field_id, -1);
DeleteSearchPlan(plan);
DeleteCollection(collection);
}
TEST(CApiTest, InsertTest) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, DeleteTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
std::vector<int64_t> delete_row_ids = {100000, 100001, 100002};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
uint64_t delete_timestamps[] = {0, 0, 0};
auto offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps);
ASSERT_EQ(del_res.error_code, Success);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, MultiDeleteGrowingSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 10;
auto dataset = DataGen(col->get_schema(), N);
auto insert_data = serialize(dataset.raw_);
// insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
int64_t offset;
PreInsert(segment, N, &offset);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
// delete data pks = {1}
std::vector<int64_t> delete_pks = {1};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(),
delete_pks.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(1, dataset.timestamps_[N - 1]);
offset = 0;
auto del_res = Delete(segment,
offset,
1,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks = {1}
std::vector<proto::plan::GenericValue> retrive_pks;
{
proto::plan::GenericValue value;
value.set_int64_val(1);
retrive_pks.push_back(value);
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_pks);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
auto max_ts = dataset.timestamps_[N - 1] + 10;
CRetrieveResult retrieve_result;
res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrieveResult(&retrieve_result);
// retrieve pks = {2}
{
proto::plan::GenericValue value;
value.set_int64_val(2);
retrive_pks.push_back(value);
}
term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_pks);
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 1);
DeleteRetrieveResult(&retrieve_result);
// delete pks = {2}
delete_pks = {2};
ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(),
delete_pks.end());
delete_data = serialize(ids.get());
delete_timestamps[0]++;
offset = 0;
del_res = Delete(segment,
offset,
1,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks in {2}
res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, MultiDeleteSealedSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 10;
auto dataset = DataGen(col->get_schema(), N);
auto segment_interface = reinterpret_cast<SegmentInterface*>(segment);
auto sealed_segment = dynamic_cast<SegmentSealed*>(segment_interface);
SealedLoadFieldData(dataset, *sealed_segment);
// delete data pks = {1}
std::vector<int64_t> delete_pks = {1};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(),
delete_pks.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(1, dataset.timestamps_[N - 1]);
auto offset = 0;
auto del_res = Delete(segment,
offset,
1,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks = {1}
std::vector<proto::plan::GenericValue> retrive_pks;
{
proto::plan::GenericValue value;
value.set_int64_val(1);
retrive_pks.push_back(value);
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_pks);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
auto max_ts = dataset.timestamps_[N - 1] + 10;
CRetrieveResult retrieve_result;
auto res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrieveResult(&retrieve_result);
// retrieve pks = {2}
{
proto::plan::GenericValue value;
value.set_int64_val(2);
retrive_pks.push_back(value);
}
term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_pks);
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 1);
DeleteRetrieveResult(&retrieve_result);
// delete pks = {2}
delete_pks = {2};
ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_pks.begin(),
delete_pks.end());
delete_data = serialize(ids.get());
delete_timestamps[0]++;
offset = 0;
del_res = Delete(segment,
offset,
1,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks in {2}
res = CRetrieve(segment, plan.get(), max_ts, &retrieve_result);
ASSERT_EQ(res.error_code, Success);
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 10;
auto dataset = DataGen(col->get_schema(), N);
auto insert_data = serialize(dataset.raw_);
// first insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
int64_t offset;
PreInsert(segment, N, &offset);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
// second insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
PreInsert(segment, N, &offset);
res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
// create retrieve plan pks in {1, 2, 3}
std::vector<proto::plan::GenericValue> retrive_row_ids;
{
for (auto v : {1, 2, 3}) {
proto::plan::GenericValue val;
val.set_int64_val(v);
retrive_row_ids.push_back(val);
}
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_row_ids);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 6);
DeleteRetrieveResult(&retrieve_result);
// delete data pks = {1, 2, 3}
std::vector<int64_t> delete_row_ids = {1, 2, 3};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(3, dataset.timestamps_[N - 1]);
offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks in {1, 2, 3}
res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
query_result = std::make_unique<proto::segcore::RetrieveResults>();
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 20;
auto dataset = DataGen(col->get_schema(), N, 42, 0, 2);
auto segment_interface = reinterpret_cast<SegmentInterface*>(segment);
auto sealed_segment = dynamic_cast<SegmentSealed*>(segment_interface);
SealedLoadFieldData(dataset, *sealed_segment);
std::vector<proto::plan::GenericValue> retrive_row_ids;
// create retrieve plan pks in {1, 2, 3}
{
for (auto v : {1, 2, 3}) {
proto::plan::GenericValue val;
val.set_int64_val(v);
retrive_row_ids.push_back(val);
}
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_row_ids);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
auto res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 6);
DeleteRetrieveResult(&retrieve_result);
// delete data pks = {1, 2, 3}
std::vector<int64_t> delete_row_ids = {1, 2, 3};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(3, dataset.timestamps_[N - 1]);
auto offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// retrieve pks in {1, 2, 3}
res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
query_result = std::make_unique<proto::segcore::RetrieveResults>();
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, InsertSamePkAfterDeleteOnGrowingSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 10;
auto dataset = DataGen(col->get_schema(), N);
auto insert_data = serialize(dataset.raw_);
// first insert data
// insert data with pks = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} , timestamps = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
int64_t offset;
PreInsert(segment, N, &offset);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
// delete data pks = {1, 2, 3}, timestamps = {9, 9, 9}
std::vector<int64_t> delete_row_ids = {1, 2, 3};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(3, dataset.timestamps_[N - 1]);
offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// create retrieve plan pks in {1, 2, 3}, timestamp = 9
std::vector<proto::plan::GenericValue> retrive_row_ids;
{
for (auto v : {1, 2, 3}) {
proto::plan::GenericValue val;
val.set_int64_val(v);
retrive_row_ids.push_back(val);
}
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_row_ids);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 0);
DeleteRetrieveResult(&retrieve_result);
// second insert data
// insert data with pks = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} , timestamps = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}
dataset = DataGen(col->get_schema(), N, 42, N);
insert_data = serialize(dataset.raw_);
PreInsert(segment, N, &offset);
res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
// retrieve pks in {1, 2, 3}, timestamp = 19
res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
query_result = std::make_unique<proto::segcore::RetrieveResults>();
suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 3);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, InsertSamePkAfterDeleteOnSealedSegment) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)collection;
int N = 10;
auto dataset = DataGen(col->get_schema(), N, 42, 0, 2);
// insert data with pks = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4} , timestamps = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
auto segment_interface = reinterpret_cast<SegmentInterface*>(segment);
auto sealed_segment = dynamic_cast<SegmentSealed*>(segment_interface);
SealedLoadFieldData(dataset, *sealed_segment);
// delete data pks = {1, 2, 3}, timestamps = {4, 4, 4}
std::vector<int64_t> delete_row_ids = {1, 2, 3};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(3, dataset.timestamps_[4]);
auto offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps.data());
ASSERT_EQ(del_res.error_code, Success);
// create retrieve plan pks in {1, 2, 3}, timestamp = 9
std::vector<proto::plan::GenericValue> retrive_row_ids;
{
for (auto v : {1, 2, 3}) {
proto::plan::GenericValue val;
val.set_int64_val(v);
retrive_row_ids.push_back(val);
}
}
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
retrive_row_ids);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
auto res = CRetrieve(
segment, plan.get(), dataset.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 4);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, SearchTest) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
CSearchResult search_result2;
auto res2 =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result2);
ASSERT_EQ(res2.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteSearchResult(search_result2);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, SearchTestWithExpr) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
c_collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
dataset.timestamps_.clear();
dataset.timestamps_.push_back(1);
CSearchResult search_result;
auto res = CSearch(segment,
plan,
placeholderGroup,
dataset.timestamps_[0],
&search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, RetrieveTestWithExpr) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
// create retrieve plan "age in [0]"
std::vector<proto::plan::GenericValue> values;
{
for (auto v : {1, 0}) {
proto::plan::GenericValue val;
val.set_int64_val(v);
values.push_back(val);
}
}
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
FieldId(101), DataType::INT64, std::vector<std::string>()),
values);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids{FieldId(100), FieldId(101)};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
auto res = CRetrieve(
segment, plan.get(), dataset.timestamps_[0], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, GetMemoryUsageInBytesTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto old_memory_usage_size = GetMemoryUsageInBytes(segment);
// std::cout << "old_memory_usage_size = " << old_memory_usage_size << std::endl;
ASSERT_EQ(old_memory_usage_size, 0);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, GetDeletedCountTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
std::vector<int64_t> delete_row_ids = {100000, 100001, 100002};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
uint64_t delete_timestamps[] = {0, 0, 0};
auto offset = 0;
auto del_res = Delete(segment,
offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps);
ASSERT_EQ(del_res.error_code, Success);
// TODO: assert(deleted_count == len(delete_row_ids))
auto deleted_count = GetDeletedCount(segment);
ASSERT_EQ(deleted_count, 0);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, GetRowCountTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
auto row_count = GetRowCount(segment);
ASSERT_EQ(row_count, N);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, GetRealCount) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(res.error_code, Success);
auto pks = dataset.get_col<int64_t>(schema->get_primary_field_id().value());
std::vector<int64_t> delete_row_ids(pks.begin(), pks.begin() + 3);
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(),
delete_row_ids.end());
auto delete_data = serialize(ids.get());
uint64_t delete_timestamps[] = {dataset.timestamps_[N - 1] + 1,
dataset.timestamps_[N - 1] + 2,
dataset.timestamps_[N - 1] + 3};
auto del_offset = 0;
auto del_res = Delete(segment,
del_offset,
3,
delete_data.data(),
delete_data.size(),
delete_timestamps);
ASSERT_EQ(del_res.error_code, Success);
auto real_count = GetRealCount(segment);
ASSERT_EQ(real_count, N - delete_row_ids.size());
DeleteCollection(collection);
DeleteSegment(segment);
}
void
CheckSearchResultDuplicate(const std::vector<CSearchResult>& results) {
auto nq = ((SearchResult*)results[0])->total_nq_;
std::unordered_set<PkType> pk_set;
for (int qi = 0; qi < nq; qi++) {
pk_set.clear();
for (size_t i = 0; i < results.size(); i++) {
auto search_result = (SearchResult*)results[i];
ASSERT_EQ(nq, search_result->total_nq_);
auto topk_beg = search_result->topk_per_nq_prefix_sum_[qi];
auto topk_end = search_result->topk_per_nq_prefix_sum_[qi + 1];
for (size_t ki = topk_beg; ki < topk_end; ki++) {
ASSERT_NE(search_result->seg_offsets_[ki], INVALID_SEG_OFFSET);
auto ret = pk_set.insert(search_result->primary_keys_[ki]);
ASSERT_TRUE(ret.second);
}
}
}
}
TEST(CApiTest, ReduceNullResult) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
int num_queries = 10;
auto blob = generate_max_float_query_data(num_queries, num_queries / 2);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
dataset.timestamps_.clear();
dataset.timestamps_.push_back(1);
{
auto slice_nqs = std::vector<int64_t>{10};
auto slice_topKs = std::vector<int64_t>{1};
std::vector<CSearchResult> results;
CSearchResult res;
status = CSearch(segment, plan, placeholderGroup, 1L << 63, &res);
ASSERT_EQ(status.error_code, Success);
results.push_back(res);
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
auto search_result = (SearchResult*)results[0];
auto size = search_result->result_offsets_.size();
EXPECT_EQ(size, num_queries / 2);
DeleteSearchResult(res);
DeleteSearchResultDataBlobs(cSearchResultData);
}
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, ReduceRemoveDuplicates) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
int N = 10000;
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(10);
query_info->set_round_decimal(3);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
int num_queries = 10;
int topK = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
dataset.timestamps_.clear();
dataset.timestamps_.push_back(1);
{
auto slice_nqs = std::vector<int64_t>{num_queries / 2, num_queries / 2};
auto slice_topKs = std::vector<int64_t>{topK / 2, topK};
std::vector<CSearchResult> results;
CSearchResult res1, res2;
status = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[0], &res1);
ASSERT_EQ(status.error_code, Success);
status = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[0], &res2);
ASSERT_EQ(status.error_code, Success);
results.push_back(res1);
results.push_back(res2);
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
// TODO:: insert no duplicate pks and check reduce results
CheckSearchResultDuplicate(results);
DeleteSearchResult(res1);
DeleteSearchResult(res2);
DeleteSearchResultDataBlobs(cSearchResultData);
}
{
int nq1 = num_queries / 3;
int nq2 = num_queries / 3;
int nq3 = num_queries - nq1 - nq2;
auto slice_nqs = std::vector<int64_t>{nq1, nq2, nq3};
auto slice_topKs = std::vector<int64_t>{topK / 2, topK, topK};
std::vector<CSearchResult> results;
CSearchResult res1, res2, res3;
status = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[0], &res1);
ASSERT_EQ(status.error_code, Success);
status = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[0], &res2);
ASSERT_EQ(status.error_code, Success);
status = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[0], &res3);
ASSERT_EQ(status.error_code, Success);
results.push_back(res1);
results.push_back(res2);
results.push_back(res3);
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
// TODO:: insert no duplicate pks and check reduce results
CheckSearchResultDuplicate(results);
DeleteSearchResult(res1);
DeleteSearchResult(res2);
DeleteSearchResult(res3);
DeleteSearchResultDataBlobs(cSearchResultData);
}
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteCollection(collection);
DeleteSegment(segment);
}
void
testReduceSearchWithExpr(int N,
int topK,
int num_queries,
bool filter_all = false) {
std::cerr << "testReduceSearchWithExpr(" << N << ", " << topK << ", "
<< num_queries << ")" << std::endl;
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto schema = ((milvus::segcore::Collection*)collection)->get_schema();
auto dataset = DataGen(schema, N);
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
auto fmt = boost::format(R"(vector_anns: <
field_id: 100
query_info: <
topk: %1%
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0">
output_field_ids: 100)") %
topK;
// construct the predicate that filter out all data
if (filter_all) {
fmt = boost::format(R"(vector_anns: <
field_id: 100
predicates: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterThan
value: <
int64_val: %2%
>
>
>
query_info: <
topk: %1%
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0">
output_field_ids: 100)") %
topK % N;
}
auto serialized_expr_plan = fmt.str();
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
auto binary_plan =
translate_text_plan_to_binary_plan(serialized_expr_plan.data());
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
dataset.timestamps_.clear();
dataset.timestamps_.push_back(1);
std::vector<CSearchResult> results;
CSearchResult res1;
CSearchResult res2;
auto res = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[N - 1], &res1);
ASSERT_EQ(res.error_code, Success);
res = CSearch(
segment, plan, placeholderGroup, dataset.timestamps_[N - 1], &res2);
ASSERT_EQ(res.error_code, Success);
results.push_back(res1);
results.push_back(res2);
auto slice_nqs = std::vector<int64_t>{num_queries / 2, num_queries / 2};
if (num_queries == 1) {
slice_nqs = std::vector<int64_t>{num_queries};
}
auto slice_topKs = std::vector<int64_t>{topK / 2, topK};
if (topK == 1) {
slice_topKs = std::vector<int64_t>{topK, topK};
}
// 1. reduce
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
auto search_result_data_blobs =
reinterpret_cast<milvus::segcore::SearchResultDataBlobs*>(
cSearchResultData);
// check result
for (size_t i = 0; i < slice_nqs.size(); i++) {
milvus::proto::schema::SearchResultData search_result_data;
auto suc = search_result_data.ParseFromArray(
search_result_data_blobs->blobs[i].data(),
search_result_data_blobs->blobs[i].size());
ASSERT_TRUE(suc);
ASSERT_EQ(search_result_data.num_queries(), slice_nqs[i]);
ASSERT_EQ(search_result_data.top_k(), slice_topKs[i]);
ASSERT_EQ(search_result_data.ids().int_id().data_size(),
search_result_data.topks().at(0) * slice_nqs[i]);
ASSERT_EQ(search_result_data.scores().size(),
search_result_data.topks().at(0) * slice_nqs[i]);
// check real topks
ASSERT_EQ(search_result_data.topks().size(), slice_nqs[i]);
for (auto real_topk : search_result_data.topks()) {
ASSERT_LE(real_topk, slice_topKs[i]);
if (filter_all) {
ASSERT_EQ(real_topk, 0);
}
}
}
DeleteSearchResultDataBlobs(cSearchResultData);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(res1);
DeleteSearchResult(res2);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, ReduceSearchWithExpr) {
testReduceSearchWithExpr(2, 1, 1);
testReduceSearchWithExpr(2, 10, 10);
testReduceSearchWithExpr(100, 1, 1);
testReduceSearchWithExpr(100, 10, 10);
testReduceSearchWithExpr(10000, 1, 1);
testReduceSearchWithExpr(10000, 10, 10);
}
TEST(CApiTest, ReduceSearchWithExprFilterAll) {
testReduceSearchWithExpr(2, 1, 1, true);
testReduceSearchWithExpr(2, 10, 10, true);
}
TEST(CApiTest, LoadIndexInfo) {
// generator index
constexpr auto TOPK = 10;
auto N = 1024 * 10;
auto [raw_data, timestamps, uids] = generate_data(N);
auto indexing = knowhere::IndexFactory::Instance().Create<float>(
knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
knowhere::Version::GetCurrentVersion().VersionNumber());
auto conf =
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, DIM},
{knowhere::meta::TOPK, TOPK},
{knowhere::indexparam::NLIST, 100},
{knowhere::indexparam::NPROBE, 4}};
auto database = knowhere::GenDataSet(N, DIM, raw_data.data());
indexing.Train(*database, conf);
indexing.Add(*database, conf);
EXPECT_EQ(indexing.Count(), N);
EXPECT_EQ(indexing.Dim(), DIM);
knowhere::BinarySet binary_set;
indexing.Serialize(binary_set);
CBinarySet c_binary_set = (CBinarySet)&binary_set;
void* c_load_index_info = nullptr;
auto status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_param_key1 = "index_type";
std::string index_param_value1 = knowhere::IndexEnum::INDEX_FAISS_IVFSQ8;
status = AppendIndexParam(
c_load_index_info, index_param_key1.data(), index_param_value1.data());
std::string index_param_key2 = knowhere::meta::METRIC_TYPE;
std::string index_param_value2 = knowhere::metric::L2;
status = AppendIndexParam(
c_load_index_info, index_param_key2.data(), index_param_value2.data());
ASSERT_EQ(status.error_code, Success);
std::string field_name = "field0";
status = AppendFieldInfo(
c_load_index_info, 0, 0, 0, 0, CDataType::FloatVector, false, "");
ASSERT_EQ(status.error_code, Success);
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
status = AppendIndex(c_load_index_info, c_binary_set);
ASSERT_EQ(status.error_code, Success);
DeleteLoadIndexInfo(c_load_index_info);
}
TEST(CApiTest, LoadIndexSearch) {
// generator index
constexpr auto TOPK = 10;
auto N = 1024 * 10;
auto num_query = 100;
auto [raw_data, timestamps, uids] = generate_data(N);
auto indexing = knowhere::IndexFactory::Instance().Create<float>(
knowhere::IndexEnum::INDEX_FAISS_IVFSQ8,
knowhere::Version::GetCurrentVersion().VersionNumber());
auto conf =
knowhere::Json{{knowhere::meta::METRIC_TYPE, knowhere::metric::L2},
{knowhere::meta::DIM, DIM},
{knowhere::meta::TOPK, TOPK},
{knowhere::indexparam::NLIST, 100},
{knowhere::indexparam::NPROBE, 4}};
auto database = knowhere::GenDataSet(N, DIM, raw_data.data());
indexing.Train(*database, conf);
indexing.Add(*database, conf);
EXPECT_EQ(indexing.Count(), N);
EXPECT_EQ(indexing.Dim(), DIM);
// serializ index to binarySet
knowhere::BinarySet binary_set;
indexing.Serialize(binary_set);
// fill loadIndexInfo
milvus::segcore::LoadIndexInfo load_index_info;
auto& index_params = load_index_info.index_params;
index_params["index_type"] = knowhere::IndexEnum::INDEX_FAISS_IVFSQ8;
load_index_info.index = std::make_unique<VectorMemIndex<float>>(
index_params["index_type"],
knowhere::metric::L2,
knowhere::Version::GetCurrentVersion().VersionNumber());
load_index_info.index->Load(binary_set);
// search
auto query_dataset =
knowhere::GenDataSet(num_query, DIM, raw_data.data() + BIAS * DIM);
auto result = indexing.Search(*query_dataset, conf, nullptr);
}
TEST(CApiTest, Indexing_Without_Predicate) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(5);
query_info->set_round_decimal(-1);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestmap = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestmap,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestmap,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_raw_index_json =
SearchResultToJson(*search_result_on_raw_index);
auto search_result_on_bigIndex_json =
SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex));
ASSERT_EQ(search_result_on_raw_index_json.dump(1),
search_result_on_bigIndex_json.dump(1));
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_Expr_Without_Predicate) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_raw_index_json =
SearchResultToJson(*search_result_on_raw_index);
auto search_result_on_bigIndex_json =
SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex));
ASSERT_EQ(search_result_on_raw_index_json.dump(1),
search_result_on_bigIndex_json.dump(1));
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_With_float_Predicate_Range) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
// create place_holder_group
int num_queries = 10;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_Expr_With_float_Predicate_Range) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = 1000 * 10;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
{
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
}
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 10;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_With_float_Predicate_Term) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
predicates: <
term_expr: <
column_info: <
field_id: 101
data_type: Int64
>
values: <
int64_val: 4200
>
values: <
int64_val: 4201
>
values: <
int64_val: 4202
>
values: <
int64_val: 4203
>
values: <
int64_val: 4204
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = 1000 * 10;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(
vector_anns: <
field_id: 100
predicates: <
term_expr: <
column_info: <
field_id: 101
data_type: Int64
>
values: <
int64_val: 4200
>
values: <
int64_val: 4201
>
values: <
int64_val: 4202
>
values: <
int64_val: 4203
>
values: <
int64_val: 4204
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::JACCARD, DIM, VectorType::BinaryVector);
auto collection =
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "JACCARD"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_BINARY,
knowhere::metric::JACCARD,
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::JACCARD;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::JACCARD, DIM, VectorType::BinaryVector);
auto collection =
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "JACCARD"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_TRUE(res_before_load_index.error_code == Success)
<< res_before_load_index.error_msg;
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_BINARY,
knowhere::metric::JACCARD,
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::JACCARD;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[offset]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::JACCARD, DIM, VectorType::BinaryVector);
auto collection =
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
predicates: <
term_expr: <
column_info: <
field_id: 101
data_type: Int64
>
values: <
int64_val: 4200
>
values: <
int64_val: 4201
>
values: <
int64_val: 4202
>
values: <
int64_val: 4203
>
values: <
int64_val: 4204
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "JACCARD"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
// create place_holder_group
int num_queries = 5;
int topK = 5;
auto raw_group =
CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_BINARY,
knowhere::metric::JACCARD,
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::JACCARD;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
std::vector<CSearchResult> results;
results.push_back(c_search_result_on_bigIndex);
auto slice_nqs = std::vector<int64_t>{num_queries};
auto slice_topKs = std::vector<int64_t>{topK};
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
// status = ReduceSearchResultsAndFillData(plan, results.data(), results.size());
// ASSERT_EQ(status.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
ASSERT_EQ(search_result_on_bigIndex->topk_per_nq_prefix_sum_.size(),
search_result_on_bigIndex->total_nq_ + 1);
auto offset = search_result_on_bigIndex->topk_per_nq_prefix_sum_[i];
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[i * TOPK]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
DeleteSearchResultDataBlobs(cSearchResultData);
}
TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::JACCARD, DIM, VectorType::BinaryVector);
auto collection =
NewCollection(schema_string.c_str(), knowhere::metric::JACCARD);
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<uint8_t>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM / 8;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
predicates: <
term_expr: <
column_info: <
field_id: 101
data_type: Int64
>
values: <
int64_val: 4200
>
values: <
int64_val: 4201
>
values: <
int64_val: 4202
>
values: <
int64_val: 4203
>
values: <
int64_val: 4204
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "JACCARD"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 5;
int topK = 5;
auto raw_group =
CreateBinaryPlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_BINARY,
knowhere::metric::JACCARD,
IndexEnum::INDEX_FAISS_BIN_IVFFLAT,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::JACCARD;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::BinaryVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
std::vector<CSearchResult> results;
results.push_back(c_search_result_on_bigIndex);
auto slice_nqs = std::vector<int64_t>{num_queries};
auto slice_topKs = std::vector<int64_t>{topK};
CSearchResultDataBlobs cSearchResultData;
status = ReduceSearchResultsAndFillData(&cSearchResultData,
plan,
results.data(),
results.size(),
slice_nqs.data(),
slice_topKs.data(),
slice_nqs.size());
ASSERT_EQ(status.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
ASSERT_EQ(search_result_on_bigIndex->topk_per_nq_prefix_sum_.size(),
search_result_on_bigIndex->total_nq_ + 1);
auto offset = search_result_on_bigIndex->topk_per_nq_prefix_sum_[i];
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
ASSERT_EQ(search_result_on_bigIndex->distances_[offset],
search_result_on_raw_index->distances_[i * TOPK]);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
DeleteSearchResultDataBlobs(cSearchResultData);
}
TEST(CApiTest, SealedSegmentTest) {
auto collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
int N = 1000;
std::default_random_engine e(67);
auto ages = std::vector<int64_t>(N);
for (auto& age : ages) {
age = e() % 2000;
}
auto res = LoadFieldRawData(segment, 101, ages.data(), N);
ASSERT_EQ(res.error_code, Success);
auto count = GetRowCount(segment);
ASSERT_EQ(count, N);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
const char* raw_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
// create place_holder_group
int num_queries = 10;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
SearchInfo search_info;
search_info.topk_ = TOPK;
search_info.metric_type_ = knowhere::metric::L2;
search_info.search_params_ = generate_search_conf(
IndexEnum::INDEX_FAISS_IVFSQ8, knowhere::metric::L2);
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
EXPECT_EQ(result_on_index.distances_.size(), num_queries * TOPK);
status = LoadFieldRawData(segment, 101, counter_col.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 0, dataset.row_ids_.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 1, dataset.timestamps_.data(), N);
ASSERT_EQ(status.error_code, Success);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, SealedSegment_search_without_predicates) {
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
uint64_t ts_offset = 1000;
auto dataset = DataGen(schema, N, ts_offset);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
auto vec_array = dataset.get_col(FieldId(100));
auto vec_data = serialize(vec_array.get());
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
status = LoadFieldRawData(segment, 100, vec_data.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 101, counter_col.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 0, dataset.row_ids_.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 1, dataset.timestamps_.data(), N);
ASSERT_EQ(status.error_code, Success);
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, N + ts_offset, &search_result);
std::cout << res.error_msg << std::endl;
ASSERT_EQ(res.error_code, Success);
CSearchResult search_result2;
auto res2 = CSearch(
segment, plan, placeholderGroup, N + ts_offset, &search_result2);
ASSERT_EQ(res2.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteSearchResult(search_result2);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::FloatVector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Sealed, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
predicates: <
binary_expr: <
op: LogicalAnd
left: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: GreaterEqual
value: <
int64_val: 4200
>
>
>
right: <
unary_range_expr: <
column_info: <
field_id: 101
data_type: Int64
>
op: LessThan
value: <
int64_val: 4210
>
>
>
>
>
query_info: <
topk: 5
round_decimal: -1
metric_type: "L2"
search_params: "{\"nprobe\": 10}"
>
placeholder_tag: "$0"
>)";
// create place_holder_group
int num_queries = 10;
auto raw_group =
CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
auto binary_plan = translate_text_plan_to_binary_plan(serialized_expr_plan);
status = CreateSearchPlanByExpr(
collection, binary_plan.data(), binary_plan.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestamp = 10000000;
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IVFSQ8,
DIM,
N);
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IVFSQ8;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::FloatVector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load vec index
status = UpdateSealedSegmentIndex(segment, c_load_index_info);
ASSERT_EQ(status.error_code, Success);
// load raw data
status = LoadFieldRawData(segment, 101, counter_col.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 0, dataset.row_ids_.data(), N);
ASSERT_EQ(status.error_code, Success);
status = LoadFieldRawData(segment, 1, dataset.timestamps_.data(), N);
ASSERT_EQ(status.error_code, Success);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(segment,
plan,
placeholderGroup,
timestamp,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_bigIndex = (SearchResult*)c_search_result_on_bigIndex;
for (int i = 0; i < num_queries; ++i) {
auto offset = i * TOPK;
ASSERT_EQ(search_result_on_bigIndex->seg_offsets_[offset], BIAS + i);
}
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, SealedSegment_Update_Field_Size) {
auto schema = std::make_shared<Schema>();
auto str_fid = schema->AddDebugField("string", DataType::VARCHAR);
auto vec_fid = schema->AddDebugField(
"vector_float", DataType::VECTOR_FLOAT, DIM, "L2");
schema->set_primary_field_id(str_fid);
auto segment = CreateSealedSegment(schema).release();
auto N = ROW_COUNT;
int row_size = 10;
// update row_size =10 with n rows
auto status =
UpdateFieldRawDataSize(segment, str_fid.get(), N, N * row_size);
ASSERT_EQ(status.error_code, Success);
ASSERT_EQ(segment->get_field_avg_size(str_fid), row_size);
// load data and update avg field size
std::vector<std::string> str_datas;
int64_t total_size = 0;
for (int i = 0; i < N; ++i) {
auto str = "string_data_" + std::to_string(i);
total_size += str.size();
str_datas.emplace_back(str);
}
auto res = LoadFieldRawData(segment, str_fid.get(), str_datas.data(), N);
ASSERT_EQ(res.error_code, Success);
ASSERT_EQ(segment->get_field_avg_size(str_fid),
(row_size * N + total_size) / (2 * N));
DeleteSegment(segment);
}
TEST(CApiTest, GrowingSegment_Load_Field_Data) {
auto schema = std::make_shared<Schema>();
schema->AddField(FieldName("RowID"), FieldId(0), DataType::INT64);
schema->AddField(FieldName("Timestamp"), FieldId(1), DataType::INT64);
auto str_fid = schema->AddDebugField("string", DataType::VARCHAR);
auto vec_fid = schema->AddDebugField(
"vector_float", DataType::VECTOR_FLOAT, DIM, "L2");
schema->set_primary_field_id(str_fid);
auto segment = CreateGrowingSegment(schema, empty_index_meta).release();
int N = ROW_COUNT;
auto raw_data = DataGen(schema, N);
auto storage_config = get_default_local_storage_config();
auto cm = storage::CreateChunkManager(storage_config);
auto load_info =
PrepareInsertBinlog(1,
2,
3,
storage_config.root_path + "/" + "test_load_sealed",
raw_data,
cm);
auto status = LoadFieldData(segment, &load_info);
ASSERT_EQ(status.error_code, Success);
ASSERT_EQ(segment->get_real_count(), ROW_COUNT);
ASSERT_NE(segment->get_field_avg_size(str_fid), 0);
DeleteSegment(segment);
}
TEST(CApiTest, RetriveScalarFieldFromSealedSegmentWithIndex) {
auto schema = std::make_shared<Schema>();
auto i8_fid = schema->AddDebugField("age8", DataType::INT8);
auto i16_fid = schema->AddDebugField("age16", DataType::INT16);
auto i32_fid = schema->AddDebugField("age32", DataType::INT32);
auto i64_fid = schema->AddDebugField("age64", DataType::INT64);
auto float_fid = schema->AddDebugField("age_float", DataType::FLOAT);
auto double_fid = schema->AddDebugField("age_double", DataType::DOUBLE);
schema->set_primary_field_id(i64_fid);
auto segment = CreateSealedSegment(schema).release();
int N = ROW_COUNT;
auto raw_data = DataGen(schema, N);
LoadIndexInfo load_index_info;
// load timestamp field
auto res = LoadFieldRawData(
segment, TimestampFieldID.get(), raw_data.timestamps_.data(), N);
ASSERT_EQ(res.error_code, Success);
auto count = GetRowCount(segment);
ASSERT_EQ(count, N);
// load rowid field
res = LoadFieldRawData(
segment, RowFieldID.get(), raw_data.row_ids_.data(), N);
ASSERT_EQ(res.error_code, Success);
count = GetRowCount(segment);
ASSERT_EQ(count, N);
// load index for int8 field
auto age8_col = raw_data.get_col<int8_t>(i8_fid);
GenScalarIndexing(N, age8_col.data());
auto age8_index = milvus::index::CreateScalarIndexSort<int8_t>();
age8_index->Build(N, age8_col.data());
load_index_info.field_id = i8_fid.get();
load_index_info.field_type = DataType::INT8;
load_index_info.index = std::move(age8_index);
segment->LoadIndex(load_index_info);
// load index for 16 field
auto age16_col = raw_data.get_col<int16_t>(i16_fid);
GenScalarIndexing(N, age16_col.data());
auto age16_index = milvus::index::CreateScalarIndexSort<int16_t>();
age16_index->Build(N, age16_col.data());
load_index_info.field_id = i16_fid.get();
load_index_info.field_type = DataType::INT16;
load_index_info.index = std::move(age16_index);
segment->LoadIndex(load_index_info);
// load index for int32 field
auto age32_col = raw_data.get_col<int32_t>(i32_fid);
GenScalarIndexing(N, age32_col.data());
auto age32_index = milvus::index::CreateScalarIndexSort<int32_t>();
age32_index->Build(N, age32_col.data());
load_index_info.field_id = i32_fid.get();
load_index_info.field_type = DataType::INT32;
load_index_info.index = std::move(age32_index);
segment->LoadIndex(load_index_info);
// load index for int64 field
auto age64_col = raw_data.get_col<int64_t>(i64_fid);
GenScalarIndexing(N, age64_col.data());
auto age64_index = milvus::index::CreateScalarIndexSort<int64_t>();
age64_index->Build(N, age64_col.data());
load_index_info.field_id = i64_fid.get();
load_index_info.field_type = DataType::INT64;
load_index_info.index = std::move(age64_index);
segment->LoadIndex(load_index_info);
// load index for float field
auto age_float_col = raw_data.get_col<float>(float_fid);
GenScalarIndexing(N, age_float_col.data());
auto age_float_index = milvus::index::CreateScalarIndexSort<float>();
age_float_index->Build(N, age_float_col.data());
load_index_info.field_id = float_fid.get();
load_index_info.field_type = DataType::FLOAT;
load_index_info.index = std::move(age_float_index);
segment->LoadIndex(load_index_info);
// load index for double field
auto age_double_col = raw_data.get_col<double>(double_fid);
GenScalarIndexing(N, age_double_col.data());
auto age_double_index = milvus::index::CreateScalarIndexSort<double>();
age_double_index->Build(N, age_double_col.data());
load_index_info.field_id = double_fid.get();
load_index_info.field_type = DataType::FLOAT;
load_index_info.index = std::move(age_double_index);
segment->LoadIndex(load_index_info);
// create retrieve plan
auto plan = std::make_unique<query::RetrievePlan>(*schema);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
std::vector<proto::plan::GenericValue> retrive_row_ids;
proto::plan::GenericValue val;
val.set_int64_val(age64_col[0]);
retrive_row_ids.push_back(val);
auto term_expr = std::make_shared<milvus::expr::TermFilterExpr>(
milvus::expr::ColumnInfo(
i64_fid, DataType::INT64, std::vector<std::string>()),
retrive_row_ids);
plan->plan_node_->filter_plannode_ =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, term_expr);
std::vector<FieldId> target_field_ids;
// retrieve value
target_field_ids = {
i8_fid, i16_fid, i32_fid, i64_fid, float_fid, double_fid};
plan->field_ids_ = target_field_ids;
CRetrieveResult retrieve_result;
res = CRetrieve(
segment, plan.get(), raw_data.timestamps_[N - 1], &retrieve_result);
ASSERT_EQ(res.error_code, Success);
auto query_result = std::make_unique<proto::segcore::RetrieveResults>();
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->fields_data().size(), 6);
auto fields_data = query_result->fields_data();
for (auto iter = fields_data.begin(); iter < fields_data.end(); ++iter) {
switch (iter->type()) {
case proto::schema::DataType::Int8: {
ASSERT_EQ(iter->scalars().int_data().data(0), age8_col[0]);
break;
}
case proto::schema::DataType::Int16: {
ASSERT_EQ(iter->scalars().int_data().data(0), age16_col[0]);
break;
}
case proto::schema::DataType::Int32: {
ASSERT_EQ(iter->scalars().int_data().data(0), age32_col[0]);
break;
}
case proto::schema::DataType::Int64: {
ASSERT_EQ(iter->scalars().long_data().data(0), age64_col[0]);
break;
}
case proto::schema::DataType::Float: {
ASSERT_EQ(iter->scalars().float_data().data(0),
age_float_col[0]);
break;
}
case proto::schema::DataType::Double: {
ASSERT_EQ(iter->scalars().double_data().data(0),
age_double_col[0]);
break;
}
default: {
PanicInfo(DataTypeInvalid, "not supported type");
}
}
}
DeleteRetrievePlan(plan.release());
DeleteRetrieveResult(&retrieve_result);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_IP) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "IP"
search_params: "{\"nprobe\": 10,\"radius\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP) {
auto c_collection =
NewCollection(get_default_schema_config(), knowhere::metric::IP);
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "IP"
search_params: "{\"nprobe\": 10,\"radius\": 10, \"range_filter\": 20}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_WHEN_L2) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "L2"
search_params: "{\"nprobe\": 10,\"radius\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_L2) {
auto c_collection = NewCollection(get_default_schema_config());
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "L2"
search_params: "{\"nprobe\": 10,\"radius\": 20, \"range_filter\": 10}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, AssembeChunkTest) {
TargetBitmap chunk(1000);
for (size_t i = 0; i < 1000; ++i) {
chunk[i] = (i % 2 == 0);
}
BitsetType result;
milvus::query::AppendOneChunk(result, chunk);
// std::string s;
// boost::to_string(result, s);
// std::cout << s << std::endl;
int index = 0;
for (size_t i = 0; i < 1000; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
chunk = TargetBitmap(934);
for (int i = 0; i < 934; ++i) {
chunk[i] = (i % 2 == 0);
}
milvus::query::AppendOneChunk(result, chunk);
for (size_t i = 0; i < 934; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
chunk = TargetBitmap(62);
for (int i = 0; i < 62; ++i) {
chunk[i] = (i % 2 == 0);
}
milvus::query::AppendOneChunk(result, chunk);
for (size_t i = 0; i < 62; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
chunk = TargetBitmap(105);
for (int i = 0; i < 105; ++i) {
chunk[i] = (i % 2 == 0);
}
milvus::query::AppendOneChunk(result, chunk);
for (size_t i = 0; i < 105; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
}
std::vector<SegOffset>
search_id(const BitsetType& bitset,
Timestamp* timestamps,
Timestamp timestamp,
bool use_find) {
std::vector<SegOffset> dst_offset;
if (use_find) {
auto i = bitset.find_first();
while (i.has_value()) {
auto offset = SegOffset(i.value());
if (timestamps[offset.get()] <= timestamp) {
dst_offset.push_back(offset);
}
i = bitset.find_next(i.value());
}
return dst_offset;
} else {
for (int i = 0; i < bitset.size(); i++) {
if (bitset[i]) {
auto offset = SegOffset(i);
if (timestamps[offset.get()] <= timestamp) {
dst_offset.push_back(offset);
}
}
}
}
return dst_offset;
}
TEST(CApiTest, SearchIdTest) {
// using BitsetType = boost::dynamic_bitset<>;
auto test = [&](int NT) {
BitsetType bitset(1000000);
Timestamp* timestamps = new Timestamp[1000000];
srand(time(NULL));
for (int i = 0; i < 1000000; i++) {
timestamps[i] = i;
bitset[i] = false;
}
for (int i = 0; i < NT; i++) {
bitset[1000000 * ((double)rand() / RAND_MAX)] = true;
}
auto start = std::chrono::steady_clock::now();
auto res1 = search_id(bitset, timestamps, 1000000, true);
std::cout << "search id cost:"
<< std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::steady_clock::now() - start)
.count()
<< "us" << std::endl;
start = std::chrono::steady_clock::now();
auto res2 = search_id(bitset, timestamps, 1000000, false);
std::cout << "search id origin cost:"
<< std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::steady_clock::now() - start)
.count()
<< "us" << std::endl;
ASSERT_EQ(res1.size(), res2.size());
for (int i = 0; i < res1.size(); i++) {
if (res1[i].get() != res2[i].get()) {
std::cout << "error:" << i;
}
}
start = std::chrono::steady_clock::now();
bitset.flip();
std::cout << "bit set flip cost:"
<< std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::steady_clock::now() - start)
.count()
<< "us" << std::endl;
delete[] timestamps;
};
int test_nt[] = {10, 50, 100};
for (auto nt : test_nt) {
test(nt);
}
}
TEST(CApiTest, AssembeChunkPerfTest) {
TargetBitmap chunk(100000000);
for (size_t i = 0; i < 100000000; ++i) {
chunk[i] = (i % 2 == 0);
}
BitsetType result;
// while (true) {
std::cout << "start test" << std::endl;
auto start = std::chrono::steady_clock::now();
milvus::query::AppendOneChunk(result, chunk);
std::cout << "cost: "
<< std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::steady_clock::now() - start)
.count()
<< "us" << std::endl;
int index = 0;
for (size_t i = 0; i < 1000; i++) {
ASSERT_EQ(result[index++], chunk[i]) << i;
}
// }
// std::string s;
// boost::to_string(result, s);
// std::cout << s << std::endl;
}
TEST(CApiTest, Indexing_Without_Predicate_float16) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::Float16Vector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<float16>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(
milvus::proto::plan::VectorType::Float16Vector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(5);
query_info->set_round_decimal(-1);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreateFloat16PlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestmap = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestmap,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_FLOAT16,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IDMAP,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IDMAP;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::Float16Vector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestmap,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_raw_index_json =
SearchResultToJson(*search_result_on_raw_index);
auto search_result_on_bigIndex_json =
SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex));
ASSERT_EQ(search_result_on_raw_index_json.dump(1),
search_result_on_bigIndex_json.dump(1));
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, Indexing_Without_Predicate_bfloat16) {
// insert data to segment
constexpr auto TOPK = 5;
std::string schema_string = generate_collection_schema(
knowhere::metric::L2, DIM, VectorType::BFloat16Vector);
auto collection = NewCollection(schema_string.c_str());
auto schema = ((segcore::Collection*)collection)->get_schema();
CSegmentInterface segment;
auto status = NewSegment(collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto N = ROW_COUNT;
auto dataset = DataGen(schema, N);
auto vec_col = dataset.get_col<bfloat16>(FieldId(100));
auto query_ptr = vec_col.data() + BIAS * DIM;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
milvus::proto::plan::PlanNode plan_node;
auto vector_anns = plan_node.mutable_vector_anns();
vector_anns->set_vector_type(
milvus::proto::plan::VectorType::BFloat16Vector);
vector_anns->set_placeholder_tag("$0");
vector_anns->set_field_id(100);
auto query_info = vector_anns->mutable_query_info();
query_info->set_topk(5);
query_info->set_round_decimal(-1);
query_info->set_metric_type("L2");
query_info->set_search_params(R"({"nprobe": 10})");
auto plan_str = plan_node.SerializeAsString();
// create place_holder_group
int num_queries = 5;
auto raw_group =
CreateBFloat16PlaceholderGroupFromBlob(num_queries, DIM, query_ptr);
auto blob = raw_group.SerializeAsString();
// search on segment's small index
void* plan = nullptr;
status = CreateSearchPlanByExpr(
collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
Timestamp timestmap = 10000000;
CSearchResult c_search_result_on_smallIndex;
auto res_before_load_index = CSearch(segment,
plan,
placeholderGroup,
timestmap,
&c_search_result_on_smallIndex);
ASSERT_EQ(res_before_load_index.error_code, Success);
// load index to segment
auto indexing = generate_index(vec_col.data(),
DataType::VECTOR_BFLOAT16,
knowhere::metric::L2,
IndexEnum::INDEX_FAISS_IDMAP,
DIM,
N);
// gen query dataset
auto query_dataset = knowhere::GenDataSet(num_queries, DIM, query_ptr);
auto vec_index = dynamic_cast<VectorIndex*>(indexing.get());
auto search_plan = reinterpret_cast<milvus::query::Plan*>(plan);
SearchInfo search_info = search_plan->plan_node_->search_info_;
SearchResult result_on_index;
vec_index->Query(query_dataset, search_info, nullptr, result_on_index);
auto ids = result_on_index.seg_offsets_.data();
auto dis = result_on_index.distances_.data();
std::vector<int64_t> vec_ids(ids, ids + TOPK * num_queries);
std::vector<float> vec_dis;
for (int j = 0; j < TOPK * num_queries; ++j) {
vec_dis.push_back(dis[j] * -1);
}
auto search_result_on_raw_index =
(SearchResult*)c_search_result_on_smallIndex;
search_result_on_raw_index->seg_offsets_ = vec_ids;
search_result_on_raw_index->distances_ = vec_dis;
auto binary_set = indexing->Serialize(milvus::Config{});
void* c_load_index_info = nullptr;
status = NewLoadIndexInfo(&c_load_index_info);
ASSERT_EQ(status.error_code, Success);
std::string index_type_key = "index_type";
std::string index_type_value = IndexEnum::INDEX_FAISS_IDMAP;
std::string metric_type_key = "metric_type";
std::string metric_type_value = knowhere::metric::L2;
AppendIndexParam(
c_load_index_info, index_type_key.c_str(), index_type_value.c_str());
AppendIndexParam(
c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str());
AppendFieldInfo(
c_load_index_info, 0, 0, 0, 100, CDataType::BFloat16Vector, false, "");
AppendIndexEngineVersionToLoadInfo(
c_load_index_info,
knowhere::Version::GetCurrentVersion().VersionNumber());
AppendIndex(c_load_index_info, (CBinarySet)&binary_set);
// load index for vec field, load raw data for scalar field
auto sealed_segment = SealedCreator(schema, dataset);
sealed_segment->DropFieldData(FieldId(100));
sealed_segment->LoadIndex(*(LoadIndexInfo*)c_load_index_info);
CSearchResult c_search_result_on_bigIndex;
auto res_after_load_index = CSearch(sealed_segment.get(),
plan,
placeholderGroup,
timestmap,
&c_search_result_on_bigIndex);
ASSERT_EQ(res_after_load_index.error_code, Success);
auto search_result_on_raw_index_json =
SearchResultToJson(*search_result_on_raw_index);
auto search_result_on_bigIndex_json =
SearchResultToJson((*(SearchResult*)c_search_result_on_bigIndex));
ASSERT_EQ(search_result_on_raw_index_json.dump(1),
search_result_on_bigIndex_json.dump(1));
DeleteLoadIndexInfo(c_load_index_info);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(c_search_result_on_smallIndex);
DeleteSearchResult(c_search_result_on_bigIndex);
DeleteCollection(collection);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP_FLOAT16) {
auto c_collection =
NewCollection(get_float16_schema_config(), knowhere::metric::IP);
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "IP"
search_params: "{\"nprobe\": 10,\"radius\": 10, \"range_filter\": 20}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data_float16(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}
TEST(CApiTest, RANGE_SEARCH_WITH_RADIUS_AND_RANGE_FILTER_WHEN_IP_BFLOAT16) {
auto c_collection =
NewCollection(get_bfloat16_schema_config(), knowhere::metric::IP);
CSegmentInterface segment;
auto status = NewSegment(c_collection, Growing, -1, &segment);
ASSERT_EQ(status.error_code, Success);
auto col = (milvus::segcore::Collection*)c_collection;
int N = 10000;
auto dataset = DataGen(col->get_schema(), N);
int64_t ts_offset = 1000;
int64_t offset;
PreInsert(segment, N, &offset);
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment,
offset,
N,
dataset.row_ids_.data(),
dataset.timestamps_.data(),
insert_data.data(),
insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* raw_plan = R"(vector_anns: <
field_id: 100
query_info: <
topk: 10
round_decimal: 3
metric_type: "IP"
search_params: "{\"nprobe\": 10,\"radius\": 10, \"range_filter\": 20}"
>
placeholder_tag: "$0"
>)";
auto plan_str = translate_text_plan_to_binary_plan(raw_plan);
int num_queries = 10;
auto blob = generate_query_data_bfloat16(num_queries);
void* plan = nullptr;
status = CreateSearchPlanByExpr(
c_collection, plan_str.data(), plan_str.size(), &plan);
ASSERT_EQ(status.error_code, Success);
void* placeholderGroup = nullptr;
status = ParsePlaceholderGroup(
plan, blob.data(), blob.length(), &placeholderGroup);
ASSERT_EQ(status.error_code, Success);
std::vector<CPlaceholderGroup> placeholderGroups;
placeholderGroups.push_back(placeholderGroup);
CSearchResult search_result;
auto res =
CSearch(segment, plan, placeholderGroup, ts_offset, &search_result);
ASSERT_EQ(res.error_code, Success);
DeleteSearchPlan(plan);
DeletePlaceholderGroup(placeholderGroup);
DeleteSearchResult(search_result);
DeleteCollection(c_collection);
DeleteSegment(segment);
}