enhance: speed up array-equal operator via inverted index (#33633)

fix: #33632

---------

Signed-off-by: longjiquan <jiquan.long@zilliz.com>
Jiquan Long 2024-06-11 14:13:54 +08:00 committed by GitHub
parent fa26953168
commit ecf2bcee42
14 changed files with 614 additions and 21 deletions
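
The idea behind the speed-up: array-equal previously required a brute-force scan over every row. With an inverted index over array elements (value -> posting list of row offsets), a row can only equal the target array if it contains every distinct element of it, so intersecting the posting lists yields a small candidate set that is then verified against the raw data (order and duplicates still matter). A minimal sketch of the pruning step, assuming a toy std::map-based index rather than the Tantivy-backed one used below:

#include <cstddef>
#include <cstdint>
#include <map>
#include <unordered_set>
#include <vector>

using PostingList = std::vector<size_t>;  // row offsets containing a value

// Intersect the posting lists of the target's distinct elements; the
// survivors still need an exact array comparison afterwards.
std::unordered_set<size_t>
PruneCandidates(const std::map<int64_t, PostingList>& index,
                const std::vector<int64_t>& target) {
    std::unordered_set<size_t> candidates;
    bool first = true;
    for (int64_t elem : target) {
        auto it = index.find(elem);
        if (it == index.end()) {
            return {};  // element never occurs: no row can match
        }
        if (first) {
            candidates.insert(it->second.begin(), it->second.end());
            first = false;
            continue;
        }
        std::unordered_set<size_t> next;
        for (size_t off : it->second) {
            if (candidates.count(off) > 0) {
                next.insert(off);
            }
        }
        candidates = std::move(next);
    }
    return candidates;
}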

View File

@ -51,6 +51,15 @@ class Schema {
return field_id;
}
FieldId
AddDebugArrayField(const std::string& name, DataType element_type) {
auto field_id = FieldId(debug_id);
debug_id++;
this->AddField(
FieldName(name), field_id, DataType::ARRAY, element_type);
return field_id;
}
// auto gen field_id for convenience
FieldId
AddDebugField(const std::string& name,

View File

@ -280,6 +280,22 @@ class SegmentExpr : public Expr {
return result;
}
template <typename T, typename FUNC, typename... ValTypes>
void
ProcessIndexChunksV2(FUNC func, ValTypes... values) {
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
using Index = index::ScalarIndex<IndexInnerType>;
for (size_t i = current_index_chunk_; i < num_index_chunk_; i++) {
const Index& index =
segment_->chunk_scalar_index<IndexInnerType>(field_id_, i);
auto* index_ptr = const_cast<Index*>(&index);
func(index_ptr, values...);
}
}
template <typename T>
bool
CanUseIndex(OpType op) const {

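ProcessIndexChunksV2 visits every index chunk and hands the caller's functor a mutable ScalarIndex pointer, in contrast to ProcessIndexChunks, which accumulates a bitmap itself. The conditional_t at its top normalizes the key type: sealed segments expose string data as std::string_view, but scalar indexes are instantiated on owned std::string. That mapping in isolation:

#include <cstdint>
#include <string>
#include <string_view>
#include <type_traits>

// std::string_view keys are widened to std::string; all other types pass
// through unchanged.
template <typename T>
using IndexInnerType =
    std::conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;

static_assert(std::is_same_v<IndexInnerType<std::string_view>, std::string>);
static_assert(std::is_same_v<IndexInnerType<int64_t>, int64_t>);
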
View File

@ -20,6 +20,66 @@
namespace milvus {
namespace exec {
template <typename T>
VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex() {
return ExecRangeVisitorImplArray<T>();
}
template <>
VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArrayForIndex<
proto::plan::Array>() {
switch (expr_->op_type_) {
case proto::plan::Equal:
case proto::plan::NotEqual: {
switch (expr_->column_.element_type_) {
case DataType::BOOL: {
return ExecArrayEqualForIndex<bool>(expr_->op_type_ ==
proto::plan::NotEqual);
}
case DataType::INT8: {
return ExecArrayEqualForIndex<int8_t>(
expr_->op_type_ == proto::plan::NotEqual);
}
case DataType::INT16: {
return ExecArrayEqualForIndex<int16_t>(
expr_->op_type_ == proto::plan::NotEqual);
}
case DataType::INT32: {
return ExecArrayEqualForIndex<int32_t>(
expr_->op_type_ == proto::plan::NotEqual);
}
case DataType::INT64: {
return ExecArrayEqualForIndex<int64_t>(
expr_->op_type_ == proto::plan::NotEqual);
}
case DataType::FLOAT:
case DataType::DOUBLE: {
// not accurate on floating-point numbers; fall back to brute force.
return ExecRangeVisitorImplArray<proto::plan::Array>();
}
case DataType::VARCHAR: {
if (segment_->type() == SegmentType::Growing) {
return ExecArrayEqualForIndex<std::string>(
expr_->op_type_ == proto::plan::NotEqual);
} else {
return ExecArrayEqualForIndex<std::string_view>(
expr_->op_type_ == proto::plan::NotEqual);
}
}
default:
PanicInfo(DataTypeInvalid,
"unsupported element type when execute array "
"equal for index: {}",
expr_->column_.element_type_);
}
}
default:
return ExecRangeVisitorImplArray<proto::plan::Array>();
}
}
void
PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
switch (expr_->column_.data_type_) {
@ -99,7 +159,13 @@ PhyUnaryRangeFilterExpr::Eval(EvalCtx& context, VectorPtr& result) {
result = ExecRangeVisitorImplArray<std::string>();
break;
case proto::plan::GenericValue::ValCase::kArrayVal:
result = ExecRangeVisitorImplArray<proto::plan::Array>();
if (is_index_mode_) {
result = ExecRangeVisitorImplArrayForIndex<
proto::plan::Array>();
} else {
result =
ExecRangeVisitorImplArray<proto::plan::Array>();
}
break;
default:
PanicInfo(
@ -196,6 +262,104 @@ PhyUnaryRangeFilterExpr::ExecRangeVisitorImplArray() {
return res_vec;
}
template <typename T>
VectorPtr
PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) {
typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType;
using Index = index::ScalarIndex<IndexInnerType>;
auto real_batch_size = GetNextBatchSize();
if (real_batch_size == 0) {
return nullptr;
}
// get all elements.
auto val = GetValueFromProto<proto::plan::Array>(expr_->val_);
if (val.array_size() == 0) {
// fall back to brute force; no candidates can be filtered out via the index.
return ExecRangeVisitorImplArray<proto::plan::Array>();
}
// cache the result to suit the framework.
auto batch_res =
ProcessIndexChunks<IndexInnerType>([this, &val, reverse](Index* _) {
boost::container::vector<IndexInnerType> elems;
for (auto const& element : val.array()) {
auto e = GetValueFromProto<IndexInnerType>(element);
if (std::find(elems.begin(), elems.end(), e) == elems.end()) {
elems.push_back(e);
}
}
// filter by index to get the candidates.
auto size_per_chunk = segment_->size_per_chunk();
auto retrieve = [ size_per_chunk, this ](int64_t offset) -> auto {
auto chunk_idx = offset / size_per_chunk;
auto chunk_offset = offset % size_per_chunk;
const auto& chunk =
segment_->template chunk_data<milvus::ArrayView>(field_id_,
chunk_idx);
return chunk.data() + chunk_offset;
};
// compare the array via the raw data.
auto filter = [&retrieve, &val, reverse](size_t offset) -> bool {
auto data_ptr = retrieve(offset);
return data_ptr->is_same_array(val) ^ reverse;
};
// collect all candidates.
std::unordered_set<size_t> candidates;
std::unordered_set<size_t> tmp_candidates;
auto first_callback = [&candidates](size_t offset) -> void {
candidates.insert(offset);
};
auto callback = [&candidates,
&tmp_candidates](size_t offset) -> void {
if (candidates.find(offset) != candidates.end()) {
tmp_candidates.insert(offset);
}
};
auto execute_sub_batch =
[](Index* index_ptr,
const IndexInnerType& val,
const std::function<void(size_t /* offset */)>& callback) {
index_ptr->InApplyCallback(1, &val, callback);
};
// run in-filter.
for (size_t idx = 0; idx < elems.size(); idx++) {
if (idx == 0) {
ProcessIndexChunksV2<IndexInnerType>(
execute_sub_batch, elems[idx], first_callback);
} else {
ProcessIndexChunksV2<IndexInnerType>(
execute_sub_batch, elems[idx], callback);
candidates = std::move(tmp_candidates);
}
// stop refining once the candidate set is small enough.
if (candidates.size() * 100 < active_count_) {
break;
}
}
TargetBitmap res(active_count_);
// run the post-filter. The post-filter is only executed once by the framework.
for (const auto& candidate : candidates) {
res[candidate] = filter(candidate);
}
return res;
});
AssertInfo(batch_res.size() == real_batch_size,
"internal error: expr processed rows {} not equal "
"expect batch size {}",
batch_res.size(),
real_batch_size);
// return the result.
return std::make_shared<ColumnVector>(std::move(batch_res));
}
template <typename ExprValueType>
VectorPtr
PhyUnaryRangeFilterExpr::ExecRangeVisitorImplJson() {

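Two details in ExecArrayEqualForIndex are worth noting. First, Equal and NotEqual share one code path: the exact comparison is XOR-ed with reverse (data_ptr->is_same_array(val) ^ reverse). Second, the in-filter loop stops refining as soon as the candidate set falls below 1% of active_count_, since another posting-list pass would then cost more than simply running the exact post-filter on the survivors. The stop condition in isolation, with illustrative numbers:

#include <cstddef>

// With active_count = 3000 rows, refinement stops once fewer than 30
// candidates remain; the exact is_same_array() check on those rows is
// cheaper than intersecting another posting list.
bool
SmallEnough(size_t num_candidates, size_t active_count) {
    return num_candidates * 100 < active_count;  // selectivity below 1%
}
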
View File

@ -310,6 +310,14 @@ class PhyUnaryRangeFilterExpr : public SegmentExpr {
VectorPtr
ExecRangeVisitorImplArray();
template <typename T>
VectorPtr
ExecRangeVisitorImplArrayForIndex();
template <typename T>
VectorPtr
ExecArrayEqualForIndex(bool reverse);
// Check overflow and cache result for performance
template <typename T>
ColumnVectorPtr

View File

@ -204,6 +204,25 @@ apply_hits(TargetBitmap& bitset, const RustArrayWrapper& w, bool v) {
}
}
inline void
apply_hits_with_filter(TargetBitmap& bitset,
const RustArrayWrapper& w,
const std::function<bool(size_t /* offset */)>& filter) {
for (size_t j = 0; j < w.array_.len; j++) {
auto the_offset = w.array_.array[j];
bitset[the_offset] = filter(the_offset);
}
}
inline void
apply_hits_with_callback(
const RustArrayWrapper& w,
const std::function<void(size_t /* offset */)>& callback) {
for (size_t j = 0; j < w.array_.len; j++) {
callback(w.array_.array[j]);
}
}
template <typename T>
const TargetBitmap
InvertedIndexTantivy<T>::In(size_t n, const T* values) {
@ -215,6 +234,28 @@ InvertedIndexTantivy<T>::In(size_t n, const T* values) {
return bitset;
}
template <typename T>
const TargetBitmap
InvertedIndexTantivy<T>::InApplyFilter(
size_t n, const T* values, const std::function<bool(size_t)>& filter) {
TargetBitmap bitset(Count());
for (size_t i = 0; i < n; ++i) {
auto array = wrapper_->term_query(values[i]);
apply_hits_with_filter(bitset, array, filter);
}
return bitset;
}
template <typename T>
void
InvertedIndexTantivy<T>::InApplyCallback(
size_t n, const T* values, const std::function<void(size_t)>& callback) {
for (size_t i = 0; i < n; ++i) {
auto array = wrapper_->term_query(values[i]);
apply_hits_with_callback(array, callback);
}
}
template <typename T>
const TargetBitmap
InvertedIndexTantivy<T>::NotIn(size_t n, const T* values) {
@ -311,6 +352,9 @@ void
InvertedIndexTantivy<T>::BuildWithRawData(size_t n,
const void* values,
const Config& config) {
if constexpr (std::is_same_v<bool, T>) {
schema_.set_data_type(proto::schema::DataType::Bool);
}
if constexpr (std::is_same_v<int8_t, T>) {
schema_.set_data_type(proto::schema::DataType::Int8);
}
@ -341,7 +385,15 @@ InvertedIndexTantivy<T>::BuildWithRawData(size_t n,
std::string field = "test_inverted_index";
wrapper_ = std::make_shared<TantivyIndexWrapper>(
field.c_str(), d_type_, path_.c_str());
wrapper_->add_data<T>(static_cast<const T*>(values), n);
if (config.find("is_array") != config.end()) {
// only used in unit tests.
auto arr = static_cast<const boost::container::vector<T>*>(values);
for (size_t i = 0; i < n; i++) {
wrapper_->template add_multi_data(arr[i].data(), arr[i].size());
}
} else {
wrapper_->add_data<T>(static_cast<const T*>(values), n);
}
finish();
}

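A hypothetical use of the callback-style lookup (identifiers below are illustrative, not part of this change): unlike In, which materializes a TargetBitmap sized to the whole segment, InApplyCallback streams each matching row offset straight to the caller. That is what lets ExecArrayEqualForIndex intersect posting lists incrementally without allocating a bitmap per element:

#include <cstddef>
#include <cstdint>
#include <unordered_set>

#include "index/InvertedIndexTantivy.h"

// Collect the offsets of all rows whose array contains `term`, assuming
// `idx` is an already-built index over the array elements.
std::unordered_set<size_t>
CollectHits(milvus::index::InvertedIndexTantivy<int64_t>& idx, int64_t term) {
    std::unordered_set<size_t> hits;
    idx.InApplyCallback(1, &term, [&hits](size_t offset) {
        hits.insert(offset);  // row `offset` contains `term`
    });
    return hits;
}
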
View File

@ -111,6 +111,18 @@ class InvertedIndexTantivy : public ScalarIndex<T> {
const TargetBitmap
In(size_t n, const T* values) override;
const TargetBitmap
InApplyFilter(
size_t n,
const T* values,
const std::function<bool(size_t /* offset */)>& filter) override;
void
InApplyCallback(
size_t n,
const T* values,
const std::function<void(size_t /* offset */)>& callback) override;
const TargetBitmap
NotIn(size_t n, const T* values) override;

View File

@ -50,6 +50,20 @@ class ScalarIndex : public IndexBase {
virtual const TargetBitmap
In(size_t n, const T* values) = 0;
virtual const TargetBitmap
InApplyFilter(size_t n,
const T* values,
const std::function<bool(size_t /* offset */)>& filter) {
PanicInfo(ErrorCode::Unsupported, "InApplyFilter is not implemented");
}
virtual void
InApplyCallback(size_t n,
const T* values,
const std::function<void(size_t /* offset */)>& callback) {
PanicInfo(ErrorCode::Unsupported, "InApplyCallback is not implemented");
}
virtual const TargetBitmap
NotIn(size_t n, const T* values) = 0;

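The design choice here: the two new entry points are virtual with a default that panics as Unsupported, so only indexes that can actually serve them (InvertedIndexTantivy in this change) override, and every other ScalarIndex implementation keeps compiling unchanged. The pattern in generic form, with toy types rather than the Milvus classes:

#include <cstddef>
#include <functional>
#include <stdexcept>

// New optional capabilities fail loudly by default; capable subclasses
// opt in by overriding.
template <typename T>
struct ToyScalarIndex {
    virtual ~ToyScalarIndex() = default;
    virtual void
    InApplyCallback(size_t /*n*/,
                    const T* /*values*/,
                    const std::function<void(size_t)>& /*callback*/) {
        throw std::runtime_error("InApplyCallback is not implemented");
    }
};
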
View File

@ -51,15 +51,6 @@ struct RustArrayWrapper {
std::cout << ss.str() << std::endl;
}
std::set<uint32_t>
to_set() {
std::set<uint32_t> s;
for (int i = 0; i < array_.len; i++) {
s.insert(array_.array[i]);
}
return s;
}
RustArray array_;
private:

View File

@ -200,6 +200,12 @@ test_32717() {
}
}
std::set<uint32_t>
to_set(const RustArrayWrapper& w) {
std::set<uint32_t> s(w.array_.array, w.array_.array + w.array_.len);
return s;
}
template <typename T>
std::map<T, std::set<uint32_t>>
build_inverted_index(const std::vector<std::vector<T>>& vec_of_array) {
@ -236,7 +242,7 @@ test_array_int() {
auto inverted_index = build_inverted_index(vec_of_array);
for (const auto& [term, posting_list] : inverted_index) {
auto hits = w.term_query(term).to_set();
auto hits = to_set(w.term_query(term));
assert(posting_list == hits);
}
}
@ -266,7 +272,7 @@ test_array_string() {
auto inverted_index = build_inverted_index(vec_of_array);
for (const auto& [term, posting_list] : inverted_index) {
auto hits = w.term_query(term).to_set();
auto hits = to_set(w.term_query(term));
assert(posting_list == hits);
}
}

View File

@ -68,6 +68,7 @@ set(MILVUS_TEST_FILES
test_regex_query_util.cpp
test_regex_query.cpp
test_futures.cpp
test_array_inverted_index.cpp
)
if ( INDEX_ENGINE STREQUAL "cardinal" )

View File

@ -0,0 +1,297 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#include <gtest/gtest.h>
#include <regex>
#include "pb/plan.pb.h"
#include "index/InvertedIndexTantivy.h"
#include "common/Schema.h"
#include "segcore/SegmentSealedImpl.h"
#include "test_utils/DataGen.h"
#include "test_utils/GenExprProto.h"
#include "query/PlanProto.h"
#include "query/generated/ExecPlanNodeVisitor.h"
using namespace milvus;
using namespace milvus::query;
using namespace milvus::segcore;
template <typename T>
SchemaPtr
GenTestSchema() {
auto schema_ = std::make_shared<Schema>();
schema_->AddDebugField(
"fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
auto pk = schema_->AddDebugField("pk", DataType::INT64);
schema_->set_primary_field_id(pk);
if constexpr (std::is_same_v<T, bool>) {
schema_->AddDebugArrayField("array", DataType::BOOL);
} else if constexpr (std::is_same_v<T, int8_t>) {
schema_->AddDebugArrayField("array", DataType::INT8);
} else if constexpr (std::is_same_v<T, int16_t>) {
schema_->AddDebugArrayField("array", DataType::INT16);
} else if constexpr (std::is_same_v<T, int32_t>) {
schema_->AddDebugArrayField("array", DataType::INT32);
} else if constexpr (std::is_same_v<T, int64_t>) {
schema_->AddDebugArrayField("array", DataType::INT64);
} else if constexpr (std::is_same_v<T, float>) {
schema_->AddDebugArrayField("array", DataType::FLOAT);
} else if constexpr (std::is_same_v<T, double>) {
schema_->AddDebugArrayField("array", DataType::DOUBLE);
} else if constexpr (std::is_same_v<T, std::string>) {
schema_->AddDebugArrayField("array", DataType::VARCHAR);
}
return schema_;
}
template <typename T>
class ArrayInvertedIndexTest : public ::testing::Test {
public:
void
SetUp() override {
schema_ = GenTestSchema<T>();
seg_ = CreateSealedSegment(schema_);
N_ = 3000;
uint64_t seed = 19190504;
auto raw_data = DataGen(schema_, N_, seed);
auto array_col =
raw_data.get_col(schema_->get_field_id(FieldName("array")))
->scalars()
.array_data()
.data();
for (size_t i = 0; i < N_; i++) {
boost::container::vector<T> array;
if constexpr (std::is_same_v<T, bool>) {
for (size_t j = 0; j < array_col[i].bool_data().data_size();
j++) {
array.push_back(array_col[i].bool_data().data(j));
}
} else if constexpr (std::is_same_v<T, int64_t>) {
for (size_t j = 0; j < array_col[i].long_data().data_size();
j++) {
array.push_back(array_col[i].long_data().data(j));
}
} else if constexpr (std::is_integral_v<T>) {
for (size_t j = 0; j < array_col[i].int_data().data_size();
j++) {
array.push_back(array_col[i].int_data().data(j));
}
} else if constexpr (std::is_floating_point_v<T>) {
for (size_t j = 0; j < array_col[i].float_data().data_size();
j++) {
array.push_back(array_col[i].float_data().data(j));
}
} else if constexpr (std::is_same_v<T, std::string>) {
for (size_t j = 0; j < array_col[i].string_data().data_size();
j++) {
array.push_back(array_col[i].string_data().data(j));
}
}
vec_of_array_.push_back(array);
}
SealedLoadFieldData(raw_data, *seg_);
LoadInvertedIndex();
}
void
TearDown() override {
}
void
LoadInvertedIndex() {
auto index = std::make_unique<index::InvertedIndexTantivy<T>>();
Config cfg;
cfg["is_array"] = true;
index->BuildWithRawData(N_, vec_of_array_.data(), cfg);
LoadIndexInfo info{
.field_id = schema_->get_field_id(FieldName("array")).get(),
.index = std::move(index),
};
seg_->LoadIndex(info);
}
public:
SchemaPtr schema_;
SegmentSealedUPtr seg_;
int64_t N_;
std::vector<boost::container::vector<T>> vec_of_array_;
};
TYPED_TEST_SUITE_P(ArrayInvertedIndexTest);
TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAny) {
const auto& meta = this->schema_->operator[](FieldName("array"));
auto column_info = test::GenColumnInfo(
meta.get_id().get(),
static_cast<proto::schema::DataType>(meta.get_data_type()),
false,
false,
static_cast<proto::schema::DataType>(meta.get_element_type()));
auto contains_expr = std::make_unique<proto::plan::JSONContainsExpr>();
contains_expr->set_allocated_column_info(column_info);
contains_expr->set_op(proto::plan::JSONContainsExpr_JSONOp::
JSONContainsExpr_JSONOp_ContainsAny);
contains_expr->set_elements_same_type(true);
for (const auto& elem : this->vec_of_array_[0]) {
auto t = test::GenGenericValue(elem);
contains_expr->mutable_elements()->AddAllocated(t);
}
auto expr = test::GenExpr();
expr->set_allocated_json_contains_expr(contains_expr.release());
auto parser = ProtoParser(*this->schema_);
auto typed_expr = parser.ParseExprs(*expr);
auto parsed =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);
auto segpromote = dynamic_cast<SegmentSealedImpl*>(this->seg_.get());
query::ExecPlanNodeVisitor visitor(*segpromote, MAX_TIMESTAMP);
BitsetType final;
visitor.ExecuteExprNode(parsed, segpromote, this->N_, final);
std::unordered_set<TypeParam> elems(this->vec_of_array_[0].begin(),
this->vec_of_array_[0].end());
auto ref = [this, &elems](size_t offset) -> bool {
std::unordered_set<TypeParam> row(this->vec_of_array_[offset].begin(),
this->vec_of_array_[offset].end());
for (const auto& elem : elems) {
if (row.find(elem) != row.end()) {
return true;
}
}
return false;
};
ASSERT_EQ(final.size(), this->N_);
for (size_t i = 0; i < this->N_; i++) {
ASSERT_EQ(final[i], ref(i)) << "i: " << i << ", final[i]: " << final[i]
<< ", ref(i): " << ref(i);
}
}
TYPED_TEST_P(ArrayInvertedIndexTest, ArrayContainsAll) {
const auto& meta = this->schema_->operator[](FieldName("array"));
auto column_info = test::GenColumnInfo(
meta.get_id().get(),
static_cast<proto::schema::DataType>(meta.get_data_type()),
false,
false,
static_cast<proto::schema::DataType>(meta.get_element_type()));
auto contains_expr = std::make_unique<proto::plan::JSONContainsExpr>();
contains_expr->set_allocated_column_info(column_info);
contains_expr->set_op(proto::plan::JSONContainsExpr_JSONOp::
JSONContainsExpr_JSONOp_ContainsAll);
contains_expr->set_elements_same_type(true);
for (const auto& elem : this->vec_of_array_[0]) {
auto t = test::GenGenericValue(elem);
contains_expr->mutable_elements()->AddAllocated(t);
}
auto expr = test::GenExpr();
expr->set_allocated_json_contains_expr(contains_expr.release());
auto parser = ProtoParser(*this->schema_);
auto typed_expr = parser.ParseExprs(*expr);
auto parsed =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);
auto segpromote = dynamic_cast<SegmentSealedImpl*>(this->seg_.get());
query::ExecPlanNodeVisitor visitor(*segpromote, MAX_TIMESTAMP);
BitsetType final;
visitor.ExecuteExprNode(parsed, segpromote, this->N_, final);
std::unordered_set<TypeParam> elems(this->vec_of_array_[0].begin(),
this->vec_of_array_[0].end());
auto ref = [this, &elems](size_t offset) -> bool {
std::unordered_set<TypeParam> row(this->vec_of_array_[offset].begin(),
this->vec_of_array_[offset].end());
for (const auto& elem : elems) {
if (row.find(elem) == row.end()) {
return false;
}
}
return true;
};
ASSERT_EQ(final.size(), this->N_);
for (size_t i = 0; i < this->N_; i++) {
ASSERT_EQ(final[i], ref(i)) << "i: " << i << ", final[i]: " << final[i]
<< ", ref(i): " << ref(i);
}
}
TYPED_TEST_P(ArrayInvertedIndexTest, ArrayEqual) {
if (std::is_floating_point_v<TypeParam>) {
GTEST_SKIP() << "equality comparison is not accurate on floating "
"point numbers";
}
const auto& meta = this->schema_->operator[](FieldName("array"));
auto column_info = test::GenColumnInfo(
meta.get_id().get(),
static_cast<proto::schema::DataType>(meta.get_data_type()),
false,
false,
static_cast<proto::schema::DataType>(meta.get_element_type()));
auto unary_range_expr = std::make_unique<proto::plan::UnaryRangeExpr>();
unary_range_expr->set_allocated_column_info(column_info);
unary_range_expr->set_op(proto::plan::OpType::Equal);
auto arr = new proto::plan::GenericValue;
arr->mutable_array_val()->set_element_type(
static_cast<proto::schema::DataType>(meta.get_element_type()));
arr->mutable_array_val()->set_same_type(true);
for (const auto& elem : this->vec_of_array_[0]) {
auto e = test::GenGenericValue(elem);
arr->mutable_array_val()->mutable_array()->AddAllocated(e);
}
unary_range_expr->set_allocated_value(arr);
auto expr = test::GenExpr();
expr->set_allocated_unary_range_expr(unary_range_expr.release());
auto parser = ProtoParser(*this->schema_);
auto typed_expr = parser.ParseExprs(*expr);
auto parsed =
std::make_shared<plan::FilterBitsNode>(DEFAULT_PLANNODE_ID, typed_expr);
auto segpromote = dynamic_cast<SegmentSealedImpl*>(this->seg_.get());
query::ExecPlanNodeVisitor visitor(*segpromote, MAX_TIMESTAMP);
BitsetType final;
visitor.ExecuteExprNode(parsed, segpromote, this->N_, final);
auto ref = [this](size_t offset) -> bool {
if (this->vec_of_array_[0].size() !=
this->vec_of_array_[offset].size()) {
return false;
}
auto size = this->vec_of_array_[0].size();
for (size_t i = 0; i < size; i++) {
if (this->vec_of_array_[0][i] != this->vec_of_array_[offset][i]) {
return false;
}
}
return true;
};
ASSERT_EQ(final.size(), this->N_);
for (size_t i = 0; i < this->N_; i++) {
ASSERT_EQ(final[i], ref(i)) << "i: " << i << ", final[i]: " << final[i]
<< ", ref(i): " << ref(i);
}
}
using ElementType = testing::
Types<bool, int8_t, int16_t, int32_t, int64_t, float, double, std::string>;
REGISTER_TYPED_TEST_CASE_P(ArrayInvertedIndexTest,
ArrayContainsAny,
ArrayContainsAll,
ArrayEqual);
INSTANTIATE_TYPED_TEST_SUITE_P(Naive, ArrayInvertedIndexTest, ElementType);

View File

@ -25,8 +25,6 @@
using namespace milvus;
// TODO: I would suggest that all our indexes use this test to simulate the real production environment.
namespace milvus::test {
auto
gen_field_meta(int64_t collection_id = 1,

View File

@ -491,8 +491,30 @@ inline GeneratedData DataGen(SchemaPtr schema,
}
break;
}
case DataType::INT8:
case DataType::INT16:
case DataType::INT8: {
for (int i = 0; i < N / repeat_count; i++) {
milvus::proto::schema::ScalarField field_data;
for (int j = 0; j < array_len; j++) {
field_data.mutable_int_data()->add_data(
static_cast<int8_t>(random()));
}
data[i] = field_data;
}
break;
}
case DataType::INT16: {
for (int i = 0; i < N / repeat_count; i++) {
milvus::proto::schema::ScalarField field_data;
for (int j = 0; j < array_len; j++) {
field_data.mutable_int_data()->add_data(
static_cast<int16_t>(random()));
}
data[i] = field_data;
}
break;
}
case DataType::INT32: {
for (int i = 0; i < N / repeat_count; i++) {
milvus::proto::schema::ScalarField field_data;

View File

@ -15,15 +15,18 @@
namespace milvus::test {
inline auto
GenColumnInfo(int64_t field_id,
proto::schema::DataType field_type,
bool auto_id,
bool is_pk) {
GenColumnInfo(
int64_t field_id,
proto::schema::DataType field_type,
bool auto_id,
bool is_pk,
proto::schema::DataType element_type = proto::schema::DataType::None) {
auto column_info = new proto::plan::ColumnInfo();
column_info->set_field_id(field_id);
column_info->set_data_type(field_type);
column_info->set_is_autoid(auto_id);
column_info->set_is_primary_key(is_pk);
column_info->set_element_type(element_type);
return column_info;
}
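
Since element_type defaults to None, every existing scalar-field call site compiles unchanged, while array tests can now thread the element type through to ColumnInfo. Hypothetical calls (field IDs illustrative):

// Scalar field: unchanged four-argument call.
auto* pk_col = milvus::test::GenColumnInfo(
    100, proto::schema::DataType::Int64, /*auto_id=*/false, /*is_pk=*/true);
// Array field: the new fifth argument records the element type.
auto* array_col = milvus::test::GenColumnInfo(
    101, proto::schema::DataType::Array, false, false,
    proto::schema::DataType::Int32);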