// Copyright(C) 2019 - 2020 Zilliz.All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software distributed under the License // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License #include #include #include #include #include #include "common/Tracer.h" #include "index/BitmapIndex.h" #include "index/HybridScalarIndex.h" #include "storage/Util.h" #include "storage/InsertData.h" #include "indexbuilder/IndexFactory.h" #include "index/IndexFactory.h" #include "test_utils/indexbuilder_test_utils.h" #include "index/Meta.h" using namespace milvus::index; using namespace milvus::indexbuilder; using namespace milvus; using namespace milvus::index; template static std::vector GenerateData(const size_t size, const size_t cardinality) { std::vector result; for (size_t i = 0; i < size; ++i) { result.push_back(rand() % cardinality); } return result; } template <> std::vector GenerateData(const size_t size, const size_t cardinality) { std::vector result; for (size_t i = 0; i < size; ++i) { result.push_back(rand() % 2 == 0); } return result; } template <> std::vector GenerateData(const size_t size, const size_t cardinality) { std::vector result; for (size_t i = 0; i < size; ++i) { result.push_back(std::to_string(rand() % cardinality)); } return result; } template class HybridIndexTestV1 : public testing::Test { protected: void Init(int64_t collection_id, int64_t partition_id, int64_t segment_id, int64_t field_id, int64_t index_build_id, int64_t index_version) { auto field_meta = storage::FieldDataMeta{ collection_id, partition_id, segment_id, field_id}; auto index_meta = storage::IndexMeta{ segment_id, field_id, index_build_id, index_version}; std::vector data_gen; data_gen = GenerateData(nb_, cardinality_); for (auto x : data_gen) { data_.push_back(x); } auto field_data = storage::CreateFieldData(type_); field_data->FillFieldData(data_.data(), data_.size()); storage::InsertData insert_data(field_data); insert_data.SetFieldDataMeta(field_meta); insert_data.SetTimestamps(0, 100); auto serialized_bytes = insert_data.Serialize(storage::Remote); auto log_path = fmt::format("/{}/{}/{}/{}/{}/{}", "/tmp/test_hybrid/", collection_id, partition_id, segment_id, field_id, 0); chunk_manager_->Write( log_path, serialized_bytes.data(), serialized_bytes.size()); storage::FileManagerContext ctx(field_meta, index_meta, chunk_manager_); std::vector index_files; Config config; config["index_type"] = milvus::index::BITMAP_INDEX_TYPE; config["insert_files"] = std::vector{log_path}; config["bitmap_cardinality_limit"] = "1000"; auto build_index = indexbuilder::IndexFactory::GetInstance().CreateIndex( type_, config, ctx); build_index->Build(); auto binary_set = build_index->Upload(); for (const auto& [key, _] : binary_set.binary_map_) { index_files.push_back(key); } index::CreateIndexInfo index_info{}; index_info.index_type = milvus::index::BITMAP_INDEX_TYPE; index_info.field_type = type_; config["index_files"] = index_files; index_ = index::IndexFactory::GetInstance().CreateIndex(index_info, ctx); index_->Load(milvus::tracer::TraceContext{}, config); } virtual void SetParam() { nb_ = 10000; cardinality_ = 30; } void SetUp() override { SetParam(); if constexpr (std::is_same_v) { type_ = DataType::INT8; } else if constexpr (std::is_same_v) { type_ = DataType::INT16; } else if constexpr (std::is_same_v) { type_ = DataType::INT32; } else if constexpr (std::is_same_v) { type_ = DataType::INT64; } else if constexpr (std::is_same_v) { type_ = DataType::VARCHAR; } int64_t collection_id = 1; int64_t partition_id = 2; int64_t segment_id = 3; int64_t field_id = 101; int64_t index_build_id = 1000; int64_t index_version = 10000; std::string root_path = "/tmp/test-bitmap-index/"; storage::StorageConfig storage_config; storage_config.storage_type = "local"; storage_config.root_path = root_path; chunk_manager_ = storage::CreateChunkManager(storage_config); Init(collection_id, partition_id, segment_id, field_id, index_build_id, index_version); } virtual ~HybridIndexTestV1() override { boost::filesystem::remove_all(chunk_manager_->GetRootPath()); } public: void TestInFunc() { boost::container::vector test_data; std::unordered_set s; size_t nq = 10; for (size_t i = 0; i < nq; i++) { test_data.push_back(data_[i]); s.insert(data_[i]); } auto index_ptr = dynamic_cast*>(index_.get()); auto bitset = index_ptr->In(test_data.size(), test_data.data()); for (size_t i = 0; i < bitset.size(); i++) { ASSERT_EQ(bitset[i], s.find(data_[i]) != s.end()); } } void TestNotInFunc() { boost::container::vector test_data; std::unordered_set s; size_t nq = 10; for (size_t i = 0; i < nq; i++) { test_data.push_back(data_[i]); s.insert(data_[i]); } auto index_ptr = dynamic_cast*>(index_.get()); auto bitset = index_ptr->NotIn(test_data.size(), test_data.data()); for (size_t i = 0; i < bitset.size(); i++) { ASSERT_EQ(bitset[i], s.find(data_[i]) == s.end()); } } void TestCompareValueFunc() { if constexpr (!std::is_same_v) { using RefFunc = std::function; std::vector> test_cases{ {10, OpType::GreaterThan, [&](int64_t i) -> bool { return data_[i] > 10; }}, {10, OpType::GreaterEqual, [&](int64_t i) -> bool { return data_[i] >= 10; }}, {10, OpType::LessThan, [&](int64_t i) -> bool { return data_[i] < 10; }}, {10, OpType::LessEqual, [&](int64_t i) -> bool { return data_[i] <= 10; }}, }; for (const auto& [test_value, op, ref] : test_cases) { auto index_ptr = dynamic_cast*>(index_.get()); auto bitset = index_ptr->Range(test_value, op); for (size_t i = 0; i < bitset.size(); i++) { auto ans = bitset[i]; auto should = ref(i); ASSERT_EQ(ans, should) << "op: " << op << ", @" << i << ", ans: " << ans << ", ref: " << should; } } } } void TestRangeCompareFunc() { if constexpr (!std::is_same_v) { using RefFunc = std::function; struct TestParam { int64_t lower_val; int64_t upper_val; bool lower_inclusive; bool upper_inclusive; RefFunc ref; }; std::vector test_cases = { { 10, 30, false, false, [&](int64_t i) { return 10 < data_[i] && data_[i] < 30; }, }, { 10, 30, true, false, [&](int64_t i) { return 10 <= data_[i] && data_[i] < 30; }, }, { 10, 30, true, true, [&](int64_t i) { return 10 <= data_[i] && data_[i] <= 30; }, }, { 10, 30, false, true, [&](int64_t i) { return 10 < data_[i] && data_[i] <= 30; }, }}; for (const auto& test_case : test_cases) { auto index_ptr = dynamic_cast*>(index_.get()); auto bitset = index_ptr->Range(test_case.lower_val, test_case.lower_inclusive, test_case.upper_val, test_case.upper_inclusive); for (size_t i = 0; i < bitset.size(); i++) { auto ans = bitset[i]; auto should = test_case.ref(i); ASSERT_EQ(ans, should) << "lower:" << test_case.lower_val << "upper:" << test_case.upper_val << ", @" << i << ", ans: " << ans << ", ref: " << should; } } } } public: IndexBasePtr index_; DataType type_; size_t nb_; size_t cardinality_; boost::container::vector data_; std::shared_ptr chunk_manager_; }; TYPED_TEST_SUITE_P(HybridIndexTestV1); TYPED_TEST_P(HybridIndexTestV1, CountFuncTest) { auto count = this->index_->Count(); EXPECT_EQ(count, this->nb_); } TYPED_TEST_P(HybridIndexTestV1, INFuncTest) { this->TestInFunc(); } TYPED_TEST_P(HybridIndexTestV1, NotINFuncTest) { this->TestNotInFunc(); } TYPED_TEST_P(HybridIndexTestV1, CompareValFuncTest) { this->TestCompareValueFunc(); } TYPED_TEST_P(HybridIndexTestV1, TestRangeCompareFuncTest) { this->TestRangeCompareFunc(); } using BitmapType = testing::Types; REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV1, CountFuncTest, INFuncTest, NotINFuncTest, CompareValFuncTest, TestRangeCompareFuncTest); INSTANTIATE_TYPED_TEST_SUITE_P(HybridIndexE2ECheck_LowCardinality, HybridIndexTestV1, BitmapType); template class HybridIndexTestV2 : public HybridIndexTestV1 { public: virtual void SetParam() override { this->nb_ = 10000; this->cardinality_ = 2000; } virtual ~HybridIndexTestV2() { } }; TYPED_TEST_SUITE_P(HybridIndexTestV2); TYPED_TEST_P(HybridIndexTestV2, CountFuncTest) { auto count = this->index_->Count(); EXPECT_EQ(count, this->nb_); } TYPED_TEST_P(HybridIndexTestV2, INFuncTest) { this->TestInFunc(); } TYPED_TEST_P(HybridIndexTestV2, NotINFuncTest) { this->TestNotInFunc(); } TYPED_TEST_P(HybridIndexTestV2, CompareValFuncTest) { this->TestCompareValueFunc(); } TYPED_TEST_P(HybridIndexTestV2, TestRangeCompareFuncTest) { this->TestRangeCompareFunc(); } using BitmapType = testing::Types; REGISTER_TYPED_TEST_SUITE_P(HybridIndexTestV2, CountFuncTest, INFuncTest, NotINFuncTest, CompareValFuncTest, TestRangeCompareFuncTest); INSTANTIATE_TYPED_TEST_SUITE_P(HybridIndexE2ECheck_HighCardinality, HybridIndexTestV2, BitmapType);