// Copyright (C) 2019-2020 Zilliz. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software distributed under the License // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License #include #include #include #include "common/BitsetView.h" #include "common/QueryInfo.h" #include "common/Schema.h" #include "knowhere/comp/index_param.h" #include "mmap/ChunkedColumn.h" #include "query/SearchOnSealed.h" #include "test_utils/DataGen.h" #include struct DeferRelease { using functype = std::function; void AddDefer(const functype& closure) { closures.push_back(closure); } ~DeferRelease() { for (auto& closure : closures) { closure(); } } std::vector closures; }; using namespace milvus; TEST(test_chunk_segment, TestSearchOnSealed) { DeferRelease defer; int dim = 16; int chunk_num = 3; int chunk_size = 100; int total_row_count = chunk_num * chunk_size; int bitset_size = (total_row_count + 7) / 8; int chunk_bitset_size = (chunk_size + 7) / 8; auto column = std::make_shared(); auto schema = std::make_shared(); auto fakevec_id = schema->AddDebugField( "fakevec", DataType::VECTOR_FLOAT, dim, knowhere::metric::COSINE); for (int i = 0; i < chunk_num; i++) { auto dataset = segcore::DataGen(schema, chunk_size); auto data = dataset.get_col(fakevec_id); auto buf_size = chunk_bitset_size + 4 * data.size(); char* buf = new char[buf_size]; defer.AddDefer([buf]() { delete[] buf; }); memcpy(buf + chunk_bitset_size, data.data(), 4 * data.size()); auto chunk = std::make_shared( chunk_size, dim, buf, buf_size, 4, false); column->AddChunk(chunk); } SearchInfo search_info; auto search_conf = knowhere::Json{ {knowhere::meta::METRIC_TYPE, knowhere::metric::COSINE}, }; search_info.search_params_ = search_conf; search_info.field_id_ = fakevec_id; search_info.metric_type_ = knowhere::metric::COSINE; // expect to return all rows search_info.topk_ = total_row_count; uint8_t* bitset_data = new uint8_t[bitset_size]; defer.AddDefer([bitset_data]() { delete[] bitset_data; }); std::fill(bitset_data, bitset_data + bitset_size, 0); BitsetView bv(bitset_data, total_row_count); auto query_ds = segcore::DataGen(schema, 1); auto col_query_data = query_ds.get_col(fakevec_id); auto query_data = col_query_data.data(); SearchResult search_result; query::SearchOnSealed(*schema, column, search_info, query_data, 1, total_row_count, bv, search_result); std::set offsets; for (auto& offset : search_result.seg_offsets_) { if (offset != -1) { offsets.insert(offset); } } // check all rows are returned ASSERT_EQ(total_row_count, offsets.size()); for (int i = 0; i < total_row_count; i++) { ASSERT_TRUE(offsets.find(i) != offsets.end()); } // test with group by search_info.group_by_field_id_ = fakevec_id; std::fill(bitset_data, bitset_data + bitset_size, 0); query::SearchOnSealed(*schema, column, search_info, query_data, 1, total_row_count, bv, search_result); ASSERT_EQ(1, search_result.vector_iterators_->size()); auto iter = search_result.vector_iterators_->at(0); // collect all offsets offsets.clear(); while (iter->HasNext()) { auto [offset, distance] = iter->Next().value(); offsets.insert(offset); } ASSERT_EQ(total_row_count, offsets.size()); for (int i = 0; i < total_row_count; i++) { ASSERT_TRUE(offsets.find(i) != offsets.end()); } }