diff --git a/internal/core/src/common/Span.h b/internal/core/src/common/Span.h index 0f2c3a7a46..51398808c0 100644 --- a/internal/core/src/common/Span.h +++ b/internal/core/src/common/Span.h @@ -13,6 +13,8 @@ #include #include "common/Types.h" #include +#include "VectorTrait.h" + namespace milvus { // type erasure to work around virtual restriction class SpanBase { diff --git a/internal/core/src/common/Types.h b/internal/core/src/common/Types.h index c6511dfb03..ee41de8086 100644 --- a/internal/core/src/common/Types.h +++ b/internal/core/src/common/Types.h @@ -11,6 +11,7 @@ #pragma once #include "utils/Types.h" +#include "faiss/utils/BitsetView.h" #include #include #include @@ -75,6 +76,14 @@ using FieldId = fluent::NamedType; using FieldOffset = fluent::NamedType; -} // namespace milvus +using BitsetView = faiss::BitsetView; +inline BitsetView +BitsetSubView(const BitsetView& view, int64_t offset, int64_t size) { + if (view.empty()) { + return BitsetView(); + } + assert(offset % 8 == 0); + return BitsetView(view.data() + offset / 8, size); +} -#include "VectorTrait.h" +} // namespace milvus diff --git a/internal/core/src/query/Search.cpp b/internal/core/src/query/Search.cpp index 6202e0c300..3ba611637b 100644 --- a/internal/core/src/query/Search.cpp +++ b/internal/core/src/query/Search.cpp @@ -40,7 +40,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment, const float* query_data, int64_t num_queries, Timestamp timestamp, - std::optional bitmaps_opt, + const BitsetView& bitset, QueryResult& results) { auto& schema = segment.get_schema(); auto& indexing_record = segment.get_indexing_record(); @@ -79,14 +79,16 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment, // TODO: use sub_qr for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) { - auto bitset = create_bitmap_view(bitmaps_opt, chunk_id); + auto chunk_size = indexing_entry.get_chunk_size(); auto indexing = indexing_entry.get_vec_indexing(chunk_id); - auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, bitset); + + auto sub_view = BitsetSubView(bitset, chunk_id * chunk_size, chunk_size); + auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view); // convert chunk uid to segment uid for (auto& x : sub_qr.mutable_labels()) { if (x != -1) { - x += chunk_id * indexing_entry.get_chunk_size(); + x += chunk_id * chunk_size; } } @@ -100,15 +102,14 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment, auto max_chunk = upper_div(ins_barrier, vec_chunk_size); for (int chunk_id = max_indexed_id; chunk_id < max_chunk; ++chunk_id) { - auto bitmap_view = create_bitmap_view(bitmaps_opt, chunk_id); - auto& chunk = vec_ptr->get_chunk(chunk_id); auto element_begin = chunk_id * vec_chunk_size; auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_chunk_size); auto chunk_size = element_end - element_begin; - auto sub_qr = FloatSearchBruteForce(query_dataset, chunk.data(), chunk_size, bitmap_view); + auto sub_view = BitsetSubView(bitset, element_begin, chunk_size); + auto sub_qr = FloatSearchBruteForce(query_dataset, chunk.data(), chunk_size, sub_view); // convert chunk uid to segment uid for (auto& x : sub_qr.mutable_labels()) { @@ -133,7 +134,7 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment, const uint8_t* query_data, int64_t num_queries, Timestamp timestamp, - std::optional bitmaps_opt, + const faiss::BitsetView& bitset, QueryResult& results) { auto& schema = segment.get_schema(); auto& indexing_record = segment.get_indexing_record(); @@ -160,8 +161,6 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment, auto total_count = topK * num_queries; // step 3: small indexing search - // TODO: this is too intrusive - // TODO: use QuerySubResult instead query::dataset::BinaryQueryDataset query_dataset{metric_type, num_queries, topK, dim, query_data}; auto vec_ptr = record.get_entity(vecfield_offset); @@ -178,8 +177,8 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment, auto element_end = std::min(ins_barrier, (chunk_id + 1) * vec_chunk_size); auto nsize = element_end - element_begin; - auto bitmap_view = create_bitmap_view(bitmaps_opt, chunk_id); - auto sub_result = BinarySearchBruteForce(query_dataset, chunk.data(), nsize, bitmap_view); + auto sub_view = BitsetSubView(bitset, element_begin, nsize); + auto sub_result = BinarySearchBruteForce(query_dataset, chunk.data(), nsize, sub_view); // convert chunk uid to segment uid for (auto& x : sub_result.mutable_labels()) { diff --git a/internal/core/src/query/Search.h b/internal/core/src/query/Search.h index 1782f0c4da..3aa8d1ec9b 100644 --- a/internal/core/src/query/Search.h +++ b/internal/core/src/query/Search.h @@ -28,7 +28,7 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment, const float* query_data, int64_t num_queries, Timestamp timestamp, - std::optional bitmap_opt, + const faiss::BitsetView& bitset, QueryResult& results); Status @@ -37,6 +37,6 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment, const uint8_t* query_data, int64_t num_queries, Timestamp timestamp, - std::optional bitmaps_opt, + const faiss::BitsetView& bitset, QueryResult& results); } // namespace milvus::query diff --git a/internal/core/src/query/SearchOnSealed.cpp b/internal/core/src/query/SearchOnSealed.cpp index 58ffed4f29..83b265a721 100644 --- a/internal/core/src/query/SearchOnSealed.cpp +++ b/internal/core/src/query/SearchOnSealed.cpp @@ -20,8 +20,9 @@ namespace milvus::query { +// negate bitset, and merge them into one aligned_vector -AssembleBitmap(const BitmapSimple& bitmap_simple) { +AssembleNegBitmap(const BitmapSimple& bitmap_simple) { int64_t N = 0; for (auto& bitmap : bitmap_simple) { @@ -52,7 +53,7 @@ SearchOnSealed(const Schema& schema, const void* query_data, int64_t num_queries, Timestamp timestamp, - std::optional bitmaps_opt, + const faiss::BitsetView& bitset, QueryResult& result) { auto topK = query_info.topK_; @@ -73,12 +74,7 @@ SearchOnSealed(const Schema& schema, auto conf = query_info.search_params_; conf[milvus::knowhere::meta::TOPK] = query_info.topK_; conf[milvus::knowhere::Metric::TYPE] = MetricTypeToName(indexing_entry->metric_type_); - if (bitmaps_opt.has_value()) { - auto bitmap = AssembleBitmap(*bitmaps_opt.value()); - return indexing_entry->indexing_->Query(ds, conf, faiss::BitsetView(bitmap.data(), num_queries)); - } else { - return indexing_entry->indexing_->Query(ds, conf, nullptr); - } + return indexing_entry->indexing_->Query(ds, conf, bitset); }(); auto ids = final->Get(knowhere::meta::IDS); diff --git a/internal/core/src/query/SearchOnSealed.h b/internal/core/src/query/SearchOnSealed.h index c79127dd44..01f3864e0d 100644 --- a/internal/core/src/query/SearchOnSealed.h +++ b/internal/core/src/query/SearchOnSealed.h @@ -16,6 +16,10 @@ #include "query/Search.h" namespace milvus::query { + +aligned_vector +AssembleNegBitmap(const BitmapSimple& bitmap_simple); + void SearchOnSealed(const Schema& schema, const segcore::SealedIndexingRecord& record, @@ -23,7 +27,7 @@ SearchOnSealed(const Schema& schema, const void* query_data, int64_t num_queries, Timestamp timestamp, - std::optional bitmaps_opt, + const faiss::BitsetView& view, QueryResult& result); } // namespace milvus::query diff --git a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp index dbeb339042..fbe99079c4 100644 --- a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp +++ b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp @@ -67,21 +67,20 @@ ExecPlanNodeVisitor::visit(FloatVectorANNS& node) { auto src_data = ph.get_blob(); auto num_queries = ph.num_of_queries_; - ExecExprVisitor::RetType bitmap_holder; - std::optional bitset_pack; - + aligned_vector bitset_holder; + BitsetView view; if (node.predicate_.has_value()) { - bitmap_holder = ExecExprVisitor(*segment).call_child(*node.predicate_.value()); - bitset_pack = &bitmap_holder; + ExecExprVisitor::RetType expr_ret = ExecExprVisitor(*segment).call_child(*node.predicate_.value()); + bitset_holder = AssembleNegBitmap(expr_ret); + view = BitsetView(bitset_holder.data(), bitset_holder.size() * 8); } auto& sealed_indexing = segment->get_sealed_indexing_record(); - if (sealed_indexing.is_ready(node.query_info_.field_offset_)) { SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_, - bitset_pack, ret); + view, ret); } else { - FloatSearch(*segment, node.query_info_, src_data, num_queries, timestamp_, bitset_pack, ret); + FloatSearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret); } ret_ = ret; @@ -98,20 +97,20 @@ ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) { auto src_data = ph.get_blob(); auto num_queries = ph.num_of_queries_; - ExecExprVisitor::RetType bitmap_holder; - std::optional bitset_pack; - + aligned_vector bitset_holder; + BitsetView view; if (node.predicate_.has_value()) { - bitmap_holder = ExecExprVisitor(*segment).call_child(*node.predicate_.value()); - bitset_pack = &bitmap_holder; + ExecExprVisitor::RetType expr_ret = ExecExprVisitor(*segment).call_child(*node.predicate_.value()); + bitset_holder = AssembleNegBitmap(expr_ret); + view = BitsetView(bitset_holder.data(), bitset_holder.size() * 8); } auto& sealed_indexing = segment->get_sealed_indexing_record(); if (sealed_indexing.is_ready(node.query_info_.field_offset_)) { SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_, - bitset_pack, ret); + view, ret); } else { - BinarySearch(*segment, node.query_info_, src_data, num_queries, timestamp_, bitset_pack, ret); + BinarySearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret); } ret_ = ret; } diff --git a/internal/core/unittest/test_span.cpp b/internal/core/unittest/test_span.cpp index e21bc1a35c..7d9a6f3be7 100644 --- a/internal/core/unittest/test_span.cpp +++ b/internal/core/unittest/test_span.cpp @@ -9,7 +9,6 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License - #include #include "utils/tools.h" #include "test_utils/DataGen.h" @@ -22,14 +21,17 @@ TEST(Span, Naive) { int64_t N = 1000 * 1000; constexpr int64_t chunk_size = 32 * 1024; auto schema = std::make_shared(); - schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard); + schema->AddDebugField("binaryvec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard); schema->AddDebugField("age", DataType::FLOAT); + schema->AddDebugField("floatvec", DataType::VECTOR_FLOAT, 32, MetricType::METRIC_L2); + auto dataset = DataGen(schema, N); auto segment = CreateGrowingSegment(schema, chunk_size); segment->PreInsert(N); segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); auto vec_ptr = dataset.get_col(0); auto age_ptr = dataset.get_col(1); + auto float_ptr = dataset.get_col(2); SegmentInternalInterface& interface = *segment; auto num_chunk = interface.get_safe_num_chunk(); ASSERT_EQ(num_chunk, upper_div(N, chunk_size)); @@ -38,6 +40,7 @@ TEST(Span, Naive) { for (auto chunk_id = 0; chunk_id < num_chunk; ++chunk_id) { auto vec_span = interface.chunk_data(FieldOffset(0), chunk_id); auto age_span = interface.chunk_data(FieldOffset(1), chunk_id); + auto float_span = interface.chunk_data(FieldOffset(2), chunk_id); auto begin = chunk_id * chunk_size; auto end = std::min((chunk_id + 1) * chunk_size, N); auto chunk_size = end - begin; @@ -47,5 +50,8 @@ TEST(Span, Naive) { for (int i = 0; i < chunk_size; ++i) { ASSERT_EQ(age_span.data()[i], age_ptr[i + begin]); } + for (int i = 0; i < chunk_size; ++i) { + ASSERT_EQ(float_span.data()[i], float_ptr[i + begin * 32]); + } } } \ No newline at end of file