From 63ca5f803150c0a04c435b34fe7cb2e5c5425067 Mon Sep 17 00:00:00 2001 From: FluorineDog Date: Wed, 13 Jan 2021 18:46:25 +0800 Subject: [PATCH] Add span and segment interface, rename segment_growing and use interface as base Signed-off-by: FluorineDog --- internal/core/src/common/Span.h | 134 ++++++++++++++++++ internal/core/src/query/Search.cpp | 4 +- internal/core/src/query/Search.h | 6 +- .../src/query/generated/ExecExprVisitor.h | 6 +- .../src/query/generated/ExecPlanNodeVisitor.h | 8 +- .../src/query/generated/VerifyExprVisitor.h | 2 +- .../query/generated/VerifyPlanNodeVisitor.h | 2 +- .../src/query/visitors/ExecExprVisitor.cpp | 6 +- .../query/visitors/ExecPlanNodeVisitor.cpp | 14 +- .../query/visitors/VerifyPlanNodeVisitor.cpp | 2 +- internal/core/src/segcore/CMakeLists.txt | 4 +- internal/core/src/segcore/SegmentBase.cpp | 52 ------- internal/core/src/segcore/SegmentGrowing.cpp | 28 ++++ .../{SegmentBase.h => SegmentGrowing.h} | 31 +--- ...tSmallIndex.cpp => SegmentGrowingImpl.cpp} | 59 ++++---- ...gmentSmallIndex.h => SegmentGrowingImpl.h} | 34 +++-- internal/core/src/segcore/SegmentInterface.h | 61 ++++++++ internal/core/src/segcore/SegmentSealed.h | 26 ++++ internal/core/src/segcore/segment_c.cpp | 36 ++--- internal/core/unittest/CMakeLists.txt | 2 + internal/core/unittest/test_binary.cpp | 2 +- internal/core/unittest/test_common.cpp | 18 +++ .../core/unittest/test_concurrent_vector.cpp | 4 +- internal/core/unittest/test_expr.cpp | 14 +- internal/core/unittest/test_indexing.cpp | 4 +- internal/core/unittest/test_interface.cpp | 18 +++ internal/core/unittest/test_query.cpp | 32 ++--- internal/core/unittest/test_sealed.cpp | 12 +- internal/core/unittest/test_segcore.cpp | 6 +- internal/core/unittest/test_utils/DataGen.h | 2 +- scripts/run_go_unittest.sh | 2 +- 31 files changed, 429 insertions(+), 202 deletions(-) create mode 100644 internal/core/src/common/Span.h delete mode 100644 internal/core/src/segcore/SegmentBase.cpp create mode 100644 internal/core/src/segcore/SegmentGrowing.cpp rename internal/core/src/segcore/{SegmentBase.h => SegmentGrowing.h} (73%) rename internal/core/src/segcore/{SegmentSmallIndex.cpp => SegmentGrowingImpl.cpp} (87%) rename internal/core/src/segcore/{SegmentSmallIndex.h => SegmentGrowingImpl.h} (84%) create mode 100644 internal/core/src/segcore/SegmentInterface.h create mode 100644 internal/core/src/segcore/SegmentSealed.h create mode 100644 internal/core/unittest/test_interface.cpp diff --git a/internal/core/src/common/Span.h b/internal/core/src/common/Span.h new file mode 100644 index 0000000000..50bdb9d8ce --- /dev/null +++ b/internal/core/src/common/Span.h @@ -0,0 +1,134 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#pragma once +#include +#include "common/Types.h" +#include +namespace milvus { +// type erasure to work around virtual restriction +class SpanBase { + public: + explicit SpanBase(const void* data, int64_t row_count, int64_t element_sizeof) + : data_(data), row_count_(row_count), element_sizeof_(element_sizeof) { + } + + int64_t + get_element_sizeof() const { + return element_sizeof_; + } + + int64_t + row_count() const { + return row_count_; + } + + int64_t + element_sizeof() const { + return element_sizeof_; + } + + const void* + data() const { + return data_; + } + + private: + const void* data_; + int64_t row_count_; + int64_t element_sizeof_; +}; + +template +class Span; + +// TODO: refine Span to support T=FloatVector +template +class Span>> { + public: + using embeded_type = T; + explicit Span(const T* data, int64_t row_count) : data_(data), row_count_(row_count) { + } + + operator SpanBase() const { + return SpanBase(data_, row_count_, sizeof(T)); + } + + explicit Span(const SpanBase& base) : Span(reinterpret_cast(base.data()), base.row_count()) { + assert(base.element_sizeof() == sizeof(T)); + } + + int64_t + element_sizeof() const { + return sizeof(T); + } + + const T* + data() const { + return data_; + } + + int64_t + row_count() const { + return row_count_; + } + + private: + const T* data_; + const int64_t row_count_; +}; + +namespace segcore { +class VectorTrait; +class FloatVector; +class BinaryVector; +} // namespace segcore + +template +class Span>> { + public: + using embedded_type = typename VectorType::embedded_type; + + Span(const embedded_type* data, int64_t row_count, int64_t element_sizeof) + : row_count_(row_count), data_(data), element_sizeof_(element_sizeof) { + } + + explicit Span(const SpanBase& base) + : data_(reinterpret_cast(base.data())), + row_count_(base.row_count()), + element_sizeof_(base.element_sizeof()) { + } + + operator SpanBase() const { + return SpanBase(data_, row_count_, element_sizeof_); + } + + int64_t + element_sizeof() const { + return element_sizeof_; + } + + const embedded_type* + data() const { + return data_; + } + + int64_t + row_count() const { + return row_count_; + } + + private: + const embedded_type* data_; + const int64_t row_count_; + const int64_t element_sizeof_; +}; +} // namespace milvus diff --git a/internal/core/src/query/Search.cpp b/internal/core/src/query/Search.cpp index f812c9fd6f..79756fbec4 100644 --- a/internal/core/src/query/Search.cpp +++ b/internal/core/src/query/Search.cpp @@ -35,7 +35,7 @@ create_bitmap_view(std::optional bitmaps_opt, int64_t chunk } Status -FloatSearch(const segcore::SegmentSmallIndex& segment, +FloatSearch(const segcore::SegmentGrowingImpl& segment, const query::QueryInfo& info, const float* query_data, int64_t num_queries, @@ -129,7 +129,7 @@ FloatSearch(const segcore::SegmentSmallIndex& segment, } Status -BinarySearch(const segcore::SegmentSmallIndex& segment, +BinarySearch(const segcore::SegmentGrowingImpl& segment, const query::QueryInfo& info, const uint8_t* query_data, int64_t num_queries, diff --git a/internal/core/src/query/Search.h b/internal/core/src/query/Search.h index f130b9aaca..1782f0c4da 100644 --- a/internal/core/src/query/Search.h +++ b/internal/core/src/query/Search.h @@ -11,7 +11,7 @@ #pragma once #include -#include "segcore/SegmentSmallIndex.h" +#include "segcore/SegmentGrowingImpl.h" #include #include #include "query/SubQueryResult.h" @@ -23,7 +23,7 @@ using BitmapSimple = std::deque; // TODO: merge these two search into one // note: c++17 don't support optional ref Status -FloatSearch(const segcore::SegmentSmallIndex& segment, +FloatSearch(const segcore::SegmentGrowingImpl& segment, const QueryInfo& info, const float* query_data, int64_t num_queries, @@ -32,7 +32,7 @@ FloatSearch(const segcore::SegmentSmallIndex& segment, QueryResult& results); Status -BinarySearch(const segcore::SegmentSmallIndex& segment, +BinarySearch(const segcore::SegmentGrowingImpl& segment, const query::QueryInfo& info, const uint8_t* query_data, int64_t num_queries, diff --git a/internal/core/src/query/generated/ExecExprVisitor.h b/internal/core/src/query/generated/ExecExprVisitor.h index a9e0574a6e..981be05a16 100644 --- a/internal/core/src/query/generated/ExecExprVisitor.h +++ b/internal/core/src/query/generated/ExecExprVisitor.h @@ -16,7 +16,7 @@ #include #include #include -#include "segcore/SegmentSmallIndex.h" +#include "segcore/SegmentGrowingImpl.h" #include "query/ExprImpl.h" #include "ExprVisitor.h" @@ -37,7 +37,7 @@ class ExecExprVisitor : public ExprVisitor { public: using RetType = std::deque>; - explicit ExecExprVisitor(segcore::SegmentSmallIndex& segment) : segment_(segment) { + explicit ExecExprVisitor(const segcore::SegmentGrowingImpl& segment) : segment_(segment) { } RetType call_child(Expr& expr) { @@ -63,7 +63,7 @@ class ExecExprVisitor : public ExprVisitor { ExecTermVisitorImpl(TermExpr& expr_raw) -> RetType; private: - segcore::SegmentSmallIndex& segment_; + const segcore::SegmentGrowingImpl& segment_; std::optional ret_; }; } // namespace milvus::query diff --git a/internal/core/src/query/generated/ExecPlanNodeVisitor.h b/internal/core/src/query/generated/ExecPlanNodeVisitor.h index c026c68985..2cd629edf5 100644 --- a/internal/core/src/query/generated/ExecPlanNodeVisitor.h +++ b/internal/core/src/query/generated/ExecPlanNodeVisitor.h @@ -14,7 +14,7 @@ // DO NOT EDIT #include "utils/Json.h" #include "query/PlanImpl.h" -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" #include #include "PlanNodeVisitor.h" @@ -29,7 +29,9 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor { public: using RetType = QueryResult; - ExecPlanNodeVisitor(segcore::SegmentBase& segment, Timestamp timestamp, const PlaceholderGroup& placeholder_group) + ExecPlanNodeVisitor(const segcore::SegmentGrowing& segment, + Timestamp timestamp, + const PlaceholderGroup& placeholder_group) : segment_(segment), timestamp_(timestamp), placeholder_group_(placeholder_group) { } // using RetType = nlohmann::json; @@ -46,7 +48,7 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor { private: // std::optional ret_; - segcore::SegmentBase& segment_; + const segcore::SegmentGrowing& segment_; Timestamp timestamp_; const PlaceholderGroup& placeholder_group_; diff --git a/internal/core/src/query/generated/VerifyExprVisitor.h b/internal/core/src/query/generated/VerifyExprVisitor.h index 6b04a76978..940240c9b4 100644 --- a/internal/core/src/query/generated/VerifyExprVisitor.h +++ b/internal/core/src/query/generated/VerifyExprVisitor.h @@ -16,7 +16,7 @@ #include #include #include -#include "segcore/SegmentSmallIndex.h" +#include "segcore/SegmentGrowingImpl.h" #include "query/ExprImpl.h" #include "ExprVisitor.h" diff --git a/internal/core/src/query/generated/VerifyPlanNodeVisitor.h b/internal/core/src/query/generated/VerifyPlanNodeVisitor.h index a964e6c08f..3ba4611afe 100644 --- a/internal/core/src/query/generated/VerifyPlanNodeVisitor.h +++ b/internal/core/src/query/generated/VerifyPlanNodeVisitor.h @@ -14,7 +14,7 @@ // DO NOT EDIT #include "utils/Json.h" #include "query/PlanImpl.h" -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" #include #include "PlanNodeVisitor.h" diff --git a/internal/core/src/query/visitors/ExecExprVisitor.cpp b/internal/core/src/query/visitors/ExecExprVisitor.cpp index a15214c0c2..37353faa5a 100644 --- a/internal/core/src/query/visitors/ExecExprVisitor.cpp +++ b/internal/core/src/query/visitors/ExecExprVisitor.cpp @@ -13,7 +13,7 @@ #include #include #include -#include "segcore/SegmentSmallIndex.h" +#include "segcore/SegmentGrowingImpl.h" #include "query/ExprImpl.h" #include "query/generated/ExecExprVisitor.h" @@ -25,7 +25,7 @@ namespace impl { class ExecExprVisitor : ExprVisitor { public: using RetType = std::deque>; - explicit ExecExprVisitor(segcore::SegmentSmallIndex& segment) : segment_(segment) { + explicit ExecExprVisitor(const segcore::SegmentGrowingImpl& segment) : segment_(segment) { } RetType call_child(Expr& expr) { @@ -51,7 +51,7 @@ class ExecExprVisitor : ExprVisitor { ExecTermVisitorImpl(TermExpr& expr_raw) -> RetType; private: - segcore::SegmentSmallIndex& segment_; + const segcore::SegmentGrowingImpl& segment_; std::optional ret_; }; } // namespace impl diff --git a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp index a32347fa9c..dbeb339042 100644 --- a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp +++ b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp @@ -11,10 +11,10 @@ #include "utils/Json.h" #include "query/PlanImpl.h" -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" #include #include "query/generated/ExecPlanNodeVisitor.h" -#include "segcore/SegmentSmallIndex.h" +#include "segcore/SegmentGrowingImpl.h" #include "query/generated/ExecExprVisitor.h" #include "query/Search.h" #include "query/SearchOnSealed.h" @@ -28,7 +28,9 @@ namespace impl { class ExecPlanNodeVisitor : PlanNodeVisitor { public: using RetType = QueryResult; - ExecPlanNodeVisitor(segcore::SegmentBase& segment, Timestamp timestamp, const PlaceholderGroup& placeholder_group) + ExecPlanNodeVisitor(const segcore::SegmentGrowing& segment, + Timestamp timestamp, + const PlaceholderGroup& placeholder_group) : segment_(segment), timestamp_(timestamp), placeholder_group_(placeholder_group) { } // using RetType = nlohmann::json; @@ -45,7 +47,7 @@ class ExecPlanNodeVisitor : PlanNodeVisitor { private: // std::optional ret_; - segcore::SegmentBase& segment_; + const segcore::SegmentGrowing& segment_; Timestamp timestamp_; const PlaceholderGroup& placeholder_group_; @@ -58,7 +60,7 @@ void ExecPlanNodeVisitor::visit(FloatVectorANNS& node) { // TODO: optimize here, remove the dynamic cast assert(!ret_.has_value()); - auto segment = dynamic_cast(&segment_); + auto segment = dynamic_cast(&segment_); AssertInfo(segment, "support SegmentSmallIndex Only"); RetType ret; auto& ph = placeholder_group_.at(0); @@ -89,7 +91,7 @@ void ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) { // TODO: optimize here, remove the dynamic cast assert(!ret_.has_value()); - auto segment = dynamic_cast(&segment_); + auto segment = dynamic_cast(&segment_); AssertInfo(segment, "support SegmentSmallIndex Only"); RetType ret; auto& ph = placeholder_group_.at(0); diff --git a/internal/core/src/query/visitors/VerifyPlanNodeVisitor.cpp b/internal/core/src/query/visitors/VerifyPlanNodeVisitor.cpp index 28b0730e6a..fed1a445ea 100644 --- a/internal/core/src/query/visitors/VerifyPlanNodeVisitor.cpp +++ b/internal/core/src/query/visitors/VerifyPlanNodeVisitor.cpp @@ -11,7 +11,7 @@ #include "query/generated/VerifyPlanNodeVisitor.h" #include "knowhere/index/vector_index/ConfAdapterMgr.h" -#include "segcore/SegmentSmallIndex.h" +#include "segcore/SegmentGrowingImpl.h" #include "knowhere/index/vector_index/ConfAdapter.h" #include "knowhere/index/vector_index/helpers/IndexParameter.h" diff --git a/internal/core/src/segcore/CMakeLists.txt b/internal/core/src/segcore/CMakeLists.txt index 3646097a7a..2a5fa4b8f6 100644 --- a/internal/core/src/segcore/CMakeLists.txt +++ b/internal/core/src/segcore/CMakeLists.txt @@ -1,10 +1,10 @@ set(SEGCORE_FILES - SegmentSmallIndex.cpp + SegmentGrowingImpl.cpp Collection.cpp collection_c.cpp segment_c.cpp - SegmentBase.cpp + SegmentGrowing.cpp IndexingEntry.cpp InsertRecord.cpp Reduce.cpp diff --git a/internal/core/src/segcore/SegmentBase.cpp b/internal/core/src/segcore/SegmentBase.cpp deleted file mode 100644 index 315d278234..0000000000 --- a/internal/core/src/segcore/SegmentBase.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (C) 2019-2020 Zilliz. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software distributed under the License -// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -// or implied. See the License for the specific language governing permissions and limitations under the License - -#include "segcore/SegmentBase.h" -#include "segcore/SegmentSmallIndex.h" - -namespace milvus::segcore { - -// seems to be deprecated -struct ColumnBasedDataChunk { - std::vector> entity_vecs; - - static ColumnBasedDataChunk - from(const RowBasedRawData& source, const Schema& schema) { - ColumnBasedDataChunk dest; - auto count = source.count; - auto raw_data = reinterpret_cast(source.raw_data); - auto align = source.sizeof_per_row; - for (auto& field : schema) { - auto len = field.get_sizeof(); - Assert(len % sizeof(float) == 0); - std::vector new_col(len * count / sizeof(float)); - for (int64_t i = 0; i < count; ++i) { - memcpy(new_col.data() + i * len / sizeof(float), raw_data + i * align, len); - } - dest.entity_vecs.push_back(std::move(new_col)); - // offset the raw_data - raw_data += len / sizeof(float); - } - return dest; - } -}; - -int -TestABI() { - return 42; -} - -std::unique_ptr -CreateSegment(SchemaPtr schema, int64_t chunk_size) { - auto segment = std::make_unique(schema, chunk_size); - return segment; -} -} // namespace milvus::segcore diff --git a/internal/core/src/segcore/SegmentGrowing.cpp b/internal/core/src/segcore/SegmentGrowing.cpp new file mode 100644 index 0000000000..065f7464ac --- /dev/null +++ b/internal/core/src/segcore/SegmentGrowing.cpp @@ -0,0 +1,28 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include "segcore/SegmentGrowing.h" +#include "segcore/SegmentGrowingImpl.h" + +namespace milvus::segcore { + +int +TestABI() { + return 42; +} + +std::unique_ptr +CreateGrowingSegment(SchemaPtr schema, int64_t chunk_size) { + auto segment = std::make_unique(schema, chunk_size); + return segment; +} + +} // namespace milvus::segcore diff --git a/internal/core/src/segcore/SegmentBase.h b/internal/core/src/segcore/SegmentGrowing.h similarity index 73% rename from internal/core/src/segcore/SegmentBase.h rename to internal/core/src/segcore/SegmentGrowing.h index 8e2d130dd1..ffd794ca52 100644 --- a/internal/core/src/segcore/SegmentBase.h +++ b/internal/core/src/segcore/SegmentGrowing.h @@ -19,6 +19,7 @@ #include "query/deprecated/GeneralQuery.h" #include "query/Plan.h" #include "common/LoadIndex.h" +#include "segcore/SegmentInterface.h" namespace milvus { namespace segcore { @@ -34,7 +35,7 @@ struct RowBasedRawData { int TestABI(); -class SegmentBase { +class SegmentGrowing : public SegmentInternalInterface { public: // definitions enum class SegmentState { @@ -44,9 +45,6 @@ class SegmentBase { }; public: - virtual ~SegmentBase() = default; - // SegmentBase(std::shared_ptr collection); - virtual int64_t PreInsert(int64_t size) = 0; @@ -59,36 +57,19 @@ class SegmentBase { virtual int64_t PreDelete(int64_t size) = 0; - // TODO: add id into delete log, possibly bitmap virtual Status Delete(int64_t reserved_offset, int64_t size, const int64_t* row_ids, const Timestamp* timestamps) = 0; - public: virtual Status - Search(const query::Plan* Plan, - const query::PlaceholderGroup* placeholder_groups[], - const Timestamp timestamps[], - int num_groups, - QueryResult& results) = 0; - - virtual Status - FillTargetEntry(const query::Plan* Plan, QueryResult& results) = 0; + LoadIndexing(const LoadIndexInfo& info) = 0; // stop receive insert requests virtual Status Close() = 0; - virtual Status - LoadIndexing(const LoadIndexInfo& info) = 0; - - virtual int64_t - GetMemoryUsageInBytes() = 0; - public: - virtual ssize_t - get_row_count() const = 0; - + // feature not implemented virtual SegmentState get_state() const = 0; @@ -96,10 +77,10 @@ class SegmentBase { get_deleted_count() const = 0; }; -using SegmentBasePtr = std::unique_ptr; +using SegmentBasePtr = std::unique_ptr; SegmentBasePtr -CreateSegment(SchemaPtr schema, int64_t chunk_size = 32 * 1024); +CreateGrowingSegment(SchemaPtr schema, int64_t chunk_size = 32 * 1024); } // namespace segcore } // namespace milvus diff --git a/internal/core/src/segcore/SegmentSmallIndex.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp similarity index 87% rename from internal/core/src/segcore/SegmentSmallIndex.cpp rename to internal/core/src/segcore/SegmentGrowingImpl.cpp index 2bfcbbd486..68a0b01079 100644 --- a/internal/core/src/segcore/SegmentSmallIndex.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -20,7 +20,7 @@ #include #include #include "query/generated/ExecPlanNodeVisitor.h" -#include "segcore/SegmentSmallIndex.h" +#include "segcore/SegmentGrowingImpl.h" #include "query/PlanNode.h" #include "query/PlanImpl.h" #include "segcore/Reduce.h" @@ -29,22 +29,22 @@ namespace milvus::segcore { int64_t -SegmentSmallIndex::PreInsert(int64_t size) { +SegmentGrowingImpl::PreInsert(int64_t size) { auto reserved_begin = record_.reserved.fetch_add(size); return reserved_begin; } int64_t -SegmentSmallIndex::PreDelete(int64_t size) { +SegmentGrowingImpl::PreDelete(int64_t size) { auto reserved_begin = deleted_record_.reserved.fetch_add(size); return reserved_begin; } auto -SegmentSmallIndex::get_deleted_bitmap(int64_t del_barrier, - Timestamp query_timestamp, - int64_t insert_barrier, - bool force) -> std::shared_ptr { +SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier, + Timestamp query_timestamp, + int64_t insert_barrier, + bool force) -> std::shared_ptr { auto old = deleted_record_.get_lru_entry(); if (!force || old->bitmap_ptr->count() == insert_barrier) { @@ -113,11 +113,11 @@ SegmentSmallIndex::get_deleted_bitmap(int64_t del_barrier, } Status -SegmentSmallIndex::Insert(int64_t reserved_begin, - int64_t size, - const int64_t* uids_raw, - const Timestamp* timestamps_raw, - const RowBasedRawData& entities_raw) { +SegmentGrowingImpl::Insert(int64_t reserved_begin, + int64_t size, + const int64_t* uids_raw, + const Timestamp* timestamps_raw, + const RowBasedRawData& entities_raw) { Assert(entities_raw.count == size); // step 1: check schema if valid if (entities_raw.sizeof_per_row != schema_->get_total_sizeof()) { @@ -184,10 +184,10 @@ SegmentSmallIndex::Insert(int64_t reserved_begin, } Status -SegmentSmallIndex::Delete(int64_t reserved_begin, - int64_t size, - const int64_t* uids_raw, - const Timestamp* timestamps_raw) { +SegmentGrowingImpl::Delete(int64_t reserved_begin, + int64_t size, + const int64_t* uids_raw, + const Timestamp* timestamps_raw) { std::vector> ordering; ordering.resize(size); // #pragma omp parallel for @@ -216,7 +216,7 @@ SegmentSmallIndex::Delete(int64_t reserved_begin, } Status -SegmentSmallIndex::Close() { +SegmentGrowingImpl::Close() { if (this->record_.reserved != this->record_.ack_responder_.GetAck()) { PanicInfo("insert not ready"); } @@ -228,7 +228,7 @@ SegmentSmallIndex::Close() { } int64_t -SegmentSmallIndex::GetMemoryUsageInBytes() { +SegmentGrowingImpl::GetMemoryUsageInBytes() const { int64_t total_bytes = 0; int64_t ins_n = upper_align(record_.reserved, chunk_size_); total_bytes += ins_n * (schema_->get_total_sizeof() + 16 + 1); @@ -237,19 +237,19 @@ SegmentSmallIndex::GetMemoryUsageInBytes() { return total_bytes; } -Status -SegmentSmallIndex::Search(const query::Plan* plan, - const query::PlaceholderGroup** placeholder_groups, - const Timestamp* timestamps, - int num_groups, - QueryResult& results) { +QueryResult +SegmentGrowingImpl::Search(const query::Plan* plan, + const query::PlaceholderGroup** placeholder_groups, + const Timestamp* timestamps, + int64_t num_groups) const { Assert(num_groups == 1); query::ExecPlanNodeVisitor visitor(*this, timestamps[0], *placeholder_groups[0]); - results = visitor.get_moved_result(*plan->plan_node_); - return Status::OK(); + auto results = visitor.get_moved_result(*plan->plan_node_); + return results; } -Status -SegmentSmallIndex::FillTargetEntry(const query::Plan* plan, QueryResult& results) { + +void +SegmentGrowingImpl::FillTargetEntry(const query::Plan* plan, QueryResult& results) const { AssertInfo(plan, "empty plan"); auto size = results.result_distances_.size(); Assert(results.internal_seg_offsets_.size() == size); @@ -282,11 +282,10 @@ SegmentSmallIndex::FillTargetEntry(const query::Plan* plan, QueryResult& results results.row_data_.emplace_back(std::move(blob)); } } - return Status::OK(); } Status -SegmentSmallIndex::LoadIndexing(const LoadIndexInfo& info) { +SegmentGrowingImpl::LoadIndexing(const LoadIndexInfo& info) { auto field_offset = schema_->get_offset(FieldName(info.field_name)); Assert(info.index_params.count("metric_type")); diff --git a/internal/core/src/segcore/SegmentSmallIndex.h b/internal/core/src/segcore/SegmentGrowingImpl.h similarity index 84% rename from internal/core/src/segcore/SegmentSmallIndex.h rename to internal/core/src/segcore/SegmentGrowingImpl.h index 8d89954954..48cb673b23 100644 --- a/internal/core/src/segcore/SegmentSmallIndex.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -22,7 +22,7 @@ #include "AckResponder.h" #include "SealedIndexingRecord.h" #include "ConcurrentVector.h" -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" #include "query/deprecated/GeneralQuery.h" #include "utils/Status.h" #include "segcore/DeletedRecord.h" @@ -34,7 +34,7 @@ namespace milvus::segcore { -class SegmentSmallIndex : public SegmentBase { +class SegmentGrowingImpl : public SegmentGrowing { public: int64_t PreInsert(int64_t size) override; @@ -55,12 +55,11 @@ class SegmentSmallIndex : public SegmentBase { Status Delete(int64_t reserverd_offset, int64_t size, const int64_t* row_ids, const Timestamp* timestamps) override; - Status + QueryResult Search(const query::Plan* Plan, const query::PlaceholderGroup* placeholder_groups[], const Timestamp timestamps[], - int num_groups, - QueryResult& results) override; + int64_t num_groups) const override; // stop receive insert requests // will move data to immutable vector or something @@ -68,7 +67,7 @@ class SegmentSmallIndex : public SegmentBase { Close() override; int64_t - GetMemoryUsageInBytes() override; + GetMemoryUsageInBytes() const override; public: const InsertRecord& @@ -92,7 +91,7 @@ class SegmentSmallIndex : public SegmentBase { } const Schema& - get_schema() const { + get_schema() const override { return *schema_; } @@ -112,14 +111,19 @@ class SegmentSmallIndex : public SegmentBase { return 0; } + int64_t + get_num_chunk() const override { + PanicInfo("unimplemented"); + } + Status LoadIndexing(const LoadIndexInfo& info) override; public: - friend std::unique_ptr - CreateSegment(SchemaPtr schema, int64_t chunk_size); + friend std::unique_ptr + CreateGrowingSegment(SchemaPtr schema, int64_t chunk_size); - explicit SegmentSmallIndex(SchemaPtr schema, int64_t chunk_size) + explicit SegmentGrowingImpl(SchemaPtr schema, int64_t chunk_size) : chunk_size_(chunk_size), schema_(std::move(schema)), record_(*schema_, chunk_size), @@ -130,8 +134,14 @@ class SegmentSmallIndex : public SegmentBase { std::shared_ptr get_deleted_bitmap(int64_t del_barrier, Timestamp query_timestamp, int64_t insert_barrier, bool force = false); - Status - FillTargetEntry(const query::Plan* Plan, QueryResult& results) override; + void + FillTargetEntry(const query::Plan* Plan, QueryResult& results) const override; + + protected: + SpanBase + chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const override { + PanicInfo("unimplemented"); + } private: int64_t chunk_size_; diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h new file mode 100644 index 0000000000..48fb37e1b7 --- /dev/null +++ b/internal/core/src/segcore/SegmentInterface.h @@ -0,0 +1,61 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#pragma once +#include "common/Types.h" +#include "common/Schema.h" +#include "query/Plan.h" +#include "common/Span.h" + +namespace milvus::segcore { + +class SegmentInterface { + public: + virtual void + FillTargetEntry(const query::Plan* Plan, QueryResult& results) const = 0; + + virtual QueryResult + Search(const query::Plan* Plan, + const query::PlaceholderGroup* placeholder_groups[], + const Timestamp timestamps[], + int64_t num_groups) const = 0; + + virtual int64_t + GetMemoryUsageInBytes() const = 0; + + virtual int64_t + get_row_count() const = 0; + + virtual ~SegmentInterface() = default; +}; + +// internal API for DSL calculation +class SegmentInternalInterface : public SegmentInterface { + public: + virtual const Schema& + get_schema() const = 0; + + virtual int64_t + get_num_chunk() const = 0; + + template + Span + chunk_data(FieldOffset field_offset, int64_t chunk_id) const { + auto span = chunk_data_impl(field_offset, chunk_id); + return static_cast>(span); + } + + protected: + virtual SpanBase + chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const = 0; +}; + +} // namespace milvus::segcore diff --git a/internal/core/src/segcore/SegmentSealed.h b/internal/core/src/segcore/SegmentSealed.h new file mode 100644 index 0000000000..a784f3f246 --- /dev/null +++ b/internal/core/src/segcore/SegmentSealed.h @@ -0,0 +1,26 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include "SegmentInterface.h" + +// class SegmentSealed : public SegmentInternalInterface { +// public: +// const Schema& get_schema() = 0; +// int64_t get_num_chunk() = 0; +// +// explicit SegmentSealed(SchemaPtr schema); +// void set_size(); +// void load_data(FieldId field_id, void* blob, int64_t blob_size); +// +// +// private: +// SchemaPtr schema_; +// } diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index 479de9decb..d7e6d36638 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -11,7 +11,7 @@ #include -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" #include "segcore/Collection.h" #include "segcore/segment_c.h" #include @@ -25,7 +25,7 @@ CSegmentBase NewSegment(CCollection collection, uint64_t segment_id) { auto col = (milvus::segcore::Collection*)collection; - auto segment = milvus::segcore::CreateSegment(col->get_schema()); + auto segment = milvus::segcore::CreateGrowingSegment(col->get_schema()); // TODO: delete print std::cout << "create segment " << segment_id << std::endl; @@ -34,7 +34,7 @@ NewSegment(CCollection collection, uint64_t segment_id) { void DeleteSegment(CSegmentBase segment) { - auto s = (milvus::segcore::SegmentBase*)segment; + auto s = (milvus::segcore::SegmentGrowing*)segment; // TODO: delete print std::cout << "delete segment " << std::endl; @@ -58,7 +58,7 @@ Insert(CSegmentBase c_segment, void* raw_data, int sizeof_per_row, int64_t count) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; milvus::segcore::RowBasedRawData dataChunk{}; dataChunk.raw_data = raw_data; @@ -85,7 +85,7 @@ Insert(CSegmentBase c_segment, int64_t PreInsert(CSegmentBase c_segment, int64_t size) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; // TODO: delete print // std::cout << "PreInsert segment " << std::endl; @@ -95,7 +95,7 @@ PreInsert(CSegmentBase c_segment, int64_t size) { CStatus Delete( CSegmentBase c_segment, int64_t reserved_offset, int64_t size, const int64_t* row_ids, const uint64_t* timestamps) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; try { auto res = segment->Delete(reserved_offset, size, row_ids, timestamps); @@ -114,7 +114,7 @@ Delete( int64_t PreDelete(CSegmentBase c_segment, int64_t size) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; // TODO: delete print // std::cout << "PreDelete segment " << std::endl; @@ -128,7 +128,7 @@ Search(CSegmentBase c_segment, uint64_t* timestamps, int num_groups, CQueryResult* result) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; auto plan = (milvus::query::Plan*)c_plan; std::vector placeholder_groups; for (int i = 0; i < num_groups; ++i) { @@ -139,7 +139,7 @@ Search(CSegmentBase c_segment, auto status = CStatus(); try { - auto res = segment->Search(plan, placeholder_groups.data(), timestamps, num_groups, *query_result); + *query_result = segment->Search(plan, placeholder_groups.data(), timestamps, num_groups); if (plan->plan_node_->query_info_.metric_type_ != "IP") { for (auto& dis : query_result->result_distances_) { dis *= -1; @@ -163,13 +163,13 @@ Search(CSegmentBase c_segment, CStatus FillTargetEntry(CSegmentBase c_segment, CPlan c_plan, CQueryResult c_result) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; auto plan = (milvus::query::Plan*)c_plan; auto result = (milvus::QueryResult*)c_result; auto status = CStatus(); try { - auto res = segment->FillTargetEntry(plan, *result); + segment->FillTargetEntry(plan, *result); status.error_code = Success; status.error_msg = ""; } catch (std::runtime_error& e) { @@ -183,7 +183,7 @@ CStatus UpdateSegmentIndex(CSegmentBase c_segment, CLoadIndexInfo c_load_index_info) { auto status = CStatus(); try { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; auto load_index_info = (LoadIndexInfo*)c_load_index_info; auto res = segment->LoadIndexing(*load_index_info); status.error_code = Success; @@ -199,7 +199,7 @@ UpdateSegmentIndex(CSegmentBase c_segment, CLoadIndexInfo c_load_index_info) { int Close(CSegmentBase c_segment) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; auto status = segment->Close(); return status.code(); } @@ -211,14 +211,14 @@ BuildIndex(CCollection c_collection, CSegmentBase c_segment) { bool IsOpened(CSegmentBase c_segment) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; auto status = segment->get_state(); - return status == milvus::segcore::SegmentBase::SegmentState::Open; + return status == milvus::segcore::SegmentGrowing::SegmentState::Open; } int64_t GetMemoryUsageInBytes(CSegmentBase c_segment) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; auto mem_size = segment->GetMemoryUsageInBytes(); return mem_size; } @@ -227,14 +227,14 @@ GetMemoryUsageInBytes(CSegmentBase c_segment) { int64_t GetRowCount(CSegmentBase c_segment) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; auto row_count = segment->get_row_count(); return row_count; } int64_t GetDeletedCount(CSegmentBase c_segment) { - auto segment = (milvus::segcore::SegmentBase*)c_segment; + auto segment = (milvus::segcore::SegmentGrowing*)c_segment; auto deleted_count = segment->get_deleted_count(); return deleted_count; } diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index ce3f8ab702..644979bef8 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -13,8 +13,10 @@ set(MILVUS_TEST_FILES test_bitmap.cpp test_binary.cpp test_index_wrapper.cpp + test_common.cpp test_sealed.cpp test_reduce.cpp + test_interface.cpp ) add_executable(all_tests ${MILVUS_TEST_FILES} diff --git a/internal/core/unittest/test_binary.cpp b/internal/core/unittest/test_binary.cpp index bf5bb25944..419668fcff 100644 --- a/internal/core/unittest/test_binary.cpp +++ b/internal/core/unittest/test_binary.cpp @@ -24,7 +24,7 @@ TEST(Binary, Insert) { schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, 128, MetricType::METRIC_Jaccard); schema->AddDebugField("age", DataType::INT32); auto dataset = DataGen(schema, N, 10); - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); segment->PreInsert(N); segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); int i = 1 + 1; diff --git a/internal/core/unittest/test_common.cpp b/internal/core/unittest/test_common.cpp index 2be5c0bee0..71d8efc88e 100644 --- a/internal/core/unittest/test_common.cpp +++ b/internal/core/unittest/test_common.cpp @@ -10,3 +10,21 @@ // or implied. See the License for the specific language governing permissions and limitations under the License #include +#include +#include "common/Types.h" +#include "common/Span.h" + +TEST(Common, Span) { + using namespace milvus; + using namespace milvus::segcore; + + Span s1(nullptr, 100); + Span s2(nullptr, 10, 16 * sizeof(float)); + SpanBase b1 = s1; + SpanBase b2 = s2; + auto r1 = static_cast>(b1); + auto r2 = static_cast>(b2); + ASSERT_EQ(r1.row_count(), 100); + ASSERT_EQ(r2.row_count(), 10); + ASSERT_EQ(r2.element_sizeof(), 16 * sizeof(float)); +} diff --git a/internal/core/unittest/test_concurrent_vector.cpp b/internal/core/unittest/test_concurrent_vector.cpp index 5ef658d535..e7e7d409bb 100644 --- a/internal/core/unittest/test_concurrent_vector.cpp +++ b/internal/core/unittest/test_concurrent_vector.cpp @@ -18,10 +18,10 @@ #include #include "segcore/ConcurrentVector.h" -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" // #include "knowhere/index/vector_index/helpers/IndexParameter.h" -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" #include "segcore/AckResponder.h" using std::cin; diff --git a/internal/core/unittest/test_expr.cpp b/internal/core/unittest/test_expr.cpp index 621d6839e9..a979ac929a 100644 --- a/internal/core/unittest/test_expr.cpp +++ b/internal/core/unittest/test_expr.cpp @@ -21,7 +21,7 @@ #include "query/Plan.h" #include "utils/tools.h" #include -#include "segcore/SegmentSmallIndex.h" +#include "segcore/SegmentGrowingImpl.h" using namespace milvus; TEST(Expr, Naive) { @@ -293,7 +293,7 @@ TEST(Expr, TestRange) { schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); schema->AddDebugField("age", DataType::INT32); - auto seg = CreateSegment(schema); + auto seg = CreateGrowingSegment(schema); int N = 10000; std::vector age_col; int num_iters = 100; @@ -305,7 +305,7 @@ TEST(Expr, TestRange) { seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_); } - auto seg_promote = dynamic_cast(seg.get()); + auto seg_promote = dynamic_cast(seg.get()); ExecExprVisitor visitor(*seg_promote); for (auto [clause, ref_func] : testcases) { auto loc = dsl_string_tmp.find("@@@@"); @@ -377,7 +377,7 @@ TEST(Expr, TestTerm) { schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); schema->AddDebugField("age", DataType::INT32); - auto seg = CreateSegment(schema); + auto seg = CreateGrowingSegment(schema); int N = 10000; std::vector age_col; int num_iters = 100; @@ -389,7 +389,7 @@ TEST(Expr, TestTerm) { seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_); } - auto seg_promote = dynamic_cast(seg.get()); + auto seg_promote = dynamic_cast(seg.get()); ExecExprVisitor visitor(*seg_promote); for (auto [clause, ref_func] : testcases) { auto loc = dsl_string_tmp.find("@@@@"); @@ -480,7 +480,7 @@ TEST(Expr, TestSimpleDsl) { schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); schema->AddDebugField("age", DataType::INT32); - auto seg = CreateSegment(schema); + auto seg = CreateGrowingSegment(schema); std::vector age_col; int num_iters = 100; for (int iter = 0; iter < num_iters; ++iter) { @@ -491,7 +491,7 @@ TEST(Expr, TestSimpleDsl) { seg->Insert(iter * N, N, raw_data.row_ids_.data(), raw_data.timestamps_.data(), raw_data.raw_); } - auto seg_promote = dynamic_cast(seg.get()); + auto seg_promote = dynamic_cast(seg.get()); ExecExprVisitor visitor(*seg_promote); for (auto [clause, ref_func] : testcases) { Json dsl; diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index 4ff065e871..beedf9113f 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -19,10 +19,10 @@ #include #include "segcore/ConcurrentVector.h" -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" // #include "knowhere/index/vector_index/helpers/IndexParameter.h" -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" #include "segcore/AckResponder.h" #include #include diff --git a/internal/core/unittest/test_interface.cpp b/internal/core/unittest/test_interface.cpp new file mode 100644 index 0000000000..2bc0c17d53 --- /dev/null +++ b/internal/core/unittest/test_interface.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include "common/Span.h" +#include + +#include "segcore/SegmentInterface.h" + +TEST(Interface, Naive) { +} \ No newline at end of file diff --git a/internal/core/unittest/test_query.cpp b/internal/core/unittest/test_query.cpp index db6f37b2b6..e8124ac746 100644 --- a/internal/core/unittest/test_query.cpp +++ b/internal/core/unittest/test_query.cpp @@ -19,7 +19,7 @@ #include "query/generated/ShowPlanNodeVisitor.h" #include "query/generated/ExecPlanNodeVisitor.h" #include "query/PlanImpl.h" -#include "segcore/SegmentSmallIndex.h" +#include "segcore/SegmentGrowingImpl.h" #include "pb/schema.pb.h" using namespace milvus; @@ -202,7 +202,7 @@ TEST(Query, ExecWithPredicate) { })"; int64_t N = 1000 * 1000; auto dataset = DataGen(schema, N); - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); segment->PreInsert(N); segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); @@ -210,10 +210,9 @@ TEST(Query, ExecWithPredicate) { auto num_queries = 5; auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); - QueryResult qr; Timestamp time = 1000000; std::vector ph_group_arr = {ph_group.get()}; - segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr); + auto qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1); int topk = 5; Json json = QueryResultToJson(qr); @@ -292,7 +291,7 @@ TEST(Query, ExecTerm) { })"; int64_t N = 1000 * 1000; auto dataset = DataGen(schema, N); - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); segment->PreInsert(N); segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); @@ -303,7 +302,7 @@ TEST(Query, ExecTerm) { QueryResult qr; Timestamp time = 1000000; std::vector ph_group_arr = {ph_group.get()}; - segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr); + qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1); std::vector> results; int topk = 5; auto json = QueryResultToJson(qr); @@ -337,15 +336,14 @@ TEST(Query, ExecEmpty) { } })"; int64_t N = 1000 * 1000; - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); auto plan = CreatePlan(*schema, dsl); auto num_queries = 5; auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024); auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); - QueryResult qr; Timestamp time = 1000000; std::vector ph_group_arr = {ph_group.get()}; - segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr); + auto qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1); std::cout << QueryResultToJson(qr); for (auto i : qr.internal_seg_offsets_) { @@ -384,7 +382,7 @@ TEST(Query, ExecWithoutPredicate) { auto plan = CreatePlan(*schema, dsl); int64_t N = 1000 * 1000; auto dataset = DataGen(schema, N); - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); segment->PreInsert(N); segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); @@ -394,7 +392,7 @@ TEST(Query, ExecWithoutPredicate) { QueryResult qr; Timestamp time = 1000000; std::vector ph_group_arr = {ph_group.get()}; - segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr); + qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1); std::vector> results; int topk = 5; auto json = QueryResultToJson(qr); @@ -468,7 +466,7 @@ TEST(Indexing, InnerProduct) { })"; schema->AddDebugField("normalized", DataType::VECTOR_FLOAT, dim, MetricType::METRIC_INNER_PRODUCT); auto dataset = DataGen(schema, N); - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); auto plan = CreatePlan(*schema, dsl); segment->PreInsert(N); segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); @@ -479,7 +477,7 @@ TEST(Indexing, InnerProduct) { std::vector ts{(Timestamp)N * 2}; const auto* ptr = ph_group.get(); QueryResult qr; - segment->Search(plan.get(), &ptr, ts.data(), 1, qr); + qr = segment->Search(plan.get(), &ptr, ts.data(), 1); std::cout << QueryResultToJson(qr).dump(2); } @@ -515,7 +513,7 @@ TEST(Query, FillSegment) { } auto schema = Schema::ParseFrom(proto); - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); int N = 100000; auto dataset = DataGen(schema, N); segment->PreInsert(N); @@ -544,7 +542,7 @@ TEST(Query, FillSegment) { std::vector groups = {ph.get()}; std::vector timestamps = {N * 2UL}; QueryResult result; - segment->Search(plan.get(), groups.data(), timestamps.data(), 1, result); + result = segment->Search(plan.get(), groups.data(), timestamps.data(), 1); auto topk = 5; auto num_queries = 10; @@ -601,7 +599,7 @@ TEST(Query, ExecWithPredicateBinary) { })"; int64_t N = 1000 * 1000; auto dataset = DataGen(schema, N); - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); segment->PreInsert(N); segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); auto vec_ptr = dataset.get_col(0); @@ -613,7 +611,7 @@ TEST(Query, ExecWithPredicateBinary) { QueryResult qr; Timestamp time = 1000000; std::vector ph_group_arr = {ph_group.get()}; - segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr); + qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1); int topk = 5; Json json = QueryResultToJson(qr); diff --git a/internal/core/unittest/test_sealed.cpp b/internal/core/unittest/test_sealed.cpp index 38305ddaed..09cafb2937 100644 --- a/internal/core/unittest/test_sealed.cpp +++ b/internal/core/unittest/test_sealed.cpp @@ -59,7 +59,7 @@ TEST(Sealed, without_predicate) { vec_col.push_back(0); } auto query_ptr = vec_col.data() + 4200 * dim; - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); segment->PreInsert(N); segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); @@ -72,7 +72,7 @@ TEST(Sealed, without_predicate) { Timestamp time = 1000000; std::vector ph_group_arr = {ph_group.get()}; - segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr); + qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1); auto pre_result = QueryResultToJson(qr); auto indexing = std::make_shared(); @@ -112,7 +112,7 @@ TEST(Sealed, without_predicate) { segment->LoadIndexing(load_info); qr = QueryResult(); - segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr); + qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1); auto post_result = QueryResultToJson(qr); std::cout << ref_result.dump(1); @@ -161,7 +161,7 @@ TEST(Sealed, with_predicate) { auto dataset = DataGen(schema, N); auto vec_col = dataset.get_col(0); auto query_ptr = vec_col.data() + 420000 * dim; - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); segment->PreInsert(N); segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_); @@ -174,7 +174,7 @@ TEST(Sealed, with_predicate) { Timestamp time = 10000000; std::vector ph_group_arr = {ph_group.get()}; - segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr); + qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1); auto pre_qr = qr; auto indexing = std::make_shared(); @@ -205,7 +205,7 @@ TEST(Sealed, with_predicate) { segment->LoadIndexing(load_info); qr = QueryResult(); - segment->Search(plan.get(), ph_group_arr.data(), &time, 1, qr); + qr = segment->Search(plan.get(), ph_group_arr.data(), &time, 1); auto post_qr = qr; for (int i = 0; i < num_queries; ++i) { diff --git a/internal/core/unittest/test_segcore.cpp b/internal/core/unittest/test_segcore.cpp index df4aec443b..d1e0befc57 100644 --- a/internal/core/unittest/test_segcore.cpp +++ b/internal/core/unittest/test_segcore.cpp @@ -17,7 +17,7 @@ // #include "knowhere/index/vector_index/helpers/IndexParameter.h" // #include "segment/SegmentReader.h" // #include "segment/SegmentWriter.h" -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" // #include "utils/Json.h" #include "test_utils/DataGen.h" #include @@ -67,7 +67,7 @@ TEST(SegmentCoreTest, NormalDistributionTest) { schema->AddDebugField("age", DataType::INT32); int N = 1000 * 1000; auto [raw_data, timestamps, uids] = generate_data(N); - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); segment->PreInsert(N); segment->PreDelete(N); } @@ -99,7 +99,7 @@ TEST(SegmentCoreTest, MockTest) { assert(raw_data.size() == line_sizeof * N); // auto index_meta = std::make_shared(schema); - auto segment = CreateSegment(schema); + auto segment = CreateGrowingSegment(schema); RowBasedRawData data_chunk{raw_data.data(), (int)line_sizeof, N}; auto offset = segment->PreInsert(N); diff --git a/internal/core/unittest/test_utils/DataGen.h b/internal/core/unittest/test_utils/DataGen.h index 8eb7adcdec..dc89559d3f 100644 --- a/internal/core/unittest/test_utils/DataGen.h +++ b/internal/core/unittest/test_utils/DataGen.h @@ -14,7 +14,7 @@ #include #include #include -#include "segcore/SegmentBase.h" +#include "segcore/SegmentGrowing.h" #include "Constants.h" #include using boost::algorithm::starts_with; diff --git a/scripts/run_go_unittest.sh b/scripts/run_go_unittest.sh index b48b9f6717..28ed949b4b 100755 --- a/scripts/run_go_unittest.sh +++ b/scripts/run_go_unittest.sh @@ -18,5 +18,5 @@ go test -race -cover "${MILVUS_DIR}/kv/..." -failfast go test -race -cover "${MILVUS_DIR}/proxy/..." -failfast go test -race -cover "${MILVUS_DIR}/writenode/..." -failfast go test -race -cover "${MILVUS_DIR}/master/..." -failfast -go test -cover "${MILVUS_DIR}/msgstream/..." "${MILVUS_DIR}/querynode/..." "${MILVUS_DIR}/storage" "${MILVUS_DIR}/util/..." -failfast +go test -race -cover "${MILVUS_DIR}/msgstream/..." "${MILVUS_DIR}/querynode/..." "${MILVUS_DIR}/storage" "${MILVUS_DIR}/util/..." -failfast #go test -race -cover "${MILVUS_DIR}/kv/..." "${MILVUS_DIR}/msgstream/..." "${MILVUS_DIR}/master/..." "${MILVUS_DIR}/querynode/..." -failfast