From 2b58bd5c0af076aadf993b72b4877f993722bcbb Mon Sep 17 00:00:00 2001
From: aoiasd <45024769+aoiasd@users.noreply.github.com>
Date: Fri, 23 Sep 2022 17:08:51 +0800
Subject: [PATCH] Optimize large memory usage of InsertRecord by using vector
 instead of unordered_map if InsertRecord used in sealed segment (#19245)

Signed-off-by: aoiasd
Signed-off-by: aoiasd
---
 internal/core/src/query/SearchOnSealed.cpp    |   2 +-
 internal/core/src/query/SearchOnSealed.h      |   2 +-
 internal/core/src/segcore/FieldIndexing.cpp   |  23 --
 internal/core/src/segcore/FieldIndexing.h     |  23 +-
 internal/core/src/segcore/InsertRecord.cpp    |  75 ------
 internal/core/src/segcore/InsertRecord.h      | 200 +++++++++++++++---
 .../core/src/segcore/SegmentGrowingImpl.h     |   4 +-
 .../core/src/segcore/SegmentSealedImpl.cpp    |   3 +
 internal/core/src/segcore/SegmentSealedImpl.h |   2 +-
 internal/core/src/segcore/Utils.cpp           |  79 -------
 internal/core/src/segcore/Utils.h             |  70 +++++-
 internal/core/unittest/test_segcore.cpp       |  78 +++++--
 12 files changed, 324 insertions(+), 237 deletions(-)

diff --git a/internal/core/src/query/SearchOnSealed.cpp b/internal/core/src/query/SearchOnSealed.cpp
index 8f561bec78..5772cb30a1 100644
--- a/internal/core/src/query/SearchOnSealed.cpp
+++ b/internal/core/src/query/SearchOnSealed.cpp
@@ -71,7 +71,7 @@ SearchOnSealedIndex(const Schema& schema,
 
 void
 SearchOnSealed(const Schema& schema,
-               const segcore::InsertRecord& record,
+               const segcore::InsertRecord<true>& record,
                const SearchInfo& search_info,
                const void* query_data,
                int64_t num_queries,
diff --git a/internal/core/src/query/SearchOnSealed.h b/internal/core/src/query/SearchOnSealed.h
index 756fc987c7..044d3d39b7 100644
--- a/internal/core/src/query/SearchOnSealed.h
+++ b/internal/core/src/query/SearchOnSealed.h
@@ -29,7 +29,7 @@ SearchOnSealedIndex(const Schema& schema,
 
 void
 SearchOnSealed(const Schema& schema,
-               const segcore::InsertRecord& record,
+               const segcore::InsertRecord<true>& record,
                const SearchInfo& search_info,
                const void* query_data,
                int64_t num_queries,
diff --git a/internal/core/src/segcore/FieldIndexing.cpp b/internal/core/src/segcore/FieldIndexing.cpp
index 3239f6cade..7c0c97e0f4 100644
--- a/internal/core/src/segcore/FieldIndexing.cpp
+++ b/internal/core/src/segcore/FieldIndexing.cpp
@@ -73,29 +73,6 @@ VectorFieldIndexing::get_search_params(int top_K) const {
     return base_params;
 }
 
-void
-IndexingRecord::UpdateResourceAck(int64_t chunk_ack, const InsertRecord& record) {
-    if (resource_ack_ >= chunk_ack) {
-        return;
-    }
-
-    std::unique_lock lck(mutex_);
-    int64_t old_ack = resource_ack_;
-    if (old_ack >= chunk_ack) {
-        return;
-    }
-    resource_ack_ = chunk_ack;
-    lck.unlock();
-
-    //    std::thread([this, old_ack, chunk_ack, &record] {
-    for (auto& [field_offset, entry] : field_indexings_) {
-        auto vec_base = record.get_field_data_base(field_offset);
-        entry->BuildIndexRange(old_ack, chunk_ack, vec_base);
-    }
-    finished_ack_.AddSegment(old_ack, chunk_ack);
-    //    }).detach();
-}
-
 template <typename T>
 void
 ScalarFieldIndexing<T>::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
diff --git a/internal/core/src/segcore/FieldIndexing.h b/internal/core/src/segcore/FieldIndexing.h
index 44c5d4f0a9..4cf2affb93 100644
--- a/internal/core/src/segcore/FieldIndexing.h
+++ b/internal/core/src/segcore/FieldIndexing.h
@@ -138,8 +138,29 @@ class IndexingRecord {
     }
 
     // concurrent, reentrant
+    template <bool is_sealed>
     void
-    UpdateResourceAck(int64_t chunk_ack, const InsertRecord& record);
+    UpdateResourceAck(int64_t chunk_ack, const InsertRecord<is_sealed>& record) {
+        if (resource_ack_ >= chunk_ack) {
+            return;
+        }
+
+        std::unique_lock lck(mutex_);
+        int64_t old_ack = resource_ack_;
+        if (old_ack >= chunk_ack) {
+            return;
+        }
+        resource_ack_ = chunk_ack;
+        lck.unlock();
+
+        //    std::thread([this, old_ack, chunk_ack, &record] {
+        for (auto& [field_offset, entry] : field_indexings_) {
+            auto vec_base = record.get_field_data_base(field_offset);
+            entry->BuildIndexRange(old_ack, chunk_ack, vec_base);
+        }
+        finished_ack_.AddSegment(old_ack, chunk_ack);
+        //    }).detach();
+    }
 
     // concurrent
     int64_t
diff --git a/internal/core/src/segcore/InsertRecord.cpp b/internal/core/src/segcore/InsertRecord.cpp
index 10586c54ad..be9cc0a85a 100644
--- a/internal/core/src/segcore/InsertRecord.cpp
+++ b/internal/core/src/segcore/InsertRecord.cpp
@@ -9,79 +9,4 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 // or implied. See the License for the specific language governing permissions and limitations under the License
 
-#include
 #include "InsertRecord.h"
-
-namespace milvus::segcore {
-
-InsertRecord::InsertRecord(const Schema& schema, int64_t size_per_chunk)
-    : row_ids_(size_per_chunk), timestamps_(size_per_chunk) {
-    std::optional<FieldId> pk_field_id = schema.get_primary_field_id();
-
-    for (auto& field : schema) {
-        auto field_id = field.first;
-        auto& field_meta = field.second;
-        if (pk2offset_ == nullptr && pk_field_id.has_value() && pk_field_id.value() == field_id) {
-            switch (field_meta.get_data_type()) {
-                case DataType::INT64: {
-                    pk2offset_ = std::make_unique<OffsetHashMap<int64_t>>();
-                    break;
-                }
-                case DataType::VARCHAR: {
-                    pk2offset_ = std::make_unique<OffsetHashMap<std::string>>();
-                    break;
-                }
-            }
-        }
-        if (field_meta.is_vector()) {
-            if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
-                this->append_field_data<FloatVector>(field_id, field_meta.get_dim(), size_per_chunk);
-                continue;
-            } else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
-                this->append_field_data<BinaryVector>(field_id, field_meta.get_dim(), size_per_chunk);
-                continue;
-            } else {
-                PanicInfo("unsupported");
-            }
-        }
-        switch (field_meta.get_data_type()) {
-            case DataType::BOOL: {
-                this->append_field_data<bool>(field_id, size_per_chunk);
-                break;
-            }
-            case DataType::INT8: {
-                this->append_field_data<int8_t>(field_id, size_per_chunk);
-                break;
-            }
-            case DataType::INT16: {
-                this->append_field_data<int16_t>(field_id, size_per_chunk);
-                break;
-            }
-            case DataType::INT32: {
-                this->append_field_data<int32_t>(field_id, size_per_chunk);
-                break;
-            }
-            case DataType::INT64: {
-                this->append_field_data<int64_t>(field_id, size_per_chunk);
-                break;
-            }
-            case DataType::FLOAT: {
-                this->append_field_data<float>(field_id, size_per_chunk);
-                break;
-            }
-            case DataType::DOUBLE: {
-                this->append_field_data<double>(field_id, size_per_chunk);
-                break;
-            }
-            case DataType::VARCHAR: {
-                this->append_field_data<std::string>(field_id, size_per_chunk);
-                break;
-            }
-            default: {
-                PanicInfo("unsupported");
-            }
-        }
-    }
-}
-
-}  // namespace milvus::segcore
diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h
index d17fdc5f70..8b95c2f32c 100644
--- a/internal/core/src/segcore/InsertRecord.h
+++ b/internal/core/src/segcore/InsertRecord.h
@@ -13,6 +13,8 @@
 
 #include
 #include
+#include
+#include
 #include
 #include
 
@@ -28,15 +30,15 @@ class OffsetMap {
  public:
     virtual ~OffsetMap() = default;
 
-    virtual std::vector<SegOffset>
-    find_with_timestamp(const PkType pk, Timestamp timestamp, const ConcurrentVector<Timestamp>& timestamps) const = 0;
-
-    virtual std::vector<SegOffset>
-    find_with_barrier(const PkType pk, int64_t barrier) const = 0;
+    virtual std::vector<int64_t>
+    find(const PkType pk) const = 0;
 
     virtual void
     insert(const PkType pk, int64_t offset) = 0;
 
+    virtual void
+    seal() = 0;
+
     virtual bool
     empty() const = 0;
 };
@@ -44,32 +46,10 @@ class OffsetMap {
 template <typename T>
 class OffsetHashMap : public OffsetMap {
  public:
-    std::vector<SegOffset>
-    find_with_timestamp(const PkType pk, Timestamp timestamp, const ConcurrentVector<Timestamp>& timestamps) const {
-        std::vector<SegOffset> res_offsets;
-        auto offset_iter = map_.find(std::get<T>(pk));
-        if (offset_iter != map_.end()) {
-            for (auto offset : offset_iter->second) {
-                if (timestamps[offset] <= timestamp) {
-                    res_offsets.push_back(SegOffset(offset));
-                }
-            }
-        }
-        return res_offsets;
-    }
-
-    std::vector<SegOffset>
-    find_with_barrier(const PkType pk, int64_t barrier) const {
-        std::vector<SegOffset> res_offsets;
-        auto offset_iter = map_.find(std::get<T>(pk));
-        if (offset_iter != map_.end()) {
-            for (auto offset : offset_iter->second) {
-                if (offset <= barrier) {
-                    res_offsets.push_back(SegOffset(offset));
-                }
-            }
-        }
-        return res_offsets;
+    std::vector<int64_t>
+    find(const PkType pk) const {
+        auto offset_vector = map_.find(std::get<T>(pk));
+        return offset_vector != map_.end() ? offset_vector->second : std::vector<int64_t>();
     }
 
     void
@@ -77,6 +57,11 @@ class OffsetHashMap : public OffsetMap {
         map_[std::get<T>(pk)].emplace_back(offset);
     }
 
+    void
+    seal() {
+        PanicInfo("OffsetHashMap used for growing segment could not be sealed.");
+    }
+
     bool
     empty() const {
         return map_.empty();
@@ -86,6 +71,58 @@ class OffsetHashMap : public OffsetMap {
     std::unordered_map<T, std::vector<int64_t>> map_;
 };
 
+template <typename T>
+class OffsetOrderedArray : public OffsetMap {
+ public:
+    std::vector<int64_t>
+    find(const PkType pk) const {
+        int left = 0, right = array_.size() - 1;
+        T target = std::get<T>(pk);
+        if (!is_sealed)
+            PanicInfo("OffsetOrderedArray could not search before seal");
+
+        while (left < right) {
+            int mid = (left + right) >> 1;
+            if (array_[mid].first < target)
+                left = mid + 1;
+            else
+                right = mid;
+        }
+
+        std::vector<int64_t> offset_vector;
+        for (int offset_id = right; offset_id < array_.size(); offset_id++) {
+            if (offset_id < 0 || array_[offset_id].first != target)
+                break;
+            offset_vector.push_back(array_[offset_id].second);
+        }
+
+        return offset_vector;
+    }
+
+    void
+    insert(const PkType pk, int64_t offset) {
+        if (is_sealed)
+            PanicInfo("OffsetOrderedArray could not insert after seal");
+        array_.push_back(std::make_pair(std::get<T>(pk), offset));
+    }
+
+    void
+    seal() {
+        sort(array_.begin(), array_.end());
+        is_sealed = true;
+    }
+
+    bool
+    empty() const {
+        return array_.empty();
+    }
+
+ private:
+    bool is_sealed = false;
+    std::vector<std::pair<T, int64_t>> array_;
+};
+
+template <bool is_sealed = false>
 struct InsertRecord {
     ConcurrentVector<Timestamp> timestamps_;
     ConcurrentVector<idx_t> row_ids_;
@@ -100,18 +137,108 @@ struct InsertRecord {
     // pks to row offset
     std::unique_ptr<OffsetMap> pk2offset_;
 
-    explicit InsertRecord(const Schema& schema, int64_t size_per_chunk);
+    InsertRecord(const Schema& schema, int64_t size_per_chunk) : row_ids_(size_per_chunk), timestamps_(size_per_chunk) {
+        std::optional<FieldId> pk_field_id = schema.get_primary_field_id();
+
+        for (auto& field : schema) {
+            auto field_id = field.first;
+            auto& field_meta = field.second;
+            if (pk2offset_ == nullptr && pk_field_id.has_value() && pk_field_id.value() == field_id) {
+                switch (field_meta.get_data_type()) {
+                    case DataType::INT64: {
+                        if (is_sealed)
+                            pk2offset_ = std::make_unique<OffsetOrderedArray<int64_t>>();
+                        else
+                            pk2offset_ = std::make_unique<OffsetHashMap<int64_t>>();
+                        break;
+                    }
+                    case DataType::VARCHAR: {
+                        if (is_sealed)
+                            pk2offset_ = std::make_unique<OffsetOrderedArray<std::string>>();
+                        else
+                            pk2offset_ = std::make_unique<OffsetHashMap<std::string>>();
+                        break;
+                    }
+                    default: {
+                        PanicInfo("unsupported pk type");
+                    }
+                }
+            }
+            if (field_meta.is_vector()) {
+                if (field_meta.get_data_type() == DataType::VECTOR_FLOAT) {
+                    this->append_field_data<FloatVector>(field_id, field_meta.get_dim(), size_per_chunk);
+                    continue;
+                } else if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
+                    this->append_field_data<BinaryVector>(field_id, field_meta.get_dim(), size_per_chunk);
+                    continue;
+                } else {
+                    PanicInfo("unsupported");
+                }
+            }
+            switch (field_meta.get_data_type()) {
+                case DataType::BOOL: {
+                    this->append_field_data<bool>(field_id, size_per_chunk);
+                    break;
+                }
+                case DataType::INT8: {
+                    this->append_field_data<int8_t>(field_id, size_per_chunk);
+                    break;
+                }
+                case DataType::INT16: {
+                    this->append_field_data<int16_t>(field_id, size_per_chunk);
+                    break;
+                }
+                case DataType::INT32: {
+                    this->append_field_data<int32_t>(field_id, size_per_chunk);
+                    break;
+                }
+                case DataType::INT64: {
+                    this->append_field_data<int64_t>(field_id, size_per_chunk);
+                    break;
+                }
+                case DataType::FLOAT: {
+                    this->append_field_data<float>(field_id, size_per_chunk);
+                    break;
+                }
+                case DataType::DOUBLE: {
+                    this->append_field_data<double>(field_id, size_per_chunk);
+                    break;
+                }
+                case DataType::VARCHAR: {
+                    this->append_field_data<std::string>(field_id, size_per_chunk);
+                    break;
+                }
+                default: {
+                    PanicInfo("unsupported");
+                }
+            }
+        }
+    }
 
     std::vector<SegOffset>
     search_pk(const PkType pk, Timestamp timestamp) const {
         std::shared_lock lck(shared_mutex_);
-        return pk2offset_->find_with_timestamp(pk, timestamp, timestamps_);
+        std::vector<SegOffset> res_offsets;
+        auto offset_iter = pk2offset_->find(pk);
+        for (auto offset : offset_iter) {
+            if (timestamps_[offset] <= timestamp) {
+                res_offsets.push_back(SegOffset(offset));
+            }
+        }
+        return res_offsets;
     }
 
     std::vector<SegOffset>
     search_pk(const PkType pk, int64_t insert_barrier) const {
         std::shared_lock lck(shared_mutex_);
-        return pk2offset_->find_with_barrier(pk, insert_barrier);
+        std::vector<SegOffset> res_offsets;
+        auto offset_iter = pk2offset_->find(pk);
+        for (auto offset : offset_iter) {
+            if (offset <= insert_barrier) {
+                res_offsets.push_back(SegOffset(offset));
+            }
+        }
+        return res_offsets;
     }
 
     void
@@ -126,6 +253,11 @@ struct InsertRecord {
         return pk2offset_->empty();
     }
 
+    void
+    seal_pks() {
+        pk2offset_->seal();
+    }
+
     // get field data without knowing the type
     VectorBase*
     get_field_data_base(FieldId field_id) const {
diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h
index adc9525dd4..7b87fe8e51 100644
--- a/internal/core/src/segcore/SegmentGrowingImpl.h
+++ b/internal/core/src/segcore/SegmentGrowingImpl.h
@@ -70,7 +70,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
     }
 
  public:
-    const InsertRecord&
+    const InsertRecord<>&
     get_insert_record() const {
         return insert_record_;
     }
@@ -225,7 +225,7 @@ class SegmentGrowingImpl : public SegmentGrowing {
     SealedIndexingRecord sealed_indexing_record_;  // not used
 
     // inserted fields data and row_ids, timestamps
-    InsertRecord insert_record_;
+    InsertRecord<false> insert_record_;
 
     // deleted pks
     mutable DeletedRecord deleted_record_;
diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp
index 7231693aac..4ccd89878e 100644
--- a/internal/core/src/segcore/SegmentSealedImpl.cpp
+++ b/internal/core/src/segcore/SegmentSealedImpl.cpp
@@ -117,6 +117,7 @@ SegmentSealedImpl::LoadScalarIndex(const index::LoadIndexInfo& info) {
             for (int i = 0; i < row_count; ++i) {
                 insert_record_.insert_pk(int64_index->Reverse_Lookup(i), i);
             }
+            insert_record_.seal_pks();
             break;
         }
         case DataType::VARCHAR: {
@@ -124,6 +125,7 @@ SegmentSealedImpl::LoadScalarIndex(const index::LoadIndexInfo& info) {
            for (int i = 0; i < row_count; ++i) {
                 insert_record_.insert_pk(string_index->Reverse_Lookup(i), i);
             }
+            insert_record_.seal_pks();
             break;
         }
         default: {
@@ -207,6 +209,7 @@ SegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& info) {
             for (int i = 0; i < size; ++i) {
                 insert_record_.insert_pk(pks[i], i);
             }
+            insert_record_.seal_pks();
         }
 
         set_bit(field_data_ready_bitset_, field_id, true);
diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h
index 7cc846d680..e328b0e4e2 100644
--- a/internal/core/src/segcore/SegmentSealedImpl.h
+++ b/internal/core/src/segcore/SegmentSealedImpl.h
@@ -193,7 +193,7 @@ class SegmentSealedImpl : public SegmentSealed {
     SealedIndexingRecord vector_indexings_;
 
     // inserted fields data and row_ids, timestamps
-    InsertRecord insert_record_;
+    InsertRecord<true> insert_record_;
 
     // deleted pks
     mutable DeletedRecord deleted_record_;
diff --git a/internal/core/src/segcore/Utils.cpp b/internal/core/src/segcore/Utils.cpp
index 1a54774949..d746a57911 100644
--- a/internal/core/src/segcore/Utils.cpp
+++ b/internal/core/src/segcore/Utils.cpp
@@ -366,83 +366,4 @@ ReverseDataFromIndex(const index::IndexBase* index,
     return data_array;
 }
 
-// insert_barrier means num row of insert data in a segment
-// del_barrier means that if the pk of the insert data is in delete record[0 : del_barrier]
-// then the data corresponding to this pk may be ignored when searching/querying
-// and refer to func get_barrier, all ts in delete record[0 : del_barrier] < query_timestamp
-// assert old insert record pks = [5, 2, 4, 1, 3, 8, 7, 6]
-// assert old delete record pks = [2, 4, 3, 8, 5], old delete record ts = [100, 100, 150, 200, 400, 500, 500, 500]
-// if delete_barrier = 3, query time = 180, then insert records with pks in [2, 4, 3] will be deleted
-// then the old bitmap = [0, 1, 1, 0, 1, 0, 0, 0]
-std::shared_ptr<DeletedRecord::TmpBitmap>
-get_deleted_bitmap(int64_t del_barrier,
-                   int64_t insert_barrier,
-                   DeletedRecord& delete_record,
-                   const InsertRecord& insert_record,
-                   Timestamp query_timestamp) {
-    // if insert_barrier and del_barrier have not changed, use cache data directly
-    bool hit_cache = false;
-    int64_t old_del_barrier = 0;
-    auto current = delete_record.clone_lru_entry(insert_barrier, del_barrier, old_del_barrier, hit_cache);
-    if (hit_cache) {
-        return current;
-    }
-
-    auto bitmap = current->bitmap_ptr;
-
-    int64_t start, end;
-    if (del_barrier < old_del_barrier) {
-        // in this case, ts of delete record[current_del_barrier : old_del_barrier] > query_timestamp
-        // so these deletion records do not take effect in query/search
-        // so bitmap corresponding to those pks in delete record[current_del_barrier:old_del_barrier] wil be reset to 0
-        // for example, current_del_barrier = 2, query_time = 120, the bitmap will be reset to [0, 1, 1, 0, 0, 0, 0, 0]
-        start = del_barrier;
-        end = old_del_barrier;
-    } else {
-        // the cache is not enough, so update bitmap using new pks in delete record[old_del_barrier:current_del_barrier]
-        // for example, current_del_barrier = 4, query_time = 300, bitmap will be updated to [0, 1, 1, 0, 1, 1, 0, 0]
-        start = old_del_barrier;
-        end = del_barrier;
-    }
-
-    // Avoid invalid calculations when there are a lot of repeated delete pks
-    std::unordered_map<PkType, Timestamp> delete_timestamps;
-    for (auto del_index = start; del_index < end; ++del_index) {
-        auto pk = delete_record.pks_[del_index];
-        auto timestamp = delete_record.timestamps_[del_index];
-
-        delete_timestamps[pk] = timestamp > delete_timestamps[pk] ? timestamp : delete_timestamps[pk];
-    }
-
-    for (auto iter = delete_timestamps.begin(); iter != delete_timestamps.end(); iter++) {
-        auto pk = iter->first;
-        auto delete_timestamp = iter->second;
-        auto segOffsets = insert_record.search_pk(pk, insert_barrier);
-        for (auto offset : segOffsets) {
-            int64_t insert_row_offset = offset.get();
-            // for now, insert_barrier == insert count of segment, so this Assert will always work
-            AssertInfo(insert_row_offset < insert_barrier, "Timestamp offset is larger than insert barrier");
-
-            // insert after delete with same pk, delete will not task effect on this insert record
-            // and reset bitmap to 0
-            if (insert_record.timestamps_[insert_row_offset] > delete_timestamp) {
-                bitmap->reset(insert_row_offset);
-                continue;
-            }
-
-            // the deletion record do not take effect in search/query
-            // and reset bitmap to 0
-            if (delete_timestamp > query_timestamp) {
-                bitmap->reset(insert_row_offset);
-                continue;
-            }
-            // insert data corresponding to the insert_row_offset will be ignored in search/query
-            bitmap->set(insert_row_offset);
-        }
-    }
-
-    delete_record.insert_lru_entry(current);
-    return current;
-}
-
 }  // namespace milvus::segcore
diff --git a/internal/core/src/segcore/Utils.h b/internal/core/src/segcore/Utils.h
index fe28667259..363eac7f34 100644
--- a/internal/core/src/segcore/Utils.h
+++ b/internal/core/src/segcore/Utils.h
@@ -9,6 +9,7 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 // or implied. See the License for the specific language governing permissions and limitations under the License
 
+#include
 #include
 #include
 #include
@@ -48,12 +49,77 @@ CreateDataArrayFrom(const void* data_raw, int64_t count, const FieldMeta& field_meta);
 std::unique_ptr<DataArray>
 MergeDataArray(std::vector<std::pair<milvus::SearchResult*, int64_t>>& result_offsets, const FieldMeta& field_meta);
 
+template <bool is_sealed>
 std::shared_ptr<DeletedRecord::TmpBitmap>
 get_deleted_bitmap(int64_t del_barrier,
                    int64_t insert_barrier,
                    DeletedRecord& delete_record,
-                   const InsertRecord& insert_record,
-                   Timestamp query_timestamp);
+                   const InsertRecord<is_sealed>& insert_record,
+                   Timestamp query_timestamp) {
+    // if insert_barrier and del_barrier have not changed, use cache data directly
+    bool hit_cache = false;
+    int64_t old_del_barrier = 0;
+    auto current = delete_record.clone_lru_entry(insert_barrier, del_barrier, old_del_barrier, hit_cache);
+    if (hit_cache) {
+        return current;
+    }
+
+    auto bitmap = current->bitmap_ptr;
+
+    int64_t start, end;
+    if (del_barrier < old_del_barrier) {
+        // in this case, ts of delete record[current_del_barrier : old_del_barrier] > query_timestamp
+        // so these deletion records do not take effect in query/search
+        // so bitmap corresponding to those pks in delete record[current_del_barrier:old_del_barrier] wil be reset to 0
+        // for example, current_del_barrier = 2, query_time = 120, the bitmap will be reset to [0, 1, 1, 0, 0, 0, 0, 0]
+        start = del_barrier;
+        end = old_del_barrier;
+    } else {
+        // the cache is not enough, so update bitmap using new pks in delete record[old_del_barrier:current_del_barrier]
+        // for example, current_del_barrier = 4, query_time = 300, bitmap will be updated to [0, 1, 1, 0, 1, 1, 0, 0]
+        start = old_del_barrier;
+        end = del_barrier;
+    }
+
+    // Avoid invalid calculations when there are a lot of repeated delete pks
+    std::unordered_map<PkType, Timestamp> delete_timestamps;
+    for (auto del_index = start; del_index < end; ++del_index) {
+        auto pk = delete_record.pks_[del_index];
+        auto timestamp = delete_record.timestamps_[del_index];
+
+        delete_timestamps[pk] = timestamp > delete_timestamps[pk] ? timestamp : delete_timestamps[pk];
+    }
+
+    for (auto iter = delete_timestamps.begin(); iter != delete_timestamps.end(); iter++) {
+        auto pk = iter->first;
+        auto delete_timestamp = iter->second;
+        auto segOffsets = insert_record.search_pk(pk, insert_barrier);
+        for (auto offset : segOffsets) {
+            int64_t insert_row_offset = offset.get();
+            // for now, insert_barrier == insert count of segment, so this Assert will always work
+            AssertInfo(insert_row_offset < insert_barrier, "Timestamp offset is larger than insert barrier");
+
+            // insert after delete with same pk, delete will not task effect on this insert record
+            // and reset bitmap to 0
+            if (insert_record.timestamps_[insert_row_offset] > delete_timestamp) {
+                bitmap->reset(insert_row_offset);
+                continue;
+            }
+
+            // the deletion record do not take effect in search/query
+            // and reset bitmap to 0
+            if (delete_timestamp > query_timestamp) {
+                bitmap->reset(insert_row_offset);
+                continue;
+            }
+            // insert data corresponding to the insert_row_offset will be ignored in search/query
+            bitmap->set(insert_row_offset);
+        }
+    }
+
+    delete_record.insert_lru_entry(current);
+    return current;
+}
 
 std::unique_ptr<DataArray>
 ReverseDataFromIndex(const index::IndexBase* index,
diff --git a/internal/core/unittest/test_segcore.cpp b/internal/core/unittest/test_segcore.cpp
index 74aabfb881..b926f1c5b5 100644
--- a/internal/core/unittest/test_segcore.cpp
+++ b/internal/core/unittest/test_segcore.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 
 #include "segcore/SegmentGrowingImpl.h"
 #include "test_utils/DataGen.h"
@@ -80,36 +81,77 @@ TEST(SegmentCoreTest, SmallIndex) {
     schema->AddDebugField("age", DataType::INT32);
 }
 
-TEST(OffsetMap, int64_t) {
-    using namespace milvus::segcore;
-    OffsetMap* map = new OffsetHashMap<int64_t>();
-    map->insert(PkType(int64_t(10)), 3);
-    std::vector<SegOffset> offset = map->find_with_barrier(PkType(int64_t(10)), 10);
-    ASSERT_EQ(offset[0].get(), int64_t(3));
-}
-
-TEST(InsertRecordTest, int64_t) {
+TEST(InsertRecordTest, growing_int64_t) {
     using namespace milvus::segcore;
     auto schema = std::make_shared<Schema>();
     schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto i64_fid = schema->AddDebugField("age", DataType::INT64);
     schema->set_primary_field_id(i64_fid);
+    auto record = milvus::segcore::InsertRecord<false>(*schema, int64_t(32));
+    const int N=100000;
 
-    auto record = milvus::segcore::InsertRecord(*schema, int64_t(32));
-    record.insert_pk(PkType(int64_t(12)), int64_t(3));
-    std::vector<SegOffset> offset = record.search_pk(PkType(int64_t(12)), int64_t(10));
-    ASSERT_EQ(offset[0].get(), int64_t(3));
+    for (int i = 1; i <= N; i++)
+        record.insert_pk(PkType(int64_t(i)), int64_t(i));
+
+    for (int i = 1; i <= N; i++){
+        std::vector<SegOffset> offset = record.search_pk(PkType(int64_t(i)), int64_t(N + 1));
+        ASSERT_EQ(offset[0].get(), int64_t(i));
+    }
 }
 
-TEST(InsertRecordTest, string) {
+TEST(InsertRecordTest, growing_string) {
     using namespace milvus::segcore;
     auto schema = std::make_shared<Schema>();
     schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
     auto i64_fid = schema->AddDebugField("name", DataType::VARCHAR);
     schema->set_primary_field_id(i64_fid);
+    auto record = milvus::segcore::InsertRecord<false>(*schema, int64_t(32));
+    const int N = 100000;
 
-    auto record = milvus::segcore::InsertRecord(*schema, int64_t(32));
-    record.insert_pk(PkType(std::string("test")), int64_t(3));
-    std::vector<SegOffset> offset = record.search_pk(PkType(std::string("test")), int64_t(10));
-    ASSERT_EQ(offset[0].get(), int64_t(3));
+    for (int i = 1; i <= N; i++)
+        record.insert_pk(PkType(std::to_string(i)), int64_t(i));
+
+    for (int i = 1; i <= N; i++){
+        std::vector<SegOffset> offset = record.search_pk(std::to_string(i), int64_t(N + 1));
+        ASSERT_EQ(offset[0].get(), int64_t(i));
+    }
+}
+
+TEST(InsertRecordTest, sealed_int64_t) {
+    using namespace milvus::segcore;
+    auto schema = std::make_shared<Schema>();
+    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
+    auto i64_fid = schema->AddDebugField("age", DataType::INT64);
+    schema->set_primary_field_id(i64_fid);
+    auto record = milvus::segcore::InsertRecord<true>(*schema, int64_t(32));
+    const int N = 100000;
+
+    for (int i = N; i >= 1; i--)
+        record.insert_pk(PkType(int64_t(i)), int64_t(i));
+    record.seal_pks();
+
+    for (int i = 1;i <= N; i++){
+        std::vector<SegOffset> offset = record.search_pk(PkType(int64_t(i)), int64_t(N + 1));
+        ASSERT_EQ(offset[0].get(), int64_t(i));
+    }
+}
+
+TEST(InsertRecordTest, sealed_string) {
+    using namespace milvus::segcore;
+    auto schema = std::make_shared<Schema>();
+    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
+    auto i64_fid = schema->AddDebugField("name", DataType::VARCHAR);
+    schema->set_primary_field_id(i64_fid);
+    auto record = milvus::segcore::InsertRecord<true>(*schema, int64_t(32));
+    const int N = 100000;
+
+    for (int i = 1; i <= N; i++)
+        record.insert_pk(PkType(std::to_string(i)), int64_t(i));
+
+    record.seal_pks();
+
+    for (int i = 1; i <= N; i++){
+        std::vector<SegOffset> offset = record.search_pk(std::to_string(i), int64_t(N + 1));
+        ASSERT_EQ(offset[0].get(), int64_t(i));
+    }
 }
\ No newline at end of file
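
Editor's note (not part of the patch): the idea behind OffsetOrderedArray is that a sealed segment's pk-to-offset index never changes after load, so the per-key buckets and nodes of an std::unordered_map can be replaced by one flat vector of (pk, offset) pairs that is sorted once at seal time and binary-searched afterwards. The sketch below is a minimal standalone illustration of that technique under those assumptions; SortedOffsetIndex and everything in it are invented names for this example and are not the Milvus API.

    // Standalone sketch: sealed-style pk index built from a sorted flat vector.
    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <vector>

    class SortedOffsetIndex {
     public:
        // Append-only while the segment is being loaded; order does not matter yet.
        void
        insert(int64_t pk, int64_t offset) {
            array_.emplace_back(pk, offset);
        }

        // Sort once after the last insert; afterwards the index is read-only.
        void
        seal() {
            std::sort(array_.begin(), array_.end());
            sealed_ = true;
        }

        // Binary search for every row offset stored under this pk.
        std::vector<int64_t>
        find(int64_t pk) const {
            std::vector<int64_t> offsets;
            if (!sealed_) {
                return offsets;  // the patch panics here instead of returning empty
            }
            auto cmp = [](const std::pair<int64_t, int64_t>& a, const std::pair<int64_t, int64_t>& b) {
                return a.first < b.first;
            };
            auto range = std::equal_range(array_.begin(), array_.end(), std::make_pair(pk, int64_t(0)), cmp);
            for (auto it = range.first; it != range.second; ++it) {
                offsets.push_back(it->second);
            }
            return offsets;
        }

     private:
        bool sealed_ = false;
        // One pair per row: contiguous storage, no per-key bucket or node overhead.
        std::vector<std::pair<int64_t, int64_t>> array_;
    };

    int
    main() {
        SortedOffsetIndex index;
        for (int64_t i = 10; i >= 1; --i) {
            index.insert(i, i);  // inserts may arrive in any order before seal()
        }
        index.seal();
        for (int64_t offset : index.find(3)) {
            std::cout << offset << "\n";  // prints 3
        }
        return 0;
    }

For a growing segment the patch keeps the hash-map variant, since new pks keep arriving and re-sorting on every insert would be too expensive; the sealed variant trades that flexibility for a single contiguous allocation and O(log n) lookups, which is what reduces the memory footprint of InsertRecord for sealed segments.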