From 0ff4ddc76c14fc6bc4db0441ebb75b4d6e940bf4 Mon Sep 17 00:00:00 2001 From: cqy123456 <39671710+cqy123456@users.noreply.github.com> Date: Tue, 12 Sep 2023 17:13:18 +0800 Subject: [PATCH] remove VectorMemNMIndex (#27000) Signed-off-by: cqy123456 --- internal/core/src/index/CMakeLists.txt | 1 - internal/core/src/index/IndexFactory.cpp | 5 - internal/core/src/index/VectorMemNMIndex.cpp | 133 ------------------- internal/core/src/index/VectorMemNMIndex.h | 74 ----------- internal/core/src/segcore/FieldIndexing.cpp | 4 +- internal/core/unittest/test_utils/DataGen.h | 4 +- 6 files changed, 4 insertions(+), 217 deletions(-) delete mode 100644 internal/core/src/index/VectorMemNMIndex.cpp delete mode 100644 internal/core/src/index/VectorMemNMIndex.h diff --git a/internal/core/src/index/CMakeLists.txt b/internal/core/src/index/CMakeLists.txt index 19ecde2cf2..feec1b22e3 100644 --- a/internal/core/src/index/CMakeLists.txt +++ b/internal/core/src/index/CMakeLists.txt @@ -14,7 +14,6 @@ set(INDEX_FILES Utils.cpp VectorMemIndex.cpp IndexFactory.cpp - VectorMemNMIndex.cpp ) if ( BUILD_DISK_ANN STREQUAL "ON" ) diff --git a/internal/core/src/index/IndexFactory.cpp b/internal/core/src/index/IndexFactory.cpp index c4c6178e96..ab3bd42248 100644 --- a/internal/core/src/index/IndexFactory.cpp +++ b/internal/core/src/index/IndexFactory.cpp @@ -16,7 +16,6 @@ #include "index/IndexFactory.h" #include "index/VectorMemIndex.h" -#include "index/VectorMemNMIndex.h" #include "index/Utils.h" #include "index/Meta.h" @@ -93,10 +92,6 @@ IndexFactory::CreateVectorIndex(const CreateIndexInfo& create_index_info, } #endif - if (is_in_nm_list(index_type)) { - return std::make_unique( - index_type, metric_type, file_manager); - } // create mem index return std::make_unique( index_type, metric_type, file_manager); diff --git a/internal/core/src/index/VectorMemNMIndex.cpp b/internal/core/src/index/VectorMemNMIndex.cpp deleted file mode 100644 index b18de825ef..0000000000 --- a/internal/core/src/index/VectorMemNMIndex.cpp +++ /dev/null @@ -1,133 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "common/Slice.h" -#include "common/Utils.h" -#include "common/BitsetView.h" -#include "index/VectorMemNMIndex.h" -#include "log/Log.h" - -#include "knowhere/factory.h" -#include "knowhere/comp/time_recorder.h" -#define RAW_DATA "RAW_DATA" -#include "common/Tracer.h" - -namespace milvus::index { - -BinarySet -VectorMemNMIndex::Serialize(const Config& config) { - knowhere::BinarySet ret; - auto stat = index_.Serialize(ret); - if (stat != knowhere::Status::success) - PanicCodeInfo( - ErrorCodeEnum::UnexpectedError, - "failed to serialize index, " + KnowhereStatusString(stat)); - - auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction - auto raw_data = std::shared_ptr( - static_cast(raw_data_.data()), deleter); - ret.Append(RAW_DATA, raw_data, raw_data_.size()); - Disassemble(ret); - - return ret; -} - -void -VectorMemNMIndex::BuildWithDataset(const DatasetPtr& dataset, - const Config& config) { - VectorMemIndex::BuildWithDataset(dataset, config); - knowhere::TimeRecorder rc("store_raw_data", 1); - store_raw_data(dataset); - rc.ElapseFromBegin("Done"); -} - -void -VectorMemNMIndex::LoadWithoutAssemble(const BinarySet& binary_set, - const Config& config) { - VectorMemIndex::LoadWithoutAssemble(binary_set, config); - if (binary_set.Contains(RAW_DATA)) { - std::call_once(raw_data_loaded_, [&]() { - LOG_SEGCORE_INFO_ << "NM index load raw data done!"; - }); - } -} - -void -VectorMemNMIndex::AddWithDataset(const DatasetPtr& /*dataset*/, - const Config& /*config*/) { -} - -void -VectorMemNMIndex::Load(const BinarySet& binary_set, const Config& config) { - VectorMemIndex::Load(binary_set, config); - if (binary_set.Contains(RAW_DATA)) { - std::call_once(raw_data_loaded_, [&]() { - LOG_SEGCORE_INFO_ << "NM index load raw data done!"; - }); - } -} - -std::unique_ptr -VectorMemNMIndex::Query(const DatasetPtr dataset, - const SearchInfo& search_info, - const BitsetView& bitset) { - auto load_raw_data_closure = [&]() { LoadRawData(); }; // hide this pointer - // load -> query, raw data has been loaded - // build -> query, this case just for test, should load raw data before query - std::call_once(raw_data_loaded_, load_raw_data_closure); - return VectorMemIndex::Query(dataset, search_info, bitset); -} - -void -VectorMemNMIndex::store_raw_data(const DatasetPtr& dataset) { - auto index_type = GetIndexType(); - auto tensor = dataset->GetTensor(); - auto row_num = dataset->GetRows(); - auto dim = dataset->GetDim(); - int64_t data_size; - if (is_in_bin_list(index_type)) { - data_size = dim / 8 * row_num; - } else { - data_size = dim * row_num * sizeof(float); - } - raw_data_.resize(data_size); - memcpy(raw_data_.data(), tensor, data_size); -} - -void -VectorMemNMIndex::LoadRawData() { - knowhere::BinarySet bs; - auto stat = index_.Serialize(bs); - if (stat != knowhere::Status::success) - PanicCodeInfo( - ErrorCodeEnum::UnexpectedError, - "failed to Serialize index, " + KnowhereStatusString(stat)); - - auto bptr = std::make_shared(); - auto deleter = [&](uint8_t*) {}; // avoid repeated deconstruction - bptr->data = std::shared_ptr( - static_cast(raw_data_.data()), deleter); - bptr->size = raw_data_.size(); - bs.Append(RAW_DATA, bptr); - stat = index_.Deserialize(bs); - if (stat != knowhere::Status::success) - PanicCodeInfo( - ErrorCodeEnum::UnexpectedError, - "failed to Deserialize index, " + KnowhereStatusString(stat)); - milvus::tracer::AddEvent("VectorMemNMIndex_Loaded_RawData"); -} - -} // namespace milvus::index diff --git a/internal/core/src/index/VectorMemNMIndex.h b/internal/core/src/index/VectorMemNMIndex.h deleted file mode 100644 index 56ff474e09..0000000000 --- a/internal/core/src/index/VectorMemNMIndex.h +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include "index/Utils.h" -#include "index/VectorMemIndex.h" - -namespace milvus::index { - -class VectorMemNMIndex : public VectorMemIndex { - public: - explicit VectorMemNMIndex( - const IndexType& index_type, - const MetricType& metric_type, - storage::FileManagerImplPtr file_manager = nullptr) - : VectorMemIndex(index_type, metric_type, file_manager) { - AssertInfo(is_in_nm_list(index_type), "not valid nm index type"); - } - - BinarySet - Serialize(const Config& config) override; - - void - BuildWithDataset(const DatasetPtr& dataset, - const Config& config = {}) override; - - void - AddWithDataset(const DatasetPtr& dataset, const Config& config) override; - - void - Load(const BinarySet& binary_set, const Config& config = {}) override; - - std::unique_ptr - Query(const DatasetPtr dataset, - const SearchInfo& search_info, - const BitsetView& bitset) override; - - void - LoadWithoutAssemble(const BinarySet& binary_set, - const Config& config) override; - - private: - void - store_raw_data(const DatasetPtr& dataset); - - void - LoadRawData(); - - private: - std::vector raw_data_; - std::once_flag raw_data_loaded_; -}; - -using VectorMemNMIndexPtr = std::unique_ptr; -} // namespace milvus::index diff --git a/internal/core/src/segcore/FieldIndexing.cpp b/internal/core/src/segcore/FieldIndexing.cpp index eff8c39fa4..aeb0abcf35 100644 --- a/internal/core/src/segcore/FieldIndexing.cpp +++ b/internal/core/src/segcore/FieldIndexing.cpp @@ -16,7 +16,7 @@ #include "common/SystemProperty.h" #include "segcore/FieldIndexing.h" -#include "index/VectorMemNMIndex.h" +#include "index/VectorMemIndex.h" #include "IndexConfigGenerator.h" namespace milvus::segcore { @@ -50,7 +50,7 @@ VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, data_.grow_to_at_least(ack_end); for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) { const auto& chunk = source->get_chunk(chunk_id); - auto indexing = std::make_unique( + auto indexing = std::make_unique( knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::metric::L2); auto dataset = knowhere::GenDataSet( source->get_size_per_chunk(), dim, chunk.data()); diff --git a/internal/core/unittest/test_utils/DataGen.h b/internal/core/unittest/test_utils/DataGen.h index 7cdb722508..c8dc4fc222 100644 --- a/internal/core/unittest/test_utils/DataGen.h +++ b/internal/core/unittest/test_utils/DataGen.h @@ -22,7 +22,7 @@ #include "common/Schema.h" #include "index/ScalarIndexSort.h" #include "index/StringIndexSort.h" -#include "index/VectorMemNMIndex.h" +#include "index/VectorMemIndex.h" #include "query/SearchOnIndex.h" #include "segcore/SegmentGrowingImpl.h" #include "segcore/SegmentSealedImpl.h" @@ -793,7 +793,7 @@ GenVecIndexing(int64_t N, int64_t dim, const float* vec) { {knowhere::indexparam::NLIST, "1024"}, {knowhere::meta::DEVICE_ID, 0}}; auto database = knowhere::GenDataSet(N, dim, vec); - auto indexing = std::make_unique( + auto indexing = std::make_unique( knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::metric::L2); indexing->BuildWithDataset(database, conf); return indexing;