diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d5c151199..c6b8d5e388 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,13 @@ Please mark all change in change log and use the issue from GitHub - \#1762 Server is not forbidden to create new partition which tag is "_default" ## Feature +- \#261 Integrate ANNOY into Milvus +- \#1603 BinaryFlat add 2 Metric: Substructure and Superstructure - \#1655 GPU index support delete vectors +- \#1660 IVF PQ CPU support deleted vectors searching +- \#1661 HNSW support deleted vectors searching - \#1825 Add annoy index type in C++ sdk +- \#1849 NSG support deleted vectors searching ## Improvement - \#1784 Add Substructure and Superstructure in http module @@ -39,10 +44,7 @@ Please mark all change in change log and use the issue from GitHub - \#1818 Duplicate data generated after restart milvus server ## Feature -- \#261 Integrate ANNOY into Milvus - \#1603 BinaryFlat add 2 Metric: Substructure and Superstructure -- \#1660 IVF PQ CPU support deleted vectors searching -- \#1661 HNSW support deleted vectors searching ## Improvement - \#267 Improve search performance: reduce delay diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index f437bee3c0..a9f8582701 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -87,12 +87,15 @@ NSG::Query(const DatasetPtr& dataset_ptr, const Config& config) { auto p_id = (int64_t*)malloc(p_id_size); auto p_dist = (float*)malloc(p_dist_size); + faiss::ConcurrentBitsetPtr blacklist = GetBlacklist(); + impl::SearchParams s_params; s_params.search_length = config[IndexParams::search_length]; s_params.k = config[meta::TOPK]; { std::lock_guard lk(mutex_); - index_->Search((float*)p_data, rows, dim, config[meta::TOPK].get(), p_dist, p_id, s_params); + index_->Search((float*)p_data, rows, dim, config[meta::TOPK].get(), p_dist, p_id, s_params, + blacklist); } auto ret_ds = std::make_shared(); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.cpp index 1f06247833..332c6db675 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.cpp @@ -329,7 +329,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, Graph& KNOWHERE_THROW_MSG("Build Error, search_length > ntotal"); } - // std::vector init_ids; std::vector init_ids(buffer_size); resset.resize(buffer_size); boost::dynamic_bitset<> has_calculated_dist{ntotal, 0}; @@ -342,8 +341,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, Graph& // Get all neighbors for (size_t i = 0; i < init_ids.size() && i < graph[navigation_point].size(); ++i) { - // for (size_t i = 0; i < graph[navigation_point].size(); ++i) { - // init_ids.push_back(graph[navigation_point][i]); init_ids[i] = graph[navigation_point][i]; has_calculated_dist[init_ids[i]] = true; ++count; @@ -352,7 +349,6 @@ NsgIndex::GetNeighbors(const float* query, std::vector& resset, Graph& node_t id = rand_r(&seed) % ntotal; if (has_calculated_dist[id]) continue; // duplicate id - // init_ids.push_back(id); init_ids[count] = id; ++count; has_calculated_dist[id] = true; @@ -832,7 +828,7 @@ NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<>& has_linked, int64_t& root void NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist, - int64_t* ids, SearchParams& params) { + int64_t* ids, SearchParams& params, faiss::ConcurrentBitsetPtr bitset) { std::vector> resset(nq); TimeRecorder rc("NsgIndex::search", 1); @@ -847,22 +843,21 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co } rc.RecordSection("search"); for (unsigned int i = 0; i < nq; ++i) { - int64_t var = resset[i].size() - k; - if (var >= 0) { - for (unsigned int j = 0; j < k; ++j) { - ids[i * k + j] = ids_[resset[i][j].id]; - dist[i * k + j] = resset[i][j].distance; - } - } else { - for (unsigned int j = 0; j < resset[i].size(); ++j) { - ids[i * k + j] = ids_[resset[i][j].id]; - dist[i * k + j] = resset[i][j].distance; - } - for (unsigned int j = resset[i].size(); j < k; ++j) { - ids[i * k + j] = -1; - dist[i * k + j] = -1; + unsigned int pos = 0; + for (unsigned int j = 0; j < resset[i].size(); ++j) { + if (pos >= k) + break; // already top k + if (!bitset || !bitset->test((faiss::ConcurrentBitset::id_type_t)resset[i][j].id)) { + ids[i * k + pos] = ids_[resset[i][j].id]; + dist[i * k + pos] = resset[i][j].distance; + ++pos; } } + // fill with -1 + for (unsigned int j = pos; j < k; ++j) { + ids[i * k + j] = -1; + dist[i * k + j] = -1; + } } rc.RecordSection("merge"); } diff --git a/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.h b/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.h index a357dac564..603af1417d 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/impl/nsg/NSG.h @@ -79,7 +79,7 @@ class NsgIndex { void Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist, int64_t* ids, - SearchParams& params); + SearchParams& params, faiss::ConcurrentBitsetPtr bitset = nullptr); // Not support yet. // virtual void Add() = 0; diff --git a/core/src/index/unittest/test_nsg/test_nsg.cpp b/core/src/index/unittest/test_nsg/test_nsg.cpp index 867c84ce9b..7ff8402e99 100644 --- a/core/src/index/unittest/test_nsg/test_nsg.cpp +++ b/core/src/index/unittest/test_nsg/test_nsg.cpp @@ -39,12 +39,11 @@ class NSGInterfaceTest : public DataGen, public ::testing::Test { protected: void SetUp() override { -// Init_with_default(); #ifdef MILVUS_GPU_VERSION int64_t MB = 1024 * 1024; milvus::knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, MB * 200, MB * 600, 1); #endif - Generate(256, 1000000 / 100, 1); + Generate(256, 1000000 / 100, 10); index_ = std::make_shared(); train_conf = milvus::knowhere::Config{{milvus::knowhere::meta::DIM, 256}, @@ -117,7 +116,7 @@ TEST_F(NSGInterfaceTest, basic_test) { // }); } -TEST_F(NSGInterfaceTest, comparetest) { +TEST_F(NSGInterfaceTest, compare_test) { milvus::knowhere::impl::DistanceL2 distanceL2; milvus::knowhere::impl::DistanceIP distanceIP; @@ -132,7 +131,58 @@ TEST_F(NSGInterfaceTest, comparetest) { tc.RecordSection("IP"); } -//#include +TEST_F(NSGInterfaceTest, delete_test) { + assert(!xb.empty()); + + train_conf[milvus::knowhere::meta::DEVICEID] = DEVICEID; + index_->Train(base_dataset, train_conf); + + auto result = index_->Query(query_dataset, search_conf); + AssertAnns(result, nq, k); + + ASSERT_EQ(index_->Count(), nb); + ASSERT_EQ(index_->Dim(), dim); + + faiss::ConcurrentBitsetPtr bitset = std::make_shared(nb); + for (int i = 0; i < nq; i++) { + bitset->set(i); + } + + auto I_before = result->Get(milvus::knowhere::meta::IDS); + /* + printf("I=\n"); + for (int i = 0; i < nq; i++) { + for (int j = 0; j < k; j++) printf("%5ld ", I_before[i * k + j]); + printf("\n"); + }*/ + + // search xq with delete + index_->SetBlacklist(bitset); + auto result_after = index_->Query(query_dataset, search_conf); + AssertAnns(result_after, nq, k, CheckMode::CHECK_NOT_EQUAL); + auto I_after = result_after->Get(milvus::knowhere::meta::IDS); + + /* + printf("I=\n"); + for (int i = 0; i < nq; i++) { + for (int j = 0; j < k; j++) printf("%5ld ", I_after[i * k + j]); + printf("\n"); + }*/ + + // First vector deleted + for (int i = 0; i < nq; i++) { + ASSERT_NE(I_before[i * k], I_after[i * k]); + } + + /* + // Other results are the same + for (int i = 0; i < nq; i++) { + for (int j = 1; j <= k / 2; j++) { + ASSERT_EQ(I_before[i * k + j], I_after[i * k + j - 1]); + } + }*/ +} + // TEST(test, ori_nsg) { // // float* p_data = nullptr; // size_t rows, dim; diff --git a/core/src/server/delivery/request/DeleteByIDRequest.cpp b/core/src/server/delivery/request/DeleteByIDRequest.cpp index 28e13e7891..f9ead40625 100644 --- a/core/src/server/delivery/request/DeleteByIDRequest.cpp +++ b/core/src/server/delivery/request/DeleteByIDRequest.cpp @@ -68,15 +68,8 @@ DeleteByIDRequest::OnExecute() { } // Check collection's index type supports delete - if (table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IDMAP && - table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_BIN_IDMAP && - table_schema.engine_type_ != (int32_t)engine::EngineType::HNSW && - table_schema.engine_type_ != (int32_t)engine::EngineType::ANNOY && - table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFFLAT && - table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_BIN_IVFFLAT && - table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFSQ8 && - table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_PQ && - table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFSQ8H) { + if (table_schema.engine_type_ == (int32_t)engine::EngineType::SPTAG_BKT || + table_schema.engine_type_ == (int32_t)engine::EngineType::SPTAG_KDT) { std::string err_msg = "Index type " + std::to_string(table_schema.engine_type_) + " does not support delete operation"; SERVER_LOG_ERROR << err_msg;