From f891a590b5b45eb088be4c36dfe73241bf377a3b Mon Sep 17 00:00:00 2001 From: "xiaojun.lin" Date: Mon, 9 Mar 2020 15:06:26 +0800 Subject: [PATCH 1/3] fix Signed-off-by: xiaojun.lin --- CHANGELOG.md | 1 + core/src/scheduler/task/SearchTask.cpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f86f15976a..2ba1428637 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ Please mark all change in change log and use the issue from GitHub - \#1549 Fix server/wal config setting bug - \#1556 Index file not created after table and index created - \#1560 Search crashed with Super-high dimensional binary vector +- \#1564 Too low recall for glove-200-angular, ivf_pq index ## Feature - \#216 Add CLI to get server info diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index 0b32f35ab5..cf96dc504a 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -101,7 +101,8 @@ XSearchTask::XSearchTask(const std::shared_ptr& context, TableF if (file_) { // distance -- value 0 means two vectors equal, ascending reduce, L2/HAMMING/JACCARD/TONIMOTO ... // similarity -- infinity value means two vectors equal, descending reduce, IP - if (file_->metric_type_ == static_cast(MetricType::IP)) { + if (file_->metric_type_ == static_cast(MetricType::IP) && + file_->engine_type_ != static_cast(EngineType::FAISS_PQ)) { ascending_reduce = false; } From 281d8ec53f03acb8db5ef32b37740227a629cc97 Mon Sep 17 00:00:00 2001 From: Zhiru Zhu Date: Mon, 9 Mar 2020 15:16:36 +0800 Subject: [PATCH 2/3] update Signed-off-by: Zhiru Zhu --- CHANGELOG.md | 1 + core/src/scheduler/task/BuildIndexTask.cpp | 3 +- core/unittest/db/test_delete.cpp | 76 ++++++++++++++++++++++ 3 files changed, 79 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70ac6ced65..1486fc5633 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ Please mark all change in change log and use the issue from GitHub - \#1556 Index file not created after table and index created - \#1560 Search crashed with Super-high dimensional binary vector - \#1574 Set all existing bitset in cache when applying deletes +- \#1577 Row count incorrect if delete vectors then create index ## Feature - \#216 Add CLI to get server info diff --git a/core/src/scheduler/task/BuildIndexTask.cpp b/core/src/scheduler/task/BuildIndexTask.cpp index 13d3b4a611..1ae0def7b7 100644 --- a/core/src/scheduler/task/BuildIndexTask.cpp +++ b/core/src/scheduler/task/BuildIndexTask.cpp @@ -12,6 +12,7 @@ #include "scheduler/task/BuildIndexTask.h" #include + #include #include #include @@ -206,7 +207,7 @@ XBuildIndexTask::Execute() { // step 6: update meta table_file.file_type_ = engine::meta::TableFileSchema::INDEX; table_file.file_size_ = index->PhysicalSize(); - table_file.row_count_ = index->Count(); + table_file.row_count_ = file_->row_count_; // index->Count(); auto origin_file = *file_; origin_file.file_type_ = engine::meta::TableFileSchema::BACKUP; diff --git a/core/unittest/db/test_delete.cpp b/core/unittest/db/test_delete.cpp index 15618a950e..97c91fab64 100644 --- a/core/unittest/db/test_delete.cpp +++ b/core/unittest/db/test_delete.cpp @@ -264,6 +264,82 @@ TEST_F(DeleteTest, delete_multiple_times) { } } +TEST_F(DeleteTest, delete_before_create_index) { + milvus::engine::meta::TableSchema table_info = BuildTableSchema(); + table_info.engine_type_ = (int32_t)milvus::engine::EngineType::FAISS_IVFFLAT; + auto stat = db_->CreateTable(table_info); + + milvus::engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = table_info.table_id_; + stat = db_->DescribeTable(table_info_get); + ASSERT_TRUE(stat.ok()); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + int64_t nb = 10000; + milvus::engine::VectorsData xb; + BuildVectors(nb, xb); + + for (int64_t i = 0; i < nb; i++) { + xb.id_array_.push_back(i); + } + + stat = db_->InsertVectors(table_info.table_id_, "", xb); + ASSERT_TRUE(stat.ok()); + + stat = db_->Flush(); + ASSERT_TRUE(stat.ok()); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, nb - 1); + + int64_t num_query = 10; + std::map search_vectors; + for (int64_t i = 0; i < num_query; ++i) { + int64_t index = dis(gen); + milvus::engine::VectorsData search; + search.vector_count_ = 1; + for (int64_t j = 0; j < TABLE_DIM; j++) { + search.float_data_.push_back(xb.float_data_[index * TABLE_DIM + j]); + } + search_vectors.insert(std::make_pair(xb.id_array_[index], search)); + } + + milvus::engine::IDNumbers ids_to_delete; + for (auto& kv : search_vectors) { + ids_to_delete.emplace_back(kv.first); + } + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); + + stat = db_->Flush(); + ASSERT_TRUE(stat.ok()); + + milvus::engine::TableIndex index; + index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8; + index.extra_params_ = {{"nlist", 100}}; + stat = db_->CreateIndex(table_info.table_id_, index); + ASSERT_TRUE(stat.ok()); + + uint64_t row_count; + stat = db_->GetTableRowCount(table_info.table_id_, row_count); + ASSERT_TRUE(stat.ok()); + ASSERT_EQ(row_count, nb - ids_to_delete.size()); + + int topk = 10, nprobe = 10; + for (auto& pair : search_vectors) { + auto& search = pair.second; + + std::vector tags; + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, {{"nprobe", nprobe}}, search, result_ids, + result_distances); + ASSERT_NE(result_ids[0], pair.first); + // ASSERT_LT(result_distances[0], 1e-4); + ASSERT_GT(result_distances[0], 1); + } +} + TEST_F(DeleteTest, delete_with_index) { milvus::engine::meta::TableSchema table_info = BuildTableSchema(); table_info.engine_type_ = (int32_t)milvus::engine::EngineType::FAISS_IVFFLAT; From 2d96646356142a5b73478085780ff24731e28089 Mon Sep 17 00:00:00 2001 From: Nicky Date: Mon, 9 Mar 2020 16:33:28 +0800 Subject: [PATCH 3/3] update. Signed-off-by: Nicky --- .../knowhere/knowhere/index/vector_index/IndexIVF.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 523cdbaaf3..674638bcca 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -118,12 +118,12 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { // std::stringstream ss_res_id, ss_res_dist; // for (int i = 0; i < 10; ++i) { - // printf("%llu", res_ids[i]); + // printf("%llu", p_id[i]); // printf("\n"); - // printf("%.6f", res_dis[i]); + // printf("%.6f", p_dist[i]); // printf("\n"); - // ss_res_id << res_ids[i] << " "; - // ss_res_dist << res_dis[i] << " "; + // ss_res_id << p_id[i] << " "; + // ss_res_dist << p_dist[i] << " "; // } // std::cout << std::endl << "after search: " << std::endl; // std::cout << ss_res_id.str() << std::endl;