diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e607f92e4..ee5f9efbd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,8 +46,10 @@ Please mark all change in change log and use the issue from GitHub - \#1549 Fix server/wal config setting bug - \#1556 Index file not created after table and index created - \#1560 Search crashed with Super-high dimensional binary vector +- \#1564 Too low recall for glove-200-angular, ivf_pq index - \#1571 Meta engine type become IDMAP after dropping index for BINARY table - \#1574 Set all existing bitset in cache when applying deletes +- \#1577 Row count incorrect if delete vectors then create index ## Feature - \#216 Add CLI to get server info diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 523cdbaaf3..674638bcca 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -118,12 +118,12 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { // std::stringstream ss_res_id, ss_res_dist; // for (int i = 0; i < 10; ++i) { - // printf("%llu", res_ids[i]); + // printf("%llu", p_id[i]); // printf("\n"); - // printf("%.6f", res_dis[i]); + // printf("%.6f", p_dist[i]); // printf("\n"); - // ss_res_id << res_ids[i] << " "; - // ss_res_dist << res_dis[i] << " "; + // ss_res_id << p_id[i] << " "; + // ss_res_dist << p_dist[i] << " "; // } // std::cout << std::endl << "after search: " << std::endl; // std::cout << ss_res_id.str() << std::endl; diff --git a/core/src/scheduler/task/BuildIndexTask.cpp b/core/src/scheduler/task/BuildIndexTask.cpp index 666b977731..feec750e5a 100644 --- a/core/src/scheduler/task/BuildIndexTask.cpp +++ b/core/src/scheduler/task/BuildIndexTask.cpp @@ -12,6 +12,7 @@ #include "scheduler/task/BuildIndexTask.h" #include + #include #include #include @@ -207,7 +208,7 @@ XBuildIndexTask::Execute() { // step 6: update meta table_file.file_type_ = engine::meta::TableFileSchema::INDEX; table_file.file_size_ = index->PhysicalSize(); - table_file.row_count_ = index->Count(); + table_file.row_count_ = file_->row_count_; // index->Count(); auto origin_file = *file_; origin_file.file_type_ = engine::meta::TableFileSchema::BACKUP; diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index 6331c727c7..49b225eea8 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -101,7 +101,8 @@ XSearchTask::XSearchTask(const std::shared_ptr& context, TableF if (file_) { // distance -- value 0 means two vectors equal, ascending reduce, L2/HAMMING/JACCARD/TONIMOTO ... // similarity -- infinity value means two vectors equal, descending reduce, IP - if (file_->metric_type_ == static_cast(MetricType::IP)) { + if (file_->metric_type_ == static_cast(MetricType::IP) && + file_->engine_type_ != static_cast(EngineType::FAISS_PQ)) { ascending_reduce = false; } diff --git a/core/unittest/db/test_delete.cpp b/core/unittest/db/test_delete.cpp index 15618a950e..97c91fab64 100644 --- a/core/unittest/db/test_delete.cpp +++ b/core/unittest/db/test_delete.cpp @@ -264,6 +264,82 @@ TEST_F(DeleteTest, delete_multiple_times) { } } +TEST_F(DeleteTest, delete_before_create_index) { + milvus::engine::meta::TableSchema table_info = BuildTableSchema(); + table_info.engine_type_ = (int32_t)milvus::engine::EngineType::FAISS_IVFFLAT; + auto stat = db_->CreateTable(table_info); + + milvus::engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = table_info.table_id_; + stat = db_->DescribeTable(table_info_get); + ASSERT_TRUE(stat.ok()); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + int64_t nb = 10000; + milvus::engine::VectorsData xb; + BuildVectors(nb, xb); + + for (int64_t i = 0; i < nb; i++) { + xb.id_array_.push_back(i); + } + + stat = db_->InsertVectors(table_info.table_id_, "", xb); + ASSERT_TRUE(stat.ok()); + + stat = db_->Flush(); + ASSERT_TRUE(stat.ok()); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, nb - 1); + + int64_t num_query = 10; + std::map search_vectors; + for (int64_t i = 0; i < num_query; ++i) { + int64_t index = dis(gen); + milvus::engine::VectorsData search; + search.vector_count_ = 1; + for (int64_t j = 0; j < TABLE_DIM; j++) { + search.float_data_.push_back(xb.float_data_[index * TABLE_DIM + j]); + } + search_vectors.insert(std::make_pair(xb.id_array_[index], search)); + } + + milvus::engine::IDNumbers ids_to_delete; + for (auto& kv : search_vectors) { + ids_to_delete.emplace_back(kv.first); + } + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); + + stat = db_->Flush(); + ASSERT_TRUE(stat.ok()); + + milvus::engine::TableIndex index; + index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8; + index.extra_params_ = {{"nlist", 100}}; + stat = db_->CreateIndex(table_info.table_id_, index); + ASSERT_TRUE(stat.ok()); + + uint64_t row_count; + stat = db_->GetTableRowCount(table_info.table_id_, row_count); + ASSERT_TRUE(stat.ok()); + ASSERT_EQ(row_count, nb - ids_to_delete.size()); + + int topk = 10, nprobe = 10; + for (auto& pair : search_vectors) { + auto& search = pair.second; + + std::vector tags; + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, {{"nprobe", nprobe}}, search, result_ids, + result_distances); + ASSERT_NE(result_ids[0], pair.first); + // ASSERT_LT(result_distances[0], 1e-4); + ASSERT_GT(result_distances[0], 1); + } +} + TEST_F(DeleteTest, delete_with_index) { milvus::engine::meta::TableSchema table_info = BuildTableSchema(); table_info.engine_type_ = (int32_t)milvus::engine::EngineType::FAISS_IVFFLAT;