diff --git a/CHANGELOG.md b/CHANGELOG.md index fa2116a63b..47ab1cb94e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#39 - Create SQ8H index hang if using github server version - \#30 - Some troubleshoot messages in Milvus do not provide enough information - \#48 - Config unittest failed +- \#59 - Topk result is incorrect for small dataset ## Improvement - MS-552 - Add and change the easylogging library diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index ee1d88ee32..99f52dc284 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -243,7 +243,8 @@ if(CUSTOMIZATION) # set(FAISS_MD5 "57da9c4f599cc8fa4260488b1c96e1cc") # commit-id 6dbdf75987c34a2c853bd172ea0d384feea8358c branch-0.2.0 # set(FAISS_MD5 "21deb1c708490ca40ecb899122c01403") # commit-id 643e48f479637fd947e7b93fa4ca72b38ecc9a39 branch-0.2.0 # set(FAISS_MD5 "072db398351cca6e88f52d743bbb9fa0") # commit-id 3a2344d04744166af41ef1a74449d68a315bfe17 branch-0.2.1 - set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 + # set(FAISS_MD5 "c89ea8e655f5cdf58f42486f13614714") # commit-id 9c28a1cbb88f41fa03b03d7204106201ad33276b branch-0.2.1 + set(FAISS_MD5 "87fdd86351ffcaf3f80dc26ade63c44b") # commit-id 841a156e67e8e22cd8088e1b58c00afbf2efc30b branch-0.2.1 endif() else() set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz") diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 0c4856f2b6..fba2e11e2e 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -24,17 +24,21 @@ #include #include #include +#include #include #include #include #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" +#include "knowhere/common/Log.h" #include "knowhere/index/vector_index/IndexGPUIVF.h" #include "knowhere/index/vector_index/IndexIVF.h" namespace knowhere { +using stdclock = std::chrono::high_resolution_clock; + IndexModelPtr IVF::Train(const DatasetPtr& dataset, const Config& config) { auto build_cfg = std::dynamic_pointer_cast(config); @@ -216,7 +220,15 @@ IVF::GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const C void IVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { auto params = GenParams(cfg); + stdclock::time_point before = stdclock::now(); faiss::ivflib::search_with_parameters(index_.get(), n, (float*)data, k, distances, labels, params.get()); + stdclock::time_point after = stdclock::now(); + double search_cost = (std::chrono::duration(after - before)).count(); + KNOWHERE_LOG_DEBUG << "IVF search cost: " << search_cost + << ", quantization cost: " << faiss::indexIVF_stats.quantization_time + << ", data search cost: " << faiss::indexIVF_stats.search_time; + faiss::indexIVF_stats.quantization_time = 0; + faiss::indexIVF_stats.search_time = 0; } VectorIndexPtr diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index b7a1e211d2..2836d41dd4 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -253,7 +253,7 @@ XSearchTask::MergeTopkToResultSet(const std::vector& input_ids, const s if (result[i].empty()) { result_buf.resize(input_k, scheduler::IdDistPair(-1, 0.0)); - uint64_t input_k_multi_i = input_k * i; + uint64_t input_k_multi_i = topk * i; for (auto k = 0; k < input_k; ++k) { uint64_t idx = input_k_multi_i + k; auto& result_buf_item = result_buf[k]; @@ -266,7 +266,7 @@ XSearchTask::MergeTopkToResultSet(const std::vector& input_ids, const s result_buf.resize(output_k, scheduler::IdDistPair(-1, 0.0)); size_t buf_k = 0, src_k = 0, tar_k = 0; uint64_t src_idx; - uint64_t input_k_multi_i = input_k * i; + uint64_t input_k_multi_i = topk * i; while (buf_k < output_k && src_k < input_k && tar_k < tar_size) { src_idx = input_k_multi_i + src_k; auto& result_buf_item = result_buf[buf_k];