diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fa431015c..427265974a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ Please mark all change in change log and use the issue from GitHub - \#977 Server crash when create tables concurrently - \#990 Check gpu resources setting when assign repeated value - \#995 table count set to 0 if no tables found -- \#1010 improve error message when offset or page_size is equal 0 +- \#1010 Improve error message when offset or page_size is equal 0 - \#1022 check if partition name is legal - \#1028 check if table exists when show partitions - \#1029 check if table exists when try to delete partition @@ -29,10 +29,10 @@ Please mark all change in change log and use the issue from GitHub - \#1359 Negative distance value returned when searching with HNSW index type - \#1429 Server crashed when searching vectors with GPU - \#1476 Fix vectors results bug when getting vectors from segments -- \#1484 Index type changed to IDMAP after compacted +- \#1484 Index type changed to IDMAP after compacted +- \#1491 Server crashed during adding vectors - \#1499 Fix duplicated ID number issue -- \#1491 Server crashed during adding vectors -- \#1504 Avoid possible race condition between delete and search +- \#1504 Avoid possible race condition between delete and search - \#1507 set_config for insert_buffer_size is wrong - \#1510 Add set interfaces for WAL configurations - \#1511 Fix big integer cannot pass to server correctly @@ -41,8 +41,10 @@ Please mark all change in change log and use the issue from GitHub - \#1525 Add setter API for config preload_table - \#1529 Fix server crash when cache_insert_data enabled - \#1530 Set table file with correct engine type in meta +- \#1532 Search with ivf_flat failed with open-dataset: sift-256-hamming - \#1535 Degradation searching performance with metric_type: binary_idmap - \#1549 Fix server/wal config setting bug +- \#1556 Index file not created after table and index created ## Feature - \#216 Add CLI to get server info diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 96f04a758c..29ed17308a 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -812,7 +812,7 @@ DBImpl::CompactFile(const std::string& table_id, const meta::TableFileSchema& fi // Update table files state // if index type isn't IDMAP, set file type to TO_INDEX if file size exceed index_file_size // else set file type to RAW, no need to build index - if (compacted_file.engine_type_ != (int)EngineType::FAISS_IDMAP) { + if (!utils::IsRawIndexType(compacted_file.engine_type_)) { compacted_file.file_type_ = (segment_writer_ptr->Size() >= compacted_file.index_file_size_) ? meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; @@ -1468,7 +1468,7 @@ DBImpl::MergeFiles(const std::string& table_id, const meta::TableFilesSchema& fi // step 4: update table files state // if index type isn't IDMAP, set file type to TO_INDEX if file size exceed index_file_size // else set file type to RAW, no need to build index - if (table_file.engine_type_ != (int)EngineType::FAISS_IDMAP) { + if (!utils::IsRawIndexType(table_file.engine_type_)) { table_file.file_type_ = (segment_writer_ptr->Size() >= table_file.index_file_size_) ? meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW; @@ -1770,7 +1770,7 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex // for IDMAP type, only wait all NEW file converted to RAW file // for other type, wait NEW/RAW/NEW_MERGE/NEW_INDEX/TO_INDEX files converted to INDEX files std::vector file_types; - if (index.engine_type_ == static_cast(EngineType::FAISS_IDMAP)) { + if (utils::IsRawIndexType(index.engine_type_)) { file_types = { static_cast(meta::TableFileSchema::NEW), static_cast(meta::TableFileSchema::NEW_MERGE), @@ -1792,7 +1792,7 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex while (!table_files.empty()) { ENGINE_LOG_DEBUG << "Non index files detected! Will build index " << times; - if (index.engine_type_ != (int)EngineType::FAISS_IDMAP) { + if (!utils::IsRawIndexType(index.engine_type_)) { status = meta_ptr_->UpdateTableFilesToIndex(table_id); } diff --git a/core/src/db/Utils.cpp b/core/src/db/Utils.cpp index 3fa69c083a..f9ac5dfe40 100644 --- a/core/src/db/Utils.cpp +++ b/core/src/db/Utils.cpp @@ -215,6 +215,11 @@ IsSameIndex(const TableIndex& index1, const TableIndex& index2) { index1.metric_type_ == index2.metric_type_; } +bool +IsRawIndexType(int32_t type) { + return (type == (int32_t)EngineType::FAISS_IDMAP) || (type == (int32_t)EngineType::FAISS_BIN_IDMAP); +} + meta::DateT GetDate(const std::time_t& t, int day_delta) { struct tm ltm; diff --git a/core/src/db/Utils.h b/core/src/db/Utils.h index 810a0b5292..88197d7d5c 100644 --- a/core/src/db/Utils.h +++ b/core/src/db/Utils.h @@ -45,6 +45,9 @@ GetParentPath(const std::string& path, std::string& parent_path); bool IsSameIndex(const TableIndex& index1, const TableIndex& index2); +bool +IsRawIndexType(int32_t type); + meta::DateT GetDate(const std::time_t& t, int day_delta = 0); meta::DateT diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index eb7b41d5d6..1fe820aa7a 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -406,7 +406,7 @@ ExecutionEngineImpl::Load(bool to_cache) { utils::GetParentPath(location_, segment_dir); auto segment_reader_ptr = std::make_shared(segment_dir); - if (index_type_ == EngineType::FAISS_IDMAP || index_type_ == EngineType::FAISS_BIN_IDMAP) { + if (utils::IsRawIndexType((int32_t)index_type_)) { index_ = index_type_ == EngineType::FAISS_IDMAP ? GetVecIndexFactory(IndexType::FAISS_IDMAP) : GetVecIndexFactory(IndexType::FAISS_BIN_IDMAP); milvus::json conf{{knowhere::meta::DEVICEID, gpu_num_}, {knowhere::meta::DIM, dim_}}; diff --git a/core/src/db/meta/MySQLMetaImpl.cpp b/core/src/db/meta/MySQLMetaImpl.cpp index 8862ab5ee8..b1d28c2d34 100644 --- a/core/src/db/meta/MySQLMetaImpl.cpp +++ b/core/src/db/meta/MySQLMetaImpl.cpp @@ -674,16 +674,7 @@ MySQLMetaImpl::CreateTableFile(TableFileSchema& file_schema) { file_schema.updated_time_ = file_schema.created_on_; file_schema.index_file_size_ = table_schema.index_file_size_; file_schema.index_params_ = table_schema.index_params_; - - if (file_schema.file_type_ == TableFileSchema::FILE_TYPE::NEW || - file_schema.file_type_ == TableFileSchema::FILE_TYPE::NEW_MERGE) { - file_schema.engine_type_ = server::ValidationUtil::IsBinaryMetricType(table_schema.metric_type_) - ? (int32_t)EngineType::FAISS_BIN_IDMAP - : (int32_t)EngineType::FAISS_IDMAP; - } else { - file_schema.engine_type_ = table_schema.engine_type_; - } - + file_schema.engine_type_ = table_schema.engine_type_; file_schema.metric_type_ = table_schema.metric_type_; std::string id = "NULL"; // auto-increment @@ -2086,8 +2077,7 @@ MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/) // If we are deleting a raw table file, it means it's okay to delete the entire segment directory. // Else, we can only delete the single file // TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky - if (table_file.engine_type_ == (int32_t)EngineType::FAISS_IDMAP || - table_file.engine_type_ == (int32_t)EngineType::FAISS_BIN_IDMAP) { + if (utils::IsRawIndexType(table_file.engine_type_)) { utils::DeleteSegment(options_, table_file); std::string segment_dir; utils::GetParentPath(table_file.location_, segment_dir); diff --git a/core/src/db/meta/SqliteMetaImpl.cpp b/core/src/db/meta/SqliteMetaImpl.cpp index 0521680946..ca37f2c258 100644 --- a/core/src/db/meta/SqliteMetaImpl.cpp +++ b/core/src/db/meta/SqliteMetaImpl.cpp @@ -375,16 +375,7 @@ SqliteMetaImpl::CreateTableFile(TableFileSchema& file_schema) { file_schema.updated_time_ = file_schema.created_on_; file_schema.index_file_size_ = table_schema.index_file_size_; file_schema.index_params_ = table_schema.index_params_; - - if (file_schema.file_type_ == TableFileSchema::FILE_TYPE::NEW || - file_schema.file_type_ == TableFileSchema::FILE_TYPE::NEW_MERGE) { - file_schema.engine_type_ = server::ValidationUtil::IsBinaryMetricType(table_schema.metric_type_) - ? (int32_t)EngineType::FAISS_BIN_IDMAP - : (int32_t)EngineType::FAISS_IDMAP; - } else { - file_schema.engine_type_ = table_schema.engine_type_; - } - + file_schema.engine_type_ = table_schema.engine_type_; file_schema.metric_type_ = table_schema.metric_type_; // multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here @@ -1425,8 +1416,7 @@ SqliteMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/ // If we are deleting a raw table file, it means it's okay to delete the entire segment directory. // Else, we can only delete the single file // TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky - if (table_file.engine_type_ == (int32_t)EngineType::FAISS_IDMAP || - table_file.engine_type_ == (int32_t)EngineType::FAISS_BIN_IDMAP) { + if (utils::IsRawIndexType(table_file.engine_type_)) { utils::DeleteSegment(options_, table_file); std::string segment_dir; utils::GetParentPath(table_file.location_, segment_dir); diff --git a/sdk/examples/simple/src/ClientTest.cpp b/sdk/examples/simple/src/ClientTest.cpp index 572f120dcc..635dabdc16 100644 --- a/sdk/examples/simple/src/ClientTest.cpp +++ b/sdk/examples/simple/src/ClientTest.cpp @@ -159,13 +159,6 @@ ClientTest::SearchVectors(const std::string& table_name, int64_t topk, int64_t n topk_query_result); } -void -ClientTest::SearchVectorsByIds(const std::string& table_name, int64_t topk, int64_t nprobe) { - std::vector partition_tags; - milvus::TopKQueryResult topk_query_result; - milvus_sdk::Utils::DoSearch(conn_, table_name, partition_tags, topk, nprobe, search_id_array_, topk_query_result); -} - void ClientTest::CreateIndex(const std::string& table_name, milvus::IndexType type, int64_t nlist) { milvus_sdk::TimeRecorder rc("Create index"); @@ -245,7 +238,6 @@ ClientTest::Test() { GetVectorById(table_name, search_id_array_[0]); SearchVectors(table_name, TOP_K, NPROBE); - SearchVectorsByIds(table_name, TOP_K, NPROBE); CreateIndex(table_name, INDEX_TYPE, NLIST); ShowTableInfo(table_name); diff --git a/sdk/examples/simple/src/ClientTest.h b/sdk/examples/simple/src/ClientTest.h index 1082878109..0c62b29729 100644 --- a/sdk/examples/simple/src/ClientTest.h +++ b/sdk/examples/simple/src/ClientTest.h @@ -29,36 +29,49 @@ class ClientTest { private: void ShowServerVersion(); + void ShowSdkVersion(); + void ShowTables(std::vector&); + void CreateTable(const std::string&, int64_t, milvus::MetricType); + void DescribeTable(const std::string&); + void InsertVectors(const std::string&, int64_t); + void BuildSearchVectors(int64_t, int64_t); + void Flush(const std::string&); + void ShowTableInfo(const std::string&); + void GetVectorById(const std::string&, int64_t); + void SearchVectors(const std::string&, int64_t, int64_t); - void - SearchVectorsByIds(const std::string&, int64_t, int64_t); + void CreateIndex(const std::string&, milvus::IndexType, int64_t); + void PreloadTable(const std::string&); + void DeleteByIds(const std::string&, const std::vector&); + void DropIndex(const std::string&); + void DropTable(const std::string&); diff --git a/sdk/examples/utils/Utils.cpp b/sdk/examples/utils/Utils.cpp index 068e5099b0..c9294cb3aa 100644 --- a/sdk/examples/utils/Utils.cpp +++ b/sdk/examples/utils/Utils.cpp @@ -220,68 +220,6 @@ Utils::DoSearch(std::shared_ptr conn, const std::string& tab CheckSearchResult(search_record_array, topk_query_result); } -void -Utils::DoSearch(std::shared_ptr conn, const std::string& table_name, - const std::vector& partition_tags, int64_t top_k, int64_t nprobe, - const std::vector& search_id_array, milvus::TopKQueryResult& topk_query_result) { - topk_query_result.clear(); - - { - BLOCK_SPLITER - JSON json_params = {{"nprobe", nprobe}}; - for (auto& search_id : search_id_array) { - milvus_sdk::TimeRecorder rc("search by id " + std::to_string(search_id)); - milvus::TopKQueryResult result; - milvus::Status - stat = conn->SearchByID(table_name, partition_tags, search_id, top_k, json_params.dump(), result); - topk_query_result.insert(topk_query_result.end(), std::make_move_iterator(result.begin()), - std::make_move_iterator(result.end())); - std::cout << "SearchByID function call status: " << stat.message() << std::endl; - } - BLOCK_SPLITER - } - - if (topk_query_result.size() != search_id_array.size()) { - std::cout << "ERROR: Returned result count does not equal nq" << std::endl; - return; - } - - BLOCK_SPLITER - for (size_t i = 0; i < topk_query_result.size(); i++) { - const milvus::QueryResult& one_result = topk_query_result[i]; - size_t topk = one_result.ids.size(); - auto search_id = search_id_array[i]; - std::cout << "No." << i << " vector " << search_id << " top " << topk << " search result:" << std::endl; - for (size_t j = 0; j < topk; j++) { - std::cout << "\t" << one_result.ids[j] << "\t" << one_result.distances[j] << std::endl; - } - } - BLOCK_SPLITER - - BLOCK_SPLITER - size_t nq = topk_query_result.size(); - for (size_t i = 0; i < nq; i++) { - const milvus::QueryResult& one_result = topk_query_result[i]; - auto search_id = search_id_array[i]; - - uint64_t match_index = one_result.ids.size(); - for (uint64_t index = 0; index < one_result.ids.size(); index++) { - if (search_id == one_result.ids[index]) { - match_index = index; - break; - } - } - - if (match_index >= one_result.ids.size()) { - std::cout << "The topk result is wrong: not return search target in result set" << std::endl; - } else { - std::cout << "No." << i << " Check result successfully for target: " << search_id << " at top " - << match_index << std::endl; - } - } - BLOCK_SPLITER -} - void PrintPartitionStat(const milvus::PartitionStat& partition_stat) { std::cout << "\tPartition " << partition_stat.tag << " row count: " << partition_stat.row_count << std::endl; diff --git a/sdk/examples/utils/Utils.h b/sdk/examples/utils/Utils.h index c89f5151a9..4f33dafc33 100644 --- a/sdk/examples/utils/Utils.h +++ b/sdk/examples/utils/Utils.h @@ -70,12 +70,6 @@ class Utils { const std::vector>& search_record_array, milvus::TopKQueryResult& topk_query_result); - static void - DoSearch(std::shared_ptr conn, const std::string& table_name, - const std::vector& partition_tags, int64_t top_k, int64_t nprobe, - const std::vector& search_id_array, - milvus::TopKQueryResult& topk_query_result); - static void PrintTableInfo(const milvus::TableInfo& info); }; diff --git a/sdk/grpc/ClientProxy.cpp b/sdk/grpc/ClientProxy.cpp index 7dae7b3b16..ad23f9b954 100644 --- a/sdk/grpc/ClientProxy.cpp +++ b/sdk/grpc/ClientProxy.cpp @@ -314,49 +314,6 @@ ClientProxy::Search(const std::string& table_name, const std::vector& partition_tag_array, - int64_t query_id, - int64_t topk, - const std::string& extra_params, - TopKQueryResult& topk_query_result) { - try { - // step 1: convert vector id array - ::milvus::grpc::SearchByIDParam search_param; - ConstructSearchParam(table_name, - partition_tag_array, - topk, - extra_params, - search_param); - search_param.set_id(query_id); - - // step 2: search vectors - ::milvus::grpc::TopKQueryResult result; - Status status = client_ptr_->SearchByID(search_param, result); - if (result.row_num() == 0) { - return status; - } - - // step 4: convert result array - topk_query_result.reserve(result.row_num()); - int64_t nq = result.row_num(); - int64_t topk = result.ids().size() / nq; - for (int64_t i = 0; i < result.row_num(); i++) { - milvus::QueryResult one_result; - one_result.ids.resize(topk); - one_result.distances.resize(topk); - memcpy(one_result.ids.data(), result.ids().data() + topk * i, topk * sizeof(int64_t)); - memcpy(one_result.distances.data(), result.distances().data() + topk * i, topk * sizeof(float)); - topk_query_result.emplace_back(one_result); - } - - return status; - } catch (std::exception& ex) { - return Status(StatusCode::UnknownError, "Failed to search vectors: " + std::string(ex.what())); - } -} - Status ClientProxy::DescribeTable(const std::string& table_name, TableSchema& table_schema) { try { diff --git a/sdk/grpc/ClientProxy.h b/sdk/grpc/ClientProxy.h index 30bffedf0f..0fd493dcd2 100644 --- a/sdk/grpc/ClientProxy.h +++ b/sdk/grpc/ClientProxy.h @@ -63,11 +63,6 @@ class ClientProxy : public Connection { const std::vector& query_record_array, int64_t topk, const std::string& extra_params, TopKQueryResult& topk_query_result) override; - Status - SearchByID(const std::string& table_name, const std::vector& partition_tag_array, - int64_t query_id, int64_t topk, - const std::string& extra_params, TopKQueryResult& topk_query_result) override; - Status DescribeTable(const std::string& table_name, TableSchema& table_schema) override; diff --git a/sdk/grpc/GrpcClient.cpp b/sdk/grpc/GrpcClient.cpp index 47c6c9e8ab..96773f9c75 100644 --- a/sdk/grpc/GrpcClient.cpp +++ b/sdk/grpc/GrpcClient.cpp @@ -178,26 +178,6 @@ GrpcClient::Search( return Status::OK(); } -Status -GrpcClient::SearchByID(const ::milvus::grpc::SearchByIDParam& search_param, - ::milvus::grpc::TopKQueryResult& topk_query_result) { - ::milvus::grpc::TopKQueryResult query_result; - ClientContext context; - ::grpc::Status grpc_status = stub_->SearchByID(&context, search_param, &topk_query_result); - - if (!grpc_status.ok()) { - std::cerr << "SearchByID rpc failed!" << std::endl; - std::cerr << grpc_status.error_message() << std::endl; - return Status(StatusCode::RPCFailed, grpc_status.error_message()); - } - if (topk_query_result.status().error_code() != grpc::SUCCESS) { - std::cerr << topk_query_result.status().reason() << std::endl; - return Status(StatusCode::ServerFailed, topk_query_result.status().reason()); - } - - return Status::OK(); -} - Status GrpcClient::DescribeTable(const std::string& table_name, ::milvus::grpc::TableSchema& grpc_schema) { ClientContext context; diff --git a/sdk/grpc/GrpcClient.h b/sdk/grpc/GrpcClient.h index 51af08a2c7..d7b7ae42c5 100644 --- a/sdk/grpc/GrpcClient.h +++ b/sdk/grpc/GrpcClient.h @@ -59,9 +59,6 @@ class GrpcClient { Status Search(const grpc::SearchParam& search_param, ::milvus::grpc::TopKQueryResult& topk_query_result); - Status - SearchByID(const grpc::SearchByIDParam& search_param, ::milvus::grpc::TopKQueryResult& topk_query_result); - Status DescribeTable(const std::string& table_name, grpc::TableSchema& grpc_schema); diff --git a/sdk/include/MilvusApi.h b/sdk/include/MilvusApi.h index 8196bc676f..111cc79dc3 100644 --- a/sdk/include/MilvusApi.h +++ b/sdk/include/MilvusApi.h @@ -334,24 +334,6 @@ class Connection { const std::vector& query_record_array, int64_t topk, const std::string& extra_params, TopKQueryResult& topk_query_result) = 0; - /** - * @brief Search vector by ID - * - * This method is used to query vector in table. - * - * @param table_name, target table's name. - * @param partition_tag_array, target partitions, keep empty if no partition. - * @param query_id, vector id to be queried. - * @param topk, how many similarity vectors will be returned. - * @param extra_params, extra search parameters according to different index type, must be json format. - * @param topk_query_result, result array. - * - * @return Indicate if query is successful. - */ - virtual Status - SearchByID(const std::string& table_name, const PartitionTagList& partition_tag_array, int64_t query_id, - int64_t topk, const std::string& extra_params, TopKQueryResult& topk_query_result) = 0; - /** * @brief Show table description * diff --git a/sdk/interface/ConnectionImpl.cpp b/sdk/interface/ConnectionImpl.cpp index 54c3aded92..4acbcec7b8 100644 --- a/sdk/interface/ConnectionImpl.cpp +++ b/sdk/interface/ConnectionImpl.cpp @@ -100,16 +100,6 @@ ConnectionImpl::Search(const std::string& table_name, const std::vectorSearch(table_name, partition_tags, query_record_array, topk, extra_params, topk_query_result); } -Status -ConnectionImpl::SearchByID(const std::string& table_name, - const std::vector& partition_tags, - int64_t query_id, - int64_t topk, - const std::string& extra_params, - TopKQueryResult& topk_query_result) { - return client_proxy_->SearchByID(table_name, partition_tags, query_id, topk, extra_params, topk_query_result); -} - Status ConnectionImpl::DescribeTable(const std::string& table_name, TableSchema& table_schema) { return client_proxy_->DescribeTable(table_name, table_schema); diff --git a/sdk/interface/ConnectionImpl.h b/sdk/interface/ConnectionImpl.h index 0a89a7e9a7..ec0f423857 100644 --- a/sdk/interface/ConnectionImpl.h +++ b/sdk/interface/ConnectionImpl.h @@ -65,10 +65,6 @@ class ConnectionImpl : public Connection { const std::vector& query_record_array, int64_t topk, const std::string& extra_params, TopKQueryResult& topk_query_result) override; - Status - SearchByID(const std::string& table_name, const std::vector& partition_tag_array, int64_t query_id, - int64_t topk, const std::string& extra_params, TopKQueryResult& topk_query_result) override; - Status DescribeTable(const std::string& table_name, TableSchema& table_schema) override; diff --git a/shards/requirements.txt b/shards/requirements.txt index 47e1e521c2..8f4667f34d 100644 --- a/shards/requirements.txt +++ b/shards/requirements.txt @@ -30,7 +30,7 @@ requests-oauthlib==1.2.0 rsa==4.0 six==1.12.0 SQLAlchemy==1.3.5 -urllib3==1.25.3 +urllib3==1.25.8 jaeger-client>=3.4.0 grpcio-opentracing>=1.0 mock==2.0.0