diff --git a/CHANGELOG.md b/CHANGELOG.md index f86f15976a..b6b4119fda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,16 +12,20 @@ Please mark all change in change log and use the issue from GitHub - \#831 Judge branch error in CommonUtil.cpp - \#977 Server crash when create tables concurrently - \#990 Check gpu resources setting when assign repeated value -- \#995 table count set to 0 if no tables found +- \#995 Table count set to 0 if no tables found - \#1010 Improve error message when offset or page_size is equal 0 -- \#1022 check if partition name is legal +- \#1022 Check if partition name is valid - \#1028 check if table exists when show partitions - \#1029 check if table exists when try to delete partition - \#1066 optimize http insert and search speed +- \#1022 Check if partition name is legal +- \#1028 Check if table exists when show partitions +- \#1029 Check if table exists when try to delete partition +- \#1066 Optimize http insert and search speed - \#1067 Add binary vectors support in http server - \#1075 Improve error message when page size or offset is illegal - \#1082 Check page_size or offset value to avoid float -- \#1115 http server support load table into memory +- \#1115 Http server support load table into memory - \#1152 Error log output continuously after server start - \#1211 Server down caused by searching with index_type: HNSW - \#1240 Update license declaration @@ -36,6 +40,7 @@ Please mark all change in change log and use the issue from GitHub - \#1507 set_config for insert_buffer_size is wrong - \#1510 Add set interfaces for WAL configurations - \#1511 Fix big integer cannot pass to server correctly +- \#1517 result is not correct when search vectors in multi partition, index type is RNSG - \#1518 Table count did not match after deleting vectors and compact - \#1521 Make cache_insert_data take effect in-service - \#1525 Add setter API for config preload_table @@ -46,6 +51,15 @@ Please mark all change in change log and use the issue 
from GitHub - \#1549 Fix server/wal config setting bug - \#1556 Index file not created after table and index created - \#1560 Search crashed with Super-high dimensional binary vector +- \#1564 Too low recall for glove-200-angular, ivf_pq index +- \#1571 Meta engine type become IDMAP after dropping index for BINARY table +- \#1574 Set all existing bitset in cache when applying deletes +- \#1577 Row count incorrect if delete vectors then create index +- \#1580 Old segment folder not removed after merge/compact if create_index is called before adding data +- \#1590 Server down caused by failure to write file during concurrent mixed operations +- \#1598 Server down during mixed operations +- \#1601 External link bug in HTTP doc +- \#1609 Refine Compact function ## Feature - \#216 Add CLI to get server info @@ -74,7 +88,7 @@ Please mark all change in change log and use the issue from GitHub - \#738 Use Openblas / lapack from apt install - \#758 Enhance config description - \#791 Remove Arrow -- \#834 add cpu mode for built-in Faiss +- \#834 Add cpu mode for built-in Faiss - \#848 Add ready-to-use config files to the Milvus repo for enhanced user experince - \#860 Remove redundant checks in CacheMgr's constructor - \#908 Move "primary_path" and "secondary_path" to storage config diff --git a/core/src/codecs/default/DefaultDeletedDocsFormat.cpp b/core/src/codecs/default/DefaultDeletedDocsFormat.cpp index fb211f6969..0b0039cd4f 100644 --- a/core/src/codecs/default/DefaultDeletedDocsFormat.cpp +++ b/core/src/codecs/default/DefaultDeletedDocsFormat.cpp @@ -43,7 +43,7 @@ DefaultDeletedDocsFormat::read(const store::DirectoryPtr& directory_ptr, segment int del_fd = open(del_file_path.c_str(), O_RDONLY, 00664); if (del_fd == -1) { - std::string err_msg = "Failed to open file: " + del_file_path; + std::string err_msg = "Failed to open file: " + del_file_path + ", error: " + std::strerror(errno); ENGINE_LOG_ERROR << err_msg; throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg); } 
@@ -91,7 +91,7 @@ DefaultDeletedDocsFormat::write(const store::DirectoryPtr& directory_ptr, const // Write to the temp file, in order to avoid possible race condition with search (concurrent read and write) int del_fd = open(temp_path.c_str(), O_RDWR | O_CREAT, 00664); if (del_fd == -1) { - std::string err_msg = "Failed to open file: " + temp_path; + std::string err_msg = "Failed to open file: " + temp_path + ", error: " + std::strerror(errno); ENGINE_LOG_ERROR << err_msg; throw Exception(SERVER_CANNOT_CREATE_FILE, err_msg); } @@ -130,7 +130,7 @@ DefaultDeletedDocsFormat::write(const store::DirectoryPtr& directory_ptr, const ENGINE_LOG_ERROR << err_msg; throw Exception(SERVER_WRITE_ERROR, err_msg); } - if (::write(del_fd, deleted_docs_list.data(), new_num_bytes) == -1) { + if (::write(del_fd, deleted_docs_list.data(), sizeof(segment::offset_t) * deleted_docs->GetSize()) == -1) { std::string err_msg = "Failed to write to file" + temp_path + ", error: " + std::strerror(errno); ENGINE_LOG_ERROR << err_msg; throw Exception(SERVER_WRITE_ERROR, err_msg); diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 29ed17308a..5bc7ae50ba 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -366,8 +366,8 @@ DBImpl::PreloadTable(const std::string& table_id) { if (file.file_type_ == meta::TableFileSchema::FILE_TYPE::RAW || file.file_type_ == meta::TableFileSchema::FILE_TYPE::TO_INDEX || file.file_type_ == meta::TableFileSchema::FILE_TYPE::BACKUP) { - engine_type = server::ValidationUtil::IsBinaryMetricType(file.metric_type_) ? EngineType::FAISS_BIN_IDMAP - : EngineType::FAISS_IDMAP; + engine_type = + utils::IsBinaryMetricType(file.metric_type_) ? 
EngineType::FAISS_BIN_IDMAP : EngineType::FAISS_IDMAP; } else { engine_type = (EngineType)file.engine_type_; } @@ -664,33 +664,13 @@ DBImpl::Compact(const std::string& table_id) { ENGINE_LOG_DEBUG << "Before compacting, wait for build index thread to finish..."; - WaitBuildIndexFinish(); + // WaitBuildIndexFinish(); - std::lock_guard index_lock(index_result_mutex_); + const std::lock_guard index_lock(build_index_mutex_); const std::lock_guard merge_lock(flush_merge_compact_mutex_); ENGINE_LOG_DEBUG << "Compacting table: " << table_id; - /* - // Save table index - TableIndex table_index; - status = DescribeIndex(table_id, table_index); - if (!status.ok()) { - return status; - } - - // Drop all index - status = DropIndex(table_id); - if (!status.ok()) { - return status; - } - - // Then update table index to the previous index - status = UpdateTableIndexRecursively(table_id, table_index); - if (!status.ok()) { - return status; - } - */ // Get files to compact from meta. std::vector file_types{meta::TableFileSchema::FILE_TYPE::RAW, meta::TableFileSchema::FILE_TYPE::TO_INDEX, meta::TableFileSchema::FILE_TYPE::BACKUP}; @@ -706,9 +686,11 @@ DBImpl::Compact(const std::string& table_id) { OngoingFileChecker::GetInstance().MarkOngoingFiles(files_to_compact); - meta::TableFilesSchema files_to_update; Status compact_status; - for (auto& file : files_to_compact) { + for (meta::TableFilesSchema::iterator iter = files_to_compact.begin(); iter != files_to_compact.end();) { + meta::TableFileSchema file = *iter; + iter = files_to_compact.erase(iter); + // Check if the segment needs compacting std::string segment_dir; utils::GetParentPath(file.location_, segment_dir); @@ -719,52 +701,42 @@ DBImpl::Compact(const std::string& table_id) { if (!status.ok()) { std::string msg = "Failed to load deleted_docs from " + segment_dir; ENGINE_LOG_ERROR << msg; - return Status(DB_ERROR, msg); + OngoingFileChecker::GetInstance().UnmarkOngoingFile(file); + continue; // skip this file and try compact 
next one } + meta::TableFilesSchema files_to_update; if (deleted_docs->GetSize() != 0) { compact_status = CompactFile(table_id, file, files_to_update); if (!compact_status.ok()) { ENGINE_LOG_ERROR << "Compact failed for segment " << file.segment_id_ << ": " << compact_status.message(); - break; + OngoingFileChecker::GetInstance().UnmarkOngoingFile(file); + continue; // skip this file and try compact next one } } else { - ENGINE_LOG_ERROR << "Segment " << file.segment_id_ << " has no deleted data. No need to compact"; + OngoingFileChecker::GetInstance().UnmarkOngoingFile(file); + ENGINE_LOG_DEBUG << "Segment " << file.segment_id_ << " has no deleted data. No need to compact"; + continue; // skip this file and try compact next one + } + + ENGINE_LOG_DEBUG << "Updating meta after compaction..."; + status = meta_ptr_->UpdateTableFiles(files_to_update); + OngoingFileChecker::GetInstance().UnmarkOngoingFile(file); + if (!status.ok()) { + compact_status = status; + break; // meta error, could not go on } } + OngoingFileChecker::GetInstance().UnmarkOngoingFiles(files_to_compact); + if (compact_status.ok()) { ENGINE_LOG_DEBUG << "Finished compacting table: " << table_id; } - ENGINE_LOG_ERROR << "Updating meta after compaction..."; - - /* - // Drop index again, in case some files were in the index building process during compacting - status = DropIndex(table_id); - if (!status.ok()) { - return status; - } - - // Update index - status = UpdateTableIndexRecursively(table_id, table_index); - if (!status.ok()) { - return status; - } - */ - - status = meta_ptr_->UpdateTableFiles(files_to_update); - if (!status.ok()) { - return status; - } - - OngoingFileChecker::GetInstance().UnmarkOngoingFiles(files_to_compact); - - ENGINE_LOG_DEBUG << "Finished updating meta after compaction"; - - return status; + return compact_status; } Status @@ -1019,7 +991,7 @@ DBImpl::GetVectorByIdHelper(const std::string& table_id, IDNumber vector_id, Vec auto deleted = std::find(deleted_docs.begin(), 
deleted_docs.end(), offset); if (deleted == deleted_docs.end()) { // Load raw vector - bool is_binary = server::ValidationUtil::IsBinaryMetricType(file.metric_type_); + bool is_binary = utils::IsBinaryMetricType(file.metric_type_); size_t single_vector_bytes = is_binary ? file.dimension_ / 8 : file.dimension_ * sizeof(float); std::vector raw_vector; status = segment_reader.LoadVectors(offset * single_vector_bytes, single_vector_bytes, raw_vector); @@ -1086,7 +1058,7 @@ DBImpl::CreateIndex(const std::string& table_id, const TableIndex& index) { // step 4: wait and build index status = index_failed_checker_.CleanFailedIndexFileOfTable(table_id); - status = BuildTableIndexRecursively(table_id, index); + status = WaitTableIndexRecursively(table_id, index); return status; } @@ -1766,7 +1738,7 @@ DBImpl::UpdateTableIndexRecursively(const std::string& table_id, const TableInde } Status -DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex& index) { +DBImpl::WaitTableIndexRecursively(const std::string& table_id, const TableIndex& index) { // for IDMAP type, only wait all NEW file converted to RAW file // for other type, wait NEW/RAW/NEW_MERGE/NEW_INDEX/TO_INDEX files converted to INDEX files std::vector file_types; @@ -1807,8 +1779,8 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex std::vector partition_array; status = meta_ptr_->ShowPartitions(table_id, partition_array); for (auto& schema : partition_array) { - status = BuildTableIndexRecursively(schema.table_id_, index); - fiu_do_on("DBImpl.BuildTableIndexRecursively.fail_build_table_Index_for_partition", + status = WaitTableIndexRecursively(schema.table_id_, index); + fiu_do_on("DBImpl.WaitTableIndexRecursively.fail_build_table_Index_for_partition", status = Status(DB_ERROR, "")); if (!status.ok()) { return status; @@ -1818,7 +1790,7 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex // failed to build index for some files, 
return error std::string err_msg; index_failed_checker_.GetErrMsgForTable(table_id, err_msg); - fiu_do_on("DBImpl.BuildTableIndexRecursively.not_empty_err_msg", err_msg.append("fiu")); + fiu_do_on("DBImpl.WaitTableIndexRecursively.not_empty_err_msg", err_msg.append("fiu")); if (!err_msg.empty()) { return Status(DB_ERROR, err_msg); } diff --git a/core/src/db/DBImpl.h b/core/src/db/DBImpl.h index 79c7e75a8c..bdec4b3491 100644 --- a/core/src/db/DBImpl.h +++ b/core/src/db/DBImpl.h @@ -216,7 +216,7 @@ class DBImpl : public DB, public server::CacheConfigHandler { UpdateTableIndexRecursively(const std::string& table_id, const TableIndex& index); Status - BuildTableIndexRecursively(const std::string& table_id, const TableIndex& index); + WaitTableIndexRecursively(const std::string& table_id, const TableIndex& index); Status DropTableIndexRecursively(const std::string& table_id); diff --git a/core/src/db/Utils.cpp b/core/src/db/Utils.cpp index f9ac5dfe40..d184cea085 100644 --- a/core/src/db/Utils.cpp +++ b/core/src/db/Utils.cpp @@ -220,6 +220,19 @@ IsRawIndexType(int32_t type) { return (type == (int32_t)EngineType::FAISS_IDMAP) || (type == (int32_t)EngineType::FAISS_BIN_IDMAP); } +bool +IsBinaryIndexType(int32_t index_type) { + return (index_type == (int32_t)engine::EngineType::FAISS_BIN_IDMAP) || + (index_type == (int32_t)engine::EngineType::FAISS_BIN_IVFFLAT); +} + +bool +IsBinaryMetricType(int32_t metric_type) { + return (metric_type == (int32_t)engine::MetricType::HAMMING) || + (metric_type == (int32_t)engine::MetricType::JACCARD) || + (metric_type == (int32_t)engine::MetricType::TANIMOTO); +} + meta::DateT GetDate(const std::time_t& t, int day_delta) { struct tm ltm; diff --git a/core/src/db/Utils.h b/core/src/db/Utils.h index 88197d7d5c..c78b4fd717 100644 --- a/core/src/db/Utils.h +++ b/core/src/db/Utils.h @@ -48,6 +48,12 @@ IsSameIndex(const TableIndex& index1, const TableIndex& index2); bool IsRawIndexType(int32_t type); +static bool +IsBinaryIndexType(int32_t 
index_type); + +bool +IsBinaryMetricType(int32_t metric_type); + meta::DateT GetDate(const std::time_t& t, int day_delta = 0); meta::DateT diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 1fe820aa7a..9cb1dfb09f 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -100,9 +100,8 @@ ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, const std::string& index_type_(index_type), metric_type_(metric_type), index_params_(index_params) { - EngineType tmp_index_type = server::ValidationUtil::IsBinaryMetricType((int32_t)metric_type) - ? EngineType::FAISS_BIN_IDMAP - : EngineType::FAISS_IDMAP; + EngineType tmp_index_type = + utils::IsBinaryMetricType((int32_t)metric_type) ? EngineType::FAISS_BIN_IDMAP : EngineType::FAISS_IDMAP; index_ = CreatetVecIndex(tmp_index_type); if (!index_) { throw Exception(DB_ERROR, "Unsupported index type"); diff --git a/core/src/db/insert/MemTable.cpp b/core/src/db/insert/MemTable.cpp index f0bdb7dedf..f0bf8c94d9 100644 --- a/core/src/db/insert/MemTable.cpp +++ b/core/src/db/insert/MemTable.cpp @@ -236,11 +236,27 @@ MemTable::ApplyDeletes() { utils::GetParentPath(table_file.location_, segment_dir); segment::SegmentReader segment_reader(segment_dir); - auto index = - std::static_pointer_cast(cache::CpuCacheMgr::GetInstance()->GetIndex(table_file.location_)); - faiss::ConcurrentBitsetPtr blacklist = nullptr; - if (index != nullptr) { - status = index->GetBlacklist(blacklist); + auto& segment_id = table_file.segment_id_; + meta::TableFilesSchema segment_files; + status = meta_->GetTableFilesBySegmentId(segment_id, segment_files); + if (!status.ok()) { + break; + } + + // Get all index that contains blacklist in cache + std::vector indexes; + std::vector blacklists; + for (auto& file : segment_files) { + auto index = + std::static_pointer_cast(cache::CpuCacheMgr::GetInstance()->GetIndex(file.location_)); + 
faiss::ConcurrentBitsetPtr blacklist = nullptr; + if (index != nullptr) { + index->GetBlacklist(blacklist); + if (blacklist != nullptr) { + indexes.emplace_back(index); + blacklists.emplace_back(blacklist); + } + } } std::vector uids; @@ -293,7 +309,7 @@ MemTable::ApplyDeletes() { id_bloom_filter_ptr->Remove(uids[i]); } - if (blacklist != nullptr) { + for (auto& blacklist : blacklists) { if (!blacklist->test(i)) { blacklist->set(i); } @@ -308,8 +324,8 @@ MemTable::ApplyDeletes() { << find_diff.count() << " s in total"; ENGINE_LOG_DEBUG << "Setting deleted docs and bloom filter took " << set_diff.count() << " s in total"; - if (index != nullptr) { - index->SetBlacklist(blacklist); + for (auto i = 0; i < indexes.size(); ++i) { + indexes[i]->SetBlacklist(blacklists[i]); } start = std::chrono::high_resolution_clock::now(); @@ -339,12 +355,6 @@ MemTable::ApplyDeletes() { << " s"; // Update table file row count - auto& segment_id = table_file.segment_id_; - meta::TableFilesSchema segment_files; - status = meta_->GetTableFilesBySegmentId(segment_id, segment_files); - if (!status.ok()) { - break; - } for (auto& file : segment_files) { if (file.file_type_ == meta::TableFileSchema::RAW || file.file_type_ == meta::TableFileSchema::TO_INDEX || file.file_type_ == meta::TableFileSchema::INDEX || file.file_type_ == meta::TableFileSchema::BACKUP) { @@ -354,7 +364,7 @@ MemTable::ApplyDeletes() { } } - status = meta_->UpdateTableFiles(table_files_to_update); + status = meta_->UpdateTableFilesRowCount(table_files_to_update); if (!status.ok()) { std::string err_msg = "Failed to apply deletes: " + status.ToString(); diff --git a/core/src/db/meta/Meta.h b/core/src/db/meta/Meta.h index b7cba78ca3..8c2f237e26 100644 --- a/core/src/db/meta/Meta.h +++ b/core/src/db/meta/Meta.h @@ -87,6 +87,9 @@ class Meta { virtual Status UpdateTableFiles(TableFilesSchema& files) = 0; + virtual Status + UpdateTableFilesRowCount(TableFilesSchema& files) = 0; + virtual Status UpdateTableIndex(const 
std::string& table_id, const TableIndex& index) = 0; diff --git a/core/src/db/meta/MetaTypes.h b/core/src/db/meta/MetaTypes.h index d8df8c2622..611df80d98 100644 --- a/core/src/db/meta/MetaTypes.h +++ b/core/src/db/meta/MetaTypes.h @@ -54,7 +54,7 @@ struct TableSchema { int64_t flag_ = 0; int64_t index_file_size_ = DEFAULT_INDEX_FILE_SIZE; int32_t engine_type_ = DEFAULT_ENGINE_TYPE; - std::string index_params_ = "{ \"nlist\": 16384 }"; + std::string index_params_ = "{}"; int32_t metric_type_ = DEFAULT_METRIC_TYPE; std::string owner_table_; std::string partition_tag_; diff --git a/core/src/db/meta/MySQLMetaImpl.cpp b/core/src/db/meta/MySQLMetaImpl.cpp index b1d28c2d34..3539f1f1b1 100644 --- a/core/src/db/meta/MySQLMetaImpl.cpp +++ b/core/src/db/meta/MySQLMetaImpl.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include "MetaConsts.h" #include "db/IDGenerator.h" @@ -1239,6 +1240,46 @@ MySQLMetaImpl::UpdateTableFiles(TableFilesSchema& files) { return Status::OK(); } +Status +MySQLMetaImpl::UpdateTableFilesRowCount(TableFilesSchema& files) { + try { + server::MetricCollector metric; + { + mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_); + + bool is_null_connection = (connectionPtr == nullptr); + if (is_null_connection) { + return Status(DB_ERROR, "Failed to connect to meta server(mysql)"); + } + + mysqlpp::Query updateTableFilesQuery = connectionPtr->query(); + + for (auto& file : files) { + std::string row_count = std::to_string(file.row_count_); + std::string updated_time = std::to_string(utils::GetMicroSecTimeStamp()); + + updateTableFilesQuery << "UPDATE " << META_TABLEFILES << " SET row_count = " << row_count + << " , updated_time = " << updated_time << " WHERE file_id = " << file.file_id_ + << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::UpdateTableFilesRowCount: " << updateTableFilesQuery.str(); + + if (!updateTableFilesQuery.exec()) { + return HandleException("QUERY ERROR WHEN UPDATING TABLE FILES", 
updateTableFilesQuery.error()); + } + + ENGINE_LOG_DEBUG << "Update file " << file.file_id_ << " row count to " << file.row_count_; + } + } // Scoped Connection + + ENGINE_LOG_DEBUG << "Update " << files.size() << " table files"; + } catch (std::exception& e) { + return HandleException("GENERAL ERROR WHEN UPDATING TABLE FILES ROW COUNT", e.what()); + } + + return Status::OK(); +} + Status MySQLMetaImpl::DescribeTableIndex(const std::string& table_id, TableIndex& index) { try { @@ -1326,8 +1367,12 @@ MySQLMetaImpl::DropTableIndex(const std::string& table_id) { } // set table index type to raw - dropTableIndexQuery << "UPDATE " << META_TABLES - << " SET engine_type = " << std::to_string(DEFAULT_ENGINE_TYPE) + dropTableIndexQuery << "UPDATE " << META_TABLES << " SET engine_type = " + << " (CASE" + << " WHEN metric_type in (" << (int32_t)MetricType::HAMMING << " ," + << (int32_t)MetricType::JACCARD << " ," << (int32_t)MetricType::TANIMOTO << ")" + << " THEN " << (int32_t)EngineType::FAISS_BIN_IDMAP << " ELSE " + << (int32_t)EngineType::FAISS_IDMAP << " END)" << " , index_params = '{}'" << " WHERE table_id = " << mysqlpp::quote << table_id << ";"; @@ -2022,6 +2067,7 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/) { auto now = utils::GetMicroSecTimeStamp(); std::set table_ids; + std::map segment_ids; // remove to_delete files try { @@ -2049,7 +2095,7 @@ MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/) mysqlpp::StoreQueryResult res = query.store(); TableFileSchema table_file; - std::vector idsToDelete; + std::vector delete_ids; int64_t clean_files = 0; for (auto& resRow : res) { @@ -2074,30 +2120,22 @@ MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/) server::CommonUtil::EraseFromCache(table_file.location_); if (table_file.file_type_ == (int)TableFileSchema::TO_DELETE) { - // If we are deleting a raw table file, it means it's okay to delete the entire segment 
directory. - // Else, we can only delete the single file - // TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky - if (utils::IsRawIndexType(table_file.engine_type_)) { - utils::DeleteSegment(options_, table_file); - std::string segment_dir; - utils::GetParentPath(table_file.location_, segment_dir); - ENGINE_LOG_DEBUG << "Remove segment directory: " << segment_dir; - } else { - utils::DeleteTableFilePath(options_, table_file); - ENGINE_LOG_DEBUG << "Remove table file: " << table_file.location_; - } + // delete file from disk storage + utils::DeleteTableFilePath(options_, table_file); + ENGINE_LOG_DEBUG << "Remove file id:" << table_file.id_ << " location:" << table_file.location_; - idsToDelete.emplace_back(std::to_string(table_file.id_)); + delete_ids.emplace_back(std::to_string(table_file.id_)); table_ids.insert(table_file.table_id_); + segment_ids.insert(std::make_pair(table_file.segment_id_, table_file)); - ++clean_files; + clean_files++; } } // delete file from meta - if (!idsToDelete.empty()) { + if (!delete_ids.empty()) { std::stringstream idsToDeleteSS; - for (auto& id : idsToDelete) { + for (auto& id : delete_ids) { idsToDeleteSS << "id = " << id << " OR "; } @@ -2213,6 +2251,51 @@ MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/) return HandleException("GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL", e.what()); } + // remove deleted segment folder + // don't remove segment folder until all its tablefiles has been deleted + try { + server::MetricCollector metric; + + { + mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_); + + bool is_null_connection = (connectionPtr == nullptr); + fiu_do_on("MySQLMetaImpl.CleanUpFilesWithTTL.RemoveDeletedSegmentFolder_NUllConnection", + is_null_connection = true); + fiu_do_on("MySQLMetaImpl.CleanUpFilesWithTTL.RemoveDeletedSegmentFolder_ThrowException", + throw std::exception();); + if (is_null_connection) { + return 
Status(DB_ERROR, "Failed to connect to meta server(mysql)"); + } + + int64_t remove_segments = 0; + for (auto& segment_id : segment_ids) { + mysqlpp::Query query = connectionPtr->query(); + query << "SELECT id" + << " FROM " << META_TABLEFILES << " WHERE segment_id = " << mysqlpp::quote << segment_id.first + << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::CleanUpFilesWithTTL: " << query.str(); + + mysqlpp::StoreQueryResult res = query.store(); + + if (res.empty()) { + utils::DeleteSegment(options_, segment_id.second); + std::string segment_dir; + utils::GetParentPath(segment_id.second.location_, segment_dir); + ENGINE_LOG_DEBUG << "Remove segment directory: " << segment_dir; + ++remove_segments; + } + } + + if (remove_segments > 0) { + ENGINE_LOG_DEBUG << "Remove " << remove_segments << " segments folder"; + } + } + } catch (std::exception& e) { + return HandleException("GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL", e.what()); + } + return Status::OK(); } diff --git a/core/src/db/meta/MySQLMetaImpl.h b/core/src/db/meta/MySQLMetaImpl.h index 452e04f362..5f65351650 100644 --- a/core/src/db/meta/MySQLMetaImpl.h +++ b/core/src/db/meta/MySQLMetaImpl.h @@ -82,6 +82,9 @@ class MySQLMetaImpl : public Meta { Status UpdateTableFiles(TableFilesSchema& files) override; + Status + UpdateTableFilesRowCount(TableFilesSchema& files) override; + Status DescribeTableIndex(const std::string& table_id, TableIndex& index) override; diff --git a/core/src/db/meta/SqliteMetaImpl.cpp b/core/src/db/meta/SqliteMetaImpl.cpp index ca37f2c258..5bf44f9776 100644 --- a/core/src/db/meta/SqliteMetaImpl.cpp +++ b/core/src/db/meta/SqliteMetaImpl.cpp @@ -685,6 +685,26 @@ SqliteMetaImpl::UpdateTableFiles(TableFilesSchema& files) { return Status::OK(); } +Status +SqliteMetaImpl::UpdateTableFilesRowCount(TableFilesSchema& files) { + try { + server::MetricCollector metric; + + // multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard 
meta_lock(meta_mutex_); + + for (auto& file : files) { + ConnectorPtr->update_all(set(c(&TableFileSchema::row_count_) = file.row_count_, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp()), + where(c(&TableFileSchema::file_id_) == file.file_id_)); + ENGINE_LOG_DEBUG << "Update file " << file.file_id_ << " row count to " << file.row_count_; + } + } catch (std::exception& e) { + return HandleException("Encounter exception when update table files row count", e.what()); + } + return Status::OK(); +} + Status SqliteMetaImpl::UpdateTableIndex(const std::string& table_id, const TableIndex& index) { try { @@ -804,8 +824,18 @@ SqliteMetaImpl::DropTableIndex(const std::string& table_id) { c(&TableFileSchema::file_type_) == (int)TableFileSchema::BACKUP)); // set table index type to raw + auto groups = ConnectorPtr->select(columns(&TableSchema::metric_type_), + where(c(&TableSchema::table_id_) == table_id)); + + int32_t raw_engine_type = DEFAULT_ENGINE_TYPE; + if (groups.size() == 1) { + int32_t metric_type_ = std::get<0>(groups[0]); + if (engine::utils::IsBinaryMetricType(metric_type_)) { + raw_engine_type = (int32_t)EngineType::FAISS_BIN_IDMAP; + } + } ConnectorPtr->update_all( - set(c(&TableSchema::engine_type_) = DEFAULT_ENGINE_TYPE, c(&TableSchema::index_params_) = "{}"), + set(c(&TableSchema::engine_type_) = raw_engine_type, c(&TableSchema::index_params_) = "{}"), where(c(&TableSchema::table_id_) == table_id)); ENGINE_LOG_DEBUG << "Successfully drop table index, table id = " << table_id; @@ -1189,29 +1219,21 @@ SqliteMetaImpl::FilesByType(const std::string& table_id, const std::vector& file_schema.metric_type_ = table_schema.metric_type_; switch (file_schema.file_type_) { - case (int)TableFileSchema::RAW: - ++raw_count; + case (int)TableFileSchema::RAW:++raw_count; break; - case (int)TableFileSchema::NEW: - ++new_count; + case (int)TableFileSchema::NEW:++new_count; break; - case (int)TableFileSchema::NEW_MERGE: - ++new_merge_count; + case 
(int)TableFileSchema::NEW_MERGE:++new_merge_count; break; - case (int)TableFileSchema::NEW_INDEX: - ++new_index_count; + case (int)TableFileSchema::NEW_INDEX:++new_index_count; break; - case (int)TableFileSchema::TO_INDEX: - ++to_index_count; + case (int)TableFileSchema::TO_INDEX:++to_index_count; break; - case (int)TableFileSchema::INDEX: - ++index_count; + case (int)TableFileSchema::INDEX:++index_count; break; - case (int)TableFileSchema::BACKUP: - ++backup_count; - break; - default: + case (int)TableFileSchema::BACKUP:++backup_count; break; + default:break; } auto status = utils::GetTableFilePath(options_, file_schema); @@ -1225,29 +1247,25 @@ SqliteMetaImpl::FilesByType(const std::string& table_id, const std::vector& std::string msg = "Get table files by type."; for (int file_type : file_types) { switch (file_type) { - case (int)TableFileSchema::RAW: - msg = msg + " raw files:" + std::to_string(raw_count); + case (int)TableFileSchema::RAW:msg = msg + " raw files:" + std::to_string(raw_count); break; - case (int)TableFileSchema::NEW: - msg = msg + " new files:" + std::to_string(new_count); + case (int)TableFileSchema::NEW:msg = msg + " new files:" + std::to_string(new_count); break; case (int)TableFileSchema::NEW_MERGE: - msg = msg + " new_merge files:" + std::to_string(new_merge_count); + msg = msg + " new_merge files:" + + std::to_string(new_merge_count); break; case (int)TableFileSchema::NEW_INDEX: - msg = msg + " new_index files:" + std::to_string(new_index_count); + msg = msg + " new_index files:" + + std::to_string(new_index_count); break; - case (int)TableFileSchema::TO_INDEX: - msg = msg + " to_index files:" + std::to_string(to_index_count); + case (int)TableFileSchema::TO_INDEX:msg = msg + " to_index files:" + std::to_string(to_index_count); break; - case (int)TableFileSchema::INDEX: - msg = msg + " index files:" + std::to_string(index_count); + case (int)TableFileSchema::INDEX:msg = msg + " index files:" + std::to_string(index_count); break; - case 
(int)TableFileSchema::BACKUP: - msg = msg + " backup files:" + std::to_string(backup_count); - break; - default: + case (int)TableFileSchema::BACKUP:msg = msg + " backup files:" + std::to_string(backup_count); break; + default:break; } } ENGINE_LOG_DEBUG << msg; @@ -1364,6 +1382,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/) { auto now = utils::GetMicroSecTimeStamp(); std::set table_ids; + std::map segment_ids; // remove to_delete files try { @@ -1413,23 +1432,16 @@ SqliteMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/ server::CommonUtil::EraseFromCache(table_file.location_); if (table_file.file_type_ == (int)TableFileSchema::TO_DELETE) { - // If we are deleting a raw table file, it means it's okay to delete the entire segment directory. - // Else, we can only delete the single file - // TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky - if (utils::IsRawIndexType(table_file.engine_type_)) { - utils::DeleteSegment(options_, table_file); - std::string segment_dir; - utils::GetParentPath(table_file.location_, segment_dir); - ENGINE_LOG_DEBUG << "Remove segment directory: " << segment_dir; - } else { - utils::DeleteTableFilePath(options_, table_file); - ENGINE_LOG_DEBUG << "Remove table file: " << table_file.location_; - } - // delete file from meta ConnectorPtr->remove(table_file.id_); + // delete file from disk storage + utils::DeleteTableFilePath(options_, table_file); + + ENGINE_LOG_DEBUG << "Remove file id:" << table_file.file_id_ << " location:" + << table_file.location_; table_ids.insert(table_file.table_id_); + segment_ids.insert(std::make_pair(table_file.segment_id_, table_file)); ++clean_files; } @@ -1504,6 +1516,32 @@ SqliteMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/ return HandleException("Encounter exception when delete table folder", e.what()); } + // remove deleted segment folder + // don't remove segment 
folder until all its tablefiles has been deleted + try { + fiu_do_on("SqliteMetaImpl.CleanUpFilesWithTTL.RemoveSegmentFolder_ThrowException", throw std::exception()); + server::MetricCollector metric; + + int64_t remove_segments = 0; + for (auto& segment_id : segment_ids) { + auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_), + where(c(&TableFileSchema::segment_id_) == segment_id.first)); + if (selected.size() == 0) { + utils::DeleteSegment(options_, segment_id.second); + std::string segment_dir; + utils::GetParentPath(segment_id.second.location_, segment_dir); + ENGINE_LOG_DEBUG << "Remove segment directory: " << segment_dir; + ++remove_segments; + } + } + + if (remove_segments > 0) { + ENGINE_LOG_DEBUG << "Remove " << remove_segments << " segments folder"; + } + } catch (std::exception& e) { + return HandleException("Encounter exception when delete table folder", e.what()); + } + return Status::OK(); } diff --git a/core/src/db/meta/SqliteMetaImpl.h b/core/src/db/meta/SqliteMetaImpl.h index e0b596da6c..0b65525e67 100644 --- a/core/src/db/meta/SqliteMetaImpl.h +++ b/core/src/db/meta/SqliteMetaImpl.h @@ -81,6 +81,9 @@ class SqliteMetaImpl : public Meta { Status UpdateTableFiles(TableFilesSchema& files) override; + Status + UpdateTableFilesRowCount(TableFilesSchema& files) override; + Status DescribeTableIndex(const std::string& table_id, TableIndex& index) override; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 523cdbaaf3..674638bcca 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -118,12 +118,12 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { // std::stringstream ss_res_id, ss_res_dist; // for (int i = 0; i < 10; ++i) { - // printf("%llu", res_ids[i]); + // printf("%llu", p_id[i]); // printf("\n"); - // printf("%.6f", 
res_dis[i]); + // printf("%.6f", p_dist[i]); // printf("\n"); - // ss_res_id << res_ids[i] << " "; - // ss_res_dist << res_dis[i] << " "; + // ss_res_id << p_id[i] << " "; + // ss_res_dist << p_dist[i] << " "; // } // std::cout << std::endl << "after search: " << std::endl; // std::cout << ss_res_id.str() << std::endl; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.cpp index 16d20142b3..e220b42c64 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/nsg/NSG.cpp @@ -698,13 +698,8 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co int64_t* ids, SearchParams& params) { std::vector> resset(nq); - if (k >= 45) { - params.search_length = k; - } - TimeRecorder rc("NsgIndex::search", 1); - // TODO(linxj): when to use openmp - if (nq <= 4) { + if (nq == 1) { GetNeighbors(query, resset[0], nsg, ¶ms); } else { #pragma omp parallel for @@ -733,15 +728,6 @@ NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, co } } rc.RecordSection("merge"); - - // ProfilerStart("xx.prof"); - // std::vector resset; - // GetNeighbors(query, resset, nsg, ¶ms); - // for (int i = 0; i < k; ++i) { - // ids[i] = resset[i].id; - // dist[i] = resset[i].distance; - //} - // ProfilerStop(); } void diff --git a/core/src/index/unittest/test_nsg/test_nsg.cpp b/core/src/index/unittest/test_nsg/test_nsg.cpp index 76db0cd8c2..074e3f2736 100644 --- a/core/src/index/unittest/test_nsg/test_nsg.cpp +++ b/core/src/index/unittest/test_nsg/test_nsg.cpp @@ -233,7 +233,7 @@ TEST_F(NSGInterfaceTest, comparetest) { // } // } // } -// printf("R@1 = %.4f\n", n_1 / float(nq)); +// printf("R@1 = %.4f\n", n_1 / float(nq));; // printf("R@10 = %.4f\n", n_10 / float(nq)); // printf("R@100 = %.4f\n", n_100 / float(nq)); //} diff --git a/core/src/scheduler/job/BuildIndexJob.cpp 
b/core/src/scheduler/job/BuildIndexJob.cpp index 35af905a75..0da63214be 100644 --- a/core/src/scheduler/job/BuildIndexJob.cpp +++ b/core/src/scheduler/job/BuildIndexJob.cpp @@ -10,10 +10,11 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. #include "scheduler/job/BuildIndexJob.h" -#include "utils/Log.h" #include +#include "utils/Log.h" + namespace milvus { namespace scheduler { @@ -34,7 +35,8 @@ BuildIndexJob::AddToIndexFiles(const engine::meta::TableFileSchemaPtr& to_index_ return false; } - SERVER_LOG_DEBUG << "BuildIndexJob " << id() << " add to_index file: " << to_index_file->id_; + SERVER_LOG_DEBUG << "BuildIndexJob " << id() << " add to_index file: " << to_index_file->id_ + << ", location: " << to_index_file->location_; to_index_files_[to_index_file->id_] = to_index_file; } diff --git a/core/src/scheduler/task/BuildIndexTask.cpp b/core/src/scheduler/task/BuildIndexTask.cpp index 13d3b4a611..feec750e5a 100644 --- a/core/src/scheduler/task/BuildIndexTask.cpp +++ b/core/src/scheduler/task/BuildIndexTask.cpp @@ -12,11 +12,13 @@ #include "scheduler/task/BuildIndexTask.h" #include + #include #include #include #include +#include "db/Utils.h" #include "db/engine/EngineFactory.h" #include "metrics/Metrics.h" #include "scheduler/job/BuildIndexJob.h" @@ -35,8 +37,8 @@ XBuildIndexTask::XBuildIndexTask(TableFileSchemaPtr file, TaskLabelPtr label) if (file->file_type_ == TableFileSchema::FILE_TYPE::RAW || file->file_type_ == TableFileSchema::FILE_TYPE::TO_INDEX || file->file_type_ == TableFileSchema::FILE_TYPE::BACKUP) { - engine_type = server::ValidationUtil::IsBinaryMetricType(file->metric_type_) ? EngineType::FAISS_BIN_IDMAP - : EngineType::FAISS_IDMAP; + engine_type = engine::utils::IsBinaryMetricType(file->metric_type_) ? 
EngineType::FAISS_BIN_IDMAP + : EngineType::FAISS_IDMAP; } else { engine_type = (EngineType)file->engine_type_; } @@ -206,7 +208,7 @@ XBuildIndexTask::Execute() { // step 6: update meta table_file.file_type_ = engine::meta::TableFileSchema::INDEX; table_file.file_size_ = index->PhysicalSize(); - table_file.row_count_ = index->Count(); + table_file.row_count_ = file_->row_count_; // index->Count(); auto origin_file = *file_; origin_file.file_type_ = engine::meta::TableFileSchema::BACKUP; diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index 0b32f35ab5..857965dd2d 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -9,7 +9,10 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License. +#include "scheduler/task/SearchTask.h" + #include + #include #include #include @@ -21,7 +24,6 @@ #include "metrics/Metrics.h" #include "scheduler/SchedInst.h" #include "scheduler/job/SearchJob.h" -#include "scheduler/task/SearchTask.h" #include "segment/SegmentReader.h" #include "utils/Log.h" #include "utils/TimeRecorder.h" @@ -101,7 +103,8 @@ XSearchTask::XSearchTask(const std::shared_ptr& context, TableF if (file_) { // distance -- value 0 means two vectors equal, ascending reduce, L2/HAMMING/JACCARD/TONIMOTO ... 
// similarity -- infinity value means two vectors equal, descending reduce, IP - if (file_->metric_type_ == static_cast(MetricType::IP)) { + if (file_->metric_type_ == static_cast(MetricType::IP) && + file_->engine_type_ != static_cast(EngineType::FAISS_PQ)) { ascending_reduce = false; } @@ -109,8 +112,8 @@ XSearchTask::XSearchTask(const std::shared_ptr& context, TableF if (file->file_type_ == TableFileSchema::FILE_TYPE::RAW || file->file_type_ == TableFileSchema::FILE_TYPE::TO_INDEX || file->file_type_ == TableFileSchema::FILE_TYPE::BACKUP) { - engine_type = server::ValidationUtil::IsBinaryMetricType(file->metric_type_) ? EngineType::FAISS_BIN_IDMAP - : EngineType::FAISS_IDMAP; + engine_type = engine::utils::IsBinaryMetricType(file->metric_type_) ? EngineType::FAISS_BIN_IDMAP + : EngineType::FAISS_IDMAP; } else { engine_type = (EngineType)file->engine_type_; } @@ -263,13 +266,21 @@ XSearchTask::Execute() { // step 3: pick up topk result auto spec_k = file_->row_count_ < topk ? file_->row_count_ : topk; - if (search_job->GetResultIds().front() == -1 && search_job->GetResultIds().size() > spec_k) { - // initialized results set - search_job->GetResultIds().resize(spec_k); - search_job->GetResultDistances().resize(spec_k); + if (spec_k == 0) { + ENGINE_LOG_WARNING << "Searching in an empty file. 
file location = " << file_->location_; } + { std::unique_lock lock(search_job->mutex()); + + if (search_job->GetResultIds().size() > spec_k) { + if (search_job->GetResultIds().front() == -1) { + // initialized results set + search_job->GetResultIds().resize(spec_k * nq); + search_job->GetResultDistances().resize(spec_k * nq); + } + } + XSearchTask::MergeTopkToResultSet(output_ids, output_distance, spec_k, nq, topk, ascending_reduce, search_job->GetResultIds(), search_job->GetResultDistances()); } diff --git a/core/src/server/Config.cpp b/core/src/server/Config.cpp index f1a3a421f7..0395d673a4 100644 --- a/core/src/server/Config.cpp +++ b/core/src/server/Config.cpp @@ -667,7 +667,7 @@ Config::CheckConfigVersion(const std::string& value) { std::string msg = "Invalid config version: " + value + ". Expected config version: " + milvus_config_version_map.at(MILVUS_VERSION); SERVER_LOG_ERROR << msg; - // return Status(SERVER_INVALID_ARGUMENT, msg); + return Status(SERVER_INVALID_ARGUMENT, msg); } } return Status::OK(); diff --git a/core/src/server/delivery/request/CreateIndexRequest.cpp b/core/src/server/delivery/request/CreateIndexRequest.cpp index 5836b3cc09..521fe7aa58 100644 --- a/core/src/server/delivery/request/CreateIndexRequest.cpp +++ b/core/src/server/delivery/request/CreateIndexRequest.cpp @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. 
#include "server/delivery/request/CreateIndexRequest.h" +#include "db/Utils.h" #include "server/Config.h" #include "server/DBWrapper.h" #include "utils/Log.h" @@ -83,7 +84,7 @@ CreateIndexRequest::OnExecute() { status = DBWrapper::DB()->DescribeTable(table_info); int32_t adapter_index_type = index_type_; - if (ValidationUtil::IsBinaryMetricType(table_info.metric_type_)) { // binary vector not allow + if (engine::utils::IsBinaryMetricType(table_info.metric_type_)) { // binary vector not allow if (adapter_index_type == static_cast(engine::EngineType::FAISS_IDMAP)) { adapter_index_type = static_cast(engine::EngineType::FAISS_BIN_IDMAP); } else if (adapter_index_type == static_cast(engine::EngineType::FAISS_IVFFLAT)) { diff --git a/core/src/server/delivery/request/CreateTableRequest.cpp b/core/src/server/delivery/request/CreateTableRequest.cpp index 15d07b8cfd..d92db5a94d 100644 --- a/core/src/server/delivery/request/CreateTableRequest.cpp +++ b/core/src/server/delivery/request/CreateTableRequest.cpp @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. 
#include "server/delivery/request/CreateTableRequest.h" +#include "db/Utils.h" #include "server/DBWrapper.h" #include "server/delivery/request/BaseRequest.h" #include "utils/Log.h" @@ -78,7 +79,7 @@ CreateTableRequest::OnExecute() { table_info.metric_type_ = metric_type_; // some metric type only support binary vector, adapt the index type - if (ValidationUtil::IsBinaryMetricType(metric_type_)) { + if (engine::utils::IsBinaryMetricType(metric_type_)) { if (table_info.engine_type_ == static_cast(engine::EngineType::FAISS_IDMAP)) { table_info.engine_type_ = static_cast(engine::EngineType::FAISS_BIN_IDMAP); } else if (table_info.engine_type_ == static_cast(engine::EngineType::FAISS_IVFFLAT)) { diff --git a/core/src/server/delivery/request/InsertRequest.cpp b/core/src/server/delivery/request/InsertRequest.cpp index 729dc4d292..a739d86b9f 100644 --- a/core/src/server/delivery/request/InsertRequest.cpp +++ b/core/src/server/delivery/request/InsertRequest.cpp @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. 
#include "server/delivery/request/InsertRequest.h" +#include "db/Utils.h" #include "server/DBWrapper.h" #include "utils/CommonUtil.h" #include "utils/Log.h" @@ -115,7 +116,7 @@ InsertRequest::OnExecute() { #endif // step 4: some metric type doesn't support float vectors if (!vectors_data_.float_data_.empty()) { // insert float vectors - if (ValidationUtil::IsBinaryMetricType(table_schema.metric_type_)) { + if (engine::utils::IsBinaryMetricType(table_schema.metric_type_)) { return Status(SERVER_INVALID_ROWRECORD_ARRAY, "Table metric type doesn't support float vectors."); } @@ -131,7 +132,7 @@ InsertRequest::OnExecute() { "The vector dimension must be equal to the table dimension."); } } else if (!vectors_data_.binary_data_.empty()) { // insert binary vectors - if (!ValidationUtil::IsBinaryMetricType(table_schema.metric_type_)) { + if (!engine::utils::IsBinaryMetricType(table_schema.metric_type_)) { return Status(SERVER_INVALID_ROWRECORD_ARRAY, "Table metric type doesn't support binary vectors."); } diff --git a/core/src/server/delivery/request/SearchRequest.cpp b/core/src/server/delivery/request/SearchRequest.cpp index 0b12f91f5d..e31be87b40 100644 --- a/core/src/server/delivery/request/SearchRequest.cpp +++ b/core/src/server/delivery/request/SearchRequest.cpp @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. 
#include "server/delivery/request/SearchRequest.h" +#include "db/Utils.h" #include "server/DBWrapper.h" #include "utils/CommonUtil.h" #include "utils/Log.h" @@ -103,7 +104,7 @@ SearchRequest::OnExecute() { rc.RecordSection("check validation"); // step 4: check metric type - if (ValidationUtil::IsBinaryMetricType(table_schema.metric_type_)) { + if (engine::utils::IsBinaryMetricType(table_schema.metric_type_)) { // check prepared binary data if (vectors_data_.binary_data_.size() % vector_count != 0) { return Status(SERVER_INVALID_ROWRECORD_ARRAY, diff --git a/core/src/server/web_impl/README.md b/core/src/server/web_impl/README.md index ac447a529c..3abe5a462c 100644 --- a/core/src/server/web_impl/README.md +++ b/core/src/server/web_impl/README.md @@ -4,34 +4,38 @@ - [Overview](#overview) - [API Reference](#api-reference) - - [`/state`](#state) - - [`/devices`](#devices) - - [`/config/advanced` (GET)](#configadvanced-get) - - [`/config/advanced` (PUT)](#configadvanced-put) - - [`/config/advanced` (OPTIONS)](#configadvanced-options) - - [`/config/gpu_resources` (GET)](#configgpu_resources-get) - - [`/config/gpu_resources` (PUT)](#configgpu_resources-put) - - [`/config/gpu_resources` (OPTIONS)](#configgpu_resources-options) - - [`/tables` (GET)](#tables-get) - - [`/tables` (POST)](#tables-post) - - [`/tables` (OPTIONS)](#tables-options) - - [`/tables/{table_name}` (GET)](#tablestable_name-get) - - [`/tables/{table_name}` (DELETE)](#tablestable_name-delete) - - [`/tables/{table_name}` (OPTIONS)](#tablestable_name-options) - - [`/tables/{table_name}/indexes` (GET)](#tablestable_nameindexes-get) - - [`/tables/{table_name}/indexes` (POST)](#tablestable_nameindexes-post) - - [`/tables/{table_name}/indexes` (DELETE)](#tablestable_nameindexes-delete) - - [`/tables/{table_name}/indexes` (OPTIONS)](#tablestable_nameindexes-options) - - [`/tables/{table_name}/partitions` (GET)](#tablestable_namepartitions-get) - - [`/tables/{table_name}/partitions` 
(POST)](#tablestable_namepartitions-post) - - [`/tables/{table_name}/partitions` (OPTIONS)](#tablestable_namepartitions-options) - - [`/tables/{table_name}/partitions/{partition_tag}` (DELETE)](#tablestable_namepartitionspartition_tag-delete) - - [`/tables/{table_name}/partitions/{partition_tag}` (OPTIONS)](#tablestable_namepartitionspartition_tag-options) - - [`/tables/{table_name}/vectors` (PUT)](#tablestable_namevectors-put) - - [`/tables/{table_name}/vectors` (POST)](#tablestable_namevectors-post) - - [`/tables/{table_name}/vectors` (OPTIONS)](#tablestable_namevectors-options) - - [`/system/{msg}` (GET)](#systemmsg-get) -- [Error Codes](#error-codes) + - [`/state`](#state) + - [`/devices`](#devices) + - [`/config/advanced` (GET)](#configadvanced-get) + - [`/config/advanced` (PUT)](#configadvanced-put) + - [`/config/advanced` (OPTIONS)](#configadvanced-options) + - [`/config/gpu_resources` (GET)](#configgpu_resources-get) + - [`/config/gpu_resources` (PUT)](#configgpu_resources-put) + - [`/config/gpu_resources` (OPTIONS)](#configgpu_resources-options) + - [`/collections` (GET)](#collections-get) + - [`/collections` (POST)](#collections-post) + - [`/collections` (OPTIONS)](#collections-options) + - [`/collections/{collection_name}` (GET)](#collectionscollection_name-get) + - [`/collections/{collection_name}` (DELETE)](#collectionscollection_name-delete) + - [`/collections/{collection_name}` (OPTIONS)](#collectionscollection_name-options) + - [`/collections/{collection_name}/indexes` (GET)](#collectionscollection_nameindexes-get) + - [`/collections/{collection_name}/indexes` (POST)](#collectionscollection_nameindexes-post) + - [`/collections/{collection_name}/indexes` (DELETE)](#collectionscollection_nameindexes-delete) + - [`/collections/{collection_name}/indexes` (OPTIONS)](#collectionscollection_nameindexes-options) + - [`/collections/{collection_name}/partitions` (GET)](#collectionscollection_namepartitions-get) + - [`/collections/{collection_name}/partitions` 
(POST)](#collectionscollection_namepartitions-post) + - [`/collections/{collection_name}/partitions` (OPTIONS)](#collectionscollection_namepartitions-options) + - [`/collections/{collection_name}/partitions` (DELETE)](#collectionscollection_namepartitions-delete) + - [`/collections/{collection_name}/segments` (GET)](#collectionscollection_namesegments-get) + - [`/collections/{collection_name}/segments/{segment_name}/vectors` (GET)](#collectionscollection_namesegmentssegment_namevectors-get) + - [`/collections/{collection_name}/segments/{segment_name}/ids` (GET)](#collectionscollection_namesegmentssegment_nameids-get) + - [`/collections/{collection_name}/vectors` (PUT)](#collectionscollection_namevectors-put) + - [`/collections/{collection_name}/vectors` (POST)](#collectionscollection_namevectors-post) + - [`/collections/{collection_name}/vectors` (GET)](#collectionscollection_namevectorsidvector_id-get) + - [`/collections/{collection_name}/vectors` (OPTIONS)](#collectionscollection_namevectors-options) + - [`/system/{msg}` (GET)](#systemmsg-get) + - [`system/{op}` (PUT)](#systemop-put) +- [Error Codes](#error-codes) @@ -47,31 +51,31 @@ Checks whether the web server is running. #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/state` | -| Header | `accept: application/json` | -| Body | N/A | -| Method | GET | +| Request Component | Value | +| ----------------- | -------------------------- | +| Name | `/state` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | #### Response -| Status code | Description | -|-----------------|---| -| 200 | The request is successful.| +| Status code | Description | +| ----------- | -------------------------- | +| 200 | The request is successful. 
| #### Example ##### Request ```shell -$ curl -X GET "http://192.168.1.65:19121/state" -H "accept: application/json" +$ curl -X GET "http://127.0.0.1:19121/state" -H "accept: application/json" ``` ##### Response ```json -{"message":"Success","code":0} +{ "message": "Success", "code": 0 } ``` ### `/devices` @@ -80,33 +84,32 @@ Gets CPU/GPU information from the host. #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/devices` | -| Header | `accept: application/json` | -| Body | N/A | -| Method | GET | +| Request Component | Value | +| ----------------- | -------------------------- | +| Name | `/devices` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | #### Response -| Status code | Description | -|-----------------|---| -| 200 | The request is successful.| -| 400 | The request is incorrect. Refer to the error message for details. | - +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. 
| #### Example ##### Request ```shell -$ curl -X GET "http://192.168.1.65:19121/devices" -H "accept: application/json" +$ curl -X GET "http://127.0.0.1:19121/devices" -H "accept: application/json" ``` ##### Response ```json -{"cpu":{"memory":31},"gpus":{"GPU0":{"memory":5}}} +{ "cpu": { "memory": 31 }, "gpus": { "GPU0": { "memory": 5 } } } ``` ### `/config/advanced` (GET) @@ -115,32 +118,37 @@ Gets the values of parameters in `cache_config` and `engine_config` of the Milvu #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/config/advanced` | -| Header | `accept: application/json` | -| Body | N/A | -| Method | GET | +| Request Component | Value | +| ----------------- | -------------------------- | +| Name | `/config/advanced` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | #### Response -| Status code | Description | -|-----------------|---| -| 200 | The request is successful.| -| 400 | The request is incorrect. Refer to the error message for details. | +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | #### Example ##### Request ```shell -$ curl -X GET "http://192.168.1.65:19121/config/advanced" -H "accept: application/json" +$ curl -X GET "http://127.0.0.1:19121/config/advanced" -H "accept: application/json" ``` ##### Response ```json -{"cpu_cache_capacity":4,"cache_insert_data":false,"use_blas_threshold":1100,"gpu_search_threshold":1000} +{ + "cpu_cache_capacity": 4, + "cache_insert_data": false, + "use_blas_threshold": 1100, + "gpu_search_threshold": 1000 +} ``` ### `/config/advanced` (PUT) @@ -169,33 +177,32 @@ Updates the values of parameters in `cache_config` and `engine_config` of the Mi ##### Body Parameters -| Parameter | Description | Required? 
| -|-----------------|---|------| -| `cpu_cache_capacity` | Value of `cpu_cache_capacity` in the Milvus configuration file. The default is 4.| No | -| `cache_insert_data` | Value of `cache_insert_data` in the Milvus configuration file. The default is false. | No | -| `use_blas_threshold` | Value of `use_blas_threshold` in the Milvus configuration file. The default is 1100. | No | -| `gpu_search_threshold` | Value of `gpu_search_threshold` in the Milvus configuration file. The default is 1000. | No | +| Parameter | Description | Required? | +| ---------------------- | -------------------------------------------------------------------------------------- | --------- | +| `cpu_cache_capacity` | Value of `cpu_cache_capacity` in the Milvus configuration file. The default is 4. | No | +| `cache_insert_data` | Value of `cache_insert_data` in the Milvus configuration file. The default is false. | No | +| `use_blas_threshold` | Value of `use_blas_threshold` in the Milvus configuration file. The default is 1100. | No | +| `gpu_search_threshold` | Value of `gpu_search_threshold` in the Milvus configuration file. The default is 1000. | No | #### Response -| Status code | Description | -|-----------------|---| -| 200 | The request is successful.| -| 400 | The request is incorrect. Refer to the error message for details. | +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. 
| #### Example ##### Request ```shell -$ curl -X PUT "http://192.168.1.65:19121/config/advanced" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"cpu_cache_capacity\":4,\"cache_insert_data\":false,\"use_blas_threshold\":1100,\"gpu_search_threshold\":1000}" +$ curl -X PUT "http://127.0.0.1:19121/config/advanced" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"cpu_cache_capacity\":4,\"cache_insert_data\":false,\"use_blas_threshold\":1100,\"gpu_search_threshold\":1000}" ``` ##### Response - ```json -{"message": "OK","code": 0} +{ "message": "OK", "code": 0 } ``` ### `/config/advanced` (OPTIONS) @@ -204,20 +211,19 @@ Use this API for Cross-Origin Resource Sharing (CORS). #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/config/advanced` | -| Header | N/A | -| Body | N/A | -| Method | OPTIONS | - +| Request Component | Value | +| ----------------- | ------------------ | +| Name | `/config/advanced` | +| Header | N/A | +| Body | N/A | +| Method | OPTIONS | #### Example ##### Request ```shell -$ curl -X OPTIONS "http://192.168.1.65:19121/config/advanced" +$ curl -X OPTIONS "http://127.0.0.1:19121/config/advanced" ``` ### `/config/gpu_resources` (GET) @@ -228,33 +234,37 @@ Gets the parameter values in `gpu_resource_config` of the Milvus configuration f #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/config/gpu_resources` | -| Header | `accept: application/json` | -| Body | N/A | -| Method | GET | +| Request Component | Value | +| ----------------- | -------------------------- | +| Name | `/config/gpu_resources` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | #### Response -| Status code | Description | -|-----------------|---| -| 200 | The request is successful.| -| 400 | The request is incorrect. Refer to the error message for details. 
| +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | #### Example ##### Request ```shell -$ curl -X GET "http://192.168.1.65:19121/config/gpu_resources" -H "accept: application/json" +$ curl -X GET "http://127.0.0.1:19121/config/gpu_resources" -H "accept: application/json" ``` ##### Response - ```json -{"enable":true,"cache_capacity":1,"search_resources":["GPU0"],"build_index_resources":["GPU0"]} +{ + "enable": true, + "cache_capacity": 1, + "search_resources": ["GPU0"], + "build_index_resources": ["GPU0"] +} ``` ### `/config/gpu_resources` (PUT) @@ -283,32 +293,32 @@ Updates the parameter values in `gpu_resource_config` of the Milvus configuratio ##### Body Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `enable` | Specifies whether to enable GPU resources. | Yes | -| `cache_capacity` | Size of GPU memory per card used for cache in GBs. | Yes | -| `search_resources` | GPU devices used for search computation, must be in format `gpux`. | Yes | -| `build_index_resources` | GPU devices used for index building, must be in format `gpux`. | Yes | +| Parameter | Description | Required? | +| ----------------------- | ------------------------------------------------------------------ | --------- | +| `enable` | Specifies whether to enable GPU resources. | Yes | +| `cache_capacity` | Size of GPU memory per card used for cache in GBs. | Yes | +| `search_resources` | GPU devices used for search computation, must be in format `gpux`. | Yes | +| `build_index_resources` | GPU devices used for index building, must be in format `gpux`. | Yes | #### Response -| Status code | Description | -|-----------------|---| -| 200 | The request is successful.| -| 400 | The request is incorrect. Refer to the error message for details. 
| +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | #### Example ##### Request ```shell -$ curl -X PUT "http://192.168.1.65:19121/config/gpu_resources" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"enable\":true,\"cache_capacity\":1,\"search_resources\":[\"GPU0\"],\"build_index_resources\":[\"GPU0\"]}" +$ curl -X PUT "http://127.0.0.1:19121/config/gpu_resources" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"enable\":true,\"cache_capacity\":1,\"search_resources\":[\"GPU0\"],\"build_index_resources\":[\"GPU0\"]}" ``` ##### Response ```json -{"message": "OK","code": 0} +{ "message": "OK", "code": 0 } ``` ### `/config/gpu_resources` (OPTIONS) @@ -319,67 +329,78 @@ Use this API for Cross-Origin Resource Sharing (CORS). #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/config/gpu_resources` | -| Header | N/A | -| Body | N/A | -| Method | OPTIONS | +| Request Component | Value | +| ----------------- | ----------------------- | +| Name | `/config/gpu_resources` | +| Header | N/A | +| Body | N/A | +| Method | OPTIONS | #### Example ##### Request ```shell -$ curl -X OPTIONS "http://192.168.1.65:19121/config/gpu_resources" +$ curl -X OPTIONS "http://127.0.0.1:19121/config/gpu_resources" ``` -### `/tables` (GET) +### `/collections` (GET) -Gets all tables starting from `offset` and ends with `page_size`. +Gets all collections starting from `offset` and ends with `page_size`. 
#### Request -| Request Component | Value | -|-----------------|---| -| Name | `/tables` | -| Header | `accept: application/json` | -| Body | N/A | -| Method | GET | +| Request Component | Value | +| ----------------- | -------------------------- | +| Name | `/collections` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | ##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `offset` | Row offset from which the data page starts. The default is 0. | No | -| `page_size` | Size of the data page. The default is 10. | No | - +| Parameter | Description | Required? | +| ----------- | ------------------------------------------------------------- | --------- | +| `offset` | Row offset from which the data page starts. The default is 0. | No | +| `page_size` | Size of the data page. The default is 10. | No | #### Response -| Status code | Description | -|-----------------|---| -| 200 | The request is successful.| -| 400 | The request is incorrect. Refer to the error message for details. | +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. 
| #### Example ##### Request ```shell -$ curl -X GET "http://192.168.1.65:19121/tables?offset=0&page_size=1" -H "accept: application/json" +$ curl -X GET "http://127.0.0.1:19121/collections?offset=0&page_size=1" -H "accept: application/json" ``` ##### Response - ```json -{"tables":[{"table_name":"test_table","dimension":1,"index_file_size":10,"metric_type":"L2","count":0,"index":"FLAT","nlist":16384}],"count":58} +{ + "collections": [ + { + "collection_name": "test_collection", + "dimension": 1, + "index_file_size": 10, + "metric_type": "L2", + "count": 0, + "index": "FLAT", + "index_params": {"nlist": 4096} + } + ], + "count": 58 +} ``` -### `/tables` (POST) +### `/collections` (POST) -Creates a table. +Creates a collection. #### Request @@ -389,7 +410,7 @@ Creates a table. Header
accept: application/json
Body

 {
-  "table_name": string,
+  "collection_name": string,
   "dimension": integer($int64),
   "index_file_size": integer($int64),
   "metric_type": string
@@ -401,211 +422,241 @@ Creates a table.
 
 ##### Body Parameters
 
-| Parameter  | Description  |  Required? |
-|-----------------|---|------|
-| `table_name`     |   The name of the table to create, which must be unique within its database.  | Yes   |
-| `dimension`  |  The dimension of the vectors that are to be inserted into the created table. |  Yes  |
-| `index_file_size`    |  Threshold value that triggers index building for raw data files. The default is 1024.   |  No |
-| `metric_type`    |   The method vector distances are compared in Milvus. The default is L2. Currently supported metrics include `L2` (Euclidean distance), `IP` (Inner Product), `HAMMING` (Hamming distance), `JACCARD` (Jaccard distance), and `TANIMOTO` (Tanomoto distance).    |   No  |
+| Parameter         | Description                                                                                                                                                                                                                                                 | Required? |
+| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- |
+| `collection_name` | The name of the collection to create, which must be unique within its database.                                                                                                                                                                             | Yes       |
+| `dimension`       | The dimension of the vectors that are to be inserted into the created collection.                                                                                                                                                                           | Yes       |
+| `index_file_size` | Threshold value that triggers index building for raw data files. The default is 1024.                                                                                                                                                                       | No        |
+| `metric_type`     | The method vector distances are compared in Milvus. The default is L2. Currently supported metrics include `L2` (Euclidean distance), `IP` (Inner Product), `HAMMING` (Hamming distance), `JACCARD` (Jaccard distance), and `TANIMOTO` (Tanimoto distance). | No        |
 
 #### Response
 
-| Status code    | Description |
-|-----------------|---|
-| 201     | Created |
-| 400     | The request is incorrect. Refer to the error message for details. |
+| Status code | Description                                                       |
+| ----------- | ----------------------------------------------------------------- |
+| 201         | Created                                                           |
+| 400         | The request is incorrect. Refer to the error message for details. |
 
 #### Example
 
 ##### Request
 
 ```shell
-$ curl -X POST "http://192.168.1.65:19121/tables" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"table_name\":\"test_table\",\"dimension\":1,\"index_file_size\":10,\"metric_type\":\"L2\"}"
+$ curl -X POST "http://127.0.0.1:19121/collections" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"collection_name\":\"test_collection\",\"dimension\":1,\"index_file_size\":10,\"metric_type\":\"L2\"}"
 ```
 
 ##### Response
 
-
 ```json
-{"message":"OK","code":0}
+{ "message": "OK", "code": 0 }
 ```
 
-### `/tables` (OPTIONS)
+### `/collections` (OPTIONS)
 
 Use this API for Cross-Origin Resource Sharing (CORS).
 
 #### Request
 
-| Request Component     | Value  |
-|-----------------|---|
-| Name     | `/tables`  |
-| Header  | N/A  |
-| Body    |   N/A |
-| Method    |   OPTIONS |
-
+| Request Component | Value          |
+| ----------------- | -------------- |
+| Name              | `/collections` |
+| Header            | N/A            |
+| Body              | N/A            |
+| Method            | OPTIONS        |
 
 #### Example
 
 ##### Request
 
 ```shell
-$ curl -X OPTIONS "http://192.168.1.65:19121/tables"
+$ curl -X OPTIONS "http://127.0.0.1:19121/collections"
 ```
 
-### `/tables/{table_name}` (GET)
+### `/collections/{collection_name}` (GET)
 
-Gets all information about a table by name.
+Gets all information about a collection by name.
 
 #### Request
 
-| Request Component     | Value  |
-|-----------------|---|
-| Name     | `/tables/{table_name}`  |
-| Header  | `accept: application/json`  |
-| Body    |   N/A |
-| Method    |   GET |
-
+| Request Component | Value                            |
+| ----------------- | -------------------------------- |
+| Name              | `/collections/{collection_name}` |
+| Header            | `accept: application/json`       |
+| Body              | N/A                              |
+| Method            | GET                              |
 
 ##### Query Parameters
 
-| Parameter  | Description  |  Required? |
-|-----------------|---|------|
-| `table_name`     | Name of the table.   | Yes   |
-
+| Parameter         | Description                                                                                                                                                                                                                                                                                                               | Required? |
+| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- |
+| `collection_name` | Name of the collection.                                                                                                                                                                                                                                                                                                   | Yes       |
+| `info`            | Type of information to acquire. `info` must either be empty or `stat`. When `info` is empty, Milvus returns collection name, dimension, index file size, metric type, count, index type, and nlist of the collection. When `info` is `stat`, Milvus returns the collection count, partition status, and segment status.   | No        |
 
 #### Response
 
-| Status code    | Description |
-|-----------------|---|
-| 200     | The request is successful.|
-| 400     | The request is incorrect. Refer to the error message for details. |
-| 404     | The required resource does not exist. |
+| Status code | Description                                                       |
+| ----------- | ----------------------------------------------------------------- |
+| 200         | The request is successful.                                        |
+| 400         | The request is incorrect. Refer to the error message for details. |
+| 404         | The required resource does not exist.                             |
 
 #### Example
 
 ##### Request
 
 ```shell
-$ curl -X GET "http://192.168.1.65:19121/tables/test_table" -H "accept: application/json"
+$ curl -X GET "http://127.0.0.1:19121/collections/test_collection" -H "accept: application/json"
 ```
 
 ##### Response
 
 ```json
-{"table_name":"test_table","dimension":1,"index_file_size":10,"metric_type":"L2","count":0,"index":"FLAT","nlist":16384}
+{
+  "collection_name": "test_collection",
+  "dimension": 1,
+  "index_file_size": 10,
+  "metric_type": "L2",
+  "count": 0,
+  "index": "FLAT",
+  "index_params": {"nlist": 16384}
+}
 ```
 
-### `/tables/{table_name}` (DELETE)
+##### Request
 
-Drops a table by name.
+```shell
+$ curl -X GET "http://127.0.0.1:19121/collections/test_collection?info=stat" -H "accept: application/json"
+```
+
+##### Response
+
+```json
+{
+  "count": 150000,
+  "partitions_stat": [
+    {
+      "count": 1000,
+      "partition_tag": "_default",
+      "segments_stat": [
+        {
+          "count": 1000,
+          "index": "FLAT",
+          "segment_name": "1583727170217439000",
+          "size": 5284922
+        }
+      ]
+    }
+  ]
+}
+```
+
+### `/collections/{collection_name}` (DELETE)
+
+Drops a collection by name.
 
 #### Request
 
-| Request Component     | Value  |
-|-----------------|-----|
-| Name     | `/tables/{table_name}`  |
-| Header  | `accept: application/json`  |
-| Body    |   N/A |
-| Method    |   DELETE |
+| Request Component | Value                            |
+| ----------------- | -------------------------------- |
+| Name              | `/collections/{collection_name}` |
+| Header            | `accept: application/json`       |
+| Body              | N/A                              |
+| Method            | DELETE                           |
 
 ##### Query Parameters
 
-| Parameter  | Description  |  Required? |
-|-----------------|---|------|
-| `table_name`     | Name of the table.   | Yes   |
+| Parameter         | Description             | Required? |
+| ----------------- | ----------------------- | --------- |
+| `collection_name` | Name of the collection. | Yes       |
 
 #### Response
 
-| Status code    | Description |
-|-----------------|---|
-| 204     | Deleted|
-| 400     | The request is incorrect. Refer to the error message for details. |
-| 404     | The required resource does not exist. |
+| Status code | Description                                                       |
+| ----------- | ----------------------------------------------------------------- |
+| 204         | Deleted                                                           |
+| 400         | The request is incorrect. Refer to the error message for details. |
+| 404         | The required resource does not exist.                             |
 
 #### Example
 
 ##### Request
 
-
 ```shell
-$ curl -X DELETE "http://192.168.1.65:19121/tables/test_table" -H "accept: application/json"
+$ curl -X DELETE "http://127.0.0.1:19121/collections/test_collection" -H "accept: application/json"
 ```
 
 If the deletion is successful, no message will be returned.
 
-### `/tables/{table_name}` (OPTIONS)
+### `/collections/{collection_name}` (OPTIONS)
 
 Use this API for Cross-Origin Resource Sharing (CORS).
 
 #### Request
 
-| Request Component     | Value  |
-|-----------------|-----|
-| Name     | `/tables/{table_name}`  |
-| Header  | N/A  |
-| Body    |   N/A |
-| Method    |   OPTIONS |
+| Request Component | Value                            |
+| ----------------- | -------------------------------- |
+| Name              | `/collections/{collection_name}` |
+| Header            | N/A                              |
+| Body              | N/A                              |
+| Method            | OPTIONS                          |
 
 #### Query Parameters
 
-| Parameter  | Description  |  Required? |
-|-----------------|---|------|
-| `table_name`     | Name of the table.   | Yes   |
-
+| Parameter         | Description             | Required? |
+| ----------------- | ----------------------- | --------- |
+| `collection_name` | Name of the collection. | Yes       |
 
 #### Example
 
 ##### Request
 
 ```shell
-$ curl -X OPTIONS "http://192.168.1.65:19121/tables/test_table"
+$ curl -X OPTIONS "http://127.0.0.1:19121/collections/test_collection"
 ```
 
-### `/tables/{table_name}/indexes` (GET)
+### `/collections/{collection_name}/indexes` (GET)
 
-Gets the index type and nlist of a table.
+Gets the index type and nlist of a collection.
 
 #### Request
 
-| Request Component     | Value  |
-|-----------------|-----|
-| Name     | `/tables/{table_name}/indexes`  |
-| Header  | `accept: application/json`  |
-| Body    |   N/A |
-| Method    |   GET |
+| Request Component | Value                                    |
+| ----------------- | ---------------------------------------- |
+| Name              | `/collections/{collection_name}/indexes` |
+| Header            | `accept: application/json`               |
+| Body              | N/A                                      |
+| Method            | GET                                      |
 
 ##### Query Parameters
 
-| Parameter  | Description  |  Required? |
-|-----------------|---|------|
-| `table_name`     | Name of the table.   | Yes   |
+| Parameter         | Description             | Required? |
+| ----------------- | ----------------------- | --------- |
+| `collection_name` | Name of the collection. | Yes       |
 
 #### Response
 
-| Status code    | Description |
-|-----------------|---|
-| 200     | The request is successful.|
-| 400     | The request is incorrect. Refer to the error message for details. |
-| 404     | The required resource does not exist. |
+| Status code | Description                                                       |
+| ----------- | ----------------------------------------------------------------- |
+| 200         | The request is successful.                                        |
+| 400         | The request is incorrect. Refer to the error message for details. |
+| 404         | The required resource does not exist.                             |
 
 #### Example
 
 ##### Request
 
 ```shell
-$ curl -X GET "http://192.168.1.65:19121/tables/test_table/indexes" -H "accept: application/json"
+$ curl -X GET "http://127.0.0.1:19121/collections/test_collection/indexes" -H "accept: application/json"
 ```
 
 ##### Response
 
-
 ```json
-{"index_type":"FLAT","nlist":16384}
+{ "index_type": "FLAT", "params": { "nlist": 4096 } }
 ```
 
-### `/tables/{table_name}/indexes` (POST)
+### `/collections/{collection_name}/indexes` (POST)
 
-Updates the index type and nlist of a table.
+Updates the index type and nlist of a collection.
 
 #### Request
 
@@ -616,7 +667,9 @@ Updates the index type and nlist of a table.
 Body

 {
   "index_type": string,
-  "nlist": integer($int64)
+  "params": {
+      ......
+  }
 }
 
MethodPOST @@ -625,288 +678,419 @@ Updates the index type and nlist of a table. ##### Body Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `index_type` | The type of indexing method to query the table. Please refer to [Index Types](https://www.milvus.io/docs/reference/index.md) for detailed introduction of supported indexes. The default is "FLAT". | No | -| `nlist` | Number of vector buckets in a file. The default is 16384. | No | +| Parameter | Description | Required? | +| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | +| `index_type` | The type of indexing method to query the collection. Please refer to [Index Types](https://www.milvus.io/docs/reference/index.md) for detailed introduction of supported indexes. The default is "FLAT". | No | +| `params` | The extra params of indexing method to query the collection. Please refer to [Index and search parameters](#Index-and-search-parameters) for detailed introduction of supported indexes. | No | ##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `table_name` | Name of the table. | Yes | +| Parameter | Description | Required? | +| ----------------- | ----------------------- | --------- | +| `collection_name` | Name of the collection. | Yes | #### Response -| Status code | Description | -|-----------------|---| -| 201 | Created | -| 400 | The request is incorrect. Refer to the error message for details. | -| 404 | The required resource does not exist. | - -#### Response - -| Status code | Description | -|-----------------|---| -| 201 | Created | -| 400 | The request is incorrect. Refer to the error message for details. | -| 404 | The required resource does not exist. 
| +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 201 | Created | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The required resource does not exist. | #### Example ##### Request ```shell -$ curl -X POST "http://192.168.1.65:19121/tables/test_table/indexes" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"index_type\":\"FLAT\",\"nlist\":16384}" +$ curl -X POST "http://127.0.0.1:19121/collections/test_collection/indexes" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"index_type\":\"IVFFLAT\",\"params\": {\"nlist\":4096}}" ``` ##### Response ```json -{"message":"OK","code":0} +{ "message": "OK", "code": 0 } ``` -### `/tables/{table_name}/indexes` (DELETE) +### `/collections/{collection_name}/indexes` (DELETE) -Drops an index for a table. +Drops an index for a collection. #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/tables/{table_name}/indexes` | -| Header | `accept: application/json` | -| Body | N/A | -| Method | DELETE | +| Request Component | Value | +| ----------------- | ---------------------------------------- | +| Name | `/collections/{collection_name}/indexes` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | DELETE | ##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `table_name` | Name of the table. | Yes | +| Parameter | Description | Required? | +| ----------------- | ----------------------- | --------- | +| `collection_name` | Name of the collection. | Yes | #### Response -| Status code | Description | -|-----------------|---| -| 204 | Deleted | -| 400 | The request is incorrect. Refer to the error message for details. 
| -| 404 | Resource not available | - +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 204 | Deleted | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | Resource not available | #### Example ##### Request ```shell -$ curl -X DELETE "http://192.168.1.65:19121/tables/test_table/indexes" -H "accept: application/json" +$ curl -X DELETE "http://127.0.0.1:19121/collections/test_collection/indexes" -H "accept: application/json" ``` If the deletion is successful, no message will be returned. - -### `/tables/{table_name}/indexes` (OPTIONS) +### `/collections/{collection_name}/indexes` (OPTIONS) Use this API for Cross-Origin Resource Sharing (CORS). #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/tables/{table_name}/indexes` | -| Header | N/A | -| Body | N/A | -| Method | OPTIONS | +| Request Component | Value | +| ----------------- | ---------------------------------------- | +| Name | `/collections/{collection_name}/indexes` | +| Header | N/A | +| Body | N/A | +| Method | OPTIONS | ##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `table_name` | Name of the table. | Yes | +| Parameter | Description | Required? | +| ----------------- | ----------------------- | --------- | +| `collection_name` | Name of the collection. | Yes | #### Example ##### Request ```shell -$ curl -X OPTIONS "http://192.168.1.65:19121/tables/test_table/indexes" +$ curl -X OPTIONS "http://127.0.0.1:19121/collections/test_collection/indexes" ``` -### `/tables/{table_name}/partitions` (GET) +### `/collections/{collection_name}/partitions` (GET) -Gets all partitions in a table starting from `offset` and ends with `page_size`. +Gets all partitions in a collection starting from `offset` and ends with `page_size`. 
#### Request -| Request Component | Value | -|-----------------|-----------| -| Name | `/tables/{table_name}/partitions` | -| Header | `accept: application/json` | -| Body | N/A | -| Method | GET | +| Request Component | Value | +| ----------------- | ------------------------------------------- | +| Name | `/collections/{collection_name}/partitions` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | ##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `table_name` | Name of the table. | Yes | -| `offset` | Row offset from which the data page starts. The default is 0. | No | -| `page_size` | Size of the data page. The default is 10. | No | +| Parameter | Description | Required? | +| ----------------- | ------------------------------------------------------------- | --------- | +| `collection_name` | Name of the collection. | Yes | +| `offset` | Row offset from which the data page starts. The default is 0. | No | +| `page_size` | Size of the data page. The default is 10. | No | #### Response -| Status code | Description | -|-----------------|---| -| 200 | The request is successful.| -| 400 | The request is incorrect. Refer to the error message for details. | -| 404 | The required resource does not exist. | +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The required resource does not exist. 
| #### Example ##### Request ```shell -$ curl -X GET "http://192.168.1.65:19121/tables/test_table/partitions?offset=0&page_size=3" -H "accept: application/json" +$ curl -X GET "http://127.0.0.1:19121/collections/test_collection/partitions?offset=0&page_size=3" -H "accept: application/json" ``` ##### Response ```json -{"partitions":[{"partition_name":"partition_1","partition_tag":"test_tag"},{"partition_name":"partition_2","partition_tag":"test_2"},{"partition_name":"partition_3","partition_tag":"test_3"}]} +{ + "partitions": [ + { "partition_tag": "_default" }, + { "partition_tag": "test_tag" }, + { "partition_tag": "test_2" } + ], + "count": 10 +} ``` -### `/tables/{table_name}/partitions` (POST) +### `/collections/{collection_name}/partitions` (POST) -Creates a partition in a table. +Creates a partition in a collection. #### Request -| Request Component | Value | -|-----------------|-----------| -| Name | `/tables/{table_name}/partitions` | -| Header | `accept: application/json` | -| Body | N/A | -| Method | POST | +| Request Component | Value | +| ----------------- | ------------------------------------------- | +| Name | `/collections/{collection_name}/partitions` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | POST | #### Response -| Status code | Description | -|-----------------|---| -| 201 | Created | -| 400 | The request is incorrect. Refer to the error message for details. | -| 404 | The required resource does not exist. | +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 201 | Created | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The required resource does not exist. 
| #### Example ##### Request ```shell -$ curl -X POST "http://192.168.1.65:19121/tables/test_table/partitions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"partition_name\": \"partition_1\",\"partition_tag\": \"test\"}" +$ curl -X POST "http://127.0.0.1:19121/collections/test_collection/partitions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"partition_tag\": \"test\"}" ``` ##### Response ```json -{"message":"OK","code":0} +{ "message": "OK", "code": 0 } ``` -### `/tables/{table_name}/partitions` (OPTIONS) +### `/collections/{collection_name}/partitions` (OPTIONS) Use this API for Cross-Origin Resource Sharing (CORS). #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/tables/{table_name}/partitions` | -| Header | N/A | -| Body | N/A | -| Method | OPTIONS | +| Request Component | Value | +| ----------------- | ------------------------------------------- | +| Name | `/collections/{collection_name}/partitions` | +| Header | N/A | +| Body | N/A | +| Method | OPTIONS | ##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `table_name` | Name of the table. | Yes | - +| Parameter | Description | Required? | +| ----------------- | ----------------------- | --------- | +| `collection_name` | Name of the collection. | Yes | #### Example ##### Request ```shell -$ curl -X OPTIONS "http://192.168.1.65:19121/tables/test_table/partitions" +$ curl -X OPTIONS "http://127.0.0.1:19121/collections/test_collection/partitions" ``` -### `/tables/{table_name}/partitions/{partition_tag}` (DELETE) +### `/collections/{collection_name}/partitions` (DELETE) Deletes a partition by tag. #### Request -| Request Component | Value | -|-----------------|-----------| -| Name | `/tables/{table_name}/partitions/{partition_tag}` | -| Header | `accept: application/json` | -| Body | N/A | -| Method | DELETE | + + + + + + + +
Request ComponentValue
Name
/collections/{collection_name}/partitions
Header
accept: application/json
Body

+{
+  "partition_tag": string
+}
+
MethodDELETE
##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `table_name` | Name of the table that contains the partition. | Yes | -| `partition_tag` | Tag of the partition to delete. | yes | +| Parameter | Description | Required? | +| ----------------- | --------------------------------------------------- | --------- | +| `collection_name` | Name of the collection that contains the partition. | Yes | +| `partition_tag` | Tag of the partition to delete. | Yes | #### Response -| Status code | Description | -|-----------------|---| -| 204 | Deleted | -| 400 | The request is incorrect. Refer to the error message for details. | -| 404 | The requested resource does not exist. | +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 204 | Deleted | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The requested resource does not exist. | #### Example ##### Request ```shell -$ curl -X DELETE "http://192.168.1.65:19121/tables/test_table/partitions/tags_01" -H "accept: application/json" +$ curl -X DELETE "http://127.0.0.1:19121/collections/test_collection/partitions" -H "accept: application/json" -d "{\"partition_tag\": \"tags_01\"}" ``` The deletion is successful if no information is returned. -### `/tables/{table_name}/partitions/{partition_tag}` (OPTIONS) +### `/collections/{collection_name}/segments` (GET) -Use this API for Cross-Origin Resource Sharing (CORS). +Gets all segments in a collection starting from `offset` and ends with `page_size`.
#### Request -| Request Component | Value | -|-----------------|---| -| Name | `/tables/{table_name}/partitions/{partition_tag}` | -| Header | N/A | -| Body | N/A | -| Method | OPTIONS | +| Request Component | Value | +| ----------------- | ----------------------------------------- | +| Name | `/collections/{collection_name}/segments` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | ##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `table_name` | Name of the table. | Yes | -| `partition_tag` | Tag of the partition | yes | +| Parameter | Description | Required? | +| ----------------- | ------------------------------------------------------------- | --------- | +| `collection_name` | Name of the collection. | Yes | +| `offset` | Row offset from which the data page starts. The default is 0. | No | +| `page_size` | Size of the data page. The default is 10. | No | + +#### Response + +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The required resource does not exist. | #### Example ##### Request ```shell -$ curl -X OPTIONS "http://192.168.1.65:19121/tables/test_table/partitions/tag" +$ curl -X GET "http://127.0.0.1:19121/collections/test_collection/segments?offset=0&page_size=1" -H "accept: application/json" ``` -### `/tables/{table_name}/vectors` (PUT) +##### Response -Searches vectors in a table. +```json +{ + "code": 0, + "message": "OK", + "count": 2, + "segments": [ + { + "count": 10000, + "index": "IVFFLAT", + "partition_tag": "_default", + "segment_name": "1583727470444700000", + "size": 5284922 + } + ] +} +``` + +### `/collections/{collection_name}/segments/{segment_name}/vectors` (GET) + +Gets all vectors of segment in a collection starting from `offset` and ends with `page_size`. 
+ +#### Request + +| Request Component | Value | +| ----------------- | ----------------------------------------- | +| Name | `/collections/{collection_name}/segments/{segment_name}/vectors` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | + +##### Query Parameters + +| Parameter | Description | Required? | +| ----------------- | ------------------------------------------------------------- | --------- | +| `collection_name` | Name of the collection. | Yes | +| `segment_name` | Name of the segment. | Yes | +| `offset` | Row offset from which the data page starts. The default is 0. | No | +| `page_size` | Size of the data page. The default is 10. | No | + +#### Response + +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The required resource does not exist. | + +#### Example + +##### Request + +```shell +$ curl -X GET "http://127.0.0.1:19121/collections/test_collection/segments/1583727470444700000/vectors?offset=0&page_size=1" -H "accept: application/json" +``` + +##### Response + +```json +{ + "code": 0, + "message": "OK", + "count": 2, + "vectors": [ + { + "vector": [0.1], + "id": "1583727470435045000" + } + ] +} +``` + +### `/collections/{collection_name}/segments/{segment_name}/ids` (GET) + +Gets all vector ids of segment in a collection starting from `offset` and ends with `page_size`. + +#### Request + +| Request Component | Value | +| ----------------- | ----------------------------------------- | +| Name | `/collections/{collection_name}/segments/{segment_name}/ids` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | + +##### Query Parameters + +| Parameter | Description | Required? | +| ----------------- | ------------------------------------------------------------- | --------- | +| `collection_name` | Name of the collection.
| Yes | +| `segment_name` | Name of the segment. | Yes | +| `offset` | Row offset from which the data page starts. The default is 0. | No | +| `page_size` | Size of the data page. The default is 10. | No | + +#### Response + +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The required resource does not exist. | + +#### Example + +##### Request + +```shell +$ curl -X GET "http://127.0.0.1:19121/collections/test_collection/segments/1583727470444700000/ids?offset=0&page_size=1" -H "accept: application/json" +``` + +##### Response + +```json +{ + "ids": ["1583727470435045000"], + "count": 10000 +} +``` + +### `/collections/{collection_name}/vectors` (PUT) + +1. Searches vectors in a collection. #### Request @@ -916,12 +1100,15 @@ Searches vectors in a table. Header
accept: application/json
Body

 {
-  "topk": integer($int64),
-  "nprobe": integer($int64),
-  "tags": [string],
-  "file_ids": [string],
-  "records": [[number($float)]],
-  "records_bin": [[number($uint64)]]
+  "search": {
+      "topk": integer($int64),
+      "partition_tags": [string],
+      "file_ids": [string],
+      "vectors": [[number($float/$uint8)]],
+      "params": {
+          "nprobe": 16
+      }
+  }
 }
 
MethodPUT @@ -929,49 +1116,107 @@ Searches vectors in a table. ##### Body Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `topk` | The top k most similar results of each query vector. | Yes | -| `nprobe` | Number of queried vector buckets. | Yes | -| `tags` | Tags of partitions that you need to search. You do not have to specify this value if the table is not partitioned or you wish to search the whole table. | No | -| `file_ids` | IDs of the vector files. You do not have to specify this value if you do not use Milvus in distributed scenarios. Also, if you assign a value to `file_ids`, the value of `tags` is ignored. | No | -| `records` | Numeric vectors to insert to the table. | Yes | -| `records_bin` | Binary vectors to insert to the table. | Yes | - -> Note: Select `records` or `records_bin` depending on the metric used by the table. If the table uses `L2` or `IP`, you must use `records`. If the table uses `HAMMING`, `JACCARD`, or `TANIMOTO`, you must use `records_bin`. +| Parameter | Description | Required? | +| ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | +| `topk` | The top k most similar results of each query vector. | Yes | +| `tags` | Tags of partitions that you need to search. You do not have to specify this value if the collection is not partitioned or you wish to search the whole collection. | No | +| `file_ids` | IDs of the vector files. You do not have to specify this value if you do not use Milvus in distributed scenarios. Also, if you assign a value to `file_ids`, the value of `tags` is ignored. | No | +| `vectors` | Vectors to query. | Yes | +| `params` | Extra params for search. Please refer to [Index and search parameters](#Index-and-search-parameters) to get more detail information. 
| Yes | +> Note: Type of items of vectors depends on the metric used by the collection. If the collection uses `L2` or `IP`, you must use `float`. If the collection uses `HAMMING`, `JACCARD`, or `TANIMOTO`, you must use `uint8`. ##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `table_name` | Name of the table. | Yes | +| Parameter | Description | Required? | +| ----------------- | ----------------------- | --------- | +| `collection_name` | Name of the collection. | Yes | #### Response -| Status code | Description | -|-----------------|---| -| 200 | The request is successful.| -| 400 | The request is incorrect. Refer to the error message for details. | -| 404 | The required resource does not exist. | +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The required resource does not exist. | #### Example ##### Request ```shell -$ curl -X PUT "http://192.168.1.65:19121/tables/test_table/vectors" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"topk\":2,\"nprobe\":16,\"records\":[[0.1]]}" +$ curl -X PUT "http://127.0.0.1:19121/collections/test_collection/vectors" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"topk\":2,\"vectors\":[[0.1]], \"params\":{\"nprobe\":16}}" ``` ##### Response ```json -{"num":1,"results":[[{"id":"1578989029645098000","distance":"0.000000"},{"id":"1578989029645098001","distance":"0.010000"}]]} +{ + "num": 1, + "results": [ + [ + { "id": "1578989029645098000", "distance": "0.000000" }, + { "id": "1578989029645098001", "distance": "0.010000" } + ] + ] +} ``` -### `/tables/{table_name}/vectors` (POST) +2. Delete vectors -Inserts vectors to a table. +#### Request + + + + + + + +
Request ComponentValue
Name
/collections/{collection_name}/vectors
Header
accept: application/json
Body

+{
+  "delete": {
+     "ids": [$string]
+  }
+}
+
MethodPUT
+ +##### Body Parameters + +| Parameter | Description | Required? | +| --------- | --------------- | --------- | +| ids | IDs of vectors. | Yes | + +##### Query Parameters + +| Parameter | Description | Required? | +| ----------------- | ----------------------- | --------- | +| `collection_name` | Name of the collection. | Yes | + +#### Response + +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The required resource does not exist. | + +#### Example + +##### Request + +```shell +$ curl -X PUT "http://127.0.0.1:19121/collections/test_collection/vectors" -H "accept: application/json" -H "Content-Type: application/json" -d "{"delete": {"ids": ["1578989029645098000"]}}" +``` + +##### Response + +```json +{ "code": 0, "message": "success" } +``` + +### `/collections/{collection_name}/vectors` (POST) + +Inserts vectors to a collection. > Note: It is recommended that you do not insert more than 1 million vectors per request. @@ -983,9 +1228,8 @@ Inserts vectors to a table. Header
accept: application/json
Body

 {
-  "tag": string,
-  "records": [[number($float)]],
-  “records_bin”:[[number($uint64)]]
+  "partition_tag": string,
+  "vectors": [[number($float/$uint8)]],
   "ids": [integer($int64)]
 }
 
@@ -994,62 +1238,117 @@ Inserts vectors to a table. ##### Body Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `tag` | Tag of the partition to insert vectors to. | No | -| `records` | Numeric vectors to insert to the table. | Yes | -| `records_bin` | Binary vectors to insert to the table. | Yes | -| `ids` | IDs of the vectors to insert to the table. If you assign IDs to the vectors, you must provide IDs for all vectors in the table. If you do not specify this parameter, Milvus automatically assigns IDs to the vectors. | No | +| Parameter | Description | Required? | +| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | +| `partition_tag` | Tag of the partition to insert vectors to. | No | +| `vectors` | Vectors to insert to the collection. | Yes | +| `ids` | IDs of the vectors to insert to the collection. If you assign IDs to the vectors, you must provide IDs for all vectors in the collection. If you do not specify this parameter, Milvus automatically assigns IDs to the vectors. | No | -> Note: Select `records` or `records_bin` depending on the metric used by the table. If the table uses `L2` or `IP`, you must use `records`. If the table uses `HAMMING`, `JACCARD`, or `TANIMOTO`, you must use `records_bin`. +> Note: Type of items of `vectors` depends on the metric used by the collection. If the collection uses `L2` or `IP`, you must use `float`. If the collection uses `HAMMING`, `JACCARD`, or `TANIMOTO`, you must use `uint8`. ##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `table_name` | Name of the table. | Yes | +| Parameter | Description | Required? | +| ----------------- | ----------------------- | --------- | +| `collection_name` | Name of the collection. 
| Yes | #### Response -| Status code | Description | -|-----------------|---| -| 201 | Created | -| 400 | The request is incorrect. Refer to the error message for details. | -| 404 | The required resource does not exist. | +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 201 | Created | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The required resource does not exist. | #### Example ##### Request ```shell -$ curl -X POST "http://192.168.1.65:19121/tables/test_table/vectors" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"records\":[[0.1],[0.2],[0.3],[0.4]]}" +$ curl -X POST "http://127.0.0.1:19121/collections/test_collection/vectors" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"vectors\":[[0.1],[0.2],[0.3],[0.4]]}" ``` ##### Response ```json -{"ids":["1578989029645098000","1578989029645098001","1578989029645098002","1578989029645098003"]} +{ + "ids": [ + "1578989029645098000", + "1578989029645098001", + "1578989029645098002", + "1578989029645098003" + ] +} ``` -### `/tables/{table_name}/vectors` (OPTIONS) +### `/collections/{collection_name}/vectors?id={vector_id}` (GET) -Use this API for Cross-Origin Resource Sharing (CORS). +Obtain a vector to by ID. #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/tables/{table_name}/vectors` | -| Header | N/A | -| Body | N/A | -| Method | OPTIONS | +| Request Component | Value | +| ----------------- | ---------------------------------------- | +| Name | `/collections/{collection_name}/vectors` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | + +#### Query Parameters + +| Parameter | Description | Required? | +| ----------------- | ----------------------- | --------- | +| `collection_name` | Name of the collection. | Yes | +| `vector_id` | Vector id. 
| Yes | + +#### Response + +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 201 | Created | +| 400 | The request is incorrect. Refer to the error message for details. | +| 404 | The required resource does not exist. | #### Example ##### Request ```shell -$ curl -X OPTIONS "http://192.168.1.65:19121/tables/test_table/vectors" +$ curl -X POST "http://127.0.0.1:19121/collections/test_collection/vectors?id=1578989029645098000" -H "accept: application/json" -H "Content-Type: application/json" +``` + +##### Response + +```json +{ + "vectors": [ + { + "id": "1578989029645098000", + "vector": [0.1] + } + ] +} +``` + +### `/collections/{collection_name}/vectors` (OPTIONS) + +Use this API for Cross-Origin Resource Sharing (CORS). + +#### Request + +| Request Component | Value | +| ----------------- | ---------------------------------------- | +| Name | `/collections/{collection_name}/vectors` | +| Header | N/A | +| Body | N/A | +| Method | OPTIONS | + +#### Example + +##### Request + +```shell +$ curl -X OPTIONS "http://127.0.0.1:19121/collections/test_collection/vectors" ``` ### `/system/{msg}` (GET) @@ -1058,72 +1357,229 @@ Gets information about the Milvus server. #### Request -| Request Component | Value | -|-----------------|---| -| Name | `/system/{msg}` | -| Header | `accept: application/json` | -| Body | N/A | -| Method | GET | +| Request Component | Value | +| ----------------- | -------------------------- | +| Name | `/system/{msg}` | +| Header | `accept: application/json` | +| Body | N/A | +| Method | GET | ##### Query Parameters -| Parameter | Description | Required? | -|-----------------|---|------| -| `msg` | Type of the message to return. You can use `status` or `version`. | Yes | +| Parameter | Description | Required? | +| --------- | ----------------------------------------------------------------- | --------- | +| `msg` | Type of the message to return. You can use `status` or `version`. 
| Yes | #### Response -| Status code | Description | -|-----------------|---| -| 200 | The request is successful.| -| 400 | The request is incorrect. Refer to the error message for details. | +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | #### Example ##### Request ```shell -$ curl -X GET "http://192.168.1.65:19121/system/version" -H "accept: application/json" +$ curl -X GET "http://127.0.0.1:19121/system/version" -H "accept: application/json" ``` ##### Response ```json -{"reply":"0.6.0"} +{ "reply": "0.7.0" } ``` +### `system/{op}` (PUT) + +#### Flush a collection + +##### Request + + + + + + + +
Request ComponentValue
Name
/system/task
Header
accept: application/json
Body

+{
+  "flush": {
+     "collection_names": [$string]
+  }
+}
+
MethodPUT
+ +##### Response + +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | + +##### Example + +###### Request + +```shell +$ curl -X PUT "http://127.0.0.1:19121/system/task" -H "accept: application/json" -d "{\"flush\": {\"collection_names\": [\"test_collection\"]}}" +``` + +###### Response + +```json +{ "code": 0, "message": "success" } +``` + +#### Compact segments in a collection + +##### Request + + + + + + + +
Request ComponentValue
Name
/system/task
Header
accept: application/json
Body

+{
+  "compact": {
+     "collection_name": $string
+  }
+}
+
MethodPUT
+ +##### Response + +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | + +##### Example + +###### Request + +```shell +$ curl -X PUT "http://127.0.0.1:19121/system/task" -H "accept: application/json" -d "{\"compact\": {\"collection_name\": \"test_collection\"}}" +``` + +###### Response + +```json +{ "code": 0, "message": "success" } +``` + +#### Load a collection to memory + +##### Request + + + + + + + +
Request ComponentValue
Name
/system/task
Header
accept: application/json
Body

+{
+  "load": {
+     "collection_name": $string
+  }
+}
+
MethodPUT
+ +##### Response + +| Status code | Description | +| ----------- | ----------------------------------------------------------------- | +| 200 | The request is successful. | +| 400 | The request is incorrect. Refer to the error message for details. | + +##### Example + +###### Request + +```shell +$ curl -X PUT "http://127.0.0.1:19121/system/task" -H "accept: application/json" -d "{\"load\": {\"collection_name\": \"test_collection\"}}" +``` + +###### Response + +```json +{ "code": 0, "message": "success" } +``` + +## Index and search parameters + +For each index type, the RESTful API has specific index parameters and search parameters. + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Index typeCreate index paramSearch param
IVFFLAT
{"nlist": $int}
{"nprobe": $int}
IVFPQ
{"m": $int, "nlist": $int}
{"nprobe": $int}
IVFSQ8
{"nlist": $int}
{"nprobe": $int}
IVFSQ8H
{"nlist": $int}
{"nprobe": $int}
HNSW
{"M": $int, "efConstruction": $int}
{"ef": $int}
+ +For detailed information about the parameters above, refer to [Index Types](https://milvus.io/docs/v0.7.0/reference/index.md) + ## Error Codes The RESTful API returns error messages as JSON text. Each type of error message has a specific error code. -| Type | Code | -|----------|------| -SUCCESS | 0 | -UNEXPECTED_ERROR | 1 | -CONNECT_FAILED | 2 | -PERMISSION_DENIED | 3 | -TABLE_NOT_EXISTS | 4 | -ILLEGAL_ARGUMENT | 5 | -ILLEGAL_RANGE | 6 | -ILLEGAL_DIMENSION | 7 | -ILLEGAL_INDEX_TYPE | 8 | -ILLEGAL_TABLE_NAME | 9 | -ILLEGAL_TOPK | 10 | -ILLEGAL_ROWRECORD | 11 | -ILLEGAL_VECTOR_ID | 12 | -ILLEGAL_SEARCH_RESULT | 13 | -FILE_NOT_FOUND | 14 | -META_FAILED | 15 | -CACHE_FAILED | 16 | -CANNOT_CREATE_FOLDER | 17 | -CANNOT_CREATE_FILE | 18 | -CANNOT_DELETE_FOLDER | 19 | -CANNOT_DELETE_FILE | 20 | -BUILD_INDEX_ERROR | 21 | -ILLEGAL_NLIST | 22 | -ILLEGAL_METRIC_TYPE | 23 | -OUT_OF_MEMORY | 24 | -PATH_PARAM_LOSS | 31 | -QUERY_PARAM_LOSS | 32 | -BODY_FIELD_LOSS | 33 | -ILLEGAL_QUERY_PARAM | 36 | +| Type | Code | +| --------------------- | ---- | +| SUCCESS | 0 | +| UNEXPECTED_ERROR | 1 | +| CONNECT_FAILED | 2 | +| PERMISSION_DENIED | 3 | +| TABLE_NOT_EXISTS | 4 | +| ILLEGAL_ARGUMENT | 5 | +| ILLEGAL_RANGE | 6 | +| ILLEGAL_DIMENSION | 7 | +| ILLEGAL_INDEX_TYPE | 8 | +| ILLEGAL_TABLE_NAME | 9 | +| ILLEGAL_TOPK | 10 | +| ILLEGAL_ROWRECORD | 11 | +| ILLEGAL_VECTOR_ID | 12 | +| ILLEGAL_SEARCH_RESULT | 13 | +| FILE_NOT_FOUND | 14 | +| META_FAILED | 15 | +| CACHE_FAILED | 16 | +| CANNOT_CREATE_FOLDER | 17 | +| CANNOT_CREATE_FILE | 18 | +| CANNOT_DELETE_FOLDER | 19 | +| CANNOT_DELETE_FILE | 20 | +| BUILD_INDEX_ERROR | 21 | +| ILLEGAL_NLIST | 22 | +| ILLEGAL_METRIC_TYPE | 23 | +| OUT_OF_MEMORY | 24 | +| PATH_PARAM_LOSS | 31 | +| UNKNOWN_PATH | 32 | +| QUERY_PARAM_LOSS | 33 | +| BODY_FIELD_LOSS | 34 | +| ILLEGAL_BODY | 35 | +| BODY_PARSE_FAIL | 36 | +| ILLEGAL_QUERY_PARAM | 37 | diff --git a/core/src/server/web_impl/controller/WebController.hpp 
b/core/src/server/web_impl/controller/WebController.hpp index 55a51168a3..f326578405 100644 --- a/core/src/server/web_impl/controller/WebController.hpp +++ b/core/src/server/web_impl/controller/WebController.hpp @@ -617,10 +617,7 @@ class WebController : public oatpp::web::server::api::ApiController { } ADD_CORS(VectorsOp) - /************* - * Search - * Delete by ID - * */ + ENDPOINT("PUT", "/collections/{collection_name}/vectors", VectorsOp, PATH(String, collection_name), BODY_STRING(String, body)) { TimeRecorder tr(std::string(WEB_LOG_PREFIX) + "PUT \'/collections/" + collection_name->std_str() + "/vectors\'"); @@ -648,6 +645,12 @@ class WebController : public oatpp::web::server::api::ApiController { return response; } + ADD_CORS(SystemOptions) + + ENDPOINT("OPTIONS", "/system/{info}", SystemOptions) { + return createResponse(Status::CODE_204, "No Content"); + } + ADD_CORS(SystemInfo) ENDPOINT("GET", "/system/{info}", SystemInfo, PATH(String, info), QUERIES(const QueryParams&, query_params)) { diff --git a/core/src/utils/ValidationUtil.cpp b/core/src/utils/ValidationUtil.cpp index 84f260c508..50a6536fd2 100644 --- a/core/src/utils/ValidationUtil.cpp +++ b/core/src/utils/ValidationUtil.cpp @@ -174,14 +174,14 @@ ValidationUtil::ValidateIndexParams(const milvus::json& index_params, const engi case (int32_t)engine::EngineType::FAISS_IVFSQ8: case (int32_t)engine::EngineType::FAISS_IVFSQ8H: case (int32_t)engine::EngineType::FAISS_BIN_IVFFLAT: { - auto status = CheckParameterRange(index_params, knowhere::IndexParams::nlist, 0, 999999, false); + auto status = CheckParameterRange(index_params, knowhere::IndexParams::nlist, 1, 999999); if (!status.ok()) { return status; } break; } case (int32_t)engine::EngineType::FAISS_PQ: { - auto status = CheckParameterRange(index_params, knowhere::IndexParams::nlist, 0, 999999, false); + auto status = CheckParameterRange(index_params, knowhere::IndexParams::nlist, 1, 999999); if (!status.ok()) { return status; } @@ -254,7 +254,7 @@ 
ValidationUtil::ValidateSearchParams(const milvus::json& search_params, const en break; } case (int32_t)engine::EngineType::HNSW: { - auto status = CheckParameterRange(search_params, knowhere::IndexParams::ef, topk, 1000); + auto status = CheckParameterRange(search_params, knowhere::IndexParams::ef, topk, 4096); if (!status.ok()) { return status; } @@ -264,12 +264,6 @@ ValidationUtil::ValidateSearchParams(const milvus::json& search_params, const en return Status::OK(); } -bool -ValidationUtil::IsBinaryIndexType(int32_t index_type) { - return (index_type == static_cast(engine::EngineType::FAISS_BIN_IDMAP)) || - (index_type == static_cast(engine::EngineType::FAISS_BIN_IVFFLAT)); -} - Status ValidationUtil::ValidateTableIndexFileSize(int64_t index_file_size) { if (index_file_size <= 0 || index_file_size > INDEX_FILE_SIZE_LIMIT) { @@ -294,13 +288,6 @@ ValidationUtil::ValidateTableIndexMetricType(int32_t metric_type) { return Status::OK(); } -bool -ValidationUtil::IsBinaryMetricType(int32_t metric_type) { - return (metric_type == static_cast(engine::MetricType::HAMMING)) || - (metric_type == static_cast(engine::MetricType::JACCARD)) || - (metric_type == static_cast(engine::MetricType::TANIMOTO)); -} - Status ValidationUtil::ValidateSearchTopk(int64_t top_k, const engine::meta::TableSchema& table_schema) { if (top_k <= 0 || top_k > 2048) { diff --git a/core/src/utils/ValidationUtil.h b/core/src/utils/ValidationUtil.h index 4fa2df1619..481cb31f1b 100644 --- a/core/src/utils/ValidationUtil.h +++ b/core/src/utils/ValidationUtil.h @@ -43,18 +43,12 @@ class ValidationUtil { ValidateSearchParams(const milvus::json& search_params, const engine::meta::TableSchema& table_schema, int64_t topk); - static bool - IsBinaryIndexType(int32_t index_type); - static Status ValidateTableIndexFileSize(int64_t index_file_size); static Status ValidateTableIndexMetricType(int32_t metric_type); - static bool - IsBinaryMetricType(int32_t metric_type); - static Status ValidateSearchTopk(int64_t 
top_k, const engine::meta::TableSchema& table_schema); diff --git a/core/src/wrapper/ConfAdapter.cpp b/core/src/wrapper/ConfAdapter.cpp index db06504def..115568ba91 100644 --- a/core/src/wrapper/ConfAdapter.cpp +++ b/core/src/wrapper/ConfAdapter.cpp @@ -225,7 +225,9 @@ NSGConfAdapter::CheckTrain(milvus::json& oricfg) { // auto tune params oricfg[knowhere::IndexParams::nlist] = MatchNlist(oricfg[knowhere::meta::ROWS].get(), 8192, 8192); - oricfg[knowhere::IndexParams::nprobe] = int(oricfg[knowhere::IndexParams::nlist].get() * 0.01); + + int64_t nprobe = int(oricfg[knowhere::IndexParams::nlist].get() * 0.1); + oricfg[knowhere::IndexParams::nprobe] = nprobe < 1 ? 1 : nprobe; return true; } diff --git a/core/src/wrapper/VecIndex.h b/core/src/wrapper/VecIndex.h index f7cf50b5bc..7a98e971fe 100644 --- a/core/src/wrapper/VecIndex.h +++ b/core/src/wrapper/VecIndex.h @@ -12,10 +12,10 @@ #pragma once #include -#include #include #include +#include #include #include @@ -180,13 +180,14 @@ class VecIndex : public cache::DataObj { virtual Status SetBlacklist(faiss::ConcurrentBitsetPtr list) { - ENGINE_LOG_ERROR << "SetBlacklist not support"; + // ENGINE_LOG_ERROR << "SetBlacklist not support"; return Status::OK(); } virtual Status GetBlacklist(faiss::ConcurrentBitsetPtr& list) { - ENGINE_LOG_ERROR << "GetBlacklist not support"; + // ENGINE_LOG_ERROR << "GetBlacklist not support"; + ENGINE_LOG_WARNING << "Deletion on unsupported index type"; return Status::OK(); } diff --git a/core/unittest/db/test_db.cpp b/core/unittest/db/test_db.cpp index 459fa24aed..41946003a5 100644 --- a/core/unittest/db/test_db.cpp +++ b/core/unittest/db/test_db.cpp @@ -836,15 +836,15 @@ TEST_F(DBTest, PARTITION_TEST) { ASSERT_TRUE(stat.ok()); fiu_init(0); - FIU_ENABLE_FIU("DBImpl.BuildTableIndexRecursively.fail_build_table_Index_for_partition"); + FIU_ENABLE_FIU("DBImpl.WaitTableIndexRecursively.fail_build_table_Index_for_partition"); stat = db_->CreateIndex(table_info.table_id_, index); 
ASSERT_FALSE(stat.ok()); - fiu_disable("DBImpl.BuildTableIndexRecursively.fail_build_table_Index_for_partition"); + fiu_disable("DBImpl.WaitTableIndexRecursively.fail_build_table_Index_for_partition"); - FIU_ENABLE_FIU("DBImpl.BuildTableIndexRecursively.not_empty_err_msg"); + FIU_ENABLE_FIU("DBImpl.WaitTableIndexRecursively.not_empty_err_msg"); stat = db_->CreateIndex(table_info.table_id_, index); ASSERT_FALSE(stat.ok()); - fiu_disable("DBImpl.BuildTableIndexRecursively.not_empty_err_msg"); + fiu_disable("DBImpl.WaitTableIndexRecursively.not_empty_err_msg"); uint64_t row_count = 0; stat = db_->GetTableRowCount(TABLE_NAME, row_count); diff --git a/core/unittest/db/test_delete.cpp b/core/unittest/db/test_delete.cpp index 4d6051455d..97c91fab64 100644 --- a/core/unittest/db/test_delete.cpp +++ b/core/unittest/db/test_delete.cpp @@ -68,7 +68,7 @@ TEST_F(DeleteTest, delete_in_mem) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -81,7 +81,7 @@ TEST_F(DeleteTest, delete_in_mem) { xb.id_array_.push_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -105,7 +105,7 @@ TEST_F(DeleteTest, delete_in_mem) { ids_to_delete.emplace_back(kv.first); } - stat = db_->DeleteVectors(GetTableName(), ids_to_delete); + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); ASSERT_TRUE(stat.ok()); // std::this_thread::sleep_for(std::chrono::seconds(3)); // ensure raw data write to disk @@ -113,7 +113,7 @@ TEST_F(DeleteTest, delete_in_mem) { ASSERT_TRUE(stat.ok()); uint64_t row_count; - stat = db_->GetTableRowCount(GetTableName(), row_count); + stat = db_->GetTableRowCount(table_info.table_id_, row_count); 
ASSERT_TRUE(stat.ok()); ASSERT_EQ(row_count, nb - search_vectors.size()); @@ -124,7 +124,8 @@ TEST_F(DeleteTest, delete_in_mem) { std::vector tags; milvus::engine::ResultIds result_ids; milvus::engine::ResultDistances result_distances; - stat = db_->Query(dummy_context_, GetTableName(), tags, topk, nprobe, search, result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, {{"nprobe", nprobe}}, search, result_ids, + result_distances); ASSERT_NE(result_ids[0], pair.first); // ASSERT_LT(result_distances[0], 1e-4); ASSERT_GT(result_distances[0], 1); @@ -136,7 +137,7 @@ TEST_F(DeleteTest, delete_on_disk) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -149,7 +150,7 @@ TEST_F(DeleteTest, delete_on_disk) { xb.id_array_.push_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -173,7 +174,7 @@ TEST_F(DeleteTest, delete_on_disk) { ASSERT_TRUE(stat.ok()); for (auto& kv : search_vectors) { - stat = db_->DeleteVector(GetTableName(), kv.first); + stat = db_->DeleteVector(table_info.table_id_, kv.first); ASSERT_TRUE(stat.ok()); } @@ -181,7 +182,7 @@ TEST_F(DeleteTest, delete_on_disk) { ASSERT_TRUE(stat.ok()); uint64_t row_count; - stat = db_->GetTableRowCount(GetTableName(), row_count); + stat = db_->GetTableRowCount(table_info.table_id_, row_count); ASSERT_TRUE(stat.ok()); ASSERT_EQ(row_count, nb - search_vectors.size()); @@ -192,7 +193,8 @@ TEST_F(DeleteTest, delete_on_disk) { std::vector tags; milvus::engine::ResultIds result_ids; milvus::engine::ResultDistances result_distances; - stat = db_->Query(dummy_context_, GetTableName(), tags, topk, nprobe, search, 
result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, {{"nprobe", nprobe}}, search, result_ids, + result_distances); ASSERT_NE(result_ids[0], pair.first); // ASSERT_LT(result_distances[0], 1e-4); ASSERT_GT(result_distances[0], 1); @@ -204,7 +206,7 @@ TEST_F(DeleteTest, delete_multiple_times) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -217,7 +219,7 @@ TEST_F(DeleteTest, delete_multiple_times) { xb.id_array_.push_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -243,7 +245,7 @@ TEST_F(DeleteTest, delete_multiple_times) { int topk = 10, nprobe = 10; for (auto& pair : search_vectors) { std::vector to_delete{pair.first}; - stat = db_->DeleteVectors(GetTableName(), to_delete); + stat = db_->DeleteVectors(table_info.table_id_, to_delete); ASSERT_TRUE(stat.ok()); stat = db_->Flush(); @@ -254,20 +256,21 @@ TEST_F(DeleteTest, delete_multiple_times) { std::vector tags; milvus::engine::ResultIds result_ids; milvus::engine::ResultDistances result_distances; - stat = db_->Query(dummy_context_, GetTableName(), tags, topk, nprobe, search, result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, {{"nprobe", nprobe}}, search, result_ids, + result_distances); ASSERT_NE(result_ids[0], pair.first); // ASSERT_LT(result_distances[0], 1e-4); ASSERT_GT(result_distances[0], 1); } } -TEST_F(DeleteTest, delete_with_index) { +TEST_F(DeleteTest, delete_before_create_index) { milvus::engine::meta::TableSchema table_info = BuildTableSchema(); table_info.engine_type_ = (int32_t)milvus::engine::EngineType::FAISS_IVFFLAT; auto 
stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -280,7 +283,83 @@ TEST_F(DeleteTest, delete_with_index) { xb.id_array_.push_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); + ASSERT_TRUE(stat.ok()); + + stat = db_->Flush(); + ASSERT_TRUE(stat.ok()); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, nb - 1); + + int64_t num_query = 10; + std::map search_vectors; + for (int64_t i = 0; i < num_query; ++i) { + int64_t index = dis(gen); + milvus::engine::VectorsData search; + search.vector_count_ = 1; + for (int64_t j = 0; j < TABLE_DIM; j++) { + search.float_data_.push_back(xb.float_data_[index * TABLE_DIM + j]); + } + search_vectors.insert(std::make_pair(xb.id_array_[index], search)); + } + + milvus::engine::IDNumbers ids_to_delete; + for (auto& kv : search_vectors) { + ids_to_delete.emplace_back(kv.first); + } + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); + + stat = db_->Flush(); + ASSERT_TRUE(stat.ok()); + + milvus::engine::TableIndex index; + index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8; + index.extra_params_ = {{"nlist", 100}}; + stat = db_->CreateIndex(table_info.table_id_, index); + ASSERT_TRUE(stat.ok()); + + uint64_t row_count; + stat = db_->GetTableRowCount(table_info.table_id_, row_count); + ASSERT_TRUE(stat.ok()); + ASSERT_EQ(row_count, nb - ids_to_delete.size()); + + int topk = 10, nprobe = 10; + for (auto& pair : search_vectors) { + auto& search = pair.second; + + std::vector tags; + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, 
{{"nprobe", nprobe}}, search, result_ids, + result_distances); + ASSERT_NE(result_ids[0], pair.first); + // ASSERT_LT(result_distances[0], 1e-4); + ASSERT_GT(result_distances[0], 1); + } +} + +TEST_F(DeleteTest, delete_with_index) { + milvus::engine::meta::TableSchema table_info = BuildTableSchema(); + table_info.engine_type_ = (int32_t)milvus::engine::EngineType::FAISS_IVFFLAT; + auto stat = db_->CreateTable(table_info); + + milvus::engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = table_info.table_id_; + stat = db_->DescribeTable(table_info_get); + ASSERT_TRUE(stat.ok()); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + int64_t nb = 10000; + milvus::engine::VectorsData xb; + BuildVectors(nb, xb); + + for (int64_t i = 0; i < nb; i++) { + xb.id_array_.push_back(i); + } + + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -302,7 +381,7 @@ TEST_F(DeleteTest, delete_with_index) { milvus::engine::TableIndex index; index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8; index.extra_params_ = {{"nlist", 100}}; - stat = db_->CreateIndex(GetTableName(), index); + stat = db_->CreateIndex(table_info.table_id_, index); ASSERT_TRUE(stat.ok()); // std::this_thread::sleep_for(std::chrono::seconds(3)); // ensure raw data write to disk @@ -313,13 +392,13 @@ TEST_F(DeleteTest, delete_with_index) { for (auto& kv : search_vectors) { ids_to_delete.emplace_back(kv.first); } - stat = db_->DeleteVectors(GetTableName(), ids_to_delete); + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); stat = db_->Flush(); ASSERT_TRUE(stat.ok()); uint64_t row_count; - stat = db_->GetTableRowCount(GetTableName(), row_count); + stat = db_->GetTableRowCount(table_info.table_id_, row_count); ASSERT_TRUE(stat.ok()); ASSERT_EQ(row_count, nb - ids_to_delete.size()); @@ -330,7 +409,86 @@ TEST_F(DeleteTest, delete_with_index) { std::vector tags; milvus::engine::ResultIds result_ids; 
milvus::engine::ResultDistances result_distances; - stat = db_->Query(dummy_context_, GetTableName(), tags, topk, nprobe, search, result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, {{"nprobe", nprobe}}, search, result_ids, + result_distances); + ASSERT_NE(result_ids[0], pair.first); + // ASSERT_LT(result_distances[0], 1e-4); + ASSERT_GT(result_distances[0], 1); + } +} + +TEST_F(DeleteTest, delete_multiple_times_with_index) { + milvus::engine::meta::TableSchema table_info = BuildTableSchema(); + auto stat = db_->CreateTable(table_info); + + milvus::engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = table_info.table_id_; + stat = db_->DescribeTable(table_info_get); + ASSERT_TRUE(stat.ok()); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + int64_t nb = 100000; + milvus::engine::VectorsData xb; + BuildVectors(nb, xb); + + for (int64_t i = 0; i < nb; i++) { + xb.id_array_.push_back(i); + } + + stat = db_->InsertVectors(table_info.table_id_, "", xb); + ASSERT_TRUE(stat.ok()); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, nb - 1); + + int64_t num_query = 10; + std::map search_vectors; + for (int64_t i = 0; i < num_query; ++i) { + int64_t index = dis(gen); + milvus::engine::VectorsData search; + search.vector_count_ = 1; + for (int64_t j = 0; j < TABLE_DIM; j++) { + search.float_data_.push_back(xb.float_data_[index * TABLE_DIM + j]); + } + search_vectors.insert(std::make_pair(xb.id_array_[index], search)); + } + + // std::this_thread::sleep_for(std::chrono::seconds(3)); // ensure raw data write to disk + stat = db_->Flush(); + ASSERT_TRUE(stat.ok()); + + milvus::engine::TableIndex index; + index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFFLAT; + index.extra_params_ = {{"nlist", 1}}; + stat = db_->CreateIndex(table_info.table_id_, index); + ASSERT_TRUE(stat.ok()); + + int topk = 10, nprobe = 10; + int deleted = 0; + for (auto& pair : 
search_vectors) { + std::vector to_delete{pair.first}; + stat = db_->DeleteVectors(table_info.table_id_, to_delete); + ASSERT_TRUE(stat.ok()); + + stat = db_->Flush(); + ASSERT_TRUE(stat.ok()); + + ++deleted; + + uint64_t row_count; + stat = db_->GetTableRowCount(table_info.table_id_, row_count); + ASSERT_TRUE(stat.ok()); + ASSERT_EQ(row_count, nb - deleted); + + auto& search = pair.second; + + std::vector tags; + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_distances; + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, {{"nprobe", nprobe}}, search, result_ids, + result_distances); + ASSERT_TRUE(stat.ok()); ASSERT_NE(result_ids[0], pair.first); // ASSERT_LT(result_distances[0], 1e-4); ASSERT_GT(result_distances[0], 1); @@ -342,7 +500,7 @@ TEST_F(DeleteTest, delete_single_vector) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -351,21 +509,21 @@ TEST_F(DeleteTest, delete_single_vector) { milvus::engine::VectorsData xb; BuildVectors(nb, xb); - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); // std::this_thread::sleep_for(std::chrono::seconds(3)); // ensure raw data write to disk stat = db_->Flush(); ASSERT_TRUE(stat.ok()); - stat = db_->DeleteVectors(GetTableName(), xb.id_array_); + stat = db_->DeleteVectors(table_info.table_id_, xb.id_array_); ASSERT_TRUE(stat.ok()); stat = db_->Flush(); ASSERT_TRUE(stat.ok()); uint64_t row_count; - stat = db_->GetTableRowCount(GetTableName(), row_count); + stat = db_->GetTableRowCount(table_info.table_id_, row_count); ASSERT_TRUE(stat.ok()); ASSERT_EQ(row_count, 0); @@ -375,7 +533,7 @@ TEST_F(DeleteTest, delete_single_vector) { std::vector 
tags; milvus::engine::ResultIds result_ids; milvus::engine::ResultDistances result_distances; - stat = db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, xb, result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, json_params, xb, result_ids, result_distances); ASSERT_TRUE(result_ids.empty()); ASSERT_TRUE(result_distances.empty()); // ASSERT_EQ(result_ids[0], -1); @@ -388,7 +546,7 @@ TEST_F(DeleteTest, delete_add_create_index) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -397,7 +555,7 @@ TEST_F(DeleteTest, delete_add_create_index) { milvus::engine::VectorsData xb; BuildVectors(nb, xb); - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); // stat = db_->Flush(); @@ -405,27 +563,27 @@ TEST_F(DeleteTest, delete_add_create_index) { milvus::engine::TableIndex index; index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8; index.extra_params_ = {{"nlist", 100}}; - stat = db_->CreateIndex(GetTableName(), index); + stat = db_->CreateIndex(table_info.table_id_, index); ASSERT_TRUE(stat.ok()); std::vector ids_to_delete; ids_to_delete.emplace_back(xb.id_array_.front()); - stat = db_->DeleteVectors(GetTableName(), ids_to_delete); + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); ASSERT_TRUE(stat.ok()); milvus::engine::VectorsData xb2 = xb; xb2.id_array_.clear(); // same vector, different id - stat = db_->InsertVectors(GetTableName(), "", xb2); + stat = db_->InsertVectors(table_info.table_id_, "", xb2); ASSERT_TRUE(stat.ok()); // stat = db_->Flush(); // ASSERT_TRUE(stat.ok()); - stat = db_->CreateIndex(GetTableName(), index); + stat = 
db_->CreateIndex(table_info.table_id_, index); ASSERT_TRUE(stat.ok()); uint64_t row_count; - stat = db_->GetTableRowCount(GetTableName(), row_count); + stat = db_->GetTableRowCount(table_info.table_id_, row_count); ASSERT_TRUE(stat.ok()); ASSERT_EQ(row_count, nb * 2 - 1); @@ -439,15 +597,15 @@ TEST_F(DeleteTest, delete_add_create_index) { qb.float_data_.resize(TABLE_DIM); qb.vector_count_ = 1; qb.id_array_.clear(); - stat = db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, qb, result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, json_params, qb, result_ids, result_distances); ASSERT_EQ(result_ids[0], xb2.id_array_.front()); ASSERT_LT(result_distances[0], 1e-4); result_ids.clear(); result_distances.clear(); - stat = db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, ids_to_delete.front(), result_ids, - result_distances); + stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, json_params, ids_to_delete.front(), + result_ids, result_distances); ASSERT_EQ(result_ids[0], -1); ASSERT_EQ(result_distances[0], std::numeric_limits::max()); } @@ -457,7 +615,7 @@ TEST_F(DeleteTest, delete_add_auto_flush) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -466,7 +624,7 @@ TEST_F(DeleteTest, delete_add_auto_flush) { milvus::engine::VectorsData xb; BuildVectors(nb, xb); - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::this_thread::sleep_for(std::chrono::seconds(2)); @@ -475,28 +633,28 @@ TEST_F(DeleteTest, delete_add_auto_flush) { // ASSERT_TRUE(stat.ok()); // milvus::engine::TableIndex index; // index.engine_type_ = 
(int)milvus::engine::EngineType::FAISS_IVFSQ8; - // stat = db_->CreateIndex(GetTableName(), index); + // stat = db_->CreateIndex(table_info.table_id_, index); // ASSERT_TRUE(stat.ok()); std::vector ids_to_delete; ids_to_delete.emplace_back(xb.id_array_.front()); - stat = db_->DeleteVectors(GetTableName(), ids_to_delete); + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); ASSERT_TRUE(stat.ok()); milvus::engine::VectorsData xb2 = xb; xb2.id_array_.clear(); // same vector, different id - stat = db_->InsertVectors(GetTableName(), "", xb2); + stat = db_->InsertVectors(table_info.table_id_, "", xb2); ASSERT_TRUE(stat.ok()); std::this_thread::sleep_for(std::chrono::seconds(2)); // stat = db_->Flush(); // ASSERT_TRUE(stat.ok()); - // stat = db_->CreateIndex(GetTableName(), index); + // stat = db_->CreateIndex(table_info.table_id_, index); // ASSERT_TRUE(stat.ok()); uint64_t row_count; - stat = db_->GetTableRowCount(GetTableName(), row_count); + stat = db_->GetTableRowCount(table_info.table_id_, row_count); ASSERT_TRUE(stat.ok()); ASSERT_EQ(row_count, nb * 2 - 1); @@ -510,15 +668,15 @@ TEST_F(DeleteTest, delete_add_auto_flush) { qb.float_data_.resize(TABLE_DIM); qb.vector_count_ = 1; qb.id_array_.clear(); - stat = db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, qb, result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, json_params, qb, result_ids, result_distances); ASSERT_EQ(result_ids[0], xb2.id_array_.front()); ASSERT_LT(result_distances[0], 1e-4); result_ids.clear(); result_distances.clear(); - stat = db_->QueryByID(dummy_context_, GetTableName(), tags, topk, nprobe, ids_to_delete.front(), result_ids, - result_distances); + stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, {{"nprobe", nprobe}}, ids_to_delete.front(), + result_ids, result_distances); ASSERT_EQ(result_ids[0], -1); ASSERT_EQ(result_distances[0], std::numeric_limits::max()); } @@ -528,7 +686,7 @@ 
TEST_F(CompactTest, compact_basic) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -537,7 +695,7 @@ TEST_F(CompactTest, compact_basic) { milvus::engine::VectorsData xb; BuildVectors(nb, xb); - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); stat = db_->Flush(); @@ -546,18 +704,18 @@ TEST_F(CompactTest, compact_basic) { std::vector ids_to_delete; ids_to_delete.emplace_back(xb.id_array_.front()); ids_to_delete.emplace_back(xb.id_array_.back()); - stat = db_->DeleteVectors(GetTableName(), ids_to_delete); + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); ASSERT_TRUE(stat.ok()); stat = db_->Flush(); ASSERT_TRUE(stat.ok()); uint64_t row_count; - stat = db_->GetTableRowCount(GetTableName(), row_count); + stat = db_->GetTableRowCount(table_info.table_id_, row_count); ASSERT_TRUE(stat.ok()); ASSERT_EQ(row_count, nb - 2); - stat = db_->Compact(GetTableName()); + stat = db_->Compact(table_info.table_id_); ASSERT_TRUE(stat.ok()); const int topk = 1, nprobe = 1; @@ -569,8 +727,8 @@ TEST_F(CompactTest, compact_basic) { milvus::engine::VectorsData qb = xb; for (auto& id : ids_to_delete) { - stat = - db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, id, result_ids, result_distances); + stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, json_params, id, result_ids, + result_distances); ASSERT_EQ(result_ids[0], -1); ASSERT_EQ(result_distances[0], std::numeric_limits::max()); } @@ -583,7 +741,7 @@ TEST_F(CompactTest, compact_with_index) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + 
table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -597,7 +755,7 @@ TEST_F(CompactTest, compact_with_index) { xb.id_array_.emplace_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -618,7 +776,7 @@ TEST_F(CompactTest, compact_with_index) { milvus::engine::TableIndex index; index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8; - stat = db_->CreateIndex(GetTableName(), index); + stat = db_->CreateIndex(table_info.table_id_, index); ASSERT_TRUE(stat.ok()); stat = db_->Flush(); @@ -628,25 +786,25 @@ TEST_F(CompactTest, compact_with_index) { for (auto& kv : search_vectors) { ids_to_delete.emplace_back(kv.first); } - stat = db_->DeleteVectors(GetTableName(), ids_to_delete); + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); stat = db_->Flush(); ASSERT_TRUE(stat.ok()); uint64_t row_count; - stat = db_->GetTableRowCount(GetTableName(), row_count); + stat = db_->GetTableRowCount(table_info.table_id_, row_count); ASSERT_TRUE(stat.ok()); ASSERT_EQ(row_count, nb - ids_to_delete.size()); - stat = db_->Compact(GetTableName()); + stat = db_->Compact(table_info.table_id_); ASSERT_TRUE(stat.ok()); - stat = db_->GetTableRowCount(GetTableName(), row_count); + stat = db_->GetTableRowCount(table_info.table_id_, row_count); ASSERT_TRUE(stat.ok()); ASSERT_EQ(row_count, nb - ids_to_delete.size()); milvus::engine::TableIndex table_index; - stat = db_->DescribeIndex(GetTableName(), table_index); + stat = db_->DescribeIndex(table_info.table_id_, table_index); ASSERT_TRUE(stat.ok()); ASSERT_FLOAT_EQ(table_index.engine_type_, index.engine_type_); @@ -659,8 +817,8 @@ TEST_F(CompactTest, compact_with_index) { std::vector tags; milvus::engine::ResultIds result_ids; milvus::engine::ResultDistances result_distances; - stat = - 
db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, search, result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, json_params, search, result_ids, + result_distances); ASSERT_NE(result_ids[0], pair.first); // ASSERT_LT(result_distances[0], 1e-4); ASSERT_GT(result_distances[0], 1); diff --git a/core/unittest/db/test_meta.cpp b/core/unittest/db/test_meta.cpp index 820e459670..a0a81f2e07 100644 --- a/core/unittest/db/test_meta.cpp +++ b/core/unittest/db/test_meta.cpp @@ -383,6 +383,47 @@ TEST_F(MetaTest, TABLE_FILE_TEST) { ASSERT_EQ(table_file.file_type_, new_file_type); } +TEST_F(MetaTest, TABLE_FILE_ROW_COUNT_TEST) { + auto table_id = "row_count_test_table"; + + milvus::engine::meta::TableSchema table; + table.table_id_ = table_id; + table.dimension_ = 256; + auto status = impl_->CreateTable(table); + + milvus::engine::meta::TableFileSchema table_file; + table_file.row_count_ = 100; + table_file.table_id_ = table.table_id_; + table_file.file_type_ = 1; + status = impl_->CreateTableFile(table_file); + + uint64_t cnt = 0; + status = impl_->Count(table_id, cnt); + ASSERT_EQ(table_file.row_count_, cnt); + + table_file.row_count_ = 99999; + milvus::engine::meta::TableFilesSchema table_files = {table_file}; + status = impl_->UpdateTableFilesRowCount(table_files); + ASSERT_TRUE(status.ok()); + + cnt = 0; + status = impl_->Count(table_id, cnt); + ASSERT_EQ(table_file.row_count_, cnt); + + std::vector ids = {table_file.id_}; + milvus::engine::meta::TableFilesSchema schemas; + status = impl_->GetTableFiles(table_id, ids, schemas); + ASSERT_EQ(schemas.size(), 1UL); + ASSERT_EQ(table_file.row_count_, schemas[0].row_count_); + ASSERT_EQ(table_file.file_id_, schemas[0].file_id_); + ASSERT_EQ(table_file.file_type_, schemas[0].file_type_); + ASSERT_EQ(table_file.segment_id_, schemas[0].segment_id_); + ASSERT_EQ(table_file.table_id_, schemas[0].table_id_); + ASSERT_EQ(table_file.engine_type_, schemas[0].engine_type_); + 
ASSERT_EQ(table_file.dimension_, schemas[0].dimension_); + ASSERT_EQ(table_file.flush_lsn_, schemas[0].flush_lsn_); +} + TEST_F(MetaTest, ARCHIVE_TEST_DAYS) { srand(time(0)); milvus::engine::DBMetaOptions options; diff --git a/core/unittest/db/test_meta_mysql.cpp b/core/unittest/db/test_meta_mysql.cpp index 75937e245d..6411eee53d 100644 --- a/core/unittest/db/test_meta_mysql.cpp +++ b/core/unittest/db/test_meta_mysql.cpp @@ -271,6 +271,47 @@ TEST_F(MySqlMetaTest, TABLE_FILE_TEST) { ASSERT_TRUE(status.ok()); } +TEST_F(MySqlMetaTest, TABLE_FILE_ROW_COUNT_TEST) { + auto table_id = "row_count_test_table"; + + milvus::engine::meta::TableSchema table; + table.table_id_ = table_id; + table.dimension_ = 256; + auto status = impl_->CreateTable(table); + + milvus::engine::meta::TableFileSchema table_file; + table_file.row_count_ = 100; + table_file.table_id_ = table.table_id_; + table_file.file_type_ = 1; + status = impl_->CreateTableFile(table_file); + + uint64_t cnt = 0; + status = impl_->Count(table_id, cnt); + ASSERT_EQ(table_file.row_count_, cnt); + + table_file.row_count_ = 99999; + milvus::engine::meta::TableFilesSchema table_files = {table_file}; + status = impl_->UpdateTableFilesRowCount(table_files); + ASSERT_TRUE(status.ok()); + + cnt = 0; + status = impl_->Count(table_id, cnt); + ASSERT_EQ(table_file.row_count_, cnt); + + std::vector ids = {table_file.id_}; + milvus::engine::meta::TableFilesSchema schemas; + status = impl_->GetTableFiles(table_id, ids, schemas); + ASSERT_EQ(schemas.size(), 1UL); + ASSERT_EQ(table_file.row_count_, schemas[0].row_count_); + ASSERT_EQ(table_file.file_id_, schemas[0].file_id_); + ASSERT_EQ(table_file.file_type_, schemas[0].file_type_); + ASSERT_EQ(table_file.segment_id_, schemas[0].segment_id_); + ASSERT_EQ(table_file.table_id_, schemas[0].table_id_); + ASSERT_EQ(table_file.engine_type_, schemas[0].engine_type_); + ASSERT_EQ(table_file.dimension_, schemas[0].dimension_); + ASSERT_EQ(table_file.flush_lsn_, schemas[0].flush_lsn_); +} + 
TEST_F(MySqlMetaTest, ARCHIVE_TEST_DAYS) { fiu_init(0); diff --git a/core/unittest/db/test_search_by_id.cpp b/core/unittest/db/test_search_by_id.cpp index a4edb301b0..568626296b 100644 --- a/core/unittest/db/test_search_by_id.cpp +++ b/core/unittest/db/test_search_by_id.cpp @@ -68,7 +68,7 @@ TEST_F(SearchByIdTest, basic) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -81,7 +81,7 @@ TEST_F(SearchByIdTest, basic) { xb.id_array_.push_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -108,7 +108,8 @@ TEST_F(SearchByIdTest, basic) { milvus::engine::ResultIds result_ids; milvus::engine::ResultDistances result_distances; - stat = db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, i, result_ids, result_distances); + stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, json_params, i, result_ids, + result_distances); ASSERT_EQ(result_ids[0], i); ASSERT_LT(result_distances[0], 1e-4); } @@ -119,7 +120,7 @@ TEST_F(SearchByIdTest, with_index) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -132,7 +133,7 @@ TEST_F(SearchByIdTest, with_index) { xb.id_array_.push_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -153,7 +154,7 @@ TEST_F(SearchByIdTest, with_index) { 
milvus::engine::TableIndex index; index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8; index.extra_params_ = {{"nlist", 10}}; - stat = db_->CreateIndex(GetTableName(), index); + stat = db_->CreateIndex(table_info.table_id_, index); ASSERT_TRUE(stat.ok()); const int topk = 10, nprobe = 10; @@ -165,7 +166,8 @@ TEST_F(SearchByIdTest, with_index) { milvus::engine::ResultIds result_ids; milvus::engine::ResultDistances result_distances; - stat = db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, i, result_ids, result_distances); + stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, json_params, i, result_ids, + result_distances); ASSERT_EQ(result_ids[0], i); ASSERT_LT(result_distances[0], 1e-3); } @@ -176,7 +178,7 @@ TEST_F(SearchByIdTest, with_delete) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -189,7 +191,7 @@ TEST_F(SearchByIdTest, with_delete) { xb.id_array_.push_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -211,7 +213,7 @@ TEST_F(SearchByIdTest, with_delete) { for (auto& id : ids_to_search) { ids_to_delete.emplace_back(id); } - stat = db_->DeleteVectors(GetTableName(), ids_to_delete); + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); stat = db_->Flush(); ASSERT_TRUE(stat.ok()); @@ -225,7 +227,8 @@ TEST_F(SearchByIdTest, with_delete) { milvus::engine::ResultIds result_ids; milvus::engine::ResultDistances result_distances; - stat = db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, i, result_ids, result_distances); + stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, 
json_params, i, result_ids, + result_distances); ASSERT_EQ(result_ids[0], -1); ASSERT_EQ(result_distances[0], std::numeric_limits::max()); } @@ -236,7 +239,7 @@ TEST_F(GetVectorByIdTest, basic) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -249,7 +252,7 @@ TEST_F(GetVectorByIdTest, basic) { xb.id_array_.push_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -277,11 +280,11 @@ TEST_F(GetVectorByIdTest, basic) { milvus::engine::ResultDistances result_distances; milvus::engine::VectorsData vector; - stat = db_->GetVectorByID(GetTableName(), id, vector); + stat = db_->GetVectorByID(table_info.table_id_, id, vector); ASSERT_TRUE(stat.ok()); - stat = - db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, vector, result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, json_params, vector, result_ids, + result_distances); ASSERT_TRUE(stat.ok()); ASSERT_EQ(result_ids[0], id); ASSERT_LT(result_distances[0], 1e-4); @@ -293,7 +296,7 @@ TEST_F(GetVectorByIdTest, with_index) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -306,7 +309,7 @@ TEST_F(GetVectorByIdTest, with_index) { xb.id_array_.push_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -327,7 +330,7 @@ 
TEST_F(GetVectorByIdTest, with_index) { milvus::engine::TableIndex index; index.extra_params_ = {{"nlist", 10}}; index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8; - stat = db_->CreateIndex(GetTableName(), index); + stat = db_->CreateIndex(table_info.table_id_, index); ASSERT_TRUE(stat.ok()); const int topk = 10, nprobe = 10; @@ -340,11 +343,11 @@ TEST_F(GetVectorByIdTest, with_index) { milvus::engine::ResultDistances result_distances; milvus::engine::VectorsData vector; - stat = db_->GetVectorByID(GetTableName(), id, vector); + stat = db_->GetVectorByID(table_info.table_id_, id, vector); ASSERT_TRUE(stat.ok()); - stat = - db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, vector, result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, json_params, vector, result_ids, + result_distances); ASSERT_EQ(result_ids[0], id); ASSERT_LT(result_distances[0], 1e-3); } @@ -355,7 +358,7 @@ TEST_F(GetVectorByIdTest, with_delete) { auto stat = db_->CreateTable(table_info); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -368,7 +371,7 @@ TEST_F(GetVectorByIdTest, with_delete) { xb.id_array_.push_back(i); } - stat = db_->InsertVectors(GetTableName(), "", xb); + stat = db_->InsertVectors(table_info.table_id_, "", xb); ASSERT_TRUE(stat.ok()); std::random_device rd; @@ -390,7 +393,7 @@ TEST_F(GetVectorByIdTest, with_delete) { for (auto& id : ids_to_search) { ids_to_delete.emplace_back(id); } - stat = db_->DeleteVectors(GetTableName(), ids_to_delete); + stat = db_->DeleteVectors(table_info.table_id_, ids_to_delete); stat = db_->Flush(); ASSERT_TRUE(stat.ok()); @@ -402,7 +405,7 @@ TEST_F(GetVectorByIdTest, with_delete) { milvus::engine::ResultDistances result_distances; 
milvus::engine::VectorsData vector; - stat = db_->GetVectorByID(GetTableName(), id, vector); + stat = db_->GetVectorByID(table_info.table_id_, id, vector); ASSERT_TRUE(stat.ok()); ASSERT_TRUE(vector.float_data_.empty()); ASSERT_EQ(vector.vector_count_, 0); @@ -419,7 +422,7 @@ TEST_F(SearchByIdTest, BINARY) { ASSERT_TRUE(stat.ok()); milvus::engine::meta::TableSchema table_info_get; - table_info_get.table_id_ = GetTableName(); + table_info_get.table_id_ = table_info.table_id_; stat = db_->DescribeTable(table_info_get); ASSERT_TRUE(stat.ok()); ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); @@ -445,7 +448,7 @@ TEST_F(SearchByIdTest, BINARY) { vectors.id_array_.emplace_back(k * nb + i); } - stat = db_->InsertVectors(GetTableName(), "", vectors); + stat = db_->InsertVectors(table_info.table_id_, "", vectors); ASSERT_TRUE(stat.ok()); } @@ -465,7 +468,7 @@ TEST_F(SearchByIdTest, BINARY) { ASSERT_TRUE(stat.ok()); uint64_t row_count; - stat = db_->GetTableRowCount(GetTableName(), row_count); + stat = db_->GetTableRowCount(table_info.table_id_, row_count); ASSERT_TRUE(stat.ok()); ASSERT_EQ(row_count, nb * insert_loop); @@ -479,12 +482,12 @@ TEST_F(SearchByIdTest, BINARY) { milvus::engine::ResultDistances result_distances; milvus::engine::VectorsData vector; - stat = db_->GetVectorByID(GetTableName(), id, vector); + stat = db_->GetVectorByID(table_info.table_id_, id, vector); ASSERT_TRUE(stat.ok()); ASSERT_EQ(vector.vector_count_, 1); - stat = - db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, vector, result_ids, result_distances); + stat = db_->Query(dummy_context_, table_info.table_id_, tags, topk, json_params, vector, result_ids, + result_distances); ASSERT_TRUE(stat.ok()); ASSERT_EQ(result_ids[0], id); ASSERT_LT(result_distances[0], 1e-4); @@ -493,8 +496,8 @@ TEST_F(SearchByIdTest, BINARY) { result_ids.clear(); result_distances.clear(); - stat = - db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, id, result_ids, result_distances); 
+ stat = db_->QueryByID(dummy_context_, table_info.table_id_, tags, topk, json_params, id, result_ids, + result_distances); ASSERT_TRUE(stat.ok()); ASSERT_EQ(result_ids[0], id); ASSERT_LT(result_distances[0], 1e-4); diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index c68dce1f0e..d6974b1729 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -685,10 +685,10 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_FAIL_TEST) { milvus::Status s = config.LoadConfigFile(config_path + VALID_CONFIG_FILE); ASSERT_TRUE(s.ok()); - // fiu_enable("check_config_version_fail", 1, NULL, 0); - // s = config.ValidateConfig(); - // ASSERT_FALSE(s.ok()); - // fiu_disable("check_config_version_fail"); + fiu_enable("check_config_version_fail", 1, NULL, 0); + s = config.ValidateConfig(); + ASSERT_FALSE(s.ok()); + fiu_disable("check_config_version_fail"); /* server config */ fiu_enable("check_config_address_fail", 1, NULL, 0); diff --git a/sdk/include/MilvusApi.h b/sdk/include/MilvusApi.h index 743248744c..50f012ff62 100644 --- a/sdk/include/MilvusApi.h +++ b/sdk/include/MilvusApi.h @@ -84,19 +84,19 @@ using TopKQueryResult = std::vector; ///< Topk query result * @brief Index parameters * Note: extra_params is extra parameters list, it must be json format * For different index type, parameter list is different accordingly, for example: - * FLAT/IVFLAT/SQ8: "{nlist: '16384'}" + * FLAT/IVFLAT/SQ8: {nlist: 16384} * ///< nlist range:[1, 999999] - * IVFPQ: "{nlist: '16384', m: "12"}" + * IVFPQ: {nlist: 16384, m: 12} * ///< nlist range:[1, 999999] * ///< m is decided by dim and have a couple of results. 
- * NSG: "{search_length: '45', out_degree:'50', candidate_pool_size:'300', "knng":'100'}" + * NSG: {search_length: 45, out_degree:50, candidate_pool_size:300, knng:100} * ///< search_length range:[10, 300] * ///< out_degree range:[5, 300] * ///< candidate_pool_size range:[50, 1000] * ///< knng range:[5, 300] - * HNSW "{M: '16', efConstruction:'500'}" + * HNSW {M: 16, efConstruction:300} * ///< M range:[5, 48] - * ///< efConstruction range:[topk, 4096] + * ///< efConstruction range:[100, 500] */ struct IndexParam { std::string collection_name; ///< Collection name for create index @@ -386,12 +386,12 @@ class Connection { * @param extra_params, extra search parameters according to different index type, must be json format. * Note: extra_params is extra parameters list, it must be json format, for example: * For different index type, parameter list is different accordingly - * FLAT/IVFLAT/SQ8/IVFPQ: "{nprobe: '32'}" + * FLAT/IVFLAT/SQ8/IVFPQ: {nprobe: 32} * ///< nprobe range:[1,999999] - * NSG: "{search_length:'100'} + * NSG: {search_length:100} * ///< search_length range:[10, 300] - * HNSW "{ef: '64'} - * ///< ef range:[k, 4096] + * HNSW {ef: 64} + * ///< ef range:[topk, 4096] * @param topk_query_result, result array. * * @return Indicate if query is successful.