diff --git a/CHANGELOG.md b/CHANGELOG.md index bc790e8a61..686e005f7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,16 +12,16 @@ Please mark all change in change log and use the issue from GitHub - \#831 Judge branch error in CommonUtil.cpp - \#977 Server crash when create tables concurrently - \#990 Check gpu resources setting when assign repeated value -- \#995 table count set to 0 if no tables found +- \#995 Table count set to 0 if no tables found - \#1010 Improve error message when offset or page_size is equal 0 -- \#1022 Check if partition name is valid -- \#1028 check if table exists when show partitions -- \#1029 check if table exists when try to delete partition -- \#1066 optimize http insert and search speed +- \#1022 Check if partition name is legal +- \#1028 Check if table exists when show partitions +- \#1029 Check if table exists when try to delete partition +- \#1066 Optimize http insert and search speed - \#1067 Add binary vectors support in http server - \#1075 Improve error message when page size or offset is illegal - \#1082 Check page_size or offset value to avoid float -- \#1115 http server support load table into memory +- \#1115 Http server support load table into memory - \#1152 Error log output continuously after server start - \#1211 Server down caused by searching with index_type: HNSW - \#1240 Update license declaration @@ -48,6 +48,7 @@ Please mark all change in change log and use the issue from GitHub - \#1556 Index file not created after table and index created - \#1560 Search crashed with Super-high dimensional binary vector - \#1564 Too low recall for glove-200-angular, ivf_pq index +- \#1571 Meta engine type become IDMAP after dropping index for BINARY table - \#1574 Set all existing bitset in cache when applying deletes - \#1577 Row count incorrect if delete vectors then create index diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 29ed17308a..f83c0d2b5a 100644 --- a/core/src/db/DBImpl.cpp +++ 
b/core/src/db/DBImpl.cpp @@ -366,8 +366,8 @@ DBImpl::PreloadTable(const std::string& table_id) { if (file.file_type_ == meta::TableFileSchema::FILE_TYPE::RAW || file.file_type_ == meta::TableFileSchema::FILE_TYPE::TO_INDEX || file.file_type_ == meta::TableFileSchema::FILE_TYPE::BACKUP) { - engine_type = server::ValidationUtil::IsBinaryMetricType(file.metric_type_) ? EngineType::FAISS_BIN_IDMAP - : EngineType::FAISS_IDMAP; + engine_type = + utils::IsBinaryMetricType(file.metric_type_) ? EngineType::FAISS_BIN_IDMAP : EngineType::FAISS_IDMAP; } else { engine_type = (EngineType)file.engine_type_; } @@ -731,7 +731,7 @@ DBImpl::Compact(const std::string& table_id) { break; } } else { - ENGINE_LOG_ERROR << "Segment " << file.segment_id_ << " has no deleted data. No need to compact"; + ENGINE_LOG_DEBUG << "Segment " << file.segment_id_ << " has no deleted data. No need to compact"; } } @@ -739,7 +739,7 @@ DBImpl::Compact(const std::string& table_id) { ENGINE_LOG_DEBUG << "Finished compacting table: " << table_id; } - ENGINE_LOG_ERROR << "Updating meta after compaction..."; + ENGINE_LOG_DEBUG << "Updating meta after compaction..."; /* // Drop index again, in case some files were in the index building process during compacting @@ -1019,7 +1019,7 @@ DBImpl::GetVectorByIdHelper(const std::string& table_id, IDNumber vector_id, Vec auto deleted = std::find(deleted_docs.begin(), deleted_docs.end(), offset); if (deleted == deleted_docs.end()) { // Load raw vector - bool is_binary = server::ValidationUtil::IsBinaryMetricType(file.metric_type_); + bool is_binary = utils::IsBinaryMetricType(file.metric_type_); size_t single_vector_bytes = is_binary ? 
file.dimension_ / 8 : file.dimension_ * sizeof(float); std::vector raw_vector; status = segment_reader.LoadVectors(offset * single_vector_bytes, single_vector_bytes, raw_vector); diff --git a/core/src/db/Utils.cpp b/core/src/db/Utils.cpp index f9ac5dfe40..d184cea085 100644 --- a/core/src/db/Utils.cpp +++ b/core/src/db/Utils.cpp @@ -220,6 +220,19 @@ IsRawIndexType(int32_t type) { return (type == (int32_t)EngineType::FAISS_IDMAP) || (type == (int32_t)EngineType::FAISS_BIN_IDMAP); } +bool +IsBinaryIndexType(int32_t index_type) { + return (index_type == (int32_t)engine::EngineType::FAISS_BIN_IDMAP) || + (index_type == (int32_t)engine::EngineType::FAISS_BIN_IVFFLAT); +} + +bool +IsBinaryMetricType(int32_t metric_type) { + return (metric_type == (int32_t)engine::MetricType::HAMMING) || + (metric_type == (int32_t)engine::MetricType::JACCARD) || + (metric_type == (int32_t)engine::MetricType::TANIMOTO); +} + meta::DateT GetDate(const std::time_t& t, int day_delta) { struct tm ltm; diff --git a/core/src/db/Utils.h b/core/src/db/Utils.h index 88197d7d5c..c78b4fd717 100644 --- a/core/src/db/Utils.h +++ b/core/src/db/Utils.h @@ -48,6 +48,14 @@ IsSameIndex(const TableIndex& index1, const TableIndex& index2); bool IsRawIndexType(int32_t type); +// Returns true when index_type equals EngineType::FAISS_BIN_IDMAP or EngineType::FAISS_BIN_IVFFLAT. +bool +IsBinaryIndexType(int32_t index_type); + +// Returns true when metric_type equals MetricType::HAMMING, MetricType::JACCARD or MetricType::TANIMOTO. +bool +IsBinaryMetricType(int32_t metric_type); + meta::DateT GetDate(const std::time_t& t, int day_delta = 0); meta::DateT diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 1fe820aa7a..9cb1dfb09f 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -100,9 +100,8 @@ ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, const std::string& index_type_(index_type), metric_type_(metric_type), index_params_(index_params) { - EngineType tmp_index_type = server::ValidationUtil::IsBinaryMetricType((int32_t)metric_type) - ? 
EngineType::FAISS_BIN_IDMAP - : EngineType::FAISS_IDMAP; + EngineType tmp_index_type = + utils::IsBinaryMetricType((int32_t)metric_type) ? EngineType::FAISS_BIN_IDMAP : EngineType::FAISS_IDMAP; index_ = CreatetVecIndex(tmp_index_type); if (!index_) { throw Exception(DB_ERROR, "Unsupported index type"); diff --git a/core/src/db/meta/MetaTypes.h b/core/src/db/meta/MetaTypes.h index d8df8c2622..611df80d98 100644 --- a/core/src/db/meta/MetaTypes.h +++ b/core/src/db/meta/MetaTypes.h @@ -54,7 +54,7 @@ struct TableSchema { int64_t flag_ = 0; int64_t index_file_size_ = DEFAULT_INDEX_FILE_SIZE; int32_t engine_type_ = DEFAULT_ENGINE_TYPE; - std::string index_params_ = "{ \"nlist\": 16384 }"; + std::string index_params_ = "{}"; int32_t metric_type_ = DEFAULT_METRIC_TYPE; std::string owner_table_; std::string partition_tag_; diff --git a/core/src/db/meta/MySQLMetaImpl.cpp b/core/src/db/meta/MySQLMetaImpl.cpp index b1d28c2d34..bd0d4de96d 100644 --- a/core/src/db/meta/MySQLMetaImpl.cpp +++ b/core/src/db/meta/MySQLMetaImpl.cpp @@ -1326,8 +1326,12 @@ MySQLMetaImpl::DropTableIndex(const std::string& table_id) { } // set table index type to raw - dropTableIndexQuery << "UPDATE " << META_TABLES - << " SET engine_type = " << std::to_string(DEFAULT_ENGINE_TYPE) + dropTableIndexQuery << "UPDATE " << META_TABLES << " SET engine_type = " + << " (CASE" + << " WHEN metric_type in (" << (int32_t)MetricType::HAMMING << " ," + << (int32_t)MetricType::JACCARD << " ," << (int32_t)MetricType::TANIMOTO << ")" + << " THEN " << (int32_t)EngineType::FAISS_BIN_IDMAP << " ELSE " + << (int32_t)EngineType::FAISS_IDMAP << " END)" << " , index_params = '{}'" << " WHERE table_id = " << mysqlpp::quote << table_id << ";"; diff --git a/core/src/db/meta/SqliteMetaImpl.cpp b/core/src/db/meta/SqliteMetaImpl.cpp index ca37f2c258..ce5ff99742 100644 --- a/core/src/db/meta/SqliteMetaImpl.cpp +++ b/core/src/db/meta/SqliteMetaImpl.cpp @@ -804,8 +804,18 @@ SqliteMetaImpl::DropTableIndex(const std::string& table_id) { 
c(&TableFileSchema::file_type_) == (int)TableFileSchema::BACKUP)); // set table index type to raw + auto groups = ConnectorPtr->select(columns(&TableSchema::metric_type_), + where(c(&TableSchema::table_id_) == table_id)); + + int32_t raw_engine_type = DEFAULT_ENGINE_TYPE; + if (groups.size() == 1) { + int32_t metric_type_ = std::get<0>(groups[0]); + if (engine::utils::IsBinaryMetricType(metric_type_)) { + raw_engine_type = (int32_t)EngineType::FAISS_BIN_IDMAP; + } + } ConnectorPtr->update_all( - set(c(&TableSchema::engine_type_) = DEFAULT_ENGINE_TYPE, c(&TableSchema::index_params_) = "{}"), + set(c(&TableSchema::engine_type_) = raw_engine_type, c(&TableSchema::index_params_) = "{}"), where(c(&TableSchema::table_id_) == table_id)); ENGINE_LOG_DEBUG << "Successfully drop table index, table id = " << table_id; @@ -1189,29 +1199,21 @@ SqliteMetaImpl::FilesByType(const std::string& table_id, const std::vector& file_schema.metric_type_ = table_schema.metric_type_; switch (file_schema.file_type_) { - case (int)TableFileSchema::RAW: - ++raw_count; + case (int)TableFileSchema::RAW:++raw_count; break; - case (int)TableFileSchema::NEW: - ++new_count; + case (int)TableFileSchema::NEW:++new_count; break; - case (int)TableFileSchema::NEW_MERGE: - ++new_merge_count; + case (int)TableFileSchema::NEW_MERGE:++new_merge_count; break; - case (int)TableFileSchema::NEW_INDEX: - ++new_index_count; + case (int)TableFileSchema::NEW_INDEX:++new_index_count; break; - case (int)TableFileSchema::TO_INDEX: - ++to_index_count; + case (int)TableFileSchema::TO_INDEX:++to_index_count; break; - case (int)TableFileSchema::INDEX: - ++index_count; + case (int)TableFileSchema::INDEX:++index_count; break; - case (int)TableFileSchema::BACKUP: - ++backup_count; - break; - default: + case (int)TableFileSchema::BACKUP:++backup_count; break; + default:break; } auto status = utils::GetTableFilePath(options_, file_schema); @@ -1225,29 +1227,23 @@ SqliteMetaImpl::FilesByType(const std::string& table_id, const 
std::vector& std::string msg = "Get table files by type."; for (int file_type : file_types) { switch (file_type) { - case (int)TableFileSchema::RAW: - msg = msg + " raw files:" + std::to_string(raw_count); + case (int)TableFileSchema::RAW:msg = msg + " raw files:" + std::to_string(raw_count); break; - case (int)TableFileSchema::NEW: - msg = msg + " new files:" + std::to_string(new_count); + case (int)TableFileSchema::NEW:msg = msg + " new files:" + std::to_string(new_count); break; - case (int)TableFileSchema::NEW_MERGE: - msg = msg + " new_merge files:" + std::to_string(new_merge_count); + case (int)TableFileSchema::NEW_MERGE:msg = msg + " new_merge files:" + + std::to_string(new_merge_count); break; - case (int)TableFileSchema::NEW_INDEX: - msg = msg + " new_index files:" + std::to_string(new_index_count); + case (int)TableFileSchema::NEW_INDEX:msg = msg + " new_index files:" + + std::to_string(new_index_count); break; - case (int)TableFileSchema::TO_INDEX: - msg = msg + " to_index files:" + std::to_string(to_index_count); + case (int)TableFileSchema::TO_INDEX:msg = msg + " to_index files:" + std::to_string(to_index_count); break; - case (int)TableFileSchema::INDEX: - msg = msg + " index files:" + std::to_string(index_count); + case (int)TableFileSchema::INDEX:msg = msg + " index files:" + std::to_string(index_count); break; - case (int)TableFileSchema::BACKUP: - msg = msg + " backup files:" + std::to_string(backup_count); - break; - default: + case (int)TableFileSchema::BACKUP:msg = msg + " backup files:" + std::to_string(backup_count); break; + default:break; } } ENGINE_LOG_DEBUG << msg; diff --git a/core/src/scheduler/task/BuildIndexTask.cpp b/core/src/scheduler/task/BuildIndexTask.cpp index 1ae0def7b7..feec750e5a 100644 --- a/core/src/scheduler/task/BuildIndexTask.cpp +++ b/core/src/scheduler/task/BuildIndexTask.cpp @@ -18,6 +18,7 @@ #include #include +#include "db/Utils.h" #include "db/engine/EngineFactory.h" #include "metrics/Metrics.h" #include 
"scheduler/job/BuildIndexJob.h" @@ -36,8 +37,8 @@ XBuildIndexTask::XBuildIndexTask(TableFileSchemaPtr file, TaskLabelPtr label) if (file->file_type_ == TableFileSchema::FILE_TYPE::RAW || file->file_type_ == TableFileSchema::FILE_TYPE::TO_INDEX || file->file_type_ == TableFileSchema::FILE_TYPE::BACKUP) { - engine_type = server::ValidationUtil::IsBinaryMetricType(file->metric_type_) ? EngineType::FAISS_BIN_IDMAP - : EngineType::FAISS_IDMAP; + engine_type = engine::utils::IsBinaryMetricType(file->metric_type_) ? EngineType::FAISS_BIN_IDMAP + : EngineType::FAISS_IDMAP; } else { engine_type = (EngineType)file->engine_type_; } diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index cf96dc504a..49b225eea8 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -110,8 +110,8 @@ XSearchTask::XSearchTask(const std::shared_ptr& context, TableF if (file->file_type_ == TableFileSchema::FILE_TYPE::RAW || file->file_type_ == TableFileSchema::FILE_TYPE::TO_INDEX || file->file_type_ == TableFileSchema::FILE_TYPE::BACKUP) { - engine_type = server::ValidationUtil::IsBinaryMetricType(file->metric_type_) ? EngineType::FAISS_BIN_IDMAP - : EngineType::FAISS_IDMAP; + engine_type = engine::utils::IsBinaryMetricType(file->metric_type_) ? EngineType::FAISS_BIN_IDMAP + : EngineType::FAISS_IDMAP; } else { engine_type = (EngineType)file->engine_type_; } diff --git a/core/src/server/delivery/request/CreateIndexRequest.cpp b/core/src/server/delivery/request/CreateIndexRequest.cpp index 5836b3cc09..521fe7aa58 100644 --- a/core/src/server/delivery/request/CreateIndexRequest.cpp +++ b/core/src/server/delivery/request/CreateIndexRequest.cpp @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. 
#include "server/delivery/request/CreateIndexRequest.h" +#include "db/Utils.h" #include "server/Config.h" #include "server/DBWrapper.h" #include "utils/Log.h" @@ -83,7 +84,7 @@ CreateIndexRequest::OnExecute() { status = DBWrapper::DB()->DescribeTable(table_info); int32_t adapter_index_type = index_type_; - if (ValidationUtil::IsBinaryMetricType(table_info.metric_type_)) { // binary vector not allow + if (engine::utils::IsBinaryMetricType(table_info.metric_type_)) { // binary vector not allow if (adapter_index_type == static_cast(engine::EngineType::FAISS_IDMAP)) { adapter_index_type = static_cast(engine::EngineType::FAISS_BIN_IDMAP); } else if (adapter_index_type == static_cast(engine::EngineType::FAISS_IVFFLAT)) { diff --git a/core/src/server/delivery/request/CreateTableRequest.cpp b/core/src/server/delivery/request/CreateTableRequest.cpp index 15d07b8cfd..d92db5a94d 100644 --- a/core/src/server/delivery/request/CreateTableRequest.cpp +++ b/core/src/server/delivery/request/CreateTableRequest.cpp @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. 
#include "server/delivery/request/CreateTableRequest.h" +#include "db/Utils.h" #include "server/DBWrapper.h" #include "server/delivery/request/BaseRequest.h" #include "utils/Log.h" @@ -78,7 +79,7 @@ CreateTableRequest::OnExecute() { table_info.metric_type_ = metric_type_; // some metric type only support binary vector, adapt the index type - if (ValidationUtil::IsBinaryMetricType(metric_type_)) { + if (engine::utils::IsBinaryMetricType(metric_type_)) { if (table_info.engine_type_ == static_cast(engine::EngineType::FAISS_IDMAP)) { table_info.engine_type_ = static_cast(engine::EngineType::FAISS_BIN_IDMAP); } else if (table_info.engine_type_ == static_cast(engine::EngineType::FAISS_IVFFLAT)) { diff --git a/core/src/server/delivery/request/InsertRequest.cpp b/core/src/server/delivery/request/InsertRequest.cpp index 729dc4d292..a739d86b9f 100644 --- a/core/src/server/delivery/request/InsertRequest.cpp +++ b/core/src/server/delivery/request/InsertRequest.cpp @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. 
#include "server/delivery/request/InsertRequest.h" +#include "db/Utils.h" #include "server/DBWrapper.h" #include "utils/CommonUtil.h" #include "utils/Log.h" @@ -115,7 +116,7 @@ InsertRequest::OnExecute() { #endif // step 4: some metric type doesn't support float vectors if (!vectors_data_.float_data_.empty()) { // insert float vectors - if (ValidationUtil::IsBinaryMetricType(table_schema.metric_type_)) { + if (engine::utils::IsBinaryMetricType(table_schema.metric_type_)) { return Status(SERVER_INVALID_ROWRECORD_ARRAY, "Table metric type doesn't support float vectors."); } @@ -131,7 +132,7 @@ InsertRequest::OnExecute() { "The vector dimension must be equal to the table dimension."); } } else if (!vectors_data_.binary_data_.empty()) { // insert binary vectors - if (!ValidationUtil::IsBinaryMetricType(table_schema.metric_type_)) { + if (!engine::utils::IsBinaryMetricType(table_schema.metric_type_)) { return Status(SERVER_INVALID_ROWRECORD_ARRAY, "Table metric type doesn't support binary vectors."); } diff --git a/core/src/server/delivery/request/SearchRequest.cpp b/core/src/server/delivery/request/SearchRequest.cpp index 0b12f91f5d..e31be87b40 100644 --- a/core/src/server/delivery/request/SearchRequest.cpp +++ b/core/src/server/delivery/request/SearchRequest.cpp @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. 
#include "server/delivery/request/SearchRequest.h" +#include "db/Utils.h" #include "server/DBWrapper.h" #include "utils/CommonUtil.h" #include "utils/Log.h" @@ -103,7 +104,7 @@ SearchRequest::OnExecute() { rc.RecordSection("check validation"); // step 4: check metric type - if (ValidationUtil::IsBinaryMetricType(table_schema.metric_type_)) { + if (engine::utils::IsBinaryMetricType(table_schema.metric_type_)) { // check prepared binary data if (vectors_data_.binary_data_.size() % vector_count != 0) { return Status(SERVER_INVALID_ROWRECORD_ARRAY, diff --git a/core/src/utils/ValidationUtil.cpp b/core/src/utils/ValidationUtil.cpp index 84f260c508..616375f08e 100644 --- a/core/src/utils/ValidationUtil.cpp +++ b/core/src/utils/ValidationUtil.cpp @@ -264,12 +264,6 @@ ValidationUtil::ValidateSearchParams(const milvus::json& search_params, const en return Status::OK(); } -bool -ValidationUtil::IsBinaryIndexType(int32_t index_type) { - return (index_type == static_cast(engine::EngineType::FAISS_BIN_IDMAP)) || - (index_type == static_cast(engine::EngineType::FAISS_BIN_IVFFLAT)); -} - Status ValidationUtil::ValidateTableIndexFileSize(int64_t index_file_size) { if (index_file_size <= 0 || index_file_size > INDEX_FILE_SIZE_LIMIT) { @@ -294,13 +288,6 @@ ValidationUtil::ValidateTableIndexMetricType(int32_t metric_type) { return Status::OK(); } -bool -ValidationUtil::IsBinaryMetricType(int32_t metric_type) { - return (metric_type == static_cast(engine::MetricType::HAMMING)) || - (metric_type == static_cast(engine::MetricType::JACCARD)) || - (metric_type == static_cast(engine::MetricType::TANIMOTO)); -} - Status ValidationUtil::ValidateSearchTopk(int64_t top_k, const engine::meta::TableSchema& table_schema) { if (top_k <= 0 || top_k > 2048) { diff --git a/core/src/utils/ValidationUtil.h b/core/src/utils/ValidationUtil.h index 4fa2df1619..481cb31f1b 100644 --- a/core/src/utils/ValidationUtil.h +++ b/core/src/utils/ValidationUtil.h @@ -43,18 +43,12 @@ class ValidationUtil { 
ValidateSearchParams(const milvus::json& search_params, const engine::meta::TableSchema& table_schema, int64_t topk); - static bool - IsBinaryIndexType(int32_t index_type); - static Status ValidateTableIndexFileSize(int64_t index_file_size); static Status ValidateTableIndexMetricType(int32_t metric_type); - static bool - IsBinaryMetricType(int32_t metric_type); - static Status ValidateSearchTopk(int64_t top_k, const engine::meta::TableSchema& table_schema);