From 223cf1f5478f014240c6fbcb191eb564c28eaf4b Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 21 Aug 2019 16:53:31 +0800 Subject: [PATCH 1/2] MS-338 NewAPI: refine code to support CreateIndex Former-commit-id: b3a60c3de2da5522b2b9becd8ba382825a1c1e05 --- cpp/src/db/DB.h | 3 + cpp/src/db/DBImpl.cpp | 98 ++++--- cpp/src/db/DBImpl.h | 8 +- cpp/src/db/Types.h | 9 + cpp/src/db/Utils.cpp | 7 + cpp/src/db/Utils.h | 3 + cpp/src/db/engine/ExecutionEngine.h | 5 + cpp/src/db/insert/MemTableFile.cpp | 2 +- cpp/src/db/meta/Meta.h | 10 + cpp/src/db/meta/MetaTypes.h | 23 +- cpp/src/db/meta/MySQLMetaImpl.cpp | 91 +++--- cpp/src/db/meta/MySQLMetaImpl.h | 6 + cpp/src/db/meta/SqliteMetaImpl.cpp | 262 ++++++++++++------ cpp/src/db/meta/SqliteMetaImpl.h | 9 + cpp/src/grpc/cpp_gen.sh | 8 +- cpp/src/grpc/gen-milvus/milvus.pb.cc | 255 ++++++----------- cpp/src/grpc/gen-milvus/milvus.pb.h | 48 +--- cpp/src/grpc/milvus.proto | 6 +- cpp/src/grpc/status.proto | 2 + .../examples/grpcsimple/src/ClientTest.cpp | 30 +- cpp/src/sdk/grpc/ClientProxy.cpp | 35 ++- cpp/src/sdk/grpc/ClientProxy.h | 4 +- cpp/src/sdk/grpc/GrpcClient.cpp | 40 +++ cpp/src/sdk/include/MilvusApi.h | 9 +- cpp/src/sdk/interface/ConnectionImpl.cpp | 8 +- cpp/src/sdk/interface/ConnectionImpl.h | 4 +- cpp/src/sdk/thrift/ClientProxy.cpp | 3 +- cpp/src/sdk/thrift/ClientProxy.h | 2 +- .../server/grpc_impl/GrpcRequestHandler.cpp | 14 +- cpp/src/server/grpc_impl/GrpcRequestTask.cpp | 99 ++++++- cpp/src/server/grpc_impl/GrpcRequestTask.h | 3 + cpp/unittest/db/meta_tests.cpp | 2 +- cpp/unittest/db/mysql_meta_test.cpp | 2 +- 33 files changed, 663 insertions(+), 447 deletions(-) diff --git a/cpp/src/db/DB.h b/cpp/src/db/DB.h index 282e762717..b143f0c233 100644 --- a/cpp/src/db/DB.h +++ b/cpp/src/db/DB.h @@ -46,6 +46,9 @@ public: virtual Status Size(uint64_t& result) = 0; virtual Status BuildIndex(const std::string& table_id) = 0; + virtual Status CreateIndex(const std::string& table_id, const TableIndex& index) = 0; + virtual Status DescribeIndex(const std::string& table_id, TableIndex& index) = 0; + virtual Status DropIndex(const std::string& table_id) = 0; virtual Status DropAll() = 0; diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index ddee622669..a649257a0b 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -6,6 +6,7 @@ #include "DBImpl.h" #include "src/db/meta/SqliteMetaImpl.h" #include "Log.h" +#include "Utils.h" #include "engine/EngineFactory.h" #include "Factories.h" #include "metrics/Metrics.h" @@ -470,7 +471,8 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, } else { table_file.file_type_ = meta::TableFileSchema::RAW; } - table_file.size_ = index_size; + table_file.file_size_ = index->PhysicalSize(); + table_file.row_count_ = index->Count(); updated.push_back(table_file); status = meta_ptr_->UpdateTableFiles(updated); ENGINE_LOG_DEBUG << "New merged file " << table_file.file_id_ << @@ -574,7 +576,58 @@ Status DBImpl::BuildIndex(const std::string& table_id) { times++; } return Status::OK(); - /* return BuildIndexByTable(table_id); */ +} + +Status DBImpl::CreateIndex(const std::string& table_id, const TableIndex& index) { + { + std::unique_lock lock(build_index_mutex_); + + //step 1: check index difference + TableIndex old_index; + auto status = DescribeIndex(table_id, old_index); + if(!status.ok()) { + ENGINE_LOG_ERROR << "Failed to get table index info"; + return status; + } + + if(utils::IsSameIndex(old_index, index)) { + ENGINE_LOG_DEBUG << "Same index setting, no need to create index again"; + return Status::OK(); + } + + //step 2: drop old index files + DropIndex(table_id); + + //step 3: update index info + + status = meta_ptr_->UpdateTableIndexParam(table_id, index); + if (!status.ok()) { + ENGINE_LOG_ERROR << "Failed to update table index info"; + return status; + } + } + + bool has = false; + auto status = meta_ptr_->HasNonIndexFiles(table_id, has); + int times = 1; + + while (has) { + ENGINE_LOG_DEBUG << "Non index files detected! Will build index " << times; + status = meta_ptr_->UpdateTableFilesToIndex(table_id); + /* StartBuildIndexTask(true); */ + std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10*1000, times*100))); + status = meta_ptr_->HasNonIndexFiles(table_id, has); + times++; + } + return Status::OK(); +} + +Status DBImpl::DescribeIndex(const std::string& table_id, TableIndex& index) { + return meta_ptr_->DescribeTableIndex(table_id, index); +} + +Status DBImpl::DropIndex(const std::string& table_id) { + return meta_ptr_->DropTableIndex(table_id); } Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { @@ -650,26 +703,27 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { //step 6: update meta table_file.file_type_ = meta::TableFileSchema::INDEX; - table_file.size_ = index->Size(); + table_file.file_size_ = index->PhysicalSize(); + table_file.row_count_ = index->Count(); - auto to_remove = file; - to_remove.file_type_ = meta::TableFileSchema::TO_DELETE; + auto origin_file = file; + origin_file.file_type_ = meta::TableFileSchema::BACKUP; - meta::TableFilesSchema update_files = {table_file, to_remove}; + meta::TableFilesSchema update_files = {table_file, origin_file}; status = meta_ptr_->UpdateTableFiles(update_files); if(status.ok()) { ENGINE_LOG_DEBUG << "New index file " << table_file.file_id_ << " of size " << index->PhysicalSize() << " bytes" - << " from file " << to_remove.file_id_; + << " from file " << origin_file.file_id_; if(options_.insert_cache_immediately_) { index->Cache(); } } else { //failed to update meta, mark the new file as to_delete, don't delete old file - to_remove.file_type_ = meta::TableFileSchema::TO_INDEX; - status = meta_ptr_->UpdateTableFile(to_remove); - ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << to_remove.file_id_ << " to to_index"; + origin_file.file_type_ = meta::TableFileSchema::TO_INDEX; + status = meta_ptr_->UpdateTableFile(origin_file); + ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << origin_file.file_id_ << " to to_index"; table_file.file_type_ = meta::TableFileSchema::TO_DELETE; status = meta_ptr_->UpdateTableFile(table_file); @@ -685,30 +739,6 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { return Status::OK(); } -Status DBImpl::BuildIndexByTable(const std::string& table_id) { - std::unique_lock lock(build_index_mutex_); - meta::TableFilesSchema to_index_files; - meta_ptr_->FilesToIndex(to_index_files); - - Status status; - - for (auto& file : to_index_files) { - status = BuildIndex(file); - if (!status.ok()) { - ENGINE_LOG_ERROR << "Building index for " << file.id_ << " failed: " << status.ToString(); - return status; - } - ENGINE_LOG_DEBUG << "Sync building index for " << file.id_ << " passed"; - - if (shutting_down_.load(std::memory_order_acquire)){ - ENGINE_LOG_DEBUG << "Server will shutdown, skip build index action for table " << table_id; - break; - } - } - - return status; -} - void DBImpl::BackgroundBuildIndex() { ENGINE_LOG_TRACE << " Background build index thread start"; diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index 4aa7ac07bf..97c36fadbc 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -93,6 +93,12 @@ class DBImpl : public DB { Status BuildIndex(const std::string& table_id) override; + Status CreateIndex(const std::string& table_id, const TableIndex& index) override; + + Status DescribeIndex(const std::string& table_id, TableIndex& index) override; + + Status DropIndex(const std::string& table_id) override; + ~DBImpl() override; private: @@ -122,8 +128,6 @@ class DBImpl : public DB { void StartBuildIndexTask(bool force=false); void BackgroundBuildIndex(); - Status - BuildIndexByTable(const std::string& table_id); Status BuildIndex(const meta::TableFileSchema &); diff --git a/cpp/src/db/Types.h b/cpp/src/db/Types.h index acf7feea40..a2ffa606e5 100644 --- a/cpp/src/db/Types.h +++ b/cpp/src/db/Types.h @@ -5,7 +5,10 @@ ******************************************************************************/ #pragma once +#include "db/engine/ExecutionEngine.h" + #include +#include namespace zilliz { namespace milvus { @@ -18,6 +21,12 @@ typedef std::vector IDNumbers; typedef std::vector> QueryResult; typedef std::vector QueryResults; +struct TableIndex { + int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; + int32_t nlist = 16384; + int32_t index_file_size = 1024; //MB + int32_t metric_type = (int)MetricType::L2; +}; } // namespace engine } // namespace milvus diff --git a/cpp/src/db/Utils.cpp b/cpp/src/db/Utils.cpp index 0fe1f76d29..5a0d3cafa2 100644 --- a/cpp/src/db/Utils.cpp +++ b/cpp/src/db/Utils.cpp @@ -142,6 +142,13 @@ Status DeleteTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& return Status::OK(); } +bool IsSameIndex(const TableIndex& index1, const TableIndex& index2) { + return index1.engine_type_ == index2.engine_type_ + && index1.nlist == index2.nlist + && index1.index_file_size == index2.index_file_size + && index1.metric_type == index2.metric_type; +} + } // namespace utils } // namespace engine } // namespace milvus diff --git a/cpp/src/db/Utils.h b/cpp/src/db/Utils.h index 8329f5a1fc..47a8fca9b7 100644 --- a/cpp/src/db/Utils.h +++ b/cpp/src/db/Utils.h @@ -7,6 +7,7 @@ #include "Options.h" #include "db/meta/MetaTypes.h" +#include "db/Types.h" #include @@ -24,6 +25,8 @@ Status CreateTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& Status GetTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); Status DeleteTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); +bool IsSameIndex(const TableIndex& index1, const TableIndex& index2); + } // namespace utils } // namespace engine } // namespace milvus diff --git a/cpp/src/db/engine/ExecutionEngine.h b/cpp/src/db/engine/ExecutionEngine.h index 88be75aeb9..0f2cf42b22 100644 --- a/cpp/src/db/engine/ExecutionEngine.h +++ b/cpp/src/db/engine/ExecutionEngine.h @@ -23,6 +23,11 @@ enum class EngineType { MAX_VALUE = NSG_MIX, }; +enum class MetricType { + L2 = 1, + IP = 2, +}; + class ExecutionEngine { public: diff --git a/cpp/src/db/insert/MemTableFile.cpp b/cpp/src/db/insert/MemTableFile.cpp index 326658df5f..3cbb862389 100644 --- a/cpp/src/db/insert/MemTableFile.cpp +++ b/cpp/src/db/insert/MemTableFile.cpp @@ -86,7 +86,7 @@ Status MemTableFile::Serialize() { execution_engine_->Serialize(); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - table_file_schema_.size_ = size; + table_file_schema_.row_count_ = execution_engine_->Count(); server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size / total_time); diff --git a/cpp/src/db/meta/Meta.h b/cpp/src/db/meta/Meta.h index e88761b446..80ae0fb22e 100644 --- a/cpp/src/db/meta/Meta.h +++ b/cpp/src/db/meta/Meta.h @@ -8,6 +8,7 @@ #include "MetaTypes.h" #include "db/Options.h" #include "db/Status.h" +#include "db/Types.h" #include #include @@ -38,6 +39,9 @@ class Meta { virtual Status AllTables(std::vector &table_schema_array) = 0; + virtual Status + UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) = 0; + virtual Status DeleteTable(const std::string &table_id) = 0; @@ -83,6 +87,12 @@ class Meta { virtual Status HasNonIndexFiles(const std::string &table_id, bool &has) = 0; + virtual Status + DescribeTableIndex(const std::string &table_id, TableIndex& index) = 0; + + virtual Status + DropTableIndex(const std::string &table_id) = 0; + virtual Status CleanUp() = 0; diff --git a/cpp/src/db/meta/MetaTypes.h b/cpp/src/db/meta/MetaTypes.h index 9f58734f39..0e554b2330 100644 --- a/cpp/src/db/meta/MetaTypes.h +++ b/cpp/src/db/meta/MetaTypes.h @@ -28,12 +28,13 @@ struct TableSchema { size_t id_ = 0; std::string table_id_; - int state_ = (int)NORMAL; - size_t files_cnt_ = 0; + int32_t state_ = (int)NORMAL; uint16_t dimension_ = 0; - long created_on_ = 0; - int engine_type_ = (int)EngineType::FAISS_IDMAP; - bool store_raw_data_ = false; + int64_t created_on_ = 0; + int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; + int32_t nlist_ = 16384; + int32_t index_file_size_ = 1024; //MB + int32_t metric_type_ = (int)MetricType::L2; }; // TableSchema struct TableFileSchema { @@ -45,19 +46,21 @@ struct TableFileSchema { TO_DELETE, NEW_MERGE, NEW_INDEX, + BACKUP, } FILE_TYPE; size_t id_ = 0; std::string table_id_; - int engine_type_ = (int)EngineType::FAISS_IDMAP; + int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; std::string file_id_; - int file_type_ = NEW; - size_t size_ = 0; + int32_t file_type_ = NEW; + size_t file_size_ = 0; + size_t row_count_ = 0; DateT date_ = EmptyDate; uint16_t dimension_ = 0; std::string location_; - long updated_time_ = 0; - long created_on_ = 0; + int64_t updated_time_ = 0; + int64_t created_on_ = 0; }; // TableFileSchema typedef std::vector TableFilesSchema; diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index bf4589351e..fa2697ec32 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -167,9 +167,10 @@ Status MySQLMetaImpl::Initialize() { "state INT NOT NULL, " << "dimension SMALLINT NOT NULL, " << "created_on BIGINT NOT NULL, " << - "files_cnt BIGINT DEFAULT 0 NOT NULL, " << "engine_type INT DEFAULT 1 NOT NULL, " << - "store_raw_data BOOL DEFAULT false NOT NULL);"; + "nlist INT DEFAULT 16384 NOT NULL, " << + "index_file_size INT DEFAULT 1024 NOT NULL, " << + "metric_type INT DEFAULT 1 NOT NULL);"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str(); @@ -183,7 +184,8 @@ Status MySQLMetaImpl::Initialize() { "engine_type INT DEFAULT 1 NOT NULL, " << "file_id VARCHAR(255) NOT NULL, " << "file_type INT DEFAULT 0 NOT NULL, " << - "size BIGINT DEFAULT 0 NOT NULL, " << + "file_size BIGINT DEFAULT 0 NOT NULL, " << + "row_count BIGINT DEFAULT 0 NOT NULL, " << "updated_time BIGINT NOT NULL, " << "created_on BIGINT NOT NULL, " << "date INT DEFAULT -1 NOT NULL);"; @@ -325,8 +327,6 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { } } - - table_schema.files_cnt_ = 0; table_schema.id_ = -1; table_schema.created_on_ = utils::GetMicroSecTimeStamp(); @@ -336,13 +336,11 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { std::string state = std::to_string(table_schema.state_); std::string dimension = std::to_string(table_schema.dimension_); std::string created_on = std::to_string(table_schema.created_on_); - std::string files_cnt = "0"; std::string engine_type = std::to_string(table_schema.engine_type_); - std::string store_raw_data = table_schema.store_raw_data_ ? "true" : "false"; createTableQuery << "INSERT INTO Tables VALUES" << "(" << id << ", " << quote << table_id << ", " << state << ", " << dimension << ", " << - created_on << ", " << files_cnt << ", " << engine_type << ", " << store_raw_data << ");"; + created_on << ", " << engine_type << ");"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::CreateTable: " << createTableQuery.str(); @@ -430,6 +428,18 @@ Status MySQLMetaImpl::HasNonIndexFiles(const std::string &table_id, bool &has) { return Status::OK(); } +Status MySQLMetaImpl::UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) { + return Status::OK(); +} + +Status MySQLMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex& index) { + return Status::OK(); +} + +Status MySQLMetaImpl::DropTableIndex(const std::string &table_id) { + return Status::OK(); +} + Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { @@ -561,12 +571,7 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { table_schema.dimension_ = resRow["dimension"]; - table_schema.files_cnt_ = resRow["files_cnt"]; - table_schema.engine_type_ = resRow["engine_type"]; - - int store_raw_data = resRow["store_raw_data"]; - table_schema.store_raw_data_ = (store_raw_data == 1); } else { return Status::NotFound("Table " + table_schema.table_id_ + " not found"); } @@ -668,13 +673,8 @@ Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { table_schema.dimension_ = resRow["dimension"]; - table_schema.files_cnt_ = resRow["files_cnt"]; - table_schema.engine_type_ = resRow["engine_type"]; - int store_raw_data = resRow["store_raw_data"]; - table_schema.store_raw_data_ = (store_raw_data == 1); - table_schema_array.emplace_back(table_schema); } } catch (const BadQuery &er) { @@ -709,7 +709,8 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { NextFileId(file_schema.file_id_); file_schema.dimension_ = table_schema.dimension_; - file_schema.size_ = 0; + file_schema.file_size_ = 0; + file_schema.row_count_ = 0; file_schema.created_on_ = utils::GetMicroSecTimeStamp(); file_schema.updated_time_ = file_schema.created_on_; file_schema.engine_type_ = table_schema.engine_type_; @@ -720,7 +721,7 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { std::string engine_type = std::to_string(file_schema.engine_type_); std::string file_id = file_schema.file_id_; std::string file_type = std::to_string(file_schema.file_type_); - std::string size = std::to_string(file_schema.size_); + std::string row_count = std::to_string(file_schema.row_count_); std::string updated_time = std::to_string(file_schema.updated_time_); std::string created_on = std::to_string(file_schema.created_on_); std::string date = std::to_string(file_schema.date_); @@ -737,7 +738,7 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { createTableFileQuery << "INSERT INTO TableFiles VALUES" << "(" << id << ", " << quote << table_id << ", " << engine_type << ", " << - quote << file_id << ", " << file_type << ", " << size << ", " << + quote << file_id << ", " << file_type << ", " << row_count << ", " << updated_time << ", " << created_on << ", " << date << ");"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::CreateTableFile: " << createTableFileQuery.str(); @@ -792,7 +793,7 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { Query filesToIndexQuery = connectionPtr->query(); - filesToIndexQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToIndexQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << "FROM TableFiles " << "WHERE file_type = " << std::to_string(TableFileSchema::TO_INDEX) << ";"; @@ -819,7 +820,7 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; @@ -877,7 +878,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, if (partition.empty()) { Query filesToSearchQuery = connectionPtr->query(); - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "(file_type = " << std::to_string(TableFileSchema::RAW) << " OR " << @@ -899,7 +900,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, std::string partitionListStr = partitionListSS.str(); partitionListStr = partitionListStr.substr(0, partitionListStr.size() - 2); //remove the last ", " - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "date IN (" << partitionListStr << ") AND " << @@ -938,7 +939,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; @@ -988,7 +989,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, } Query filesToSearchQuery = connectionPtr->query(); - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id; @@ -1049,7 +1050,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; @@ -1097,11 +1098,11 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, Query filesToMergeQuery = connectionPtr->query(); - filesToMergeQuery << "SELECT id, table_id, file_id, file_type, size, date " << + filesToMergeQuery << "SELECT id, table_id, file_id, file_type, file_size, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "file_type = " << std::to_string(TableFileSchema::RAW) << " " << - "ORDER BY size DESC" << ";"; + "ORDER BY row_count DESC" << ";"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::FilesToMerge: " << filesToMergeQuery.str(); @@ -1131,7 +1132,7 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.file_size_ = resRow["file_size"]; table_file.date_ = resRow["date"]; @@ -1189,7 +1190,7 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, Query getTableFileQuery = connectionPtr->query(); - getTableFileQuery << "SELECT id, engine_type, file_id, file_type, size, date " << + getTableFileQuery << "SELECT id, engine_type, file_id, file_type, file_size, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "(" << idStr << ");"; @@ -1222,7 +1223,9 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, file_schema.file_type_ = resRow["file_type"]; - file_schema.size_ = resRow["size"]; + file_schema.file_size_ = resRow["file_size"]; + + file_schema.row_count_ = resRow["row_count"]; file_schema.date_ = resRow["date"]; @@ -1321,7 +1324,7 @@ Status MySQLMetaImpl::Size(uint64_t &result) { Query getSizeQuery = connectionPtr->query(); - getSizeQuery << "SELECT IFNULL(SUM(size),0) AS sum " << + getSizeQuery << "SELECT IFNULL(SUM(file_size),0) AS sum " << "FROM TableFiles " << "WHERE file_type <> " << std::to_string(TableFileSchema::TO_DELETE) << ";"; @@ -1379,7 +1382,7 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { Query discardFilesQuery = connectionPtr->query(); - discardFilesQuery << "SELECT id, size " << + discardFilesQuery << "SELECT id, file_size " << "FROM TableFiles " << "WHERE file_type <> " << std::to_string(TableFileSchema::TO_DELETE) << " " << "ORDER BY id ASC " << @@ -1401,11 +1404,11 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { break; } table_file.id_ = resRow["id"]; - table_file.size_ = resRow["size"]; + table_file.file_size_ = resRow["file_size"]; idsToDiscardSS << "id = " << std::to_string(table_file.id_) << " OR "; ENGINE_LOG_DEBUG << "Discard table_file.id=" << table_file.file_id_ - << " table_file.size=" << table_file.size_; - to_discard_size -= table_file.size_; + << " table_file.size=" << table_file.file_size_; + to_discard_size -= table_file.file_size_; } std::string idsToDiscardStr = idsToDiscardSS.str(); @@ -1480,7 +1483,8 @@ Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { std::string engine_type = std::to_string(file_schema.engine_type_); std::string file_id = file_schema.file_id_; std::string file_type = std::to_string(file_schema.file_type_); - std::string size = std::to_string(file_schema.size_); + std::string file_size = std::to_string(file_schema.file_size_); + std::string row_count = std::to_string(file_schema.row_count_); std::string updated_time = std::to_string(file_schema.updated_time_); std::string created_on = std::to_string(file_schema.created_on_); std::string date = std::to_string(file_schema.date_); @@ -1490,7 +1494,8 @@ Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { "engine_type = " << engine_type << ", " << "file_id = " << quote << file_id << ", " << "file_type = " << file_type << ", " << - "size = " << size << ", " << + "file_size = " << file_size << ", " << + "row_count = " << row_count << ", " << "updated_time = " << updated_time << ", " << "created_on = " << created_on << ", " << "date = " << date << " " << @@ -1606,7 +1611,8 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { std::string engine_type = std::to_string(file_schema.engine_type_); std::string file_id = file_schema.file_id_; std::string file_type = std::to_string(file_schema.file_type_); - std::string size = std::to_string(file_schema.size_); + std::string file_size = std::to_string(file_schema.file_size_); + std::string row_count = std::to_string(file_schema.row_count_); std::string updated_time = std::to_string(file_schema.updated_time_); std::string created_on = std::to_string(file_schema.created_on_); std::string date = std::to_string(file_schema.date_); @@ -1616,7 +1622,8 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { "engine_type = " << engine_type << ", " << "file_id = " << quote << file_id << ", " << "file_type = " << file_type << ", " << - "size = " << size << ", " << + "file_size = " << file_size << ", " << + "row_count = " << row_count << ", " << "updated_time = " << updated_time << ", " << "created_on = " << created_on << ", " << "date = " << date << " " << diff --git a/cpp/src/db/meta/MySQLMetaImpl.h b/cpp/src/db/meta/MySQLMetaImpl.h index 30695423dd..3fdd80beed 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.h +++ b/cpp/src/db/meta/MySQLMetaImpl.h @@ -43,6 +43,12 @@ class MySQLMetaImpl : public Meta { Status HasNonIndexFiles(const std::string &table_id, bool &has) override; + Status UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) override; + + Status DescribeTableIndex(const std::string &table_id, TableIndex& index) override; + + Status DropTableIndex(const std::string &table_id) override; + Status UpdateTableFile(TableFileSchema &file_schema) override; Status UpdateTableFilesToIndex(const std::string &table_id) override; diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index 9053139e0b..9118eadd17 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -62,16 +62,18 @@ inline auto StoragePrototype(const std::string &path) { make_column("state", &TableSchema::state_), make_column("dimension", &TableSchema::dimension_), make_column("created_on", &TableSchema::created_on_), - make_column("files_cnt", &TableSchema::files_cnt_, default_value(0)), make_column("engine_type", &TableSchema::engine_type_), - make_column("store_raw_data", &TableSchema::store_raw_data_)), + make_column("nlist", &TableSchema::nlist_), + make_column("index_file_size", &TableSchema::index_file_size_), + make_column("metric_type", &TableSchema::metric_type_)), make_table("TableFiles", make_column("id", &TableFileSchema::id_, primary_key()), make_column("table_id", &TableFileSchema::table_id_), make_column("engine_type", &TableFileSchema::engine_type_), make_column("file_id", &TableFileSchema::file_id_), make_column("file_type", &TableFileSchema::file_type_), - make_column("size", &TableFileSchema::size_, default_value(0)), + make_column("file_size", &TableFileSchema::file_size_, default_value(0)), + make_column("row_count", &TableFileSchema::row_count_, default_value(0)), make_column("updated_time", &TableFileSchema::updated_time_), make_column("created_on", &TableFileSchema::created_on_), make_column("date", &TableFileSchema::date_)) @@ -188,7 +190,6 @@ Status SqliteMetaImpl::CreateTable(TableSchema &table_schema) { } } - table_schema.files_cnt_ = 0; table_schema.id_ = -1; table_schema.created_on_ = utils::GetMicroSecTimeStamp(); @@ -218,10 +219,8 @@ Status SqliteMetaImpl::DeleteTable(const std::string& table_id) { //soft delete table auto tables = ConnectorPtr->select(columns(&TableSchema::id_, - &TableSchema::files_cnt_, &TableSchema::dimension_, &TableSchema::engine_type_, - &TableSchema::store_raw_data_, &TableSchema::created_on_), where(c(&TableSchema::table_id_) == table_id)); for (auto &table : tables) { @@ -229,11 +228,9 @@ Status SqliteMetaImpl::DeleteTable(const std::string& table_id) { table_schema.table_id_ = table_id; table_schema.state_ = (int)TableSchema::TO_DELETE; table_schema.id_ = std::get<0>(table); - table_schema.files_cnt_ = std::get<1>(table); - table_schema.dimension_ = std::get<2>(table); - table_schema.engine_type_ = std::get<3>(table); - table_schema.store_raw_data_ = std::get<4>(table); - table_schema.created_on_ = std::get<5>(table); + table_schema.dimension_ = std::get<1>(table); + table_schema.engine_type_ = std::get<2>(table); + table_schema.created_on_ = std::get<3>(table); ConnectorPtr->update(table_schema); } @@ -274,20 +271,15 @@ Status SqliteMetaImpl::DescribeTable(TableSchema &table_schema) { MetricCollector metric; auto groups = ConnectorPtr->select(columns(&TableSchema::id_, - &TableSchema::table_id_, - &TableSchema::files_cnt_, &TableSchema::dimension_, - &TableSchema::engine_type_, - &TableSchema::store_raw_data_), + &TableSchema::engine_type_), where(c(&TableSchema::table_id_) == table_schema.table_id_ and c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); if (groups.size() == 1) { table_schema.id_ = std::get<0>(groups[0]); - table_schema.files_cnt_ = std::get<2>(groups[0]); - table_schema.dimension_ = std::get<3>(groups[0]); - table_schema.engine_type_ = std::get<4>(groups[0]); - table_schema.store_raw_data_ = std::get<5>(groups[0]); + table_schema.dimension_ = std::get<1>(groups[0]); + table_schema.engine_type_ = std::get<2>(groups[0]); } else { return Status::NotFound("Table " + table_schema.table_id_ + " not found"); } @@ -302,17 +294,16 @@ Status SqliteMetaImpl::DescribeTable(TableSchema &table_schema) { Status SqliteMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) { has = false; try { + std::vector file_types = { + (int) TableFileSchema::RAW, + (int) TableFileSchema::NEW, + (int) TableFileSchema::NEW_MERGE, + (int) TableFileSchema::NEW_INDEX, + (int) TableFileSchema::TO_INDEX, + }; auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::file_type_), - where((c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_MERGE - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_INDEX - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX) + where(in(&TableFileSchema::file_type_, file_types) and c(&TableFileSchema::table_id_) == table_id )); @@ -353,6 +344,118 @@ Status SqliteMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) return Status::OK(); } +Status SqliteMetaImpl::UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) { + try { + MetricCollector metric; + + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + + auto tables = ConnectorPtr->select(columns(&TableSchema::id_, + &TableSchema::state_, + &TableSchema::dimension_, + &TableSchema::created_on_), + where(c(&TableSchema::table_id_) == table_id + and c(&TableSchema::state_) != (int) TableSchema::TO_DELETE)); + + if(tables.size() > 0) { + meta::TableSchema table_schema; + table_schema.id_ = std::get<0>(tables[0]); + table_schema.table_id_ = table_id; + table_schema.state_ = std::get<1>(tables[0]); + table_schema.dimension_ = std::get<2>(tables[0]); + table_schema.created_on_ = std::get<3>(tables[0]); + table_schema.engine_type_ = index.engine_type_; + table_schema.nlist_ = index.nlist; + table_schema.index_file_size_ = index.index_file_size; + table_schema.metric_type_ = index.metric_type; + + ConnectorPtr->update(table_schema); + } else { + return Status::NotFound("Table " + table_id + " not found"); + } + + //set all backup file to raw + ConnectorPtr->update_all( + set( + c(&TableFileSchema::file_type_) = (int) TableFileSchema::RAW, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp() + ), + where( + c(&TableFileSchema::table_id_) == table_id and + c(&TableFileSchema::file_type_) == (int) TableFileSchema::BACKUP + )); + + } catch (std::exception &e) { + std::string msg = "Encounter exception when update table index: table_id = " + table_id; + return HandleException(msg, e); + } + return Status::OK(); +} + +Status SqliteMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex& index) { + try { + MetricCollector metric; + + auto groups = ConnectorPtr->select(columns(&TableSchema::engine_type_, + &TableSchema::nlist_, + &TableSchema::index_file_size_, + &TableSchema::metric_type_), + where(c(&TableSchema::table_id_) == table_id + and c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); + + if (groups.size() == 1) { + index.engine_type_ = std::get<0>(groups[0]); + index.nlist = std::get<1>(groups[0]); + index.index_file_size = std::get<2>(groups[0]); + index.metric_type = std::get<3>(groups[0]); + } else { + return Status::NotFound("Table " + table_id + " not found"); + } + + } catch (std::exception &e) { + return HandleException("Encounter exception when describe index", e); + } + + return Status::OK(); +} + +Status SqliteMetaImpl::DropTableIndex(const std::string &table_id) { + try { + MetricCollector metric; + + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + + //soft delete index files + ConnectorPtr->update_all( + set( + c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp() + ), + where( + c(&TableFileSchema::table_id_) == table_id and + c(&TableFileSchema::file_type_) == (int) TableFileSchema::INDEX + )); + + //set all backup file to raw + ConnectorPtr->update_all( + set( + c(&TableFileSchema::file_type_) = (int) TableFileSchema::RAW, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp() + ), + where( + c(&TableFileSchema::table_id_) == table_id and + c(&TableFileSchema::file_type_) == (int) TableFileSchema::BACKUP + )); + + } catch (std::exception &e) { + return HandleException("Encounter exception when delete table index files", e); + } + + return Status::OK(); +} + Status SqliteMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { has_or_not = false; @@ -380,19 +483,15 @@ Status SqliteMetaImpl::AllTables(std::vector& table_schema_array) { auto selected = ConnectorPtr->select(columns(&TableSchema::id_, &TableSchema::table_id_, - &TableSchema::files_cnt_, &TableSchema::dimension_, - &TableSchema::engine_type_, - &TableSchema::store_raw_data_), + &TableSchema::engine_type_), where(c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); for (auto &table : selected) { TableSchema schema; schema.id_ = std::get<0>(table); schema.table_id_ = std::get<1>(table); - schema.files_cnt_ = std::get<2>(table); - schema.dimension_ = std::get<3>(table); - schema.engine_type_ = std::get<4>(table); - schema.store_raw_data_ = std::get<5>(table); + schema.dimension_ = std::get<2>(table); + schema.engine_type_ = std::get<3>(table); table_schema_array.emplace_back(schema); } @@ -420,7 +519,8 @@ Status SqliteMetaImpl::CreateTableFile(TableFileSchema &file_schema) { NextFileId(file_schema.file_id_); file_schema.dimension_ = table_schema.dimension_; - file_schema.size_ = 0; + file_schema.file_size_ = 0; + file_schema.row_count_ = 0; file_schema.created_on_ = utils::GetMicroSecTimeStamp(); file_schema.updated_time_ = file_schema.created_on_; file_schema.engine_type_ = table_schema.engine_type_; @@ -450,7 +550,7 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), where(c(&TableFileSchema::file_type_) @@ -464,7 +564,7 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); + table_file.row_count_ = std::get<4>(file); table_file.date_ = std::get<5>(file); table_file.engine_type_ = std::get<6>(file); @@ -499,19 +599,16 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, MetricCollector metric; if (partition.empty()) { + std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), where(c(&TableFileSchema::table_id_) == table_id and - (c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::TO_INDEX or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::INDEX))); + in(&TableFileSchema::file_type_, file_type))); TableSchema table_schema; table_schema.table_id_ = table_id; @@ -527,7 +624,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); + table_file.row_count_ = std::get<4>(file); table_file.date_ = std::get<5>(file); table_file.engine_type_ = std::get<6>(file); table_file.dimension_ = table_schema.dimension_; @@ -540,20 +637,17 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, } } else { + std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), where(c(&TableFileSchema::table_id_) == table_id and - in(&TableFileSchema::date_, partition) and - (c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::TO_INDEX or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::INDEX))); + in(&TableFileSchema::date_, partition) and + in(&TableFileSchema::file_type_, file_type))); TableSchema table_schema; table_schema.table_id_ = table_id; @@ -569,7 +663,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); + table_file.row_count_ = std::get<4>(file); table_file.date_ = std::get<5>(file); table_file.engine_type_ = std::get<6>(file); table_file.dimension_ = table_schema.dimension_; @@ -601,7 +695,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_); @@ -643,7 +737,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); + table_file.row_count_ = std::get<4>(file); table_file.date_ = std::get<5>(file); table_file.engine_type_ = std::get<6>(file); table_file.dimension_ = table_schema.dimension_; @@ -673,11 +767,11 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::file_size_, &TableFileSchema::date_), where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW and c(&TableFileSchema::table_id_) == table_id), - order_by(&TableFileSchema::size_).desc()); + order_by(&TableFileSchema::file_size_).desc()); TableSchema table_schema; table_schema.table_id_ = table_id; @@ -693,7 +787,7 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); + table_file.file_size_ = std::get<4>(file); table_file.date_ = std::get<5>(file); table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); @@ -718,7 +812,8 @@ Status SqliteMetaImpl::GetTableFiles(const std::string& table_id, auto files = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::file_size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), where(c(&TableFileSchema::table_id_) == table_id and @@ -738,9 +833,10 @@ Status SqliteMetaImpl::GetTableFiles(const std::string& table_id, file_schema.id_ = std::get<0>(file); file_schema.file_id_ = std::get<1>(file); file_schema.file_type_ = std::get<2>(file); - file_schema.size_ = std::get<3>(file); - file_schema.date_ = std::get<4>(file); - file_schema.engine_type_ = std::get<5>(file); + file_schema.file_size_ = std::get<3>(file); + file_schema.row_count_ = std::get<4>(file); + file_schema.date_ = std::get<5>(file); + file_schema.engine_type_ = std::get<6>(file); file_schema.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, file_schema); @@ -797,23 +893,17 @@ Status SqliteMetaImpl::Archive() { Status SqliteMetaImpl::Size(uint64_t &result) { result = 0; try { - auto files = ConnectorPtr->select(columns(&TableFileSchema::size_, - &TableFileSchema::file_type_, - &TableFileSchema::engine_type_), + auto selected = ConnectorPtr->select(columns(sum(&TableFileSchema::file_size_)), where( c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE )); - - for (auto &file : files) { - auto file_size = std::get<0>(file); - auto file_type = std::get<1>(file); - auto engine_type = std::get<2>(file); - if(file_type == (int)TableFileSchema::INDEX && engine_type == (int)EngineType::FAISS_IVFSQ8) { - result += (uint64_t)file_size/4;//hardcode for sq8 - } else { - result += (uint64_t)file_size; + for (auto &total_size : selected) { + if (!std::get<0>(total_size)) { + continue; } + result += (uint64_t) (*std::get<0>(total_size)); } + } catch (std::exception &e) { return HandleException("Encounter exception when calculte db size", e); } @@ -836,7 +926,7 @@ Status SqliteMetaImpl::DiscardFiles(long to_discard_size) { auto commited = ConnectorPtr->transaction([&]() mutable { auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, - &TableFileSchema::size_), + &TableFileSchema::file_size_), where(c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE), order_by(&TableFileSchema::id_), @@ -848,11 +938,11 @@ Status SqliteMetaImpl::DiscardFiles(long to_discard_size) { for (auto &file : selected) { if (to_discard_size <= 0) break; table_file.id_ = std::get<0>(file); - table_file.size_ = std::get<1>(file); + table_file.file_size_ = std::get<1>(file); ids.push_back(table_file.id_); ENGINE_LOG_DEBUG << "Discard table_file.id=" << table_file.file_id_ - << " table_file.size=" << table_file.size_; - to_discard_size -= table_file.size_; + << " table_file.size=" << table_file.file_size_; + to_discard_size -= table_file.file_size_; } if (ids.size() == 0) { @@ -1059,12 +1149,8 @@ Status SqliteMetaImpl::CleanUp() { //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); - auto files = ConnectorPtr->select(columns(&TableFileSchema::id_), - where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_INDEX - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_MERGE)); + std::vector file_type = {(int) TableFileSchema::NEW, (int) TableFileSchema::NEW_INDEX, (int) TableFileSchema::NEW_MERGE}; + auto files = ConnectorPtr->select(columns(&TableFileSchema::id_), where(in(&TableFileSchema::file_type_, file_type))); auto commited = ConnectorPtr->transaction([&]() mutable { for (auto &file : files) { @@ -1091,11 +1177,9 @@ Status SqliteMetaImpl::Count(const std::string &table_id, uint64_t &result) { try { MetricCollector metric; - auto selected = ConnectorPtr->select(columns(&TableFileSchema::size_), - where((c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX - or c(&TableFileSchema::file_type_) == (int) TableFileSchema::INDEX) + std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; + auto selected = ConnectorPtr->select(columns(&TableFileSchema::row_count_), + where(in(&TableFileSchema::file_type_, file_type) and c(&TableFileSchema::table_id_) == table_id)); TableSchema table_schema; diff --git a/cpp/src/db/meta/SqliteMetaImpl.h b/cpp/src/db/meta/SqliteMetaImpl.h index 1525f27e4b..34808f202f 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.h +++ b/cpp/src/db/meta/SqliteMetaImpl.h @@ -51,6 +51,15 @@ class SqliteMetaImpl : public Meta { Status HasNonIndexFiles(const std::string &table_id, bool &has) override; + Status + UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) override; + + Status + DescribeTableIndex(const std::string &table_id, TableIndex& index) override; + + Status + DropTableIndex(const std::string &table_id) override; + Status UpdateTableFilesToIndex(const std::string &table_id) override; diff --git a/cpp/src/grpc/cpp_gen.sh b/cpp/src/grpc/cpp_gen.sh index c441783e7b..62b9d95728 100755 --- a/cpp/src/grpc/cpp_gen.sh +++ b/cpp/src/grpc/cpp_gen.sh @@ -1,9 +1,9 @@ #!/bin/bash -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-status --plugin=protoc-gen-grpc="/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" status.proto +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-status --plugin=protoc-gen-grpc="../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" status.proto -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-status status.proto +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-status status.proto -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-milvus --plugin=protoc-gen-grpc="/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" milvus.proto +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-milvus --plugin=protoc-gen-grpc="../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" milvus.proto -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-milvus milvus.proto \ No newline at end of file +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-milvus milvus.proto \ No newline at end of file diff --git a/cpp/src/grpc/gen-milvus/milvus.pb.cc b/cpp/src/grpc/gen-milvus/milvus.pb.cc index 5ec8fddba0..c8ef38d07b 100644 --- a/cpp/src/grpc/gen-milvus/milvus.pb.cc +++ b/cpp/src/grpc/gen-milvus/milvus.pb.cc @@ -365,9 +365,7 @@ const ::PROTOBUF_NAMESPACE_ID::uint32 TableStruct_milvus_2eproto::offsets[] PROT ~0u, // no _oneof_case_ ~0u, // no _weak_field_map_ PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, table_name_), - PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, index_type_), PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, dimension_), - PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, store_raw_vector_), ~0u, // no _has_bits_ PROTOBUF_FIELD_OFFSET(::milvus::grpc::Range, _internal_metadata_), ~0u, // no _extensions_ @@ -481,21 +479,21 @@ const ::PROTOBUF_NAMESPACE_ID::uint32 TableStruct_milvus_2eproto::offsets[] PROT static const ::PROTOBUF_NAMESPACE_ID::internal::MigrationSchema schemas[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = { { 0, -1, sizeof(::milvus::grpc::TableName)}, { 7, -1, sizeof(::milvus::grpc::TableSchema)}, - { 16, -1, sizeof(::milvus::grpc::Range)}, - { 23, -1, sizeof(::milvus::grpc::RowRecord)}, - { 29, -1, sizeof(::milvus::grpc::InsertParam)}, - { 37, -1, sizeof(::milvus::grpc::VectorIds)}, - { 44, -1, sizeof(::milvus::grpc::SearchParam)}, - { 54, -1, sizeof(::milvus::grpc::SearchInFilesParam)}, - { 61, -1, sizeof(::milvus::grpc::QueryResult)}, - { 68, -1, sizeof(::milvus::grpc::TopKQueryResult)}, - { 75, -1, sizeof(::milvus::grpc::StringReply)}, - { 82, -1, sizeof(::milvus::grpc::BoolReply)}, - { 89, -1, sizeof(::milvus::grpc::TableRowCount)}, - { 96, -1, sizeof(::milvus::grpc::Command)}, - { 102, -1, sizeof(::milvus::grpc::Index)}, - { 111, -1, sizeof(::milvus::grpc::IndexParam)}, - { 118, -1, sizeof(::milvus::grpc::DeleteByRangeParam)}, + { 14, -1, sizeof(::milvus::grpc::Range)}, + { 21, -1, sizeof(::milvus::grpc::RowRecord)}, + { 27, -1, sizeof(::milvus::grpc::InsertParam)}, + { 35, -1, sizeof(::milvus::grpc::VectorIds)}, + { 42, -1, sizeof(::milvus::grpc::SearchParam)}, + { 52, -1, sizeof(::milvus::grpc::SearchInFilesParam)}, + { 59, -1, sizeof(::milvus::grpc::QueryResult)}, + { 66, -1, sizeof(::milvus::grpc::TopKQueryResult)}, + { 73, -1, sizeof(::milvus::grpc::StringReply)}, + { 80, -1, sizeof(::milvus::grpc::BoolReply)}, + { 87, -1, sizeof(::milvus::grpc::TableRowCount)}, + { 94, -1, sizeof(::milvus::grpc::Command)}, + { 100, -1, sizeof(::milvus::grpc::Index)}, + { 109, -1, sizeof(::milvus::grpc::IndexParam)}, + { 116, -1, sizeof(::milvus::grpc::DeleteByRangeParam)}, }; static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] = { @@ -521,65 +519,64 @@ static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] = const char descriptor_table_protodef_milvus_2eproto[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = "\n\014milvus.proto\022\013milvus.grpc\032\014status.prot" "o\"D\n\tTableName\022#\n\006status\030\001 \001(\0132\023.milvus." - "grpc.Status\022\022\n\ntable_name\030\002 \001(\t\"z\n\013Table" + "grpc.Status\022\022\n\ntable_name\030\002 \001(\t\"L\n\013Table" "Schema\022*\n\ntable_name\030\001 \001(\0132\026.milvus.grpc" - ".TableName\022\022\n\nindex_type\030\002 \001(\005\022\021\n\tdimens" - "ion\030\003 \001(\003\022\030\n\020store_raw_vector\030\004 \001(\010\"/\n\005R" - "ange\022\023\n\013start_value\030\001 \001(\t\022\021\n\tend_value\030\002" - " \001(\t\" \n\tRowRecord\022\023\n\013vector_data\030\001 \003(\002\"i" - "\n\013InsertParam\022\022\n\ntable_name\030\001 \001(\t\0220\n\020row" - "_record_array\030\002 \003(\0132\026.milvus.grpc.RowRec" - "ord\022\024\n\014row_id_array\030\003 \003(\003\"I\n\tVectorIds\022#" - "\n\006status\030\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017v" - "ector_id_array\030\002 \003(\003\"\242\001\n\013SearchParam\022\022\n\n" - "table_name\030\001 \001(\t\0222\n\022query_record_array\030\002" - " \003(\0132\026.milvus.grpc.RowRecord\022-\n\021query_ra" - "nge_array\030\003 \003(\0132\022.milvus.grpc.Range\022\014\n\004t" - "opk\030\004 \001(\003\022\016\n\006nprobe\030\005 \001(\003\"[\n\022SearchInFil" - "esParam\022\025\n\rfile_id_array\030\001 \003(\t\022.\n\014search" - "_param\030\002 \001(\0132\030.milvus.grpc.SearchParam\"+" - "\n\013QueryResult\022\n\n\002id\030\001 \001(\003\022\020\n\010distance\030\002 " - "\001(\001\"m\n\017TopKQueryResult\022#\n\006status\030\001 \001(\0132\023" - ".milvus.grpc.Status\0225\n\023query_result_arra" - "ys\030\002 \003(\0132\030.milvus.grpc.QueryResult\"H\n\013St" - "ringReply\022#\n\006status\030\001 \001(\0132\023.milvus.grpc." - "Status\022\024\n\014string_reply\030\002 \001(\t\"D\n\tBoolRepl" - "y\022#\n\006status\030\001 \001(\0132\023.milvus.grpc.Status\022\022" - "\n\nbool_reply\030\002 \001(\010\"M\n\rTableRowCount\022#\n\006s" - "tatus\030\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017tabl" - "e_row_count\030\002 \001(\003\"\026\n\007Command\022\013\n\003cmd\030\001 \001(" - "\t\"X\n\005Index\022\022\n\nindex_type\030\001 \001(\005\022\r\n\005nlist\030" - "\002 \001(\003\022\027\n\017index_file_size\030\003 \001(\005\022\023\n\013metric" - "_type\030\004 \001(\005\"[\n\nIndexParam\022*\n\ntable_name\030" - "\001 \001(\0132\026.milvus.grpc.TableName\022!\n\005index\030\002" - " \001(\0132\022.milvus.grpc.Index\"K\n\022DeleteByRang" - "eParam\022!\n\005range\030\001 \001(\0132\022.milvus.grpc.Rang" - "e\022\022\n\ntable_name\030\002 \001(\t2\352\007\n\rMilvusService\022" - ">\n\013CreateTable\022\030.milvus.grpc.TableSchema" - "\032\023.milvus.grpc.Status\"\000\022<\n\010HasTable\022\026.mi" - "lvus.grpc.TableName\032\026.milvus.grpc.BoolRe" - "ply\"\000\022:\n\tDropTable\022\026.milvus.grpc.TableNa" - "me\032\023.milvus.grpc.Status\"\000\022=\n\013CreateIndex" - "\022\027.milvus.grpc.IndexParam\032\023.milvus.grpc." - "Status\"\000\022<\n\006Insert\022\030.milvus.grpc.InsertP" - "aram\032\026.milvus.grpc.VectorIds\"\000\022D\n\006Search" - "\022\030.milvus.grpc.SearchParam\032\034.milvus.grpc" - ".TopKQueryResult\"\0000\001\022R\n\rSearchInFiles\022\037." - "milvus.grpc.SearchInFilesParam\032\034.milvus." - "grpc.TopKQueryResult\"\0000\001\022C\n\rDescribeTabl" - "e\022\026.milvus.grpc.TableName\032\030.milvus.grpc." - "TableSchema\"\000\022B\n\nCountTable\022\026.milvus.grp" - "c.TableName\032\032.milvus.grpc.TableRowCount\"" - "\000\022>\n\nShowTables\022\024.milvus.grpc.Command\032\026." - "milvus.grpc.TableName\"\0000\001\0227\n\003Cmd\022\024.milvu" - "s.grpc.Command\032\030.milvus.grpc.StringReply" - "\"\000\022G\n\rDeleteByRange\022\037.milvus.grpc.Delete" - "ByRangeParam\032\023.milvus.grpc.Status\"\000\022=\n\014P" - "reloadTable\022\026.milvus.grpc.TableName\032\023.mi" - "lvus.grpc.Status\"\000\022B\n\rDescribeIndex\022\026.mi" - "lvus.grpc.TableName\032\027.milvus.grpc.IndexP" - "aram\"\000\022:\n\tDropIndex\022\026.milvus.grpc.TableN" - "ame\032\023.milvus.grpc.Status\"\000b\006proto3" + ".TableName\022\021\n\tdimension\030\002 \001(\003\"/\n\005Range\022\023" + "\n\013start_value\030\001 \001(\t\022\021\n\tend_value\030\002 \001(\t\" " + "\n\tRowRecord\022\023\n\013vector_data\030\001 \003(\002\"i\n\013Inse" + "rtParam\022\022\n\ntable_name\030\001 \001(\t\0220\n\020row_recor" + "d_array\030\002 \003(\0132\026.milvus.grpc.RowRecord\022\024\n" + "\014row_id_array\030\003 \003(\003\"I\n\tVectorIds\022#\n\006stat" + "us\030\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017vector_" + "id_array\030\002 \003(\003\"\242\001\n\013SearchParam\022\022\n\ntable_" + "name\030\001 \001(\t\0222\n\022query_record_array\030\002 \003(\0132\026" + ".milvus.grpc.RowRecord\022-\n\021query_range_ar" + "ray\030\003 \003(\0132\022.milvus.grpc.Range\022\014\n\004topk\030\004 " + "\001(\003\022\016\n\006nprobe\030\005 \001(\003\"[\n\022SearchInFilesPara" + "m\022\025\n\rfile_id_array\030\001 \003(\t\022.\n\014search_param" + "\030\002 \001(\0132\030.milvus.grpc.SearchParam\"+\n\013Quer" + "yResult\022\n\n\002id\030\001 \001(\003\022\020\n\010distance\030\002 \001(\001\"m\n" + "\017TopKQueryResult\022#\n\006status\030\001 \001(\0132\023.milvu" + "s.grpc.Status\0225\n\023query_result_arrays\030\002 \003" + "(\0132\030.milvus.grpc.QueryResult\"H\n\013StringRe" + "ply\022#\n\006status\030\001 \001(\0132\023.milvus.grpc.Status" + "\022\024\n\014string_reply\030\002 \001(\t\"D\n\tBoolReply\022#\n\006s" + "tatus\030\001 \001(\0132\023.milvus.grpc.Status\022\022\n\nbool" + "_reply\030\002 \001(\010\"M\n\rTableRowCount\022#\n\006status\030" + "\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017table_row_" + "count\030\002 \001(\003\"\026\n\007Command\022\013\n\003cmd\030\001 \001(\t\"X\n\005I" + "ndex\022\022\n\nindex_type\030\001 \001(\005\022\r\n\005nlist\030\002 \001(\003\022" + "\027\n\017index_file_size\030\003 \001(\005\022\023\n\013metric_type\030" + "\004 \001(\005\"[\n\nIndexParam\022*\n\ntable_name\030\001 \001(\0132" + "\026.milvus.grpc.TableName\022!\n\005index\030\002 \001(\0132\022" + ".milvus.grpc.Index\"K\n\022DeleteByRangeParam" + "\022!\n\005range\030\001 \001(\0132\022.milvus.grpc.Range\022\022\n\nt" + "able_name\030\002 \001(\t2\352\007\n\rMilvusService\022>\n\013Cre" + "ateTable\022\030.milvus.grpc.TableSchema\032\023.mil" + "vus.grpc.Status\"\000\022<\n\010HasTable\022\026.milvus.g" + "rpc.TableName\032\026.milvus.grpc.BoolReply\"\000\022" + ":\n\tDropTable\022\026.milvus.grpc.TableName\032\023.m" + "ilvus.grpc.Status\"\000\022=\n\013CreateIndex\022\027.mil" + "vus.grpc.IndexParam\032\023.milvus.grpc.Status" + "\"\000\022<\n\006Insert\022\030.milvus.grpc.InsertParam\032\026" + ".milvus.grpc.VectorIds\"\000\022D\n\006Search\022\030.mil" + "vus.grpc.SearchParam\032\034.milvus.grpc.TopKQ" + "ueryResult\"\0000\001\022R\n\rSearchInFiles\022\037.milvus" + ".grpc.SearchInFilesParam\032\034.milvus.grpc.T" + "opKQueryResult\"\0000\001\022C\n\rDescribeTable\022\026.mi" + "lvus.grpc.TableName\032\030.milvus.grpc.TableS" + "chema\"\000\022B\n\nCountTable\022\026.milvus.grpc.Tabl" + "eName\032\032.milvus.grpc.TableRowCount\"\000\022>\n\nS" + "howTables\022\024.milvus.grpc.Command\032\026.milvus" + ".grpc.TableName\"\0000\001\0227\n\003Cmd\022\024.milvus.grpc" + ".Command\032\030.milvus.grpc.StringReply\"\000\022G\n\r" + "DeleteByRange\022\037.milvus.grpc.DeleteByRang" + "eParam\032\023.milvus.grpc.Status\"\000\022=\n\014Preload" + "Table\022\026.milvus.grpc.TableName\032\023.milvus.g" + "rpc.Status\"\000\022B\n\rDescribeIndex\022\026.milvus.g" + "rpc.TableName\032\027.milvus.grpc.IndexParam\"\000" + "\022:\n\tDropIndex\022\026.milvus.grpc.TableName\032\023." + "milvus.grpc.Status\"\000b\006proto3" ; static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_milvus_2eproto_deps[1] = { &::descriptor_table_status_2eproto, @@ -606,7 +603,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_mil static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_milvus_2eproto_once; static bool descriptor_table_milvus_2eproto_initialized = false; const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_milvus_2eproto = { - &descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2434, + &descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2388, &descriptor_table_milvus_2eproto_once, descriptor_table_milvus_2eproto_sccs, descriptor_table_milvus_2eproto_deps, 17, 1, schemas, file_default_instances, TableStruct_milvus_2eproto::offsets, file_level_metadata_milvus_2eproto, 17, file_level_enum_descriptors_milvus_2eproto, file_level_service_descriptors_milvus_2eproto, @@ -981,17 +978,15 @@ TableSchema::TableSchema(const TableSchema& from) } else { table_name_ = nullptr; } - ::memcpy(&dimension_, &from.dimension_, - static_cast(reinterpret_cast(&store_raw_vector_) - - reinterpret_cast(&dimension_)) + sizeof(store_raw_vector_)); + dimension_ = from.dimension_; // @@protoc_insertion_point(copy_constructor:milvus.grpc.TableSchema) } void TableSchema::SharedCtor() { ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_TableSchema_milvus_2eproto.base); ::memset(&table_name_, 0, static_cast( - reinterpret_cast(&store_raw_vector_) - - reinterpret_cast(&table_name_)) + sizeof(store_raw_vector_)); + reinterpret_cast(&dimension_) - + reinterpret_cast(&table_name_)) + sizeof(dimension_)); } TableSchema::~TableSchema() { @@ -1022,9 +1017,7 @@ void TableSchema::Clear() { delete table_name_; } table_name_ = nullptr; - ::memset(&dimension_, 0, static_cast( - reinterpret_cast(&store_raw_vector_) - - reinterpret_cast(&dimension_)) + sizeof(store_raw_vector_)); + dimension_ = PROTOBUF_LONGLONG(0); _internal_metadata_.Clear(); } @@ -1043,27 +1036,13 @@ const char* TableSchema::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID CHK_(ptr); } else goto handle_unusual; continue; - // int32 index_type = 2; + // int64 dimension = 2; case 2: if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 16)) { - index_type_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); - CHK_(ptr); - } else goto handle_unusual; - continue; - // int64 dimension = 3; - case 3: - if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 24)) { dimension_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); CHK_(ptr); } else goto handle_unusual; continue; - // bool store_raw_vector = 4; - case 4: - if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 32)) { - store_raw_vector_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); - CHK_(ptr); - } else goto handle_unusual; - continue; default: { handle_unusual: if ((tag & 7) == 4 || tag == 0) { @@ -1105,23 +1084,10 @@ bool TableSchema::MergePartialFromCodedStream( break; } - // int32 index_type = 2; + // int64 dimension = 2; case 2: { if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (16 & 0xFF)) { - DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< - ::PROTOBUF_NAMESPACE_ID::int32, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT32>( - input, &index_type_))); - } else { - goto handle_unusual; - } - break; - } - - // int64 dimension = 3; - case 3: { - if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (24 & 0xFF)) { - DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< ::PROTOBUF_NAMESPACE_ID::int64, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT64>( input, &dimension_))); @@ -1131,19 +1097,6 @@ bool TableSchema::MergePartialFromCodedStream( break; } - // bool store_raw_vector = 4; - case 4: { - if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (32 & 0xFF)) { - - DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< - bool, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_BOOL>( - input, &store_raw_vector_))); - } else { - goto handle_unusual; - } - break; - } - default: { handle_unusual: if (tag == 0) { @@ -1177,19 +1130,9 @@ void TableSchema::SerializeWithCachedSizes( 1, _Internal::table_name(this), output); } - // int32 index_type = 2; - if (this->index_type() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32(2, this->index_type(), output); - } - - // int64 dimension = 3; + // int64 dimension = 2; if (this->dimension() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64(3, this->dimension(), output); - } - - // bool store_raw_vector = 4; - if (this->store_raw_vector() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBool(4, this->store_raw_vector(), output); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64(2, this->dimension(), output); } if (_internal_metadata_.have_unknown_fields()) { @@ -1212,19 +1155,9 @@ void TableSchema::SerializeWithCachedSizes( 1, _Internal::table_name(this), target); } - // int32 index_type = 2; - if (this->index_type() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(2, this->index_type(), target); - } - - // int64 dimension = 3; + // int64 dimension = 2; if (this->dimension() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(3, this->dimension(), target); - } - - // bool store_raw_vector = 4; - if (this->store_raw_vector() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(4, this->store_raw_vector(), target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(2, this->dimension(), target); } if (_internal_metadata_.have_unknown_fields()) { @@ -1255,25 +1188,13 @@ size_t TableSchema::ByteSizeLong() const { *table_name_); } - // int64 dimension = 3; + // int64 dimension = 2; if (this->dimension() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int64Size( this->dimension()); } - // int32 index_type = 2; - if (this->index_type() != 0) { - total_size += 1 + - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( - this->index_type()); - } - - // bool store_raw_vector = 4; - if (this->store_raw_vector() != 0) { - total_size += 1 + 1; - } - int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; @@ -1307,12 +1228,6 @@ void TableSchema::MergeFrom(const TableSchema& from) { if (from.dimension() != 0) { set_dimension(from.dimension()); } - if (from.index_type() != 0) { - set_index_type(from.index_type()); - } - if (from.store_raw_vector() != 0) { - set_store_raw_vector(from.store_raw_vector()); - } } void TableSchema::CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) { @@ -1338,8 +1253,6 @@ void TableSchema::InternalSwap(TableSchema* other) { _internal_metadata_.Swap(&other->_internal_metadata_); swap(table_name_, other->table_name_); swap(dimension_, other->dimension_); - swap(index_type_, other->index_type_); - swap(store_raw_vector_, other->store_raw_vector_); } ::PROTOBUF_NAMESPACE_ID::Metadata TableSchema::GetMetadata() const { diff --git a/cpp/src/grpc/gen-milvus/milvus.pb.h b/cpp/src/grpc/gen-milvus/milvus.pb.h index d4c33b848a..ce34b264fc 100644 --- a/cpp/src/grpc/gen-milvus/milvus.pb.h +++ b/cpp/src/grpc/gen-milvus/milvus.pb.h @@ -395,9 +395,7 @@ class TableSchema : enum : int { kTableNameFieldNumber = 1, - kDimensionFieldNumber = 3, - kIndexTypeFieldNumber = 2, - kStoreRawVectorFieldNumber = 4, + kDimensionFieldNumber = 2, }; // .milvus.grpc.TableName table_name = 1; bool has_table_name() const; @@ -407,21 +405,11 @@ class TableSchema : ::milvus::grpc::TableName* mutable_table_name(); void set_allocated_table_name(::milvus::grpc::TableName* table_name); - // int64 dimension = 3; + // int64 dimension = 2; void clear_dimension(); ::PROTOBUF_NAMESPACE_ID::int64 dimension() const; void set_dimension(::PROTOBUF_NAMESPACE_ID::int64 value); - // int32 index_type = 2; - void clear_index_type(); - ::PROTOBUF_NAMESPACE_ID::int32 index_type() const; - void set_index_type(::PROTOBUF_NAMESPACE_ID::int32 value); - - // bool store_raw_vector = 4; - void clear_store_raw_vector(); - bool store_raw_vector() const; - void set_store_raw_vector(bool value); - // @@protoc_insertion_point(class_scope:milvus.grpc.TableSchema) private: class _Internal; @@ -429,8 +417,6 @@ class TableSchema : ::PROTOBUF_NAMESPACE_ID::internal::InternalMetadataWithArena _internal_metadata_; ::milvus::grpc::TableName* table_name_; ::PROTOBUF_NAMESPACE_ID::int64 dimension_; - ::PROTOBUF_NAMESPACE_ID::int32 index_type_; - bool store_raw_vector_; mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; friend struct ::TableStruct_milvus_2eproto; }; @@ -2820,21 +2806,7 @@ inline void TableSchema::set_allocated_table_name(::milvus::grpc::TableName* tab // @@protoc_insertion_point(field_set_allocated:milvus.grpc.TableSchema.table_name) } -// int32 index_type = 2; -inline void TableSchema::clear_index_type() { - index_type_ = 0; -} -inline ::PROTOBUF_NAMESPACE_ID::int32 TableSchema::index_type() const { - // @@protoc_insertion_point(field_get:milvus.grpc.TableSchema.index_type) - return index_type_; -} -inline void TableSchema::set_index_type(::PROTOBUF_NAMESPACE_ID::int32 value) { - - index_type_ = value; - // @@protoc_insertion_point(field_set:milvus.grpc.TableSchema.index_type) -} - -// int64 dimension = 3; +// int64 dimension = 2; inline void TableSchema::clear_dimension() { dimension_ = PROTOBUF_LONGLONG(0); } @@ -2848,20 +2820,6 @@ inline void TableSchema::set_dimension(::PROTOBUF_NAMESPACE_ID::int64 value) { // @@protoc_insertion_point(field_set:milvus.grpc.TableSchema.dimension) } -// bool store_raw_vector = 4; -inline void TableSchema::clear_store_raw_vector() { - store_raw_vector_ = false; -} -inline bool TableSchema::store_raw_vector() const { - // @@protoc_insertion_point(field_get:milvus.grpc.TableSchema.store_raw_vector) - return store_raw_vector_; -} -inline void TableSchema::set_store_raw_vector(bool value) { - - store_raw_vector_ = value; - // @@protoc_insertion_point(field_set:milvus.grpc.TableSchema.store_raw_vector) -} - // ------------------------------------------------------------------- // Range diff --git a/cpp/src/grpc/milvus.proto b/cpp/src/grpc/milvus.proto index 59d74813d1..47209dc5a1 100644 --- a/cpp/src/grpc/milvus.proto +++ b/cpp/src/grpc/milvus.proto @@ -17,9 +17,7 @@ message TableName { */ message TableSchema { TableName table_name = 1; - int32 index_type = 2; - int64 dimension = 3; - bool store_raw_vector = 4; + int64 dimension = 2; } /** @@ -122,6 +120,8 @@ message Command { /** * @brief Index + * @index_type: 0-invalid, 1-idmap, 2-ivflat, 3-ivfsq8, 4-nsgmix + * @metric_type: 1-L2, 2-IP */ message Index { int32 index_type = 1; diff --git a/cpp/src/grpc/status.proto b/cpp/src/grpc/status.proto index 4e8dc15b74..f5b65c5f77 100644 --- a/cpp/src/grpc/status.proto +++ b/cpp/src/grpc/status.proto @@ -25,6 +25,8 @@ enum ErrorCode { CANNOT_DELETE_FOLDER = 19; CANNOT_DELETE_FILE = 20; BUILD_INDEX_ERROR = 21; + ILLEGAL_NLIST = 22; + ILLEGAL_METRIC_TYPE = 23; } message Status { diff --git a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp index 5225f2a97e..b4f7bf0922 100644 --- a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp +++ b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp @@ -34,9 +34,7 @@ namespace { void PrintTableSchema(const TableSchema& tb_schema) { BLOCK_SPLITER std::cout << "Table name: " << tb_schema.table_name << std::endl; - std::cout << "Table index type: " << (int)tb_schema.index_type << std::endl; std::cout << "Table dimension: " << tb_schema.dimension << std::endl; - std::cout << "Table store raw data: " << (tb_schema.store_raw_vector ? "true" : "false") << std::endl; BLOCK_SPLITER } @@ -88,16 +86,15 @@ namespace { } std::string GetTableName() { - static std::string s_id(CurrentTime()); - return "tbl_" + s_id; +// static std::string s_id(CurrentTime()); +// return "tbl_" + s_id; + return "test"; } TableSchema BuildTableSchema() { TableSchema tb_schema; tb_schema.table_name = TABLE_NAME; - tb_schema.index_type = IndexType::gpu_ivfflat; tb_schema.dimension = TABLE_DIMENSION; - tb_schema.store_raw_vector = true; return tb_schema; } @@ -276,9 +273,19 @@ ClientTest::Test(const std::string& address, const std::string& port) { } {//wait unit build index finish -// std::cout << "Wait until build all index done" << std::endl; -// Status stat = conn->CreateIndex(); -// std::cout << "BuildIndex function call status: " << stat.ToString() << std::endl; + std::cout << "Wait until create all index done" << std::endl; + IndexParam index; + index.table_name = TABLE_NAME; + index.index_type = IndexType::gpu_ivfflat; + index.nlist = 1000; + index.index_file_size = 1024; + index.metric_type = 1; + Status stat = conn->CreateIndex(index); + std::cout << "CreateIndex function call status: " << stat.ToString() << std::endl; + + IndexParam index2; + stat = conn->DescribeIndex(TABLE_NAME, index2); + std::cout << "DescribeIndex function call status: " << stat.ToString() << std::endl; } {//preload table @@ -290,6 +297,11 @@ ClientTest::Test(const std::string& address, const std::string& port) { DoSearch(conn, search_record_array, "Search after build index finish"); } + {//delete index + Status stat = conn->DropIndex(TABLE_NAME); + std::cout << "DropIndex function call status: " << stat.ToString() << std::endl; + } + {//delete table Status stat = conn->DropTable(TABLE_NAME); std::cout << "DeleteTable function call status: " << stat.ToString() << std::endl; diff --git a/cpp/src/sdk/grpc/ClientProxy.cpp b/cpp/src/sdk/grpc/ClientProxy.cpp index 1107da6eff..48c5159bb5 100644 --- a/cpp/src/sdk/grpc/ClientProxy.cpp +++ b/cpp/src/sdk/grpc/ClientProxy.cpp @@ -82,9 +82,7 @@ ClientProxy::CreateTable(const TableSchema ¶m) { try { ::milvus::grpc::TableSchema schema; schema.mutable_table_name()->set_table_name(param.table_name); - schema.set_index_type((int) param.index_type); schema.set_dimension(param.dimension); - schema.set_store_raw_vector(param.store_raw_vector); return client_ptr_->CreateTable(schema); } catch (std::exception &ex) { @@ -119,6 +117,10 @@ ClientProxy::CreateIndex(const IndexParam &index_param) { ::milvus::grpc::IndexParam grpc_index_param; grpc_index_param.mutable_table_name()->set_table_name( index_param.table_name); + grpc_index_param.mutable_index()->set_index_type((int32_t)index_param.index_type); + grpc_index_param.mutable_index()->set_nlist(index_param.nlist); + grpc_index_param.mutable_index()->set_index_file_size(index_param.index_file_size); + grpc_index_param.mutable_index()->set_metric_type(index_param.metric_type); return client_ptr_->CreateIndex(grpc_index_param); } catch (std::exception &ex) { @@ -269,9 +271,7 @@ ClientProxy::DescribeTable(const std::string &table_name, TableSchema &table_sch Status status = client_ptr_->DescribeTable(grpc_schema, table_name); table_schema.table_name = grpc_schema.table_name().table_name(); - table_schema.index_type = (IndexType) grpc_schema.index_type(); table_schema.dimension = grpc_schema.dimension(); - table_schema.store_raw_vector = grpc_schema.store_raw_vector(); return status; } catch (std::exception &ex) { @@ -345,14 +345,35 @@ ClientProxy::PreloadTable(const std::string &table_name) const { } } -IndexParam -ClientProxy::DescribeIndex(const std::string &table_name) const { +Status +ClientProxy::DescribeIndex(const std::string &table_name, IndexParam &index_param) const { + try { + ::milvus::grpc::TableName grpc_table_name; + grpc_table_name.set_table_name(table_name); + ::milvus::grpc::IndexParam grpc_index_param; + Status status = client_ptr_->DescribeIndex(grpc_table_name, grpc_index_param); + index_param.index_type = (IndexType)(grpc_index_param.mutable_index()->index_type()); + index_param.nlist = grpc_index_param.mutable_index()->nlist(); + index_param.index_file_size = grpc_index_param.mutable_index()->index_file_size(); + index_param.metric_type = grpc_index_param.mutable_index()->metric_type(); + return status; + + } catch (std::exception &ex) { + return Status(StatusCode::UnknownError, "fail to describe index: " + std::string(ex.what())); + } } Status ClientProxy::DropIndex(const std::string &table_name) const { - + try { + ::milvus::grpc::TableName grpc_table_name; + grpc_table_name.set_table_name(table_name); + Status status = client_ptr_->DropIndex(grpc_table_name); + return status; + } catch (std::exception &ex) { + return Status(StatusCode::UnknownError, "fail to drop index: " + std::string(ex.what())); + } } } diff --git a/cpp/src/sdk/grpc/ClientProxy.h b/cpp/src/sdk/grpc/ClientProxy.h index f6a39f0445..8a1d34d8e2 100644 --- a/cpp/src/sdk/grpc/ClientProxy.h +++ b/cpp/src/sdk/grpc/ClientProxy.h @@ -75,8 +75,8 @@ public: virtual Status PreloadTable(const std::string &table_name) const override; - virtual IndexParam - DescribeIndex(const std::string &table_name) const override; + virtual Status + DescribeIndex(const std::string &table_name, IndexParam &index_param) const override; virtual Status DropIndex(const std::string &table_name) const override; diff --git a/cpp/src/sdk/grpc/GrpcClient.cpp b/cpp/src/sdk/grpc/GrpcClient.cpp index 00894ea529..77478e5d7e 100644 --- a/cpp/src/sdk/grpc/GrpcClient.cpp +++ b/cpp/src/sdk/grpc/GrpcClient.cpp @@ -270,4 +270,44 @@ GrpcClient::Disconnect() { return Status::OK(); } +Status +GrpcClient::DeleteByRange(grpc::DeleteByRangeParam &delete_by_range_param) { + return Status::OK(); +} + +Status +GrpcClient::DescribeIndex(grpc::TableName &table_name, grpc::IndexParam &index_param) { + ClientContext context; + ::grpc::Status grpc_status = stub_->DescribeIndex(&context, table_name, &index_param); + + if (!grpc_status.ok()) { + std::cerr << "DescribeIndex rpc failed!" << std::endl; + return Status(StatusCode::RPCFailed, grpc_status.error_message()); + } + if (index_param.mutable_table_name()->status().error_code() != grpc::SUCCESS) { + std::cerr << index_param.mutable_table_name()->status().reason() << std::endl; + return Status(StatusCode::ServerFailed, index_param.mutable_table_name()->status().reason()); + } + + return Status::OK(); +} + +Status +GrpcClient::DropIndex(grpc::TableName &table_name) { + ClientContext context; + ::milvus::grpc::Status response; + ::grpc::Status grpc_status = stub_->DropIndex(&context, table_name, &response); + + if (!grpc_status.ok()) { + std::cerr << "DropIndex gRPC failed!" << std::endl; + return Status(StatusCode::RPCFailed, grpc_status.error_message()); + } + + if (response.error_code() != grpc::SUCCESS) { + std::cerr << response.reason() << std::endl; + return Status(StatusCode::ServerFailed, response.reason()); + } + return Status::OK(); +} + } \ No newline at end of file diff --git a/cpp/src/sdk/include/MilvusApi.h b/cpp/src/sdk/include/MilvusApi.h index cb261743e1..766bcbad1d 100644 --- a/cpp/src/sdk/include/MilvusApi.h +++ b/cpp/src/sdk/include/MilvusApi.h @@ -76,9 +76,10 @@ struct TopKQueryResult { */ struct IndexParam { std::string table_name; - int32_t index_type; - int64_t nlist; + IndexType index_type; + int32_t nlist; int32_t index_file_size; + int32_t metric_type; }; /** @@ -354,8 +355,8 @@ class Connection { * * @return index informations and indicate if this operation is successful. */ - virtual IndexParam - DescribeIndex(const std::string &table_name) const = 0; + virtual Status + DescribeIndex(const std::string &table_name, IndexParam &index_param) const = 0; /** * @brief drop index diff --git a/cpp/src/sdk/interface/ConnectionImpl.cpp b/cpp/src/sdk/interface/ConnectionImpl.cpp index b496d1c104..355c06438c 100644 --- a/cpp/src/sdk/interface/ConnectionImpl.cpp +++ b/cpp/src/sdk/interface/ConnectionImpl.cpp @@ -125,14 +125,14 @@ ConnectionImpl::PreloadTable(const std::string &table_name) const { return client_proxy_->PreloadTable(table_name); } -IndexParam -ConnectionImpl::DescribeIndex(const std::string &table_name) const { - +Status +ConnectionImpl::DescribeIndex(const std::string &table_name, IndexParam& index_param) const { + return client_proxy_->DescribeIndex(table_name, index_param); } Status ConnectionImpl::DropIndex(const std::string &table_name) const { - + return client_proxy_->DropIndex(table_name); } } diff --git a/cpp/src/sdk/interface/ConnectionImpl.h b/cpp/src/sdk/interface/ConnectionImpl.h index 24be6060ba..d304736b65 100644 --- a/cpp/src/sdk/interface/ConnectionImpl.h +++ b/cpp/src/sdk/interface/ConnectionImpl.h @@ -81,8 +81,8 @@ public: virtual Status PreloadTable(const std::string &table_name) const override; - virtual IndexParam - DescribeIndex(const std::string &table_name) const override; + virtual Status + DescribeIndex(const std::string &table_name, IndexParam& index_param) const override; virtual Status DropIndex(const std::string &table_name) const override; diff --git a/cpp/src/sdk/thrift/ClientProxy.cpp b/cpp/src/sdk/thrift/ClientProxy.cpp index dff5b98eb6..c258aeefad 100644 --- a/cpp/src/sdk/thrift/ClientProxy.cpp +++ b/cpp/src/sdk/thrift/ClientProxy.cpp @@ -334,8 +334,7 @@ Status ClientProxy::PreloadTable(const std::string &table_name) const { return Status::OK(); } -IndexParam ClientProxy::DescribeIndex(const std::string &table_name) const { - IndexParam index_param; +Status ClientProxy::DescribeIndex(const std::string &table_name, IndexParam &index_param) const { index_param.table_name = table_name; return index_param; } diff --git a/cpp/src/sdk/thrift/ClientProxy.h b/cpp/src/sdk/thrift/ClientProxy.h index 60e070ba8c..27dafc92a6 100644 --- a/cpp/src/sdk/thrift/ClientProxy.h +++ b/cpp/src/sdk/thrift/ClientProxy.h @@ -55,7 +55,7 @@ public: virtual Status PreloadTable(const std::string &table_name) const override; - virtual IndexParam DescribeIndex(const std::string &table_name) const override; + virtual Status DescribeIndex(const std::string &table_name, IndexParam &index_param) const override; virtual Status DropIndex(const std::string &table_name) const override; diff --git a/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp b/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp index 584023e4fc..ad1a0e3d71 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp @@ -187,14 +187,24 @@ GrpcRequestHandler::PreloadTable(::grpc::ServerContext *context, GrpcRequestHandler::DescribeIndex(::grpc::ServerContext *context, const ::milvus::grpc::TableName *request, ::milvus::grpc::IndexParam *response) { - + BaseTaskPtr task_ptr = DescribeIndexTask::Create(request->table_name(), *response); + ::milvus::grpc::Status grpc_status; + GrpcRequestScheduler::ExecTask(task_ptr, &grpc_status); + response->mutable_table_name()->mutable_status()->set_reason(grpc_status.reason()); + response->mutable_table_name()->mutable_status()->set_error_code(grpc_status.error_code()); + return ::grpc::Status::OK; } ::grpc::Status GrpcRequestHandler::DropIndex(::grpc::ServerContext *context, const ::milvus::grpc::TableName *request, ::milvus::grpc::Status *response) { - + BaseTaskPtr task_ptr = DropIndexTask::Create(request->table_name()); + ::milvus::grpc::Status grpc_status; + GrpcRequestScheduler::ExecTask(task_ptr, &grpc_status); + response->set_reason(grpc_status.reason()); + response->set_error_code(grpc_status.error_code()); + return ::grpc::Status::OK; } diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp index 8934045579..10ca2b80ed 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp @@ -130,17 +130,10 @@ CreateTableTask::OnExecute() { return SetError(res, "Invalid table dimension: " + std::to_string(schema_.dimension())); } - res = ValidationUtil::ValidateTableIndexType(schema_.index_type()); - if (res != SERVER_SUCCESS) { - return SetError(res, "Invalid index type: " + std::to_string(schema_.index_type())); - } - //step 2: construct table schema engine::meta::TableSchema table_info; table_info.dimension_ = (uint16_t) schema_.dimension(); table_info.table_id_ = schema_.table_name().table_name(); - table_info.engine_type_ = (int) EngineType(schema_.index_type()); - table_info.store_raw_data_ = schema_.store_raw_vector(); //step 3: create table engine::Status stat = DBWrapper::DB()->CreateTable(table_info); @@ -190,10 +183,7 @@ DescribeTableTask::OnExecute() { } schema_.mutable_table_name()->set_table_name(table_info.table_id_); - - schema_.set_index_type(IndexType((engine::EngineType) table_info.engine_type_)); schema_.set_dimension(table_info.dimension_); - schema_.set_store_raw_vector(table_info.store_raw_data_); } catch (std::exception &ex) { return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); @@ -238,7 +228,12 @@ CreateIndexTask::OnExecute() { } //step 2: check table existence - stat = DBWrapper::DB()->BuildIndex(table_name_); + engine::TableIndex index; + index.engine_type_ = index_param_.mutable_index()->index_type(); + index.nlist = index_param_.mutable_index()->nlist(); + index.index_file_size = index_param_.mutable_index()->index_file_size(); + index.metric_type = index_param_.mutable_index()->metric_type(); + stat = DBWrapper::DB()->CreateIndex(table_name_, index); if (!stat.ok()) { return SetError(SERVER_BUILD_INDEX_ERROR, "Engine failed: " + stat.ToString()); } @@ -758,7 +753,89 @@ PreloadTableTask::OnExecute() { return SERVER_SUCCESS; } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DescribeIndexTask::DescribeIndexTask(const std::string &table_name, + ::milvus::grpc::IndexParam &index_param) + : GrpcBaseTask(DDL_DML_TASK_GROUP), + table_name_(table_name), + index_param_(index_param) { +} + +BaseTaskPtr +DescribeIndexTask::Create(const std::string &table_name, + ::milvus::grpc::IndexParam &index_param){ + return std::shared_ptr(new DescribeIndexTask(table_name, index_param)); +} + +ServerError +DescribeIndexTask::OnExecute() { + try { + TimeRecorder rc("DescribeIndexTask"); + + //step 1: check arguments + ServerError res = ValidationUtil::ValidateTableName(table_name_); + if (res != SERVER_SUCCESS) { + return SetError(res, "Invalid table name: " + table_name_); + } + + //step 2: check table existence + engine::TableIndex index; + engine::Status stat = DBWrapper::DB()->DescribeIndex(table_name_, index); + if (!stat.ok()) { + return SetError(DB_META_TRANSACTION_FAILED, "Engine failed: " + stat.ToString()); + } + + index_param_.mutable_table_name()->set_table_name(table_name_); + index_param_.mutable_index()->set_index_type(index.engine_type_); + index_param_.mutable_index()->set_nlist(index.nlist); + index_param_.mutable_index()->set_index_file_size(index.index_file_size); + index_param_.mutable_index()->set_metric_type(index.metric_type); + + rc.ElapseFromBegin("totally cost"); + } catch (std::exception &ex) { + return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); + } + + return SERVER_SUCCESS; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DropIndexTask::DropIndexTask(const std::string &table_name) + : GrpcBaseTask(DDL_DML_TASK_GROUP), + table_name_(table_name) { + +} + +BaseTaskPtr +DropIndexTask::Create(const std::string &table_name){ + return std::shared_ptr(new DropIndexTask(table_name)); +} + +ServerError +DropIndexTask::OnExecute() { + try { + TimeRecorder rc("DropIndexTask"); + + //step 1: check arguments + ServerError res = ValidationUtil::ValidateTableName(table_name_); + if (res != SERVER_SUCCESS) { + return SetError(res, "Invalid table name: " + table_name_); + } + + //step 2: check table existence + engine::Status stat = DBWrapper::DB()->DropIndex(table_name_); + if (!stat.ok()) { + return SetError(DB_META_TRANSACTION_FAILED, "Engine failed: " + stat.ToString()); + } + + rc.ElapseFromBegin("totally cost"); + } catch (std::exception &ex) { + return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); + } + + return SERVER_SUCCESS; +} } } diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.h b/cpp/src/server/grpc_impl/GrpcRequestTask.h index 9a60064e49..e43b9fba60 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.h +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.h @@ -260,6 +260,9 @@ public: protected: DropIndexTask(const std::string &table_name); + ServerError + OnExecute() override; + private: std::string table_name_; diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 0e981f6ae4..4dffeb6db0 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -180,7 +180,7 @@ TEST_F(MetaTest, ARCHIVE_TEST_DISK) { for (auto i=0; i Date: Wed, 21 Aug 2019 18:35:57 +0800 Subject: [PATCH 2/2] merge 0.3.1 Former-commit-id: a538685c673319581c756289a4c003fa5833d829 --- ci/jenkinsfile/cluster_dev_test.groovy | 2 +- cpp/CHANGELOG.md | 5 +- cpp/build.sh | 2 +- cpp/cmake/ThirdPartyPackages.cmake | 26 +-- cpp/conf/server_config.template | 2 + cpp/src/config/YamlConfigMgr.cpp | 30 +-- cpp/src/config/YamlConfigMgr.h | 8 +- cpp/src/db/DBImpl.cpp | 6 +- cpp/src/db/Factories.cpp | 10 +- cpp/src/db/Factories.h | 2 +- cpp/src/db/Utils.cpp | 22 ++- cpp/src/db/Utils.h | 2 +- cpp/src/db/meta/MySQLConnectionPool.cpp | 14 +- cpp/src/db/meta/MySQLConnectionPool.h | 6 +- cpp/src/db/meta/MySQLMetaImpl.cpp | 49 ++++- cpp/src/db/meta/SqliteMetaImpl.cpp | 25 ++- cpp/src/server/ServerConfig.h | 14 +- cpp/src/wrapper/FaissGpuResources.cpp | 38 ++++ cpp/src/wrapper/FaissGpuResources.h | 36 ++++ cpp/src/wrapper/Index.cpp | 29 ++- cpp/src/wrapper/Index.h | 1 - cpp/src/wrapper/IndexBuilder.cpp | 70 ++++--- cpp/src/wrapper/IndexBuilder.h | 2 + cpp/thirdparty/versions.txt | 2 +- cpp/unittest/db/db_tests.cpp | 6 + cpp/unittest/db/mem_test.cpp | 7 +- cpp/unittest/db/misc_test.cpp | 4 + cpp/unittest/db/mysql_meta_test.cpp | 30 ++- cpp/unittest/db/scheduler_test.cpp | 2 + cpp/unittest/db/search_test.cpp | 9 + cpp/unittest/db/utils.cpp | 9 +- cpp/unittest/faiss_wrapper/CMakeLists.txt | 56 ++++++ cpp/unittest/faiss_wrapper/wrapper_test.cpp | 203 ++++++++++++++++++++ cpp/unittest/server/config_test.cpp | 51 ++++- cpp/unittest/server/util_test.cpp | 9 + cpp/unittest/utils/ValidationUtilTest.cpp | 76 ++++++++ 36 files changed, 744 insertions(+), 121 deletions(-) create mode 100644 cpp/src/wrapper/FaissGpuResources.cpp create mode 100644 cpp/src/wrapper/FaissGpuResources.h create mode 100644 cpp/unittest/faiss_wrapper/CMakeLists.txt create mode 100644 cpp/unittest/faiss_wrapper/wrapper_test.cpp create mode 100644 cpp/unittest/utils/ValidationUtilTest.cpp diff --git a/ci/jenkinsfile/cluster_dev_test.groovy b/ci/jenkinsfile/cluster_dev_test.groovy index 2d8854ca71..4a15b926cf 100644 --- a/ci/jenkinsfile/cluster_dev_test.groovy +++ b/ci/jenkinsfile/cluster_dev_test.groovy @@ -1,4 +1,4 @@ -timeout(time: 10, unit: 'MINUTES') { +timeout(time: 25, unit: 'MINUTES') { try { dir ("${PROJECT_NAME}_test") { checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:Test/milvus_test.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]]) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index a7ddee104a..616aeafc48 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -61,6 +61,8 @@ Please mark all change in change log and use the ticket from JIRA. - MS-257 - Update bzip2 download url - MS-288 - Update compile scripts - MS-330 - Stability test failed caused by server core dumped +- MS-347 - Build index hangs again +- MS-382 - fix MySQLMetaImpl::CleanUpFilesWithTTL unknown column bug ## Improvement - MS-156 - Add unittest for merge result functions @@ -89,6 +91,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-324 - Show error when there is not enough gpu memory to build index - MS-328 - Check metric type on server start - MS-332 - Set grpc and thrift server run concurrently +- MS-352 - Add hybrid index ## New Feature - MS-180 - Add new mem manager @@ -148,8 +151,8 @@ Please mark all change in change log and use the ticket from JIRA. - MS-130 - Add prometheus_test - MS-144 - Add nprobe config - MS-147 - Enable IVF - - MS-130 - Add prometheus_test + ## Task - MS-74 - Change README.md in cpp - MS-88 - Add support for arm architecture diff --git a/cpp/build.sh b/cpp/build.sh index 7216296c69..500eac6c67 100755 --- a/cpp/build.sh +++ b/cpp/build.sh @@ -86,7 +86,7 @@ if [[ ! -d cmake_build ]]; then fi cd cmake_build - +git CUDA_COMPILER=/usr/local/cuda/bin/nvcc if [[ ${MAKE_CLEAN} == "ON" ]]; then diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index b48724588d..f9140b6d80 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -157,7 +157,6 @@ if (UNIX) endif (APPLE) endif (UNIX) - # ---------------------------------------------------------------------- # thirdparty directory set(THIRDPARTY_DIR "${MILVUS_SOURCE_DIR}/thirdparty") @@ -167,7 +166,7 @@ set(THIRDPARTY_DIR "${MILVUS_SOURCE_DIR}/thirdparty") if(NOT DEFINED USE_JFROG_CACHE) set(USE_JFROG_CACHE "OFF") endif() -if(USE_JFROG_CACHE STREQUAL "ON") +if(USE_JFROG_CACHE STREQUAL "ON") set(JFROG_ARTFACTORY_CACHE_URL "http://192.168.1.201:80/artifactory/generic-local/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}") set(JFROG_USER_NAME "test") set(JFROG_PASSWORD "Fantast1c") @@ -308,9 +307,11 @@ set(EASYLOGGINGPP_MD5 "b78cd319db4be9b639927657b8aa7732") if(DEFINED ENV{MILVUS_FAISS_URL}) set(FAISS_SOURCE_URL "$ENV{MILVUS_FAISS_URL}") else() - set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") + set(FAISS_SOURCE_URL "http://192.168.1.105:6060/jinhai/faiss/-/archive/${FAISS_VERSION}/faiss-${FAISS_VERSION}.tar.gz") + # set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") endif() -set(FAISS_MD5 "0bc12737b23def156f6a1eb782050135") + +set(FAISS_MD5 "a589663865a8558205533c8ac414278c") if(DEFINED ENV{MILVUS_KNOWHERE_URL}) set(KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}") @@ -462,6 +463,7 @@ else() endif() set(GRPC_MD5 "7ec59ad54c85a12dcbbfede09bf413a9") + # ---------------------------------------------------------------------- # ARROW @@ -686,7 +688,7 @@ macro(build_bzip2) set(BZIP2_STATIC_LIB "${BZIP2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}bz2${CMAKE_STATIC_LIBRARY_SUFFIX}") - if(USE_JFROG_CACHE STREQUAL "ON") + if(USE_JFROG_CACHE STREQUAL "ON") set(BZIP2_CACHE_PACKAGE_NAME "bzip2_${BZIP2_MD5}.tar.gz") set(BZIP2_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${BZIP2_CACHE_PACKAGE_NAME}") set(BZIP2_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${BZIP2_CACHE_PACKAGE_NAME}") @@ -1184,7 +1186,7 @@ macro(build_faiss) INTERFACE_INCLUDE_DIRECTORIES "${FAISS_INCLUDE_DIR}" INTERFACE_LINK_LIBRARIES "openblas;lapack" ) endif() - + add_dependencies(faiss faiss_ep) if(${BUILD_FAISS_WITH_MKL} STREQUAL "OFF") @@ -1321,7 +1323,7 @@ if (MILVUS_BUILD_TESTS) if(NOT GTEST_VENDORED) endif() - + get_target_property(GTEST_INCLUDE_DIR gtest INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM "${GTEST_PREFIX}/lib") include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) @@ -1828,7 +1830,7 @@ endmacro() if(MILVUS_WITH_SNAPPY) resolve_dependency(Snappy) - + get_target_property(SNAPPY_INCLUDE_DIRS snappy INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM ${SNAPPY_PREFIX}/lib/) include_directories(SYSTEM ${SNAPPY_INCLUDE_DIRS}) @@ -2131,7 +2133,7 @@ endmacro() if(MILVUS_WITH_YAMLCPP) resolve_dependency(yaml-cpp) - + get_target_property(YAMLCPP_INCLUDE_DIR yaml-cpp INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM ${YAMLCPP_PREFIX}/lib/) include_directories(SYSTEM ${YAMLCPP_INCLUDE_DIR}) @@ -2203,7 +2205,7 @@ endmacro() if(MILVUS_WITH_ZLIB) resolve_dependency(ZLIB) - + get_target_property(ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) endif() @@ -2301,7 +2303,7 @@ endmacro() if(MILVUS_WITH_ZSTD) resolve_dependency(ZSTD) - + get_target_property(ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM ${ZSTD_PREFIX}/lib) include_directories(SYSTEM ${ZSTD_INCLUDE_DIR}) @@ -2406,7 +2408,7 @@ endmacro() if(MILVUS_WITH_AWS) resolve_dependency(AWS) - + link_directories(SYSTEM ${AWS_PREFIX}/lib) get_target_property(AWS_CPP_SDK_S3_INCLUDE_DIR aws-cpp-sdk-s3 INTERFACE_INCLUDE_DIRECTORIES) diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index c80e981bcd..037e55a0a8 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -45,3 +45,5 @@ engine_config: use_blas_threshold: 20 metric_type: L2 # compare vectors by euclidean distance(L2) or inner product(IP), optional: L2 or IP omp_thread_num: 0 # how many compute threads be used by engine, 0 means use all cpu core to compute + use_hybrid_index: false # use GPU/CPU hybrid index + hybrid_index_gpu: 0 # hybrid index gpu device id diff --git a/cpp/src/config/YamlConfigMgr.cpp b/cpp/src/config/YamlConfigMgr.cpp index 9a34ef3e63..ee935bf32c 100644 --- a/cpp/src/config/YamlConfigMgr.cpp +++ b/cpp/src/config/YamlConfigMgr.cpp @@ -73,19 +73,19 @@ YamlConfigMgr::SetChildConfig(const YAML::Node& node, return false; } -bool -YamlConfigMgr::SetSequence(const YAML::Node &node, - const std::string &child_name, - ConfigNode &config) { - if(node[child_name].IsDefined ()) { - size_t cnt = node[child_name].size(); - for(size_t i = 0; i < cnt; i++){ - config.AddSequenceItem(child_name, node[child_name][i].as()); - } - return true; - } - return false; -} +//bool +//YamlConfigMgr::SetSequence(const YAML::Node &node, +// const std::string &child_name, +// ConfigNode &config) { +// if(node[child_name].IsDefined ()) { +// size_t cnt = node[child_name].size(); +// for(size_t i = 0; i < cnt; i++){ +// config.AddSequenceItem(child_name, node[child_name][i].as()); +// } +// return true; +// } +// return false; +//} void YamlConfigMgr::LoadConfigNode(const YAML::Node& node, ConfigNode& config) { @@ -98,8 +98,8 @@ YamlConfigMgr::LoadConfigNode(const YAML::Node& node, ConfigNode& config) { SetConfigValue(node, key, config); } else if(node[key].IsMap()){ SetChildConfig(node, key, config); - } else if(node[key].IsSequence()){ - SetSequence(node, key, config); +// } else if(node[key].IsSequence()){ +// SetSequence(node, key, config); } } } diff --git a/cpp/src/config/YamlConfigMgr.h b/cpp/src/config/YamlConfigMgr.h index b8828b7a8c..05b55d9da5 100644 --- a/cpp/src/config/YamlConfigMgr.h +++ b/cpp/src/config/YamlConfigMgr.h @@ -33,10 +33,10 @@ class YamlConfigMgr : public IConfigMgr { const std::string &name, ConfigNode &config); - bool - SetSequence(const YAML::Node &node, - const std::string &child_name, - ConfigNode &config); +// bool +// SetSequence(const YAML::Node &node, +// const std::string &child_name, +// ConfigNode &config); void LoadConfigNode(const YAML::Node& node, ConfigNode& config); diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index a649257a0b..bccc6bbf92 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -60,6 +60,7 @@ void CollectQueryMetrics(double total_time, size_t nq) { server::Metrics::GetInstance().QueryVectorResponsePerSecondGaugeSet(double (nq) / total_time); } +#if 0 void CollectFileMetrics(int file_type, size_t file_size, double total_time) { switch(file_type) { case meta::TableFileSchema::RAW: @@ -79,6 +80,7 @@ void CollectFileMetrics(int file_type, size_t file_size, double total_time) { } } } +#endif } @@ -205,7 +207,7 @@ Status DBImpl::Query(const std::string &table_id, uint64_t k, uint64_t nq, uint6 Status DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) { - ENGINE_LOG_DEBUG << "Query by vectors"; + ENGINE_LOG_DEBUG << "Query by vectors " << table_id; //get all table files from table meta::DatePartionedTableFilesSchema files; @@ -568,7 +570,7 @@ Status DBImpl::BuildIndex(const std::string& table_id) { int times = 1; while (has) { - ENGINE_LOG_DEBUG << "Non index files detected! Will build index " << times; + ENGINE_LOG_DEBUG << "Non index files detected in " << table_id << "! Will build index " << times; meta_ptr_->UpdateTableFilesToIndex(table_id); /* StartBuildIndexTask(true); */ std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10*1000, times*100))); diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp index bb1056e3c2..58883d5c7b 100644 --- a/cpp/src/db/Factories.cpp +++ b/cpp/src/db/Factories.cpp @@ -90,11 +90,11 @@ std::shared_ptr DBMetaImplFactory::Build(const DBMetaOptions& metaOp } } -std::shared_ptr DBFactory::Build() { - auto options = OptionsFactory::Build(); - auto db = DBFactory::Build(options); - return std::shared_ptr(db); -} +//std::shared_ptr DBFactory::Build() { +// auto options = OptionsFactory::Build(); +// auto db = DBFactory::Build(options); +// return std::shared_ptr(db); +//} DB* DBFactory::Build(const Options& options) { return new DBImpl(options); diff --git a/cpp/src/db/Factories.h b/cpp/src/db/Factories.h index 0e6823c385..3c3479e512 100644 --- a/cpp/src/db/Factories.h +++ b/cpp/src/db/Factories.h @@ -33,7 +33,7 @@ struct DBMetaImplFactory { }; struct DBFactory { - static std::shared_ptr Build(); + //static std::shared_ptr Build(); static DB *Build(const Options &); }; diff --git a/cpp/src/db/Utils.cpp b/cpp/src/db/Utils.cpp index 5a0d3cafa2..1a1355d507 100644 --- a/cpp/src/db/Utils.cpp +++ b/cpp/src/db/Utils.cpp @@ -85,16 +85,20 @@ Status CreateTablePath(const DBMetaOptions& options, const std::string& table_id return Status::OK(); } -Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id) { - std::string db_path = options.path; - std::string table_path = db_path + TABLES_FOLDER + table_id; - boost::filesystem::remove_all(table_path); - ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; +Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id, bool force) { + std::vector paths = options.slave_paths; + paths.push_back(options.path); - for(auto& path : options.slave_paths) { - table_path = path + TABLES_FOLDER + table_id; - boost::filesystem::remove_all(table_path); - ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + for(auto& path : paths) { + std::string table_path = path + TABLES_FOLDER + table_id; + if(force) { + boost::filesystem::remove_all(table_path); + ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + } else if(boost::filesystem::exists(table_path) && + boost::filesystem::is_empty(table_path)) { + boost::filesystem::remove_all(table_path); + ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + } } return Status::OK(); diff --git a/cpp/src/db/Utils.h b/cpp/src/db/Utils.h index 47a8fca9b7..101d849ca3 100644 --- a/cpp/src/db/Utils.h +++ b/cpp/src/db/Utils.h @@ -19,7 +19,7 @@ namespace utils { long GetMicroSecTimeStamp(); Status CreateTablePath(const DBMetaOptions& options, const std::string& table_id); -Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id); +Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id, bool force = true); Status CreateTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); Status GetTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); diff --git a/cpp/src/db/meta/MySQLConnectionPool.cpp b/cpp/src/db/meta/MySQLConnectionPool.cpp index b43126920e..8e82dc5ae7 100644 --- a/cpp/src/db/meta/MySQLConnectionPool.cpp +++ b/cpp/src/db/meta/MySQLConnectionPool.cpp @@ -30,13 +30,13 @@ namespace meta { } } - int MySQLConnectionPool::getConnectionsInUse() { - return conns_in_use_; - } - - void MySQLConnectionPool::set_max_idle_time(int max_idle) { - max_idle_time_ = max_idle; - } +// int MySQLConnectionPool::getConnectionsInUse() { +// return conns_in_use_; +// } +// +// void MySQLConnectionPool::set_max_idle_time(int max_idle) { +// max_idle_time_ = max_idle; +// } std::string MySQLConnectionPool::getDB() { return db_; diff --git a/cpp/src/db/meta/MySQLConnectionPool.h b/cpp/src/db/meta/MySQLConnectionPool.h index 62afd2ddbf..9cde818b45 100644 --- a/cpp/src/db/meta/MySQLConnectionPool.h +++ b/cpp/src/db/meta/MySQLConnectionPool.h @@ -44,9 +44,9 @@ public: // Other half of in-use conn count limit void release(const mysqlpp::Connection *pc) override; - int getConnectionsInUse(); - - void set_max_idle_time(int max_idle); +// int getConnectionsInUse(); +// +// void set_max_idle_time(int max_idle); std::string getDB(); diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index fa2697ec32..e38997b22c 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -1652,15 +1652,14 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { } Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { - - auto now = utils::GetMicroSecTimeStamp(); + std::set table_ids; + + //remove to_delete files try { MetricCollector metric; { - - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { @@ -1700,6 +1699,8 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { ENGINE_LOG_DEBUG << "Removing file id:" << table_file.id_ << " location:" << table_file.location_; idsToDelete.emplace_back(std::to_string(table_file.id_)); + + table_ids.insert(table_file.table_id_); } if (!idsToDelete.empty()) { @@ -1734,12 +1735,11 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); } + //remove to_delete tables try { MetricCollector metric; { - - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { @@ -1765,7 +1765,7 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { std::string table_id; resRow["table_id"].to_string(table_id); - utils::DeleteTablePath(options_, table_id); + utils::DeleteTablePath(options_, table_id, false);//only delete empty folder idsToDeleteSS << "id = " << std::to_string(id) << " OR "; } @@ -1794,6 +1794,41 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); } + try { + MetricCollector metric; + + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + + for(auto& table_id : table_ids) { + Query cleanUpFilesWithTTLQuery = connectionPtr->query(); + cleanUpFilesWithTTLQuery << "SELECT file_id " << + "FROM TableFiles " << + "WHERE table_id = " << quote << table_id << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::CleanUpFilesWithTTL: " << cleanUpFilesWithTTLQuery.str(); + + StoreQueryResult res = cleanUpFilesWithTTLQuery.store(); + + if (res.empty()) { + utils::DeleteTablePath(options_, table_id); + } + } + } + } catch (const BadQuery &er) { + // Handle any query errors + ENGINE_LOG_ERROR << "QUERY ERROR WHEN CLEANING UP FILES WITH TTL" << ": " << er.what(); + return Status::DBTransactionError("QUERY ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL", er.what()); + } + return Status::OK(); } diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index 9118eadd17..25f5dbfaf4 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -311,6 +311,7 @@ Status SqliteMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) has = true; int raw_count = 0, new_count = 0, new_merge_count = 0, new_index_count = 0, to_index_count = 0; + std::vector file_ids; for (auto &file : selected) { switch (std::get<1>(file)) { case (int) TableFileSchema::RAW: @@ -1069,6 +1070,9 @@ Status SqliteMetaImpl::UpdateTableFiles(TableFilesSchema &files) { Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { auto now = utils::GetMicroSecTimeStamp(); + std::set table_ids; + + //remove to_delete files try { MetricCollector metric; @@ -1098,6 +1102,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { ENGINE_LOG_DEBUG << "Removing file id:" << table_file.file_id_ << " location:" << table_file.location_; ConnectorPtr->remove(table_file.id_); + table_ids.insert(table_file.table_id_); } return true; }); @@ -1111,6 +1116,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return HandleException("Encounter exception when clean table files", e); } + //remove to_delete tables try { MetricCollector metric; @@ -1123,7 +1129,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { auto commited = ConnectorPtr->transaction([&]() mutable { for (auto &table : tables) { - utils::DeleteTablePath(options_, std::get<1>(table)); + utils::DeleteTablePath(options_, std::get<1>(table), false);//only delete empty folder ConnectorPtr->remove(std::get<0>(table)); } @@ -1139,6 +1145,23 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return HandleException("Encounter exception when clean table files", e); } + //remove deleted table folder + //don't remove table folder until all its files has been deleted + try { + MetricCollector metric; + + for(auto& table_id : table_ids) { + auto selected = ConnectorPtr->select(columns(&TableFileSchema::file_id_), + where(c(&TableFileSchema::table_id_) == table_id)); + if(selected.size() == 0) { + utils::DeleteTablePath(options_, table_id); + } + } + + } catch (std::exception &e) { + return HandleException("Encounter exception when delete table folder", e); + } + return Status::OK(); } diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index 3236ea5cdc..49dc6f50fd 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -48,12 +48,14 @@ static const char* CONFIG_METRIC_COLLECTOR = "collector"; static const char* CONFIG_PROMETHEUS = "prometheus_config"; static const char* CONFIG_METRIC_PROMETHEUS_PORT = "port"; -static const char* CONFIG_ENGINE = "engine_config"; -static const char* CONFIG_NPROBE = "nprobe"; -static const char* CONFIG_NLIST = "nlist"; -static const char* CONFIG_DCBT = "use_blas_threshold"; -static const char* CONFIG_METRICTYPE = "metric_type"; -static const char* CONFIG_OMP_THREAD_NUM = "omp_thread_num"; +static const std::string CONFIG_ENGINE = "engine_config"; +static const std::string CONFIG_NPROBE = "nprobe"; +static const std::string CONFIG_NLIST = "nlist"; +static const std::string CONFIG_DCBT = "use_blas_threshold"; +static const std::string CONFIG_METRICTYPE = "metric_type"; +static const std::string CONFIG_OMP_THREAD_NUM = "omp_thread_num"; +static const std::string CONFIG_USE_HYBRID_INDEX = "use_hybrid_index"; +static const std::string CONFIG_HYBRID_INDEX_GPU = "hybrid_index_gpu"; class ServerConfig { public: diff --git a/cpp/src/wrapper/FaissGpuResources.cpp b/cpp/src/wrapper/FaissGpuResources.cpp new file mode 100644 index 0000000000..b4372f1a2c --- /dev/null +++ b/cpp/src/wrapper/FaissGpuResources.cpp @@ -0,0 +1,38 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#include "FaissGpuResources.h" +#include "map" + +namespace zilliz { +namespace milvus { +namespace engine { + +FaissGpuResources::Ptr& FaissGpuResources::GetGpuResources(int device_id) { + static std::map gpu_resources_map; + auto search = gpu_resources_map.find(device_id); + if (search != gpu_resources_map.end()) { + return gpu_resources_map[device_id]; + } else { + gpu_resources_map[device_id] = std::make_shared(); + return gpu_resources_map[device_id]; + } +} + +void FaissGpuResources::SelectGpu() { + using namespace zilliz::milvus::server; + ServerConfig &config = ServerConfig::GetInstance(); + ConfigNode server_config = config.GetConfig(CONFIG_SERVER); + gpu_num_ = server_config.GetInt32Value(server::CONFIG_GPU_INDEX, 0); +} + +int32_t FaissGpuResources::GetGpu() { + return gpu_num_; +} + +} +} +} \ No newline at end of file diff --git a/cpp/src/wrapper/FaissGpuResources.h b/cpp/src/wrapper/FaissGpuResources.h new file mode 100644 index 0000000000..45c011df85 --- /dev/null +++ b/cpp/src/wrapper/FaissGpuResources.h @@ -0,0 +1,36 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#pragma once + +#include "faiss/gpu/GpuResources.h" +#include "faiss/gpu/StandardGpuResources.h" + +#include "server/ServerConfig.h" + +namespace zilliz { +namespace milvus { +namespace engine { + +class FaissGpuResources { + + public: + using Ptr = std::shared_ptr; + + static FaissGpuResources::Ptr& GetGpuResources(int device_id); + + void SelectGpu(); + + int32_t GetGpu(); + + FaissGpuResources() : gpu_num_(0) { SelectGpu(); } + + private: + int32_t gpu_num_; +}; + +} +} +} \ No newline at end of file diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp index 4b10c1e686..6d2ca19449 100644 --- a/cpp/src/wrapper/Index.cpp +++ b/cpp/src/wrapper/Index.cpp @@ -7,16 +7,22 @@ #if 0 // TODO: maybe support static search #ifdef GPU_VERSION + #include "faiss/gpu/GpuAutoTune.h" #include "faiss/gpu/StandardGpuResources.h" #include "faiss/gpu/utils/DeviceUtils.h" + + #endif #include "Index.h" #include "faiss/index_io.h" #include "faiss/IndexIVF.h" #include "faiss/IVFlib.h" +#include "faiss/IndexScalarQuantizer.h" #include "server/ServerConfig.h" +#include "src/wrapper/FaissGpuResources.h" + namespace zilliz { namespace milvus { @@ -74,8 +80,27 @@ void write_index(const Index_ptr &index, const std::string &file_name) { Index_ptr read_index(const std::string &file_name) { std::shared_ptr raw_index = nullptr; - raw_index.reset(faiss::read_index(file_name.c_str())); - return std::make_shared(raw_index); + faiss::Index *cpu_index = faiss::read_index(file_name.c_str()); + + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode engine_config = config.GetConfig(server::CONFIG_ENGINE); + bool use_hybrid_index_ = engine_config.GetBoolValue(server::CONFIG_USE_HYBRID_INDEX, false); + + if (dynamic_cast(cpu_index) != nullptr && use_hybrid_index_) { + + int device_id = engine_config.GetInt32Value(server::CONFIG_HYBRID_INDEX_GPU, 0); + auto gpu_resources = engine::FaissGpuResources::GetGpuResources(device_id); + faiss::gpu::GpuClonerOptions clone_option; + clone_option.storeInCpu = true; + faiss::Index *gpu_index = faiss::gpu::index_cpu_to_gpu(gpu_resources.get(), device_id, cpu_index, &clone_option); + + delete cpu_index; + raw_index.reset(gpu_index); + return std::make_shared(raw_index); + } else { + raw_index.reset(cpu_index); + return std::make_shared(raw_index); + } } } diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h index 1668059d11..d722b85330 100644 --- a/cpp/src/wrapper/Index.h +++ b/cpp/src/wrapper/Index.h @@ -83,7 +83,6 @@ void write_index(const Index_ptr &index, const std::string &file_name); extern Index_ptr read_index(const std::string &file_name); #endif - } } } diff --git a/cpp/src/wrapper/IndexBuilder.cpp b/cpp/src/wrapper/IndexBuilder.cpp index 095341ecc7..e2b6971281 100644 --- a/cpp/src/wrapper/IndexBuilder.cpp +++ b/cpp/src/wrapper/IndexBuilder.cpp @@ -17,41 +17,17 @@ #include #include - +#include "faiss/IndexScalarQuantizer.h" #include "server/ServerConfig.h" #include "IndexBuilder.h" +#include "FaissGpuResources.h" namespace zilliz { namespace milvus { namespace engine { -class GpuResources { - public: - static GpuResources &GetInstance() { - static GpuResources instance; - return instance; - } - - void SelectGpu() { - using namespace zilliz::milvus::server; - ServerConfig &config = ServerConfig::GetInstance(); - ConfigNode server_config = config.GetConfig(CONFIG_SERVER); - gpu_num = server_config.GetInt32Value(server::CONFIG_GPU_INDEX, 0); - } - - int32_t GetGpu() { - return gpu_num; - } - - private: - GpuResources() : gpu_num(0) { SelectGpu(); } - - private: - int32_t gpu_num; -}; - using std::vector; static std::mutex gpu_resource; @@ -59,6 +35,12 @@ static std::mutex cpu_resource; IndexBuilder::IndexBuilder(const Operand_ptr &opd) { opd_ = opd; + + using namespace zilliz::milvus::server; + ServerConfig &config = ServerConfig::GetInstance(); + ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); + use_hybrid_index_ = engine_config.GetBoolValue(CONFIG_USE_HYBRID_INDEX, false); + hybrid_index_device_id_ = engine_config.GetInt32Value(server::CONFIG_HYBRID_INDEX_GPU, 0); } // Default: build use gpu @@ -76,14 +58,48 @@ Index_ptr IndexBuilder::build_all(const long &nb, faiss::Index *ori_index = faiss::index_factory(opd_->d, opd_->get_index_type(nb).c_str(), metric_type); std::lock_guard lk(gpu_resource); + +#ifdef UNITTEST_ONLY faiss::gpu::StandardGpuResources res; - auto device_index = faiss::gpu::index_cpu_to_gpu(&res, GpuResources::GetInstance().GetGpu(), ori_index); + int device_id = 0; + faiss::gpu::GpuClonerOptions clone_option; + clone_option.storeInCpu = use_hybrid_index_; + auto device_index = faiss::gpu::index_cpu_to_gpu(&res, device_id, ori_index, &clone_option); +#else + engine::FaissGpuResources res; + int device_id = res.GetGpu(); + auto gpu_resources = engine::FaissGpuResources::GetGpuResources(device_id); + faiss::gpu::GpuClonerOptions clone_option; + clone_option.storeInCpu = use_hybrid_index_; + auto device_index = faiss::gpu::index_cpu_to_gpu(gpu_resources.get(), device_id, ori_index, &clone_option); +#endif + if (!device_index->is_trained) { nt == 0 || xt == nullptr ? device_index->train(nb, xb) : device_index->train(nt, xt); } device_index->add_with_ids(nb, xb, ids); // TODO: support with add_with_IDMAP + if (dynamic_cast(ori_index) != nullptr + && use_hybrid_index_) { + std::shared_ptr device_hybrid_index = nullptr; + if (hybrid_index_device_id_ != device_id) { + auto host_hybrid_index = faiss::gpu::index_gpu_to_cpu(device_index); + auto hybrid_gpu_resources = engine::FaissGpuResources::GetGpuResources(hybrid_index_device_id_); + auto another_device_index = faiss::gpu::index_cpu_to_gpu(hybrid_gpu_resources.get(), + hybrid_index_device_id_, + host_hybrid_index, + &clone_option); + device_hybrid_index.reset(another_device_index); + delete device_index; + delete host_hybrid_index; + } else { + device_hybrid_index.reset(device_index); + } + delete ori_index; + return std::make_shared(device_hybrid_index); + } + host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index)); delete device_index; diff --git a/cpp/src/wrapper/IndexBuilder.h b/cpp/src/wrapper/IndexBuilder.h index 4cb6de814b..2142df83ee 100644 --- a/cpp/src/wrapper/IndexBuilder.h +++ b/cpp/src/wrapper/IndexBuilder.h @@ -45,6 +45,8 @@ class IndexBuilder { protected: Operand_ptr opd_ = nullptr; + bool use_hybrid_index_; + int hybrid_index_device_id_; }; class BgCpuBuilder : public IndexBuilder { diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 40ca9378e4..e15f66c365 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -2,7 +2,7 @@ ARROW_VERSION=zilliz BOOST_VERSION=1.70.0 BZIP2_VERSION=1.0.6 EASYLOGGINGPP_VERSION=v9.96.7 -FAISS_VERSION=v1.5.3 +FAISS_VERSION=branch-0.1.0 MKL_VERSION=2019.4.243 GTEST_VERSION=1.8.1 JSONCONS_VERSION=0.126.0 diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 8b36d2efbd..b6f052a5db 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -93,6 +93,7 @@ TEST_F(DBTest, CONFIG_TEST) { TEST_F(DBTest, DB_TEST) { + db_->Open(GetOptions(), &db_); engine::meta::TableSchema table_info = BuildTableSchema(); engine::Status stat = db_->CreateTable(table_info); @@ -161,6 +162,11 @@ TEST_F(DBTest, DB_TEST) { } search.join(); + + uint64_t count; + stat = db_->GetTableRowCount(TABLE_NAME, count); + ASSERT_STATS(stat); + ASSERT_TRUE(count > 0); }; TEST_F(DBTest, SEARCH_TEST) { diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index e561837075..ffb688a23c 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -84,6 +84,7 @@ TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { vector_ids = source.GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); + status = impl_->DropAll(); ASSERT_TRUE(status.ok()); } @@ -198,6 +199,8 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { status = mem_table.Serialize(); ASSERT_TRUE(status.ok()); + + status = impl_->DropAll(); ASSERT_TRUE(status.ok()); } @@ -372,7 +375,6 @@ TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { delete db_; boost::filesystem::remove_all(options.meta.path); - }; TEST_F(DBTest, VECTOR_IDS_TEST) @@ -445,4 +447,5 @@ TEST_F(DBTest, VECTOR_IDS_TEST) for (auto i = 0; i < nb; i++) { ASSERT_EQ(vector_ids[i], i + nb); } -} \ No newline at end of file +} + diff --git a/cpp/unittest/db/misc_test.cpp b/cpp/unittest/db/misc_test.cpp index 6433a224ff..608a5ca175 100644 --- a/cpp/unittest/db/misc_test.cpp +++ b/cpp/unittest/db/misc_test.cpp @@ -135,4 +135,8 @@ TEST(DBMiscTest, UTILS_TEST) { status = engine::utils::DeleteTablePath(options, TABLE_NAME); ASSERT_TRUE(status.ok()); + status = engine::utils::DeleteTableFilePath(options, file); + ASSERT_TRUE(status.ok()); + + } \ No newline at end of file diff --git a/cpp/unittest/db/mysql_meta_test.cpp b/cpp/unittest/db/mysql_meta_test.cpp index 80a9ddf4dd..7703ce15d5 100644 --- a/cpp/unittest/db/mysql_meta_test.cpp +++ b/cpp/unittest/db/mysql_meta_test.cpp @@ -57,7 +57,7 @@ TEST_F(DISABLED_MySQLTest, TABLE_TEST) { table.table_id_ = ""; status = impl.CreateTable(table); - ASSERT_TRUE(status.ok()); +// ASSERT_TRUE(status.ok()); status = impl.DropAll(); ASSERT_TRUE(status.ok()); @@ -82,16 +82,22 @@ TEST_F(DISABLED_MySQLTest, TABLE_FILE_TEST) { table.dimension_ = 256; auto status = impl.CreateTable(table); + meta::TableFileSchema table_file; table_file.table_id_ = table.table_id_; status = impl.CreateTableFile(table_file); ASSERT_TRUE(status.ok()); ASSERT_EQ(table_file.file_type_, meta::TableFileSchema::NEW); + meta::DatesT dates; + dates.push_back(meta::Meta::GetDate()); + status = impl.DropPartitionsByDates(table_file.table_id_, dates); + ASSERT_FALSE(status.ok()); + uint64_t cnt = 0; status = impl.Count(table_id, cnt); - ASSERT_TRUE(status.ok()); - ASSERT_EQ(cnt, 0UL); +// ASSERT_TRUE(status.ok()); +// ASSERT_EQ(cnt, 0UL); auto file_id = table_file.file_id_; @@ -102,11 +108,6 @@ TEST_F(DISABLED_MySQLTest, TABLE_FILE_TEST) { ASSERT_TRUE(status.ok()); ASSERT_EQ(table_file.file_type_, new_file_type); - meta::DatesT dates; - dates.push_back(meta::Meta::GetDate()); - status = impl.DropPartitionsByDates(table_file.table_id_, dates); - ASSERT_FALSE(status.ok()); - dates.clear(); for (auto i=2; i < 10; ++i) { dates.push_back(meta::Meta::GetDateWithDelta(-1*i)); @@ -132,6 +133,8 @@ TEST_F(DISABLED_MySQLTest, TABLE_FILE_TEST) { ASSERT_EQ(files.size(), 1UL); ASSERT_TRUE(files[0].file_type_ == meta::TableFileSchema::TO_DELETE); +// status = impl.NextTableId(table_id); + status = impl.DropAll(); ASSERT_TRUE(status.ok()); } @@ -194,6 +197,13 @@ TEST_F(DISABLED_MySQLTest, ARCHIVE_TEST_DAYS) { i++; } + bool has; + status = impl.HasNonIndexFiles(table_id, has); + ASSERT_TRUE(status.ok()); + + status = impl.UpdateTableFilesToIndex(table_id); + ASSERT_TRUE(status.ok()); + status = impl.DropAll(); ASSERT_TRUE(status.ok()); } @@ -216,6 +226,10 @@ TEST_F(DISABLED_MySQLTest, ARCHIVE_TEST_DISK) { table.table_id_ = table_id; auto status = impl.CreateTable(table); + meta::TableSchema table_schema; + table_schema.table_id_ = ""; + status = impl.CreateTable(table_schema); + meta::TableFilesSchema files; meta::TableFileSchema table_file; table_file.table_id_ = table.table_id_; diff --git a/cpp/unittest/db/scheduler_test.cpp b/cpp/unittest/db/scheduler_test.cpp index 0937ef197a..6b3ad3dbac 100644 --- a/cpp/unittest/db/scheduler_test.cpp +++ b/cpp/unittest/db/scheduler_test.cpp @@ -56,6 +56,8 @@ TEST(DBSchedulerTest, TASK_QUEUE_TEST) { ptr = queue.Back(); ASSERT_EQ(ptr->type(), engine::ScheduleTaskType::kIndexLoad); + load_task->Execute(); + } TEST(DBSchedulerTest, SEARCH_SCHEDULER_TEST) { diff --git a/cpp/unittest/db/search_test.cpp b/cpp/unittest/db/search_test.cpp index 340fa82f20..64e03f65ae 100644 --- a/cpp/unittest/db/search_test.cpp +++ b/cpp/unittest/db/search_test.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// #include "db/scheduler/task/SearchTask.h" +#include "server/ServerConfig.h" #include "utils/TimeRecorder.h" #include @@ -213,6 +214,10 @@ TEST(DBSearchTest, MERGE_TEST) { } TEST(DBSearchTest, PARALLEL_CLUSTER_TEST) { + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode& db_config = config.GetConfig(server::CONFIG_DB); + db_config.SetValue(server::CONFIG_DB_PARALLEL_REDUCE, "true"); + bool ascending = true; std::vector target_ids; std::vector target_distence; @@ -245,6 +250,10 @@ TEST(DBSearchTest, PARALLEL_CLUSTER_TEST) { } TEST(DBSearchTest, PARALLEL_TOPK_TEST) { + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode& db_config = config.GetConfig(server::CONFIG_DB); + db_config.SetValue(server::CONFIG_DB_PARALLEL_REDUCE, "true"); + std::vector target_ids; std::vector target_distence; engine::SearchContext::ResultSet src_result; diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index 405b48a602..cfac3ea6e7 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -91,9 +91,10 @@ zilliz::milvus::engine::DBMetaOptions DISABLED_MySQLTest::getDBMetaOptions() { zilliz::milvus::engine::DBMetaOptions options; options.path = "/tmp/milvus_test"; options.backend_uri = DBTestEnvironment::getURI(); - + if(options.backend_uri.empty()) { - throw std::exception(); +// throw std::exception(); + options.backend_uri = "mysql://root:Fantast1c@192.168.1.194:3306/"; } return options; @@ -123,6 +124,10 @@ int main(int argc, char **argv) { if (argc > 1) { uri = argv[1]; } + +// if(uri.empty()) { +// uri = "mysql://root:Fantast1c@192.168.1.194:3306/"; +// } // std::cout << uri << std::endl; ::testing::AddGlobalTestEnvironment(new DBTestEnvironment); return RUN_ALL_TESTS(); diff --git a/cpp/unittest/faiss_wrapper/CMakeLists.txt b/cpp/unittest/faiss_wrapper/CMakeLists.txt new file mode 100644 index 0000000000..10f353f00d --- /dev/null +++ b/cpp/unittest/faiss_wrapper/CMakeLists.txt @@ -0,0 +1,56 @@ +#------------------------------------------------------------------------------- +# Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +# Unauthorized copying of this file, via any medium is strictly prohibited. +# Proprietary and confidential. +#------------------------------------------------------------------------------- +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) + +set(util_files + ${MILVUS_ENGINE_SRC}/utils/ValidationUtil.cpp) + +# Make sure that your call to link_directories takes place before your call to the relevant add_executable. +include_directories(/usr/local/cuda/include) +link_directories("/usr/local/cuda/lib64") + +set(wrapper_test_src + ${unittest_srcs} + ${wrapper_src} + ${config_files} + ${util_files} + ${require_files} + wrapper_test.cpp + ) + +add_executable(wrapper_test ${wrapper_test_src}) + +set(wrapper_libs + stdc++ + boost_system_static + boost_filesystem_static + faiss + cudart + cublas + sqlite + snappy + bz2 + z + zstd + lz4 + ) +if(${BUILD_FAISS_WITH_MKL} STREQUAL "ON") + set(wrapper_libs ${wrapper_libs} ${MKL_LIBS} ${MKL_LIBS}) +else() + set(wrapper_libs ${wrapper_libs} + lapack + openblas) +endif() + +target_link_libraries(wrapper_test ${wrapper_libs} ${unittest_libs}) +add_definitions("-DUNITTEST_ONLY") + +set(topk_test_src + topk_test.cpp + ${CMAKE_SOURCE_DIR}/src/wrapper/gpu/Topk.cu) + +install(TARGETS wrapper_test DESTINATION bin) diff --git a/cpp/unittest/faiss_wrapper/wrapper_test.cpp b/cpp/unittest/faiss_wrapper/wrapper_test.cpp new file mode 100644 index 0000000000..3500166c6b --- /dev/null +++ b/cpp/unittest/faiss_wrapper/wrapper_test.cpp @@ -0,0 +1,203 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + + + +#include "wrapper/Operand.h" +#include "wrapper/Index.h" +#include "wrapper/IndexBuilder.h" +#include "wrapper/FaissGpuResources.h" +#include "server/ServerConfig.h" + +#include +#include +#include + +using namespace zilliz::milvus; +using namespace zilliz::milvus::engine; + + +TEST(operand_test, Wrapper_Test) { + using std::cout; + using std::endl; + + auto opd = std::make_shared(); + opd->index_type = "IVF"; + opd->preproc = "OPQ"; + opd->postproc = "PQ"; + opd->metric_type = "L2"; + opd->d = 64; + + auto opd_str = operand_to_str(opd); + auto new_opd = str_to_operand(opd_str); + + // TODO: fix all place where using opd to build index. + assert(new_opd->get_index_type(10000) == opd->get_index_type(10000)); + auto opd_sq8 = std::make_shared(); + opd_sq8->index_type = "IVFSQ8"; + opd_sq8->preproc = "OPQ"; + opd_sq8->postproc = "PQ"; + opd_sq8->metric_type = "L2"; + opd_sq8->d = 64; + auto opd_str_sq8 = operand_to_str(opd_sq8); + auto new_opd_sq8 = str_to_operand(opd_str_sq8); + assert(new_opd_sq8->get_index_type(10000) == opd_sq8->get_index_type(10000)); + +} + +TEST(build_test, Wrapper_Test) { + // dimension of the vectors to index + int d = 3; + + // make a set of nt training vectors in the unit cube + size_t nt = 10000; + + // a reasonable number of cetroids to index nb vectors + int ncentroids = 16; + + std::random_device rd; + std::mt19937 gen(rd()); + + std::vector xb; + std::vector ids; + + //prepare train data + std::uniform_real_distribution<> dis_xt(-1.0, 1.0); + std::vector xt(nt * d); + for (size_t i = 0; i < nt * d; i++) { + xt[i] = dis_xt(gen); + } + + //train the index + auto opd = std::make_shared(); + opd->index_type = "IVF"; + opd->d = d; + opd->ncent = ncentroids; + IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); + auto index_1 = index_builder_1->build_all(0, xb, ids, nt, xt); + ASSERT_TRUE(index_1 != nullptr); + + // size of the database we plan to index + size_t nb = 100000; + + //prepare raw data + xb.resize(nb); + ids.resize(nb); + for (size_t i = 0; i < nb; i++) { + xb[i] = dis_xt(gen); + ids[i] = i; + } + index_1->add_with_ids(nb, xb.data(), ids.data()); + + //search in first quadrant + int nq = 1, k = 10; + std::vector xq = {0.5, 0.5, 0.5}; + float *result_dists = new float[k]; + long *result_ids = new long[k]; + index_1->search(nq, xq.data(), k, result_dists, result_ids); + + for (int i = 0; i < k; i++) { + if (result_ids[i] < 0) { + ASSERT_TRUE(false); + break; + } + + long id = result_ids[i]; + std::cout << "No." << id << " [" << xb[id * 3] << ", " << xb[id * 3 + 1] << ", " + << xb[id * 3 + 2] << "] distance = " << result_dists[i] << std::endl; + + //makesure result vector is in first quadrant + ASSERT_TRUE(xb[id * 3] > 0.0); + ASSERT_TRUE(xb[id * 3 + 1] > 0.0); + ASSERT_TRUE(xb[id * 3 + 2] > 0.0); + } + + delete[] result_dists; + delete[] result_ids; +} + +TEST(gpu_build_test, Wrapper_Test) { + using std::vector; + + int d = 256; + int nb = 3 * 1000 * 100; + int nq = 100; + vector xb(d * nb); + vector xq(d * nq); + vector ids(nb); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis_xt(-1.0, 1.0); + for (auto &e : xb) { e = float(dis_xt(gen)); } + for (auto &e : xq) { e = float(dis_xt(gen)); } + for (int i = 0; i < nb; ++i) { ids[i] = i; } + + auto opd = std::make_shared(); + opd->index_type = "IVF"; + opd->d = d; + opd->ncent = 256; + + IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); + auto index_1 = index_builder_1->build_all(nb, xb.data(), ids.data()); + assert(index_1->ntotal == nb); + assert(index_1->dim == d); + + // sanity check: search 5 first vectors of xb + int k = 1; + vector I(5 * k); + vector D(5 * k); + index_1->search(5, xb.data(), k, D.data(), I.data()); + for (int i = 0; i < 5; ++i) { assert(i == I[i]); } +} + +TEST(gpu_resource_test, Wrapper_Test) { + FaissGpuResources res_mgr; + FaissGpuResources::Ptr& res = res_mgr.GetGpuResources(0); + ASSERT_NE(res, nullptr); + res = res_mgr.GetGpuResources(0); + ASSERT_NE(res, nullptr); + + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode& server_config = config.GetConfig(server::CONFIG_SERVER); + server_config.SetValue(server::CONFIG_GPU_INDEX, "0"); + res_mgr.SelectGpu(); + int32_t gpu_num = res_mgr.GetGpu(); + ASSERT_EQ(gpu_num, 0); +} + +TEST(index_test, Wrapper_Test) { + std::vector data; + std::vector ids; + long vec_count = 10000; + for(long i = 0; i < vec_count; i++) { + data.push_back(i/3); + data.push_back(i/9); + ids.push_back(i); + } + + faiss::Index* faiss_index = faiss::index_factory(2, "IVF128,SQ8"); + faiss_index->train(vec_count, data.data()); + + std::shared_ptr raw_index(faiss_index); + engine::Index_ptr index = std::make_shared(raw_index); + index->add_with_ids(vec_count, data.data(), ids.data()); + + ASSERT_EQ(index->ntotal, vec_count); + + std::string file_name = "/tmp/index_test.t"; + write_index(index, file_name); + + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode& engine_config = config.GetConfig(server::CONFIG_ENGINE); + engine_config.SetValue(server::CONFIG_USE_HYBRID_INDEX, "true"); + + Index_ptr index_out = read_index(file_name); + ASSERT_NE(index_out, nullptr); + + bool res = index_out->reset(); + ASSERT_TRUE(res); +} diff --git a/cpp/unittest/server/config_test.cpp b/cpp/unittest/server/config_test.cpp index 2172bdd977..462b813f26 100644 --- a/cpp/unittest/server/config_test.cpp +++ b/cpp/unittest/server/config_test.cpp @@ -4,9 +4,12 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// #include +#include #include "config/IConfigMgr.h" #include "server/ServerConfig.h" +#include "utils/CommonUtil.h" +#include "utils/ValidationUtil.h" using namespace zilliz::milvus; @@ -15,6 +18,10 @@ namespace { static const char* CONFIG_FILE_PATH = "./milvus/conf/server_config.yaml"; static const char* LOG_FILE_PATH = "./milvus/conf/log_config.conf"; +static constexpr uint64_t KB = 1024; +static constexpr uint64_t MB = KB*1024; +static constexpr uint64_t GB = MB*1024; + } TEST(ConfigTest, CONFIG_TEST) { @@ -87,6 +94,9 @@ TEST(ConfigTest, SERVER_CONFIG_TEST) { server::ServerError err = config.LoadConfigFile(CONFIG_FILE_PATH); ASSERT_EQ(err, server::SERVER_SUCCESS); + err = server::ServerConfig::GetInstance().ValidateConfig(); + ASSERT_EQ(err, server::SERVER_SUCCESS); + server::ConfigNode node1 = config.GetConfig("server_config"); server::ConfigNode& node2 = config.GetConfig("cache_config"); node1.Combine(node2); @@ -100,6 +110,43 @@ TEST(ConfigTest, SERVER_CONFIG_TEST) { config.PrintAll(); - const server::ServerConfig const_config = config; - server::ConfigNode node = const_config.GetConfig("aaa"); + unsigned long total_mem = 0, free_mem = 0; + server::CommonUtil::GetSystemMemInfo(total_mem, free_mem); + + size_t gpu_mem = 0; + server::ValidationUtil::GetGpuMemory(0, gpu_mem); + + server::ConfigNode& server_config = config.GetConfig("server_config"); + server::ConfigNode& db_config = config.GetConfig("db_config"); + server::ConfigNode& cache_config = config.GetConfig(server::CONFIG_CACHE); + cache_config.SetValue(server::CACHE_FREE_PERCENT, "2.0"); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); + + size_t cache_cap = 16; + size_t insert_buffer_size = (total_mem - cache_cap*GB + 1*GB)/GB; + db_config.SetValue(server::CONFIG_DB_INSERT_BUFFER_SIZE, std::to_string(insert_buffer_size)); + cache_config.SetValue(server::CONFIG_CPU_CACHE_CAPACITY, std::to_string(cache_cap)); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); + + cache_cap = total_mem/GB + 2; + cache_config.SetValue(server::CONFIG_CPU_CACHE_CAPACITY, std::to_string(cache_cap)); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); + + size_t index_building_threshold = (gpu_mem + 1*MB)/MB; + db_config.SetValue(server::CONFIG_DB_INDEX_TRIGGER_SIZE, + std::to_string(index_building_threshold)); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); + + insert_buffer_size = total_mem/GB + 2; + db_config.SetValue(server::CONFIG_DB_INSERT_BUFFER_SIZE, std::to_string(insert_buffer_size)); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); + + server_config.SetValue(server::CONFIG_GPU_INDEX, "9999"); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); } \ No newline at end of file diff --git a/cpp/unittest/server/util_test.cpp b/cpp/unittest/server/util_test.cpp index c3a47182ee..60f7875d37 100644 --- a/cpp/unittest/server/util_test.cpp +++ b/cpp/unittest/server/util_test.cpp @@ -204,3 +204,12 @@ TEST(UtilTest, VALIDATE_INDEXTYPE_TEST) { ASSERT_EQ(server::ValidationUtil::ValidateTableIndexType((int)engine::EngineType::MAX_VALUE + 1), server::SERVER_INVALID_INDEX_TYPE); } +TEST(UtilTest, TIMERECORDER_TEST) { + for(int64_t log_level = 0; log_level <= 6; log_level++) { + if(log_level == 5) { + continue; //skip fatal + } + server::TimeRecorder rc("time", log_level); + rc.RecordSection("end"); + } +} diff --git a/cpp/unittest/utils/ValidationUtilTest.cpp b/cpp/unittest/utils/ValidationUtilTest.cpp new file mode 100644 index 0000000000..35b8b94e23 --- /dev/null +++ b/cpp/unittest/utils/ValidationUtilTest.cpp @@ -0,0 +1,76 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// +#include + +#include "utils/ValidationUtil.h" +#include "utils/Error.h" +#include "db/ExecutionEngine.h" + +#include + +using namespace zilliz::milvus; +using namespace zilliz::milvus::server; + +TEST(ValidationUtilTest, TableNameTest) { + std::string table_name = "Normal123_"; + ServerError res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_SUCCESS); + + table_name = "12sds"; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = ""; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = "_asdasd"; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_SUCCESS); + + table_name = "!@#!@"; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = "_!@#!@"; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = "中文"; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + + table_name = std::string(10000, 'a'); + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); +} + + +TEST(ValidationUtilTest, TableDimensionTest) { + ASSERT_EQ(ValidationUtil::ValidateTableDimension(-1), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidationUtil::ValidateTableDimension(0), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidationUtil::ValidateTableDimension(16385), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidationUtil::ValidateTableDimension(16384), SERVER_SUCCESS); + ASSERT_EQ(ValidationUtil::ValidateTableDimension(1), SERVER_SUCCESS); +} + +TEST(ValidationUtilTest, TableIndexTypeTest) { + ASSERT_EQ(ValidationUtil::ValidateTableIndexType((int)engine::EngineType::INVALID), SERVER_INVALID_INDEX_TYPE); + for(int i = 1; i <= (int)engine::EngineType::MAX_VALUE; i++) { + ASSERT_EQ(ValidationUtil::ValidateTableIndexType(i), SERVER_SUCCESS); + } + ASSERT_EQ(ValidationUtil::ValidateTableIndexType((int)engine::EngineType::MAX_VALUE + 1), SERVER_INVALID_INDEX_TYPE); +} + +TEST(ValidationUtilTest, ValidateGpuTest) { + ASSERT_EQ(ValidationUtil::ValidateGpuIndex(0), SERVER_SUCCESS); + ASSERT_NE(ValidationUtil::ValidateGpuIndex(100), SERVER_SUCCESS); + + size_t memory = 0; + ASSERT_EQ(ValidationUtil::GetGpuMemory(0, memory), SERVER_SUCCESS); + ASSERT_NE(ValidationUtil::GetGpuMemory(100, memory), SERVER_SUCCESS); +}