From 5b26d194f39f664869971e05abfe25918ffe14a9 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Fri, 16 Aug 2019 10:20:21 +0800 Subject: [PATCH 01/22] add unittest Former-commit-id: 89139a24caab971fa16b27b814ca4731efd0ea20 --- cpp/src/db/insert/MemManagerImpl.cpp | 6 ++++-- cpp/src/db/insert/MemTable.cpp | 2 +- cpp/src/db/insert/MemTable.h | 2 +- cpp/src/db/insert/VectorSource.cpp | 6 +++--- cpp/src/db/insert/VectorSource.h | 2 +- cpp/src/server/grpc_impl/GrpcRequestTask.cpp | 11 +++++++++-- cpp/unittest/metrics/prometheus_test.cpp | 3 +++ 7 files changed, 22 insertions(+), 10 deletions(-) diff --git a/cpp/src/db/insert/MemManagerImpl.cpp b/cpp/src/db/insert/MemManagerImpl.cpp index bd71f874da..7c0110e56b 100644 --- a/cpp/src/db/insert/MemManagerImpl.cpp +++ b/cpp/src/db/insert/MemManagerImpl.cpp @@ -42,9 +42,11 @@ Status MemManagerImpl::InsertVectorsNoLock(const std::string &table_id, MemTablePtr mem = GetMemByTable(table_id); VectorSource::Ptr source = std::make_shared(n, vectors); - auto status = mem->Add(source); + auto status = mem->Add(source, vector_ids); if (status.ok()) { - vector_ids = source->GetVectorIds(); + if (vector_ids.empty()) { + vector_ids = source->GetVectorIds(); + } } return status; } diff --git a/cpp/src/db/insert/MemTable.cpp b/cpp/src/db/insert/MemTable.cpp index 38206e25fd..ca63c02ad9 100644 --- a/cpp/src/db/insert/MemTable.cpp +++ b/cpp/src/db/insert/MemTable.cpp @@ -15,7 +15,7 @@ MemTable::MemTable(const std::string &table_id, } -Status MemTable::Add(VectorSource::Ptr &source) { +Status MemTable::Add(VectorSource::Ptr &source, IDNumbers &vector_ids) { while (!source->AllAdded()) { diff --git a/cpp/src/db/insert/MemTable.h b/cpp/src/db/insert/MemTable.h index 4f0cdb7d2a..7b2d93ffe8 100644 --- a/cpp/src/db/insert/MemTable.h +++ b/cpp/src/db/insert/MemTable.h @@ -21,7 +21,7 @@ class MemTable { MemTable(const std::string &table_id, const std::shared_ptr &meta, const Options &options); - Status Add(VectorSource::Ptr &source); + Status Add(VectorSource::Ptr &source, IDNumbers &vector_ids); void GetCurrentMemTableFile(MemTableFile::Ptr &mem_table_file); diff --git a/cpp/src/db/insert/VectorSource.cpp b/cpp/src/db/insert/VectorSource.cpp index f36eeb09bd..5a24d261af 100644 --- a/cpp/src/db/insert/VectorSource.cpp +++ b/cpp/src/db/insert/VectorSource.cpp @@ -12,9 +12,9 @@ namespace engine { VectorSource::VectorSource(const size_t &n, const float *vectors) : - n_(n), - vectors_(vectors), - id_generator_(new SimpleIDGenerator()) { + n_(n), + vectors_(vectors), + id_generator_(std::make_shared()) { current_num_vectors_added = 0; } diff --git a/cpp/src/db/insert/VectorSource.h b/cpp/src/db/insert/VectorSource.h index 9792772d80..3f7e4e8f5e 100644 --- a/cpp/src/db/insert/VectorSource.h +++ b/cpp/src/db/insert/VectorSource.h @@ -37,7 +37,7 @@ class VectorSource { size_t current_num_vectors_added; - IDGenerator *id_generator_; + std::shared_ptr id_generator_; }; //VectorSource diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp index 6d2842a34b..73f38528cf 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp @@ -381,9 +381,9 @@ InsertTask::InsertTask(const ::milvus::grpc::InsertParam &insert_param, } BaseTaskPtr -InsertTask::Create(const ::milvus::grpc::InsertParam &insert_infos, +InsertTask::Create(const ::milvus::grpc::InsertParam &insert_param, ::milvus::grpc::VectorIds &record_ids) { - return std::shared_ptr(new InsertTask(insert_infos, record_ids)); + return std::shared_ptr(new InsertTask(insert_param, record_ids)); } ServerError @@ -400,6 +400,13 @@ InsertTask::OnExecute() { return SetError(SERVER_INVALID_ROWRECORD_ARRAY, "Row record array is empty"); } + if (!record_ids_.vector_id_array().empty()) { + if (record_ids_.vector_id_array().size() != insert_param_.row_record_array_size()) { + return SetError(SERVER_ILLEGAL_VECTOR_ID, + "Size of vector ids is not equal to row record array size"); + } + } + //step 2: check table existence engine::meta::TableSchema table_info; table_info.table_id_ = insert_param_.table_name(); diff --git a/cpp/unittest/metrics/prometheus_test.cpp b/cpp/unittest/metrics/prometheus_test.cpp index 521e00fc5c..004e58a5fc 100644 --- a/cpp/unittest/metrics/prometheus_test.cpp +++ b/cpp/unittest/metrics/prometheus_test.cpp @@ -54,4 +54,7 @@ TEST(PrometheusTest, PROMETHEUS_TEST){ instance.ConnectionGaugeDecrement(); instance.KeepingAliveCounterIncrement(); instance.OctetsSet(); + instance.CPUCoreUsagePercentSet(); + instance.GPUTemperature(); + instance.CPUTemperature(); } \ No newline at end of file From 9c6c7c31ec8a227f42a9d8cd6a78557e01ee1797 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Tue, 20 Aug 2019 15:58:46 +0800 Subject: [PATCH 02/22] add vector_ids in Insert Former-commit-id: d793fa40cfa71e910e34998e3962df61de6f40f3 --- cpp/src/db/insert/MemTable.cpp | 4 +- cpp/src/db/insert/MemTableFile.cpp | 4 +- cpp/src/db/insert/MemTableFile.h | 2 +- cpp/src/db/insert/VectorSource.cpp | 12 ++- cpp/src/db/insert/VectorSource.h | 3 +- .../examples/grpcsimple/src/ClientTest.cpp | 62 +++---------- cpp/src/sdk/grpc/ClientProxy.cpp | 19 ++-- cpp/src/server/grpc_impl/GrpcRequestTask.cpp | 5 +- cpp/unittest/db/mem_test.cpp | 89 ++++++++++++++++--- 9 files changed, 122 insertions(+), 78 deletions(-) diff --git a/cpp/src/db/insert/MemTable.cpp b/cpp/src/db/insert/MemTable.cpp index ca63c02ad9..ff9c25e3e7 100644 --- a/cpp/src/db/insert/MemTable.cpp +++ b/cpp/src/db/insert/MemTable.cpp @@ -27,12 +27,12 @@ Status MemTable::Add(VectorSource::Ptr &source, IDNumbers &vector_ids) { Status status; if (mem_table_file_list_.empty() || current_mem_table_file->IsFull()) { MemTableFile::Ptr new_mem_table_file = std::make_shared(table_id_, meta_, options_); - status = new_mem_table_file->Add(source); + status = new_mem_table_file->Add(source, vector_ids); if (status.ok()) { mem_table_file_list_.emplace_back(new_mem_table_file); } } else { - status = current_mem_table_file->Add(source); + status = current_mem_table_file->Add(source, vector_ids); } if (!status.ok()) { diff --git a/cpp/src/db/insert/MemTableFile.cpp b/cpp/src/db/insert/MemTableFile.cpp index 1d7053ab5a..326658df5f 100644 --- a/cpp/src/db/insert/MemTableFile.cpp +++ b/cpp/src/db/insert/MemTableFile.cpp @@ -41,7 +41,7 @@ Status MemTableFile::CreateTableFile() { return status; } -Status MemTableFile::Add(const VectorSource::Ptr &source) { +Status MemTableFile::Add(const VectorSource::Ptr &source, IDNumbers& vector_ids) { if (table_file_schema_.dimension_ <= 0) { std::string err_msg = "MemTableFile::Add: table_file_schema dimension = " + @@ -55,7 +55,7 @@ Status MemTableFile::Add(const VectorSource::Ptr &source) { if (mem_left >= single_vector_mem_size) { size_t num_vectors_to_add = std::ceil(mem_left / single_vector_mem_size); size_t num_vectors_added; - auto status = source->Add(execution_engine_, table_file_schema_, num_vectors_to_add, num_vectors_added); + auto status = source->Add(execution_engine_, table_file_schema_, num_vectors_to_add, num_vectors_added, vector_ids); if (status.ok()) { current_mem_ += (num_vectors_added * single_vector_mem_size); } diff --git a/cpp/src/db/insert/MemTableFile.h b/cpp/src/db/insert/MemTableFile.h index b582152299..d754b03071 100644 --- a/cpp/src/db/insert/MemTableFile.h +++ b/cpp/src/db/insert/MemTableFile.h @@ -19,7 +19,7 @@ class MemTableFile { MemTableFile(const std::string &table_id, const std::shared_ptr &meta, const Options &options); - Status Add(const VectorSource::Ptr &source); + Status Add(const VectorSource::Ptr &source, IDNumbers& vector_ids); size_t GetCurrentMem(); diff --git a/cpp/src/db/insert/VectorSource.cpp b/cpp/src/db/insert/VectorSource.cpp index 5a24d261af..27385b4b23 100644 --- a/cpp/src/db/insert/VectorSource.cpp +++ b/cpp/src/db/insert/VectorSource.cpp @@ -21,14 +21,22 @@ VectorSource::VectorSource(const size_t &n, Status VectorSource::Add(const ExecutionEnginePtr &execution_engine, const meta::TableFileSchema &table_file_schema, const size_t &num_vectors_to_add, - size_t &num_vectors_added) { + size_t &num_vectors_added, + IDNumbers &vector_ids) { auto start_time = METRICS_NOW_TIME; num_vectors_added = current_num_vectors_added + num_vectors_to_add <= n_ ? num_vectors_to_add : n_ - current_num_vectors_added; IDNumbers vector_ids_to_add; - id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add); + if (vector_ids.empty()) { + id_generator_->GetNextIDNumbers(num_vectors_added, vector_ids_to_add); + } else { + vector_ids_to_add.resize(num_vectors_added); + for (int pos = current_num_vectors_added; pos < current_num_vectors_added + num_vectors_added; pos++) { + vector_ids_to_add[pos-current_num_vectors_added] = vector_ids[pos]; + } + } Status status = execution_engine->AddWithIds(num_vectors_added, vectors_ + current_num_vectors_added * table_file_schema.dimension_, vector_ids_to_add.data()); diff --git a/cpp/src/db/insert/VectorSource.h b/cpp/src/db/insert/VectorSource.h index 3f7e4e8f5e..4c350c78bc 100644 --- a/cpp/src/db/insert/VectorSource.h +++ b/cpp/src/db/insert/VectorSource.h @@ -21,7 +21,8 @@ class VectorSource { Status Add(const ExecutionEnginePtr &execution_engine, const meta::TableFileSchema &table_file_schema, const size_t &num_vectors_to_add, - size_t &num_vectors_added); + size_t &num_vectors_added, + IDNumbers &vector_ids); size_t GetNumVectorsAdded(); diff --git a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp index 583a917897..5225f2a97e 100644 --- a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp +++ b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp @@ -15,6 +15,8 @@ using namespace milvus; +//#define SET_VECTOR_IDS; + namespace { std::string GetTableName(); @@ -211,9 +213,9 @@ ClientTest::Test(const std::string& address, const std::string& port) { std::cout << "All tables: " << std::endl; for(auto& table : tables) { int64_t row_count = 0; -// conn->DropTable(table); - stat = conn->CountTable(table, row_count); - std::cout << "\t" << table << "(" << row_count << " rows)" << std::endl; + conn->DropTable(table); +// stat = conn->CountTable(table, row_count); +// std::cout << "\t" << table << "(" << row_count << " rows)" << std::endl; } } @@ -235,59 +237,21 @@ ClientTest::Test(const std::string& address, const std::string& port) { std::cout << "DescribeTable function call status: " << stat.ToString() << std::endl; PrintTableSchema(tb_schema); } -// -// Connection::Destroy(conn); - -// pid_t pid; -// for (int i = 0; i < 5; ++i) { -// pid = fork(); -// if (pid == 0 || pid == -1) { -// break; -// } -// } -// if (pid == -1) { -// std::cout << "fail to fork!\n"; -// exit(1); -// } else if (pid == 0) { -// std::shared_ptr conn = Connection::Create(); -// -// {//connect server -// ConnectParam param = {address, port}; -// Status stat = conn->Connect(param); -// std::cout << "Connect function call status: " << stat.ToString() << std::endl; -// } -// -// {//server version -// std::string version = conn->ServerVersion(); -// std::cout << "Server version: " << version << std::endl; -// } -// Connection::Destroy(conn); -// exit(0); -// } else { -// std::shared_ptr conn = Connection::Create(); -// -// {//connect server -// ConnectParam param = {address, port}; -// Status stat = conn->Connect(param); -// std::cout << "Connect function call status: " << stat.ToString() << std::endl; -// } -// -// {//server version -// std::string version = conn->ServerVersion(); -// std::cout << "Server version: " << version << std::endl; -// } -// Connection::Destroy(conn); -// std::cout << "in main process\n"; -// exit(0); -// } std::vector> search_record_array; {//insert vectors + std::vector record_ids; for (int i = 0; i < ADD_VECTOR_LOOP; i++) {//add vectors std::vector record_array; int64_t begin_index = i * BATCH_ROW_COUNT; BuildVectors(begin_index, begin_index + BATCH_ROW_COUNT, record_array); - std::vector record_ids; + +#ifdef SET_VECTOR_IDS + record_ids.resize(ADD_VECTOR_LOOP * BATCH_ROW_COUNT); + for (auto j = begin_index; j Insert(vector_ids, insert_param, status); - auto finish = std::chrono::high_resolution_clock::now(); - - for (size_t i = 0; i < vector_ids.vector_id_array_size(); i++) { - id_array.push_back(vector_ids.vector_id_array(i)); + ::milvus::grpc::VectorIds vector_ids; + if (!id_array.empty()) { + for (auto i = 0; i < id_array.size(); i++) { + insert_param.add_row_id_array(id_array[i]); + } + client_ptr_->Insert(vector_ids, insert_param, status); + } else { + client_ptr_->Insert(vector_ids, insert_param, status); + for (size_t i = 0; i < vector_ids.vector_id_array_size(); i++) { + id_array.push_back(vector_ids.vector_id_array(i)); + } } + #endif } catch (std::exception &ex) { diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp index 73f38528cf..8934045579 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp @@ -453,7 +453,10 @@ InsertTask::OnExecute() { //step 4: insert vectors auto vec_count = (uint64_t) insert_param_.row_record_array_size(); - std::vector vec_ids(record_ids_.vector_id_array_size(), 0); + std::vector vec_ids(insert_param_.row_id_array_size(), 0); + for (auto i = 0; i < insert_param_.row_id_array_size(); i++) { + vec_ids[i] = insert_param_.row_id_array(i); + } stat = DBWrapper::DB()->InsertVectors(insert_param_.table_name(), vec_count, vec_f.data(), vec_ids); diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 1976822e76..0f8d2b65e0 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -68,15 +68,15 @@ TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { engine::ExecutionEnginePtr execution_engine_ = engine::EngineFactory::Build(table_file_schema.dimension_, table_file_schema.location_, (engine::EngineType) table_file_schema.engine_type_); - status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added); + engine::IDNumbers vector_ids; + status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added, vector_ids); ASSERT_TRUE(status.ok()); - + vector_ids = source.GetVectorIds(); + ASSERT_EQ(vector_ids.size(), 50); ASSERT_EQ(num_vectors_added, 50); - engine::IDNumbers vector_ids = source.GetVectorIds(); - ASSERT_EQ(vector_ids.size(), 50); - - status = source.Add(execution_engine_, table_file_schema, 60, num_vectors_added); + vector_ids.clear(); + status = source.Add(execution_engine_, table_file_schema, 60, num_vectors_added, vector_ids); ASSERT_TRUE(status.ok()); ASSERT_EQ(num_vectors_added, 50); @@ -105,12 +105,13 @@ TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) { engine::VectorSource::Ptr source = std::make_shared(n_100, vectors_100.data()); - status = mem_table_file.Add(source); + engine::IDNumbers vector_ids; + status = mem_table_file.Add(source, vector_ids); ASSERT_TRUE(status.ok()); // std::cout << mem_table_file.GetCurrentMem() << " " << mem_table_file.GetMemLeft() << std::endl; - engine::IDNumbers vector_ids = source->GetVectorIds(); + vector_ids = source->GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); size_t singleVectorMem = sizeof(float) * TABLE_DIM; @@ -121,7 +122,8 @@ TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) { BuildVectors(n_max, vectors_128M); engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); - status = mem_table_file.Add(source_128M); + vector_ids.clear(); + status = mem_table_file.Add(source_128M, vector_ids); vector_ids = source_128M->GetVectorIds(); ASSERT_EQ(vector_ids.size(), n_max - n_100); @@ -149,9 +151,10 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { engine::MemTable mem_table(TABLE_NAME, impl_, options); - status = mem_table.Add(source_100); + engine::IDNumbers vector_ids; + status = mem_table.Add(source_100, vector_ids); ASSERT_TRUE(status.ok()); - engine::IDNumbers vector_ids = source_100->GetVectorIds(); + vector_ids = source_100->GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); engine::MemTableFile::Ptr mem_table_file; @@ -163,8 +166,9 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { std::vector vectors_128M; BuildVectors(n_max, vectors_128M); + vector_ids.clear(); engine::VectorSource::Ptr source_128M = std::make_shared(n_max, vectors_128M.data()); - status = mem_table.Add(source_128M); + status = mem_table.Add(source_128M, vector_ids); ASSERT_TRUE(status.ok()); vector_ids = source_128M->GetVectorIds(); @@ -181,7 +185,8 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { engine::VectorSource::Ptr source_1G = std::make_shared(n_1G, vectors_1G.data()); - status = mem_table.Add(source_1G); + vector_ids.clear(); + status = mem_table.Add(source_1G, vector_ids); ASSERT_TRUE(status.ok()); vector_ids = source_1G->GetVectorIds(); @@ -370,3 +375,61 @@ TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { }; +TEST_F(DBTest, VECTOR_IDS_TEST) +{ + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + engine::IDNumbers vector_ids; + + + int64_t nb = 100000; + std::vector xb; + BuildVectors(nb, xb); + + vector_ids.resize(nb); + for (auto i = 0; i < nb; i++) { + vector_ids[i] = i; + } + + stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_EQ(vector_ids[0], 0); + ASSERT_STATS(stat); + + nb = 25000; + xb.clear(); + BuildVectors(nb, xb); + vector_ids.clear(); + vector_ids.resize(nb); + for (auto i = 0; i < nb; i++) { + vector_ids[i] = i + nb; + } + stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_EQ(vector_ids[0], nb); + ASSERT_STATS(stat); + + nb = 262144; //512M + xb.clear(); + BuildVectors(nb, xb); + vector_ids.clear(); + vector_ids.resize(nb); + for (auto i = 0; i < nb; i++) { + vector_ids[i] = i + nb / 2; + } + stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_EQ(vector_ids[0], nb/2); + ASSERT_STATS(stat); + + nb = 65536; //128M + xb.clear(); + BuildVectors(nb, xb); + vector_ids.clear(); + stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_STATS(stat); +} \ No newline at end of file From c1e82cb749431ed3a739cba527da1959532b5035 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Tue, 20 Aug 2019 16:49:04 +0800 Subject: [PATCH 03/22] add vector ids check in unittest Former-commit-id: 26c60e303c5ede2200c77efee11b17a782d0dc46 --- cpp/unittest/db/mem_test.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index 0f8d2b65e0..e561837075 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -432,4 +432,17 @@ TEST_F(DBTest, VECTOR_IDS_TEST) vector_ids.clear(); stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); ASSERT_STATS(stat); + + nb = 100; + xb.clear(); + BuildVectors(nb, xb); + vector_ids.clear(); + vector_ids.resize(nb); + for (auto i = 0; i < nb; i++) { + vector_ids[i] = i + nb; + } + stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + for (auto i = 0; i < nb; i++) { + ASSERT_EQ(vector_ids[i], i + nb); + } } \ No newline at end of file From 223cf1f5478f014240c6fbcb191eb564c28eaf4b Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 21 Aug 2019 16:53:31 +0800 Subject: [PATCH 04/22] MS-338 NewAPI: refine code to support CreateIndex Former-commit-id: b3a60c3de2da5522b2b9becd8ba382825a1c1e05 --- cpp/src/db/DB.h | 3 + cpp/src/db/DBImpl.cpp | 98 ++++--- cpp/src/db/DBImpl.h | 8 +- cpp/src/db/Types.h | 9 + cpp/src/db/Utils.cpp | 7 + cpp/src/db/Utils.h | 3 + cpp/src/db/engine/ExecutionEngine.h | 5 + cpp/src/db/insert/MemTableFile.cpp | 2 +- cpp/src/db/meta/Meta.h | 10 + cpp/src/db/meta/MetaTypes.h | 23 +- cpp/src/db/meta/MySQLMetaImpl.cpp | 91 +++--- cpp/src/db/meta/MySQLMetaImpl.h | 6 + cpp/src/db/meta/SqliteMetaImpl.cpp | 262 ++++++++++++------ cpp/src/db/meta/SqliteMetaImpl.h | 9 + cpp/src/grpc/cpp_gen.sh | 8 +- cpp/src/grpc/gen-milvus/milvus.pb.cc | 255 ++++++----------- cpp/src/grpc/gen-milvus/milvus.pb.h | 48 +--- cpp/src/grpc/milvus.proto | 6 +- cpp/src/grpc/status.proto | 2 + .../examples/grpcsimple/src/ClientTest.cpp | 30 +- cpp/src/sdk/grpc/ClientProxy.cpp | 35 ++- cpp/src/sdk/grpc/ClientProxy.h | 4 +- cpp/src/sdk/grpc/GrpcClient.cpp | 40 +++ cpp/src/sdk/include/MilvusApi.h | 9 +- cpp/src/sdk/interface/ConnectionImpl.cpp | 8 +- cpp/src/sdk/interface/ConnectionImpl.h | 4 +- cpp/src/sdk/thrift/ClientProxy.cpp | 3 +- cpp/src/sdk/thrift/ClientProxy.h | 2 +- .../server/grpc_impl/GrpcRequestHandler.cpp | 14 +- cpp/src/server/grpc_impl/GrpcRequestTask.cpp | 99 ++++++- cpp/src/server/grpc_impl/GrpcRequestTask.h | 3 + cpp/unittest/db/meta_tests.cpp | 2 +- cpp/unittest/db/mysql_meta_test.cpp | 2 +- 33 files changed, 663 insertions(+), 447 deletions(-) diff --git a/cpp/src/db/DB.h b/cpp/src/db/DB.h index 282e762717..b143f0c233 100644 --- a/cpp/src/db/DB.h +++ b/cpp/src/db/DB.h @@ -46,6 +46,9 @@ public: virtual Status Size(uint64_t& result) = 0; virtual Status BuildIndex(const std::string& table_id) = 0; + virtual Status CreateIndex(const std::string& table_id, const TableIndex& index) = 0; + virtual Status DescribeIndex(const std::string& table_id, TableIndex& index) = 0; + virtual Status DropIndex(const std::string& table_id) = 0; virtual Status DropAll() = 0; diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index ddee622669..a649257a0b 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -6,6 +6,7 @@ #include "DBImpl.h" #include "src/db/meta/SqliteMetaImpl.h" #include "Log.h" +#include "Utils.h" #include "engine/EngineFactory.h" #include "Factories.h" #include "metrics/Metrics.h" @@ -470,7 +471,8 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, } else { table_file.file_type_ = meta::TableFileSchema::RAW; } - table_file.size_ = index_size; + table_file.file_size_ = index->PhysicalSize(); + table_file.row_count_ = index->Count(); updated.push_back(table_file); status = meta_ptr_->UpdateTableFiles(updated); ENGINE_LOG_DEBUG << "New merged file " << table_file.file_id_ << @@ -574,7 +576,58 @@ Status DBImpl::BuildIndex(const std::string& table_id) { times++; } return Status::OK(); - /* return BuildIndexByTable(table_id); */ +} + +Status DBImpl::CreateIndex(const std::string& table_id, const TableIndex& index) { + { + std::unique_lock lock(build_index_mutex_); + + //step 1: check index difference + TableIndex old_index; + auto status = DescribeIndex(table_id, old_index); + if(!status.ok()) { + ENGINE_LOG_ERROR << "Failed to get table index info"; + return status; + } + + if(utils::IsSameIndex(old_index, index)) { + ENGINE_LOG_DEBUG << "Same index setting, no need to create index again"; + return Status::OK(); + } + + //step 2: drop old index files + DropIndex(table_id); + + //step 3: update index info + + status = meta_ptr_->UpdateTableIndexParam(table_id, index); + if (!status.ok()) { + ENGINE_LOG_ERROR << "Failed to update table index info"; + return status; + } + } + + bool has = false; + auto status = meta_ptr_->HasNonIndexFiles(table_id, has); + int times = 1; + + while (has) { + ENGINE_LOG_DEBUG << "Non index files detected! Will build index " << times; + status = meta_ptr_->UpdateTableFilesToIndex(table_id); + /* StartBuildIndexTask(true); */ + std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10*1000, times*100))); + status = meta_ptr_->HasNonIndexFiles(table_id, has); + times++; + } + return Status::OK(); +} + +Status DBImpl::DescribeIndex(const std::string& table_id, TableIndex& index) { + return meta_ptr_->DescribeTableIndex(table_id, index); +} + +Status DBImpl::DropIndex(const std::string& table_id) { + return meta_ptr_->DropTableIndex(table_id); } Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { @@ -650,26 +703,27 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { //step 6: update meta table_file.file_type_ = meta::TableFileSchema::INDEX; - table_file.size_ = index->Size(); + table_file.file_size_ = index->PhysicalSize(); + table_file.row_count_ = index->Count(); - auto to_remove = file; - to_remove.file_type_ = meta::TableFileSchema::TO_DELETE; + auto origin_file = file; + origin_file.file_type_ = meta::TableFileSchema::BACKUP; - meta::TableFilesSchema update_files = {table_file, to_remove}; + meta::TableFilesSchema update_files = {table_file, origin_file}; status = meta_ptr_->UpdateTableFiles(update_files); if(status.ok()) { ENGINE_LOG_DEBUG << "New index file " << table_file.file_id_ << " of size " << index->PhysicalSize() << " bytes" - << " from file " << to_remove.file_id_; + << " from file " << origin_file.file_id_; if(options_.insert_cache_immediately_) { index->Cache(); } } else { //failed to update meta, mark the new file as to_delete, don't delete old file - to_remove.file_type_ = meta::TableFileSchema::TO_INDEX; - status = meta_ptr_->UpdateTableFile(to_remove); - ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << to_remove.file_id_ << " to to_index"; + origin_file.file_type_ = meta::TableFileSchema::TO_INDEX; + status = meta_ptr_->UpdateTableFile(origin_file); + ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << origin_file.file_id_ << " to to_index"; table_file.file_type_ = meta::TableFileSchema::TO_DELETE; status = meta_ptr_->UpdateTableFile(table_file); @@ -685,30 +739,6 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { return Status::OK(); } -Status DBImpl::BuildIndexByTable(const std::string& table_id) { - std::unique_lock lock(build_index_mutex_); - meta::TableFilesSchema to_index_files; - meta_ptr_->FilesToIndex(to_index_files); - - Status status; - - for (auto& file : to_index_files) { - status = BuildIndex(file); - if (!status.ok()) { - ENGINE_LOG_ERROR << "Building index for " << file.id_ << " failed: " << status.ToString(); - return status; - } - ENGINE_LOG_DEBUG << "Sync building index for " << file.id_ << " passed"; - - if (shutting_down_.load(std::memory_order_acquire)){ - ENGINE_LOG_DEBUG << "Server will shutdown, skip build index action for table " << table_id; - break; - } - } - - return status; -} - void DBImpl::BackgroundBuildIndex() { ENGINE_LOG_TRACE << " Background build index thread start"; diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index 4aa7ac07bf..97c36fadbc 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -93,6 +93,12 @@ class DBImpl : public DB { Status BuildIndex(const std::string& table_id) override; + Status CreateIndex(const std::string& table_id, const TableIndex& index) override; + + Status DescribeIndex(const std::string& table_id, TableIndex& index) override; + + Status DropIndex(const std::string& table_id) override; + ~DBImpl() override; private: @@ -122,8 +128,6 @@ class DBImpl : public DB { void StartBuildIndexTask(bool force=false); void BackgroundBuildIndex(); - Status - BuildIndexByTable(const std::string& table_id); Status BuildIndex(const meta::TableFileSchema &); diff --git a/cpp/src/db/Types.h b/cpp/src/db/Types.h index acf7feea40..a2ffa606e5 100644 --- a/cpp/src/db/Types.h +++ b/cpp/src/db/Types.h @@ -5,7 +5,10 @@ ******************************************************************************/ #pragma once +#include "db/engine/ExecutionEngine.h" + #include +#include namespace zilliz { namespace milvus { @@ -18,6 +21,12 @@ typedef std::vector IDNumbers; typedef std::vector> QueryResult; typedef std::vector QueryResults; +struct TableIndex { + int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; + int32_t nlist = 16384; + int32_t index_file_size = 1024; //MB + int32_t metric_type = (int)MetricType::L2; +}; } // namespace engine } // namespace milvus diff --git a/cpp/src/db/Utils.cpp b/cpp/src/db/Utils.cpp index 0fe1f76d29..5a0d3cafa2 100644 --- a/cpp/src/db/Utils.cpp +++ b/cpp/src/db/Utils.cpp @@ -142,6 +142,13 @@ Status DeleteTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& return Status::OK(); } +bool IsSameIndex(const TableIndex& index1, const TableIndex& index2) { + return index1.engine_type_ == index2.engine_type_ + && index1.nlist == index2.nlist + && index1.index_file_size == index2.index_file_size + && index1.metric_type == index2.metric_type; +} + } // namespace utils } // namespace engine } // namespace milvus diff --git a/cpp/src/db/Utils.h b/cpp/src/db/Utils.h index 8329f5a1fc..47a8fca9b7 100644 --- a/cpp/src/db/Utils.h +++ b/cpp/src/db/Utils.h @@ -7,6 +7,7 @@ #include "Options.h" #include "db/meta/MetaTypes.h" +#include "db/Types.h" #include @@ -24,6 +25,8 @@ Status CreateTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& Status GetTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); Status DeleteTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); +bool IsSameIndex(const TableIndex& index1, const TableIndex& index2); + } // namespace utils } // namespace engine } // namespace milvus diff --git a/cpp/src/db/engine/ExecutionEngine.h b/cpp/src/db/engine/ExecutionEngine.h index 88be75aeb9..0f2cf42b22 100644 --- a/cpp/src/db/engine/ExecutionEngine.h +++ b/cpp/src/db/engine/ExecutionEngine.h @@ -23,6 +23,11 @@ enum class EngineType { MAX_VALUE = NSG_MIX, }; +enum class MetricType { + L2 = 1, + IP = 2, +}; + class ExecutionEngine { public: diff --git a/cpp/src/db/insert/MemTableFile.cpp b/cpp/src/db/insert/MemTableFile.cpp index 326658df5f..3cbb862389 100644 --- a/cpp/src/db/insert/MemTableFile.cpp +++ b/cpp/src/db/insert/MemTableFile.cpp @@ -86,7 +86,7 @@ Status MemTableFile::Serialize() { execution_engine_->Serialize(); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - table_file_schema_.size_ = size; + table_file_schema_.row_count_ = execution_engine_->Count(); server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size / total_time); diff --git a/cpp/src/db/meta/Meta.h b/cpp/src/db/meta/Meta.h index e88761b446..80ae0fb22e 100644 --- a/cpp/src/db/meta/Meta.h +++ b/cpp/src/db/meta/Meta.h @@ -8,6 +8,7 @@ #include "MetaTypes.h" #include "db/Options.h" #include "db/Status.h" +#include "db/Types.h" #include #include @@ -38,6 +39,9 @@ class Meta { virtual Status AllTables(std::vector &table_schema_array) = 0; + virtual Status + UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) = 0; + virtual Status DeleteTable(const std::string &table_id) = 0; @@ -83,6 +87,12 @@ class Meta { virtual Status HasNonIndexFiles(const std::string &table_id, bool &has) = 0; + virtual Status + DescribeTableIndex(const std::string &table_id, TableIndex& index) = 0; + + virtual Status + DropTableIndex(const std::string &table_id) = 0; + virtual Status CleanUp() = 0; diff --git a/cpp/src/db/meta/MetaTypes.h b/cpp/src/db/meta/MetaTypes.h index 9f58734f39..0e554b2330 100644 --- a/cpp/src/db/meta/MetaTypes.h +++ b/cpp/src/db/meta/MetaTypes.h @@ -28,12 +28,13 @@ struct TableSchema { size_t id_ = 0; std::string table_id_; - int state_ = (int)NORMAL; - size_t files_cnt_ = 0; + int32_t state_ = (int)NORMAL; uint16_t dimension_ = 0; - long created_on_ = 0; - int engine_type_ = (int)EngineType::FAISS_IDMAP; - bool store_raw_data_ = false; + int64_t created_on_ = 0; + int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; + int32_t nlist_ = 16384; + int32_t index_file_size_ = 1024; //MB + int32_t metric_type_ = (int)MetricType::L2; }; // TableSchema struct TableFileSchema { @@ -45,19 +46,21 @@ struct TableFileSchema { TO_DELETE, NEW_MERGE, NEW_INDEX, + BACKUP, } FILE_TYPE; size_t id_ = 0; std::string table_id_; - int engine_type_ = (int)EngineType::FAISS_IDMAP; + int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; std::string file_id_; - int file_type_ = NEW; - size_t size_ = 0; + int32_t file_type_ = NEW; + size_t file_size_ = 0; + size_t row_count_ = 0; DateT date_ = EmptyDate; uint16_t dimension_ = 0; std::string location_; - long updated_time_ = 0; - long created_on_ = 0; + int64_t updated_time_ = 0; + int64_t created_on_ = 0; }; // TableFileSchema typedef std::vector TableFilesSchema; diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index bf4589351e..fa2697ec32 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -167,9 +167,10 @@ Status MySQLMetaImpl::Initialize() { "state INT NOT NULL, " << "dimension SMALLINT NOT NULL, " << "created_on BIGINT NOT NULL, " << - "files_cnt BIGINT DEFAULT 0 NOT NULL, " << "engine_type INT DEFAULT 1 NOT NULL, " << - "store_raw_data BOOL DEFAULT false NOT NULL);"; + "nlist INT DEFAULT 16384 NOT NULL, " << + "index_file_size INT DEFAULT 1024 NOT NULL, " << + "metric_type INT DEFAULT 1 NOT NULL);"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str(); @@ -183,7 +184,8 @@ Status MySQLMetaImpl::Initialize() { "engine_type INT DEFAULT 1 NOT NULL, " << "file_id VARCHAR(255) NOT NULL, " << "file_type INT DEFAULT 0 NOT NULL, " << - "size BIGINT DEFAULT 0 NOT NULL, " << + "file_size BIGINT DEFAULT 0 NOT NULL, " << + "row_count BIGINT DEFAULT 0 NOT NULL, " << "updated_time BIGINT NOT NULL, " << "created_on BIGINT NOT NULL, " << "date INT DEFAULT -1 NOT NULL);"; @@ -325,8 +327,6 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { } } - - table_schema.files_cnt_ = 0; table_schema.id_ = -1; table_schema.created_on_ = utils::GetMicroSecTimeStamp(); @@ -336,13 +336,11 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { std::string state = std::to_string(table_schema.state_); std::string dimension = std::to_string(table_schema.dimension_); std::string created_on = std::to_string(table_schema.created_on_); - std::string files_cnt = "0"; std::string engine_type = std::to_string(table_schema.engine_type_); - std::string store_raw_data = table_schema.store_raw_data_ ? "true" : "false"; createTableQuery << "INSERT INTO Tables VALUES" << "(" << id << ", " << quote << table_id << ", " << state << ", " << dimension << ", " << - created_on << ", " << files_cnt << ", " << engine_type << ", " << store_raw_data << ");"; + created_on << ", " << engine_type << ");"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::CreateTable: " << createTableQuery.str(); @@ -430,6 +428,18 @@ Status MySQLMetaImpl::HasNonIndexFiles(const std::string &table_id, bool &has) { return Status::OK(); } +Status MySQLMetaImpl::UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) { + return Status::OK(); +} + +Status MySQLMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex& index) { + return Status::OK(); +} + +Status MySQLMetaImpl::DropTableIndex(const std::string &table_id) { + return Status::OK(); +} + Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { @@ -561,12 +571,7 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { table_schema.dimension_ = resRow["dimension"]; - table_schema.files_cnt_ = resRow["files_cnt"]; - table_schema.engine_type_ = resRow["engine_type"]; - - int store_raw_data = resRow["store_raw_data"]; - table_schema.store_raw_data_ = (store_raw_data == 1); } else { return Status::NotFound("Table " + table_schema.table_id_ + " not found"); } @@ -668,13 +673,8 @@ Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { table_schema.dimension_ = resRow["dimension"]; - table_schema.files_cnt_ = resRow["files_cnt"]; - table_schema.engine_type_ = resRow["engine_type"]; - int store_raw_data = resRow["store_raw_data"]; - table_schema.store_raw_data_ = (store_raw_data == 1); - table_schema_array.emplace_back(table_schema); } } catch (const BadQuery &er) { @@ -709,7 +709,8 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { NextFileId(file_schema.file_id_); file_schema.dimension_ = table_schema.dimension_; - file_schema.size_ = 0; + file_schema.file_size_ = 0; + file_schema.row_count_ = 0; file_schema.created_on_ = utils::GetMicroSecTimeStamp(); file_schema.updated_time_ = file_schema.created_on_; file_schema.engine_type_ = table_schema.engine_type_; @@ -720,7 +721,7 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { std::string engine_type = std::to_string(file_schema.engine_type_); std::string file_id = file_schema.file_id_; std::string file_type = std::to_string(file_schema.file_type_); - std::string size = std::to_string(file_schema.size_); + std::string row_count = std::to_string(file_schema.row_count_); std::string updated_time = std::to_string(file_schema.updated_time_); std::string created_on = std::to_string(file_schema.created_on_); std::string date = std::to_string(file_schema.date_); @@ -737,7 +738,7 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { createTableFileQuery << "INSERT INTO TableFiles VALUES" << "(" << id << ", " << quote << table_id << ", " << engine_type << ", " << - quote << file_id << ", " << file_type << ", " << size << ", " << + quote << file_id << ", " << file_type << ", " << row_count << ", " << updated_time << ", " << created_on << ", " << date << ");"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::CreateTableFile: " << createTableFileQuery.str(); @@ -792,7 +793,7 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { Query filesToIndexQuery = connectionPtr->query(); - filesToIndexQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToIndexQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << "FROM TableFiles " << "WHERE file_type = " << std::to_string(TableFileSchema::TO_INDEX) << ";"; @@ -819,7 +820,7 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; @@ -877,7 +878,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, if (partition.empty()) { Query filesToSearchQuery = connectionPtr->query(); - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "(file_type = " << std::to_string(TableFileSchema::RAW) << " OR " << @@ -899,7 +900,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, std::string partitionListStr = partitionListSS.str(); partitionListStr = partitionListStr.substr(0, partitionListStr.size() - 2); //remove the last ", " - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "date IN (" << partitionListStr << ") AND " << @@ -938,7 +939,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; @@ -988,7 +989,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, } Query filesToSearchQuery = connectionPtr->query(); - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, size, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id; @@ -1049,7 +1050,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; @@ -1097,11 +1098,11 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, Query filesToMergeQuery = connectionPtr->query(); - filesToMergeQuery << "SELECT id, table_id, file_id, file_type, size, date " << + filesToMergeQuery << "SELECT id, table_id, file_id, file_type, file_size, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "file_type = " << std::to_string(TableFileSchema::RAW) << " " << - "ORDER BY size DESC" << ";"; + "ORDER BY row_count DESC" << ";"; ENGINE_LOG_DEBUG << "MySQLMetaImpl::FilesToMerge: " << filesToMergeQuery.str(); @@ -1131,7 +1132,7 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, table_file.file_type_ = resRow["file_type"]; - table_file.size_ = resRow["size"]; + table_file.file_size_ = resRow["file_size"]; table_file.date_ = resRow["date"]; @@ -1189,7 +1190,7 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, Query getTableFileQuery = connectionPtr->query(); - getTableFileQuery << "SELECT id, engine_type, file_id, file_type, size, date " << + getTableFileQuery << "SELECT id, engine_type, file_id, file_type, file_size, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "(" << idStr << ");"; @@ -1222,7 +1223,9 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, file_schema.file_type_ = resRow["file_type"]; - file_schema.size_ = resRow["size"]; + file_schema.file_size_ = resRow["file_size"]; + + file_schema.row_count_ = resRow["row_count"]; file_schema.date_ = resRow["date"]; @@ -1321,7 +1324,7 @@ Status MySQLMetaImpl::Size(uint64_t &result) { Query getSizeQuery = connectionPtr->query(); - getSizeQuery << "SELECT IFNULL(SUM(size),0) AS sum " << + getSizeQuery << "SELECT IFNULL(SUM(file_size),0) AS sum " << "FROM TableFiles " << "WHERE file_type <> " << std::to_string(TableFileSchema::TO_DELETE) << ";"; @@ -1379,7 +1382,7 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { Query discardFilesQuery = connectionPtr->query(); - discardFilesQuery << "SELECT id, size " << + discardFilesQuery << "SELECT id, file_size " << "FROM TableFiles " << "WHERE file_type <> " << std::to_string(TableFileSchema::TO_DELETE) << " " << "ORDER BY id ASC " << @@ -1401,11 +1404,11 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { break; } table_file.id_ = resRow["id"]; - table_file.size_ = resRow["size"]; + table_file.file_size_ = resRow["file_size"]; idsToDiscardSS << "id = " << std::to_string(table_file.id_) << " OR "; ENGINE_LOG_DEBUG << "Discard table_file.id=" << table_file.file_id_ - << " table_file.size=" << table_file.size_; - to_discard_size -= table_file.size_; + << " table_file.size=" << table_file.file_size_; + to_discard_size -= table_file.file_size_; } std::string idsToDiscardStr = idsToDiscardSS.str(); @@ -1480,7 +1483,8 @@ Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { std::string engine_type = std::to_string(file_schema.engine_type_); std::string file_id = file_schema.file_id_; std::string file_type = std::to_string(file_schema.file_type_); - std::string size = std::to_string(file_schema.size_); + std::string file_size = std::to_string(file_schema.file_size_); + std::string row_count = std::to_string(file_schema.row_count_); std::string updated_time = std::to_string(file_schema.updated_time_); std::string created_on = std::to_string(file_schema.created_on_); std::string date = std::to_string(file_schema.date_); @@ -1490,7 +1494,8 @@ Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { "engine_type = " << engine_type << ", " << "file_id = " << quote << file_id << ", " << "file_type = " << file_type << ", " << - "size = " << size << ", " << + "file_size = " << file_size << ", " << + "row_count = " << row_count << ", " << "updated_time = " << updated_time << ", " << "created_on = " << created_on << ", " << "date = " << date << " " << @@ -1606,7 +1611,8 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { std::string engine_type = std::to_string(file_schema.engine_type_); std::string file_id = file_schema.file_id_; std::string file_type = std::to_string(file_schema.file_type_); - std::string size = std::to_string(file_schema.size_); + std::string file_size = std::to_string(file_schema.file_size_); + std::string row_count = std::to_string(file_schema.row_count_); std::string updated_time = std::to_string(file_schema.updated_time_); std::string created_on = std::to_string(file_schema.created_on_); std::string date = std::to_string(file_schema.date_); @@ -1616,7 +1622,8 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { "engine_type = " << engine_type << ", " << "file_id = " << quote << file_id << ", " << "file_type = " << file_type << ", " << - "size = " << size << ", " << + "file_size = " << file_size << ", " << + "row_count = " << row_count << ", " << "updated_time = " << updated_time << ", " << "created_on = " << created_on << ", " << "date = " << date << " " << diff --git a/cpp/src/db/meta/MySQLMetaImpl.h b/cpp/src/db/meta/MySQLMetaImpl.h index 30695423dd..3fdd80beed 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.h +++ b/cpp/src/db/meta/MySQLMetaImpl.h @@ -43,6 +43,12 @@ class MySQLMetaImpl : public Meta { Status HasNonIndexFiles(const std::string &table_id, bool &has) override; + Status UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) override; + + Status DescribeTableIndex(const std::string &table_id, TableIndex& index) override; + + Status DropTableIndex(const std::string &table_id) override; + Status UpdateTableFile(TableFileSchema &file_schema) override; Status UpdateTableFilesToIndex(const std::string &table_id) override; diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index 9053139e0b..9118eadd17 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -62,16 +62,18 @@ inline auto StoragePrototype(const std::string &path) { make_column("state", &TableSchema::state_), make_column("dimension", &TableSchema::dimension_), make_column("created_on", &TableSchema::created_on_), - make_column("files_cnt", &TableSchema::files_cnt_, default_value(0)), make_column("engine_type", &TableSchema::engine_type_), - make_column("store_raw_data", &TableSchema::store_raw_data_)), + make_column("nlist", &TableSchema::nlist_), + make_column("index_file_size", &TableSchema::index_file_size_), + make_column("metric_type", &TableSchema::metric_type_)), make_table("TableFiles", make_column("id", &TableFileSchema::id_, primary_key()), make_column("table_id", &TableFileSchema::table_id_), make_column("engine_type", &TableFileSchema::engine_type_), make_column("file_id", &TableFileSchema::file_id_), make_column("file_type", &TableFileSchema::file_type_), - make_column("size", &TableFileSchema::size_, default_value(0)), + make_column("file_size", &TableFileSchema::file_size_, default_value(0)), + make_column("row_count", &TableFileSchema::row_count_, default_value(0)), make_column("updated_time", &TableFileSchema::updated_time_), make_column("created_on", &TableFileSchema::created_on_), make_column("date", &TableFileSchema::date_)) @@ -188,7 +190,6 @@ Status SqliteMetaImpl::CreateTable(TableSchema &table_schema) { } } - table_schema.files_cnt_ = 0; table_schema.id_ = -1; table_schema.created_on_ = utils::GetMicroSecTimeStamp(); @@ -218,10 +219,8 @@ Status SqliteMetaImpl::DeleteTable(const std::string& table_id) { //soft delete table auto tables = ConnectorPtr->select(columns(&TableSchema::id_, - &TableSchema::files_cnt_, &TableSchema::dimension_, &TableSchema::engine_type_, - &TableSchema::store_raw_data_, &TableSchema::created_on_), where(c(&TableSchema::table_id_) == table_id)); for (auto &table : tables) { @@ -229,11 +228,9 @@ Status SqliteMetaImpl::DeleteTable(const std::string& table_id) { table_schema.table_id_ = table_id; table_schema.state_ = (int)TableSchema::TO_DELETE; table_schema.id_ = std::get<0>(table); - table_schema.files_cnt_ = std::get<1>(table); - table_schema.dimension_ = std::get<2>(table); - table_schema.engine_type_ = std::get<3>(table); - table_schema.store_raw_data_ = std::get<4>(table); - table_schema.created_on_ = std::get<5>(table); + table_schema.dimension_ = std::get<1>(table); + table_schema.engine_type_ = std::get<2>(table); + table_schema.created_on_ = std::get<3>(table); ConnectorPtr->update(table_schema); } @@ -274,20 +271,15 @@ Status SqliteMetaImpl::DescribeTable(TableSchema &table_schema) { MetricCollector metric; auto groups = ConnectorPtr->select(columns(&TableSchema::id_, - &TableSchema::table_id_, - &TableSchema::files_cnt_, &TableSchema::dimension_, - &TableSchema::engine_type_, - &TableSchema::store_raw_data_), + &TableSchema::engine_type_), where(c(&TableSchema::table_id_) == table_schema.table_id_ and c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); if (groups.size() == 1) { table_schema.id_ = std::get<0>(groups[0]); - table_schema.files_cnt_ = std::get<2>(groups[0]); - table_schema.dimension_ = std::get<3>(groups[0]); - table_schema.engine_type_ = std::get<4>(groups[0]); - table_schema.store_raw_data_ = std::get<5>(groups[0]); + table_schema.dimension_ = std::get<1>(groups[0]); + table_schema.engine_type_ = std::get<2>(groups[0]); } else { return Status::NotFound("Table " + table_schema.table_id_ + " not found"); } @@ -302,17 +294,16 @@ Status SqliteMetaImpl::DescribeTable(TableSchema &table_schema) { Status SqliteMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) { has = false; try { + std::vector file_types = { + (int) TableFileSchema::RAW, + (int) TableFileSchema::NEW, + (int) TableFileSchema::NEW_MERGE, + (int) TableFileSchema::NEW_INDEX, + (int) TableFileSchema::TO_INDEX, + }; auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::file_type_), - where((c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_MERGE - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_INDEX - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX) + where(in(&TableFileSchema::file_type_, file_types) and c(&TableFileSchema::table_id_) == table_id )); @@ -353,6 +344,118 @@ Status SqliteMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) return Status::OK(); } +Status SqliteMetaImpl::UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) { + try { + MetricCollector metric; + + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + + auto tables = ConnectorPtr->select(columns(&TableSchema::id_, + &TableSchema::state_, + &TableSchema::dimension_, + &TableSchema::created_on_), + where(c(&TableSchema::table_id_) == table_id + and c(&TableSchema::state_) != (int) TableSchema::TO_DELETE)); + + if(tables.size() > 0) { + meta::TableSchema table_schema; + table_schema.id_ = std::get<0>(tables[0]); + table_schema.table_id_ = table_id; + table_schema.state_ = std::get<1>(tables[0]); + table_schema.dimension_ = std::get<2>(tables[0]); + table_schema.created_on_ = std::get<3>(tables[0]); + table_schema.engine_type_ = index.engine_type_; + table_schema.nlist_ = index.nlist; + table_schema.index_file_size_ = index.index_file_size; + table_schema.metric_type_ = index.metric_type; + + ConnectorPtr->update(table_schema); + } else { + return Status::NotFound("Table " + table_id + " not found"); + } + + //set all backup file to raw + ConnectorPtr->update_all( + set( + c(&TableFileSchema::file_type_) = (int) TableFileSchema::RAW, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp() + ), + where( + c(&TableFileSchema::table_id_) == table_id and + c(&TableFileSchema::file_type_) == (int) TableFileSchema::BACKUP + )); + + } catch (std::exception &e) { + std::string msg = "Encounter exception when update table index: table_id = " + table_id; + return HandleException(msg, e); + } + return Status::OK(); +} + +Status SqliteMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex& index) { + try { + MetricCollector metric; + + auto groups = ConnectorPtr->select(columns(&TableSchema::engine_type_, + &TableSchema::nlist_, + &TableSchema::index_file_size_, + &TableSchema::metric_type_), + where(c(&TableSchema::table_id_) == table_id + and c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); + + if (groups.size() == 1) { + index.engine_type_ = std::get<0>(groups[0]); + index.nlist = std::get<1>(groups[0]); + index.index_file_size = std::get<2>(groups[0]); + index.metric_type = std::get<3>(groups[0]); + } else { + return Status::NotFound("Table " + table_id + " not found"); + } + + } catch (std::exception &e) { + return HandleException("Encounter exception when describe index", e); + } + + return Status::OK(); +} + +Status SqliteMetaImpl::DropTableIndex(const std::string &table_id) { + try { + MetricCollector metric; + + //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here + std::lock_guard meta_lock(meta_mutex_); + + //soft delete index files + ConnectorPtr->update_all( + set( + c(&TableFileSchema::file_type_) = (int) TableFileSchema::TO_DELETE, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp() + ), + where( + c(&TableFileSchema::table_id_) == table_id and + c(&TableFileSchema::file_type_) == (int) TableFileSchema::INDEX + )); + + //set all backup file to raw + ConnectorPtr->update_all( + set( + c(&TableFileSchema::file_type_) = (int) TableFileSchema::RAW, + c(&TableFileSchema::updated_time_) = utils::GetMicroSecTimeStamp() + ), + where( + c(&TableFileSchema::table_id_) == table_id and + c(&TableFileSchema::file_type_) == (int) TableFileSchema::BACKUP + )); + + } catch (std::exception &e) { + return HandleException("Encounter exception when delete table index files", e); + } + + return Status::OK(); +} + Status SqliteMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { has_or_not = false; @@ -380,19 +483,15 @@ Status SqliteMetaImpl::AllTables(std::vector& table_schema_array) { auto selected = ConnectorPtr->select(columns(&TableSchema::id_, &TableSchema::table_id_, - &TableSchema::files_cnt_, &TableSchema::dimension_, - &TableSchema::engine_type_, - &TableSchema::store_raw_data_), + &TableSchema::engine_type_), where(c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); for (auto &table : selected) { TableSchema schema; schema.id_ = std::get<0>(table); schema.table_id_ = std::get<1>(table); - schema.files_cnt_ = std::get<2>(table); - schema.dimension_ = std::get<3>(table); - schema.engine_type_ = std::get<4>(table); - schema.store_raw_data_ = std::get<5>(table); + schema.dimension_ = std::get<2>(table); + schema.engine_type_ = std::get<3>(table); table_schema_array.emplace_back(schema); } @@ -420,7 +519,8 @@ Status SqliteMetaImpl::CreateTableFile(TableFileSchema &file_schema) { NextFileId(file_schema.file_id_); file_schema.dimension_ = table_schema.dimension_; - file_schema.size_ = 0; + file_schema.file_size_ = 0; + file_schema.row_count_ = 0; file_schema.created_on_ = utils::GetMicroSecTimeStamp(); file_schema.updated_time_ = file_schema.created_on_; file_schema.engine_type_ = table_schema.engine_type_; @@ -450,7 +550,7 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), where(c(&TableFileSchema::file_type_) @@ -464,7 +564,7 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); + table_file.row_count_ = std::get<4>(file); table_file.date_ = std::get<5>(file); table_file.engine_type_ = std::get<6>(file); @@ -499,19 +599,16 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, MetricCollector metric; if (partition.empty()) { + std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), where(c(&TableFileSchema::table_id_) == table_id and - (c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::TO_INDEX or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::INDEX))); + in(&TableFileSchema::file_type_, file_type))); TableSchema table_schema; table_schema.table_id_ = table_id; @@ -527,7 +624,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); + table_file.row_count_ = std::get<4>(file); table_file.date_ = std::get<5>(file); table_file.engine_type_ = std::get<6>(file); table_file.dimension_ = table_schema.dimension_; @@ -540,20 +637,17 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, } } else { + std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), where(c(&TableFileSchema::table_id_) == table_id and - in(&TableFileSchema::date_, partition) and - (c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::TO_INDEX or - c(&TableFileSchema::file_type_) - == (int) TableFileSchema::INDEX))); + in(&TableFileSchema::date_, partition) and + in(&TableFileSchema::file_type_, file_type))); TableSchema table_schema; table_schema.table_id_ = table_id; @@ -569,7 +663,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); + table_file.row_count_ = std::get<4>(file); table_file.date_ = std::get<5>(file); table_file.engine_type_ = std::get<6>(file); table_file.dimension_ = table_schema.dimension_; @@ -601,7 +695,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_); @@ -643,7 +737,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); + table_file.row_count_ = std::get<4>(file); table_file.date_ = std::get<5>(file); table_file.engine_type_ = std::get<6>(file); table_file.dimension_ = table_schema.dimension_; @@ -673,11 +767,11 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::file_size_, &TableFileSchema::date_), where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW and c(&TableFileSchema::table_id_) == table_id), - order_by(&TableFileSchema::size_).desc()); + order_by(&TableFileSchema::file_size_).desc()); TableSchema table_schema; table_schema.table_id_ = table_id; @@ -693,7 +787,7 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.size_ = std::get<4>(file); + table_file.file_size_ = std::get<4>(file); table_file.date_ = std::get<5>(file); table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); @@ -718,7 +812,8 @@ Status SqliteMetaImpl::GetTableFiles(const std::string& table_id, auto files = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, - &TableFileSchema::size_, + &TableFileSchema::file_size_, + &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), where(c(&TableFileSchema::table_id_) == table_id and @@ -738,9 +833,10 @@ Status SqliteMetaImpl::GetTableFiles(const std::string& table_id, file_schema.id_ = std::get<0>(file); file_schema.file_id_ = std::get<1>(file); file_schema.file_type_ = std::get<2>(file); - file_schema.size_ = std::get<3>(file); - file_schema.date_ = std::get<4>(file); - file_schema.engine_type_ = std::get<5>(file); + file_schema.file_size_ = std::get<3>(file); + file_schema.row_count_ = std::get<4>(file); + file_schema.date_ = std::get<5>(file); + file_schema.engine_type_ = std::get<6>(file); file_schema.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, file_schema); @@ -797,23 +893,17 @@ Status SqliteMetaImpl::Archive() { Status SqliteMetaImpl::Size(uint64_t &result) { result = 0; try { - auto files = ConnectorPtr->select(columns(&TableFileSchema::size_, - &TableFileSchema::file_type_, - &TableFileSchema::engine_type_), + auto selected = ConnectorPtr->select(columns(sum(&TableFileSchema::file_size_)), where( c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE )); - - for (auto &file : files) { - auto file_size = std::get<0>(file); - auto file_type = std::get<1>(file); - auto engine_type = std::get<2>(file); - if(file_type == (int)TableFileSchema::INDEX && engine_type == (int)EngineType::FAISS_IVFSQ8) { - result += (uint64_t)file_size/4;//hardcode for sq8 - } else { - result += (uint64_t)file_size; + for (auto &total_size : selected) { + if (!std::get<0>(total_size)) { + continue; } + result += (uint64_t) (*std::get<0>(total_size)); } + } catch (std::exception &e) { return HandleException("Encounter exception when calculte db size", e); } @@ -836,7 +926,7 @@ Status SqliteMetaImpl::DiscardFiles(long to_discard_size) { auto commited = ConnectorPtr->transaction([&]() mutable { auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, - &TableFileSchema::size_), + &TableFileSchema::file_size_), where(c(&TableFileSchema::file_type_) != (int) TableFileSchema::TO_DELETE), order_by(&TableFileSchema::id_), @@ -848,11 +938,11 @@ Status SqliteMetaImpl::DiscardFiles(long to_discard_size) { for (auto &file : selected) { if (to_discard_size <= 0) break; table_file.id_ = std::get<0>(file); - table_file.size_ = std::get<1>(file); + table_file.file_size_ = std::get<1>(file); ids.push_back(table_file.id_); ENGINE_LOG_DEBUG << "Discard table_file.id=" << table_file.file_id_ - << " table_file.size=" << table_file.size_; - to_discard_size -= table_file.size_; + << " table_file.size=" << table_file.file_size_; + to_discard_size -= table_file.file_size_; } if (ids.size() == 0) { @@ -1059,12 +1149,8 @@ Status SqliteMetaImpl::CleanUp() { //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); - auto files = ConnectorPtr->select(columns(&TableFileSchema::id_), - where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_INDEX - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::NEW_MERGE)); + std::vector file_type = {(int) TableFileSchema::NEW, (int) TableFileSchema::NEW_INDEX, (int) TableFileSchema::NEW_MERGE}; + auto files = ConnectorPtr->select(columns(&TableFileSchema::id_), where(in(&TableFileSchema::file_type_, file_type))); auto commited = ConnectorPtr->transaction([&]() mutable { for (auto &file : files) { @@ -1091,11 +1177,9 @@ Status SqliteMetaImpl::Count(const std::string &table_id, uint64_t &result) { try { MetricCollector metric; - auto selected = ConnectorPtr->select(columns(&TableFileSchema::size_), - where((c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW - or - c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX - or c(&TableFileSchema::file_type_) == (int) TableFileSchema::INDEX) + std::vector file_type = {(int) TableFileSchema::RAW, (int) TableFileSchema::TO_INDEX, (int) TableFileSchema::INDEX}; + auto selected = ConnectorPtr->select(columns(&TableFileSchema::row_count_), + where(in(&TableFileSchema::file_type_, file_type) and c(&TableFileSchema::table_id_) == table_id)); TableSchema table_schema; diff --git a/cpp/src/db/meta/SqliteMetaImpl.h b/cpp/src/db/meta/SqliteMetaImpl.h index 1525f27e4b..34808f202f 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.h +++ b/cpp/src/db/meta/SqliteMetaImpl.h @@ -51,6 +51,15 @@ class SqliteMetaImpl : public Meta { Status HasNonIndexFiles(const std::string &table_id, bool &has) override; + Status + UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) override; + + Status + DescribeTableIndex(const std::string &table_id, TableIndex& index) override; + + Status + DropTableIndex(const std::string &table_id) override; + Status UpdateTableFilesToIndex(const std::string &table_id) override; diff --git a/cpp/src/grpc/cpp_gen.sh b/cpp/src/grpc/cpp_gen.sh index c441783e7b..62b9d95728 100755 --- a/cpp/src/grpc/cpp_gen.sh +++ b/cpp/src/grpc/cpp_gen.sh @@ -1,9 +1,9 @@ #!/bin/bash -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-status --plugin=protoc-gen-grpc="/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" status.proto +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-status --plugin=protoc-gen-grpc="../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" status.proto -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-status status.proto +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-status status.proto -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-milvus --plugin=protoc-gen-grpc="/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" milvus.proto +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --grpc_out=./gen-milvus --plugin=protoc-gen-grpc="../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/grpc_cpp_plugin" milvus.proto -/home/yukun/test/milvus/cpp/cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-milvus milvus.proto \ No newline at end of file +../../cmake-build-debug/grpc_ep-prefix/src/grpc_ep/bins/opt/protobuf/protoc -I . --cpp_out=./gen-milvus milvus.proto \ No newline at end of file diff --git a/cpp/src/grpc/gen-milvus/milvus.pb.cc b/cpp/src/grpc/gen-milvus/milvus.pb.cc index 5ec8fddba0..c8ef38d07b 100644 --- a/cpp/src/grpc/gen-milvus/milvus.pb.cc +++ b/cpp/src/grpc/gen-milvus/milvus.pb.cc @@ -365,9 +365,7 @@ const ::PROTOBUF_NAMESPACE_ID::uint32 TableStruct_milvus_2eproto::offsets[] PROT ~0u, // no _oneof_case_ ~0u, // no _weak_field_map_ PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, table_name_), - PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, index_type_), PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, dimension_), - PROTOBUF_FIELD_OFFSET(::milvus::grpc::TableSchema, store_raw_vector_), ~0u, // no _has_bits_ PROTOBUF_FIELD_OFFSET(::milvus::grpc::Range, _internal_metadata_), ~0u, // no _extensions_ @@ -481,21 +479,21 @@ const ::PROTOBUF_NAMESPACE_ID::uint32 TableStruct_milvus_2eproto::offsets[] PROT static const ::PROTOBUF_NAMESPACE_ID::internal::MigrationSchema schemas[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = { { 0, -1, sizeof(::milvus::grpc::TableName)}, { 7, -1, sizeof(::milvus::grpc::TableSchema)}, - { 16, -1, sizeof(::milvus::grpc::Range)}, - { 23, -1, sizeof(::milvus::grpc::RowRecord)}, - { 29, -1, sizeof(::milvus::grpc::InsertParam)}, - { 37, -1, sizeof(::milvus::grpc::VectorIds)}, - { 44, -1, sizeof(::milvus::grpc::SearchParam)}, - { 54, -1, sizeof(::milvus::grpc::SearchInFilesParam)}, - { 61, -1, sizeof(::milvus::grpc::QueryResult)}, - { 68, -1, sizeof(::milvus::grpc::TopKQueryResult)}, - { 75, -1, sizeof(::milvus::grpc::StringReply)}, - { 82, -1, sizeof(::milvus::grpc::BoolReply)}, - { 89, -1, sizeof(::milvus::grpc::TableRowCount)}, - { 96, -1, sizeof(::milvus::grpc::Command)}, - { 102, -1, sizeof(::milvus::grpc::Index)}, - { 111, -1, sizeof(::milvus::grpc::IndexParam)}, - { 118, -1, sizeof(::milvus::grpc::DeleteByRangeParam)}, + { 14, -1, sizeof(::milvus::grpc::Range)}, + { 21, -1, sizeof(::milvus::grpc::RowRecord)}, + { 27, -1, sizeof(::milvus::grpc::InsertParam)}, + { 35, -1, sizeof(::milvus::grpc::VectorIds)}, + { 42, -1, sizeof(::milvus::grpc::SearchParam)}, + { 52, -1, sizeof(::milvus::grpc::SearchInFilesParam)}, + { 59, -1, sizeof(::milvus::grpc::QueryResult)}, + { 66, -1, sizeof(::milvus::grpc::TopKQueryResult)}, + { 73, -1, sizeof(::milvus::grpc::StringReply)}, + { 80, -1, sizeof(::milvus::grpc::BoolReply)}, + { 87, -1, sizeof(::milvus::grpc::TableRowCount)}, + { 94, -1, sizeof(::milvus::grpc::Command)}, + { 100, -1, sizeof(::milvus::grpc::Index)}, + { 109, -1, sizeof(::milvus::grpc::IndexParam)}, + { 116, -1, sizeof(::milvus::grpc::DeleteByRangeParam)}, }; static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] = { @@ -521,65 +519,64 @@ static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] = const char descriptor_table_protodef_milvus_2eproto[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = "\n\014milvus.proto\022\013milvus.grpc\032\014status.prot" "o\"D\n\tTableName\022#\n\006status\030\001 \001(\0132\023.milvus." - "grpc.Status\022\022\n\ntable_name\030\002 \001(\t\"z\n\013Table" + "grpc.Status\022\022\n\ntable_name\030\002 \001(\t\"L\n\013Table" "Schema\022*\n\ntable_name\030\001 \001(\0132\026.milvus.grpc" - ".TableName\022\022\n\nindex_type\030\002 \001(\005\022\021\n\tdimens" - "ion\030\003 \001(\003\022\030\n\020store_raw_vector\030\004 \001(\010\"/\n\005R" - "ange\022\023\n\013start_value\030\001 \001(\t\022\021\n\tend_value\030\002" - " \001(\t\" \n\tRowRecord\022\023\n\013vector_data\030\001 \003(\002\"i" - "\n\013InsertParam\022\022\n\ntable_name\030\001 \001(\t\0220\n\020row" - "_record_array\030\002 \003(\0132\026.milvus.grpc.RowRec" - "ord\022\024\n\014row_id_array\030\003 \003(\003\"I\n\tVectorIds\022#" - "\n\006status\030\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017v" - "ector_id_array\030\002 \003(\003\"\242\001\n\013SearchParam\022\022\n\n" - "table_name\030\001 \001(\t\0222\n\022query_record_array\030\002" - " \003(\0132\026.milvus.grpc.RowRecord\022-\n\021query_ra" - "nge_array\030\003 \003(\0132\022.milvus.grpc.Range\022\014\n\004t" - "opk\030\004 \001(\003\022\016\n\006nprobe\030\005 \001(\003\"[\n\022SearchInFil" - "esParam\022\025\n\rfile_id_array\030\001 \003(\t\022.\n\014search" - "_param\030\002 \001(\0132\030.milvus.grpc.SearchParam\"+" - "\n\013QueryResult\022\n\n\002id\030\001 \001(\003\022\020\n\010distance\030\002 " - "\001(\001\"m\n\017TopKQueryResult\022#\n\006status\030\001 \001(\0132\023" - ".milvus.grpc.Status\0225\n\023query_result_arra" - "ys\030\002 \003(\0132\030.milvus.grpc.QueryResult\"H\n\013St" - "ringReply\022#\n\006status\030\001 \001(\0132\023.milvus.grpc." - "Status\022\024\n\014string_reply\030\002 \001(\t\"D\n\tBoolRepl" - "y\022#\n\006status\030\001 \001(\0132\023.milvus.grpc.Status\022\022" - "\n\nbool_reply\030\002 \001(\010\"M\n\rTableRowCount\022#\n\006s" - "tatus\030\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017tabl" - "e_row_count\030\002 \001(\003\"\026\n\007Command\022\013\n\003cmd\030\001 \001(" - "\t\"X\n\005Index\022\022\n\nindex_type\030\001 \001(\005\022\r\n\005nlist\030" - "\002 \001(\003\022\027\n\017index_file_size\030\003 \001(\005\022\023\n\013metric" - "_type\030\004 \001(\005\"[\n\nIndexParam\022*\n\ntable_name\030" - "\001 \001(\0132\026.milvus.grpc.TableName\022!\n\005index\030\002" - " \001(\0132\022.milvus.grpc.Index\"K\n\022DeleteByRang" - "eParam\022!\n\005range\030\001 \001(\0132\022.milvus.grpc.Rang" - "e\022\022\n\ntable_name\030\002 \001(\t2\352\007\n\rMilvusService\022" - ">\n\013CreateTable\022\030.milvus.grpc.TableSchema" - "\032\023.milvus.grpc.Status\"\000\022<\n\010HasTable\022\026.mi" - "lvus.grpc.TableName\032\026.milvus.grpc.BoolRe" - "ply\"\000\022:\n\tDropTable\022\026.milvus.grpc.TableNa" - "me\032\023.milvus.grpc.Status\"\000\022=\n\013CreateIndex" - "\022\027.milvus.grpc.IndexParam\032\023.milvus.grpc." - "Status\"\000\022<\n\006Insert\022\030.milvus.grpc.InsertP" - "aram\032\026.milvus.grpc.VectorIds\"\000\022D\n\006Search" - "\022\030.milvus.grpc.SearchParam\032\034.milvus.grpc" - ".TopKQueryResult\"\0000\001\022R\n\rSearchInFiles\022\037." - "milvus.grpc.SearchInFilesParam\032\034.milvus." - "grpc.TopKQueryResult\"\0000\001\022C\n\rDescribeTabl" - "e\022\026.milvus.grpc.TableName\032\030.milvus.grpc." - "TableSchema\"\000\022B\n\nCountTable\022\026.milvus.grp" - "c.TableName\032\032.milvus.grpc.TableRowCount\"" - "\000\022>\n\nShowTables\022\024.milvus.grpc.Command\032\026." - "milvus.grpc.TableName\"\0000\001\0227\n\003Cmd\022\024.milvu" - "s.grpc.Command\032\030.milvus.grpc.StringReply" - "\"\000\022G\n\rDeleteByRange\022\037.milvus.grpc.Delete" - "ByRangeParam\032\023.milvus.grpc.Status\"\000\022=\n\014P" - "reloadTable\022\026.milvus.grpc.TableName\032\023.mi" - "lvus.grpc.Status\"\000\022B\n\rDescribeIndex\022\026.mi" - "lvus.grpc.TableName\032\027.milvus.grpc.IndexP" - "aram\"\000\022:\n\tDropIndex\022\026.milvus.grpc.TableN" - "ame\032\023.milvus.grpc.Status\"\000b\006proto3" + ".TableName\022\021\n\tdimension\030\002 \001(\003\"/\n\005Range\022\023" + "\n\013start_value\030\001 \001(\t\022\021\n\tend_value\030\002 \001(\t\" " + "\n\tRowRecord\022\023\n\013vector_data\030\001 \003(\002\"i\n\013Inse" + "rtParam\022\022\n\ntable_name\030\001 \001(\t\0220\n\020row_recor" + "d_array\030\002 \003(\0132\026.milvus.grpc.RowRecord\022\024\n" + "\014row_id_array\030\003 \003(\003\"I\n\tVectorIds\022#\n\006stat" + "us\030\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017vector_" + "id_array\030\002 \003(\003\"\242\001\n\013SearchParam\022\022\n\ntable_" + "name\030\001 \001(\t\0222\n\022query_record_array\030\002 \003(\0132\026" + ".milvus.grpc.RowRecord\022-\n\021query_range_ar" + "ray\030\003 \003(\0132\022.milvus.grpc.Range\022\014\n\004topk\030\004 " + "\001(\003\022\016\n\006nprobe\030\005 \001(\003\"[\n\022SearchInFilesPara" + "m\022\025\n\rfile_id_array\030\001 \003(\t\022.\n\014search_param" + "\030\002 \001(\0132\030.milvus.grpc.SearchParam\"+\n\013Quer" + "yResult\022\n\n\002id\030\001 \001(\003\022\020\n\010distance\030\002 \001(\001\"m\n" + "\017TopKQueryResult\022#\n\006status\030\001 \001(\0132\023.milvu" + "s.grpc.Status\0225\n\023query_result_arrays\030\002 \003" + "(\0132\030.milvus.grpc.QueryResult\"H\n\013StringRe" + "ply\022#\n\006status\030\001 \001(\0132\023.milvus.grpc.Status" + "\022\024\n\014string_reply\030\002 \001(\t\"D\n\tBoolReply\022#\n\006s" + "tatus\030\001 \001(\0132\023.milvus.grpc.Status\022\022\n\nbool" + "_reply\030\002 \001(\010\"M\n\rTableRowCount\022#\n\006status\030" + "\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017table_row_" + "count\030\002 \001(\003\"\026\n\007Command\022\013\n\003cmd\030\001 \001(\t\"X\n\005I" + "ndex\022\022\n\nindex_type\030\001 \001(\005\022\r\n\005nlist\030\002 \001(\003\022" + "\027\n\017index_file_size\030\003 \001(\005\022\023\n\013metric_type\030" + "\004 \001(\005\"[\n\nIndexParam\022*\n\ntable_name\030\001 \001(\0132" + "\026.milvus.grpc.TableName\022!\n\005index\030\002 \001(\0132\022" + ".milvus.grpc.Index\"K\n\022DeleteByRangeParam" + "\022!\n\005range\030\001 \001(\0132\022.milvus.grpc.Range\022\022\n\nt" + "able_name\030\002 \001(\t2\352\007\n\rMilvusService\022>\n\013Cre" + "ateTable\022\030.milvus.grpc.TableSchema\032\023.mil" + "vus.grpc.Status\"\000\022<\n\010HasTable\022\026.milvus.g" + "rpc.TableName\032\026.milvus.grpc.BoolReply\"\000\022" + ":\n\tDropTable\022\026.milvus.grpc.TableName\032\023.m" + "ilvus.grpc.Status\"\000\022=\n\013CreateIndex\022\027.mil" + "vus.grpc.IndexParam\032\023.milvus.grpc.Status" + "\"\000\022<\n\006Insert\022\030.milvus.grpc.InsertParam\032\026" + ".milvus.grpc.VectorIds\"\000\022D\n\006Search\022\030.mil" + "vus.grpc.SearchParam\032\034.milvus.grpc.TopKQ" + "ueryResult\"\0000\001\022R\n\rSearchInFiles\022\037.milvus" + ".grpc.SearchInFilesParam\032\034.milvus.grpc.T" + "opKQueryResult\"\0000\001\022C\n\rDescribeTable\022\026.mi" + "lvus.grpc.TableName\032\030.milvus.grpc.TableS" + "chema\"\000\022B\n\nCountTable\022\026.milvus.grpc.Tabl" + "eName\032\032.milvus.grpc.TableRowCount\"\000\022>\n\nS" + "howTables\022\024.milvus.grpc.Command\032\026.milvus" + ".grpc.TableName\"\0000\001\0227\n\003Cmd\022\024.milvus.grpc" + ".Command\032\030.milvus.grpc.StringReply\"\000\022G\n\r" + "DeleteByRange\022\037.milvus.grpc.DeleteByRang" + "eParam\032\023.milvus.grpc.Status\"\000\022=\n\014Preload" + "Table\022\026.milvus.grpc.TableName\032\023.milvus.g" + "rpc.Status\"\000\022B\n\rDescribeIndex\022\026.milvus.g" + "rpc.TableName\032\027.milvus.grpc.IndexParam\"\000" + "\022:\n\tDropIndex\022\026.milvus.grpc.TableName\032\023." + "milvus.grpc.Status\"\000b\006proto3" ; static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_milvus_2eproto_deps[1] = { &::descriptor_table_status_2eproto, @@ -606,7 +603,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_mil static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_milvus_2eproto_once; static bool descriptor_table_milvus_2eproto_initialized = false; const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_milvus_2eproto = { - &descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2434, + &descriptor_table_milvus_2eproto_initialized, descriptor_table_protodef_milvus_2eproto, "milvus.proto", 2388, &descriptor_table_milvus_2eproto_once, descriptor_table_milvus_2eproto_sccs, descriptor_table_milvus_2eproto_deps, 17, 1, schemas, file_default_instances, TableStruct_milvus_2eproto::offsets, file_level_metadata_milvus_2eproto, 17, file_level_enum_descriptors_milvus_2eproto, file_level_service_descriptors_milvus_2eproto, @@ -981,17 +978,15 @@ TableSchema::TableSchema(const TableSchema& from) } else { table_name_ = nullptr; } - ::memcpy(&dimension_, &from.dimension_, - static_cast(reinterpret_cast(&store_raw_vector_) - - reinterpret_cast(&dimension_)) + sizeof(store_raw_vector_)); + dimension_ = from.dimension_; // @@protoc_insertion_point(copy_constructor:milvus.grpc.TableSchema) } void TableSchema::SharedCtor() { ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_TableSchema_milvus_2eproto.base); ::memset(&table_name_, 0, static_cast( - reinterpret_cast(&store_raw_vector_) - - reinterpret_cast(&table_name_)) + sizeof(store_raw_vector_)); + reinterpret_cast(&dimension_) - + reinterpret_cast(&table_name_)) + sizeof(dimension_)); } TableSchema::~TableSchema() { @@ -1022,9 +1017,7 @@ void TableSchema::Clear() { delete table_name_; } table_name_ = nullptr; - ::memset(&dimension_, 0, static_cast( - reinterpret_cast(&store_raw_vector_) - - reinterpret_cast(&dimension_)) + sizeof(store_raw_vector_)); + dimension_ = PROTOBUF_LONGLONG(0); _internal_metadata_.Clear(); } @@ -1043,27 +1036,13 @@ const char* TableSchema::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID CHK_(ptr); } else goto handle_unusual; continue; - // int32 index_type = 2; + // int64 dimension = 2; case 2: if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 16)) { - index_type_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); - CHK_(ptr); - } else goto handle_unusual; - continue; - // int64 dimension = 3; - case 3: - if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 24)) { dimension_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); CHK_(ptr); } else goto handle_unusual; continue; - // bool store_raw_vector = 4; - case 4: - if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 32)) { - store_raw_vector_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); - CHK_(ptr); - } else goto handle_unusual; - continue; default: { handle_unusual: if ((tag & 7) == 4 || tag == 0) { @@ -1105,23 +1084,10 @@ bool TableSchema::MergePartialFromCodedStream( break; } - // int32 index_type = 2; + // int64 dimension = 2; case 2: { if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (16 & 0xFF)) { - DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< - ::PROTOBUF_NAMESPACE_ID::int32, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT32>( - input, &index_type_))); - } else { - goto handle_unusual; - } - break; - } - - // int64 dimension = 3; - case 3: { - if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (24 & 0xFF)) { - DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< ::PROTOBUF_NAMESPACE_ID::int64, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT64>( input, &dimension_))); @@ -1131,19 +1097,6 @@ bool TableSchema::MergePartialFromCodedStream( break; } - // bool store_raw_vector = 4; - case 4: { - if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (32 & 0xFF)) { - - DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< - bool, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_BOOL>( - input, &store_raw_vector_))); - } else { - goto handle_unusual; - } - break; - } - default: { handle_unusual: if (tag == 0) { @@ -1177,19 +1130,9 @@ void TableSchema::SerializeWithCachedSizes( 1, _Internal::table_name(this), output); } - // int32 index_type = 2; - if (this->index_type() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32(2, this->index_type(), output); - } - - // int64 dimension = 3; + // int64 dimension = 2; if (this->dimension() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64(3, this->dimension(), output); - } - - // bool store_raw_vector = 4; - if (this->store_raw_vector() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBool(4, this->store_raw_vector(), output); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64(2, this->dimension(), output); } if (_internal_metadata_.have_unknown_fields()) { @@ -1212,19 +1155,9 @@ void TableSchema::SerializeWithCachedSizes( 1, _Internal::table_name(this), target); } - // int32 index_type = 2; - if (this->index_type() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(2, this->index_type(), target); - } - - // int64 dimension = 3; + // int64 dimension = 2; if (this->dimension() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(3, this->dimension(), target); - } - - // bool store_raw_vector = 4; - if (this->store_raw_vector() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(4, this->store_raw_vector(), target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(2, this->dimension(), target); } if (_internal_metadata_.have_unknown_fields()) { @@ -1255,25 +1188,13 @@ size_t TableSchema::ByteSizeLong() const { *table_name_); } - // int64 dimension = 3; + // int64 dimension = 2; if (this->dimension() != 0) { total_size += 1 + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int64Size( this->dimension()); } - // int32 index_type = 2; - if (this->index_type() != 0) { - total_size += 1 + - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( - this->index_type()); - } - - // bool store_raw_vector = 4; - if (this->store_raw_vector() != 0) { - total_size += 1 + 1; - } - int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; @@ -1307,12 +1228,6 @@ void TableSchema::MergeFrom(const TableSchema& from) { if (from.dimension() != 0) { set_dimension(from.dimension()); } - if (from.index_type() != 0) { - set_index_type(from.index_type()); - } - if (from.store_raw_vector() != 0) { - set_store_raw_vector(from.store_raw_vector()); - } } void TableSchema::CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) { @@ -1338,8 +1253,6 @@ void TableSchema::InternalSwap(TableSchema* other) { _internal_metadata_.Swap(&other->_internal_metadata_); swap(table_name_, other->table_name_); swap(dimension_, other->dimension_); - swap(index_type_, other->index_type_); - swap(store_raw_vector_, other->store_raw_vector_); } ::PROTOBUF_NAMESPACE_ID::Metadata TableSchema::GetMetadata() const { diff --git a/cpp/src/grpc/gen-milvus/milvus.pb.h b/cpp/src/grpc/gen-milvus/milvus.pb.h index d4c33b848a..ce34b264fc 100644 --- a/cpp/src/grpc/gen-milvus/milvus.pb.h +++ b/cpp/src/grpc/gen-milvus/milvus.pb.h @@ -395,9 +395,7 @@ class TableSchema : enum : int { kTableNameFieldNumber = 1, - kDimensionFieldNumber = 3, - kIndexTypeFieldNumber = 2, - kStoreRawVectorFieldNumber = 4, + kDimensionFieldNumber = 2, }; // .milvus.grpc.TableName table_name = 1; bool has_table_name() const; @@ -407,21 +405,11 @@ class TableSchema : ::milvus::grpc::TableName* mutable_table_name(); void set_allocated_table_name(::milvus::grpc::TableName* table_name); - // int64 dimension = 3; + // int64 dimension = 2; void clear_dimension(); ::PROTOBUF_NAMESPACE_ID::int64 dimension() const; void set_dimension(::PROTOBUF_NAMESPACE_ID::int64 value); - // int32 index_type = 2; - void clear_index_type(); - ::PROTOBUF_NAMESPACE_ID::int32 index_type() const; - void set_index_type(::PROTOBUF_NAMESPACE_ID::int32 value); - - // bool store_raw_vector = 4; - void clear_store_raw_vector(); - bool store_raw_vector() const; - void set_store_raw_vector(bool value); - // @@protoc_insertion_point(class_scope:milvus.grpc.TableSchema) private: class _Internal; @@ -429,8 +417,6 @@ class TableSchema : ::PROTOBUF_NAMESPACE_ID::internal::InternalMetadataWithArena _internal_metadata_; ::milvus::grpc::TableName* table_name_; ::PROTOBUF_NAMESPACE_ID::int64 dimension_; - ::PROTOBUF_NAMESPACE_ID::int32 index_type_; - bool store_raw_vector_; mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; friend struct ::TableStruct_milvus_2eproto; }; @@ -2820,21 +2806,7 @@ inline void TableSchema::set_allocated_table_name(::milvus::grpc::TableName* tab // @@protoc_insertion_point(field_set_allocated:milvus.grpc.TableSchema.table_name) } -// int32 index_type = 2; -inline void TableSchema::clear_index_type() { - index_type_ = 0; -} -inline ::PROTOBUF_NAMESPACE_ID::int32 TableSchema::index_type() const { - // @@protoc_insertion_point(field_get:milvus.grpc.TableSchema.index_type) - return index_type_; -} -inline void TableSchema::set_index_type(::PROTOBUF_NAMESPACE_ID::int32 value) { - - index_type_ = value; - // @@protoc_insertion_point(field_set:milvus.grpc.TableSchema.index_type) -} - -// int64 dimension = 3; +// int64 dimension = 2; inline void TableSchema::clear_dimension() { dimension_ = PROTOBUF_LONGLONG(0); } @@ -2848,20 +2820,6 @@ inline void TableSchema::set_dimension(::PROTOBUF_NAMESPACE_ID::int64 value) { // @@protoc_insertion_point(field_set:milvus.grpc.TableSchema.dimension) } -// bool store_raw_vector = 4; -inline void TableSchema::clear_store_raw_vector() { - store_raw_vector_ = false; -} -inline bool TableSchema::store_raw_vector() const { - // @@protoc_insertion_point(field_get:milvus.grpc.TableSchema.store_raw_vector) - return store_raw_vector_; -} -inline void TableSchema::set_store_raw_vector(bool value) { - - store_raw_vector_ = value; - // @@protoc_insertion_point(field_set:milvus.grpc.TableSchema.store_raw_vector) -} - // ------------------------------------------------------------------- // Range diff --git a/cpp/src/grpc/milvus.proto b/cpp/src/grpc/milvus.proto index 59d74813d1..47209dc5a1 100644 --- a/cpp/src/grpc/milvus.proto +++ b/cpp/src/grpc/milvus.proto @@ -17,9 +17,7 @@ message TableName { */ message TableSchema { TableName table_name = 1; - int32 index_type = 2; - int64 dimension = 3; - bool store_raw_vector = 4; + int64 dimension = 2; } /** @@ -122,6 +120,8 @@ message Command { /** * @brief Index + * @index_type: 0-invalid, 1-idmap, 2-ivflat, 3-ivfsq8, 4-nsgmix + * @metric_type: 1-L2, 2-IP */ message Index { int32 index_type = 1; diff --git a/cpp/src/grpc/status.proto b/cpp/src/grpc/status.proto index 4e8dc15b74..f5b65c5f77 100644 --- a/cpp/src/grpc/status.proto +++ b/cpp/src/grpc/status.proto @@ -25,6 +25,8 @@ enum ErrorCode { CANNOT_DELETE_FOLDER = 19; CANNOT_DELETE_FILE = 20; BUILD_INDEX_ERROR = 21; + ILLEGAL_NLIST = 22; + ILLEGAL_METRIC_TYPE = 23; } message Status { diff --git a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp index 5225f2a97e..b4f7bf0922 100644 --- a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp +++ b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp @@ -34,9 +34,7 @@ namespace { void PrintTableSchema(const TableSchema& tb_schema) { BLOCK_SPLITER std::cout << "Table name: " << tb_schema.table_name << std::endl; - std::cout << "Table index type: " << (int)tb_schema.index_type << std::endl; std::cout << "Table dimension: " << tb_schema.dimension << std::endl; - std::cout << "Table store raw data: " << (tb_schema.store_raw_vector ? "true" : "false") << std::endl; BLOCK_SPLITER } @@ -88,16 +86,15 @@ namespace { } std::string GetTableName() { - static std::string s_id(CurrentTime()); - return "tbl_" + s_id; +// static std::string s_id(CurrentTime()); +// return "tbl_" + s_id; + return "test"; } TableSchema BuildTableSchema() { TableSchema tb_schema; tb_schema.table_name = TABLE_NAME; - tb_schema.index_type = IndexType::gpu_ivfflat; tb_schema.dimension = TABLE_DIMENSION; - tb_schema.store_raw_vector = true; return tb_schema; } @@ -276,9 +273,19 @@ ClientTest::Test(const std::string& address, const std::string& port) { } {//wait unit build index finish -// std::cout << "Wait until build all index done" << std::endl; -// Status stat = conn->CreateIndex(); -// std::cout << "BuildIndex function call status: " << stat.ToString() << std::endl; + std::cout << "Wait until create all index done" << std::endl; + IndexParam index; + index.table_name = TABLE_NAME; + index.index_type = IndexType::gpu_ivfflat; + index.nlist = 1000; + index.index_file_size = 1024; + index.metric_type = 1; + Status stat = conn->CreateIndex(index); + std::cout << "CreateIndex function call status: " << stat.ToString() << std::endl; + + IndexParam index2; + stat = conn->DescribeIndex(TABLE_NAME, index2); + std::cout << "DescribeIndex function call status: " << stat.ToString() << std::endl; } {//preload table @@ -290,6 +297,11 @@ ClientTest::Test(const std::string& address, const std::string& port) { DoSearch(conn, search_record_array, "Search after build index finish"); } + {//delete index + Status stat = conn->DropIndex(TABLE_NAME); + std::cout << "DropIndex function call status: " << stat.ToString() << std::endl; + } + {//delete table Status stat = conn->DropTable(TABLE_NAME); std::cout << "DeleteTable function call status: " << stat.ToString() << std::endl; diff --git a/cpp/src/sdk/grpc/ClientProxy.cpp b/cpp/src/sdk/grpc/ClientProxy.cpp index 1107da6eff..48c5159bb5 100644 --- a/cpp/src/sdk/grpc/ClientProxy.cpp +++ b/cpp/src/sdk/grpc/ClientProxy.cpp @@ -82,9 +82,7 @@ ClientProxy::CreateTable(const TableSchema ¶m) { try { ::milvus::grpc::TableSchema schema; schema.mutable_table_name()->set_table_name(param.table_name); - schema.set_index_type((int) param.index_type); schema.set_dimension(param.dimension); - schema.set_store_raw_vector(param.store_raw_vector); return client_ptr_->CreateTable(schema); } catch (std::exception &ex) { @@ -119,6 +117,10 @@ ClientProxy::CreateIndex(const IndexParam &index_param) { ::milvus::grpc::IndexParam grpc_index_param; grpc_index_param.mutable_table_name()->set_table_name( index_param.table_name); + grpc_index_param.mutable_index()->set_index_type((int32_t)index_param.index_type); + grpc_index_param.mutable_index()->set_nlist(index_param.nlist); + grpc_index_param.mutable_index()->set_index_file_size(index_param.index_file_size); + grpc_index_param.mutable_index()->set_metric_type(index_param.metric_type); return client_ptr_->CreateIndex(grpc_index_param); } catch (std::exception &ex) { @@ -269,9 +271,7 @@ ClientProxy::DescribeTable(const std::string &table_name, TableSchema &table_sch Status status = client_ptr_->DescribeTable(grpc_schema, table_name); table_schema.table_name = grpc_schema.table_name().table_name(); - table_schema.index_type = (IndexType) grpc_schema.index_type(); table_schema.dimension = grpc_schema.dimension(); - table_schema.store_raw_vector = grpc_schema.store_raw_vector(); return status; } catch (std::exception &ex) { @@ -345,14 +345,35 @@ ClientProxy::PreloadTable(const std::string &table_name) const { } } -IndexParam -ClientProxy::DescribeIndex(const std::string &table_name) const { +Status +ClientProxy::DescribeIndex(const std::string &table_name, IndexParam &index_param) const { + try { + ::milvus::grpc::TableName grpc_table_name; + grpc_table_name.set_table_name(table_name); + ::milvus::grpc::IndexParam grpc_index_param; + Status status = client_ptr_->DescribeIndex(grpc_table_name, grpc_index_param); + index_param.index_type = (IndexType)(grpc_index_param.mutable_index()->index_type()); + index_param.nlist = grpc_index_param.mutable_index()->nlist(); + index_param.index_file_size = grpc_index_param.mutable_index()->index_file_size(); + index_param.metric_type = grpc_index_param.mutable_index()->metric_type(); + return status; + + } catch (std::exception &ex) { + return Status(StatusCode::UnknownError, "fail to describe index: " + std::string(ex.what())); + } } Status ClientProxy::DropIndex(const std::string &table_name) const { - + try { + ::milvus::grpc::TableName grpc_table_name; + grpc_table_name.set_table_name(table_name); + Status status = client_ptr_->DropIndex(grpc_table_name); + return status; + } catch (std::exception &ex) { + return Status(StatusCode::UnknownError, "fail to drop index: " + std::string(ex.what())); + } } } diff --git a/cpp/src/sdk/grpc/ClientProxy.h b/cpp/src/sdk/grpc/ClientProxy.h index f6a39f0445..8a1d34d8e2 100644 --- a/cpp/src/sdk/grpc/ClientProxy.h +++ b/cpp/src/sdk/grpc/ClientProxy.h @@ -75,8 +75,8 @@ public: virtual Status PreloadTable(const std::string &table_name) const override; - virtual IndexParam - DescribeIndex(const std::string &table_name) const override; + virtual Status + DescribeIndex(const std::string &table_name, IndexParam &index_param) const override; virtual Status DropIndex(const std::string &table_name) const override; diff --git a/cpp/src/sdk/grpc/GrpcClient.cpp b/cpp/src/sdk/grpc/GrpcClient.cpp index 00894ea529..77478e5d7e 100644 --- a/cpp/src/sdk/grpc/GrpcClient.cpp +++ b/cpp/src/sdk/grpc/GrpcClient.cpp @@ -270,4 +270,44 @@ GrpcClient::Disconnect() { return Status::OK(); } +Status +GrpcClient::DeleteByRange(grpc::DeleteByRangeParam &delete_by_range_param) { + return Status::OK(); +} + +Status +GrpcClient::DescribeIndex(grpc::TableName &table_name, grpc::IndexParam &index_param) { + ClientContext context; + ::grpc::Status grpc_status = stub_->DescribeIndex(&context, table_name, &index_param); + + if (!grpc_status.ok()) { + std::cerr << "DescribeIndex rpc failed!" << std::endl; + return Status(StatusCode::RPCFailed, grpc_status.error_message()); + } + if (index_param.mutable_table_name()->status().error_code() != grpc::SUCCESS) { + std::cerr << index_param.mutable_table_name()->status().reason() << std::endl; + return Status(StatusCode::ServerFailed, index_param.mutable_table_name()->status().reason()); + } + + return Status::OK(); +} + +Status +GrpcClient::DropIndex(grpc::TableName &table_name) { + ClientContext context; + ::milvus::grpc::Status response; + ::grpc::Status grpc_status = stub_->DropIndex(&context, table_name, &response); + + if (!grpc_status.ok()) { + std::cerr << "DropIndex gRPC failed!" << std::endl; + return Status(StatusCode::RPCFailed, grpc_status.error_message()); + } + + if (response.error_code() != grpc::SUCCESS) { + std::cerr << response.reason() << std::endl; + return Status(StatusCode::ServerFailed, response.reason()); + } + return Status::OK(); +} + } \ No newline at end of file diff --git a/cpp/src/sdk/include/MilvusApi.h b/cpp/src/sdk/include/MilvusApi.h index cb261743e1..766bcbad1d 100644 --- a/cpp/src/sdk/include/MilvusApi.h +++ b/cpp/src/sdk/include/MilvusApi.h @@ -76,9 +76,10 @@ struct TopKQueryResult { */ struct IndexParam { std::string table_name; - int32_t index_type; - int64_t nlist; + IndexType index_type; + int32_t nlist; int32_t index_file_size; + int32_t metric_type; }; /** @@ -354,8 +355,8 @@ class Connection { * * @return index informations and indicate if this operation is successful. */ - virtual IndexParam - DescribeIndex(const std::string &table_name) const = 0; + virtual Status + DescribeIndex(const std::string &table_name, IndexParam &index_param) const = 0; /** * @brief drop index diff --git a/cpp/src/sdk/interface/ConnectionImpl.cpp b/cpp/src/sdk/interface/ConnectionImpl.cpp index b496d1c104..355c06438c 100644 --- a/cpp/src/sdk/interface/ConnectionImpl.cpp +++ b/cpp/src/sdk/interface/ConnectionImpl.cpp @@ -125,14 +125,14 @@ ConnectionImpl::PreloadTable(const std::string &table_name) const { return client_proxy_->PreloadTable(table_name); } -IndexParam -ConnectionImpl::DescribeIndex(const std::string &table_name) const { - +Status +ConnectionImpl::DescribeIndex(const std::string &table_name, IndexParam& index_param) const { + return client_proxy_->DescribeIndex(table_name, index_param); } Status ConnectionImpl::DropIndex(const std::string &table_name) const { - + return client_proxy_->DropIndex(table_name); } } diff --git a/cpp/src/sdk/interface/ConnectionImpl.h b/cpp/src/sdk/interface/ConnectionImpl.h index 24be6060ba..d304736b65 100644 --- a/cpp/src/sdk/interface/ConnectionImpl.h +++ b/cpp/src/sdk/interface/ConnectionImpl.h @@ -81,8 +81,8 @@ public: virtual Status PreloadTable(const std::string &table_name) const override; - virtual IndexParam - DescribeIndex(const std::string &table_name) const override; + virtual Status + DescribeIndex(const std::string &table_name, IndexParam& index_param) const override; virtual Status DropIndex(const std::string &table_name) const override; diff --git a/cpp/src/sdk/thrift/ClientProxy.cpp b/cpp/src/sdk/thrift/ClientProxy.cpp index dff5b98eb6..c258aeefad 100644 --- a/cpp/src/sdk/thrift/ClientProxy.cpp +++ b/cpp/src/sdk/thrift/ClientProxy.cpp @@ -334,8 +334,7 @@ Status ClientProxy::PreloadTable(const std::string &table_name) const { return Status::OK(); } -IndexParam ClientProxy::DescribeIndex(const std::string &table_name) const { - IndexParam index_param; +Status ClientProxy::DescribeIndex(const std::string &table_name, IndexParam &index_param) const { index_param.table_name = table_name; return index_param; } diff --git a/cpp/src/sdk/thrift/ClientProxy.h b/cpp/src/sdk/thrift/ClientProxy.h index 60e070ba8c..27dafc92a6 100644 --- a/cpp/src/sdk/thrift/ClientProxy.h +++ b/cpp/src/sdk/thrift/ClientProxy.h @@ -55,7 +55,7 @@ public: virtual Status PreloadTable(const std::string &table_name) const override; - virtual IndexParam DescribeIndex(const std::string &table_name) const override; + virtual Status DescribeIndex(const std::string &table_name, IndexParam &index_param) const override; virtual Status DropIndex(const std::string &table_name) const override; diff --git a/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp b/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp index 584023e4fc..ad1a0e3d71 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp @@ -187,14 +187,24 @@ GrpcRequestHandler::PreloadTable(::grpc::ServerContext *context, GrpcRequestHandler::DescribeIndex(::grpc::ServerContext *context, const ::milvus::grpc::TableName *request, ::milvus::grpc::IndexParam *response) { - + BaseTaskPtr task_ptr = DescribeIndexTask::Create(request->table_name(), *response); + ::milvus::grpc::Status grpc_status; + GrpcRequestScheduler::ExecTask(task_ptr, &grpc_status); + response->mutable_table_name()->mutable_status()->set_reason(grpc_status.reason()); + response->mutable_table_name()->mutable_status()->set_error_code(grpc_status.error_code()); + return ::grpc::Status::OK; } ::grpc::Status GrpcRequestHandler::DropIndex(::grpc::ServerContext *context, const ::milvus::grpc::TableName *request, ::milvus::grpc::Status *response) { - + BaseTaskPtr task_ptr = DropIndexTask::Create(request->table_name()); + ::milvus::grpc::Status grpc_status; + GrpcRequestScheduler::ExecTask(task_ptr, &grpc_status); + response->set_reason(grpc_status.reason()); + response->set_error_code(grpc_status.error_code()); + return ::grpc::Status::OK; } diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp index 8934045579..10ca2b80ed 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp @@ -130,17 +130,10 @@ CreateTableTask::OnExecute() { return SetError(res, "Invalid table dimension: " + std::to_string(schema_.dimension())); } - res = ValidationUtil::ValidateTableIndexType(schema_.index_type()); - if (res != SERVER_SUCCESS) { - return SetError(res, "Invalid index type: " + std::to_string(schema_.index_type())); - } - //step 2: construct table schema engine::meta::TableSchema table_info; table_info.dimension_ = (uint16_t) schema_.dimension(); table_info.table_id_ = schema_.table_name().table_name(); - table_info.engine_type_ = (int) EngineType(schema_.index_type()); - table_info.store_raw_data_ = schema_.store_raw_vector(); //step 3: create table engine::Status stat = DBWrapper::DB()->CreateTable(table_info); @@ -190,10 +183,7 @@ DescribeTableTask::OnExecute() { } schema_.mutable_table_name()->set_table_name(table_info.table_id_); - - schema_.set_index_type(IndexType((engine::EngineType) table_info.engine_type_)); schema_.set_dimension(table_info.dimension_); - schema_.set_store_raw_vector(table_info.store_raw_data_); } catch (std::exception &ex) { return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); @@ -238,7 +228,12 @@ CreateIndexTask::OnExecute() { } //step 2: check table existence - stat = DBWrapper::DB()->BuildIndex(table_name_); + engine::TableIndex index; + index.engine_type_ = index_param_.mutable_index()->index_type(); + index.nlist = index_param_.mutable_index()->nlist(); + index.index_file_size = index_param_.mutable_index()->index_file_size(); + index.metric_type = index_param_.mutable_index()->metric_type(); + stat = DBWrapper::DB()->CreateIndex(table_name_, index); if (!stat.ok()) { return SetError(SERVER_BUILD_INDEX_ERROR, "Engine failed: " + stat.ToString()); } @@ -758,7 +753,89 @@ PreloadTableTask::OnExecute() { return SERVER_SUCCESS; } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DescribeIndexTask::DescribeIndexTask(const std::string &table_name, + ::milvus::grpc::IndexParam &index_param) + : GrpcBaseTask(DDL_DML_TASK_GROUP), + table_name_(table_name), + index_param_(index_param) { +} + +BaseTaskPtr +DescribeIndexTask::Create(const std::string &table_name, + ::milvus::grpc::IndexParam &index_param){ + return std::shared_ptr(new DescribeIndexTask(table_name, index_param)); +} + +ServerError +DescribeIndexTask::OnExecute() { + try { + TimeRecorder rc("DescribeIndexTask"); + + //step 1: check arguments + ServerError res = ValidationUtil::ValidateTableName(table_name_); + if (res != SERVER_SUCCESS) { + return SetError(res, "Invalid table name: " + table_name_); + } + + //step 2: check table existence + engine::TableIndex index; + engine::Status stat = DBWrapper::DB()->DescribeIndex(table_name_, index); + if (!stat.ok()) { + return SetError(DB_META_TRANSACTION_FAILED, "Engine failed: " + stat.ToString()); + } + + index_param_.mutable_table_name()->set_table_name(table_name_); + index_param_.mutable_index()->set_index_type(index.engine_type_); + index_param_.mutable_index()->set_nlist(index.nlist); + index_param_.mutable_index()->set_index_file_size(index.index_file_size); + index_param_.mutable_index()->set_metric_type(index.metric_type); + + rc.ElapseFromBegin("totally cost"); + } catch (std::exception &ex) { + return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); + } + + return SERVER_SUCCESS; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DropIndexTask::DropIndexTask(const std::string &table_name) + : GrpcBaseTask(DDL_DML_TASK_GROUP), + table_name_(table_name) { + +} + +BaseTaskPtr +DropIndexTask::Create(const std::string &table_name){ + return std::shared_ptr(new DropIndexTask(table_name)); +} + +ServerError +DropIndexTask::OnExecute() { + try { + TimeRecorder rc("DropIndexTask"); + + //step 1: check arguments + ServerError res = ValidationUtil::ValidateTableName(table_name_); + if (res != SERVER_SUCCESS) { + return SetError(res, "Invalid table name: " + table_name_); + } + + //step 2: check table existence + engine::Status stat = DBWrapper::DB()->DropIndex(table_name_); + if (!stat.ok()) { + return SetError(DB_META_TRANSACTION_FAILED, "Engine failed: " + stat.ToString()); + } + + rc.ElapseFromBegin("totally cost"); + } catch (std::exception &ex) { + return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); + } + + return SERVER_SUCCESS; +} } } diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.h b/cpp/src/server/grpc_impl/GrpcRequestTask.h index 9a60064e49..e43b9fba60 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.h +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.h @@ -260,6 +260,9 @@ public: protected: DropIndexTask(const std::string &table_name); + ServerError + OnExecute() override; + private: std::string table_name_; diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 0e981f6ae4..4dffeb6db0 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -180,7 +180,7 @@ TEST_F(MetaTest, ARCHIVE_TEST_DISK) { for (auto i=0; i Date: Wed, 21 Aug 2019 18:02:37 +0800 Subject: [PATCH 05/22] add deletebyrange interface and unittest Former-commit-id: 54a94b0924d2de5089a4f31810e31a765bceb591 --- cpp/src/db/DBImpl.cpp | 17 ++-- .../examples/grpcsimple/src/ClientTest.cpp | 9 ++ cpp/src/sdk/grpc/ClientProxy.cpp | 15 ++- cpp/src/sdk/grpc/GrpcClient.cpp | 18 ++++ cpp/src/sdk/interface/ConnectionImpl.cpp | 2 +- .../server/grpc_impl/GrpcRequestHandler.cpp | 7 +- cpp/src/server/grpc_impl/GrpcRequestTask.cpp | 67 +++++++++++++ cpp/unittest/db/db_tests.cpp | 95 ++++++++++++++++++- 8 files changed, 215 insertions(+), 15 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index ddee622669..44e3e7217a 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -104,13 +104,18 @@ Status DBImpl::DeleteTable(const std::string& table_id, const meta::DatesT& date //dates partly delete files of the table but currently we don't support ENGINE_LOG_DEBUG << "Prepare to delete table " << table_id; - mem_mgr_->EraseMemVector(table_id); //not allow insert - meta_ptr_->DeleteTable(table_id); //soft delete table + if (dates.empty()) { + mem_mgr_->EraseMemVector(table_id); //not allow insert + meta_ptr_->DeleteTable(table_id); //soft delete table + + //scheduler will determine when to delete table files + TaskScheduler& scheduler = TaskScheduler::GetInstance(); + DeleteContextPtr context = std::make_shared(table_id, meta_ptr_); + scheduler.Schedule(context); + } else { + meta_ptr_->DropPartitionsByDates(table_id, dates); + } - //scheduler will determine when to delete table files - TaskScheduler& scheduler = TaskScheduler::GetInstance(); - DeleteContextPtr context = std::make_shared(table_id, meta_ptr_); - scheduler.Schedule(context); return Status::OK(); } diff --git a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp index 5225f2a97e..1e8706e98b 100644 --- a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp +++ b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp @@ -290,6 +290,15 @@ ClientTest::Test(const std::string& address, const std::string& port) { DoSearch(conn, search_record_array, "Search after build index finish"); } + { + Range rg; + rg.start_value = CurrentTmDate(-2); + rg.end_value = CurrentTmDate(-3); + + Status stat = conn->DeleteByRange(rg, TABLE_NAME); + std::cout << "DeleteByRange function call status: " << stat.ToString() << std::endl; + } + {//delete table Status stat = conn->DropTable(TABLE_NAME); std::cout << "DeleteTable function call status: " << stat.ToString() << std::endl; diff --git a/cpp/src/sdk/grpc/ClientProxy.cpp b/cpp/src/sdk/grpc/ClientProxy.cpp index 1107da6eff..2e902d8249 100644 --- a/cpp/src/sdk/grpc/ClientProxy.cpp +++ b/cpp/src/sdk/grpc/ClientProxy.cpp @@ -330,10 +330,18 @@ ClientProxy::ServerStatus() const { Status ClientProxy::DeleteByRange(milvus::Range &range, const std::string &table_name) { - + try { + ::milvus::grpc::DeleteByRangeParam delete_by_range_param; + delete_by_range_param.set_table_name(table_name); + delete_by_range_param.mutable_range()->set_start_value(range.start_value); + delete_by_range_param.mutable_range()->set_end_value(range.end_value); + return client_ptr_->DeleteByRange(delete_by_range_param); + } catch (std::exception &ex) { + return Status(StatusCode::UnknownError, "fail to delete by range: " + std::string(ex.what())); + } } -Status + Status ClientProxy::PreloadTable(const std::string &table_name) const { try { ::milvus::grpc::TableName grpc_table_name; @@ -341,13 +349,12 @@ ClientProxy::PreloadTable(const std::string &table_name) const { Status status = client_ptr_->PreloadTable(grpc_table_name); return status; } catch (std::exception &ex) { - return Status(StatusCode::UnknownError, "fail to show tables: " + std::string(ex.what())); + return Status(StatusCode::UnknownError, "fail to preload tables: " + std::string(ex.what())); } } IndexParam ClientProxy::DescribeIndex(const std::string &table_name) const { - } Status diff --git a/cpp/src/sdk/grpc/GrpcClient.cpp b/cpp/src/sdk/grpc/GrpcClient.cpp index 00894ea529..210c3fdf61 100644 --- a/cpp/src/sdk/grpc/GrpcClient.cpp +++ b/cpp/src/sdk/grpc/GrpcClient.cpp @@ -264,6 +264,24 @@ GrpcClient::PreloadTable(milvus::grpc::TableName &table_name) { return Status::OK(); } +Status +GrpcClient::DeleteByRange(grpc::DeleteByRangeParam &delete_by_range_param) { + ClientContext context; + ::milvus::grpc::Status response; + ::grpc::Status grpc_status = stub_->DeleteByRange(&context, delete_by_range_param, &response); + + if (!grpc_status.ok()) { + std::cerr << "DeleteByRange gRPC failed!" << std::endl; + return Status(StatusCode::RPCFailed, grpc_status.error_message()); + } + + if (response.error_code() != grpc::SUCCESS) { + std::cerr << response.reason() << std::endl; + return Status(StatusCode::ServerFailed, response.reason()); + } + return Status::OK(); +} + Status GrpcClient::Disconnect() { stub_.release(); diff --git a/cpp/src/sdk/interface/ConnectionImpl.cpp b/cpp/src/sdk/interface/ConnectionImpl.cpp index b496d1c104..f0875638fd 100644 --- a/cpp/src/sdk/interface/ConnectionImpl.cpp +++ b/cpp/src/sdk/interface/ConnectionImpl.cpp @@ -117,7 +117,7 @@ ConnectionImpl::ServerStatus() const { Status ConnectionImpl::DeleteByRange(Range &range, const std::string &table_name) { - + return client_proxy_->DeleteByRange(range, table_name); } Status diff --git a/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp b/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp index 584023e4fc..cb705aed80 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp @@ -168,7 +168,12 @@ GrpcRequestHandler::Cmd(::grpc::ServerContext *context, GrpcRequestHandler::DeleteByRange(::grpc::ServerContext *context, const ::milvus::grpc::DeleteByRangeParam *request, ::milvus::grpc::Status *response) { - + BaseTaskPtr task_ptr = DeleteByRangeTask::Create(*request); + ::milvus::grpc::Status grpc_status; + GrpcRequestScheduler::ExecTask(task_ptr, &grpc_status); + response->set_error_code(grpc_status.error_code()); + response->set_reason(grpc_status.reason()); + return ::grpc::Status::OK; } ::grpc::Status diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp index 8934045579..20f74ca3c8 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp @@ -721,6 +721,73 @@ CmdTask::OnExecute() { return SERVER_SUCCESS; } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +DeleteByRangeTask::DeleteByRangeTask(const ::milvus::grpc::DeleteByRangeParam &delete_by_range_param) + : GrpcBaseTask(DDL_DML_TASK_GROUP), + delete_by_range_param_(delete_by_range_param){ +} + +BaseTaskPtr +DeleteByRangeTask::Create(const ::milvus::grpc::DeleteByRangeParam &delete_by_range_param) { + return std::shared_ptr(new DeleteByRangeTask(delete_by_range_param)); +} + +ServerError +DeleteByRangeTask::OnExecute() { + try { + TimeRecorder rc("DeleteByRangeTask"); + + //step 1: check arguments + std::string table_name = delete_by_range_param_.table_name(); + ServerError res = ValidationUtil::ValidateTableName(table_name); + if (res != SERVER_SUCCESS) { + return SetError(res, "Invalid table name: " + table_name); + } + + //step 2: check table existence + engine::meta::TableSchema table_info; + table_info.table_id_ = table_name; + engine::Status stat = DBWrapper::DB()->DescribeTable(table_info); + if (!stat.ok()) { + if (stat.IsNotFound()) { + return SetError(SERVER_TABLE_NOT_EXIST, "Table " + table_name + " not exists"); + } else { + return SetError(DB_META_TRANSACTION_FAILED, "Engine failed: " + stat.ToString()); + } + } + + rc.ElapseFromBegin("check validation"); + + //step 3: check date range, and convert to db dates + std::vector dates; + ServerError error_code = SERVER_SUCCESS; + std::string error_msg; + + std::vector<::milvus::grpc::Range> range_array; + range_array.emplace_back(delete_by_range_param_.range()); + ConvertTimeRangeToDBDates(range_array, dates, error_code, error_msg); + if (error_code != SERVER_SUCCESS) { + return SetError(error_code, error_msg); + } + +#ifdef MILVUS_ENABLE_PROFILING + std::string fname = "/tmp/search_nq_" + std::to_string(this->record_array_.size()) + + "_top_" + std::to_string(this->top_k_) + "_" + + GetCurrTimeStr() + ".profiling"; + ProfilerStart(fname.c_str()); +#endif + engine::Status status = DBWrapper::DB()->DeleteTable(table_name, dates); + if (!status.ok()) { + return SetError(DB_META_TRANSACTION_FAILED, "Engine failed: " + stat.ToString()); + } + + } catch (std::exception &ex) { + return SetError(SERVER_UNEXPECTED_ERROR, ex.what()); + } + + return SERVER_SUCCESS; +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// PreloadTableTask::PreloadTableTask(const std::string &table_name) : GrpcBaseTask(DDL_DML_TASK_GROUP), diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 8b36d2efbd..ad8a196494 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -9,6 +9,7 @@ #include "db/meta/MetaConsts.h" #include "db/Factories.h" #include "cache/CpuCacheMgr.h" +#include "utils/CommonUtil.h" #include #include @@ -26,6 +27,8 @@ namespace { static constexpr int64_t TABLE_DIM = 256; static constexpr int64_t VECTOR_COUNT = 250000; static constexpr int64_t INSERT_LOOP = 10000; + static constexpr int64_t SECONDS_EACH_HOUR = 3600; + static constexpr int64_t DAY_SECONDS = 24 * 60 * 60; engine::meta::TableSchema BuildTableSchema() { engine::meta::TableSchema table_info; @@ -45,6 +48,52 @@ namespace { } } + std::string CurrentTmDate(int64_t offset_day = 0) { + time_t tt; + time( &tt ); + tt = tt + 8*SECONDS_EACH_HOUR; + tt = tt + 24*SECONDS_EACH_HOUR*offset_day; + tm* t= gmtime( &tt ); + + std::string str = std::to_string(t->tm_year + 1900) + "-" + std::to_string(t->tm_mon + 1) + + "-" + std::to_string(t->tm_mday); + + return str; + } + + void + ConvertTimeRangeToDBDates(const std::string &start_value, + const std::string &end_value, + std::vector &dates) { + dates.clear(); + + time_t tt_start, tt_end; + tm tm_start, tm_end; + if (!zilliz::milvus::server::CommonUtil::TimeStrToTime(start_value, tt_start, tm_start)) { + return; + } + + if (!zilliz::milvus::server::CommonUtil::TimeStrToTime(end_value, tt_end, tm_end)) { + return; + } + + long days = (tt_end > tt_start) ? (tt_end - tt_start) / DAY_SECONDS : (tt_start - tt_end) / + DAY_SECONDS; + if (days == 0) { + return; + } + + for (long i = 0; i < days; i++) { + time_t tt_day = tt_start + DAY_SECONDS * i; + tm tm_day; + zilliz::milvus::server::CommonUtil::ConvertTime(tt_day, tm_day); + + long date = tm_day.tm_year * 10000 + tm_day.tm_mon * 100 + + tm_day.tm_mday;//according to db logic + dates.push_back(date); + } + } + } TEST_F(DBTest, CONFIG_TEST) { @@ -307,8 +356,6 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { }; TEST_F(DBTest2, DELETE_TEST) { - - engine::meta::TableSchema table_info = BuildTableSchema(); engine::Status stat = db_->CreateTable(table_info); @@ -343,4 +390,46 @@ TEST_F(DBTest2, DELETE_TEST) { db_->HasTable(TABLE_NAME, has_table); ASSERT_FALSE(has_table); -}; \ No newline at end of file +}; + +TEST_F(DBTest2, DELETE_BY_RANGE_TEST) { + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + + bool has_table = false; + db_->HasTable(TABLE_NAME, has_table); + ASSERT_TRUE(has_table); + + engine::IDNumbers vector_ids; + + uint64_t size; + db_->Size(size); + + int64_t nb = INSERT_LOOP; + std::vector xb; + BuildVectors(nb, xb); + + int loop = 20; + for (auto i=0; iInsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + std::vector dates; + engine::meta::DateT date; + std::string start_value = CurrentTmDate(-3); + std::string end_value = CurrentTmDate(-2); + ConvertTimeRangeToDBDates(start_value, end_value, dates); + + db_->DeleteTable(TABLE_NAME, dates); +} \ No newline at end of file From d30e432e16f70496dab6ab082b2a00d9eaf8ff05 Mon Sep 17 00:00:00 2001 From: starlord Date: Wed, 21 Aug 2019 18:35:57 +0800 Subject: [PATCH 06/22] merge 0.3.1 Former-commit-id: a538685c673319581c756289a4c003fa5833d829 --- ci/jenkinsfile/cluster_dev_test.groovy | 2 +- cpp/CHANGELOG.md | 5 +- cpp/build.sh | 2 +- cpp/cmake/ThirdPartyPackages.cmake | 26 +-- cpp/conf/server_config.template | 2 + cpp/src/config/YamlConfigMgr.cpp | 30 +-- cpp/src/config/YamlConfigMgr.h | 8 +- cpp/src/db/DBImpl.cpp | 6 +- cpp/src/db/Factories.cpp | 10 +- cpp/src/db/Factories.h | 2 +- cpp/src/db/Utils.cpp | 22 ++- cpp/src/db/Utils.h | 2 +- cpp/src/db/meta/MySQLConnectionPool.cpp | 14 +- cpp/src/db/meta/MySQLConnectionPool.h | 6 +- cpp/src/db/meta/MySQLMetaImpl.cpp | 49 ++++- cpp/src/db/meta/SqliteMetaImpl.cpp | 25 ++- cpp/src/server/ServerConfig.h | 14 +- cpp/src/wrapper/FaissGpuResources.cpp | 38 ++++ cpp/src/wrapper/FaissGpuResources.h | 36 ++++ cpp/src/wrapper/Index.cpp | 29 ++- cpp/src/wrapper/Index.h | 1 - cpp/src/wrapper/IndexBuilder.cpp | 70 ++++--- cpp/src/wrapper/IndexBuilder.h | 2 + cpp/thirdparty/versions.txt | 2 +- cpp/unittest/db/db_tests.cpp | 6 + cpp/unittest/db/mem_test.cpp | 7 +- cpp/unittest/db/misc_test.cpp | 4 + cpp/unittest/db/mysql_meta_test.cpp | 30 ++- cpp/unittest/db/scheduler_test.cpp | 2 + cpp/unittest/db/search_test.cpp | 9 + cpp/unittest/db/utils.cpp | 9 +- cpp/unittest/faiss_wrapper/CMakeLists.txt | 56 ++++++ cpp/unittest/faiss_wrapper/wrapper_test.cpp | 203 ++++++++++++++++++++ cpp/unittest/server/config_test.cpp | 51 ++++- cpp/unittest/server/util_test.cpp | 9 + cpp/unittest/utils/ValidationUtilTest.cpp | 76 ++++++++ 36 files changed, 744 insertions(+), 121 deletions(-) create mode 100644 cpp/src/wrapper/FaissGpuResources.cpp create mode 100644 cpp/src/wrapper/FaissGpuResources.h create mode 100644 cpp/unittest/faiss_wrapper/CMakeLists.txt create mode 100644 cpp/unittest/faiss_wrapper/wrapper_test.cpp create mode 100644 cpp/unittest/utils/ValidationUtilTest.cpp diff --git a/ci/jenkinsfile/cluster_dev_test.groovy b/ci/jenkinsfile/cluster_dev_test.groovy index 2d8854ca71..4a15b926cf 100644 --- a/ci/jenkinsfile/cluster_dev_test.groovy +++ b/ci/jenkinsfile/cluster_dev_test.groovy @@ -1,4 +1,4 @@ -timeout(time: 10, unit: 'MINUTES') { +timeout(time: 25, unit: 'MINUTES') { try { dir ("${PROJECT_NAME}_test") { checkout([$class: 'GitSCM', branches: [[name: "${SEMVER}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${params.GIT_USER}", url: "git@192.168.1.105:Test/milvus_test.git", name: 'origin', refspec: "+refs/heads/${SEMVER}:refs/remotes/origin/${SEMVER}"]]]) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index a7ddee104a..616aeafc48 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -61,6 +61,8 @@ Please mark all change in change log and use the ticket from JIRA. - MS-257 - Update bzip2 download url - MS-288 - Update compile scripts - MS-330 - Stability test failed caused by server core dumped +- MS-347 - Build index hangs again +- MS-382 - fix MySQLMetaImpl::CleanUpFilesWithTTL unknown column bug ## Improvement - MS-156 - Add unittest for merge result functions @@ -89,6 +91,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-324 - Show error when there is not enough gpu memory to build index - MS-328 - Check metric type on server start - MS-332 - Set grpc and thrift server run concurrently +- MS-352 - Add hybrid index ## New Feature - MS-180 - Add new mem manager @@ -148,8 +151,8 @@ Please mark all change in change log and use the ticket from JIRA. - MS-130 - Add prometheus_test - MS-144 - Add nprobe config - MS-147 - Enable IVF - - MS-130 - Add prometheus_test + ## Task - MS-74 - Change README.md in cpp - MS-88 - Add support for arm architecture diff --git a/cpp/build.sh b/cpp/build.sh index 7216296c69..500eac6c67 100755 --- a/cpp/build.sh +++ b/cpp/build.sh @@ -86,7 +86,7 @@ if [[ ! -d cmake_build ]]; then fi cd cmake_build - +git CUDA_COMPILER=/usr/local/cuda/bin/nvcc if [[ ${MAKE_CLEAN} == "ON" ]]; then diff --git a/cpp/cmake/ThirdPartyPackages.cmake b/cpp/cmake/ThirdPartyPackages.cmake index b48724588d..f9140b6d80 100644 --- a/cpp/cmake/ThirdPartyPackages.cmake +++ b/cpp/cmake/ThirdPartyPackages.cmake @@ -157,7 +157,6 @@ if (UNIX) endif (APPLE) endif (UNIX) - # ---------------------------------------------------------------------- # thirdparty directory set(THIRDPARTY_DIR "${MILVUS_SOURCE_DIR}/thirdparty") @@ -167,7 +166,7 @@ set(THIRDPARTY_DIR "${MILVUS_SOURCE_DIR}/thirdparty") if(NOT DEFINED USE_JFROG_CACHE) set(USE_JFROG_CACHE "OFF") endif() -if(USE_JFROG_CACHE STREQUAL "ON") +if(USE_JFROG_CACHE STREQUAL "ON") set(JFROG_ARTFACTORY_CACHE_URL "http://192.168.1.201:80/artifactory/generic-local/milvus/thirdparty/cache/${CMAKE_OS_NAME}/${MILVUS_BUILD_ARCH}/${BUILD_TYPE}") set(JFROG_USER_NAME "test") set(JFROG_PASSWORD "Fantast1c") @@ -308,9 +307,11 @@ set(EASYLOGGINGPP_MD5 "b78cd319db4be9b639927657b8aa7732") if(DEFINED ENV{MILVUS_FAISS_URL}) set(FAISS_SOURCE_URL "$ENV{MILVUS_FAISS_URL}") else() - set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") + set(FAISS_SOURCE_URL "http://192.168.1.105:6060/jinhai/faiss/-/archive/${FAISS_VERSION}/faiss-${FAISS_VERSION}.tar.gz") + # set(FAISS_SOURCE_URL "https://github.com/facebookresearch/faiss/archive/${FAISS_VERSION}.tar.gz") endif() -set(FAISS_MD5 "0bc12737b23def156f6a1eb782050135") + +set(FAISS_MD5 "a589663865a8558205533c8ac414278c") if(DEFINED ENV{MILVUS_KNOWHERE_URL}) set(KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}") @@ -462,6 +463,7 @@ else() endif() set(GRPC_MD5 "7ec59ad54c85a12dcbbfede09bf413a9") + # ---------------------------------------------------------------------- # ARROW @@ -686,7 +688,7 @@ macro(build_bzip2) set(BZIP2_STATIC_LIB "${BZIP2_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}bz2${CMAKE_STATIC_LIBRARY_SUFFIX}") - if(USE_JFROG_CACHE STREQUAL "ON") + if(USE_JFROG_CACHE STREQUAL "ON") set(BZIP2_CACHE_PACKAGE_NAME "bzip2_${BZIP2_MD5}.tar.gz") set(BZIP2_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${BZIP2_CACHE_PACKAGE_NAME}") set(BZIP2_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${BZIP2_CACHE_PACKAGE_NAME}") @@ -1184,7 +1186,7 @@ macro(build_faiss) INTERFACE_INCLUDE_DIRECTORIES "${FAISS_INCLUDE_DIR}" INTERFACE_LINK_LIBRARIES "openblas;lapack" ) endif() - + add_dependencies(faiss faiss_ep) if(${BUILD_FAISS_WITH_MKL} STREQUAL "OFF") @@ -1321,7 +1323,7 @@ if (MILVUS_BUILD_TESTS) if(NOT GTEST_VENDORED) endif() - + get_target_property(GTEST_INCLUDE_DIR gtest INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM "${GTEST_PREFIX}/lib") include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) @@ -1828,7 +1830,7 @@ endmacro() if(MILVUS_WITH_SNAPPY) resolve_dependency(Snappy) - + get_target_property(SNAPPY_INCLUDE_DIRS snappy INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM ${SNAPPY_PREFIX}/lib/) include_directories(SYSTEM ${SNAPPY_INCLUDE_DIRS}) @@ -2131,7 +2133,7 @@ endmacro() if(MILVUS_WITH_YAMLCPP) resolve_dependency(yaml-cpp) - + get_target_property(YAMLCPP_INCLUDE_DIR yaml-cpp INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM ${YAMLCPP_PREFIX}/lib/) include_directories(SYSTEM ${YAMLCPP_INCLUDE_DIR}) @@ -2203,7 +2205,7 @@ endmacro() if(MILVUS_WITH_ZLIB) resolve_dependency(ZLIB) - + get_target_property(ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) endif() @@ -2301,7 +2303,7 @@ endmacro() if(MILVUS_WITH_ZSTD) resolve_dependency(ZSTD) - + get_target_property(ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES) link_directories(SYSTEM ${ZSTD_PREFIX}/lib) include_directories(SYSTEM ${ZSTD_INCLUDE_DIR}) @@ -2406,7 +2408,7 @@ endmacro() if(MILVUS_WITH_AWS) resolve_dependency(AWS) - + link_directories(SYSTEM ${AWS_PREFIX}/lib) get_target_property(AWS_CPP_SDK_S3_INCLUDE_DIR aws-cpp-sdk-s3 INTERFACE_INCLUDE_DIRECTORIES) diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index c80e981bcd..037e55a0a8 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -45,3 +45,5 @@ engine_config: use_blas_threshold: 20 metric_type: L2 # compare vectors by euclidean distance(L2) or inner product(IP), optional: L2 or IP omp_thread_num: 0 # how many compute threads be used by engine, 0 means use all cpu core to compute + use_hybrid_index: false # use GPU/CPU hybrid index + hybrid_index_gpu: 0 # hybrid index gpu device id diff --git a/cpp/src/config/YamlConfigMgr.cpp b/cpp/src/config/YamlConfigMgr.cpp index 9a34ef3e63..ee935bf32c 100644 --- a/cpp/src/config/YamlConfigMgr.cpp +++ b/cpp/src/config/YamlConfigMgr.cpp @@ -73,19 +73,19 @@ YamlConfigMgr::SetChildConfig(const YAML::Node& node, return false; } -bool -YamlConfigMgr::SetSequence(const YAML::Node &node, - const std::string &child_name, - ConfigNode &config) { - if(node[child_name].IsDefined ()) { - size_t cnt = node[child_name].size(); - for(size_t i = 0; i < cnt; i++){ - config.AddSequenceItem(child_name, node[child_name][i].as()); - } - return true; - } - return false; -} +//bool +//YamlConfigMgr::SetSequence(const YAML::Node &node, +// const std::string &child_name, +// ConfigNode &config) { +// if(node[child_name].IsDefined ()) { +// size_t cnt = node[child_name].size(); +// for(size_t i = 0; i < cnt; i++){ +// config.AddSequenceItem(child_name, node[child_name][i].as()); +// } +// return true; +// } +// return false; +//} void YamlConfigMgr::LoadConfigNode(const YAML::Node& node, ConfigNode& config) { @@ -98,8 +98,8 @@ YamlConfigMgr::LoadConfigNode(const YAML::Node& node, ConfigNode& config) { SetConfigValue(node, key, config); } else if(node[key].IsMap()){ SetChildConfig(node, key, config); - } else if(node[key].IsSequence()){ - SetSequence(node, key, config); +// } else if(node[key].IsSequence()){ +// SetSequence(node, key, config); } } } diff --git a/cpp/src/config/YamlConfigMgr.h b/cpp/src/config/YamlConfigMgr.h index b8828b7a8c..05b55d9da5 100644 --- a/cpp/src/config/YamlConfigMgr.h +++ b/cpp/src/config/YamlConfigMgr.h @@ -33,10 +33,10 @@ class YamlConfigMgr : public IConfigMgr { const std::string &name, ConfigNode &config); - bool - SetSequence(const YAML::Node &node, - const std::string &child_name, - ConfigNode &config); +// bool +// SetSequence(const YAML::Node &node, +// const std::string &child_name, +// ConfigNode &config); void LoadConfigNode(const YAML::Node& node, ConfigNode& config); diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index a649257a0b..bccc6bbf92 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -60,6 +60,7 @@ void CollectQueryMetrics(double total_time, size_t nq) { server::Metrics::GetInstance().QueryVectorResponsePerSecondGaugeSet(double (nq) / total_time); } +#if 0 void CollectFileMetrics(int file_type, size_t file_size, double total_time) { switch(file_type) { case meta::TableFileSchema::RAW: @@ -79,6 +80,7 @@ void CollectFileMetrics(int file_type, size_t file_size, double total_time) { } } } +#endif } @@ -205,7 +207,7 @@ Status DBImpl::Query(const std::string &table_id, uint64_t k, uint64_t nq, uint6 Status DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq, uint64_t nprobe, const float* vectors, const meta::DatesT& dates, QueryResults& results) { - ENGINE_LOG_DEBUG << "Query by vectors"; + ENGINE_LOG_DEBUG << "Query by vectors " << table_id; //get all table files from table meta::DatePartionedTableFilesSchema files; @@ -568,7 +570,7 @@ Status DBImpl::BuildIndex(const std::string& table_id) { int times = 1; while (has) { - ENGINE_LOG_DEBUG << "Non index files detected! Will build index " << times; + ENGINE_LOG_DEBUG << "Non index files detected in " << table_id << "! Will build index " << times; meta_ptr_->UpdateTableFilesToIndex(table_id); /* StartBuildIndexTask(true); */ std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10*1000, times*100))); diff --git a/cpp/src/db/Factories.cpp b/cpp/src/db/Factories.cpp index bb1056e3c2..58883d5c7b 100644 --- a/cpp/src/db/Factories.cpp +++ b/cpp/src/db/Factories.cpp @@ -90,11 +90,11 @@ std::shared_ptr DBMetaImplFactory::Build(const DBMetaOptions& metaOp } } -std::shared_ptr DBFactory::Build() { - auto options = OptionsFactory::Build(); - auto db = DBFactory::Build(options); - return std::shared_ptr(db); -} +//std::shared_ptr DBFactory::Build() { +// auto options = OptionsFactory::Build(); +// auto db = DBFactory::Build(options); +// return std::shared_ptr(db); +//} DB* DBFactory::Build(const Options& options) { return new DBImpl(options); diff --git a/cpp/src/db/Factories.h b/cpp/src/db/Factories.h index 0e6823c385..3c3479e512 100644 --- a/cpp/src/db/Factories.h +++ b/cpp/src/db/Factories.h @@ -33,7 +33,7 @@ struct DBMetaImplFactory { }; struct DBFactory { - static std::shared_ptr Build(); + //static std::shared_ptr Build(); static DB *Build(const Options &); }; diff --git a/cpp/src/db/Utils.cpp b/cpp/src/db/Utils.cpp index 5a0d3cafa2..1a1355d507 100644 --- a/cpp/src/db/Utils.cpp +++ b/cpp/src/db/Utils.cpp @@ -85,16 +85,20 @@ Status CreateTablePath(const DBMetaOptions& options, const std::string& table_id return Status::OK(); } -Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id) { - std::string db_path = options.path; - std::string table_path = db_path + TABLES_FOLDER + table_id; - boost::filesystem::remove_all(table_path); - ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; +Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id, bool force) { + std::vector paths = options.slave_paths; + paths.push_back(options.path); - for(auto& path : options.slave_paths) { - table_path = path + TABLES_FOLDER + table_id; - boost::filesystem::remove_all(table_path); - ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + for(auto& path : paths) { + std::string table_path = path + TABLES_FOLDER + table_id; + if(force) { + boost::filesystem::remove_all(table_path); + ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + } else if(boost::filesystem::exists(table_path) && + boost::filesystem::is_empty(table_path)) { + boost::filesystem::remove_all(table_path); + ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + } } return Status::OK(); diff --git a/cpp/src/db/Utils.h b/cpp/src/db/Utils.h index 47a8fca9b7..101d849ca3 100644 --- a/cpp/src/db/Utils.h +++ b/cpp/src/db/Utils.h @@ -19,7 +19,7 @@ namespace utils { long GetMicroSecTimeStamp(); Status CreateTablePath(const DBMetaOptions& options, const std::string& table_id); -Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id); +Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id, bool force = true); Status CreateTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); Status GetTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); diff --git a/cpp/src/db/meta/MySQLConnectionPool.cpp b/cpp/src/db/meta/MySQLConnectionPool.cpp index b43126920e..8e82dc5ae7 100644 --- a/cpp/src/db/meta/MySQLConnectionPool.cpp +++ b/cpp/src/db/meta/MySQLConnectionPool.cpp @@ -30,13 +30,13 @@ namespace meta { } } - int MySQLConnectionPool::getConnectionsInUse() { - return conns_in_use_; - } - - void MySQLConnectionPool::set_max_idle_time(int max_idle) { - max_idle_time_ = max_idle; - } +// int MySQLConnectionPool::getConnectionsInUse() { +// return conns_in_use_; +// } +// +// void MySQLConnectionPool::set_max_idle_time(int max_idle) { +// max_idle_time_ = max_idle; +// } std::string MySQLConnectionPool::getDB() { return db_; diff --git a/cpp/src/db/meta/MySQLConnectionPool.h b/cpp/src/db/meta/MySQLConnectionPool.h index 62afd2ddbf..9cde818b45 100644 --- a/cpp/src/db/meta/MySQLConnectionPool.h +++ b/cpp/src/db/meta/MySQLConnectionPool.h @@ -44,9 +44,9 @@ public: // Other half of in-use conn count limit void release(const mysqlpp::Connection *pc) override; - int getConnectionsInUse(); - - void set_max_idle_time(int max_idle); +// int getConnectionsInUse(); +// +// void set_max_idle_time(int max_idle); std::string getDB(); diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index fa2697ec32..e38997b22c 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -1652,15 +1652,14 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { } Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { - - auto now = utils::GetMicroSecTimeStamp(); + std::set table_ids; + + //remove to_delete files try { MetricCollector metric; { - - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { @@ -1700,6 +1699,8 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { ENGINE_LOG_DEBUG << "Removing file id:" << table_file.id_ << " location:" << table_file.location_; idsToDelete.emplace_back(std::to_string(table_file.id_)); + + table_ids.insert(table_file.table_id_); } if (!idsToDelete.empty()) { @@ -1734,12 +1735,11 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); } + //remove to_delete tables try { MetricCollector metric; { - - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { @@ -1765,7 +1765,7 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { std::string table_id; resRow["table_id"].to_string(table_id); - utils::DeleteTablePath(options_, table_id); + utils::DeleteTablePath(options_, table_id, false);//only delete empty folder idsToDeleteSS << "id = " << std::to_string(id) << " OR "; } @@ -1794,6 +1794,41 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); } + try { + MetricCollector metric; + + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + + for(auto& table_id : table_ids) { + Query cleanUpFilesWithTTLQuery = connectionPtr->query(); + cleanUpFilesWithTTLQuery << "SELECT file_id " << + "FROM TableFiles " << + "WHERE table_id = " << quote << table_id << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::CleanUpFilesWithTTL: " << cleanUpFilesWithTTLQuery.str(); + + StoreQueryResult res = cleanUpFilesWithTTLQuery.store(); + + if (res.empty()) { + utils::DeleteTablePath(options_, table_id); + } + } + } + } catch (const BadQuery &er) { + // Handle any query errors + ENGINE_LOG_ERROR << "QUERY ERROR WHEN CLEANING UP FILES WITH TTL" << ": " << er.what(); + return Status::DBTransactionError("QUERY ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL", er.what()); + } + return Status::OK(); } diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index 9118eadd17..25f5dbfaf4 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -311,6 +311,7 @@ Status SqliteMetaImpl::HasNonIndexFiles(const std::string& table_id, bool& has) has = true; int raw_count = 0, new_count = 0, new_merge_count = 0, new_index_count = 0, to_index_count = 0; + std::vector file_ids; for (auto &file : selected) { switch (std::get<1>(file)) { case (int) TableFileSchema::RAW: @@ -1069,6 +1070,9 @@ Status SqliteMetaImpl::UpdateTableFiles(TableFilesSchema &files) { Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { auto now = utils::GetMicroSecTimeStamp(); + std::set table_ids; + + //remove to_delete files try { MetricCollector metric; @@ -1098,6 +1102,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { ENGINE_LOG_DEBUG << "Removing file id:" << table_file.file_id_ << " location:" << table_file.location_; ConnectorPtr->remove(table_file.id_); + table_ids.insert(table_file.table_id_); } return true; }); @@ -1111,6 +1116,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return HandleException("Encounter exception when clean table files", e); } + //remove to_delete tables try { MetricCollector metric; @@ -1123,7 +1129,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { auto commited = ConnectorPtr->transaction([&]() mutable { for (auto &table : tables) { - utils::DeleteTablePath(options_, std::get<1>(table)); + utils::DeleteTablePath(options_, std::get<1>(table), false);//only delete empty folder ConnectorPtr->remove(std::get<0>(table)); } @@ -1139,6 +1145,23 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return HandleException("Encounter exception when clean table files", e); } + //remove deleted table folder + //don't remove table folder until all its files has been deleted + try { + MetricCollector metric; + + for(auto& table_id : table_ids) { + auto selected = ConnectorPtr->select(columns(&TableFileSchema::file_id_), + where(c(&TableFileSchema::table_id_) == table_id)); + if(selected.size() == 0) { + utils::DeleteTablePath(options_, table_id); + } + } + + } catch (std::exception &e) { + return HandleException("Encounter exception when delete table folder", e); + } + return Status::OK(); } diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index 3236ea5cdc..49dc6f50fd 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -48,12 +48,14 @@ static const char* CONFIG_METRIC_COLLECTOR = "collector"; static const char* CONFIG_PROMETHEUS = "prometheus_config"; static const char* CONFIG_METRIC_PROMETHEUS_PORT = "port"; -static const char* CONFIG_ENGINE = "engine_config"; -static const char* CONFIG_NPROBE = "nprobe"; -static const char* CONFIG_NLIST = "nlist"; -static const char* CONFIG_DCBT = "use_blas_threshold"; -static const char* CONFIG_METRICTYPE = "metric_type"; -static const char* CONFIG_OMP_THREAD_NUM = "omp_thread_num"; +static const std::string CONFIG_ENGINE = "engine_config"; +static const std::string CONFIG_NPROBE = "nprobe"; +static const std::string CONFIG_NLIST = "nlist"; +static const std::string CONFIG_DCBT = "use_blas_threshold"; +static const std::string CONFIG_METRICTYPE = "metric_type"; +static const std::string CONFIG_OMP_THREAD_NUM = "omp_thread_num"; +static const std::string CONFIG_USE_HYBRID_INDEX = "use_hybrid_index"; +static const std::string CONFIG_HYBRID_INDEX_GPU = "hybrid_index_gpu"; class ServerConfig { public: diff --git a/cpp/src/wrapper/FaissGpuResources.cpp b/cpp/src/wrapper/FaissGpuResources.cpp new file mode 100644 index 0000000000..b4372f1a2c --- /dev/null +++ b/cpp/src/wrapper/FaissGpuResources.cpp @@ -0,0 +1,38 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + +#include "FaissGpuResources.h" +#include "map" + +namespace zilliz { +namespace milvus { +namespace engine { + +FaissGpuResources::Ptr& FaissGpuResources::GetGpuResources(int device_id) { + static std::map gpu_resources_map; + auto search = gpu_resources_map.find(device_id); + if (search != gpu_resources_map.end()) { + return gpu_resources_map[device_id]; + } else { + gpu_resources_map[device_id] = std::make_shared(); + return gpu_resources_map[device_id]; + } +} + +void FaissGpuResources::SelectGpu() { + using namespace zilliz::milvus::server; + ServerConfig &config = ServerConfig::GetInstance(); + ConfigNode server_config = config.GetConfig(CONFIG_SERVER); + gpu_num_ = server_config.GetInt32Value(server::CONFIG_GPU_INDEX, 0); +} + +int32_t FaissGpuResources::GetGpu() { + return gpu_num_; +} + +} +} +} \ No newline at end of file diff --git a/cpp/src/wrapper/FaissGpuResources.h b/cpp/src/wrapper/FaissGpuResources.h new file mode 100644 index 0000000000..45c011df85 --- /dev/null +++ b/cpp/src/wrapper/FaissGpuResources.h @@ -0,0 +1,36 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#pragma once + +#include "faiss/gpu/GpuResources.h" +#include "faiss/gpu/StandardGpuResources.h" + +#include "server/ServerConfig.h" + +namespace zilliz { +namespace milvus { +namespace engine { + +class FaissGpuResources { + + public: + using Ptr = std::shared_ptr; + + static FaissGpuResources::Ptr& GetGpuResources(int device_id); + + void SelectGpu(); + + int32_t GetGpu(); + + FaissGpuResources() : gpu_num_(0) { SelectGpu(); } + + private: + int32_t gpu_num_; +}; + +} +} +} \ No newline at end of file diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp index 4b10c1e686..6d2ca19449 100644 --- a/cpp/src/wrapper/Index.cpp +++ b/cpp/src/wrapper/Index.cpp @@ -7,16 +7,22 @@ #if 0 // TODO: maybe support static search #ifdef GPU_VERSION + #include "faiss/gpu/GpuAutoTune.h" #include "faiss/gpu/StandardGpuResources.h" #include "faiss/gpu/utils/DeviceUtils.h" + + #endif #include "Index.h" #include "faiss/index_io.h" #include "faiss/IndexIVF.h" #include "faiss/IVFlib.h" +#include "faiss/IndexScalarQuantizer.h" #include "server/ServerConfig.h" +#include "src/wrapper/FaissGpuResources.h" + namespace zilliz { namespace milvus { @@ -74,8 +80,27 @@ void write_index(const Index_ptr &index, const std::string &file_name) { Index_ptr read_index(const std::string &file_name) { std::shared_ptr raw_index = nullptr; - raw_index.reset(faiss::read_index(file_name.c_str())); - return std::make_shared(raw_index); + faiss::Index *cpu_index = faiss::read_index(file_name.c_str()); + + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode engine_config = config.GetConfig(server::CONFIG_ENGINE); + bool use_hybrid_index_ = engine_config.GetBoolValue(server::CONFIG_USE_HYBRID_INDEX, false); + + if (dynamic_cast(cpu_index) != nullptr && use_hybrid_index_) { + + int device_id = engine_config.GetInt32Value(server::CONFIG_HYBRID_INDEX_GPU, 0); + auto gpu_resources = engine::FaissGpuResources::GetGpuResources(device_id); + faiss::gpu::GpuClonerOptions clone_option; + clone_option.storeInCpu = true; + faiss::Index *gpu_index = faiss::gpu::index_cpu_to_gpu(gpu_resources.get(), device_id, cpu_index, &clone_option); + + delete cpu_index; + raw_index.reset(gpu_index); + return std::make_shared(raw_index); + } else { + raw_index.reset(cpu_index); + return std::make_shared(raw_index); + } } } diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h index 1668059d11..d722b85330 100644 --- a/cpp/src/wrapper/Index.h +++ b/cpp/src/wrapper/Index.h @@ -83,7 +83,6 @@ void write_index(const Index_ptr &index, const std::string &file_name); extern Index_ptr read_index(const std::string &file_name); #endif - } } } diff --git a/cpp/src/wrapper/IndexBuilder.cpp b/cpp/src/wrapper/IndexBuilder.cpp index 095341ecc7..e2b6971281 100644 --- a/cpp/src/wrapper/IndexBuilder.cpp +++ b/cpp/src/wrapper/IndexBuilder.cpp @@ -17,41 +17,17 @@ #include #include - +#include "faiss/IndexScalarQuantizer.h" #include "server/ServerConfig.h" #include "IndexBuilder.h" +#include "FaissGpuResources.h" namespace zilliz { namespace milvus { namespace engine { -class GpuResources { - public: - static GpuResources &GetInstance() { - static GpuResources instance; - return instance; - } - - void SelectGpu() { - using namespace zilliz::milvus::server; - ServerConfig &config = ServerConfig::GetInstance(); - ConfigNode server_config = config.GetConfig(CONFIG_SERVER); - gpu_num = server_config.GetInt32Value(server::CONFIG_GPU_INDEX, 0); - } - - int32_t GetGpu() { - return gpu_num; - } - - private: - GpuResources() : gpu_num(0) { SelectGpu(); } - - private: - int32_t gpu_num; -}; - using std::vector; static std::mutex gpu_resource; @@ -59,6 +35,12 @@ static std::mutex cpu_resource; IndexBuilder::IndexBuilder(const Operand_ptr &opd) { opd_ = opd; + + using namespace zilliz::milvus::server; + ServerConfig &config = ServerConfig::GetInstance(); + ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); + use_hybrid_index_ = engine_config.GetBoolValue(CONFIG_USE_HYBRID_INDEX, false); + hybrid_index_device_id_ = engine_config.GetInt32Value(server::CONFIG_HYBRID_INDEX_GPU, 0); } // Default: build use gpu @@ -76,14 +58,48 @@ Index_ptr IndexBuilder::build_all(const long &nb, faiss::Index *ori_index = faiss::index_factory(opd_->d, opd_->get_index_type(nb).c_str(), metric_type); std::lock_guard lk(gpu_resource); + +#ifdef UNITTEST_ONLY faiss::gpu::StandardGpuResources res; - auto device_index = faiss::gpu::index_cpu_to_gpu(&res, GpuResources::GetInstance().GetGpu(), ori_index); + int device_id = 0; + faiss::gpu::GpuClonerOptions clone_option; + clone_option.storeInCpu = use_hybrid_index_; + auto device_index = faiss::gpu::index_cpu_to_gpu(&res, device_id, ori_index, &clone_option); +#else + engine::FaissGpuResources res; + int device_id = res.GetGpu(); + auto gpu_resources = engine::FaissGpuResources::GetGpuResources(device_id); + faiss::gpu::GpuClonerOptions clone_option; + clone_option.storeInCpu = use_hybrid_index_; + auto device_index = faiss::gpu::index_cpu_to_gpu(gpu_resources.get(), device_id, ori_index, &clone_option); +#endif + if (!device_index->is_trained) { nt == 0 || xt == nullptr ? device_index->train(nb, xb) : device_index->train(nt, xt); } device_index->add_with_ids(nb, xb, ids); // TODO: support with add_with_IDMAP + if (dynamic_cast(ori_index) != nullptr + && use_hybrid_index_) { + std::shared_ptr device_hybrid_index = nullptr; + if (hybrid_index_device_id_ != device_id) { + auto host_hybrid_index = faiss::gpu::index_gpu_to_cpu(device_index); + auto hybrid_gpu_resources = engine::FaissGpuResources::GetGpuResources(hybrid_index_device_id_); + auto another_device_index = faiss::gpu::index_cpu_to_gpu(hybrid_gpu_resources.get(), + hybrid_index_device_id_, + host_hybrid_index, + &clone_option); + device_hybrid_index.reset(another_device_index); + delete device_index; + delete host_hybrid_index; + } else { + device_hybrid_index.reset(device_index); + } + delete ori_index; + return std::make_shared(device_hybrid_index); + } + host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index)); delete device_index; diff --git a/cpp/src/wrapper/IndexBuilder.h b/cpp/src/wrapper/IndexBuilder.h index 4cb6de814b..2142df83ee 100644 --- a/cpp/src/wrapper/IndexBuilder.h +++ b/cpp/src/wrapper/IndexBuilder.h @@ -45,6 +45,8 @@ class IndexBuilder { protected: Operand_ptr opd_ = nullptr; + bool use_hybrid_index_; + int hybrid_index_device_id_; }; class BgCpuBuilder : public IndexBuilder { diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 40ca9378e4..e15f66c365 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -2,7 +2,7 @@ ARROW_VERSION=zilliz BOOST_VERSION=1.70.0 BZIP2_VERSION=1.0.6 EASYLOGGINGPP_VERSION=v9.96.7 -FAISS_VERSION=v1.5.3 +FAISS_VERSION=branch-0.1.0 MKL_VERSION=2019.4.243 GTEST_VERSION=1.8.1 JSONCONS_VERSION=0.126.0 diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 8b36d2efbd..b6f052a5db 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -93,6 +93,7 @@ TEST_F(DBTest, CONFIG_TEST) { TEST_F(DBTest, DB_TEST) { + db_->Open(GetOptions(), &db_); engine::meta::TableSchema table_info = BuildTableSchema(); engine::Status stat = db_->CreateTable(table_info); @@ -161,6 +162,11 @@ TEST_F(DBTest, DB_TEST) { } search.join(); + + uint64_t count; + stat = db_->GetTableRowCount(TABLE_NAME, count); + ASSERT_STATS(stat); + ASSERT_TRUE(count > 0); }; TEST_F(DBTest, SEARCH_TEST) { diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index e561837075..ffb688a23c 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -84,6 +84,7 @@ TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { vector_ids = source.GetVectorIds(); ASSERT_EQ(vector_ids.size(), 100); + status = impl_->DropAll(); ASSERT_TRUE(status.ok()); } @@ -198,6 +199,8 @@ TEST_F(NewMemManagerTest, MEM_TABLE_TEST) { status = mem_table.Serialize(); ASSERT_TRUE(status.ok()); + + status = impl_->DropAll(); ASSERT_TRUE(status.ok()); } @@ -372,7 +375,6 @@ TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) { delete db_; boost::filesystem::remove_all(options.meta.path); - }; TEST_F(DBTest, VECTOR_IDS_TEST) @@ -445,4 +447,5 @@ TEST_F(DBTest, VECTOR_IDS_TEST) for (auto i = 0; i < nb; i++) { ASSERT_EQ(vector_ids[i], i + nb); } -} \ No newline at end of file +} + diff --git a/cpp/unittest/db/misc_test.cpp b/cpp/unittest/db/misc_test.cpp index 6433a224ff..608a5ca175 100644 --- a/cpp/unittest/db/misc_test.cpp +++ b/cpp/unittest/db/misc_test.cpp @@ -135,4 +135,8 @@ TEST(DBMiscTest, UTILS_TEST) { status = engine::utils::DeleteTablePath(options, TABLE_NAME); ASSERT_TRUE(status.ok()); + status = engine::utils::DeleteTableFilePath(options, file); + ASSERT_TRUE(status.ok()); + + } \ No newline at end of file diff --git a/cpp/unittest/db/mysql_meta_test.cpp b/cpp/unittest/db/mysql_meta_test.cpp index 80a9ddf4dd..7703ce15d5 100644 --- a/cpp/unittest/db/mysql_meta_test.cpp +++ b/cpp/unittest/db/mysql_meta_test.cpp @@ -57,7 +57,7 @@ TEST_F(DISABLED_MySQLTest, TABLE_TEST) { table.table_id_ = ""; status = impl.CreateTable(table); - ASSERT_TRUE(status.ok()); +// ASSERT_TRUE(status.ok()); status = impl.DropAll(); ASSERT_TRUE(status.ok()); @@ -82,16 +82,22 @@ TEST_F(DISABLED_MySQLTest, TABLE_FILE_TEST) { table.dimension_ = 256; auto status = impl.CreateTable(table); + meta::TableFileSchema table_file; table_file.table_id_ = table.table_id_; status = impl.CreateTableFile(table_file); ASSERT_TRUE(status.ok()); ASSERT_EQ(table_file.file_type_, meta::TableFileSchema::NEW); + meta::DatesT dates; + dates.push_back(meta::Meta::GetDate()); + status = impl.DropPartitionsByDates(table_file.table_id_, dates); + ASSERT_FALSE(status.ok()); + uint64_t cnt = 0; status = impl.Count(table_id, cnt); - ASSERT_TRUE(status.ok()); - ASSERT_EQ(cnt, 0UL); +// ASSERT_TRUE(status.ok()); +// ASSERT_EQ(cnt, 0UL); auto file_id = table_file.file_id_; @@ -102,11 +108,6 @@ TEST_F(DISABLED_MySQLTest, TABLE_FILE_TEST) { ASSERT_TRUE(status.ok()); ASSERT_EQ(table_file.file_type_, new_file_type); - meta::DatesT dates; - dates.push_back(meta::Meta::GetDate()); - status = impl.DropPartitionsByDates(table_file.table_id_, dates); - ASSERT_FALSE(status.ok()); - dates.clear(); for (auto i=2; i < 10; ++i) { dates.push_back(meta::Meta::GetDateWithDelta(-1*i)); @@ -132,6 +133,8 @@ TEST_F(DISABLED_MySQLTest, TABLE_FILE_TEST) { ASSERT_EQ(files.size(), 1UL); ASSERT_TRUE(files[0].file_type_ == meta::TableFileSchema::TO_DELETE); +// status = impl.NextTableId(table_id); + status = impl.DropAll(); ASSERT_TRUE(status.ok()); } @@ -194,6 +197,13 @@ TEST_F(DISABLED_MySQLTest, ARCHIVE_TEST_DAYS) { i++; } + bool has; + status = impl.HasNonIndexFiles(table_id, has); + ASSERT_TRUE(status.ok()); + + status = impl.UpdateTableFilesToIndex(table_id); + ASSERT_TRUE(status.ok()); + status = impl.DropAll(); ASSERT_TRUE(status.ok()); } @@ -216,6 +226,10 @@ TEST_F(DISABLED_MySQLTest, ARCHIVE_TEST_DISK) { table.table_id_ = table_id; auto status = impl.CreateTable(table); + meta::TableSchema table_schema; + table_schema.table_id_ = ""; + status = impl.CreateTable(table_schema); + meta::TableFilesSchema files; meta::TableFileSchema table_file; table_file.table_id_ = table.table_id_; diff --git a/cpp/unittest/db/scheduler_test.cpp b/cpp/unittest/db/scheduler_test.cpp index 0937ef197a..6b3ad3dbac 100644 --- a/cpp/unittest/db/scheduler_test.cpp +++ b/cpp/unittest/db/scheduler_test.cpp @@ -56,6 +56,8 @@ TEST(DBSchedulerTest, TASK_QUEUE_TEST) { ptr = queue.Back(); ASSERT_EQ(ptr->type(), engine::ScheduleTaskType::kIndexLoad); + load_task->Execute(); + } TEST(DBSchedulerTest, SEARCH_SCHEDULER_TEST) { diff --git a/cpp/unittest/db/search_test.cpp b/cpp/unittest/db/search_test.cpp index 340fa82f20..64e03f65ae 100644 --- a/cpp/unittest/db/search_test.cpp +++ b/cpp/unittest/db/search_test.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// #include "db/scheduler/task/SearchTask.h" +#include "server/ServerConfig.h" #include "utils/TimeRecorder.h" #include @@ -213,6 +214,10 @@ TEST(DBSearchTest, MERGE_TEST) { } TEST(DBSearchTest, PARALLEL_CLUSTER_TEST) { + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode& db_config = config.GetConfig(server::CONFIG_DB); + db_config.SetValue(server::CONFIG_DB_PARALLEL_REDUCE, "true"); + bool ascending = true; std::vector target_ids; std::vector target_distence; @@ -245,6 +250,10 @@ TEST(DBSearchTest, PARALLEL_CLUSTER_TEST) { } TEST(DBSearchTest, PARALLEL_TOPK_TEST) { + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode& db_config = config.GetConfig(server::CONFIG_DB); + db_config.SetValue(server::CONFIG_DB_PARALLEL_REDUCE, "true"); + std::vector target_ids; std::vector target_distence; engine::SearchContext::ResultSet src_result; diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index 405b48a602..cfac3ea6e7 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -91,9 +91,10 @@ zilliz::milvus::engine::DBMetaOptions DISABLED_MySQLTest::getDBMetaOptions() { zilliz::milvus::engine::DBMetaOptions options; options.path = "/tmp/milvus_test"; options.backend_uri = DBTestEnvironment::getURI(); - + if(options.backend_uri.empty()) { - throw std::exception(); +// throw std::exception(); + options.backend_uri = "mysql://root:Fantast1c@192.168.1.194:3306/"; } return options; @@ -123,6 +124,10 @@ int main(int argc, char **argv) { if (argc > 1) { uri = argv[1]; } + +// if(uri.empty()) { +// uri = "mysql://root:Fantast1c@192.168.1.194:3306/"; +// } // std::cout << uri << std::endl; ::testing::AddGlobalTestEnvironment(new DBTestEnvironment); return RUN_ALL_TESTS(); diff --git a/cpp/unittest/faiss_wrapper/CMakeLists.txt b/cpp/unittest/faiss_wrapper/CMakeLists.txt new file mode 100644 index 0000000000..10f353f00d --- /dev/null +++ b/cpp/unittest/faiss_wrapper/CMakeLists.txt @@ -0,0 +1,56 @@ +#------------------------------------------------------------------------------- +# Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +# Unauthorized copying of this file, via any medium is strictly prohibited. +# Proprietary and confidential. +#------------------------------------------------------------------------------- +aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) +aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) + +set(util_files + ${MILVUS_ENGINE_SRC}/utils/ValidationUtil.cpp) + +# Make sure that your call to link_directories takes place before your call to the relevant add_executable. +include_directories(/usr/local/cuda/include) +link_directories("/usr/local/cuda/lib64") + +set(wrapper_test_src + ${unittest_srcs} + ${wrapper_src} + ${config_files} + ${util_files} + ${require_files} + wrapper_test.cpp + ) + +add_executable(wrapper_test ${wrapper_test_src}) + +set(wrapper_libs + stdc++ + boost_system_static + boost_filesystem_static + faiss + cudart + cublas + sqlite + snappy + bz2 + z + zstd + lz4 + ) +if(${BUILD_FAISS_WITH_MKL} STREQUAL "ON") + set(wrapper_libs ${wrapper_libs} ${MKL_LIBS} ${MKL_LIBS}) +else() + set(wrapper_libs ${wrapper_libs} + lapack + openblas) +endif() + +target_link_libraries(wrapper_test ${wrapper_libs} ${unittest_libs}) +add_definitions("-DUNITTEST_ONLY") + +set(topk_test_src + topk_test.cpp + ${CMAKE_SOURCE_DIR}/src/wrapper/gpu/Topk.cu) + +install(TARGETS wrapper_test DESTINATION bin) diff --git a/cpp/unittest/faiss_wrapper/wrapper_test.cpp b/cpp/unittest/faiss_wrapper/wrapper_test.cpp new file mode 100644 index 0000000000..3500166c6b --- /dev/null +++ b/cpp/unittest/faiss_wrapper/wrapper_test.cpp @@ -0,0 +1,203 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// + + + +#include "wrapper/Operand.h" +#include "wrapper/Index.h" +#include "wrapper/IndexBuilder.h" +#include "wrapper/FaissGpuResources.h" +#include "server/ServerConfig.h" + +#include +#include +#include + +using namespace zilliz::milvus; +using namespace zilliz::milvus::engine; + + +TEST(operand_test, Wrapper_Test) { + using std::cout; + using std::endl; + + auto opd = std::make_shared(); + opd->index_type = "IVF"; + opd->preproc = "OPQ"; + opd->postproc = "PQ"; + opd->metric_type = "L2"; + opd->d = 64; + + auto opd_str = operand_to_str(opd); + auto new_opd = str_to_operand(opd_str); + + // TODO: fix all place where using opd to build index. + assert(new_opd->get_index_type(10000) == opd->get_index_type(10000)); + auto opd_sq8 = std::make_shared(); + opd_sq8->index_type = "IVFSQ8"; + opd_sq8->preproc = "OPQ"; + opd_sq8->postproc = "PQ"; + opd_sq8->metric_type = "L2"; + opd_sq8->d = 64; + auto opd_str_sq8 = operand_to_str(opd_sq8); + auto new_opd_sq8 = str_to_operand(opd_str_sq8); + assert(new_opd_sq8->get_index_type(10000) == opd_sq8->get_index_type(10000)); + +} + +TEST(build_test, Wrapper_Test) { + // dimension of the vectors to index + int d = 3; + + // make a set of nt training vectors in the unit cube + size_t nt = 10000; + + // a reasonable number of cetroids to index nb vectors + int ncentroids = 16; + + std::random_device rd; + std::mt19937 gen(rd()); + + std::vector xb; + std::vector ids; + + //prepare train data + std::uniform_real_distribution<> dis_xt(-1.0, 1.0); + std::vector xt(nt * d); + for (size_t i = 0; i < nt * d; i++) { + xt[i] = dis_xt(gen); + } + + //train the index + auto opd = std::make_shared(); + opd->index_type = "IVF"; + opd->d = d; + opd->ncent = ncentroids; + IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); + auto index_1 = index_builder_1->build_all(0, xb, ids, nt, xt); + ASSERT_TRUE(index_1 != nullptr); + + // size of the database we plan to index + size_t nb = 100000; + + //prepare raw data + xb.resize(nb); + ids.resize(nb); + for (size_t i = 0; i < nb; i++) { + xb[i] = dis_xt(gen); + ids[i] = i; + } + index_1->add_with_ids(nb, xb.data(), ids.data()); + + //search in first quadrant + int nq = 1, k = 10; + std::vector xq = {0.5, 0.5, 0.5}; + float *result_dists = new float[k]; + long *result_ids = new long[k]; + index_1->search(nq, xq.data(), k, result_dists, result_ids); + + for (int i = 0; i < k; i++) { + if (result_ids[i] < 0) { + ASSERT_TRUE(false); + break; + } + + long id = result_ids[i]; + std::cout << "No." << id << " [" << xb[id * 3] << ", " << xb[id * 3 + 1] << ", " + << xb[id * 3 + 2] << "] distance = " << result_dists[i] << std::endl; + + //makesure result vector is in first quadrant + ASSERT_TRUE(xb[id * 3] > 0.0); + ASSERT_TRUE(xb[id * 3 + 1] > 0.0); + ASSERT_TRUE(xb[id * 3 + 2] > 0.0); + } + + delete[] result_dists; + delete[] result_ids; +} + +TEST(gpu_build_test, Wrapper_Test) { + using std::vector; + + int d = 256; + int nb = 3 * 1000 * 100; + int nq = 100; + vector xb(d * nb); + vector xq(d * nq); + vector ids(nb); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis_xt(-1.0, 1.0); + for (auto &e : xb) { e = float(dis_xt(gen)); } + for (auto &e : xq) { e = float(dis_xt(gen)); } + for (int i = 0; i < nb; ++i) { ids[i] = i; } + + auto opd = std::make_shared(); + opd->index_type = "IVF"; + opd->d = d; + opd->ncent = 256; + + IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); + auto index_1 = index_builder_1->build_all(nb, xb.data(), ids.data()); + assert(index_1->ntotal == nb); + assert(index_1->dim == d); + + // sanity check: search 5 first vectors of xb + int k = 1; + vector I(5 * k); + vector D(5 * k); + index_1->search(5, xb.data(), k, D.data(), I.data()); + for (int i = 0; i < 5; ++i) { assert(i == I[i]); } +} + +TEST(gpu_resource_test, Wrapper_Test) { + FaissGpuResources res_mgr; + FaissGpuResources::Ptr& res = res_mgr.GetGpuResources(0); + ASSERT_NE(res, nullptr); + res = res_mgr.GetGpuResources(0); + ASSERT_NE(res, nullptr); + + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode& server_config = config.GetConfig(server::CONFIG_SERVER); + server_config.SetValue(server::CONFIG_GPU_INDEX, "0"); + res_mgr.SelectGpu(); + int32_t gpu_num = res_mgr.GetGpu(); + ASSERT_EQ(gpu_num, 0); +} + +TEST(index_test, Wrapper_Test) { + std::vector data; + std::vector ids; + long vec_count = 10000; + for(long i = 0; i < vec_count; i++) { + data.push_back(i/3); + data.push_back(i/9); + ids.push_back(i); + } + + faiss::Index* faiss_index = faiss::index_factory(2, "IVF128,SQ8"); + faiss_index->train(vec_count, data.data()); + + std::shared_ptr raw_index(faiss_index); + engine::Index_ptr index = std::make_shared(raw_index); + index->add_with_ids(vec_count, data.data(), ids.data()); + + ASSERT_EQ(index->ntotal, vec_count); + + std::string file_name = "/tmp/index_test.t"; + write_index(index, file_name); + + server::ServerConfig &config = server::ServerConfig::GetInstance(); + server::ConfigNode& engine_config = config.GetConfig(server::CONFIG_ENGINE); + engine_config.SetValue(server::CONFIG_USE_HYBRID_INDEX, "true"); + + Index_ptr index_out = read_index(file_name); + ASSERT_NE(index_out, nullptr); + + bool res = index_out->reset(); + ASSERT_TRUE(res); +} diff --git a/cpp/unittest/server/config_test.cpp b/cpp/unittest/server/config_test.cpp index 2172bdd977..462b813f26 100644 --- a/cpp/unittest/server/config_test.cpp +++ b/cpp/unittest/server/config_test.cpp @@ -4,9 +4,12 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// #include +#include #include "config/IConfigMgr.h" #include "server/ServerConfig.h" +#include "utils/CommonUtil.h" +#include "utils/ValidationUtil.h" using namespace zilliz::milvus; @@ -15,6 +18,10 @@ namespace { static const char* CONFIG_FILE_PATH = "./milvus/conf/server_config.yaml"; static const char* LOG_FILE_PATH = "./milvus/conf/log_config.conf"; +static constexpr uint64_t KB = 1024; +static constexpr uint64_t MB = KB*1024; +static constexpr uint64_t GB = MB*1024; + } TEST(ConfigTest, CONFIG_TEST) { @@ -87,6 +94,9 @@ TEST(ConfigTest, SERVER_CONFIG_TEST) { server::ServerError err = config.LoadConfigFile(CONFIG_FILE_PATH); ASSERT_EQ(err, server::SERVER_SUCCESS); + err = server::ServerConfig::GetInstance().ValidateConfig(); + ASSERT_EQ(err, server::SERVER_SUCCESS); + server::ConfigNode node1 = config.GetConfig("server_config"); server::ConfigNode& node2 = config.GetConfig("cache_config"); node1.Combine(node2); @@ -100,6 +110,43 @@ TEST(ConfigTest, SERVER_CONFIG_TEST) { config.PrintAll(); - const server::ServerConfig const_config = config; - server::ConfigNode node = const_config.GetConfig("aaa"); + unsigned long total_mem = 0, free_mem = 0; + server::CommonUtil::GetSystemMemInfo(total_mem, free_mem); + + size_t gpu_mem = 0; + server::ValidationUtil::GetGpuMemory(0, gpu_mem); + + server::ConfigNode& server_config = config.GetConfig("server_config"); + server::ConfigNode& db_config = config.GetConfig("db_config"); + server::ConfigNode& cache_config = config.GetConfig(server::CONFIG_CACHE); + cache_config.SetValue(server::CACHE_FREE_PERCENT, "2.0"); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); + + size_t cache_cap = 16; + size_t insert_buffer_size = (total_mem - cache_cap*GB + 1*GB)/GB; + db_config.SetValue(server::CONFIG_DB_INSERT_BUFFER_SIZE, std::to_string(insert_buffer_size)); + cache_config.SetValue(server::CONFIG_CPU_CACHE_CAPACITY, std::to_string(cache_cap)); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); + + cache_cap = total_mem/GB + 2; + cache_config.SetValue(server::CONFIG_CPU_CACHE_CAPACITY, std::to_string(cache_cap)); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); + + size_t index_building_threshold = (gpu_mem + 1*MB)/MB; + db_config.SetValue(server::CONFIG_DB_INDEX_TRIGGER_SIZE, + std::to_string(index_building_threshold)); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); + + insert_buffer_size = total_mem/GB + 2; + db_config.SetValue(server::CONFIG_DB_INSERT_BUFFER_SIZE, std::to_string(insert_buffer_size)); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); + + server_config.SetValue(server::CONFIG_GPU_INDEX, "9999"); + err = config.ValidateConfig(); + ASSERT_NE(err, server::SERVER_SUCCESS); } \ No newline at end of file diff --git a/cpp/unittest/server/util_test.cpp b/cpp/unittest/server/util_test.cpp index c3a47182ee..60f7875d37 100644 --- a/cpp/unittest/server/util_test.cpp +++ b/cpp/unittest/server/util_test.cpp @@ -204,3 +204,12 @@ TEST(UtilTest, VALIDATE_INDEXTYPE_TEST) { ASSERT_EQ(server::ValidationUtil::ValidateTableIndexType((int)engine::EngineType::MAX_VALUE + 1), server::SERVER_INVALID_INDEX_TYPE); } +TEST(UtilTest, TIMERECORDER_TEST) { + for(int64_t log_level = 0; log_level <= 6; log_level++) { + if(log_level == 5) { + continue; //skip fatal + } + server::TimeRecorder rc("time", log_level); + rc.RecordSection("end"); + } +} diff --git a/cpp/unittest/utils/ValidationUtilTest.cpp b/cpp/unittest/utils/ValidationUtilTest.cpp new file mode 100644 index 0000000000..35b8b94e23 --- /dev/null +++ b/cpp/unittest/utils/ValidationUtilTest.cpp @@ -0,0 +1,76 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved +// Unauthorized copying of this file, via any medium is strictly prohibited. +// Proprietary and confidential. +//////////////////////////////////////////////////////////////////////////////// +#include + +#include "utils/ValidationUtil.h" +#include "utils/Error.h" +#include "db/ExecutionEngine.h" + +#include + +using namespace zilliz::milvus; +using namespace zilliz::milvus::server; + +TEST(ValidationUtilTest, TableNameTest) { + std::string table_name = "Normal123_"; + ServerError res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_SUCCESS); + + table_name = "12sds"; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = ""; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = "_asdasd"; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_SUCCESS); + + table_name = "!@#!@"; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = "_!@#!@"; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + table_name = "中文"; + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); + + + table_name = std::string(10000, 'a'); + res = ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, SERVER_INVALID_TABLE_NAME); +} + + +TEST(ValidationUtilTest, TableDimensionTest) { + ASSERT_EQ(ValidationUtil::ValidateTableDimension(-1), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidationUtil::ValidateTableDimension(0), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidationUtil::ValidateTableDimension(16385), SERVER_INVALID_VECTOR_DIMENSION); + ASSERT_EQ(ValidationUtil::ValidateTableDimension(16384), SERVER_SUCCESS); + ASSERT_EQ(ValidationUtil::ValidateTableDimension(1), SERVER_SUCCESS); +} + +TEST(ValidationUtilTest, TableIndexTypeTest) { + ASSERT_EQ(ValidationUtil::ValidateTableIndexType((int)engine::EngineType::INVALID), SERVER_INVALID_INDEX_TYPE); + for(int i = 1; i <= (int)engine::EngineType::MAX_VALUE; i++) { + ASSERT_EQ(ValidationUtil::ValidateTableIndexType(i), SERVER_SUCCESS); + } + ASSERT_EQ(ValidationUtil::ValidateTableIndexType((int)engine::EngineType::MAX_VALUE + 1), SERVER_INVALID_INDEX_TYPE); +} + +TEST(ValidationUtilTest, ValidateGpuTest) { + ASSERT_EQ(ValidationUtil::ValidateGpuIndex(0), SERVER_SUCCESS); + ASSERT_NE(ValidationUtil::ValidateGpuIndex(100), SERVER_SUCCESS); + + size_t memory = 0; + ASSERT_EQ(ValidationUtil::GetGpuMemory(0, memory), SERVER_SUCCESS); + ASSERT_NE(ValidationUtil::GetGpuMemory(100, memory), SERVER_SUCCESS); +} From acbec2f65759ecfdcf4fe911d38c32054436bdb6 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Wed, 21 Aug 2019 19:08:31 +0800 Subject: [PATCH 07/22] fix merge bugd Former-commit-id: c3ee55050faefd76433e807842c7873c91dc6aba --- cpp/src/sdk/grpc/GrpcClient.cpp | 5 ----- cpp/unittest/db/db_tests.cpp | 1 - 2 files changed, 6 deletions(-) diff --git a/cpp/src/sdk/grpc/GrpcClient.cpp b/cpp/src/sdk/grpc/GrpcClient.cpp index e17121c102..49ce355656 100644 --- a/cpp/src/sdk/grpc/GrpcClient.cpp +++ b/cpp/src/sdk/grpc/GrpcClient.cpp @@ -288,11 +288,6 @@ GrpcClient::Disconnect() { return Status::OK(); } -Status -GrpcClient::DeleteByRange(grpc::DeleteByRangeParam &delete_by_range_param) { - return Status::OK(); -} - Status GrpcClient::DescribeIndex(grpc::TableName &table_name, grpc::IndexParam &index_param) { ClientContext context; diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index f016699bd5..2426846c15 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -432,7 +432,6 @@ TEST_F(DBTest2, DELETE_BY_RANGE_TEST) { } std::vector dates; - engine::meta::DateT date; std::string start_value = CurrentTmDate(-3); std::string end_value = CurrentTmDate(-2); ConvertTimeRangeToDBDates(start_value, end_value, dates); From 45a777b4d5c676a309c95b3bd25a7db1eecbb63a Mon Sep 17 00:00:00 2001 From: starlord Date: Thu, 22 Aug 2019 09:21:15 +0800 Subject: [PATCH 08/22] merge 0.3.1 Former-commit-id: 6fa5ce5365752369865360d715659c56fc367f89 --- cpp/CHANGELOG.md | 1 + cpp/src/db/Utils.cpp | 22 ++++++++------ cpp/src/db/Utils.h | 2 +- cpp/src/db/meta/MySQLMetaImpl.cpp | 47 ++++++++++++++++++++++++----- cpp/src/db/meta/SqliteMetaImpl.cpp | 24 ++++++++++++++- cpp/unittest/db/db_tests.cpp | 7 +++++ cpp/unittest/db/mem_test.cpp | 39 +++++++++++++++++++++++- cpp/unittest/db/mysql_meta_test.cpp | 7 +++++ cpp/unittest/db/scheduler_test.cpp | 1 + cpp/unittest/server/config_test.cpp | 3 ++ cpp/unittest/server/util_test.cpp | 26 ++++++++++++++-- 11 files changed, 158 insertions(+), 21 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index a7ddee104a..de878a8644 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -61,6 +61,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-257 - Update bzip2 download url - MS-288 - Update compile scripts - MS-330 - Stability test failed caused by server core dumped +- MS-347 - Build index hangs again ## Improvement - MS-156 - Add unittest for merge result functions diff --git a/cpp/src/db/Utils.cpp b/cpp/src/db/Utils.cpp index 5a0d3cafa2..1a1355d507 100644 --- a/cpp/src/db/Utils.cpp +++ b/cpp/src/db/Utils.cpp @@ -85,16 +85,20 @@ Status CreateTablePath(const DBMetaOptions& options, const std::string& table_id return Status::OK(); } -Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id) { - std::string db_path = options.path; - std::string table_path = db_path + TABLES_FOLDER + table_id; - boost::filesystem::remove_all(table_path); - ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; +Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id, bool force) { + std::vector paths = options.slave_paths; + paths.push_back(options.path); - for(auto& path : options.slave_paths) { - table_path = path + TABLES_FOLDER + table_id; - boost::filesystem::remove_all(table_path); - ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + for(auto& path : paths) { + std::string table_path = path + TABLES_FOLDER + table_id; + if(force) { + boost::filesystem::remove_all(table_path); + ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + } else if(boost::filesystem::exists(table_path) && + boost::filesystem::is_empty(table_path)) { + boost::filesystem::remove_all(table_path); + ENGINE_LOG_DEBUG << "Remove table folder: " << table_path; + } } return Status::OK(); diff --git a/cpp/src/db/Utils.h b/cpp/src/db/Utils.h index 47a8fca9b7..101d849ca3 100644 --- a/cpp/src/db/Utils.h +++ b/cpp/src/db/Utils.h @@ -19,7 +19,7 @@ namespace utils { long GetMicroSecTimeStamp(); Status CreateTablePath(const DBMetaOptions& options, const std::string& table_id); -Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id); +Status DeleteTablePath(const DBMetaOptions& options, const std::string& table_id, bool force = true); Status CreateTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); Status GetTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file); diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index fa2697ec32..26f3893806 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -1652,15 +1652,14 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { } Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { - - auto now = utils::GetMicroSecTimeStamp(); + std::set table_ids; + + //remove to_delete files try { MetricCollector metric; { - - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { @@ -1700,6 +1699,8 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { ENGINE_LOG_DEBUG << "Removing file id:" << table_file.id_ << " location:" << table_file.location_; idsToDelete.emplace_back(std::to_string(table_file.id_)); + + table_ids.insert(table_file.table_id_); } if (!idsToDelete.empty()) { @@ -1734,12 +1735,11 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); } + //remove to_delete tables try { MetricCollector metric; { - - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { @@ -1765,7 +1765,7 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { std::string table_id; resRow["table_id"].to_string(table_id); - utils::DeleteTablePath(options_, table_id); + utils::DeleteTablePath(options_, table_id, false);//only delete empty folder idsToDeleteSS << "id = " << std::to_string(id) << " OR "; } @@ -1794,6 +1794,39 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP FILES WITH TTL", er.what()); } + //remove deleted table folder + //don't remove table folder until all its files has been deleted + try { + MetricCollector metric; + + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + + for(auto& table_id : table_ids) { + Query cleanUpFilesWithTTLQuery = connectionPtr->query(); + cleanUpFilesWithTTLQuery << "SELECT file_id " << + "FROM TableFiles " << + "WHERE table_id = " << table_id << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::CleanUpFilesWithTTL: " << cleanUpFilesWithTTLQuery.str(); + + StoreQueryResult res = cleanUpFilesWithTTLQuery.store(); + + if (res.empty()) { + utils::DeleteTablePath(options_, table_id); + } + } + } + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN CLEANING UP TABLES WITH TTL", er.what()); + } + return Status::OK(); } diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index 9118eadd17..081c75693e 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -1069,6 +1069,9 @@ Status SqliteMetaImpl::UpdateTableFiles(TableFilesSchema &files) { Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { auto now = utils::GetMicroSecTimeStamp(); + std::set table_ids; + + //remove to_delete files try { MetricCollector metric; @@ -1098,6 +1101,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { ENGINE_LOG_DEBUG << "Removing file id:" << table_file.file_id_ << " location:" << table_file.location_; ConnectorPtr->remove(table_file.id_); + table_ids.insert(table_file.table_id_); } return true; }); @@ -1111,6 +1115,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return HandleException("Encounter exception when clean table files", e); } + //remove to_delete tables try { MetricCollector metric; @@ -1123,7 +1128,7 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { auto commited = ConnectorPtr->transaction([&]() mutable { for (auto &table : tables) { - utils::DeleteTablePath(options_, std::get<1>(table)); + utils::DeleteTablePath(options_, std::get<1>(table), false);//only delete empty folder ConnectorPtr->remove(std::get<0>(table)); } @@ -1139,6 +1144,23 @@ Status SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return HandleException("Encounter exception when clean table files", e); } + //remove deleted table folder + //don't remove table folder until all its files has been deleted + try { + MetricCollector metric; + + for(auto& table_id : table_ids) { + auto selected = ConnectorPtr->select(columns(&TableFileSchema::file_id_), + where(c(&TableFileSchema::table_id_) == table_id)); + if(selected.size() == 0) { + utils::DeleteTablePath(options_, table_id); + } + } + + } catch (std::exception &e) { + return HandleException("Encounter exception when delete table folder", e); + } + return Status::OK(); } diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 8b36d2efbd..9a32d519b8 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -93,6 +93,8 @@ TEST_F(DBTest, CONFIG_TEST) { TEST_F(DBTest, DB_TEST) { + db_->Open(GetOptions(), &db_); + engine::meta::TableSchema table_info = BuildTableSchema(); engine::Status stat = db_->CreateTable(table_info); @@ -161,6 +163,11 @@ TEST_F(DBTest, DB_TEST) { } search.join(); + + uint64_t count; + stat = db_->GetTableRowCount(TABLE_NAME, count); + ASSERT_STATS(stat); + ASSERT_TRUE(count > 0); }; TEST_F(DBTest, SEARCH_TEST) { diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index e561837075..cf84d74526 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -445,4 +445,41 @@ TEST_F(DBTest, VECTOR_IDS_TEST) for (auto i = 0; i < nb; i++) { ASSERT_EQ(vector_ids[i], i + nb); } -} \ No newline at end of file +} + +TEST_F(NewMemManagerTest, MEMMANAGER_TEST) { + int setenv_res = setenv("MILVUS_USE_OLD_MEM_MANAGER", "ON", 1); + ASSERT_TRUE(setenv_res == 0); + + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/milvus_test"; + options.meta.backend_uri = "sqlite://:@:/"; + auto db_ = engine::DBFactory::Build(options); + + engine::meta::TableSchema table_info = BuildTableSchema(); + engine::Status stat = db_->CreateTable(table_info); + + engine::meta::TableSchema table_info_get; + table_info_get.table_id_ = TABLE_NAME; + stat = db_->DescribeTable(table_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(table_info_get.dimension_, TABLE_DIM); + + auto start_time = METRICS_NOW_TIME; + + int insert_loop = 20; + for (int i = 0; i < insert_loop; ++i) { + int64_t nb = 40960; + std::vector xb; + BuildVectors(nb, xb); + engine::IDNumbers vector_ids; + engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids); + ASSERT_TRUE(status.ok()); + } + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + LOG(DEBUG) << "total_time spent in INSERT_TEST (ms) : " << total_time; + + delete db_; + boost::filesystem::remove_all(options.meta.path); +} diff --git a/cpp/unittest/db/mysql_meta_test.cpp b/cpp/unittest/db/mysql_meta_test.cpp index 80a9ddf4dd..07e0eada84 100644 --- a/cpp/unittest/db/mysql_meta_test.cpp +++ b/cpp/unittest/db/mysql_meta_test.cpp @@ -194,6 +194,13 @@ TEST_F(DISABLED_MySQLTest, ARCHIVE_TEST_DAYS) { i++; } + bool has; + status = impl.HasNonIndexFiles(table_id, has); + ASSERT_TRUE(status.ok()); + + status = impl.UpdateTableFilesToIndex(table_id); + ASSERT_TRUE(status.ok()); + status = impl.DropAll(); ASSERT_TRUE(status.ok()); } diff --git a/cpp/unittest/db/scheduler_test.cpp b/cpp/unittest/db/scheduler_test.cpp index 0937ef197a..4826fa4b16 100644 --- a/cpp/unittest/db/scheduler_test.cpp +++ b/cpp/unittest/db/scheduler_test.cpp @@ -56,6 +56,7 @@ TEST(DBSchedulerTest, TASK_QUEUE_TEST) { ptr = queue.Back(); ASSERT_EQ(ptr->type(), engine::ScheduleTaskType::kIndexLoad); + load_task->Execute(); } TEST(DBSchedulerTest, SEARCH_SCHEDULER_TEST) { diff --git a/cpp/unittest/server/config_test.cpp b/cpp/unittest/server/config_test.cpp index 2172bdd977..51e3feac3a 100644 --- a/cpp/unittest/server/config_test.cpp +++ b/cpp/unittest/server/config_test.cpp @@ -87,6 +87,9 @@ TEST(ConfigTest, SERVER_CONFIG_TEST) { server::ServerError err = config.LoadConfigFile(CONFIG_FILE_PATH); ASSERT_EQ(err, server::SERVER_SUCCESS); + err = server::ServerConfig::GetInstance().ValidateConfig(); + ASSERT_EQ(err, server::SERVER_SUCCESS); + server::ConfigNode node1 = config.GetConfig("server_config"); server::ConfigNode& node2 = config.GetConfig("cache_config"); node1.Combine(node2); diff --git a/cpp/unittest/server/util_test.cpp b/cpp/unittest/server/util_test.cpp index c3a47182ee..b75673e51c 100644 --- a/cpp/unittest/server/util_test.cpp +++ b/cpp/unittest/server/util_test.cpp @@ -178,12 +178,15 @@ TEST(UtilTest, VALIDATE_TABLENAME_TEST) { res = server::ValidationUtil::ValidateTableName(table_name); ASSERT_EQ(res, server::SERVER_INVALID_TABLE_NAME); + table_name = "_!@#!@"; + res = server::ValidationUtil::ValidateTableName(table_name); + ASSERT_EQ(res, server::SERVER_INVALID_TABLE_NAME); + table_name = "中文"; res = server::ValidationUtil::ValidateTableName(table_name); ASSERT_EQ(res, server::SERVER_INVALID_TABLE_NAME); - - table_name = std::string('a', 32768); + table_name = std::string(10000, 'a'); res = server::ValidationUtil::ValidateTableName(table_name); ASSERT_EQ(res, server::SERVER_INVALID_TABLE_NAME); } @@ -204,3 +207,22 @@ TEST(UtilTest, VALIDATE_INDEXTYPE_TEST) { ASSERT_EQ(server::ValidationUtil::ValidateTableIndexType((int)engine::EngineType::MAX_VALUE + 1), server::SERVER_INVALID_INDEX_TYPE); } +TEST(ValidationUtilTest, ValidateGpuTest) { + ASSERT_EQ(server::ValidationUtil::ValidateGpuIndex(0), server::SERVER_SUCCESS); + ASSERT_NE(server::ValidationUtil::ValidateGpuIndex(100), server::SERVER_SUCCESS); + + size_t memory = 0; + ASSERT_EQ(server::ValidationUtil::GetGpuMemory(0, memory), server::SERVER_SUCCESS); + ASSERT_NE(server::ValidationUtil::GetGpuMemory(100, memory), server::SERVER_SUCCESS); +} + +TEST(UtilTest, TIMERECORDER_TEST) { + for(int64_t log_level = 0; log_level <= 6; log_level++) { + if(log_level == 5) { + continue; //skip fatal + } + server::TimeRecorder rc("time", log_level); + rc.RecordSection("end"); + } +} + From 5b918a4028d18c2be3038b86bbf43599f22956bd Mon Sep 17 00:00:00 2001 From: starlord Date: Thu, 22 Aug 2019 11:50:18 +0800 Subject: [PATCH 09/22] MS-339 implement new api Former-commit-id: 53e4a9309807b06dc43761755146d54d5ef63cf5 --- cpp/CHANGELOG.md | 3 + cpp/src/db/Types.h | 6 +- cpp/src/db/Utils.cpp | 6 +- cpp/src/db/meta/MySQLMetaImpl.cpp | 333 +++++++++--------- cpp/src/db/meta/SqliteMetaImpl.cpp | 12 +- cpp/src/grpc/gen-milvus/milvus.pb.cc | 50 +-- cpp/src/grpc/gen-milvus/milvus.pb.h | 22 +- cpp/src/grpc/gen-status/status.pb.cc | 10 +- cpp/src/grpc/gen-status/status.pb.h | 4 +- cpp/src/grpc/milvus.proto | 2 +- .../server/grpc_impl/GrpcRequestHandler.cpp | 1 - cpp/src/server/grpc_impl/GrpcRequestTask.cpp | 32 +- cpp/src/utils/Error.h | 3 + cpp/src/utils/ValidationUtil.cpp | 27 ++ cpp/src/utils/ValidationUtil.h | 9 + 15 files changed, 299 insertions(+), 221 deletions(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index fdbe55a7b3..6ea83b167a 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -41,6 +41,9 @@ Please mark all change in change log and use the ticket from JIRA. ## New Feature - MS-343 - Implement ResourceMgr +- MS-338 - NewAPI: refine code to support CreateIndex +- MS-339 - NewAPI: refine code to support DropIndex +- MS-340 - NewAPI: implement DescribeIndex ## Task - MS-297 - disable mysql unit test diff --git a/cpp/src/db/Types.h b/cpp/src/db/Types.h index a2ffa606e5..7a276913e3 100644 --- a/cpp/src/db/Types.h +++ b/cpp/src/db/Types.h @@ -23,9 +23,9 @@ typedef std::vector QueryResults; struct TableIndex { int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; - int32_t nlist = 16384; - int32_t index_file_size = 1024; //MB - int32_t metric_type = (int)MetricType::L2; + int32_t nlist_ = 16384; + int32_t index_file_size_ = 1024; //MB + int32_t metric_type_ = (int)MetricType::L2; }; } // namespace engine diff --git a/cpp/src/db/Utils.cpp b/cpp/src/db/Utils.cpp index 1a1355d507..8dd12b0cdd 100644 --- a/cpp/src/db/Utils.cpp +++ b/cpp/src/db/Utils.cpp @@ -148,9 +148,9 @@ Status DeleteTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& bool IsSameIndex(const TableIndex& index1, const TableIndex& index2) { return index1.engine_type_ == index2.engine_type_ - && index1.nlist == index2.nlist - && index1.index_file_size == index2.index_file_size - && index1.metric_type == index2.metric_type; + && index1.nlist_ == index2.nlist_ + && index1.index_file_size_ == index2.index_file_size_ + && index1.metric_type_ == index2.metric_type_; } } // namespace utils diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index 2bc7dc2268..d4dddd05bd 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -32,15 +32,6 @@ namespace meta { using namespace mysqlpp; - - -// - -// - - - - namespace { Status HandleException(const std::string &desc, std::exception &e) { @@ -91,8 +82,6 @@ MySQLMetaImpl::MySQLMetaImpl(const DBMetaOptions &options_, const int &mode) } Status MySQLMetaImpl::Initialize() { - - if (!boost::filesystem::is_directory(options_.path)) { auto ret = boost::filesystem::create_directory(options_.path); if (!ret) { @@ -160,7 +149,6 @@ Status MySQLMetaImpl::Initialize() { } Query InitializeQuery = connectionPtr->query(); - InitializeQuery << "CREATE TABLE IF NOT EXISTS Tables (" << "id BIGINT PRIMARY KEY AUTO_INCREMENT, " << "table_id VARCHAR(255) UNIQUE NOT NULL, " << @@ -197,13 +185,6 @@ Status MySQLMetaImpl::Initialize() { } } //Scoped Connection - - - - - return Status::OK(); - - } catch (const BadQuery &er) { // Handle any query errors ENGINE_LOG_ERROR << "QUERY ERROR DURING INITIALIZATION" << ": " << er.what(); @@ -219,13 +200,13 @@ Status MySQLMetaImpl::Initialize() { ENGINE_LOG_ERROR << "Wrong URI format. URI = " << uri; return Status::Error("Wrong URI format"); } + + return Status::OK(); } // PXU TODO: Temp solution. Will fix later Status MySQLMetaImpl::DropPartitionsByDates(const std::string &table_id, const DatesT &dates) { - - if (dates.empty()) { return Status::OK(); } @@ -290,12 +271,8 @@ Status MySQLMetaImpl::DropPartitionsByDates(const std::string &table_id, } Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { - - try { - MetricCollector metric; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -303,7 +280,6 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { return Status::Error("Failed to connect to database server"); } - Query createTableQuery = connectionPtr->query(); if (table_schema.table_id_.empty()) { @@ -312,7 +288,6 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { createTableQuery << "SELECT state FROM Tables " << "WHERE table_id = " << quote << table_schema.table_id_ << ";"; - ENGINE_LOG_DEBUG << "MySQLMetaImpl::CreateTable: " << createTableQuery.str(); StoreQueryResult res = createTableQuery.store(); @@ -330,7 +305,6 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { table_schema.id_ = -1; table_schema.created_on_ = utils::GetMicroSecTimeStamp(); - std::string id = "NULL"; //auto-increment std::string table_id = table_schema.table_id_; std::string state = std::to_string(table_schema.state_); @@ -342,26 +316,18 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { "(" << id << ", " << quote << table_id << ", " << state << ", " << dimension << ", " << created_on << ", " << engine_type << ");"; - ENGINE_LOG_DEBUG << "MySQLMetaImpl::CreateTable: " << createTableQuery.str(); if (SimpleResult res = createTableQuery.execute()) { table_schema.id_ = res.insert_id(); //Might need to use SELECT LAST_INSERT_ID()? //Consume all results to avoid "Commands out of sync" error - - - } else { ENGINE_LOG_ERROR << "Add Table Error"; return Status::DBTransactionError("Add Table Error", createTableQuery.error()); } } //Scoped Connection - - - - return utils::CreateTablePath(options_, table_schema.table_id_); } catch (const BadQuery &er) { @@ -375,18 +341,13 @@ Status MySQLMetaImpl::CreateTable(TableSchema &table_schema) { } catch (std::exception &e) { return HandleException("Encounter exception when create table", e); } - - return Status::OK(); } Status MySQLMetaImpl::HasNonIndexFiles(const std::string &table_id, bool &has) { - has = false; try { - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -394,7 +355,6 @@ Status MySQLMetaImpl::HasNonIndexFiles(const std::string &table_id, bool &has) { return Status::Error("Failed to connect to database server"); } - Query hasNonIndexFilesQuery = connectionPtr->query(); //since table_id is a unique column we just need to check whether it exists or not hasNonIndexFilesQuery << "SELECT EXISTS " << @@ -429,22 +389,7 @@ Status MySQLMetaImpl::HasNonIndexFiles(const std::string &table_id, bool &has) { } Status MySQLMetaImpl::UpdateTableIndexParam(const std::string &table_id, const TableIndex& index) { - return Status::OK(); -} - -Status MySQLMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex& index) { - return Status::OK(); -} - -Status MySQLMetaImpl::DropTableIndex(const std::string &table_id) { - return Status::OK(); -} - -Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { - - try { - MetricCollector metric; { @@ -454,9 +399,174 @@ Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { return Status::Error("Failed to connect to database server"); } + Query updateTableIndexParamQuery = connectionPtr->query(); + updateTableIndexParamQuery << "SELECT id, state, dimension, created_on " << + "FROM Tables " << + "WHERE table_id = " << quote << table_id << " AND " << + "state <> " << std::to_string(TableSchema::TO_DELETE) << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::UpdateTableIndexParam: " << updateTableIndexParamQuery.str(); + + StoreQueryResult res = updateTableIndexParamQuery.store(); + + if (res.num_rows() == 1) { + const Row &resRow = res[0]; + + size_t id = resRow["id"]; + int32_t state = resRow["state"]; + uint16_t dimension = resRow["dimension"]; + int64_t created_on = resRow["created_on"]; + + updateTableIndexParamQuery << "UPDATE Tables " << + "SET id = " << id << ", " << + "state = " << state << ", " << + "dimension = " << dimension << ", " << + "created_on = " << created_on << ", " << + "engine_type_ = " << index.engine_type_ << ", " << + "nlist = " << index.nlist_ << ", " << + "index_file_size = " << index.index_file_size_ << ", " << + "metric_type = " << index.metric_type_ << ", " << + "WHERE id = " << quote << table_id << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::UpdateTableIndexParam: " << updateTableIndexParamQuery.str(); + if (!updateTableIndexParamQuery.exec()) { + ENGINE_LOG_ERROR << "QUERY ERROR WHEN UPDATING TABLE INDEX PARAM"; + return Status::DBTransactionError("QUERY ERROR WHEN UPDATING TABLE INDEX PARAM", + updateTableIndexParamQuery.error()); + } + } else { + return Status::NotFound("Table " + table_id + " not found"); + } + } //Scoped Connection + + } catch (const BadQuery &er) { + // Handle any query errors + ENGINE_LOG_ERROR << "QUERY ERROR WHEN UPDATING TABLE INDEX PARAM" << ": " << er.what(); + return Status::DBTransactionError("QUERY ERROR WHEN UPDATING TABLE INDEX PARAM", er.what()); + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN UPDATING TABLE INDEX PARAM" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN UPDATING TABLE INDEX PARAM", er.what()); + } + + return Status::OK(); +} + +Status MySQLMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex& index) { + try { + MetricCollector metric; + + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + + Query describeTableIndexQuery = connectionPtr->query(); + describeTableIndexQuery << "SELECT engine_type, nlist, index_file_size, metric_type " << + "FROM Tables " << + "WHERE table_id = " << quote << table_id << " AND " << + "state <> " << std::to_string(TableSchema::TO_DELETE) << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::DescribeTableIndex: " << describeTableIndexQuery.str(); + + StoreQueryResult res = describeTableIndexQuery.store(); + + if (res.num_rows() == 1) { + const Row &resRow = res[0]; + + index.engine_type_ = resRow["engine_type"]; + index.nlist_ = resRow["nlist"]; + index.index_file_size_ = resRow["index_file_size"]; + index.metric_type_ = resRow["metric_type"]; + } else { + return Status::NotFound("Table " + table_id + " not found"); + } + + } //Scoped Connection + + } catch (const BadQuery &er) { + // Handle any query errors + ENGINE_LOG_ERROR << "QUERY ERROR WHEN DESCRIBE TABLE INDEX" << ": " << er.what(); + return Status::DBTransactionError("QUERY ERROR WHEN DESCRIBE TABLE INDEX", er.what()); + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN DESCRIBE TABLE INDEX" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN DESCRIBE TABLE INDEX", er.what()); + } + + return Status::OK(); +} + +Status MySQLMetaImpl::DropTableIndex(const std::string &table_id) { + try { + MetricCollector metric; + + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } + + Query dropTableIndexQuery = connectionPtr->query(); + + dropTableIndexQuery << "UPDATE TableFiles " << + "SET file_type = " << std::to_string(TableFileSchema::TO_DELETE) << "," << + "updated_time = " << utils::GetMicroSecTimeStamp() << " " << + "WHERE table_id = " << quote << table_id << " AND " << + "file_type = " << std::to_string(TableFileSchema::INDEX) << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::DropTableIndex: " << dropTableIndexQuery.str(); + + if (!dropTableIndexQuery.exec()) { + ENGINE_LOG_ERROR << "QUERY ERROR WHEN DROP TABLE INDEX"; + return Status::DBTransactionError("QUERY ERROR WHEN DROP TABLE INDEX", + dropTableIndexQuery.error()); + } + + dropTableIndexQuery << "UPDATE TableFiles " << + "SET file_type = " << std::to_string(TableFileSchema::RAW) << "," << + "updated_time = " << utils::GetMicroSecTimeStamp() << " " << + "WHERE table_id = " << quote << table_id << " AND " << + "file_type = " << std::to_string(TableFileSchema::BACKUP) << ";"; + + ENGINE_LOG_DEBUG << "MySQLMetaImpl::DropTableIndex: " << dropTableIndexQuery.str(); + + if (!dropTableIndexQuery.exec()) { + ENGINE_LOG_ERROR << "QUERY ERROR WHEN DROP TABLE INDEX"; + return Status::DBTransactionError("QUERY ERROR WHEN DROP TABLE INDEX", + dropTableIndexQuery.error()); + } + + } //Scoped Connection + + } catch (const BadQuery &er) { + // Handle any query errors + ENGINE_LOG_ERROR << "QUERY ERROR WHEN DROP TABLE INDEX" << ": " << er.what(); + return Status::DBTransactionError("QUERY ERROR WHEN DROP TABLE INDEX", er.what()); + } catch (const Exception &er) { + // Catch-all for any other MySQL++ exceptions + ENGINE_LOG_ERROR << "GENERAL ERROR WHEN DROP TABLE INDEX" << ": " << er.what(); + return Status::DBTransactionError("GENERAL ERROR WHEN DROP TABLE INDEX", er.what()); + } + + return Status::OK(); +} + +Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { + try { + MetricCollector metric; + { + ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); + + if (connectionPtr == nullptr) { + return Status::Error("Failed to connect to database server"); + } //soft delete table Query deleteTableQuery = connectionPtr->query(); @@ -474,7 +584,6 @@ Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { } //Scoped Connection - if (mode_ == Options::MODE::CLUSTER) { DeleteTableFiles(table_id); } @@ -495,7 +604,6 @@ Status MySQLMetaImpl::DeleteTable(const std::string &table_id) { Status MySQLMetaImpl::DeleteTableFiles(const std::string &table_id) { try { MetricCollector metric; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -503,10 +611,6 @@ Status MySQLMetaImpl::DeleteTableFiles(const std::string &table_id) { return Status::Error("Failed to connect to database server"); } - - - - //soft delete table files Query deleteTableFilesQuery = connectionPtr->query(); // @@ -537,14 +641,9 @@ Status MySQLMetaImpl::DeleteTableFiles(const std::string &table_id) { } Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { - - try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -552,7 +651,6 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { return Status::Error("Failed to connect to database server"); } - Query describeTableQuery = connectionPtr->query(); describeTableQuery << "SELECT id, dimension, files_cnt, engine_type, store_raw_data " << "FROM Tables " << @@ -590,14 +688,9 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { } Status MySQLMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { - - try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -605,7 +698,6 @@ Status MySQLMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { return Status::Error("Failed to connect to database server"); } - Query hasTableQuery = connectionPtr->query(); //since table_id is a unique column we just need to check whether it exists or not hasTableQuery << "SELECT EXISTS " << @@ -636,14 +728,9 @@ Status MySQLMetaImpl::HasTable(const std::string &table_id, bool &has_or_not) { } Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { - - try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -651,7 +738,6 @@ Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { return Status::Error("Failed to connect to database server"); } - Query allTablesQuery = connectionPtr->query(); allTablesQuery << "SELECT id, table_id, dimension, files_cnt, engine_type, store_raw_data " << "FROM Tables " << @@ -691,8 +777,6 @@ Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { } Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { - - if (file_schema.date_ == EmptyDate) { file_schema.date_ = Meta::GetDate(); } @@ -704,7 +788,6 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { } try { - MetricCollector metric; NextFileId(file_schema.file_id_); @@ -733,7 +816,6 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { return Status::Error("Failed to connect to database server"); } - Query createTableFileQuery = connectionPtr->query(); createTableFileQuery << "INSERT INTO TableFiles VALUES" << @@ -747,9 +829,6 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { file_schema.id_ = res.insert_id(); //Might need to use SELECT LAST_INSERT_ID()? //Consume all results to avoid "Commands out of sync" error - - - } else { ENGINE_LOG_ERROR << "QUERY ERROR WHEN ADDING TABLE FILE"; return Status::DBTransactionError("Add file Error", createTableFileQuery.error()); @@ -769,21 +848,14 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { } catch (std::exception &ex) { return HandleException("Encounter exception when create table file", ex); } - - return Status::OK(); } Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { - - files.clear(); try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -791,7 +863,6 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { return Status::Error("Failed to connect to database server"); } - Query filesToIndexQuery = connectionPtr->query(); filesToIndexQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << "FROM TableFiles " << @@ -857,16 +928,11 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, const DatesT &partition, DatePartionedTableFilesSchema &files) { - - files.clear(); try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -874,7 +940,6 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, return Status::Error("Failed to connect to database server"); } - if (partition.empty()) { Query filesToSearchQuery = connectionPtr->query(); @@ -971,16 +1036,11 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, const std::vector &ids, const DatesT &partition, DatePartionedTableFilesSchema &files) { - - files.clear(); try { - MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1080,15 +1140,11 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, DatePartionedTableFilesSchema &files) { - - files.clear(); try { MetricCollector metric; - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1096,7 +1152,6 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, return Status::Error("Failed to connect to database server"); } - Query filesToMergeQuery = connectionPtr->query(); filesToMergeQuery << "SELECT id, table_id, file_id, file_type, file_size, date " << "FROM TableFiles " << @@ -1164,8 +1219,6 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, const std::vector &ids, TableFilesSchema &table_files) { - - if (ids.empty()) { return Status::OK(); } @@ -1178,9 +1231,7 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, idStr = idStr.substr(0, idStr.size() - 4); //remove the last " OR " try { - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1250,8 +1301,6 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, // PXU TODO: Support Swap Status MySQLMetaImpl::Archive() { - - auto &criterias = options_.archive_conf.GetCriterias(); if (criterias.empty()) { return Status::OK(); @@ -1265,14 +1314,12 @@ Status MySQLMetaImpl::Archive() { long now = utils::GetMicroSecTimeStamp(); try { - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { return Status::Error("Failed to connect to database server"); } - Query archiveQuery = connectionPtr->query(); archiveQuery << "UPDATE TableFiles " << "SET file_type = " << std::to_string(TableFileSchema::TO_DELETE) << " " << @@ -1308,13 +1355,10 @@ Status MySQLMetaImpl::Archive() { } Status MySQLMetaImpl::Size(uint64_t &result) { - - result = 0; + try { - StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1333,16 +1377,10 @@ Status MySQLMetaImpl::Size(uint64_t &result) { res = getSizeQuery.store(); } //Scoped Connection - -// - - if (res.empty()) { result = 0; - } else { result = res[0]["sum"]; - } } catch (const BadQuery &er) { @@ -1359,8 +1397,6 @@ Status MySQLMetaImpl::Size(uint64_t &result) { } Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { - - if (to_discard_size <= 0) { return Status::OK(); @@ -1368,11 +1404,8 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { ENGINE_LOG_DEBUG << "About to discard size=" << to_discard_size; try { - MetricCollector metric; - bool status; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1380,7 +1413,6 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { return Status::Error("Failed to connect to database server"); } - Query discardFilesQuery = connectionPtr->query(); discardFilesQuery << "SELECT id, file_size " << "FROM TableFiles " << @@ -1390,9 +1422,7 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { ENGINE_LOG_DEBUG << "MySQLMetaImpl::DiscardFiles: " << discardFilesQuery.str(); - StoreQueryResult res = discardFilesQuery.store(); - if (res.num_rows() == 0) { return Status::OK(); } @@ -1443,13 +1473,10 @@ Status MySQLMetaImpl::DiscardFiles(long long to_discard_size) { //ZR: this function assumes all fields in file_schema have value Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { - - file_schema.updated_time_ = utils::GetMicroSecTimeStamp(); + try { - MetricCollector metric; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1457,7 +1484,6 @@ Status MySQLMetaImpl::UpdateTableFile(TableFileSchema &file_schema) { return Status::Error("Failed to connect to database server"); } - Query updateTableFileQuery = connectionPtr->query(); //if the table has been deleted, just mark the table file as TO_DELETE @@ -1563,11 +1589,8 @@ Status MySQLMetaImpl::UpdateTableFilesToIndex(const std::string &table_id) { } Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { - - try { MetricCollector metric; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1575,7 +1598,6 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { return Status::Error("Failed to connect to database server"); } - Query updateTableFilesQuery = connectionPtr->query(); std::map has_tables; @@ -1648,6 +1670,7 @@ Status MySQLMetaImpl::UpdateTableFiles(TableFilesSchema &files) { ENGINE_LOG_ERROR << "GENERAL ERROR WHEN UPDATING TABLE FILES" << ": " << er.what(); return Status::DBTransactionError("GENERAL ERROR WHEN UPDATING TABLE FILES", er.what()); } + return Status::OK(); } @@ -1666,7 +1689,6 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return Status::Error("Failed to connect to database server"); } - Query cleanUpFilesWithTTLQuery = connectionPtr->query(); cleanUpFilesWithTTLQuery << "SELECT id, table_id, file_id, date " << "FROM TableFiles " << @@ -1746,7 +1768,6 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { return Status::Error("Failed to connect to database server"); } - Query cleanUpFilesWithTTLQuery = connectionPtr->query(); cleanUpFilesWithTTLQuery << "SELECT id, table_id " << "FROM Tables " << @@ -1756,7 +1777,6 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { StoreQueryResult res = cleanUpFilesWithTTLQuery.store(); - if (!res.empty()) { std::stringstream idsToDeleteSS; @@ -1835,8 +1855,6 @@ Status MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { } Status MySQLMetaImpl::CleanUp() { - - try { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1844,7 +1862,6 @@ Status MySQLMetaImpl::CleanUp() { return Status::Error("Failed to connect to database server"); } - Query cleanUpQuery = connectionPtr->query(); cleanUpQuery << "SELECT table_name " << "FROM information_schema.tables " << @@ -1884,8 +1901,6 @@ Status MySQLMetaImpl::CleanUp() { } Status MySQLMetaImpl::Count(const std::string &table_id, uint64_t &result) { - - try { MetricCollector metric; @@ -1898,7 +1913,6 @@ Status MySQLMetaImpl::Count(const std::string &table_id, uint64_t &result) { } StoreQueryResult res; - { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1945,24 +1959,21 @@ Status MySQLMetaImpl::Count(const std::string &table_id, uint64_t &result) { ENGINE_LOG_ERROR << "GENERAL ERROR WHEN RETRIEVING COUNT" << ": " << er.what(); return Status::DBTransactionError("GENERAL ERROR WHEN RETRIEVING COUNT", er.what()); } + return Status::OK(); } Status MySQLMetaImpl::DropAll() { - - if (boost::filesystem::is_directory(options_.path)) { boost::filesystem::remove_all(options_.path); } try { - ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); if (connectionPtr == nullptr) { return Status::Error("Failed to connect to database server"); } - Query dropTableQuery = connectionPtr->query(); dropTableQuery << "DROP TABLE IF EXISTS Tables, TableFiles;"; @@ -1983,11 +1994,11 @@ Status MySQLMetaImpl::DropAll() { ENGINE_LOG_ERROR << "GENERAL ERROR WHEN DROPPING TABLE" << ": " << er.what(); return Status::DBTransactionError("GENERAL ERROR WHEN DROPPING TABLE", er.what()); } + return Status::OK(); } MySQLMetaImpl::~MySQLMetaImpl() { - if (mode_ != Options::MODE::READ_ONLY) { CleanUp(); } diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index 25f5dbfaf4..c5ef543e69 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -367,9 +367,9 @@ Status SqliteMetaImpl::UpdateTableIndexParam(const std::string &table_id, const table_schema.dimension_ = std::get<2>(tables[0]); table_schema.created_on_ = std::get<3>(tables[0]); table_schema.engine_type_ = index.engine_type_; - table_schema.nlist_ = index.nlist; - table_schema.index_file_size_ = index.index_file_size; - table_schema.metric_type_ = index.metric_type; + table_schema.nlist_ = index.nlist_; + table_schema.index_file_size_ = index.index_file_size_; + table_schema.metric_type_ = index.metric_type_; ConnectorPtr->update(table_schema); } else { @@ -407,9 +407,9 @@ Status SqliteMetaImpl::DescribeTableIndex(const std::string &table_id, TableInde if (groups.size() == 1) { index.engine_type_ = std::get<0>(groups[0]); - index.nlist = std::get<1>(groups[0]); - index.index_file_size = std::get<2>(groups[0]); - index.metric_type = std::get<3>(groups[0]); + index.nlist_ = std::get<1>(groups[0]); + index.index_file_size_ = std::get<2>(groups[0]); + index.metric_type_ = std::get<3>(groups[0]); } else { return Status::NotFound("Table " + table_id + " not found"); } diff --git a/cpp/src/grpc/gen-milvus/milvus.pb.cc b/cpp/src/grpc/gen-milvus/milvus.pb.cc index c8ef38d07b..25f115db18 100644 --- a/cpp/src/grpc/gen-milvus/milvus.pb.cc +++ b/cpp/src/grpc/gen-milvus/milvus.pb.cc @@ -545,7 +545,7 @@ const char descriptor_table_protodef_milvus_2eproto[] PROTOBUF_SECTION_VARIABLE( "_reply\030\002 \001(\010\"M\n\rTableRowCount\022#\n\006status\030" "\001 \001(\0132\023.milvus.grpc.Status\022\027\n\017table_row_" "count\030\002 \001(\003\"\026\n\007Command\022\013\n\003cmd\030\001 \001(\t\"X\n\005I" - "ndex\022\022\n\nindex_type\030\001 \001(\005\022\r\n\005nlist\030\002 \001(\003\022" + "ndex\022\022\n\nindex_type\030\001 \001(\005\022\r\n\005nlist\030\002 \001(\005\022" "\027\n\017index_file_size\030\003 \001(\005\022\023\n\013metric_type\030" "\004 \001(\005\"[\n\nIndexParam\022*\n\ntable_name\030\001 \001(\0132" "\026.milvus.grpc.TableName\022!\n\005index\030\002 \001(\0132\022" @@ -5268,16 +5268,16 @@ Index::Index(const Index& from) : ::PROTOBUF_NAMESPACE_ID::Message(), _internal_metadata_(nullptr) { _internal_metadata_.MergeFrom(from._internal_metadata_); - ::memcpy(&nlist_, &from.nlist_, + ::memcpy(&index_type_, &from.index_type_, static_cast(reinterpret_cast(&metric_type_) - - reinterpret_cast(&nlist_)) + sizeof(metric_type_)); + reinterpret_cast(&index_type_)) + sizeof(metric_type_)); // @@protoc_insertion_point(copy_constructor:milvus.grpc.Index) } void Index::SharedCtor() { - ::memset(&nlist_, 0, static_cast( + ::memset(&index_type_, 0, static_cast( reinterpret_cast(&metric_type_) - - reinterpret_cast(&nlist_)) + sizeof(metric_type_)); + reinterpret_cast(&index_type_)) + sizeof(metric_type_)); } Index::~Index() { @@ -5303,9 +5303,9 @@ void Index::Clear() { // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; - ::memset(&nlist_, 0, static_cast( + ::memset(&index_type_, 0, static_cast( reinterpret_cast(&metric_type_) - - reinterpret_cast(&nlist_)) + sizeof(metric_type_)); + reinterpret_cast(&index_type_)) + sizeof(metric_type_)); _internal_metadata_.Clear(); } @@ -5324,7 +5324,7 @@ const char* Index::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::inte CHK_(ptr); } else goto handle_unusual; continue; - // int64 nlist = 2; + // int32 nlist = 2; case 2: if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 16)) { nlist_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint(&ptr); @@ -5388,12 +5388,12 @@ bool Index::MergePartialFromCodedStream( break; } - // int64 nlist = 2; + // int32 nlist = 2; case 2: { if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (16 & 0xFF)) { DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive< - ::PROTOBUF_NAMESPACE_ID::int64, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT64>( + ::PROTOBUF_NAMESPACE_ID::int32, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT32>( input, &nlist_))); } else { goto handle_unusual; @@ -5459,9 +5459,9 @@ void Index::SerializeWithCachedSizes( ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32(1, this->index_type(), output); } - // int64 nlist = 2; + // int32 nlist = 2; if (this->nlist() != 0) { - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64(2, this->nlist(), output); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32(2, this->nlist(), output); } // int32 index_file_size = 3; @@ -5492,9 +5492,9 @@ void Index::SerializeWithCachedSizes( target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(1, this->index_type(), target); } - // int64 nlist = 2; + // int32 nlist = 2; if (this->nlist() != 0) { - target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt64ToArray(2, this->nlist(), target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(2, this->nlist(), target); } // int32 index_file_size = 3; @@ -5528,13 +5528,6 @@ size_t Index::ByteSizeLong() const { // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; - // int64 nlist = 2; - if (this->nlist() != 0) { - total_size += 1 + - ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int64Size( - this->nlist()); - } - // int32 index_type = 1; if (this->index_type() != 0) { total_size += 1 + @@ -5542,6 +5535,13 @@ size_t Index::ByteSizeLong() const { this->index_type()); } + // int32 nlist = 2; + if (this->nlist() != 0) { + total_size += 1 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->nlist()); + } + // int32 index_file_size = 3; if (this->index_file_size() != 0) { total_size += 1 + @@ -5583,12 +5583,12 @@ void Index::MergeFrom(const Index& from) { ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; - if (from.nlist() != 0) { - set_nlist(from.nlist()); - } if (from.index_type() != 0) { set_index_type(from.index_type()); } + if (from.nlist() != 0) { + set_nlist(from.nlist()); + } if (from.index_file_size() != 0) { set_index_file_size(from.index_file_size()); } @@ -5618,8 +5618,8 @@ bool Index::IsInitialized() const { void Index::InternalSwap(Index* other) { using std::swap; _internal_metadata_.Swap(&other->_internal_metadata_); - swap(nlist_, other->nlist_); swap(index_type_, other->index_type_); + swap(nlist_, other->nlist_); swap(index_file_size_, other->index_file_size_); swap(metric_type_, other->metric_type_); } diff --git a/cpp/src/grpc/gen-milvus/milvus.pb.h b/cpp/src/grpc/gen-milvus/milvus.pb.h index ce34b264fc..2c26bc4e4c 100644 --- a/cpp/src/grpc/gen-milvus/milvus.pb.h +++ b/cpp/src/grpc/gen-milvus/milvus.pb.h @@ -2316,21 +2316,21 @@ class Index : // accessors ------------------------------------------------------- enum : int { - kNlistFieldNumber = 2, kIndexTypeFieldNumber = 1, + kNlistFieldNumber = 2, kIndexFileSizeFieldNumber = 3, kMetricTypeFieldNumber = 4, }; - // int64 nlist = 2; - void clear_nlist(); - ::PROTOBUF_NAMESPACE_ID::int64 nlist() const; - void set_nlist(::PROTOBUF_NAMESPACE_ID::int64 value); - // int32 index_type = 1; void clear_index_type(); ::PROTOBUF_NAMESPACE_ID::int32 index_type() const; void set_index_type(::PROTOBUF_NAMESPACE_ID::int32 value); + // int32 nlist = 2; + void clear_nlist(); + ::PROTOBUF_NAMESPACE_ID::int32 nlist() const; + void set_nlist(::PROTOBUF_NAMESPACE_ID::int32 value); + // int32 index_file_size = 3; void clear_index_file_size(); ::PROTOBUF_NAMESPACE_ID::int32 index_file_size() const; @@ -2346,8 +2346,8 @@ class Index : class _Internal; ::PROTOBUF_NAMESPACE_ID::internal::InternalMetadataWithArena _internal_metadata_; - ::PROTOBUF_NAMESPACE_ID::int64 nlist_; ::PROTOBUF_NAMESPACE_ID::int32 index_type_; + ::PROTOBUF_NAMESPACE_ID::int32 nlist_; ::PROTOBUF_NAMESPACE_ID::int32 index_file_size_; ::PROTOBUF_NAMESPACE_ID::int32 metric_type_; mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; @@ -3827,15 +3827,15 @@ inline void Index::set_index_type(::PROTOBUF_NAMESPACE_ID::int32 value) { // @@protoc_insertion_point(field_set:milvus.grpc.Index.index_type) } -// int64 nlist = 2; +// int32 nlist = 2; inline void Index::clear_nlist() { - nlist_ = PROTOBUF_LONGLONG(0); + nlist_ = 0; } -inline ::PROTOBUF_NAMESPACE_ID::int64 Index::nlist() const { +inline ::PROTOBUF_NAMESPACE_ID::int32 Index::nlist() const { // @@protoc_insertion_point(field_get:milvus.grpc.Index.nlist) return nlist_; } -inline void Index::set_nlist(::PROTOBUF_NAMESPACE_ID::int64 value) { +inline void Index::set_nlist(::PROTOBUF_NAMESPACE_ID::int32 value) { nlist_ = value; // @@protoc_insertion_point(field_set:milvus.grpc.Index.nlist) diff --git a/cpp/src/grpc/gen-status/status.pb.cc b/cpp/src/grpc/gen-status/status.pb.cc index 79c4127020..e6eea519b7 100644 --- a/cpp/src/grpc/gen-status/status.pb.cc +++ b/cpp/src/grpc/gen-status/status.pb.cc @@ -61,7 +61,7 @@ static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] = const char descriptor_table_protodef_status_2eproto[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) = "\n\014status.proto\022\013milvus.grpc\"D\n\006Status\022*\n" "\nerror_code\030\001 \001(\0162\026.milvus.grpc.ErrorCod" - "e\022\016\n\006reason\030\002 \001(\t*\354\003\n\tErrorCode\022\013\n\007SUCCE" + "e\022\016\n\006reason\030\002 \001(\t*\230\004\n\tErrorCode\022\013\n\007SUCCE" "SS\020\000\022\024\n\020UNEXPECTED_ERROR\020\001\022\022\n\016CONNECT_FA" "ILED\020\002\022\025\n\021PERMISSION_DENIED\020\003\022\024\n\020TABLE_N" "OT_EXISTS\020\004\022\024\n\020ILLEGAL_ARGUMENT\020\005\022\021\n\rILL" @@ -73,7 +73,9 @@ const char descriptor_table_protodef_status_2eproto[] PROTOBUF_SECTION_VARIABLE( "TA_FAILED\020\017\022\020\n\014CACHE_FAILED\020\020\022\030\n\024CANNOT_" "CREATE_FOLDER\020\021\022\026\n\022CANNOT_CREATE_FILE\020\022\022" "\030\n\024CANNOT_DELETE_FOLDER\020\023\022\026\n\022CANNOT_DELE" - "TE_FILE\020\024\022\025\n\021BUILD_INDEX_ERROR\020\025b\006proto3" + "TE_FILE\020\024\022\025\n\021BUILD_INDEX_ERROR\020\025\022\021\n\rILLE" + "GAL_NLIST\020\026\022\027\n\023ILLEGAL_METRIC_TYPE\020\027b\006pr" + "oto3" ; static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_status_2eproto_deps[1] = { }; @@ -83,7 +85,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_sta static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_status_2eproto_once; static bool descriptor_table_status_2eproto_initialized = false; const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_status_2eproto = { - &descriptor_table_status_2eproto_initialized, descriptor_table_protodef_status_2eproto, "status.proto", 600, + &descriptor_table_status_2eproto_initialized, descriptor_table_protodef_status_2eproto, "status.proto", 644, &descriptor_table_status_2eproto_once, descriptor_table_status_2eproto_sccs, descriptor_table_status_2eproto_deps, 1, 0, schemas, file_default_instances, TableStruct_status_2eproto::offsets, file_level_metadata_status_2eproto, 1, file_level_enum_descriptors_status_2eproto, file_level_service_descriptors_status_2eproto, @@ -121,6 +123,8 @@ bool ErrorCode_IsValid(int value) { case 19: case 20: case 21: + case 22: + case 23: return true; default: return false; diff --git a/cpp/src/grpc/gen-status/status.pb.h b/cpp/src/grpc/gen-status/status.pb.h index fe200e09fb..5654256c7f 100644 --- a/cpp/src/grpc/gen-status/status.pb.h +++ b/cpp/src/grpc/gen-status/status.pb.h @@ -91,12 +91,14 @@ enum ErrorCode : int { CANNOT_DELETE_FOLDER = 19, CANNOT_DELETE_FILE = 20, BUILD_INDEX_ERROR = 21, + ILLEGAL_NLIST = 22, + ILLEGAL_METRIC_TYPE = 23, ErrorCode_INT_MIN_SENTINEL_DO_NOT_USE_ = std::numeric_limits<::PROTOBUF_NAMESPACE_ID::int32>::min(), ErrorCode_INT_MAX_SENTINEL_DO_NOT_USE_ = std::numeric_limits<::PROTOBUF_NAMESPACE_ID::int32>::max() }; bool ErrorCode_IsValid(int value); constexpr ErrorCode ErrorCode_MIN = SUCCESS; -constexpr ErrorCode ErrorCode_MAX = BUILD_INDEX_ERROR; +constexpr ErrorCode ErrorCode_MAX = ILLEGAL_METRIC_TYPE; constexpr int ErrorCode_ARRAYSIZE = ErrorCode_MAX + 1; const ::PROTOBUF_NAMESPACE_ID::EnumDescriptor* ErrorCode_descriptor(); diff --git a/cpp/src/grpc/milvus.proto b/cpp/src/grpc/milvus.proto index 47209dc5a1..f8058c1fe4 100644 --- a/cpp/src/grpc/milvus.proto +++ b/cpp/src/grpc/milvus.proto @@ -125,7 +125,7 @@ message Command { */ message Index { int32 index_type = 1; - int64 nlist = 2; + int32 nlist = 2; int32 index_file_size = 3; int32 metric_type = 4; } diff --git a/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp b/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp index 437573601f..be0c6adebb 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestHandler.cpp @@ -42,7 +42,6 @@ GrpcRequestHandler::HasTable(::grpc::ServerContext *context, GrpcRequestHandler::DropTable(::grpc::ServerContext *context, const ::milvus::grpc::TableName *request, ::milvus::grpc::Status *response) { - BaseTaskPtr task_ptr = DropTableTask::Create(request->table_name()); GrpcRequestScheduler::ExecTask(task_ptr, response); return ::grpc::Status::OK; diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp index db96ad6202..e10d2ae070 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp @@ -227,12 +227,32 @@ CreateIndexTask::OnExecute() { return SetError(SERVER_TABLE_NOT_EXIST, "Table " + table_name_ + " not exists"); } + res = ValidationUtil::ValidateTableIndexType(index_param_.mutable_index()->index_type()); + if(res != SERVER_SUCCESS) { + return SetError(res, "Invalid index type: " + std::to_string(index_param_.mutable_index()->index_type())); + } + + res = ValidationUtil::ValidateTableIndexNlist(index_param_.mutable_index()->nlist()); + if(res != SERVER_SUCCESS) { + return SetError(res, "Invalid index nlist: " + std::to_string(index_param_.mutable_index()->nlist())); + } + + res = ValidationUtil::ValidateTableIndexMetricType(index_param_.mutable_index()->metric_type()); + if(res != SERVER_SUCCESS) { + return SetError(res, "Invalid index metric type: " + std::to_string(index_param_.mutable_index()->metric_type())); + } + + res = ValidationUtil::ValidateTableIndexFileSize(index_param_.mutable_index()->index_file_size()); + if(res != SERVER_SUCCESS) { + return SetError(res, "Invalid index file size: " + std::to_string(index_param_.mutable_index()->index_file_size())); + } + //step 2: check table existence engine::TableIndex index; index.engine_type_ = index_param_.mutable_index()->index_type(); - index.nlist = index_param_.mutable_index()->nlist(); - index.index_file_size = index_param_.mutable_index()->index_file_size(); - index.metric_type = index_param_.mutable_index()->metric_type(); + index.nlist_ = index_param_.mutable_index()->nlist(); + index.index_file_size_ = index_param_.mutable_index()->index_file_size(); + index.metric_type_ = index_param_.mutable_index()->metric_type(); stat = DBWrapper::DB()->CreateIndex(table_name_, index); if (!stat.ok()) { return SetError(SERVER_BUILD_INDEX_ERROR, "Engine failed: " + stat.ToString()); @@ -855,9 +875,9 @@ DescribeIndexTask::OnExecute() { index_param_.mutable_table_name()->set_table_name(table_name_); index_param_.mutable_index()->set_index_type(index.engine_type_); - index_param_.mutable_index()->set_nlist(index.nlist); - index_param_.mutable_index()->set_index_file_size(index.index_file_size); - index_param_.mutable_index()->set_metric_type(index.metric_type); + index_param_.mutable_index()->set_nlist(index.nlist_); + index_param_.mutable_index()->set_index_file_size(index.index_file_size_); + index_param_.mutable_index()->set_metric_type(index.metric_type_); rc.ElapseFromBegin("totally cost"); } catch (std::exception &ex) { diff --git a/cpp/src/utils/Error.h b/cpp/src/utils/Error.h index c264e60b08..e62e4d3271 100644 --- a/cpp/src/utils/Error.h +++ b/cpp/src/utils/Error.h @@ -51,6 +51,9 @@ constexpr ServerError SERVER_ILLEGAL_SEARCH_RESULT = ToGlobalServerErrorCode(110 constexpr ServerError SERVER_CACHE_ERROR = ToGlobalServerErrorCode(111); constexpr ServerError SERVER_WRITE_ERROR = ToGlobalServerErrorCode(112); constexpr ServerError SERVER_INVALID_NPROBE = ToGlobalServerErrorCode(113); +constexpr ServerError SERVER_INVALID_INDEX_NLIST = ToGlobalServerErrorCode(114); +constexpr ServerError SERVER_INVALID_INDEX_METRIC_TYPE = ToGlobalServerErrorCode(115); +constexpr ServerError SERVER_INVALID_INDEX_FILE_SIZE = ToGlobalServerErrorCode(116); constexpr ServerError SERVER_LICENSE_FILE_NOT_EXIST = ToGlobalServerErrorCode(500); diff --git a/cpp/src/utils/ValidationUtil.cpp b/cpp/src/utils/ValidationUtil.cpp index fa0cd8397d..1ac997f1ab 100644 --- a/cpp/src/utils/ValidationUtil.cpp +++ b/cpp/src/utils/ValidationUtil.cpp @@ -10,6 +10,7 @@ namespace server { constexpr size_t table_name_size_limit = 255; constexpr int64_t table_dimension_limit = 16384; +constexpr int32_t index_file_size_limit = 4096; //index trigger size max = 4096 MB ServerError ValidationUtil::ValidateTableName(const std::string &table_name) { @@ -65,6 +66,32 @@ ValidationUtil::ValidateTableIndexType(int32_t index_type) { return SERVER_SUCCESS; } +ServerError +ValidationUtil::ValidateTableIndexNlist(int32_t nlist) { + if(nlist <= 0) { + return SERVER_INVALID_INDEX_NLIST; + } + + return SERVER_SUCCESS; +} + +ServerError +ValidationUtil::ValidateTableIndexFileSize(int32_t index_file_size) { + if(index_file_size <= 0 || index_file_size > index_file_size_limit) { + return SERVER_INVALID_INDEX_FILE_SIZE; + } + + return SERVER_SUCCESS; +} + +ServerError +ValidationUtil::ValidateTableIndexMetricType(int32_t metric_type) { + if(metric_type != (int32_t)engine::MetricType::L2 && metric_type != (int32_t)engine::MetricType::IP) { + return SERVER_INVALID_INDEX_METRIC_TYPE; + } + return SERVER_SUCCESS; +} + ServerError ValidationUtil::ValidateGpuIndex(uint32_t gpu_index) { int num_devices = 0; diff --git a/cpp/src/utils/ValidationUtil.h b/cpp/src/utils/ValidationUtil.h index 1f90fac273..2c90d99dd4 100644 --- a/cpp/src/utils/ValidationUtil.h +++ b/cpp/src/utils/ValidationUtil.h @@ -17,6 +17,15 @@ public: static ServerError ValidateTableIndexType(int32_t index_type); + static ServerError + ValidateTableIndexNlist(int32_t nlist); + + static ServerError + ValidateTableIndexFileSize(int32_t index_file_size); + + static ServerError + ValidateTableIndexMetricType(int32_t metric_type); + static ServerError ValidateGpuIndex(uint32_t gpu_index); From 50a4de6d938f644bfb0d6f2ee26ba110b0099805 Mon Sep 17 00:00:00 2001 From: starlord Date: Thu, 22 Aug 2019 12:31:00 +0800 Subject: [PATCH 10/22] refine code Former-commit-id: 7283855c5965c6e9fb95cfcc3d0146fc7dcefccf --- cpp/conf/server_config.template | 2 - cpp/src/cache/CacheMgr.cpp | 4 +- cpp/src/cache/CacheMgr.h | 4 +- cpp/src/cache/DataObj.h | 12 +- cpp/src/wrapper/FaissGpuResources.cpp | 38 ------ cpp/src/wrapper/FaissGpuResources.h | 36 ------ cpp/src/wrapper/Index.cpp | 109 ----------------- cpp/src/wrapper/Index.h | 88 -------------- cpp/src/wrapper/IndexBuilder.cpp | 163 -------------------------- cpp/src/wrapper/IndexBuilder.h | 70 ----------- cpp/src/wrapper/Operand.cpp | 123 ------------------- cpp/src/wrapper/Operand.h | 46 -------- cpp/unittest/server/cache_test.cpp | 11 +- 13 files changed, 15 insertions(+), 691 deletions(-) delete mode 100644 cpp/src/wrapper/FaissGpuResources.cpp delete mode 100644 cpp/src/wrapper/FaissGpuResources.h delete mode 100644 cpp/src/wrapper/Index.cpp delete mode 100644 cpp/src/wrapper/Index.h delete mode 100644 cpp/src/wrapper/IndexBuilder.cpp delete mode 100644 cpp/src/wrapper/IndexBuilder.h delete mode 100644 cpp/src/wrapper/Operand.cpp delete mode 100644 cpp/src/wrapper/Operand.h diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index 037e55a0a8..c80e981bcd 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -45,5 +45,3 @@ engine_config: use_blas_threshold: 20 metric_type: L2 # compare vectors by euclidean distance(L2) or inner product(IP), optional: L2 or IP omp_thread_num: 0 # how many compute threads be used by engine, 0 means use all cpu core to compute - use_hybrid_index: false # use GPU/CPU hybrid index - hybrid_index_gpu: 0 # hybrid index gpu device id diff --git a/cpp/src/cache/CacheMgr.cpp b/cpp/src/cache/CacheMgr.cpp index 5e54c9abe1..977c7e1c42 100644 --- a/cpp/src/cache/CacheMgr.cpp +++ b/cpp/src/cache/CacheMgr.cpp @@ -46,7 +46,7 @@ DataObjPtr CacheMgr::GetItem(const std::string& key) { return cache_->get(key); } -engine::Index_ptr CacheMgr::GetIndex(const std::string& key) { +engine::VecIndexPtr CacheMgr::GetIndex(const std::string& key) { DataObjPtr obj = GetItem(key); if(obj != nullptr) { return obj->data(); @@ -65,7 +65,7 @@ void CacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) { server::Metrics::GetInstance().CacheAccessTotalIncrement(); } -void CacheMgr::InsertItem(const std::string& key, const engine::Index_ptr& index) { +void CacheMgr::InsertItem(const std::string& key, const engine::VecIndexPtr& index) { if(cache_ == nullptr) { SERVER_LOG_ERROR << "Cache doesn't exist"; return; diff --git a/cpp/src/cache/CacheMgr.h b/cpp/src/cache/CacheMgr.h index 003c883be7..b6f1ec8ef1 100644 --- a/cpp/src/cache/CacheMgr.h +++ b/cpp/src/cache/CacheMgr.h @@ -19,10 +19,10 @@ public: virtual bool ItemExists(const std::string& key); virtual DataObjPtr GetItem(const std::string& key); - virtual engine::Index_ptr GetIndex(const std::string& key); + virtual engine::VecIndexPtr GetIndex(const std::string& key); virtual void InsertItem(const std::string& key, const DataObjPtr& data); - virtual void InsertItem(const std::string& key, const engine::Index_ptr& index); + virtual void InsertItem(const std::string& key, const engine::VecIndexPtr& index); virtual void EraseItem(const std::string& key); diff --git a/cpp/src/cache/DataObj.h b/cpp/src/cache/DataObj.h index 341df34174..d9c14f4d1b 100644 --- a/cpp/src/cache/DataObj.h +++ b/cpp/src/cache/DataObj.h @@ -6,7 +6,7 @@ #pragma once -#include "wrapper/Index.h" +#include "wrapper/knowhere/vec_index.h" #include @@ -16,17 +16,17 @@ namespace cache { class DataObj { public: - DataObj(const engine::Index_ptr& index) + DataObj(const engine::VecIndexPtr& index) : index_(index) {} - DataObj(const engine::Index_ptr& index, int64_t size) + DataObj(const engine::VecIndexPtr& index, int64_t size) : index_(index), size_(size) {} - engine::Index_ptr data() { return index_; } - const engine::Index_ptr& data() const { return index_; } + engine::VecIndexPtr data() { return index_; } + const engine::VecIndexPtr& data() const { return index_; } int64_t size() const { if(index_ == nullptr) { @@ -41,7 +41,7 @@ public: } private: - engine::Index_ptr index_ = nullptr; + engine::VecIndexPtr index_ = nullptr; int64_t size_ = 0; }; diff --git a/cpp/src/wrapper/FaissGpuResources.cpp b/cpp/src/wrapper/FaissGpuResources.cpp deleted file mode 100644 index b4372f1a2c..0000000000 --- a/cpp/src/wrapper/FaissGpuResources.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#include "FaissGpuResources.h" -#include "map" - -namespace zilliz { -namespace milvus { -namespace engine { - -FaissGpuResources::Ptr& FaissGpuResources::GetGpuResources(int device_id) { - static std::map gpu_resources_map; - auto search = gpu_resources_map.find(device_id); - if (search != gpu_resources_map.end()) { - return gpu_resources_map[device_id]; - } else { - gpu_resources_map[device_id] = std::make_shared(); - return gpu_resources_map[device_id]; - } -} - -void FaissGpuResources::SelectGpu() { - using namespace zilliz::milvus::server; - ServerConfig &config = ServerConfig::GetInstance(); - ConfigNode server_config = config.GetConfig(CONFIG_SERVER); - gpu_num_ = server_config.GetInt32Value(server::CONFIG_GPU_INDEX, 0); -} - -int32_t FaissGpuResources::GetGpu() { - return gpu_num_; -} - -} -} -} \ No newline at end of file diff --git a/cpp/src/wrapper/FaissGpuResources.h b/cpp/src/wrapper/FaissGpuResources.h deleted file mode 100644 index 45c011df85..0000000000 --- a/cpp/src/wrapper/FaissGpuResources.h +++ /dev/null @@ -1,36 +0,0 @@ -/******************************************************************************* - * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved - * Unauthorized copying of this file, via any medium is strictly prohibited. - * Proprietary and confidential. - ******************************************************************************/ -#pragma once - -#include "faiss/gpu/GpuResources.h" -#include "faiss/gpu/StandardGpuResources.h" - -#include "server/ServerConfig.h" - -namespace zilliz { -namespace milvus { -namespace engine { - -class FaissGpuResources { - - public: - using Ptr = std::shared_ptr; - - static FaissGpuResources::Ptr& GetGpuResources(int device_id); - - void SelectGpu(); - - int32_t GetGpu(); - - FaissGpuResources() : gpu_num_(0) { SelectGpu(); } - - private: - int32_t gpu_num_; -}; - -} -} -} \ No newline at end of file diff --git a/cpp/src/wrapper/Index.cpp b/cpp/src/wrapper/Index.cpp deleted file mode 100644 index 6d2ca19449..0000000000 --- a/cpp/src/wrapper/Index.cpp +++ /dev/null @@ -1,109 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#if 0 -// TODO: maybe support static search -#ifdef GPU_VERSION - -#include "faiss/gpu/GpuAutoTune.h" -#include "faiss/gpu/StandardGpuResources.h" -#include "faiss/gpu/utils/DeviceUtils.h" - - -#endif - -#include "Index.h" -#include "faiss/index_io.h" -#include "faiss/IndexIVF.h" -#include "faiss/IVFlib.h" -#include "faiss/IndexScalarQuantizer.h" -#include "server/ServerConfig.h" -#include "src/wrapper/FaissGpuResources.h" - - -namespace zilliz { -namespace milvus { -namespace engine { - -using std::string; -using std::unordered_map; -using std::vector; - -Index::Index(const std::shared_ptr &raw_index) { - index_ = raw_index; - dim = index_->d; - ntotal = index_->ntotal; - store_on_gpu = false; -} - -bool Index::reset() { - try { - index_->reset(); - ntotal = index_->ntotal; - } - catch (std::exception &e) { -// LOG(ERROR) << e.what(); - return false; - } - return true; -} - -bool Index::add_with_ids(idx_t n, const float *xdata, const long *xids) { - try { - index_->add_with_ids(n, xdata, xids); - ntotal += n; - } - catch (std::exception &e) { -// LOG(ERROR) << e.what(); - return false; - } - return true; -} - -bool Index::search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const { - try { - index_->search(n, data, k, distances, labels); - } - catch (std::exception &e) { -// LOG(ERROR) << e.what(); - return false; - } - return true; -} - -void write_index(const Index_ptr &index, const std::string &file_name) { - write_index(index->index_.get(), file_name.c_str()); -} - -Index_ptr read_index(const std::string &file_name) { - std::shared_ptr raw_index = nullptr; - faiss::Index *cpu_index = faiss::read_index(file_name.c_str()); - - server::ServerConfig &config = server::ServerConfig::GetInstance(); - server::ConfigNode engine_config = config.GetConfig(server::CONFIG_ENGINE); - bool use_hybrid_index_ = engine_config.GetBoolValue(server::CONFIG_USE_HYBRID_INDEX, false); - - if (dynamic_cast(cpu_index) != nullptr && use_hybrid_index_) { - - int device_id = engine_config.GetInt32Value(server::CONFIG_HYBRID_INDEX_GPU, 0); - auto gpu_resources = engine::FaissGpuResources::GetGpuResources(device_id); - faiss::gpu::GpuClonerOptions clone_option; - clone_option.storeInCpu = true; - faiss::Index *gpu_index = faiss::gpu::index_cpu_to_gpu(gpu_resources.get(), device_id, cpu_index, &clone_option); - - delete cpu_index; - raw_index.reset(gpu_index); - return std::make_shared(raw_index); - } else { - raw_index.reset(cpu_index); - return std::make_shared(raw_index); - } -} - -} -} -} -#endif diff --git a/cpp/src/wrapper/Index.h b/cpp/src/wrapper/Index.h deleted file mode 100644 index d722b85330..0000000000 --- a/cpp/src/wrapper/Index.h +++ /dev/null @@ -1,88 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#pragma once - -//#include -//#include -//#include -//#include -//#include -// -//#include "faiss/AutoTune.h" -//#include "faiss/index_io.h" -// -//#include "Operand.h" - -#include "knowhere/vec_index.h" - - -namespace zilliz { -namespace milvus { -namespace engine { - -using Index_ptr = VecIndexPtr; - -#if 0 -//class Index; -//using Index_ptr = std::shared_ptr; - -class Index { - typedef long idx_t; - -public: - int dim; ///< std::vector dimension - idx_t ntotal; ///< total nb of indexed std::vectors - bool store_on_gpu; - - explicit Index(const std::shared_ptr &raw_index); - - virtual bool reset(); - - /** - * @brief Same as add, but stores xids instead of sequential ids. - * - * @param data input matrix, size n * d - * @param if ids is not empty ids for the std::vectors - */ - virtual bool add_with_ids(idx_t n, const float *xdata, const long *xids); - - /** - * @brief for each query std::vector, find its k nearest neighbors in the database - * - * @param n queries size - * @param data query std::vectors - * @param k top k nearest neighbors - * @param distances top k nearest distances - * @param labels neighbors of the queries - */ - virtual bool search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const; - - //virtual bool search(idx_t n, const std::vector &data, idx_t k, - // std::vector &distances, std::vector &labels) const; - - //virtual bool remove_ids(const faiss::IDSelector &sel, long &nremove, long &location); - //virtual bool remove_ids_range(const faiss::IDSelector &sel, long &nremove); - //virtual bool index_display(); - - virtual std::shared_ptr data() { return index_; } - - virtual const std::shared_ptr& data() const { return index_; } - -private: - friend void write_index(const Index_ptr &index, const std::string &file_name); - std::shared_ptr index_ = nullptr; -}; - - -void write_index(const Index_ptr &index, const std::string &file_name); - -extern Index_ptr read_index(const std::string &file_name); -#endif - -} -} -} diff --git a/cpp/src/wrapper/IndexBuilder.cpp b/cpp/src/wrapper/IndexBuilder.cpp deleted file mode 100644 index e2b6971281..0000000000 --- a/cpp/src/wrapper/IndexBuilder.cpp +++ /dev/null @@ -1,163 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#if 0 -#include "mutex" - - -#ifdef GPU_VERSION -#include -#include -#include -#endif - - -#include -#include -#include "faiss/IndexScalarQuantizer.h" - -#include "server/ServerConfig.h" -#include "IndexBuilder.h" -#include "FaissGpuResources.h" - - -namespace zilliz { -namespace milvus { -namespace engine { - -using std::vector; - -static std::mutex gpu_resource; -static std::mutex cpu_resource; - -IndexBuilder::IndexBuilder(const Operand_ptr &opd) { - opd_ = opd; - - using namespace zilliz::milvus::server; - ServerConfig &config = ServerConfig::GetInstance(); - ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); - use_hybrid_index_ = engine_config.GetBoolValue(CONFIG_USE_HYBRID_INDEX, false); - hybrid_index_device_id_ = engine_config.GetInt32Value(server::CONFIG_HYBRID_INDEX_GPU, 0); -} - -// Default: build use gpu -Index_ptr IndexBuilder::build_all(const long &nb, - const float *xb, - const long *ids, - const long &nt, - const float *xt) { - std::shared_ptr host_index = nullptr; -#ifdef GPU_VERSION - { - LOG(DEBUG) << "Build index by GPU"; - // TODO: list support index-type. - faiss::MetricType metric_type = opd_->metric_type == "L2" ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT; - faiss::Index *ori_index = faiss::index_factory(opd_->d, opd_->get_index_type(nb).c_str(), metric_type); - - std::lock_guard lk(gpu_resource); - -#ifdef UNITTEST_ONLY - faiss::gpu::StandardGpuResources res; - int device_id = 0; - faiss::gpu::GpuClonerOptions clone_option; - clone_option.storeInCpu = use_hybrid_index_; - auto device_index = faiss::gpu::index_cpu_to_gpu(&res, device_id, ori_index, &clone_option); -#else - engine::FaissGpuResources res; - int device_id = res.GetGpu(); - auto gpu_resources = engine::FaissGpuResources::GetGpuResources(device_id); - faiss::gpu::GpuClonerOptions clone_option; - clone_option.storeInCpu = use_hybrid_index_; - auto device_index = faiss::gpu::index_cpu_to_gpu(gpu_resources.get(), device_id, ori_index, &clone_option); -#endif - - if (!device_index->is_trained) { - nt == 0 || xt == nullptr ? device_index->train(nb, xb) - : device_index->train(nt, xt); - } - device_index->add_with_ids(nb, xb, ids); // TODO: support with add_with_IDMAP - - if (dynamic_cast(ori_index) != nullptr - && use_hybrid_index_) { - std::shared_ptr device_hybrid_index = nullptr; - if (hybrid_index_device_id_ != device_id) { - auto host_hybrid_index = faiss::gpu::index_gpu_to_cpu(device_index); - auto hybrid_gpu_resources = engine::FaissGpuResources::GetGpuResources(hybrid_index_device_id_); - auto another_device_index = faiss::gpu::index_cpu_to_gpu(hybrid_gpu_resources.get(), - hybrid_index_device_id_, - host_hybrid_index, - &clone_option); - device_hybrid_index.reset(another_device_index); - delete device_index; - delete host_hybrid_index; - } else { - device_hybrid_index.reset(device_index); - } - delete ori_index; - return std::make_shared(device_hybrid_index); - } - - host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index)); - - delete device_index; - delete ori_index; - } -#else - { - LOG(DEBUG) << "Build index by CPU"; - faiss::MetricType metric_type = opd_->metric_type == "L2" ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT; - faiss::Index *index = faiss::index_factory(opd_->d, opd_->get_index_type(nb).c_str(), metric_type); - if (!index->is_trained) { - nt == 0 || xt == nullptr ? index->train(nb, xb) - : index->train(nt, xt); - } - index->add_with_ids(nb, xb, ids); - host_index.reset(index); - } -#endif - - return std::make_shared(host_index); -} - -Index_ptr IndexBuilder::build_all(const long &nb, const vector &xb, - const vector &ids, - const long &nt, const vector &xt) { - return build_all(nb, xb.data(), ids.data(), nt, xt.data()); -} - -BgCpuBuilder::BgCpuBuilder(const zilliz::milvus::engine::Operand_ptr &opd) : IndexBuilder(opd) {}; - -Index_ptr BgCpuBuilder::build_all(const long &nb, const float *xb, const long *ids, const long &nt, const float *xt) { - std::shared_ptr index = nullptr; - faiss::MetricType metric_type = opd_->metric_type == "L2" ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT; - index.reset(faiss::index_factory(opd_->d, opd_->get_index_type(nb).c_str(), metric_type)); - - LOG(DEBUG) << "Build index by CPU"; - { - std::lock_guard lk(cpu_resource); - if (!index->is_trained) { - nt == 0 || xt == nullptr ? index->train(nb, xb) - : index->train(nt, xt); - } - index->add_with_ids(nb, xb, ids); - } - - return std::make_shared(index); -} - -IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) { - if (opd->index_type == "IDMap") { - IndexBuilderPtr index = nullptr; - return std::make_shared(opd); - } - - return std::make_shared(opd); -} - -} -} -} -#endif diff --git a/cpp/src/wrapper/IndexBuilder.h b/cpp/src/wrapper/IndexBuilder.h deleted file mode 100644 index 2142df83ee..0000000000 --- a/cpp/src/wrapper/IndexBuilder.h +++ /dev/null @@ -1,70 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#if 0 -#pragma once - -#include "faiss/Index.h" - -#include "Operand.h" -#include "Index.h" - - -namespace zilliz { -namespace milvus { -namespace engine { - -class IndexBuilder { - public: - explicit IndexBuilder(const Operand_ptr &opd); - - virtual Index_ptr build_all(const long &nb, - const float *xb, - const long *ids, - const long &nt = 0, - const float *xt = nullptr); - - virtual Index_ptr build_all(const long &nb, - const std::vector &xb, - const std::vector &ids, - const long &nt = 0, - const std::vector &xt = std::vector()); - - //void train(const long &nt, - // const std::vector &xt); - // - //Index_ptr add(const long &nb, - // const std::vector &xb, - // const std::vector &ids); - // - //void set_build_option(const Operand_ptr &opd); - - - protected: - Operand_ptr opd_ = nullptr; - bool use_hybrid_index_; - int hybrid_index_device_id_; -}; - -class BgCpuBuilder : public IndexBuilder { - public: - BgCpuBuilder(const Operand_ptr &opd); - - virtual Index_ptr build_all(const long &nb, - const float *xb, - const long *ids, - const long &nt = 0, - const float *xt = nullptr) override; -}; - -using IndexBuilderPtr = std::shared_ptr; - -extern IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd); - -} -} -} -#endif diff --git a/cpp/src/wrapper/Operand.cpp b/cpp/src/wrapper/Operand.cpp deleted file mode 100644 index 8bc708eb72..0000000000 --- a/cpp/src/wrapper/Operand.cpp +++ /dev/null @@ -1,123 +0,0 @@ - -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#if 0 -#include "Operand.h" - - -namespace zilliz { -namespace milvus { -namespace engine { - -using std::string; - -enum IndexType { - Invalid_Option = 0, - IVF = 1, - IDMAP = 2, - IVFSQ8 = 3, -}; - -IndexType resolveIndexType(const string &index_type) { - if (index_type == "IVF") { return IndexType::IVF; } - if (index_type == "IDMap") { return IndexType::IDMAP; } - if (index_type == "IVFSQ8") { return IndexType::IVFSQ8; } - return IndexType::Invalid_Option; -} - -int CalcBacketCount(int nb, size_t nlist) { - int backet_count = int(nb / 1000000.0 * nlist); - if(backet_count == 0) { - backet_count = 1; //avoid faiss rash - } - - return backet_count; -} - -// nb at least 100 -string Operand::get_index_type(const int &nb) { - if (!index_str.empty()) { return index_str; } - - switch (resolveIndexType(index_type)) { - case Invalid_Option: { - // TODO: add exception - break; - } - case IVF: { - - using namespace zilliz::milvus::server; - ServerConfig &config = ServerConfig::GetInstance(); - ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); - size_t nlist = engine_config.GetInt32Value(CONFIG_NLIST, 16384); - - index_str += (ncent != 0 ? index_type + std::to_string(ncent) : - index_type + std::to_string(CalcBacketCount(nb, nlist))); -// std::cout<<"nlist = "<>(std::istream &is, Operand &obj) { - is >> obj.d - >> obj.index_type - >> obj.metric_type - >> obj.preproc - >> obj.postproc - >> obj.ncent; - return is; -} - -std::string operand_to_str(const Operand_ptr &opd) { - std::ostringstream ss; - ss << *opd; - return ss.str(); -} - -Operand_ptr str_to_operand(const std::string &input) { - std::istringstream is(input); - auto opd = std::make_shared(); - is >> *(opd.get()); - - return opd; -} - -} -} -} -#endif diff --git a/cpp/src/wrapper/Operand.h b/cpp/src/wrapper/Operand.h deleted file mode 100644 index 0e675f6a1b..0000000000 --- a/cpp/src/wrapper/Operand.h +++ /dev/null @@ -1,46 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - -#if 0 -#pragma once - -#include -#include -#include -#include - - -namespace zilliz { -namespace milvus { -namespace engine { - -struct Operand { - friend std::ostream &operator<<(std::ostream &os, const Operand &obj); - - friend std::istream &operator>>(std::istream &is, Operand &obj); - - int d; - std::string index_type = "IVF"; - std::string metric_type = "L2"; //> L2 / IP(Inner Product) - std::string preproc; - std::string postproc = "Flat"; - std::string index_str; - int ncent = 0; - - std::string get_index_type(const int &nb); -}; - -using Operand_ptr = std::shared_ptr; - -extern std::string operand_to_str(const Operand_ptr &opd); - -extern Operand_ptr str_to_operand(const std::string &input); - - -} -} -} -#endif diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp index 4f1d1db4ef..4d9379dc73 100644 --- a/cpp/unittest/server/cache_test.cpp +++ b/cpp/unittest/server/cache_test.cpp @@ -8,7 +8,6 @@ #include "cache/GpuCacheMgr.h" #include "utils/Error.h" -#include "wrapper/Index.h" #include "wrapper/knowhere/vec_index.h" using namespace zilliz::milvus; @@ -112,7 +111,7 @@ TEST(CacheTest, CPU_CACHE_TEST) { for (int i = 0; i < 20; i++) { MockVecIndex* mock_index = new MockVecIndex(); mock_index->ntotal_ = 1000000;//less 1G per index - engine::Index_ptr index(mock_index); + engine::VecIndexPtr index(mock_index); cpu_mgr->InsertItem("index_" + std::to_string(i), index); } @@ -137,7 +136,7 @@ TEST(CacheTest, CPU_CACHE_TEST) { MockVecIndex* mock_index = new MockVecIndex(); mock_index->ntotal_ = 6000000;//6G less - engine::Index_ptr index(mock_index); + engine::VecIndexPtr index(mock_index); cpu_mgr->InsertItem("index_6g", index); ASSERT_EQ(cpu_mgr->ItemCount(), 0);//data greater than capacity can not be inserted sucessfully @@ -154,7 +153,7 @@ TEST(CacheTest, GPU_CACHE_TEST) { for(int i = 0; i < 20; i++) { MockVecIndex* mock_index = new MockVecIndex(); mock_index->ntotal_ = 1000; - engine::Index_ptr index(mock_index); + engine::VecIndexPtr index(mock_index); cache::DataObjPtr obj = std::make_shared(index); @@ -175,7 +174,7 @@ TEST(CacheTest, INVALID_TEST) { ASSERT_EQ(mgr.GetItem("test"), nullptr); mgr.InsertItem("test", cache::DataObjPtr()); - mgr.InsertItem("test", engine::Index_ptr(nullptr)); + mgr.InsertItem("test", engine::VecIndexPtr(nullptr)); mgr.EraseItem("test"); mgr.PrintInfo(); mgr.ClearCache(); @@ -189,7 +188,7 @@ TEST(CacheTest, INVALID_TEST) { for(int i = 0; i < 20; i++) { MockVecIndex* mock_index = new MockVecIndex(); mock_index->ntotal_ = 2; - engine::Index_ptr index(mock_index); + engine::VecIndexPtr index(mock_index); cache::DataObjPtr obj = std::make_shared(index); mgr.InsertItem("index_" + std::to_string(i), obj); From 3d79ccd97b160711688cdd001137f70d0d832580 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Thu, 22 Aug 2019 15:59:01 +0800 Subject: [PATCH 11/22] add GpuCacheMgr Former-commit-id: c95181ef545748516cad9d3c0561fb8f753c6f37 --- cpp/conf/server_config.template | 5 +- cpp/src/cache/Cache.h | 3 + cpp/src/cache/GpuCacheMgr.cpp | 35 ++++++++++ cpp/src/cache/GpuCacheMgr.h | 18 ++++- cpp/src/db/DBImpl.cpp | 2 +- cpp/src/db/engine/ExecutionEngine.h | 2 + cpp/src/db/engine/ExecutionEngineImpl.cpp | 69 ++++++++++++++----- cpp/src/db/engine/ExecutionEngineImpl.h | 2 + cpp/src/metrics/MetricBase.h | 3 +- cpp/src/metrics/PrometheusMetrics.cpp | 13 ++++ cpp/src/metrics/PrometheusMetrics.h | 18 +++-- .../examples/grpcsimple/src/ClientTest.cpp | 9 ++- cpp/src/server/ServerConfig.h | 2 + cpp/unittest/db/db_tests.cpp | 6 ++ 14 files changed, 156 insertions(+), 31 deletions(-) diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index 037e55a0a8..dfd5b6d4b1 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -36,8 +36,11 @@ license_config: # license configure cache_config: # cache configure cpu_cache_capacity: 16 # how many memory are used as cache, unit: GB, range: 0 ~ less than total memory - cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 + cpu_cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 insert_cache_immediately: false # insert data will be load into cache immediately for hot query + gpu_cache_capacity: 5 # how many memory are used as cache in gpu, unit: GB, RANGE: 0 ~ less than total memory + gpu_cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 + gpu_ids: 0,1 # gpu id engine_config: nprobe: 10 diff --git a/cpp/src/cache/Cache.h b/cpp/src/cache/Cache.h index 4d6f32b9eb..606dc9eb07 100644 --- a/cpp/src/cache/Cache.h +++ b/cpp/src/cache/Cache.h @@ -46,6 +46,8 @@ public: double freemem_percent() const { return freemem_percent_; }; void set_freemem_percent(double percent) { freemem_percent_ = percent; } + void set_gpu_ids(std::vector gpu_ids) { gpu_ids_.assign(gpu_ids.begin(), gpu_ids.end()); } + std::vector gpu_ids() const { return gpu_ids_; } size_t size() const; bool exists(const std::string& key); @@ -60,6 +62,7 @@ private: int64_t usage_; int64_t capacity_; double freemem_percent_; + std::vector gpu_ids_; LRU lru_; mutable std::mutex mutex_; diff --git a/cpp/src/cache/GpuCacheMgr.cpp b/cpp/src/cache/GpuCacheMgr.cpp index 13eec4f2b6..19be0d0821 100644 --- a/cpp/src/cache/GpuCacheMgr.cpp +++ b/cpp/src/cache/GpuCacheMgr.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#include "utils/Log.h" #include "GpuCacheMgr.h" #include "server/ServerConfig.h" @@ -11,19 +12,53 @@ namespace zilliz { namespace milvus { namespace cache { +std::mutex GpuCacheMgr::mutex_; +std::unordered_map GpuCacheMgr::instance_; + namespace { constexpr int64_t unit = 1024 * 1024 * 1024; } GpuCacheMgr::GpuCacheMgr() { server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); + std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1"); + std::vector gpu_ids; + for (auto i = 0; i < gpu_ids_str.length(); ) { + if (gpu_ids_str[i] != ',') { + int id = 0; + while (gpu_ids_str[i] != ',') { + id = id * 10 + gpu_ids_str[i] - '0'; + ++i; + } + gpu_ids.push_back(id); + } else { + ++i; + } + } + + cache_->set_gpu_ids(gpu_ids); + int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 1); cap *= unit; cache_ = std::make_shared(cap, 1UL<<32); + + double free_percent = config.GetDoubleValue(server::GPU_CACHE_FREE_PERCENT, 0.85); + if (free_percent > 0.0 && free_percent <= 1.0) { + cache_->set_freemem_percent(free_percent); + } else { + SERVER_LOG_ERROR << "Invalid gpu_cache_free_percent: " << free_percent << + ", defaultly set to " << cache_->freemem_percent(); + } } void GpuCacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) { //TODO: copy data to gpu + if (cache_ == nullptr) { + SERVER_LOG_ERROR << "Cache doesn't exist"; + return; + } + + cache_->insert(key, data); } } diff --git a/cpp/src/cache/GpuCacheMgr.h b/cpp/src/cache/GpuCacheMgr.h index 4efec08cec..a1d7d4be0d 100644 --- a/cpp/src/cache/GpuCacheMgr.h +++ b/cpp/src/cache/GpuCacheMgr.h @@ -5,6 +5,7 @@ //////////////////////////////////////////////////////////////////////////////// #include "CacheMgr.h" +#include namespace zilliz { namespace milvus { @@ -15,12 +16,23 @@ private: GpuCacheMgr(); public: - static CacheMgr* GetInstance() { - static GpuCacheMgr s_mgr; - return &s_mgr; + static CacheMgr* GetInstance(uint64_t gpu_id) { + if (!instance_[gpu_id]) { + std::lock_guard lock(mutex_); + if(!instance_[gpu_id]) { + instance_.insert(std::pair(gpu_id, new GpuCacheMgr())); + } + } + return instance_.at(gpu_id); +// static GpuCacheMgr s_mgr; +// return &s_mgr; } void InsertItem(const std::string& key, const DataObjPtr& data) override; + +private: + static std::mutex mutex_; + static std::unordered_map instance_; }; } diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index a19fd340da..6cb4e32db1 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -363,7 +363,7 @@ void DBImpl::StartMetricTask() { server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL); int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); - server::Metrics::GetInstance().CacheUsageGaugeSet(cache_usage*100/cache_total); + server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage*100/cache_total); uint64_t size; Size(size); server::Metrics::GetInstance().DataFileSizeGaugeSet(size); diff --git a/cpp/src/db/engine/ExecutionEngine.h b/cpp/src/db/engine/ExecutionEngine.h index 0f2cf42b22..d0fd1d6630 100644 --- a/cpp/src/db/engine/ExecutionEngine.h +++ b/cpp/src/db/engine/ExecutionEngine.h @@ -64,6 +64,8 @@ public: virtual Status Cache() = 0; + virtual Status GpuCache(uint64_t gpu_id) = 0; + virtual Status Init() = 0; }; diff --git a/cpp/src/db/engine/ExecutionEngineImpl.cpp b/cpp/src/db/engine/ExecutionEngineImpl.cpp index dd38369832..066e63c1cf 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.cpp +++ b/cpp/src/db/engine/ExecutionEngineImpl.cpp @@ -4,6 +4,7 @@ * Proprietary and confidential. ******************************************************************************/ #include +#include "src/cache/GpuCacheMgr.h" #include "src/server/ServerConfig.h" #include "src/metrics/Metrics.h" @@ -144,28 +145,60 @@ Status ExecutionEngineImpl::Load(bool to_cache) { } Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) { - try { - index_ = index_->CopyToGpu(device_id); - ENGINE_LOG_DEBUG << "CPU to GPU" << device_id; - } catch (knowhere::KnowhereException &e) { - ENGINE_LOG_ERROR << e.what(); - return Status::Error(e.what()); - } catch (std::exception &e) { - return Status::Error(e.what()); + index_ = zilliz::milvus::cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_); + bool already_in_cache = (index_ != nullptr); + auto start_time = METRICS_NOW_TIME; + if (!index_) { + try { + index_ = index_->CopyToGpu(device_id); + ENGINE_LOG_DEBUG << "CPU to GPU" << device_id; + } catch (knowhere::KnowhereException &e) { + ENGINE_LOG_ERROR << e.what(); + return Status::Error(e.what()); + } catch (std::exception &e) { + return Status::Error(e.what()); + } } + + if (!already_in_cache) { + GpuCache(device_id); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + double physical_size = PhysicalSize(); + + server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); + server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size); + } + return Status::OK(); } Status ExecutionEngineImpl::CopyToCpu() { - try { - index_ = index_->CopyToCpu(); - ENGINE_LOG_DEBUG << "GPU to CPU"; - } catch (knowhere::KnowhereException &e) { - ENGINE_LOG_ERROR << e.what(); - return Status::Error(e.what()); - } catch (std::exception &e) { - return Status::Error(e.what()); + index_ = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_); + bool already_in_cache = (index_ != nullptr); + auto start_time = METRICS_NOW_TIME; + if (!index_) { + try { + index_ = index_->CopyToCpu(); + ENGINE_LOG_DEBUG << "GPU to CPU"; + } catch (knowhere::KnowhereException &e) { + ENGINE_LOG_ERROR << e.what(); + return Status::Error(e.what()); + } catch (std::exception &e) { + return Status::Error(e.what()); + } } + + if(!already_in_cache) { + Cache(); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + double physical_size = PhysicalSize(); + + server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); + server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size); + } + return Status::OK(); } @@ -246,6 +279,10 @@ Status ExecutionEngineImpl::Cache() { return Status::OK(); } +Status ExecutionEngineImpl::GpuCache(uint64_t gpu_id) { + zilliz::milvus::cache::GpuCacheMgr::GetInstance(gpu_id)->InsertItem(location_, index_); +} + // TODO(linxj): remove. Status ExecutionEngineImpl::Init() { using namespace zilliz::milvus::server; diff --git a/cpp/src/db/engine/ExecutionEngineImpl.h b/cpp/src/db/engine/ExecutionEngineImpl.h index 948719310c..01af0f4a9b 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.h +++ b/cpp/src/db/engine/ExecutionEngineImpl.h @@ -59,6 +59,8 @@ public: Status Cache() override; + Status GpuCache(uint64_t gpu_id) override; + Status Init() override; private: diff --git a/cpp/src/metrics/MetricBase.h b/cpp/src/metrics/MetricBase.h index 23a2427b35..a11bf14179 100644 --- a/cpp/src/metrics/MetricBase.h +++ b/cpp/src/metrics/MetricBase.h @@ -31,7 +31,8 @@ class MetricsBase{ virtual void IndexFileSizeHistogramObserve(double value) {}; virtual void BuildIndexDurationSecondsHistogramObserve(double value) {}; - virtual void CacheUsageGaugeSet(double value) {}; + virtual void CpuCacheUsageGaugeSet(double value) {}; + virtual void GpuCacheUsageGaugeSet(double value) {}; virtual void MetaAccessTotalIncrement(double value = 1) {}; virtual void MetaAccessDurationSecondsHistogramObserve(double value) {}; diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index c7729ffdbc..08dad64724 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -4,6 +4,7 @@ * Proprietary and confidential. ******************************************************************************/ +#include #include "PrometheusMetrics.h" #include "utils/Log.h" #include "SystemInfo.h" @@ -166,6 +167,18 @@ void PrometheusMetrics::CPUTemperature() { } } +void PrometheusMetrics::GpuCacheUsageGaugeSet(double value) { + if(!startup_) return; + int64_t num_processors = server::SystemInfo::GetInstance().num_processor(); + + for (auto i = 0; i < num_processors; ++i) { +// int gpu_cache_usage = cache::GpuCacheMgr::GetInstance(i)->CacheUsage(); +// int gpu_cache_total = cache::GpuCacheMgr::GetInstance(i)->CacheCapacity(); +// prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}}); +// gpu_cache.Set(gpu_cache_usage * 100 / gpu_cache_total); + } +} + } } } diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index 282c58800c..ab37195583 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -54,7 +54,8 @@ class PrometheusMetrics: public MetricsBase { void RawFileSizeHistogramObserve(double value) override { if(startup_) raw_files_size_histogram_.Observe(value);}; void IndexFileSizeHistogramObserve(double value) override { if(startup_) index_files_size_histogram_.Observe(value);}; void BuildIndexDurationSecondsHistogramObserve(double value) override { if(startup_) build_index_duration_seconds_histogram_.Observe(value);}; - void CacheUsageGaugeSet(double value) override { if(startup_) cache_usage_gauge_.Set(value);}; + void CpuCacheUsageGaugeSet(double value) override { if(startup_) cpu_cache_usage_gauge_.Set(value);}; + void GpuCacheUsageGaugeSet(double value) override; void MetaAccessTotalIncrement(double value = 1) override { if(startup_) meta_access_total_.Increment(value);}; void MetaAccessDurationSecondsHistogramObserve(double value) override { if(startup_) meta_access_duration_seconds_histogram_.Observe(value);}; @@ -336,12 +337,18 @@ class PrometheusMetrics: public MetricsBase { .Register(*registry_); prometheus::Counter &cache_access_total_ = cache_access_.Add({}); - // record cache usage and % - prometheus::Family &cache_usage_ = prometheus::BuildGauge() + // record CPU cache usage and % + prometheus::Family &cpu_cache_usage_ = prometheus::BuildGauge() .Name("cache_usage_bytes") .Help("current cache usage by bytes") .Register(*registry_); - prometheus::Gauge &cache_usage_gauge_ = cache_usage_.Add({}); + prometheus::Gauge &cpu_cache_usage_gauge_ = cpu_cache_usage_.Add({}); + + //record GPU cache usage and % + prometheus::Family &gpu_cache_usage_ = prometheus::BuildGauge() + .Name("gpu_cache_usage_bytes") + .Help("current gpu cache usage by bytes") + .Register(*registry_); // record query response using Quantiles = std::vector; @@ -360,8 +367,7 @@ class PrometheusMetrics: public MetricsBase { prometheus::Family &query_vector_response_per_second_ = prometheus::BuildGauge() .Name("query_vector_response_per_microsecond") .Help("the number of vectors can be queried every second ") - .Register(*registry_); - prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({}); + .Register(*registry_); prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({}); prometheus::Family &query_response_per_second_ = prometheus::BuildGauge() .Name("query_response_per_microsecond") diff --git a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp index 0022f00282..3156adc9c7 100644 --- a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp +++ b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp @@ -210,9 +210,9 @@ ClientTest::Test(const std::string& address, const std::string& port) { std::cout << "All tables: " << std::endl; for(auto& table : tables) { int64_t row_count = 0; - conn->DropTable(table); -// stat = conn->CountTable(table, row_count); -// std::cout << "\t" << table << "(" << row_count << " rows)" << std::endl; +// conn->DropTable(table); + stat = conn->CountTable(table, row_count); + std::cout << "\t" << table << "(" << row_count << " rows)" << std::endl; } } @@ -264,6 +264,9 @@ ClientTest::Test(const std::string& address, const std::string& port) { search_record_array.push_back( std::make_pair(record_ids[SEARCH_TARGET], record_array[SEARCH_TARGET])); } + int64_t row_count; + conn->CountTable(TABLE_NAME, row_count); + std::cout << "\t" << TABLE_NAME << "(" << row_count << " rows)" << std::endl; } } diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index 49dc6f50fd..b42a585888 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -38,6 +38,8 @@ static const char* CONFIG_CPU_CACHE_CAPACITY = "cpu_cache_capacity"; static const char* CONFIG_GPU_CACHE_CAPACITY = "gpu_cache_capacity"; static const char* CACHE_FREE_PERCENT = "cache_free_percent"; static const char* CONFIG_INSERT_CACHE_IMMEDIATELY = "insert_cache_immediately"; +static const char* CONFIG_GPU_IDS = "gpu_ids"; +static const char *GPU_CACHE_FREE_PERCENT = "gpu_cache_free_percent"; static const char* CONFIG_LICENSE = "license_config"; static const char* CONFIG_LICENSE_PATH = "license_path"; diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 2426846c15..f2af4b773a 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -9,6 +9,7 @@ #include "db/meta/MetaConsts.h" #include "db/Factories.h" #include "cache/CpuCacheMgr.h" +#include "cache/GpuCacheMgr.h #include "utils/CommonUtil.h" #include @@ -437,4 +438,9 @@ TEST_F(DBTest2, DELETE_BY_RANGE_TEST) { ConvertTimeRangeToDBDates(start_value, end_value, dates); db_->DeleteTable(TABLE_NAME, dates); +} + +TEST_F(DBTest, GPU_CACHE_MGR_TEST) { + std::vector gpu_ids = cache:: + cache::CpuCacheMgr::GetInstance()->CacheUsage(); } \ No newline at end of file From f0d9b0f5e416fae7a4dd594d6b59616f6db910a4 Mon Sep 17 00:00:00 2001 From: starlord Date: Thu, 22 Aug 2019 15:59:06 +0800 Subject: [PATCH 12/22] fix row count bug Former-commit-id: d2a9faa6a20ebeb88f8ef2fdb77180a6c216625c --- cpp/src/db/insert/MemTableFile.cpp | 2 + cpp/src/db/meta/MySQLMetaImpl.cpp | 32 +++++++++--- cpp/src/db/meta/SqliteMetaImpl.cpp | 51 ++++++++++++------- .../examples/grpcsimple/src/ClientTest.cpp | 13 +++-- 4 files changed, 68 insertions(+), 30 deletions(-) diff --git a/cpp/src/db/insert/MemTableFile.cpp b/cpp/src/db/insert/MemTableFile.cpp index 3cbb862389..672bd50b00 100644 --- a/cpp/src/db/insert/MemTableFile.cpp +++ b/cpp/src/db/insert/MemTableFile.cpp @@ -86,6 +86,8 @@ Status MemTableFile::Serialize() { execution_engine_->Serialize(); auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); + + table_file_schema_.file_size_ = execution_engine_->PhysicalSize(); table_file_schema_.row_count_ = execution_engine_->Count(); server::Metrics::GetInstance().DiskStoreIOSpeedGaugeSet((double) size / total_time); diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index d4dddd05bd..46c5cd1d68 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -652,7 +652,7 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { } Query describeTableQuery = connectionPtr->query(); - describeTableQuery << "SELECT id, dimension, files_cnt, engine_type, store_raw_data " << + describeTableQuery << "SELECT id, dimension, engine_type " << "FROM Tables " << "WHERE table_id = " << quote << table_schema.table_id_ << " " << "AND state <> " << std::to_string(TableSchema::TO_DELETE) << ";"; @@ -739,7 +739,7 @@ Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { } Query allTablesQuery = connectionPtr->query(); - allTablesQuery << "SELECT id, table_id, dimension, files_cnt, engine_type, store_raw_data " << + allTablesQuery << "SELECT id, table_id, dimension, engine_type " << "FROM Tables " << "WHERE state <> " << std::to_string(TableSchema::TO_DELETE) << ";"; @@ -864,7 +864,7 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { } Query filesToIndexQuery = connectionPtr->query(); - filesToIndexQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << + filesToIndexQuery << "SELECT id, table_id, engine_type, file_id, file_type, file_size, row_count, date, created_on " << "FROM TableFiles " << "WHERE file_type = " << std::to_string(TableFileSchema::TO_INDEX) << ";"; @@ -891,10 +891,14 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { table_file.file_type_ = resRow["file_type"]; + table_file.file_size_ = resRow["file_size"]; + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; + table_file.created_on_ = resRow["created_on"]; + auto groupItr = groups.find(table_file.table_id_); if (groupItr == groups.end()) { TableSchema table_schema; @@ -943,7 +947,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, if (partition.empty()) { Query filesToSearchQuery = connectionPtr->query(); - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, file_size, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "(file_type = " << std::to_string(TableFileSchema::RAW) << " OR " << @@ -965,7 +969,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, std::string partitionListStr = partitionListSS.str(); partitionListStr = partitionListStr.substr(0, partitionListStr.size() - 2); //remove the last ", " - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, file_size, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "date IN (" << partitionListStr << ") AND " << @@ -1004,6 +1008,8 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, table_file.file_type_ = resRow["file_type"]; + table_file.file_size_ = resRow["file_size"]; + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; @@ -1049,7 +1055,7 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, } Query filesToSearchQuery = connectionPtr->query(); - filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, row_count, date " << + filesToSearchQuery << "SELECT id, table_id, engine_type, file_id, file_type, file_size, row_count, date " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id; @@ -1110,6 +1116,8 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, table_file.file_type_ = resRow["file_type"]; + table_file.file_size_ = resRow["file_size"]; + table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; @@ -1153,7 +1161,7 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, } Query filesToMergeQuery = connectionPtr->query(); - filesToMergeQuery << "SELECT id, table_id, file_id, file_type, file_size, date " << + filesToMergeQuery << "SELECT id, table_id, file_id, file_type, file_size, row_count, date, engine_type, create_on " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "file_type = " << std::to_string(TableFileSchema::RAW) << " " << @@ -1189,8 +1197,14 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, table_file.file_size_ = resRow["file_size"]; + table_file.row_count_ = resRow["row_count"]; + table_file.date_ = resRow["date"]; + table_file.engine_type_ = resRow["engine_type"]; + + table_file.created_on_ = resRow["created_on"]; + table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); @@ -1241,7 +1255,7 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, Query getTableFileQuery = connectionPtr->query(); - getTableFileQuery << "SELECT id, engine_type, file_id, file_type, file_size, row_count, date " << + getTableFileQuery << "SELECT id, engine_type, file_id, file_type, file_size, row_count, date, created_on " << "FROM TableFiles " << "WHERE table_id = " << quote << table_id << " AND " << "(" << idStr << ");"; @@ -1280,6 +1294,8 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, file_schema.date_ = resRow["date"]; + file_schema.created_on_ = resRow["created_on"]; + file_schema.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, file_schema); diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index c5ef543e69..38f68f8638 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -551,9 +551,11 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, + &TableFileSchema::file_size_, &TableFileSchema::row_count_, &TableFileSchema::date_, - &TableFileSchema::engine_type_), + &TableFileSchema::engine_type_, + &TableFileSchema::created_on_), where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::TO_INDEX)); @@ -565,9 +567,11 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.row_count_ = std::get<4>(file); - table_file.date_ = std::get<5>(file); - table_file.engine_type_ = std::get<6>(file); + table_file.file_size_ = std::get<4>(file); + table_file.row_count_ = std::get<5>(file); + table_file.date_ = std::get<6>(file); + table_file.engine_type_ = std::get<7>(file); + table_file.created_on_ = std::get<8>(file); utils::GetTableFilePath(options_, table_file); auto groupItr = groups.find(table_file.table_id_); @@ -605,6 +609,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, + &TableFileSchema::file_size_, &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), @@ -625,9 +630,10 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.row_count_ = std::get<4>(file); - table_file.date_ = std::get<5>(file); - table_file.engine_type_ = std::get<6>(file); + table_file.file_size_ = std::get<4>(file); + table_file.row_count_ = std::get<5>(file); + table_file.date_ = std::get<6>(file); + table_file.engine_type_ = std::get<7>(file); table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); @@ -643,6 +649,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, + &TableFileSchema::file_size_, &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_), @@ -664,9 +671,10 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.row_count_ = std::get<4>(file); - table_file.date_ = std::get<5>(file); - table_file.engine_type_ = std::get<6>(file); + table_file.file_size_ = std::get<4>(file); + table_file.row_count_ = std::get<5>(file); + table_file.date_ = std::get<6>(file); + table_file.engine_type_ = std::get<7>(file); table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); @@ -696,6 +704,7 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, &TableFileSchema::table_id_, &TableFileSchema::file_id_, &TableFileSchema::file_type_, + &TableFileSchema::file_size_, &TableFileSchema::row_count_, &TableFileSchema::date_, &TableFileSchema::engine_type_); @@ -738,9 +747,10 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.row_count_ = std::get<4>(file); - table_file.date_ = std::get<5>(file); - table_file.engine_type_ = std::get<6>(file); + table_file.file_size_ = std::get<4>(file); + table_file.row_count_ = std::get<5>(file); + table_file.date_ = std::get<6>(file); + table_file.engine_type_ = std::get<7>(file); table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); @@ -769,7 +779,9 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, &TableFileSchema::file_id_, &TableFileSchema::file_type_, &TableFileSchema::file_size_, - &TableFileSchema::date_), + &TableFileSchema::row_count_, + &TableFileSchema::date_, + &TableFileSchema::created_on_), where(c(&TableFileSchema::file_type_) == (int) TableFileSchema::RAW and c(&TableFileSchema::table_id_) == table_id), order_by(&TableFileSchema::file_size_).desc()); @@ -789,7 +801,9 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); table_file.file_size_ = std::get<4>(file); - table_file.date_ = std::get<5>(file); + table_file.row_count_ = std::get<5>(file); + table_file.date_ = std::get<6>(file); + table_file.created_on_ = std::get<7>(file); table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); @@ -816,7 +830,8 @@ Status SqliteMetaImpl::GetTableFiles(const std::string& table_id, &TableFileSchema::file_size_, &TableFileSchema::row_count_, &TableFileSchema::date_, - &TableFileSchema::engine_type_), + &TableFileSchema::engine_type_, + &TableFileSchema::created_on_), where(c(&TableFileSchema::table_id_) == table_id and in(&TableFileSchema::id_, ids) )); @@ -838,6 +853,7 @@ Status SqliteMetaImpl::GetTableFiles(const std::string& table_id, file_schema.row_count_ = std::get<4>(file); file_schema.date_ = std::get<5>(file); file_schema.engine_type_ = std::get<6>(file); + file_schema.created_on_ = std::get<7>(file); file_schema.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, file_schema); @@ -1218,9 +1234,6 @@ Status SqliteMetaImpl::Count(const std::string &table_id, uint64_t &result) { result += std::get<0>(file); } - result /= table_schema.dimension_; - result /= sizeof(float); - } catch (std::exception &e) { return HandleException("Encounter exception when calculate table file size", e); } diff --git a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp index 0022f00282..cae1f0fdfe 100644 --- a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp +++ b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp @@ -86,9 +86,8 @@ namespace { } std::string GetTableName() { -// static std::string s_id(CurrentTime()); -// return "tbl_" + s_id; - return "test"; + static std::string s_id(CurrentTime()); + return "tbl_" + s_id; } TableSchema BuildTableSchema() { @@ -269,6 +268,10 @@ ClientTest::Test(const std::string& address, const std::string& port) { {//search vectors without index Sleep(2); + + int64_t row_count = 0; + Status stat = conn->CountTable(TABLE_NAME, row_count); + std::cout << TABLE_NAME << "(" << row_count << " rows)" << std::endl; DoSearch(conn, search_record_array, "Search without index"); } @@ -300,6 +303,10 @@ ClientTest::Test(const std::string& address, const std::string& port) { {//delete index Status stat = conn->DropIndex(TABLE_NAME); std::cout << "DropIndex function call status: " << stat.ToString() << std::endl; + + int64_t row_count = 0; + stat = conn->CountTable(TABLE_NAME, row_count); + std::cout << TABLE_NAME << "(" << row_count << " rows)" << std::endl; } {//delete by range From 51475e3d378f0ffb527b02d5b0b7781e0186024a Mon Sep 17 00:00:00 2001 From: starlord Date: Thu, 22 Aug 2019 18:39:06 +0800 Subject: [PATCH 13/22] fix index size bug Former-commit-id: fdc398b873ffc25d25eeff5c05ae87ecb0872937 --- cpp/src/db/Constants.h | 16 +- cpp/src/db/DBImpl.cpp | 27 +-- cpp/src/db/Options.h | 6 +- cpp/src/db/meta/MetaTypes.h | 3 +- cpp/src/db/meta/MySQLMetaImpl.cpp | 39 ++-- cpp/src/db/meta/SqliteMetaImpl.cpp | 45 ++-- .../examples/grpcsimple/src/ClientTest.cpp | 2 +- cpp/unittest/faiss_wrapper/CMakeLists.txt | 56 ----- cpp/unittest/faiss_wrapper/wrapper_test.cpp | 203 ------------------ cpp/unittest/server/util_test.cpp | 12 +- 10 files changed, 83 insertions(+), 326 deletions(-) delete mode 100644 cpp/unittest/faiss_wrapper/CMakeLists.txt delete mode 100644 cpp/unittest/faiss_wrapper/wrapper_test.cpp diff --git a/cpp/src/db/Constants.h b/cpp/src/db/Constants.h index e94dfa6aea..479f670563 100644 --- a/cpp/src/db/Constants.h +++ b/cpp/src/db/Constants.h @@ -5,19 +5,25 @@ ******************************************************************************/ #pragma once +#include + namespace zilliz { namespace milvus { namespace engine { -constexpr size_t K = 1024UL; -constexpr size_t M = K * K; -constexpr size_t G = K * M; -constexpr size_t T = K * G; +constexpr uint64_t K = 1024UL; +constexpr uint64_t M = K * K; +constexpr uint64_t G = K * M; +constexpr uint64_t T = K * G; -constexpr size_t MAX_TABLE_FILE_MEM = 128 * M; +constexpr uint64_t MAX_TABLE_FILE_MEM = 128 * M; constexpr int VECTOR_TYPE_SIZE = sizeof(float); +static constexpr uint64_t ONE_KB = K; +static constexpr uint64_t ONE_MB = ONE_KB*ONE_KB; +static constexpr uint64_t ONE_GB = ONE_KB*ONE_MB; + } // namespace engine } // namespace milvus } // namespace zilliz diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index a19fd340da..07d95fe950 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -60,27 +60,6 @@ void CollectQueryMetrics(double total_time, size_t nq) { server::Metrics::GetInstance().QueryVectorResponsePerSecondGaugeSet(double (nq) / total_time); } -#if 0 -void CollectFileMetrics(int file_type, size_t file_size, double total_time) { - switch(file_type) { - case meta::TableFileSchema::RAW: - case meta::TableFileSchema::TO_INDEX: { - server::Metrics::GetInstance().SearchRawDataDurationSecondsHistogramObserve(total_time); - server::Metrics::GetInstance().RawFileSizeHistogramObserve(file_size); - server::Metrics::GetInstance().RawFileSizeTotalIncrement(file_size); - server::Metrics::GetInstance().RawFileSizeGaugeSet(file_size); - break; - } - default: { - server::Metrics::GetInstance().SearchIndexDataDurationSecondsHistogramObserve(total_time); - server::Metrics::GetInstance().IndexFileSizeHistogramObserve(file_size); - server::Metrics::GetInstance().IndexFileSizeTotalIncrement(file_size); - server::Metrics::GetInstance().IndexFileSizeGaugeSet(file_size); - break; - } - } -} -#endif } @@ -473,11 +452,7 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, } //step 4: update table files state - if (index_size >= options_.index_trigger_size) { - table_file.file_type_ = meta::TableFileSchema::TO_INDEX; - } else { - table_file.file_type_ = meta::TableFileSchema::RAW; - } + table_file.file_type_ = meta::TableFileSchema::RAW; table_file.file_size_ = index->PhysicalSize(); table_file.row_count_ = index->Count(); updated.push_back(table_file); diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index a1ff28419d..8081531236 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -5,6 +5,8 @@ ******************************************************************************/ #pragma once +#include "Constants.h" + #include #include #include @@ -16,10 +18,6 @@ namespace engine { class Env; -static constexpr uint64_t ONE_KB = 1024; -static constexpr uint64_t ONE_MB = ONE_KB*ONE_KB; -static constexpr uint64_t ONE_GB = ONE_KB*ONE_MB; - static const char* ARCHIVE_CONF_DISK = "disk"; static const char* ARCHIVE_CONF_DAYS = "days"; diff --git a/cpp/src/db/meta/MetaTypes.h b/cpp/src/db/meta/MetaTypes.h index 0e554b2330..852a416c88 100644 --- a/cpp/src/db/meta/MetaTypes.h +++ b/cpp/src/db/meta/MetaTypes.h @@ -6,6 +6,7 @@ #pragma once #include "db/engine/ExecutionEngine.h" +#include "db/Constants.h" #include #include @@ -33,7 +34,7 @@ struct TableSchema { int64_t created_on_ = 0; int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; int32_t nlist_ = 16384; - int32_t index_file_size_ = 1024; //MB + int32_t index_file_size_ = 1024*ONE_MB; int32_t metric_type_ = (int)MetricType::L2; }; // TableSchema diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index 46c5cd1d68..a243630a57 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -424,7 +424,7 @@ Status MySQLMetaImpl::UpdateTableIndexParam(const std::string &table_id, const T "created_on = " << created_on << ", " << "engine_type_ = " << index.engine_type_ << ", " << "nlist = " << index.nlist_ << ", " << - "index_file_size = " << index.index_file_size_ << ", " << + "index_file_size = " << index.index_file_size_*ONE_MB << ", " << "metric_type = " << index.metric_type_ << ", " << "WHERE id = " << quote << table_id << ";"; @@ -481,7 +481,7 @@ Status MySQLMetaImpl::DescribeTableIndex(const std::string &table_id, TableIndex index.engine_type_ = resRow["engine_type"]; index.nlist_ = resRow["nlist"]; - index.index_file_size_ = resRow["index_file_size"]; + index.index_file_size_ = resRow["index_file_size"]/ONE_MB; index.metric_type_ = resRow["metric_type"]; } else { return Status::NotFound("Table " + table_id + " not found"); @@ -652,7 +652,7 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { } Query describeTableQuery = connectionPtr->query(); - describeTableQuery << "SELECT id, dimension, engine_type " << + describeTableQuery << "SELECT id, state, dimension, engine_type, nlist, index_file_size, metric_type " << "FROM Tables " << "WHERE table_id = " << quote << table_schema.table_id_ << " " << "AND state <> " << std::to_string(TableSchema::TO_DELETE) << ";"; @@ -667,9 +667,17 @@ Status MySQLMetaImpl::DescribeTable(TableSchema &table_schema) { table_schema.id_ = resRow["id"]; //implicit conversion + table_schema.state_ = resRow["state"]; + table_schema.dimension_ = resRow["dimension"]; table_schema.engine_type_ = resRow["engine_type"]; + + table_schema.nlist_ = resRow["nlist"]; + + table_schema.index_file_size_ = resRow["index_file_size"]; + + table_schema.metric_type_ = resRow["metric_type"]; } else { return Status::NotFound("Table " + table_schema.table_id_ + " not found"); } @@ -1152,6 +1160,15 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, try { MetricCollector metric; + + //check table existence + TableSchema table_schema; + table_schema.table_id_ = table_id; + auto status = DescribeTable(table_schema); + if (!status.ok()) { + return status; + } + StoreQueryResult res; { ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab); @@ -1172,16 +1189,12 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, res = filesToMergeQuery.store(); } //Scoped Connection - TableSchema table_schema; - table_schema.table_id_ = table_id; - auto status = DescribeTable(table_schema); - - if (!status.ok()) { - return status; - } - - TableFileSchema table_file; for (auto &resRow : res) { + TableFileSchema table_file; + table_file.file_size_ = resRow["file_size"]; + if(table_file.file_size_ >= table_schema.index_file_size_) { + continue;//skip large file + } table_file.id_ = resRow["id"]; //implicit conversion @@ -1195,8 +1208,6 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, table_file.file_type_ = resRow["file_type"]; - table_file.file_size_ = resRow["file_size"]; - table_file.row_count_ = resRow["row_count"]; table_file.date_ = resRow["date"]; diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index 38f68f8638..f93e421698 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -271,15 +271,25 @@ Status SqliteMetaImpl::DescribeTable(TableSchema &table_schema) { MetricCollector metric; auto groups = ConnectorPtr->select(columns(&TableSchema::id_, + &TableSchema::state_, &TableSchema::dimension_, - &TableSchema::engine_type_), + &TableSchema::created_on_, + &TableSchema::engine_type_, + &TableSchema::nlist_, + &TableSchema::index_file_size_, + &TableSchema::metric_type_), where(c(&TableSchema::table_id_) == table_schema.table_id_ and c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); if (groups.size() == 1) { table_schema.id_ = std::get<0>(groups[0]); - table_schema.dimension_ = std::get<1>(groups[0]); - table_schema.engine_type_ = std::get<2>(groups[0]); + table_schema.state_ = std::get<1>(groups[0]); + table_schema.dimension_ = std::get<2>(groups[0]); + table_schema.created_on_ = std::get<3>(groups[0]); + table_schema.engine_type_ = std::get<4>(groups[0]); + table_schema.nlist_ = std::get<5>(groups[0]); + table_schema.index_file_size_ = std::get<6>(groups[0]); + table_schema.metric_type_ = std::get<7>(groups[0]); } else { return Status::NotFound("Table " + table_schema.table_id_ + " not found"); } @@ -368,7 +378,7 @@ Status SqliteMetaImpl::UpdateTableIndexParam(const std::string &table_id, const table_schema.created_on_ = std::get<3>(tables[0]); table_schema.engine_type_ = index.engine_type_; table_schema.nlist_ = index.nlist_; - table_schema.index_file_size_ = index.index_file_size_; + table_schema.index_file_size_ = index.index_file_size_*ONE_MB; table_schema.metric_type_ = index.metric_type_; ConnectorPtr->update(table_schema); @@ -408,7 +418,7 @@ Status SqliteMetaImpl::DescribeTableIndex(const std::string &table_id, TableInde if (groups.size() == 1) { index.engine_type_ = std::get<0>(groups[0]); index.nlist_ = std::get<1>(groups[0]); - index.index_file_size_ = std::get<2>(groups[0]); + index.index_file_size_ = std::get<2>(groups[0])/ONE_MB; index.metric_type_ = std::get<3>(groups[0]); } else { return Status::NotFound("Table " + table_id + " not found"); @@ -774,6 +784,15 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, try { MetricCollector metric; + //check table existence + TableSchema table_schema; + table_schema.table_id_ = table_id; + auto status = DescribeTable(table_schema); + if (!status.ok()) { + return status; + } + + //get files to merge auto selected = ConnectorPtr->select(columns(&TableFileSchema::id_, &TableFileSchema::table_id_, &TableFileSchema::file_id_, @@ -786,21 +805,17 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, c(&TableFileSchema::table_id_) == table_id), order_by(&TableFileSchema::file_size_).desc()); - TableSchema table_schema; - table_schema.table_id_ = table_id; - auto status = DescribeTable(table_schema); - - if (!status.ok()) { - return status; - } - - TableFileSchema table_file; for (auto &file : selected) { + TableFileSchema table_file; + table_file.file_size_ = std::get<4>(file); + if(table_file.file_size_ >= table_schema.index_file_size_) { + continue;//skip large file + } + table_file.id_ = std::get<0>(file); table_file.table_id_ = std::get<1>(file); table_file.file_id_ = std::get<2>(file); table_file.file_type_ = std::get<3>(file); - table_file.file_size_ = std::get<4>(file); table_file.row_count_ = std::get<5>(file); table_file.date_ = std::get<6>(file); table_file.created_on_ = std::get<7>(file); diff --git a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp index cae1f0fdfe..f55a0c4c93 100644 --- a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp +++ b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp @@ -26,7 +26,7 @@ namespace { constexpr int64_t NQ = 10; constexpr int64_t TOP_K = 10; constexpr int64_t SEARCH_TARGET = 5000; //change this value, result is different - constexpr int64_t ADD_VECTOR_LOOP = 1; + constexpr int64_t ADD_VECTOR_LOOP = 10; constexpr int64_t SECONDS_EACH_HOUR = 3600; #define BLOCK_SPLITER std::cout << "===========================================" << std::endl; diff --git a/cpp/unittest/faiss_wrapper/CMakeLists.txt b/cpp/unittest/faiss_wrapper/CMakeLists.txt deleted file mode 100644 index 10f353f00d..0000000000 --- a/cpp/unittest/faiss_wrapper/CMakeLists.txt +++ /dev/null @@ -1,56 +0,0 @@ -#------------------------------------------------------------------------------- -# Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -# Unauthorized copying of this file, via any medium is strictly prohibited. -# Proprietary and confidential. -#------------------------------------------------------------------------------- -aux_source_directory(${MILVUS_ENGINE_SRC}/wrapper wrapper_src) -aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) - -set(util_files - ${MILVUS_ENGINE_SRC}/utils/ValidationUtil.cpp) - -# Make sure that your call to link_directories takes place before your call to the relevant add_executable. -include_directories(/usr/local/cuda/include) -link_directories("/usr/local/cuda/lib64") - -set(wrapper_test_src - ${unittest_srcs} - ${wrapper_src} - ${config_files} - ${util_files} - ${require_files} - wrapper_test.cpp - ) - -add_executable(wrapper_test ${wrapper_test_src}) - -set(wrapper_libs - stdc++ - boost_system_static - boost_filesystem_static - faiss - cudart - cublas - sqlite - snappy - bz2 - z - zstd - lz4 - ) -if(${BUILD_FAISS_WITH_MKL} STREQUAL "ON") - set(wrapper_libs ${wrapper_libs} ${MKL_LIBS} ${MKL_LIBS}) -else() - set(wrapper_libs ${wrapper_libs} - lapack - openblas) -endif() - -target_link_libraries(wrapper_test ${wrapper_libs} ${unittest_libs}) -add_definitions("-DUNITTEST_ONLY") - -set(topk_test_src - topk_test.cpp - ${CMAKE_SOURCE_DIR}/src/wrapper/gpu/Topk.cu) - -install(TARGETS wrapper_test DESTINATION bin) diff --git a/cpp/unittest/faiss_wrapper/wrapper_test.cpp b/cpp/unittest/faiss_wrapper/wrapper_test.cpp deleted file mode 100644 index 3500166c6b..0000000000 --- a/cpp/unittest/faiss_wrapper/wrapper_test.cpp +++ /dev/null @@ -1,203 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved -// Unauthorized copying of this file, via any medium is strictly prohibited. -// Proprietary and confidential. -//////////////////////////////////////////////////////////////////////////////// - - - -#include "wrapper/Operand.h" -#include "wrapper/Index.h" -#include "wrapper/IndexBuilder.h" -#include "wrapper/FaissGpuResources.h" -#include "server/ServerConfig.h" - -#include -#include -#include - -using namespace zilliz::milvus; -using namespace zilliz::milvus::engine; - - -TEST(operand_test, Wrapper_Test) { - using std::cout; - using std::endl; - - auto opd = std::make_shared(); - opd->index_type = "IVF"; - opd->preproc = "OPQ"; - opd->postproc = "PQ"; - opd->metric_type = "L2"; - opd->d = 64; - - auto opd_str = operand_to_str(opd); - auto new_opd = str_to_operand(opd_str); - - // TODO: fix all place where using opd to build index. - assert(new_opd->get_index_type(10000) == opd->get_index_type(10000)); - auto opd_sq8 = std::make_shared(); - opd_sq8->index_type = "IVFSQ8"; - opd_sq8->preproc = "OPQ"; - opd_sq8->postproc = "PQ"; - opd_sq8->metric_type = "L2"; - opd_sq8->d = 64; - auto opd_str_sq8 = operand_to_str(opd_sq8); - auto new_opd_sq8 = str_to_operand(opd_str_sq8); - assert(new_opd_sq8->get_index_type(10000) == opd_sq8->get_index_type(10000)); - -} - -TEST(build_test, Wrapper_Test) { - // dimension of the vectors to index - int d = 3; - - // make a set of nt training vectors in the unit cube - size_t nt = 10000; - - // a reasonable number of cetroids to index nb vectors - int ncentroids = 16; - - std::random_device rd; - std::mt19937 gen(rd()); - - std::vector xb; - std::vector ids; - - //prepare train data - std::uniform_real_distribution<> dis_xt(-1.0, 1.0); - std::vector xt(nt * d); - for (size_t i = 0; i < nt * d; i++) { - xt[i] = dis_xt(gen); - } - - //train the index - auto opd = std::make_shared(); - opd->index_type = "IVF"; - opd->d = d; - opd->ncent = ncentroids; - IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); - auto index_1 = index_builder_1->build_all(0, xb, ids, nt, xt); - ASSERT_TRUE(index_1 != nullptr); - - // size of the database we plan to index - size_t nb = 100000; - - //prepare raw data - xb.resize(nb); - ids.resize(nb); - for (size_t i = 0; i < nb; i++) { - xb[i] = dis_xt(gen); - ids[i] = i; - } - index_1->add_with_ids(nb, xb.data(), ids.data()); - - //search in first quadrant - int nq = 1, k = 10; - std::vector xq = {0.5, 0.5, 0.5}; - float *result_dists = new float[k]; - long *result_ids = new long[k]; - index_1->search(nq, xq.data(), k, result_dists, result_ids); - - for (int i = 0; i < k; i++) { - if (result_ids[i] < 0) { - ASSERT_TRUE(false); - break; - } - - long id = result_ids[i]; - std::cout << "No." << id << " [" << xb[id * 3] << ", " << xb[id * 3 + 1] << ", " - << xb[id * 3 + 2] << "] distance = " << result_dists[i] << std::endl; - - //makesure result vector is in first quadrant - ASSERT_TRUE(xb[id * 3] > 0.0); - ASSERT_TRUE(xb[id * 3 + 1] > 0.0); - ASSERT_TRUE(xb[id * 3 + 2] > 0.0); - } - - delete[] result_dists; - delete[] result_ids; -} - -TEST(gpu_build_test, Wrapper_Test) { - using std::vector; - - int d = 256; - int nb = 3 * 1000 * 100; - int nq = 100; - vector xb(d * nb); - vector xq(d * nq); - vector ids(nb); - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution<> dis_xt(-1.0, 1.0); - for (auto &e : xb) { e = float(dis_xt(gen)); } - for (auto &e : xq) { e = float(dis_xt(gen)); } - for (int i = 0; i < nb; ++i) { ids[i] = i; } - - auto opd = std::make_shared(); - opd->index_type = "IVF"; - opd->d = d; - opd->ncent = 256; - - IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd); - auto index_1 = index_builder_1->build_all(nb, xb.data(), ids.data()); - assert(index_1->ntotal == nb); - assert(index_1->dim == d); - - // sanity check: search 5 first vectors of xb - int k = 1; - vector I(5 * k); - vector D(5 * k); - index_1->search(5, xb.data(), k, D.data(), I.data()); - for (int i = 0; i < 5; ++i) { assert(i == I[i]); } -} - -TEST(gpu_resource_test, Wrapper_Test) { - FaissGpuResources res_mgr; - FaissGpuResources::Ptr& res = res_mgr.GetGpuResources(0); - ASSERT_NE(res, nullptr); - res = res_mgr.GetGpuResources(0); - ASSERT_NE(res, nullptr); - - server::ServerConfig &config = server::ServerConfig::GetInstance(); - server::ConfigNode& server_config = config.GetConfig(server::CONFIG_SERVER); - server_config.SetValue(server::CONFIG_GPU_INDEX, "0"); - res_mgr.SelectGpu(); - int32_t gpu_num = res_mgr.GetGpu(); - ASSERT_EQ(gpu_num, 0); -} - -TEST(index_test, Wrapper_Test) { - std::vector data; - std::vector ids; - long vec_count = 10000; - for(long i = 0; i < vec_count; i++) { - data.push_back(i/3); - data.push_back(i/9); - ids.push_back(i); - } - - faiss::Index* faiss_index = faiss::index_factory(2, "IVF128,SQ8"); - faiss_index->train(vec_count, data.data()); - - std::shared_ptr raw_index(faiss_index); - engine::Index_ptr index = std::make_shared(raw_index); - index->add_with_ids(vec_count, data.data(), ids.data()); - - ASSERT_EQ(index->ntotal, vec_count); - - std::string file_name = "/tmp/index_test.t"; - write_index(index, file_name); - - server::ServerConfig &config = server::ServerConfig::GetInstance(); - server::ConfigNode& engine_config = config.GetConfig(server::CONFIG_ENGINE); - engine_config.SetValue(server::CONFIG_USE_HYBRID_INDEX, "true"); - - Index_ptr index_out = read_index(file_name); - ASSERT_NE(index_out, nullptr); - - bool res = index_out->reset(); - ASSERT_TRUE(res); -} diff --git a/cpp/unittest/server/util_test.cpp b/cpp/unittest/server/util_test.cpp index fb58b954fe..c0b1c83cf8 100644 --- a/cpp/unittest/server/util_test.cpp +++ b/cpp/unittest/server/util_test.cpp @@ -199,12 +199,22 @@ TEST(UtilTest, VALIDATE_DIMENSIONTEST) { ASSERT_EQ(server::ValidationUtil::ValidateTableDimension(1), server::SERVER_SUCCESS); } -TEST(UtilTest, VALIDATE_INDEXTYPE_TEST) { +TEST(UtilTest, VALIDATE_INDEX_TEST) { ASSERT_EQ(server::ValidationUtil::ValidateTableIndexType((int)engine::EngineType::INVALID), server::SERVER_INVALID_INDEX_TYPE); for(int i = 1; i <= (int)engine::EngineType::MAX_VALUE; i++) { ASSERT_EQ(server::ValidationUtil::ValidateTableIndexType(i), server::SERVER_SUCCESS); } ASSERT_EQ(server::ValidationUtil::ValidateTableIndexType((int)engine::EngineType::MAX_VALUE + 1), server::SERVER_INVALID_INDEX_TYPE); + + ASSERT_EQ(server::ValidationUtil::ValidateTableIndexNlist(0), server::SERVER_INVALID_INDEX_NLIST); + ASSERT_EQ(server::ValidationUtil::ValidateTableIndexNlist(100), server::SERVER_SUCCESS); + + ASSERT_EQ(server::ValidationUtil::ValidateTableIndexFileSize(0), server::SERVER_INVALID_INDEX_FILE_SIZE); + ASSERT_EQ(server::ValidationUtil::ValidateTableIndexFileSize(100), server::SERVER_SUCCESS); + + ASSERT_EQ(server::ValidationUtil::ValidateTableIndexMetricType(0), server::SERVER_INVALID_INDEX_METRIC_TYPE); + ASSERT_EQ(server::ValidationUtil::ValidateTableIndexMetricType(1), server::SERVER_SUCCESS); + ASSERT_EQ(server::ValidationUtil::ValidateTableIndexMetricType(2), server::SERVER_SUCCESS); } TEST(ValidationUtilTest, ValidateGpuTest) { From 58a49caf457a05bda41e9f9e05ab84f36851575e Mon Sep 17 00:00:00 2001 From: quicksilver Date: Thu, 22 Aug 2019 19:10:57 +0800 Subject: [PATCH 14/22] update Check_Last_Modify function Former-commit-id: 9e5d2130bb38f34eb381372a67350deda346870d --- cpp/cmake/BuildUtils.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/cmake/BuildUtils.cmake b/cpp/cmake/BuildUtils.cmake index 9c8d763853..265cdd0cbc 100644 --- a/cpp/cmake/BuildUtils.cmake +++ b/cpp/cmake/BuildUtils.cmake @@ -1,11 +1,11 @@ # Define a function that check last file modification -function(Check_Last_Modify cache_ignore_file_path working_dir last_modified_commit_id) +function(Check_Last_Modify cache_check_lists_file_path working_dir last_modified_commit_id) if(EXISTS "${working_dir}") - if(EXISTS "${cache_ignore_file_path}") + if(EXISTS "${cache_check_lists_file_path}") set(GIT_LOG_SKIP_NUM 0) set(_MATCH_ALL ON CACHE BOOL "Match all") set(_LOOP_STATUS ON CACHE BOOL "Whether out of loop") - file(STRINGS ${cache_ignore_file_path} CACHE_IGNORE_TXT) + file(STRINGS ${cache_check_lists_file_path} CACHE_IGNORE_TXT) while(_LOOP_STATUS) foreach(_IGNORE_ENTRY ${CACHE_IGNORE_TXT}) if(NOT _IGNORE_ENTRY MATCHES "^[^#]+") From d416d1c6fc09df7f5ef537d1559734931fe16e3a Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 22 Aug 2019 19:16:49 +0800 Subject: [PATCH 15/22] opt metrics Former-commit-id: 3455b1ffc9d401e8e689c5833fb5e1985b3dc7b5 --- cpp/src/metrics/Metrics.cpp | 25 +++++++++---------------- cpp/src/metrics/Metrics.h | 18 ++++-------------- 2 files changed, 13 insertions(+), 30 deletions(-) diff --git a/cpp/src/metrics/Metrics.cpp b/cpp/src/metrics/Metrics.cpp index 925bb4cd5c..23fb0a15b8 100644 --- a/cpp/src/metrics/Metrics.cpp +++ b/cpp/src/metrics/Metrics.cpp @@ -3,36 +3,29 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ - - #include "Metrics.h" #include "PrometheusMetrics.h" + namespace zilliz { namespace milvus { namespace server { MetricsBase & -Metrics::CreateMetricsCollector(MetricCollectorType collector_type) { - switch (collector_type) { - case MetricCollectorType::PROMETHEUS: - static PrometheusMetrics instance = PrometheusMetrics::GetInstance(); - return instance; - default:return MetricsBase::GetInstance(); - } +Metrics::GetInstance() { + static MetricsBase &instance = CreateMetricsCollector(); + return instance; } MetricsBase & -Metrics::GetInstance() { +Metrics::CreateMetricsCollector() { ConfigNode &config = ServerConfig::GetInstance().GetConfig(CONFIG_METRIC); - std::string collector_typr_str = config.GetValue(CONFIG_METRIC_COLLECTOR); + std::string collector_type_str = config.GetValue(CONFIG_METRIC_COLLECTOR); - if (collector_typr_str == "prometheus") { - return CreateMetricsCollector(MetricCollectorType::PROMETHEUS); - } else if (collector_typr_str == "zabbix") { - return CreateMetricsCollector(MetricCollectorType::ZABBIX); + if (collector_type_str == "prometheus") { + return PrometheusMetrics::GetInstance(); } else { - return CreateMetricsCollector(MetricCollectorType::INVALID); + return MetricsBase::GetInstance(); } } diff --git a/cpp/src/metrics/Metrics.h b/cpp/src/metrics/Metrics.h index be796eb9c4..65df7140cc 100644 --- a/cpp/src/metrics/Metrics.h +++ b/cpp/src/metrics/Metrics.h @@ -5,22 +5,14 @@ ******************************************************************************/ #pragma once -#include "utils/Error.h" -#include -#include - - -#pragma once - #include "MetricBase.h" -//#include "PrometheusMetrics.h" + namespace zilliz { namespace milvus { namespace server { #define METRICS_NOW_TIME std::chrono::system_clock::now() -//#define server::Metrics::GetInstance() server::Metrics::GetInstance() #define METRICS_MICROSECONDS(a, b) (std::chrono::duration_cast (b-a)).count(); enum class MetricCollectorType { @@ -31,15 +23,13 @@ enum class MetricCollectorType { class Metrics { public: - static MetricsBase & - CreateMetricsCollector(MetricCollectorType collector_type); + static MetricsBase &GetInstance(); - static MetricsBase & - GetInstance(); + private: + static MetricsBase &CreateMetricsCollector(); }; - } } } From 30b437582d9453bca5725baf3a0295483d855e0f Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Thu, 22 Aug 2019 22:14:17 +0800 Subject: [PATCH 16/22] add GpuCacheMgr and unittest Former-commit-id: 57e2a59b5039fca56e10c56ee5474472d87faa9a --- cpp/src/cache/Cache.h | 3 ++- cpp/src/cache/CacheMgr.cpp | 19 +++++++++++++++ cpp/src/cache/CacheMgr.h | 2 ++ cpp/src/cache/CpuCacheMgr.h | 1 + cpp/src/cache/GpuCacheMgr.cpp | 38 +++++++++++++++++------------- cpp/src/cache/GpuCacheMgr.h | 19 ++++++++------- cpp/unittest/db/db_tests.cpp | 6 ----- cpp/unittest/server/cache_test.cpp | 21 ++++++++++++++++- 8 files changed, 75 insertions(+), 34 deletions(-) diff --git a/cpp/src/cache/Cache.h b/cpp/src/cache/Cache.h index 606dc9eb07..6151718530 100644 --- a/cpp/src/cache/Cache.h +++ b/cpp/src/cache/Cache.h @@ -46,7 +46,8 @@ public: double freemem_percent() const { return freemem_percent_; }; void set_freemem_percent(double percent) { freemem_percent_ = percent; } - void set_gpu_ids(std::vector gpu_ids) { gpu_ids_.assign(gpu_ids.begin(), gpu_ids.end()); } + void set_gpu_ids(std::vector& gpu_ids) { gpu_ids_ = gpu_ids; } + std::vector gpu_ids() const { return gpu_ids_; } size_t size() const; diff --git a/cpp/src/cache/CacheMgr.cpp b/cpp/src/cache/CacheMgr.cpp index 977c7e1c42..eb3980da61 100644 --- a/cpp/src/cache/CacheMgr.cpp +++ b/cpp/src/cache/CacheMgr.cpp @@ -56,6 +56,7 @@ engine::VecIndexPtr CacheMgr::GetIndex(const std::string& key) { } void CacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) { + std::cout << "dashalk\n"; if(cache_ == nullptr) { SERVER_LOG_ERROR << "Cache doesn't exist"; return; @@ -130,6 +131,24 @@ void CacheMgr::SetCapacity(int64_t capacity) { cache_->set_capacity(capacity); } +std::vector CacheMgr::GpuIds() const { + if(cache_ == nullptr) { + SERVER_LOG_ERROR << "Cache doesn't exist"; + std::vector gpu_ids; + return gpu_ids; + } + + return cache_->gpu_ids(); +} + +void CacheMgr::SetGpuIds(std::vector gpu_ids){ + if(cache_ == nullptr) { + SERVER_LOG_ERROR << "Cache doesn't exist"; + return; + } + cache_->set_gpu_ids(gpu_ids); +} + } } } diff --git a/cpp/src/cache/CacheMgr.h b/cpp/src/cache/CacheMgr.h index b6f1ec8ef1..9abb30b92f 100644 --- a/cpp/src/cache/CacheMgr.h +++ b/cpp/src/cache/CacheMgr.h @@ -33,6 +33,8 @@ public: int64_t CacheUsage() const; int64_t CacheCapacity() const; void SetCapacity(int64_t capacity); + std::vector GpuIds() const; + void SetGpuIds(std::vector gpu_ids); protected: CacheMgr(); diff --git a/cpp/src/cache/CpuCacheMgr.h b/cpp/src/cache/CpuCacheMgr.h index 8b0f98e6b4..39e33aef89 100644 --- a/cpp/src/cache/CpuCacheMgr.h +++ b/cpp/src/cache/CpuCacheMgr.h @@ -16,6 +16,7 @@ private: CpuCacheMgr(); public: + //TODO: use smart pointer instead static CacheMgr* GetInstance() { static CpuCacheMgr s_mgr; return &s_mgr; diff --git a/cpp/src/cache/GpuCacheMgr.cpp b/cpp/src/cache/GpuCacheMgr.cpp index 19be0d0821..eb6b1dbeb7 100644 --- a/cpp/src/cache/GpuCacheMgr.cpp +++ b/cpp/src/cache/GpuCacheMgr.cpp @@ -13,35 +13,39 @@ namespace milvus { namespace cache { std::mutex GpuCacheMgr::mutex_; -std::unordered_map GpuCacheMgr::instance_; +std::unordered_map GpuCacheMgr::instance_; namespace { constexpr int64_t unit = 1024 * 1024 * 1024; + + void parse_gpu_ids(std::string gpu_ids_str, std::vector& gpu_ids) { + for (auto i = 0; i < gpu_ids_str.length(); ) { + if (gpu_ids_str[i] != ',') { + int id = 0; + while (gpu_ids_str[i] <= '9' && gpu_ids_str[i] >= '0') { + id = id * 10 + gpu_ids_str[i] - '0'; + ++i; + } + gpu_ids.push_back(id); + } else { + ++i; + } + } + } } GpuCacheMgr::GpuCacheMgr() { server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1"); - std::vector gpu_ids; - for (auto i = 0; i < gpu_ids_str.length(); ) { - if (gpu_ids_str[i] != ',') { - int id = 0; - while (gpu_ids_str[i] != ',') { - id = id * 10 + gpu_ids_str[i] - '0'; - ++i; - } - gpu_ids.push_back(id); - } else { - ++i; - } - } - cache_->set_gpu_ids(gpu_ids); - - int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 1); + int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 2); cap *= unit; cache_ = std::make_shared(cap, 1UL<<32); + std::vector gpu_ids; + parse_gpu_ids(gpu_ids_str, gpu_ids); + cache_->set_gpu_ids(gpu_ids); + double free_percent = config.GetDoubleValue(server::GPU_CACHE_FREE_PERCENT, 0.85); if (free_percent > 0.0 && free_percent <= 1.0) { cache_->set_freemem_percent(free_percent); diff --git a/cpp/src/cache/GpuCacheMgr.h b/cpp/src/cache/GpuCacheMgr.h index a1d7d4be0d..8c6a0c012c 100644 --- a/cpp/src/cache/GpuCacheMgr.h +++ b/cpp/src/cache/GpuCacheMgr.h @@ -6,33 +6,34 @@ #include "CacheMgr.h" #include +#include namespace zilliz { namespace milvus { namespace cache { +class GpuCacheMgr; +using GpuCacheMgrPtr = std::shared_ptr; + class GpuCacheMgr : public CacheMgr { -private: +public: GpuCacheMgr(); public: static CacheMgr* GetInstance(uint64_t gpu_id) { - if (!instance_[gpu_id]) { + if (instance_.find(gpu_id) == instance_.end()) { std::lock_guard lock(mutex_); - if(!instance_[gpu_id]) { - instance_.insert(std::pair(gpu_id, new GpuCacheMgr())); - } + instance_.insert(std::pair(gpu_id, std::make_shared())); +// instance_[gpu_id] = std::make_shared(); } - return instance_.at(gpu_id); -// static GpuCacheMgr s_mgr; -// return &s_mgr; + return instance_[gpu_id].get(); } void InsertItem(const std::string& key, const DataObjPtr& data) override; private: static std::mutex mutex_; - static std::unordered_map instance_; + static std::unordered_map instance_; }; } diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index f2af4b773a..2426846c15 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -9,7 +9,6 @@ #include "db/meta/MetaConsts.h" #include "db/Factories.h" #include "cache/CpuCacheMgr.h" -#include "cache/GpuCacheMgr.h #include "utils/CommonUtil.h" #include @@ -438,9 +437,4 @@ TEST_F(DBTest2, DELETE_BY_RANGE_TEST) { ConvertTimeRangeToDBDates(start_value, end_value, dates); db_->DeleteTable(TABLE_NAME, dates); -} - -TEST_F(DBTest, GPU_CACHE_MGR_TEST) { - std::vector gpu_ids = cache:: - cache::CpuCacheMgr::GetInstance()->CacheUsage(); } \ No newline at end of file diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp index 4d9379dc73..a4e19f0a98 100644 --- a/cpp/unittest/server/cache_test.cpp +++ b/cpp/unittest/server/cache_test.cpp @@ -146,7 +146,7 @@ TEST(CacheTest, CPU_CACHE_TEST) { } TEST(CacheTest, GPU_CACHE_TEST) { - cache::CacheMgr* gpu_mgr = cache::GpuCacheMgr::GetInstance(); + cache::CacheMgr* gpu_mgr = cache::GpuCacheMgr::GetInstance(0); const int dim = 256; @@ -164,6 +164,25 @@ TEST(CacheTest, GPU_CACHE_TEST) { gpu_mgr->ClearCache(); ASSERT_EQ(gpu_mgr->ItemCount(), 0); + + gpu_mgr->SetCapacity(4096000000); + for (auto i = 0; i < 3; i++) { + MockVecIndex *mock_index = new MockVecIndex(); + mock_index->ntotal_ = 1000000; //2G + engine::VecIndexPtr index(mock_index); + cache::DataObjPtr data_obj = std::make_shared(index); + std::cout << data_obj->size() <InsertItem("index_" + std::to_string(i), data_obj); + } + +// ASSERT_EQ(gpu_mgr->ItemCount(), 2); +// auto obj0 = gpu_mgr->GetItem("index_0"); +// ASSERT_EQ(obj0, nullptr); +// auto obj1 = gpu_mgr->GetItem("index_1"); +// auto obj2 = gpu_mgr->GetItem("index_2"); + gpu_mgr->ClearCache(); + ASSERT_EQ(gpu_mgr->ItemCount(), 0); + } TEST(CacheTest, INVALID_TEST) { From 4b4071f28825340ff6e8ca9beb0a05c3d24198c0 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Fri, 23 Aug 2019 09:19:50 +0800 Subject: [PATCH 17/22] rename cache_free_percent to cpu_cache_free_percent Former-commit-id: 782e596e71c9c7e9e36bb0c2977426ec4f3ebb7d --- cpp/src/server/ServerConfig.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index b42a585888..6a76399d42 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -36,7 +36,7 @@ static const char* CONFIG_LOG = "log_config"; static const char* CONFIG_CACHE = "cache_config"; static const char* CONFIG_CPU_CACHE_CAPACITY = "cpu_cache_capacity"; static const char* CONFIG_GPU_CACHE_CAPACITY = "gpu_cache_capacity"; -static const char* CACHE_FREE_PERCENT = "cache_free_percent"; +static const char* CACHE_FREE_PERCENT = "cpu_cache_free_percent"; static const char* CONFIG_INSERT_CACHE_IMMEDIATELY = "insert_cache_immediately"; static const char* CONFIG_GPU_IDS = "gpu_ids"; static const char *GPU_CACHE_FREE_PERCENT = "gpu_cache_free_percent"; From 2c69cc3734d36bd46c69d70d1097800f437a2c36 Mon Sep 17 00:00:00 2001 From: starlord Date: Fri, 23 Aug 2019 10:43:49 +0800 Subject: [PATCH 18/22] fix index nlist bug Former-commit-id: f06514ca1962826241166f4ec55f6a4acd1a0105 --- cpp/src/db/DBImpl.cpp | 9 ++-- cpp/src/db/engine/EngineFactory.cpp | 49 +++--------------- cpp/src/db/engine/EngineFactory.h | 4 +- cpp/src/db/engine/ExecutionEngine.h | 4 ++ cpp/src/db/engine/ExecutionEngineImpl.cpp | 52 ++++++++----------- cpp/src/db/engine/ExecutionEngineImpl.h | 22 +++++--- cpp/src/db/insert/MemTableFile.cpp | 4 +- cpp/src/db/meta/MetaTypes.h | 17 +++++-- cpp/src/db/meta/MySQLMetaImpl.cpp | 28 ++++++++++- cpp/src/db/meta/SqliteMetaImpl.cpp | 56 +++++++++++++-------- cpp/src/db/scheduler/task/IndexLoadTask.cpp | 6 ++- cpp/src/db/scheduler/task/SearchTask.cpp | 14 ++---- cpp/src/db/scheduler/task/SearchTask.h | 3 +- cpp/unittest/db/mem_test.cpp | 10 ++-- 14 files changed, 149 insertions(+), 129 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 07d95fe950..152e59c4c6 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -130,7 +130,7 @@ Status DBImpl::PreloadTable(const std::string &table_id) { for(auto &day_files : files) { for (auto &file : day_files.second) { - ExecutionEnginePtr engine = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_); + ExecutionEnginePtr engine = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_, (MetricType)file.metric_type_, file.nlist_); if(engine == nullptr) { ENGINE_LOG_ERROR << "Invalid engine type"; return Status::Error("Invalid engine type"); @@ -411,7 +411,8 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, //step 2: merge files ExecutionEnginePtr index = - EngineFactory::Build(table_file.dimension_, table_file.location_, (EngineType)table_file.engine_type_); + EngineFactory::Build(table_file.dimension_, table_file.location_, (EngineType)table_file.engine_type_, + (MetricType)table_file.metric_type_, table_file.nlist_); meta::TableFilesSchema updated; long index_size = 0; @@ -613,7 +614,9 @@ Status DBImpl::DropIndex(const std::string& table_id) { } Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { - ExecutionEnginePtr to_index = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_); + ExecutionEnginePtr to_index = + EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_, + (MetricType)file.metric_type_, file.nlist_); if(to_index == nullptr) { ENGINE_LOG_ERROR << "Invalid engine type"; return Status::Error("Invalid engine type"); diff --git a/cpp/src/db/engine/EngineFactory.cpp b/cpp/src/db/engine/EngineFactory.cpp index d09e9f8b97..a326d6a2c6 100644 --- a/cpp/src/db/engine/EngineFactory.cpp +++ b/cpp/src/db/engine/EngineFactory.cpp @@ -4,7 +4,6 @@ * Proprietary and confidential. ******************************************************************************/ #include "EngineFactory.h" -//#include "FaissExecutionEngine.h" #include "ExecutionEngineImpl.h" #include "db/Log.h" @@ -12,61 +11,25 @@ namespace zilliz { namespace milvus { namespace engine { -#if 0 ExecutionEnginePtr EngineFactory::Build(uint16_t dimension, const std::string &location, - EngineType type) { + EngineType index_type, + MetricType metric_type, + int32_t nlist) { - ExecutionEnginePtr execution_engine_ptr; - - switch (type) { - case EngineType::FAISS_IDMAP: { - execution_engine_ptr = - ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, BUILD_INDEX_TYPE_IDMAP, "IDMap,Flat")); - break; - } - - case EngineType::FAISS_IVFFLAT_GPU: { - execution_engine_ptr = - ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, BUILD_INDEX_TYPE_IVF, "IDMap,Flat")); - break; - } - - case EngineType::FAISS_IVFSQ8: { - execution_engine_ptr = - ExecutionEnginePtr(new FaissExecutionEngine(dimension, location, BUILD_INDEX_TYPE_IVFSQ8, "IDMap,Flat")); - break; - } - - default: { - ENGINE_LOG_ERROR << "Unsupported engine type"; - return nullptr; - } - } - - execution_engine_ptr->Init(); - return execution_engine_ptr; -} -#else -ExecutionEnginePtr -EngineFactory::Build(uint16_t dimension, - const std::string &location, - EngineType type) { - - if(type == EngineType::INVALID) { + if(index_type == EngineType::INVALID) { ENGINE_LOG_ERROR << "Unsupported engine type"; return nullptr; } - ENGINE_LOG_DEBUG << "EngineFactory EngineTypee: " << int(type); + ENGINE_LOG_DEBUG << "EngineFactory EngineTypee: " << (int)index_type; ExecutionEnginePtr execution_engine_ptr = - std::make_shared(dimension, location, type); + std::make_shared(dimension, location, index_type, metric_type, nlist); execution_engine_ptr->Init(); return execution_engine_ptr; } -#endif } } diff --git a/cpp/src/db/engine/EngineFactory.h b/cpp/src/db/engine/EngineFactory.h index d8c35468da..7f2047af9b 100644 --- a/cpp/src/db/engine/EngineFactory.h +++ b/cpp/src/db/engine/EngineFactory.h @@ -16,7 +16,9 @@ class EngineFactory { public: static ExecutionEnginePtr Build(uint16_t dimension, const std::string& location, - EngineType type); + EngineType index_type, + MetricType metric_type, + int32_t nlist); }; } diff --git a/cpp/src/db/engine/ExecutionEngine.h b/cpp/src/db/engine/ExecutionEngine.h index 0f2cf42b22..e6b832db0d 100644 --- a/cpp/src/db/engine/ExecutionEngine.h +++ b/cpp/src/db/engine/ExecutionEngine.h @@ -65,6 +65,10 @@ public: virtual Status Cache() = 0; virtual Status Init() = 0; + + virtual EngineType IndexEngineType() const = 0; + + virtual MetricType IndexMetricType() const = 0; }; using ExecutionEnginePtr = std::shared_ptr; diff --git a/cpp/src/db/engine/ExecutionEngineImpl.cpp b/cpp/src/db/engine/ExecutionEngineImpl.cpp index dd38369832..a7188d5b4e 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.cpp +++ b/cpp/src/db/engine/ExecutionEngineImpl.cpp @@ -5,7 +5,6 @@ ******************************************************************************/ #include -#include "src/server/ServerConfig.h" #include "src/metrics/Metrics.h" #include "db/Log.h" #include "utils/CommonUtil.h" @@ -22,26 +21,23 @@ namespace zilliz { namespace milvus { namespace engine { -namespace { -std::string GetMetricType() { - server::ServerConfig &config = server::ServerConfig::GetInstance(); - server::ConfigNode engine_config = config.GetConfig(server::CONFIG_ENGINE); - return engine_config.GetValue(server::CONFIG_METRICTYPE, "L2"); -} -} - ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, const std::string &location, - EngineType type) - : location_(location), dim(dimension), build_type(type) { - current_type = EngineType::FAISS_IDMAP; + EngineType index_type, + MetricType metric_type, + int32_t nlist) + : location_(location), + dim_(dimension), + index_type_(index_type), + metric_type_(metric_type), + nlist_(nlist) { index_ = CreatetVecIndex(EngineType::FAISS_IDMAP); if (!index_) throw Exception("Create Empty VecIndex"); Config build_cfg; build_cfg["dim"] = dimension; - build_cfg["metric_type"] = GetMetricType(); + build_cfg["metric_type"] = (metric_type_ == MetricType::IP) ? "IP" : "L2"; AutoGenParams(index_->GetType(), 0, build_cfg); auto ec = std::static_pointer_cast(index_)->Build(build_cfg); if (ec != server::KNOWHERE_SUCCESS) { throw Exception("Build index error"); } @@ -49,9 +45,14 @@ ExecutionEngineImpl::ExecutionEngineImpl(uint16_t dimension, ExecutionEngineImpl::ExecutionEngineImpl(VecIndexPtr index, const std::string &location, - EngineType type) - : index_(std::move(index)), location_(location), build_type(type) { - current_type = type; + EngineType index_type, + MetricType metric_type, + int32_t nlist) + : index_(std::move(index)), + location_(location), + index_type_(index_type), + metric_type_(metric_type), + nlist_(nlist) { } VecIndexPtr ExecutionEngineImpl::CreatetVecIndex(EngineType type) { @@ -204,15 +205,15 @@ ExecutionEngineImpl::BuildIndex(const std::string &location) { ENGINE_LOG_DEBUG << "Build index file: " << location << " from: " << location_; auto from_index = std::dynamic_pointer_cast(index_); - auto to_index = CreatetVecIndex(build_type); + auto to_index = CreatetVecIndex(index_type_); if (!to_index) { throw Exception("Create Empty VecIndex"); } Config build_cfg; build_cfg["dim"] = Dimension(); - build_cfg["metric_type"] = GetMetricType(); - build_cfg["gpu_id"] = gpu_num; + build_cfg["metric_type"] = (metric_type_ == MetricType::IP) ? "IP" : "L2"; + build_cfg["gpu_id"] = gpu_num_; build_cfg["nlist"] = nlist_; AutoGenParams(to_index->GetType(), Count(), build_cfg); @@ -222,7 +223,7 @@ ExecutionEngineImpl::BuildIndex(const std::string &location) { build_cfg); if (ec != server::KNOWHERE_SUCCESS) { throw Exception("Build index error"); } - return std::make_shared(to_index, location, build_type); + return std::make_shared(to_index, location, index_type_, metric_type_, nlist_); } Status ExecutionEngineImpl::Search(long n, @@ -251,16 +252,7 @@ Status ExecutionEngineImpl::Init() { using namespace zilliz::milvus::server; ServerConfig &config = ServerConfig::GetInstance(); ConfigNode server_config = config.GetConfig(CONFIG_SERVER); - gpu_num = server_config.GetInt32Value("gpu_index", 0); - - switch (build_type) { - case EngineType::FAISS_IVFSQ8: - case EngineType::FAISS_IVFFLAT: { - ConfigNode engine_config = config.GetConfig(CONFIG_ENGINE); - nlist_ = engine_config.GetInt32Value(CONFIG_NLIST, 16384); - break; - } - } + gpu_num_ = server_config.GetInt32Value("gpu_index", 0); return Status::OK(); } diff --git a/cpp/src/db/engine/ExecutionEngineImpl.h b/cpp/src/db/engine/ExecutionEngineImpl.h index 948719310c..16f4707c6a 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.h +++ b/cpp/src/db/engine/ExecutionEngineImpl.h @@ -22,11 +22,15 @@ public: ExecutionEngineImpl(uint16_t dimension, const std::string &location, - EngineType type); + EngineType index_type, + MetricType metric_type, + int32_t nlist); ExecutionEngineImpl(VecIndexPtr index, const std::string &location, - EngineType type); + EngineType index_type, + MetricType metric_type, + int32_t nlist); Status AddWithIds(long n, const float *xdata, const long *xids) override; @@ -61,6 +65,10 @@ public: Status Init() override; + EngineType IndexEngineType() const override { return index_type_; } + + MetricType IndexMetricType() const override { return metric_type_; } + private: VecIndexPtr CreatetVecIndex(EngineType type); @@ -68,14 +76,14 @@ private: protected: VecIndexPtr index_ = nullptr; - EngineType build_type; - EngineType current_type; + EngineType index_type_; + MetricType metric_type_; - int64_t dim; + int64_t dim_; std::string location_; - size_t nlist_ = 0; - int64_t gpu_num = 0; + int32_t nlist_ = 0; + int64_t gpu_num_ = 0; }; diff --git a/cpp/src/db/insert/MemTableFile.cpp b/cpp/src/db/insert/MemTableFile.cpp index 672bd50b00..f8f79c8618 100644 --- a/cpp/src/db/insert/MemTableFile.cpp +++ b/cpp/src/db/insert/MemTableFile.cpp @@ -23,7 +23,9 @@ MemTableFile::MemTableFile(const std::string &table_id, if (status.ok()) { execution_engine_ = EngineFactory::Build(table_file_schema_.dimension_, table_file_schema_.location_, - (EngineType) table_file_schema_.engine_type_); + (EngineType) table_file_schema_.engine_type_, + (MetricType)table_file_schema_.metric_type_, + table_file_schema_.nlist_); } } diff --git a/cpp/src/db/meta/MetaTypes.h b/cpp/src/db/meta/MetaTypes.h index 852a416c88..b0c3376593 100644 --- a/cpp/src/db/meta/MetaTypes.h +++ b/cpp/src/db/meta/MetaTypes.h @@ -17,6 +17,11 @@ namespace milvus { namespace engine { namespace meta { +constexpr int32_t DEFAULT_ENGINE_TYPE = (int)EngineType::FAISS_IDMAP; +constexpr int32_t DEFAULT_NLIST = 16384; +constexpr int32_t DEFAULT_INDEX_FILE_SIZE = 1024*ONE_MB; +constexpr int32_t DEFAULT_METRIC_TYPE = (int)MetricType::L2; + typedef int DateT; const DateT EmptyDate = -1; typedef std::vector DatesT; @@ -32,10 +37,10 @@ struct TableSchema { int32_t state_ = (int)NORMAL; uint16_t dimension_ = 0; int64_t created_on_ = 0; - int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; - int32_t nlist_ = 16384; - int32_t index_file_size_ = 1024*ONE_MB; - int32_t metric_type_ = (int)MetricType::L2; + int32_t engine_type_ = DEFAULT_ENGINE_TYPE; + int32_t nlist_ = DEFAULT_NLIST; + int32_t index_file_size_ = DEFAULT_INDEX_FILE_SIZE; + int32_t metric_type_ = DEFAULT_METRIC_TYPE; }; // TableSchema struct TableFileSchema { @@ -52,7 +57,6 @@ struct TableFileSchema { size_t id_ = 0; std::string table_id_; - int32_t engine_type_ = (int)EngineType::FAISS_IDMAP; std::string file_id_; int32_t file_type_ = NEW; size_t file_size_ = 0; @@ -62,6 +66,9 @@ struct TableFileSchema { std::string location_; int64_t updated_time_ = 0; int64_t created_on_ = 0; + int32_t engine_type_ = DEFAULT_ENGINE_TYPE; + int32_t nlist_ = DEFAULT_NLIST; //not persist to meta + int32_t metric_type_ = DEFAULT_METRIC_TYPE; //not persist to meta }; // TableFileSchema typedef std::vector TableFilesSchema; diff --git a/cpp/src/db/meta/MySQLMetaImpl.cpp b/cpp/src/db/meta/MySQLMetaImpl.cpp index a243630a57..954c498f7f 100644 --- a/cpp/src/db/meta/MySQLMetaImpl.cpp +++ b/cpp/src/db/meta/MySQLMetaImpl.cpp @@ -747,7 +747,7 @@ Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { } Query allTablesQuery = connectionPtr->query(); - allTablesQuery << "SELECT id, table_id, dimension, engine_type " << + allTablesQuery << "SELECT id, table_id, dimension, engine_type, nlist, index_file_size, metric_type " << "FROM Tables " << "WHERE state <> " << std::to_string(TableSchema::TO_DELETE) << ";"; @@ -769,6 +769,12 @@ Status MySQLMetaImpl::AllTables(std::vector &table_schema_array) { table_schema.engine_type_ = resRow["engine_type"]; + table_schema.nlist_ = resRow["nlist"]; + + table_schema.index_file_size_ = resRow["index_file_size"]; + + table_schema.metric_type_ = resRow["metric_type"]; + table_schema_array.emplace_back(table_schema); } } catch (const BadQuery &er) { @@ -805,6 +811,8 @@ Status MySQLMetaImpl::CreateTableFile(TableFileSchema &file_schema) { file_schema.created_on_ = utils::GetMicroSecTimeStamp(); file_schema.updated_time_ = file_schema.created_on_; file_schema.engine_type_ = table_schema.engine_type_; + file_schema.nlist_ = table_schema.nlist_; + file_schema.metric_type_ = table_schema.metric_type_; utils::GetTableFilePath(options_, file_schema); std::string id = "NULL"; //auto-increment @@ -918,6 +926,8 @@ Status MySQLMetaImpl::FilesToIndex(TableFilesSchema &files) { groups[table_file.table_id_] = table_schema; } + table_file.metric_type_ = groups[table_file.table_id_].metric_type_; + table_file.nlist_ = groups[table_file.table_id_].nlist_; table_file.dimension_ = groups[table_file.table_id_].dimension_; utils::GetTableFilePath(options_, table_file); @@ -1010,6 +1020,10 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, table_file.engine_type_ = resRow["engine_type"]; + table_file.metric_type_ = table_schema.metric_type_; + + table_file.nlist_ = table_schema.nlist_; + std::string file_id; resRow["file_id"].to_string(file_id); table_file.file_id_ = file_id; @@ -1118,6 +1132,10 @@ Status MySQLMetaImpl::FilesToSearch(const std::string &table_id, table_file.engine_type_ = resRow["engine_type"]; + table_file.metric_type_ = table_schema.metric_type_; + + table_file.nlist_ = table_schema.nlist_; + std::string file_id; resRow["file_id"].to_string(file_id); table_file.file_id_ = file_id; @@ -1214,6 +1232,10 @@ Status MySQLMetaImpl::FilesToMerge(const std::string &table_id, table_file.engine_type_ = resRow["engine_type"]; + table_file.metric_type_ = table_schema.metric_type_; + + table_file.nlist_ = table_schema.nlist_; + table_file.created_on_ = resRow["created_on"]; table_file.dimension_ = table_schema.dimension_; @@ -1293,6 +1315,10 @@ Status MySQLMetaImpl::GetTableFiles(const std::string &table_id, file_schema.engine_type_ = resRow["engine_type"]; + file_schema.metric_type_ = table_schema.metric_type_; + + file_schema.nlist_ = table_schema.nlist_; + std::string file_id; resRow["file_id"].to_string(file_id); file_schema.file_id_ = file_id; diff --git a/cpp/src/db/meta/SqliteMetaImpl.cpp b/cpp/src/db/meta/SqliteMetaImpl.cpp index f93e421698..b4859473ef 100644 --- a/cpp/src/db/meta/SqliteMetaImpl.cpp +++ b/cpp/src/db/meta/SqliteMetaImpl.cpp @@ -218,22 +218,15 @@ Status SqliteMetaImpl::DeleteTable(const std::string& table_id) { std::lock_guard meta_lock(meta_mutex_); //soft delete table - auto tables = ConnectorPtr->select(columns(&TableSchema::id_, - &TableSchema::dimension_, - &TableSchema::engine_type_, - &TableSchema::created_on_), - where(c(&TableSchema::table_id_) == table_id)); - for (auto &table : tables) { - TableSchema table_schema; - table_schema.table_id_ = table_id; - table_schema.state_ = (int)TableSchema::TO_DELETE; - table_schema.id_ = std::get<0>(table); - table_schema.dimension_ = std::get<1>(table); - table_schema.engine_type_ = std::get<2>(table); - table_schema.created_on_ = std::get<3>(table); + ConnectorPtr->update_all( + set( + c(&TableSchema::state_) = (int) TableSchema::TO_DELETE + ), + where( + c(&TableSchema::table_id_) == table_id and + c(&TableSchema::state_) != (int) TableSchema::TO_DELETE + )); - ConnectorPtr->update(table_schema); - } } catch (std::exception &e) { return HandleException("Encounter exception when delete table", e); } @@ -493,16 +486,24 @@ Status SqliteMetaImpl::AllTables(std::vector& table_schema_array) { MetricCollector metric; auto selected = ConnectorPtr->select(columns(&TableSchema::id_, - &TableSchema::table_id_, - &TableSchema::dimension_, - &TableSchema::engine_type_), + &TableSchema::table_id_, + &TableSchema::dimension_, + &TableSchema::created_on_, + &TableSchema::engine_type_, + &TableSchema::nlist_, + &TableSchema::index_file_size_, + &TableSchema::metric_type_), where(c(&TableSchema::state_) != (int)TableSchema::TO_DELETE)); for (auto &table : selected) { TableSchema schema; schema.id_ = std::get<0>(table); schema.table_id_ = std::get<1>(table); - schema.dimension_ = std::get<2>(table); - schema.engine_type_ = std::get<3>(table); + schema.created_on_ = std::get<2>(table); + schema.dimension_ = std::get<3>(table); + schema.engine_type_ = std::get<4>(table); + schema.nlist_ = std::get<5>(table); + schema.index_file_size_ = std::get<6>(table); + schema.metric_type_ = std::get<7>(table); table_schema_array.emplace_back(schema); } @@ -535,6 +536,8 @@ Status SqliteMetaImpl::CreateTableFile(TableFileSchema &file_schema) { file_schema.created_on_ = utils::GetMicroSecTimeStamp(); file_schema.updated_time_ = file_schema.created_on_; file_schema.engine_type_ = table_schema.engine_type_; + file_schema.nlist_ = table_schema.nlist_; + file_schema.metric_type_ = table_schema.metric_type_; //multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); @@ -594,6 +597,8 @@ Status SqliteMetaImpl::FilesToIndex(TableFilesSchema &files) { } groups[table_file.table_id_] = table_schema; } + table_file.metric_type_ = groups[table_file.table_id_].metric_type_; + table_file.nlist_ = groups[table_file.table_id_].nlist_; table_file.dimension_ = groups[table_file.table_id_].dimension_; files.push_back(table_file); } @@ -644,6 +649,8 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.row_count_ = std::get<5>(file); table_file.date_ = std::get<6>(file); table_file.engine_type_ = std::get<7>(file); + table_file.metric_type_ = table_schema.metric_type_; + table_file.nlist_ = table_schema.nlist_; table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); @@ -685,6 +692,8 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.row_count_ = std::get<5>(file); table_file.date_ = std::get<6>(file); table_file.engine_type_ = std::get<7>(file); + table_file.metric_type_ = table_schema.metric_type_; + table_file.nlist_ = table_schema.nlist_; table_file.dimension_ = table_schema.dimension_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); @@ -762,6 +771,8 @@ Status SqliteMetaImpl::FilesToSearch(const std::string &table_id, table_file.date_ = std::get<6>(file); table_file.engine_type_ = std::get<7>(file); table_file.dimension_ = table_schema.dimension_; + table_file.metric_type_ = table_schema.metric_type_; + table_file.nlist_ = table_schema.nlist_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); if (dateItr == files.end()) { @@ -820,6 +831,8 @@ Status SqliteMetaImpl::FilesToMerge(const std::string &table_id, table_file.date_ = std::get<6>(file); table_file.created_on_ = std::get<7>(file); table_file.dimension_ = table_schema.dimension_; + table_file.metric_type_ = table_schema.metric_type_; + table_file.nlist_ = table_schema.nlist_; utils::GetTableFilePath(options_, table_file); auto dateItr = files.find(table_file.date_); if (dateItr == files.end()) { @@ -868,8 +881,11 @@ Status SqliteMetaImpl::GetTableFiles(const std::string& table_id, file_schema.row_count_ = std::get<4>(file); file_schema.date_ = std::get<5>(file); file_schema.engine_type_ = std::get<6>(file); + file_schema.metric_type_ = table_schema.metric_type_; + file_schema.nlist_ = table_schema.nlist_; file_schema.created_on_ = std::get<7>(file); file_schema.dimension_ = table_schema.dimension_; + utils::GetTableFilePath(options_, file_schema); table_files.emplace_back(file_schema); diff --git a/cpp/src/db/scheduler/task/IndexLoadTask.cpp b/cpp/src/db/scheduler/task/IndexLoadTask.cpp index 4b242f230d..561bf07f13 100644 --- a/cpp/src/db/scheduler/task/IndexLoadTask.cpp +++ b/cpp/src/db/scheduler/task/IndexLoadTask.cpp @@ -45,7 +45,9 @@ std::shared_ptr IndexLoadTask::Execute() { //step 1: load index ExecutionEnginePtr index_ptr = EngineFactory::Build(file_->dimension_, file_->location_, - (EngineType)file_->engine_type_); + (EngineType)file_->engine_type_, + (MetricType)file_->metric_type_, + file_->nlist_); try { index_ptr->Load(); @@ -75,7 +77,7 @@ std::shared_ptr IndexLoadTask::Execute() { //step 2: return search task for later execution SearchTaskPtr task_ptr = std::make_shared(); task_ptr->index_id_ = file_->id_; - task_ptr->index_type_ = file_->file_type_; + task_ptr->file_type_ = file_->file_type_; task_ptr->index_engine_ = index_ptr; task_ptr->search_contexts_.swap(search_contexts_); return std::static_pointer_cast(task_ptr); diff --git a/cpp/src/db/scheduler/task/SearchTask.cpp b/cpp/src/db/scheduler/task/SearchTask.cpp index fd9d679d5e..4e7c0f4611 100644 --- a/cpp/src/db/scheduler/task/SearchTask.cpp +++ b/cpp/src/db/scheduler/task/SearchTask.cpp @@ -76,20 +76,10 @@ void CollectDurationMetrics(int index_type, double total_time) { } } -std::string GetMetricType() { - server::ServerConfig &config = server::ServerConfig::GetInstance(); - server::ConfigNode& engine_config = config.GetConfig(server::CONFIG_ENGINE); - return engine_config.GetValue(server::CONFIG_METRICTYPE, "L2"); -} - } SearchTask::SearchTask() : IScheduleTask(ScheduleTaskType::kSearch) { - std::string metric_type = GetMetricType(); - if(metric_type != "L2") { - metric_l2 = false; - } } std::shared_ptr SearchTask::Execute() { @@ -104,6 +94,8 @@ std::shared_ptr SearchTask::Execute() { auto start_time = METRICS_NOW_TIME; + bool metric_l2 = (index_engine_->IndexMetricType() == MetricType::L2); + std::vector output_ids; std::vector output_distence; for(auto& context : search_contexts_) { @@ -147,7 +139,7 @@ std::shared_ptr SearchTask::Execute() { auto end_time = METRICS_NOW_TIME; auto total_time = METRICS_MICROSECONDS(start_time, end_time); - CollectDurationMetrics(index_type_, total_time); + CollectDurationMetrics(file_type_, total_time); rc.ElapseFromBegin("totally cost"); diff --git a/cpp/src/db/scheduler/task/SearchTask.h b/cpp/src/db/scheduler/task/SearchTask.h index 034b53d4dc..6010046446 100644 --- a/cpp/src/db/scheduler/task/SearchTask.h +++ b/cpp/src/db/scheduler/task/SearchTask.h @@ -37,10 +37,9 @@ public: public: size_t index_id_ = 0; - int index_type_ = 0; //for metrics + int file_type_ = 0; //for metrics ExecutionEnginePtr index_engine_; std::vector search_contexts_; - bool metric_l2 = true; }; using SearchTaskPtr = std::shared_ptr; diff --git a/cpp/unittest/db/mem_test.cpp b/cpp/unittest/db/mem_test.cpp index fb4796a34a..77a83abc4e 100644 --- a/cpp/unittest/db/mem_test.cpp +++ b/cpp/unittest/db/mem_test.cpp @@ -65,9 +65,13 @@ TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) { engine::VectorSource source(n, vectors.data()); size_t num_vectors_added; - engine::ExecutionEnginePtr execution_engine_ = engine::EngineFactory::Build(table_file_schema.dimension_, - table_file_schema.location_, - (engine::EngineType) table_file_schema.engine_type_); + engine::ExecutionEnginePtr execution_engine_ = + engine::EngineFactory::Build(table_file_schema.dimension_, + table_file_schema.location_, + (engine::EngineType) table_file_schema.engine_type_, + (engine::MetricType)table_file_schema.metric_type_, + table_schema.nlist_); + engine::IDNumbers vector_ids; status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added, vector_ids); ASSERT_TRUE(status.ok()); From b576bd6f1fab62eb644720a1d0cf47ff92cf54d7 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Fri, 23 Aug 2019 11:15:14 +0800 Subject: [PATCH 19/22] modify GpuCacheMgr GetInsatnce Former-commit-id: 7323d4a63e58a6e70f8d47f6b2fbe9566197013c --- cpp/src/cache/Cache.h | 4 -- cpp/src/cache/CacheMgr.cpp | 19 ---------- cpp/src/cache/CacheMgr.h | 2 - cpp/src/cache/GpuCacheMgr.cpp | 38 +++++++++++-------- cpp/src/cache/GpuCacheMgr.h | 12 ++++-- .../examples/grpcsimple/src/ClientTest.cpp | 3 -- cpp/src/server/grpc_impl/GrpcRequestTask.cpp | 23 +++++------ cpp/unittest/server/cache_test.cpp | 2 +- 8 files changed, 44 insertions(+), 59 deletions(-) diff --git a/cpp/src/cache/Cache.h b/cpp/src/cache/Cache.h index 6151718530..4d6f32b9eb 100644 --- a/cpp/src/cache/Cache.h +++ b/cpp/src/cache/Cache.h @@ -46,9 +46,6 @@ public: double freemem_percent() const { return freemem_percent_; }; void set_freemem_percent(double percent) { freemem_percent_ = percent; } - void set_gpu_ids(std::vector& gpu_ids) { gpu_ids_ = gpu_ids; } - - std::vector gpu_ids() const { return gpu_ids_; } size_t size() const; bool exists(const std::string& key); @@ -63,7 +60,6 @@ private: int64_t usage_; int64_t capacity_; double freemem_percent_; - std::vector gpu_ids_; LRU lru_; mutable std::mutex mutex_; diff --git a/cpp/src/cache/CacheMgr.cpp b/cpp/src/cache/CacheMgr.cpp index eb3980da61..977c7e1c42 100644 --- a/cpp/src/cache/CacheMgr.cpp +++ b/cpp/src/cache/CacheMgr.cpp @@ -56,7 +56,6 @@ engine::VecIndexPtr CacheMgr::GetIndex(const std::string& key) { } void CacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) { - std::cout << "dashalk\n"; if(cache_ == nullptr) { SERVER_LOG_ERROR << "Cache doesn't exist"; return; @@ -131,24 +130,6 @@ void CacheMgr::SetCapacity(int64_t capacity) { cache_->set_capacity(capacity); } -std::vector CacheMgr::GpuIds() const { - if(cache_ == nullptr) { - SERVER_LOG_ERROR << "Cache doesn't exist"; - std::vector gpu_ids; - return gpu_ids; - } - - return cache_->gpu_ids(); -} - -void CacheMgr::SetGpuIds(std::vector gpu_ids){ - if(cache_ == nullptr) { - SERVER_LOG_ERROR << "Cache doesn't exist"; - return; - } - cache_->set_gpu_ids(gpu_ids); -} - } } } diff --git a/cpp/src/cache/CacheMgr.h b/cpp/src/cache/CacheMgr.h index 9abb30b92f..b6f1ec8ef1 100644 --- a/cpp/src/cache/CacheMgr.h +++ b/cpp/src/cache/CacheMgr.h @@ -33,8 +33,6 @@ public: int64_t CacheUsage() const; int64_t CacheCapacity() const; void SetCapacity(int64_t capacity); - std::vector GpuIds() const; - void SetGpuIds(std::vector gpu_ids); protected: CacheMgr(); diff --git a/cpp/src/cache/GpuCacheMgr.cpp b/cpp/src/cache/GpuCacheMgr.cpp index eb6b1dbeb7..4aa5626348 100644 --- a/cpp/src/cache/GpuCacheMgr.cpp +++ b/cpp/src/cache/GpuCacheMgr.cpp @@ -4,6 +4,7 @@ // Proprietary and confidential. //////////////////////////////////////////////////////////////////////////////// +#include #include "utils/Log.h" #include "GpuCacheMgr.h" #include "server/ServerConfig.h" @@ -18,34 +19,39 @@ std::unordered_map GpuCacheMgr::instance_; namespace { constexpr int64_t unit = 1024 * 1024 * 1024; - void parse_gpu_ids(std::string gpu_ids_str, std::vector& gpu_ids) { - for (auto i = 0; i < gpu_ids_str.length(); ) { - if (gpu_ids_str[i] != ',') { - int id = 0; - while (gpu_ids_str[i] <= '9' && gpu_ids_str[i] >= '0') { - id = id * 10 + gpu_ids_str[i] - '0'; - ++i; - } - gpu_ids.push_back(id); - } else { - ++i; + std::vector load() { + server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); + std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1"); + + std::vector gpu_ids; + + std::stringstream ss(gpu_ids_str); + for (int i; ss >> i;) { + gpu_ids.push_back(i); + if (ss.peek() == ',') { + ss.ignore(); } } + return gpu_ids; } } + +bool GpuCacheMgr::GpuIdInConfig(uint64_t gpu_id) { + static std::vector ids = load(); + for (auto id : ids) { + if (gpu_id == id) return true; + } + return false; +} + GpuCacheMgr::GpuCacheMgr() { server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); - std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1"); int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 2); cap *= unit; cache_ = std::make_shared(cap, 1UL<<32); - std::vector gpu_ids; - parse_gpu_ids(gpu_ids_str, gpu_ids); - cache_->set_gpu_ids(gpu_ids); - double free_percent = config.GetDoubleValue(server::GPU_CACHE_FREE_PERCENT, 0.85); if (free_percent > 0.0 && free_percent <= 1.0) { cache_->set_freemem_percent(free_percent); diff --git a/cpp/src/cache/GpuCacheMgr.h b/cpp/src/cache/GpuCacheMgr.h index 8c6a0c012c..f26dfaa1b7 100644 --- a/cpp/src/cache/GpuCacheMgr.h +++ b/cpp/src/cache/GpuCacheMgr.h @@ -19,12 +19,18 @@ class GpuCacheMgr : public CacheMgr { public: GpuCacheMgr(); -public: + static bool GpuIdInConfig(uint64_t gpu_id); + static CacheMgr* GetInstance(uint64_t gpu_id) { if (instance_.find(gpu_id) == instance_.end()) { std::lock_guard lock(mutex_); - instance_.insert(std::pair(gpu_id, std::make_shared())); -// instance_[gpu_id] = std::make_shared(); + if (instance_.find(gpu_id) == instance_.end()) { + if (GpuIdInConfig(gpu_id)) { + instance_.insert(std::pair(gpu_id, std::make_shared())); + } else { + return nullptr; + } + } } return instance_[gpu_id].get(); } diff --git a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp index 1e44c0e469..f30a23e174 100644 --- a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp +++ b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp @@ -263,9 +263,6 @@ ClientTest::Test(const std::string& address, const std::string& port) { search_record_array.push_back( std::make_pair(record_ids[SEARCH_TARGET], record_array[SEARCH_TARGET])); } - int64_t row_count; - conn->CountTable(TABLE_NAME, row_count); - std::cout << "\t" << TABLE_NAME << "(" << row_count << " rows)" << std::endl; } } diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp index e10d2ae070..8a6ce0c298 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp @@ -448,18 +448,19 @@ InsertTask::OnExecute() { // TODO: change to one dimension array in protobuf or use multiple-thread to copy the data for (size_t i = 0; i < insert_param_.row_record_array_size(); i++) { + if (insert_param_.row_record_array(i).vector_data().empty()) { + return SetError(SERVER_INVALID_ROWRECORD_ARRAY, "Row record float array is empty"); + } + uint64_t vec_dim = insert_param_.row_record_array(i).vector_data().size(); + if (vec_dim != table_info.dimension_) { + ServerError error_code = SERVER_INVALID_VECTOR_DIMENSION; + std::string error_msg = "Invalid rowrecord dimension: " + std::to_string(vec_dim) + + " vs. table dimension:" + + std::to_string(table_info.dimension_); + return SetError(error_code, error_msg); + } + //TODO: use memcpy for (size_t j = 0; j < table_info.dimension_; j++) { - if (insert_param_.row_record_array(i).vector_data().empty()) { - return SetError(SERVER_INVALID_ROWRECORD_ARRAY, "Row record float array is empty"); - } - uint64_t vec_dim = insert_param_.row_record_array(i).vector_data().size(); - if (vec_dim != table_info.dimension_) { - ServerError error_code = SERVER_INVALID_VECTOR_DIMENSION; - std::string error_msg = "Invalid rowrecord dimension: " + std::to_string(vec_dim) - + " vs. table dimension:" + - std::to_string(table_info.dimension_); - return SetError(error_code, error_msg); - } vec_f[i * table_info.dimension_ + j] = insert_param_.row_record_array(i).vector_data(j); } } diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp index a4e19f0a98..d52e34143a 100644 --- a/cpp/unittest/server/cache_test.cpp +++ b/cpp/unittest/server/cache_test.cpp @@ -165,8 +165,8 @@ TEST(CacheTest, GPU_CACHE_TEST) { gpu_mgr->ClearCache(); ASSERT_EQ(gpu_mgr->ItemCount(), 0); - gpu_mgr->SetCapacity(4096000000); for (auto i = 0; i < 3; i++) { + // TODO: use gpu index to mock MockVecIndex *mock_index = new MockVecIndex(); mock_index->ntotal_ = 1000000; //2G engine::VecIndexPtr index(mock_index); From e5383064a2c3c96c26a2f275a9a011a399fe171c Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Fri, 23 Aug 2019 11:19:15 +0800 Subject: [PATCH 20/22] add changelog Former-commit-id: 57c78623348c789206a5588aaf57f45046e052f9 --- cpp/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 7fc573ad43..e7f1cc74e1 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -40,6 +40,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-394 - Update scheduler unittest - MS-400 - Add timestamp record in task state change function - MS-402 - Add dump implementation for TaskTableItem +- MS-403 - Add GpuCacheMgr ## New Feature - MS-343 - Implement ResourceMgr From 5264144ddbdfdcb641658f557f39802bb050bd5a Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Fri, 23 Aug 2019 11:27:21 +0800 Subject: [PATCH 21/22] optimize grpc insert Former-commit-id: 1f9d5ee34055303208c48e745d1252ae3c0b60a8 --- cpp/src/server/grpc_impl/GrpcRequestTask.cpp | 30 ++++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp index db96ad6202..36462e4d06 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp @@ -428,20 +428,21 @@ InsertTask::OnExecute() { // TODO: change to one dimension array in protobuf or use multiple-thread to copy the data for (size_t i = 0; i < insert_param_.row_record_array_size(); i++) { - for (size_t j = 0; j < table_info.dimension_; j++) { - if (insert_param_.row_record_array(i).vector_data().empty()) { - return SetError(SERVER_INVALID_ROWRECORD_ARRAY, "Row record float array is empty"); - } - uint64_t vec_dim = insert_param_.row_record_array(i).vector_data().size(); - if (vec_dim != table_info.dimension_) { - ServerError error_code = SERVER_INVALID_VECTOR_DIMENSION; - std::string error_msg = "Invalid rowrecord dimension: " + std::to_string(vec_dim) - + " vs. table dimension:" + - std::to_string(table_info.dimension_); - return SetError(error_code, error_msg); - } - vec_f[i * table_info.dimension_ + j] = insert_param_.row_record_array(i).vector_data(j); + if (insert_param_.row_record_array(i).vector_data().empty()) { + return SetError(SERVER_INVALID_ROWRECORD_ARRAY, "Row record float array is empty"); } + uint64_t vec_dim = insert_param_.row_record_array(i).vector_data().size(); + if (vec_dim != table_info.dimension_) { + ServerError error_code = SERVER_INVALID_VECTOR_DIMENSION; + std::string error_msg = "Invalid rowrecord dimension: " + std::to_string(vec_dim) + + " vs. table dimension:" + + std::to_string(table_info.dimension_); + return SetError(error_code, error_msg); + } + + memcpy(&vec_f[i * table_info.dimension_], + insert_param_.row_record_array(i).vector_data().data(), + table_info.dimension_ * sizeof(float)); } rc.ElapseFromBegin("prepare vectors data"); @@ -453,8 +454,7 @@ InsertTask::OnExecute() { vec_ids[i] = insert_param_.row_id_array(i); } - stat = DBWrapper::DB()->InsertVectors(insert_param_.table_name(), vec_count, vec_f.data(), - vec_ids); + stat = DBWrapper::DB()->InsertVectors(insert_param_.table_name(), vec_count, vec_f.data(), vec_ids); rc.ElapseFromBegin("add vectors to engine"); if (!stat.ok()) { return SetError(SERVER_CACHE_ERROR, "Cache error: " + stat.ToString()); From 8e7d902a847d08d9a7ac44f712968cab2dfeb636 Mon Sep 17 00:00:00 2001 From: Yu Kun Date: Fri, 23 Aug 2019 13:10:46 +0800 Subject: [PATCH 22/22] use memcpy in insert Former-commit-id: fd1de2eca25ffc38481a49c8da914d4cb6ae7449 --- cpp/src/server/grpc_impl/GrpcRequestTask.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp index 8a6ce0c298..49dfd07330 100644 --- a/cpp/src/server/grpc_impl/GrpcRequestTask.cpp +++ b/cpp/src/server/grpc_impl/GrpcRequestTask.cpp @@ -459,10 +459,8 @@ InsertTask::OnExecute() { std::to_string(table_info.dimension_); return SetError(error_code, error_msg); } - //TODO: use memcpy - for (size_t j = 0; j < table_info.dimension_; j++) { - vec_f[i * table_info.dimension_ + j] = insert_param_.row_record_array(i).vector_data(j); - } + memcpy(static_cast(&vec_f[i * table_info.dimension_]), static_cast(insert_param_.row_record_array(i).vector_data().data()), + table_info.dimension_ * sizeof(float)); } rc.ElapseFromBegin("prepare vectors data");