From 495c8d4dd6a63581645029e1d7d7a3861a7d20fb Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 13:02:02 +0800 Subject: [PATCH] refactor(db): change rows to size in group file schema Former-commit-id: 4386308bcfa1bd7136809c7e749e140b53517ca7 --- cpp/src/db/DBImpl.cpp | 4 +-- cpp/src/db/DBMetaImpl.cpp | 46 ++++++++++++++--------------- cpp/src/db/FaissExecutionEngine.cpp | 4 +-- cpp/src/db/MemManager.cpp | 9 +++--- cpp/src/db/MetaTypes.h | 2 +- cpp/src/db/Options.h | 2 +- 6 files changed, 34 insertions(+), 33 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 971e6daff6..3673bc4ad3 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -292,7 +292,7 @@ Status DBImpl::merge_files(const std::string& group_id, const meta::Dat } else { group_file.file_type = meta::GroupFileSchema::RAW; } - group_file.rows = index_size; + group_file.size = index_size; updated.push_back(group_file); status = _pMeta->update_files(updated); LOG(DEBUG) << "New merged file " << group_file.file_id << @@ -353,7 +353,7 @@ Status DBImpl::build_index(const meta::GroupFileSchema& file) { auto index = to_index.BuildIndex(group_file.location); group_file.file_type = meta::GroupFileSchema::INDEX; - group_file.rows = index->Size(); + group_file.size = index->Size(); auto to_remove = file; to_remove.file_type = meta::GroupFileSchema::TO_DELETE; diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index d8e4d9bfd9..825d33032b 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -38,7 +38,7 @@ inline auto StoragePrototype(const std::string& path) { make_column("group_id", &GroupFileSchema::group_id), make_column("file_id", &GroupFileSchema::file_id), make_column("file_type", &GroupFileSchema::file_type), - make_column("rows", &GroupFileSchema::rows, default_value(0)), + make_column("size", &GroupFileSchema::size, default_value(0)), make_column("updated_time", &GroupFileSchema::updated_time), make_column("created_on", &GroupFileSchema::created_on), make_column("date", &GroupFileSchema::date)) @@ -227,7 +227,7 @@ Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) { group_file.file_type = GroupFileSchema::NEW; group_file.file_id = ss.str(); group_file.dimension = group_info.dimension; - group_file.rows = 0; + group_file.size = 0; group_file.created_on = utils::GetMicroSecTimeStamp(); group_file.updated_time = group_file.created_on; GetGroupFilePath(group_file); @@ -263,7 +263,7 @@ Status DBMetaImpl::files_to_index(GroupFilesSchema& files) { &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_INDEX)); @@ -275,7 +275,7 @@ Status DBMetaImpl::files_to_index(GroupFilesSchema& files) { group_file.group_id = std::get<1>(file); group_file.file_id = std::get<2>(file); group_file.file_type = std::get<3>(file); - group_file.rows = std::get<4>(file); + group_file.size = std::get<4>(file); group_file.date = std::get<5>(file); GetGroupFilePath(group_file); auto groupItr = groups.find(group_file.group_id); @@ -311,7 +311,7 @@ Status DBMetaImpl::files_to_search(const std::string &group_id, &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::group_id) == group_id and in(&GroupFileSchema::date, dates) and @@ -332,7 +332,7 @@ Status DBMetaImpl::files_to_search(const std::string &group_id, group_file.group_id = std::get<1>(file); group_file.file_id = std::get<2>(file); group_file.file_type = std::get<3>(file); - group_file.rows = std::get<4>(file); + group_file.size = std::get<4>(file); group_file.date = std::get<5>(file); group_file.dimension = group_info.dimension; GetGroupFilePath(group_file); @@ -359,7 +359,7 @@ Status DBMetaImpl::files_to_merge(const std::string& group_id, &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::RAW and c(&GroupFileSchema::group_id) == group_id)); @@ -377,7 +377,7 @@ Status DBMetaImpl::files_to_merge(const std::string& group_id, group_file.group_id = std::get<1>(file); group_file.file_id = std::get<2>(file); group_file.file_type = std::get<3>(file); - group_file.rows = std::get<4>(file); + group_file.size = std::get<4>(file); group_file.date = std::get<5>(file); group_file.dimension = group_info.dimension; GetGroupFilePath(group_file); @@ -410,7 +410,7 @@ Status DBMetaImpl::get_group_file(const std::string& group_id_, &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::file_id) == file_id_ and c(&GroupFileSchema::group_id) == group_id_ @@ -421,7 +421,7 @@ Status DBMetaImpl::get_group_file(const std::string& group_id_, group_file_info_.group_id = std::get<1>(files[0]); group_file_info_.file_id = std::get<2>(files[0]); group_file_info_.file_type = std::get<3>(files[0]); - group_file_info_.rows = std::get<4>(files[0]); + group_file_info_.size = std::get<4>(files[0]); group_file_info_.date = std::get<5>(files[0]); } else { return Status::NotFound("GroupFile " + file_id_ + " not found"); @@ -473,8 +473,8 @@ Status DBMetaImpl::archive_files() { long sum = 0; size(sum); - // PXU TODO: refactor rows - auto to_delete = (sum - limit*G)/sizeof(float); + // PXU TODO: refactor size + auto to_delete = (sum - limit*G); discard_files_of_size(to_delete); } } @@ -485,7 +485,7 @@ Status DBMetaImpl::archive_files() { Status DBMetaImpl::size(long& result) { result = 0; try { - auto selected = ConnectorPtr->select(columns(sum(&GroupFileSchema::rows)), + auto selected = ConnectorPtr->select(columns(sum(&GroupFileSchema::size)), where( c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE )); @@ -494,7 +494,7 @@ Status DBMetaImpl::size(long& result) { if(!std::get<0>(sub_query)) { continue; } - result += (long)(*std::get<0>(sub_query))*sizeof(float); + result += (long)(*std::get<0>(sub_query)); } } catch (std::exception & e) { LOG(DEBUG) << e.what(); @@ -511,7 +511,7 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { } try { auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, - &GroupFileSchema::rows), + &GroupFileSchema::size), where(c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE), order_by(&GroupFileSchema::id), limit(10)); @@ -521,10 +521,10 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { if (to_discard_size <= 0) break; GroupFileSchema group_file; group_file.id = std::get<0>(file); - group_file.rows = std::get<1>(file); + group_file.size = std::get<1>(file); ids.push_back(group_file.id); - LOG(DEBUG) << "Discard group_file.id=" << group_file.id << " group_file.rows=" << group_file.rows; - to_discard_size -= group_file.rows; + LOG(DEBUG) << "Discard group_file.id=" << group_file.id << " group_file.size=" << group_file.size; + to_discard_size -= group_file.size; } if (ids.size() == 0) { @@ -586,7 +586,7 @@ Status DBMetaImpl::cleanup_ttl_files(uint16_t seconds) { &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_DELETE and c(&GroupFileSchema::updated_time) > now - seconds*US_PS)); @@ -599,7 +599,7 @@ Status DBMetaImpl::cleanup_ttl_files(uint16_t seconds) { group_file.group_id = std::get<1>(file); group_file.file_id = std::get<2>(file); group_file.file_type = std::get<3>(file); - group_file.rows = std::get<4>(file); + group_file.size = std::get<4>(file); group_file.date = std::get<5>(file); GetGroupFilePath(group_file); if (group_file.file_type == GroupFileSchema::TO_DELETE) { @@ -622,7 +622,7 @@ Status DBMetaImpl::cleanup() { &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_DELETE or c(&GroupFileSchema::file_type) == (int)GroupFileSchema::NEW)); @@ -635,7 +635,7 @@ Status DBMetaImpl::cleanup() { group_file.group_id = std::get<1>(file); group_file.file_id = std::get<2>(file); group_file.file_type = std::get<3>(file); - group_file.rows = std::get<4>(file); + group_file.size = std::get<4>(file); group_file.date = std::get<5>(file); GetGroupFilePath(group_file); if (group_file.file_type == GroupFileSchema::TO_DELETE) { @@ -655,7 +655,7 @@ Status DBMetaImpl::cleanup() { Status DBMetaImpl::count(const std::string& group_id, long& result) { try { - auto selected = ConnectorPtr->select(columns(&GroupFileSchema::rows, + auto selected = ConnectorPtr->select(columns(&GroupFileSchema::size, &GroupFileSchema::date), where((c(&GroupFileSchema::file_type) == (int)GroupFileSchema::RAW or c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_INDEX or diff --git a/cpp/src/db/FaissExecutionEngine.cpp b/cpp/src/db/FaissExecutionEngine.cpp index 605b979481..c2165c948d 100644 --- a/cpp/src/db/FaissExecutionEngine.cpp +++ b/cpp/src/db/FaissExecutionEngine.cpp @@ -47,12 +47,12 @@ size_t FaissExecutionEngine::Count() const { template size_t FaissExecutionEngine::Size() const { - return (size_t)(Count() * pIndex_->d); + return (size_t)(Count() * pIndex_->d)*sizeof(float); } template size_t FaissExecutionEngine::PhysicalSize() const { - return (size_t)(Size()*sizeof(float)); + return (size_t)(Count() * pIndex_->d)*sizeof(float); } template diff --git a/cpp/src/db/MemManager.cpp b/cpp/src/db/MemManager.cpp index ede2a64522..601146ba66 100644 --- a/cpp/src/db/MemManager.cpp +++ b/cpp/src/db/MemManager.cpp @@ -13,6 +13,7 @@ #include "MemManager.h" #include "Meta.h" +#include "MetaConsts.h" namespace zilliz { @@ -48,16 +49,16 @@ size_t MemVectors::approximate_size() const { template Status MemVectors::serialize(std::string& group_id) { group_id = schema_.group_id; - auto rows = approximate_size(); + auto size = approximate_size(); pEE_->Serialize(); - schema_.rows = rows; - schema_.file_type = (rows >= options_.index_trigger_size) ? + schema_.size = size; + schema_.file_type = (size >= options_.index_trigger_size) ? meta::GroupFileSchema::TO_INDEX : meta::GroupFileSchema::RAW; auto status = pMeta_->update_group_file(schema_); LOG(DEBUG) << "New " << ((schema_.file_type == meta::GroupFileSchema::RAW) ? "raw" : "to_index") - << " file " << schema_.file_id << " of size " << pEE_->PhysicalSize() / (1024*1024) << " M"; + << " file " << schema_.file_id << " of size " << pEE_->Size() / meta::M << " M"; pEE_->Cache(); diff --git a/cpp/src/db/MetaTypes.h b/cpp/src/db/MetaTypes.h index fd2038eeb6..4b956590ae 100644 --- a/cpp/src/db/MetaTypes.h +++ b/cpp/src/db/MetaTypes.h @@ -40,7 +40,7 @@ struct GroupFileSchema { std::string group_id; std::string file_id; int file_type = NEW; - size_t rows; + size_t size; DateT date = EmptyDate; uint16_t dimension; std::string location = ""; diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index 26608ab976..100ab7b3ab 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -42,7 +42,7 @@ struct Options { Options(); uint16_t memory_sync_interval = 1; uint16_t merge_trigger_number = 2; - size_t index_trigger_size = 1024*1024*256; + size_t index_trigger_size = 1024*1024*1024; Env* env; DBMetaOptions meta; }; // Options