From 5a406d6ea03031cc5af394ad3fcbd1a2b5c000ec Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 19 May 2019 21:50:24 +0800 Subject: [PATCH 01/56] feat(db): add simple delete partitions Former-commit-id: 61aeee06c7d928939e6d3d28013630464cf0e046 --- cpp/src/db/DB.h | 2 ++ cpp/src/db/DBImpl.cpp | 6 ++++++ cpp/src/db/DBImpl.h | 1 + cpp/src/db/DBMetaImpl.cpp | 38 ++++++++++++++++++++++++++++++++++ cpp/src/db/DBMetaImpl.h | 2 ++ cpp/src/db/LocalMetaImpl.h | 4 +++- cpp/src/db/Meta.cpp | 21 ++++++++++++++++--- cpp/src/db/Meta.h | 6 ++++-- cpp/unittest/db/meta_tests.cpp | 5 +++++ 9 files changed, 79 insertions(+), 6 deletions(-) diff --git a/cpp/src/db/DB.h b/cpp/src/db/DB.h index 450a980c4a..6f922475b5 100644 --- a/cpp/src/db/DB.h +++ b/cpp/src/db/DB.h @@ -23,6 +23,8 @@ public: virtual Status add_group(meta::GroupSchema& group_info_) = 0; virtual Status get_group(meta::GroupSchema& group_info_) = 0; + virtual Status delete_vectors(const std::string& group_id, + const meta::DatesT& dates) = 0; virtual Status has_group(const std::string& group_id_, bool& has_or_not_) = 0; virtual Status get_group_files(const std::string& group_id_, const int date_delta_, diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 216a9b352d..72b053231b 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -44,6 +44,12 @@ Status DBImpl::get_group(meta::GroupSchema& group_info) { return _pMeta->get_group(group_info); } +template +Status DBImpl::delete_vectors(const std::string& group_id, + const meta::DatesT& dates) { + return _pMeta->delete_group_partitions(group_id, dates); +} + template Status DBImpl::has_group(const std::string& group_id_, bool& has_or_not_) { return _pMeta->has_group(group_id_, has_or_not_); diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index d2aed0af1d..45f249bd06 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -35,6 +35,7 @@ public: virtual Status add_group(meta::GroupSchema& group_info) override; virtual Status 
get_group(meta::GroupSchema& group_info) override; + virtual Status delete_vectors(const std::string& group_id, const meta::DatesT& dates) override; virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override; virtual Status get_group_files(const std::string& group_id_, diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index aaaaf21ce4..b15180817d 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -104,6 +104,44 @@ Status DBMetaImpl::initialize() { return Status::OK(); } +// PXU TODO: Temp solution. Will fix later +Status DBMetaImpl::delete_group_partitions(const std::string& group_id, + const meta::DatesT& dates) { + if (dates.size() == 0) { + return Status::OK(); + } + + GroupSchema group_info; + group_info.group_id = group_id; + auto status = get_group(group_info); + if (!status.ok()) { + return status; + } + + auto yesterday = GetDate(-2); + + for (auto& date : dates) { + if (date >= yesterday) { + return Status::Error("Could not delete partitions with 2 days"); + } + } + + try { + ConnectorPtr->update_all( + set( + c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE + ), + where( + c(&GroupFileSchema::group_id) == group_id and + in(&GroupFileSchema::date, dates) + )); + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + return Status::OK(); +} + Status DBMetaImpl::add_group(GroupSchema& group_info) { if (group_info.group_id == "") { std::stringstream ss; diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index aca0ec3141..fab2e1560a 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -24,6 +24,8 @@ public: virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override; virtual Status add_group_file(GroupFileSchema& group_file_info) override; + virtual Status delete_group_partitions(const std::string& group_id, + const meta::DatesT& dates) override; virtual Status has_group_file(const std::string& group_id_, const 
std::string& file_id_, diff --git a/cpp/src/db/LocalMetaImpl.h b/cpp/src/db/LocalMetaImpl.h index 4c324c5796..116c9e4672 100644 --- a/cpp/src/db/LocalMetaImpl.h +++ b/cpp/src/db/LocalMetaImpl.h @@ -22,7 +22,9 @@ public: virtual Status get_group(GroupSchema& group_info_) override; virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override; - virtual Status add_group_file(GroupFileSchema& group_file_info) = 0; + virtual Status add_group_file(GroupFileSchema& group_file_info) override; + /* virtual Status delete_group_partitions(const std::string& group_id, */ + /* const meta::DatesT& dates) override; */ virtual Status has_group_file(const std::string& group_id_, const std::string& file_id_, diff --git a/cpp/src/db/Meta.cpp b/cpp/src/db/Meta.cpp index 1b97c06c79..1bd3a20622 100644 --- a/cpp/src/db/Meta.cpp +++ b/cpp/src/db/Meta.cpp @@ -11,13 +11,28 @@ namespace vecwise { namespace engine { namespace meta { -DateT Meta::GetDate(const std::time_t& t) { +DateT Meta::GetDate(const std::time_t& t, int day_delta) { tm *ltm = std::localtime(&t); + if (day_delta > 0) { + do { + ++ltm->tm_mday; + --day_delta; + } while(day_delta > 0); + mktime(ltm); + } else if (day_delta < 0) { + do { + --ltm->tm_mday; + ++day_delta; + } while(day_delta < 0); + mktime(ltm); + } else { + ltm->tm_mday; + } return ltm->tm_year*10000 + ltm->tm_mon*100 + ltm->tm_mday; } -DateT Meta::GetDate() { - return GetDate(std::time(nullptr)); +DateT Meta::GetDate(int day_delta) { + return GetDate(std::time(nullptr), day_delta); } } // namespace meta diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index e0c1a84c76..ffc35b0606 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -67,6 +67,8 @@ public: virtual Status has_group(const std::string& group_id_, bool& has_or_not_) = 0; virtual Status add_group_file(GroupFileSchema& group_file_info) = 0; + virtual Status delete_group_partitions(const std::string& group_id, + const meta::DatesT& dates) = 0; virtual Status 
has_group_file(const std::string& group_id_, const std::string& file_id_, @@ -98,8 +100,8 @@ public: virtual Status count(const std::string& group_id, long& result) = 0; - static DateT GetDate(const std::time_t& t); - static DateT GetDate(); + static DateT GetDate(const std::time_t& t, int day_delta); + static DateT GetDate(int day_delta = 0); }; // MetaData diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 2ede539803..dc11feafad 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -59,6 +59,11 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { ASSERT_TRUE(status.ok()); ASSERT_EQ(group_file.file_type, new_file_type); + meta::DatesT dates; + dates.push_back(meta::Meta::GetDate()); + status = impl_->delete_group_partitions(group_file.group_id, dates); + ASSERT_FALSE(status.ok()); + /* group_file.file_type = meta::GroupFileSchema::NEW; */ /* status = impl_->get_group_file(group_file.group_id, group_file.file_id, group_file); */ /* ASSERT_TRUE(status.ok()); */ From 36cc019f2b1905b95fa32a250effab77d9a4835b Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 20 May 2019 10:47:59 +0800 Subject: [PATCH 02/56] fix(db): get group bug fix Former-commit-id: 26769b5f33531210ab4a7810a1dbd6ed78310583 --- cpp/src/db/DBMetaImpl.cpp | 2 +- cpp/src/db/Meta.cpp | 6 +++++- cpp/src/db/Meta.h | 5 +++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index b15180817d..c48ed3c421 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -118,7 +118,7 @@ Status DBMetaImpl::delete_group_partitions(const std::string& group_id, return status; } - auto yesterday = GetDate(-2); + auto yesterday = GetDateWithDelta(-2); for (auto& date : dates) { if (date >= yesterday) { diff --git a/cpp/src/db/Meta.cpp b/cpp/src/db/Meta.cpp index 1bd3a20622..31fdee74a4 100644 --- a/cpp/src/db/Meta.cpp +++ b/cpp/src/db/Meta.cpp @@ -31,10 +31,14 @@ DateT Meta::GetDate(const 
std::time_t& t, int day_delta) { return ltm->tm_year*10000 + ltm->tm_mon*100 + ltm->tm_mday; } -DateT Meta::GetDate(int day_delta) { +DateT Meta::GetDateWithDelta(int day_delta) { return GetDate(std::time(nullptr), day_delta); } +DateT Meta::GetDate() { + return GetDate(std::time(nullptr), 0); +} + } // namespace meta } // namespace engine } // namespace vecwise diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index ffc35b0606..6d2abb8dee 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -100,8 +100,9 @@ public: virtual Status count(const std::string& group_id, long& result) = 0; - static DateT GetDate(const std::time_t& t, int day_delta); - static DateT GetDate(int day_delta = 0); + static DateT GetDate(const std::time_t& t, int day_delta = 0); + static DateT GetDate(); + static DateT GetDateWithDelta(int day_delta); }; // MetaData From 781683419863f9228642d94f91746ad280f9fbdc Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 20 May 2019 11:33:10 +0800 Subject: [PATCH 03/56] feat(db): add get group file api Former-commit-id: f01dabd02174e69672a29e8dd5d27ec18f915089 --- cpp/src/db/DBMetaImpl.cpp | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index c48ed3c421..3552bf2050 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -118,7 +118,7 @@ Status DBMetaImpl::delete_group_partitions(const std::string& group_id, return status; } - auto yesterday = GetDateWithDelta(-2); + auto yesterday = GetDateWithDelta(-1); for (auto& date : dates) { if (date >= yesterday) { @@ -413,7 +413,32 @@ Status DBMetaImpl::has_group_file(const std::string& group_id_, Status DBMetaImpl::get_group_file(const std::string& group_id_, const std::string& file_id_, GroupFileSchema& group_file_info_) { - //PXU TODO + try { + auto files = ConnectorPtr->select(columns(&GroupFileSchema::id, + &GroupFileSchema::group_id, + &GroupFileSchema::file_id, + 
&GroupFileSchema::file_type, + &GroupFileSchema::rows, + &GroupFileSchema::date), + where(c(&GroupFileSchema::file_id) == file_id_ and + c(&GroupFileSchema::group_id) == group_id_ + )); + assert(files.size() <= 1); + if (files.size() == 1) { + group_file_info_.id = std::get<0>(files[0]); + group_file_info_.group_id = std::get<1>(files[0]); + group_file_info_.file_id = std::get<2>(files[0]); + group_file_info_.file_type = std::get<3>(files[0]); + group_file_info_.rows = std::get<4>(files[0]); + group_file_info_.date = std::get<5>(files[0]); + } else { + return Status::NotFound("GroupFile " + file_id_ + " not found"); + } + } catch (std::exception &e) { + LOG(DEBUG) << e.what(); + throw e; + } + return Status::OK(); } From 53452841d155fb22bfb933fdee3586a85a95813d Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 20 May 2019 11:33:26 +0800 Subject: [PATCH 04/56] test(db): test delete group partitions Former-commit-id: b8575a928d8daaf91b3420e03cf382566eb841dd --- cpp/unittest/db/meta_tests.cpp | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index dc11feafad..83177fea19 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -64,10 +64,26 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { status = impl_->delete_group_partitions(group_file.group_id, dates); ASSERT_FALSE(status.ok()); - /* group_file.file_type = meta::GroupFileSchema::NEW; */ - /* status = impl_->get_group_file(group_file.group_id, group_file.file_id, group_file); */ - /* ASSERT_TRUE(status.ok()); */ - /* ASSERT_EQ(group_file.file_type, new_file_type); */ + dates.clear(); + for (auto i=2; i < 10; ++i) { + dates.push_back(meta::Meta::GetDateWithDelta(-1*i)); + } + status = impl_->delete_group_partitions(group_file.group_id, dates); + ASSERT_TRUE(status.ok()); + + group_file.date = meta::Meta::GetDateWithDelta(-2); + status = impl_->update_group_file(group_file); + 
ASSERT_TRUE(status.ok()); + ASSERT_EQ(group_file.date, meta::Meta::GetDateWithDelta(-2)); + ASSERT_FALSE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); + + dates.clear(); + dates.push_back(group_file.date); + status = impl_->delete_group_partitions(group_file.group_id, dates); + ASSERT_TRUE(status.ok()); + status = impl_->get_group_file(group_file.group_id, group_file.file_id, group_file); + ASSERT_TRUE(status.ok()); + ASSERT_TRUE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); } TEST_F(MetaTest, GROUP_FILES_TEST) { From 27b2f8efbaf4595b9b9f84c69cd3d7ad21cda274 Mon Sep 17 00:00:00 2001 From: groot Date: Thu, 23 May 2019 10:46:20 +0800 Subject: [PATCH 05/56] prepare for gpu index Former-commit-id: c8f58a927afbb53934ec326bcdaa48267bec68c0 --- .gitignore | 4 ---- cpp/.gitignore | 6 ++++++ cpp/conf/server_config_template.yaml | 19 +++++++++++++++++++ cpp/src/CMakeLists.txt | 1 + 4 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 cpp/.gitignore create mode 100644 cpp/conf/server_config_template.yaml diff --git a/.gitignore b/.gitignore index d239dd72a6..c0b8dbb2d7 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,3 @@ cmake_build *.lo *.tar.gz *.log - -cpp/third_party/thrift-0.12.0/ -cpp/third_party/faiss-1.5.1 -cpp/megasearch/ diff --git a/cpp/.gitignore b/cpp/.gitignore new file mode 100644 index 0000000000..e99e0273f3 --- /dev/null +++ b/cpp/.gitignore @@ -0,0 +1,6 @@ +third_party/thrift-0.12.0/ +third_party/faiss-1.5.1/ +third_party/bzip2-1.0.6/ +third_party/sqlite3/ +megasearch/ +conf/server_config.yaml diff --git a/cpp/conf/server_config_template.yaml b/cpp/conf/server_config_template.yaml new file mode 100644 index 0000000000..fb6f6beae2 --- /dev/null +++ b/cpp/conf/server_config_template.yaml @@ -0,0 +1,19 @@ +server_config: + address: 0.0.0.0 + port: 33001 + transfer_protocol: json #optional: binary, compact, json, debug + server_mode: thread_pool #optional: simple, thread_pool + gpu_index: 0 #which gpu to be used + 
+db_config: + db_path: /tmp/vecwise + db_backend_url: http://127.0.0.1 + db_flush_interval: 5 #unit: second + idmapper_max_open_file: 128 + +license_config: + license_path: "/tmp/system.license" + +cache_config: + cpu_cache_capacity: 16 # unit: GB + gpu_cache_capacity: 2 # unit: GB \ No newline at end of file diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index d7978db37d..a3d9effb46 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -123,6 +123,7 @@ if (ENABLE_LICENSE STREQUAL "ON") add_executable(get_sys_info ${get_sys_info_src}) target_link_libraries(get_sys_info ${license_libs} vecwise_license) target_link_libraries(license_generator ${license_libs}) + install(TARGETS get_sys_info DESTINATION bin) endif () install(TARGETS vecwise_server DESTINATION bin) \ No newline at end of file From d07379e51eea1c833d05f02d28a1a148bf3d0722 Mon Sep 17 00:00:00 2001 From: groot Date: Thu, 23 May 2019 11:01:11 +0800 Subject: [PATCH 06/56] prepare for gpu index Former-commit-id: 3ee5d9b3575407dbc8226cc0e4df634386cdf61f --- cpp/conf/server_config.yaml | 35 ++++------------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/cpp/conf/server_config.yaml b/cpp/conf/server_config.yaml index 40d10d0a5a..fb6f6beae2 100644 --- a/cpp/conf/server_config.yaml +++ b/cpp/conf/server_config.yaml @@ -1,45 +1,18 @@ server_config: address: 0.0.0.0 port: 33001 - transfer_protocol: json #optional: binary, compact, json, debug + transfer_protocol: json #optional: binary, compact, json, debug server_mode: thread_pool #optional: simple, thread_pool + gpu_index: 0 #which gpu to be used db_config: db_path: /tmp/vecwise db_backend_url: http://127.0.0.1 - db_flush_interval: 5 #unit: second + db_flush_interval: 5 #unit: second idmapper_max_open_file: 128 license_config: - license_path: "/home/jinhai/Documents/development/vecwise_engine/license/system.license" - -log_config: - global: - format: "%datetime | %level | %logger | %msg" - filename: 
"/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-global.log" - enabled: true - to_file: true - to_standard_output: true - subsecond_precision: 3 - performance_tracking: false - max_log_file_size: 2097152 # throw log files away after 2mb - debug: - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-debug.log" - enabled: true - warning: - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-warning.log" - trace: - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-trace.log" - verbose: - format: "%datetime{%d/%m/%y} | %level-%vlevel | %msg" - to_file: false - to_standard_output: true - error: - enabled: false - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-error.log" - fatal: - enabled: false - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-fatal.log" + license_path: "/tmp/system.license" cache_config: cpu_cache_capacity: 16 # unit: GB From bbeb77dcae3f08be3a715ba3f4c9dd8565ff0bf7 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Thu, 23 May 2019 18:58:55 +0800 Subject: [PATCH 07/56] feat(db): add Archive conf in options Former-commit-id: fdae1cf2a4f86d12753ce6c4cc665178df0c1f76 --- cpp/src/db/Options.cpp | 44 +++++++++++++++++++++++++++++++++++ cpp/src/db/Options.h | 17 ++++++++++++++ cpp/unittest/db/db_tests.cpp | 45 ++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) diff --git a/cpp/src/db/Options.cpp b/cpp/src/db/Options.cpp index 2a0c01af8b..2a15f0722d 100644 --- a/cpp/src/db/Options.cpp +++ b/cpp/src/db/Options.cpp @@ -3,6 +3,11 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. 
******************************************************************************/ +#include +#include +#include +#include + #include "Options.h" #include "Env.h" #include "DBMetaImpl.h" @@ -15,6 +20,45 @@ Options::Options() : env(Env::Default()) { } +ArchiveConf::ArchiveConf(const std::string& type, const std::string& criterias) { + ParseType(type); + ParseCritirias(criterias); +} + +void ArchiveConf::ParseCritirias(const std::string& criterias) { + std::stringstream ss(criterias); + std::vector tokens; + + boost::algorithm::split(tokens, criterias, boost::is_any_of(";")); + + if (tokens.size() == 0) { + return; + } + + for (auto& token : tokens) { + std::vector kv; + boost::algorithm::split(kv, token, boost::is_any_of(":")); + if (kv.size() != 2) { + LOG(WARNING) << "Invalid ArchiveConf Criterias: " << token << " Ignore!"; + continue; + } + if (kv[0] != "disk" && kv[0] != "days") { + LOG(WARNING) << "Invalid ArchiveConf Criterias: " << token << " Ignore!"; + continue; + } + auto value = std::stoi(kv[1]); + criterias_[kv[0]] = value; + } +} + +void ArchiveConf::ParseType(const std::string& type) { + if (type != "delete" && type != "swap") { + LOG(ERROR) << "Invalid Archive"; + assert(false); + } + type_ = type; +} + /* DBMetaOptions::DBMetaOptions(const std::string& dbpath, */ /* const std::string& uri) */ /* : path(dbpath), backend_uri(uri) { */ diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index 5bbcf6dabe..ee2d62f715 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -7,6 +7,7 @@ #include #include +#include namespace zilliz { namespace vecwise { @@ -14,6 +15,22 @@ namespace engine { class Env; +struct ArchiveConf { + using CriteriaT = std::map; + + ArchiveConf(const std::string& type, const std::string& criterias = "disk:512"); + + const std::string& GetType() const { return type_; } + const CriteriaT GetCriterias() const { return criterias_; } + +private: + void ParseCritirias(const std::string& type); + void ParseType(const std::string& 
criterias); + + std::string type_; + CriteriaT criterias_; +}; + struct DBMetaOptions { /* DBMetaOptions(const std::string&, const std::string&); */ std::string path; diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index c9bc958b99..8030e574e2 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -12,6 +12,51 @@ using namespace zilliz::vecwise; +TEST_F(DBTest, CONFIG_TEST) { + { + EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); + } + { + engine::ArchiveConf conf("delete"); + ASSERT_EQ(conf.GetType(), "delete"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["disk"] == 512); + } + { + engine::ArchiveConf conf("swap"); + ASSERT_EQ(conf.GetType(), "swap"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["disk"] == 512); + } + { + ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "disk:")); + ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "disk:a")); + engine::ArchiveConf conf("swap", "disk:1024"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["disk"] == 1024); + } + { + ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:")); + ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a")); + engine::ArchiveConf conf("swap", "days:100"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["days"] == 100); + } + { + ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:")); + ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a")); + engine::ArchiveConf conf("swap", "days:100;disk:200"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 2); + ASSERT_TRUE(criterias["days"] == 100); + ASSERT_TRUE(criterias["disk"] == 200); + } +} + TEST_F(DBTest, DB_TEST) { static const std::string group_name = "test_group"; From 89cdeb11810a1b938881ffc7f9363ac6434aa740 Mon Sep 17 
00:00:00 2001 From: Xu Peng Date: Thu, 23 May 2019 20:22:39 +0800 Subject: [PATCH 08/56] feat(db): add archive files in meta Former-commit-id: 0d284f947dbf65bbc258a037eec1bb5458fa0007 --- cpp/src/db/DBMetaImpl.cpp | 107 ++++++++++++++++++++++++++++++++--- cpp/src/db/DBMetaImpl.h | 3 + cpp/src/db/LocalMetaImpl.cpp | 5 ++ cpp/src/db/LocalMetaImpl.h | 2 + cpp/src/db/Meta.h | 4 ++ cpp/src/db/Options.h | 2 +- 6 files changed, 114 insertions(+), 9 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 3552bf2050..e506f14ea8 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -151,6 +151,7 @@ Status DBMetaImpl::add_group(GroupSchema& group_info) { } group_info.files_cnt = 0; group_info.id = -1; + group_info.created_on = GetMicroSecTimeStamp(); { try { @@ -237,7 +238,8 @@ Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) { group_file.file_id = ss.str(); group_file.dimension = group_info.dimension; group_file.rows = 0; - group_file.updated_time = GetMicroSecTimeStamp(); //ConnectorPtr->select(datetime("now", "localtime +1 hour")).front(); + group_file.created_on = GetMicroSecTimeStamp(); + group_file.updated_time = group_file.created_on; GetGroupFilePath(group_file); { @@ -449,17 +451,106 @@ Status DBMetaImpl::get_group_files(const std::string& group_id_, return Status::OK(); } +// PXU TODO: Support Swap +Status DBMetaImpl::archive_files() { + auto& criterias = _options.archive_conf.GetCriterias(); + if (criterias.size() == 0) { + return Status::OK(); + } + + for (auto kv : criterias) { + auto& criteria = kv.first; + auto& limit = kv.second; + if (criteria == "days") { + auto usecs = 3600*24*limit*1000000; + auto now = GetMicroSecTimeStamp(); + try + { + ConnectorPtr->update_all( + set( + c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE + ), + where( + c(&GroupFileSchema::created_on) < now - usecs and + c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE + )); + } catch 
(std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + } + if (criteria == "disk") { + int G = 1024*1024*1024; + long unsigned int sum = 0; + try { + auto sum_c = ConnectorPtr->sum( + &GroupFileSchema::rows, + where( + c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE + )); + sum = *sum_c; + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + // PXU TODO: refactor rows + auto to_delete = sum - limit*G/sizeof(float); + discard_files_of_size(to_delete); + } + } + + return Status::OK(); +} + +Status DBMetaImpl::discard_files_of_size(long to_discard_size) { + if (to_discard_size <= 0) { + return Status::OK(); + } + try { + auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, + &GroupFileSchema::file_type, + &GroupFileSchema::rows), + where(c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE), + order_by(&GroupFileSchema::id), + limit(10)); + + /* std::map groups; */ + + /* for (auto& file : selected) { */ + /* GroupFileSchema group_file; */ + /* group_file.id = std::get<0>(file); */ + /* group_file.group_id = std::get<1>(file); */ + /* group_file.file_id = std::get<2>(file); */ + /* group_file.file_type = std::get<3>(file); */ + /* group_file.rows = std::get<4>(file); */ + /* group_file.date = std::get<5>(file); */ + /* GetGroupFilePath(group_file); */ + /* auto groupItr = groups.find(group_file.group_id); */ + /* if (groupItr == groups.end()) { */ + /* GroupSchema group_info; */ + /* group_info.group_id = group_file.group_id; */ + /* auto status = get_group_no_lock(group_info); */ + /* if (!status.ok()) { */ + /* return status; */ + /* } */ + /* groups[group_file.group_id] = group_info; */ + /* } */ + /* group_file.dimension = groups[group_file.group_id].dimension; */ + /* files.push_back(group_file); */ + /* } */ + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + + return Status::OK(); + +} + Status DBMetaImpl::update_group_file(GroupFileSchema& group_file) { 
group_file.updated_time = GetMicroSecTimeStamp(); try { ConnectorPtr->update(group_file); - /* auto commited = ConnectorPtr->transaction([&] () mutable { */ - /* ConnectorPtr->update(group_file); */ - /* return true; */ - /* }); */ - /* if (!commited) { */ - /* return Status::DBTransactionError("Update file Error"); */ - /* } */ } catch (std::exception & e) { LOG(DEBUG) << e.what(); LOG(DEBUG) << "id= " << group_file.id << " file_id=" << group_file.file_id; diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index fab2e1560a..e2fd051b42 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -50,6 +50,8 @@ public: virtual Status files_to_index(GroupFilesSchema&) override; + virtual Status archive_files() override; + virtual Status cleanup() override; virtual Status cleanup_ttl_files(uint16_t seconds) override; @@ -62,6 +64,7 @@ public: private: + Status discard_files_of_size(long to_discard_size); long GetMicroSecTimeStamp(); Status get_group_no_lock(GroupSchema& group_info); std::string GetGroupPath(const std::string& group_id); diff --git a/cpp/src/db/LocalMetaImpl.cpp b/cpp/src/db/LocalMetaImpl.cpp index 60c23158be..2ec5c08ce4 100644 --- a/cpp/src/db/LocalMetaImpl.cpp +++ b/cpp/src/db/LocalMetaImpl.cpp @@ -241,6 +241,11 @@ Status LocalMetaImpl::update_files(GroupFilesSchema& files) { return Status::OK(); } +Status LocalMetaImpl::archive_files() { + //PXU TODO + return Status::OK(); +} + Status LocalMetaImpl::cleanup() { //PXU TODO return Status::OK(); diff --git a/cpp/src/db/LocalMetaImpl.h b/cpp/src/db/LocalMetaImpl.h index 116c9e4672..71927e8425 100644 --- a/cpp/src/db/LocalMetaImpl.h +++ b/cpp/src/db/LocalMetaImpl.h @@ -47,6 +47,8 @@ public: virtual Status files_to_index(GroupFilesSchema&) override; + virtual Status archive_files() override; + virtual Status cleanup_ttl_files(uint16_t seconds) override; virtual Status count(const std::string& group_id, long& result) override; diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index 
6d2abb8dee..74e1637f77 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -30,6 +30,7 @@ struct GroupSchema { size_t files_cnt = 0; uint16_t dimension; std::string location = ""; + long created_on; }; // GroupSchema @@ -51,6 +52,7 @@ struct GroupFileSchema { uint16_t dimension; std::string location = ""; long updated_time; + long created_on; }; // GroupFileSchema typedef std::vector GroupFilesSchema; @@ -91,6 +93,8 @@ public: virtual Status files_to_merge(const std::string& group_id, DatePartionedGroupFilesSchema& files) = 0; + virtual Status archive_files() = 0; + virtual Status files_to_index(GroupFilesSchema&) = 0; virtual Status cleanup() = 0; diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index ee2d62f715..26608ab976 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -32,9 +32,9 @@ private: }; struct DBMetaOptions { - /* DBMetaOptions(const std::string&, const std::string&); */ std::string path; std::string backend_uri; + ArchiveConf archive_conf = ArchiveConf("delete"); }; // DBMetaOptions From 5c79aaf51fb6004d3cbb92f745556b5949055df6 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Thu, 23 May 2019 20:35:59 +0800 Subject: [PATCH 09/56] feat(db): add archive files in meta part 2 Former-commit-id: 0e534409c959a15d8bf746d6fb9fad093d787bda --- cpp/src/db/DBMetaImpl.cpp | 47 ++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index e506f14ea8..8341a7a961 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -508,43 +508,40 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { } try { auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, - &GroupFileSchema::file_type, &GroupFileSchema::rows), where(c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE), order_by(&GroupFileSchema::id), limit(10)); + std::vector ids; - /* std::map groups; */ + for (auto& file : selected) { + if 
(to_discard_size <= 0) break; + GroupFileSchema group_file; + group_file.id = std::get<0>(file); + group_file.rows = std::get<1>(file); + ids.push_back(group_file.id); + to_discard_size -= group_file.rows; + } + + if (ids.size() == 0) { + return Status::OK(); + } + + ConnectorPtr->update_all( + set( + c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE + ), + where( + in(&GroupFileSchema::id, ids) + )); - /* for (auto& file : selected) { */ - /* GroupFileSchema group_file; */ - /* group_file.id = std::get<0>(file); */ - /* group_file.group_id = std::get<1>(file); */ - /* group_file.file_id = std::get<2>(file); */ - /* group_file.file_type = std::get<3>(file); */ - /* group_file.rows = std::get<4>(file); */ - /* group_file.date = std::get<5>(file); */ - /* GetGroupFilePath(group_file); */ - /* auto groupItr = groups.find(group_file.group_id); */ - /* if (groupItr == groups.end()) { */ - /* GroupSchema group_info; */ - /* group_info.group_id = group_file.group_id; */ - /* auto status = get_group_no_lock(group_info); */ - /* if (!status.ok()) { */ - /* return status; */ - /* } */ - /* groups[group_file.group_id] = group_info; */ - /* } */ - /* group_file.dimension = groups[group_file.group_id].dimension; */ - /* files.push_back(group_file); */ - /* } */ } catch (std::exception & e) { LOG(DEBUG) << e.what(); throw e; } - return Status::OK(); + return discard_files_of_size(to_discard_size); } Status DBMetaImpl::update_group_file(GroupFileSchema& group_file) { From 722661778259bf575c5ca9a30f801ccb0beb2b61 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 10:55:54 +0800 Subject: [PATCH 10/56] fix(db): change type from into to size_t Former-commit-id: e1e16afbac7ea8d763c328c9903f4cbe2689d755 --- cpp/src/db/DBMetaImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 8341a7a961..2d11379d1a 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -1,4 
+1,5 @@ /******************************************************************************* + * long rows = 3*1024*1024*1024; * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. @@ -480,7 +481,7 @@ Status DBMetaImpl::archive_files() { } } if (criteria == "disk") { - int G = 1024*1024*1024; + size_t G = 1024*1024*1024; long unsigned int sum = 0; try { auto sum_c = ConnectorPtr->sum( From 5d0b5e99f4a1473dd8c22f9bf093158f43e4c842 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 10:56:15 +0800 Subject: [PATCH 11/56] test(db): add archive test Former-commit-id: e8c2116cd7bdec959950b2d2735c4f26d0a4a1d8 --- cpp/unittest/db/meta_tests.cpp | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 83177fea19..8489b0cd1b 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -86,6 +86,49 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { ASSERT_TRUE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); } +TEST_F(MetaTest, ARCHIVE_TEST) { + DBMetaOptions options; + options.path = "/tmp/vecwise_test"; + options.archive_conf = ArchiveConf("delete", "disk:41"); + + auto impl = meta::DBMetaImpl(options); + auto group_id = "meta_test_group"; + + meta::GroupSchema group; + group.group_id = group_id; + auto status = impl.add_group(group); + + meta::GroupFilesSchema files; + meta::GroupFileSchema group_file; + group_file.group_id = group.group_id; + + auto cnt = 10; + auto each_size = 2UL; + for (auto i=0; i Date: Fri, 24 May 2019 11:48:40 +0800 Subject: [PATCH 12/56] refactor(db): add utils Former-commit-id: d42ae31c70d1e981ae847454b81c24027ad49cb0 --- cpp/src/db/DBMetaImpl.cpp | 22 +++++++--------------- cpp/src/db/DBMetaImpl.h | 1 - cpp/src/db/Utils.cpp | 26 ++++++++++++++++++++++++++ cpp/src/db/Utils.h | 19 +++++++++++++++++++ 4 files changed, 
52 insertions(+), 16 deletions(-) create mode 100644 cpp/src/db/Utils.cpp create mode 100644 cpp/src/db/Utils.h diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 2d11379d1a..ef46abbab0 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -1,5 +1,4 @@ /******************************************************************************* - * long rows = 3*1024*1024*1024; * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. @@ -15,6 +14,7 @@ #include #include "DBMetaImpl.h" #include "IDGenerator.h" +#include "Utils.h" namespace zilliz { namespace vecwise { @@ -56,14 +56,6 @@ std::string DBMetaImpl::GetGroupPath(const std::string& group_id) { return _options.path + "/" + group_id; } -long DBMetaImpl::GetMicroSecTimeStamp() { - auto now = std::chrono::system_clock::now(); - auto micros = std::chrono::duration_cast( - now.time_since_epoch()).count(); - - return micros; -} - std::string DBMetaImpl::GetGroupDatePartitionPath(const std::string& group_id, DateT& date) { std::stringstream ss; ss << GetGroupPath(group_id) << "/" << date; @@ -152,7 +144,7 @@ Status DBMetaImpl::add_group(GroupSchema& group_info) { } group_info.files_cnt = 0; group_info.id = -1; - group_info.created_on = GetMicroSecTimeStamp(); + group_info.created_on = utils::GetMicroSecTimeStamp(); { try { @@ -239,7 +231,7 @@ Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) { group_file.file_id = ss.str(); group_file.dimension = group_info.dimension; group_file.rows = 0; - group_file.created_on = GetMicroSecTimeStamp(); + group_file.created_on = utils::GetMicroSecTimeStamp(); group_file.updated_time = group_file.created_on; GetGroupFilePath(group_file); @@ -464,7 +456,7 @@ Status DBMetaImpl::archive_files() { auto& limit = kv.second; if (criteria == "days") { auto usecs = 3600*24*limit*1000000; - auto now = GetMicroSecTimeStamp(); + auto now = 
utils::GetMicroSecTimeStamp(); try { ConnectorPtr->update_all( @@ -546,7 +538,7 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { } Status DBMetaImpl::update_group_file(GroupFileSchema& group_file) { - group_file.updated_time = GetMicroSecTimeStamp(); + group_file.updated_time = utils::GetMicroSecTimeStamp(); try { ConnectorPtr->update(group_file); } catch (std::exception & e) { @@ -561,7 +553,7 @@ Status DBMetaImpl::update_files(GroupFilesSchema& files) { try { auto commited = ConnectorPtr->transaction([&] () mutable { for (auto& file : files) { - file.updated_time = GetMicroSecTimeStamp(); + file.updated_time = utils::GetMicroSecTimeStamp(); ConnectorPtr->update(file); } return true; @@ -577,7 +569,7 @@ Status DBMetaImpl::update_files(GroupFilesSchema& files) { } Status DBMetaImpl::cleanup_ttl_files(uint16_t seconds) { - auto now = GetMicroSecTimeStamp(); + auto now = utils::GetMicroSecTimeStamp(); try { auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, &GroupFileSchema::group_id, diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index e2fd051b42..6433361d7a 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -65,7 +65,6 @@ public: private: Status discard_files_of_size(long to_discard_size); - long GetMicroSecTimeStamp(); Status get_group_no_lock(GroupSchema& group_info); std::string GetGroupPath(const std::string& group_id); std::string GetGroupDatePartitionPath(const std::string& group_id, DateT& date); diff --git a/cpp/src/db/Utils.cpp b/cpp/src/db/Utils.cpp new file mode 100644 index 0000000000..e459bab4bb --- /dev/null +++ b/cpp/src/db/Utils.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. 
+ ******************************************************************************/ + +#include +#include "Utils.h" + +namespace zilliz { +namespace vecwise { +namespace engine { +namespace utils { + +long GetMicroSecTimeStamp() { + auto now = std::chrono::system_clock::now(); + auto micros = std::chrono::duration_cast( + now.time_since_epoch()).count(); + + return micros; +} + +} // namespace utils +} // namespace engine +} // namespace vecwise +} // namespace zilliz diff --git a/cpp/src/db/Utils.h b/cpp/src/db/Utils.h new file mode 100644 index 0000000000..cdcd37b832 --- /dev/null +++ b/cpp/src/db/Utils.h @@ -0,0 +1,19 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#pragma once + + +namespace zilliz { +namespace vecwise { +namespace engine { +namespace utils { + +long GetMicroSecTimeStamp(); + +} // namespace utils +} // namespace engine +} // namespace vecwise +} // namespace zilliz From e46a1ece01874c13479d96054254f4060beffa54 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 14:56:13 +0800 Subject: [PATCH 13/56] fix(db): update schema and unsigned long type handling Former-commit-id: 59c0446b0f1a2e1c3dc4b01ce5cf088f66d23178 --- cpp/src/db/DBMetaImpl.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index ef46abbab0..b913399297 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -29,6 +29,7 @@ inline auto StoragePrototype(const std::string& path) { make_column("id", &GroupSchema::id, primary_key()), make_column("group_id", &GroupSchema::group_id, unique()), make_column("dimension", &GroupSchema::dimension), + make_column("created_on", &GroupSchema::created_on), 
make_column("files_cnt", &GroupSchema::files_cnt, default_value(0))), make_table("GroupFile", make_column("id", &GroupFileSchema::id, primary_key()), @@ -37,6 +38,7 @@ inline auto StoragePrototype(const std::string& path) { make_column("file_type", &GroupFileSchema::file_type), make_column("rows", &GroupFileSchema::rows, default_value(0)), make_column("updated_time", &GroupFileSchema::updated_time), + make_column("created_on", &GroupFileSchema::created_on), make_column("date", &GroupFileSchema::date)) ); @@ -455,8 +457,9 @@ Status DBMetaImpl::archive_files() { auto& criteria = kv.first; auto& limit = kv.second; if (criteria == "days") { - auto usecs = 3600*24*limit*1000000; - auto now = utils::GetMicroSecTimeStamp(); + long usecs = 3600*24*limit*1000000UL; + long now = utils::GetMicroSecTimeStamp(); + LOG(DEBUG) << "Limit " << limit << " TimeLimit " << now - usecs; try { ConnectorPtr->update_all( @@ -464,7 +467,7 @@ Status DBMetaImpl::archive_files() { c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE ), where( - c(&GroupFileSchema::created_on) < now - usecs and + c(&GroupFileSchema::created_on) < (long)(now - usecs) and c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE )); } catch (std::exception & e) { @@ -473,7 +476,7 @@ Status DBMetaImpl::archive_files() { } } if (criteria == "disk") { - size_t G = 1024*1024*1024; + size_t G = 1024*1024*1024UL; long unsigned int sum = 0; try { auto sum_c = ConnectorPtr->sum( From 9fcfb1e8e531130e9347042fe0474885315975bf Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 15:31:18 +0800 Subject: [PATCH 14/56] refactor(db): remove dummy print Former-commit-id: d085da4d7b5f81cf3183393751aa80b7f5f966e7 --- cpp/src/db/DBMetaImpl.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index b913399297..3ad8d3fe49 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -459,7 +459,6 @@ Status DBMetaImpl::archive_files() { if 
(criteria == "days") { long usecs = 3600*24*limit*1000000UL; long now = utils::GetMicroSecTimeStamp(); - LOG(DEBUG) << "Limit " << limit << " TimeLimit " << now - usecs; try { ConnectorPtr->update_all( From 5cb98b83962ecfa65bf8ed1044df2237b5ce69f9 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 15:35:29 +0800 Subject: [PATCH 15/56] test(db): add test for archive days Former-commit-id: 26da52668e347f6157408be234dc643fd3b2621f --- cpp/unittest/db/meta_tests.cpp | 55 +++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 8489b0cd1b..1347fcf755 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -6,10 +6,13 @@ #include #include #include +#include +#include #include "utils.h" #include "db/DBMetaImpl.h" #include "db/Factories.h" +#include "db/Utils.h" using namespace zilliz::vecwise::engine; @@ -86,7 +89,57 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { ASSERT_TRUE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); } -TEST_F(MetaTest, ARCHIVE_TEST) { +TEST_F(MetaTest, ARCHIVE_TEST_DAYS) { + srand(time(0)); + DBMetaOptions options; + options.path = "/tmp/vecwise_test"; + int days_num = rand() % 100; + std::stringstream ss; + ss << "days:" << days_num; + options.archive_conf = ArchiveConf("delete", ss.str()); + + auto impl = meta::DBMetaImpl(options); + auto group_id = "meta_test_group"; + + meta::GroupSchema group; + group.group_id = group_id; + auto status = impl.add_group(group); + + meta::GroupFilesSchema files; + meta::GroupFileSchema group_file; + group_file.group_id = group.group_id; + + auto cnt = 100; + long ts = utils::GetMicroSecTimeStamp(); + std::vector days; + for (auto i=0; i Date: Fri, 24 May 2019 16:43:01 +0800 Subject: [PATCH 16/56] refactor(db): add some debug print for newly added archive Former-commit-id: 5741a45032d63e21fbcaa3cc6fdacd7363fb81d7 --- cpp/src/db/DBMetaImpl.cpp | 2 ++ 1 file changed, 
2 insertions(+) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 3ad8d3fe49..172e7f42f3 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -498,6 +498,7 @@ Status DBMetaImpl::archive_files() { } Status DBMetaImpl::discard_files_of_size(long to_discard_size) { + LOG(DEBUG) << "Abort to discard size=" << to_discard_size; if (to_discard_size <= 0) { return Status::OK(); } @@ -515,6 +516,7 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { group_file.id = std::get<0>(file); group_file.rows = std::get<1>(file); ids.push_back(group_file.id); + LOG(DEBUG) << "Discard group_file.id=" << group_file.id << " group_file.rows=" << group_file.rows; to_discard_size -= group_file.rows; } From 9b967056edf0ddd881b0380a39dae2973b75e713 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 17:30:33 +0800 Subject: [PATCH 17/56] feat(db): add archive post merge and build index Former-commit-id: a54382cec05b4a3e955e126ed3e8b58270b725cf --- cpp/src/db/DBImpl.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 72b053231b..eee492abbd 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -326,6 +326,10 @@ Status DBImpl::background_merge_files(const std::string& group_id) { merge_files(group_id, kv.first, kv.second); } + if (has_merge) { + _pMeta->archive_files(); + } + try_build_index(); _pMeta->cleanup_ttl_files(1); @@ -362,6 +366,7 @@ Status DBImpl::build_index(const meta::GroupFileSchema& file) { << " from file " << to_remove.file_id; index->Cache(); + _pMeta->archive_files(); return Status::OK(); } From c1024aa26cbb1b58a8ec796a63b056b2260205e0 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 17:41:43 +0800 Subject: [PATCH 18/56] feat(db): add size api for meta Former-commit-id: b169c9ec743b2defe4421fbe20c5ef0970aa0ea7 --- cpp/src/db/DBMetaImpl.cpp | 33 ++++++++++++++++++++------------- cpp/src/db/DBMetaImpl.h | 2 ++ 
cpp/src/db/LocalMetaImpl.cpp | 5 +++++ cpp/src/db/LocalMetaImpl.h | 2 ++ cpp/src/db/Meta.h | 2 ++ 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 172e7f42f3..a4c3f5d77e 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -476,20 +476,11 @@ Status DBMetaImpl::archive_files() { } if (criteria == "disk") { size_t G = 1024*1024*1024UL; - long unsigned int sum = 0; - try { - auto sum_c = ConnectorPtr->sum( - &GroupFileSchema::rows, - where( - c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE - )); - sum = *sum_c; - } catch (std::exception & e) { - LOG(DEBUG) << e.what(); - throw e; - } + long sum = 0; + size(sum); + // PXU TODO: refactor rows - auto to_delete = sum - limit*G/sizeof(float); + auto to_delete = (sum - limit*G)/sizeof(float); discard_files_of_size(to_delete); } } @@ -497,6 +488,22 @@ Status DBMetaImpl::archive_files() { return Status::OK(); } +Status DBMetaImpl::size(long& result) { + try { + auto sum_c = ConnectorPtr->sum( + &GroupFileSchema::rows, + where( + c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE + )); + result = *sum_c*sizeof(float); + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + + return Status::OK(); +} + Status DBMetaImpl::discard_files_of_size(long to_discard_size) { LOG(DEBUG) << "Abort to discard size=" << to_discard_size; if (to_discard_size <= 0) { diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index 6433361d7a..20f91b8482 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -52,6 +52,8 @@ public: virtual Status archive_files() override; + virtual Status size(long& result) override; + virtual Status cleanup() override; virtual Status cleanup_ttl_files(uint16_t seconds) override; diff --git a/cpp/src/db/LocalMetaImpl.cpp b/cpp/src/db/LocalMetaImpl.cpp index 2ec5c08ce4..aa852a3db6 100644 --- a/cpp/src/db/LocalMetaImpl.cpp +++ 
b/cpp/src/db/LocalMetaImpl.cpp @@ -261,6 +261,11 @@ Status LocalMetaImpl::drop_all() { return Status::OK(); } +Status LocalMetaImpl::size(long& result) { + // PXU TODO + return Status::OK(); +} + Status LocalMetaImpl::count(const std::string& group_id, long& result) { // PXU TODO return Status::OK(); diff --git a/cpp/src/db/LocalMetaImpl.h b/cpp/src/db/LocalMetaImpl.h index 71927e8425..fb989d5f67 100644 --- a/cpp/src/db/LocalMetaImpl.h +++ b/cpp/src/db/LocalMetaImpl.h @@ -55,6 +55,8 @@ public: virtual Status drop_all() override; + virtual Status size(long& result) override; + private: Status GetGroupMetaInfoByPath(const std::string& path, GroupSchema& group_info); diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index 74e1637f77..e6150cea11 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -93,6 +93,8 @@ public: virtual Status files_to_merge(const std::string& group_id, DatePartionedGroupFilesSchema& files) = 0; + virtual Status size(long& result) = 0; + virtual Status archive_files() = 0; virtual Status files_to_index(GroupFilesSchema&) = 0; From b2af844f2096c6dfc0b955364a8c543507dce00e Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 18:15:50 +0800 Subject: [PATCH 19/56] feat(db): add size api for db Former-commit-id: 9d20366e22996fc7bb3e7e983ebfa9999591ba6b --- cpp/src/db/DB.h | 2 ++ cpp/src/db/DBImpl.cpp | 5 +++++ cpp/src/db/DBImpl.h | 2 ++ 3 files changed, 9 insertions(+) diff --git a/cpp/src/db/DB.h b/cpp/src/db/DB.h index 6f922475b5..7d976ad824 100644 --- a/cpp/src/db/DB.h +++ b/cpp/src/db/DB.h @@ -39,6 +39,8 @@ public: virtual Status search(const std::string& group_id, size_t k, size_t nq, const float* vectors, const meta::DatesT& dates, QueryResults& results) = 0; + virtual Status size(long& result) = 0; + virtual Status drop_all() = 0; virtual Status count(const std::string& group_id, long& result) = 0; diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index eee492abbd..971e6daff6 100644 --- a/cpp/src/db/DBImpl.cpp +++ 
b/cpp/src/db/DBImpl.cpp @@ -427,6 +427,11 @@ Status DBImpl::count(const std::string& group_id, long& result) { return _pMeta->count(group_id, result); } +template +Status DBImpl::size(long& result) { + return _pMeta->size(result); +} + template DBImpl::~DBImpl() { { diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index 45f249bd06..54c22eb48b 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -55,6 +55,8 @@ public: virtual Status count(const std::string& group_id, long& result) override; + virtual Status size(long& result) override; + virtual ~DBImpl(); private: From c90aee8972f600098e3748faa6137fc749a236d3 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 18:16:27 +0800 Subject: [PATCH 20/56] test(db): add test for overall archive Former-commit-id: a90507f675e2ac56e87fb3884d9729d48d178317 --- cpp/unittest/db/db_tests.cpp | 46 ++++++++++++++++++++++++++++++++++++ cpp/unittest/db/utils.cpp | 17 ++++++++++--- cpp/unittest/db/utils.h | 6 +++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 8030e574e2..5a6ea703aa 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -9,6 +9,7 @@ #include "utils.h" #include "db/DB.h" +#include "db/DBImpl.h" using namespace zilliz::vecwise; @@ -57,6 +58,51 @@ TEST_F(DBTest, CONFIG_TEST) { } } +TEST_F(DBTest2, ARHIVE_DISK_CHECK) { + + static const std::string group_name = "test_group"; + static const int group_dim = 256; + + engine::meta::GroupSchema group_info; + group_info.dimension = group_dim; + group_info.group_id = group_name; + engine::Status stat = db_->add_group(group_info); + + engine::meta::GroupSchema group_info_get; + group_info_get.group_id = group_name; + stat = db_->get_group(group_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(group_info_get.dimension, group_dim); + + engine::IDNumbers vector_ids; + engine::IDNumbers target_ids; + + int d = 256; + int nb = 30; + float *xb = new float[d * 
nb]; + for(int i = 0; i < nb; i++) { + for(int j = 0; j < d; j++) xb[d * i + j] = drand48(); + xb[d * i] += i / 2000.; + } + + int loop = 100000; + + for (auto i=0; iadd_vectors(group_name, nb, xb, vector_ids); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + std::this_thread::sleep_for(std::chrono::seconds(1)); + + long size; + db_->size(size); + /* LOG(DEBUG) << "size=" << size; */ + ASSERT_TRUE(size < 2UL*1024*1024*1024); + + delete [] xb; +}; + + TEST_F(DBTest, DB_TEST) { static const std::string group_name = "test_group"; diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index 7188e89107..9762668622 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -29,19 +29,30 @@ void DBTest::InitLog() { el::Loggers::reconfigureLogger("default", defaultConf); } -void DBTest::SetUp() { - InitLog(); +engine::Options DBTest::GetOptions() { auto options = engine::OptionsFactory::Build(); options.meta.path = "/tmp/vecwise_test"; + return options; +} + +void DBTest::SetUp() { + InitLog(); + auto options = GetOptions(); db_ = engine::DBFactory::Build(options, "Faiss,IDMap"); } void DBTest::TearDown() { delete db_; - auto options = engine::OptionsFactory::Build(); boost::filesystem::remove_all("/tmp/vecwise_test"); } +engine::Options DBTest2::GetOptions() { + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/vecwise_test"; + options.meta.archive_conf = engine::ArchiveConf("delete", "disk:2"); + return options; +} + void MetaTest::SetUp() { InitLog(); impl_ = engine::DBMetaImplFactory::Build(); diff --git a/cpp/unittest/db/utils.h b/cpp/unittest/db/utils.h index 21823ffad3..456b54ffcc 100644 --- a/cpp/unittest/db/utils.h +++ b/cpp/unittest/db/utils.h @@ -39,6 +39,12 @@ protected: void InitLog(); virtual void SetUp() override; virtual void TearDown() override; + virtual zilliz::vecwise::engine::Options GetOptions(); +}; + +class DBTest2 : public DBTest { +protected: + virtual 
zilliz::vecwise::engine::Options GetOptions() override; }; From dfb09528808a86e548436082a964492a621bb5cb Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 11:13:34 +0800 Subject: [PATCH 21/56] fix(db): fix size api for db meta Former-commit-id: b58a3f27ee9b574af78894eb6fb5120904430c66 --- cpp/src/db/DBMetaImpl.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index a4c3f5d77e..2b997d1043 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -489,13 +489,19 @@ Status DBMetaImpl::archive_files() { } Status DBMetaImpl::size(long& result) { + result = 0; try { - auto sum_c = ConnectorPtr->sum( - &GroupFileSchema::rows, + auto selected = ConnectorPtr->select(columns(sum(&GroupFileSchema::rows)), where( c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE )); - result = *sum_c*sizeof(float); + + for (auto& sub_query : selected) { + if(!std::get<0>(sub_query)) { + continue; + } + result += (long)(*std::get<0>(sub_query))*sizeof(float); + } } catch (std::exception & e) { LOG(DEBUG) << e.what(); throw e; From bb75cb93d89628272ebe5b85d9c1a1cf98a94347 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 11:14:17 +0800 Subject: [PATCH 22/56] test(db): check size Former-commit-id: 07d052fdd6713e728595aff1c21a5b6cde62085c --- cpp/unittest/db/db_tests.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 5a6ea703aa..90d35ee8f9 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -62,6 +62,7 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { static const std::string group_name = "test_group"; static const int group_dim = 256; + long size; engine::meta::GroupSchema group_info; group_info.dimension = group_dim; @@ -77,6 +78,7 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { engine::IDNumbers vector_ids; engine::IDNumbers target_ids; + db_->size(size); int 
d = 256; int nb = 30; float *xb = new float[d * nb]; @@ -94,7 +96,6 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { std::this_thread::sleep_for(std::chrono::seconds(1)); - long size; db_->size(size); /* LOG(DEBUG) << "size=" << size; */ ASSERT_TRUE(size < 2UL*1024*1024*1024); From 4eea03af5cf9cb91375e325b707192d23035f9a7 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 19 May 2019 21:50:24 +0800 Subject: [PATCH 23/56] feat(db): add simple delete partitions Former-commit-id: ed39bb26d68f4ea576e824c081e72bfb17d15b8a --- cpp/src/db/DB.h | 2 ++ cpp/src/db/DBImpl.cpp | 6 ++++++ cpp/src/db/DBImpl.h | 1 + cpp/src/db/DBMetaImpl.cpp | 38 ++++++++++++++++++++++++++++++++++ cpp/src/db/DBMetaImpl.h | 2 ++ cpp/src/db/LocalMetaImpl.h | 4 +++- cpp/src/db/Meta.cpp | 21 ++++++++++++++++--- cpp/src/db/Meta.h | 6 ++++-- cpp/unittest/db/meta_tests.cpp | 5 +++++ 9 files changed, 79 insertions(+), 6 deletions(-) diff --git a/cpp/src/db/DB.h b/cpp/src/db/DB.h index 450a980c4a..6f922475b5 100644 --- a/cpp/src/db/DB.h +++ b/cpp/src/db/DB.h @@ -23,6 +23,8 @@ public: virtual Status add_group(meta::GroupSchema& group_info_) = 0; virtual Status get_group(meta::GroupSchema& group_info_) = 0; + virtual Status delete_vectors(const std::string& group_id, + const meta::DatesT& dates) = 0; virtual Status has_group(const std::string& group_id_, bool& has_or_not_) = 0; virtual Status get_group_files(const std::string& group_id_, const int date_delta_, diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 216a9b352d..72b053231b 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -44,6 +44,12 @@ Status DBImpl::get_group(meta::GroupSchema& group_info) { return _pMeta->get_group(group_info); } +template +Status DBImpl::delete_vectors(const std::string& group_id, + const meta::DatesT& dates) { + return _pMeta->delete_group_partitions(group_id, dates); +} + template Status DBImpl::has_group(const std::string& group_id_, bool& has_or_not_) { return _pMeta->has_group(group_id_, has_or_not_); 
diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index d2aed0af1d..45f249bd06 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -35,6 +35,7 @@ public: virtual Status add_group(meta::GroupSchema& group_info) override; virtual Status get_group(meta::GroupSchema& group_info) override; + virtual Status delete_vectors(const std::string& group_id, const meta::DatesT& dates) override; virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override; virtual Status get_group_files(const std::string& group_id_, diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index aaaaf21ce4..b15180817d 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -104,6 +104,44 @@ Status DBMetaImpl::initialize() { return Status::OK(); } +// PXU TODO: Temp solution. Will fix later +Status DBMetaImpl::delete_group_partitions(const std::string& group_id, + const meta::DatesT& dates) { + if (dates.size() == 0) { + return Status::OK(); + } + + GroupSchema group_info; + group_info.group_id = group_id; + auto status = get_group(group_info); + if (!status.ok()) { + return status; + } + + auto yesterday = GetDate(-2); + + for (auto& date : dates) { + if (date >= yesterday) { + return Status::Error("Could not delete partitions with 2 days"); + } + } + + try { + ConnectorPtr->update_all( + set( + c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE + ), + where( + c(&GroupFileSchema::group_id) == group_id and + in(&GroupFileSchema::date, dates) + )); + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + return Status::OK(); +} + Status DBMetaImpl::add_group(GroupSchema& group_info) { if (group_info.group_id == "") { std::stringstream ss; diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index aca0ec3141..fab2e1560a 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -24,6 +24,8 @@ public: virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override; 
virtual Status add_group_file(GroupFileSchema& group_file_info) override; + virtual Status delete_group_partitions(const std::string& group_id, + const meta::DatesT& dates) override; virtual Status has_group_file(const std::string& group_id_, const std::string& file_id_, diff --git a/cpp/src/db/LocalMetaImpl.h b/cpp/src/db/LocalMetaImpl.h index 4c324c5796..116c9e4672 100644 --- a/cpp/src/db/LocalMetaImpl.h +++ b/cpp/src/db/LocalMetaImpl.h @@ -22,7 +22,9 @@ public: virtual Status get_group(GroupSchema& group_info_) override; virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override; - virtual Status add_group_file(GroupFileSchema& group_file_info) = 0; + virtual Status add_group_file(GroupFileSchema& group_file_info) override; + /* virtual Status delete_group_partitions(const std::string& group_id, */ + /* const meta::DatesT& dates) override; */ virtual Status has_group_file(const std::string& group_id_, const std::string& file_id_, diff --git a/cpp/src/db/Meta.cpp b/cpp/src/db/Meta.cpp index 1b97c06c79..1bd3a20622 100644 --- a/cpp/src/db/Meta.cpp +++ b/cpp/src/db/Meta.cpp @@ -11,13 +11,28 @@ namespace vecwise { namespace engine { namespace meta { -DateT Meta::GetDate(const std::time_t& t) { +DateT Meta::GetDate(const std::time_t& t, int day_delta) { tm *ltm = std::localtime(&t); + if (day_delta > 0) { + do { + ++ltm->tm_mday; + --day_delta; + } while(day_delta > 0); + mktime(ltm); + } else if (day_delta < 0) { + do { + --ltm->tm_mday; + ++day_delta; + } while(day_delta < 0); + mktime(ltm); + } else { + ltm->tm_mday; + } return ltm->tm_year*10000 + ltm->tm_mon*100 + ltm->tm_mday; } -DateT Meta::GetDate() { - return GetDate(std::time(nullptr)); +DateT Meta::GetDate(int day_delta) { + return GetDate(std::time(nullptr), day_delta); } } // namespace meta diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index e0c1a84c76..ffc35b0606 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -67,6 +67,8 @@ public: virtual Status has_group(const 
std::string& group_id_, bool& has_or_not_) = 0; virtual Status add_group_file(GroupFileSchema& group_file_info) = 0; + virtual Status delete_group_partitions(const std::string& group_id, + const meta::DatesT& dates) = 0; virtual Status has_group_file(const std::string& group_id_, const std::string& file_id_, @@ -98,8 +100,8 @@ public: virtual Status count(const std::string& group_id, long& result) = 0; - static DateT GetDate(const std::time_t& t); - static DateT GetDate(); + static DateT GetDate(const std::time_t& t, int day_delta); + static DateT GetDate(int day_delta = 0); }; // MetaData diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 2ede539803..dc11feafad 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -59,6 +59,11 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { ASSERT_TRUE(status.ok()); ASSERT_EQ(group_file.file_type, new_file_type); + meta::DatesT dates; + dates.push_back(meta::Meta::GetDate()); + status = impl_->delete_group_partitions(group_file.group_id, dates); + ASSERT_FALSE(status.ok()); + /* group_file.file_type = meta::GroupFileSchema::NEW; */ /* status = impl_->get_group_file(group_file.group_id, group_file.file_id, group_file); */ /* ASSERT_TRUE(status.ok()); */ From 315e53279249d361de77a0fe89d6c816eca0eef2 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 20 May 2019 10:47:59 +0800 Subject: [PATCH 24/56] fix(db): get group bug fix Former-commit-id: 0a7cc7e3791bf02ea899a3cafdc5698e52334027 --- cpp/src/db/DBMetaImpl.cpp | 2 +- cpp/src/db/Meta.cpp | 6 +++++- cpp/src/db/Meta.h | 5 +++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index b15180817d..c48ed3c421 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -118,7 +118,7 @@ Status DBMetaImpl::delete_group_partitions(const std::string& group_id, return status; } - auto yesterday = GetDate(-2); + auto yesterday = GetDateWithDelta(-2); for (auto& 
date : dates) { if (date >= yesterday) { diff --git a/cpp/src/db/Meta.cpp b/cpp/src/db/Meta.cpp index 1bd3a20622..31fdee74a4 100644 --- a/cpp/src/db/Meta.cpp +++ b/cpp/src/db/Meta.cpp @@ -31,10 +31,14 @@ DateT Meta::GetDate(const std::time_t& t, int day_delta) { return ltm->tm_year*10000 + ltm->tm_mon*100 + ltm->tm_mday; } -DateT Meta::GetDate(int day_delta) { +DateT Meta::GetDateWithDelta(int day_delta) { return GetDate(std::time(nullptr), day_delta); } +DateT Meta::GetDate() { + return GetDate(std::time(nullptr), 0); +} + } // namespace meta } // namespace engine } // namespace vecwise diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index ffc35b0606..6d2abb8dee 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -100,8 +100,9 @@ public: virtual Status count(const std::string& group_id, long& result) = 0; - static DateT GetDate(const std::time_t& t, int day_delta); - static DateT GetDate(int day_delta = 0); + static DateT GetDate(const std::time_t& t, int day_delta = 0); + static DateT GetDate(); + static DateT GetDateWithDelta(int day_delta); }; // MetaData From 4e1736f3abf7105e138fd7cd3f8bbc57dd65cda2 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 20 May 2019 11:33:10 +0800 Subject: [PATCH 25/56] feat(db): add get group file api Former-commit-id: 1c8ed181f4a38203300d48bbca0ed11b7a0185be --- cpp/src/db/DBMetaImpl.cpp | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index c48ed3c421..3552bf2050 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -118,7 +118,7 @@ Status DBMetaImpl::delete_group_partitions(const std::string& group_id, return status; } - auto yesterday = GetDateWithDelta(-2); + auto yesterday = GetDateWithDelta(-1); for (auto& date : dates) { if (date >= yesterday) { @@ -413,7 +413,32 @@ Status DBMetaImpl::has_group_file(const std::string& group_id_, Status DBMetaImpl::get_group_file(const std::string& group_id_, 
const std::string& file_id_, GroupFileSchema& group_file_info_) { - //PXU TODO + try { + auto files = ConnectorPtr->select(columns(&GroupFileSchema::id, + &GroupFileSchema::group_id, + &GroupFileSchema::file_id, + &GroupFileSchema::file_type, + &GroupFileSchema::rows, + &GroupFileSchema::date), + where(c(&GroupFileSchema::file_id) == file_id_ and + c(&GroupFileSchema::group_id) == group_id_ + )); + assert(files.size() <= 1); + if (files.size() == 1) { + group_file_info_.id = std::get<0>(files[0]); + group_file_info_.group_id = std::get<1>(files[0]); + group_file_info_.file_id = std::get<2>(files[0]); + group_file_info_.file_type = std::get<3>(files[0]); + group_file_info_.rows = std::get<4>(files[0]); + group_file_info_.date = std::get<5>(files[0]); + } else { + return Status::NotFound("GroupFile " + file_id_ + " not found"); + } + } catch (std::exception &e) { + LOG(DEBUG) << e.what(); + throw e; + } + return Status::OK(); } From a7c22f8e53dba5c611c14c24615c6aa43357b693 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 20 May 2019 11:33:26 +0800 Subject: [PATCH 26/56] test(db): test delete group partitions Former-commit-id: af392c6bf9440a5d29ee152e4d4129153b499f94 --- cpp/unittest/db/meta_tests.cpp | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index dc11feafad..83177fea19 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -64,10 +64,26 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { status = impl_->delete_group_partitions(group_file.group_id, dates); ASSERT_FALSE(status.ok()); - /* group_file.file_type = meta::GroupFileSchema::NEW; */ - /* status = impl_->get_group_file(group_file.group_id, group_file.file_id, group_file); */ - /* ASSERT_TRUE(status.ok()); */ - /* ASSERT_EQ(group_file.file_type, new_file_type); */ + dates.clear(); + for (auto i=2; i < 10; ++i) { + dates.push_back(meta::Meta::GetDateWithDelta(-1*i)); + } + status = 
impl_->delete_group_partitions(group_file.group_id, dates); + ASSERT_TRUE(status.ok()); + + group_file.date = meta::Meta::GetDateWithDelta(-2); + status = impl_->update_group_file(group_file); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(group_file.date, meta::Meta::GetDateWithDelta(-2)); + ASSERT_FALSE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); + + dates.clear(); + dates.push_back(group_file.date); + status = impl_->delete_group_partitions(group_file.group_id, dates); + ASSERT_TRUE(status.ok()); + status = impl_->get_group_file(group_file.group_id, group_file.file_id, group_file); + ASSERT_TRUE(status.ok()); + ASSERT_TRUE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); } TEST_F(MetaTest, GROUP_FILES_TEST) { From 73fb498ff7b36041de8a68baf59861026643512c Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Thu, 23 May 2019 18:58:55 +0800 Subject: [PATCH 27/56] feat(db): add Archive conf in options Former-commit-id: caa950e7119264fd911f21cd10b4fbb2db896b7d --- cpp/src/db/Options.cpp | 44 +++++++++++++++++++++++++++++++++++ cpp/src/db/Options.h | 17 ++++++++++++++ cpp/unittest/db/db_tests.cpp | 45 ++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) diff --git a/cpp/src/db/Options.cpp b/cpp/src/db/Options.cpp index 2a0c01af8b..2a15f0722d 100644 --- a/cpp/src/db/Options.cpp +++ b/cpp/src/db/Options.cpp @@ -3,6 +3,11 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. 
******************************************************************************/ +#include +#include +#include +#include + #include "Options.h" #include "Env.h" #include "DBMetaImpl.h" @@ -15,6 +20,45 @@ Options::Options() : env(Env::Default()) { } +ArchiveConf::ArchiveConf(const std::string& type, const std::string& criterias) { + ParseType(type); + ParseCritirias(criterias); +} + +void ArchiveConf::ParseCritirias(const std::string& criterias) { + std::stringstream ss(criterias); + std::vector tokens; + + boost::algorithm::split(tokens, criterias, boost::is_any_of(";")); + + if (tokens.size() == 0) { + return; + } + + for (auto& token : tokens) { + std::vector kv; + boost::algorithm::split(kv, token, boost::is_any_of(":")); + if (kv.size() != 2) { + LOG(WARNING) << "Invalid ArchiveConf Criterias: " << token << " Ignore!"; + continue; + } + if (kv[0] != "disk" && kv[0] != "days") { + LOG(WARNING) << "Invalid ArchiveConf Criterias: " << token << " Ignore!"; + continue; + } + auto value = std::stoi(kv[1]); + criterias_[kv[0]] = value; + } +} + +void ArchiveConf::ParseType(const std::string& type) { + if (type != "delete" && type != "swap") { + LOG(ERROR) << "Invalid Archive"; + assert(false); + } + type_ = type; +} + /* DBMetaOptions::DBMetaOptions(const std::string& dbpath, */ /* const std::string& uri) */ /* : path(dbpath), backend_uri(uri) { */ diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index 5bbcf6dabe..ee2d62f715 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -7,6 +7,7 @@ #include #include +#include namespace zilliz { namespace vecwise { @@ -14,6 +15,22 @@ namespace engine { class Env; +struct ArchiveConf { + using CriteriaT = std::map; + + ArchiveConf(const std::string& type, const std::string& criterias = "disk:512"); + + const std::string& GetType() const { return type_; } + const CriteriaT GetCriterias() const { return criterias_; } + +private: + void ParseCritirias(const std::string& type); + void ParseType(const std::string& 
criterias); + + std::string type_; + CriteriaT criterias_; +}; + struct DBMetaOptions { /* DBMetaOptions(const std::string&, const std::string&); */ std::string path; diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index c9bc958b99..8030e574e2 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -12,6 +12,51 @@ using namespace zilliz::vecwise; +TEST_F(DBTest, CONFIG_TEST) { + { + EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); + } + { + engine::ArchiveConf conf("delete"); + ASSERT_EQ(conf.GetType(), "delete"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["disk"] == 512); + } + { + engine::ArchiveConf conf("swap"); + ASSERT_EQ(conf.GetType(), "swap"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["disk"] == 512); + } + { + ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "disk:")); + ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "disk:a")); + engine::ArchiveConf conf("swap", "disk:1024"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["disk"] == 1024); + } + { + ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:")); + ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a")); + engine::ArchiveConf conf("swap", "days:100"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["days"] == 100); + } + { + ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:")); + ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a")); + engine::ArchiveConf conf("swap", "days:100;disk:200"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 2); + ASSERT_TRUE(criterias["days"] == 100); + ASSERT_TRUE(criterias["disk"] == 200); + } +} + TEST_F(DBTest, DB_TEST) { static const std::string group_name = "test_group"; From 73d91135d1dbfe1c4518bb0024b310fb4bdcbadb Mon Sep 17 
00:00:00 2001 From: Xu Peng Date: Thu, 23 May 2019 20:22:39 +0800 Subject: [PATCH 28/56] feat(db): add archive files in meta Former-commit-id: 64a1e267815cc9fb9ccd1b41b6ef8a1f082e3b20 --- cpp/src/db/DBMetaImpl.cpp | 107 ++++++++++++++++++++++++++++++++--- cpp/src/db/DBMetaImpl.h | 3 + cpp/src/db/LocalMetaImpl.cpp | 5 ++ cpp/src/db/LocalMetaImpl.h | 2 + cpp/src/db/Meta.h | 4 ++ cpp/src/db/Options.h | 2 +- 6 files changed, 114 insertions(+), 9 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 3552bf2050..e506f14ea8 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -151,6 +151,7 @@ Status DBMetaImpl::add_group(GroupSchema& group_info) { } group_info.files_cnt = 0; group_info.id = -1; + group_info.created_on = GetMicroSecTimeStamp(); { try { @@ -237,7 +238,8 @@ Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) { group_file.file_id = ss.str(); group_file.dimension = group_info.dimension; group_file.rows = 0; - group_file.updated_time = GetMicroSecTimeStamp(); //ConnectorPtr->select(datetime("now", "localtime +1 hour")).front(); + group_file.created_on = GetMicroSecTimeStamp(); + group_file.updated_time = group_file.created_on; GetGroupFilePath(group_file); { @@ -449,17 +451,106 @@ Status DBMetaImpl::get_group_files(const std::string& group_id_, return Status::OK(); } +// PXU TODO: Support Swap +Status DBMetaImpl::archive_files() { + auto& criterias = _options.archive_conf.GetCriterias(); + if (criterias.size() == 0) { + return Status::OK(); + } + + for (auto kv : criterias) { + auto& criteria = kv.first; + auto& limit = kv.second; + if (criteria == "days") { + auto usecs = 3600*24*limit*1000000; + auto now = GetMicroSecTimeStamp(); + try + { + ConnectorPtr->update_all( + set( + c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE + ), + where( + c(&GroupFileSchema::created_on) < now - usecs and + c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE + )); + } catch 
(std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + } + if (criteria == "disk") { + int G = 1024*1024*1024; + long unsigned int sum = 0; + try { + auto sum_c = ConnectorPtr->sum( + &GroupFileSchema::rows, + where( + c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE + )); + sum = *sum_c; + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + // PXU TODO: refactor rows + auto to_delete = sum - limit*G/sizeof(float); + discard_files_of_size(to_delete); + } + } + + return Status::OK(); +} + +Status DBMetaImpl::discard_files_of_size(long to_discard_size) { + if (to_discard_size <= 0) { + return Status::OK(); + } + try { + auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, + &GroupFileSchema::file_type, + &GroupFileSchema::rows), + where(c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE), + order_by(&GroupFileSchema::id), + limit(10)); + + /* std::map groups; */ + + /* for (auto& file : selected) { */ + /* GroupFileSchema group_file; */ + /* group_file.id = std::get<0>(file); */ + /* group_file.group_id = std::get<1>(file); */ + /* group_file.file_id = std::get<2>(file); */ + /* group_file.file_type = std::get<3>(file); */ + /* group_file.rows = std::get<4>(file); */ + /* group_file.date = std::get<5>(file); */ + /* GetGroupFilePath(group_file); */ + /* auto groupItr = groups.find(group_file.group_id); */ + /* if (groupItr == groups.end()) { */ + /* GroupSchema group_info; */ + /* group_info.group_id = group_file.group_id; */ + /* auto status = get_group_no_lock(group_info); */ + /* if (!status.ok()) { */ + /* return status; */ + /* } */ + /* groups[group_file.group_id] = group_info; */ + /* } */ + /* group_file.dimension = groups[group_file.group_id].dimension; */ + /* files.push_back(group_file); */ + /* } */ + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + + return Status::OK(); + +} + Status DBMetaImpl::update_group_file(GroupFileSchema& group_file) { 
group_file.updated_time = GetMicroSecTimeStamp(); try { ConnectorPtr->update(group_file); - /* auto commited = ConnectorPtr->transaction([&] () mutable { */ - /* ConnectorPtr->update(group_file); */ - /* return true; */ - /* }); */ - /* if (!commited) { */ - /* return Status::DBTransactionError("Update file Error"); */ - /* } */ } catch (std::exception & e) { LOG(DEBUG) << e.what(); LOG(DEBUG) << "id= " << group_file.id << " file_id=" << group_file.file_id; diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index fab2e1560a..e2fd051b42 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -50,6 +50,8 @@ public: virtual Status files_to_index(GroupFilesSchema&) override; + virtual Status archive_files() override; + virtual Status cleanup() override; virtual Status cleanup_ttl_files(uint16_t seconds) override; @@ -62,6 +64,7 @@ public: private: + Status discard_files_of_size(long to_discard_size); long GetMicroSecTimeStamp(); Status get_group_no_lock(GroupSchema& group_info); std::string GetGroupPath(const std::string& group_id); diff --git a/cpp/src/db/LocalMetaImpl.cpp b/cpp/src/db/LocalMetaImpl.cpp index 60c23158be..2ec5c08ce4 100644 --- a/cpp/src/db/LocalMetaImpl.cpp +++ b/cpp/src/db/LocalMetaImpl.cpp @@ -241,6 +241,11 @@ Status LocalMetaImpl::update_files(GroupFilesSchema& files) { return Status::OK(); } +Status LocalMetaImpl::archive_files() { + //PXU TODO + return Status::OK(); +} + Status LocalMetaImpl::cleanup() { //PXU TODO return Status::OK(); diff --git a/cpp/src/db/LocalMetaImpl.h b/cpp/src/db/LocalMetaImpl.h index 116c9e4672..71927e8425 100644 --- a/cpp/src/db/LocalMetaImpl.h +++ b/cpp/src/db/LocalMetaImpl.h @@ -47,6 +47,8 @@ public: virtual Status files_to_index(GroupFilesSchema&) override; + virtual Status archive_files() override; + virtual Status cleanup_ttl_files(uint16_t seconds) override; virtual Status count(const std::string& group_id, long& result) override; diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index 
6d2abb8dee..74e1637f77 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -30,6 +30,7 @@ struct GroupSchema { size_t files_cnt = 0; uint16_t dimension; std::string location = ""; + long created_on; }; // GroupSchema @@ -51,6 +52,7 @@ struct GroupFileSchema { uint16_t dimension; std::string location = ""; long updated_time; + long created_on; }; // GroupFileSchema typedef std::vector GroupFilesSchema; @@ -91,6 +93,8 @@ public: virtual Status files_to_merge(const std::string& group_id, DatePartionedGroupFilesSchema& files) = 0; + virtual Status archive_files() = 0; + virtual Status files_to_index(GroupFilesSchema&) = 0; virtual Status cleanup() = 0; diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index ee2d62f715..26608ab976 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -32,9 +32,9 @@ private: }; struct DBMetaOptions { - /* DBMetaOptions(const std::string&, const std::string&); */ std::string path; std::string backend_uri; + ArchiveConf archive_conf = ArchiveConf("delete"); }; // DBMetaOptions From a073cf35d6c511c51bdefbf87f16cb1e72a058db Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Thu, 23 May 2019 20:35:59 +0800 Subject: [PATCH 29/56] feat(db): add archive files in meta part 2 Former-commit-id: 76ce47cc2f152d569251b084030c3b747a8f50ff --- cpp/src/db/DBMetaImpl.cpp | 47 ++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index e506f14ea8..8341a7a961 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -508,43 +508,40 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { } try { auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, - &GroupFileSchema::file_type, &GroupFileSchema::rows), where(c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE), order_by(&GroupFileSchema::id), limit(10)); + std::vector ids; - /* std::map groups; */ + for (auto& file : selected) { + if 
(to_discard_size <= 0) break; + GroupFileSchema group_file; + group_file.id = std::get<0>(file); + group_file.rows = std::get<1>(file); + ids.push_back(group_file.id); + to_discard_size -= group_file.rows; + } + + if (ids.size() == 0) { + return Status::OK(); + } + + ConnectorPtr->update_all( + set( + c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE + ), + where( + in(&GroupFileSchema::id, ids) + )); - /* for (auto& file : selected) { */ - /* GroupFileSchema group_file; */ - /* group_file.id = std::get<0>(file); */ - /* group_file.group_id = std::get<1>(file); */ - /* group_file.file_id = std::get<2>(file); */ - /* group_file.file_type = std::get<3>(file); */ - /* group_file.rows = std::get<4>(file); */ - /* group_file.date = std::get<5>(file); */ - /* GetGroupFilePath(group_file); */ - /* auto groupItr = groups.find(group_file.group_id); */ - /* if (groupItr == groups.end()) { */ - /* GroupSchema group_info; */ - /* group_info.group_id = group_file.group_id; */ - /* auto status = get_group_no_lock(group_info); */ - /* if (!status.ok()) { */ - /* return status; */ - /* } */ - /* groups[group_file.group_id] = group_info; */ - /* } */ - /* group_file.dimension = groups[group_file.group_id].dimension; */ - /* files.push_back(group_file); */ - /* } */ } catch (std::exception & e) { LOG(DEBUG) << e.what(); throw e; } - return Status::OK(); + return discard_files_of_size(to_discard_size); } Status DBMetaImpl::update_group_file(GroupFileSchema& group_file) { From c757be1cb638aca3a405bdf8ddfcfa9510117b80 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 10:55:54 +0800 Subject: [PATCH 30/56] fix(db): change type from into to size_t Former-commit-id: 97f571c02189eb7cf3ade68f1d26aea2e2ce5f1c --- cpp/src/db/DBMetaImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 8341a7a961..2d11379d1a 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -1,4 
+1,5 @@ /******************************************************************************* + * long rows = 3*1024*1024*1024; * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. @@ -480,7 +481,7 @@ Status DBMetaImpl::archive_files() { } } if (criteria == "disk") { - int G = 1024*1024*1024; + size_t G = 1024*1024*1024; long unsigned int sum = 0; try { auto sum_c = ConnectorPtr->sum( From 4c44b02b5ebde4e9b6637e50c4b654fcfd834b24 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 10:56:15 +0800 Subject: [PATCH 31/56] test(db): add archive test Former-commit-id: 7fc4969944759203524c0dbac8fc4421fd528f28 --- cpp/unittest/db/meta_tests.cpp | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 83177fea19..8489b0cd1b 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -86,6 +86,49 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { ASSERT_TRUE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); } +TEST_F(MetaTest, ARCHIVE_TEST) { + DBMetaOptions options; + options.path = "/tmp/vecwise_test"; + options.archive_conf = ArchiveConf("delete", "disk:41"); + + auto impl = meta::DBMetaImpl(options); + auto group_id = "meta_test_group"; + + meta::GroupSchema group; + group.group_id = group_id; + auto status = impl.add_group(group); + + meta::GroupFilesSchema files; + meta::GroupFileSchema group_file; + group_file.group_id = group.group_id; + + auto cnt = 10; + auto each_size = 2UL; + for (auto i=0; i Date: Fri, 24 May 2019 11:48:40 +0800 Subject: [PATCH 32/56] refactor(db): add utils Former-commit-id: 48c9f7818c0260e3191a381879c3e83e0ad7ec9a --- cpp/src/db/DBMetaImpl.cpp | 22 +++++++--------------- cpp/src/db/DBMetaImpl.h | 1 - cpp/src/db/Utils.cpp | 26 ++++++++++++++++++++++++++ cpp/src/db/Utils.h | 19 +++++++++++++++++++ 4 files changed, 
52 insertions(+), 16 deletions(-) create mode 100644 cpp/src/db/Utils.cpp create mode 100644 cpp/src/db/Utils.h diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 2d11379d1a..ef46abbab0 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -1,5 +1,4 @@ /******************************************************************************* - * long rows = 3*1024*1024*1024; * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. @@ -15,6 +14,7 @@ #include #include "DBMetaImpl.h" #include "IDGenerator.h" +#include "Utils.h" namespace zilliz { namespace vecwise { @@ -56,14 +56,6 @@ std::string DBMetaImpl::GetGroupPath(const std::string& group_id) { return _options.path + "/" + group_id; } -long DBMetaImpl::GetMicroSecTimeStamp() { - auto now = std::chrono::system_clock::now(); - auto micros = std::chrono::duration_cast( - now.time_since_epoch()).count(); - - return micros; -} - std::string DBMetaImpl::GetGroupDatePartitionPath(const std::string& group_id, DateT& date) { std::stringstream ss; ss << GetGroupPath(group_id) << "/" << date; @@ -152,7 +144,7 @@ Status DBMetaImpl::add_group(GroupSchema& group_info) { } group_info.files_cnt = 0; group_info.id = -1; - group_info.created_on = GetMicroSecTimeStamp(); + group_info.created_on = utils::GetMicroSecTimeStamp(); { try { @@ -239,7 +231,7 @@ Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) { group_file.file_id = ss.str(); group_file.dimension = group_info.dimension; group_file.rows = 0; - group_file.created_on = GetMicroSecTimeStamp(); + group_file.created_on = utils::GetMicroSecTimeStamp(); group_file.updated_time = group_file.created_on; GetGroupFilePath(group_file); @@ -464,7 +456,7 @@ Status DBMetaImpl::archive_files() { auto& limit = kv.second; if (criteria == "days") { auto usecs = 3600*24*limit*1000000; - auto now = GetMicroSecTimeStamp(); + auto now = 
utils::GetMicroSecTimeStamp(); try { ConnectorPtr->update_all( @@ -546,7 +538,7 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { } Status DBMetaImpl::update_group_file(GroupFileSchema& group_file) { - group_file.updated_time = GetMicroSecTimeStamp(); + group_file.updated_time = utils::GetMicroSecTimeStamp(); try { ConnectorPtr->update(group_file); } catch (std::exception & e) { @@ -561,7 +553,7 @@ Status DBMetaImpl::update_files(GroupFilesSchema& files) { try { auto commited = ConnectorPtr->transaction([&] () mutable { for (auto& file : files) { - file.updated_time = GetMicroSecTimeStamp(); + file.updated_time = utils::GetMicroSecTimeStamp(); ConnectorPtr->update(file); } return true; @@ -577,7 +569,7 @@ Status DBMetaImpl::update_files(GroupFilesSchema& files) { } Status DBMetaImpl::cleanup_ttl_files(uint16_t seconds) { - auto now = GetMicroSecTimeStamp(); + auto now = utils::GetMicroSecTimeStamp(); try { auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, &GroupFileSchema::group_id, diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index e2fd051b42..6433361d7a 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -65,7 +65,6 @@ public: private: Status discard_files_of_size(long to_discard_size); - long GetMicroSecTimeStamp(); Status get_group_no_lock(GroupSchema& group_info); std::string GetGroupPath(const std::string& group_id); std::string GetGroupDatePartitionPath(const std::string& group_id, DateT& date); diff --git a/cpp/src/db/Utils.cpp b/cpp/src/db/Utils.cpp new file mode 100644 index 0000000000..e459bab4bb --- /dev/null +++ b/cpp/src/db/Utils.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. 
+ ******************************************************************************/ + +#include +#include "Utils.h" + +namespace zilliz { +namespace vecwise { +namespace engine { +namespace utils { + +long GetMicroSecTimeStamp() { + auto now = std::chrono::system_clock::now(); + auto micros = std::chrono::duration_cast( + now.time_since_epoch()).count(); + + return micros; +} + +} // namespace utils +} // namespace engine +} // namespace vecwise +} // namespace zilliz diff --git a/cpp/src/db/Utils.h b/cpp/src/db/Utils.h new file mode 100644 index 0000000000..cdcd37b832 --- /dev/null +++ b/cpp/src/db/Utils.h @@ -0,0 +1,19 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#pragma once + + +namespace zilliz { +namespace vecwise { +namespace engine { +namespace utils { + +long GetMicroSecTimeStamp(); + +} // namespace utils +} // namespace engine +} // namespace vecwise +} // namespace zilliz From 7565a53290c25323d3284ff9b1e5b1194b0d45df Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 14:56:13 +0800 Subject: [PATCH 33/56] fix(db): update schema and unsigned long type handling Former-commit-id: 03f17b0f071817350b4137ed31abfb1d5b7066be --- cpp/src/db/DBMetaImpl.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index ef46abbab0..b913399297 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -29,6 +29,7 @@ inline auto StoragePrototype(const std::string& path) { make_column("id", &GroupSchema::id, primary_key()), make_column("group_id", &GroupSchema::group_id, unique()), make_column("dimension", &GroupSchema::dimension), + make_column("created_on", &GroupSchema::created_on), 
make_column("files_cnt", &GroupSchema::files_cnt, default_value(0))), make_table("GroupFile", make_column("id", &GroupFileSchema::id, primary_key()), @@ -37,6 +38,7 @@ inline auto StoragePrototype(const std::string& path) { make_column("file_type", &GroupFileSchema::file_type), make_column("rows", &GroupFileSchema::rows, default_value(0)), make_column("updated_time", &GroupFileSchema::updated_time), + make_column("created_on", &GroupFileSchema::created_on), make_column("date", &GroupFileSchema::date)) ); @@ -455,8 +457,9 @@ Status DBMetaImpl::archive_files() { auto& criteria = kv.first; auto& limit = kv.second; if (criteria == "days") { - auto usecs = 3600*24*limit*1000000; - auto now = utils::GetMicroSecTimeStamp(); + long usecs = 3600*24*limit*1000000UL; + long now = utils::GetMicroSecTimeStamp(); + LOG(DEBUG) << "Limit " << limit << " TimeLimit " << now - usecs; try { ConnectorPtr->update_all( @@ -464,7 +467,7 @@ Status DBMetaImpl::archive_files() { c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE ), where( - c(&GroupFileSchema::created_on) < now - usecs and + c(&GroupFileSchema::created_on) < (long)(now - usecs) and c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE )); } catch (std::exception & e) { @@ -473,7 +476,7 @@ Status DBMetaImpl::archive_files() { } } if (criteria == "disk") { - size_t G = 1024*1024*1024; + size_t G = 1024*1024*1024UL; long unsigned int sum = 0; try { auto sum_c = ConnectorPtr->sum( From cd2afcb2a425e0a12bcc34f34a84d78c1eb7a9f1 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 15:31:18 +0800 Subject: [PATCH 34/56] refactor(db): remove dummy print Former-commit-id: ffc431eeb98d3a4a52708954e2a1e240ad713ebc --- cpp/src/db/DBMetaImpl.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index b913399297..3ad8d3fe49 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -459,7 +459,6 @@ Status DBMetaImpl::archive_files() { if 
(criteria == "days") { long usecs = 3600*24*limit*1000000UL; long now = utils::GetMicroSecTimeStamp(); - LOG(DEBUG) << "Limit " << limit << " TimeLimit " << now - usecs; try { ConnectorPtr->update_all( From 80860535a7f1faf8438307a41116d5c3f0ff1d96 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 15:35:29 +0800 Subject: [PATCH 35/56] test(db): add test for archive days Former-commit-id: 10dcbdf0d3f32fd90904150c294cbe87a0ebfa7b --- cpp/unittest/db/meta_tests.cpp | 55 +++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 8489b0cd1b..1347fcf755 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -6,10 +6,13 @@ #include #include #include +#include +#include #include "utils.h" #include "db/DBMetaImpl.h" #include "db/Factories.h" +#include "db/Utils.h" using namespace zilliz::vecwise::engine; @@ -86,7 +89,57 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { ASSERT_TRUE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); } -TEST_F(MetaTest, ARCHIVE_TEST) { +TEST_F(MetaTest, ARCHIVE_TEST_DAYS) { + srand(time(0)); + DBMetaOptions options; + options.path = "/tmp/vecwise_test"; + int days_num = rand() % 100; + std::stringstream ss; + ss << "days:" << days_num; + options.archive_conf = ArchiveConf("delete", ss.str()); + + auto impl = meta::DBMetaImpl(options); + auto group_id = "meta_test_group"; + + meta::GroupSchema group; + group.group_id = group_id; + auto status = impl.add_group(group); + + meta::GroupFilesSchema files; + meta::GroupFileSchema group_file; + group_file.group_id = group.group_id; + + auto cnt = 100; + long ts = utils::GetMicroSecTimeStamp(); + std::vector days; + for (auto i=0; i Date: Fri, 24 May 2019 16:43:01 +0800 Subject: [PATCH 36/56] refactor(db): add some debug print for newly added archive Former-commit-id: 92b1220200bcbe82cfbf427f5a14d9fe800ec8b5 --- cpp/src/db/DBMetaImpl.cpp | 2 ++ 1 file changed, 
2 insertions(+) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 3ad8d3fe49..172e7f42f3 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -498,6 +498,7 @@ Status DBMetaImpl::archive_files() { } Status DBMetaImpl::discard_files_of_size(long to_discard_size) { + LOG(DEBUG) << "Abort to discard size=" << to_discard_size; if (to_discard_size <= 0) { return Status::OK(); } @@ -515,6 +516,7 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { group_file.id = std::get<0>(file); group_file.rows = std::get<1>(file); ids.push_back(group_file.id); + LOG(DEBUG) << "Discard group_file.id=" << group_file.id << " group_file.rows=" << group_file.rows; to_discard_size -= group_file.rows; } From 0a7f334697e0970c631c565b249c1062203766c8 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 17:30:33 +0800 Subject: [PATCH 37/56] feat(db): add archive post merge and build index Former-commit-id: 32678939225459bbfb82d491c53b912633fdb395 --- cpp/src/db/DBImpl.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 72b053231b..eee492abbd 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -326,6 +326,10 @@ Status DBImpl::background_merge_files(const std::string& group_id) { merge_files(group_id, kv.first, kv.second); } + if (has_merge) { + _pMeta->archive_files(); + } + try_build_index(); _pMeta->cleanup_ttl_files(1); @@ -362,6 +366,7 @@ Status DBImpl::build_index(const meta::GroupFileSchema& file) { << " from file " << to_remove.file_id; index->Cache(); + _pMeta->archive_files(); return Status::OK(); } From e15c445fb5927ede8022e44390936ace25ed95ca Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 17:41:43 +0800 Subject: [PATCH 38/56] feat(db): add size api for meta Former-commit-id: 0b240dbf664348f65a70e57a5d545857469ad37b --- cpp/src/db/DBMetaImpl.cpp | 33 ++++++++++++++++++++------------- cpp/src/db/DBMetaImpl.h | 2 ++ 
cpp/src/db/LocalMetaImpl.cpp | 5 +++++ cpp/src/db/LocalMetaImpl.h | 2 ++ cpp/src/db/Meta.h | 2 ++ 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 172e7f42f3..a4c3f5d77e 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -476,20 +476,11 @@ Status DBMetaImpl::archive_files() { } if (criteria == "disk") { size_t G = 1024*1024*1024UL; - long unsigned int sum = 0; - try { - auto sum_c = ConnectorPtr->sum( - &GroupFileSchema::rows, - where( - c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE - )); - sum = *sum_c; - } catch (std::exception & e) { - LOG(DEBUG) << e.what(); - throw e; - } + long sum = 0; + size(sum); + // PXU TODO: refactor rows - auto to_delete = sum - limit*G/sizeof(float); + auto to_delete = (sum - limit*G)/sizeof(float); discard_files_of_size(to_delete); } } @@ -497,6 +488,22 @@ Status DBMetaImpl::archive_files() { return Status::OK(); } +Status DBMetaImpl::size(long& result) { + try { + auto sum_c = ConnectorPtr->sum( + &GroupFileSchema::rows, + where( + c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE + )); + result = *sum_c*sizeof(float); + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + + return Status::OK(); +} + Status DBMetaImpl::discard_files_of_size(long to_discard_size) { LOG(DEBUG) << "Abort to discard size=" << to_discard_size; if (to_discard_size <= 0) { diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index 6433361d7a..20f91b8482 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -52,6 +52,8 @@ public: virtual Status archive_files() override; + virtual Status size(long& result) override; + virtual Status cleanup() override; virtual Status cleanup_ttl_files(uint16_t seconds) override; diff --git a/cpp/src/db/LocalMetaImpl.cpp b/cpp/src/db/LocalMetaImpl.cpp index 2ec5c08ce4..aa852a3db6 100644 --- a/cpp/src/db/LocalMetaImpl.cpp +++ 
b/cpp/src/db/LocalMetaImpl.cpp @@ -261,6 +261,11 @@ Status LocalMetaImpl::drop_all() { return Status::OK(); } +Status LocalMetaImpl::size(long& result) { + // PXU TODO + return Status::OK(); +} + Status LocalMetaImpl::count(const std::string& group_id, long& result) { // PXU TODO return Status::OK(); diff --git a/cpp/src/db/LocalMetaImpl.h b/cpp/src/db/LocalMetaImpl.h index 71927e8425..fb989d5f67 100644 --- a/cpp/src/db/LocalMetaImpl.h +++ b/cpp/src/db/LocalMetaImpl.h @@ -55,6 +55,8 @@ public: virtual Status drop_all() override; + virtual Status size(long& result) override; + private: Status GetGroupMetaInfoByPath(const std::string& path, GroupSchema& group_info); diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index 74e1637f77..e6150cea11 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -93,6 +93,8 @@ public: virtual Status files_to_merge(const std::string& group_id, DatePartionedGroupFilesSchema& files) = 0; + virtual Status size(long& result) = 0; + virtual Status archive_files() = 0; virtual Status files_to_index(GroupFilesSchema&) = 0; From c30a6d41308bd7336d3d52e77bf0fe01749badff Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 18:15:50 +0800 Subject: [PATCH 39/56] feat(db): add size api for db Former-commit-id: 2fa9614c86fab66267689a06f73c0bef15dcffe8 --- cpp/src/db/DB.h | 2 ++ cpp/src/db/DBImpl.cpp | 5 +++++ cpp/src/db/DBImpl.h | 2 ++ 3 files changed, 9 insertions(+) diff --git a/cpp/src/db/DB.h b/cpp/src/db/DB.h index 6f922475b5..7d976ad824 100644 --- a/cpp/src/db/DB.h +++ b/cpp/src/db/DB.h @@ -39,6 +39,8 @@ public: virtual Status search(const std::string& group_id, size_t k, size_t nq, const float* vectors, const meta::DatesT& dates, QueryResults& results) = 0; + virtual Status size(long& result) = 0; + virtual Status drop_all() = 0; virtual Status count(const std::string& group_id, long& result) = 0; diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index eee492abbd..971e6daff6 100644 --- a/cpp/src/db/DBImpl.cpp +++ 
b/cpp/src/db/DBImpl.cpp @@ -427,6 +427,11 @@ Status DBImpl::count(const std::string& group_id, long& result) { return _pMeta->count(group_id, result); } +template +Status DBImpl::size(long& result) { + return _pMeta->size(result); +} + template DBImpl::~DBImpl() { { diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index 45f249bd06..54c22eb48b 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -55,6 +55,8 @@ public: virtual Status count(const std::string& group_id, long& result) override; + virtual Status size(long& result) override; + virtual ~DBImpl(); private: From 3f28d33b6ec6fef20dd7fb0b3d8ed3cccc33041b Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 18:16:27 +0800 Subject: [PATCH 40/56] test(db): add test for overall archive Former-commit-id: 34b7aa89adc45c2505619b902e7701ce7df4e91a --- cpp/unittest/db/db_tests.cpp | 46 ++++++++++++++++++++++++++++++++++++ cpp/unittest/db/utils.cpp | 17 ++++++++++--- cpp/unittest/db/utils.h | 6 +++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 8030e574e2..5a6ea703aa 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -9,6 +9,7 @@ #include "utils.h" #include "db/DB.h" +#include "db/DBImpl.h" using namespace zilliz::vecwise; @@ -57,6 +58,51 @@ TEST_F(DBTest, CONFIG_TEST) { } } +TEST_F(DBTest2, ARHIVE_DISK_CHECK) { + + static const std::string group_name = "test_group"; + static const int group_dim = 256; + + engine::meta::GroupSchema group_info; + group_info.dimension = group_dim; + group_info.group_id = group_name; + engine::Status stat = db_->add_group(group_info); + + engine::meta::GroupSchema group_info_get; + group_info_get.group_id = group_name; + stat = db_->get_group(group_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(group_info_get.dimension, group_dim); + + engine::IDNumbers vector_ids; + engine::IDNumbers target_ids; + + int d = 256; + int nb = 30; + float *xb = new float[d * 
nb]; + for(int i = 0; i < nb; i++) { + for(int j = 0; j < d; j++) xb[d * i + j] = drand48(); + xb[d * i] += i / 2000.; + } + + int loop = 100000; + + for (auto i=0; iadd_vectors(group_name, nb, xb, vector_ids); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + std::this_thread::sleep_for(std::chrono::seconds(1)); + + long size; + db_->size(size); + /* LOG(DEBUG) << "size=" << size; */ + ASSERT_TRUE(size < 2UL*1024*1024*1024); + + delete [] xb; +}; + + TEST_F(DBTest, DB_TEST) { static const std::string group_name = "test_group"; diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index 7188e89107..9762668622 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -29,19 +29,30 @@ void DBTest::InitLog() { el::Loggers::reconfigureLogger("default", defaultConf); } -void DBTest::SetUp() { - InitLog(); +engine::Options DBTest::GetOptions() { auto options = engine::OptionsFactory::Build(); options.meta.path = "/tmp/vecwise_test"; + return options; +} + +void DBTest::SetUp() { + InitLog(); + auto options = GetOptions(); db_ = engine::DBFactory::Build(options, "Faiss,IDMap"); } void DBTest::TearDown() { delete db_; - auto options = engine::OptionsFactory::Build(); boost::filesystem::remove_all("/tmp/vecwise_test"); } +engine::Options DBTest2::GetOptions() { + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/vecwise_test"; + options.meta.archive_conf = engine::ArchiveConf("delete", "disk:2"); + return options; +} + void MetaTest::SetUp() { InitLog(); impl_ = engine::DBMetaImplFactory::Build(); diff --git a/cpp/unittest/db/utils.h b/cpp/unittest/db/utils.h index 21823ffad3..456b54ffcc 100644 --- a/cpp/unittest/db/utils.h +++ b/cpp/unittest/db/utils.h @@ -39,6 +39,12 @@ protected: void InitLog(); virtual void SetUp() override; virtual void TearDown() override; + virtual zilliz::vecwise::engine::Options GetOptions(); +}; + +class DBTest2 : public DBTest { +protected: + virtual 
zilliz::vecwise::engine::Options GetOptions() override; }; From b2f079ed676cdf22cf207103ce54fe94fd82e319 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 11:13:34 +0800 Subject: [PATCH 41/56] fix(db): fix size api for db meta Former-commit-id: 88add7eef97827063e8e87f0d84aec62e66f7ca6 --- cpp/src/db/DBMetaImpl.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index a4c3f5d77e..2b997d1043 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -489,13 +489,19 @@ Status DBMetaImpl::archive_files() { } Status DBMetaImpl::size(long& result) { + result = 0; try { - auto sum_c = ConnectorPtr->sum( - &GroupFileSchema::rows, + auto selected = ConnectorPtr->select(columns(sum(&GroupFileSchema::rows)), where( c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE )); - result = *sum_c*sizeof(float); + + for (auto& sub_query : selected) { + if(!std::get<0>(sub_query)) { + continue; + } + result += (long)(*std::get<0>(sub_query))*sizeof(float); + } } catch (std::exception & e) { LOG(DEBUG) << e.what(); throw e; From f9889598d7ac0e9db8116a26a4852ebf3b92bd1f Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 11:14:17 +0800 Subject: [PATCH 42/56] test(db): check size Former-commit-id: 115707db134408a52f14e95d25c5d434c05d1c9f --- cpp/unittest/db/db_tests.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 5a6ea703aa..90d35ee8f9 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -62,6 +62,7 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { static const std::string group_name = "test_group"; static const int group_dim = 256; + long size; engine::meta::GroupSchema group_info; group_info.dimension = group_dim; @@ -77,6 +78,7 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { engine::IDNumbers vector_ids; engine::IDNumbers target_ids; + db_->size(size); int 
d = 256; int nb = 30; float *xb = new float[d * nb]; @@ -94,7 +96,6 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { std::this_thread::sleep_for(std::chrono::seconds(1)); - long size; db_->size(size); /* LOG(DEBUG) << "size=" << size; */ ASSERT_TRUE(size < 2UL*1024*1024*1024); From 767e63bd0009efbd6c8c226493d93d1d07c5a394 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 11:51:09 +0800 Subject: [PATCH 43/56] refactor(db): add MetaTypes.h Former-commit-id: 2e6e61b9cf0db9728365c904fc2f491962ea5319 --- cpp/src/db/Meta.h | 44 +------------------------------- cpp/src/db/MetaTypes.h | 57 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 43 deletions(-) create mode 100644 cpp/src/db/MetaTypes.h diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index e6150cea11..a4bbb23380 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -4,14 +4,11 @@ * Proprietary and confidential. ******************************************************************************/ #pragma once - -#include #include -#include -#include #include #include +#include "MetaTypes.h" #include "Options.h" #include "Status.h" @@ -20,46 +17,7 @@ namespace vecwise { namespace engine { namespace meta { -typedef int DateT; -const DateT EmptyDate = -1; -typedef std::vector DatesT; -struct GroupSchema { - size_t id; - std::string group_id; - size_t files_cnt = 0; - uint16_t dimension; - std::string location = ""; - long created_on; -}; // GroupSchema - - -struct GroupFileSchema { - typedef enum { - NEW, - RAW, - TO_INDEX, - INDEX, - TO_DELETE, - } FILE_TYPE; - - size_t id; - std::string group_id; - std::string file_id; - int file_type = NEW; - size_t rows; - DateT date = EmptyDate; - uint16_t dimension; - std::string location = ""; - long updated_time; - long created_on; -}; // GroupFileSchema - -typedef std::vector GroupFilesSchema; -typedef std::map DatePartionedGroupFilesSchema; - - -class Meta; class Meta { public: typedef std::shared_ptr Ptr; diff --git a/cpp/src/db/MetaTypes.h 
b/cpp/src/db/MetaTypes.h new file mode 100644 index 0000000000..fd2038eeb6 --- /dev/null +++ b/cpp/src/db/MetaTypes.h @@ -0,0 +1,57 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#pragma once + +#include +#include +#include + +namespace zilliz { +namespace vecwise { +namespace engine { +namespace meta { + +typedef int DateT; +const DateT EmptyDate = -1; +typedef std::vector DatesT; + +struct GroupSchema { + size_t id; + std::string group_id; + size_t files_cnt = 0; + uint16_t dimension; + std::string location = ""; + long created_on; +}; // GroupSchema + +struct GroupFileSchema { + typedef enum { + NEW, + RAW, + TO_INDEX, + INDEX, + TO_DELETE, + } FILE_TYPE; + + size_t id; + std::string group_id; + std::string file_id; + int file_type = NEW; + size_t rows; + DateT date = EmptyDate; + uint16_t dimension; + std::string location = ""; + long updated_time; + long created_on; +}; // GroupFileSchema + +typedef std::vector GroupFilesSchema; +typedef std::map DatePartionedGroupFilesSchema; + +} // namespace meta +} // namespace engine +} // namespace vecwise +} // namespace zilliz From b96459cc8b1493e9e97aa69690fd684054f0e5b3 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 12:26:06 +0800 Subject: [PATCH 44/56] refactor(db): add MetaConsts Former-commit-id: 5db9dfba91c61274a8334e8daba1488fe5bead3e --- cpp/src/db/DBMetaImpl.cpp | 16 +++++----------- cpp/src/db/MetaConsts.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 11 deletions(-) create mode 100644 cpp/src/db/MetaConsts.h diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 2b997d1043..d8e4d9bfd9 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -12,9 
+12,11 @@ #include #include #include + #include "DBMetaImpl.h" #include "IDGenerator.h" #include "Utils.h" +#include "MetaConsts.h" namespace zilliz { namespace vecwise { @@ -44,16 +46,9 @@ inline auto StoragePrototype(const std::string& path) { } -using ConnectorT = decltype(StoragePrototype("/tmp/dummy.sqlite3")); +using ConnectorT = decltype(StoragePrototype("")); static std::unique_ptr ConnectorPtr; -long GetFileSize(const std::string& filename) -{ - struct stat stat_buf; - int rc = stat(filename.c_str(), &stat_buf); - return rc == 0 ? stat_buf.st_size : -1; -} - std::string DBMetaImpl::GetGroupPath(const std::string& group_id) { return _options.path + "/" + group_id; } @@ -457,7 +452,7 @@ Status DBMetaImpl::archive_files() { auto& criteria = kv.first; auto& limit = kv.second; if (criteria == "days") { - long usecs = 3600*24*limit*1000000UL; + long usecs = limit * D_SEC * US_PS; long now = utils::GetMicroSecTimeStamp(); try { @@ -475,7 +470,6 @@ Status DBMetaImpl::archive_files() { } } if (criteria == "disk") { - size_t G = 1024*1024*1024UL; long sum = 0; size(sum); @@ -595,7 +589,7 @@ Status DBMetaImpl::cleanup_ttl_files(uint16_t seconds) { &GroupFileSchema::rows, &GroupFileSchema::date), where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_DELETE and - c(&GroupFileSchema::updated_time) > now - 1000000*seconds)); + c(&GroupFileSchema::updated_time) > now - seconds*US_PS)); GroupFilesSchema updated; diff --git a/cpp/src/db/MetaConsts.h b/cpp/src/db/MetaConsts.h new file mode 100644 index 0000000000..e4247510c6 --- /dev/null +++ b/cpp/src/db/MetaConsts.h @@ -0,0 +1,32 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. 
+ ******************************************************************************/ +#pragma once + +namespace zilliz { +namespace vecwise { +namespace engine { +namespace meta { + +const size_t K = 1024UL; +const size_t M = K*K; +const size_t G = K*M; +const size_t T = K*G; + +const size_t S_PS = 1UL; +const size_t MS_PS = 1000*S_PS; +const size_t US_PS = 1000*MS_PS; +const size_t NS_PS = 1000*US_PS; + +const size_t SECOND = 1UL; +const size_t M_SEC = 60*SECOND; +const size_t H_SEC = 60*M_SEC; +const size_t D_SEC = 24*H_SEC; +const size_t W_SEC = 7*D_SEC; + +} // namespace meta +} // namespace engine +} // namespace vecwise +} // namespace zilliz From e09240e45fbada05c1a73241a48a930dc034e985 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 12:26:23 +0800 Subject: [PATCH 45/56] test(db): refactor with meta consts Former-commit-id: a611af0886bdeb51b4a38da2e80b84a4fd13d0cc --- cpp/unittest/db/db_tests.cpp | 5 +++-- cpp/unittest/db/meta_tests.cpp | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 90d35ee8f9..7fd5ed8bc5 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -10,6 +10,7 @@ #include "utils.h" #include "db/DB.h" #include "db/DBImpl.h" +#include "db/MetaConsts.h" using namespace zilliz::vecwise; @@ -98,7 +99,7 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { db_->size(size); /* LOG(DEBUG) << "size=" << size; */ - ASSERT_TRUE(size < 2UL*1024*1024*1024); + ASSERT_TRUE(size < 2 * engine::meta::G); delete [] xb; }; @@ -155,7 +156,7 @@ TEST_F(DBTest, DB_TEST) { START_TIMER; stat = db_->search(group_name, k, qb, qxb, results); - ss << "Search " << j << " With Size " << (float)(count*group_dim*sizeof(float))/(1024*1024) << " M"; + ss << "Search " << j << " With Size " << (float)(count*group_dim*sizeof(float))/engine::meta::M << " M"; STOP_TIMER(ss.str()); ASSERT_STATS(stat); diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp 
index 1347fcf755..3c8f1802c1 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -13,6 +13,7 @@ #include "db/DBMetaImpl.h" #include "db/Factories.h" #include "db/Utils.h" +#include "db/MetaConsts.h" using namespace zilliz::vecwise::engine; @@ -116,7 +117,7 @@ TEST_F(MetaTest, ARCHIVE_TEST_DAYS) { status = impl.add_group_file(group_file); group_file.file_type = meta::GroupFileSchema::NEW; int day = rand() % (days_num*2); - group_file.created_on = ts - day*24*3600*1000000UL - 10000; + group_file.created_on = ts - day*meta::D_SEC*meta::US_PS - 10000; status = impl.update_group_file(group_file); files.push_back(group_file); days.push_back(day); @@ -160,7 +161,7 @@ TEST_F(MetaTest, ARCHIVE_TEST_DISK) { for (auto i=0; i Date: Sun, 26 May 2019 13:02:02 +0800 Subject: [PATCH 46/56] refactor(db): change rows to size in group file schema Former-commit-id: 4386308bcfa1bd7136809c7e749e140b53517ca7 --- cpp/src/db/DBImpl.cpp | 4 +-- cpp/src/db/DBMetaImpl.cpp | 46 ++++++++++++++--------------- cpp/src/db/FaissExecutionEngine.cpp | 4 +-- cpp/src/db/MemManager.cpp | 9 +++--- cpp/src/db/MetaTypes.h | 2 +- cpp/src/db/Options.h | 2 +- 6 files changed, 34 insertions(+), 33 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 971e6daff6..3673bc4ad3 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -292,7 +292,7 @@ Status DBImpl::merge_files(const std::string& group_id, const meta::Dat } else { group_file.file_type = meta::GroupFileSchema::RAW; } - group_file.rows = index_size; + group_file.size = index_size; updated.push_back(group_file); status = _pMeta->update_files(updated); LOG(DEBUG) << "New merged file " << group_file.file_id << @@ -353,7 +353,7 @@ Status DBImpl::build_index(const meta::GroupFileSchema& file) { auto index = to_index.BuildIndex(group_file.location); group_file.file_type = meta::GroupFileSchema::INDEX; - group_file.rows = index->Size(); + group_file.size = index->Size(); auto to_remove = file; 
to_remove.file_type = meta::GroupFileSchema::TO_DELETE; diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index d8e4d9bfd9..825d33032b 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -38,7 +38,7 @@ inline auto StoragePrototype(const std::string& path) { make_column("group_id", &GroupFileSchema::group_id), make_column("file_id", &GroupFileSchema::file_id), make_column("file_type", &GroupFileSchema::file_type), - make_column("rows", &GroupFileSchema::rows, default_value(0)), + make_column("size", &GroupFileSchema::size, default_value(0)), make_column("updated_time", &GroupFileSchema::updated_time), make_column("created_on", &GroupFileSchema::created_on), make_column("date", &GroupFileSchema::date)) @@ -227,7 +227,7 @@ Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) { group_file.file_type = GroupFileSchema::NEW; group_file.file_id = ss.str(); group_file.dimension = group_info.dimension; - group_file.rows = 0; + group_file.size = 0; group_file.created_on = utils::GetMicroSecTimeStamp(); group_file.updated_time = group_file.created_on; GetGroupFilePath(group_file); @@ -263,7 +263,7 @@ Status DBMetaImpl::files_to_index(GroupFilesSchema& files) { &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_INDEX)); @@ -275,7 +275,7 @@ Status DBMetaImpl::files_to_index(GroupFilesSchema& files) { group_file.group_id = std::get<1>(file); group_file.file_id = std::get<2>(file); group_file.file_type = std::get<3>(file); - group_file.rows = std::get<4>(file); + group_file.size = std::get<4>(file); group_file.date = std::get<5>(file); GetGroupFilePath(group_file); auto groupItr = groups.find(group_file.group_id); @@ -311,7 +311,7 @@ Status DBMetaImpl::files_to_search(const std::string &group_id, &GroupFileSchema::group_id, &GroupFileSchema::file_id, 
&GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::group_id) == group_id and in(&GroupFileSchema::date, dates) and @@ -332,7 +332,7 @@ Status DBMetaImpl::files_to_search(const std::string &group_id, group_file.group_id = std::get<1>(file); group_file.file_id = std::get<2>(file); group_file.file_type = std::get<3>(file); - group_file.rows = std::get<4>(file); + group_file.size = std::get<4>(file); group_file.date = std::get<5>(file); group_file.dimension = group_info.dimension; GetGroupFilePath(group_file); @@ -359,7 +359,7 @@ Status DBMetaImpl::files_to_merge(const std::string& group_id, &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::RAW and c(&GroupFileSchema::group_id) == group_id)); @@ -377,7 +377,7 @@ Status DBMetaImpl::files_to_merge(const std::string& group_id, group_file.group_id = std::get<1>(file); group_file.file_id = std::get<2>(file); group_file.file_type = std::get<3>(file); - group_file.rows = std::get<4>(file); + group_file.size = std::get<4>(file); group_file.date = std::get<5>(file); group_file.dimension = group_info.dimension; GetGroupFilePath(group_file); @@ -410,7 +410,7 @@ Status DBMetaImpl::get_group_file(const std::string& group_id_, &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::file_id) == file_id_ and c(&GroupFileSchema::group_id) == group_id_ @@ -421,7 +421,7 @@ Status DBMetaImpl::get_group_file(const std::string& group_id_, group_file_info_.group_id = std::get<1>(files[0]); group_file_info_.file_id = std::get<2>(files[0]); group_file_info_.file_type = std::get<3>(files[0]); - group_file_info_.rows = std::get<4>(files[0]); + group_file_info_.size = 
std::get<4>(files[0]); group_file_info_.date = std::get<5>(files[0]); } else { return Status::NotFound("GroupFile " + file_id_ + " not found"); @@ -473,8 +473,8 @@ Status DBMetaImpl::archive_files() { long sum = 0; size(sum); - // PXU TODO: refactor rows - auto to_delete = (sum - limit*G)/sizeof(float); + // PXU TODO: refactor size + auto to_delete = (sum - limit*G); discard_files_of_size(to_delete); } } @@ -485,7 +485,7 @@ Status DBMetaImpl::archive_files() { Status DBMetaImpl::size(long& result) { result = 0; try { - auto selected = ConnectorPtr->select(columns(sum(&GroupFileSchema::rows)), + auto selected = ConnectorPtr->select(columns(sum(&GroupFileSchema::size)), where( c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE )); @@ -494,7 +494,7 @@ Status DBMetaImpl::size(long& result) { if(!std::get<0>(sub_query)) { continue; } - result += (long)(*std::get<0>(sub_query))*sizeof(float); + result += (long)(*std::get<0>(sub_query)); } } catch (std::exception & e) { LOG(DEBUG) << e.what(); @@ -511,7 +511,7 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { } try { auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, - &GroupFileSchema::rows), + &GroupFileSchema::size), where(c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE), order_by(&GroupFileSchema::id), limit(10)); @@ -521,10 +521,10 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { if (to_discard_size <= 0) break; GroupFileSchema group_file; group_file.id = std::get<0>(file); - group_file.rows = std::get<1>(file); + group_file.size = std::get<1>(file); ids.push_back(group_file.id); - LOG(DEBUG) << "Discard group_file.id=" << group_file.id << " group_file.rows=" << group_file.rows; - to_discard_size -= group_file.rows; + LOG(DEBUG) << "Discard group_file.id=" << group_file.id << " group_file.size=" << group_file.size; + to_discard_size -= group_file.size; } if (ids.size() == 0) { @@ -586,7 +586,7 @@ Status 
DBMetaImpl::cleanup_ttl_files(uint16_t seconds) { &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_DELETE and c(&GroupFileSchema::updated_time) > now - seconds*US_PS)); @@ -599,7 +599,7 @@ Status DBMetaImpl::cleanup_ttl_files(uint16_t seconds) { group_file.group_id = std::get<1>(file); group_file.file_id = std::get<2>(file); group_file.file_type = std::get<3>(file); - group_file.rows = std::get<4>(file); + group_file.size = std::get<4>(file); group_file.date = std::get<5>(file); GetGroupFilePath(group_file); if (group_file.file_type == GroupFileSchema::TO_DELETE) { @@ -622,7 +622,7 @@ Status DBMetaImpl::cleanup() { &GroupFileSchema::group_id, &GroupFileSchema::file_id, &GroupFileSchema::file_type, - &GroupFileSchema::rows, + &GroupFileSchema::size, &GroupFileSchema::date), where(c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_DELETE or c(&GroupFileSchema::file_type) == (int)GroupFileSchema::NEW)); @@ -635,7 +635,7 @@ Status DBMetaImpl::cleanup() { group_file.group_id = std::get<1>(file); group_file.file_id = std::get<2>(file); group_file.file_type = std::get<3>(file); - group_file.rows = std::get<4>(file); + group_file.size = std::get<4>(file); group_file.date = std::get<5>(file); GetGroupFilePath(group_file); if (group_file.file_type == GroupFileSchema::TO_DELETE) { @@ -655,7 +655,7 @@ Status DBMetaImpl::cleanup() { Status DBMetaImpl::count(const std::string& group_id, long& result) { try { - auto selected = ConnectorPtr->select(columns(&GroupFileSchema::rows, + auto selected = ConnectorPtr->select(columns(&GroupFileSchema::size, &GroupFileSchema::date), where((c(&GroupFileSchema::file_type) == (int)GroupFileSchema::RAW or c(&GroupFileSchema::file_type) == (int)GroupFileSchema::TO_INDEX or diff --git a/cpp/src/db/FaissExecutionEngine.cpp b/cpp/src/db/FaissExecutionEngine.cpp 
index 605b979481..c2165c948d 100644 --- a/cpp/src/db/FaissExecutionEngine.cpp +++ b/cpp/src/db/FaissExecutionEngine.cpp @@ -47,12 +47,12 @@ size_t FaissExecutionEngine::Count() const { template size_t FaissExecutionEngine::Size() const { - return (size_t)(Count() * pIndex_->d); + return (size_t)(Count() * pIndex_->d)*sizeof(float); } template size_t FaissExecutionEngine::PhysicalSize() const { - return (size_t)(Size()*sizeof(float)); + return (size_t)(Count() * pIndex_->d)*sizeof(float); } template diff --git a/cpp/src/db/MemManager.cpp b/cpp/src/db/MemManager.cpp index ede2a64522..601146ba66 100644 --- a/cpp/src/db/MemManager.cpp +++ b/cpp/src/db/MemManager.cpp @@ -13,6 +13,7 @@ #include "MemManager.h" #include "Meta.h" +#include "MetaConsts.h" namespace zilliz { @@ -48,16 +49,16 @@ size_t MemVectors::approximate_size() const { template Status MemVectors::serialize(std::string& group_id) { group_id = schema_.group_id; - auto rows = approximate_size(); + auto size = approximate_size(); pEE_->Serialize(); - schema_.rows = rows; - schema_.file_type = (rows >= options_.index_trigger_size) ? + schema_.size = size; + schema_.file_type = (size >= options_.index_trigger_size) ? meta::GroupFileSchema::TO_INDEX : meta::GroupFileSchema::RAW; auto status = pMeta_->update_group_file(schema_); LOG(DEBUG) << "New " << ((schema_.file_type == meta::GroupFileSchema::RAW) ? 
"raw" : "to_index") - << " file " << schema_.file_id << " of size " << pEE_->PhysicalSize() / (1024*1024) << " M"; + << " file " << schema_.file_id << " of size " << pEE_->Size() / meta::M << " M"; pEE_->Cache(); diff --git a/cpp/src/db/MetaTypes.h b/cpp/src/db/MetaTypes.h index fd2038eeb6..4b956590ae 100644 --- a/cpp/src/db/MetaTypes.h +++ b/cpp/src/db/MetaTypes.h @@ -40,7 +40,7 @@ struct GroupFileSchema { std::string group_id; std::string file_id; int file_type = NEW; - size_t rows; + size_t size; DateT date = EmptyDate; uint16_t dimension; std::string location = ""; diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index 26608ab976..100ab7b3ab 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -42,7 +42,7 @@ struct Options { Options(); uint16_t memory_sync_interval = 1; uint16_t merge_trigger_number = 2; - size_t index_trigger_size = 1024*1024*256; + size_t index_trigger_size = 1024*1024*1024; Env* env; DBMetaOptions meta; }; // Options From 92da0fa4ca44d88c5d6ce45c80a888e100ffb1a9 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 13:02:22 +0800 Subject: [PATCH 47/56] refactor(db): for schema changes Former-commit-id: 3d1aa65da396177ff49e1922ae032cec834f7850 --- cpp/unittest/db/meta_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 3c8f1802c1..3832a3263d 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -143,7 +143,7 @@ TEST_F(MetaTest, ARCHIVE_TEST_DAYS) { TEST_F(MetaTest, ARCHIVE_TEST_DISK) { DBMetaOptions options; options.path = "/tmp/vecwise_test"; - options.archive_conf = ArchiveConf("delete", "disk:41"); + options.archive_conf = ArchiveConf("delete", "disk:11"); auto impl = meta::DBMetaImpl(options); auto group_id = "meta_test_group"; @@ -161,7 +161,7 @@ TEST_F(MetaTest, ARCHIVE_TEST_DISK) { for (auto i=0; i Date: Sun, 26 May 2019 13:11:48 +0800 Subject: [PATCH 48/56] feat(db): move tables 
under tables directories to avoid name conflict with reserved meta files Former-commit-id: de11db98c00ee071c8578a41c2e9ff673509fe63 --- cpp/src/db/DBMetaImpl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 825d33032b..aee1aa3f20 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -50,7 +50,7 @@ using ConnectorT = decltype(StoragePrototype("")); static std::unique_ptr ConnectorPtr; std::string DBMetaImpl::GetGroupPath(const std::string& group_id) { - return _options.path + "/" + group_id; + return _options.path + "/tables/" + group_id; } std::string DBMetaImpl::GetGroupDatePartitionPath(const std::string& group_id, DateT& date) { @@ -156,7 +156,7 @@ Status DBMetaImpl::add_group(GroupSchema& group_info) { auto group_path = GetGroupPath(group_info.group_id); if (!boost::filesystem::is_directory(group_path)) { - auto ret = boost::filesystem::create_directory(group_path); + auto ret = boost::filesystem::create_directories(group_path); if (!ret) { LOG(ERROR) << "Create directory " << group_path << " Error"; } From 2d08fc415fa1af460d7f88ddce2e53dba8d25787 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 13:24:02 +0800 Subject: [PATCH 49/56] refactor(db): add NextGroupId in DBMetaImpl Former-commit-id: 56deb4603db20ac4ef677f1851a1e76e703a234f --- cpp/src/db/DBMetaImpl.cpp | 14 +++++++++----- cpp/src/db/DBMetaImpl.h | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index aee1aa3f20..08ed0fda93 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -69,6 +69,14 @@ void DBMetaImpl::GetGroupFilePath(GroupFileSchema& group_file) { group_file.location = ss.str(); } +Status DBMetaImpl::NextGroupId(std::string& group_id) { + std::stringstream ss; + SimpleIDGenerator g; + ss << g.getNextIDNumber(); + group_id = ss.str(); + return Status::OK(); +} + 
DBMetaImpl::DBMetaImpl(const DBMetaOptions& options_) : _options(options_) { initialize(); @@ -134,10 +142,7 @@ Status DBMetaImpl::delete_group_partitions(const std::string& group_id, Status DBMetaImpl::add_group(GroupSchema& group_info) { if (group_info.group_id == "") { - std::stringstream ss; - SimpleIDGenerator g; - ss << g.getNextIDNumber(); - group_info.group_id = ss.str(); + NextGroupId(group_info.group_id); } group_info.files_cnt = 0; group_info.id = -1; @@ -147,7 +152,6 @@ Status DBMetaImpl::add_group(GroupSchema& group_info) { try { auto id = ConnectorPtr->insert(group_info); group_info.id = id; - /* LOG(DEBUG) << "Add group " << id; */ } catch (...) { return Status::DBTransactionError("Add Group Error"); } diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index 20f91b8482..a957cf688c 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -65,7 +65,7 @@ public: virtual ~DBMetaImpl(); private: - + Status NextGroupId(std::string& group_id); Status discard_files_of_size(long to_discard_size); Status get_group_no_lock(GroupSchema& group_info); std::string GetGroupPath(const std::string& group_id); From 3958fe5a35c007e512ed4aefff6cd0ff81e3d977 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 15:17:46 +0800 Subject: [PATCH 50/56] refactor(db): add NextFileId Former-commit-id: 51a1f75881a8a1f85e3fdc8ac7217218976cc11c --- cpp/src/db/DBMetaImpl.cpp | 14 +++++++++----- cpp/src/db/DBMetaImpl.h | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 08ed0fda93..9b4e731f74 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -77,6 +77,14 @@ Status DBMetaImpl::NextGroupId(std::string& group_id) { return Status::OK(); } +Status DBMetaImpl::NextFileId(std::string& file_id) { + std::stringstream ss; + SimpleIDGenerator g; + ss << g.getNextIDNumber(); + file_id = ss.str(); + return Status::OK(); +} + DBMetaImpl::DBMetaImpl(const 
DBMetaOptions& options_) : _options(options_) { initialize(); @@ -225,11 +233,8 @@ Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) { return status; } - SimpleIDGenerator g; - std::stringstream ss; - ss << g.getNextIDNumber(); + NextFileId(group_file.file_id); group_file.file_type = GroupFileSchema::NEW; - group_file.file_id = ss.str(); group_file.dimension = group_info.dimension; group_file.size = 0; group_file.created_on = utils::GetMicroSecTimeStamp(); @@ -240,7 +245,6 @@ Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) { try { auto id = ConnectorPtr->insert(group_file); group_file.id = id; - /* LOG(DEBUG) << "Add group_file of file_id=" << group_file.file_id; */ } catch (...) { return Status::DBTransactionError("Add file Error"); } diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index a957cf688c..6108860927 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -65,6 +65,7 @@ public: virtual ~DBMetaImpl(); private: + Status NextFileId(std::string& file_id); Status NextGroupId(std::string& group_id); Status discard_files_of_size(long to_discard_size); Status get_group_no_lock(GroupSchema& group_info); From ff3a89b99768da08977aa2163dd37178a2ea5369 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 16:50:56 +0800 Subject: [PATCH 51/56] feat(db): archive after every serliazation Former-commit-id: bf5191dc707e7336d64534110aa49a2d36da1a1e --- cpp/src/db/DBImpl.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 3673bc4ad3..4028a1bbbe 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -326,9 +326,7 @@ Status DBImpl::background_merge_files(const std::string& group_id) { merge_files(group_id, kv.first, kv.second); } - if (has_merge) { - _pMeta->archive_files(); - } + _pMeta->archive_files(); try_build_index(); From 74e6ef4a9c9935234b26636e83b6b6803e595768 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 
16:51:30 +0800 Subject: [PATCH 52/56] test(db): fix for ARHIVE_DISK_CHECK Former-commit-id: 1a82a410db91e2b6f9ddc38bafc491a1204c0dd1 --- cpp/unittest/db/db_tests.cpp | 6 +++--- cpp/unittest/db/utils.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 7fd5ed8bc5..e30234fe38 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -81,7 +81,7 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { db_->size(size); int d = 256; - int nb = 30; + int nb = 20; float *xb = new float[d * nb]; for(int i = 0; i < nb; i++) { for(int j = 0; j < d; j++) xb[d * i + j] = drand48(); @@ -98,8 +98,8 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { std::this_thread::sleep_for(std::chrono::seconds(1)); db_->size(size); - /* LOG(DEBUG) << "size=" << size; */ - ASSERT_TRUE(size < 2 * engine::meta::G); + LOG(DEBUG) << "size=" << size; + ASSERT_TRUE(size < 1 * engine::meta::G); delete [] xb; }; diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index 9762668622..09428427fe 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -49,7 +49,7 @@ void DBTest::TearDown() { engine::Options DBTest2::GetOptions() { auto options = engine::OptionsFactory::Build(); options.meta.path = "/tmp/vecwise_test"; - options.meta.archive_conf = engine::ArchiveConf("delete", "disk:2"); + options.meta.archive_conf = engine::ArchiveConf("delete", "disk:1"); return options; } From 37fc30e439cdd6c2d8e051ae61311bc1f4c9ae64 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 19:35:30 +0800 Subject: [PATCH 53/56] env(all): update change log Former-commit-id: e9124716de75572eab1e027809b2b90a25ae127f --- cpp/CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index 2080fec550..263f4a787a 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -10,7 +10,9 @@ Please mark all change in change log and use the ticket from JIRA. 
### New Feature +- MS-5 - Implement Auto Archive Feature + ### Task - MS-1 - Add CHANGELOG.md -- MS-4 - Refactor the vecwise_engine code structure \ No newline at end of file +- MS-4 - Refactor the vecwise_engine code structure From d744ef4a33ccfd1aa0404dda938d729d5a2f7817 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 27 May 2019 10:35:41 +0800 Subject: [PATCH 54/56] fix(db): replace locatime with locatime_r for thread safe concern Former-commit-id: 1b857e297c3b26e2730f86c67e8dc798755493fe --- cpp/src/db/Meta.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/src/db/Meta.cpp b/cpp/src/db/Meta.cpp index 31fdee74a4..3d272e0637 100644 --- a/cpp/src/db/Meta.cpp +++ b/cpp/src/db/Meta.cpp @@ -4,6 +4,7 @@ * Proprietary and confidential. ******************************************************************************/ #include +#include #include "Meta.h" namespace zilliz { @@ -12,23 +13,24 @@ namespace engine { namespace meta { DateT Meta::GetDate(const std::time_t& t, int day_delta) { - tm *ltm = std::localtime(&t); + struct tm ltm; + localtime_r(&t, &ltm); if (day_delta > 0) { do { - ++ltm->tm_mday; + ++ltm.tm_mday; --day_delta; } while(day_delta > 0); - mktime(ltm); + mktime(&ltm); } else if (day_delta < 0) { do { - --ltm->tm_mday; + --ltm.tm_mday; ++day_delta; } while(day_delta < 0); - mktime(ltm); + mktime(&ltm); } else { - ltm->tm_mday; + ltm.tm_mday; } - return ltm->tm_year*10000 + ltm->tm_mon*100 + ltm->tm_mday; + return ltm.tm_year*10000 + ltm.tm_mon*100 + ltm.tm_mday; } DateT Meta::GetDateWithDelta(int day_delta) { From 9dcc68b814730d9c9a9cc18209be4927e05c2b06 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 27 May 2019 14:58:38 +0800 Subject: [PATCH 55/56] feat(db): add exception Former-commit-id: b2a9deb7df0544a303858fff7f4d6eddf3e6b1f7 --- cpp/src/db/Exception.h | 54 ++++++++++++++++++++++++++++++++++++++++++ cpp/src/db/Options.cpp | 24 ++++++++++++------- 2 files changed, 69 insertions(+), 9 deletions(-) create mode 100644
cpp/src/db/Exception.h diff --git a/cpp/src/db/Exception.h b/cpp/src/db/Exception.h new file mode 100644 index 0000000000..a5b4b4c421 --- /dev/null +++ b/cpp/src/db/Exception.h @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#pragma once + +#include +#include + +namespace zilliz { +namespace vecwise { +namespace engine { + +class Exception : public std::exception { +public: + Exception(const std::string& message) + : message_(message) { + } + + Exception() + : message_() { + } + + virtual const char* what() const throw() { + if (message_.empty()) { + return "Default Exception."; + } else { + return message_.c_str(); + } + } + + virtual ~Exception() throw() {}; + +protected: + + std::string message_; +}; + +class InvalidArgumentException : public Exception { +public: + InvalidArgumentException() : Exception("Invalid Argument"){}; + InvalidArgumentException(const std::string& message) : Exception(message) {}; +}; + +class OutOfRangeException : public Exception { +public: + OutOfRangeException() : Exception("Out Of Range"){}; + OutOfRangeException(const std::string& message) : Exception(message) {}; +}; + +} // namespace engine +} // namespace vecwise +} // namespace zilliz diff --git a/cpp/src/db/Options.cpp b/cpp/src/db/Options.cpp index 2a15f0722d..dfd6311b61 100644 --- a/cpp/src/db/Options.cpp +++ b/cpp/src/db/Options.cpp @@ -11,6 +11,7 @@ #include "Options.h" #include "Env.h" #include "DBMetaImpl.h" +#include "Exception.h" namespace zilliz { namespace vecwise { @@ -46,24 +47,29 @@ void ArchiveConf::ParseCritirias(const std::string& criterias) { LOG(WARNING) << "Invalid ArchiveConf Criterias: " << token << " Ignore!"; continue; } - auto value = std::stoi(kv[1]); - 
criterias_[kv[0]] = value; + try { + auto value = std::stoi(kv[1]); + criterias_[kv[0]] = value; + } + catch (std::out_of_range&){ + LOG(ERROR) << "Out of range: '" << kv[1] << "'"; + throw OutOfRangeException(); + } + catch (...){ + LOG(ERROR) << "Invalid argument: '" << kv[1] << "'"; + throw InvalidArgumentException(); + } } } void ArchiveConf::ParseType(const std::string& type) { if (type != "delete" && type != "swap") { - LOG(ERROR) << "Invalid Archive"; - assert(false); + LOG(ERROR) << "Invalid argument: type='" << type << "'"; + throw InvalidArgumentException(); } type_ = type; } -/* DBMetaOptions::DBMetaOptions(const std::string& dbpath, */ -/* const std::string& uri) */ -/* : path(dbpath), backend_uri(uri) { */ -/* } */ - } // namespace engine } // namespace vecwise } // namespace zilliz From c5b931d59e411c09084ddcb6dada4ab4cb489117 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 27 May 2019 14:59:00 +0800 Subject: [PATCH 56/56] test(db): fix test error after impl exception Former-commit-id: 71184ffe49ac0130d2002aef4f8f55c825414d0c --- cpp/unittest/db/db_tests.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index e30234fe38..f8ced228e0 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -16,7 +16,8 @@ using namespace zilliz::vecwise; TEST_F(DBTest, CONFIG_TEST) { { - EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); + ASSERT_ANY_THROW(engine::ArchiveConf conf("wrong")); + /* EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); */ } { engine::ArchiveConf conf("delete");