From 5a406d6ea03031cc5af394ad3fcbd1a2b5c000ec Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 19 May 2019 21:50:24 +0800 Subject: [PATCH 01/22] feat(db): add simple delete partitions Former-commit-id: 61aeee06c7d928939e6d3d28013630464cf0e046 --- cpp/src/db/DB.h | 2 ++ cpp/src/db/DBImpl.cpp | 6 ++++++ cpp/src/db/DBImpl.h | 1 + cpp/src/db/DBMetaImpl.cpp | 38 ++++++++++++++++++++++++++++++++++ cpp/src/db/DBMetaImpl.h | 2 ++ cpp/src/db/LocalMetaImpl.h | 4 +++- cpp/src/db/Meta.cpp | 21 ++++++++++++++++--- cpp/src/db/Meta.h | 6 ++++-- cpp/unittest/db/meta_tests.cpp | 5 +++++ 9 files changed, 79 insertions(+), 6 deletions(-) diff --git a/cpp/src/db/DB.h b/cpp/src/db/DB.h index 450a980c4a..6f922475b5 100644 --- a/cpp/src/db/DB.h +++ b/cpp/src/db/DB.h @@ -23,6 +23,8 @@ public: virtual Status add_group(meta::GroupSchema& group_info_) = 0; virtual Status get_group(meta::GroupSchema& group_info_) = 0; + virtual Status delete_vectors(const std::string& group_id, + const meta::DatesT& dates) = 0; virtual Status has_group(const std::string& group_id_, bool& has_or_not_) = 0; virtual Status get_group_files(const std::string& group_id_, const int date_delta_, diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 216a9b352d..72b053231b 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -44,6 +44,12 @@ Status DBImpl::get_group(meta::GroupSchema& group_info) { return _pMeta->get_group(group_info); } +template +Status DBImpl::delete_vectors(const std::string& group_id, + const meta::DatesT& dates) { + return _pMeta->delete_group_partitions(group_id, dates); +} + template Status DBImpl::has_group(const std::string& group_id_, bool& has_or_not_) { return _pMeta->has_group(group_id_, has_or_not_); diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index d2aed0af1d..45f249bd06 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -35,6 +35,7 @@ public: virtual Status add_group(meta::GroupSchema& group_info) override; virtual Status get_group(meta::GroupSchema& group_info) override; + virtual Status delete_vectors(const std::string& group_id, const meta::DatesT& dates) override; virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override; virtual Status get_group_files(const std::string& group_id_, diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index aaaaf21ce4..b15180817d 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -104,6 +104,44 @@ Status DBMetaImpl::initialize() { return Status::OK(); } +// PXU TODO: Temp solution. Will fix later +Status DBMetaImpl::delete_group_partitions(const std::string& group_id, + const meta::DatesT& dates) { + if (dates.size() == 0) { + return Status::OK(); + } + + GroupSchema group_info; + group_info.group_id = group_id; + auto status = get_group(group_info); + if (!status.ok()) { + return status; + } + + auto yesterday = GetDate(-2); + + for (auto& date : dates) { + if (date >= yesterday) { + return Status::Error("Could not delete partitions with 2 days"); + } + } + + try { + ConnectorPtr->update_all( + set( + c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE + ), + where( + c(&GroupFileSchema::group_id) == group_id and + in(&GroupFileSchema::date, dates) + )); + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + return Status::OK(); +} + Status DBMetaImpl::add_group(GroupSchema& group_info) { if (group_info.group_id == "") { std::stringstream ss; diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index aca0ec3141..fab2e1560a 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -24,6 +24,8 @@ public: virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override; virtual Status add_group_file(GroupFileSchema& group_file_info) override; + virtual Status delete_group_partitions(const std::string& group_id, + const meta::DatesT& dates) override; virtual Status has_group_file(const std::string& group_id_, const std::string& file_id_, diff --git a/cpp/src/db/LocalMetaImpl.h b/cpp/src/db/LocalMetaImpl.h index 4c324c5796..116c9e4672 100644 --- a/cpp/src/db/LocalMetaImpl.h +++ b/cpp/src/db/LocalMetaImpl.h @@ -22,7 +22,9 @@ public: virtual Status get_group(GroupSchema& group_info_) override; virtual Status has_group(const std::string& group_id_, bool& has_or_not_) override; - virtual Status add_group_file(GroupFileSchema& group_file_info) = 0; + virtual Status add_group_file(GroupFileSchema& group_file_info) override; + /* virtual Status delete_group_partitions(const std::string& group_id, */ + /* const meta::DatesT& dates) override; */ virtual Status has_group_file(const std::string& group_id_, const std::string& file_id_, diff --git a/cpp/src/db/Meta.cpp b/cpp/src/db/Meta.cpp index 1b97c06c79..1bd3a20622 100644 --- a/cpp/src/db/Meta.cpp +++ b/cpp/src/db/Meta.cpp @@ -11,13 +11,28 @@ namespace vecwise { namespace engine { namespace meta { -DateT Meta::GetDate(const std::time_t& t) { +DateT Meta::GetDate(const std::time_t& t, int day_delta) { tm *ltm = std::localtime(&t); + if (day_delta > 0) { + do { + ++ltm->tm_mday; + --day_delta; + } while(day_delta > 0); + mktime(ltm); + } else if (day_delta < 0) { + do { + --ltm->tm_mday; + ++day_delta; + } while(day_delta < 0); + mktime(ltm); + } else { + ltm->tm_mday; + } return ltm->tm_year*10000 + ltm->tm_mon*100 + ltm->tm_mday; } -DateT Meta::GetDate() { - return GetDate(std::time(nullptr)); +DateT Meta::GetDate(int day_delta) { + return GetDate(std::time(nullptr), day_delta); } } // namespace meta diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index e0c1a84c76..ffc35b0606 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -67,6 +67,8 @@ public: virtual Status has_group(const std::string& group_id_, bool& has_or_not_) = 0; virtual Status add_group_file(GroupFileSchema& group_file_info) = 0; + virtual Status delete_group_partitions(const std::string& group_id, + const meta::DatesT& dates) = 0; virtual Status has_group_file(const std::string& group_id_, const std::string& file_id_, @@ -98,8 +100,8 @@ public: virtual Status count(const std::string& group_id, long& result) = 0; - static DateT GetDate(const std::time_t& t); - static DateT GetDate(); + static DateT GetDate(const std::time_t& t, int day_delta); + static DateT GetDate(int day_delta = 0); }; // MetaData diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 2ede539803..dc11feafad 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -59,6 +59,11 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { ASSERT_TRUE(status.ok()); ASSERT_EQ(group_file.file_type, new_file_type); + meta::DatesT dates; + dates.push_back(meta::Meta::GetDate()); + status = impl_->delete_group_partitions(group_file.group_id, dates); + ASSERT_FALSE(status.ok()); + /* group_file.file_type = meta::GroupFileSchema::NEW; */ /* status = impl_->get_group_file(group_file.group_id, group_file.file_id, group_file); */ /* ASSERT_TRUE(status.ok()); */ From 36cc019f2b1905b95fa32a250effab77d9a4835b Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 20 May 2019 10:47:59 +0800 Subject: [PATCH 02/22] fix(db): get group bug fix Former-commit-id: 26769b5f33531210ab4a7810a1dbd6ed78310583 --- cpp/src/db/DBMetaImpl.cpp | 2 +- cpp/src/db/Meta.cpp | 6 +++++- cpp/src/db/Meta.h | 5 +++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index b15180817d..c48ed3c421 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -118,7 +118,7 @@ Status DBMetaImpl::delete_group_partitions(const std::string& group_id, return status; } - auto yesterday = GetDate(-2); + auto yesterday = GetDateWithDelta(-2); for (auto& date : dates) { if (date >= yesterday) { diff --git a/cpp/src/db/Meta.cpp b/cpp/src/db/Meta.cpp index 1bd3a20622..31fdee74a4 100644 --- a/cpp/src/db/Meta.cpp +++ b/cpp/src/db/Meta.cpp @@ -31,10 +31,14 @@ DateT Meta::GetDate(const std::time_t& t, int day_delta) { return ltm->tm_year*10000 + ltm->tm_mon*100 + ltm->tm_mday; } -DateT Meta::GetDate(int day_delta) { +DateT Meta::GetDateWithDelta(int day_delta) { return GetDate(std::time(nullptr), day_delta); } +DateT Meta::GetDate() { + return GetDate(std::time(nullptr), 0); +} + } // namespace meta } // namespace engine } // namespace vecwise diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index ffc35b0606..6d2abb8dee 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -100,8 +100,9 @@ public: virtual Status count(const std::string& group_id, long& result) = 0; - static DateT GetDate(const std::time_t& t, int day_delta); - static DateT GetDate(int day_delta = 0); + static DateT GetDate(const std::time_t& t, int day_delta = 0); + static DateT GetDate(); + static DateT GetDateWithDelta(int day_delta); }; // MetaData From 781683419863f9228642d94f91746ad280f9fbdc Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 20 May 2019 11:33:10 +0800 Subject: [PATCH 03/22] feat(db): add get group file api Former-commit-id: f01dabd02174e69672a29e8dd5d27ec18f915089 --- cpp/src/db/DBMetaImpl.cpp | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index c48ed3c421..3552bf2050 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -118,7 +118,7 @@ Status DBMetaImpl::delete_group_partitions(const std::string& group_id, return status; } - auto yesterday = GetDateWithDelta(-2); + auto yesterday = GetDateWithDelta(-1); for (auto& date : dates) { if (date >= yesterday) { @@ -413,7 +413,32 @@ Status DBMetaImpl::has_group_file(const std::string& group_id_, Status DBMetaImpl::get_group_file(const std::string& group_id_, const std::string& file_id_, GroupFileSchema& group_file_info_) { - //PXU TODO + try { + auto files = ConnectorPtr->select(columns(&GroupFileSchema::id, + &GroupFileSchema::group_id, + &GroupFileSchema::file_id, + &GroupFileSchema::file_type, + &GroupFileSchema::rows, + &GroupFileSchema::date), + where(c(&GroupFileSchema::file_id) == file_id_ and + c(&GroupFileSchema::group_id) == group_id_ + )); + assert(files.size() <= 1); + if (files.size() == 1) { + group_file_info_.id = std::get<0>(files[0]); + group_file_info_.group_id = std::get<1>(files[0]); + group_file_info_.file_id = std::get<2>(files[0]); + group_file_info_.file_type = std::get<3>(files[0]); + group_file_info_.rows = std::get<4>(files[0]); + group_file_info_.date = std::get<5>(files[0]); + } else { + return Status::NotFound("GroupFile " + file_id_ + " not found"); + } + } catch (std::exception &e) { + LOG(DEBUG) << e.what(); + throw e; + } + return Status::OK(); } From 53452841d155fb22bfb933fdee3586a85a95813d Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Mon, 20 May 2019 11:33:26 +0800 Subject: [PATCH 04/22] test(db): test delete group partitions Former-commit-id: b8575a928d8daaf91b3420e03cf382566eb841dd --- cpp/unittest/db/meta_tests.cpp | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index dc11feafad..83177fea19 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -64,10 +64,26 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { status = impl_->delete_group_partitions(group_file.group_id, dates); ASSERT_FALSE(status.ok()); - /* group_file.file_type = meta::GroupFileSchema::NEW; */ - /* status = impl_->get_group_file(group_file.group_id, group_file.file_id, group_file); */ - /* ASSERT_TRUE(status.ok()); */ - /* ASSERT_EQ(group_file.file_type, new_file_type); */ + dates.clear(); + for (auto i=2; i < 10; ++i) { + dates.push_back(meta::Meta::GetDateWithDelta(-1*i)); + } + status = impl_->delete_group_partitions(group_file.group_id, dates); + ASSERT_TRUE(status.ok()); + + group_file.date = meta::Meta::GetDateWithDelta(-2); + status = impl_->update_group_file(group_file); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(group_file.date, meta::Meta::GetDateWithDelta(-2)); + ASSERT_FALSE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); + + dates.clear(); + dates.push_back(group_file.date); + status = impl_->delete_group_partitions(group_file.group_id, dates); + ASSERT_TRUE(status.ok()); + status = impl_->get_group_file(group_file.group_id, group_file.file_id, group_file); + ASSERT_TRUE(status.ok()); + ASSERT_TRUE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); } TEST_F(MetaTest, GROUP_FILES_TEST) { From 27b2f8efbaf4595b9b9f84c69cd3d7ad21cda274 Mon Sep 17 00:00:00 2001 From: groot Date: Thu, 23 May 2019 10:46:20 +0800 Subject: [PATCH 05/22] prepare for gpu index Former-commit-id: c8f58a927afbb53934ec326bcdaa48267bec68c0 --- .gitignore | 4 ---- cpp/.gitignore | 6 ++++++ cpp/conf/server_config_template.yaml | 19 +++++++++++++++++++ cpp/src/CMakeLists.txt | 1 + 4 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 cpp/.gitignore create mode 100644 cpp/conf/server_config_template.yaml diff --git a/.gitignore b/.gitignore index d239dd72a6..c0b8dbb2d7 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,3 @@ cmake_build *.lo *.tar.gz *.log - -cpp/third_party/thrift-0.12.0/ -cpp/third_party/faiss-1.5.1 -cpp/megasearch/ diff --git a/cpp/.gitignore b/cpp/.gitignore new file mode 100644 index 0000000000..e99e0273f3 --- /dev/null +++ b/cpp/.gitignore @@ -0,0 +1,6 @@ +third_party/thrift-0.12.0/ +third_party/faiss-1.5.1/ +third_party/bzip2-1.0.6/ +third_party/sqlite3/ +megasearch/ +conf/server_config.yaml diff --git a/cpp/conf/server_config_template.yaml b/cpp/conf/server_config_template.yaml new file mode 100644 index 0000000000..fb6f6beae2 --- /dev/null +++ b/cpp/conf/server_config_template.yaml @@ -0,0 +1,19 @@ +server_config: + address: 0.0.0.0 + port: 33001 + transfer_protocol: json #optional: binary, compact, json, debug + server_mode: thread_pool #optional: simple, thread_pool + gpu_index: 0 #which gpu to be used + +db_config: + db_path: /tmp/vecwise + db_backend_url: http://127.0.0.1 + db_flush_interval: 5 #unit: second + idmapper_max_open_file: 128 + +license_config: + license_path: "/tmp/system.license" + +cache_config: + cpu_cache_capacity: 16 # unit: GB + gpu_cache_capacity: 2 # unit: GB \ No newline at end of file diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index d7978db37d..a3d9effb46 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -123,6 +123,7 @@ if (ENABLE_LICENSE STREQUAL "ON") add_executable(get_sys_info ${get_sys_info_src}) target_link_libraries(get_sys_info ${license_libs} vecwise_license) target_link_libraries(license_generator ${license_libs}) + install(TARGETS get_sys_info DESTINATION bin) endif () install(TARGETS vecwise_server DESTINATION bin) \ No newline at end of file From d07379e51eea1c833d05f02d28a1a148bf3d0722 Mon Sep 17 00:00:00 2001 From: groot Date: Thu, 23 May 2019 11:01:11 +0800 Subject: [PATCH 06/22] prepare for gpu index Former-commit-id: 3ee5d9b3575407dbc8226cc0e4df634386cdf61f --- cpp/conf/server_config.yaml | 35 ++++------------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/cpp/conf/server_config.yaml b/cpp/conf/server_config.yaml index 40d10d0a5a..fb6f6beae2 100644 --- a/cpp/conf/server_config.yaml +++ b/cpp/conf/server_config.yaml @@ -1,45 +1,18 @@ server_config: address: 0.0.0.0 port: 33001 - transfer_protocol: json #optional: binary, compact, json, debug + transfer_protocol: json #optional: binary, compact, json, debug server_mode: thread_pool #optional: simple, thread_pool + gpu_index: 0 #which gpu to be used db_config: db_path: /tmp/vecwise db_backend_url: http://127.0.0.1 - db_flush_interval: 5 #unit: second + db_flush_interval: 5 #unit: second idmapper_max_open_file: 128 license_config: - license_path: "/home/jinhai/Documents/development/vecwise_engine/license/system.license" - -log_config: - global: - format: "%datetime | %level | %logger | %msg" - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-global.log" - enabled: true - to_file: true - to_standard_output: true - subsecond_precision: 3 - performance_tracking: false - max_log_file_size: 2097152 # throw log files away after 2mb - debug: - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-debug.log" - enabled: true - warning: - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-warning.log" - trace: - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-trace.log" - verbose: - format: "%datetime{%d/%m/%y} | %level-%vlevel | %msg" - to_file: false - to_standard_output: true - error: - enabled: false - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-error.log" - fatal: - enabled: false - filename: "/tmp/vecwise/logs/vecwise_engine-%datetime{%h:%m}-fatal.log" + license_path: "/tmp/system.license" cache_config: cpu_cache_capacity: 16 # unit: GB From bbeb77dcae3f08be3a715ba3f4c9dd8565ff0bf7 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Thu, 23 May 2019 18:58:55 +0800 Subject: [PATCH 07/22] feat(db): add Archive conf in options Former-commit-id: fdae1cf2a4f86d12753ce6c4cc665178df0c1f76 --- cpp/src/db/Options.cpp | 44 +++++++++++++++++++++++++++++++++++ cpp/src/db/Options.h | 17 ++++++++++++++ cpp/unittest/db/db_tests.cpp | 45 ++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) diff --git a/cpp/src/db/Options.cpp b/cpp/src/db/Options.cpp index 2a0c01af8b..2a15f0722d 100644 --- a/cpp/src/db/Options.cpp +++ b/cpp/src/db/Options.cpp @@ -3,6 +3,11 @@ * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. ******************************************************************************/ +#include +#include +#include +#include + #include "Options.h" #include "Env.h" #include "DBMetaImpl.h" @@ -15,6 +20,45 @@ Options::Options() : env(Env::Default()) { } +ArchiveConf::ArchiveConf(const std::string& type, const std::string& criterias) { + ParseType(type); + ParseCritirias(criterias); +} + +void ArchiveConf::ParseCritirias(const std::string& criterias) { + std::stringstream ss(criterias); + std::vector tokens; + + boost::algorithm::split(tokens, criterias, boost::is_any_of(";")); + + if (tokens.size() == 0) { + return; + } + + for (auto& token : tokens) { + std::vector kv; + boost::algorithm::split(kv, token, boost::is_any_of(":")); + if (kv.size() != 2) { + LOG(WARNING) << "Invalid ArchiveConf Criterias: " << token << " Ignore!"; + continue; + } + if (kv[0] != "disk" && kv[0] != "days") { + LOG(WARNING) << "Invalid ArchiveConf Criterias: " << token << " Ignore!"; + continue; + } + auto value = std::stoi(kv[1]); + criterias_[kv[0]] = value; + } +} + +void ArchiveConf::ParseType(const std::string& type) { + if (type != "delete" && type != "swap") { + LOG(ERROR) << "Invalid Archive"; + assert(false); + } + type_ = type; +} + /* DBMetaOptions::DBMetaOptions(const std::string& dbpath, */ /* const std::string& uri) */ /* : path(dbpath), backend_uri(uri) { */ diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index 5bbcf6dabe..ee2d62f715 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -7,6 +7,7 @@ #include #include +#include namespace zilliz { namespace vecwise { @@ -14,6 +15,22 @@ namespace engine { class Env; +struct ArchiveConf { + using CriteriaT = std::map; + + ArchiveConf(const std::string& type, const std::string& criterias = "disk:512"); + + const std::string& GetType() const { return type_; } + const CriteriaT GetCriterias() const { return criterias_; } + +private: + void ParseCritirias(const std::string& type); + void ParseType(const std::string& criterias); + + std::string type_; + CriteriaT criterias_; +}; + struct DBMetaOptions { /* DBMetaOptions(const std::string&, const std::string&); */ std::string path; diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index c9bc958b99..8030e574e2 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -12,6 +12,51 @@ using namespace zilliz::vecwise; +TEST_F(DBTest, CONFIG_TEST) { + { + EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); + } + { + engine::ArchiveConf conf("delete"); + ASSERT_EQ(conf.GetType(), "delete"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["disk"] == 512); + } + { + engine::ArchiveConf conf("swap"); + ASSERT_EQ(conf.GetType(), "swap"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["disk"] == 512); + } + { + ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "disk:")); + ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "disk:a")); + engine::ArchiveConf conf("swap", "disk:1024"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["disk"] == 1024); + } + { + ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:")); + ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a")); + engine::ArchiveConf conf("swap", "days:100"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 1); + ASSERT_TRUE(criterias["days"] == 100); + } + { + ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:")); + ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a")); + engine::ArchiveConf conf("swap", "days:100;disk:200"); + auto criterias = conf.GetCriterias(); + ASSERT_TRUE(criterias.size() == 2); + ASSERT_TRUE(criterias["days"] == 100); + ASSERT_TRUE(criterias["disk"] == 200); + } +} + TEST_F(DBTest, DB_TEST) { static const std::string group_name = "test_group"; From 89cdeb11810a1b938881ffc7f9363ac6434aa740 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Thu, 23 May 2019 20:22:39 +0800 Subject: [PATCH 08/22] feat(db): add archive files in meta Former-commit-id: 0d284f947dbf65bbc258a037eec1bb5458fa0007 --- cpp/src/db/DBMetaImpl.cpp | 107 ++++++++++++++++++++++++++++++++--- cpp/src/db/DBMetaImpl.h | 3 + cpp/src/db/LocalMetaImpl.cpp | 5 ++ cpp/src/db/LocalMetaImpl.h | 2 + cpp/src/db/Meta.h | 4 ++ cpp/src/db/Options.h | 2 +- 6 files changed, 114 insertions(+), 9 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 3552bf2050..e506f14ea8 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -151,6 +151,7 @@ Status DBMetaImpl::add_group(GroupSchema& group_info) { } group_info.files_cnt = 0; group_info.id = -1; + group_info.created_on = GetMicroSecTimeStamp(); { try { @@ -237,7 +238,8 @@ Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) { group_file.file_id = ss.str(); group_file.dimension = group_info.dimension; group_file.rows = 0; - group_file.updated_time = GetMicroSecTimeStamp(); //ConnectorPtr->select(datetime("now", "localtime +1 hour")).front(); + group_file.created_on = GetMicroSecTimeStamp(); + group_file.updated_time = group_file.created_on; GetGroupFilePath(group_file); { @@ -449,17 +451,106 @@ Status DBMetaImpl::get_group_files(const std::string& group_id_, return Status::OK(); } +// PXU TODO: Support Swap +Status DBMetaImpl::archive_files() { + auto& criterias = _options.archive_conf.GetCriterias(); + if (criterias.size() == 0) { + return Status::OK(); + } + + for (auto kv : criterias) { + auto& criteria = kv.first; + auto& limit = kv.second; + if (criteria == "days") { + auto usecs = 3600*24*limit*1000000; + auto now = GetMicroSecTimeStamp(); + try + { + ConnectorPtr->update_all( + set( + c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE + ), + where( + c(&GroupFileSchema::created_on) < now - usecs and + c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE + )); + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + } + if (criteria == "disk") { + int G = 1024*1024*1024; + long unsigned int sum = 0; + try { + auto sum_c = ConnectorPtr->sum( + &GroupFileSchema::rows, + where( + c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE + )); + sum = *sum_c; + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + // PXU TODO: refactor rows + auto to_delete = sum - limit*G/sizeof(float); + discard_files_of_size(to_delete); + } + } + + return Status::OK(); +} + +Status DBMetaImpl::discard_files_of_size(long to_discard_size) { + if (to_discard_size <= 0) { + return Status::OK(); + } + try { + auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, + &GroupFileSchema::file_type, + &GroupFileSchema::rows), + where(c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE), + order_by(&GroupFileSchema::id), + limit(10)); + + /* std::map groups; */ + + /* for (auto& file : selected) { */ + /* GroupFileSchema group_file; */ + /* group_file.id = std::get<0>(file); */ + /* group_file.group_id = std::get<1>(file); */ + /* group_file.file_id = std::get<2>(file); */ + /* group_file.file_type = std::get<3>(file); */ + /* group_file.rows = std::get<4>(file); */ + /* group_file.date = std::get<5>(file); */ + /* GetGroupFilePath(group_file); */ + /* auto groupItr = groups.find(group_file.group_id); */ + /* if (groupItr == groups.end()) { */ + /* GroupSchema group_info; */ + /* group_info.group_id = group_file.group_id; */ + /* auto status = get_group_no_lock(group_info); */ + /* if (!status.ok()) { */ + /* return status; */ + /* } */ + /* groups[group_file.group_id] = group_info; */ + /* } */ + /* group_file.dimension = groups[group_file.group_id].dimension; */ + /* files.push_back(group_file); */ + /* } */ + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + + return Status::OK(); + +} + Status DBMetaImpl::update_group_file(GroupFileSchema& group_file) { group_file.updated_time = GetMicroSecTimeStamp(); try { ConnectorPtr->update(group_file); - /* auto commited = ConnectorPtr->transaction([&] () mutable { */ - /* ConnectorPtr->update(group_file); */ - /* return true; */ - /* }); */ - /* if (!commited) { */ - /* return Status::DBTransactionError("Update file Error"); */ - /* } */ } catch (std::exception & e) { LOG(DEBUG) << e.what(); LOG(DEBUG) << "id= " << group_file.id << " file_id=" << group_file.file_id; diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index fab2e1560a..e2fd051b42 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -50,6 +50,8 @@ public: virtual Status files_to_index(GroupFilesSchema&) override; + virtual Status archive_files() override; + virtual Status cleanup() override; virtual Status cleanup_ttl_files(uint16_t seconds) override; @@ -62,6 +64,7 @@ public: private: + Status discard_files_of_size(long to_discard_size); long GetMicroSecTimeStamp(); Status get_group_no_lock(GroupSchema& group_info); std::string GetGroupPath(const std::string& group_id); diff --git a/cpp/src/db/LocalMetaImpl.cpp b/cpp/src/db/LocalMetaImpl.cpp index 60c23158be..2ec5c08ce4 100644 --- a/cpp/src/db/LocalMetaImpl.cpp +++ b/cpp/src/db/LocalMetaImpl.cpp @@ -241,6 +241,11 @@ Status LocalMetaImpl::update_files(GroupFilesSchema& files) { return Status::OK(); } +Status LocalMetaImpl::archive_files() { + //PXU TODO + return Status::OK(); +} + Status LocalMetaImpl::cleanup() { //PXU TODO return Status::OK(); diff --git a/cpp/src/db/LocalMetaImpl.h b/cpp/src/db/LocalMetaImpl.h index 116c9e4672..71927e8425 100644 --- a/cpp/src/db/LocalMetaImpl.h +++ b/cpp/src/db/LocalMetaImpl.h @@ -47,6 +47,8 @@ public: virtual Status files_to_index(GroupFilesSchema&) override; + virtual Status archive_files() override; + virtual Status cleanup_ttl_files(uint16_t seconds) override; virtual Status count(const std::string& group_id, long& result) override; diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index 6d2abb8dee..74e1637f77 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -30,6 +30,7 @@ struct GroupSchema { size_t files_cnt = 0; uint16_t dimension; std::string location = ""; + long created_on; }; // GroupSchema @@ -51,6 +52,7 @@ struct GroupFileSchema { uint16_t dimension; std::string location = ""; long updated_time; + long created_on; }; // GroupFileSchema typedef std::vector GroupFilesSchema; @@ -91,6 +93,8 @@ public: virtual Status files_to_merge(const std::string& group_id, DatePartionedGroupFilesSchema& files) = 0; + virtual Status archive_files() = 0; + virtual Status files_to_index(GroupFilesSchema&) = 0; virtual Status cleanup() = 0; diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index ee2d62f715..26608ab976 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -32,9 +32,9 @@ private: }; struct DBMetaOptions { - /* DBMetaOptions(const std::string&, const std::string&); */ std::string path; std::string backend_uri; + ArchiveConf archive_conf = ArchiveConf("delete"); }; // DBMetaOptions From 5c79aaf51fb6004d3cbb92f745556b5949055df6 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Thu, 23 May 2019 20:35:59 +0800 Subject: [PATCH 09/22] feat(db): add archive files in meta part 2 Former-commit-id: 0e534409c959a15d8bf746d6fb9fad093d787bda --- cpp/src/db/DBMetaImpl.cpp | 47 ++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index e506f14ea8..8341a7a961 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -508,43 +508,40 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { } try { auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, - &GroupFileSchema::file_type, &GroupFileSchema::rows), where(c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE), order_by(&GroupFileSchema::id), limit(10)); + std::vector ids; - /* std::map groups; */ + for (auto& file : selected) { + if (to_discard_size <= 0) break; + GroupFileSchema group_file; + group_file.id = std::get<0>(file); + group_file.rows = std::get<1>(file); + ids.push_back(group_file.id); + to_discard_size -= group_file.rows; + } + + if (ids.size() == 0) { + return Status::OK(); + } + + ConnectorPtr->update_all( + set( + c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE + ), + where( + in(&GroupFileSchema::id, ids) + )); - /* for (auto& file : selected) { */ - /* GroupFileSchema group_file; */ - /* group_file.id = std::get<0>(file); */ - /* group_file.group_id = std::get<1>(file); */ - /* group_file.file_id = std::get<2>(file); */ - /* group_file.file_type = std::get<3>(file); */ - /* group_file.rows = std::get<4>(file); */ - /* group_file.date = std::get<5>(file); */ - /* GetGroupFilePath(group_file); */ - /* auto groupItr = groups.find(group_file.group_id); */ - /* if (groupItr == groups.end()) { */ - /* GroupSchema group_info; */ - /* group_info.group_id = group_file.group_id; */ - /* auto status = get_group_no_lock(group_info); */ - /* if (!status.ok()) { */ - /* return status; */ - /* } */ - /* groups[group_file.group_id] = group_info; */ - /* } */ - /* group_file.dimension = groups[group_file.group_id].dimension; */ - /* files.push_back(group_file); */ - /* } */ } catch (std::exception & e) { LOG(DEBUG) << e.what(); throw e; } - return Status::OK(); + return discard_files_of_size(to_discard_size); } Status DBMetaImpl::update_group_file(GroupFileSchema& group_file) { From 722661778259bf575c5ca9a30f801ccb0beb2b61 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 10:55:54 +0800 Subject: [PATCH 10/22] fix(db): change type from into to size_t Former-commit-id: e1e16afbac7ea8d763c328c9903f4cbe2689d755 --- cpp/src/db/DBMetaImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 8341a7a961..2d11379d1a 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -1,4 +1,5 @@ /******************************************************************************* + * long rows = 3*1024*1024*1024; * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. @@ -480,7 +481,7 @@ Status DBMetaImpl::archive_files() { } } if (criteria == "disk") { - int G = 1024*1024*1024; + size_t G = 1024*1024*1024; long unsigned int sum = 0; try { auto sum_c = ConnectorPtr->sum( From 5d0b5e99f4a1473dd8c22f9bf093158f43e4c842 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 10:56:15 +0800 Subject: [PATCH 11/22] test(db): add archive test Former-commit-id: e8c2116cd7bdec959950b2d2735c4f26d0a4a1d8 --- cpp/unittest/db/meta_tests.cpp | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 83177fea19..8489b0cd1b 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -86,6 +86,49 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { ASSERT_TRUE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); } +TEST_F(MetaTest, ARCHIVE_TEST) { + DBMetaOptions options; + options.path = "/tmp/vecwise_test"; + options.archive_conf = ArchiveConf("delete", "disk:41"); + + auto impl = meta::DBMetaImpl(options); + auto group_id = "meta_test_group"; + + meta::GroupSchema group; + group.group_id = group_id; + auto status = impl.add_group(group); + + meta::GroupFilesSchema files; + meta::GroupFileSchema group_file; + group_file.group_id = group.group_id; + + auto cnt = 10; + auto each_size = 2UL; + for (auto i=0; i Date: Fri, 24 May 2019 11:48:40 +0800 Subject: [PATCH 12/22] refactor(db): add utils Former-commit-id: d42ae31c70d1e981ae847454b81c24027ad49cb0 --- cpp/src/db/DBMetaImpl.cpp | 22 +++++++--------------- cpp/src/db/DBMetaImpl.h | 1 - cpp/src/db/Utils.cpp | 26 ++++++++++++++++++++++++++ cpp/src/db/Utils.h | 19 +++++++++++++++++++ 4 files changed, 52 insertions(+), 16 deletions(-) create mode 100644 cpp/src/db/Utils.cpp create mode 100644 cpp/src/db/Utils.h diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 2d11379d1a..ef46abbab0 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -1,5 +1,4 @@ /******************************************************************************* - * long rows = 3*1024*1024*1024; * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved * Unauthorized copying of this file, via any medium is strictly prohibited. * Proprietary and confidential. @@ -15,6 +14,7 @@ #include #include "DBMetaImpl.h" #include "IDGenerator.h" +#include "Utils.h" namespace zilliz { namespace vecwise { @@ -56,14 +56,6 @@ std::string DBMetaImpl::GetGroupPath(const std::string& group_id) { return _options.path + "/" + group_id; } -long DBMetaImpl::GetMicroSecTimeStamp() { - auto now = std::chrono::system_clock::now(); - auto micros = std::chrono::duration_cast( - now.time_since_epoch()).count(); - - return micros; -} - std::string DBMetaImpl::GetGroupDatePartitionPath(const std::string& group_id, DateT& date) { std::stringstream ss; ss << GetGroupPath(group_id) << "/" << date; @@ -152,7 +144,7 @@ Status DBMetaImpl::add_group(GroupSchema& group_info) { } group_info.files_cnt = 0; group_info.id = -1; - group_info.created_on = GetMicroSecTimeStamp(); + group_info.created_on = utils::GetMicroSecTimeStamp(); { try { @@ -239,7 +231,7 @@ Status DBMetaImpl::add_group_file(GroupFileSchema& group_file) { group_file.file_id = ss.str(); group_file.dimension = group_info.dimension; group_file.rows = 0; - group_file.created_on = GetMicroSecTimeStamp(); + group_file.created_on = utils::GetMicroSecTimeStamp(); group_file.updated_time = group_file.created_on; GetGroupFilePath(group_file); @@ -464,7 +456,7 @@ Status DBMetaImpl::archive_files() { auto& limit = kv.second; if (criteria == "days") { auto usecs = 3600*24*limit*1000000; - auto now = GetMicroSecTimeStamp(); + auto now = utils::GetMicroSecTimeStamp(); try { ConnectorPtr->update_all( @@ -546,7 +538,7 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { } Status DBMetaImpl::update_group_file(GroupFileSchema& group_file) { - group_file.updated_time = GetMicroSecTimeStamp(); + group_file.updated_time = utils::GetMicroSecTimeStamp(); try { ConnectorPtr->update(group_file); } catch (std::exception & e) { @@ -561,7 +553,7 @@ Status DBMetaImpl::update_files(GroupFilesSchema& files) { try { auto commited = ConnectorPtr->transaction([&] () mutable { for (auto& file : files) { - file.updated_time = GetMicroSecTimeStamp(); + file.updated_time = utils::GetMicroSecTimeStamp(); ConnectorPtr->update(file); } return true; @@ -577,7 +569,7 @@ Status DBMetaImpl::update_files(GroupFilesSchema& files) { } Status DBMetaImpl::cleanup_ttl_files(uint16_t seconds) { - auto now = GetMicroSecTimeStamp(); + auto now = utils::GetMicroSecTimeStamp(); try { auto selected = ConnectorPtr->select(columns(&GroupFileSchema::id, &GroupFileSchema::group_id, diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index e2fd051b42..6433361d7a 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -65,7 +65,6 @@ public: private: Status discard_files_of_size(long to_discard_size); - long GetMicroSecTimeStamp(); Status get_group_no_lock(GroupSchema& group_info); std::string GetGroupPath(const std::string& group_id); std::string GetGroupDatePartitionPath(const std::string& group_id, DateT& date); diff --git a/cpp/src/db/Utils.cpp b/cpp/src/db/Utils.cpp new file mode 100644 index 0000000000..e459bab4bb --- /dev/null +++ b/cpp/src/db/Utils.cpp @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ + +#include +#include "Utils.h" + +namespace zilliz { +namespace vecwise { +namespace engine { +namespace utils { + +long GetMicroSecTimeStamp() { + auto now = std::chrono::system_clock::now(); + auto micros = std::chrono::duration_cast( + now.time_since_epoch()).count(); + + return micros; +} + +} // namespace utils +} // namespace engine +} // namespace vecwise +} // namespace zilliz diff --git a/cpp/src/db/Utils.h b/cpp/src/db/Utils.h new file mode 100644 index 0000000000..cdcd37b832 --- /dev/null +++ b/cpp/src/db/Utils.h @@ -0,0 +1,19 @@ +/******************************************************************************* + * Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved + * Unauthorized copying of this file, via any medium is strictly prohibited. + * Proprietary and confidential. + ******************************************************************************/ +#pragma once + + +namespace zilliz { +namespace vecwise { +namespace engine { +namespace utils { + +long GetMicroSecTimeStamp(); + +} // namespace utils +} // namespace engine +} // namespace vecwise +} // namespace zilliz From e46a1ece01874c13479d96054254f4060beffa54 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 14:56:13 +0800 Subject: [PATCH 13/22] fix(db): update schema and unsigned long type handling Former-commit-id: 59c0446b0f1a2e1c3dc4b01ce5cf088f66d23178 --- cpp/src/db/DBMetaImpl.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index ef46abbab0..b913399297 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -29,6 +29,7 @@ inline auto StoragePrototype(const std::string& path) { make_column("id", &GroupSchema::id, primary_key()), make_column("group_id", &GroupSchema::group_id, unique()), make_column("dimension", &GroupSchema::dimension), + make_column("created_on", &GroupSchema::created_on), make_column("files_cnt", &GroupSchema::files_cnt, default_value(0))), make_table("GroupFile", make_column("id", &GroupFileSchema::id, primary_key()), @@ -37,6 +38,7 @@ inline auto StoragePrototype(const std::string& path) { make_column("file_type", &GroupFileSchema::file_type), make_column("rows", &GroupFileSchema::rows, default_value(0)), make_column("updated_time", &GroupFileSchema::updated_time), + make_column("created_on", &GroupFileSchema::created_on), make_column("date", &GroupFileSchema::date)) ); @@ -455,8 +457,9 @@ Status DBMetaImpl::archive_files() { auto& criteria = kv.first; auto& limit = kv.second; if (criteria == "days") { - auto usecs = 3600*24*limit*1000000; - auto now = utils::GetMicroSecTimeStamp(); + long usecs = 3600*24*limit*1000000UL; + long now = utils::GetMicroSecTimeStamp(); + LOG(DEBUG) << "Limit " << limit << " TimeLimit " << now - usecs; try { ConnectorPtr->update_all( @@ -464,7 +467,7 @@ Status DBMetaImpl::archive_files() { c(&GroupFileSchema::file_type) = (int)GroupFileSchema::TO_DELETE ), where( - c(&GroupFileSchema::created_on) < now - usecs and + c(&GroupFileSchema::created_on) < (long)(now - usecs) and c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE )); } catch (std::exception & e) { @@ -473,7 +476,7 @@ Status DBMetaImpl::archive_files() { } } if (criteria == "disk") { - size_t G = 1024*1024*1024; + size_t G = 1024*1024*1024UL; long unsigned int sum = 0; try { auto sum_c = ConnectorPtr->sum( From 9fcfb1e8e531130e9347042fe0474885315975bf Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 15:31:18 +0800 Subject: [PATCH 14/22] refactor(db): remove dummy print Former-commit-id: d085da4d7b5f81cf3183393751aa80b7f5f966e7 --- cpp/src/db/DBMetaImpl.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index b913399297..3ad8d3fe49 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -459,7 +459,6 @@ Status DBMetaImpl::archive_files() { if (criteria == "days") { long usecs = 3600*24*limit*1000000UL; long now = utils::GetMicroSecTimeStamp(); - LOG(DEBUG) << "Limit " << limit << " TimeLimit " << now - usecs; try { ConnectorPtr->update_all( From 5cb98b83962ecfa65bf8ed1044df2237b5ce69f9 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 15:35:29 +0800 Subject: [PATCH 15/22] test(db): add test for archive days Former-commit-id: 26da52668e347f6157408be234dc643fd3b2621f --- cpp/unittest/db/meta_tests.cpp | 55 +++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/cpp/unittest/db/meta_tests.cpp b/cpp/unittest/db/meta_tests.cpp index 8489b0cd1b..1347fcf755 100644 --- a/cpp/unittest/db/meta_tests.cpp +++ b/cpp/unittest/db/meta_tests.cpp @@ -6,10 +6,13 @@ #include #include #include +#include +#include #include "utils.h" #include "db/DBMetaImpl.h" #include "db/Factories.h" +#include "db/Utils.h" using namespace zilliz::vecwise::engine; @@ -86,7 +89,57 @@ TEST_F(MetaTest, GROUP_FILE_TEST) { ASSERT_TRUE(group_file.file_type == meta::GroupFileSchema::TO_DELETE); } -TEST_F(MetaTest, ARCHIVE_TEST) { +TEST_F(MetaTest, ARCHIVE_TEST_DAYS) { + srand(time(0)); + DBMetaOptions options; + options.path = "/tmp/vecwise_test"; + int days_num = rand() % 100; + std::stringstream ss; + ss << "days:" << days_num; + options.archive_conf = ArchiveConf("delete", ss.str()); + + auto impl = meta::DBMetaImpl(options); + auto group_id = "meta_test_group"; + + meta::GroupSchema group; + group.group_id = group_id; + auto status = impl.add_group(group); + + meta::GroupFilesSchema files; + meta::GroupFileSchema group_file; + group_file.group_id = group.group_id; + + auto cnt = 100; + long ts = utils::GetMicroSecTimeStamp(); + std::vector days; + for (auto i=0; i Date: Fri, 24 May 2019 16:43:01 +0800 Subject: [PATCH 16/22] refactor(db): add some debug print for newly added archive Former-commit-id: 5741a45032d63e21fbcaa3cc6fdacd7363fb81d7 --- cpp/src/db/DBMetaImpl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 3ad8d3fe49..172e7f42f3 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -498,6 +498,7 @@ Status DBMetaImpl::archive_files() { } Status DBMetaImpl::discard_files_of_size(long to_discard_size) { + LOG(DEBUG) << "Abort to discard size=" << to_discard_size; if (to_discard_size <= 0) { return Status::OK(); } @@ -515,6 +516,7 @@ Status DBMetaImpl::discard_files_of_size(long to_discard_size) { group_file.id = std::get<0>(file); group_file.rows = std::get<1>(file); ids.push_back(group_file.id); + LOG(DEBUG) << "Discard group_file.id=" << group_file.id << " group_file.rows=" << group_file.rows; to_discard_size -= group_file.rows; } From 9b967056edf0ddd881b0380a39dae2973b75e713 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 17:30:33 +0800 Subject: [PATCH 17/22] feat(db): add archive post merge and build index Former-commit-id: a54382cec05b4a3e955e126ed3e8b58270b725cf --- cpp/src/db/DBImpl.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 72b053231b..eee492abbd 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -326,6 +326,10 @@ Status DBImpl::background_merge_files(const std::string& group_id) { merge_files(group_id, kv.first, kv.second); } + if (has_merge) { + _pMeta->archive_files(); + } + try_build_index(); _pMeta->cleanup_ttl_files(1); @@ -362,6 +366,7 @@ Status DBImpl::build_index(const meta::GroupFileSchema& file) { << " from file " << to_remove.file_id; index->Cache(); + _pMeta->archive_files(); return Status::OK(); } From c1024aa26cbb1b58a8ec796a63b056b2260205e0 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 17:41:43 +0800 Subject: [PATCH 18/22] feat(db): add size api for meta Former-commit-id: b169c9ec743b2defe4421fbe20c5ef0970aa0ea7 --- cpp/src/db/DBMetaImpl.cpp | 33 ++++++++++++++++++++------------- cpp/src/db/DBMetaImpl.h | 2 ++ cpp/src/db/LocalMetaImpl.cpp | 5 +++++ cpp/src/db/LocalMetaImpl.h | 2 ++ cpp/src/db/Meta.h | 2 ++ 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 172e7f42f3..a4c3f5d77e 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -476,20 +476,11 @@ Status DBMetaImpl::archive_files() { } if (criteria == "disk") { size_t G = 1024*1024*1024UL; - long unsigned int sum = 0; - try { - auto sum_c = ConnectorPtr->sum( - &GroupFileSchema::rows, - where( - c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE - )); - sum = *sum_c; - } catch (std::exception & e) { - LOG(DEBUG) << e.what(); - throw e; - } + long sum = 0; + size(sum); + // PXU TODO: refactor rows - auto to_delete = sum - limit*G/sizeof(float); + auto to_delete = (sum - limit*G)/sizeof(float); discard_files_of_size(to_delete); } } @@ -497,6 +488,22 @@ Status DBMetaImpl::archive_files() { return Status::OK(); } +Status DBMetaImpl::size(long& result) { + try { + auto sum_c = ConnectorPtr->sum( + &GroupFileSchema::rows, + where( + c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE + )); + result = *sum_c*sizeof(float); + } catch (std::exception & e) { + LOG(DEBUG) << e.what(); + throw e; + } + + return Status::OK(); +} + Status DBMetaImpl::discard_files_of_size(long to_discard_size) { LOG(DEBUG) << "Abort to discard size=" << to_discard_size; if (to_discard_size <= 0) { diff --git a/cpp/src/db/DBMetaImpl.h b/cpp/src/db/DBMetaImpl.h index 6433361d7a..20f91b8482 100644 --- a/cpp/src/db/DBMetaImpl.h +++ b/cpp/src/db/DBMetaImpl.h @@ -52,6 +52,8 @@ public: virtual Status archive_files() override; + virtual Status size(long& result) override; + virtual Status cleanup() override; virtual Status cleanup_ttl_files(uint16_t seconds) override; diff --git a/cpp/src/db/LocalMetaImpl.cpp b/cpp/src/db/LocalMetaImpl.cpp index 2ec5c08ce4..aa852a3db6 100644 --- a/cpp/src/db/LocalMetaImpl.cpp +++ b/cpp/src/db/LocalMetaImpl.cpp @@ -261,6 +261,11 @@ Status LocalMetaImpl::drop_all() { return Status::OK(); } +Status LocalMetaImpl::size(long& result) { + // PXU TODO + return Status::OK(); +} + Status LocalMetaImpl::count(const std::string& group_id, long& result) { // PXU TODO return Status::OK(); diff --git a/cpp/src/db/LocalMetaImpl.h b/cpp/src/db/LocalMetaImpl.h index 71927e8425..fb989d5f67 100644 --- a/cpp/src/db/LocalMetaImpl.h +++ b/cpp/src/db/LocalMetaImpl.h @@ -55,6 +55,8 @@ public: virtual Status drop_all() override; + virtual Status size(long& result) override; + private: Status GetGroupMetaInfoByPath(const std::string& path, GroupSchema& group_info); diff --git a/cpp/src/db/Meta.h b/cpp/src/db/Meta.h index 74e1637f77..e6150cea11 100644 --- a/cpp/src/db/Meta.h +++ b/cpp/src/db/Meta.h @@ -93,6 +93,8 @@ public: virtual Status files_to_merge(const std::string& group_id, DatePartionedGroupFilesSchema& files) = 0; + virtual Status size(long& result) = 0; + virtual Status archive_files() = 0; virtual Status files_to_index(GroupFilesSchema&) = 0; From b2af844f2096c6dfc0b955364a8c543507dce00e Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 18:15:50 +0800 Subject: [PATCH 19/22] feat(db): add size api for db Former-commit-id: 9d20366e22996fc7bb3e7e983ebfa9999591ba6b --- cpp/src/db/DB.h | 2 ++ cpp/src/db/DBImpl.cpp | 5 +++++ cpp/src/db/DBImpl.h | 2 ++ 3 files changed, 9 insertions(+) diff --git a/cpp/src/db/DB.h b/cpp/src/db/DB.h index 6f922475b5..7d976ad824 100644 --- a/cpp/src/db/DB.h +++ b/cpp/src/db/DB.h @@ -39,6 +39,8 @@ public: virtual Status search(const std::string& group_id, size_t k, size_t nq, const float* vectors, const meta::DatesT& dates, QueryResults& results) = 0; + virtual Status size(long& result) = 0; + virtual Status drop_all() = 0; virtual Status count(const std::string& group_id, long& result) = 0; diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index eee492abbd..971e6daff6 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -427,6 +427,11 @@ Status DBImpl::count(const std::string& group_id, long& result) { return _pMeta->count(group_id, result); } +template +Status DBImpl::size(long& result) { + return _pMeta->size(result); +} + template DBImpl::~DBImpl() { { diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index 45f249bd06..54c22eb48b 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -55,6 +55,8 @@ public: virtual Status count(const std::string& group_id, long& result) override; + virtual Status size(long& result) override; + virtual ~DBImpl(); private: From c90aee8972f600098e3748faa6137fc749a236d3 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Fri, 24 May 2019 18:16:27 +0800 Subject: [PATCH 20/22] test(db): add test for overall archive Former-commit-id: a90507f675e2ac56e87fb3884d9729d48d178317 --- cpp/unittest/db/db_tests.cpp | 46 ++++++++++++++++++++++++++++++++++++ cpp/unittest/db/utils.cpp | 17 ++++++++++--- cpp/unittest/db/utils.h | 6 +++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 8030e574e2..5a6ea703aa 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -9,6 +9,7 @@ #include "utils.h" #include "db/DB.h" +#include "db/DBImpl.h" using namespace zilliz::vecwise; @@ -57,6 +58,51 @@ TEST_F(DBTest, CONFIG_TEST) { } } +TEST_F(DBTest2, ARHIVE_DISK_CHECK) { + + static const std::string group_name = "test_group"; + static const int group_dim = 256; + + engine::meta::GroupSchema group_info; + group_info.dimension = group_dim; + group_info.group_id = group_name; + engine::Status stat = db_->add_group(group_info); + + engine::meta::GroupSchema group_info_get; + group_info_get.group_id = group_name; + stat = db_->get_group(group_info_get); + ASSERT_STATS(stat); + ASSERT_EQ(group_info_get.dimension, group_dim); + + engine::IDNumbers vector_ids; + engine::IDNumbers target_ids; + + int d = 256; + int nb = 30; + float *xb = new float[d * nb]; + for(int i = 0; i < nb; i++) { + for(int j = 0; j < d; j++) xb[d * i + j] = drand48(); + xb[d * i] += i / 2000.; + } + + int loop = 100000; + + for (auto i=0; iadd_vectors(group_name, nb, xb, vector_ids); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + std::this_thread::sleep_for(std::chrono::seconds(1)); + + long size; + db_->size(size); + /* LOG(DEBUG) << "size=" << size; */ + ASSERT_TRUE(size < 2UL*1024*1024*1024); + + delete [] xb; +}; + + TEST_F(DBTest, DB_TEST) { static const std::string group_name = "test_group"; diff --git a/cpp/unittest/db/utils.cpp b/cpp/unittest/db/utils.cpp index 7188e89107..9762668622 100644 --- a/cpp/unittest/db/utils.cpp +++ b/cpp/unittest/db/utils.cpp @@ -29,19 +29,30 @@ void DBTest::InitLog() { el::Loggers::reconfigureLogger("default", defaultConf); } -void DBTest::SetUp() { - InitLog(); +engine::Options DBTest::GetOptions() { auto options = engine::OptionsFactory::Build(); options.meta.path = "/tmp/vecwise_test"; + return options; +} + +void DBTest::SetUp() { + InitLog(); + auto options = GetOptions(); db_ = engine::DBFactory::Build(options, "Faiss,IDMap"); } void DBTest::TearDown() { delete db_; - auto options = engine::OptionsFactory::Build(); boost::filesystem::remove_all("/tmp/vecwise_test"); } +engine::Options DBTest2::GetOptions() { + auto options = engine::OptionsFactory::Build(); + options.meta.path = "/tmp/vecwise_test"; + options.meta.archive_conf = engine::ArchiveConf("delete", "disk:2"); + return options; +} + void MetaTest::SetUp() { InitLog(); impl_ = engine::DBMetaImplFactory::Build(); diff --git a/cpp/unittest/db/utils.h b/cpp/unittest/db/utils.h index 21823ffad3..456b54ffcc 100644 --- a/cpp/unittest/db/utils.h +++ b/cpp/unittest/db/utils.h @@ -39,6 +39,12 @@ protected: void InitLog(); virtual void SetUp() override; virtual void TearDown() override; + virtual zilliz::vecwise::engine::Options GetOptions(); +}; + +class DBTest2 : public DBTest { +protected: + virtual zilliz::vecwise::engine::Options GetOptions() override; }; From dfb09528808a86e548436082a964492a621bb5cb Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 11:13:34 +0800 Subject: [PATCH 21/22] fix(db): fix size api for db meta Former-commit-id: b58a3f27ee9b574af78894eb6fb5120904430c66 --- cpp/src/db/DBMetaImpl.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index a4c3f5d77e..2b997d1043 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -489,13 +489,19 @@ Status DBMetaImpl::archive_files() { } Status DBMetaImpl::size(long& result) { + result = 0; try { - auto sum_c = ConnectorPtr->sum( - &GroupFileSchema::rows, + auto selected = ConnectorPtr->select(columns(sum(&GroupFileSchema::rows)), where( c(&GroupFileSchema::file_type) != (int)GroupFileSchema::TO_DELETE )); - result = *sum_c*sizeof(float); + + for (auto& sub_query : selected) { + if(!std::get<0>(sub_query)) { + continue; + } + result += (long)(*std::get<0>(sub_query))*sizeof(float); + } } catch (std::exception & e) { LOG(DEBUG) << e.what(); throw e; From bb75cb93d89628272ebe5b85d9c1a1cf98a94347 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Sun, 26 May 2019 11:14:17 +0800 Subject: [PATCH 22/22] test(db): check size Former-commit-id: 07d052fdd6713e728595aff1c21a5b6cde62085c --- cpp/unittest/db/db_tests.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp index 5a6ea703aa..90d35ee8f9 100644 --- a/cpp/unittest/db/db_tests.cpp +++ b/cpp/unittest/db/db_tests.cpp @@ -62,6 +62,7 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { static const std::string group_name = "test_group"; static const int group_dim = 256; + long size; engine::meta::GroupSchema group_info; group_info.dimension = group_dim; @@ -77,6 +78,7 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { engine::IDNumbers vector_ids; engine::IDNumbers target_ids; + db_->size(size); int d = 256; int nb = 30; float *xb = new float[d * nb]; @@ -94,7 +96,6 @@ TEST_F(DBTest2, ARHIVE_DISK_CHECK) { std::this_thread::sleep_for(std::chrono::seconds(1)); - long size; db_->size(size); /* LOG(DEBUG) << "size=" << size; */ ASSERT_TRUE(size < 2UL*1024*1024*1024);