diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 599db01ef9..e2246c95cd 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -22,7 +22,7 @@ DBImpl::DBImpl(const Options& options) _shutting_down(false), bg_build_index_started_(false), _pMeta(new meta::DBMetaImpl(_options.meta)), - _pMemMgr(new MemManager(_pMeta)) { + _pMemMgr(new MemManager(_pMeta, _options)) { start_timer_task(_options.memory_sync_interval); } @@ -147,7 +147,7 @@ Status DBImpl::background_merge_files(const std::string& group_id) { for (auto& kv : raw_files) { auto files = kv.second; - if (files.size() <= _options.raw_file_merge_trigger_number) { + if (files.size() <= _options.merge_trigger_number) { continue; } has_merge = true; diff --git a/cpp/src/db/MemManager.cpp b/cpp/src/db/MemManager.cpp index 5b791223a6..4c844a7fab 100644 --- a/cpp/src/db/MemManager.cpp +++ b/cpp/src/db/MemManager.cpp @@ -13,13 +13,13 @@ namespace zilliz { namespace vecwise { namespace engine { -MemVectors::MemVectors(const std::string& group_id, - size_t dimension, const std::string& file_location) : - group_id_(group_id), - _file_location(file_location), +MemVectors::MemVectors(const std::shared_ptr& meta_ptr, + const meta::GroupFileSchema& schema, const Options& options) + : pMeta_(meta_ptr), + options_(options), + schema_(schema), _pIdGenerator(new SimpleIDGenerator()), - _dimension(dimension), - pIndex_(faiss::index_factory(_dimension, "IDMap,Flat")) { + pIndex_(faiss::index_factory(schema_.dimension, "IDMap,Flat")) { } void MemVectors::add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) { @@ -35,7 +35,7 @@ size_t MemVectors::total() const { } size_t MemVectors::approximate_size() const { - return total() * _dimension; + return total() * schema_.dimension; } Status MemVectors::serialize(std::string& group_id) { @@ -45,8 +45,13 @@ Status MemVectors::serialize(std::string& group_id) { /* std::cout << pIndex_->ntotal << std::endl; */ /* std::cout << _file_location << std::endl; */ /* faiss::write_index(pIndex_, _file_location.c_str()); */ - write_index(pIndex_, _file_location.c_str()); - group_id = group_id_; + group_id = schema_.group_id; + auto rows = approximate_size(); + write_index(pIndex_, schema_.location.c_str()); + schema_.rows = rows; + schema_.file_type = (rows >= options_.index_trigger_size) ? + meta::GroupFileSchema::TO_INDEX : meta::GroupFileSchema::RAW; + pMeta_->update_group_file(schema_); return Status::OK(); } @@ -78,9 +83,7 @@ VectorsPtr MemManager::get_mem_by_group(const std::string& group_id) { return nullptr; } - _memMap[group_id] = std::shared_ptr(new MemVectors(group_file.group_id, - group_file.dimension, - group_file.location)); + _memMap[group_id] = std::shared_ptr(new MemVectors(_pMeta, group_file, options_)); return _memMap[group_id]; } diff --git a/cpp/src/db/MemManager.h b/cpp/src/db/MemManager.h index 48aacc4fb6..a87c54c14e 100644 --- a/cpp/src/db/MemManager.h +++ b/cpp/src/db/MemManager.h @@ -8,6 +8,7 @@ #include #include "IDGenerator.h" #include "Status.h" +#include "Meta.h" namespace faiss { class Index; @@ -24,9 +25,8 @@ namespace meta { class MemVectors { public: - explicit MemVectors(const std::string& group_id, - size_t dimension, - const std::string& file_location); + explicit MemVectors(const std::shared_ptr&, + const meta::GroupFileSchema&, const Options&); void add(size_t n_, const float* vectors_, IDNumbers& vector_ids_); @@ -38,17 +38,17 @@ public: ~MemVectors(); - const std::string& location() const { return _file_location; } + const std::string& location() const { return schema_.location; } private: MemVectors() = delete; MemVectors(const MemVectors&) = delete; MemVectors& operator=(const MemVectors&) = delete; - std::string group_id_; - const std::string _file_location; + std::shared_ptr pMeta_; + Options options_; + meta::GroupFileSchema schema_; IDGenerator* _pIdGenerator; - size_t _dimension; faiss::Index* pIndex_; }; // MemVectors @@ -58,8 +58,8 @@ typedef std::shared_ptr VectorsPtr; class MemManager { public: - MemManager(const std::shared_ptr& meta_) - : _pMeta(meta_) /*_last_compact_time(std::time(nullptr))*/ {} + MemManager(const std::shared_ptr& meta_, const Options& options) + : _pMeta(meta_), options_(options) {} VectorsPtr get_mem_by_group(const std::string& group_id_); @@ -78,7 +78,7 @@ private: MemMap _memMap; ImmMemPool _immMems; std::shared_ptr _pMeta; - /* std::time_t _last_compact_time; */ + Options options_; std::mutex _mutex; }; // MemManager diff --git a/cpp/src/db/Options.h b/cpp/src/db/Options.h index 12d57ee28d..8e9dad2b47 100644 --- a/cpp/src/db/Options.h +++ b/cpp/src/db/Options.h @@ -19,8 +19,8 @@ struct DBMetaOptions { struct Options { Options(); uint16_t memory_sync_interval = 10; - uint16_t raw_file_merge_trigger_number = 100; - size_t raw_to_index_trigger_size = 100000; + uint16_t merge_trigger_number = 100; + size_t index_trigger_size = 100000; Env* env; DBMetaOptions meta; }; // Options