feat(db): serialize file bug fix

Former-commit-id: 0596a907902f32e7cb172d471eea66714912fe5f
This commit is contained in:
Xu Peng 2019-04-18 17:43:51 +08:00
parent 8d5ef3fb27
commit 75c35f5184
4 changed files with 29 additions and 26 deletions

View File

@ -22,7 +22,7 @@ DBImpl::DBImpl(const Options& options)
_shutting_down(false),
bg_build_index_started_(false),
_pMeta(new meta::DBMetaImpl(_options.meta)),
_pMemMgr(new MemManager(_pMeta)) {
_pMemMgr(new MemManager(_pMeta, _options)) {
start_timer_task(_options.memory_sync_interval);
}
@ -147,7 +147,7 @@ Status DBImpl::background_merge_files(const std::string& group_id) {
for (auto& kv : raw_files) {
auto files = kv.second;
if (files.size() <= _options.raw_file_merge_trigger_number) {
if (files.size() <= _options.merge_trigger_number) {
continue;
}
has_merge = true;

View File

@ -13,13 +13,13 @@ namespace zilliz {
namespace vecwise {
namespace engine {
// Constructs an in-memory vector buffer.
//
// NOTE(review): this span interleaves two versions of the constructor (it
// appears to be a diff with the add/remove markers stripped):
//   - (group_id, dimension, file_location): initialises group_id_,
//     _file_location and _dimension, and builds the index from _dimension;
//   - (meta_ptr, schema, options): initialises pMeta_, options_ and schema_,
//     and builds the index from schema_.dimension.
//
// Both versions allocate a SimpleIDGenerator with `new` into _pIdGenerator and
// create the index through faiss::index_factory with the description string
// "IDMap,Flat". Release of those two pointers is presumably handled by
// ~MemVectors(); its body is not visible here -- TODO confirm.
MemVectors::MemVectors(const std::string& group_id,
size_t dimension, const std::string& file_location) :
group_id_(group_id),
_file_location(file_location),
MemVectors::MemVectors(const std::shared_ptr<meta::Meta>& meta_ptr,
const meta::GroupFileSchema& schema, const Options& options)
: pMeta_(meta_ptr),
options_(options),
schema_(schema),
_pIdGenerator(new SimpleIDGenerator()),
_dimension(dimension),
pIndex_(faiss::index_factory(_dimension, "IDMap,Flat")) {
pIndex_(faiss::index_factory(schema_.dimension, "IDMap,Flat")) {
}
void MemVectors::add(size_t n_, const float* vectors_, IDNumbers& vector_ids_) {
@ -35,7 +35,7 @@ size_t MemVectors::total() const {
}
// Returns total() multiplied by the vector dimension.
//
// NOTE(review): two return statements appear because this span interleaves the
// _dimension spelling and the schema_.dimension spelling of the same line.
//
// NOTE(review): serialize() assigns this value to schema_.rows and compares it
// with options_.index_trigger_size. Assuming total() is the number of stored
// vectors, the result is scaled by the dimension and is not a row count --
// confirm that this is what the metadata and the trigger threshold expect.
size_t MemVectors::approximate_size() const {
return total() * _dimension;
return total() * schema_.dimension;
}
Status MemVectors::serialize(std::string& group_id) {
@ -45,8 +45,13 @@ Status MemVectors::serialize(std::string& group_id) {
/* std::cout << pIndex_->ntotal << std::endl; */
/* std::cout << _file_location << std::endl; */
/* faiss::write_index(pIndex_, _file_location.c_str()); */
write_index(pIndex_, _file_location.c_str());
group_id = group_id_;
group_id = schema_.group_id;
auto rows = approximate_size();
write_index(pIndex_, schema_.location.c_str());
schema_.rows = rows;
schema_.file_type = (rows >= options_.index_trigger_size) ?
meta::GroupFileSchema::TO_INDEX : meta::GroupFileSchema::RAW;
pMeta_->update_group_file(schema_);
return Status::OK();
}
@ -78,9 +83,7 @@ VectorsPtr MemManager::get_mem_by_group(const std::string& group_id) {
return nullptr;
}
_memMap[group_id] = std::shared_ptr<MemVectors>(new MemVectors(group_file.group_id,
group_file.dimension,
group_file.location));
_memMap[group_id] = std::shared_ptr<MemVectors>(new MemVectors(_pMeta, group_file, options_));
return _memMap[group_id];
}

View File

@ -8,6 +8,7 @@
#include <mutex>
#include "IDGenerator.h"
#include "Status.h"
#include "Meta.h"
namespace faiss {
class Index;
@ -24,9 +25,8 @@ namespace meta {
class MemVectors {
public:
explicit MemVectors(const std::string& group_id,
size_t dimension,
const std::string& file_location);
explicit MemVectors(const std::shared_ptr<meta::Meta>&,
const meta::GroupFileSchema&, const Options&);
void add(size_t n_, const float* vectors_, IDNumbers& vector_ids_);
@ -38,17 +38,17 @@ public:
~MemVectors();
const std::string& location() const { return _file_location; }
const std::string& location() const { return schema_.location; }
private:
MemVectors() = delete;
MemVectors(const MemVectors&) = delete;
MemVectors& operator=(const MemVectors&) = delete;
std::string group_id_;
const std::string _file_location;
std::shared_ptr<meta::Meta> pMeta_;
Options options_;
meta::GroupFileSchema schema_;
IDGenerator* _pIdGenerator;
size_t _dimension;
faiss::Index* pIndex_;
}; // MemVectors
@ -58,8 +58,8 @@ typedef std::shared_ptr<MemVectors> VectorsPtr;
class MemManager {
public:
MemManager(const std::shared_ptr<meta::Meta>& meta_)
: _pMeta(meta_) /*_last_compact_time(std::time(nullptr))*/ {}
MemManager(const std::shared_ptr<meta::Meta>& meta_, const Options& options)
: _pMeta(meta_), options_(options) {}
VectorsPtr get_mem_by_group(const std::string& group_id_);
@ -78,7 +78,7 @@ private:
MemMap _memMap;
ImmMemPool _immMems;
std::shared_ptr<meta::Meta> _pMeta;
/* std::time_t _last_compact_time; */
Options options_;
std::mutex _mutex;
}; // MemManager

View File

@ -19,8 +19,8 @@ struct DBMetaOptions {
// Tunables shared by DBImpl, MemManager and MemVectors.
//
// NOTE(review): the raw_file_merge_trigger_number / raw_to_index_trigger_size
// fields and the merge_trigger_number / index_trigger_size fields carry the same
// defaults and appear to be the before/after spellings of one rename; this view
// interleaves both.
struct Options {
Options();
// Passed to start_timer_task() by the DBImpl constructor; the time unit is not
// visible here -- TODO confirm.
uint16_t memory_sync_interval = 10;
uint16_t raw_file_merge_trigger_number = 100;
size_t raw_to_index_trigger_size = 100000;
// background_merge_files() skips a file set whose size() is <= this value.
uint16_t merge_trigger_number = 100;
// MemVectors::serialize() marks the file TO_INDEX when its computed size is
// >= this value, and RAW otherwise.
size_t index_trigger_size = 100000;
// No in-class default; presumably assigned in Options() -- TODO confirm.
Env* env;
// Handed to meta::DBMetaImpl by the DBImpl constructor.
DBMetaOptions meta;
}; // Options