feat(db): impl build_index

Former-commit-id: cf2b9ca3555675d6304ea273bdfeac0e15bb7271
This commit is contained in:
Xu Peng 2019-04-17 13:44:22 +08:00
parent 92983fcfbf
commit 074523368e
3 changed files with 32 additions and 11 deletions

View File

@ -6,6 +6,7 @@
#include <faiss/MetaIndexes.h>
#include <faiss/index_io.h>
#include <faiss/AutoTune.h>
#include <wrapper/IndexBuilder.h>
#include "DBImpl.h"
#include "DBMetaImpl.h"
#include "Env.h"
@ -165,12 +166,28 @@ Status DBImpl::background_merge_files(const std::string& group_id) {
}
// Builds an "IDMap,Flat" index from the index file stored at `file.location`.
//
// Steps, in order:
//   1. Register a new group file (same group id and date as `file`) through
//      _pMeta->add_group_file; its location is the base of the output path.
//   2. Read the source index from `file.location`.
//   3. Pass the vector count, the raw vectors and the id map to the builder.
//   4. Write the built index to "<new group file location>.index".
//
// Returns the status of add_group_file when that call fails, Status::OK()
// otherwise.
// Throws std::bad_cast when the file does not hold an IndexIDMap wrapping an
// IndexFlat (previously this dereferenced a null pointer).
Status DBImpl::build_index(const meta::GroupFileSchema& file) {
    std::cout << ">>Building Index for: " << file.location << std::endl;

    meta::GroupFileSchema group_file;
    Status status = _pMeta->add_group_file(file.group_id, file.date, group_file);
    if (!status.ok()) {
        return status;
    }

    auto opd = std::make_shared<Operand>();
    opd->index_type = "IDMap,Flat";
    IndexBuilderPtr pBuilder = GetIndexBuilder(opd);

    // The pointer returned by faiss::read_index is owned by the caller; keep
    // it in a unique_ptr so the source index is released on every exit path
    // instead of being leaked.
    // NOTE(review): relies on read_index signalling failure by throwing
    // rather than returning nullptr -- confirm against the faiss version in use.
    std::unique_ptr<faiss::Index> source(faiss::read_index(file.location.c_str()));

    // Reference-form dynamic_cast: a type mismatch raises std::bad_cast
    // instead of yielding a null pointer that would be dereferenced below.
    auto& from_index = dynamic_cast<faiss::IndexIDMap&>(*source);
    auto& flat_index = dynamic_cast<faiss::IndexFlat&>(*from_index.index);

    // NOTE(review): `source` is destroyed when this function returns, so this
    // assumes build_all copies the vectors and ids it is given -- confirm.
    auto index = pBuilder->build_all(from_index.ntotal,
                                     flat_index.xb.data(),
                                     from_index.id_map.data());
    /* std::cout << "raw size=" << from_index.ntotal << " index size=" << index->ntotal << std::endl; */

    // PXU TODO: Remove
    auto location = group_file.location + ".index";
    write_index(index, location.c_str());

    return Status::OK();
}
Status DBImpl::background_build_index() {
void DBImpl::background_build_index() {
assert(bg_build_index_started_);
meta::GroupFilesSchema to_index_files;
_pMeta->files_to_index(to_index_files);
@ -179,12 +196,11 @@ Status DBImpl::background_build_index() {
status = build_index(file);
if (!status.ok()) {
_bg_error = status;
return status;
return;
}
}
bg_build_index_started_ = false;
return Status::OK();
}
Status DBImpl::try_build_index() {

View File

@ -41,7 +41,7 @@ public:
virtual ~DBImpl();
private:
Status background_build_index();
void background_build_index();
Status build_index(const meta::GroupFileSchema&);
Status try_build_index();
Status merge_files(const std::string& group_id,

View File

@ -75,11 +75,14 @@ Status DBMetaImpl::files_to_index(GroupFilesSchema& files) {
boost::filesystem::path path(ss.str().c_str());
boost::filesystem::directory_iterator end_itr;
for (boost::filesystem::directory_iterator itr(path); itr != end_itr; ++itr) {
std::cout << itr->path().string() << std::endl;
/* std::cout << itr->path().string() << std::endl; */
GroupFileSchema f;
f.location = itr->path().string();
if (1024*1024*50 >= GetFileSize(f.location)) continue;
std::cout << "About to index " << f.location << std::endl;
std::string suffixStr = f.location.substr(f.location.find_last_of('.') + 1);
if (suffixStr == "index") continue;
if (1024*1024*1000 >= GetFileSize(f.location)) continue;
std::cout << "[About to index] " << f.location << std::endl;
f.date = Meta::GetDate();
files.push_back(f);
}
return Status::OK();
@ -97,10 +100,12 @@ Status DBMetaImpl::files_to_merge(const std::string& group_id,
DateT date = Meta::GetDate();
files[date] = gfiles;
for (boost::filesystem::directory_iterator itr(path); itr != end_itr; ++itr) {
std::cout << itr->path().string() << std::endl;
/* std::cout << itr->path().string() << std::endl; */
GroupFileSchema f;
f.location = itr->path().string();
if (1024*1024*50 < GetFileSize(f.location)) continue;
std::string suffixStr = f.location.substr(f.location.find_last_of('.') + 1);
if (suffixStr == "index") continue;
if (1024*1024*1000 < GetFileSize(f.location)) continue;
std::cout << "About to merge " << f.location << std::endl;
files[date].push_back(f);
}