From 074523368ebff5bed881b06fa554515881a2fab9 Mon Sep 17 00:00:00 2001 From: Xu Peng Date: Wed, 17 Apr 2019 13:44:22 +0800 Subject: [PATCH] feat(db): impl build_index Former-commit-id: cf2b9ca3555675d6304ea273bdfeac0e15bb7271 --- cpp/src/db/DBImpl.cpp | 26 +++++++++++++++++++++----- cpp/src/db/DBImpl.h | 2 +- cpp/src/db/DBMetaImpl.cpp | 15 ++++++++++----- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 026f34b64b..095795dcc7 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include "DBImpl.h" #include "DBMetaImpl.h" #include "Env.h" @@ -165,12 +166,28 @@ Status DBImpl::background_merge_files(const std::string& group_id) { } Status DBImpl::build_index(const meta::GroupFileSchema& file) { - //PXU TODO - std::cout << ">>Building Index for: " << file.location << std::endl; + meta::GroupFileSchema group_file; + Status status = _pMeta->add_group_file(file.group_id, file.date, group_file); + if (!status.ok()) { + return status; + } + + auto opd = std::make_shared(); + opd->index_type = "IDMap,Flat"; + IndexBuilderPtr pBuilder = GetIndexBuilder(opd); + + auto from_index = dynamic_cast(faiss::read_index(file.location.c_str())); + auto index = pBuilder->build_all(from_index->ntotal, + dynamic_cast(from_index->index)->xb.data(), + from_index->id_map.data()); + /* std::cout << "raw size=" << from_index->ntotal << " index size=" << index->ntotal << std::endl; */ + // PXU TODO: Remove + auto location = group_file.location + ".index"; + write_index(index, location.c_str()); return Status::OK(); } -Status DBImpl::background_build_index() { +void DBImpl::background_build_index() { assert(bg_build_index_started_); meta::GroupFilesSchema to_index_files; _pMeta->files_to_index(to_index_files); @@ -179,12 +196,11 @@ Status DBImpl::background_build_index() { status = build_index(file); if (!status.ok()) { _bg_error = status; - return status; + return; } } bg_build_index_started_ = false; - return Status::OK(); } Status DBImpl::try_build_index() { diff --git a/cpp/src/db/DBImpl.h b/cpp/src/db/DBImpl.h index 15d71c1c34..bab06998bb 100644 --- a/cpp/src/db/DBImpl.h +++ b/cpp/src/db/DBImpl.h @@ -41,7 +41,7 @@ public: virtual ~DBImpl(); private: - Status background_build_index(); + void background_build_index(); Status build_index(const meta::GroupFileSchema&); Status try_build_index(); Status merge_files(const std::string& group_id, diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index fd37038bfc..5ed10f7db3 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -75,11 +75,14 @@ Status DBMetaImpl::files_to_index(GroupFilesSchema& files) { boost::filesystem::path path(ss.str().c_str()); boost::filesystem::directory_iterator end_itr; for (boost::filesystem::directory_iterator itr(path); itr != end_itr; ++itr) { - std::cout << itr->path().string() << std::endl; + /* std::cout << itr->path().string() << std::endl; */ GroupFileSchema f; f.location = itr->path().string(); - if (1024*1024*50 >= GetFileSize(f.location)) continue; - std::cout << "About to index " << f.location << std::endl; + std::string suffixStr = f.location.substr(f.location.find_last_of('.') + 1); + if (suffixStr == "index") continue; + if (1024*1024*1000 >= GetFileSize(f.location)) continue; + std::cout << "[About to index] " << f.location << std::endl; + f.date = Meta::GetDate(); files.push_back(f); } return Status::OK(); @@ -97,10 +100,12 @@ Status DBMetaImpl::files_to_merge(const std::string& group_id, DateT date = Meta::GetDate(); files[date] = gfiles; for (boost::filesystem::directory_iterator itr(path); itr != end_itr; ++itr) { - std::cout << itr->path().string() << std::endl; + /* std::cout << itr->path().string() << std::endl; */ GroupFileSchema f; f.location = itr->path().string(); - if (1024*1024*50 < GetFileSize(f.location)) continue; + std::string suffixStr = f.location.substr(f.location.find_last_of('.') + 1); + if (suffixStr == "index") continue; + if (1024*1024*1000 < GetFileSize(f.location)) continue; std::cout << "About to merge " << f.location << std::endl; files[date].push_back(f); }