From 177eba0b81a7f97cee938be0dd539d31008c1ec8 Mon Sep 17 00:00:00 2001 From: groot Date: Wed, 27 Nov 2019 17:48:09 +0800 Subject: [PATCH 1/3] #545 Avoid dead circle of build index thread when error occurs --- CHANGELOG.md | 3 +- core/src/db/DBImpl.cpp | 128 ++++++++++++++++++++++++++-- core/src/db/DBImpl.h | 24 +++++- core/src/db/Utils.cpp | 4 +- core/src/db/meta/MySQLMetaImpl.cpp | 32 ++++++- core/src/db/meta/SqliteMetaImpl.cpp | 32 ++++++- core/src/utils/CommonUtil.cpp | 2 +- 7 files changed, 203 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 620122e9e0..9e235ca3f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#458 - Index data is not compatible between 0.5 and 0.6 - \#465 - Server hang caused by searching with nsg index - \#486 - gpu no usage during index building +- \#497 - CPU-version search performance decreased - \#504 - The code coverage rate of core/src/scheduler/optimizer is too low - \#509 - IVF_PQ index build trapped into dead loop caused by invalid params - \#513 - Unittest DELETE_BY_RANGE sometimes failed @@ -31,7 +32,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#532 - assigin value to `table_name` from confest shell - \#533 - NSG build failed with MetricType Inner Product - \#543 - client raise exception in shards when search results is empty -- \#497 - CPU-version search performance decreased +- \#545 - Avoid dead circle of build index thread when error occurs ## Feature - \#12 - Pure CPU version for Milvus diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 50cf4614ca..0687ca43d4 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -41,6 +41,7 @@ #include #include #include +#include namespace milvus { namespace engine { @@ -51,6 +52,8 @@ constexpr uint64_t METRIC_ACTION_INTERVAL = 1; constexpr uint64_t COMPACT_ACTION_INTERVAL = 1; constexpr uint64_t INDEX_ACTION_INTERVAL = 1; +constexpr uint64_t INDEX_FAILED_RETRY_TIME = 1; + static const Status SHUTDOWN_ERROR = Status(DB_ERROR, "Milsvus server is shutdown!"); void @@ -361,6 +364,7 @@ DBImpl::CreateIndex(const std::string& table_id, const TableIndex& index) { WaitMergeFileFinish(); // step 4: wait and build index + status = CleanFailedIndexFileOfTable(table_id); status = BuildTableIndexRecursively(table_id, index); return status; @@ -828,22 +832,35 @@ DBImpl::BackgroundBuildIndex() { std::unique_lock lock(build_index_mutex_); meta::TableFilesSchema to_index_files; meta_ptr_->FilesToIndex(to_index_files); - Status status; + Status status = IgnoreFailedIndexFiles(to_index_files); if (!to_index_files.empty()) { - scheduler::BuildIndexJobPtr job = std::make_shared(meta_ptr_, options_); - // step 2: put build index task to scheduler + std::map job2file_map; for (auto& file : to_index_files) { + scheduler::BuildIndexJobPtr job = std::make_shared(meta_ptr_, options_); scheduler::TableFileSchemaPtr file_ptr = std::make_shared(file); job->AddToIndexFiles(file_ptr); + scheduler::JobMgrInst::GetInstance()->Put(job); + job2file_map.insert(std::make_pair(job, file_ptr)); } - scheduler::JobMgrInst::GetInstance()->Put(job); - job->WaitBuildIndexFinish(); - if (!job->GetStatus().ok()) { - Status status = job->GetStatus(); - ENGINE_LOG_ERROR << "Building index failed: " << status.ToString(); + + for (auto iter = job2file_map.begin(); iter != job2file_map.end(); ++iter) { + scheduler::BuildIndexJobPtr job = iter->first; + meta::TableFileSchema& file_schema = *(iter->second.get()); + job->WaitBuildIndexFinish(); + if (!job->GetStatus().ok()) { + Status status = job->GetStatus(); + ENGINE_LOG_ERROR << "Building index job " << job->id() << " failed: " << status.ToString(); + + MarkFailedIndexFile(file_schema); + } else { + MarkSucceedIndexFile(file_schema); + ENGINE_LOG_DEBUG << "Building index job " << job->id() << " succeed."; + } } + + ENGINE_LOG_DEBUG << "Background build index thread finished"; } // ENGINE_LOG_TRACE << "Background build index thread exit"; @@ -911,6 +928,7 @@ DBImpl::DropTableRecursively(const std::string& table_id, const meta::DatesT& da if (dates.empty()) { status = mem_mgr_->EraseMemVector(table_id); // not allow insert status = meta_ptr_->DropTable(table_id); // soft delete table + CleanFailedIndexFileOfTable(table_id); // scheduler will determine when to delete table files auto nres = scheduler::ResMgrInst::GetInstance()->GetNumOfComputeResource(); @@ -989,6 +1007,8 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10 * 1000, times * 100))); GetFilesToBuildIndex(table_id, file_types, table_files); times++; + + IgnoreFailedIndexFiles(table_files); } // build index for partition @@ -1001,12 +1021,23 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex } } + // failed to build index for some files, return error + std::vector failed_files; + GetFailedIndexFileOfTable(table_id, failed_files); + if (!failed_files.empty()) { + std::string msg = "Failed to build index for " + std::to_string(failed_files.size()) + + ((failed_files.size() == 1) ? " file" : " files") + + ", file size is too large or gpu memory is not enough"; + return Status(DB_ERROR, msg); + } + return Status::OK(); } Status DBImpl::DropTableIndexRecursively(const std::string& table_id) { ENGINE_LOG_DEBUG << "Drop index for table: " << table_id; + CleanFailedIndexFileOfTable(table_id); auto status = meta_ptr_->DropTableIndex(table_id); if (!status.ok()) { return status; @@ -1049,5 +1080,86 @@ DBImpl::GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_c return Status::OK(); } +Status +DBImpl::CleanFailedIndexFileOfTable(const std::string& table_id) { + std::lock_guard lck(index_failed_mutex_); + index_failed_files_.erase(table_id); // rebuild failed index files for this table + + return Status::OK(); +} + +Status +DBImpl::GetFailedIndexFileOfTable(const std::string& table_id, std::vector& failed_files) { + failed_files.clear(); + std::lock_guard lck(index_failed_mutex_); + auto iter = index_failed_files_.find(table_id); + if (iter != index_failed_files_.end()) { + FileID2FailedTimes& failed_map = iter->second; + for (auto it_file = failed_map.begin(); it_file != failed_map.end(); ++it_file) { + failed_files.push_back(it_file->first); + } + } + + return Status::OK(); +} + +Status +DBImpl::MarkFailedIndexFile(const meta::TableFileSchema& file) { + std::lock_guard lck(index_failed_mutex_); + + auto iter = index_failed_files_.find(file.table_id_); + if (iter == index_failed_files_.end()) { + FileID2FailedTimes failed_files; + failed_files.insert(std::make_pair(file.file_id_, 1)); + index_failed_files_.insert(std::make_pair(file.table_id_, failed_files)); + } else { + auto it_failed_files = iter->second.find(file.file_id_); + if (it_failed_files != iter->second.end()) { + it_failed_files->second++; + } else { + iter->second.insert(std::make_pair(file.file_id_, 1)); + } + } + + return Status::OK(); +} + +Status +DBImpl::MarkSucceedIndexFile(const meta::TableFileSchema& file) { + std::lock_guard lck(index_failed_mutex_); + + auto iter = index_failed_files_.find(file.table_id_); + if (iter != index_failed_files_.end()) { + iter->second.erase(file.file_id_); + } + + return Status::OK(); +} + +Status +DBImpl::IgnoreFailedIndexFiles(meta::TableFilesSchema& table_files) { + std::lock_guard lck(index_failed_mutex_); + + // there could be some failed files belong to different table. + // some files may has failed for several times, no need to build index for these files. + // thus we can avoid dead circle for build index operation + for (auto it_file = table_files.begin(); it_file != table_files.end();) { + auto it_failed_files = index_failed_files_.find((*it_file).table_id_); + if (it_failed_files != index_failed_files_.end()) { + auto it_failed_file = it_failed_files->second.find((*it_file).file_id_); + if (it_failed_file != it_failed_files->second.end()) { + if (it_failed_file->second >= INDEX_FAILED_RETRY_TIME) { + it_file = table_files.erase(it_file); + continue; + } + } + } + + ++it_file; + } + + return Status::OK(); +} + } // namespace engine } // namespace milvus diff --git a/core/src/db/DBImpl.h b/core/src/db/DBImpl.h index 82a5d3096b..3baac92c0a 100644 --- a/core/src/db/DBImpl.h +++ b/core/src/db/DBImpl.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -35,8 +36,6 @@ namespace milvus { namespace engine { -class Env; - namespace meta { class Meta; } @@ -179,6 +178,21 @@ class DBImpl : public DB { Status GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_count); + Status + CleanFailedIndexFileOfTable(const std::string& table_id); + + Status + GetFailedIndexFileOfTable(const std::string& table_id, std::vector& failed_files); + + Status + MarkFailedIndexFile(const meta::TableFileSchema& file); + + Status + MarkSucceedIndexFile(const meta::TableFileSchema& file); + + Status + IgnoreFailedIndexFiles(meta::TableFilesSchema& table_files); + private: const DBOptions options_; @@ -200,7 +214,11 @@ class DBImpl : public DB { std::list> index_thread_results_; std::mutex build_index_mutex_; -}; // DBImpl + std::mutex index_failed_mutex_; + using FileID2FailedTimes = std::map; + using Table2FailedFiles = std::map; + Table2FailedFiles index_failed_files_; // file id mapping to failed times +}; // DBImpl } // namespace engine } // namespace milvus diff --git a/core/src/db/Utils.cpp b/core/src/db/Utils.cpp index 0ddf03568a..9689f496cf 100644 --- a/core/src/db/Utils.cpp +++ b/core/src/db/Utils.cpp @@ -154,7 +154,9 @@ GetTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file } std::string msg = "Table file doesn't exist: " + file_path; - ENGINE_LOG_ERROR << msg << " in path: " << options.path_ << " for table: " << table_file.table_id_; + if (table_file.file_size_ > 0) { // no need to pop error for empty file + ENGINE_LOG_ERROR << msg << " in path: " << options.path_ << " for table: " << table_file.table_id_; + } return Status(DB_ERROR, msg); } diff --git a/core/src/db/meta/MySQLMetaImpl.cpp b/core/src/db/meta/MySQLMetaImpl.cpp index 2fb5eb0f3c..4d9366d754 100644 --- a/core/src/db/meta/MySQLMetaImpl.cpp +++ b/core/src/db/meta/MySQLMetaImpl.cpp @@ -1610,10 +1610,34 @@ MySQLMetaImpl::FilesByType(const std::string& table_id, const std::vector& } } - ENGINE_LOG_DEBUG << "Table " << table_id << " currently has raw files:" << raw_count - << " new files:" << new_count << " new_merge files:" << new_merge_count - << " new_index files:" << new_index_count << " to_index files:" << to_index_count - << " index files:" << index_count << " backup files:" << backup_count; + std::string msg = "Get table files by type. "; + for (int file_type : file_types) { + switch (file_type) { + case (int)TableFileSchema::RAW: + msg = msg + "raw files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW: + msg = msg + "new files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW_MERGE: + msg = msg + "new_merge files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW_INDEX: + msg = msg + "new_index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::TO_INDEX: + msg = msg + "to_index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::INDEX: + msg = msg + "index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::BACKUP: + msg = msg + "backup files:" + std::to_string(raw_count); + break; + default:break; + } + } + ENGINE_LOG_DEBUG << msg; } } catch (std::exception& e) { return HandleException("GENERAL ERROR WHEN GET FILE BY TYPE", e.what()); diff --git a/core/src/db/meta/SqliteMetaImpl.cpp b/core/src/db/meta/SqliteMetaImpl.cpp index 24d5d78bad..92c1d9cb8f 100644 --- a/core/src/db/meta/SqliteMetaImpl.cpp +++ b/core/src/db/meta/SqliteMetaImpl.cpp @@ -1157,10 +1157,34 @@ SqliteMetaImpl::FilesByType(const std::string& table_id, table_files.emplace_back(file_schema); } - ENGINE_LOG_DEBUG << "Table " << table_id << " currently has raw files:" << raw_count - << " new files:" << new_count << " new_merge files:" << new_merge_count - << " new_index files:" << new_index_count << " to_index files:" << to_index_count - << " index files:" << index_count << " backup files:" << backup_count; + std::string msg = "Get table files by type. "; + for (int file_type : file_types) { + switch (file_type) { + case (int)TableFileSchema::RAW: + msg = msg + "raw files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW: + msg = msg + "new files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW_MERGE: + msg = msg + "new_merge files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW_INDEX: + msg = msg + "new_index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::TO_INDEX: + msg = msg + "to_index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::INDEX: + msg = msg + "index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::BACKUP: + msg = msg + "backup files:" + std::to_string(raw_count); + break; + default:break; + } + } + ENGINE_LOG_DEBUG << msg; } } catch (std::exception& e) { return HandleException("Encounter exception when check non index files", e.what()); diff --git a/core/src/utils/CommonUtil.cpp b/core/src/utils/CommonUtil.cpp index cdfae8f1e5..cfadb2fcc4 100644 --- a/core/src/utils/CommonUtil.cpp +++ b/core/src/utils/CommonUtil.cpp @@ -229,7 +229,7 @@ CommonUtil::ConvertTime(tm time_struct, time_t& time_integer) { void CommonUtil::EraseFromCache(const std::string& item_key) { if (item_key.empty()) { - SERVER_LOG_ERROR << "Empty key cannot be erased from cache"; + // SERVER_LOG_ERROR << "Empty key cannot be erased from cache"; return; } From c37aa5a54ba7ceb52570f14e698054be73a14f3b Mon Sep 17 00:00:00 2001 From: groot Date: Wed, 27 Nov 2019 17:48:09 +0800 Subject: [PATCH 2/3] code lint --- CHANGELOG.md | 3 +- core/src/db/DBImpl.cpp | 128 ++++++++++++++++++++++++++-- core/src/db/DBImpl.h | 24 +++++- core/src/db/Utils.cpp | 4 +- core/src/db/meta/MySQLMetaImpl.cpp | 33 ++++++- core/src/db/meta/SqliteMetaImpl.cpp | 32 ++++++- core/src/utils/CommonUtil.cpp | 2 +- 7 files changed, 204 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 620122e9e0..9e235ca3f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#458 - Index data is not compatible between 0.5 and 0.6 - \#465 - Server hang caused by searching with nsg index - \#486 - gpu no usage during index building +- \#497 - CPU-version search performance decreased - \#504 - The code coverage rate of core/src/scheduler/optimizer is too low - \#509 - IVF_PQ index build trapped into dead loop caused by invalid params - \#513 - Unittest DELETE_BY_RANGE sometimes failed @@ -31,7 +32,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#532 - assigin value to `table_name` from confest shell - \#533 - NSG build failed with MetricType Inner Product - \#543 - client raise exception in shards when search results is empty -- \#497 - CPU-version search performance decreased +- \#545 - Avoid dead circle of build index thread when error occurs ## Feature - \#12 - Pure CPU version for Milvus diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 50cf4614ca..0687ca43d4 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -41,6 +41,7 @@ #include #include #include +#include namespace milvus { namespace engine { @@ -51,6 +52,8 @@ constexpr uint64_t METRIC_ACTION_INTERVAL = 1; constexpr uint64_t COMPACT_ACTION_INTERVAL = 1; constexpr uint64_t INDEX_ACTION_INTERVAL = 1; +constexpr uint64_t INDEX_FAILED_RETRY_TIME = 1; + static const Status SHUTDOWN_ERROR = Status(DB_ERROR, "Milsvus server is shutdown!"); void @@ -361,6 +364,7 @@ DBImpl::CreateIndex(const std::string& table_id, const TableIndex& index) { WaitMergeFileFinish(); // step 4: wait and build index + status = CleanFailedIndexFileOfTable(table_id); status = BuildTableIndexRecursively(table_id, index); return status; @@ -828,22 +832,35 @@ DBImpl::BackgroundBuildIndex() { std::unique_lock lock(build_index_mutex_); meta::TableFilesSchema to_index_files; meta_ptr_->FilesToIndex(to_index_files); - Status status; + Status status = IgnoreFailedIndexFiles(to_index_files); if (!to_index_files.empty()) { - scheduler::BuildIndexJobPtr job = std::make_shared(meta_ptr_, options_); - // step 2: put build index task to scheduler + std::map job2file_map; for (auto& file : to_index_files) { + scheduler::BuildIndexJobPtr job = std::make_shared(meta_ptr_, options_); scheduler::TableFileSchemaPtr file_ptr = std::make_shared(file); job->AddToIndexFiles(file_ptr); + scheduler::JobMgrInst::GetInstance()->Put(job); + job2file_map.insert(std::make_pair(job, file_ptr)); } - scheduler::JobMgrInst::GetInstance()->Put(job); - job->WaitBuildIndexFinish(); - if (!job->GetStatus().ok()) { - Status status = job->GetStatus(); - ENGINE_LOG_ERROR << "Building index failed: " << status.ToString(); + + for (auto iter = job2file_map.begin(); iter != job2file_map.end(); ++iter) { + scheduler::BuildIndexJobPtr job = iter->first; + meta::TableFileSchema& file_schema = *(iter->second.get()); + job->WaitBuildIndexFinish(); + if (!job->GetStatus().ok()) { + Status status = job->GetStatus(); + ENGINE_LOG_ERROR << "Building index job " << job->id() << " failed: " << status.ToString(); + + MarkFailedIndexFile(file_schema); + } else { + MarkSucceedIndexFile(file_schema); + ENGINE_LOG_DEBUG << "Building index job " << job->id() << " succeed."; + } } + + ENGINE_LOG_DEBUG << "Background build index thread finished"; } // ENGINE_LOG_TRACE << "Background build index thread exit"; @@ -911,6 +928,7 @@ DBImpl::DropTableRecursively(const std::string& table_id, const meta::DatesT& da if (dates.empty()) { status = mem_mgr_->EraseMemVector(table_id); // not allow insert status = meta_ptr_->DropTable(table_id); // soft delete table + CleanFailedIndexFileOfTable(table_id); // scheduler will determine when to delete table files auto nres = scheduler::ResMgrInst::GetInstance()->GetNumOfComputeResource(); @@ -989,6 +1007,8 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10 * 1000, times * 100))); GetFilesToBuildIndex(table_id, file_types, table_files); times++; + + IgnoreFailedIndexFiles(table_files); } // build index for partition @@ -1001,12 +1021,23 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex } } + // failed to build index for some files, return error + std::vector failed_files; + GetFailedIndexFileOfTable(table_id, failed_files); + if (!failed_files.empty()) { + std::string msg = "Failed to build index for " + std::to_string(failed_files.size()) + + ((failed_files.size() == 1) ? " file" : " files") + + ", file size is too large or gpu memory is not enough"; + return Status(DB_ERROR, msg); + } + return Status::OK(); } Status DBImpl::DropTableIndexRecursively(const std::string& table_id) { ENGINE_LOG_DEBUG << "Drop index for table: " << table_id; + CleanFailedIndexFileOfTable(table_id); auto status = meta_ptr_->DropTableIndex(table_id); if (!status.ok()) { return status; @@ -1049,5 +1080,86 @@ DBImpl::GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_c return Status::OK(); } +Status +DBImpl::CleanFailedIndexFileOfTable(const std::string& table_id) { + std::lock_guard lck(index_failed_mutex_); + index_failed_files_.erase(table_id); // rebuild failed index files for this table + + return Status::OK(); +} + +Status +DBImpl::GetFailedIndexFileOfTable(const std::string& table_id, std::vector& failed_files) { + failed_files.clear(); + std::lock_guard lck(index_failed_mutex_); + auto iter = index_failed_files_.find(table_id); + if (iter != index_failed_files_.end()) { + FileID2FailedTimes& failed_map = iter->second; + for (auto it_file = failed_map.begin(); it_file != failed_map.end(); ++it_file) { + failed_files.push_back(it_file->first); + } + } + + return Status::OK(); +} + +Status +DBImpl::MarkFailedIndexFile(const meta::TableFileSchema& file) { + std::lock_guard lck(index_failed_mutex_); + + auto iter = index_failed_files_.find(file.table_id_); + if (iter == index_failed_files_.end()) { + FileID2FailedTimes failed_files; + failed_files.insert(std::make_pair(file.file_id_, 1)); + index_failed_files_.insert(std::make_pair(file.table_id_, failed_files)); + } else { + auto it_failed_files = iter->second.find(file.file_id_); + if (it_failed_files != iter->second.end()) { + it_failed_files->second++; + } else { + iter->second.insert(std::make_pair(file.file_id_, 1)); + } + } + + return Status::OK(); +} + +Status +DBImpl::MarkSucceedIndexFile(const meta::TableFileSchema& file) { + std::lock_guard lck(index_failed_mutex_); + + auto iter = index_failed_files_.find(file.table_id_); + if (iter != index_failed_files_.end()) { + iter->second.erase(file.file_id_); + } + + return Status::OK(); +} + +Status +DBImpl::IgnoreFailedIndexFiles(meta::TableFilesSchema& table_files) { + std::lock_guard lck(index_failed_mutex_); + + // there could be some failed files belong to different table. + // some files may has failed for several times, no need to build index for these files. + // thus we can avoid dead circle for build index operation + for (auto it_file = table_files.begin(); it_file != table_files.end();) { + auto it_failed_files = index_failed_files_.find((*it_file).table_id_); + if (it_failed_files != index_failed_files_.end()) { + auto it_failed_file = it_failed_files->second.find((*it_file).file_id_); + if (it_failed_file != it_failed_files->second.end()) { + if (it_failed_file->second >= INDEX_FAILED_RETRY_TIME) { + it_file = table_files.erase(it_file); + continue; + } + } + } + + ++it_file; + } + + return Status::OK(); +} + } // namespace engine } // namespace milvus diff --git a/core/src/db/DBImpl.h b/core/src/db/DBImpl.h index 82a5d3096b..3baac92c0a 100644 --- a/core/src/db/DBImpl.h +++ b/core/src/db/DBImpl.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -35,8 +36,6 @@ namespace milvus { namespace engine { -class Env; - namespace meta { class Meta; } @@ -179,6 +178,21 @@ class DBImpl : public DB { Status GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_count); + Status + CleanFailedIndexFileOfTable(const std::string& table_id); + + Status + GetFailedIndexFileOfTable(const std::string& table_id, std::vector& failed_files); + + Status + MarkFailedIndexFile(const meta::TableFileSchema& file); + + Status + MarkSucceedIndexFile(const meta::TableFileSchema& file); + + Status + IgnoreFailedIndexFiles(meta::TableFilesSchema& table_files); + private: const DBOptions options_; @@ -200,7 +214,11 @@ class DBImpl : public DB { std::list> index_thread_results_; std::mutex build_index_mutex_; -}; // DBImpl + std::mutex index_failed_mutex_; + using FileID2FailedTimes = std::map; + using Table2FailedFiles = std::map; + Table2FailedFiles index_failed_files_; // file id mapping to failed times +}; // DBImpl } // namespace engine } // namespace milvus diff --git a/core/src/db/Utils.cpp b/core/src/db/Utils.cpp index 0ddf03568a..9689f496cf 100644 --- a/core/src/db/Utils.cpp +++ b/core/src/db/Utils.cpp @@ -154,7 +154,9 @@ GetTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file } std::string msg = "Table file doesn't exist: " + file_path; - ENGINE_LOG_ERROR << msg << " in path: " << options.path_ << " for table: " << table_file.table_id_; + if (table_file.file_size_ > 0) { // no need to pop error for empty file + ENGINE_LOG_ERROR << msg << " in path: " << options.path_ << " for table: " << table_file.table_id_; + } return Status(DB_ERROR, msg); } diff --git a/core/src/db/meta/MySQLMetaImpl.cpp b/core/src/db/meta/MySQLMetaImpl.cpp index 2fb5eb0f3c..193a34e4f6 100644 --- a/core/src/db/meta/MySQLMetaImpl.cpp +++ b/core/src/db/meta/MySQLMetaImpl.cpp @@ -1610,10 +1610,35 @@ MySQLMetaImpl::FilesByType(const std::string& table_id, const std::vector& } } - ENGINE_LOG_DEBUG << "Table " << table_id << " currently has raw files:" << raw_count - << " new files:" << new_count << " new_merge files:" << new_merge_count - << " new_index files:" << new_index_count << " to_index files:" << to_index_count - << " index files:" << index_count << " backup files:" << backup_count; + std::string msg = "Get table files by type. "; + for (int file_type : file_types) { + switch (file_type) { + case (int)TableFileSchema::RAW: + msg = msg + "raw files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW: + msg = msg + "new files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW_MERGE: + msg = msg + "new_merge files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW_INDEX: + msg = msg + "new_index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::TO_INDEX: + msg = msg + "to_index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::INDEX: + msg = msg + "index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::BACKUP: + msg = msg + "backup files:" + std::to_string(raw_count); + break; + default: + break; + } + } + ENGINE_LOG_DEBUG << msg; } } catch (std::exception& e) { return HandleException("GENERAL ERROR WHEN GET FILE BY TYPE", e.what()); diff --git a/core/src/db/meta/SqliteMetaImpl.cpp b/core/src/db/meta/SqliteMetaImpl.cpp index 24d5d78bad..92c1d9cb8f 100644 --- a/core/src/db/meta/SqliteMetaImpl.cpp +++ b/core/src/db/meta/SqliteMetaImpl.cpp @@ -1157,10 +1157,34 @@ SqliteMetaImpl::FilesByType(const std::string& table_id, table_files.emplace_back(file_schema); } - ENGINE_LOG_DEBUG << "Table " << table_id << " currently has raw files:" << raw_count - << " new files:" << new_count << " new_merge files:" << new_merge_count - << " new_index files:" << new_index_count << " to_index files:" << to_index_count - << " index files:" << index_count << " backup files:" << backup_count; + std::string msg = "Get table files by type. "; + for (int file_type : file_types) { + switch (file_type) { + case (int)TableFileSchema::RAW: + msg = msg + "raw files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW: + msg = msg + "new files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW_MERGE: + msg = msg + "new_merge files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::NEW_INDEX: + msg = msg + "new_index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::TO_INDEX: + msg = msg + "to_index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::INDEX: + msg = msg + "index files:" + std::to_string(raw_count); + break; + case (int)TableFileSchema::BACKUP: + msg = msg + "backup files:" + std::to_string(raw_count); + break; + default:break; + } + } + ENGINE_LOG_DEBUG << msg; } } catch (std::exception& e) { return HandleException("Encounter exception when check non index files", e.what()); diff --git a/core/src/utils/CommonUtil.cpp b/core/src/utils/CommonUtil.cpp index cdfae8f1e5..cfadb2fcc4 100644 --- a/core/src/utils/CommonUtil.cpp +++ b/core/src/utils/CommonUtil.cpp @@ -229,7 +229,7 @@ CommonUtil::ConvertTime(tm time_struct, time_t& time_integer) { void CommonUtil::EraseFromCache(const std::string& item_key) { if (item_key.empty()) { - SERVER_LOG_ERROR << "Empty key cannot be erased from cache"; + // SERVER_LOG_ERROR << "Empty key cannot be erased from cache"; return; } From 0b0d3addde4e2f96e3bbd4a8f9a4c1ae9763f3e7 Mon Sep 17 00:00:00 2001 From: groot Date: Thu, 28 Nov 2019 10:53:20 +0800 Subject: [PATCH 3/3] modify error message --- core/src/db/DBImpl.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 0687ca43d4..0b3309d84e 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -1026,8 +1026,12 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex GetFailedIndexFileOfTable(table_id, failed_files); if (!failed_files.empty()) { std::string msg = "Failed to build index for " + std::to_string(failed_files.size()) + - ((failed_files.size() == 1) ? " file" : " files") + - ", file size is too large or gpu memory is not enough"; + ((failed_files.size() == 1) ? " file" : " files"); +#ifdef MILVUS_CPU_VERSION + msg += ", please double check index parameters."; +#else + msg += ", file size is too large or gpu memory is not enough."; +#endif return Status(DB_ERROR, msg); }