mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-02 08:55:56 +08:00
Merge branch 'branch-0.3.1' into 'branch-0.3.1'
MS-324 Show error when there is not enough gpu memory to build index See merge request megasearch/milvus!312 Former-commit-id: d67ec24877c51a8b138182280fc36f521f013b99
This commit is contained in:
commit
2de151452a
@ -39,10 +39,11 @@ Please mark all change in change log and use the ticket from JIRA.
|
||||
- MS-261 - Update faiss version to 1.5.3 and add BUILD_FAISS_WITH_MKL as an option
|
||||
- MS-266 - Improve topk reduce time by using multi-threads
|
||||
- MS-275 - Avoid sqlite logic error exception
|
||||
- MS-278 - add IndexStatsHelper
|
||||
- MS-278 - Add IndexStatsHelper
|
||||
- MS-312 - Set openmp thread number by config
|
||||
- MS-305 - add CPU core percent metric
|
||||
- MS-310 - add milvus CPU utilization ratio and CPU/GPU temperature metrics
|
||||
- MS-305 - Add CPU core percent metric
|
||||
- MS-310 - Add milvus CPU utilization ratio and CPU/GPU temperature metrics
|
||||
- MS-324 - Show error when there is not enough gpu memory to build index
|
||||
|
||||
## New Feature
|
||||
- MS-180 - Add new mem manager
|
||||
|
||||
@ -535,11 +535,27 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {
|
||||
}
|
||||
|
||||
//step 3: build index
|
||||
auto start_time = METRICS_NOW_TIME;
|
||||
auto index = to_index->BuildIndex(table_file.location_);
|
||||
auto end_time = METRICS_NOW_TIME;
|
||||
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
|
||||
server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time);
|
||||
std::shared_ptr<ExecutionEngine> index;
|
||||
|
||||
try {
|
||||
auto start_time = METRICS_NOW_TIME;
|
||||
index = to_index->BuildIndex(table_file.location_);
|
||||
auto end_time = METRICS_NOW_TIME;
|
||||
auto total_time = METRICS_MICROSECONDS(start_time, end_time);
|
||||
server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time);
|
||||
} catch (std::exception& ex) {
|
||||
//typical error: out of gpu memory
|
||||
std::string msg = "BuildIndex encounter exception" + std::string(ex.what());
|
||||
ENGINE_LOG_ERROR << msg;
|
||||
|
||||
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
|
||||
status = meta_ptr_->UpdateTableFile(table_file);
|
||||
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete";
|
||||
|
||||
std::cout << "ERROR: failed to build index, index file is too large or gpu memory is not enough" << std::endl;
|
||||
|
||||
return Status::Error(msg);
|
||||
}
|
||||
|
||||
//step 4: if table has been deleted, don't save index file
|
||||
bool has_table = false;
|
||||
@ -550,7 +566,22 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {
|
||||
}
|
||||
|
||||
//step 5: save index file
|
||||
index->Serialize();
|
||||
try {
|
||||
index->Serialize();
|
||||
} catch (std::exception& ex) {
|
||||
//typical error: out of disk space or permission denied
|
||||
std::string msg = "Serialize index encounter exception" + std::string(ex.what());
|
||||
ENGINE_LOG_ERROR << msg;
|
||||
|
||||
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
|
||||
status = meta_ptr_->UpdateTableFile(table_file);
|
||||
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete";
|
||||
|
||||
std::cout << "ERROR: failed to persist index file: " << table_file.location_
|
||||
<< ", possible out of disk space" << std::endl;
|
||||
|
||||
return Status::Error(msg);
|
||||
}
|
||||
|
||||
//step 6: update meta
|
||||
table_file.file_type_ = meta::TableFileSchema::INDEX;
|
||||
|
||||
@ -932,7 +932,7 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) {
|
||||
table_file.date_ = std::get<3>(file);
|
||||
|
||||
utils::DeleteTableFilePath(options_, table_file);
|
||||
ENGINE_LOG_DEBUG << "Removing file id:" << table_file.id_ << " location:" << table_file.location_;
|
||||
ENGINE_LOG_DEBUG << "Removing file id:" << table_file.file_id_ << " location:" << table_file.location_;
|
||||
ConnectorPtr->remove<TableFileSchema>(table_file.id_);
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user