milvus/core/src/db/insert/MemTableFile.cpp
BossZou 1963f3b9e9
Make cache_insert_data take effect in-service(# 1521) (#1540)
* make cache_insert_data take effect in-service (fix #1521)

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* complete omitted config setter

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* cache_insert_data take effect for MemTable

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* add config test

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* rename config handler

Signed-off-by: Yhz <yinghao.zou@zilliz.com>
2020-03-07 09:34:35 +08:00

233 lines
8.0 KiB
C++

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#include "db/insert/MemTableFile.h"
#include <algorithm>
#include <cmath>
#include <iterator>
#include <string>
#include <vector>
#include "db/Constants.h"
#include "db/Utils.h"
#include "db/engine/EngineFactory.h"
#include "metrics/Metrics.h"
#include "segment/SegmentReader.h"
#include "utils/Log.h"
#include "utils/ValidationUtil.h"
namespace milvus {
namespace engine {
// Builds an in-memory table file for `table_id`.
//
// A table-file record is requested from `meta` via CreateTableFile(). Only when
// that succeeds is a segment writer created, rooted at the parent directory of
// the new file's location. On failure `segment_writer_ptr_` is left unset.
// The instance then sets its identity and registers a cache-insert-data listener
// (removed again in the destructor).
MemTableFile::MemTableFile(const std::string& table_id, const meta::MetaPtr& meta, const DBOptions& options)
    : table_id_(table_id), meta_(meta), options_(options) {
    current_mem_ = 0;

    if (CreateTableFile().ok()) {
        // The segment writer works on the directory that contains the table file.
        std::string segment_dir;
        utils::GetParentPath(table_file_schema_.location_, segment_dir);
        segment_writer_ptr_ = std::make_shared<segment::SegmentWriter>(segment_dir);
    }

    SetIdentity("MemTableFile");
    AddCacheInsertDataListener();
}
// Unregisters the cache-insert-data listener that the constructor added.
MemTableFile::~MemTableFile() {
    RemoveCacheInsertDataListener();
}
// Asks the meta store to create a new table file for `table_id_`.
//
// On success the returned schema is stored in `table_file_schema_`.
// On failure the error is logged and `table_file_schema_` is left untouched.
// The status from the meta store is returned in both cases.
Status
MemTableFile::CreateTableFile() {
    meta::TableFileSchema new_schema;
    new_schema.table_id_ = table_id_;

    auto status = meta_->CreateTableFile(new_schema);
    if (!status.ok()) {
        std::string err_msg = "MemTableFile::CreateTableFile failed: " + status.ToString();
        ENGINE_LOG_ERROR << err_msg;
        return status;
    }

    table_file_schema_ = new_schema;
    return status;
}
// Moves as many vectors as still fit into this file from `source` into the segment writer.
//
// Returns:
//   - DB_ERROR if the table file schema has no valid dimension (table file creation failed),
//     or if the source reports a per-vector size of 0 bytes;
//   - OK without adding anything if less than one vector's worth of memory is left;
//   - otherwise the status of `source->Add(...)`. On success `current_mem_` grows by the
//     number of vectors actually added times the per-vector size.
Status
MemTableFile::Add(const VectorSourcePtr& source) {
    if (table_file_schema_.dimension_ <= 0) {
        std::string err_msg =
            "MemTableFile::Add: table_file_schema dimension = " + std::to_string(table_file_schema_.dimension_) +
            ", table_id = " + table_file_schema_.table_id_;
        ENGINE_LOG_ERROR << err_msg;
        return Status(DB_ERROR, "Not able to create table file");
    }

    const size_t single_vector_mem_size = source->SingleVectorSize(table_file_schema_.dimension_);
    if (single_vector_mem_size == 0) {
        // A zero size would make the capacity computation below divide by zero.
        std::string err_msg =
            "MemTableFile::Add: single vector size is 0, table_id = " + table_file_schema_.table_id_;
        ENGINE_LOG_ERROR << err_msg;
        return Status(DB_ERROR, "Not able to add vectors with a size of 0 bytes");
    }

    const size_t mem_left = GetMemLeft();
    if (mem_left < single_vector_mem_size) {
        // No room for even one more vector; the caller is expected to move on to another file.
        return Status::OK();
    }

    // Integer division already rounds down to the number of whole vectors that fit.
    size_t num_vectors_to_add = mem_left / single_vector_mem_size;
    size_t num_vectors_added = 0;
    auto status = source->Add(/*execution_engine_,*/ segment_writer_ptr_, table_file_schema_, num_vectors_to_add,
                              num_vectors_added);
    if (status.ok()) {
        current_mem_ += (num_vectors_added * single_vector_mem_size);
    }
    return status;
}
// Removes the buffered vector whose uid equals `doc_id`, if this file holds one.
//
// Only the first matching uid is erased. Always returns OK: an id that is not
// present, or a file with no segment to search, means there is nothing to delete.
Status
MemTableFile::Delete(segment::doc_id_t doc_id) {
    // The constructor leaves the writer unset when table file creation failed.
    if (segment_writer_ptr_ == nullptr) {
        return Status::OK();
    }

    segment::SegmentPtr segment_ptr;
    segment_writer_ptr_->GetSegment(segment_ptr);
    if (segment_ptr == nullptr) {
        return Status::OK();
    }

    // Check whether the doc_id is present; if so, delete its corresponding buffer.
    // Binding by const reference avoids copying the uid list; the lookup is finished
    // before Erase() is called, so the reference is never read after the mutation.
    const auto& uids = segment_ptr->vectors_ptr_->GetUids();
    auto found = std::find(uids.begin(), uids.end(), doc_id);
    if (found != uids.end()) {
        auto offset = std::distance(uids.begin(), found);
        segment_ptr->vectors_ptr_->Erase(offset);
    }
    return Status::OK();
}
// Removes every buffered vector whose uid appears in `doc_ids`.
//
// Always returns OK: ids that are not present are ignored, and an empty id list
// or a file with no segment to search means there is nothing to delete.
Status
MemTableFile::Delete(const std::vector<segment::doc_id_t>& doc_ids) {
    // The constructor leaves the writer unset when table file creation failed.
    if (doc_ids.empty() || segment_writer_ptr_ == nullptr) {
        return Status::OK();
    }

    segment::SegmentPtr segment_ptr;
    segment_writer_ptr_->GetSegment(segment_ptr);
    if (segment_ptr == nullptr) {
        return Status::OK();
    }

    // Sorted copy of the requested ids so that each membership test is a binary search.
    std::vector<segment::doc_id_t> sorted_ids(doc_ids.begin(), doc_ids.end());
    std::sort(sorted_ids.begin(), sorted_ids.end());

    // Work on a snapshot of the uids: index i always refers to a position in the
    // list as it was before any erasure, and (i - deleted) translates it to the
    // current position after the earlier erasures shifted later entries down.
    const auto uids = segment_ptr->vectors_ptr_->GetUids();
    size_t deleted = 0;
    for (size_t i = 0; i < uids.size(); ++i) {
        if (std::binary_search(sorted_ids.begin(), sorted_ids.end(), uids[i])) {
            segment_ptr->vectors_ptr_->Erase(i - deleted);
            ++deleted;
        }
    }

    return Status::OK();
}
size_t
MemTableFile::GetCurrentMem() {
return current_mem_;
}
// Returns how many bytes may still be added before MAX_TABLE_FILE_MEM is reached.
//
// The result saturates at 0: should `current_mem_` ever exceed the cap, a plain
// unsigned subtraction would wrap around to a huge value and make the file look
// almost empty to Add() and IsFull().
size_t
MemTableFile::GetMemLeft() {
    const size_t capacity = MAX_TABLE_FILE_MEM;
    return (current_mem_ < capacity) ? (capacity - current_mem_) : 0;
}
// Reports whether the remaining memory budget is too small for one more vector.
//
// The per-vector size is computed as dimension * FLOAT_TYPE_SIZE.
// NOTE(review): Add() asks the source for its per-vector size instead; confirm
// that this float-based estimate is intended for every vector type.
bool
MemTableFile::IsFull() {
    const size_t bytes_per_vector = table_file_schema_.dimension_ * FLOAT_TYPE_SIZE;
    const size_t remaining = GetMemLeft();
    return remaining < bytes_per_vector;
}
// Writes the buffered segment out and records the result in the meta store.
//
// Steps:
//   1. Serialize the segment through the segment writer; on failure log and return that status.
//   2. Copy the writer's size and vector count into the table file schema.
//   3. Choose the file type: TO_INDEX when the engine is not an IDMAP type and the
//      in-memory size reached index_file_size_, RAW otherwise.
//   4. Stamp the schema with `wal_lsn` and update the table file in meta.
//   5. If insert_cache_immediately_ is enabled, ask the segment writer to cache the data.
//
// Returns the status of the meta update (or of the failed serialization).
Status
MemTableFile::Serialize(uint64_t wal_lsn) {
    const size_t mem_size = GetCurrentMem();
    server::CollectSerializeMetrics metrics(mem_size);

    auto status = segment_writer_ptr_->Serialize();
    if (!status.ok()) {
        ENGINE_LOG_ERROR << "Failed to serialize segment: " << table_file_schema_.segment_id_;
        // The file is deliberately NOT marked as to_delete here: the data is still held by
        // this mem table file, and any further flush will try to serialize the same file
        // again, which needs both the directory to write to and the table file entry in meta.
        return status;
    }

    table_file_schema_.file_size_ = segment_writer_ptr_->Size();
    table_file_schema_.row_count_ = segment_writer_ptr_->VectorCount();

    // IDMAP engines never need an index build, so their files always stay RAW.
    // Any other engine gets TO_INDEX once the size reaches index_file_size_.
    const bool is_idmap_engine = table_file_schema_.engine_type_ == static_cast<int>(EngineType::FAISS_IDMAP) ||
                                 table_file_schema_.engine_type_ == static_cast<int>(EngineType::FAISS_BIN_IDMAP);
    const bool needs_index = !is_idmap_engine && (mem_size >= table_file_schema_.index_file_size_);
    table_file_schema_.file_type_ = needs_index ? meta::TableFileSchema::TO_INDEX : meta::TableFileSchema::RAW;

    // Record the flush LSN so WAL can roll back and delete garbage files, which can be
    // obtained from GetTableFilesByFlushLSN() in meta.
    table_file_schema_.flush_lsn_ = wal_lsn;

    status = meta_->UpdateTableFile(table_file_schema_);

    const char* file_kind = (table_file_schema_.file_type_ == meta::TableFileSchema::RAW) ? "raw" : "to_index";
    ENGINE_LOG_DEBUG << "New " << file_kind << " file " << table_file_schema_.file_id_ << " of size " << mem_size
                     << " bytes, lsn = " << wal_lsn;

    if (options_.insert_cache_immediately_) {
        segment_writer_ptr_->Cache();
    }

    return status;
}
const std::string&
MemTableFile::GetSegmentId() const {
return table_file_schema_.segment_id_;
}
void
MemTableFile::OnCacheInsertDataChanged(bool value) {
options_.insert_cache_immediately_ = value;
}
} // namespace engine
} // namespace milvus