merge main and fix conflict

Signed-off-by: Yhz <yinghao.zou@zilliz.com>
This commit is contained in:
Yhz 2020-03-08 16:19:28 +08:00
commit 01eb62836e
19 changed files with 39 additions and 215 deletions

View File

@ -13,7 +13,7 @@ Please mark all change in change log and use the issue from GitHub
- \#977 Server crash when create tables concurrently
- \#990 Check gpu resources setting when assign repeated value
- \#995 table count set to 0 if no tables found
- \#1010 improve error message when offset or page_size is equal 0
- \#1010 Improve error message when offset or page_size is equal to 0
- \#1022 check if partition name is legal
- \#1028 check if table exists when show partitions
- \#1029 check if table exists when try to delete partition
@ -29,10 +29,10 @@ Please mark all change in change log and use the issue from GitHub
- \#1359 Negative distance value returned when searching with HNSW index type
- \#1429 Server crashed when searching vectors with GPU
- \#1476 Fix vectors results bug when getting vectors from segments
- \#1484 Index type changed to IDMAP after compacted
- \#1484 Index type changed to IDMAP after compacted
- \#1491 Server crashed during adding vectors
- \#1499 Fix duplicated ID number issue
- \#1491 Server crashed during adding vectors
- \#1504 Avoid possible race condition between delete and search
- \#1504 Avoid possible race condition between delete and search
- \#1507 set_config for insert_buffer_size is wrong
- \#1510 Add set interfaces for WAL configurations
- \#1511 Fix big integer cannot pass to server correctly
@ -41,8 +41,10 @@ Please mark all change in change log and use the issue from GitHub
- \#1525 Add setter API for config preload_table
- \#1529 Fix server crash when cache_insert_data enabled
- \#1530 Set table file with correct engine type in meta
- \#1532 Search with ivf_flat failed with open-dataset: sift-256-hamming
- \#1535 Search performance degradation with metric_type: binary_idmap
- \#1549 Fix server/wal config setting bug
- \#1556 Index file not created after table and index created
## Feature
- \#216 Add CLI to get server info

View File

@ -812,7 +812,7 @@ DBImpl::CompactFile(const std::string& table_id, const meta::TableFileSchema& fi
// Update table files state
// if index type isn't IDMAP, set file type to TO_INDEX if file size exceed index_file_size
// else set file type to RAW, no need to build index
if (compacted_file.engine_type_ != (int)EngineType::FAISS_IDMAP) {
if (!utils::IsRawIndexType(compacted_file.engine_type_)) {
compacted_file.file_type_ = (segment_writer_ptr->Size() >= compacted_file.index_file_size_)
? meta::TableFileSchema::TO_INDEX
: meta::TableFileSchema::RAW;
@ -1468,7 +1468,7 @@ DBImpl::MergeFiles(const std::string& table_id, const meta::TableFilesSchema& fi
// step 4: update table files state
// if index type isn't IDMAP, set file type to TO_INDEX if file size exceed index_file_size
// else set file type to RAW, no need to build index
if (table_file.engine_type_ != (int)EngineType::FAISS_IDMAP) {
if (!utils::IsRawIndexType(table_file.engine_type_)) {
table_file.file_type_ = (segment_writer_ptr->Size() >= table_file.index_file_size_)
? meta::TableFileSchema::TO_INDEX
: meta::TableFileSchema::RAW;
@ -1770,7 +1770,7 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex
// for IDMAP type, only wait all NEW file converted to RAW file
// for other type, wait NEW/RAW/NEW_MERGE/NEW_INDEX/TO_INDEX files converted to INDEX files
std::vector<int> file_types;
if (index.engine_type_ == static_cast<int32_t>(EngineType::FAISS_IDMAP)) {
if (utils::IsRawIndexType(index.engine_type_)) {
file_types = {
static_cast<int32_t>(meta::TableFileSchema::NEW),
static_cast<int32_t>(meta::TableFileSchema::NEW_MERGE),
@ -1792,7 +1792,7 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex
while (!table_files.empty()) {
ENGINE_LOG_DEBUG << "Non index files detected! Will build index " << times;
if (index.engine_type_ != (int)EngineType::FAISS_IDMAP) {
if (!utils::IsRawIndexType(index.engine_type_)) {
status = meta_ptr_->UpdateTableFilesToIndex(table_id);
}

View File

@ -215,6 +215,11 @@ IsSameIndex(const TableIndex& index1, const TableIndex& index2) {
index1.metric_type_ == index2.metric_type_;
}
/**
 * @brief Tell whether an engine type value denotes one of the IDMAP engine types.
 *
 * @param type engine type as an int32_t value, compared against EngineType enumerators.
 * @return true when type equals EngineType::FAISS_IDMAP or EngineType::FAISS_BIN_IDMAP,
 *         false for every other value.
 */
bool
IsRawIndexType(int32_t type) {
    // Named casts keep the enum-to-integer conversion explicit and greppable.
    return type == static_cast<int32_t>(EngineType::FAISS_IDMAP) ||
           type == static_cast<int32_t>(EngineType::FAISS_BIN_IDMAP);
}
meta::DateT
GetDate(const std::time_t& t, int day_delta) {
struct tm ltm;

View File

@ -45,6 +45,9 @@ GetParentPath(const std::string& path, std::string& parent_path);
bool
IsSameIndex(const TableIndex& index1, const TableIndex& index2);
// Returns true when type is the int32_t value of EngineType::FAISS_IDMAP or
// EngineType::FAISS_BIN_IDMAP, false otherwise.
bool
IsRawIndexType(int32_t type);
meta::DateT
GetDate(const std::time_t& t, int day_delta = 0);
meta::DateT

View File

@ -406,7 +406,7 @@ ExecutionEngineImpl::Load(bool to_cache) {
utils::GetParentPath(location_, segment_dir);
auto segment_reader_ptr = std::make_shared<segment::SegmentReader>(segment_dir);
if (index_type_ == EngineType::FAISS_IDMAP || index_type_ == EngineType::FAISS_BIN_IDMAP) {
if (utils::IsRawIndexType((int32_t)index_type_)) {
index_ = index_type_ == EngineType::FAISS_IDMAP ? GetVecIndexFactory(IndexType::FAISS_IDMAP)
: GetVecIndexFactory(IndexType::FAISS_BIN_IDMAP);
milvus::json conf{{knowhere::meta::DEVICEID, gpu_num_}, {knowhere::meta::DIM, dim_}};

View File

@ -674,16 +674,7 @@ MySQLMetaImpl::CreateTableFile(TableFileSchema& file_schema) {
file_schema.updated_time_ = file_schema.created_on_;
file_schema.index_file_size_ = table_schema.index_file_size_;
file_schema.index_params_ = table_schema.index_params_;
if (file_schema.file_type_ == TableFileSchema::FILE_TYPE::NEW ||
file_schema.file_type_ == TableFileSchema::FILE_TYPE::NEW_MERGE) {
file_schema.engine_type_ = server::ValidationUtil::IsBinaryMetricType(table_schema.metric_type_)
? (int32_t)EngineType::FAISS_BIN_IDMAP
: (int32_t)EngineType::FAISS_IDMAP;
} else {
file_schema.engine_type_ = table_schema.engine_type_;
}
file_schema.engine_type_ = table_schema.engine_type_;
file_schema.metric_type_ = table_schema.metric_type_;
std::string id = "NULL"; // auto-increment
@ -2086,8 +2077,7 @@ MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/)
// If we are deleting a raw table file, it means it's okay to delete the entire segment directory.
// Else, we can only delete the single file
// TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky
if (table_file.engine_type_ == (int32_t)EngineType::FAISS_IDMAP ||
table_file.engine_type_ == (int32_t)EngineType::FAISS_BIN_IDMAP) {
if (utils::IsRawIndexType(table_file.engine_type_)) {
utils::DeleteSegment(options_, table_file);
std::string segment_dir;
utils::GetParentPath(table_file.location_, segment_dir);

View File

@ -375,16 +375,7 @@ SqliteMetaImpl::CreateTableFile(TableFileSchema& file_schema) {
file_schema.updated_time_ = file_schema.created_on_;
file_schema.index_file_size_ = table_schema.index_file_size_;
file_schema.index_params_ = table_schema.index_params_;
if (file_schema.file_type_ == TableFileSchema::FILE_TYPE::NEW ||
file_schema.file_type_ == TableFileSchema::FILE_TYPE::NEW_MERGE) {
file_schema.engine_type_ = server::ValidationUtil::IsBinaryMetricType(table_schema.metric_type_)
? (int32_t)EngineType::FAISS_BIN_IDMAP
: (int32_t)EngineType::FAISS_IDMAP;
} else {
file_schema.engine_type_ = table_schema.engine_type_;
}
file_schema.engine_type_ = table_schema.engine_type_;
file_schema.metric_type_ = table_schema.metric_type_;
// multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here
@ -1425,8 +1416,7 @@ SqliteMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/
// If we are deleting a raw table file, it means it's okay to delete the entire segment directory.
// Else, we can only delete the single file
// TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky
if (table_file.engine_type_ == (int32_t)EngineType::FAISS_IDMAP ||
table_file.engine_type_ == (int32_t)EngineType::FAISS_BIN_IDMAP) {
if (utils::IsRawIndexType(table_file.engine_type_)) {
utils::DeleteSegment(options_, table_file);
std::string segment_dir;
utils::GetParentPath(table_file.location_, segment_dir);

View File

@ -159,13 +159,6 @@ ClientTest::SearchVectors(const std::string& table_name, int64_t topk, int64_t n
topk_query_result);
}
void
ClientTest::SearchVectorsByIds(const std::string& table_name, int64_t topk, int64_t nprobe) {
std::vector<std::string> partition_tags;
milvus::TopKQueryResult topk_query_result;
milvus_sdk::Utils::DoSearch(conn_, table_name, partition_tags, topk, nprobe, search_id_array_, topk_query_result);
}
void
ClientTest::CreateIndex(const std::string& table_name, milvus::IndexType type, int64_t nlist) {
milvus_sdk::TimeRecorder rc("Create index");
@ -245,7 +238,6 @@ ClientTest::Test() {
GetVectorById(table_name, search_id_array_[0]);
SearchVectors(table_name, TOP_K, NPROBE);
SearchVectorsByIds(table_name, TOP_K, NPROBE);
CreateIndex(table_name, INDEX_TYPE, NLIST);
ShowTableInfo(table_name);

View File

@ -29,36 +29,49 @@ class ClientTest {
private:
void
ShowServerVersion();
void
ShowSdkVersion();
void
ShowTables(std::vector<std::string>&);
void
CreateTable(const std::string&, int64_t, milvus::MetricType);
void
DescribeTable(const std::string&);
void
InsertVectors(const std::string&, int64_t);
void
BuildSearchVectors(int64_t, int64_t);
void
Flush(const std::string&);
void
ShowTableInfo(const std::string&);
void
GetVectorById(const std::string&, int64_t);
void
SearchVectors(const std::string&, int64_t, int64_t);
void
SearchVectorsByIds(const std::string&, int64_t, int64_t);
void
CreateIndex(const std::string&, milvus::IndexType, int64_t);
void
PreloadTable(const std::string&);
void
DeleteByIds(const std::string&, const std::vector<int64_t>&);
void
DropIndex(const std::string&);
void
DropTable(const std::string&);

View File

@ -220,68 +220,6 @@ Utils::DoSearch(std::shared_ptr<milvus::Connection> conn, const std::string& tab
CheckSearchResult(search_record_array, topk_query_result);
}
/**
 * Search by ID for every entry of search_id_array, print the results, and check that each
 * queried ID appears in its own result set.
 *
 * @param conn               connection used to issue the SearchByID calls.
 * @param table_name         table to search.
 * @param partition_tags     partition tags forwarded to every call.
 * @param top_k              number of results requested per query.
 * @param nprobe             value sent as the "nprobe" entry of the JSON search parameters.
 * @param search_id_array    IDs to query, one SearchByID call per ID.
 * @param topk_query_result  cleared first, then filled with the results of all calls in ID order.
 */
void
Utils::DoSearch(std::shared_ptr<milvus::Connection> conn, const std::string& table_name,
                const std::vector<std::string>& partition_tags, int64_t top_k, int64_t nprobe,
                const std::vector<int64_t>& search_id_array, milvus::TopKQueryResult& topk_query_result) {
    topk_query_result.clear();

    // Phase 1: query each ID separately and append whatever comes back.
    {
        BLOCK_SPLITER
        JSON json_params = {{"nprobe", nprobe}};
        for (size_t pos = 0; pos < search_id_array.size(); ++pos) {
            const auto& query_id = search_id_array[pos];
            milvus_sdk::TimeRecorder rc("search by id " + std::to_string(query_id));
            milvus::TopKQueryResult partial;
            milvus::Status call_status =
                conn->SearchByID(table_name, partition_tags, query_id, top_k, json_params.dump(), partial);
            topk_query_result.insert(topk_query_result.end(), std::make_move_iterator(partial.begin()),
                                     std::make_move_iterator(partial.end()));
            std::cout << "SearchByID function call status: " << call_status.message() << std::endl;
        }
        BLOCK_SPLITER
    }

    // The two phases below index both arrays in lockstep, so the sizes have to agree.
    const size_t result_count = topk_query_result.size();
    if (result_count != search_id_array.size()) {
        std::cout << "ERROR: Returned result count does not equal nq" << std::endl;
        return;
    }

    // Phase 2: dump every result row as id/distance pairs.
    BLOCK_SPLITER
    for (size_t row = 0; row < result_count; ++row) {
        const auto& row_ids = topk_query_result[row].ids;
        const auto& row_distances = topk_query_result[row].distances;
        std::cout << "No." << row << " vector " << search_id_array[row] << " top " << row_ids.size()
                  << " search result:" << std::endl;
        for (size_t col = 0; col < row_ids.size(); ++col) {
            std::cout << "\t" << row_ids[col] << "\t" << row_distances[col] << std::endl;
        }
    }
    BLOCK_SPLITER

    // Phase 3: each queried ID should appear somewhere in its own result row.
    BLOCK_SPLITER
    for (size_t row = 0; row < result_count; ++row) {
        const auto& row_ids = topk_query_result[row].ids;
        const auto target_id = search_id_array[row];

        // Advance to the first position holding the target, or to the end if it is absent.
        uint64_t rank = 0;
        while (rank < row_ids.size() && !(target_id == row_ids[rank])) {
            ++rank;
        }

        if (rank < row_ids.size()) {
            std::cout << "No." << row << " Check result successfully for target: " << target_id << " at top "
                      << rank << std::endl;
        } else {
            std::cout << "The topk result is wrong: not return search target in result set" << std::endl;
        }
    }
    BLOCK_SPLITER
}
void
PrintPartitionStat(const milvus::PartitionStat& partition_stat) {
std::cout << "\tPartition " << partition_stat.tag << " row count: " << partition_stat.row_count << std::endl;

View File

@ -70,12 +70,6 @@ class Utils {
const std::vector<std::pair<int64_t, milvus::RowRecord>>& search_record_array,
milvus::TopKQueryResult& topk_query_result);
static void
DoSearch(std::shared_ptr<milvus::Connection> conn, const std::string& table_name,
const std::vector<std::string>& partition_tags, int64_t top_k, int64_t nprobe,
const std::vector<int64_t>& search_id_array,
milvus::TopKQueryResult& topk_query_result);
static void
PrintTableInfo(const milvus::TableInfo& info);
};

View File

@ -314,49 +314,6 @@ ClientProxy::Search(const std::string& table_name, const std::vector<std::string
}
}
/**
 * @brief Issue a search-by-ID request and convert the gRPC reply into SDK result rows.
 *
 * @param table_name           target table's name.
 * @param partition_tag_array  partition tags forwarded in the request.
 * @param query_id             ID set on the request via set_id().
 * @param topk                 number of results requested per query.
 * @param extra_params         extra search parameters, forwarded unchanged.
 * @param topk_query_result    receives one QueryResult per returned row (appended).
 *
 * @return the status reported by the gRPC client, or StatusCode::UnknownError when a
 *         std::exception is thrown while building the request or converting the reply.
 */
Status
ClientProxy::SearchByID(const std::string& table_name,
                        const std::vector<std::string>& partition_tag_array,
                        int64_t query_id,
                        int64_t topk,
                        const std::string& extra_params,
                        TopKQueryResult& topk_query_result) {
    try {
        // step 1: build the request
        ::milvus::grpc::SearchByIDParam search_param;
        ConstructSearchParam(table_name,
                             partition_tag_array,
                             topk,
                             extra_params,
                             search_param);
        search_param.set_id(query_id);

        // step 2: search vectors
        ::milvus::grpc::TopKQueryResult result;
        Status status = client_ptr_->SearchByID(search_param, result);
        const int64_t nq = result.row_num();
        if (nq == 0) {
            // Nothing to convert; this also keeps the division below away from zero.
            return status;
        }

        // step 3: convert result array
        // The row width comes from the reply, not from the requested topk, so it gets its own
        // name instead of shadowing the parameter.
        const int64_t row_width = result.ids().size() / nq;
        topk_query_result.reserve(nq);
        for (int64_t i = 0; i < nq; i++) {
            // Build the row in place rather than copying a filled temporary into the output.
            topk_query_result.emplace_back();
            milvus::QueryResult& one_result = topk_query_result.back();
            one_result.ids.resize(row_width);
            one_result.distances.resize(row_width);
            // NOTE(review): assumes result.distances() holds at least as many entries as
            // result.ids() — confirm against the server contract.
            memcpy(one_result.ids.data(), result.ids().data() + row_width * i, row_width * sizeof(int64_t));
            memcpy(one_result.distances.data(), result.distances().data() + row_width * i,
                   row_width * sizeof(float));
        }

        return status;
    } catch (const std::exception& ex) {
        return Status(StatusCode::UnknownError, "Failed to search vectors: " + std::string(ex.what()));
    }
}
Status
ClientProxy::DescribeTable(const std::string& table_name, TableSchema& table_schema) {
try {

View File

@ -63,11 +63,6 @@ class ClientProxy : public Connection {
const std::vector<RowRecord>& query_record_array, int64_t topk, const std::string& extra_params,
TopKQueryResult& topk_query_result) override;
Status
SearchByID(const std::string& table_name, const std::vector<std::string>& partition_tag_array,
int64_t query_id, int64_t topk,
const std::string& extra_params, TopKQueryResult& topk_query_result) override;
Status
DescribeTable(const std::string& table_name, TableSchema& table_schema) override;

View File

@ -178,26 +178,6 @@ GrpcClient::Search(
return Status::OK();
}
/**
 * @brief Send a SearchByID RPC through the stub and translate the outcome into a Status.
 *
 * @param search_param       request message passed to the stub.
 * @param topk_query_result  reply message filled by the stub.
 *
 * @return StatusCode::RPCFailed with the gRPC error message when the call itself fails,
 *         StatusCode::ServerFailed with the server's reason when the reply status is not
 *         SUCCESS, Status::OK() otherwise.
 */
Status
GrpcClient::SearchByID(const ::milvus::grpc::SearchByIDParam& search_param,
                       ::milvus::grpc::TopKQueryResult& topk_query_result) {
    // The reply is written straight into the caller's message; the unused local
    // TopKQueryResult the original declared here has been removed.
    ClientContext context;
    ::grpc::Status grpc_status = stub_->SearchByID(&context, search_param, &topk_query_result);

    // Transport-level failure: the RPC did not complete.
    if (!grpc_status.ok()) {
        std::cerr << "SearchByID rpc failed!" << std::endl;
        std::cerr << grpc_status.error_message() << std::endl;
        return Status(StatusCode::RPCFailed, grpc_status.error_message());
    }

    // The RPC completed, but the reply carries a non-success status.
    if (topk_query_result.status().error_code() != grpc::SUCCESS) {
        std::cerr << topk_query_result.status().reason() << std::endl;
        return Status(StatusCode::ServerFailed, topk_query_result.status().reason());
    }

    return Status::OK();
}
Status
GrpcClient::DescribeTable(const std::string& table_name, ::milvus::grpc::TableSchema& grpc_schema) {
ClientContext context;

View File

@ -59,9 +59,6 @@ class GrpcClient {
Status
Search(const grpc::SearchParam& search_param, ::milvus::grpc::TopKQueryResult& topk_query_result);
Status
SearchByID(const grpc::SearchByIDParam& search_param, ::milvus::grpc::TopKQueryResult& topk_query_result);
Status
DescribeTable(const std::string& table_name, grpc::TableSchema& grpc_schema);

View File

@ -334,24 +334,6 @@ class Connection {
const std::vector<RowRecord>& query_record_array, int64_t topk,
const std::string& extra_params, TopKQueryResult& topk_query_result) = 0;
/**
 * @brief Search vectors by ID
 *
 * This method searches a table using a vector ID as the query.
 *
 * @param table_name, name of the target table.
 * @param partition_tag_array, target partitions; keep empty if there is no partition.
 * @param query_id, ID of the vector to be queried.
 * @param topk, how many similar vectors will be returned.
 * @param extra_params, extra search parameters, which depend on the index type; must be in JSON format.
 * @param topk_query_result, result array.
 *
 * @return Indicates whether the query is successful.
 */
virtual Status
SearchByID(const std::string& table_name, const PartitionTagList& partition_tag_array, int64_t query_id,
int64_t topk, const std::string& extra_params, TopKQueryResult& topk_query_result) = 0;
/**
* @brief Show table description
*

View File

@ -100,16 +100,6 @@ ConnectionImpl::Search(const std::string& table_name, const std::vector<std::str
return client_proxy_->Search(table_name, partition_tags, query_record_array, topk, extra_params, topk_query_result);
}
// Forwards the search-by-ID request unchanged to the client proxy and returns its status.
Status
ConnectionImpl::SearchByID(const std::string& table_name, const std::vector<std::string>& partition_tags,
                           int64_t query_id, int64_t topk, const std::string& extra_params,
                           TopKQueryResult& topk_query_result) {
    Status proxy_status =
        client_proxy_->SearchByID(table_name, partition_tags, query_id, topk, extra_params, topk_query_result);
    return proxy_status;
}
Status
ConnectionImpl::DescribeTable(const std::string& table_name, TableSchema& table_schema) {
return client_proxy_->DescribeTable(table_name, table_schema);

View File

@ -65,10 +65,6 @@ class ConnectionImpl : public Connection {
const std::vector<RowRecord>& query_record_array, int64_t topk,
const std::string& extra_params, TopKQueryResult& topk_query_result) override;
Status
SearchByID(const std::string& table_name, const std::vector<std::string>& partition_tag_array, int64_t query_id,
int64_t topk, const std::string& extra_params, TopKQueryResult& topk_query_result) override;
Status
DescribeTable(const std::string& table_name, TableSchema& table_schema) override;

View File

@ -30,7 +30,7 @@ requests-oauthlib==1.2.0
rsa==4.0
six==1.12.0
SQLAlchemy==1.3.5
urllib3==1.25.3
urllib3==1.25.8
jaeger-client>=3.4.0
grpcio-opentracing>=1.0
mock==2.0.0