milvus/core/unittest/db/test_delete.cpp
groot a08b51c2b6
merge json to master to get docker image (#1500)
* General proto api for NNS libraries

Signed-off-by: groot <yihua.mo@zilliz.com>

* refactor confadapter

Signed-off-by: Nicky <nicky.xj.lin@gmail.com>

* fix unittest failures

Signed-off-by: groot <yihua.mo@zilliz.com>

* update test_add

Signed-off-by: zhenwu <zw@zilliz.com>

* update knowhere

Signed-off-by: Nicky <nicky.xj.lin@gmail.com>

* update test cases

Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au>

* Update cases

* C++ sdk for json parameters

Signed-off-by: groot <yihua.mo@zilliz.com>

* update unittest

Signed-off-by: Nicky <nicky.xj.lin@gmail.com>

* fix unittest failures

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix case

Signed-off-by: del-zhenwu <zw@zilliz.com>

* modify test_index.py

Signed-off-by: shengjh <jianghong.sheng@zilliz.com>

* update

Signed-off-by: Nicky <nicky.xj.lin@gmail.com>

* update sptag

Signed-off-by: Nicky <nicky.xj.lin@gmail.com>

* update...

Signed-off-by: Nicky <nicky.xj.lin@gmail.com>

* Build Pass

Signed-off-by: xiaojun.lin <xiaojun.lin@zilliz.com>

* knowhere/wrapper ut pass

Signed-off-by: xiaojun.lin <xiaojun.lin@zilliz.com>

* update util

Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au>

* fix wal case

Signed-off-by: del-zhenwu <zw@zilliz.com>

* modify test_search_vectors

Signed-off-by: shengjh <jianghong.sheng@zilliz.com>

* update ci

Signed-off-by: del-zhenwu <zw@zilliz.com>

* update util

Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au>

* modify test_search_vectoes

Signed-off-by: shengjh <jianghong.sheng@zilliz.com>

* add hnsw in http module & modify index apis

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* modify search in http module

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* fix build error

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix typo in test_index and test_search

Signed-off-by: shengjh <jianghong.sheng@zilliz.com>

* update...

Signed-off-by: Nicky <nicky.xj.lin@gmail.com>

* index apis in http module done

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* fix build index bug

Signed-off-by: groot <yihua.mo@zilliz.com>

* search apis unittest pass

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* web test pass

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* update confadapter

Signed-off-by: Nicky <nicky.xj.lin@gmail.com>

* update util

Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au>

* code format

Signed-off-by: groot <yihua.mo@zilliz.com>

* code format

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix vectors results bug (fix #1476)

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* clang format

Signed-off-by: Yhz <yinghao.zou@zilliz.com>

* update test

Signed-off-by: shengjh <jianghong.sheng@zilliz.com>

* fix unittest

Signed-off-by: groot <yihua.mo@zilliz.com>

* add test_config

Signed-off-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au>

* add log

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix a build error

Signed-off-by: groot <yihua.mo@zilliz.com>

* add invalid param search test

Signed-off-by: shengjh <jianghong.sheng@zilliz.com>

* fix range check

Signed-off-by: Nicky <nicky.xj.lin@gmail.com>

* cpmpact/flush case passed

Signed-off-by: del-zhenwu <zhenxiang.li@zilliz.com>

* fix unittest failures

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix unittest failures

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix unittest failures

Signed-off-by: groot <yihua.mo@zilliz.com>

* validate json parameters in request

Signed-off-by: groot <yihua.mo@zilliz.com>

* add unittest cases

Signed-off-by: groot <yihua.mo@zilliz.com>

* update test index/search

Signed-off-by: shengjh <jianghong.sheng@zilliz.com>

* update test_config

Signed-off-by: sahuang <xiaohaix@student.unimelb.edu.au>

* fix

Signed-off-by: shengjh <jianghong.sheng@zilliz.com>

* support nsg and ivf-nlist

Signed-off-by: Nicky <nicky.xj.lin@gmail.com>

* update

Signed-off-by: xiaojun.lin <xiaojun.lin@zilliz.com>

* fix validation bug

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix python test bug

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix python test bug

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix python test bug

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix python test bug

Signed-off-by: groot <yihua.mo@zilliz.com>

* code format

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix python test failure

Signed-off-by: groot <yihua.mo@zilliz.com>

* remove rnsg cases

Signed-off-by: zhenwu <zw@zilliz.com>

* fix python test failure

Signed-off-by: groot <yihua.mo@zilliz.com>

* Update changelog

Signed-off-by: JinHai-CN <hai.jin@zilliz.com>

* Fix typo

Signed-off-by: JinHai-CN <hai.jin@zilliz.com>

* add pq to test_index && multithread test

Signed-off-by: shengjh <jianghong.sheng@zilliz.com>

* add pq to test_search

Signed-off-by: shengjh <jianghong.sheng@zilliz.com>

* Fix format

Signed-off-by: JinHai-CN <hai.jin@zilliz.com>

* Update CHANGELOG

Signed-off-by: JinHai-CN <hai.jin@zilliz.com>

* Fix compiling error

Signed-off-by: JinHai-CN <hai.jin@zilliz.com>

* Fix compiling error

Signed-off-by: JinHai-CN <hai.jin@zilliz.com>

* fix config bug

Signed-off-by: groot <yihua.mo@zilliz.com>

* code format

Signed-off-by: groot <yihua.mo@zilliz.com>

* fix config test

Signed-off-by: xiaojun.lin <xiaojun.lin@zilliz.com>

* Update CHANGELOG.md

Signed-off-by: JinHai-CN <hai.jin@zilliz.com>

* Update CHANGELOG.md

Signed-off-by: JinHai-CN <hai.jin@zilliz.com>

* disable config test case

Signed-off-by: zhenwu <zw@zilliz.com>

Co-authored-by: Nicky <nicky.xj.lin@gmail.com>
Co-authored-by: zhenwu <zw@zilliz.com>
Co-authored-by: Xiaohai Xu <xiaohaix@student.unimelb.edu.au>
Co-authored-by: shengjh <jianghong.sheng@zilliz.com>
Co-authored-by: xiaojun.lin <xiaojun.lin@zilliz.com>
Co-authored-by: Yhz <yinghao.zou@zilliz.com>
Co-authored-by: del-zhenwu <zhenxiang.li@zilliz.com>
Co-authored-by: JinHai-CN <hai.jin@zilliz.com>
2020-03-07 15:23:34 +08:00

674 lines
22 KiB
C++

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
#include <boost/filesystem.hpp>
#include <chrono>
#include <cmath>
#include <fstream>
#include <iostream>
#include <limits>
#include <random>
#include <thread>
#include "db/Constants.h"
#include "db/Utils.h"
#include "db/engine/EngineFactory.h"
#include "db/insert/MemTable.h"
#include "db/insert/MemTableFile.h"
#include "db/insert/VectorSource.h"
#include "db/meta/MetaConsts.h"
#include "db/utils.h"
#include "gtest/gtest.h"
#include "metrics/Metrics.h"
namespace {
static constexpr int64_t TABLE_DIM = 256;
std::string
GetTableName() {
auto now = std::chrono::system_clock::now();
auto micros = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
static std::string table_name = std::to_string(micros);
return table_name;
}
milvus::engine::meta::TableSchema
BuildTableSchema() {
milvus::engine::meta::TableSchema table_info;
table_info.dimension_ = TABLE_DIM;
table_info.table_id_ = GetTableName();
table_info.metric_type_ = (int32_t)milvus::engine::MetricType::L2;
table_info.engine_type_ = (int)milvus::engine::EngineType::FAISS_IDMAP;
return table_info;
}
void
BuildVectors(uint64_t n, milvus::engine::VectorsData& vectors) {
vectors.vector_count_ = n;
vectors.float_data_.clear();
vectors.float_data_.resize(n * TABLE_DIM);
float* data = vectors.float_data_.data();
for (int i = 0; i < n; i++) {
for (int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48();
}
}
} // namespace
TEST_F(DeleteTest, delete_in_mem) {
milvus::engine::meta::TableSchema table_info = BuildTableSchema();
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
int64_t nb = 100000;
milvus::engine::VectorsData xb;
BuildVectors(nb, xb);
for (int64_t i = 0; i < nb; i++) {
xb.id_array_.push_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int64_t> dis(0, nb - 1);
int64_t num_query = 10;
std::map<int64_t, milvus::engine::VectorsData> search_vectors;
for (int64_t i = 0; i < num_query; ++i) {
int64_t index = dis(gen);
milvus::engine::VectorsData search;
search.vector_count_ = 1;
for (int64_t j = 0; j < TABLE_DIM; j++) {
search.float_data_.push_back(xb.float_data_[index * TABLE_DIM + j]);
}
search_vectors.insert(std::make_pair(xb.id_array_[index], search));
}
milvus::engine::IDNumbers ids_to_delete;
for (auto& kv : search_vectors) {
ids_to_delete.emplace_back(kv.first);
}
stat = db_->DeleteVectors(GetTableName(), ids_to_delete);
ASSERT_TRUE(stat.ok());
// std::this_thread::sleep_for(std::chrono::seconds(3)); // ensure raw data write to disk
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
uint64_t row_count;
stat = db_->GetTableRowCount(GetTableName(), row_count);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(row_count, nb - search_vectors.size());
int topk = 10, nprobe = 10;
for (auto& pair : search_vectors) {
auto& search = pair.second;
std::vector<std::string> tags;
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
stat = db_->Query(dummy_context_, GetTableName(), tags, topk, nprobe, search, result_ids, result_distances);
ASSERT_NE(result_ids[0], pair.first);
// ASSERT_LT(result_distances[0], 1e-4);
ASSERT_GT(result_distances[0], 1);
}
}
TEST_F(DeleteTest, delete_on_disk) {
milvus::engine::meta::TableSchema table_info = BuildTableSchema();
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
int64_t nb = 100000;
milvus::engine::VectorsData xb;
BuildVectors(nb, xb);
for (int64_t i = 0; i < nb; i++) {
xb.id_array_.push_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int64_t> dis(0, nb - 1);
int64_t num_query = 10;
std::map<int64_t, milvus::engine::VectorsData> search_vectors;
for (int64_t i = 0; i < num_query; ++i) {
int64_t index = dis(gen);
milvus::engine::VectorsData search;
search.vector_count_ = 1;
for (int64_t j = 0; j < TABLE_DIM; j++) {
search.float_data_.push_back(xb.float_data_[index * TABLE_DIM + j]);
}
search_vectors.insert(std::make_pair(xb.id_array_[index], search));
}
// std::this_thread::sleep_for(std::chrono::seconds(3)); // ensure raw data write to disk
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
for (auto& kv : search_vectors) {
stat = db_->DeleteVector(GetTableName(), kv.first);
ASSERT_TRUE(stat.ok());
}
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
uint64_t row_count;
stat = db_->GetTableRowCount(GetTableName(), row_count);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(row_count, nb - search_vectors.size());
int topk = 10, nprobe = 10;
for (auto& pair : search_vectors) {
auto& search = pair.second;
std::vector<std::string> tags;
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
stat = db_->Query(dummy_context_, GetTableName(), tags, topk, nprobe, search, result_ids, result_distances);
ASSERT_NE(result_ids[0], pair.first);
// ASSERT_LT(result_distances[0], 1e-4);
ASSERT_GT(result_distances[0], 1);
}
}
TEST_F(DeleteTest, delete_multiple_times) {
milvus::engine::meta::TableSchema table_info = BuildTableSchema();
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
int64_t nb = 100000;
milvus::engine::VectorsData xb;
BuildVectors(nb, xb);
for (int64_t i = 0; i < nb; i++) {
xb.id_array_.push_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int64_t> dis(0, nb - 1);
int64_t num_query = 10;
std::map<int64_t, milvus::engine::VectorsData> search_vectors;
for (int64_t i = 0; i < num_query; ++i) {
int64_t index = dis(gen);
milvus::engine::VectorsData search;
search.vector_count_ = 1;
for (int64_t j = 0; j < TABLE_DIM; j++) {
search.float_data_.push_back(xb.float_data_[index * TABLE_DIM + j]);
}
search_vectors.insert(std::make_pair(xb.id_array_[index], search));
}
// std::this_thread::sleep_for(std::chrono::seconds(3)); // ensure raw data write to disk
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
int topk = 10, nprobe = 10;
for (auto& pair : search_vectors) {
std::vector<int64_t> to_delete{pair.first};
stat = db_->DeleteVectors(GetTableName(), to_delete);
ASSERT_TRUE(stat.ok());
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
auto& search = pair.second;
std::vector<std::string> tags;
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
stat = db_->Query(dummy_context_, GetTableName(), tags, topk, nprobe, search, result_ids, result_distances);
ASSERT_NE(result_ids[0], pair.first);
// ASSERT_LT(result_distances[0], 1e-4);
ASSERT_GT(result_distances[0], 1);
}
}
TEST_F(DeleteTest, delete_with_index) {
milvus::engine::meta::TableSchema table_info = BuildTableSchema();
table_info.engine_type_ = (int32_t)milvus::engine::EngineType::FAISS_IVFFLAT;
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
int64_t nb = 10000;
milvus::engine::VectorsData xb;
BuildVectors(nb, xb);
for (int64_t i = 0; i < nb; i++) {
xb.id_array_.push_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int64_t> dis(0, nb - 1);
int64_t num_query = 10;
std::map<int64_t, milvus::engine::VectorsData> search_vectors;
for (int64_t i = 0; i < num_query; ++i) {
int64_t index = dis(gen);
milvus::engine::VectorsData search;
search.vector_count_ = 1;
for (int64_t j = 0; j < TABLE_DIM; j++) {
search.float_data_.push_back(xb.float_data_[index * TABLE_DIM + j]);
}
search_vectors.insert(std::make_pair(xb.id_array_[index], search));
}
milvus::engine::TableIndex index;
index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8;
index.extra_params_ = {{"nlist", 100}};
stat = db_->CreateIndex(GetTableName(), index);
ASSERT_TRUE(stat.ok());
// std::this_thread::sleep_for(std::chrono::seconds(3)); // ensure raw data write to disk
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
milvus::engine::IDNumbers ids_to_delete;
for (auto& kv : search_vectors) {
ids_to_delete.emplace_back(kv.first);
}
stat = db_->DeleteVectors(GetTableName(), ids_to_delete);
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
uint64_t row_count;
stat = db_->GetTableRowCount(GetTableName(), row_count);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(row_count, nb - ids_to_delete.size());
int topk = 10, nprobe = 10;
for (auto& pair : search_vectors) {
auto& search = pair.second;
std::vector<std::string> tags;
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
stat = db_->Query(dummy_context_, GetTableName(), tags, topk, nprobe, search, result_ids, result_distances);
ASSERT_NE(result_ids[0], pair.first);
// ASSERT_LT(result_distances[0], 1e-4);
ASSERT_GT(result_distances[0], 1);
}
}
TEST_F(DeleteTest, delete_single_vector) {
milvus::engine::meta::TableSchema table_info = BuildTableSchema();
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
int64_t nb = 1;
milvus::engine::VectorsData xb;
BuildVectors(nb, xb);
stat = db_->InsertVectors(GetTableName(), "", xb);
ASSERT_TRUE(stat.ok());
// std::this_thread::sleep_for(std::chrono::seconds(3)); // ensure raw data write to disk
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
stat = db_->DeleteVectors(GetTableName(), xb.id_array_);
ASSERT_TRUE(stat.ok());
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
uint64_t row_count;
stat = db_->GetTableRowCount(GetTableName(), row_count);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(row_count, 0);
const int topk = 1, nprobe = 1;
milvus::json json_params = {{"nprobe", nprobe}};
std::vector<std::string> tags;
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
stat = db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, xb, result_ids, result_distances);
ASSERT_TRUE(result_ids.empty());
ASSERT_TRUE(result_distances.empty());
// ASSERT_EQ(result_ids[0], -1);
// ASSERT_LT(result_distances[0], 1e-4);
// ASSERT_EQ(result_distances[0], std::numeric_limits<float>::max());
}
TEST_F(DeleteTest, delete_add_create_index) {
milvus::engine::meta::TableSchema table_info = BuildTableSchema();
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
int64_t nb = 3000;
milvus::engine::VectorsData xb;
BuildVectors(nb, xb);
stat = db_->InsertVectors(GetTableName(), "", xb);
ASSERT_TRUE(stat.ok());
// stat = db_->Flush();
// ASSERT_TRUE(stat.ok());
milvus::engine::TableIndex index;
index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8;
index.extra_params_ = {{"nlist", 100}};
stat = db_->CreateIndex(GetTableName(), index);
ASSERT_TRUE(stat.ok());
std::vector<milvus::engine::IDNumber> ids_to_delete;
ids_to_delete.emplace_back(xb.id_array_.front());
stat = db_->DeleteVectors(GetTableName(), ids_to_delete);
ASSERT_TRUE(stat.ok());
milvus::engine::VectorsData xb2 = xb;
xb2.id_array_.clear(); // same vector, different id
stat = db_->InsertVectors(GetTableName(), "", xb2);
ASSERT_TRUE(stat.ok());
// stat = db_->Flush();
// ASSERT_TRUE(stat.ok());
stat = db_->CreateIndex(GetTableName(), index);
ASSERT_TRUE(stat.ok());
uint64_t row_count;
stat = db_->GetTableRowCount(GetTableName(), row_count);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(row_count, nb * 2 - 1);
const int topk = 10, nprobe = 10;
milvus::json json_params = {{"nprobe", nprobe}};
std::vector<std::string> tags;
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
milvus::engine::VectorsData qb = xb;
qb.float_data_.resize(TABLE_DIM);
qb.vector_count_ = 1;
qb.id_array_.clear();
stat = db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, qb, result_ids, result_distances);
ASSERT_EQ(result_ids[0], xb2.id_array_.front());
ASSERT_LT(result_distances[0], 1e-4);
result_ids.clear();
result_distances.clear();
stat = db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, ids_to_delete.front(), result_ids,
result_distances);
ASSERT_EQ(result_ids[0], -1);
ASSERT_EQ(result_distances[0], std::numeric_limits<float>::max());
}
TEST_F(DeleteTest, delete_add_auto_flush) {
milvus::engine::meta::TableSchema table_info = BuildTableSchema();
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
int64_t nb = 3000;
milvus::engine::VectorsData xb;
BuildVectors(nb, xb);
stat = db_->InsertVectors(GetTableName(), "", xb);
ASSERT_TRUE(stat.ok());
std::this_thread::sleep_for(std::chrono::seconds(2));
// stat = db_->Flush();
// ASSERT_TRUE(stat.ok());
// milvus::engine::TableIndex index;
// index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8;
// stat = db_->CreateIndex(GetTableName(), index);
// ASSERT_TRUE(stat.ok());
std::vector<milvus::engine::IDNumber> ids_to_delete;
ids_to_delete.emplace_back(xb.id_array_.front());
stat = db_->DeleteVectors(GetTableName(), ids_to_delete);
ASSERT_TRUE(stat.ok());
milvus::engine::VectorsData xb2 = xb;
xb2.id_array_.clear(); // same vector, different id
stat = db_->InsertVectors(GetTableName(), "", xb2);
ASSERT_TRUE(stat.ok());
std::this_thread::sleep_for(std::chrono::seconds(2));
// stat = db_->Flush();
// ASSERT_TRUE(stat.ok());
// stat = db_->CreateIndex(GetTableName(), index);
// ASSERT_TRUE(stat.ok());
uint64_t row_count;
stat = db_->GetTableRowCount(GetTableName(), row_count);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(row_count, nb * 2 - 1);
const int topk = 10, nprobe = 10;
milvus::json json_params = {{"nprobe", nprobe}};
std::vector<std::string> tags;
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
milvus::engine::VectorsData qb = xb;
qb.float_data_.resize(TABLE_DIM);
qb.vector_count_ = 1;
qb.id_array_.clear();
stat = db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, qb, result_ids, result_distances);
ASSERT_EQ(result_ids[0], xb2.id_array_.front());
ASSERT_LT(result_distances[0], 1e-4);
result_ids.clear();
result_distances.clear();
stat = db_->QueryByID(dummy_context_, GetTableName(), tags, topk, nprobe, ids_to_delete.front(), result_ids,
result_distances);
ASSERT_EQ(result_ids[0], -1);
ASSERT_EQ(result_distances[0], std::numeric_limits<float>::max());
}
TEST_F(CompactTest, compact_basic) {
milvus::engine::meta::TableSchema table_info = BuildTableSchema();
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
int64_t nb = 100;
milvus::engine::VectorsData xb;
BuildVectors(nb, xb);
stat = db_->InsertVectors(GetTableName(), "", xb);
ASSERT_TRUE(stat.ok());
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
std::vector<milvus::engine::IDNumber> ids_to_delete;
ids_to_delete.emplace_back(xb.id_array_.front());
ids_to_delete.emplace_back(xb.id_array_.back());
stat = db_->DeleteVectors(GetTableName(), ids_to_delete);
ASSERT_TRUE(stat.ok());
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
uint64_t row_count;
stat = db_->GetTableRowCount(GetTableName(), row_count);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(row_count, nb - 2);
stat = db_->Compact(GetTableName());
ASSERT_TRUE(stat.ok());
const int topk = 1, nprobe = 1;
milvus::json json_params = {{"nprobe", nprobe}};
std::vector<std::string> tags;
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
milvus::engine::VectorsData qb = xb;
for (auto& id : ids_to_delete) {
stat =
db_->QueryByID(dummy_context_, GetTableName(), tags, topk, json_params, id, result_ids, result_distances);
ASSERT_EQ(result_ids[0], -1);
ASSERT_EQ(result_distances[0], std::numeric_limits<float>::max());
}
}
TEST_F(CompactTest, compact_with_index) {
milvus::engine::meta::TableSchema table_info = BuildTableSchema();
table_info.index_file_size_ = milvus::engine::ONE_KB;
table_info.engine_type_ = (int32_t)milvus::engine::EngineType::FAISS_IVFSQ8;
auto stat = db_->CreateTable(table_info);
milvus::engine::meta::TableSchema table_info_get;
table_info_get.table_id_ = GetTableName();
stat = db_->DescribeTable(table_info_get);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);
int64_t nb = 3000;
milvus::engine::VectorsData xb;
BuildVectors(nb, xb);
xb.id_array_.clear();
for (int64_t i = 0; i < nb; i++) {
xb.id_array_.emplace_back(i);
}
stat = db_->InsertVectors(GetTableName(), "", xb);
ASSERT_TRUE(stat.ok());
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int64_t> dis(0, nb - 1);
int64_t num_query = 10;
std::map<int64_t, milvus::engine::VectorsData> search_vectors;
for (int64_t i = 0; i < num_query; ++i) {
int64_t index = dis(gen);
milvus::engine::VectorsData search;
search.vector_count_ = 1;
for (int64_t j = 0; j < TABLE_DIM; j++) {
search.float_data_.push_back(xb.float_data_[index * TABLE_DIM + j]);
}
search_vectors.insert(std::make_pair(xb.id_array_[index], search));
}
milvus::engine::TableIndex index;
index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8;
stat = db_->CreateIndex(GetTableName(), index);
ASSERT_TRUE(stat.ok());
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
milvus::engine::IDNumbers ids_to_delete;
for (auto& kv : search_vectors) {
ids_to_delete.emplace_back(kv.first);
}
stat = db_->DeleteVectors(GetTableName(), ids_to_delete);
stat = db_->Flush();
ASSERT_TRUE(stat.ok());
uint64_t row_count;
stat = db_->GetTableRowCount(GetTableName(), row_count);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(row_count, nb - ids_to_delete.size());
stat = db_->Compact(GetTableName());
ASSERT_TRUE(stat.ok());
stat = db_->GetTableRowCount(GetTableName(), row_count);
ASSERT_TRUE(stat.ok());
ASSERT_EQ(row_count, nb - ids_to_delete.size());
milvus::engine::TableIndex table_index;
stat = db_->DescribeIndex(GetTableName(), table_index);
ASSERT_TRUE(stat.ok());
ASSERT_FLOAT_EQ(table_index.engine_type_, index.engine_type_);
const int topk = 10, nprobe = 10;
milvus::json json_params = {{"nprobe", nprobe}};
for (auto& pair : search_vectors) {
auto& search = pair.second;
std::vector<std::string> tags;
milvus::engine::ResultIds result_ids;
milvus::engine::ResultDistances result_distances;
stat =
db_->Query(dummy_context_, GetTableName(), tags, topk, json_params, search, result_ids, result_distances);
ASSERT_NE(result_ids[0], pair.first);
// ASSERT_LT(result_distances[0], 1e-4);
ASSERT_GT(result_distances[0], 1);
}
}
TEST_F(CompactTest, compact_non_existing_table) {
auto status = db_->Compact("non_existing_table");
ASSERT_FALSE(status.ok());
}