groot 0f1aa5f8bb Tanimoto distance (#1016)
* Add log to debug #678

* Rename nsg_mix to RNSG in C++ sdk #735

* [skip ci] change __function__

* clang-format

* #766 If partition tag is similar, wrong partition is searched

* #766 If partition tag is similar, wrong partition is searched

* reorder changelog id

* typo

* define interface

* Define interface (#832)

* If partition tag is similar, wrong partition is searched  (#825)

* #766 If partition tag is similar, wrong partition is searched

* #766 If partition tag is similar, wrong partition is searched

* reorder changelog id

* typo

* define interface

Co-authored-by: groot <yihua.mo@zilliz.com>

* faiss & knowhere

* faiss & knowhere (#842)

* Add log to debug #678

* Rename nsg_mix to RNSG in C++ sdk #735

* [skip ci] change __function__

* clang-format

* If partition tag is similar, wrong partition is searched  (#825)

* #766 If partition tag is similar, wrong partition is searched

* #766 If partition tag is similar, wrong partition is searched

* reorder changelog id

* typo

* faiss & knowhere

Co-authored-by: groot <yihua.mo@zilliz.com>

* support binary input

* code lint

* add wrapper interface

* add knowhere unittest

* sdk support binary

* support using metric tanimoto and hamming

* sdk binary insert/query example

* fix bug

* fix bug

* update wrapper

* format

* Improve unittest and fix bugs

* delete printresult

* fix bug

* #823 Support binary vector tanimoto metric

* fix typo

* dimension limit to 32768

* fix

* dimension limit to 32768

* fix describe index bug

* fix #886

* fix #889

* add jaccard cases

* hamming dev-test case

* change test_connect

* Add tanimoto cases

* change the output type of hamming

* add abs

* merge master

* rearrange changelog id

* modify feature description

Co-authored-by: Yukikaze-CZR <48198922+Yukikaze-CZR@users.noreply.github.com>
Co-authored-by: Tinkerrr <linxiaojun.cn@outlook.com>
2020-01-14 19:22:27 +08:00

198 lines
6.8 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <gtest/gtest.h>
#include <memory>
#include <vector>
#include <cstdlib>
#include <cstdio>
#include <fstream>
#include "wrapper/VecIndex.h"
#include "wrapper/utils.h"
#include "knowhere/index/vector_index/helpers/IndexParameter.h"
#include "wrapper/ConfAdapterMgr.h"
#include "wrapper/ConfAdapter.h"
// Forward declaration so the shared-pointer alias can be named before the class is defined.
class DataGenBase;
using DataGenPtr = std::shared_ptr<DataGenBase>;

// Default problem sizes; DataGenBase and BinDataGen use these as their member defaults
// for dim / nb / nq.
constexpr int64_t DIM = 128;
constexpr int64_t NB = 100000;
constexpr int64_t NQ = 10;

// NOTE(review): the names suggest device/resource settings (device id, pinned and temporary
// memory sizes, resource count) -- confirm against the code that consumes them.
constexpr int64_t DEVICEID = 0;
constexpr int64_t PINMEM = 1024 * 1024 * 200;
constexpr int64_t TEMPMEM = 1024 * 1024 * 300;
constexpr int64_t RESNUM = 2;

// The pointers themselves are const: every translation unit that includes this header gets
// its own copy, so a mutable pointer could be re-seated in one file without affecting the
// others, and an unused non-const static also triggers -Wunused-variable.
// NOTE(review): CONFIG_FILE starts with '/', presumably so it can be appended to
// CONFIG_PATH -- verify against the caller.
static const char* const CONFIG_PATH = "/tmp/milvus_test";
static const char* const CONFIG_FILE = "/server_config.yaml";
// gtest fixture for the knowhere tests. Both per-test hooks are only declared here;
// their bodies are not part of this header.
class KnowhereTest : public ::testing::Test {
 protected:
    // Overrides ::testing::Test::SetUp.
    void
    SetUp() override;

    // Overrides ::testing::Test::TearDown.
    void
    TearDown() override;
};
// Holder and generator interface for float-vector test data.
//
// The class is polymorphic (virtual GenData overloads) and is passed around as
// DataGenPtr (std::shared_ptr<DataGenBase>), so it needs a virtual destructor to be
// safely destroyed through a base pointer. The remaining special members are defaulted
// explicitly so that declaring the destructor does not suppress the implicit move
// operations.
class DataGenBase {
 public:
    DataGenBase() = default;
    virtual ~DataGenBase() = default;
    DataGenBase(const DataGenBase&) = default;
    DataGenBase&
    operator=(const DataGenBase&) = default;
    DataGenBase(DataGenBase&&) = default;
    DataGenBase&
    operator=(DataGenBase&&) = default;

    // Raw-buffer overload; the definition is not in this header.
    // NOTE(review): presumably xb/xq are base/query vectors of `dim` floats each and
    // gt_ids/gt_dis receive the top-k ground truth -- confirm against the implementation.
    virtual void
    GenData(const int& dim, const int& nb, const int& nq, float* xb, float* xq, int64_t* ids, const int& k,
            int64_t* gt_ids, float* gt_dis);

    // std::vector overload with the same parameter roles as the raw-buffer overload.
    virtual void
    GenData(const int& dim, const int& nb, const int& nq, std::vector<float>& xb, std::vector<float>& xq,
            std::vector<int64_t>& ids, const int& k, std::vector<int64_t>& gt_ids, std::vector<float>& gt_dis);

    // Checks a result (ids and distances); the definition is not in this header.
    void
    AssertResult(const std::vector<int64_t>& ids, const std::vector<float>& dis);

    // Problem sizes, defaulted from the file-level constants.
    int dim = DIM;
    int nb = NB;
    int nq = NQ;
    int k = 10;

    std::vector<float> xb;
    std::vector<float> xq;
    std::vector<int64_t> ids;

    // Ground Truth
    std::vector<int64_t> gt_ids;
    std::vector<float> gt_dis;
};
// Holder and generator interface for binary (uint8_t) vector test data.
//
// The class declares virtual member functions, so it gets a virtual destructor to make
// destruction through a base pointer well defined. The remaining special members are
// defaulted explicitly so that declaring the destructor does not suppress the implicit
// move operations.
class BinDataGen {
 public:
    BinDataGen() = default;
    virtual ~BinDataGen() = default;
    BinDataGen(const BinDataGen&) = default;
    BinDataGen&
    operator=(const BinDataGen&) = default;
    BinDataGen(BinDataGen&&) = default;
    BinDataGen&
    operator=(BinDataGen&&) = default;

    // Raw-buffer overload; the definition is not in this header.
    // NOTE(review): presumably xb/xq are packed binary base/query vectors and
    // gt_ids/gt_dis receive the top-k ground truth -- confirm against the implementation.
    virtual void
    GenData(const int& dim, const int& nb, const int& nq, uint8_t* xb, uint8_t* xq, int64_t* ids, const int& k,
            int64_t* gt_ids, float* gt_dis);

    // std::vector overload with the same parameter roles as the raw-buffer overload.
    virtual void
    GenData(const int& dim, const int& nb, const int& nq, std::vector<uint8_t>& xb, std::vector<uint8_t>& xq,
            std::vector<int64_t>& ids, const int& k, std::vector<int64_t>& gt_ids, std::vector<float>& gt_dis);

    // Checks a result (ids and distances); the definition is not in this header.
    void
    AssertResult(const std::vector<int64_t>& ids, const std::vector<float>& dis);

    // NOTE(review): presumably fills the member buffers below for the given sizes --
    // the definition is not in this header, confirm there.
    void
    Generate(const int& dim, const int& nb, const int& nq, const int& k);

    // Problem sizes, defaulted from the file-level constants.
    int dim = DIM;
    int nb = NB;
    int nq = NQ;
    int k = 10;

    std::vector<uint8_t> xb;
    std::vector<uint8_t> xq;
    std::vector<int64_t> ids;

    // Ground Truth
    std::vector<int64_t> gt_ids;
    std::vector<float> gt_dis;
};
// Produces knowhere configurations for the index types exercised by the tests.
// Accessed through GetInstance().
class ParamGenerator {
 public:
    // Returns the single function-local static instance.
    static ParamGenerator&
    GetInstance() {
        static ParamGenerator instance;
        return instance;
    }

    // Looks up the adapter registered for `type` and lets it derive a search
    // configuration from `conf`.
    knowhere::Config
    GenSearchConf(const milvus::engine::IndexType& type, const milvus::engine::TempMetaConf& conf) {
        auto adapter = milvus::engine::AdapterMgr::GetInstance().GetAdapter(type);
        return adapter->MatchSearch(conf, type);
    }

    // Looks up the adapter registered for `type` and lets it derive a build
    // configuration from `conf`.
    knowhere::Config
    GenBuild(const milvus::engine::IndexType& type, const milvus::engine::TempMetaConf& conf) {
        auto adapter = milvus::engine::AdapterMgr::GetInstance().GetAdapter(type);
        return adapter->Match(conf);
    }

    // Returns a hard-coded configuration (L2 metric) for the given index type.
    // Index types that are not listed below yield a value-initialized knowhere::Config
    // (presumably a null pointer -- callers should check) instead of flowing off the end
    // of the function, which is undefined behaviour.
    knowhere::Config
    Gen(const milvus::engine::IndexType& type) {
        switch (type) {
            case milvus::engine::IndexType::FAISS_IDMAP: {
                auto tempconf = std::make_shared<knowhere::Cfg>();
                tempconf->metric_type = knowhere::METRICTYPE::L2;
                return tempconf;
            }

            case milvus::engine::IndexType::FAISS_IVFFLAT_CPU:
            case milvus::engine::IndexType::FAISS_IVFFLAT_GPU:
            case milvus::engine::IndexType::FAISS_IVFFLAT_MIX: {
                auto tempconf = std::make_shared<knowhere::IVFCfg>();
                tempconf->nlist = 100;
                tempconf->nprobe = 16;
                tempconf->metric_type = knowhere::METRICTYPE::L2;
                return tempconf;
            }

            case milvus::engine::IndexType::FAISS_IVFSQ8_HYBRID:
            case milvus::engine::IndexType::FAISS_IVFSQ8_CPU:
            case milvus::engine::IndexType::FAISS_IVFSQ8_GPU:
            case milvus::engine::IndexType::FAISS_IVFSQ8_MIX: {
                auto tempconf = std::make_shared<knowhere::IVFSQCfg>();
                tempconf->nlist = 100;
                tempconf->nprobe = 16;
                tempconf->nbits = 8;
                tempconf->metric_type = knowhere::METRICTYPE::L2;
                return tempconf;
            }

            case milvus::engine::IndexType::FAISS_IVFPQ_CPU:
            case milvus::engine::IndexType::FAISS_IVFPQ_GPU:
            case milvus::engine::IndexType::FAISS_IVFPQ_MIX: {
                auto tempconf = std::make_shared<knowhere::IVFPQCfg>();
                tempconf->nlist = 100;
                tempconf->nprobe = 16;
                tempconf->nbits = 8;
                tempconf->m = 8;
                tempconf->metric_type = knowhere::METRICTYPE::L2;
                return tempconf;
            }

            case milvus::engine::IndexType::NSG_MIX: {
                auto tempconf = std::make_shared<knowhere::NSGCfg>();
                tempconf->nlist = 100;
                tempconf->nprobe = 16;
                tempconf->knng = 200;
                // A former `search_length = 8` assignment was a dead store, immediately
                // overwritten by the value below, and has been dropped.
                tempconf->search_length = 40;  // TODO(linxj): be 20 when search
                tempconf->out_degree = 60;
                tempconf->candidate_pool_size = 200;
                tempconf->metric_type = knowhere::METRICTYPE::L2;
                return tempconf;
            }
        }

        // Reached only for index types without a case above.
        return knowhere::Config{};
    }
};
//class SanityCheck : public DataGenBase {
// public:
// void GenData(const int &dim, const int &nb, const int &nq, float *xb, float *xq, long *ids,
// const int &k, long *gt_ids, float *gt_dis) override;
//};