Merge branch 'support-gpu-build' into jinhai

Former-commit-id: eb83a5a6a9379760bd83ba6c29f401b64682d9ce
This commit is contained in:
xj.lin 2019-05-06 10:25:03 +08:00
commit 96c64e0b40
7 changed files with 193 additions and 89 deletions

View File

@ -19,7 +19,7 @@ namespace vecwise {
namespace engine {
const std::string RawIndexType = "IDMap,Flat";
const std::string BuildIndexType = "IDMap,Flat";
const std::string BuildIndexType = "IVF"; // IDMap / IVF
FaissExecutionEngine::FaissExecutionEngine(uint16_t dimension, const std::string& location)

View File

@ -39,11 +39,11 @@ public:
virtual bool reset();
/**
* @brief Same as add, but stores xids instead of sequential ids.
*
* @param data input matrix, size n * d
* @param if ids is not empty ids for the std::vectors
*/
* @brief Same as add, but stores xids instead of sequential ids.
*
* @param data input matrix, size n * d
* @param if ids is not empty ids for the std::vectors
*/
virtual bool add_with_ids(idx_t n, const float *xdata, const long *xids);
/**
@ -57,23 +57,20 @@ public:
*/
virtual bool search(idx_t n, const float *data, idx_t k, float *distances, long *labels) const;
// virtual bool remove_ids(const faiss::IDSelector &sel, long &nremove, long &location);
//virtual bool search(idx_t n, const std::vector<float> &data, idx_t k,
// std::vector<float> &distances, std::vector<float> &labels) const;
// virtual bool remove_ids_range(const faiss::IDSelector &sel, long &nremove);
//virtual bool remove_ids(const faiss::IDSelector &sel, long &nremove, long &location);
//virtual bool remove_ids_range(const faiss::IDSelector &sel, long &nremove);
//virtual bool index_display();
// virtual bool index_display();
//
virtual std::shared_ptr<faiss::Index> data() { return index_; }
virtual const std::shared_ptr<faiss::Index>& data() const { return index_; }
private:
friend void write_index(const Index_ptr &index, const std::string &file_name);
std::shared_ptr<faiss::Index> index_ = nullptr;
// std::vector<faiss::gpu::GpuResources *> res_;
// std::vector<int> devs_;
// bool usegpu = true;
// int ngpus = 0;
// faiss::gpu::GpuMultipleClonerOptions *options = new faiss::gpu::GpuMultipleClonerOptions();
};

View File

@ -6,41 +6,54 @@
#include "mutex"
#include <faiss/gpu/StandardGpuResources.h>
#include "faiss/gpu/GpuIndexIVFFlat.h"
#include "faiss/gpu/GpuAutoTune.h"
#include "faiss/IndexFlat.h"
#include "IndexBuilder.h"
namespace zilliz {
namespace vecwise {
namespace engine {
using std::vector;
// todo(linxj): use ResourceMgr instead
static std::mutex gpu_resource;
static std::mutex cpu_resource;
IndexBuilder::IndexBuilder(const Operand_ptr &opd) {
opd_ = opd;
}
// Default builder: builds the index on the GPU
Index_ptr IndexBuilder::build_all(const long &nb,
const float* xb,
const long* ids,
const float *xb,
const long *ids,
const long &nt,
const float* xt) {
std::shared_ptr<faiss::Index> index = nullptr;
index.reset(faiss::index_factory(opd_->d, opd_->index_type.c_str()));
const float *xt) {
std::shared_ptr<faiss::Index> host_index = nullptr;
{
// currently only cpu resources are used.
std::lock_guard<std::mutex> lk(cpu_resource);
if (!index->is_trained) {
nt == 0 || xt == nullptr ? index->train(nb, xb)
: index->train(nt, xt);
// TODO: list support index-type.
faiss::Index *ori_index = faiss::index_factory(opd_->d, opd_->get_index_type(nb).c_str());
std::lock_guard<std::mutex> lk(gpu_resource);
faiss::gpu::StandardGpuResources res;
auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index);
if (!device_index->is_trained) {
nt == 0 || xt == nullptr ? device_index->train(nb, xb)
: device_index->train(nt, xt);
}
index->add_with_ids(nb, xb, ids); // todo(linxj): support add_with_idmap
device_index->add_with_ids(nb, xb, ids); // TODO: support with add_with_IDMAP
host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index));
delete device_index;
delete ori_index;
}
return std::make_shared<Index>(index);
return std::make_shared<Index>(host_index);
}
Index_ptr IndexBuilder::build_all(const long &nb, const vector<float> &xb,
@ -49,8 +62,32 @@ Index_ptr IndexBuilder::build_all(const long &nb, const vector<float> &xb,
return build_all(nb, xb.data(), ids.data(), nt, xt.data());
}
// Be Factory pattern later
// Forwards the operand (index description) to the IndexBuilder base class.
BgCpuBuilder::BgCpuBuilder(const Operand_ptr &opd)
    : IndexBuilder(opd) {
}
// Builds an index without touching the GPU.
//
// nb / xb / ids : number of base vectors, their data, and the ids stored with them.
// nt / xt       : optional separate training set; when nt is 0 or xt is null the
//                 base vectors themselves are used for training.
// Returns the populated faiss index wrapped in an Index.
Index_ptr BgCpuBuilder::build_all(const long &nb, const float *xb, const long *ids, const long &nt, const float *xt) {
    std::shared_ptr<faiss::Index> cpu_index(
        faiss::index_factory(opd_->d, opd_->get_index_type(nb).c_str()));

    {
        // Training and adding are serialized on the file-level cpu_resource mutex.
        std::lock_guard<std::mutex> guard(cpu_resource);

        if (!cpu_index->is_trained) {
            const bool no_training_set = (nt == 0 || xt == nullptr);
            if (no_training_set) {
                cpu_index->train(nb, xb);
            } else {
                cpu_index->train(nt, xt);
            }
        }

        cpu_index->add_with_ids(nb, xb, ids);
    }

    return std::make_shared<Index>(cpu_index);
}
// TODO: turn this into a proper factory pattern later
// Selects the builder implementation for the given operand.
//
// An operand whose index_type is exactly "IDMap" gets a BgCpuBuilder; every
// other index type gets the default IndexBuilder.
// opd must not be null: it is dereferenced unconditionally.
IndexBuilderPtr GetIndexBuilder(const Operand_ptr &opd) {
    // TODO: fix hardcode
    if (opd->index_type == "IDMap") {
        return std::make_shared<BgCpuBuilder>(opd);
    }

    return std::make_shared<IndexBuilder>(opd);
}

View File

@ -11,25 +11,26 @@
#include "Operand.h"
#include "Index.h"
namespace zilliz {
namespace vecwise {
namespace engine {
class IndexBuilder {
public:
public:
explicit IndexBuilder(const Operand_ptr &opd);
Index_ptr build_all(const long &nb,
const float* xb,
const long* ids,
const long &nt = 0,
const float* xt = nullptr);
virtual Index_ptr build_all(const long &nb,
const float *xb,
const long *ids,
const long &nt = 0,
const float *xt = nullptr);
Index_ptr build_all(const long &nb,
const std::vector<float> &xb,
const std::vector<long> &ids,
const long &nt = 0,
const std::vector<float> &xt = std::vector<float>());
virtual Index_ptr build_all(const long &nb,
const std::vector<float> &xb,
const std::vector<long> &ids,
const long &nt = 0,
const std::vector<float> &xt = std::vector<float>());
void train(const long &nt,
const std::vector<float> &xt);
@ -41,9 +42,19 @@ public:
void set_build_option(const Operand_ptr &opd);
private:
protected:
Operand_ptr opd_ = nullptr;
// std::shared_ptr<faiss::Index> index_ = nullptr;
};
class BgCpuBuilder : public IndexBuilder {
public:
BgCpuBuilder(const Operand_ptr &opd);
virtual Index_ptr build_all(const long &nb,
const float *xb,
const long *ids,
const long &nt = 0,
const float *xt = nullptr) override;
};
using IndexBuilderPtr = std::shared_ptr<IndexBuilder>;

View File

@ -6,16 +6,59 @@
#include "Operand.h"
namespace zilliz {
namespace vecwise {
namespace engine {
using std::string;

// Index families that Operand::index_type may name.
enum IndexType {
    Invalid_Option = 0,  // unrecognized index_type string
    IVF = 1,
    IDMAP = 2
};

// Maps an index_type string to its IndexType value.
// The comparison is exact and case-sensitive; only "IVF" and "IDMap" are
// recognized, anything else yields Invalid_Option.
IndexType resolveIndexType(const string &index_type) {
    struct NamedType {
        const char *name;
        IndexType type;
    };
    static const NamedType known_types[] = {
        {"IVF", IndexType::IVF},
        {"IDMap", IndexType::IDMAP},
    };

    for (const auto &entry : known_types) {
        if (index_type == entry.name) {
            return entry.type;
        }
    }
    return IndexType::Invalid_Option;
}
// Composes the index description string "<preproc>,<index>,<postproc>" from
// the operand's fields and caches it in index_str.
//
// nb: number of base vectors (expected to be at least 100). It is only used
//     for IVF when ncent == 0, to derive the centroid count as
//     nb / 1e6 * 16384 (truncated).
//
// Once index_str is non-empty it is returned as-is, so later calls ignore nb
// and any subsequent changes to the other fields.
string Operand::get_index_type(const int &nb) {
    if (!index_str.empty()) { return index_str; }

    // TODO: support OPQ or ...
    if (!preproc.empty()) { index_str += (preproc + ","); }

    switch (resolveIndexType(index_type)) {
        case Invalid_Option: {
            // TODO: add exception
            break;
        }
        case IVF: {
            int ncentroids = ncent;
            if (ncentroids == 0) {
                ncentroids = static_cast<int>(nb / 1000000.0 * 16384);
                // For small nb the truncated value is 0 (or negative for a
                // negative nb), which would produce "IVF0"; an IVF index
                // needs at least one list, so clamp the derived value.
                if (ncentroids < 1) { ncentroids = 1; }
            }
            index_str += index_type + std::to_string(ncentroids);
            break;
        }
        case IDMAP: {
            index_str += index_type;
            break;
        }
    }

    // TODO: support PQ or ...
    if (!postproc.empty()) { index_str += ("," + postproc); }
    return index_str;
}
std::ostream &operator<<(std::ostream &os, const Operand &obj) {
os << obj.d << " "
<< obj.index_type << " "
<< obj.metric_type << " "
<< obj.preproc << " "
<< obj.postproc << " "
<< obj.metric_type << " "
<< obj.ncent;
return os;
}
@ -23,16 +66,16 @@ std::ostream &operator<<(std::ostream &os, const Operand &obj) {
std::istream &operator>>(std::istream &is, Operand &obj) {
is >> obj.d
>> obj.index_type
>> obj.metric_type
>> obj.preproc
>> obj.postproc
>> obj.metric_type
>> obj.ncent;
return is;
}
std::string operand_to_str(const Operand_ptr &opd) {
std::ostringstream ss;
ss << opd;
ss << *opd;
return ss.str();
}

View File

@ -11,6 +11,7 @@
#include <iostream>
#include <sstream>
namespace zilliz {
namespace vecwise {
namespace engine {
@ -21,11 +22,14 @@ struct Operand {
friend std::istream &operator>>(std::istream &is, Operand &obj);
int d;
std::string index_type = "IVF13864,Flat";
std::string index_type = "IVF";
std::string metric_type = "L2"; //> L2 / IP(Inner Product)
std::string preproc;
std::string postproc;
std::string metric_type = "L2"; // L2 / Inner Product
int ncent;
std::string postproc = "Flat";
std::string index_str;
int ncent = 0;
std::string get_index_type(const int &nb);
};
using Operand_ptr = std::shared_ptr<Operand>;

View File

@ -14,11 +14,21 @@ using namespace zilliz::vecwise::engine;
TEST(operand_test, Wrapper_Test) {
auto opd = std::make_shared<Operand>();
opd->index_type = "IVF16384,Flat";
opd->d = 256;
using std::cout;
using std::endl;
std::cout << opd << std::endl;
auto opd = std::make_shared<Operand>();
opd->index_type = "IVF";
opd->preproc = "OPQ";
opd->postproc = "PQ";
opd->metric_type = "L2";
opd->d = 64;
auto opd_str = operand_to_str(opd);
auto new_opd = str_to_operand(opd_str);
// TODO: fix all place where using opd to build index.
assert(new_opd->get_index_type(10000) == opd->get_index_type(10000));
}
TEST(build_test, Wrapper_Test) {
@ -46,7 +56,7 @@ TEST(build_test, Wrapper_Test) {
//train the index
auto opd = std::make_shared<Operand>();
opd->index_type = "IVF16,Flat";
opd->index_type = "IVF";
opd->d = d;
opd->ncent = ncentroids;
IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd);
@ -68,59 +78,61 @@ TEST(build_test, Wrapper_Test) {
//search in first quadrant
int nq = 1, k = 10;
std::vector<float> xq = {0.5, 0.5, 0.5};
float* result_dists = new float[k];
long* result_ids = new long[k];
float *result_dists = new float[k];
long *result_ids = new long[k];
index_1->search(nq, xq.data(), k, result_dists, result_ids);
for(int i = 0; i < k; i++) {
if(result_ids[i] < 0) {
for (int i = 0; i < k; i++) {
if (result_ids[i] < 0) {
ASSERT_TRUE(false);
break;
}
long id = result_ids[i];
std::cout << "No." << id << " [" << xb[id*3] << ", " << xb[id*3 + 1] << ", "
<< xb[id*3 + 2] <<"] distance = " << result_dists[i] << std::endl;
std::cout << "No." << id << " [" << xb[id * 3] << ", " << xb[id * 3 + 1] << ", "
<< xb[id * 3 + 2] << "] distance = " << result_dists[i] << std::endl;
//makesure result vector is in first quadrant
ASSERT_TRUE(xb[id*3] > 0.0);
ASSERT_TRUE(xb[id*3 + 1] > 0.0);
ASSERT_TRUE(xb[id*3 + 2] > 0.0);
ASSERT_TRUE(xb[id * 3] > 0.0);
ASSERT_TRUE(xb[id * 3 + 1] > 0.0);
ASSERT_TRUE(xb[id * 3 + 2] > 0.0);
}
delete[] result_dists;
delete[] result_ids;
}
TEST(search_test, Wrapper_Test) {
const int dim = 256;
TEST(gpu_build_test, Wrapper_Test) {
using std::vector;
size_t nb = 25000;
size_t nq = 100;
size_t k = 100;
std::vector<float> xb(nb*dim);
std::vector<float> xq(nq*dim);
std::vector<long> ids(nb*dim);
int d = 256;
int nb = 3 * 1000 * 100;
int nq = 100;
vector<float> xb(d * nb);
vector<float> xq(d * nq);
vector<long> ids(nb);
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
for (size_t i = 0; i < nb*dim; i++) {
xb[i] = dis_xt(gen);
ids[i] = i;
}
for (size_t i = 0; i < nq*dim; i++) {
xq[i] = dis_xt(gen);
}
for (auto &e : xb) { e = float(dis_xt(gen)); }
for (auto &e : xq) { e = float(dis_xt(gen)); }
for (int i = 0; i < nb; ++i) { ids[i] = i; }
// result data
std::vector<long> nns_gt(nq*k); // nns = nearst neg search
std::vector<long> nns(nq*k);
std::vector<float> dis_gt(nq*k);
std::vector<float> dis(nq*k);
faiss::Index* index_gt(faiss::index_factory(dim, "IDMap,Flat"));
index_gt->add_with_ids(nb, xb.data(), ids.data());
index_gt->search(nq, xq.data(), 10, dis_gt.data(), nns_gt.data());
std::cout << "data: " << nns_gt[0];
auto opd = std::make_shared<Operand>();
opd->index_type = "IVF";
opd->d = d;
opd->ncent = 256;
IndexBuilderPtr index_builder_1 = GetIndexBuilder(opd);
auto index_1 = index_builder_1->build_all(nb, xb.data(), ids.data());
assert(index_1->ntotal == nb);
assert(index_1->dim == d);
// sanity check: search 5 first vectors of xb
int k = 1;
vector<long> I(5 * k);
vector<float> D(5 * k);
index_1->search(5, xb.data(), k, D.data(), I.data());
for (int i = 0; i < 5; ++i) { assert(i == I[i]); }
}