diff --git a/cpp/build-support/lint_exclusions.txt b/cpp/build-support/lint_exclusions.txt index b9c1eaa760..6888dfa32c 100644 --- a/cpp/build-support/lint_exclusions.txt +++ b/cpp/build-support/lint_exclusions.txt @@ -5,5 +5,4 @@ *thirdparty* *easylogging++* *SqliteMetaImpl.cpp -*src/grpc* -*src/core* \ No newline at end of file +*src/grpc* \ No newline at end of file diff --git a/cpp/src/core/knowhere/knowhere/adapter/ArrowAdapter.cpp b/cpp/src/core/knowhere/knowhere/adapter/ArrowAdapter.cpp index c07cb16fca..bc1dfbf13d 100644 --- a/cpp/src/core/knowhere/knowhere/adapter/ArrowAdapter.cpp +++ b/cpp/src/core/knowhere/knowhere/adapter/ArrowAdapter.cpp @@ -15,42 +15,41 @@ // specific language governing permissions and limitations // under the License. - -#include "ArrowAdapter.h" +#include "knowhere/adapter/ArrowAdapter.h" namespace zilliz { namespace knowhere { ArrayPtr -CopyArray(const ArrayPtr &origin) { +CopyArray(const ArrayPtr& origin) { ArrayPtr copy = nullptr; auto copy_data = origin->data()->Copy(); switch (origin->type_id()) { -#define DEFINE_TYPE(type, clazz) \ - case arrow::Type::type: { \ - copy = std::make_shared(copy_data); \ - } +#define DEFINE_TYPE(type, clazz) \ + case arrow::Type::type: { \ + copy = std::make_shared(copy_data); \ + } DEFINE_TYPE(BOOL, BooleanArray) DEFINE_TYPE(BINARY, BinaryArray) DEFINE_TYPE(FIXED_SIZE_BINARY, FixedSizeBinaryArray) DEFINE_TYPE(DECIMAL, Decimal128Array) DEFINE_TYPE(FLOAT, NumericArray) DEFINE_TYPE(INT64, NumericArray) - default:break; + default: + break; } return copy; } SchemaPtr -CopySchema(const SchemaPtr &origin) { +CopySchema(const SchemaPtr& origin) { std::vector> fields; - for (auto &field : origin->fields()) { - auto copy = std::make_shared(field->name(), field->type(),field->nullable(), nullptr); + for (auto& field : origin->fields()) { + auto copy = std::make_shared(field->name(), field->type(), field->nullable(), nullptr); fields.emplace_back(copy); } return std::make_shared(std::move(fields)); } - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/adapter/ArrowAdapter.h b/cpp/src/core/knowhere/knowhere/adapter/ArrowAdapter.h index d19f8f3ae5..f4d0b348cf 100644 --- a/cpp/src/core/knowhere/knowhere/adapter/ArrowAdapter.h +++ b/cpp/src/core/knowhere/knowhere/adapter/ArrowAdapter.h @@ -15,22 +15,22 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include +#include +#include #include "knowhere/common/Array.h" - namespace zilliz { namespace knowhere { ArrayPtr -CopyArray(const ArrayPtr &origin); +CopyArray(const ArrayPtr& origin); SchemaPtr -CopySchema(const SchemaPtr &origin); +CopySchema(const SchemaPtr& origin); -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/adapter/SptagAdapter.cpp b/cpp/src/core/knowhere/knowhere/adapter/SptagAdapter.cpp index 056f4e2cad..8a0ad69591 100644 --- a/cpp/src/core/knowhere/knowhere/adapter/SptagAdapter.cpp +++ b/cpp/src/core/knowhere/knowhere/adapter/SptagAdapter.cpp @@ -15,36 +15,31 @@ // specific language governing permissions and limitations // under the License. +#include "knowhere/adapter/SptagAdapter.h" +#include "knowhere/adapter/Structure.h" #include "knowhere/index/vector_index/helpers/Definitions.h" -#include "SptagAdapter.h" -#include "Structure.h" - namespace zilliz { namespace knowhere { - std::shared_ptr -ConvertToMetadataSet(const DatasetPtr &dataset) { +ConvertToMetadataSet(const DatasetPtr& dataset) { auto array = dataset->array()[0]; auto elems = array->length(); auto p_data = array->data()->GetValues(1, 0); - auto p_offset = (int64_t *) malloc(sizeof(int64_t) * elems); - for (auto i = 0; i <= elems; ++i) - p_offset[i] = i * 8; - - std::shared_ptr metaset(new SPTAG::MemMetadataSet( - SPTAG::ByteArray((std::uint8_t *) p_data, elems * sizeof(int64_t), false), - SPTAG::ByteArray((std::uint8_t *) p_offset, elems * sizeof(int64_t), true), - elems)); + auto p_offset = (int64_t*)malloc(sizeof(int64_t) * elems); + for (auto i = 0; i <= elems; ++i) p_offset[i] = i * 8; + std::shared_ptr metaset( + new SPTAG::MemMetadataSet(SPTAG::ByteArray((std::uint8_t*)p_data, elems * sizeof(int64_t), false), + SPTAG::ByteArray((std::uint8_t*)p_offset, elems * sizeof(int64_t), true), elems)); return metaset; } std::shared_ptr -ConvertToVectorSet(const DatasetPtr &dataset) { +ConvertToVectorSet(const DatasetPtr& dataset) { auto tensor = dataset->tensor()[0]; auto p_data = tensor->raw_mutable_data(); @@ -54,18 +49,16 @@ ConvertToVectorSet(const DatasetPtr &dataset) { SPTAG::ByteArray byte_array(p_data, num_bytes, false); - auto vectorset = std::make_shared(byte_array, - SPTAG::VectorValueType::Float, - dimension, - rows); + auto vectorset = + std::make_shared(byte_array, SPTAG::VectorValueType::Float, dimension, rows); return vectorset; } std::vector -ConvertToQueryResult(const DatasetPtr &dataset, const Config &config) { +ConvertToQueryResult(const DatasetPtr& dataset, const Config& config) { auto tensor = dataset->tensor()[0]; - auto p_data = (float *) tensor->raw_mutable_data(); + auto p_data = (float*)tensor->raw_mutable_data(); auto dimension = tensor->shape()[1]; auto rows = tensor->shape()[0]; @@ -82,23 +75,23 @@ ConvertToDataset(std::vector query_results) { auto k = query_results[0].GetResultNum(); auto elems = query_results.size() * k; - auto p_id = (int64_t *) malloc(sizeof(int64_t) * elems); - auto p_dist = (float *) malloc(sizeof(float) * elems); - // TODO: throw if malloc failed. + auto p_id = (int64_t*)malloc(sizeof(int64_t) * elems); + auto p_dist = (float*)malloc(sizeof(float) * elems); +// TODO: throw if malloc failed. #pragma omp parallel for for (auto i = 0; i < query_results.size(); ++i) { auto results = query_results[i].GetResults(); auto num_result = query_results[i].GetResultNum(); for (auto j = 0; j < num_result; ++j) { -// p_id[i * k + j] = results[j].VID; - p_id[i * k + j] = *(int64_t *) query_results[i].GetMetadata(j).Data(); + // p_id[i * k + j] = results[j].VID; + p_id[i * k + j] = *(int64_t*)query_results[i].GetMetadata(j).Data(); p_dist[i * k + j] = results[j].Dist; } } - auto id_buf = MakeMutableBufferSmart((uint8_t *) p_id, sizeof(int64_t) * elems); - auto dist_buf = MakeMutableBufferSmart((uint8_t *) p_dist, sizeof(float) * elems); + auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems); + auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems); // TODO: magic std::vector id_bufs{nullptr, id_buf}; @@ -109,11 +102,11 @@ ConvertToDataset(std::vector query_results) { auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); -// auto id_array_data = std::make_shared(int64_type, sizeof(int64_t) * elems, id_bufs); -// auto dist_array_data = std::make_shared(float_type, sizeof(float) * elems, dist_bufs); + // auto id_array_data = std::make_shared(int64_type, sizeof(int64_t) * elems, id_bufs); + // auto dist_array_data = std::make_shared(float_type, sizeof(float) * elems, dist_bufs); -// auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems); -// auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems); + // auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems); + // auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems); auto ids = std::make_shared>(id_array_data); auto dists = std::make_shared>(dist_array_data); @@ -127,5 +120,5 @@ ConvertToDataset(std::vector query_results) { return std::make_shared(array, schema); } -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/adapter/SptagAdapter.h b/cpp/src/core/knowhere/knowhere/adapter/SptagAdapter.h index f47ffdc3b5..0889657cf9 100644 --- a/cpp/src/core/knowhere/knowhere/adapter/SptagAdapter.h +++ b/cpp/src/core/knowhere/knowhere/adapter/SptagAdapter.h @@ -15,12 +15,11 @@ // specific language governing permissions and limitations // under the License. - #pragma once -#include - #include +#include +#include #include "knowhere/common/Dataset.h" @@ -28,16 +27,16 @@ namespace zilliz { namespace knowhere { std::shared_ptr -ConvertToVectorSet(const DatasetPtr &dataset); +ConvertToVectorSet(const DatasetPtr& dataset); std::shared_ptr -ConvertToMetadataSet(const DatasetPtr &dataset); +ConvertToMetadataSet(const DatasetPtr& dataset); std::vector -ConvertToQueryResult(const DatasetPtr &dataset, const Config &config); +ConvertToQueryResult(const DatasetPtr& dataset, const Config& config); DatasetPtr ConvertToDataset(std::vector query_results); -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/adapter/Structure.cpp b/cpp/src/core/knowhere/knowhere/adapter/Structure.cpp index 18833b5d36..2cd69f3d10 100644 --- a/cpp/src/core/knowhere/knowhere/adapter/Structure.cpp +++ b/cpp/src/core/knowhere/knowhere/adapter/Structure.cpp @@ -15,15 +15,16 @@ // specific language governing permissions and limitations // under the License. +#include "knowhere/adapter/Structure.h" -#include "Structure.h" - +#include +#include namespace zilliz { namespace knowhere { ArrayPtr -ConstructInt64ArraySmart(uint8_t *data, int64_t size) { +ConstructInt64ArraySmart(uint8_t* data, int64_t size) { // TODO: magic std::vector id_buf{nullptr, MakeMutableBufferSmart(data, size)}; auto type = std::make_shared(); @@ -32,7 +33,7 @@ ConstructInt64ArraySmart(uint8_t *data, int64_t size) { } ArrayPtr -ConstructFloatArraySmart(uint8_t *data, int64_t size) { +ConstructFloatArraySmart(uint8_t* data, int64_t size) { // TODO: magic std::vector id_buf{nullptr, MakeMutableBufferSmart(data, size)}; auto type = std::make_shared(); @@ -41,14 +42,14 @@ ConstructFloatArraySmart(uint8_t *data, int64_t size) { } TensorPtr -ConstructFloatTensorSmart(uint8_t *data, int64_t size, std::vector shape) { +ConstructFloatTensorSmart(uint8_t* data, int64_t size, std::vector shape) { auto buffer = MakeMutableBufferSmart(data, size); auto float_type = std::make_shared(); return std::make_shared(float_type, buffer, shape); } ArrayPtr -ConstructInt64Array(uint8_t *data, int64_t size) { +ConstructInt64Array(uint8_t* data, int64_t size) { // TODO: magic std::vector id_buf{nullptr, MakeMutableBuffer(data, size)}; auto type = std::make_shared(); @@ -57,7 +58,7 @@ ConstructInt64Array(uint8_t *data, int64_t size) { } ArrayPtr -ConstructFloatArray(uint8_t *data, int64_t size) { +ConstructFloatArray(uint8_t* data, int64_t size) { // TODO: magic std::vector id_buf{nullptr, MakeMutableBuffer(data, size)}; auto type = std::make_shared(); @@ -66,23 +67,23 @@ ConstructFloatArray(uint8_t *data, int64_t size) { } TensorPtr -ConstructFloatTensor(uint8_t *data, int64_t size, std::vector shape) { +ConstructFloatTensor(uint8_t* data, int64_t size, std::vector shape) { auto buffer = MakeMutableBuffer(data, size); auto float_type = std::make_shared(); return std::make_shared(float_type, buffer, shape); } FieldPtr -ConstructInt64Field(const std::string &name) { +ConstructInt64Field(const std::string& name) { auto type = std::make_shared(); return std::make_shared(name, type); } - FieldPtr -ConstructFloatField(const std::string &name) { +ConstructFloatField(const std::string& name) { auto type = std::make_shared(); return std::make_shared(name, type); } -} -} + +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/adapter/Structure.h b/cpp/src/core/knowhere/knowhere/adapter/Structure.h index 7539dce2da..526dc91b73 100644 --- a/cpp/src/core/knowhere/knowhere/adapter/Structure.h +++ b/cpp/src/core/knowhere/knowhere/adapter/Structure.h @@ -15,40 +15,40 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include -#include "knowhere/common/Dataset.h" +#include +#include +#include "knowhere/common/Dataset.h" namespace zilliz { namespace knowhere { extern ArrayPtr -ConstructInt64ArraySmart(uint8_t *data, int64_t size); +ConstructInt64ArraySmart(uint8_t* data, int64_t size); extern ArrayPtr -ConstructFloatArraySmart(uint8_t *data, int64_t size); +ConstructFloatArraySmart(uint8_t* data, int64_t size); extern TensorPtr -ConstructFloatTensorSmart(uint8_t *data, int64_t size, std::vector shape); +ConstructFloatTensorSmart(uint8_t* data, int64_t size, std::vector shape); extern ArrayPtr -ConstructInt64Array(uint8_t *data, int64_t size); +ConstructInt64Array(uint8_t* data, int64_t size); extern ArrayPtr -ConstructFloatArray(uint8_t *data, int64_t size); +ConstructFloatArray(uint8_t* data, int64_t size); extern TensorPtr -ConstructFloatTensor(uint8_t *data, int64_t size, std::vector shape); +ConstructFloatTensor(uint8_t* data, int64_t size, std::vector shape); extern FieldPtr -ConstructInt64Field(const std::string &name); +ConstructInt64Field(const std::string& name); extern FieldPtr -ConstructFloatField(const std::string &name); +ConstructFloatField(const std::string& name); - -} -} +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/adapter/VectorAdapter.h b/cpp/src/core/knowhere/knowhere/adapter/VectorAdapter.h index bdace40a74..ae4ef998cd 100644 --- a/cpp/src/core/knowhere/knowhere/adapter/VectorAdapter.h +++ b/cpp/src/core/knowhere/knowhere/adapter/VectorAdapter.h @@ -15,18 +15,16 @@ // specific language governing permissions and limitations // under the License. - #pragma once namespace zilliz { namespace knowhere { -#define GETTENSOR(dataset) \ - auto tensor = dataset->tensor()[0]; \ - auto p_data = tensor->raw_data(); \ - auto dim = tensor->shape()[1]; \ - auto rows = tensor->shape()[0]; \ +#define GETTENSOR(dataset) \ + auto tensor = dataset->tensor()[0]; \ + auto p_data = tensor->raw_data(); \ + auto dim = tensor->shape()[1]; \ + auto rows = tensor->shape()[0]; - -} -} \ No newline at end of file +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Array.h b/cpp/src/core/knowhere/knowhere/common/Array.h index 94a5470029..89bbf6ccd7 100644 --- a/cpp/src/core/knowhere/knowhere/common/Array.h +++ b/cpp/src/core/knowhere/knowhere/common/Array.h @@ -15,14 +15,13 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include +#include #include "Schema.h" - namespace zilliz { namespace knowhere { @@ -35,9 +34,9 @@ using ArrayPtr = std::shared_ptr; using BooleanArray = arrow::BooleanArray; using BooleanArrayPtr = std::shared_ptr; -template +template using NumericArray = arrow::NumericArray; -template +template using NumericArrayPtr = std::shared_ptr>; using BinaryArray = arrow::BinaryArray; @@ -49,6 +48,5 @@ using FixedSizeBinaryArrayPtr = std::shared_ptr; using Decimal128Array = arrow::Decimal128Array; using Decimal128ArrayPtr = std::shared_ptr; - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/BinarySet.h b/cpp/src/core/knowhere/knowhere/common/BinarySet.h index 509db68e2d..6d8580e071 100644 --- a/cpp/src/core/knowhere/knowhere/common/BinarySet.h +++ b/cpp/src/core/knowhere/knowhere/common/BinarySet.h @@ -15,21 +15,19 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include -#include -#include #include +#include +#include +#include #include "Id.h" - namespace zilliz { namespace knowhere { - struct Binary { ID id; std::shared_ptr data; @@ -37,29 +35,28 @@ struct Binary { }; using BinaryPtr = std::shared_ptr; - class BinarySet { public: BinaryPtr - GetByName(const std::string &name) const { + GetByName(const std::string& name) const { return binary_map_.at(name); } void - Append(const std::string &name, BinaryPtr binary) { + Append(const std::string& name, BinaryPtr binary) { binary_map_[name] = std::move(binary); } void - Append(const std::string &name, std::shared_ptr data, int64_t size) { + Append(const std::string& name, std::shared_ptr data, int64_t size) { auto binary = std::make_shared(); binary->data = data; binary->size = size; binary_map_[name] = std::move(binary); } - //void - //Append(const std::string &name, void *data, int64_t size, ID id) { + // void + // Append(const std::string &name, void *data, int64_t size, ID id) { // Binary binary; // binary.data = data; // binary.size = size; @@ -67,7 +64,8 @@ class BinarySet { // binary_map_[name] = binary; //} - void clear() { + void + clear() { binary_map_.clear(); } @@ -75,6 +73,5 @@ class BinarySet { std::map binary_map_; }; - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Buffer.h b/cpp/src/core/knowhere/knowhere/common/Buffer.h index 4468e6ec01..42af87a09b 100644 --- a/cpp/src/core/knowhere/knowhere/common/Buffer.h +++ b/cpp/src/core/knowhere/knowhere/common/Buffer.h @@ -15,14 +15,12 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include #include - namespace zilliz { namespace knowhere { @@ -34,31 +32,32 @@ using MutableBufferPtr = std::shared_ptr; namespace internal { struct BufferDeleter { - void operator()(Buffer *buffer) { - free((void *) buffer->data()); + void + operator()(Buffer* buffer) { + free((void*)buffer->data()); } }; - } + inline BufferPtr -MakeBufferSmart(uint8_t *data, const int64_t size) { +MakeBufferSmart(uint8_t* data, const int64_t size) { return BufferPtr(new Buffer(data, size), internal::BufferDeleter()); } inline MutableBufferPtr -MakeMutableBufferSmart(uint8_t *data, const int64_t size) { +MakeMutableBufferSmart(uint8_t* data, const int64_t size) { return MutableBufferPtr(new MutableBuffer(data, size), internal::BufferDeleter()); } inline BufferPtr -MakeBuffer(uint8_t *data, const int64_t size) { +MakeBuffer(uint8_t* data, const int64_t size) { return std::make_shared(data, size); } inline MutableBufferPtr -MakeMutableBuffer(uint8_t *data, const int64_t size) { +MakeMutableBuffer(uint8_t* data, const int64_t size) { return std::make_shared(data, size); } -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Config.h b/cpp/src/core/knowhere/knowhere/common/Config.h index cfee35c35b..afb621d5e1 100644 --- a/cpp/src/core/knowhere/knowhere/common/Config.h +++ b/cpp/src/core/knowhere/knowhere/common/Config.h @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include @@ -42,20 +41,18 @@ struct Cfg { int64_t gpu_id = DEFAULT_GPUID; int64_t d = DEFAULT_DIM; - Cfg(const int64_t &dim, - const int64_t &k, - const int64_t &gpu_id, - METRICTYPE type) - : metric_type(type), k(k), gpu_id(gpu_id), d(dim) {} + Cfg(const int64_t& dim, const int64_t& k, const int64_t& gpu_id, METRICTYPE type) + : metric_type(type), k(k), gpu_id(gpu_id), d(dim) { + } Cfg() = default; virtual bool - CheckValid(){ + CheckValid() { return true; - }; + } }; using Config = std::shared_ptr; -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Dataset.h b/cpp/src/core/knowhere/knowhere/common/Dataset.h index 91fb165811..4a4f4bdd80 100644 --- a/cpp/src/core/knowhere/knowhere/common/Dataset.h +++ b/cpp/src/core/knowhere/knowhere/common/Dataset.h @@ -15,20 +15,19 @@ // specific language governing permissions and limitations // under the License. - #pragma once -#include #include +#include +#include #include "Array.h" #include "Buffer.h" -#include "Tensor.h" -#include "Schema.h" #include "Config.h" +#include "Schema.h" +#include "Tensor.h" #include "knowhere/adapter/ArrowAdapter.h" - namespace zilliz { namespace knowhere { @@ -40,34 +39,38 @@ class Dataset { public: Dataset() = default; - Dataset(std::vector &&array, SchemaPtr array_schema, - std::vector &&tensor, SchemaPtr tensor_schema) + Dataset(std::vector&& array, SchemaPtr array_schema, std::vector&& tensor, + SchemaPtr tensor_schema) : array_(std::move(array)), array_schema_(std::move(array_schema)), tensor_(std::move(tensor)), - tensor_schema_(std::move(tensor_schema)) {} + tensor_schema_(std::move(tensor_schema)) { + } Dataset(std::vector array, SchemaPtr array_schema) - : array_(std::move(array)), array_schema_(std::move(array_schema)) {} + : array_(std::move(array)), array_schema_(std::move(array_schema)) { + } Dataset(std::vector tensor, SchemaPtr tensor_schema) - : tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) {} + : tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) { + } - Dataset(const Dataset &) = delete; - Dataset &operator=(const Dataset &) = delete; + Dataset(const Dataset&) = delete; + Dataset& + operator=(const Dataset&) = delete; DatasetPtr Clone() { auto dataset = std::make_shared(); std::vector clone_array; - for (auto &array : array_) { + for (auto& array : array_) { clone_array.emplace_back(CopyArray(array)); } dataset->set_array(clone_array); std::vector clone_tensor; - for (auto &tensor : tensor_) { + for (auto& tensor : tensor_) { auto buffer = tensor->data(); std::shared_ptr copy_buffer; // TODO: checkout copy success; @@ -86,16 +89,20 @@ class Dataset { } public: - const std::vector & - array() const { return array_; } + const std::vector& + array() const { + return array_; + } void set_array(std::vector array) { array_ = std::move(array); } - const std::vector & - tensor() const { return tensor_; } + const std::vector& + tensor() const { + return tensor_; + } void set_tensor(std::vector tensor) { @@ -103,7 +110,9 @@ class Dataset { } SchemaConstPtr - array_schema() const { return array_schema_; } + array_schema() const { + return array_schema_; + } void set_array_schema(SchemaPtr array_schema) { @@ -111,18 +120,20 @@ class Dataset { } SchemaConstPtr - tensor_schema() const { return tensor_schema_; } + tensor_schema() const { + return tensor_schema_; + } void set_tensor_schema(SchemaPtr tensor_schema) { tensor_schema_ = std::move(tensor_schema); } - //const Config & - //meta() const { return meta_; } + // const Config & + // meta() const { return meta_; } - //void - //set_meta(Config meta) { + // void + // set_meta(Config meta) { // meta_ = std::move(meta); //} @@ -131,11 +142,10 @@ class Dataset { SchemaPtr array_schema_; std::vector tensor_; SchemaPtr tensor_schema_; - //Config meta_; + // Config meta_; }; using DatasetPtr = std::shared_ptr; - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Exception.cpp b/cpp/src/core/knowhere/knowhere/common/Exception.cpp index a77e85ee41..75d9ce39bb 100644 --- a/cpp/src/core/knowhere/knowhere/common/Exception.cpp +++ b/cpp/src/core/knowhere/knowhere/common/Exception.cpp @@ -15,41 +15,37 @@ // specific language governing permissions and limitations // under the License. - #include -#include "Exception.h" #include "Log.h" +#include "knowhere/common/Exception.h" namespace zilliz { namespace knowhere { +KnowhereException::KnowhereException(const std::string& msg) : msg(msg) { +} -KnowhereException::KnowhereException(const std::string &msg):msg(msg) {} - -KnowhereException::KnowhereException(const std::string &m, const char *funcName, const char *file, int line) { +KnowhereException::KnowhereException(const std::string& m, const char* funcName, const char* file, int line) { #ifdef DEBUG - int size = snprintf(nullptr, 0, "Error in %s at %s:%d: %s", - funcName, file, line, m.c_str()); + int size = snprintf(nullptr, 0, "Error in %s at %s:%d: %s", funcName, file, line, m.c_str()); msg.resize(size + 1); - snprintf(&msg[0], msg.size(), "Error in %s at %s:%d: %s", - funcName, file, line, m.c_str()); + snprintf(&msg[0], msg.size(), "Error in %s at %s:%d: %s", funcName, file, line, m.c_str()); #else std::string file_path(file); auto const pos = file_path.find_last_of('/'); - auto filename = file_path.substr(pos+1).c_str(); + auto filename = file_path.substr(pos + 1).c_str(); - int size = snprintf(nullptr, 0, "Error in %s at %s:%d: %s", - funcName, filename, line, m.c_str()); + int size = snprintf(nullptr, 0, "Error in %s at %s:%d: %s", funcName, filename, line, m.c_str()); msg.resize(size + 1); - snprintf(&msg[0], msg.size(), "Error in %s at %s:%d: %s", - funcName, filename, line, m.c_str()); + snprintf(&msg[0], msg.size(), "Error in %s at %s:%d: %s", funcName, filename, line, m.c_str()); #endif } -const char *KnowhereException::what() const noexcept { +const char* +KnowhereException::what() const noexcept { return msg.c_str(); } -} -} \ No newline at end of file +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Exception.h b/cpp/src/core/knowhere/knowhere/common/Exception.h index d357f0501e..ec89c4d616 100644 --- a/cpp/src/core/knowhere/knowhere/common/Exception.h +++ b/cpp/src/core/knowhere/knowhere/common/Exception.h @@ -15,46 +15,41 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include #include - namespace zilliz { namespace knowhere { class KnowhereException : public std::exception { public: - explicit KnowhereException(const std::string &msg); + explicit KnowhereException(const std::string& msg); - KnowhereException(const std::string &msg, const char *funName, - const char *file, int line); + KnowhereException(const std::string& msg, const char* funName, const char* file, int line); - const char *what() const noexcept override; + const char* + what() const noexcept override; std::string msg; }; +#define KNOHWERE_ERROR_MSG(MSG) printf("%s", KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__).what()) -#define KNOHWERE_ERROR_MSG(MSG)\ -printf("%s", KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__).what()) +#define KNOWHERE_THROW_MSG(MSG) \ + do { \ + throw KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__); \ + } while (false) -#define KNOWHERE_THROW_MSG(MSG)\ -do {\ - throw KnowhereException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__);\ -} while (false) +#define KNOHERE_THROW_FORMAT(FMT, ...) \ + do { \ + std::string __s; \ + int __size = snprintf(nullptr, 0, FMT, __VA_ARGS__); \ + __s.resize(__size + 1); \ + snprintf(&__s[0], __s.size(), FMT, __VA_ARGS__); \ + throw faiss::FaissException(__s, __PRETTY_FUNCTION__, __FILE__, __LINE__); \ + } while (false) -#define KNOHERE_THROW_FORMAT(FMT, ...)\ - do { \ - std::string __s;\ - int __size = snprintf(nullptr, 0, FMT, __VA_ARGS__);\ - __s.resize(__size + 1);\ - snprintf(&__s[0], __s.size(), FMT, __VA_ARGS__);\ - throw faiss::FaissException(__s, __PRETTY_FUNCTION__, __FILE__, __LINE__);\ - } while (false) - - -} -} \ No newline at end of file +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Id.h b/cpp/src/core/knowhere/knowhere/common/Id.h index 87e1467c44..9686075874 100644 --- a/cpp/src/core/knowhere/knowhere/common/Id.h +++ b/cpp/src/core/knowhere/knowhere/common/Id.h @@ -15,11 +15,10 @@ // specific language governing permissions and limitations // under the License. - #pragma once //#include "zcommon/id/id.h" -//using ID = zilliz::common::ID; +// using ID = zilliz::common::ID; #include #include @@ -27,18 +26,20 @@ namespace zilliz { namespace knowhere { - - class ID { public: constexpr static int64_t kIDSize = 20; public: - const int32_t * - data() const { return content_; } + const int32_t* + data() const { + return content_; + } - int32_t * - mutable_data() { return content_; } + int32_t* + mutable_data() { + return content_; + } bool IsValid() const; @@ -47,14 +48,14 @@ class ID { ToString() const; bool - operator==(const ID &that) const; + operator==(const ID& that) const; bool - operator<(const ID &that) const; + operator<(const ID& that) const; protected: int32_t content_[5] = {}; }; -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Log.h b/cpp/src/core/knowhere/knowhere/common/Log.h index 1e390b3c1e..1ca78b155a 100644 --- a/cpp/src/core/knowhere/knowhere/common/Log.h +++ b/cpp/src/core/knowhere/knowhere/common/Log.h @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include "utils/easylogging++.h" @@ -33,5 +32,5 @@ namespace knowhere { #define KNOWHERE_LOG_ERROR LOG(ERROR) << KNOWHERE_DOMAIN_NAME #define KNOWHERE_LOG_FATAL LOG(FATAL) << KNOWHERE_DOMAIN_NAME -} // namespace knowhere -} // namespace zilliz \ No newline at end of file +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Schema.h b/cpp/src/core/knowhere/knowhere/common/Schema.h index b43b7eb875..6dc4784e53 100644 --- a/cpp/src/core/knowhere/knowhere/common/Schema.h +++ b/cpp/src/core/knowhere/knowhere/common/Schema.h @@ -15,18 +15,15 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include #include - namespace zilliz { namespace knowhere { - using DataType = arrow::DataType; using Field = arrow::Field; using FieldPtr = std::shared_ptr; @@ -34,7 +31,5 @@ using Schema = arrow::Schema; using SchemaPtr = std::shared_ptr; using SchemaConstPtr = std::shared_ptr; - - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Tensor.h b/cpp/src/core/knowhere/knowhere/common/Tensor.h index 42e86dc4d1..cd23933e96 100644 --- a/cpp/src/core/knowhere/knowhere/common/Tensor.h +++ b/cpp/src/core/knowhere/knowhere/common/Tensor.h @@ -15,21 +15,17 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include #include - namespace zilliz { namespace knowhere { - using Tensor = arrow::Tensor; using TensorPtr = std::shared_ptr; - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Timer.cpp b/cpp/src/core/knowhere/knowhere/common/Timer.cpp index 7f2cb71514..f4db63203c 100644 --- a/cpp/src/core/knowhere/knowhere/common/Timer.cpp +++ b/cpp/src/core/knowhere/knowhere/common/Timer.cpp @@ -15,18 +15,14 @@ // specific language governing permissions and limitations // under the License. +#include // TODO(linxj): using Log instead -#include // TODO(linxj): using Log instead - -#include "Timer.h" +#include "knowhere/common/Timer.h" namespace zilliz { namespace knowhere { -TimeRecorder::TimeRecorder(const std::string &header, - int64_t log_level) : - header_(header), - log_level_(log_level) { +TimeRecorder::TimeRecorder(const std::string& header, int64_t log_level) : header_(header), log_level_(log_level) { start_ = last_ = stdclock::now(); } @@ -42,9 +38,10 @@ TimeRecorder::GetTimeSpanStr(double span) { } void -TimeRecorder::PrintTimeRecord(const std::string &msg, double span) { +TimeRecorder::PrintTimeRecord(const std::string& msg, double span) { std::string str_log; - if (!header_.empty()) str_log += header_ + ": "; + if (!header_.empty()) + str_log += header_ + ": "; str_log += msg; str_log += " ("; str_log += TimeRecorder::GetTimeSpanStr(span); @@ -55,35 +52,35 @@ TimeRecorder::PrintTimeRecord(const std::string &msg, double span) { std::cout << str_log << std::endl; break; } - //case 1: { - // SERVER_LOG_DEBUG << str_log; - // break; - //} - //case 2: { - // SERVER_LOG_INFO << str_log; - // break; - //} - //case 3: { - // SERVER_LOG_WARNING << str_log; - // break; - //} - //case 4: { - // SERVER_LOG_ERROR << str_log; - // break; - //} - //case 5: { - // SERVER_LOG_FATAL << str_log; - // break; - //} - //default: { - // SERVER_LOG_INFO << str_log; - // break; - //} + // case 1: { + // SERVER_LOG_DEBUG << str_log; + // break; + //} + // case 2: { + // SERVER_LOG_INFO << str_log; + // break; + //} + // case 3: { + // SERVER_LOG_WARNING << str_log; + // break; + //} + // case 4: { + // SERVER_LOG_ERROR << str_log; + // break; + //} + // case 5: { + // SERVER_LOG_FATAL << str_log; + // break; + //} + // default: { + // SERVER_LOG_INFO << str_log; + // break; + //} } } double -TimeRecorder::RecordSection(const std::string &msg) { +TimeRecorder::RecordSection(const std::string& msg) { stdclock::time_point curr = stdclock::now(); double span = (std::chrono::duration(curr - last_)).count(); last_ = curr; @@ -93,7 +90,7 @@ TimeRecorder::RecordSection(const std::string &msg) { } double -TimeRecorder::ElapseFromBegin(const std::string &msg) { +TimeRecorder::ElapseFromBegin(const std::string& msg) { stdclock::time_point curr = stdclock::now(); double span = (std::chrono::duration(curr - start_)).count(); @@ -101,5 +98,5 @@ TimeRecorder::ElapseFromBegin(const std::string &msg) { return span; } -} -} \ No newline at end of file +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/common/Timer.h b/cpp/src/core/knowhere/knowhere/common/Timer.h index 8ecd60df99..b1557e4c20 100644 --- a/cpp/src/core/knowhere/knowhere/common/Timer.h +++ b/cpp/src/core/knowhere/knowhere/common/Timer.h @@ -15,11 +15,10 @@ // specific language governing permissions and limitations // under the License. - #pragma once -#include #include +#include namespace zilliz { namespace knowhere { @@ -28,19 +27,22 @@ class TimeRecorder { using stdclock = std::chrono::high_resolution_clock; public: - TimeRecorder(const std::string &header, - int64_t log_level = 0); + explicit TimeRecorder(const std::string& header, int64_t log_level = 0); - ~TimeRecorder();//trace = 0, debug = 1, info = 2, warn = 3, error = 4, critical = 5 + ~TimeRecorder(); // trace = 0, debug = 1, info = 2, warn = 3, error = 4, critical = 5 - double RecordSection(const std::string &msg); + double + RecordSection(const std::string& msg); - double ElapseFromBegin(const std::string &msg); + double + ElapseFromBegin(const std::string& msg); - static std::string GetTimeSpanStr(double span); + static std::string + GetTimeSpanStr(double span); private: - void PrintTimeRecord(const std::string &msg, double span); + void + PrintTimeRecord(const std::string& msg, double span); private: std::string header_; @@ -49,5 +51,5 @@ class TimeRecorder { int64_t log_level_; }; -} -} +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/Index.h b/cpp/src/core/knowhere/knowhere/index/Index.h index 4ccbef394c..613a8d049a 100644 --- a/cpp/src/core/knowhere/knowhere/index/Index.h +++ b/cpp/src/core/knowhere/knowhere/index/Index.h @@ -15,54 +15,55 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include +#include "IndexModel.h" +#include "IndexType.h" #include "knowhere/common/BinarySet.h" #include "knowhere/common/Dataset.h" -#include "IndexType.h" -#include "IndexModel.h" #include "knowhere/index/preprocessor/Preprocessor.h" - namespace zilliz { namespace knowhere { - class Index { public: virtual BinarySet Serialize() = 0; virtual void - Load(const BinarySet &index_binary) = 0; + Load(const BinarySet& index_binary) = 0; // @throw virtual DatasetPtr - Search(const DatasetPtr &dataset, const Config &config) = 0; + Search(const DatasetPtr& dataset, const Config& config) = 0; public: IndexType - idx_type() const { return idx_type_; } + idx_type() const { + return idx_type_; + } void - set_idx_type(IndexType idx_type) { idx_type_ = idx_type; } + set_idx_type(IndexType idx_type) { + idx_type_ = idx_type; + } virtual void - set_preprocessor(PreprocessorPtr preprocessor) {} + set_preprocessor(PreprocessorPtr preprocessor) { + } virtual void - set_index_model(IndexModelPtr model) {} + set_index_model(IndexModelPtr model) { + } private: IndexType idx_type_; }; - using IndexPtr = std::shared_ptr; - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/IndexModel.h b/cpp/src/core/knowhere/knowhere/index/IndexModel.h index 7557363f65..64acc5ad22 100644 --- a/cpp/src/core/knowhere/knowhere/index/IndexModel.h +++ b/cpp/src/core/knowhere/knowhere/index/IndexModel.h @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include @@ -24,19 +23,16 @@ namespace zilliz { namespace knowhere { - class IndexModel { public: virtual BinarySet Serialize() = 0; virtual void - Load(const BinarySet &binary) = 0; + Load(const BinarySet& binary) = 0; }; using IndexModelPtr = std::shared_ptr; - - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/IndexType.h b/cpp/src/core/knowhere/knowhere/index/IndexType.h index 3ece6287f5..64a2188790 100644 --- a/cpp/src/core/knowhere/knowhere/index/IndexType.h +++ b/cpp/src/core/knowhere/knowhere/index/IndexType.h @@ -15,14 +15,11 @@ // specific language governing permissions and limitations // under the License. - #pragma once - namespace zilliz { namespace knowhere { - enum class IndexType { kUnknown = 0, kVecIdxBegin = 100, @@ -30,6 +27,5 @@ enum class IndexType { kVecIdxEnd, }; - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/preprocessor/Normalize.cpp b/cpp/src/core/knowhere/knowhere/index/preprocessor/Normalize.cpp index 41f6ad3715..2a31a22f16 100644 --- a/cpp/src/core/knowhere/knowhere/index/preprocessor/Normalize.cpp +++ b/cpp/src/core/knowhere/knowhere/index/preprocessor/Normalize.cpp @@ -1,14 +1,30 @@ +//// Licensed to the Apache Software Foundation (ASF) under one +//// or more contributor license agreements. See the NOTICE file +//// distributed with this work for additional information +//// regarding copyright ownership. The ASF licenses this file +//// to you under the Apache License, Version 2.0 (the +//// "License"); you may not use this file except in compliance +//// with the License. You may obtain a copy of the License at +//// +//// http://www.apache.org/licenses/LICENSE-2.0 +//// +//// Unless required by applicable law or agreed to in writing, +//// software distributed under the License is distributed on an +//// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +//// KIND, either express or implied. See the License for the +//// specific language governing permissions and limitations +//// under the License. // //#include "knowhere/index/vector_index/definitions.h" //#include "knowhere/common/config.h" -//#include "knowhere/index/preprocessor/normalize.h" +#include "knowhere/index/preprocessor/Normalize.h" // // -//namespace zilliz { -//namespace knowhere { +// namespace zilliz { +// namespace knowhere { // -//DatasetPtr -//NormalizePreprocessor::Preprocess(const DatasetPtr &dataset) { +// DatasetPtr +// NormalizePreprocessor::Preprocess(const DatasetPtr &dataset) { // // TODO: wrap dataset->tensor // auto tensor = dataset->tensor()[0]; // auto p_data = (float *)tensor->raw_mutable_data(); @@ -21,8 +37,8 @@ // } //} // -//void -//NormalizePreprocessor::Normalize(float *arr, int64_t dimension) { +// void +// NormalizePreprocessor::Normalize(float *arr, int64_t dimension) { // double vector_length = 0; // for (auto j = 0; j < dimension; j++) { // double val = arr[j]; @@ -39,4 +55,3 @@ // //} // namespace knowhere //} // namespace zilliz - diff --git a/cpp/src/core/knowhere/knowhere/index/preprocessor/Normalize.h b/cpp/src/core/knowhere/knowhere/index/preprocessor/Normalize.h index 399a5d3490..9e918d56b8 100644 --- a/cpp/src/core/knowhere/knowhere/index/preprocessor/Normalize.h +++ b/cpp/src/core/knowhere/knowhere/index/preprocessor/Normalize.h @@ -1,13 +1,30 @@ +//// Licensed to the Apache Software Foundation (ASF) under one +//// or more contributor license agreements. See the NOTICE file +//// distributed with this work for additional information +//// regarding copyright ownership. The ASF licenses this file +//// to you under the Apache License, Version 2.0 (the +//// "License"); you may not use this file except in compliance +//// with the License. You may obtain a copy of the License at +//// +//// http://www.apache.org/licenses/LICENSE-2.0 +//// +//// Unless required by applicable law or agreed to in writing, +//// software distributed under the License is distributed on an +//// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +//// KIND, either express or implied. See the License for the +//// specific language governing permissions and limitations +//// under the License. +// //#pragma once // //#include //#include "preprocessor.h" // // -//namespace zilliz { -//namespace knowhere { +// namespace zilliz { +// namespace knowhere { // -//class NormalizePreprocessor : public Preprocessor { +// class NormalizePreprocessor : public Preprocessor { // public: // DatasetPtr // Preprocess(const DatasetPtr &input) override; @@ -19,7 +36,7 @@ //}; // // -//using NormalizePreprocessorPtr = std::shared_ptr; +// using NormalizePreprocessorPtr = std::shared_ptr; // // //} // namespace knowhere diff --git a/cpp/src/core/knowhere/knowhere/index/preprocessor/Preprocessor.h b/cpp/src/core/knowhere/knowhere/index/preprocessor/Preprocessor.h index e1c01d2085..c04dbdf3dc 100644 --- a/cpp/src/core/knowhere/knowhere/index/preprocessor/Preprocessor.h +++ b/cpp/src/core/knowhere/knowhere/index/preprocessor/Preprocessor.h @@ -15,27 +15,22 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include #include "knowhere/common/Dataset.h" - namespace zilliz { namespace knowhere { - class Preprocessor { public: virtual DatasetPtr - Preprocess(const DatasetPtr &input) = 0; + Preprocess(const DatasetPtr& input) = 0; }; - using PreprocessorPtr = std::shared_ptr; - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/FaissBaseIndex.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/FaissBaseIndex.cpp index a3ab7956cb..5e90da9d56 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/FaissBaseIndex.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/FaissBaseIndex.cpp @@ -15,23 +15,24 @@ // specific language governing permissions and limitations // under the License. - -#include #include +#include +#include #include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/FaissBaseIndex.h" #include "knowhere/index/vector_index/helpers/FaissIO.h" -#include "FaissBaseIndex.h" - namespace zilliz { namespace knowhere { -FaissBaseIndex::FaissBaseIndex(std::shared_ptr index) : index_(std::move(index)) {} +FaissBaseIndex::FaissBaseIndex(std::shared_ptr index) : index_(std::move(index)) { +} -BinarySet FaissBaseIndex::SerializeImpl() { +BinarySet +FaissBaseIndex::SerializeImpl() { try { - faiss::Index *index = index_.get(); + faiss::Index* index = index_.get(); SealImpl(); @@ -44,37 +45,38 @@ BinarySet FaissBaseIndex::SerializeImpl() { // TODO(linxj): use virtual func Name() instead of raw string. res_set.Append("IVF", data, writer.rp); return res_set; - } catch (std::exception &e) { + } catch (std::exception& e) { KNOWHERE_THROW_MSG(e.what()); } } -void FaissBaseIndex::LoadImpl(const BinarySet &index_binary) { +void +FaissBaseIndex::LoadImpl(const BinarySet& index_binary) { auto binary = index_binary.GetByName("IVF"); MemoryIOReader reader; reader.total = binary->size; reader.data_ = binary->data.get(); - faiss::Index *index = faiss::read_index(&reader); + faiss::Index* index = faiss::read_index(&reader); index_.reset(index); } -void FaissBaseIndex::SealImpl() { -// TODO(linxj): enable -//#ifdef ZILLIZ_FAISS - faiss::Index *index = index_.get(); - auto idx = dynamic_cast(index); +void +FaissBaseIndex::SealImpl() { + // TODO(linxj): enable + //#ifdef ZILLIZ_FAISS + faiss::Index* index = index_.get(); + auto idx = dynamic_cast(index); if (idx != nullptr) { idx->to_readonly(); } - //else { + // else { // KNOHWERE_ERROR_MSG("Seal failed"); //} -//#endif + //#endif } -} // knowhere -} // zilliz - +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/FaissBaseIndex.h b/cpp/src/core/knowhere/knowhere/index/vector_index/FaissBaseIndex.h index 29edbbd61e..bac4a2bf29 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/FaissBaseIndex.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/FaissBaseIndex.h @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include @@ -24,7 +23,6 @@ #include "knowhere/common/BinarySet.h" - namespace zilliz { namespace knowhere { @@ -36,7 +34,7 @@ class FaissBaseIndex { SerializeImpl(); virtual void - LoadImpl(const BinarySet &index_binary); + LoadImpl(const BinarySet& index_binary); virtual void SealImpl(); @@ -45,8 +43,5 @@ class FaissBaseIndex { std::shared_ptr index_ = nullptr; }; -} // knowhere -} // zilliz - - - +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp index ba41859135..332d31bce0 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVF.cpp @@ -15,30 +15,28 @@ // specific language governing permissions and limitations // under the License. - - +#include +#include #include #include #include -#include -#include #include +#include - -#include "knowhere/common/Exception.h" -#include "knowhere/index/vector_index/helpers/Cloner.h" #include "knowhere/adapter/VectorAdapter.h" -#include "IndexGPUIVF.h" +#include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/IndexGPUIVF.h" +#include "knowhere/index/vector_index/helpers/Cloner.h" #include "knowhere/index/vector_index/helpers/FaissIO.h" - namespace zilliz { namespace knowhere { -IndexModelPtr GPUIVF::Train(const DatasetPtr &dataset, const Config &config) { +IndexModelPtr +GPUIVF::Train(const DatasetPtr& dataset, const Config& config) { auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { - build_cfg->CheckValid(); // throw exception + build_cfg->CheckValid(); // throw exception } gpu_id_ = build_cfg->gpu_id; @@ -49,10 +47,9 @@ IndexModelPtr GPUIVF::Train(const DatasetPtr &dataset, const Config &config) { ResScope rs(temp_resource, gpu_id_, true); faiss::gpu::GpuIndexIVFFlatConfig idx_config; idx_config.device = gpu_id_; - faiss::gpu::GpuIndexIVFFlat device_index(temp_resource->faiss_res.get(), dim, - build_cfg->nlist, GetMetricType(build_cfg->metric_type), - idx_config); - device_index.train(rows, (float *) p_data); + faiss::gpu::GpuIndexIVFFlat device_index(temp_resource->faiss_res.get(), dim, build_cfg->nlist, + GetMetricType(build_cfg->metric_type), idx_config); + device_index.train(rows, (float*)p_data); std::shared_ptr host_index = nullptr; host_index.reset(faiss::gpu::index_gpu_to_cpu(&device_index)); @@ -63,7 +60,8 @@ IndexModelPtr GPUIVF::Train(const DatasetPtr &dataset, const Config &config) { } } -void GPUIVF::set_index_model(IndexModelPtr model) { +void +GPUIVF::set_index_model(IndexModelPtr model) { std::lock_guard lk(mutex_); auto host_index = std::static_pointer_cast(model); @@ -77,7 +75,8 @@ void GPUIVF::set_index_model(IndexModelPtr model) { } } -BinarySet GPUIVF::SerializeImpl() { +BinarySet +GPUIVF::SerializeImpl() { if (!index_ || !index_->is_trained) { KNOWHERE_THROW_MSG("index not initialize or trained"); } @@ -85,8 +84,8 @@ BinarySet GPUIVF::SerializeImpl() { try { MemoryIOWriter writer; { - faiss::Index *index = index_.get(); - faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(index); + faiss::Index* index = index_.get(); + faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(index); SealImpl(); @@ -100,19 +99,20 @@ BinarySet GPUIVF::SerializeImpl() { res_set.Append("IVF", data, writer.rp); return res_set; - } catch (std::exception &e) { + } catch (std::exception& e) { KNOWHERE_THROW_MSG(e.what()); } } -void GPUIVF::LoadImpl(const BinarySet &index_binary) { +void +GPUIVF::LoadImpl(const BinarySet& index_binary) { auto binary = index_binary.GetByName("IVF"); MemoryIOReader reader; { reader.total = binary->size; reader.data_ = binary->data.get(); - faiss::Index *index = faiss::read_index(&reader); + faiss::Index* index = faiss::read_index(&reader); if (auto temp_res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { ResScope rs(temp_res, gpu_id_, false); @@ -127,23 +127,20 @@ void GPUIVF::LoadImpl(const BinarySet &index_binary) { } } -IVFIndexPtr GPUIVF::Copy_index_gpu_to_cpu() { +IVFIndexPtr +GPUIVF::Copy_index_gpu_to_cpu() { std::lock_guard lk(mutex_); - faiss::Index *device_index = index_.get(); - faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index); + faiss::Index* device_index = index_.get(); + faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); std::shared_ptr new_index; new_index.reset(host_index); return std::make_shared(new_index); } -void GPUIVF::search_impl(int64_t n, - const float *data, - int64_t k, - float *distances, - int64_t *labels, - const Config &cfg) { +void +GPUIVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { std::lock_guard lk(mutex_); // TODO(linxj): gpu index support GenParams @@ -154,49 +151,54 @@ void GPUIVF::search_impl(int64_t n, { // TODO(linxj): allocate mem ResScope rs(res_, gpu_id_); - device_index->search(n, (float *) data, k, distances, labels); + device_index->search(n, (float*)data, k, distances, labels); } } } -VectorIndexPtr GPUIVF::CopyGpuToCpu(const Config &config) { +VectorIndexPtr +GPUIVF::CopyGpuToCpu(const Config& config) { std::lock_guard lk(mutex_); - faiss::Index *device_index = index_.get(); - faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index); + faiss::Index* device_index = index_.get(); + faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); std::shared_ptr new_index; new_index.reset(host_index); return std::make_shared(new_index); } -VectorIndexPtr GPUIVF::Clone() { +VectorIndexPtr +GPUIVF::Clone() { auto cpu_idx = CopyGpuToCpu(Config()); return ::zilliz::knowhere::cloner::CopyCpuToGpu(cpu_idx, gpu_id_, Config()); } -VectorIndexPtr GPUIVF::CopyGpuToGpu(const int64_t &device_id, const Config &config) { +VectorIndexPtr +GPUIVF::CopyGpuToGpu(const int64_t& device_id, const Config& config) { auto host_index = CopyGpuToCpu(config); return std::static_pointer_cast(host_index)->CopyCpuToGpu(device_id, config); } -void GPUIVF::Add(const DatasetPtr &dataset, const Config &config) { +void +GPUIVF::Add(const DatasetPtr& dataset, const Config& config) { if (auto spt = res_.lock()) { ResScope rs(res_, gpu_id_); IVF::Add(dataset, config); - } - else { + } else { KNOWHERE_THROW_MSG("Add IVF can't get gpu resource"); } } -void GPUIndex::SetGpuDevice(const int &gpu_id) { +void +GPUIndex::SetGpuDevice(const int& gpu_id) { gpu_id_ = gpu_id; } -const int64_t &GPUIndex::GetGpuDevice() { +const int64_t& +GPUIndex::GetGpuDevice() { return gpu_id_; } -} -} +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVF.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVF.h index cd04fe4c54..347e255b87 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVF.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVF.h @@ -15,84 +15,84 @@ // specific language governing permissions and limitations // under the License. - #pragma once +#include +#include #include "IndexIVF.h" #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" - namespace zilliz { namespace knowhere { class GPUIndex { -public: - explicit GPUIndex(const int &device_id) : gpu_id_(device_id) {} + public: + explicit GPUIndex(const int& device_id) : gpu_id_(device_id) { + } - GPUIndex(const int& device_id, const ResPtr& resource): gpu_id_(device_id), res_(resource) {} + GPUIndex(const int& device_id, const ResPtr& resource) : gpu_id_(device_id), res_(resource) { + } - virtual VectorIndexPtr - CopyGpuToCpu(const Config &config) = 0; + virtual VectorIndexPtr + CopyGpuToCpu(const Config& config) = 0; - virtual VectorIndexPtr - CopyGpuToGpu(const int64_t &device_id, const Config &config) = 0; + virtual VectorIndexPtr + CopyGpuToGpu(const int64_t& device_id, const Config& config) = 0; - void - SetGpuDevice(const int &gpu_id); + void + SetGpuDevice(const int& gpu_id); - const int64_t & - GetGpuDevice(); + const int64_t& + GetGpuDevice(); -protected: - int64_t gpu_id_; - ResWPtr res_; + protected: + int64_t gpu_id_; + ResWPtr res_; }; class GPUIVF : public IVF, public GPUIndex { -public: - explicit GPUIVF(const int &device_id) : IVF(), GPUIndex(device_id) {} + public: + explicit GPUIVF(const int& device_id) : IVF(), GPUIndex(device_id) { + } - explicit GPUIVF(std::shared_ptr index, const int64_t &device_id, ResPtr &resource) - : IVF(std::move(index)), GPUIndex(device_id, resource) {}; + explicit GPUIVF(std::shared_ptr index, const int64_t& device_id, ResPtr& resource) + : IVF(std::move(index)), GPUIndex(device_id, resource) { + } - IndexModelPtr - Train(const DatasetPtr &dataset, const Config &config) override; + IndexModelPtr + Train(const DatasetPtr& dataset, const Config& config) override; - void - Add(const DatasetPtr &dataset, const Config &config) override; + void + Add(const DatasetPtr& dataset, const Config& config) override; - void - set_index_model(IndexModelPtr model) override; + void + set_index_model(IndexModelPtr model) override; - //DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override; - VectorIndexPtr - CopyGpuToCpu(const Config &config) override; + // DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override; + VectorIndexPtr + CopyGpuToCpu(const Config& config) override; - VectorIndexPtr - CopyGpuToGpu(const int64_t &device_id, const Config &config) override; + VectorIndexPtr + CopyGpuToGpu(const int64_t& device_id, const Config& config) override; - VectorIndexPtr - Clone() final; + VectorIndexPtr + Clone() final; - // TODO(linxj): Deprecated - virtual IVFIndexPtr Copy_index_gpu_to_cpu(); + // TODO(linxj): Deprecated + virtual IVFIndexPtr + Copy_index_gpu_to_cpu(); -protected: - void - search_impl(int64_t n, - const float *data, - int64_t k, - float *distances, - int64_t *labels, - const Config &cfg) override; + protected: + void + search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override; - BinarySet - SerializeImpl() override; + BinarySet + SerializeImpl() override; - void - LoadImpl(const BinarySet &index_binary) override; + void + LoadImpl(const BinarySet& index_binary) override; }; -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp index 03193ea604..7aa8f7db2a 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.cpp @@ -15,23 +15,23 @@ // specific language governing permissions and limitations // under the License. - -#include -#include #include +#include +#include +#include -#include "IndexGPUIVFPQ.h" -#include "knowhere/common/Exception.h" #include "knowhere/adapter/VectorAdapter.h" - +#include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/IndexGPUIVFPQ.h" namespace zilliz { namespace knowhere { -IndexModelPtr GPUIVFPQ::Train(const DatasetPtr &dataset, const Config &config) { +IndexModelPtr +GPUIVFPQ::Train(const DatasetPtr& dataset, const Config& config) { auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { - build_cfg->CheckValid(); // throw exception + build_cfg->CheckValid(); // throw exception } gpu_id_ = build_cfg->gpu_id; @@ -40,9 +40,9 @@ IndexModelPtr GPUIVFPQ::Train(const DatasetPtr &dataset, const Config &config) { // TODO(linxj): set device here. // TODO(linxj): set gpu resource here. faiss::gpu::StandardGpuResources res; - faiss::gpu::GpuIndexIVFPQ device_index(&res, dim, build_cfg->nlist, build_cfg->m, - build_cfg->nbits, GetMetricType(build_cfg->metric_type)); // IP not support - device_index.train(rows, (float *) p_data); + faiss::gpu::GpuIndexIVFPQ device_index(&res, dim, build_cfg->nlist, build_cfg->m, build_cfg->nbits, + GetMetricType(build_cfg->metric_type)); // IP not support + device_index.train(rows, (float*)p_data); std::shared_ptr host_index = nullptr; host_index.reset(faiss::gpu::index_gpu_to_cpu(&device_index)); @@ -50,20 +50,22 @@ IndexModelPtr GPUIVFPQ::Train(const DatasetPtr &dataset, const Config &config) { return std::make_shared(host_index); } -std::shared_ptr GPUIVFPQ::GenParams(const Config &config) { +std::shared_ptr +GPUIVFPQ::GenParams(const Config& config) { auto params = std::make_shared(); auto search_cfg = std::dynamic_pointer_cast(config); params->nprobe = search_cfg->nprobe; -// params->scan_table_threshold = conf->scan_table_threhold; -// params->polysemous_ht = conf->polysemous_ht; -// params->max_codes = conf->max_codes; + // params->scan_table_threshold = conf->scan_table_threhold; + // params->polysemous_ht = conf->polysemous_ht; + // params->max_codes = conf->max_codes; return params; } -VectorIndexPtr GPUIVFPQ::CopyGpuToCpu(const Config &config) { +VectorIndexPtr +GPUIVFPQ::CopyGpuToCpu(const Config& config) { KNOWHERE_THROW_MSG("not support yet"); } -} // knowhere -} // zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.h index c407ee0cc7..6d298e77e6 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFPQ.h @@ -15,33 +15,32 @@ // specific language governing permissions and limitations // under the License. - #pragma once +#include + #include "IndexGPUIVF.h" namespace zilliz { namespace knowhere { class GPUIVFPQ : public GPUIVF { -public: - explicit GPUIVFPQ(const int &device_id) : GPUIVF(device_id) {} + public: + explicit GPUIVFPQ(const int& device_id) : GPUIVF(device_id) { + } IndexModelPtr - Train(const DatasetPtr &dataset, const Config &config) override; + Train(const DatasetPtr& dataset, const Config& config) override; -public: + public: VectorIndexPtr - CopyGpuToCpu(const Config &config) override; + CopyGpuToCpu(const Config& config) override; -protected: + protected: // TODO(linxj): remove GenParams. std::shared_ptr - GenParams(const Config &config) override; + GenParams(const Config& config) override; }; -} // knowhere -} // zilliz - - - +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp index af23267404..bf2aa22474 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.cpp @@ -15,60 +15,62 @@ // specific language governing permissions and limitations // under the License. - #include +#include +#include #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" -#include "IndexGPUIVFSQ.h" -#include "IndexIVFSQ.h" - +#include "knowhere/index/vector_index/IndexGPUIVFSQ.h" +#include "knowhere/index/vector_index/IndexIVFSQ.h" namespace zilliz { namespace knowhere { - IndexModelPtr GPUIVFSQ::Train(const DatasetPtr &dataset, const Config &config) { - auto build_cfg = std::dynamic_pointer_cast(config); - if (build_cfg != nullptr) { - build_cfg->CheckValid(); // throw exception - } - gpu_id_ = build_cfg->gpu_id; - - GETTENSOR(dataset) - - std::stringstream index_type; - index_type << "IVF" << build_cfg->nlist << "," << "SQ" << build_cfg->nbits; - auto build_index = faiss::index_factory(dim, index_type.str().c_str(), GetMetricType(build_cfg->metric_type)); - - auto temp_resource = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_); - if (temp_resource != nullptr) { - ResScope rs(temp_resource, gpu_id_, true); - auto device_index = faiss::gpu::index_cpu_to_gpu(temp_resource->faiss_res.get(), gpu_id_, build_index); - device_index->train(rows, (float *) p_data); - - std::shared_ptr host_index = nullptr; - host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index)); - - delete device_index; - delete build_index; - - return std::make_shared(host_index); - } else { - KNOWHERE_THROW_MSG("Build IVFSQ can't get gpu resource"); - } +IndexModelPtr +GPUIVFSQ::Train(const DatasetPtr& dataset, const Config& config) { + auto build_cfg = std::dynamic_pointer_cast(config); + if (build_cfg != nullptr) { + build_cfg->CheckValid(); // throw exception } + gpu_id_ = build_cfg->gpu_id; - VectorIndexPtr GPUIVFSQ::CopyGpuToCpu(const Config &config) { - std::lock_guard lk(mutex_); + GETTENSOR(dataset) - faiss::Index *device_index = index_.get(); - faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index); + std::stringstream index_type; + index_type << "IVF" << build_cfg->nlist << "," + << "SQ" << build_cfg->nbits; + auto build_index = faiss::index_factory(dim, index_type.str().c_str(), GetMetricType(build_cfg->metric_type)); - std::shared_ptr new_index; - new_index.reset(host_index); - return std::make_shared(new_index); + auto temp_resource = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_); + if (temp_resource != nullptr) { + ResScope rs(temp_resource, gpu_id_, true); + auto device_index = faiss::gpu::index_cpu_to_gpu(temp_resource->faiss_res.get(), gpu_id_, build_index); + device_index->train(rows, (float*)p_data); + + std::shared_ptr host_index = nullptr; + host_index.reset(faiss::gpu::index_gpu_to_cpu(device_index)); + + delete device_index; + delete build_index; + + return std::make_shared(host_index); + } else { + KNOWHERE_THROW_MSG("Build IVFSQ can't get gpu resource"); } +} -} // knowhere -} // zilliz +VectorIndexPtr +GPUIVFSQ::CopyGpuToCpu(const Config& config) { + std::lock_guard lk(mutex_); + faiss::Index* device_index = index_.get(); + faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); + + std::shared_ptr new_index; + new_index.reset(host_index); + return std::make_shared(new_index); +} + +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h index 677907964b..5c6b4a038b 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexGPUIVFSQ.h @@ -15,29 +15,31 @@ // specific language governing permissions and limitations // under the License. - #pragma once -#include "IndexGPUIVF.h" +#include +#include +#include "IndexGPUIVF.h" namespace zilliz { namespace knowhere { class GPUIVFSQ : public GPUIVF { -public: - explicit GPUIVFSQ(const int &device_id) : GPUIVF(device_id) {} + public: + explicit GPUIVFSQ(const int& device_id) : GPUIVF(device_id) { + } - explicit GPUIVFSQ(std::shared_ptr index, const int64_t &device_id, ResPtr &resource) - : GPUIVF(std::move(index), device_id, resource) {}; + explicit GPUIVFSQ(std::shared_ptr index, const int64_t& device_id, ResPtr& resource) + : GPUIVF(std::move(index), device_id, resource) { + } IndexModelPtr - Train(const DatasetPtr &dataset, const Config &config) override; + Train(const DatasetPtr& dataset, const Config& config) override; VectorIndexPtr - CopyGpuToCpu(const Config &config) override; + CopyGpuToCpu(const Config& config) override; }; -} // knowhere -} // zilliz - +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp index f5e3fd8f40..8d4ff16a1f 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp @@ -15,24 +15,23 @@ // specific language governing permissions and limitations // under the License. - -#include #include +#include #include -#include #include +#include +#include - -#include "knowhere/common/Exception.h" #include "knowhere/adapter/VectorAdapter.h" +#include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/IndexIDMAP.h" #include "knowhere/index/vector_index/helpers/FaissIO.h" -#include "IndexIDMAP.h" - namespace zilliz { namespace knowhere { -BinarySet IDMAP::Serialize() { +BinarySet +IDMAP::Serialize() { if (!index_) { KNOWHERE_THROW_MSG("index not initialize"); } @@ -41,31 +40,33 @@ BinarySet IDMAP::Serialize() { return SerializeImpl(); } -void IDMAP::Load(const BinarySet &index_binary) { +void +IDMAP::Load(const BinarySet& index_binary) { std::lock_guard lk(mutex_); LoadImpl(index_binary); } -DatasetPtr IDMAP::Search(const DatasetPtr &dataset, const Config &config) { +DatasetPtr +IDMAP::Search(const DatasetPtr& dataset, const Config& config) { if (!index_) { KNOWHERE_THROW_MSG("index not initialize"); } config->CheckValid(); - //auto metric_type = config["metric_type"].as_string() == "L2" ? + // auto metric_type = config["metric_type"].as_string() == "L2" ? // faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT; - //index_->metric_type = metric_type; + // index_->metric_type = metric_type; GETTENSOR(dataset) auto elems = rows * config->k; - auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems); - auto res_dis = (float *) malloc(sizeof(float) * elems); + auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems); + auto res_dis = (float*)malloc(sizeof(float) * elems); - search_impl(rows, (float *) p_data, config->k, res_dis, res_ids, Config()); + search_impl(rows, (float*)p_data, config->k, res_dis, res_ids, Config()); - auto id_buf = MakeMutableBufferSmart((uint8_t *) res_ids, sizeof(int64_t) * elems); - auto dist_buf = MakeMutableBufferSmart((uint8_t *) res_dis, sizeof(float) * elems); + auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); + auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); std::vector id_bufs{nullptr, id_buf}; std::vector dist_bufs{nullptr, dist_buf}; @@ -83,12 +84,13 @@ DatasetPtr IDMAP::Search(const DatasetPtr &dataset, const Config &config) { return std::make_shared(array, nullptr); } -void IDMAP::search_impl(int64_t n, const float *data, int64_t k, float *distances, int64_t *labels, const Config &cfg) { - index_->search(n, (float *) data, k, distances, labels); - +void +IDMAP::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { + index_->search(n, (float*)data, k, distances, labels); } -void IDMAP::Add(const DatasetPtr &dataset, const Config &config) { +void +IDMAP::Add(const DatasetPtr& dataset, const Config& config) { if (!index_) { KNOWHERE_THROW_MSG("index not initialize"); } @@ -98,49 +100,56 @@ void IDMAP::Add(const DatasetPtr &dataset, const Config &config) { // TODO: magic here. auto array = dataset->array()[0]; - auto p_ids = array->data()->GetValues(1, 0); + auto p_ids = array->data()->GetValues(1, 0); - index_->add_with_ids(rows, (float *) p_data, p_ids); + index_->add_with_ids(rows, (float*)p_data, p_ids); } -int64_t IDMAP::Count() { +int64_t +IDMAP::Count() { return index_->ntotal; } -int64_t IDMAP::Dimension() { +int64_t +IDMAP::Dimension() { return index_->d; } // TODO(linxj): return const pointer -float *IDMAP::GetRawVectors() { +float* +IDMAP::GetRawVectors() { try { - auto file_index = dynamic_cast(index_.get()); + auto file_index = dynamic_cast(index_.get()); auto flat_index = dynamic_cast(file_index->index); return flat_index->xb.data(); - } catch (std::exception &e) { + } catch (std::exception& e) { KNOWHERE_THROW_MSG(e.what()); } } // TODO(linxj): return const pointer -int64_t *IDMAP::GetRawIds() { +int64_t* +IDMAP::GetRawIds() { try { - auto file_index = dynamic_cast(index_.get()); + auto file_index = dynamic_cast(index_.get()); return file_index->id_map.data(); - } catch (std::exception &e) { + } catch (std::exception& e) { KNOWHERE_THROW_MSG(e.what()); } } const char* type = "IDMap,Flat"; -void IDMAP::Train(const Config &config) { + +void +IDMAP::Train(const Config& config) { config->CheckValid(); auto index = faiss::index_factory(config->d, type, GetMetricType(config->metric_type)); index_.reset(index); } -VectorIndexPtr IDMAP::Clone() { +VectorIndexPtr +IDMAP::Clone() { std::lock_guard lk(mutex_); auto clone_index = faiss::clone_index(index_.get()); @@ -149,8 +158,9 @@ VectorIndexPtr IDMAP::Clone() { return std::make_shared(new_index); } -VectorIndexPtr IDMAP::CopyCpuToGpu(const int64_t &device_id, const Config &config) { - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)){ +VectorIndexPtr +IDMAP::CopyCpuToGpu(const int64_t& device_id, const Config& config) { + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get()); @@ -162,38 +172,41 @@ VectorIndexPtr IDMAP::CopyCpuToGpu(const int64_t &device_id, const Config &confi } } -void IDMAP::Seal() { +void +IDMAP::Seal() { // do nothing } -VectorIndexPtr GPUIDMAP::CopyGpuToCpu(const Config &config) { +VectorIndexPtr +GPUIDMAP::CopyGpuToCpu(const Config& config) { std::lock_guard lk(mutex_); - faiss::Index *device_index = index_.get(); - faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(device_index); + faiss::Index* device_index = index_.get(); + faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(device_index); std::shared_ptr new_index; new_index.reset(host_index); return std::make_shared(new_index); } -VectorIndexPtr GPUIDMAP::Clone() { +VectorIndexPtr +GPUIDMAP::Clone() { auto cpu_idx = CopyGpuToCpu(Config()); - if (auto idmap = std::dynamic_pointer_cast(cpu_idx)){ + if (auto idmap = std::dynamic_pointer_cast(cpu_idx)) { return idmap->CopyCpuToGpu(gpu_id_, Config()); - } - else { + } else { KNOWHERE_THROW_MSG("IndexType not Support GpuClone"); } } -BinarySet GPUIDMAP::SerializeImpl() { +BinarySet +GPUIDMAP::SerializeImpl() { try { MemoryIOWriter writer; { - faiss::Index *index = index_.get(); - faiss::Index *host_index = faiss::gpu::index_gpu_to_cpu(index); + faiss::Index* index = index_.get(); + faiss::Index* host_index = faiss::gpu::index_gpu_to_cpu(index); faiss::write_index(host_index, &writer); delete host_index; @@ -205,21 +218,22 @@ BinarySet GPUIDMAP::SerializeImpl() { res_set.Append("IVF", data, writer.rp); return res_set; - } catch (std::exception &e) { + } catch (std::exception& e) { KNOWHERE_THROW_MSG(e.what()); } } -void GPUIDMAP::LoadImpl(const BinarySet &index_binary) { +void +GPUIDMAP::LoadImpl(const BinarySet& index_binary) { auto binary = index_binary.GetByName("IVF"); MemoryIOReader reader; { reader.total = binary->size; reader.data_ = binary->data.get(); - faiss::Index *index = faiss::read_index(&reader); + faiss::Index* index = faiss::read_index(&reader); - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_) ){ + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_)) { ResScope rs(res, gpu_id_, false); auto device_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index); index_.reset(device_index); @@ -232,28 +246,27 @@ void GPUIDMAP::LoadImpl(const BinarySet &index_binary) { } } -VectorIndexPtr GPUIDMAP::CopyGpuToGpu(const int64_t &device_id, const Config &config) { +VectorIndexPtr +GPUIDMAP::CopyGpuToGpu(const int64_t& device_id, const Config& config) { auto cpu_index = CopyGpuToCpu(config); return std::static_pointer_cast(cpu_index)->CopyCpuToGpu(device_id, config); } -float *GPUIDMAP::GetRawVectors() { +float* +GPUIDMAP::GetRawVectors() { KNOWHERE_THROW_MSG("Not support"); } -int64_t *GPUIDMAP::GetRawIds() { +int64_t* +GPUIDMAP::GetRawIds() { KNOWHERE_THROW_MSG("Not support"); } -void GPUIDMAP::search_impl(int64_t n, - const float *data, - int64_t k, - float *distances, - int64_t *labels, - const Config &cfg) { +void +GPUIDMAP::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { ResScope rs(res_, gpu_id_); - index_->search(n, (float *) data, k, distances, labels); + index_->search(n, (float*)data, k, distances, labels); } -} -} +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIDMAP.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIDMAP.h index 106759faf1..fd13b87e34 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIDMAP.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIDMAP.h @@ -15,41 +15,54 @@ // specific language governing permissions and limitations // under the License. - #pragma once -#include "IndexIVF.h" #include "IndexGPUIVF.h" +#include "IndexIVF.h" +#include +#include namespace zilliz { namespace knowhere { class IDMAP : public VectorIndex, public FaissBaseIndex { public: - IDMAP() : FaissBaseIndex(nullptr) {}; - explicit IDMAP(std::shared_ptr index) : FaissBaseIndex(std::move(index)) {}; - BinarySet Serialize() override; - void Load(const BinarySet &index_binary) override; - void Train(const Config &config); - DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override; - int64_t Count() override; - VectorIndexPtr Clone() override; - int64_t Dimension() override; - void Add(const DatasetPtr &dataset, const Config &config) override; - VectorIndexPtr CopyCpuToGpu(const int64_t &device_id, const Config &config); - void Seal() override; + IDMAP() : FaissBaseIndex(nullptr) { + } - virtual float *GetRawVectors(); - virtual int64_t *GetRawIds(); + explicit IDMAP(std::shared_ptr index) : FaissBaseIndex(std::move(index)) { + } + + BinarySet + Serialize() override; + void + Load(const BinarySet& index_binary) override; + void + Train(const Config& config); + DatasetPtr + Search(const DatasetPtr& dataset, const Config& config) override; + int64_t + Count() override; + VectorIndexPtr + Clone() override; + int64_t + Dimension() override; + void + Add(const DatasetPtr& dataset, const Config& config) override; + VectorIndexPtr + CopyCpuToGpu(const int64_t& device_id, const Config& config); + void + Seal() override; + + virtual float* + GetRawVectors(); + virtual int64_t* + GetRawIds(); protected: - virtual void search_impl(int64_t n, - const float *data, - int64_t k, - float *distances, - int64_t *labels, - const Config &cfg); + virtual void + search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg); std::mutex mutex_; }; @@ -57,27 +70,31 @@ using IDMAPPtr = std::shared_ptr; class GPUIDMAP : public IDMAP, public GPUIndex { public: - explicit GPUIDMAP(std::shared_ptr index, const int64_t &device_id, ResPtr& res) - : IDMAP(std::move(index)), GPUIndex(device_id, res) {} + explicit GPUIDMAP(std::shared_ptr index, const int64_t& device_id, ResPtr& res) + : IDMAP(std::move(index)), GPUIndex(device_id, res) { + } - VectorIndexPtr CopyGpuToCpu(const Config &config) override; - float *GetRawVectors() override; - int64_t *GetRawIds() override; - VectorIndexPtr Clone() override; - VectorIndexPtr CopyGpuToGpu(const int64_t &device_id, const Config &config) override; + VectorIndexPtr + CopyGpuToCpu(const Config& config) override; + float* + GetRawVectors() override; + int64_t* + GetRawIds() override; + VectorIndexPtr + Clone() override; + VectorIndexPtr + CopyGpuToGpu(const int64_t& device_id, const Config& config) override; protected: - void search_impl(int64_t n, - const float *data, - int64_t k, - float *distances, - int64_t *labels, - const Config &cfg) override; - BinarySet SerializeImpl() override; - void LoadImpl(const BinarySet &index_binary) override; + void + search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) override; + BinarySet + SerializeImpl() override; + void + LoadImpl(const BinarySet& index_binary) override; }; using GPUIDMAPPtr = std::shared_ptr; -} -} +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp index 2e6d0e9763..13805ce14d 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -15,47 +15,47 @@ // specific language governing permissions and limitations // under the License. +#include +#include +#include #include #include #include #include -#include -#include -#include -#include #include +#include +#include +#include +#include - -#include "knowhere/common/Exception.h" #include "knowhere/adapter/VectorAdapter.h" -#include "IndexIVF.h" -#include "IndexGPUIVF.h" - +#include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/IndexGPUIVF.h" +#include "knowhere/index/vector_index/IndexIVF.h" namespace zilliz { namespace knowhere { - -IndexModelPtr IVF::Train(const DatasetPtr &dataset, const Config &config) { +IndexModelPtr +IVF::Train(const DatasetPtr& dataset, const Config& config) { auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { - build_cfg->CheckValid(); // throw exception + build_cfg->CheckValid(); // throw exception } GETTENSOR(dataset) - faiss::Index *coarse_quantizer = new faiss::IndexFlatL2(dim); - auto index = std::make_shared(coarse_quantizer, dim, - build_cfg->nlist, + faiss::Index* coarse_quantizer = new faiss::IndexFlatL2(dim); + auto index = std::make_shared(coarse_quantizer, dim, build_cfg->nlist, GetMetricType(build_cfg->metric_type)); - index->train(rows, (float *) p_data); + index->train(rows, (float*)p_data); // TODO(linxj): override here. train return model or not. return std::make_shared(index); } - -void IVF::Add(const DatasetPtr &dataset, const Config &config) { +void +IVF::Add(const DatasetPtr& dataset, const Config& config) { if (!index_ || !index_->is_trained) { KNOWHERE_THROW_MSG("index not initialize or trained"); } @@ -64,11 +64,12 @@ void IVF::Add(const DatasetPtr &dataset, const Config &config) { GETTENSOR(dataset) auto array = dataset->array()[0]; - auto p_ids = array->data()->GetValues(1, 0); - index_->add_with_ids(rows, (float *) p_data, p_ids); + auto p_ids = array->data()->GetValues(1, 0); + index_->add_with_ids(rows, (float*)p_data, p_ids); } -void IVF::AddWithoutIds(const DatasetPtr &dataset, const Config &config) { +void +IVF::AddWithoutIds(const DatasetPtr& dataset, const Config& config) { if (!index_ || !index_->is_trained) { KNOWHERE_THROW_MSG("index not initialize or trained"); } @@ -76,10 +77,11 @@ void IVF::AddWithoutIds(const DatasetPtr &dataset, const Config &config) { std::lock_guard lk(mutex_); GETTENSOR(dataset) - index_->add(rows, (float *) p_data); + index_->add(rows, (float*)p_data); } -BinarySet IVF::Serialize() { +BinarySet +IVF::Serialize() { if (!index_ || !index_->is_trained) { KNOWHERE_THROW_MSG("index not initialize or trained"); } @@ -89,31 +91,33 @@ BinarySet IVF::Serialize() { return SerializeImpl(); } -void IVF::Load(const BinarySet &index_binary) { +void +IVF::Load(const BinarySet& index_binary) { std::lock_guard lk(mutex_); LoadImpl(index_binary); } -DatasetPtr IVF::Search(const DatasetPtr &dataset, const Config &config) { +DatasetPtr +IVF::Search(const DatasetPtr& dataset, const Config& config) { if (!index_ || !index_->is_trained) { KNOWHERE_THROW_MSG("index not initialize or trained"); } auto search_cfg = std::dynamic_pointer_cast(config); if (search_cfg != nullptr) { - search_cfg->CheckValid(); // throw exception + search_cfg->CheckValid(); // throw exception } GETTENSOR(dataset) auto elems = rows * search_cfg->k; - auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems); - auto res_dis = (float *) malloc(sizeof(float) * elems); + auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems); + auto res_dis = (float*)malloc(sizeof(float) * elems); - search_impl(rows, (float*) p_data, search_cfg->k, res_dis, res_ids, config); + search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config); - auto id_buf = MakeMutableBufferSmart((uint8_t *) res_ids, sizeof(int64_t) * elems); - auto dist_buf = MakeMutableBufferSmart((uint8_t *) res_dis, sizeof(float) * elems); + auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); + auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); std::vector id_bufs{nullptr, id_buf}; std::vector dist_bufs{nullptr, dist_buf}; @@ -131,7 +135,8 @@ DatasetPtr IVF::Search(const DatasetPtr &dataset, const Config &config) { return std::make_shared(array, nullptr); } -void IVF::set_index_model(IndexModelPtr model) { +void +IVF::set_index_model(IndexModelPtr model) { std::lock_guard lk(mutex_); auto rel_model = std::static_pointer_cast(model); @@ -140,25 +145,29 @@ void IVF::set_index_model(IndexModelPtr model) { index_.reset(faiss::clone_index(rel_model->index_.get())); } -std::shared_ptr IVF::GenParams(const Config &config) { +std::shared_ptr +IVF::GenParams(const Config& config) { auto params = std::make_shared(); auto search_cfg = std::dynamic_pointer_cast(config); params->nprobe = search_cfg->nprobe; - //params->max_codes = config.get_with_default("max_codes", size_t(0)); + // params->max_codes = config.get_with_default("max_codes", size_t(0)); return params; } -int64_t IVF::Count() { +int64_t +IVF::Count() { return index_->ntotal; } -int64_t IVF::Dimension() { +int64_t +IVF::Dimension() { return index_->d; } -void IVF::GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, const Config &config) { +void +IVF::GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const Config& config) { GETTENSOR(dataset) auto ntotal = Count(); @@ -174,7 +183,7 @@ void IVF::GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, co for (int i = 0; i < total_search_count; ++i) { auto b_size = i == total_search_count - 1 && tail_batch_size != 0 ? tail_batch_size : batch_size; - auto &res = res_vec[i]; + auto& res = res_vec[i]; res.resize(k * b_size); auto xq = p_data + batch_size * dim * i; @@ -182,7 +191,7 @@ void IVF::GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, co int tmp = 0; for (int j = 0; j < b_size; ++j) { - auto &node = graph[batch_size * i + j]; + auto& node = graph[batch_size * i + j]; node.resize(k); for (int m = 0; m < k && tmp < k * b_size; ++m, ++tmp) { // TODO(linxj): avoid memcopy here. @@ -192,18 +201,15 @@ void IVF::GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, co } } -void IVF::search_impl(int64_t n, - const float *data, - int64_t k, - float *distances, - int64_t *labels, - const Config &cfg) { +void +IVF::search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg) { auto params = GenParams(cfg); - faiss::ivflib::search_with_parameters(index_.get(), n, (float *) data, k, distances, labels, params.get()); + faiss::ivflib::search_with_parameters(index_.get(), n, (float*)data, k, distances, labels, params.get()); } -VectorIndexPtr IVF::CopyCpuToGpu(const int64_t& device_id, const Config &config) { - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)){ +VectorIndexPtr +IVF::CopyCpuToGpu(const int64_t& device_id, const Config& config) { + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), device_id, index_.get()); @@ -215,7 +221,8 @@ VectorIndexPtr IVF::CopyCpuToGpu(const int64_t& device_id, const Config &config) } } -VectorIndexPtr IVF::Clone() { +VectorIndexPtr +IVF::Clone() { std::lock_guard lk(mutex_); auto clone_index = faiss::clone_index(index_.get()); @@ -224,21 +231,24 @@ VectorIndexPtr IVF::Clone() { return Clone_impl(new_index); } -VectorIndexPtr IVF::Clone_impl(const std::shared_ptr &index) { +VectorIndexPtr +IVF::Clone_impl(const std::shared_ptr& index) { return std::make_shared(index); } -void IVF::Seal() { +void +IVF::Seal() { if (!index_ || !index_->is_trained) { KNOWHERE_THROW_MSG("index not initialize or trained"); } SealImpl(); } +IVFIndexModel::IVFIndexModel(std::shared_ptr index) : FaissBaseIndex(std::move(index)) { +} -IVFIndexModel::IVFIndexModel(std::shared_ptr index) : FaissBaseIndex(std::move(index)) {} - -BinarySet IVFIndexModel::Serialize() { +BinarySet +IVFIndexModel::Serialize() { if (!index_ || !index_->is_trained) { KNOWHERE_THROW_MSG("indexmodel not initialize or trained"); } @@ -246,18 +256,16 @@ BinarySet IVFIndexModel::Serialize() { return SerializeImpl(); } -void IVFIndexModel::Load(const BinarySet &binary_set) { +void +IVFIndexModel::Load(const BinarySet& binary_set) { std::lock_guard lk(mutex_); LoadImpl(binary_set); } -void IVFIndexModel::SealImpl() { +void +IVFIndexModel::SealImpl() { // do nothing } - - - - -} -} +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.h index 72f98a5afa..088849c22a 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVF.h @@ -15,17 +15,17 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include #include +#include +#include -#include "VectorIndex.h" #include "FaissBaseIndex.h" +#include "VectorIndex.h" #include "faiss/IndexIVF.h" - namespace zilliz { namespace knowhere { @@ -33,36 +33,38 @@ using Graph = std::vector>; class IVF : public VectorIndex, protected FaissBaseIndex { public: - IVF() : FaissBaseIndex(nullptr) {}; + IVF() : FaissBaseIndex(nullptr) { + } - explicit IVF(std::shared_ptr index) : FaissBaseIndex(std::move(index)) {} + explicit IVF(std::shared_ptr index) : FaissBaseIndex(std::move(index)) { + } VectorIndexPtr - Clone() override;; + Clone() override; IndexModelPtr - Train(const DatasetPtr &dataset, const Config &config) override; + Train(const DatasetPtr& dataset, const Config& config) override; void set_index_model(IndexModelPtr model) override; void - Add(const DatasetPtr &dataset, const Config &config) override; + Add(const DatasetPtr& dataset, const Config& config) override; void - AddWithoutIds(const DatasetPtr &dataset, const Config &config); + AddWithoutIds(const DatasetPtr& dataset, const Config& config); DatasetPtr - Search(const DatasetPtr &dataset, const Config &config) override; + Search(const DatasetPtr& dataset, const Config& config) override; void - GenGraph(const int64_t &k, Graph &graph, const DatasetPtr &dataset, const Config &config); + GenGraph(const int64_t& k, Graph& graph, const DatasetPtr& dataset, const Config& config); BinarySet Serialize() override; void - Load(const BinarySet &index_binary) override; + Load(const BinarySet& index_binary) override; int64_t Count() override; @@ -74,23 +76,17 @@ class IVF : public VectorIndex, protected FaissBaseIndex { Seal() override; virtual VectorIndexPtr - CopyCpuToGpu(const int64_t &device_id, const Config &config); - + CopyCpuToGpu(const int64_t& device_id, const Config& config); protected: virtual std::shared_ptr - GenParams(const Config &config); + GenParams(const Config& config); virtual VectorIndexPtr - Clone_impl(const std::shared_ptr &index); + Clone_impl(const std::shared_ptr& index); virtual void - search_impl(int64_t n, - const float *data, - int64_t k, - float *distances, - int64_t *labels, - const Config &cfg); + search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg); protected: std::mutex mutex_; @@ -106,13 +102,14 @@ class IVFIndexModel : public IndexModel, public FaissBaseIndex { public: explicit IVFIndexModel(std::shared_ptr index); - IVFIndexModel() : FaissBaseIndex(nullptr) {}; + IVFIndexModel() : FaissBaseIndex(nullptr) { + } BinarySet Serialize() override; void - Load(const BinarySet &binary) override; + Load(const BinarySet& binary) override; protected: void @@ -121,7 +118,8 @@ class IVFIndexModel : public IndexModel, public FaissBaseIndex { protected: std::mutex mutex_; }; + using IVFIndexModelPtr = std::shared_ptr; -} -} \ No newline at end of file +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp index b5418b77cf..c0eda94310 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFPQ.cpp @@ -15,47 +15,51 @@ // specific language governing permissions and limitations // under the License. - #include #include +#include +#include -#include "IndexIVFPQ.h" -#include "knowhere/common/Exception.h" #include "knowhere/adapter/VectorAdapter.h" +#include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/IndexIVFPQ.h" namespace zilliz { namespace knowhere { -IndexModelPtr IVFPQ::Train(const DatasetPtr &dataset, const Config &config) { +IndexModelPtr +IVFPQ::Train(const DatasetPtr& dataset, const Config& config) { auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { - build_cfg->CheckValid(); // throw exception + build_cfg->CheckValid(); // throw exception } GETTENSOR(dataset) - faiss::Index *coarse_quantizer = new faiss::IndexFlat(dim, GetMetricType(build_cfg->metric_type)); - auto index = std::make_shared(coarse_quantizer, dim, - build_cfg->nlist, build_cfg->m, build_cfg->nbits); - index->train(rows, (float *) p_data); + faiss::Index* coarse_quantizer = new faiss::IndexFlat(dim, GetMetricType(build_cfg->metric_type)); + auto index = + std::make_shared(coarse_quantizer, dim, build_cfg->nlist, build_cfg->m, build_cfg->nbits); + index->train(rows, (float*)p_data); return std::make_shared(index); } -std::shared_ptr IVFPQ::GenParams(const Config &config) { +std::shared_ptr +IVFPQ::GenParams(const Config& config) { auto params = std::make_shared(); auto search_cfg = std::dynamic_pointer_cast(config); params->nprobe = search_cfg->nprobe; -// params->scan_table_threshold = conf->scan_table_threhold; -// params->polysemous_ht = conf->polysemous_ht; -// params->max_codes = conf->max_codes; + // params->scan_table_threshold = conf->scan_table_threhold; + // params->polysemous_ht = conf->polysemous_ht; + // params->max_codes = conf->max_codes; return params; } -VectorIndexPtr IVFPQ::Clone_impl(const std::shared_ptr &index) { +VectorIndexPtr +IVFPQ::Clone_impl(const std::shared_ptr& index) { return std::make_shared(index); } -} // knowhere -} // zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFPQ.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFPQ.h index 427f76fe3e..f071c1e0fa 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFPQ.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFPQ.h @@ -15,33 +15,33 @@ // specific language governing permissions and limitations // under the License. - #pragma once +#include +#include + #include "IndexIVF.h" namespace zilliz { namespace knowhere { class IVFPQ : public IVF { -public: - explicit IVFPQ(std::shared_ptr index) : IVF(std::move(index)) {} + public: + explicit IVFPQ(std::shared_ptr index) : IVF(std::move(index)) { + } IVFPQ() = default; IndexModelPtr - Train(const DatasetPtr &dataset, const Config &config) override; + Train(const DatasetPtr& dataset, const Config& config) override; -protected: + protected: std::shared_ptr - GenParams(const Config &config) override; + GenParams(const Config& config) override; VectorIndexPtr - Clone_impl(const std::shared_ptr &index) override; + Clone_impl(const std::shared_ptr& index) override; }; -} // knowhere -} // zilliz - - - +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp index ec29627ebc..5406211fa3 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQ.cpp @@ -15,44 +15,46 @@ // specific language governing permissions and limitations // under the License. - #include +#include -#include "knowhere/common/Exception.h" -#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" #include "knowhere/adapter/VectorAdapter.h" -#include "IndexIVFSQ.h" -#include "IndexGPUIVFSQ.h" - +#include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/IndexGPUIVFSQ.h" +#include "knowhere/index/vector_index/IndexIVFSQ.h" +#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" namespace zilliz { namespace knowhere { -IndexModelPtr IVFSQ::Train(const DatasetPtr &dataset, const Config &config) { +IndexModelPtr +IVFSQ::Train(const DatasetPtr& dataset, const Config& config) { auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { - build_cfg->CheckValid(); // throw exception + build_cfg->CheckValid(); // throw exception } GETTENSOR(dataset) std::stringstream index_type; - index_type << "IVF" << build_cfg->nlist << "," << "SQ" << build_cfg->nbits; - auto build_index = faiss::index_factory(dim, index_type.str().c_str(), - GetMetricType(build_cfg->metric_type)); - build_index->train(rows, (float *) p_data); + index_type << "IVF" << build_cfg->nlist << "," + << "SQ" << build_cfg->nbits; + auto build_index = faiss::index_factory(dim, index_type.str().c_str(), GetMetricType(build_cfg->metric_type)); + build_index->train(rows, (float*)p_data); std::shared_ptr ret_index; ret_index.reset(build_index); return std::make_shared(ret_index); } -VectorIndexPtr IVFSQ::Clone_impl(const std::shared_ptr &index) { +VectorIndexPtr +IVFSQ::Clone_impl(const std::shared_ptr& index) { return std::make_shared(index); } -VectorIndexPtr IVFSQ::CopyCpuToGpu(const int64_t &device_id, const Config &config) { - if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)){ +VectorIndexPtr +IVFSQ::CopyCpuToGpu(const int64_t& device_id, const Config& config) { + if (auto res = FaissGpuResourceMgr::GetInstance().GetRes(device_id)) { ResScope rs(res, device_id, false); faiss::gpu::GpuClonerOptions option; option.allInGpu = true; @@ -67,5 +69,5 @@ VectorIndexPtr IVFSQ::CopyCpuToGpu(const int64_t &device_id, const Config &confi } } -} // knowhere -} // zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQ.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQ.h index 5ee771f951..3a05ca9826 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQ.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQ.h @@ -15,31 +15,33 @@ // specific language governing permissions and limitations // under the License. - #pragma once +#include +#include + #include "IndexIVF.h" namespace zilliz { namespace knowhere { class IVFSQ : public IVF { -public: - explicit IVFSQ(std::shared_ptr index) : IVF(std::move(index)) {} + public: + explicit IVFSQ(std::shared_ptr index) : IVF(std::move(index)) { + } IVFSQ() = default; IndexModelPtr - Train(const DatasetPtr &dataset, const Config &config) override; + Train(const DatasetPtr& dataset, const Config& config) override; VectorIndexPtr - CopyCpuToGpu(const int64_t &device_id, const Config &config) override; + CopyCpuToGpu(const int64_t& device_id, const Config& config) override; -protected: + protected: VectorIndexPtr - Clone_impl(const std::shared_ptr &index) override; + Clone_impl(const std::shared_ptr& index) override; }; -} // knowhere -} // zilliz - +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexKDT.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexKDT.cpp index cceb88516a..6efb839384 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexKDT.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexKDT.cpp @@ -15,41 +15,39 @@ // specific language governing permissions and limitations // under the License. - -#include -#include -#include #include - +#include +#include +#include +#include #undef mkdir -#include "IndexKDT.h" +#include "knowhere/index/vector_index/IndexKDT.h" #include "knowhere/index/vector_index/helpers/Definitions.h" //#include "knowhere/index/preprocessor/normalize.h" -#include "knowhere/index/vector_index/helpers/KDTParameterMgr.h" #include "knowhere/adapter/SptagAdapter.h" #include "knowhere/common/Exception.h" - +#include "knowhere/index/vector_index/helpers/KDTParameterMgr.h" namespace zilliz { namespace knowhere { BinarySet CPUKDTRNG::Serialize() { - std::vector index_blobs; + std::vector index_blobs; std::vector index_len; index_ptr_->SaveIndexToMemory(index_blobs, index_len); BinarySet binary_set; auto sample = std::make_shared(); - sample.reset(static_cast(index_blobs[0])); + sample.reset(static_cast(index_blobs[0])); auto tree = std::make_shared(); - tree.reset(static_cast(index_blobs[1])); + tree.reset(static_cast(index_blobs[1])); auto graph = std::make_shared(); - graph.reset(static_cast(index_blobs[2])); + graph.reset(static_cast(index_blobs[2])); auto metadata = std::make_shared(); - metadata.reset(static_cast(index_blobs[3])); + metadata.reset(static_cast(index_blobs[3])); binary_set.Append("samples", sample, index_len[0]); binary_set.Append("tree", tree, index_len[1]); @@ -59,8 +57,8 @@ CPUKDTRNG::Serialize() { } void -CPUKDTRNG::Load(const BinarySet &binary_set) { - std::vector index_blobs; +CPUKDTRNG::Load(const BinarySet& binary_set) { + std::vector index_blobs; auto samples = binary_set.GetByName("samples"); index_blobs.push_back(samples->data.get()); @@ -77,17 +75,17 @@ CPUKDTRNG::Load(const BinarySet &binary_set) { index_ptr_->LoadIndexFromMemory(index_blobs); } -//PreprocessorPtr -//CPUKDTRNG::BuildPreprocessor(const DatasetPtr &dataset, const Config &config) { +// PreprocessorPtr +// CPUKDTRNG::BuildPreprocessor(const DatasetPtr &dataset, const Config &config) { // return std::make_shared(); //} IndexModelPtr -CPUKDTRNG::Train(const DatasetPtr &origin, const Config &train_config) { +CPUKDTRNG::Train(const DatasetPtr& origin, const Config& train_config) { SetParameters(train_config); DatasetPtr dataset = origin->Clone(); - //if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine + // if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine // && preprocessor_) { // preprocessor_->Preprocess(dataset); //} @@ -101,11 +99,11 @@ CPUKDTRNG::Train(const DatasetPtr &origin, const Config &train_config) { } void -CPUKDTRNG::Add(const DatasetPtr &origin, const Config &add_config) { +CPUKDTRNG::Add(const DatasetPtr& origin, const Config& add_config) { SetParameters(add_config); DatasetPtr dataset = origin->Clone(); - //if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine + // if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine // && preprocessor_) { // preprocessor_->Preprocess(dataset); //} @@ -116,18 +114,18 @@ CPUKDTRNG::Add(const DatasetPtr &origin, const Config &add_config) { } void -CPUKDTRNG::SetParameters(const Config &config) { - for (auto ¶ : KDTParameterMgr::GetInstance().GetKDTParameters()) { -// auto value = config.get_with_default(para.first, para.second); +CPUKDTRNG::SetParameters(const Config& config) { + for (auto& para : KDTParameterMgr::GetInstance().GetKDTParameters()) { + // auto value = config.get_with_default(para.first, para.second); index_ptr_->SetParameter(para.first, para.second); } } DatasetPtr -CPUKDTRNG::Search(const DatasetPtr &dataset, const Config &config) { +CPUKDTRNG::Search(const DatasetPtr& dataset, const Config& config) { SetParameters(config); auto tensor = dataset->tensor()[0]; - auto p = (float *) tensor->raw_mutable_data(); + auto p = (float*)tensor->raw_mutable_data(); for (auto i = 0; i < 10; ++i) { for (auto j = 0; j < 10; ++j) { std::cout << p[i * 10 + j] << " "; @@ -138,7 +136,7 @@ CPUKDTRNG::Search(const DatasetPtr &dataset, const Config &config) { #pragma omp parallel for for (auto i = 0; i < query_results.size(); ++i) { - auto target = (float *) query_results[i].GetTarget(); + auto target = (float*)query_results[i].GetTarget(); std::cout << target[0] << ", " << target[1] << ", " << target[2] << std::endl; index_ptr_->SearchIndex(query_results[i]); } @@ -146,27 +144,34 @@ CPUKDTRNG::Search(const DatasetPtr &dataset, const Config &config) { return ConvertToDataset(query_results); } -int64_t CPUKDTRNG::Count() { +int64_t +CPUKDTRNG::Count() { index_ptr_->GetNumSamples(); } -int64_t CPUKDTRNG::Dimension() { + +int64_t +CPUKDTRNG::Dimension() { index_ptr_->GetFeatureDim(); } -VectorIndexPtr CPUKDTRNG::Clone() { +VectorIndexPtr +CPUKDTRNG::Clone() { KNOWHERE_THROW_MSG("not support"); } -void CPUKDTRNG::Seal() { +void +CPUKDTRNG::Seal() { // do nothing } // TODO(linxj): BinarySet -CPUKDTRNGIndexModel::Serialize() {} +CPUKDTRNGIndexModel::Serialize() { +} void -CPUKDTRNGIndexModel::Load(const BinarySet &binary) {} +CPUKDTRNGIndexModel::Load(const BinarySet& binary) { +} -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexKDT.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexKDT.h index 7fd41e9bad..86737ac3dd 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexKDT.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexKDT.h @@ -15,53 +15,54 @@ // specific language governing permissions and limitations // under the License. - #pragma once +#include #include #include #include "VectorIndex.h" #include "knowhere/index/IndexModel.h" -#include - namespace zilliz { namespace knowhere { - class CPUKDTRNG : public VectorIndex { public: CPUKDTRNG() { - index_ptr_ = SPTAG::VectorIndex::CreateInstance(SPTAG::IndexAlgoType::KDT, - SPTAG::VectorValueType::Float); + index_ptr_ = SPTAG::VectorIndex::CreateInstance(SPTAG::IndexAlgoType::KDT, SPTAG::VectorValueType::Float); index_ptr_->SetParameter("DistCalcMethod", "L2"); } public: BinarySet Serialize() override; - VectorIndexPtr Clone() override; + VectorIndexPtr + Clone() override; void - Load(const BinarySet &index_array) override; + Load(const BinarySet& index_array) override; public: - //PreprocessorPtr - //BuildPreprocessor(const DatasetPtr &dataset, const Config &config) override; - int64_t Count() override; - int64_t Dimension() override; + // PreprocessorPtr + // BuildPreprocessor(const DatasetPtr &dataset, const Config &config) override; + int64_t + Count() override; + int64_t + Dimension() override; IndexModelPtr - Train(const DatasetPtr &dataset, const Config &config) override; + Train(const DatasetPtr& dataset, const Config& config) override; void - Add(const DatasetPtr &dataset, const Config &config) override; + Add(const DatasetPtr& dataset, const Config& config) override; DatasetPtr - Search(const DatasetPtr &dataset, const Config &config) override; - void Seal() override; + Search(const DatasetPtr& dataset, const Config& config) override; + void + Seal() override; + private: void - SetParameters(const Config &config); + SetParameters(const Config& config); private: PreprocessorPtr preprocessor_; @@ -76,7 +77,7 @@ class CPUKDTRNGIndexModel : public IndexModel { Serialize() override; void - Load(const BinarySet &binary) override; + Load(const BinarySet& binary) override; private: std::shared_ptr index_; @@ -84,5 +85,5 @@ class CPUKDTRNGIndexModel : public IndexModel { using CPUKDTRNGIndexModelPtr = std::shared_ptr; -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexNSG.cpp index 38a7fed23b..b731f801b3 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -15,28 +15,27 @@ // specific language governing permissions and limitations // under the License. - -#include "IndexNSG.h" -#include "knowhere/index/vector_index/nsg/NSG.h" -#include "knowhere/index/vector_index/nsg/NSGIO.h" -#include "IndexIDMAP.h" -#include "IndexIVF.h" -#include "IndexGPUIVF.h" +#include "knowhere/index/vector_index/IndexNSG.h" #include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" #include "knowhere/common/Timer.h" - +#include "knowhere/index/vector_index/IndexGPUIVF.h" +#include "knowhere/index/vector_index/IndexIDMAP.h" +#include "knowhere/index/vector_index/IndexIVF.h" +#include "knowhere/index/vector_index/nsg/NSG.h" +#include "knowhere/index/vector_index/nsg/NSGIO.h" namespace zilliz { namespace knowhere { -BinarySet NSG::Serialize() { +BinarySet +NSG::Serialize() { if (!index_ || !index_->is_trained) { KNOWHERE_THROW_MSG("index not initialize or trained"); } try { - algo::NsgIndex *index = index_.get(); + algo::NsgIndex* index = index_.get(); MemoryIOWriter writer; algo::write_index(index, writer); @@ -46,12 +45,13 @@ BinarySet NSG::Serialize() { BinarySet res_set; res_set.Append("NSG", data, writer.total); return res_set; - } catch (std::exception &e) { + } catch (std::exception& e) { KNOWHERE_THROW_MSG(e.what()); } } -void NSG::Load(const BinarySet &index_binary) { +void +NSG::Load(const BinarySet& index_binary) { try { auto binary = index_binary.GetByName("NSG"); @@ -61,15 +61,16 @@ void NSG::Load(const BinarySet &index_binary) { auto index = algo::read_index(reader); index_.reset(index); - } catch (std::exception &e) { + } catch (std::exception& e) { KNOWHERE_THROW_MSG(e.what()); } } -DatasetPtr NSG::Search(const DatasetPtr &dataset, const Config &config) { +DatasetPtr +NSG::Search(const DatasetPtr& dataset, const Config& config) { auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { - build_cfg->CheckValid(); // throw exception + build_cfg->CheckValid(); // throw exception } if (!index_ || !index_->is_trained) { @@ -79,16 +80,15 @@ DatasetPtr NSG::Search(const DatasetPtr &dataset, const Config &config) { GETTENSOR(dataset) auto elems = rows * build_cfg->k; - auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems); - auto res_dis = (float *) malloc(sizeof(float) * elems); + auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems); + auto res_dis = (float*)malloc(sizeof(float) * elems); algo::SearchParams s_params; s_params.search_length = build_cfg->search_length; - index_->Search((float *) p_data, rows, dim, - build_cfg->k, res_dis, res_ids, s_params); + index_->Search((float*)p_data, rows, dim, build_cfg->k, res_dis, res_ids, s_params); - auto id_buf = MakeMutableBufferSmart((uint8_t *) res_ids, sizeof(int64_t) * elems); - auto dist_buf = MakeMutableBufferSmart((uint8_t *) res_dis, sizeof(float) * elems); + auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); + auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); std::vector id_bufs{nullptr, id_buf}; std::vector dist_bufs{nullptr, dist_buf}; @@ -106,10 +106,11 @@ DatasetPtr NSG::Search(const DatasetPtr &dataset, const Config &config) { return std::make_shared(array, nullptr); } -IndexModelPtr NSG::Train(const DatasetPtr &dataset, const Config &config) { +IndexModelPtr +NSG::Train(const DatasetPtr& dataset, const Config& config) { auto build_cfg = std::dynamic_pointer_cast(config); if (build_cfg != nullptr) { - build_cfg->CheckValid(); // throw exception + build_cfg->CheckValid(); // throw exception } if (build_cfg->metric_type != METRICTYPE::L2) { @@ -132,34 +133,38 @@ IndexModelPtr NSG::Train(const DatasetPtr &dataset, const Config &config) { GETTENSOR(dataset) auto array = dataset->array()[0]; - auto p_ids = array->data()->GetValues(1, 0); + auto p_ids = array->data()->GetValues(1, 0); index_ = std::make_shared(dim, rows); index_->SetKnnGraph(knng); - index_->Build_with_ids(rows, (float *) p_data, (long *) p_ids, b_params); - return nullptr; // TODO(linxj): support serialize + index_->Build_with_ids(rows, (float*)p_data, (int64_t*)p_ids, b_params); + return nullptr; // TODO(linxj): support serialize } -void NSG::Add(const DatasetPtr &dataset, const Config &config) { +void +NSG::Add(const DatasetPtr& dataset, const Config& config) { // do nothing } -int64_t NSG::Count() { +int64_t +NSG::Count() { return index_->ntotal; } -int64_t NSG::Dimension() { +int64_t +NSG::Dimension() { return index_->dimension; } -VectorIndexPtr NSG::Clone() { +VectorIndexPtr +NSG::Clone() { KNOWHERE_THROW_MSG("not support"); } -void NSG::Seal() { +void +NSG::Seal() { // do nothing } -} -} - +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexNSG.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexNSG.h index 873bba2287..4371ddb65a 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexNSG.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexNSG.h @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. - #pragma once -#include "VectorIndex.h" +#include +#include +#include "VectorIndex.h" namespace zilliz { namespace knowhere { @@ -30,18 +31,30 @@ class NsgIndex; class NSG : public VectorIndex { public: - explicit NSG(const int64_t& gpu_num):gpu_(gpu_num){} + explicit NSG(const int64_t& gpu_num) : gpu_(gpu_num) { + } + NSG() = default; - IndexModelPtr Train(const DatasetPtr &dataset, const Config &config) override; - DatasetPtr Search(const DatasetPtr &dataset, const Config &config) override; - void Add(const DatasetPtr &dataset, const Config &config) override; - BinarySet Serialize() override; - void Load(const BinarySet &index_binary) override; - int64_t Count() override; - int64_t Dimension() override; - VectorIndexPtr Clone() override; - void Seal() override; + IndexModelPtr + Train(const DatasetPtr& dataset, const Config& config) override; + DatasetPtr + Search(const DatasetPtr& dataset, const Config& config) override; + void + Add(const DatasetPtr& dataset, const Config& config) override; + BinarySet + Serialize() override; + void + Load(const BinarySet& index_binary) override; + int64_t + Count() override; + int64_t + Dimension() override; + VectorIndexPtr + Clone() override; + void + Seal() override; + private: std::shared_ptr index_; int64_t gpu_; @@ -49,5 +62,5 @@ class NSG : public VectorIndex { using NSGIndexPtr = std::shared_ptr(); -} -} +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/VectorIndex.h b/cpp/src/core/knowhere/knowhere/index/vector_index/VectorIndex.h index 908989d15d..e96de885af 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/VectorIndex.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/VectorIndex.h @@ -15,18 +15,15 @@ // specific language governing permissions and limitations // under the License. - #pragma once - #include #include "knowhere/common/Config.h" -#include "knowhere/index/vector_index/helpers/IndexParameter.h" #include "knowhere/common/Dataset.h" #include "knowhere/index/Index.h" #include "knowhere/index/preprocessor/Preprocessor.h" - +#include "knowhere/index/vector_index/helpers/IndexParameter.h" namespace zilliz { namespace knowhere { @@ -34,17 +31,20 @@ namespace knowhere { class VectorIndex; using VectorIndexPtr = std::shared_ptr; - class VectorIndex : public Index { public: virtual PreprocessorPtr - BuildPreprocessor(const DatasetPtr &dataset, const Config &config) { return nullptr; } + BuildPreprocessor(const DatasetPtr& dataset, const Config& config) { + return nullptr; + } virtual IndexModelPtr - Train(const DatasetPtr &dataset, const Config &config) { return nullptr; } + Train(const DatasetPtr& dataset, const Config& config) { + return nullptr; + } virtual void - Add(const DatasetPtr &dataset, const Config &config) = 0; + Add(const DatasetPtr& dataset, const Config& config) = 0; virtual void Seal() = 0; @@ -59,7 +59,5 @@ class VectorIndex : public Index { Dimension() = 0; }; - - -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Cloner.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Cloner.cpp index d8ccdd1087..c6f3f81053 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Cloner.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Cloner.cpp @@ -15,21 +15,20 @@ // specific language governing permissions and limitations // under the License. - +#include "knowhere/index/vector_index/helpers/Cloner.h" #include "knowhere/common/Exception.h" -#include "knowhere/index/vector_index/IndexIVF.h" -#include "knowhere/index/vector_index/IndexIVFSQ.h" -#include "knowhere/index/vector_index/IndexIVFPQ.h" #include "knowhere/index/vector_index/IndexGPUIVF.h" #include "knowhere/index/vector_index/IndexIDMAP.h" -#include "Cloner.h" - +#include "knowhere/index/vector_index/IndexIVF.h" +#include "knowhere/index/vector_index/IndexIVFPQ.h" +#include "knowhere/index/vector_index/IndexIVFSQ.h" namespace zilliz { namespace knowhere { namespace cloner { -VectorIndexPtr CopyGpuToCpu(const VectorIndexPtr &index, const Config &config) { +VectorIndexPtr +CopyGpuToCpu(const VectorIndexPtr& index, const Config& config) { if (auto device_index = std::dynamic_pointer_cast(index)) { return device_index->CopyGpuToCpu(config); } else { @@ -37,7 +36,8 @@ VectorIndexPtr CopyGpuToCpu(const VectorIndexPtr &index, const Config &config) { } } -VectorIndexPtr CopyCpuToGpu(const VectorIndexPtr &index, const int64_t &device_id, const Config &config) { +VectorIndexPtr +CopyCpuToGpu(const VectorIndexPtr& index, const int64_t& device_id, const Config& config) { if (auto device_index = std::dynamic_pointer_cast(index)) { return device_index->CopyGpuToGpu(device_id, config); } @@ -55,6 +55,6 @@ VectorIndexPtr CopyCpuToGpu(const VectorIndexPtr &index, const int64_t &device_i } } -} // cloner -} -} +} // namespace cloner +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Cloner.h b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Cloner.h index a72e4df141..fb5d00d7df 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Cloner.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Cloner.h @@ -15,23 +15,21 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include "knowhere/index/vector_index/VectorIndex.h" - namespace zilliz { namespace knowhere { namespace cloner { // TODO(linxj): rename CopyToGpu extern VectorIndexPtr -CopyCpuToGpu(const VectorIndexPtr &index, const int64_t &device_id, const Config &config); +CopyCpuToGpu(const VectorIndexPtr& index, const int64_t& device_id, const Config& config); extern VectorIndexPtr -CopyGpuToCpu(const VectorIndexPtr &index, const Config &config); +CopyGpuToCpu(const VectorIndexPtr& index, const Config& config); -} // cloner -} // knowhere -} // zilliz \ No newline at end of file +} // namespace cloner +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Definitions.h b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Definitions.h index fef7b01221..d5959f7501 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Definitions.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/Definitions.h @@ -15,10 +15,8 @@ // specific language governing permissions and limitations // under the License. - #pragma once - namespace zilliz { namespace knowhere { namespace definition { @@ -27,6 +25,6 @@ namespace definition { #define META_DIM ("dimension") #define META_K ("k") -} // definition -} // knowhere -} // zilliz +} // namespace definition +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp index c50594b4b0..70c879d03a 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp @@ -15,25 +15,24 @@ // specific language governing permissions and limitations // under the License. +#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" -#include "FaissGpuResourceMgr.h" - +#include namespace zilliz { namespace knowhere { -FaissGpuResourceMgr &FaissGpuResourceMgr::GetInstance() { +FaissGpuResourceMgr& +FaissGpuResourceMgr::GetInstance() { static FaissGpuResourceMgr instance; return instance; } -void FaissGpuResourceMgr::AllocateTempMem(ResPtr &resource, - const int64_t &device_id, - const int64_t &size) { +void +FaissGpuResourceMgr::AllocateTempMem(ResPtr& resource, const int64_t& device_id, const int64_t& size) { if (size) { resource->faiss_res->setTempMemory(size); - } - else { + } else { auto search = devices_params_.find(device_id); if (search != devices_params_.end()) { resource->faiss_res->setTempMemory(search->second.temp_mem_size); @@ -42,10 +41,8 @@ void FaissGpuResourceMgr::AllocateTempMem(ResPtr &resource, } } -void FaissGpuResourceMgr::InitDevice(int64_t device_id, - int64_t pin_mem_size, - int64_t temp_mem_size, - int64_t res_num) { +void +FaissGpuResourceMgr::InitDevice(int64_t device_id, int64_t pin_mem_size, int64_t temp_mem_size, int64_t res_num) { DeviceParams params; params.pinned_mem_size = pin_mem_size; params.temp_mem_size = temp_mem_size; @@ -54,23 +51,25 @@ void FaissGpuResourceMgr::InitDevice(int64_t device_id, devices_params_.emplace(device_id, params); } -void FaissGpuResourceMgr::InitResource() { - if(is_init) return ; +void +FaissGpuResourceMgr::InitResource() { + if (is_init) + return; is_init = true; - //std::cout << "InitResource" << std::endl; - for(auto& device : devices_params_) { + // std::cout << "InitResource" << std::endl; + for (auto& device : devices_params_) { auto& device_id = device.first; mutex_cache_.emplace(device_id, std::make_unique()); - //std::cout << "Device Id: " << device_id << std::endl; + // std::cout << "Device Id: " << device_id << std::endl; auto& device_param = device.second; auto& bq = idle_map_[device_id]; for (int64_t i = 0; i < device_param.resource_num; ++i) { - //std::cout << "Resource Id: " << i << std::endl; + // std::cout << "Resource Id: " << i << std::endl; auto raw_resource = std::make_shared(); // TODO(linxj): enable set pinned memory @@ -80,11 +79,11 @@ void FaissGpuResourceMgr::InitResource() { bq.Put(res_wrapper); } } - //std::cout << "End initResource" << std::endl; + // std::cout << "End initResource" << std::endl; } -ResPtr FaissGpuResourceMgr::GetRes(const int64_t &device_id, - const int64_t &alloc_size) { +ResPtr +FaissGpuResourceMgr::GetRes(const int64_t& device_id, const int64_t& alloc_size) { InitResource(); auto finder = idle_map_.find(device_id); @@ -97,7 +96,8 @@ ResPtr FaissGpuResourceMgr::GetRes(const int64_t &device_id, return nullptr; } -void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const ResPtr &res) { +void +FaissGpuResourceMgr::MoveToIdle(const int64_t& device_id, const ResPtr& res) { auto finder = idle_map_.find(device_id); if (finder != idle_map_.end()) { auto& bq = finder->second; @@ -105,8 +105,9 @@ void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const ResPtr &res } } -void FaissGpuResourceMgr::Free() { - for (auto &item : idle_map_) { +void +FaissGpuResourceMgr::Free() { + for (auto& item : idle_map_) { auto& bq = item.second; while (!bq.Empty()) { bq.Take(); @@ -117,12 +118,11 @@ void FaissGpuResourceMgr::Free() { void FaissGpuResourceMgr::Dump() { - for (auto &item : idle_map_) { + for (auto& item : idle_map_) { auto& bq = item.second; - std::cout << "device_id: " << item.first - << ", resource count:" << bq.Size(); + std::cout << "device_id: " << item.first << ", resource count:" << bq.Size(); } } -} // knowhere -} // zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h index 3b4786a2a5..926b8b4e00 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. - #pragma once +#include #include #include -#include +#include #include @@ -30,7 +30,7 @@ namespace zilliz { namespace knowhere { struct Resource { - explicit Resource(std::shared_ptr &r) : faiss_res(r) { + explicit Resource(std::shared_ptr& r) : faiss_res(r) { static int64_t global_id = 0; id = global_id++; } @@ -43,19 +43,19 @@ using ResPtr = std::shared_ptr; using ResWPtr = std::weak_ptr; class FaissGpuResourceMgr { -public: + public: friend class ResScope; using ResBQ = zilliz::milvus::server::BlockingQueue; -public: + public: struct DeviceParams { int64_t temp_mem_size = 0; int64_t pinned_mem_size = 0; int64_t resource_num = 2; }; -public: - static FaissGpuResourceMgr & + public: + static FaissGpuResourceMgr& GetInstance(); // Free gpu resource, avoid cudaGetDevice error when deallocate. @@ -64,67 +64,67 @@ public: Free(); void - AllocateTempMem(ResPtr &resource, const int64_t& device_id, const int64_t& size); + AllocateTempMem(ResPtr& resource, const int64_t& device_id, const int64_t& size); void - InitDevice(int64_t device_id, - int64_t pin_mem_size = 0, - int64_t temp_mem_size = 0, - int64_t res_num = 2); + InitDevice(int64_t device_id, int64_t pin_mem_size = 0, int64_t temp_mem_size = 0, int64_t res_num = 2); void InitResource(); // allocate gpu memory invoke by build or copy_to_gpu ResPtr - GetRes(const int64_t &device_id, const int64_t& alloc_size = 0); + GetRes(const int64_t& device_id, const int64_t& alloc_size = 0); void - MoveToIdle(const int64_t &device_id, const ResPtr& res); + MoveToIdle(const int64_t& device_id, const ResPtr& res); void Dump(); -protected: + protected: bool is_init = false; - std::map> mutex_cache_; + std::map> mutex_cache_; std::map devices_params_; std::map idle_map_; }; class ResScope { -public: - ResScope(ResPtr &res, const int64_t& device_id, const bool& isown) - : resource(res), device_id(device_id), move(true), own(isown) { + public: + ResScope(ResPtr& res, const int64_t& device_id, const bool& isown) + : resource(res), device_id(device_id), move(true), own(isown) { Lock(); } // specif for search // get the ownership of gpuresource and gpu - ResScope(ResWPtr &res, const int64_t &device_id) - :device_id(device_id),move(false),own(true) { + ResScope(ResWPtr& res, const int64_t& device_id) : device_id(device_id), move(false), own(true) { resource = res.lock(); Lock(); } - void Lock() { - if (own) FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->lock(); + void + Lock() { + if (own) + FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->lock(); resource->mutex.lock(); } ~ResScope() { - if (own) FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->unlock(); - if (move) FaissGpuResourceMgr::GetInstance().MoveToIdle(device_id, resource); + if (own) + FaissGpuResourceMgr::GetInstance().mutex_cache_[device_id]->unlock(); + if (move) + FaissGpuResourceMgr::GetInstance().MoveToIdle(device_id, resource); resource->mutex.unlock(); } -private: - ResPtr resource; // hold resource until deconstruct + private: + ResPtr resource; // hold resource until deconstruct int64_t device_id; bool move = true; bool own = false; }; -} // knowhere -} // zilliz \ No newline at end of file +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissIO.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissIO.cpp index 48167fe925..30a54b388d 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissIO.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissIO.cpp @@ -15,51 +15,55 @@ // specific language governing permissions and limitations // under the License. - #include -#include "FaissIO.h" +#include "knowhere/index/vector_index/helpers/FaissIO.h" namespace zilliz { namespace knowhere { // TODO(linxj): Get From Config File static size_t magic_num = 2; -size_t MemoryIOWriter::operator()(const void *ptr, size_t size, size_t nitems) { + +size_t +MemoryIOWriter::operator()(const void* ptr, size_t size, size_t nitems) { auto total_need = size * nitems + rp; - if (!data_) { // data == nullptr + if (!data_) { // data == nullptr total = total_need * magic_num; rp = size * nitems; data_ = new uint8_t[total]; - memcpy((void *) (data_), ptr, rp); + memcpy((void*)(data_), ptr, rp); } if (total_need > total) { total = total_need * magic_num; auto new_data = new uint8_t[total]; - memcpy((void *) new_data, (void *) data_, rp); + memcpy((void*)new_data, (void*)data_, rp); delete data_; data_ = new_data; - memcpy((void *) (data_ + rp), ptr, size * nitems); + memcpy((void*)(data_ + rp), ptr, size * nitems); rp = total_need; } else { - memcpy((void *) (data_ + rp), ptr, size * nitems); + memcpy((void*)(data_ + rp), ptr, size * nitems); rp = total_need; } return nitems; } -size_t MemoryIOReader::operator()(void *ptr, size_t size, size_t nitems) { - if (rp >= total) return 0; +size_t +MemoryIOReader::operator()(void* ptr, size_t size, size_t nitems) { + if (rp >= total) + return 0; size_t nremain = (total - rp) / size; - if (nremain < nitems) nitems = nremain; - memcpy(ptr, (void *) (data_ + rp), size * nitems); + if (nremain < nitems) + nitems = nremain; + memcpy(ptr, (void*)(data_ + rp), size * nitems); rp += size * nitems; return nitems; } -} // knowhere -} // zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissIO.h b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissIO.h index cc089ec367..c850e89d9f 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissIO.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/FaissIO.h @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include @@ -24,25 +23,22 @@ namespace zilliz { namespace knowhere { struct MemoryIOWriter : public faiss::IOWriter { - uint8_t *data_ = nullptr; + uint8_t* data_ = nullptr; size_t total = 0; size_t rp = 0; size_t - operator()(const void *ptr, size_t size, size_t nitems) override; + operator()(const void* ptr, size_t size, size_t nitems) override; }; struct MemoryIOReader : public faiss::IOReader { - uint8_t *data_; + uint8_t* data_; size_t rp = 0; size_t total = 0; size_t - operator()(void *ptr, size_t size, size_t nitems) override; + operator()(void* ptr, size_t size, size_t nitems) override; }; -} // knowhere -} // zilliz - - - +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp index 9999444368..c4a179bc6a 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. - -#include "IndexParameter.h" +#include "knowhere/index/vector_index/helpers/IndexParameter.h" #include "knowhere/common/Exception.h" #include @@ -24,7 +23,8 @@ namespace zilliz { namespace knowhere { -faiss::MetricType GetMetricType(METRICTYPE &type) { +faiss::MetricType +GetMetricType(METRICTYPE& type) { if (type == METRICTYPE::L2) { return faiss::METRIC_L2; } @@ -35,6 +35,5 @@ faiss::MetricType GetMetricType(METRICTYPE &type) { KNOWHERE_THROW_MSG("Metric type is invalid"); } - -} -} +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h index cd873ed251..d5056d7ee6 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h @@ -15,17 +15,18 @@ // specific language governing permissions and limitations // under the License. - #pragma once -#include "knowhere/common/Config.h" #include +#include +#include "knowhere/common/Config.h" namespace zilliz { namespace knowhere { -extern faiss::MetricType GetMetricType(METRICTYPE &type); +extern faiss::MetricType +GetMetricType(METRICTYPE& type); // IVF Config constexpr int64_t DEFAULT_NLIST = INVALID_VALUE; @@ -46,11 +47,7 @@ struct IVFCfg : public Cfg { int64_t nlist = DEFAULT_NLIST; int64_t nprobe = DEFAULT_NPROBE; - IVFCfg(const int64_t &dim, - const int64_t &k, - const int64_t &gpu_id, - const int64_t &nlist, - const int64_t &nprobe, + IVFCfg(const int64_t& dim, const int64_t& k, const int64_t& gpu_id, const int64_t& nlist, const int64_t& nprobe, METRICTYPE type) : Cfg(dim, k, gpu_id, type), nlist(nlist), nprobe(nprobe) { } @@ -68,13 +65,8 @@ struct IVFSQCfg : public IVFCfg { // TODO(linxj): cpu only support SQ4 SQ6 SQ8 SQ16, gpu only support SQ4, SQ8, SQ16 int64_t nbits = DEFAULT_NBITS; - IVFSQCfg(const int64_t &dim, - const int64_t &k, - const int64_t &gpu_id, - const int64_t &nlist, - const int64_t &nprobe, - const int64_t &nbits, - METRICTYPE type) + IVFSQCfg(const int64_t& dim, const int64_t& k, const int64_t& gpu_id, const int64_t& nlist, const int64_t& nprobe, + const int64_t& nbits, METRICTYPE type) : IVFCfg(dim, k, gpu_id, nlist, nprobe, type), nbits(nbits) { } @@ -88,22 +80,16 @@ struct IVFSQCfg : public IVFCfg { using IVFSQConfig = std::shared_ptr; struct IVFPQCfg : public IVFCfg { - int64_t m = DEFAULT_NSUBVECTORS; // number of subquantizers(subvector) - int64_t nbits = DEFAULT_NBITS; // number of bit per subvector index + int64_t m = DEFAULT_NSUBVECTORS; // number of subquantizers(subvector) + int64_t nbits = DEFAULT_NBITS; // number of bit per subvector index // TODO(linxj): not use yet int64_t scan_table_threhold = DEFAULT_SCAN_TABLE_THREHOLD; int64_t polysemous_ht = DEFAULT_POLYSEMOUS_HT; int64_t max_codes = DEFAULT_MAX_CODES; - IVFPQCfg(const int64_t &dim, - const int64_t &k, - const int64_t &gpu_id, - const int64_t &nlist, - const int64_t &nprobe, - const int64_t &nbits, - const int64_t &m, - METRICTYPE type) + IVFPQCfg(const int64_t& dim, const int64_t& k, const int64_t& gpu_id, const int64_t& nlist, const int64_t& nprobe, + const int64_t& nbits, const int64_t& m, METRICTYPE type) : IVFCfg(dim, k, gpu_id, nlist, nprobe, type), m(m), nbits(nbits) { } @@ -122,19 +108,14 @@ struct NSGCfg : public IVFCfg { int64_t out_degree = DEFAULT_OUT_DEGREE; int64_t candidate_pool_size = DEFAULT_CANDIDATE_SISE; - NSGCfg(const int64_t &dim, - const int64_t &k, - const int64_t &gpu_id, - const int64_t &nlist, - const int64_t &nprobe, - const int64_t &knng, - const int64_t &search_length, - const int64_t &out_degree, - const int64_t &candidate_size, + NSGCfg(const int64_t& dim, const int64_t& k, const int64_t& gpu_id, const int64_t& nlist, const int64_t& nprobe, + const int64_t& knng, const int64_t& search_length, const int64_t& out_degree, const int64_t& candidate_size, METRICTYPE type) : IVFCfg(dim, k, gpu_id, nlist, nprobe, type), - knng(knng), search_length(search_length), - out_degree(out_degree), candidate_pool_size(candidate_size) { + knng(knng), + search_length(search_length), + out_degree(out_degree), + candidate_pool_size(candidate_size) { } NSGCfg() = default; @@ -150,6 +131,5 @@ struct KDTCfg : public Cfg { int64_t tptnubmber = -1; }; -} // knowhere -} // zilliz - +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.cpp index 3d9de271b7..015dc17fc5 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.cpp @@ -15,16 +15,14 @@ // specific language governing permissions and limitations // under the License. - #include -#include "KDTParameterMgr.h" - +#include "knowhere/index/vector_index/helpers/KDTParameterMgr.h" namespace zilliz { namespace knowhere { -const std::vector & +const std::vector& KDTParameterMgr::GetKDTParameters() { return kdt_parameters_; } @@ -55,5 +53,5 @@ KDTParameterMgr::KDTParameterMgr() { }; } -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.h b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.h index 4b43e642e2..ec2d73eebb 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.h @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. - #pragma once +#include #include +#include #include - namespace zilliz { namespace knowhere { @@ -29,18 +29,20 @@ using KDTParameter = std::pair; class KDTParameterMgr { public: - const std::vector & + const std::vector& GetKDTParameters(); public: - static KDTParameterMgr & + static KDTParameterMgr& GetInstance() { static KDTParameterMgr instance; return instance; } - KDTParameterMgr(const KDTParameterMgr &) = delete; - KDTParameterMgr &operator=(const KDTParameterMgr &) = delete; + KDTParameterMgr(const KDTParameterMgr&) = delete; + KDTParameterMgr& + operator=(const KDTParameterMgr&) = delete; + private: KDTParameterMgr(); @@ -48,5 +50,5 @@ class KDTParameterMgr { std::vector kdt_parameters_; }; -} // namespace knowhere -} // namespace zilliz +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSG.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSG.cpp index fcf77625db..19bc825d81 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSG.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSG.cpp @@ -15,29 +15,28 @@ // specific language governing permissions and limitations // under the License. -#include +#include #include -#include +#include #include +#include #include -#include +#include -#include "NSG.h" #include "knowhere/common/Exception.h" #include "knowhere/common/Log.h" #include "knowhere/common/Timer.h" -#include "NSGHelper.h" +#include "knowhere/index/vector_index/nsg/NSG.h" +#include "knowhere/index/vector_index/nsg/NSGHelper.h" // TODO: enable macro //#include - namespace zilliz { namespace knowhere { namespace algo { - -NsgIndex::NsgIndex(const size_t &dimension, const size_t &n, MetricType metric) +NsgIndex::NsgIndex(const size_t& dimension, const size_t& n, MetricType metric) : dimension(dimension), ntotal(n), metric_type(metric) { } @@ -46,16 +45,17 @@ NsgIndex::~NsgIndex() { delete[] ids_; } -//void NsgIndex::Build(size_t nb, const float *data, const BuildParam ¶meters) { +// void NsgIndex::Build(size_t nb, const float *data, const BuildParam ¶meters) { //} -void NsgIndex::Build_with_ids(size_t nb, const float *data, const long *ids, const BuildParams ¶meters) { +void +NsgIndex::Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters) { TimeRecorder rc("NSG"); ntotal = nb; ori_data_ = new float[ntotal * dimension]; - ids_ = new long[ntotal]; - memcpy((void *) ori_data_, (void *) data, sizeof(float) * ntotal * dimension); - memcpy((void *) ids_, (void *) ids, sizeof(long) * ntotal); + ids_ = new int64_t[ntotal]; + memcpy((void*)ori_data_, (void*)data, sizeof(float) * ntotal * dimension); + memcpy((void*)ids_, (void*)ids, sizeof(int64_t) * ntotal); search_length = parameters.search_length; out_degree = parameters.out_degree; @@ -69,8 +69,8 @@ void NsgIndex::Build_with_ids(size_t nb, const float *data, const long *ids, con //>> Debug code ///// - //int count = 0; - //for (int i = 0; i < ntotal; ++i) { + // int count = 0; + // for (int i = 0; i < ntotal; ++i) { // count += nsg[i].size(); //} ///// @@ -92,7 +92,8 @@ void NsgIndex::Build_with_ids(size_t nb, const float *data, const long *ids, con is_trained = true; } -void NsgIndex::InitNavigationPoint() { +void +NsgIndex::InitNavigationPoint() { // calculate the center of vectors auto center = new float[dimension]; memset(center, 0, sizeof(float) * dimension); @@ -108,11 +109,12 @@ void NsgIndex::InitNavigationPoint() { // select navigation point std::vector resset, fullset; - navigation_point = rand() % ntotal; // random initialize navigating point + unsigned int seed = 100; + navigation_point = rand_r(&seed) % ntotal; // random initialize navigating point //>> Debug code ///// - //navigation_point = drand48(); + // navigation_point = drand48(); ///// GetNeighbors(center, resset, knng); @@ -120,22 +122,21 @@ void NsgIndex::InitNavigationPoint() { //>> Debug code ///// - //std::cout << "ep: " << navigation_point << std::endl; + // std::cout << "ep: " << navigation_point << std::endl; ///// //>> Debug code ///// - //float r1 = calculate(center, ori_data_ + navigation_point * dimension, dimension); - //assert(r1 == resset[0].distance); + // float r1 = calculate(center, ori_data_ + navigation_point * dimension, dimension); + // assert(r1 == resset[0].distance); ///// } // Specify Link -void NsgIndex::GetNeighbors(const float *query, - std::vector &resset, - std::vector &fullset, - boost::dynamic_bitset<> &has_calculated_dist) { - auto &graph = knng; +void +NsgIndex::GetNeighbors(const float* query, std::vector& resset, std::vector& fullset, + boost::dynamic_bitset<>& has_calculated_dist) { + auto& graph = knng; size_t buffer_size = search_length; if (buffer_size > ntotal) { @@ -156,9 +157,12 @@ void NsgIndex::GetNeighbors(const float *query, has_calculated_dist[init_ids[i]] = true; ++count; } + + unsigned int seed = 100; while (count < buffer_size) { - node_t id = rand() % ntotal; - if (has_calculated_dist[id]) continue; // duplicate id + node_t id = rand_r(&seed) % ntotal; + if (has_calculated_dist[id]) + continue; // duplicate id init_ids.push_back(id); ++count; has_calculated_dist[id] = true; @@ -184,9 +188,9 @@ void NsgIndex::GetNeighbors(const float *query, fullset.push_back(resset[i]); /////////////////////////////////////// } - std::sort(resset.begin(), resset.end()); // sort by distance + std::sort(resset.begin(), resset.end()); // sort by distance - //search nearest neighbor + // search nearest neighbor size_t cursor = 0; while (cursor < buffer_size) { size_t nearest_updated_pos = buffer_size; @@ -195,36 +199,42 @@ void NsgIndex::GetNeighbors(const float *query, resset[cursor].has_explored = true; node_t start_pos = resset[cursor].id; - auto &wait_for_search_node_vec = graph[start_pos]; + auto& wait_for_search_node_vec = graph[start_pos]; for (size_t i = 0; i < wait_for_search_node_vec.size(); ++i) { node_t id = wait_for_search_node_vec[i]; - if (has_calculated_dist[id]) continue; + if (has_calculated_dist[id]) + continue; has_calculated_dist[id] = true; - float - dist = calculate(query, ori_data_ + dimension * id, dimension); + float dist = calculate(query, ori_data_ + dimension * id, dimension); Neighbor nn(id, dist, false); fullset.push_back(nn); - if (dist >= resset[buffer_size - 1].distance) continue; + if (dist >= resset[buffer_size - 1].distance) + continue; - size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node - if (pos < nearest_updated_pos) nearest_updated_pos = pos; + size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node + if (pos < nearest_updated_pos) + nearest_updated_pos = pos; - //assert(buffer_size + 1 >= resset.size()); - if (buffer_size + 1 < resset.size()) ++buffer_size; + // assert(buffer_size + 1 >= resset.size()); + if (buffer_size + 1 < resset.size()) + ++buffer_size; } } if (cursor >= nearest_updated_pos) { - cursor = nearest_updated_pos; // re-search from new pos - } else ++cursor; + cursor = nearest_updated_pos; // re-search from new pos + } else { + ++cursor; + } } } } // FindUnconnectedNode -void NsgIndex::GetNeighbors(const float *query, std::vector &resset, std::vector &fullset) { - auto &graph = nsg; +void +NsgIndex::GetNeighbors(const float* query, std::vector& resset, std::vector& fullset) { + auto& graph = nsg; size_t buffer_size = search_length; if (buffer_size > ntotal) { @@ -232,7 +242,7 @@ void NsgIndex::GetNeighbors(const float *query, std::vector &resset, s } std::vector init_ids; - boost::dynamic_bitset<> has_calculated_dist{ntotal, 0}; // TODO: ? + boost::dynamic_bitset<> has_calculated_dist{ntotal, 0}; // TODO: ? { /* @@ -246,9 +256,11 @@ void NsgIndex::GetNeighbors(const float *query, std::vector &resset, s has_calculated_dist[init_ids[i]] = true; ++count; } + unsigned int seed = 100; while (count < buffer_size) { - node_t id = rand() % ntotal; - if (has_calculated_dist[id]) continue; // duplicate id + node_t id = rand_r(&seed) % ntotal; + if (has_calculated_dist[id]) + continue; // duplicate id init_ids.push_back(id); ++count; has_calculated_dist[id] = true; @@ -270,7 +282,7 @@ void NsgIndex::GetNeighbors(const float *query, std::vector &resset, s float dist = calculate(ori_data_ + id * dimension, query, dimension); resset[i] = Neighbor(id, dist, false); } - std::sort(resset.begin(), resset.end()); // sort by distance + std::sort(resset.begin(), resset.end()); // sort by distance // search nearest neighbor size_t cursor = 0; @@ -281,38 +293,41 @@ void NsgIndex::GetNeighbors(const float *query, std::vector &resset, s resset[cursor].has_explored = true; node_t start_pos = resset[cursor].id; - auto &wait_for_search_node_vec = graph[start_pos]; + auto& wait_for_search_node_vec = graph[start_pos]; for (size_t i = 0; i < wait_for_search_node_vec.size(); ++i) { node_t id = wait_for_search_node_vec[i]; - if (has_calculated_dist[id]) continue; + if (has_calculated_dist[id]) + continue; has_calculated_dist[id] = true; - float - dist = calculate(ori_data_ + dimension * id, query, dimension); + float dist = calculate(ori_data_ + dimension * id, query, dimension); Neighbor nn(id, dist, false); fullset.push_back(nn); - if (dist >= resset[buffer_size - 1].distance) continue; + if (dist >= resset[buffer_size - 1].distance) + continue; - size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node - if (pos < nearest_updated_pos) nearest_updated_pos = pos; + size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node + if (pos < nearest_updated_pos) + nearest_updated_pos = pos; - //assert(buffer_size + 1 >= resset.size()); - if (buffer_size + 1 < resset.size()) ++buffer_size; // trick + // assert(buffer_size + 1 >= resset.size()); + if (buffer_size + 1 < resset.size()) + ++buffer_size; // trick } } if (cursor >= nearest_updated_pos) { - cursor = nearest_updated_pos; // re-search from new pos - } else ++cursor; + cursor = nearest_updated_pos; // re-search from new pos + } else { + ++cursor; + } } } } -void NsgIndex::GetNeighbors(const float *query, - std::vector &resset, - Graph &graph, - SearchParams *params) { - size_t &buffer_size = params ? params->search_length : search_length; +void +NsgIndex::GetNeighbors(const float* query, std::vector& resset, Graph& graph, SearchParams* params) { + size_t& buffer_size = params ? params->search_length : search_length; if (buffer_size > ntotal) { // TODO: throw exception here. @@ -333,9 +348,11 @@ void NsgIndex::GetNeighbors(const float *query, has_calculated_dist[init_ids[i]] = true; ++count; } + unsigned int seed = 100; while (count < buffer_size) { - node_t id = rand() % ntotal; - if (has_calculated_dist[id]) continue; // duplicate id + node_t id = rand_r(&seed) % ntotal; + if (has_calculated_dist[id]) + continue; // duplicate id init_ids.push_back(id); ++count; has_calculated_dist[id] = true; @@ -349,7 +366,7 @@ void NsgIndex::GetNeighbors(const float *query, for (size_t i = 0; i < init_ids.size(); ++i) { node_t id = init_ids[i]; - //assert(id < ntotal); + // assert(id < ntotal); if (id >= static_cast(ntotal)) { KNOWHERE_THROW_MSG("Build Index Error, id > ntotal"); continue; @@ -358,11 +375,11 @@ void NsgIndex::GetNeighbors(const float *query, float dist = calculate(ori_data_ + id * dimension, query, dimension); resset[i] = Neighbor(id, dist, false); } - std::sort(resset.begin(), resset.end()); // sort by distance + std::sort(resset.begin(), resset.end()); // sort by distance //>> Debug code ///// - //for (int j = 0; j < buffer_size; ++j) { + // for (int j = 0; j < buffer_size; ++j) { // std::cout << "resset_id: " << resset[j].id << ", resset_dist: " << resset[j].distance << std::endl; //} ///// @@ -376,41 +393,47 @@ void NsgIndex::GetNeighbors(const float *query, resset[cursor].has_explored = true; node_t start_pos = resset[cursor].id; - auto &wait_for_search_node_vec = graph[start_pos]; + auto& wait_for_search_node_vec = graph[start_pos]; for (size_t i = 0; i < wait_for_search_node_vec.size(); ++i) { node_t id = wait_for_search_node_vec[i]; - if (has_calculated_dist[id]) continue; + if (has_calculated_dist[id]) + continue; has_calculated_dist[id] = true; - float - dist = calculate(query, ori_data_ + dimension * id, dimension); + float dist = calculate(query, ori_data_ + dimension * id, dimension); - if (dist >= resset[buffer_size - 1].distance) continue; + if (dist >= resset[buffer_size - 1].distance) + continue; ///////////// difference from other GetNeighbors /////////////// Neighbor nn(id, dist, false); /////////////////////////////////////// - size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node - if (pos < nearest_updated_pos) nearest_updated_pos = pos; + size_t pos = InsertIntoPool(resset.data(), buffer_size, nn); // replace with a closer node + if (pos < nearest_updated_pos) + nearest_updated_pos = pos; //>> Debug code ///// - //std::cout << "pos: " << pos << ", nn: " << nn.id << ":" << nn.distance << ", nup: " << nearest_updated_pos << std::endl; + // std::cout << "pos: " << pos << ", nn: " << nn.id << ":" << nn.distance << ", nup: " << + // nearest_updated_pos << std::endl; ///// - // trick: avoid search query search_length < init_ids.size() ... - if (buffer_size + 1 < resset.size()) ++buffer_size; + if (buffer_size + 1 < resset.size()) + ++buffer_size; } } if (cursor >= nearest_updated_pos) { - cursor = nearest_updated_pos; // re-search from new pos - } else ++cursor; + cursor = nearest_updated_pos; // re-search from new pos + } else { + ++cursor; + } } } } -void NsgIndex::Link() { +void +NsgIndex::Link() { auto cut_graph_dist = new float[ntotal * out_degree]; nsg.resize(ntotal); @@ -418,7 +441,7 @@ void NsgIndex::Link() { { std::vector fullset; std::vector temp; - boost::dynamic_bitset<> flags{ntotal, 0}; // TODO: ? + boost::dynamic_bitset<> flags{ntotal, 0}; // TODO: ? #pragma omp for schedule(dynamic, 100) for (size_t n = 0; n < ntotal; ++n) { fullset.clear(); @@ -427,8 +450,8 @@ void NsgIndex::Link() { //>> Debug code ///// - //float r1 = calculate(ori_data_ + n * dimension, ori_data_ + temp[0].id * dimension, dimension); - //assert(r1 == temp[0].distance); + // float r1 = calculate(ori_data_ + n * dimension, ori_data_ + temp[0].id * dimension, dimension); + // assert(r1 == temp[0].distance); ///// SyncPrune(n, fullset, flags, cut_graph_dist); } @@ -436,7 +459,7 @@ void NsgIndex::Link() { //>> Debug code ///// - //auto bak_nsg = nsg; + // auto bak_nsg = nsg; ///// knng.clear(); @@ -452,8 +475,8 @@ void NsgIndex::Link() { //>> Debug code ///// - //int count = 0; - //for (int i = 0; i < ntotal; ++i) { + // int count = 0; + // for (int i = 0; i < ntotal; ++i) { // if (bak_nsg[i].size() != nsg[i].size()) { // //count += nsg[i].size() - bak_nsg[i].size(); // count += nsg[i].size(); @@ -466,16 +489,15 @@ void NsgIndex::Link() { } } -void NsgIndex::SyncPrune(size_t n, - std::vector &pool, - boost::dynamic_bitset<> &has_calculated, - float *cut_graph_dist) { +void +NsgIndex::SyncPrune(size_t n, std::vector& pool, boost::dynamic_bitset<>& has_calculated, + float* cut_graph_dist) { // avoid lose nearest neighbor in knng for (size_t i = 0; i < knng[n].size(); ++i) { auto id = knng[n][i]; - if (has_calculated[id]) continue; - float dist = calculate(ori_data_ + dimension * n, - ori_data_ + dimension * id, dimension); + if (has_calculated[id]) + continue; + float dist = calculate(ori_data_ + dimension * n, ori_data_ + dimension * id, dimension); pool.emplace_back(Neighbor(id, dist, true)); } @@ -486,13 +508,13 @@ void NsgIndex::SyncPrune(size_t n, if (pool[cursor].id == static_cast(n)) { cursor++; } - result.push_back(pool[cursor]); // init result with nearest neighbor + result.push_back(pool[cursor]); // init result with nearest neighbor SelectEdge(cursor, pool, result, true); // filling the cut_graph - auto &des_id_pool = nsg[n]; - float *des_dist_pool = cut_graph_dist + n * out_degree; + auto& des_id_pool = nsg[n]; + float* des_dist_pool = cut_graph_dist + n * out_degree; for (size_t i = 0; i < result.size(); ++i) { des_id_pool.push_back(result[i].id); des_dist_pool[i] = result[i].distance; @@ -504,24 +526,27 @@ void NsgIndex::SyncPrune(size_t n, } //>> Optimize: remove read-lock -void NsgIndex::InterInsert(unsigned n, std::vector &mutex_vec, float *cut_graph_dist) { - auto ¤t = n; +void +NsgIndex::InterInsert(unsigned n, std::vector& mutex_vec, float* cut_graph_dist) { + auto& current = n; - auto &neighbor_id_pool = nsg[current]; - float *neighbor_dist_pool = cut_graph_dist + current * out_degree; + auto& neighbor_id_pool = nsg[current]; + float* neighbor_dist_pool = cut_graph_dist + current * out_degree; for (size_t i = 0; i < out_degree; ++i) { - if (neighbor_dist_pool[i] == -1) break; + if (neighbor_dist_pool[i] == -1) + break; - size_t current_neighbor = neighbor_id_pool[i]; // center's neighbor id - auto &nsn_id_pool = nsg[current_neighbor]; // nsn => neighbor's neighbor - float *nsn_dist_pool = cut_graph_dist + current_neighbor * out_degree; + size_t current_neighbor = neighbor_id_pool[i]; // center's neighbor id + auto& nsn_id_pool = nsg[current_neighbor]; // nsn => neighbor's neighbor + float* nsn_dist_pool = cut_graph_dist + current_neighbor * out_degree; - std::vector wait_for_link_pool; // maintain candidate neighbor of the current neighbor. + std::vector wait_for_link_pool; // maintain candidate neighbor of the current neighbor. int duplicate = false; { LockGuard lk(mutex_vec[current_neighbor]); for (size_t j = 0; j < out_degree; ++j) { - if (nsn_dist_pool[j] == -1) break; + if (nsn_dist_pool[j] == -1) + break; // 保证至少有一条边能连回来 if (n == nsn_id_pool[j]) { @@ -533,7 +558,8 @@ void NsgIndex::InterInsert(unsigned n, std::vector &mutex_vec, float wait_for_link_pool.push_back(nsn); } } - if (duplicate) continue; + if (duplicate) + continue; // original: (neighbor) <------- (current) // after: (neighbor) -------> (current) @@ -564,20 +590,18 @@ void NsgIndex::InterInsert(unsigned n, std::vector &mutex_vec, float if (nsn_dist_pool[j] == -1) { nsn_id_pool.push_back(current_as_neighbor.id); nsn_dist_pool[j] = current_as_neighbor.distance; - if (j + 1 < out_degree) nsn_dist_pool[j + 1] = -1; + if (j + 1 < out_degree) + nsn_dist_pool[j + 1] = -1; break; } } } - } } -void NsgIndex::SelectEdge(unsigned &cursor, - std::vector &sort_pool, - std::vector &result, - bool limit) { - auto &pool = sort_pool; +void +NsgIndex::SelectEdge(unsigned& cursor, std::vector& sort_pool, std::vector& result, bool limit) { + auto& pool = sort_pool; /* * edge selection @@ -587,22 +611,23 @@ void NsgIndex::SelectEdge(unsigned &cursor, */ size_t search_deepth = limit ? candidate_pool_size : pool.size(); while (result.size() < out_degree && cursor < search_deepth && (++cursor) < pool.size()) { - auto &p = pool[cursor]; + auto& p = pool[cursor]; bool should_link = true; for (size_t t = 0; t < result.size(); ++t) { - float dist = calculate(ori_data_ + dimension * result[t].id, - ori_data_ + dimension * p.id, dimension); + float dist = calculate(ori_data_ + dimension * result[t].id, ori_data_ + dimension * p.id, dimension); if (dist < p.distance) { should_link = false; break; } } - if (should_link) result.push_back(p); + if (should_link) + result.push_back(p); } } -void NsgIndex::CheckConnectivity() { +void +NsgIndex::CheckConnectivity() { auto root = navigation_point; boost::dynamic_bitset<> has_linked{ntotal, 0}; int64_t linked_count = 0; @@ -616,28 +641,29 @@ void NsgIndex::CheckConnectivity() { } } -void NsgIndex::DFS(size_t root, boost::dynamic_bitset<> &has_linked, int64_t &linked_count) { +void +NsgIndex::DFS(size_t root, boost::dynamic_bitset<>& has_linked, int64_t& linked_count) { size_t start = root; std::stack s; s.push(root); if (!has_linked[root]) { - linked_count++; // not link - has_linked[root] = true; // link start... + linked_count++; // not link + has_linked[root] = true; // link start... } while (!s.empty()) { size_t next = ntotal + 1; for (unsigned i = 0; i < nsg[start].size(); i++) { - if (has_linked[nsg[start][i]] == false) // if not link - { + if (has_linked[nsg[start][i]] == false) { // if not link next = nsg[start][i]; break; } } if (next == (ntotal + 1)) { s.pop(); - if (s.empty()) break; + if (s.empty()) + break; start = s.top(); continue; } @@ -648,17 +674,19 @@ void NsgIndex::DFS(size_t root, boost::dynamic_bitset<> &has_linked, int64_t &li } } -void NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<> &has_linked, int64_t &root) { +void +NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<>& has_linked, int64_t& root) { // find any of unlinked-node size_t id = ntotal; - for (size_t i = 0; i < ntotal; i++) { // find not link + for (size_t i = 0; i < ntotal; i++) { // find not link if (has_linked[i] == false) { id = i; break; } } - if (id == ntotal) return; // No Unlinked Node + if (id == ntotal) + return; // No Unlinked Node // search unlinked-node's neighbor std::vector tmp, pool; @@ -666,7 +694,7 @@ void NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<> &has_linked, int64_t std::sort(pool.begin(), pool.end()); size_t found = 0; - for (size_t i = 0; i < pool.size(); i++) { // find nearest neighbor and add unlinked-node as its neighbor + for (size_t i = 0; i < pool.size(); i++) { // find nearest neighbor and add unlinked-node as its neighbor if (has_linked[pool[i].id]) { root = pool[i].id; found = 1; @@ -674,8 +702,9 @@ void NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<> &has_linked, int64_t } } if (found == 0) { - while (true) { // random a linked-node and add unlinked-node as its neighbor - size_t rid = rand() % ntotal; + unsigned int seed = 100; + while (true) { // random a linked-node and add unlinked-node as its neighbor + size_t rid = rand_r(&seed) % ntotal; if (has_linked[rid]) { root = rid; break; @@ -685,22 +714,17 @@ void NsgIndex::FindUnconnectedNode(boost::dynamic_bitset<> &has_linked, int64_t nsg[root].push_back(id); } - -void NsgIndex::Search(const float *query, - const unsigned &nq, - const unsigned &dim, - const unsigned &k, - float *dist, - long *ids, - SearchParams ¶ms) { +void +NsgIndex::Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist, + int64_t* ids, SearchParams& params) { std::vector> resset(nq); TimeRecorder rc("search"); if (nq == 1) { GetNeighbors(query, resset[0], nsg, ¶ms); - } else{ - //#pragma omp parallel for schedule(dynamic, 50) - #pragma omp parallel for + } else { +//#pragma omp parallel for schedule(dynamic, 50) +#pragma omp parallel for for (unsigned int i = 0; i < nq; ++i) { // TODO(linxj): when to use openmp auto single_query = query + i * dim; @@ -711,7 +735,7 @@ void NsgIndex::Search(const float *query, for (unsigned int i = 0; i < nq; ++i) { for (unsigned int j = 0; j < k; ++j) { - //ids[i * k + j] = resset[i][j].id; + // ids[i * k + j] = resset[i][j].id; // Fix(linxj): bug, reset[i][j] out of range ids[i * k + j] = ids_[resset[i][j].id]; @@ -720,27 +744,28 @@ void NsgIndex::Search(const float *query, } //>> Debug: test single insert - //int x_0 = resset[0].size(); - //for (int l = 0; l < resset[0].size(); ++l) { + // int x_0 = resset[0].size(); + // for (int l = 0; l < resset[0].size(); ++l) { // resset[0].pop_back(); //} - //resset.clear(); + // resset.clear(); - //ProfilerStart("xx.prof"); - //std::vector resset; - //GetNeighbors(query, resset, nsg, ¶ms); - //for (int i = 0; i < k; ++i) { + // ProfilerStart("xx.prof"); + // std::vector resset; + // GetNeighbors(query, resset, nsg, ¶ms); + // for (int i = 0; i < k; ++i) { // ids[i] = resset[i].id; - //dist[i] = resset[i].distance; + // dist[i] = resset[i].distance; //} - //ProfilerStop(); + // ProfilerStop(); } -void NsgIndex::SetKnnGraph(Graph &g) { +void +NsgIndex::SetKnnGraph(Graph& g) { knng = std::move(g); } -//void NsgIndex::GetKnnGraphFromFile() { +// void NsgIndex::GetKnnGraphFromFile() { // //std::string filename = "/home/zilliz/opt/workspace/wook/efanna_graph/tests/sift.1M.50NN.graph"; // std::string filename = "/home/zilliz/opt/workspace/wook/efanna_graph/tests/sift.50NN.graph"; // @@ -765,6 +790,6 @@ void NsgIndex::SetKnnGraph(Graph &g) { // in.close(); //} -} -} -} +} // namespace algo +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSG.h b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSG.h index 5291c17280..e075260398 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSG.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSG.h @@ -15,22 +15,19 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include -#include #include +#include #include #include "Neighbor.h" - namespace zilliz { namespace knowhere { namespace algo { - using node_t = int64_t; enum class MetricType { @@ -53,15 +50,15 @@ using Graph = std::vector>; class NsgIndex { public: size_t dimension; - size_t ntotal; // totabl nb of indexed vectors - MetricType metric_type; // L2 | IP + size_t ntotal; // totabl nb of indexed vectors + MetricType metric_type; // L2 | IP - float *ori_data_; - long *ids_; // TODO: support different type - Graph nsg; // final graph - Graph knng; // reset after build + float* ori_data_; + int64_t* ids_; // TODO: support different type + Graph nsg; // final graph + Graph knng; // reset after build - node_t navigation_point; // offset of node in origin data + node_t navigation_point; // offset of node in origin data bool is_trained = false; @@ -69,91 +66,81 @@ class NsgIndex { * build and search parameter */ size_t search_length; - size_t candidate_pool_size; // search deepth in fullset + size_t candidate_pool_size; // search deepth in fullset size_t out_degree; public: - explicit NsgIndex(const size_t &dimension, - const size_t &n, - MetricType metric = MetricType::METRIC_L2); + explicit NsgIndex(const size_t& dimension, const size_t& n, MetricType metric = MetricType::METRIC_L2); NsgIndex() = default; virtual ~NsgIndex(); - void SetKnnGraph(Graph &knng); + void + SetKnnGraph(Graph& knng); - virtual void Build_with_ids(size_t nb, - const float *data, - const long *ids, - const BuildParams ¶meters); + virtual void + Build_with_ids(size_t nb, const float* data, const int64_t* ids, const BuildParams& parameters); - void Search(const float *query, - const unsigned &nq, - const unsigned &dim, - const unsigned &k, - float *dist, - long *ids, - SearchParams ¶ms); + void + Search(const float* query, const unsigned& nq, const unsigned& dim, const unsigned& k, float* dist, int64_t* ids, + SearchParams& params); // Not support yet. - //virtual void Add() = 0; - //virtual void Add_with_ids() = 0; - //virtual void Delete() = 0; - //virtual void Delete_with_ids() = 0; - //virtual void Rebuild(size_t nb, + // virtual void Add() = 0; + // virtual void Add_with_ids() = 0; + // virtual void Delete() = 0; + // virtual void Delete_with_ids() = 0; + // virtual void Rebuild(size_t nb, // const float *data, - // const long *ids, + // const int64_t *ids, // const Parameters ¶meters) = 0; - //virtual void Build(size_t nb, + // virtual void Build(size_t nb, // const float *data, // const BuildParam ¶meters); protected: - virtual void InitNavigationPoint(); + virtual void + InitNavigationPoint(); // link specify - void GetNeighbors(const float *query, - std::vector &resset, - std::vector &fullset, - boost::dynamic_bitset<> &has_calculated_dist); + void + GetNeighbors(const float* query, std::vector& resset, std::vector& fullset, + boost::dynamic_bitset<>& has_calculated_dist); // FindUnconnectedNode - void GetNeighbors(const float *query, - std::vector &resset, - std::vector &fullset); + void + GetNeighbors(const float* query, std::vector& resset, std::vector& fullset); // search and navigation-point - void GetNeighbors(const float *query, - std::vector &resset, - Graph &graph, - SearchParams *param = nullptr); + void + GetNeighbors(const float* query, std::vector& resset, Graph& graph, SearchParams* param = nullptr); - void Link(); + void + Link(); - void SyncPrune(size_t q, - std::vector &pool, - boost::dynamic_bitset<> &has_calculated, - float *cut_graph_dist - ); + void + SyncPrune(size_t q, std::vector& pool, boost::dynamic_bitset<>& has_calculated, float* cut_graph_dist); - void SelectEdge(unsigned &cursor, - std::vector &sort_pool, - std::vector &result, - bool limit = false); + void + SelectEdge(unsigned& cursor, std::vector& sort_pool, std::vector& result, bool limit = false); - void InterInsert(unsigned n, std::vector &mutex_vec, float *dist); + void + InterInsert(unsigned n, std::vector& mutex_vec, float* dist); - void CheckConnectivity(); + void + CheckConnectivity(); - void DFS(size_t root, boost::dynamic_bitset<> &flags, int64_t &count); + void + DFS(size_t root, boost::dynamic_bitset<>& flags, int64_t& count); - void FindUnconnectedNode(boost::dynamic_bitset<> &flags, int64_t &root); + void + FindUnconnectedNode(boost::dynamic_bitset<>& flags, int64_t& root); - //private: - // void GetKnnGraphFromFile(); + // private: + // void GetKnnGraphFromFile(); }; -} -} -} +} // namespace algo +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGHelper.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGHelper.cpp index 77cf0e9464..1dd306bf77 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGHelper.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGHelper.cpp @@ -15,19 +15,18 @@ // specific language governing permissions and limitations // under the License. - #include #include -#include "NSGHelper.h" - +#include "knowhere/index/vector_index/nsg/NSGHelper.h" namespace zilliz { namespace knowhere { namespace algo { // TODO: impl search && insert && return insert pos. why not just find and swap? -int InsertIntoPool(Neighbor *addr, unsigned K, Neighbor nn) { +int +InsertIntoPool(Neighbor* addr, unsigned K, Neighbor nn) { //>> Fix: Add assert for (unsigned int i = 0; i < K; ++i) { assert(addr[i].id != nn.id); @@ -37,7 +36,7 @@ int InsertIntoPool(Neighbor *addr, unsigned K, Neighbor nn) { int left = 0, right = K - 1; if (addr[left].distance > nn.distance) { //>> Fix: memmove overflow, dump when vector deconstruct - memmove((char *) &addr[left + 1], &addr[left], (K - 1) * sizeof(Neighbor)); + memmove((char*)&addr[left + 1], &addr[left], (K - 1) * sizeof(Neighbor)); addr[left] = nn; return left; } @@ -52,10 +51,10 @@ int InsertIntoPool(Neighbor *addr, unsigned K, Neighbor nn) { else left = mid; } - //check equal ID + // check equal ID while (left > 0) { - if (addr[left].distance < nn.distance) // pos is right + if (addr[left].distance < nn.distance) // pos is right break; if (addr[left].id == nn.id) return K + 1; @@ -65,24 +64,25 @@ int InsertIntoPool(Neighbor *addr, unsigned K, Neighbor nn) { return K + 1; //>> Fix: memmove overflow, dump when vector deconstruct - memmove((char *) &addr[right + 1], &addr[right], (K - 1 - right) * sizeof(Neighbor)); + memmove((char*)&addr[right + 1], &addr[right], (K - 1 - right) * sizeof(Neighbor)); addr[right] = nn; return right; } // TODO: support L2 / IP -float calculate(const float *a, const float *b, unsigned size) { +float +calculate(const float* a, const float* b, unsigned size) { float result = 0; #ifdef __GNUC__ #ifdef __AVX__ #define AVX_L2SQR(addr1, addr2, dest, tmp1, tmp2) \ - tmp1 = _mm256_loadu_ps(addr1);\ - tmp2 = _mm256_loadu_ps(addr2);\ - tmp1 = _mm256_sub_ps(tmp1, tmp2); \ - tmp1 = _mm256_mul_ps(tmp1, tmp1); \ - dest = _mm256_add_ps(dest, tmp1); + tmp1 = _mm256_loadu_ps(addr1); \ + tmp2 = _mm256_loadu_ps(addr2); \ + tmp1 = _mm256_sub_ps(tmp1, tmp2); \ + tmp1 = _mm256_mul_ps(tmp1, tmp1); \ + dest = _mm256_add_ps(dest, tmp1); __m256 sum; __m256 l0, l1; @@ -90,14 +90,16 @@ float calculate(const float *a, const float *b, unsigned size) { unsigned D = (size + 7) & ~7U; unsigned DR = D % 16; unsigned DD = D - DR; - const float *l = a; - const float *r = b; - const float *e_l = l + DD; - const float *e_r = r + DD; - float unpack[8] __attribute__ ((aligned (32))) = {0, 0, 0, 0, 0, 0, 0, 0}; + const float* l = a; + const float* r = b; + const float* e_l = l + DD; + const float* e_r = r + DD; + float unpack[8] __attribute__((aligned(32))) = {0, 0, 0, 0, 0, 0, 0, 0}; sum = _mm256_loadu_ps(unpack); - if (DR) { AVX_L2SQR(e_l, e_r, sum, l0, r0); } + if (DR) { + AVX_L2SQR(e_l, e_r, sum, l0, r0); + } for (unsigned i = 0; i < DD; i += 16, l += 16, r += 16) { AVX_L2SQR(l, r, sum, l0, r0); @@ -109,11 +111,11 @@ float calculate(const float *a, const float *b, unsigned size) { #else #ifdef __SSE2__ #define SSE_L2SQR(addr1, addr2, dest, tmp1, tmp2) \ - tmp1 = _mm_load_ps(addr1);\ - tmp2 = _mm_load_ps(addr2);\ - tmp1 = _mm_sub_ps(tmp1, tmp2); \ - tmp1 = _mm_mul_ps(tmp1, tmp1); \ - dest = _mm_add_ps(dest, tmp1); + tmp1 = _mm_load_ps(addr1); \ + tmp2 = _mm_load_ps(addr2); \ + tmp1 = _mm_sub_ps(tmp1, tmp2); \ + tmp1 = _mm_mul_ps(tmp1, tmp1); \ + dest = _mm_add_ps(dest, tmp1); __m128 sum; __m128 l0, l1, l2, l3; @@ -121,18 +123,22 @@ float calculate(const float *a, const float *b, unsigned size) { unsigned D = (size + 3) & ~3U; unsigned DR = D % 16; unsigned DD = D - DR; - const float *l = a; - const float *r = b; - const float *e_l = l + DD; - const float *e_r = r + DD; - float unpack[4] __attribute__ ((aligned (16))) = {0, 0, 0, 0}; + const float* l = a; + const float* r = b; + const float* e_l = l + DD; + const float* e_r = r + DD; + float unpack[4] __attribute__((aligned(16))) = {0, 0, 0, 0}; sum = _mm_load_ps(unpack); switch (DR) { - case 12:SSE_L2SQR(e_l + 8, e_r + 8, sum, l2, r2); - case 8:SSE_L2SQR(e_l + 4, e_r + 4, sum, l1, r1); - case 4:SSE_L2SQR(e_l, e_r, sum, l0, r0); - default:break; + case 12: + SSE_L2SQR(e_l + 8, e_r + 8, sum, l2, r2); + case 8: + SSE_L2SQR(e_l + 4, e_r + 4, sum, l1, r1); + case 4: + SSE_L2SQR(e_l, e_r, sum, l0, r0); + default: + break; } for (unsigned i = 0; i < DD; i += 16, l += 16, r += 16) { SSE_L2SQR(l, r, sum, l0, r0); @@ -143,28 +149,28 @@ float calculate(const float *a, const float *b, unsigned size) { _mm_storeu_ps(unpack, sum); result += unpack[0] + unpack[1] + unpack[2] + unpack[3]; -//nomal distance +// nomal distance #else float diff0, diff1, diff2, diff3; - const float* last = a + size; - const float* unroll_group = last - 3; + const float* last = a + size; + const float* unroll_group = last - 3; - /* Process 4 items with each loop for efficiency. */ - while (a < unroll_group) { - diff0 = a[0] - b[0]; - diff1 = a[1] - b[1]; - diff2 = a[2] - b[2]; - diff3 = a[3] - b[3]; - result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; - a += 4; - b += 4; - } - /* Process last 0-3 pixels. Not needed for standard vector lengths. */ - while (a < last) { - diff0 = *a++ - *b++; - result += diff0 * diff0; - } + /* Process 4 items with each loop for efficiency. */ + while (a < unroll_group) { + diff0 = a[0] - b[0]; + diff1 = a[1] - b[1]; + diff2 = a[2] - b[2]; + diff3 = a[3] - b[3]; + result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; + a += 4; + b += 4; + } + /* Process last 0-3 pixels. Not needed for standard vector lengths. */ + while (a < last) { + diff0 = *a++ - *b++; + result += diff0 * diff0; + } #endif #endif #endif @@ -172,7 +178,6 @@ float calculate(const float *a, const float *b, unsigned size) { return result; } - -} -} -} \ No newline at end of file +} // namespace algo +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGHelper.h b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGHelper.h index f5c13194c4..cb8007f2b1 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGHelper.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGHelper.h @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include @@ -26,14 +25,15 @@ #include "NSG.h" #include "knowhere/common/Config.h" - namespace zilliz { namespace knowhere { namespace algo { -extern int InsertIntoPool(Neighbor *addr, unsigned K, Neighbor nn); -extern float calculate(const float *a, const float *b, unsigned size); +extern int +InsertIntoPool(Neighbor* addr, unsigned K, Neighbor nn); +extern float +calculate(const float* a, const float* b, unsigned size); -} -} -} +} // namespace algo +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGIO.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGIO.cpp index bcdde8052c..db3113da2f 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGIO.cpp +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGIO.cpp @@ -15,31 +15,31 @@ // specific language governing permissions and limitations // under the License. - #include -#include "NSGIO.h" - +#include "knowhere/index/vector_index/nsg/NSGIO.h" namespace zilliz { namespace knowhere { namespace algo { -void write_index(NsgIndex *index, MemoryIOWriter &writer) { +void +write_index(NsgIndex* index, MemoryIOWriter& writer) { writer(&index->ntotal, sizeof(index->ntotal), 1); writer(&index->dimension, sizeof(index->dimension), 1); writer(&index->navigation_point, sizeof(index->navigation_point), 1); writer(index->ori_data_, sizeof(float) * index->ntotal * index->dimension, 1); - writer(index->ids_, sizeof(long) * index->ntotal, 1); + writer(index->ids_, sizeof(int64_t) * index->ntotal, 1); for (unsigned i = 0; i < index->ntotal; ++i) { - auto neighbor_num = (node_t) index->nsg[i].size(); + auto neighbor_num = (node_t)index->nsg[i].size(); writer(&neighbor_num, sizeof(node_t), 1); writer(index->nsg[i].data(), neighbor_num * sizeof(node_t), 1); } } -NsgIndex *read_index(MemoryIOReader &reader) { +NsgIndex* +read_index(MemoryIOReader& reader) { size_t ntotal; size_t dimension; reader(&ntotal, sizeof(size_t), 1); @@ -48,9 +48,9 @@ NsgIndex *read_index(MemoryIOReader &reader) { reader(&index->navigation_point, sizeof(index->navigation_point), 1); index->ori_data_ = new float[index->ntotal * index->dimension]; - index->ids_ = new long[index->ntotal]; + index->ids_ = new int64_t[index->ntotal]; reader(index->ori_data_, sizeof(float) * index->ntotal * index->dimension, 1); - reader(index->ids_, sizeof(long) * index->ntotal, 1); + reader(index->ids_, sizeof(int64_t) * index->ntotal, 1); index->nsg.reserve(index->ntotal); index->nsg.resize(index->ntotal); @@ -66,6 +66,6 @@ NsgIndex *read_index(MemoryIOReader &reader) { return index; } -} -} -} +} // namespace algo +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGIO.h b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGIO.h index 3d6786c6c2..fd740a991e 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGIO.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/NSGIO.h @@ -15,21 +15,21 @@ // specific language governing permissions and limitations // under the License. - #pragma once -#include "knowhere/index/vector_index/helpers/FaissIO.h" #include "NSG.h" #include "knowhere/index/vector_index/IndexIVF.h" - +#include "knowhere/index/vector_index/helpers/FaissIO.h" namespace zilliz { namespace knowhere { namespace algo { -extern void write_index(NsgIndex* index, MemoryIOWriter& writer); -extern NsgIndex* read_index(MemoryIOReader& reader); +extern void +write_index(NsgIndex* index, MemoryIOWriter& writer); +extern NsgIndex* +read_index(MemoryIOReader& reader); -} -} -} +} // namespace algo +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/Neighbor.h b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/Neighbor.h index 9aceb62692..ee71a3f66e 100644 --- a/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/Neighbor.h +++ b/cpp/src/core/knowhere/knowhere/index/vector_index/nsg/Neighbor.h @@ -15,12 +15,10 @@ // specific language governing permissions and limitations // under the License. - #pragma once #include - namespace zilliz { namespace knowhere { namespace algo { @@ -29,21 +27,25 @@ using node_t = int64_t; // TODO: search use simple neighbor struct Neighbor { - node_t id; // offset of node in origin data + node_t id; // offset of node in origin data float distance; bool has_explored; Neighbor() = default; - explicit Neighbor(node_t id, float distance, bool f) : id{id}, distance{distance}, has_explored(f) {} - explicit Neighbor(node_t id, float distance) : id{id}, distance{distance}, has_explored(false) {} + explicit Neighbor(node_t id, float distance, bool f) : id{id}, distance{distance}, has_explored(f) { + } - inline bool operator<(const Neighbor &other) const { + explicit Neighbor(node_t id, float distance) : id{id}, distance{distance}, has_explored(false) { + } + + inline bool + operator<(const Neighbor& other) const { return distance < other.distance; } }; -//struct SimpleNeighbor { +// struct SimpleNeighbor { // node_t id; // offset of node in origin data // float distance; // @@ -57,7 +59,6 @@ struct Neighbor { typedef std::lock_guard LockGuard; - -} -} -} \ No newline at end of file +} // namespace algo +} // namespace knowhere +} // namespace zilliz diff --git a/cpp/src/core/test/CMakeLists.txt b/cpp/src/core/test/CMakeLists.txt index 5ad1c9e00e..ecb52695bc 100644 --- a/cpp/src/core/test/CMakeLists.txt +++ b/cpp/src/core/test/CMakeLists.txt @@ -1,6 +1,7 @@ include_directories(${CORE_SOURCE_DIR}/thirdparty) include_directories(${CORE_SOURCE_DIR}/thirdparty/SPTAG/AnnService) include_directories(${CORE_SOURCE_DIR}/knowhere) +include_directories(${CORE_SOURCE_DIR}) include_directories(/usr/local/cuda/include) link_directories(/usr/local/cuda/lib64) link_directories(${CORE_SOURCE_DIR}/thirdparty/tbb) diff --git a/cpp/src/core/test/SPTAG.cpp b/cpp/src/core/test/SPTAG.cpp index 3dfea0f088..62198d5495 100644 --- a/cpp/src/core/test/SPTAG.cpp +++ b/cpp/src/core/test/SPTAG.cpp @@ -1,36 +1,50 @@ - -#include -#include -#include -#include "SPTAG/AnnService/inc/Core/Common.h" -#include "SPTAG/AnnService/inc/Core/VectorIndex.h" - - -int -main(int argc, char *argv[]) { - using namespace SPTAG; - const int d = 128; - const int n = 100; - - auto p_data = new float[n * d]; - - auto index = VectorIndex::CreateInstance(IndexAlgoType::KDT, VectorValueType::Float); - - std::random_device rd; - std::mt19937 mt(rd()); - std::uniform_real_distribution dist(1.0, 2.0); - - for (auto i = 0; i < n; i++) { - for (auto j = 0; j < d; j++) { - p_data[i * d + j] = dist(mt) - 1; - } - } - std::cout << "generate random n * d finished."; - ByteArray data((uint8_t *) p_data, n * d * sizeof(float), true); - - auto vectorset = std::make_shared(data, VectorValueType::Float, d, n); - index->BuildIndex(vectorset, nullptr); - - std::cout << index->GetFeatureDim(); -} - +//// Licensed to the Apache Software Foundation (ASF) under one +//// or more contributor license agreements. See the NOTICE file +//// distributed with this work for additional information +//// regarding copyright ownership. The ASF licenses this file +//// to you under the Apache License, Version 2.0 (the +//// "License"); you may not use this file except in compliance +//// with the License. You may obtain a copy of the License at +//// +//// http://www.apache.org/licenses/LICENSE-2.0 +//// +//// Unless required by applicable law or agreed to in writing, +//// software distributed under the License is distributed on an +//// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +//// KIND, either express or implied. See the License for the +//// specific language governing permissions and limitations +//// under the License. +// +//#include +//#include +//#include +//#include +//#include +// +// int +// main(int argc, char* argv[]) { +// using namespace SPTAG; +// const int d = 128; +// const int n = 100; +// +// auto p_data = new float[n * d]; +// +// auto index = VectorIndex::CreateInstance(IndexAlgoType::KDT, VectorValueType::Float); +// +// std::random_device rd; +// std::mt19937 mt(rd()); +// std::uniform_real_distribution dist(1.0, 2.0); +// +// for (auto i = 0; i < n; i++) { +// for (auto j = 0; j < d; j++) { +// p_data[i * d + j] = dist(mt) - 1; +// } +// } +// std::cout << "generate random n * d finished."; +// ByteArray data((uint8_t*)p_data, n * d * sizeof(float), true); +// +// auto vectorset = std::make_shared(data, VectorValueType::Float, d, n); +// index->BuildIndex(vectorset, nullptr); +// +// std::cout << index->GetFeatureDim(); +//} diff --git a/cpp/src/core/test/faiss_ori/gpuresource_test.cpp b/cpp/src/core/test/faiss_ori/gpuresource_test.cpp index 0dcc2766cc..90383b944c 100644 --- a/cpp/src/core/test/faiss_ori/gpuresource_test.cpp +++ b/cpp/src/core/test/faiss_ori/gpuresource_test.cpp @@ -17,46 +17,45 @@ #include -#include -#include -#include #include +#include #include +#include +#include #include -#include #include #include - -using namespace std::chrono_literals; +#include class TestGpuRes { public: TestGpuRes() { res_ = new faiss::gpu::StandardGpuResources; } + ~TestGpuRes() { delete res_; delete index_; } - std::shared_ptr Do() { - int d = 128; // dimension - int nb = 100000; // database size - int nq = 100; // nb of queries + + std::shared_ptr + Do() { + int d = 128; // dimension + int nb = 100000; // database size + int nq = 100; // nb of queries int nlist = 1638; - float *xb = new float[d * nb]; - float *xq = new float[d * nq]; + float* xb = new float[d * nb]; + float* xq = new float[d * nq]; for (int i = 0; i < nb; i++) { - for (int j = 0; j < d; j++) - xb[d * i + j] = drand48(); + for (int j = 0; j < d; j++) xb[d * i + j] = drand48(); xb[d * i] += i / 1000.; } for (int i = 0; i < nq; i++) { - for (int j = 0; j < d; j++) - xq[d * i + j] = drand48(); + for (int j = 0; j < d; j++) xq[d * i + j] = drand48(); xq[d * i] += i / 1000.; } @@ -68,9 +67,10 @@ class TestGpuRes { host_index.reset(faiss::gpu::index_gpu_to_cpu(index_)); return host_index; } + private: - faiss::gpu::GpuResources *res_ = nullptr; - faiss::Index *index_ = nullptr; + faiss::gpu::GpuResources* res_ = nullptr; + faiss::Index* index_ = nullptr; }; TEST(gpuresource, resource) { @@ -79,30 +79,28 @@ TEST(gpuresource, resource) { } TEST(test, resource_re) { - int d = 128; // dimension - int nb = 1000000; // database size - int nq = 100; // nb of queries + int d = 128; // dimension + int nb = 1000000; // database size + int nq = 100; // nb of queries int nlist = 16384; int k = 100; - float *xb = new float[d * nb]; - float *xq = new float[d * nq]; + float* xb = new float[d * nb]; + float* xq = new float[d * nq]; for (int i = 0; i < nb; i++) { - for (int j = 0; j < d; j++) - xb[d * i + j] = drand48(); + for (int j = 0; j < d; j++) xb[d * i + j] = drand48(); xb[d * i] += i / 1000.; } for (int i = 0; i < nq; i++) { - for (int j = 0; j < d; j++) - xq[d * i + j] = drand48(); + for (int j = 0; j < d; j++) xq[d * i + j] = drand48(); xq[d * i] += i / 1000.; } auto elems = nq * k; - auto res_ids = (int64_t *) malloc(sizeof(int64_t) * elems); - auto res_dis = (float *) malloc(sizeof(float) * elems); + auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems); + auto res_dis = (float*)malloc(sizeof(float) * elems); faiss::gpu::StandardGpuResources res; auto cpu_index = faiss::index_factory(d, "IVF16384, Flat"); @@ -117,7 +115,7 @@ TEST(test, resource_re) { auto load = [&] { std::cout << "start" << std::endl; faiss::gpu::StandardGpuResources res; - //res.noTempMemory(); + // res.noTempMemory(); for (int l = 0; l < 100; ++l) { auto x = faiss::gpu::index_cpu_to_gpu(&res, 1, new_index); delete x; @@ -126,42 +124,42 @@ TEST(test, resource_re) { }; auto search = [&] { - faiss::gpu::StandardGpuResources res; - auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 1, new_index); - std::cout << "search start" << std::endl; - for (int l = 0; l < 10000; ++l) { - device_index->search(nq,xq,10, res_dis, res_ids); - } - std::cout << "search finish" << std::endl; - delete device_index; - delete cpu_index; + faiss::gpu::StandardGpuResources res; + auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 1, new_index); + std::cout << "search start" << std::endl; + for (int l = 0; l < 10000; ++l) { + device_index->search(nq, xq, 10, res_dis, res_ids); + } + std::cout << "search finish" << std::endl; + delete device_index; + delete cpu_index; }; load(); search(); std::thread t1(search); - std::this_thread::sleep_for(1s); + std::this_thread::sleep_for(std::chrono::seconds(1)); std::thread t2(load); t1.join(); t2.join(); std::cout << "finish clone" << std::endl; - //std::this_thread::sleep_for(5s); + // std::this_thread::sleep_for(5s); // - //auto device_index_2 = faiss::gpu::index_cpu_to_gpu(&res, 1, cpu_index); - //device_index->train(nb, xb); - //device_index->add(nb, xb); + // auto device_index_2 = faiss::gpu::index_cpu_to_gpu(&res, 1, cpu_index); + // device_index->train(nb, xb); + // device_index->add(nb, xb); - //std::cout << "finish clone" << std::endl; - //std::this_thread::sleep_for(5s); + // std::cout << "finish clone" << std::endl; + // std::this_thread::sleep_for(5s); - //std::this_thread::sleep_for(2s); - //std::cout << "start clone" << std::endl; - //auto new_index = faiss::clone_index(device_index); - //std::cout << "start search" << std::endl; - //new_index->search(nq, xq, k, res_dis, res_ids); + // std::this_thread::sleep_for(2s); + // std::cout << "start clone" << std::endl; + // auto new_index = faiss::clone_index(device_index); + // std::cout << "start search" << std::endl; + // new_index->search(nq, xq, k, res_dis, res_ids); - //std::cout << "start clone" << std::endl; + // std::cout << "start clone" << std::endl; //{ // faiss::gpu::StandardGpuResources res; // auto cpu_index = faiss::index_factory(d, "IVF1638, Flat"); @@ -174,5 +172,5 @@ TEST(test, resource_re) { // std::cout << "finish clone" << std::endl; //} // - //std::cout << "finish clone" << std::endl; + // std::cout << "finish clone" << std::endl; } diff --git a/cpp/src/core/test/kdtree.cpp b/cpp/src/core/test/kdtree.cpp index b3de8f345c..351a177d4b 100644 --- a/cpp/src/core/test/kdtree.cpp +++ b/cpp/src/core/test/kdtree.cpp @@ -1,134 +1,149 @@ - -#include -#include -#include "knowhere/index/vector_index/cpu_kdt_rng.h" -#include "knowhere/index/vector_index/definitions.h" -#include "knowhere/adapter/sptag.h" -#include "knowhere/adapter/structure.h" - - -using namespace zilliz::knowhere; - -DatasetPtr -generate_dataset(int64_t n, int64_t d, int64_t base) { - auto elems = n * d; - auto p_data = (float *) malloc(elems * sizeof(float)); - auto p_id = (int64_t *) malloc(elems * sizeof(int64_t)); - assert(p_data != nullptr && p_id != nullptr); - - for (auto i = 0; i < n; ++i) { - for (auto j = 0; j < d; ++j) { - p_data[i * d + j] = float(base + i); - } - p_id[i] = i; - } - - std::vector shape{n, d}; - auto tensor = ConstructFloatTensorSmart((uint8_t *) p_data, elems * sizeof(float), shape); - std::vector tensors{tensor}; - std::vector tensor_fields{ConstructFloatField("data")}; - auto tensor_schema = std::make_shared(tensor_fields); - - auto id_array = ConstructInt64ArraySmart((uint8_t *) p_id, n * sizeof(int64_t)); - std::vector arrays{id_array}; - std::vector array_fields{ConstructInt64Field("id")}; - auto array_schema = std::make_shared(tensor_fields); - - auto dataset = std::make_shared(std::move(arrays), array_schema, - std::move(tensors), tensor_schema); - - return dataset; -} - -DatasetPtr -generate_queries(int64_t n, int64_t d, int64_t k, int64_t base) { - size_t size = sizeof(float) * n * d; - auto v = (float *) malloc(size); - // TODO(lxj): check malloc - for (auto i = 0; i < n; ++i) { - for (auto j = 0; j < d; ++j) { - v[i * d + j] = float(base + i); - } - } - - std::vector data; - auto buffer = MakeMutableBufferSmart((uint8_t *) v, size); - std::vector shape{n, d}; - auto float_type = std::make_shared(); - auto tensor = std::make_shared(float_type, buffer, shape); - data.push_back(tensor); - - Config meta; - meta[META_ROWS] = int64_t (n); - meta[META_DIM] = int64_t (d); - meta[META_K] = int64_t (k); - - auto type = std::make_shared(); - auto field = std::make_shared("data", type); - std::vector fields{field}; - auto schema = std::make_shared(fields); - - return std::make_shared(data, schema); -} - - -int -main(int argc, char *argv[]) { - auto kdt_index = std::make_shared(); - - const auto d = 10; - const auto k = 3; - const auto nquery = 10; - - // ID [0, 99] - auto train = generate_dataset(100, d, 0); - // ID [100] - auto base = generate_dataset(1, d, 0); - auto queries = generate_queries(nquery, d, k, 0); - - // Build Preprocessor - auto preprocessor = kdt_index->BuildPreprocessor(train, Config()); - - // Set Preprocessor - kdt_index->set_preprocessor(preprocessor); - - Config train_config; - train_config["TPTNumber"] = "64"; - // Train - kdt_index->Train(train, train_config); - - // Add - kdt_index->Add(base, Config()); - - auto binary = kdt_index->Serialize(); - auto new_index = std::make_shared(); - new_index->Load(binary); -// auto new_index = kdt_index; - - Config search_config; - search_config[META_K] = int64_t (k); - - // Search - auto result = new_index->Search(queries, search_config); - - // Print Result - { - auto ids = result->array()[0]; - auto dists = result->array()[1]; - - std::stringstream ss_id; - std::stringstream ss_dist; - for (auto i = 0; i < nquery; i++) { - for (auto j = 0; j < k; ++j) { - ss_id << *ids->data()->GetValues(1, i * k + j) << " "; - ss_dist << *dists->data()->GetValues(1, i * k + j) << " "; - } - ss_id << std::endl; - ss_dist << std::endl; - } - std::cout << "id\n" << ss_id.str() << std::endl; - std::cout << "dist\n" << ss_dist.str() << std::endl; - } -} - - +//// Licensed to the Apache Software Foundation (ASF) under one +//// or more contributor license agreements. See the NOTICE file +//// distributed with this work for additional information +//// regarding copyright ownership. The ASF licenses this file +//// to you under the Apache License, Version 2.0 (the +//// "License"); you may not use this file except in compliance +//// with the License. You may obtain a copy of the License at +//// +//// http://www.apache.org/licenses/LICENSE-2.0 +//// +//// Unless required by applicable law or agreed to in writing, +//// software distributed under the License is distributed on an +//// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +//// KIND, either express or implied. See the License for the +//// specific language governing permissions and limitations +//// under the License. +// +//#include +//#include +//#include "knowhere/adapter/sptag.h" +//#include "knowhere/adapter/structure.h" +//#include "knowhere/index/vector_index/cpu_kdt_rng.h" +//#include "knowhere/index/vector_index/definitions.h" +// +// namespace { +// +// namespace kn = zilliz::knowhere; +// +//} // namespace +// +// kn::DatasetPtr +// generate_dataset(int64_t n, int64_t d, int64_t base) { +// auto elems = n * d; +// auto p_data = (float*)malloc(elems * sizeof(float)); +// auto p_id = (int64_t*)malloc(elems * sizeof(int64_t)); +// assert(p_data != nullptr && p_id != nullptr); +// +// for (auto i = 0; i < n; ++i) { +// for (auto j = 0; j < d; ++j) { +// p_data[i * d + j] = float(base + i); +// } +// p_id[i] = i; +// } +// +// std::vector shape{n, d}; +// auto tensor = ConstructFloatTensorSmart((uint8_t*)p_data, elems * sizeof(float), shape); +// std::vector tensors{tensor}; +// std::vector tensor_fields{ConstructFloatField("data")}; +// auto tensor_schema = std::make_shared(tensor_fields); +// +// auto id_array = ConstructInt64ArraySmart((uint8_t*)p_id, n * sizeof(int64_t)); +// std::vector arrays{id_array}; +// std::vector array_fields{ConstructInt64Field("id")}; +// auto array_schema = std::make_shared(tensor_fields); +// +// auto dataset = std::make_shared(std::move(arrays), array_schema, std::move(tensors), tensor_schema); +// +// return dataset; +//} +// +// kn::DatasetPtr +// generate_queries(int64_t n, int64_t d, int64_t k, int64_t base) { +// size_t size = sizeof(float) * n * d; +// auto v = (float*)malloc(size); +// // TODO(lxj): check malloc +// for (auto i = 0; i < n; ++i) { +// for (auto j = 0; j < d; ++j) { +// v[i * d + j] = float(base + i); +// } +// } +// +// std::vector data; +// auto buffer = MakeMutableBufferSmart((uint8_t*)v, size); +// std::vector shape{n, d}; +// auto float_type = std::make_shared(); +// auto tensor = std::make_shared(float_type, buffer, shape); +// data.push_back(tensor); +// +// Config meta; +// meta[META_ROWS] = int64_t(n); +// meta[META_DIM] = int64_t(d); +// meta[META_K] = int64_t(k); +// +// auto type = std::make_shared(); +// auto field = std::make_shared("data", type); +// std::vector fields{field}; +// auto schema = std::make_shared(fields); +// +// return std::make_shared(data, schema); +//} +// +// int +// main(int argc, char* argv[]) { +// auto kdt_index = std::make_shared(); +// +// const auto d = 10; +// const auto k = 3; +// const auto nquery = 10; +// +// // ID [0, 99] +// auto train = generate_dataset(100, d, 0); +// // ID [100] +// auto base = generate_dataset(1, d, 0); +// auto queries = generate_queries(nquery, d, k, 0); +// +// // Build Preprocessor +// auto preprocessor = kdt_index->BuildPreprocessor(train, Config()); +// +// // Set Preprocessor +// kdt_index->set_preprocessor(preprocessor); +// +// Config train_config; +// train_config["TPTNumber"] = "64"; +// // Train +// kdt_index->Train(train, train_config); +// +// // Add +// kdt_index->Add(base, Config()); +// +// auto binary = kdt_index->Serialize(); +// auto new_index = std::make_shared(); +// new_index->Load(binary); +// // auto new_index = kdt_index; +// +// Config search_config; +// search_config[META_K] = int64_t(k); +// +// // Search +// auto result = new_index->Search(queries, search_config); +// +// // Print Result +// { +// auto ids = result->array()[0]; +// auto dists = result->array()[1]; +// +// std::stringstream ss_id; +// std::stringstream ss_dist; +// for (auto i = 0; i < nquery; i++) { +// for (auto j = 0; j < k; ++j) { +// ss_id << *ids->data()->GetValues(1, i * k + j) << " "; +// ss_dist << *dists->data()->GetValues(1, i * k + j) << " "; +// } +// ss_id << std::endl; +// ss_dist << std::endl; +// } +// std::cout << "id\n" << ss_id.str() << std::endl; +// std::cout << "dist\n" << ss_dist.str() << std::endl; +// } +//} diff --git a/cpp/src/core/test/test_idmap.cpp b/cpp/src/core/test/test_idmap.cpp index 3d71bca931..8bf0d5c8d2 100644 --- a/cpp/src/core/test/test_idmap.cpp +++ b/cpp/src/core/test/test_idmap.cpp @@ -15,51 +15,51 @@ // specific language governing permissions and limitations // under the License. - #include - #include -#include "knowhere/index/vector_index/IndexIDMAP.h" #include "knowhere/adapter/Structure.h" -#include "knowhere/index/vector_index/helpers/Cloner.h" #include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/IndexIDMAP.h" +#include "knowhere/index/vector_index/helpers/Cloner.h" -#include "utils.h" +#include "test/utils.h" +namespace { -using namespace zilliz::knowhere; -using namespace zilliz::knowhere::cloner; +namespace kn = zilliz::knowhere; + +} // namespace static int device_id = 0; class IDMAPTest : public DataGen, public ::testing::Test { protected: - void SetUp() override { - FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024*1024*200, 1024*1024*300, 2); + void + SetUp() override { + kn::FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024 * 1024 * 200, 1024 * 1024 * 300, 2); Init_with_default(); - index_ = std::make_shared(); + index_ = std::make_shared(); } - void TearDown() override { - FaissGpuResourceMgr::GetInstance().Free(); + void + TearDown() override { + kn::FaissGpuResourceMgr::GetInstance().Free(); } protected: - IDMAPPtr index_ = nullptr; + kn::IDMAPPtr index_ = nullptr; }; -void AssertAnns(const DatasetPtr &result, - const int &nq, - const int &k) { +void +AssertAnns(const kn::DatasetPtr& result, const int& nq, const int& k) { auto ids = result->array()[0]; for (auto i = 0; i < nq; i++) { EXPECT_EQ(i, *(ids->data()->GetValues(1, i * k))); } } -void PrintResult(const DatasetPtr &result, - const int &nq, - const int &k) { +void +PrintResult(const kn::DatasetPtr& result, const int& nq, const int& k) { auto ids = result->array()[0]; auto dists = result->array()[1]; @@ -80,10 +80,10 @@ void PrintResult(const DatasetPtr &result, TEST_F(IDMAPTest, idmap_basic) { ASSERT_TRUE(!xb.empty()); - auto conf = std::make_shared(); + auto conf = std::make_shared(); conf->d = dim; conf->k = k; - conf->metric_type = METRICTYPE::L2; + conf->metric_type = kn::METRICTYPE::L2; index_->Train(conf); index_->Add(base_dataset, conf); @@ -97,7 +97,7 @@ TEST_F(IDMAPTest, idmap_basic) { index_->Seal(); auto binaryset = index_->Serialize(); - auto new_index = std::make_shared(); + auto new_index = std::make_shared(); new_index->Load(binaryset); auto re_result = index_->Search(query_dataset, conf); AssertAnns(re_result, nq, k); @@ -105,23 +105,23 @@ TEST_F(IDMAPTest, idmap_basic) { } TEST_F(IDMAPTest, idmap_serialize) { - auto serialize = [](const std::string &filename, BinaryPtr &bin, uint8_t *ret) { + auto serialize = [](const std::string& filename, kn::BinaryPtr& bin, uint8_t* ret) { FileIOWriter writer(filename); - writer(static_cast(bin->data.get()), bin->size); + writer(static_cast(bin->data.get()), bin->size); FileIOReader reader(filename); reader(ret, bin->size); }; - auto conf = std::make_shared(); + auto conf = std::make_shared(); conf->d = dim; conf->k = k; - conf->metric_type = METRICTYPE::L2; + conf->metric_type = kn::METRICTYPE::L2; { // serialize index index_->Train(conf); - index_->Add(base_dataset, Config()); + index_->Add(base_dataset, kn::Config()); auto re_result = index_->Search(query_dataset, conf); AssertAnns(re_result, nq, k); PrintResult(re_result, nq, k); @@ -151,10 +151,10 @@ TEST_F(IDMAPTest, idmap_serialize) { TEST_F(IDMAPTest, copy_test) { ASSERT_TRUE(!xb.empty()); - auto conf = std::make_shared(); + auto conf = std::make_shared(); conf->d = dim; conf->k = k; - conf->metric_type = METRICTYPE::L2; + conf->metric_type = kn::METRICTYPE::L2; index_->Train(conf); index_->Add(base_dataset, conf); @@ -164,7 +164,7 @@ TEST_F(IDMAPTest, copy_test) { ASSERT_TRUE(index_->GetRawIds() != nullptr); auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, k); - //PrintResult(result, nq, k); + // PrintResult(result, nq, k); { // clone @@ -175,12 +175,12 @@ TEST_F(IDMAPTest, copy_test) { { // cpu to gpu - auto clone_index = CopyCpuToGpu(index_, device_id, conf); + auto clone_index = kn::cloner::CopyCpuToGpu(index_, device_id, conf); auto clone_result = clone_index->Search(query_dataset, conf); AssertAnns(clone_result, nq, k); - ASSERT_THROW({ std::static_pointer_cast(clone_index)->GetRawVectors(); }, + ASSERT_THROW({ std::static_pointer_cast(clone_index)->GetRawVectors(); }, zilliz::knowhere::KnowhereException); - ASSERT_THROW({ std::static_pointer_cast(clone_index)->GetRawIds(); }, + ASSERT_THROW({ std::static_pointer_cast(clone_index)->GetRawIds(); }, zilliz::knowhere::KnowhereException); auto binary = clone_index->Serialize(); @@ -193,15 +193,15 @@ TEST_F(IDMAPTest, copy_test) { AssertAnns(clone_gpu_res, nq, k); // gpu to cpu - auto host_index = CopyGpuToCpu(clone_index, conf); + auto host_index = kn::cloner::CopyGpuToCpu(clone_index, conf); auto host_result = host_index->Search(query_dataset, conf); AssertAnns(host_result, nq, k); - ASSERT_TRUE(std::static_pointer_cast(host_index)->GetRawVectors() != nullptr); - ASSERT_TRUE(std::static_pointer_cast(host_index)->GetRawIds() != nullptr); + ASSERT_TRUE(std::static_pointer_cast(host_index)->GetRawVectors() != nullptr); + ASSERT_TRUE(std::static_pointer_cast(host_index)->GetRawIds() != nullptr); // gpu to gpu - auto device_index = CopyCpuToGpu(index_, device_id, conf); - auto new_device_index = std::static_pointer_cast(device_index)->CopyGpuToGpu(device_id, conf); + auto device_index = kn::cloner::CopyCpuToGpu(index_, device_id, conf); + auto new_device_index = std::static_pointer_cast(device_index)->CopyGpuToGpu(device_id, conf); auto device_result = new_device_index->Search(query_dataset, conf); AssertAnns(device_result, nq, k); } diff --git a/cpp/src/core/test/test_ivf.cpp b/cpp/src/core/test/test_ivf.cpp index 7196fe15e4..9835264bff 100644 --- a/cpp/src/core/test/test_ivf.cpp +++ b/cpp/src/core/test/test_ivf.cpp @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. - #include #include @@ -25,21 +24,24 @@ #include #include +#include "knowhere/adapter/Structure.h" #include "knowhere/common/Exception.h" #include "knowhere/common/Timer.h" -#include "knowhere/adapter/Structure.h" -#include "knowhere/index/vector_index/helpers/Cloner.h" -#include "knowhere/index/vector_index/IndexIVF.h" #include "knowhere/index/vector_index/IndexGPUIVF.h" -#include "knowhere/index/vector_index/IndexIVFPQ.h" #include "knowhere/index/vector_index/IndexGPUIVFPQ.h" -#include "knowhere/index/vector_index/IndexIVFSQ.h" #include "knowhere/index/vector_index/IndexGPUIVFSQ.h" +#include "knowhere/index/vector_index/IndexIVF.h" +#include "knowhere/index/vector_index/IndexIVFPQ.h" +#include "knowhere/index/vector_index/IndexIVFSQ.h" +#include "knowhere/index/vector_index/helpers/Cloner.h" -#include "utils.h" +#include "test/utils.h" -using namespace zilliz::knowhere; -using namespace zilliz::knowhere::cloner; +namespace { + +namespace kn = zilliz::knowhere; + +} // namespace using ::testing::TestWithParam; using ::testing::Values; @@ -47,23 +49,24 @@ using ::testing::Combine; constexpr int device_id = 0; constexpr int64_t DIM = 128; -constexpr int64_t NB = 1000000/100; +constexpr int64_t NB = 1000000 / 100; constexpr int64_t NQ = 10; constexpr int64_t K = 10; -IVFIndexPtr IndexFactory(const std::string &type) { +kn::IVFIndexPtr +IndexFactory(const std::string& type) { if (type == "IVF") { - return std::make_shared(); + return std::make_shared(); } else if (type == "IVFPQ") { - return std::make_shared(); + return std::make_shared(); } else if (type == "GPUIVF") { - return std::make_shared(device_id); + return std::make_shared(device_id); } else if (type == "GPUIVFPQ") { - return std::make_shared(device_id); + return std::make_shared(device_id); } else if (type == "IVFSQ") { - return std::make_shared(); + return std::make_shared(); } else if (type == "GPUIVFSQ") { - return std::make_shared(device_id); + return std::make_shared(device_id); } } @@ -76,24 +79,25 @@ enum class ParameterType { class ParamGenerator { public: - static ParamGenerator& GetInstance(){ + static ParamGenerator& + GetInstance() { static ParamGenerator instance; return instance; } - Config Gen(const ParameterType& type){ + kn::Config + Gen(const ParameterType& type) { if (type == ParameterType::ivf) { - auto tempconf = std::make_shared(); + auto tempconf = std::make_shared(); tempconf->d = DIM; tempconf->gpu_id = device_id; tempconf->nlist = 100; tempconf->nprobe = 16; tempconf->k = K; - tempconf->metric_type = METRICTYPE::L2; + tempconf->metric_type = kn::METRICTYPE::L2; return tempconf; - } - else if (type == ParameterType::ivfpq) { - auto tempconf = std::make_shared(); + } else if (type == ParameterType::ivfpq) { + auto tempconf = std::make_shared(); tempconf->d = DIM; tempconf->gpu_id = device_id; tempconf->nlist = 100; @@ -101,70 +105,64 @@ class ParamGenerator { tempconf->k = K; tempconf->m = 8; tempconf->nbits = 8; - tempconf->metric_type = METRICTYPE::L2; + tempconf->metric_type = kn::METRICTYPE::L2; return tempconf; - } - else if (type == ParameterType::ivfsq) { - auto tempconf = std::make_shared(); + } else if (type == ParameterType::ivfsq) { + auto tempconf = std::make_shared(); tempconf->d = DIM; tempconf->gpu_id = device_id; tempconf->nlist = 100; tempconf->nprobe = 16; tempconf->k = K; tempconf->nbits = 8; - tempconf->metric_type = METRICTYPE::L2; + tempconf->metric_type = kn::METRICTYPE::L2; return tempconf; } } }; -class IVFTest - : public DataGen, public TestWithParam<::std::tuple> { +class IVFTest : public DataGen, public TestWithParam<::std::tuple> { protected: - void SetUp() override { + void + SetUp() override { ParameterType parameter_type; std::tie(index_type, parameter_type) = GetParam(); - //Init_with_default(); + // Init_with_default(); Generate(DIM, NB, NQ); index_ = IndexFactory(index_type); conf = ParamGenerator::GetInstance().Gen(parameter_type); - FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024*1024*200, 1024*1024*600, 2); + kn::FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024 * 1024 * 200, 1024 * 1024 * 600, 2); } - void TearDown() override { - FaissGpuResourceMgr::GetInstance().Free(); + + void + TearDown() override { + kn::FaissGpuResourceMgr::GetInstance().Free(); } protected: std::string index_type; - Config conf; - IVFIndexPtr index_ = nullptr; + kn::Config conf; + kn::IVFIndexPtr index_ = nullptr; }; - - INSTANTIATE_TEST_CASE_P(IVFParameters, IVFTest, - Values( - std::make_tuple("IVF", ParameterType::ivf), - std::make_tuple("GPUIVF", ParameterType::ivf), -// std::make_tuple("IVFPQ", ParameterType::ivfpq), -// std::make_tuple("GPUIVFPQ", ParameterType::ivfpq), - std::make_tuple("IVFSQ", ParameterType::ivfsq), - std::make_tuple("GPUIVFSQ", ParameterType::ivfsq) - ) -); + Values(std::make_tuple("IVF", ParameterType::ivf), + std::make_tuple("GPUIVF", ParameterType::ivf), + // std::make_tuple("IVFPQ", ParameterType::ivfpq), + // std::make_tuple("GPUIVFPQ", ParameterType::ivfpq), + std::make_tuple("IVFSQ", ParameterType::ivfsq), + std::make_tuple("GPUIVFSQ", ParameterType::ivfsq))); -void AssertAnns(const DatasetPtr &result, - const int &nq, - const int &k) { +void +AssertAnns(const kn::DatasetPtr& result, const int& nq, const int& k) { auto ids = result->array()[0]; for (auto i = 0; i < nq; i++) { EXPECT_EQ(i, *(ids->data()->GetValues(1, i * k))); } } -void PrintResult(const DatasetPtr &result, - const int &nq, - const int &k) { +void +PrintResult(const kn::DatasetPtr& result, const int& nq, const int& k) { auto ids = result->array()[0]; auto dists = result->array()[1]; @@ -195,10 +193,10 @@ TEST_P(IVFTest, ivf_basic) { EXPECT_EQ(index_->Dimension(), dim); auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); - //PrintResult(result, nq, k); + // PrintResult(result, nq, k); } -//TEST_P(IVFTest, gpu_to_cpu) { +// TEST_P(IVFTest, gpu_to_cpu) { // if (index_type.find("GPU") == std::string::npos) { return; } // // // else @@ -223,9 +221,9 @@ TEST_P(IVFTest, ivf_basic) { //} TEST_P(IVFTest, ivf_serialize) { - auto serialize = [](const std::string &filename, BinaryPtr &bin, uint8_t *ret) { + auto serialize = [](const std::string& filename, kn::BinaryPtr& bin, uint8_t* ret) { FileIOWriter writer(filename); - writer(static_cast(bin->data.get()), bin->size); + writer(static_cast(bin->data.get()), bin->size); FileIOReader reader(filename); reader(ret, bin->size); @@ -292,15 +290,14 @@ TEST_P(IVFTest, clone_test) { EXPECT_EQ(index_->Dimension(), dim); auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); - //PrintResult(result, nq, k); + // PrintResult(result, nq, k); - auto AssertEqual = [&] (DatasetPtr p1, DatasetPtr p2) { + auto AssertEqual = [&](kn::DatasetPtr p1, kn::DatasetPtr p2) { auto ids_p1 = p1->array()[0]; auto ids_p2 = p2->array()[0]; for (int i = 0; i < nq * k; ++i) { - EXPECT_EQ(*(ids_p2->data()->GetValues(1, i)), - *(ids_p1->data()->GetValues(1, i))); + EXPECT_EQ(*(ids_p2->data()->GetValues(1, i)), *(ids_p1->data()->GetValues(1, i))); } }; @@ -310,17 +307,19 @@ TEST_P(IVFTest, clone_test) { auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type); if (finder != support_idx_vec.cend()) { EXPECT_NO_THROW({ - auto clone_index = index_->Clone(); - auto clone_result = clone_index->Search(query_dataset, conf); - //AssertAnns(result, nq, conf->k); - AssertEqual(result, clone_result); - std::cout << "inplace clone [" << index_type << "] success" << std::endl; - }); + auto clone_index = index_->Clone(); + auto clone_result = clone_index->Search(query_dataset, conf); + // AssertAnns(result, nq, conf->k); + AssertEqual(result, clone_result); + std::cout << "inplace clone [" << index_type << "] success" << std::endl; + }); } else { - EXPECT_THROW({ - std::cout << "inplace clone [" << index_type << "] failed" << std::endl; - auto clone_index = index_->Clone(); - }, KnowhereException); + EXPECT_THROW( + { + std::cout << "inplace clone [" << index_type << "] failed" << std::endl; + auto clone_index = index_->Clone(); + }, + kn::KnowhereException); } } @@ -330,16 +329,18 @@ TEST_P(IVFTest, clone_test) { auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type); if (finder != support_idx_vec.cend()) { EXPECT_NO_THROW({ - auto clone_index = CopyGpuToCpu(index_, Config()); - auto clone_result = clone_index->Search(query_dataset, conf); - AssertEqual(result, clone_result); - std::cout << "clone G <=> C [" << index_type << "] success" << std::endl; - }); + auto clone_index = kn::cloner::CopyGpuToCpu(index_, kn::Config()); + auto clone_result = clone_index->Search(query_dataset, conf); + AssertEqual(result, clone_result); + std::cout << "clone G <=> C [" << index_type << "] success" << std::endl; + }); } else { - EXPECT_THROW({ - std::cout << "clone G <=> C [" << index_type << "] failed" << std::endl; - auto clone_index = CopyGpuToCpu(index_, Config()); - }, KnowhereException); + EXPECT_THROW( + { + std::cout << "clone G <=> C [" << index_type << "] failed" << std::endl; + auto clone_index = kn::cloner::CopyGpuToCpu(index_, kn::Config()); + }, + kn::KnowhereException); } } @@ -349,22 +350,24 @@ TEST_P(IVFTest, clone_test) { auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type); if (finder != support_idx_vec.cend()) { EXPECT_NO_THROW({ - auto clone_index = CopyCpuToGpu(index_, device_id, Config()); - auto clone_result = clone_index->Search(query_dataset, conf); - AssertEqual(result, clone_result); - std::cout << "clone C <=> G [" << index_type << "] success" << std::endl; - }); + auto clone_index = kn::cloner::CopyCpuToGpu(index_, device_id, kn::Config()); + auto clone_result = clone_index->Search(query_dataset, conf); + AssertEqual(result, clone_result); + std::cout << "clone C <=> G [" << index_type << "] success" << std::endl; + }); } else { - EXPECT_THROW({ - std::cout << "clone C <=> G [" << index_type << "] failed" << std::endl; - auto clone_index = CopyCpuToGpu(index_, device_id, Config()); - }, KnowhereException); + EXPECT_THROW( + { + std::cout << "clone C <=> G [" << index_type << "] failed" << std::endl; + auto clone_index = kn::cloner::CopyCpuToGpu(index_, device_id, kn::Config()); + }, + kn::KnowhereException); } } } TEST_P(IVFTest, seal_test) { - //FaissGpuResourceMgr::GetInstance().InitDevice(device_id); + // FaissGpuResourceMgr::GetInstance().InitDevice(device_id); std::vector support_idx_vec{"GPUIVF", "GPUIVFSQ"}; auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type); @@ -385,44 +388,44 @@ TEST_P(IVFTest, seal_test) { auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, conf->k); - auto cpu_idx = CopyGpuToCpu(index_, Config()); + auto cpu_idx = kn::cloner::CopyGpuToCpu(index_, kn::Config()); - TimeRecorder tc("CopyToGpu"); - CopyCpuToGpu(cpu_idx, device_id, Config()); + kn::TimeRecorder tc("CopyToGpu"); + kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config()); auto without_seal = tc.RecordSection("Without seal"); cpu_idx->Seal(); tc.RecordSection("seal cost"); - CopyCpuToGpu(cpu_idx, device_id, Config()); + kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config()); auto with_seal = tc.RecordSection("With seal"); ASSERT_GE(without_seal, with_seal); } - -class GPURESTEST - : public DataGen, public ::testing::Test { +class GPURESTEST : public DataGen, public ::testing::Test { protected: - void SetUp() override { + void + SetUp() override { Generate(128, 1000000, 1000); - FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024*1024*200, 1024*1024*300, 2); + kn::FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024 * 1024 * 200, 1024 * 1024 * 300, 2); k = 100; elems = nq * k; - ids = (int64_t *) malloc(sizeof(int64_t) * elems); - dis = (float *) malloc(sizeof(float) * elems); + ids = (int64_t*)malloc(sizeof(int64_t) * elems); + dis = (float*)malloc(sizeof(float) * elems); } - void TearDown() override { + void + TearDown() override { delete ids; delete dis; - FaissGpuResourceMgr::GetInstance().Free(); + kn::FaissGpuResourceMgr::GetInstance().Free(); } protected: std::string index_type; - IVFIndexPtr index_ = nullptr; + kn::IVFIndexPtr index_ = nullptr; - int64_t *ids = nullptr; - float *dis = nullptr; + int64_t* ids = nullptr; + float* dis = nullptr; int64_t elems = 0; }; @@ -433,16 +436,16 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) { assert(!xb.empty()); { - index_ = std::make_shared(-1); - ASSERT_EQ(std::dynamic_pointer_cast(index_)->GetGpuDevice(), -1); - std::dynamic_pointer_cast(index_)->SetGpuDevice(device_id); - ASSERT_EQ(std::dynamic_pointer_cast(index_)->GetGpuDevice(), device_id); + index_ = std::make_shared(-1); + ASSERT_EQ(std::dynamic_pointer_cast(index_)->GetGpuDevice(), -1); + std::dynamic_pointer_cast(index_)->SetGpuDevice(device_id); + ASSERT_EQ(std::dynamic_pointer_cast(index_)->GetGpuDevice(), device_id); - auto conf = std::make_shared(); + auto conf = std::make_shared(); conf->nlist = 1638; conf->d = dim; conf->gpu_id = device_id; - conf->metric_type = METRICTYPE::L2; + conf->metric_type = kn::METRICTYPE::L2; conf->k = k; conf->nprobe = 1; @@ -454,7 +457,7 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) { EXPECT_EQ(index_->Count(), nb); EXPECT_EQ(index_->Dimension(), dim); - TimeRecorder tc("knowere GPUIVF"); + kn::TimeRecorder tc("knowere GPUIVF"); for (int i = 0; i < search_count; ++i) { index_->Search(query_dataset, conf); if (i > search_count - 6 || i < 5) @@ -462,7 +465,7 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) { } tc.ElapseFromBegin("search all"); } - FaissGpuResourceMgr::GetInstance().Dump(); + kn::FaissGpuResourceMgr::GetInstance().Dump(); { // IVF-Search @@ -473,7 +476,7 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) { device_index.train(nb, xb.data()); device_index.add(nb, xb.data()); - TimeRecorder tc("ori IVF"); + kn::TimeRecorder tc("ori IVF"); for (int i = 0; i < search_count; ++i) { device_index.search(nq, xq.data(), k, dis, ids); if (i > search_count - 6 || i < 5) @@ -481,7 +484,6 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) { } tc.ElapseFromBegin("search all"); } - } TEST_F(GPURESTEST, gpuivfsq) { @@ -490,11 +492,11 @@ TEST_F(GPURESTEST, gpuivfsq) { index_type = "GPUIVFSQ"; index_ = IndexFactory(index_type); - auto conf = std::make_shared(); + auto conf = std::make_shared(); conf->nlist = 1638; conf->d = dim; conf->gpu_id = device_id; - conf->metric_type = METRICTYPE::L2; + conf->metric_type = kn::METRICTYPE::L2; conf->k = k; conf->nbits = 8; conf->nprobe = 1; @@ -507,11 +509,11 @@ TEST_F(GPURESTEST, gpuivfsq) { auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, k); - auto cpu_idx = CopyGpuToCpu(index_, Config()); + auto cpu_idx = kn::cloner::CopyGpuToCpu(index_, kn::Config()); cpu_idx->Seal(); - TimeRecorder tc("knowhere GPUSQ8"); - auto search_idx = CopyCpuToGpu(cpu_idx, device_id, Config()); + kn::TimeRecorder tc("knowhere GPUSQ8"); + auto search_idx = kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config()); tc.RecordSection("Copy to gpu"); for (int i = 0; i < search_count; ++i) { search_idx->Search(query_dataset, conf); @@ -523,8 +525,8 @@ TEST_F(GPURESTEST, gpuivfsq) { { // Ori gpuivfsq Test - const char *index_description = "IVF1638,SQ8"; - faiss::Index *ori_index = faiss::index_factory(dim, index_description, faiss::METRIC_L2); + const char* index_description = "IVF1638,SQ8"; + faiss::Index* ori_index = faiss::index_factory(dim, index_description, faiss::METRIC_L2); faiss::gpu::StandardGpuResources res; auto device_index = faiss::gpu::index_cpu_to_gpu(&res, device_id, ori_index); @@ -532,7 +534,7 @@ TEST_F(GPURESTEST, gpuivfsq) { device_index->add(nb, xb.data()); auto cpu_index = faiss::gpu::index_gpu_to_cpu(device_index); - auto idx = dynamic_cast(cpu_index); + auto idx = dynamic_cast(cpu_index); if (idx != nullptr) { idx->to_readonly(); } @@ -542,8 +544,8 @@ TEST_F(GPURESTEST, gpuivfsq) { faiss::gpu::GpuClonerOptions option; option.allInGpu = true; - TimeRecorder tc("ori GPUSQ8"); - faiss::Index *search_idx = faiss::gpu::index_cpu_to_gpu(&res, device_id, cpu_index, &option); + kn::TimeRecorder tc("ori GPUSQ8"); + faiss::Index* search_idx = faiss::gpu::index_cpu_to_gpu(&res, device_id, cpu_index, &option); tc.RecordSection("Copy to gpu"); for (int i = 0; i < search_count; ++i) { search_idx->search(nq, xq.data(), k, dis, ids); @@ -554,7 +556,6 @@ TEST_F(GPURESTEST, gpuivfsq) { delete cpu_index; delete search_idx; } - } TEST_F(GPURESTEST, copyandsearch) { @@ -564,11 +565,11 @@ TEST_F(GPURESTEST, copyandsearch) { index_type = "GPUIVFSQ"; index_ = IndexFactory(index_type); - auto conf = std::make_shared(); + auto conf = std::make_shared(); conf->nlist = 1638; conf->d = dim; conf->gpu_id = device_id; - conf->metric_type = METRICTYPE::L2; + conf->metric_type = kn::METRICTYPE::L2; conf->k = k; conf->nbits = 8; conf->nprobe = 1; @@ -581,32 +582,32 @@ TEST_F(GPURESTEST, copyandsearch) { auto result = index_->Search(query_dataset, conf); AssertAnns(result, nq, k); - auto cpu_idx = CopyGpuToCpu(index_, Config()); + auto cpu_idx = kn::cloner::CopyGpuToCpu(index_, kn::Config()); cpu_idx->Seal(); - auto search_idx = CopyCpuToGpu(cpu_idx, device_id, Config()); + auto search_idx = kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config()); auto search_func = [&] { - //TimeRecorder tc("search&load"); + // TimeRecorder tc("search&load"); for (int i = 0; i < search_count; ++i) { search_idx->Search(query_dataset, conf); - //if (i > search_count - 6 || i == 0) + // if (i > search_count - 6 || i == 0) // tc.RecordSection("search once"); } - //tc.ElapseFromBegin("search finish"); + // tc.ElapseFromBegin("search finish"); }; auto load_func = [&] { - //TimeRecorder tc("search&load"); + // TimeRecorder tc("search&load"); for (int i = 0; i < load_count; ++i) { - CopyCpuToGpu(cpu_idx, device_id, Config()); - //if (i > load_count -5 || i < 5) - //tc.RecordSection("Copy to gpu"); + kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config()); + // if (i > load_count -5 || i < 5) + // tc.RecordSection("Copy to gpu"); } - //tc.ElapseFromBegin("load finish"); + // tc.ElapseFromBegin("load finish"); }; - TimeRecorder tc("basic"); - CopyCpuToGpu(cpu_idx, device_id, Config()); + kn::TimeRecorder tc("basic"); + kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config()); tc.RecordSection("Copy to gpu once"); search_idx->Search(query_dataset, conf); tc.RecordSection("search once"); @@ -626,11 +627,11 @@ TEST_F(GPURESTEST, TrainAndSearch) { index_type = "GPUIVFSQ"; index_ = IndexFactory(index_type); - auto conf = std::make_shared(); + auto conf = std::make_shared(); conf->nlist = 1638; conf->d = dim; conf->gpu_id = device_id; - conf->metric_type = METRICTYPE::L2; + conf->metric_type = kn::METRICTYPE::L2; conf->k = k; conf->nbits = 8; conf->nprobe = 1; @@ -641,9 +642,9 @@ TEST_F(GPURESTEST, TrainAndSearch) { auto new_index = IndexFactory(index_type); new_index->set_index_model(model); new_index->Add(base_dataset, conf); - auto cpu_idx = CopyGpuToCpu(new_index, Config()); + auto cpu_idx = kn::cloner::CopyGpuToCpu(new_index, kn::Config()); cpu_idx->Seal(); - auto search_idx = CopyCpuToGpu(cpu_idx, device_id, Config()); + auto search_idx = kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config()); constexpr int train_count = 1; constexpr int search_count = 5000; @@ -655,18 +656,18 @@ TEST_F(GPURESTEST, TrainAndSearch) { test_idx->Add(base_dataset, conf); } }; - auto search_stage = [&](VectorIndexPtr& search_idx) { + auto search_stage = [&](kn::VectorIndexPtr& search_idx) { for (int i = 0; i < search_count; ++i) { auto result = search_idx->Search(query_dataset, conf); AssertAnns(result, nq, k); } }; - //TimeRecorder tc("record"); - //train_stage(); - //tc.RecordSection("train cost"); - //search_stage(search_idx); - //tc.RecordSection("search cost"); + // TimeRecorder tc("record"); + // train_stage(); + // tc.RecordSection("train cost"); + // search_stage(search_idx); + // tc.RecordSection("search cost"); { // search and build parallel @@ -684,7 +685,7 @@ TEST_F(GPURESTEST, TrainAndSearch) { } { // search parallel - auto search_idx_2 = CopyCpuToGpu(cpu_idx, device_id, Config()); + auto search_idx_2 = kn::cloner::CopyCpuToGpu(cpu_idx, device_id, kn::Config()); std::thread search_1(search_stage, std::ref(search_idx)); std::thread search_2(search_stage, std::ref(search_idx_2)); search_1.join(); @@ -692,6 +693,4 @@ TEST_F(GPURESTEST, TrainAndSearch) { } } - - // TODO(lxj): Add exception test diff --git a/cpp/src/core/test/test_json.cpp b/cpp/src/core/test/test_json.cpp index b3c4fd4993..acdddad30a 100644 --- a/cpp/src/core/test/test_json.cpp +++ b/cpp/src/core/test/test_json.cpp @@ -15,13 +15,17 @@ // specific language governing permissions and limitations // under the License. - #include "knowhere/common/config.h" -using namespace zilliz::knowhere; +namespace { -int main(){ - Config cfg; +namespace kn = zilliz::knowhere; + +} // namespace + +int +main() { + kn::Config cfg; cfg["size"] = size_t(199); auto size = cfg.get_with_default("size", 123); diff --git a/cpp/src/core/test/test_kdt.cpp b/cpp/src/core/test/test_kdt.cpp index 3780f655cc..45809bf396 100644 --- a/cpp/src/core/test/test_kdt.cpp +++ b/cpp/src/core/test/test_kdt.cpp @@ -15,35 +15,36 @@ // specific language governing permissions and limitations // under the License. - #include #include #include +#include "knowhere/adapter/SptagAdapter.h" +#include "knowhere/adapter/Structure.h" #include "knowhere/common/Exception.h" #include "knowhere/index/vector_index/IndexKDT.h" #include "knowhere/index/vector_index/helpers/Definitions.h" -#include "knowhere/adapter/SptagAdapter.h" -#include "knowhere/adapter/Structure.h" -#include "utils.h" +#include "test/utils.h" +namespace { -using namespace zilliz::knowhere; +namespace kn = zilliz::knowhere; + +} // namespace using ::testing::TestWithParam; using ::testing::Values; using ::testing::Combine; - -class KDTTest - : public DataGen, public ::testing::Test { +class KDTTest : public DataGen, public ::testing::Test { protected: - void SetUp() override { - index_ = std::make_shared(); + void + SetUp() override { + index_ = std::make_shared(); - auto tempconf = std::make_shared(); + auto tempconf = std::make_shared(); tempconf->tptnubmber = 1; tempconf->k = 10; conf = tempconf; @@ -52,22 +53,20 @@ class KDTTest } protected: - Config conf; - std::shared_ptr index_ = nullptr; + kn::Config conf; + std::shared_ptr index_ = nullptr; }; -void AssertAnns(const DatasetPtr &result, - const int &nq, - const int &k) { +void +AssertAnns(const kn::DatasetPtr& result, const int& nq, const int& k) { auto ids = result->array()[0]; for (auto i = 0; i < nq; i++) { EXPECT_EQ(i, *(ids->data()->GetValues(1, i * k))); } } -void PrintResult(const DatasetPtr &result, - const int &nq, - const int &k) { +void +PrintResult(const kn::DatasetPtr& result, const int& nq, const int& k) { auto ids = result->array()[0]; auto dists = result->array()[1]; @@ -124,25 +123,25 @@ TEST_F(KDTTest, kdt_serialize) { index_->set_preprocessor(preprocessor); auto model = index_->Train(base_dataset, conf); - //index_->Add(base_dataset, conf); + // index_->Add(base_dataset, conf); auto binaryset = index_->Serialize(); - auto new_index = std::make_shared(); + auto new_index = std::make_shared(); new_index->Load(binaryset); auto result = new_index->Search(query_dataset, conf); AssertAnns(result, nq, k); PrintResult(result, nq, k); ASSERT_EQ(new_index->Count(), nb); ASSERT_EQ(new_index->Dimension(), dim); - ASSERT_THROW({new_index->Clone();}, zilliz::knowhere::KnowhereException); - ASSERT_NO_THROW({new_index->Seal();}); + ASSERT_THROW({ new_index->Clone(); }, zilliz::knowhere::KnowhereException); + ASSERT_NO_THROW({ new_index->Seal(); }); { int fileno = 0; - const std::string &base_name = "/tmp/kdt_serialize_test_bin_"; + const std::string& base_name = "/tmp/kdt_serialize_test_bin_"; std::vector filename_list; - std::vector> meta_list; - for (auto &iter: binaryset.binary_map_) { - const std::string &filename = base_name + std::to_string(fileno); + std::vector> meta_list; + for (auto& iter : binaryset.binary_map_) { + const std::string& filename = base_name + std::to_string(fileno); FileIOWriter writer(filename); writer(iter.second->data.get(), iter.second->size); @@ -151,7 +150,7 @@ TEST_F(KDTTest, kdt_serialize) { ++fileno; } - BinarySet load_data_list; + kn::BinarySet load_data_list; for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { auto bin_size = meta_list[i].second; FileIOReader reader(filename_list[i]); @@ -163,7 +162,7 @@ TEST_F(KDTTest, kdt_serialize) { load_data_list.Append(meta_list[i].first, data, bin_size); } - auto new_index = std::make_shared(); + auto new_index = std::make_shared(); new_index->Load(load_data_list); auto result = new_index->Search(query_dataset, conf); AssertAnns(result, nq, k); diff --git a/cpp/src/core/test/test_nsg.cpp b/cpp/src/core/test/test_nsg.cpp index f156925027..a4b6006e71 100644 --- a/cpp/src/core/test/test_nsg.cpp +++ b/cpp/src/core/test/test_nsg.cpp @@ -15,46 +15,46 @@ // specific language governing permissions and limitations // under the License. - #include #include -#include -#include "index.h" +#include "knowhere/index/index.h" +#include "test/utils.h" //#include -using namespace zilliz::knowhere; - -void load_data(std::string &filename, float *&data, unsigned &num, - unsigned &dim) { // load data with sift10K pattern +void +load_data(std::string& filename, float*& data, unsigned& num, + unsigned& dim) { // load data with sift10K pattern std::ifstream in(filename, std::ios::binary); if (!in.is_open()) { std::cout << "open file error" << std::endl; exit(-1); } - in.read((char *) &dim, 4); + in.read((char*)&dim, 4); in.seekg(0, std::ios::end); std::ios::pos_type ss = in.tellg(); - size_t fsize = (size_t) ss; - num = (unsigned) (fsize / (dim + 1) / 4); - data = new float[(size_t) num * (size_t) dim]; + size_t fsize = (size_t)ss; + num = (unsigned)(fsize / (dim + 1) / 4); + data = new float[(size_t)num * (size_t)dim]; in.seekg(0, std::ios::beg); for (size_t i = 0; i < num; i++) { in.seekg(4, std::ios::cur); - in.read((char *) (data + i * dim), dim * 4); + in.read((char*)(data + i * dim), dim * 4); } in.close(); } -void test_distance() { +void +test_distance() { std::vector xb{1, 2, 3, 4}; std::vector xq{2, 2, 3, 4}; float r = calculate(xb.data(), xq.data(), 4); std::cout << r << std::endl; } -int main() { +int +main() { test_distance(); BuildParams params; @@ -62,16 +62,16 @@ int main() { params.candidate_pool_size = 100; params.out_degree = 50; - float *data = nullptr; - long *ids = nullptr; + float* data = nullptr; + int64_t* ids = nullptr; unsigned ntotal, dim; std::string filename = "/home/zilliz/opt/workspace/wook/efanna_graph/tests/siftsmall/siftsmall_base.fvecs"; - //std::string filename = "/home/zilliz/opt/workspace/wook/efanna_graph/tests/sift/sift_base.fvecs"; + // std::string filename = "/home/zilliz/opt/workspace/wook/efanna_graph/tests/sift/sift_base.fvecs"; load_data(filename, data, ntotal, dim); assert(data); - //float x = calculate(data + dim * 0, data + dim * 62, dim); - //std::cout << x << std::endl; + // float x = calculate(data + dim * 0, data + dim * 62, dim); + // std::cout << x << std::endl; NsgIndex index(dim, ntotal); @@ -81,24 +81,23 @@ int main() { std::chrono::duration diff = e - s; std::cout << "indexing time: " << diff.count() << "\n"; - int k = 10; int nq = 1000; SearchParams s_params; s_params.search_length = 50; - auto dist = new float[nq*k]; - auto ids_b = new long[nq*k]; + auto dist = new float[nq * k]; + auto ids_b = new int64_t[nq * k]; s = std::chrono::high_resolution_clock::now(); - //ProfilerStart("xx.prof"); + // ProfilerStart("xx.prof"); index.Search(data, nq, dim, k, dist, ids_b, s_params); - //ProfilerStop(); + // ProfilerStop(); e = std::chrono::high_resolution_clock::now(); diff = e - s; std::cout << "search time: " << diff.count() << "\n"; for (int i = 0; i < k; ++i) { std::cout << "id " << ids_b[i] << std::endl; - //std::cout << "dist " << dist[i] << std::endl; + // std::cout << "dist " << dist[i] << std::endl; } delete[] dist; @@ -106,5 +105,3 @@ int main() { return 0; } - - diff --git a/cpp/src/core/test/test_nsg/test_nsg.cpp b/cpp/src/core/test/test_nsg/test_nsg.cpp index 8f0187e0a3..6f7ca1eba9 100644 --- a/cpp/src/core/test/test_nsg/test_nsg.cpp +++ b/cpp/src/core/test/test_nsg/test_nsg.cpp @@ -15,35 +15,39 @@ // specific language governing permissions and limitations // under the License. - #include #include #include "knowhere/common/Exception.h" #include "knowhere/index/vector_index/FaissBaseIndex.h" #include "knowhere/index/vector_index/IndexNSG.h" -#include "knowhere/index/vector_index/nsg/NSGIO.h" #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" +#include "knowhere/index/vector_index/nsg/NSGIO.h" -#include "../utils.h" +#include "test/utils.h" +namespace { + +namespace kn = zilliz::knowhere; + +} // namespace -using namespace zilliz::knowhere; using ::testing::TestWithParam; using ::testing::Values; using ::testing::Combine; constexpr int64_t DEVICE_ID = 1; - class NSGInterfaceTest : public DataGen, public ::testing::Test { +class NSGInterfaceTest : public DataGen, public ::testing::Test { protected: - void SetUp() override { - //Init_with_default(); - FaissGpuResourceMgr::GetInstance().InitDevice(DEVICE_ID, 1024*1024*200, 1024*1024*600, 2); + void + SetUp() override { + // Init_with_default(); + kn::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICE_ID, 1024 * 1024 * 200, 1024 * 1024 * 600, 2); Generate(256, 1000000, 1); - index_ = std::make_shared(); + index_ = std::make_shared(); - auto tmp_conf = std::make_shared(); + auto tmp_conf = std::make_shared(); tmp_conf->gpu_id = DEVICE_ID; tmp_conf->knng = 100; tmp_conf->nprobe = 32; @@ -51,28 +55,28 @@ constexpr int64_t DEVICE_ID = 1; tmp_conf->search_length = 60; tmp_conf->out_degree = 70; tmp_conf->candidate_pool_size = 500; - tmp_conf->metric_type = METRICTYPE::L2; + tmp_conf->metric_type = kn::METRICTYPE::L2; train_conf = tmp_conf; - auto tmp2_conf = std::make_shared(); + auto tmp2_conf = std::make_shared(); tmp2_conf->k = k; tmp2_conf->search_length = 30; search_conf = tmp2_conf; } - void TearDown() override { - FaissGpuResourceMgr::GetInstance().Free(); + void + TearDown() override { + kn::FaissGpuResourceMgr::GetInstance().Free(); } protected: - std::shared_ptr index_; - Config train_conf; - Config search_conf; + std::shared_ptr index_; + kn::Config train_conf; + kn::Config search_conf; }; -void AssertAnns(const DatasetPtr &result, - const int &nq, - const int &k) { +void +AssertAnns(const kn::DatasetPtr& result, const int& nq, const int& k) { auto ids = result->array()[0]; for (auto i = 0; i < nq; i++) { EXPECT_EQ(i, *(ids->data()->GetValues(1, i * k))); @@ -87,33 +91,32 @@ TEST_F(NSGInterfaceTest, basic_test) { AssertAnns(result, nq, k); auto binaryset = index_->Serialize(); - auto new_index = std::make_shared(); + auto new_index = std::make_shared(); new_index->Load(binaryset); auto new_result = new_index->Search(query_dataset, search_conf); AssertAnns(result, nq, k); ASSERT_EQ(index_->Count(), nb); ASSERT_EQ(index_->Dimension(), dim); - ASSERT_THROW({index_->Clone();}, zilliz::knowhere::KnowhereException); + ASSERT_THROW({ index_->Clone(); }, zilliz::knowhere::KnowhereException); ASSERT_NO_THROW({ - index_->Add(base_dataset, Config()); + index_->Add(base_dataset, kn::Config()); index_->Seal(); }); { - //std::cout << "k = 1" << std::endl; - //new_index->Search(GenQuery(1), Config::object{{"k", 1}}); - //new_index->Search(GenQuery(10), Config::object{{"k", 1}}); - //new_index->Search(GenQuery(100), Config::object{{"k", 1}}); - //new_index->Search(GenQuery(1000), Config::object{{"k", 1}}); - //new_index->Search(GenQuery(10000), Config::object{{"k", 1}}); + // std::cout << "k = 1" << std::endl; + // new_index->Search(GenQuery(1), Config::object{{"k", 1}}); + // new_index->Search(GenQuery(10), Config::object{{"k", 1}}); + // new_index->Search(GenQuery(100), Config::object{{"k", 1}}); + // new_index->Search(GenQuery(1000), Config::object{{"k", 1}}); + // new_index->Search(GenQuery(10000), Config::object{{"k", 1}}); - //std::cout << "k = 5" << std::endl; - //new_index->Search(GenQuery(1), Config::object{{"k", 5}}); - //new_index->Search(GenQuery(20), Config::object{{"k", 5}}); - //new_index->Search(GenQuery(100), Config::object{{"k", 5}}); - //new_index->Search(GenQuery(300), Config::object{{"k", 5}}); - //new_index->Search(GenQuery(500), Config::object{{"k", 5}}); + // std::cout << "k = 5" << std::endl; + // new_index->Search(GenQuery(1), Config::object{{"k", 5}}); + // new_index->Search(GenQuery(20), Config::object{{"k", 5}}); + // new_index->Search(GenQuery(100), Config::object{{"k", 5}}); + // new_index->Search(GenQuery(300), Config::object{{"k", 5}}); + // new_index->Search(GenQuery(500), Config::object{{"k", 5}}); } } - diff --git a/cpp/src/core/test/utils.cpp b/cpp/src/core/test/utils.cpp index df9410bd6c..a921b69802 100644 --- a/cpp/src/core/test/utils.cpp +++ b/cpp/src/core/test/utils.cpp @@ -15,24 +15,35 @@ // specific language governing permissions and limitations // under the License. +#include "test/utils.h" -#include "utils.h" +#include +#include +#include INITIALIZE_EASYLOGGINGPP -void InitLog() { +namespace { + +namespace kn = zilliz::knowhere; + +} // namespace + +void +InitLog() { el::Configurations defaultConf; defaultConf.setToDefault(); - defaultConf.set(el::Level::Debug, - el::ConfigurationType::Format, "[%thread-%datetime-%level]: %msg (%fbase:%line)"); + defaultConf.set(el::Level::Debug, el::ConfigurationType::Format, "[%thread-%datetime-%level]: %msg (%fbase:%line)"); el::Loggers::reconfigureLogger("default", defaultConf); } -void DataGen::Init_with_default() { +void +DataGen::Init_with_default() { Generate(dim, nb, nq); } -void DataGen::Generate(const int &dim, const int &nb, const int &nq) { +void +DataGen::Generate(const int& dim, const int& nb, const int& nq) { this->nb = nb; this->nq = nq; this->dim = dim; @@ -43,9 +54,10 @@ void DataGen::Generate(const int &dim, const int &nb, const int &nq) { base_dataset = generate_dataset(nb, dim, xb.data(), ids.data()); query_dataset = generate_query_dataset(nq, dim, xq.data()); - } -zilliz::knowhere::DatasetPtr DataGen::GenQuery(const int &nq) { + +zilliz::knowhere::DatasetPtr +DataGen::GenQuery(const int& nq) { xq.resize(nq * dim); for (int i = 0; i < nq * dim; ++i) { xq[i] = xb[i]; @@ -53,37 +65,28 @@ zilliz::knowhere::DatasetPtr DataGen::GenQuery(const int &nq) { return generate_query_dataset(nq, dim, xq.data()); } -void GenAll(const int64_t dim, - const int64_t &nb, - std::vector &xb, - std::vector &ids, - const int64_t &nq, - std::vector &xq) { +void +GenAll(const int64_t dim, const int64_t& nb, std::vector& xb, std::vector& ids, const int64_t& nq, + std::vector& xq) { xb.resize(nb * dim); xq.resize(nq * dim); ids.resize(nb); GenAll(dim, nb, xb.data(), ids.data(), nq, xq.data()); } -void GenAll(const int64_t &dim, - const int64_t &nb, - float *xb, - int64_t *ids, - const int64_t &nq, - float *xq) { +void +GenAll(const int64_t& dim, const int64_t& nb, float* xb, int64_t* ids, const int64_t& nq, float* xq) { GenBase(dim, nb, xb, ids); for (int64_t i = 0; i < nq * dim; ++i) { xq[i] = xb[i]; } } -void GenBase(const int64_t &dim, - const int64_t &nb, - float *xb, - int64_t *ids) { +void +GenBase(const int64_t& dim, const int64_t& nb, float* xb, int64_t* ids) { for (auto i = 0; i < nb; ++i) { for (auto j = 0; j < dim; ++j) { - //p_data[i * d + j] = float(base + i); + // p_data[i * d + j] = float(base + i); xb[i * dim + j] = drand48(); } xb[dim * i] += i / 1000.; @@ -91,7 +94,7 @@ void GenBase(const int64_t &dim, } } -FileIOReader::FileIOReader(const std::string &fname) { +FileIOReader::FileIOReader(const std::string& fname) { name = fname; fs = std::fstream(name, std::ios::in | std::ios::binary); } @@ -100,12 +103,13 @@ FileIOReader::~FileIOReader() { fs.close(); } -size_t FileIOReader::operator()(void *ptr, size_t size) { - fs.read(reinterpret_cast(ptr), size); +size_t +FileIOReader::operator()(void* ptr, size_t size) { + fs.read(reinterpret_cast(ptr), size); return size; } -FileIOWriter::FileIOWriter(const std::string &fname) { +FileIOWriter::FileIOWriter(const std::string& fname) { name = fname; fs = std::fstream(name, std::ios::out | std::ios::binary); } @@ -114,39 +118,37 @@ FileIOWriter::~FileIOWriter() { fs.close(); } -size_t FileIOWriter::operator()(void *ptr, size_t size) { - fs.write(reinterpret_cast(ptr), size); +size_t +FileIOWriter::operator()(void* ptr, size_t size) { + fs.write(reinterpret_cast(ptr), size); return size; } -using namespace zilliz::knowhere; - -DatasetPtr -generate_dataset(int64_t nb, int64_t dim, float *xb, long *ids) { +kn::DatasetPtr +generate_dataset(int64_t nb, int64_t dim, float* xb, int64_t* ids) { std::vector shape{nb, dim}; - auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); - std::vector tensors{tensor}; - std::vector tensor_fields{ConstructFloatField("data")}; - auto tensor_schema = std::make_shared(tensor_fields); + auto tensor = kn::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{kn::ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); - auto id_array = ConstructInt64Array((uint8_t *) ids, nb * sizeof(int64_t)); - std::vector arrays{id_array}; - std::vector array_fields{ConstructInt64Field("id")}; - auto array_schema = std::make_shared(tensor_fields); + auto id_array = kn::ConstructInt64Array((uint8_t*)ids, nb * sizeof(int64_t)); + std::vector arrays{id_array}; + std::vector array_fields{kn::ConstructInt64Field("id")}; + auto array_schema = std::make_shared(tensor_fields); - auto dataset = std::make_shared(std::move(arrays), array_schema, - std::move(tensors), tensor_schema); + auto dataset = std::make_shared(std::move(arrays), array_schema, std::move(tensors), tensor_schema); return dataset; } -DatasetPtr -generate_query_dataset(int64_t nb, int64_t dim, float *xb) { +kn::DatasetPtr +generate_query_dataset(int64_t nb, int64_t dim, float* xb) { std::vector shape{nb, dim}; - auto tensor = ConstructFloatTensor((uint8_t *) xb, nb * dim * sizeof(float), shape); - std::vector tensors{tensor}; - std::vector tensor_fields{ConstructFloatField("data")}; - auto tensor_schema = std::make_shared(tensor_fields); + auto tensor = kn::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape); + std::vector tensors{tensor}; + std::vector tensor_fields{kn::ConstructFloatField("data")}; + auto tensor_schema = std::make_shared(tensor_fields); - auto dataset = std::make_shared(std::move(tensors), tensor_schema); + auto dataset = std::make_shared(std::move(tensors), tensor_schema); return dataset; } diff --git a/cpp/src/core/test/utils.h b/cpp/src/core/test/utils.h index 41dbbc1f30..07a104ad4a 100644 --- a/cpp/src/core/test/utils.h +++ b/cpp/src/core/test/utils.h @@ -15,24 +15,27 @@ // specific language governing permissions and limitations // under the License. - #pragma once -#include -#include #include +#include #include +#include +#include #include "knowhere/adapter/Structure.h" #include "knowhere/common/Log.h" -class DataGen { +class DataGen { protected: - void Init_with_default(); + void + Init_with_default(); - void Generate(const int &dim, const int &nb, const int &nq); + void + Generate(const int& dim, const int& nb, const int& nq); - zilliz::knowhere::DatasetPtr GenQuery(const int&nq); + zilliz::knowhere::DatasetPtr + GenQuery(const int& nq); protected: int nb = 10000; @@ -46,49 +49,41 @@ class DataGen { zilliz::knowhere::DatasetPtr query_dataset = nullptr; }; +extern void +GenAll(const int64_t dim, const int64_t& nb, std::vector& xb, std::vector& ids, const int64_t& nq, + std::vector& xq); -extern void GenAll(const int64_t dim, - const int64_t &nb, - std::vector &xb, - std::vector &ids, - const int64_t &nq, - std::vector &xq); +extern void +GenAll(const int64_t& dim, const int64_t& nb, float* xb, int64_t* ids, const int64_t& nq, float* xq); -extern void GenAll(const int64_t &dim, - const int64_t &nb, - float *xb, - int64_t *ids, - const int64_t &nq, - float *xq); +extern void +GenBase(const int64_t& dim, const int64_t& nb, float* xb, int64_t* ids); -extern void GenBase(const int64_t &dim, - const int64_t &nb, - float *xb, - int64_t *ids); - -extern void InitLog(); +extern void +InitLog(); zilliz::knowhere::DatasetPtr -generate_dataset(int64_t nb, int64_t dim, float *xb, long *ids); +generate_dataset(int64_t nb, int64_t dim, float* xb, int64_t* ids); zilliz::knowhere::DatasetPtr -generate_query_dataset(int64_t nb, int64_t dim, float *xb); +generate_query_dataset(int64_t nb, int64_t dim, float* xb); struct FileIOWriter { std::fstream fs; std::string name; - FileIOWriter(const std::string &fname); + explicit FileIOWriter(const std::string& fname); ~FileIOWriter(); - size_t operator()(void *ptr, size_t size); + size_t + operator()(void* ptr, size_t size); }; struct FileIOReader { std::fstream fs; std::string name; - FileIOReader(const std::string &fname); + explicit FileIOReader(const std::string& fname); ~FileIOReader(); - size_t operator()(void *ptr, size_t size); + size_t + operator()(void* ptr, size_t size); }; -