diff --git a/CHANGELOG.md b/CHANGELOG.md index 1241f70475..161225e636 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Please mark all change in change log and use the issue from GitHub ## Improvement - \#738 - Use Openblas / lapack from apt install - \#758 - Enhance config description +- \#791 - Remove Arrow ## Task diff --git a/ci/jenkins/step/build.groovy b/ci/jenkins/step/build.groovy index 2a12929575..aff5908900 100644 --- a/ci/jenkins/step/build.groovy +++ b/ci/jenkins/step/build.groovy @@ -1,4 +1,4 @@ -timeout(time: 60, unit: 'MINUTES') { +timeout(time: 75, unit: 'MINUTES') { dir ("ci/scripts") { withCredentials([usernamePassword(credentialsId: "${params.JFROG_CREDENTIALS_ID}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) { def checkResult = sh(script: "./check_ccache.sh -l ${params.JFROG_ARTFACTORY_URL}/ccache", returnStatus: true) diff --git a/core/src/index/cmake/DefineOptionsCore.cmake b/core/src/index/cmake/DefineOptionsCore.cmake index 38599997c0..1777fdbe8e 100644 --- a/core/src/index/cmake/DefineOptionsCore.cmake +++ b/core/src/index/cmake/DefineOptionsCore.cmake @@ -73,7 +73,7 @@ Note that this requires linking Boost statically" OFF) define_option(KNOWHERE_BOOST_HEADER_ONLY "Use only BOOST headers" OFF) -define_option(KNOWHERE_WITH_ARROW "Build with ARROW" ON) +define_option(KNOWHERE_WITH_ARROW "Build with ARROW" OFF) define_option(KNOWHERE_WITH_FAISS "Build with FAISS library" ON) diff --git a/core/src/index/knowhere/CMakeLists.txt b/core/src/index/knowhere/CMakeLists.txt index 6c2c5d7fff..1cd7c21963 100644 --- a/core/src/index/knowhere/CMakeLists.txt +++ b/core/src/index/knowhere/CMakeLists.txt @@ -22,8 +22,7 @@ endif () set(external_srcs knowhere/adapter/SptagAdapter.cpp - knowhere/adapter/Structure.cpp - knowhere/adapter/ArrowAdapter.cpp + knowhere/adapter/VectorAdapter.cpp knowhere/common/Exception.cpp knowhere/common/Timer.cpp ) @@ -49,8 +48,6 @@ set(index_srcs set(depend_libs SPTAGLibStatic faiss - arrow - ${ARROW_LIB_DIR}/libjemalloc_pic.a gomp gfortran pthread diff --git a/core/src/index/knowhere/knowhere/adapter/ArrowAdapter.cpp b/core/src/index/knowhere/knowhere/adapter/ArrowAdapter.cpp deleted file mode 100644 index 38227f43ab..0000000000 --- a/core/src/index/knowhere/knowhere/adapter/ArrowAdapter.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "knowhere/adapter/ArrowAdapter.h" - -namespace knowhere { - -ArrayPtr -CopyArray(const ArrayPtr& origin) { - ArrayPtr copy = nullptr; - auto copy_data = origin->data()->Copy(); - switch (origin->type_id()) { -#define DEFINE_TYPE(type, clazz) \ - case arrow::Type::type: { \ - copy = std::make_shared(copy_data); \ - } - DEFINE_TYPE(BOOL, BooleanArray) - DEFINE_TYPE(BINARY, BinaryArray) - DEFINE_TYPE(FIXED_SIZE_BINARY, FixedSizeBinaryArray) - DEFINE_TYPE(DECIMAL, Decimal128Array) - DEFINE_TYPE(FLOAT, NumericArray) - DEFINE_TYPE(INT64, NumericArray) - default: - break; - } - return copy; -} - -SchemaPtr -CopySchema(const SchemaPtr& origin) { - std::vector> fields; - for (auto& field : origin->fields()) { - auto copy = std::make_shared(field->name(), field->type(), field->nullable(), nullptr); - fields.emplace_back(copy); - } - return std::make_shared(std::move(fields)); -} - -} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/adapter/SptagAdapter.cpp b/core/src/index/knowhere/knowhere/adapter/SptagAdapter.cpp index db4a415261..7072e1a594 100644 --- a/core/src/index/knowhere/knowhere/adapter/SptagAdapter.cpp +++ b/core/src/index/knowhere/knowhere/adapter/SptagAdapter.cpp @@ -16,17 +16,15 @@ // under the License. #include "knowhere/adapter/SptagAdapter.h" -#include "knowhere/adapter/Structure.h" -#include "knowhere/index/vector_index/helpers/Definitions.h" +#include "VectorAdapter.h" namespace knowhere { std::shared_ptr ConvertToMetadataSet(const DatasetPtr& dataset) { - auto array = dataset->array()[0]; - auto elems = array->length(); + auto elems = dataset->Get(meta::ROWS); + auto p_data = dataset->Get(meta::IDS); - auto p_data = array->data()->GetValues(1, 0); auto p_offset = (int64_t*)malloc(sizeof(int64_t) * elems); for (auto i = 0; i <= elems; ++i) p_offset[i] = i * 8; @@ -39,31 +37,21 @@ ConvertToMetadataSet(const DatasetPtr& dataset) { std::shared_ptr ConvertToVectorSet(const DatasetPtr& dataset) { - auto tensor = dataset->tensor()[0]; + GETTENSOR(dataset); + size_t num_bytes = rows * dim * sizeof(float); + SPTAG::ByteArray byte_array((uint8_t*)p_data, num_bytes, false); - auto p_data = tensor->raw_mutable_data(); - auto dimension = tensor->shape()[1]; - auto rows = tensor->shape()[0]; - auto num_bytes = tensor->size() * sizeof(float); - - SPTAG::ByteArray byte_array(p_data, num_bytes, false); - - auto vectorset = - std::make_shared(byte_array, SPTAG::VectorValueType::Float, dimension, rows); + auto vectorset = std::make_shared(byte_array, SPTAG::VectorValueType::Float, dim, rows); return vectorset; } std::vector ConvertToQueryResult(const DatasetPtr& dataset, const Config& config) { - auto tensor = dataset->tensor()[0]; - - auto p_data = (float*)tensor->raw_mutable_data(); - auto dimension = tensor->shape()[1]; - auto rows = tensor->shape()[0]; + GETTENSOR(dataset); std::vector query_results(rows, SPTAG::QueryResult(nullptr, config->k, true)); for (auto i = 0; i < rows; ++i) { - query_results[i].SetTarget(&p_data[i * dimension]); + query_results[i].SetTarget(&p_data[i * dim]); } return query_results; @@ -74,9 +62,10 @@ ConvertToDataset(std::vector query_results) { auto k = query_results[0].GetResultNum(); auto elems = query_results.size() * k; - auto p_id = (int64_t*)malloc(sizeof(int64_t) * elems); - auto p_dist = (float*)malloc(sizeof(float) * elems); - // TODO: throw if malloc failed. + size_t p_id_size = sizeof(int64_t) * elems; + size_t p_dist_size = sizeof(float) * elems; + auto p_id = (int64_t*)malloc(p_id_size); + auto p_dist = (float*)malloc(p_dist_size); #pragma omp parallel for for (auto i = 0; i < query_results.size(); ++i) { @@ -89,35 +78,10 @@ ConvertToDataset(std::vector query_results) { } } - // auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems); - // auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems); - // - // // TODO: magic - // std::vector id_bufs{nullptr, id_buf}; - // std::vector dist_bufs{nullptr, dist_buf}; - // - // auto int64_type = std::make_shared(); - // auto float_type = std::make_shared(); - // - // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); - // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); - // // auto id_array_data = std::make_shared(int64_type, sizeof(int64_t) * elems, id_bufs); - // // auto dist_array_data = std::make_shared(float_type, sizeof(float) * elems, dist_bufs); - // - // // auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems); - // // auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems); - // - // auto ids = std::make_shared>(id_array_data); - // auto dists = std::make_shared>(dist_array_data); - // std::vector array{ids, dists}; - // - // auto field_id = std::make_shared("id", std::make_shared()); - // auto field_dist = std::make_shared("dist", std::make_shared()); - // std::vector fields{field_id, field_dist}; - // auto schema = std::make_shared(fields); - // - // return std::make_shared(array, schema); - return std::make_shared((void*)p_id, (void*)p_dist); + auto ret_ds = std::make_shared(); + ret_ds->Set(meta::IDS, p_id); + ret_ds->Set(meta::DISTANCE, p_dist); + return ret_ds; } } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/adapter/SptagAdapter.h b/core/src/index/knowhere/knowhere/adapter/SptagAdapter.h index 9f92497562..6847062ca5 100644 --- a/core/src/index/knowhere/knowhere/adapter/SptagAdapter.h +++ b/core/src/index/knowhere/knowhere/adapter/SptagAdapter.h @@ -21,6 +21,7 @@ #include #include +#include "knowhere/common/Config.h" #include "knowhere/common/Dataset.h" namespace knowhere { diff --git a/core/src/index/knowhere/knowhere/adapter/Structure.cpp b/core/src/index/knowhere/knowhere/adapter/Structure.cpp deleted file mode 100644 index 44b068c792..0000000000 --- a/core/src/index/knowhere/knowhere/adapter/Structure.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "knowhere/adapter/Structure.h" - -#include -#include - -namespace knowhere { - -ArrayPtr -ConstructInt64ArraySmart(uint8_t* data, int64_t size) { - // TODO: magic - std::vector id_buf{nullptr, MakeMutableBufferSmart(data, size)}; - auto type = std::make_shared(); - auto id_array_data = arrow::ArrayData::Make(type, size / sizeof(int64_t), id_buf); - return std::make_shared>(id_array_data); -} - -ArrayPtr -ConstructFloatArraySmart(uint8_t* data, int64_t size) { - // TODO: magic - std::vector id_buf{nullptr, MakeMutableBufferSmart(data, size)}; - auto type = std::make_shared(); - auto id_array_data = arrow::ArrayData::Make(type, size / sizeof(float), id_buf); - return std::make_shared>(id_array_data); -} - -TensorPtr -ConstructFloatTensorSmart(uint8_t* data, int64_t size, std::vector shape) { - auto buffer = MakeMutableBufferSmart(data, size); - auto float_type = std::make_shared(); - return std::make_shared(float_type, buffer, shape); -} - -ArrayPtr -ConstructInt64Array(uint8_t* data, int64_t size) { - // TODO: magic - std::vector id_buf{nullptr, MakeMutableBuffer(data, size)}; - auto type = std::make_shared(); - auto id_array_data = arrow::ArrayData::Make(type, size / sizeof(int64_t), id_buf); - return std::make_shared>(id_array_data); -} - -ArrayPtr -ConstructFloatArray(uint8_t* data, int64_t size) { - // TODO: magic - std::vector id_buf{nullptr, MakeMutableBuffer(data, size)}; - auto type = std::make_shared(); - auto id_array_data = arrow::ArrayData::Make(type, size / sizeof(float), id_buf); - return std::make_shared>(id_array_data); -} - -TensorPtr -ConstructFloatTensor(uint8_t* data, int64_t size, std::vector shape) { - auto buffer = MakeMutableBuffer(data, size); - auto float_type = std::make_shared(); - return std::make_shared(float_type, buffer, shape); -} - -FieldPtr -ConstructInt64Field(const std::string& name) { - auto type = std::make_shared(); - return std::make_shared(name, type); -} - -FieldPtr -ConstructFloatField(const std::string& name) { - auto type = std::make_shared(); - return std::make_shared(name, type); -} - -} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/adapter/Structure.h b/core/src/index/knowhere/knowhere/adapter/Structure.h deleted file mode 100644 index 6bde9ddfe6..0000000000 --- a/core/src/index/knowhere/knowhere/adapter/Structure.h +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include - -#include "knowhere/common/Dataset.h" - -namespace knowhere { - -extern ArrayPtr -ConstructInt64ArraySmart(uint8_t* data, int64_t size); - -extern ArrayPtr -ConstructFloatArraySmart(uint8_t* data, int64_t size); - -extern TensorPtr -ConstructFloatTensorSmart(uint8_t* data, int64_t size, std::vector shape); - -extern ArrayPtr -ConstructInt64Array(uint8_t* data, int64_t size); - -extern ArrayPtr -ConstructFloatArray(uint8_t* data, int64_t size); - -extern TensorPtr -ConstructFloatTensor(uint8_t* data, int64_t size, std::vector shape); - -extern FieldPtr -ConstructInt64Field(const std::string& name); - -extern FieldPtr -ConstructFloatField(const std::string& name); - -} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/adapter/ArrowAdapter.h b/core/src/index/knowhere/knowhere/adapter/VectorAdapter.cpp similarity index 79% rename from core/src/index/knowhere/knowhere/adapter/ArrowAdapter.h rename to core/src/index/knowhere/knowhere/adapter/VectorAdapter.cpp index 75580cd163..0a5d3c5a67 100644 --- a/core/src/index/knowhere/knowhere/adapter/ArrowAdapter.h +++ b/core/src/index/knowhere/knowhere/adapter/VectorAdapter.cpp @@ -15,20 +15,16 @@ // specific language governing permissions and limitations // under the License. -#pragma once - -#include -#include -#include - -#include "knowhere/common/Array.h" +#include "knowhere/adapter/VectorAdapter.h" namespace knowhere { -ArrayPtr -CopyArray(const ArrayPtr& origin); - -SchemaPtr -CopySchema(const SchemaPtr& origin); +namespace meta { +const char* DIM = "dim"; +const char* TENSOR = "tensor"; +const char* ROWS = "rows"; +const char* IDS = "ids"; +const char* DISTANCE = "distance"; +}; // namespace meta } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/adapter/VectorAdapter.h b/core/src/index/knowhere/knowhere/adapter/VectorAdapter.h index 2b16227bb3..9f74fa2240 100644 --- a/core/src/index/knowhere/knowhere/adapter/VectorAdapter.h +++ b/core/src/index/knowhere/knowhere/adapter/VectorAdapter.h @@ -17,12 +17,22 @@ #pragma once +#include +#include "knowhere/common/Dataset.h" + namespace knowhere { -#define GETTENSOR(dataset) \ - auto tensor = dataset->tensor()[0]; \ - auto p_data = tensor->raw_data(); \ - auto dim = tensor->shape()[1]; \ - auto rows = tensor->shape()[0]; +namespace meta { +extern const char* DIM; +extern const char* TENSOR; +extern const char* ROWS; +extern const char* IDS; +extern const char* DISTANCE; +}; // namespace meta + +#define GETTENSOR(dataset) \ + auto dim = dataset->Get(meta::DIM); \ + auto rows = dataset->Get(meta::ROWS); \ + auto p_data = dataset->Get(meta::TENSOR); } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/common/Array.h b/core/src/index/knowhere/knowhere/common/Array.h deleted file mode 100644 index 71ad78b79b..0000000000 --- a/core/src/index/knowhere/knowhere/common/Array.h +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "Schema.h" - -namespace knowhere { - -using ArrayData = arrow::ArrayData; -using ArrayDataPtr = std::shared_ptr; - -using Array = arrow::Array; -using ArrayPtr = std::shared_ptr; - -using BooleanArray = arrow::BooleanArray; -using BooleanArrayPtr = std::shared_ptr; - -template -using NumericArray = arrow::NumericArray; -template -using NumericArrayPtr = std::shared_ptr>; - -using BinaryArray = arrow::BinaryArray; -using BinaryArrayPtr = std::shared_ptr; - -using FixedSizeBinaryArray = arrow::FixedSizeBinaryArray; -using FixedSizeBinaryArrayPtr = std::shared_ptr; - -using Decimal128Array = arrow::Decimal128Array; -using Decimal128ArrayPtr = std::shared_ptr; - -} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/common/Buffer.h b/core/src/index/knowhere/knowhere/common/Buffer.h deleted file mode 100644 index f9e15d95bd..0000000000 --- a/core/src/index/knowhere/knowhere/common/Buffer.h +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include - -namespace knowhere { - -using Buffer = arrow::Buffer; -using BufferPtr = std::shared_ptr; -using MutableBuffer = arrow::MutableBuffer; -using MutableBufferPtr = std::shared_ptr; - -namespace internal { - -struct BufferDeleter { - void - operator()(Buffer* buffer) { - free((void*)buffer->data()); - } -}; -} // namespace internal - -inline BufferPtr -MakeBufferSmart(uint8_t* data, const int64_t size) { - return BufferPtr(new Buffer(data, size), internal::BufferDeleter()); -} - -inline MutableBufferPtr -MakeMutableBufferSmart(uint8_t* data, const int64_t size) { - return MutableBufferPtr(new MutableBuffer(data, size), internal::BufferDeleter()); -} - -inline BufferPtr -MakeBuffer(uint8_t* data, const int64_t size) { - return std::make_shared(data, size); -} - -inline MutableBufferPtr -MakeMutableBuffer(uint8_t* data, const int64_t size) { - return std::make_shared(data, size); -} - -} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/common/Dataset.h b/core/src/index/knowhere/knowhere/common/Dataset.h index b101aba6a7..33e6ad56ea 100644 --- a/core/src/index/knowhere/knowhere/common/Dataset.h +++ b/core/src/index/knowhere/knowhere/common/Dataset.h @@ -17,149 +17,105 @@ #pragma once +#include +#include #include +#include +#include +#include +#include #include -#include - -#include "Array.h" -#include "Buffer.h" -#include "Config.h" -#include "Schema.h" -#include "Tensor.h" -#include "knowhere/adapter/ArrowAdapter.h" namespace knowhere { -class Dataset; +struct BaseValue; +using BasePtr = std::unique_ptr; +struct BaseValue { + virtual ~BaseValue() = default; -using DatasetPtr = std::shared_ptr; + // virtual BasePtr + // Clone() const = 0; +}; + +template +struct AnyValue : public BaseValue { + T data_; + + template + explicit AnyValue(U&& value) : data_(std::forward(value)) { + } + + // BasePtr + // Clone() const { + // return BasePtr(data_); + // } +}; + +struct Value { + std::type_index type_; + BasePtr data_; + + template ::type, Value>::value, U>::type> + explicit Value(U&& value) + : data_(new AnyValue::type>(std::forward(value))), + type_(std::type_index(typeid(typename std::decay::type))) { + } + + template + bool + Is() const { + return type_ == std::type_index(typeid(U)); + } + + template + U& + AnyCast() { + if (!Is()) { + std::stringstream ss; + ss << "Can't cast t " << type_.name() << " to " << typeid(U).name(); + throw std::logic_error(ss.str()); + } + + auto derived = dynamic_cast*>(data_.get()); + return derived->data_; + } +}; +using ValuePtr = std::shared_ptr; class Dataset { public: Dataset() = default; - Dataset(std::vector&& array, SchemaPtr array_schema, std::vector&& tensor, - SchemaPtr tensor_schema) - : array_(std::move(array)), - array_schema_(std::move(array_schema)), - tensor_(std::move(tensor)), - tensor_schema_(std::move(tensor_schema)) { + template + void + Set(const std::string& k, T&& v) { + std::lock_guard lk(mutex_); + auto value = std::make_shared(std::forward(v)); + data_[k] = value; } - Dataset(std::vector array, SchemaPtr array_schema) - : array_(std::move(array)), array_schema_(std::move(array_schema)) { - } - - Dataset(std::vector tensor, SchemaPtr tensor_schema) - : tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) { - } - - Dataset(void* ids, void* dists) : ids_(ids), dists_(dists) { - } - - Dataset(const Dataset&) = delete; - Dataset& - operator=(const Dataset&) = delete; - - DatasetPtr - Clone() { - auto dataset = std::make_shared(); - - std::vector clone_array; - for (auto& array : array_) { - clone_array.emplace_back(CopyArray(array)); + template + T + Get(const std::string& k) { + std::lock_guard lk(mutex_); + auto finder = data_.find(k); + if (finder != data_.end()) { + return finder->second->AnyCast(); + } else { + throw std::logic_error("Can't find this key"); } - dataset->set_array(clone_array); - - std::vector clone_tensor; - for (auto& tensor : tensor_) { - auto buffer = tensor->data(); - std::shared_ptr copy_buffer; - // TODO: checkout copy success; - buffer->Copy(0, buffer->size(), ©_buffer); - auto copy = std::make_shared(tensor->type(), copy_buffer, tensor->shape()); - clone_tensor.emplace_back(copy); - } - dataset->set_tensor(clone_tensor); - - if (array_schema_) - dataset->set_array_schema(CopySchema(array_schema_)); - if (tensor_schema_) - dataset->set_tensor_schema(CopySchema(tensor_schema_)); - - return dataset; } - public: - const std::vector& - array() const { - return array_; + const std::map& + data() const { + return data_; } - void - set_array(std::vector array) { - array_ = std::move(array); - } - - const std::vector& - tensor() const { - return tensor_; - } - - void - set_tensor(std::vector tensor) { - tensor_ = std::move(tensor); - } - - SchemaConstPtr - array_schema() const { - return array_schema_; - } - - void - set_array_schema(SchemaPtr array_schema) { - array_schema_ = std::move(array_schema); - } - - SchemaConstPtr - tensor_schema() const { - return tensor_schema_; - } - - void - set_tensor_schema(SchemaPtr tensor_schema) { - tensor_schema_ = std::move(tensor_schema); - } - - void* - ids() { - return ids_; - } - - void* - dist() { - return dists_; - } - - // const Config & - // meta() const { return meta_; } - - // void - // set_meta(Config meta) { - // meta_ = std::move(meta); - //} - private: - std::vector array_; - SchemaPtr array_schema_; - std::vector tensor_; - SchemaPtr tensor_schema_; - // TODO(yukun): using smart pointer - void* ids_; - void* dists_; - // Config meta_; + std::mutex mutex_; + std::map data_; }; - using DatasetPtr = std::shared_ptr; } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/common/Schema.h b/core/src/index/knowhere/knowhere/common/Schema.h deleted file mode 100644 index c90bac7572..0000000000 --- a/core/src/index/knowhere/knowhere/common/Schema.h +++ /dev/null @@ -1,33 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include - -namespace knowhere { - -using DataType = arrow::DataType; -using Field = arrow::Field; -using FieldPtr = std::shared_ptr; -using Schema = arrow::Schema; -using SchemaPtr = std::shared_ptr; -using SchemaConstPtr = std::shared_ptr; - -} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/common/Tensor.h b/core/src/index/knowhere/knowhere/common/Tensor.h deleted file mode 100644 index ff957319e5..0000000000 --- a/core/src/index/knowhere/knowhere/common/Tensor.h +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include - -namespace knowhere { - -using Tensor = arrow::Tensor; -using TensorPtr = std::shared_ptr; - -} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.cpp index 3982921b9a..6165a53795 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.cpp @@ -127,7 +127,7 @@ GPUIDMAP::search_impl(int64_t n, const float* data, int64_t k, float* distances, } void -GPUIDMAP::GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config) { +GPUIDMAP::GenGraph(const float* data, const int64_t& k, Graph& graph, const Config& config) { int64_t K = k + 1; auto ntotal = Count(); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h index 31c7039f50..00ae79656f 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexGPUIDMAP.h @@ -49,7 +49,7 @@ class GPUIDMAP : public IDMAP, public GPUIndex { CopyGpuToGpu(const int64_t& device_id, const Config& config) override; void - GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config); + GenGraph(const float* data, const int64_t& k, Graph& graph, const Config& config); protected: void diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp index 96cb76683a..62d3901cef 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.cpp @@ -66,38 +66,21 @@ IDMAP::Search(const DatasetPtr& dataset, const Config& config) { if (!index_) { KNOWHERE_THROW_MSG("index not initialize"); } - config->CheckValid(); - // auto metric_type = config["metric_type"].as_string() == "L2" ? - // faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT; - // index_->metric_type = metric_type; - GETTENSOR(dataset) auto elems = rows * config->k; - auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems); - auto res_dis = (float*)malloc(sizeof(float) * elems); + size_t p_id_size = sizeof(int64_t) * elems; + size_t p_dist_size = sizeof(float) * elems; + auto p_id = (int64_t*)malloc(p_id_size); + auto p_dist = (float*)malloc(p_dist_size); - search_impl(rows, (float*)p_data, config->k, res_dis, res_ids, Config()); + search_impl(rows, (float*)p_data, config->k, p_dist, p_id, Config()); - // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); - // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); - // - // std::vector id_bufs{nullptr, id_buf}; - // std::vector dist_bufs{nullptr, dist_buf}; - // - // auto int64_type = std::make_shared(); - // auto float_type = std::make_shared(); - // - // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); - // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); - // - // auto ids = std::make_shared>(id_array_data); - // auto dists = std::make_shared>(dist_array_data); - // std::vector array{ids, dists}; - // - // return std::make_shared(array, nullptr); - return std::make_shared((void*)res_ids, (void*)res_dis); + auto ret_ds = std::make_shared(); + ret_ds->Set(meta::IDS, p_id); + ret_ds->Set(meta::DISTANCE, p_dist); + return ret_ds; } void @@ -114,10 +97,7 @@ IDMAP::Add(const DatasetPtr& dataset, const Config& config) { std::lock_guard lk(mutex_); GETTENSOR(dataset) - // TODO: magic here. - auto array = dataset->array()[0]; - auto p_ids = array->data()->GetValues(1, 0); - + auto p_ids = dataset->Get(meta::IDS); index_->add_with_ids(rows, (float*)p_data, p_ids); } @@ -130,9 +110,6 @@ IDMAP::AddWithoutId(const DatasetPtr& dataset, const Config& config) { std::lock_guard lk(mutex_); GETTENSOR(dataset) - // TODO: magic here. - auto array = dataset->array()[0]; - std::vector new_ids(rows); for (int i = 0; i < rows; ++i) { new_ids[i] = i; @@ -151,8 +128,7 @@ IDMAP::Dimension() { return index_->d; } -// TODO(linxj): return const pointer -float* +const float* IDMAP::GetRawVectors() { try { auto file_index = dynamic_cast(index_.get()); @@ -163,8 +139,7 @@ IDMAP::GetRawVectors() { } } -// TODO(linxj): return const pointer -int64_t* +const int64_t* IDMAP::GetRawIds() { try { auto file_index = dynamic_cast(index_.get()); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.h index 8ae6839f65..6909834989 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIDMAP.h @@ -64,10 +64,10 @@ class IDMAP : public VectorIndex, public FaissBaseIndex { void Seal() override; - virtual float* + virtual const float* GetRawVectors(); - virtual int64_t* + virtual const int64_t* GetRawIds(); protected: diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp index ec70be0e33..62564aaaa3 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.cpp @@ -73,8 +73,7 @@ IVF::Add(const DatasetPtr& dataset, const Config& config) { std::lock_guard lk(mutex_); GETTENSOR(dataset) - auto array = dataset->array()[0]; - auto p_ids = array->data()->GetValues(1, 0); + auto p_ids = dataset->Get(meta::IDS); index_->add_with_ids(rows, (float*)p_data, p_ids); } @@ -121,10 +120,13 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { try { auto elems = rows * search_cfg->k; - auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems); - auto res_dis = (float*)malloc(sizeof(float) * elems); - search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config); + size_t p_id_size = sizeof(int64_t) * elems; + size_t p_dist_size = sizeof(float) * elems; + auto p_id = (int64_t*)malloc(p_id_size); + auto p_dist = (float*)malloc(p_dist_size); + + search_impl(rows, (float*)p_data, search_cfg->k, p_dist, p_id, config); // std::stringstream ss_res_id, ss_res_dist; // for (int i = 0; i < 10; ++i) { @@ -139,23 +141,10 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) { // std::cout << ss_res_id.str() << std::endl; // std::cout << ss_res_dist.str() << std::endl << std::endl; - // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); - // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); - // - // std::vector id_bufs{nullptr, id_buf}; - // std::vector dist_bufs{nullptr, dist_buf}; - // - // auto int64_type = std::make_shared(); - // auto float_type = std::make_shared(); - // - // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); - // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); - // - // auto ids = std::make_shared>(id_array_data); - // auto dists = std::make_shared>(dist_array_data); - // std::vector array{ids, dists}; - - return std::make_shared((void*)res_ids, (void*)res_dis); + auto ret_ds = std::make_shared(); + ret_ds->Set(meta::IDS, p_id); + ret_ds->Set(meta::DISTANCE, p_dist); + return ret_ds; } catch (faiss::FaissException& e) { KNOWHERE_THROW_MSG(e.what()); } catch (std::exception& e) { @@ -195,7 +184,7 @@ IVF::Dimension() { } void -IVF::GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config) { +IVF::GenGraph(const float* data, const int64_t& k, Graph& graph, const Config& config) { int64_t K = k + 1; auto ntotal = Count(); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h index 9742bea40b..96bf178dc2 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexIVF.h @@ -57,7 +57,7 @@ class IVF : public VectorIndex, public FaissBaseIndex { Search(const DatasetPtr& dataset, const Config& config) override; void - GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config); + GenGraph(const float* data, const int64_t& k, Graph& graph, const Config& config); BinarySet Serialize() override; diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp index db8b05f992..6b3255c829 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexNSG.cpp @@ -84,31 +84,19 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) { GETTENSOR(dataset) auto elems = rows * build_cfg->k; - auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems); - auto res_dis = (float*)malloc(sizeof(float) * elems); + size_t p_id_size = sizeof(int64_t) * elems; + size_t p_dist_size = sizeof(float) * elems; + auto p_id = (int64_t*)malloc(p_id_size); + auto p_dist = (float*)malloc(p_dist_size); algo::SearchParams s_params; s_params.search_length = build_cfg->search_length; - index_->Search((float*)p_data, rows, dim, build_cfg->k, res_dis, res_ids, s_params); + index_->Search((float*)p_data, rows, dim, build_cfg->k, p_dist, p_id, s_params); - // auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems); - // auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems); - - // std::vector id_bufs{nullptr, id_buf}; - // std::vector dist_bufs{nullptr, dist_buf}; - // - // auto int64_type = std::make_shared(); - // auto float_type = std::make_shared(); - // - // auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs); - // auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs); - // - // auto ids = std::make_shared>(id_array_data); - // auto dists = std::make_shared>(dist_array_data); - // std::vector array{ids, dists}; - // - // return std::make_shared(array, nullptr); - return std::make_shared((void*)res_ids, (void*)res_dis); + auto ret_ds = std::make_shared(); + ret_ds->Set(meta::IDS, p_id); + ret_ds->Set(meta::DISTANCE, p_dist); + return ret_ds; } IndexModelPtr @@ -123,7 +111,7 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) { idmap->Train(config); idmap->AddWithoutId(dataset, config); Graph knng; - float* raw_data = idmap->GetRawVectors(); + const float* raw_data = idmap->GetRawVectors(); #ifdef MILVUS_GPU_VERSION if (build_cfg->gpu_id == knowhere::INVALID_VALUE) { auto preprocess_index = std::make_shared(); @@ -150,8 +138,7 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) { b_params.out_degree = build_cfg->out_degree; b_params.search_length = build_cfg->search_length; - auto array = dataset->array()[0]; - auto p_ids = array->data()->GetValues(1, 0); + auto p_ids = dataset->Get(meta::IDS); GETTENSOR(dataset) index_ = std::make_shared(dim, rows); diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp index 9d1d693c14..d5014783c6 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp @@ -26,6 +26,7 @@ #undef mkdir #include "knowhere/adapter/SptagAdapter.h" +#include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" #include "knowhere/index/vector_index/IndexSPTAG.h" #include "knowhere/index/vector_index/helpers/Definitions.h" @@ -88,42 +89,6 @@ CPUSPTAGRNG::Serialize() { binary_set.Append("config", config, length); binary_set.Append("graph", graph, index_blobs[2].Length()); - // MemoryIOWriter writer; - // size_t len = 0; - // for (int i = 0; i < 6; ++i) { - // len = index_blobs[i].Length(); - // assert(len != 0); - // writer(&len, sizeof(size_t), 1); - // writer(index_blobs[i].Data(), len, 1); - // len = 0; - // } - // writer(&length, sizeof(size_t), 1); - // writer(cstr, length, 1); - // auto data = std::make_shared(); - // data.reset(writer.data_); - // BinarySet binary_set; - // binary_set.Append("sptag", data, writer.total); - - // MemoryIOWriter writer; - // size_t len = 0; - // for (int i = 0; i < 6; ++i) { - // if (i == 2) continue; - // len = index_blobs[i].Length(); - // assert(len != 0); - // writer(&len, sizeof(size_t), 1); - // writer(index_blobs[i].Data(), len, 1); - // len = 0; - // } - // writer(&length, sizeof(size_t), 1); - // writer(cstr, length, 1); - // auto data = std::make_shared(); - // data.reset(writer.data_); - // BinarySet binary_set; - // binary_set.Append("sptag", data, writer.total); - // auto graph = std::make_shared(); - // graph.reset(static_cast(index_blobs[2].Data())); - // binary_set.Append("graph", graph, index_blobs[2].Length()); - return binary_set; } @@ -153,52 +118,6 @@ CPUSPTAGRNG::Load(const BinarySet& binary_set) { auto config = binary_set.GetByName("config"); index_config = reinterpret_cast(config->data.get()); - // std::vector index_blobs; - // auto data = binary_set.GetByName("sptag"); - // MemoryIOReader reader; - // reader.total = data->size; - // reader.data_ = data->data.get(); - // size_t len = 0; - // for (int i = 0; i < 6; ++i) { - // reader(&len, sizeof(size_t), 1); - // assert(len != 0); - // auto binary = new uint8_t[len]; - // reader(binary, len, 1); - // index_blobs.emplace_back(SPTAG::ByteArray(binary, len, true)); - // len = 0; - // } - // reader(&len, sizeof(size_t), 1); - // assert(len != 0); - // auto config = new char[len]; - // reader(config, len, 1); - // std::string index_config = config; - // delete[] config; - - // std::vector index_blobs; - // auto data = binary_set.GetByName("sptag"); - // MemoryIOReader reader; - // reader.total = data->size; - // reader.data_ = data->data.get(); - // size_t len = 0; - // for (int i = 0; i < 6; ++i) { - // if (i == 2) { - // auto graph = binary_set.GetByName("graph"); - // index_blobs.emplace_back(SPTAG::ByteArray(graph->data.get(), graph->size, false)); - // continue; - // } - // reader(&len, sizeof(size_t), 1); - // assert(len != 0); - // auto binary = new uint8_t[len]; - // reader(binary, len, 1); - // index_blobs.emplace_back(SPTAG::ByteArray(binary, len, true)); - // len = 0; - // } - // reader(&len, sizeof(size_t), 1); - // assert(len != 0); - // auto config = new char[len]; - // reader(config, len, 1); - // std::string index_config = config; - // delete[] config; index_ptr_->LoadIndex(index_config, index_blobs); } @@ -213,7 +132,8 @@ CPUSPTAGRNG::Train(const DatasetPtr& origin, const Config& train_config) { if (train_config != nullptr) { train_config->CheckValid(); // throw exception } - DatasetPtr dataset = origin->Clone(); + + DatasetPtr dataset = origin; // TODO(linxj): copy or reference? // if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine // && preprocessor_) { @@ -301,11 +221,11 @@ CPUSPTAGRNG::Search(const DatasetPtr& dataset, const Config& config) { if (config != nullptr) { config->CheckValid(); // throw exception } - auto tensor = dataset->tensor()[0]; - auto p = (float*)tensor->raw_mutable_data(); + + auto p_data = dataset->Get(meta::TENSOR); for (auto i = 0; i < 10; ++i) { for (auto j = 0; j < 10; ++j) { - std::cout << p[i * 10 + j] << " "; + std::cout << p_data[i * 10 + j] << " "; } std::cout << std::endl; } diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt index 831ae81a05..c4f263e817 100644 --- a/core/src/index/unittest/CMakeLists.txt +++ b/core/src/index/unittest/CMakeLists.txt @@ -6,7 +6,6 @@ include_directories(${INDEX_SOURCE_DIR}) set(depend_libs gtest gmock gtest_main gmock_main faiss - arrow "${ARROW_LIB_DIR}/libjemalloc_pic.a" ) if (FAISS_WITH_MKL) set(depend_libs ${depend_libs} @@ -31,8 +30,7 @@ set(util_srcs ${MILVUS_THIRDPARTY_SRC}/easyloggingpp/easylogging++.cc ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/FaissIO.cpp ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp - ${INDEX_SOURCE_DIR}/knowhere/knowhere/adapter/Structure.cpp - ${INDEX_SOURCE_DIR}/knowhere/knowhere/adapter/ArrowAdapter.cpp + ${INDEX_SOURCE_DIR}/knowhere/knowhere/adapter/VectorAdapter.cpp ${INDEX_SOURCE_DIR}/knowhere/knowhere/common/Exception.cpp ${INDEX_SOURCE_DIR}/knowhere/knowhere/common/Timer.cpp ${INDEX_SOURCE_DIR}/unittest/utils.cpp diff --git a/core/src/index/unittest/faiss_benchmark/CMakeLists.txt b/core/src/index/unittest/faiss_benchmark/CMakeLists.txt index 41fc132723..843945c759 100644 --- a/core/src/index/unittest/faiss_benchmark/CMakeLists.txt +++ b/core/src/index/unittest/faiss_benchmark/CMakeLists.txt @@ -13,7 +13,6 @@ if (KNOWHERE_GPU_VERSION) set(depend_libs faiss hdf5 - arrow ${ARROW_LIB_DIR}/libjemalloc_pic.a ) if (FAISS_WITH_MKL) set(depend_libs ${depend_libs} diff --git a/core/src/index/unittest/faiss_ori/CMakeLists.txt b/core/src/index/unittest/faiss_ori/CMakeLists.txt index 23d9c2f613..a93824bf19 100644 --- a/core/src/index/unittest/faiss_ori/CMakeLists.txt +++ b/core/src/index/unittest/faiss_ori/CMakeLists.txt @@ -8,7 +8,6 @@ if (KNOWHERE_GPU_VERSION) set(depend_libs faiss - arrow ${ARROW_LIB_DIR}/libjemalloc_pic.a ) if (FAISS_WITH_MKL) set(depend_libs ${depend_libs} diff --git a/core/src/index/unittest/test_idmap.cpp b/core/src/index/unittest/test_idmap.cpp index 1ed750d9a8..b35155f577 100644 --- a/core/src/index/unittest/test_idmap.cpp +++ b/core/src/index/unittest/test_idmap.cpp @@ -18,7 +18,6 @@ #include #include -#include "knowhere/adapter/Structure.h" #include "knowhere/common/Exception.h" #include "knowhere/index/vector_index/IndexIDMAP.h" #ifdef MILVUS_GPU_VERSION diff --git a/core/src/index/unittest/test_ivf.cpp b/core/src/index/unittest/test_ivf.cpp index 7438d5656a..d24fc0197c 100644 --- a/core/src/index/unittest/test_ivf.cpp +++ b/core/src/index/unittest/test_ivf.cpp @@ -24,6 +24,7 @@ #include #endif +#include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" #include "knowhere/common/Timer.h" @@ -165,6 +166,7 @@ TEST_P(IVFTest, ivf_serialize) { } } +// TODO(linxj): deprecated #ifdef MILVUS_GPU_VERSION TEST_P(IVFTest, clone_test) { assert(!xb.empty()); @@ -182,8 +184,8 @@ TEST_P(IVFTest, clone_test) { // PrintResult(result, nq, k); auto AssertEqual = [&](knowhere::DatasetPtr p1, knowhere::DatasetPtr p2) { - auto ids_p1 = p1->ids(); - auto ids_p2 = p2->ids(); + auto ids_p1 = p1->Get(knowhere::meta::IDS); + auto ids_p2 = p2->Get(knowhere::meta::IDS); for (int i = 0; i < nq * k; ++i) { EXPECT_EQ(*((int64_t*)(ids_p2) + i), *((int64_t*)(ids_p1) + i)); diff --git a/core/src/index/unittest/test_sptag.cpp b/core/src/index/unittest/test_sptag.cpp index d472dab0d7..c2b6c9916f 100644 --- a/core/src/index/unittest/test_sptag.cpp +++ b/core/src/index/unittest/test_sptag.cpp @@ -19,9 +19,8 @@ #include #include - #include "knowhere/adapter/SptagAdapter.h" -#include "knowhere/adapter/Structure.h" +#include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" #include "knowhere/index/vector_index/IndexSPTAG.h" #include "knowhere/index/vector_index/helpers/Definitions.h" @@ -76,10 +75,8 @@ TEST_P(SPTAGTest, sptag_basic) { AssertAnns(result, nq, k); { - // auto ids = result->array()[0]; - // auto dists = result->array()[1]; - auto ids = result->ids(); - auto dists = result->dist(); + auto ids = result->Get(knowhere::meta::IDS); + auto dist = result->Get(knowhere::meta::DISTANCE); std::stringstream ss_id; std::stringstream ss_dist; @@ -88,7 +85,7 @@ TEST_P(SPTAGTest, sptag_basic) { // ss_id << *ids->data()->GetValues(1, i * k + j) << " "; // ss_dist << *dists->data()->GetValues(1, i * k + j) << " "; ss_id << *((int64_t*)(ids) + i * k + j) << " "; - ss_dist << *((float*)(dists) + i * k + j) << " "; + ss_dist << *((float*)(dist) + i * k + j) << " "; } ss_id << std::endl; ss_dist << std::endl; diff --git a/core/src/index/unittest/utils.cpp b/core/src/index/unittest/utils.cpp index a2ff6fd829..1f1bd2e4ec 100644 --- a/core/src/index/unittest/utils.cpp +++ b/core/src/index/unittest/utils.cpp @@ -16,6 +16,7 @@ // under the License. #include "unittest/utils.h" +#include "knowhere/adapter/VectorAdapter.h" #include #include @@ -120,38 +121,27 @@ FileIOWriter::operator()(void* ptr, size_t size) { } knowhere::DatasetPtr -generate_dataset(int64_t nb, int64_t dim, float* xb, int64_t* ids) { - std::vector shape{nb, dim}; - auto tensor = knowhere::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape); - std::vector tensors{tensor}; - std::vector tensor_fields{knowhere::ConstructFloatField("data")}; - auto tensor_schema = std::make_shared(tensor_fields); - - auto id_array = knowhere::ConstructInt64Array((uint8_t*)ids, nb * sizeof(int64_t)); - std::vector arrays{id_array}; - std::vector array_fields{knowhere::ConstructInt64Field("id")}; - auto array_schema = std::make_shared(tensor_fields); - - auto dataset = - std::make_shared(std::move(arrays), array_schema, std::move(tensors), tensor_schema); - return dataset; +generate_dataset(int64_t nb, int64_t dim, const float* xb, const int64_t* ids) { + auto ret_ds = std::make_shared(); + ret_ds->Set(knowhere::meta::ROWS, nb); + ret_ds->Set(knowhere::meta::DIM, dim); + ret_ds->Set(knowhere::meta::TENSOR, xb); + ret_ds->Set(knowhere::meta::IDS, ids); + return ret_ds; } knowhere::DatasetPtr -generate_query_dataset(int64_t nb, int64_t dim, float* xb) { - std::vector shape{nb, dim}; - auto tensor = knowhere::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape); - std::vector tensors{tensor}; - std::vector tensor_fields{knowhere::ConstructFloatField("data")}; - auto tensor_schema = std::make_shared(tensor_fields); - - auto dataset = std::make_shared(std::move(tensors), tensor_schema); - return dataset; +generate_query_dataset(int64_t nb, int64_t dim, const float* xb) { + auto ret_ds = std::make_shared(); + ret_ds->Set(knowhere::meta::ROWS, nb); + ret_ds->Set(knowhere::meta::DIM, dim); + ret_ds->Set(knowhere::meta::TENSOR, xb); + return ret_ds; } void AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) { - auto ids = result->ids(); + auto ids = result->Get(knowhere::meta::IDS); for (auto i = 0; i < nq; i++) { EXPECT_EQ(i, *((int64_t*)(ids) + i * k)); // EXPECT_EQ(i, *(ids->data()->GetValues(1, i * k))); @@ -160,8 +150,8 @@ AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) { void PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) { - auto ids = result->ids(); - auto dists = result->dist(); + auto ids = result->Get(knowhere::meta::IDS); + auto dist = result->Get(knowhere::meta::DISTANCE); std::stringstream ss_id; std::stringstream ss_dist; @@ -170,7 +160,7 @@ PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) { // ss_id << *(ids->data()->GetValues(1, i * k + j)) << " "; // ss_dist << *(dists->data()->GetValues(1, i * k + j)) << " "; ss_id << *((int64_t*)(ids) + i * k + j) << " "; - ss_dist << *((float*)(dists) + i * k + j) << " "; + ss_dist << *((float*)(dist) + i * k + j) << " "; } ss_id << std::endl; ss_dist << std::endl; diff --git a/core/src/index/unittest/utils.h b/core/src/index/unittest/utils.h index 03fd157222..8f5b4fcba8 100644 --- a/core/src/index/unittest/utils.h +++ b/core/src/index/unittest/utils.h @@ -23,7 +23,7 @@ #include #include -#include "knowhere/adapter/Structure.h" +#include "knowhere/common/Dataset.h" #include "knowhere/common/Log.h" class DataGen { @@ -63,10 +63,10 @@ extern void InitLog(); knowhere::DatasetPtr -generate_dataset(int64_t nb, int64_t dim, float* xb, int64_t* ids); +generate_dataset(int64_t nb, int64_t dim, const float* xb, const int64_t* ids); knowhere::DatasetPtr -generate_query_dataset(int64_t nb, int64_t dim, float* xb); +generate_query_dataset(int64_t nb, int64_t dim, const float* xb); void AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k); diff --git a/core/src/wrapper/DataTransfer.cpp b/core/src/wrapper/DataTransfer.cpp index 5eb83290d1..2dca383b6e 100644 --- a/core/src/wrapper/DataTransfer.cpp +++ b/core/src/wrapper/DataTransfer.cpp @@ -16,42 +16,30 @@ // under the License. #include "wrapper/DataTransfer.h" +#include "knowhere/adapter/VectorAdapter.h" #include -#include -#include namespace milvus { namespace engine { knowhere::DatasetPtr GenDatasetWithIds(const int64_t& nb, const int64_t& dim, const float* xb, const int64_t* ids) { - std::vector shape{nb, dim}; - auto tensor = knowhere::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape); - std::vector tensors{tensor}; - std::vector tensor_fields{knowhere::ConstructFloatField("data")}; - auto tensor_schema = std::make_shared(tensor_fields); - - auto id_array = knowhere::ConstructInt64Array((uint8_t*)ids, nb * sizeof(int64_t)); - std::vector arrays{id_array}; - std::vector array_fields{knowhere::ConstructInt64Field("id")}; - auto array_schema = std::make_shared(tensor_fields); - - auto dataset = - std::make_shared(std::move(arrays), array_schema, std::move(tensors), tensor_schema); - return dataset; + auto ret_ds = std::make_shared(); + ret_ds->Set(knowhere::meta::ROWS, nb); + ret_ds->Set(knowhere::meta::DIM, dim); + ret_ds->Set(knowhere::meta::TENSOR, xb); + ret_ds->Set(knowhere::meta::IDS, ids); + return ret_ds; } knowhere::DatasetPtr GenDataset(const int64_t& nb, const int64_t& dim, const float* xb) { - std::vector shape{nb, dim}; - auto tensor = knowhere::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape); - std::vector tensors{tensor}; - std::vector tensor_fields{knowhere::ConstructFloatField("data")}; - auto tensor_schema = std::make_shared(tensor_fields); - - auto dataset = std::make_shared(std::move(tensors), tensor_schema); - return dataset; + auto ret_ds = std::make_shared(); + ret_ds->Set(knowhere::meta::ROWS, nb); + ret_ds->Set(knowhere::meta::DIM, dim); + ret_ds->Set(knowhere::meta::TENSOR, xb); + return ret_ds; } } // namespace engine diff --git a/core/src/wrapper/DataTransfer.h b/core/src/wrapper/DataTransfer.h index e945eaa6db..cfdf84ccd0 100644 --- a/core/src/wrapper/DataTransfer.h +++ b/core/src/wrapper/DataTransfer.h @@ -17,7 +17,7 @@ #pragma once -#include "knowhere/adapter/Structure.h" +#include "knowhere/common/Dataset.h" namespace milvus { namespace engine { diff --git a/core/src/wrapper/VecImpl.cpp b/core/src/wrapper/VecImpl.cpp index dda7452cd0..caa59611a0 100644 --- a/core/src/wrapper/VecImpl.cpp +++ b/core/src/wrapper/VecImpl.cpp @@ -17,6 +17,7 @@ #include "wrapper/VecImpl.h" #include "DataTransfer.h" +#include "knowhere/adapter/VectorAdapter.h" #include "knowhere/common/Exception.h" #include "knowhere/index/vector_index/IndexIDMAP.h" #include "utils/Log.h" @@ -86,9 +87,6 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i Config search_cfg = cfg; auto res = index_->Search(dataset, search_cfg); - // auto ids_array = res->array()[0]; - // auto dis_array = res->array()[1]; - //{ // auto& ids = ids_array; // auto& dists = dis_array; @@ -110,10 +108,12 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i // auto p_dist = dis_array->data()->GetValues(1, 0); // TODO(linxj): avoid copy here. - memcpy(ids, res->ids(), sizeof(int64_t) * nq * k); - memcpy(dist, res->dist(), sizeof(float) * nq * k); - free(res->ids()); - free(res->dist()); + auto res_ids = res->Get(knowhere::meta::IDS); + auto res_dist = res->Get(knowhere::meta::DISTANCE); + memcpy(ids, res_ids, sizeof(int64_t) * nq * k); + memcpy(dist, res_dist, sizeof(float) * nq * k); + free(res_ids); + free(res_dist); } catch (knowhere::KnowhereException& e) { WRAPPER_LOG_ERROR << e.what(); return Status(KNOWHERE_UNEXPECTED_ERROR, e.what()); @@ -200,7 +200,7 @@ VecIndexImpl::GetDeviceId() { #endif } -float* +const float* BFIndex::GetRawVectors() { auto raw_index = std::dynamic_pointer_cast(index_); if (raw_index) { @@ -209,7 +209,7 @@ BFIndex::GetRawVectors() { return nullptr; } -int64_t* +const int64_t* BFIndex::GetRawIds() { return std::static_pointer_cast(index_)->GetRawIds(); } diff --git a/core/src/wrapper/VecImpl.h b/core/src/wrapper/VecImpl.h index a6d81cba9f..0042625d97 100644 --- a/core/src/wrapper/VecImpl.h +++ b/core/src/wrapper/VecImpl.h @@ -86,14 +86,14 @@ class BFIndex : public VecIndexImpl { ErrorCode Build(const Config& cfg); - float* + const float* GetRawVectors(); Status BuildAll(const int64_t& nb, const float* xb, const int64_t* ids, const Config& cfg, const int64_t& nt, const float* xt) override; - int64_t* + const int64_t* GetRawIds(); };