Remove Arrow (#817)

* replace arrow

* format

* update changelog

* code-style

* fix bug

* fix 2

* fix 3

* fix 4

* change build timeout on Jenkinsfile
This commit is contained in:
Tinkerrr 2019-12-25 15:34:51 +08:00 committed by Jin Hai
parent 685d199ae9
commit d37670bd3e
36 changed files with 224 additions and 821 deletions

View File

@ -18,6 +18,7 @@ Please mark all change in change log and use the issue from GitHub
## Improvement
- \#738 - Use Openblas / lapack from apt install
- \#758 - Enhance config description
- \#791 - Remove Arrow
## Task

View File

@ -1,4 +1,4 @@
timeout(time: 60, unit: 'MINUTES') {
timeout(time: 75, unit: 'MINUTES') {
dir ("ci/scripts") {
withCredentials([usernamePassword(credentialsId: "${params.JFROG_CREDENTIALS_ID}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
def checkResult = sh(script: "./check_ccache.sh -l ${params.JFROG_ARTFACTORY_URL}/ccache", returnStatus: true)

View File

@ -73,7 +73,7 @@ Note that this requires linking Boost statically" OFF)
define_option(KNOWHERE_BOOST_HEADER_ONLY "Use only BOOST headers" OFF)
define_option(KNOWHERE_WITH_ARROW "Build with ARROW" ON)
define_option(KNOWHERE_WITH_ARROW "Build with ARROW" OFF)
define_option(KNOWHERE_WITH_FAISS "Build with FAISS library" ON)

View File

@ -22,8 +22,7 @@ endif ()
set(external_srcs
knowhere/adapter/SptagAdapter.cpp
knowhere/adapter/Structure.cpp
knowhere/adapter/ArrowAdapter.cpp
knowhere/adapter/VectorAdapter.cpp
knowhere/common/Exception.cpp
knowhere/common/Timer.cpp
)
@ -49,8 +48,6 @@ set(index_srcs
set(depend_libs
SPTAGLibStatic
faiss
arrow
${ARROW_LIB_DIR}/libjemalloc_pic.a
gomp
gfortran
pthread

View File

@ -1,53 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "knowhere/adapter/ArrowAdapter.h"
namespace knowhere {
/// Builds a new array object of the same concrete Arrow class as `origin`,
/// backed by the result of `origin->data()->Copy()`.
///
/// @param origin  Array to copy. It is dereferenced unconditionally, so it
///                must not be null.
/// @return The new array, or nullptr when `origin`'s type id is not one of the
///         kinds listed in the switch below.
ArrayPtr
CopyArray(const ArrayPtr& origin) {
    ArrayPtr copy = nullptr;
    auto copy_data = origin->data()->Copy();
    switch (origin->type_id()) {
// Every generated case ends with `break`. Without it each handled type fell
// through the following cases and was finally overwritten by the INT64 one,
// so e.g. a FLOAT array came back wrapped as NumericArray<Int64Type>.
#define DEFINE_TYPE(type, clazz)                          \
    case arrow::Type::type: {                             \
        copy = std::make_shared<arrow::clazz>(copy_data); \
        break;                                            \
    }
        DEFINE_TYPE(BOOL, BooleanArray)
        DEFINE_TYPE(BINARY, BinaryArray)
        DEFINE_TYPE(FIXED_SIZE_BINARY, FixedSizeBinaryArray)
        DEFINE_TYPE(DECIMAL, Decimal128Array)
        DEFINE_TYPE(FLOAT, NumericArray<arrow::FloatType>)
        DEFINE_TYPE(INT64, NumericArray<arrow::Int64Type>)
// The helper macro is only meaningful inside this switch; do not leak it.
#undef DEFINE_TYPE
        default:
            break;
    }
    return copy;
}
/// Creates a new Schema whose fields are freshly constructed from the name,
/// type and nullability of each field in `origin`. The fourth Field
/// constructor argument is passed as nullptr for every rebuilt field.
///
/// @param origin  Schema to copy; dereferenced unconditionally.
/// @return A new Schema holding the rebuilt fields in the original order.
SchemaPtr
CopySchema(const SchemaPtr& origin) {
    std::vector<std::shared_ptr<Field>> cloned_fields;
    const auto& source_fields = origin->fields();
    for (auto it = source_fields.begin(); it != source_fields.end(); ++it) {
        const auto& src = *it;
        cloned_fields.emplace_back(std::make_shared<Field>(src->name(), src->type(), src->nullable(), nullptr));
    }
    return std::make_shared<Schema>(std::move(cloned_fields));
}
} // namespace knowhere

View File

@ -16,17 +16,15 @@
// under the License.
#include "knowhere/adapter/SptagAdapter.h"
#include "knowhere/adapter/Structure.h"
#include "knowhere/index/vector_index/helpers/Definitions.h"
#include "VectorAdapter.h"
namespace knowhere {
std::shared_ptr<SPTAG::MetadataSet>
ConvertToMetadataSet(const DatasetPtr& dataset) {
auto array = dataset->array()[0];
auto elems = array->length();
auto elems = dataset->Get<int64_t>(meta::ROWS);
auto p_data = dataset->Get<const int64_t*>(meta::IDS);
auto p_data = array->data()->GetValues<int64_t>(1, 0);
auto p_offset = (int64_t*)malloc(sizeof(int64_t) * elems);
for (auto i = 0; i <= elems; ++i) p_offset[i] = i * 8;
@ -39,31 +37,21 @@ ConvertToMetadataSet(const DatasetPtr& dataset) {
std::shared_ptr<SPTAG::VectorSet>
ConvertToVectorSet(const DatasetPtr& dataset) {
auto tensor = dataset->tensor()[0];
GETTENSOR(dataset);
size_t num_bytes = rows * dim * sizeof(float);
SPTAG::ByteArray byte_array((uint8_t*)p_data, num_bytes, false);
auto p_data = tensor->raw_mutable_data();
auto dimension = tensor->shape()[1];
auto rows = tensor->shape()[0];
auto num_bytes = tensor->size() * sizeof(float);
SPTAG::ByteArray byte_array(p_data, num_bytes, false);
auto vectorset =
std::make_shared<SPTAG::BasicVectorSet>(byte_array, SPTAG::VectorValueType::Float, dimension, rows);
auto vectorset = std::make_shared<SPTAG::BasicVectorSet>(byte_array, SPTAG::VectorValueType::Float, dim, rows);
return vectorset;
}
std::vector<SPTAG::QueryResult>
ConvertToQueryResult(const DatasetPtr& dataset, const Config& config) {
auto tensor = dataset->tensor()[0];
auto p_data = (float*)tensor->raw_mutable_data();
auto dimension = tensor->shape()[1];
auto rows = tensor->shape()[0];
GETTENSOR(dataset);
std::vector<SPTAG::QueryResult> query_results(rows, SPTAG::QueryResult(nullptr, config->k, true));
for (auto i = 0; i < rows; ++i) {
query_results[i].SetTarget(&p_data[i * dimension]);
query_results[i].SetTarget(&p_data[i * dim]);
}
return query_results;
@ -74,9 +62,10 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
auto k = query_results[0].GetResultNum();
auto elems = query_results.size() * k;
auto p_id = (int64_t*)malloc(sizeof(int64_t) * elems);
auto p_dist = (float*)malloc(sizeof(float) * elems);
// TODO: throw if malloc failed.
size_t p_id_size = sizeof(int64_t) * elems;
size_t p_dist_size = sizeof(float) * elems;
auto p_id = (int64_t*)malloc(p_id_size);
auto p_dist = (float*)malloc(p_dist_size);
#pragma omp parallel for
for (auto i = 0; i < query_results.size(); ++i) {
@ -89,35 +78,10 @@ ConvertToDataset(std::vector<SPTAG::QueryResult> query_results) {
}
}
// auto id_buf = MakeMutableBufferSmart((uint8_t*)p_id, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)p_dist, sizeof(float) * elems);
//
// // TODO: magic
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
// // auto id_array_data = std::make_shared<ArrayData>(int64_type, sizeof(int64_t) * elems, id_bufs);
// // auto dist_array_data = std::make_shared<ArrayData>(float_type, sizeof(float) * elems, dist_bufs);
//
// // auto ids = ConstructInt64Array((uint8_t*)p_id, sizeof(int64_t) * elems);
// // auto dists = ConstructFloatArray((uint8_t*)p_dist, sizeof(float) * elems);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
//
// auto field_id = std::make_shared<Field>("id", std::make_shared<arrow::Int64Type>());
// auto field_dist = std::make_shared<Field>("dist", std::make_shared<arrow::FloatType>());
// std::vector<FieldPtr> fields{field_id, field_dist};
// auto schema = std::make_shared<Schema>(fields);
//
// return std::make_shared<Dataset>(array, schema);
return std::make_shared<Dataset>((void*)p_id, (void*)p_dist);
auto ret_ds = std::make_shared<Dataset>();
ret_ds->Set(meta::IDS, p_id);
ret_ds->Set(meta::DISTANCE, p_dist);
return ret_ds;
}
} // namespace knowhere

View File

@ -21,6 +21,7 @@
#include <memory>
#include <vector>
#include "knowhere/common/Config.h"
#include "knowhere/common/Dataset.h"
namespace knowhere {

View File

@ -1,87 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "knowhere/adapter/Structure.h"
#include <string>
#include <vector>
namespace knowhere {
/// Wraps `size` bytes at `data` as an Arrow int64 array whose value buffer is
/// created by MakeMutableBufferSmart.
///
/// @param data  Start of the int64 values.
/// @param size  Length of `data` in bytes; the element count is size / sizeof(int64_t).
/// @return The array, as a NumericArray<arrow::Int64Type>.
ArrayPtr
ConstructInt64ArraySmart(uint8_t* data, int64_t size) {
    // TODO: the two-slot layout is hard-coded. Slot 0 is left null
    // (presumably the validity bitmap - confirm); slot 1 carries the values.
    std::vector<BufferPtr> buffers;
    buffers.push_back(nullptr);
    buffers.push_back(MakeMutableBufferSmart(data, size));

    auto value_type = std::make_shared<arrow::Int64Type>();
    auto array_data = arrow::ArrayData::Make(value_type, size / sizeof(int64_t), buffers);
    return std::make_shared<NumericArray<arrow::Int64Type>>(array_data);
}
/// Wraps `size` bytes at `data` as an Arrow float array whose value buffer is
/// created by MakeMutableBufferSmart.
///
/// @param data  Start of the float values.
/// @param size  Length of `data` in bytes; the element count is size / sizeof(float).
/// @return The array, as a NumericArray<arrow::FloatType>.
ArrayPtr
ConstructFloatArraySmart(uint8_t* data, int64_t size) {
    // TODO: the two-slot layout is hard-coded. Slot 0 is left null
    // (presumably the validity bitmap - confirm); slot 1 carries the values.
    std::vector<BufferPtr> buffers;
    buffers.push_back(nullptr);
    buffers.push_back(MakeMutableBufferSmart(data, size));

    auto value_type = std::make_shared<arrow::FloatType>();
    auto array_data = arrow::ArrayData::Make(value_type, size / sizeof(float), buffers);
    return std::make_shared<NumericArray<arrow::FloatType>>(array_data);
}
/// Builds a float Tensor of the given shape over a buffer created by
/// MakeMutableBufferSmart from `data`.
///
/// @param data   Start of the tensor values.
/// @param size   Size handed to the buffer factory (bytes).
/// @param shape  Tensor dimensions, forwarded to the Tensor constructor.
TensorPtr
ConstructFloatTensorSmart(uint8_t* data, int64_t size, std::vector<int64_t> shape) {
    auto element_type = std::make_shared<arrow::FloatType>();
    auto backing = MakeMutableBufferSmart(data, size);
    TensorPtr tensor = std::make_shared<Tensor>(element_type, backing, shape);
    return tensor;
}
/// Wraps `size` bytes at `data` as an Arrow int64 array whose value buffer is
/// created by MakeMutableBuffer.
///
/// @param data  Start of the int64 values.
/// @param size  Length of `data` in bytes; the element count is size / sizeof(int64_t).
/// @return The array, as a NumericArray<arrow::Int64Type>.
ArrayPtr
ConstructInt64Array(uint8_t* data, int64_t size) {
    // TODO: the two-slot layout is hard-coded. Slot 0 is left null
    // (presumably the validity bitmap - confirm); slot 1 carries the values.
    std::vector<BufferPtr> buffers;
    buffers.push_back(nullptr);
    buffers.push_back(MakeMutableBuffer(data, size));

    auto value_type = std::make_shared<arrow::Int64Type>();
    auto array_data = arrow::ArrayData::Make(value_type, size / sizeof(int64_t), buffers);
    return std::make_shared<NumericArray<arrow::Int64Type>>(array_data);
}
/// Wraps `size` bytes at `data` as an Arrow float array whose value buffer is
/// created by MakeMutableBuffer.
///
/// @param data  Start of the float values.
/// @param size  Length of `data` in bytes; the element count is size / sizeof(float).
/// @return The array, as a NumericArray<arrow::FloatType>.
ArrayPtr
ConstructFloatArray(uint8_t* data, int64_t size) {
    // TODO: the two-slot layout is hard-coded. Slot 0 is left null
    // (presumably the validity bitmap - confirm); slot 1 carries the values.
    std::vector<BufferPtr> buffers;
    buffers.push_back(nullptr);
    buffers.push_back(MakeMutableBuffer(data, size));

    auto value_type = std::make_shared<arrow::FloatType>();
    auto array_data = arrow::ArrayData::Make(value_type, size / sizeof(float), buffers);
    return std::make_shared<NumericArray<arrow::FloatType>>(array_data);
}
/// Builds a float Tensor of the given shape over a buffer created by
/// MakeMutableBuffer from `data`.
///
/// @param data   Start of the tensor values.
/// @param size   Size handed to the buffer factory (bytes).
/// @param shape  Tensor dimensions, forwarded to the Tensor constructor.
TensorPtr
ConstructFloatTensor(uint8_t* data, int64_t size, std::vector<int64_t> shape) {
    auto element_type = std::make_shared<arrow::FloatType>();
    auto backing = MakeMutableBuffer(data, size);
    TensorPtr tensor = std::make_shared<Tensor>(element_type, backing, shape);
    return tensor;
}
/// Creates a Field called `name` whose data type is arrow::Int64Type.
FieldPtr
ConstructInt64Field(const std::string& name) {
    FieldPtr field = std::make_shared<Field>(name, std::make_shared<arrow::Int64Type>());
    return field;
}
/// Creates a Field called `name` whose data type is arrow::FloatType.
FieldPtr
ConstructFloatField(const std::string& name) {
    FieldPtr field = std::make_shared<Field>(name, std::make_shared<arrow::FloatType>());
    return field;
}
} // namespace knowhere

View File

@ -1,52 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "knowhere/common/Dataset.h"
namespace knowhere {
// Helpers that wrap caller-provided raw memory in Arrow arrays, tensors and
// fields. For the array/tensor helpers `size` is a byte count. The *Smart
// variants build their buffer with MakeMutableBufferSmart, whose deleter calls
// free() on `data`; the plain variants use MakeMutableBuffer instead.

// Int64 array over `data`; element count is size / sizeof(int64_t).
extern ArrayPtr
ConstructInt64ArraySmart(uint8_t* data, int64_t size);
// Float array over `data`; element count is size / sizeof(float).
extern ArrayPtr
ConstructFloatArraySmart(uint8_t* data, int64_t size);
// Float tensor of the given shape over `data`.
extern TensorPtr
ConstructFloatTensorSmart(uint8_t* data, int64_t size, std::vector<int64_t> shape);
// Int64 array over `data`; element count is size / sizeof(int64_t).
extern ArrayPtr
ConstructInt64Array(uint8_t* data, int64_t size);
// Float array over `data`; element count is size / sizeof(float).
extern ArrayPtr
ConstructFloatArray(uint8_t* data, int64_t size);
// Float tensor of the given shape over `data`.
extern TensorPtr
ConstructFloatTensor(uint8_t* data, int64_t size, std::vector<int64_t> shape);
// Field named `name` with an Int64 data type.
extern FieldPtr
ConstructInt64Field(const std::string& name);
// Field named `name` with a Float data type.
extern FieldPtr
ConstructFloatField(const std::string& name);
} // namespace knowhere

View File

@ -15,20 +15,16 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <utility>
#include <vector>
#include "knowhere/common/Array.h"
#include "knowhere/adapter/VectorAdapter.h"
namespace knowhere {
ArrayPtr
CopyArray(const ArrayPtr& origin);
SchemaPtr
CopySchema(const SchemaPtr& origin);
namespace meta {
const char* DIM = "dim";
const char* TENSOR = "tensor";
const char* ROWS = "rows";
const char* IDS = "ids";
const char* DISTANCE = "distance";
}; // namespace meta
} // namespace knowhere

View File

@ -17,12 +17,22 @@
#pragma once
#include <string>
#include "knowhere/common/Dataset.h"
namespace knowhere {
#define GETTENSOR(dataset) \
auto tensor = dataset->tensor()[0]; \
auto p_data = tensor->raw_data(); \
auto dim = tensor->shape()[1]; \
auto rows = tensor->shape()[0];
namespace meta {
extern const char* DIM;
extern const char* TENSOR;
extern const char* ROWS;
extern const char* IDS;
extern const char* DISTANCE;
}; // namespace meta
#define GETTENSOR(dataset) \
auto dim = dataset->Get<int64_t>(meta::DIM); \
auto rows = dataset->Get<int64_t>(meta::ROWS); \
auto p_data = dataset->Get<const float*>(meta::TENSOR);
} // namespace knowhere

View File

@ -1,50 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <arrow/array.h>
#include <memory>
#include "Schema.h"
// Aliases that re-export the Arrow array classes under the knowhere namespace,
// each paired with a std::shared_ptr handle alias named <Type>Ptr.
namespace knowhere {
// Array payload descriptor and the generic array base.
using ArrayData = arrow::ArrayData;
using ArrayDataPtr = std::shared_ptr<ArrayData>;
using Array = arrow::Array;
using ArrayPtr = std::shared_ptr<Array>;
using BooleanArray = arrow::BooleanArray;
using BooleanArrayPtr = std::shared_ptr<arrow::BooleanArray>;
// Numeric arrays are parameterised on the Arrow data type (e.g. arrow::Int64Type).
template <typename DType>
using NumericArray = arrow::NumericArray<DType>;
template <typename DType>
using NumericArrayPtr = std::shared_ptr<arrow::NumericArray<DType>>;
using BinaryArray = arrow::BinaryArray;
using BinaryArrayPtr = std::shared_ptr<arrow::BinaryArray>;
using FixedSizeBinaryArray = arrow::FixedSizeBinaryArray;
using FixedSizeBinaryArrayPtr = std::shared_ptr<arrow::FixedSizeBinaryArray>;
using Decimal128Array = arrow::Decimal128Array;
using Decimal128ArrayPtr = std::shared_ptr<arrow::Decimal128Array>;
} // namespace knowhere

View File

@ -1,61 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <arrow/buffer.h>
namespace knowhere {
using Buffer = arrow::Buffer;
using BufferPtr = std::shared_ptr<Buffer>;
using MutableBuffer = arrow::MutableBuffer;
using MutableBufferPtr = std::shared_ptr<MutableBuffer>;
namespace internal {
/// shared_ptr deleter used by MakeBufferSmart / MakeMutableBufferSmart.
///
/// Releases both the bytes the buffer points at (with free(), so they must
/// have been obtained from a malloc-family allocator) and the Buffer object
/// itself.
struct BufferDeleter {
    void
    operator()(Buffer* buffer) {
        if (buffer == nullptr) {
            return;
        }
        // data() is const-qualified; constness is dropped only to hand the
        // pointer to free().
        free(const_cast<uint8_t*>(buffer->data()));
        // A custom deleter replaces shared_ptr's default `delete`, so the
        // Buffer that the factories allocate with `new` must be destroyed
        // here; previously it was leaked.
        // NOTE(review): relies on arrow::Buffer having a virtual destructor
        // when the pointee is a MutableBuffer - confirm for the Arrow version in use.
        delete buffer;
    }
};
} // namespace internal
/// Wraps `size` bytes at `data` in a Buffer managed by a shared_ptr that uses
/// internal::BufferDeleter, which calls free() on the data pointer when the
/// last owner goes away.
inline BufferPtr
MakeBufferSmart(uint8_t* data, const int64_t size) {
    Buffer* raw_buffer = new Buffer(data, size);
    BufferPtr owned(raw_buffer, internal::BufferDeleter());
    return owned;
}
/// Wraps `size` bytes at `data` in a MutableBuffer managed by a shared_ptr
/// that uses internal::BufferDeleter, which calls free() on the data pointer
/// when the last owner goes away.
inline MutableBufferPtr
MakeMutableBufferSmart(uint8_t* data, const int64_t size) {
    MutableBuffer* raw_buffer = new MutableBuffer(data, size);
    MutableBufferPtr owned(raw_buffer, internal::BufferDeleter());
    return owned;
}
/// Wraps `size` bytes at `data` in a Buffer created with std::make_shared,
/// i.e. without the custom deleter that the *Smart factory installs.
inline BufferPtr
MakeBuffer(uint8_t* data, const int64_t size) {
    BufferPtr wrapped = std::make_shared<Buffer>(data, size);
    return wrapped;
}
/// Wraps `size` bytes at `data` in a MutableBuffer created with
/// std::make_shared, i.e. without the custom deleter that the *Smart factory
/// installs.
inline MutableBufferPtr
MakeMutableBuffer(uint8_t* data, const int64_t size) {
    MutableBufferPtr wrapped = std::make_shared<MutableBuffer>(data, size);
    return wrapped;
}
} // namespace knowhere

View File

@ -17,149 +17,105 @@
#pragma once
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <typeindex>
#include <utility>
#include <vector>
#include "Array.h"
#include "Buffer.h"
#include "Config.h"
#include "Schema.h"
#include "Tensor.h"
#include "knowhere/adapter/ArrowAdapter.h"
namespace knowhere {
class Dataset;
struct BaseValue;
using BasePtr = std::unique_ptr<BaseValue>;
struct BaseValue {
virtual ~BaseValue() = default;
using DatasetPtr = std::shared_ptr<Dataset>;
// virtual BasePtr
// Clone() const = 0;
};
template <typename T>
struct AnyValue : public BaseValue {
T data_;
template <typename U>
explicit AnyValue(U&& value) : data_(std::forward<U>(value)) {
}
// BasePtr
// Clone() const {
// return BasePtr(data_);
// }
};
struct Value {
std::type_index type_;
BasePtr data_;
template <typename U,
class = typename std::enable_if<!std::is_same<typename std::decay<U>::type, Value>::value, U>::type>
explicit Value(U&& value)
: data_(new AnyValue<typename std::decay<U>::type>(std::forward<U>(value))),
type_(std::type_index(typeid(typename std::decay<U>::type))) {
}
template <typename U>
bool
Is() const {
return type_ == std::type_index(typeid(U));
}
template <typename U>
U&
AnyCast() {
if (!Is<U>()) {
std::stringstream ss;
ss << "Can't cast t " << type_.name() << " to " << typeid(U).name();
throw std::logic_error(ss.str());
}
auto derived = dynamic_cast<AnyValue<U>*>(data_.get());
return derived->data_;
}
};
using ValuePtr = std::shared_ptr<Value>;
class Dataset {
public:
Dataset() = default;
Dataset(std::vector<ArrayPtr>&& array, SchemaPtr array_schema, std::vector<TensorPtr>&& tensor,
SchemaPtr tensor_schema)
: array_(std::move(array)),
array_schema_(std::move(array_schema)),
tensor_(std::move(tensor)),
tensor_schema_(std::move(tensor_schema)) {
template <typename T>
void
Set(const std::string& k, T&& v) {
std::lock_guard<std::mutex> lk(mutex_);
auto value = std::make_shared<Value>(std::forward<T>(v));
data_[k] = value;
}
Dataset(std::vector<ArrayPtr> array, SchemaPtr array_schema)
: array_(std::move(array)), array_schema_(std::move(array_schema)) {
}
Dataset(std::vector<TensorPtr> tensor, SchemaPtr tensor_schema)
: tensor_(std::move(tensor)), tensor_schema_(std::move(tensor_schema)) {
}
Dataset(void* ids, void* dists) : ids_(ids), dists_(dists) {
}
Dataset(const Dataset&) = delete;
Dataset&
operator=(const Dataset&) = delete;
DatasetPtr
Clone() {
auto dataset = std::make_shared<Dataset>();
std::vector<ArrayPtr> clone_array;
for (auto& array : array_) {
clone_array.emplace_back(CopyArray(array));
template <typename T>
T
Get(const std::string& k) {
std::lock_guard<std::mutex> lk(mutex_);
auto finder = data_.find(k);
if (finder != data_.end()) {
return finder->second->AnyCast<T>();
} else {
throw std::logic_error("Can't find this key");
}
dataset->set_array(clone_array);
std::vector<TensorPtr> clone_tensor;
for (auto& tensor : tensor_) {
auto buffer = tensor->data();
std::shared_ptr<Buffer> copy_buffer;
// TODO: checkout copy success;
buffer->Copy(0, buffer->size(), &copy_buffer);
auto copy = std::make_shared<Tensor>(tensor->type(), copy_buffer, tensor->shape());
clone_tensor.emplace_back(copy);
}
dataset->set_tensor(clone_tensor);
if (array_schema_)
dataset->set_array_schema(CopySchema(array_schema_));
if (tensor_schema_)
dataset->set_tensor_schema(CopySchema(tensor_schema_));
return dataset;
}
public:
const std::vector<ArrayPtr>&
array() const {
return array_;
const std::map<std::string, ValuePtr>&
data() const {
return data_;
}
void
set_array(std::vector<ArrayPtr> array) {
array_ = std::move(array);
}
const std::vector<TensorPtr>&
tensor() const {
return tensor_;
}
void
set_tensor(std::vector<TensorPtr> tensor) {
tensor_ = std::move(tensor);
}
SchemaConstPtr
array_schema() const {
return array_schema_;
}
void
set_array_schema(SchemaPtr array_schema) {
array_schema_ = std::move(array_schema);
}
SchemaConstPtr
tensor_schema() const {
return tensor_schema_;
}
void
set_tensor_schema(SchemaPtr tensor_schema) {
tensor_schema_ = std::move(tensor_schema);
}
void*
ids() {
return ids_;
}
void*
dist() {
return dists_;
}
// const Config &
// meta() const { return meta_; }
// void
// set_meta(Config meta) {
// meta_ = std::move(meta);
//}
private:
std::vector<ArrayPtr> array_;
SchemaPtr array_schema_;
std::vector<TensorPtr> tensor_;
SchemaPtr tensor_schema_;
// TODO(yukun): using smart pointer
void* ids_;
void* dists_;
// Config meta_;
std::mutex mutex_;
std::map<std::string, ValuePtr> data_;
};
using DatasetPtr = std::shared_ptr<Dataset>;
} // namespace knowhere

View File

@ -1,33 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <arrow/type.h>
// Aliases that re-export Arrow's type/field/schema classes under the knowhere
// namespace, together with std::shared_ptr handle aliases.
namespace knowhere {
using DataType = arrow::DataType;
using Field = arrow::Field;
using FieldPtr = std::shared_ptr<arrow::Field>;
using Schema = arrow::Schema;
using SchemaPtr = std::shared_ptr<Schema>;
// Handle to a Schema that cannot be modified through this pointer.
using SchemaConstPtr = std::shared_ptr<const Schema>;
} // namespace knowhere

View File

@ -1,29 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <arrow/tensor.h>
// Aliases that re-export arrow::Tensor under the knowhere namespace, together
// with its std::shared_ptr handle.
namespace knowhere {
using Tensor = arrow::Tensor;
using TensorPtr = std::shared_ptr<Tensor>;
} // namespace knowhere

View File

@ -127,7 +127,7 @@ GPUIDMAP::search_impl(int64_t n, const float* data, int64_t k, float* distances,
}
void
GPUIDMAP::GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config) {
GPUIDMAP::GenGraph(const float* data, const int64_t& k, Graph& graph, const Config& config) {
int64_t K = k + 1;
auto ntotal = Count();

View File

@ -49,7 +49,7 @@ class GPUIDMAP : public IDMAP, public GPUIndex {
CopyGpuToGpu(const int64_t& device_id, const Config& config) override;
void
GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config);
GenGraph(const float* data, const int64_t& k, Graph& graph, const Config& config);
protected:
void

View File

@ -66,38 +66,21 @@ IDMAP::Search(const DatasetPtr& dataset, const Config& config) {
if (!index_) {
KNOWHERE_THROW_MSG("index not initialize");
}
config->CheckValid();
// auto metric_type = config["metric_type"].as_string() == "L2" ?
// faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT;
// index_->metric_type = metric_type;
GETTENSOR(dataset)
auto elems = rows * config->k;
auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems);
auto res_dis = (float*)malloc(sizeof(float) * elems);
size_t p_id_size = sizeof(int64_t) * elems;
size_t p_dist_size = sizeof(float) * elems;
auto p_id = (int64_t*)malloc(p_id_size);
auto p_dist = (float*)malloc(p_dist_size);
search_impl(rows, (float*)p_data, config->k, res_dis, res_ids, Config());
search_impl(rows, (float*)p_data, config->k, p_dist, p_id, Config());
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
//
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
//
// return std::make_shared<Dataset>(array, nullptr);
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
auto ret_ds = std::make_shared<Dataset>();
ret_ds->Set(meta::IDS, p_id);
ret_ds->Set(meta::DISTANCE, p_dist);
return ret_ds;
}
void
@ -114,10 +97,7 @@ IDMAP::Add(const DatasetPtr& dataset, const Config& config) {
std::lock_guard<std::mutex> lk(mutex_);
GETTENSOR(dataset)
// TODO: magic here.
auto array = dataset->array()[0];
auto p_ids = array->data()->GetValues<int64_t>(1, 0);
auto p_ids = dataset->Get<const int64_t*>(meta::IDS);
index_->add_with_ids(rows, (float*)p_data, p_ids);
}
@ -130,9 +110,6 @@ IDMAP::AddWithoutId(const DatasetPtr& dataset, const Config& config) {
std::lock_guard<std::mutex> lk(mutex_);
GETTENSOR(dataset)
// TODO: magic here.
auto array = dataset->array()[0];
std::vector<int64_t> new_ids(rows);
for (int i = 0; i < rows; ++i) {
new_ids[i] = i;
@ -151,8 +128,7 @@ IDMAP::Dimension() {
return index_->d;
}
// TODO(linxj): return const pointer
float*
const float*
IDMAP::GetRawVectors() {
try {
auto file_index = dynamic_cast<faiss::IndexIDMap*>(index_.get());
@ -163,8 +139,7 @@ IDMAP::GetRawVectors() {
}
}
// TODO(linxj): return const pointer
int64_t*
const int64_t*
IDMAP::GetRawIds() {
try {
auto file_index = dynamic_cast<faiss::IndexIDMap*>(index_.get());

View File

@ -64,10 +64,10 @@ class IDMAP : public VectorIndex, public FaissBaseIndex {
void
Seal() override;
virtual float*
virtual const float*
GetRawVectors();
virtual int64_t*
virtual const int64_t*
GetRawIds();
protected:

View File

@ -73,8 +73,7 @@ IVF::Add(const DatasetPtr& dataset, const Config& config) {
std::lock_guard<std::mutex> lk(mutex_);
GETTENSOR(dataset)
auto array = dataset->array()[0];
auto p_ids = array->data()->GetValues<int64_t>(1, 0);
auto p_ids = dataset->Get<const int64_t*>(meta::IDS);
index_->add_with_ids(rows, (float*)p_data, p_ids);
}
@ -121,10 +120,13 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {
try {
auto elems = rows * search_cfg->k;
auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems);
auto res_dis = (float*)malloc(sizeof(float) * elems);
search_impl(rows, (float*)p_data, search_cfg->k, res_dis, res_ids, config);
size_t p_id_size = sizeof(int64_t) * elems;
size_t p_dist_size = sizeof(float) * elems;
auto p_id = (int64_t*)malloc(p_id_size);
auto p_dist = (float*)malloc(p_dist_size);
search_impl(rows, (float*)p_data, search_cfg->k, p_dist, p_id, config);
// std::stringstream ss_res_id, ss_res_dist;
// for (int i = 0; i < 10; ++i) {
@ -139,23 +141,10 @@ IVF::Search(const DatasetPtr& dataset, const Config& config) {
// std::cout << ss_res_id.str() << std::endl;
// std::cout << ss_res_dist.str() << std::endl << std::endl;
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
//
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
auto ret_ds = std::make_shared<Dataset>();
ret_ds->Set(meta::IDS, p_id);
ret_ds->Set(meta::DISTANCE, p_dist);
return ret_ds;
} catch (faiss::FaissException& e) {
KNOWHERE_THROW_MSG(e.what());
} catch (std::exception& e) {
@ -195,7 +184,7 @@ IVF::Dimension() {
}
void
IVF::GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config) {
IVF::GenGraph(const float* data, const int64_t& k, Graph& graph, const Config& config) {
int64_t K = k + 1;
auto ntotal = Count();

View File

@ -57,7 +57,7 @@ class IVF : public VectorIndex, public FaissBaseIndex {
Search(const DatasetPtr& dataset, const Config& config) override;
void
GenGraph(float* data, const int64_t& k, Graph& graph, const Config& config);
GenGraph(const float* data, const int64_t& k, Graph& graph, const Config& config);
BinarySet
Serialize() override;

View File

@ -84,31 +84,19 @@ NSG::Search(const DatasetPtr& dataset, const Config& config) {
GETTENSOR(dataset)
auto elems = rows * build_cfg->k;
auto res_ids = (int64_t*)malloc(sizeof(int64_t) * elems);
auto res_dis = (float*)malloc(sizeof(float) * elems);
size_t p_id_size = sizeof(int64_t) * elems;
size_t p_dist_size = sizeof(float) * elems;
auto p_id = (int64_t*)malloc(p_id_size);
auto p_dist = (float*)malloc(p_dist_size);
algo::SearchParams s_params;
s_params.search_length = build_cfg->search_length;
index_->Search((float*)p_data, rows, dim, build_cfg->k, res_dis, res_ids, s_params);
index_->Search((float*)p_data, rows, dim, build_cfg->k, p_dist, p_id, s_params);
// auto id_buf = MakeMutableBufferSmart((uint8_t*)res_ids, sizeof(int64_t) * elems);
// auto dist_buf = MakeMutableBufferSmart((uint8_t*)res_dis, sizeof(float) * elems);
// std::vector<BufferPtr> id_bufs{nullptr, id_buf};
// std::vector<BufferPtr> dist_bufs{nullptr, dist_buf};
//
// auto int64_type = std::make_shared<arrow::Int64Type>();
// auto float_type = std::make_shared<arrow::FloatType>();
//
// auto id_array_data = arrow::ArrayData::Make(int64_type, elems, id_bufs);
// auto dist_array_data = arrow::ArrayData::Make(float_type, elems, dist_bufs);
//
// auto ids = std::make_shared<NumericArray<arrow::Int64Type>>(id_array_data);
// auto dists = std::make_shared<NumericArray<arrow::FloatType>>(dist_array_data);
// std::vector<ArrayPtr> array{ids, dists};
//
// return std::make_shared<Dataset>(array, nullptr);
return std::make_shared<Dataset>((void*)res_ids, (void*)res_dis);
auto ret_ds = std::make_shared<Dataset>();
ret_ds->Set(meta::IDS, p_id);
ret_ds->Set(meta::DISTANCE, p_dist);
return ret_ds;
}
IndexModelPtr
@ -123,7 +111,7 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) {
idmap->Train(config);
idmap->AddWithoutId(dataset, config);
Graph knng;
float* raw_data = idmap->GetRawVectors();
const float* raw_data = idmap->GetRawVectors();
#ifdef MILVUS_GPU_VERSION
if (build_cfg->gpu_id == knowhere::INVALID_VALUE) {
auto preprocess_index = std::make_shared<IVF>();
@ -150,8 +138,7 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) {
b_params.out_degree = build_cfg->out_degree;
b_params.search_length = build_cfg->search_length;
auto array = dataset->array()[0];
auto p_ids = array->data()->GetValues<int64_t>(1, 0);
auto p_ids = dataset->Get<const int64_t*>(meta::IDS);
GETTENSOR(dataset)
index_ = std::make_shared<algo::NsgIndex>(dim, rows);

View File

@ -26,6 +26,7 @@
#undef mkdir
#include "knowhere/adapter/SptagAdapter.h"
#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexSPTAG.h"
#include "knowhere/index/vector_index/helpers/Definitions.h"
@ -88,42 +89,6 @@ CPUSPTAGRNG::Serialize() {
binary_set.Append("config", config, length);
binary_set.Append("graph", graph, index_blobs[2].Length());
// MemoryIOWriter writer;
// size_t len = 0;
// for (int i = 0; i < 6; ++i) {
// len = index_blobs[i].Length();
// assert(len != 0);
// writer(&len, sizeof(size_t), 1);
// writer(index_blobs[i].Data(), len, 1);
// len = 0;
// }
// writer(&length, sizeof(size_t), 1);
// writer(cstr, length, 1);
// auto data = std::make_shared<uint8_t>();
// data.reset(writer.data_);
// BinarySet binary_set;
// binary_set.Append("sptag", data, writer.total);
// MemoryIOWriter writer;
// size_t len = 0;
// for (int i = 0; i < 6; ++i) {
// if (i == 2) continue;
// len = index_blobs[i].Length();
// assert(len != 0);
// writer(&len, sizeof(size_t), 1);
// writer(index_blobs[i].Data(), len, 1);
// len = 0;
// }
// writer(&length, sizeof(size_t), 1);
// writer(cstr, length, 1);
// auto data = std::make_shared<uint8_t>();
// data.reset(writer.data_);
// BinarySet binary_set;
// binary_set.Append("sptag", data, writer.total);
// auto graph = std::make_shared<uint8_t>();
// graph.reset(static_cast<uint8_t*>(index_blobs[2].Data()));
// binary_set.Append("graph", graph, index_blobs[2].Length());
return binary_set;
}
@ -153,52 +118,6 @@ CPUSPTAGRNG::Load(const BinarySet& binary_set) {
auto config = binary_set.GetByName("config");
index_config = reinterpret_cast<char*>(config->data.get());
// std::vector<SPTAG::ByteArray> index_blobs;
// auto data = binary_set.GetByName("sptag");
// MemoryIOReader reader;
// reader.total = data->size;
// reader.data_ = data->data.get();
// size_t len = 0;
// for (int i = 0; i < 6; ++i) {
// reader(&len, sizeof(size_t), 1);
// assert(len != 0);
// auto binary = new uint8_t[len];
// reader(binary, len, 1);
// index_blobs.emplace_back(SPTAG::ByteArray(binary, len, true));
// len = 0;
// }
// reader(&len, sizeof(size_t), 1);
// assert(len != 0);
// auto config = new char[len];
// reader(config, len, 1);
// std::string index_config = config;
// delete[] config;
// std::vector<SPTAG::ByteArray> index_blobs;
// auto data = binary_set.GetByName("sptag");
// MemoryIOReader reader;
// reader.total = data->size;
// reader.data_ = data->data.get();
// size_t len = 0;
// for (int i = 0; i < 6; ++i) {
// if (i == 2) {
// auto graph = binary_set.GetByName("graph");
// index_blobs.emplace_back(SPTAG::ByteArray(graph->data.get(), graph->size, false));
// continue;
// }
// reader(&len, sizeof(size_t), 1);
// assert(len != 0);
// auto binary = new uint8_t[len];
// reader(binary, len, 1);
// index_blobs.emplace_back(SPTAG::ByteArray(binary, len, true));
// len = 0;
// }
// reader(&len, sizeof(size_t), 1);
// assert(len != 0);
// auto config = new char[len];
// reader(config, len, 1);
// std::string index_config = config;
// delete[] config;
index_ptr_->LoadIndex(index_config, index_blobs);
}
@ -213,7 +132,8 @@ CPUSPTAGRNG::Train(const DatasetPtr& origin, const Config& train_config) {
if (train_config != nullptr) {
train_config->CheckValid(); // throw exception
}
DatasetPtr dataset = origin->Clone();
DatasetPtr dataset = origin; // TODO(linxj): copy or reference?
// if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine
// && preprocessor_) {
@ -301,11 +221,11 @@ CPUSPTAGRNG::Search(const DatasetPtr& dataset, const Config& config) {
if (config != nullptr) {
config->CheckValid(); // throw exception
}
auto tensor = dataset->tensor()[0];
auto p = (float*)tensor->raw_mutable_data();
auto p_data = dataset->Get<const float*>(meta::TENSOR);
for (auto i = 0; i < 10; ++i) {
for (auto j = 0; j < 10; ++j) {
std::cout << p[i * 10 + j] << " ";
std::cout << p_data[i * 10 + j] << " ";
}
std::cout << std::endl;
}

View File

@ -6,7 +6,6 @@ include_directories(${INDEX_SOURCE_DIR})
set(depend_libs
gtest gmock gtest_main gmock_main
faiss
arrow "${ARROW_LIB_DIR}/libjemalloc_pic.a"
)
if (FAISS_WITH_MKL)
set(depend_libs ${depend_libs}
@ -31,8 +30,7 @@ set(util_srcs
${MILVUS_THIRDPARTY_SRC}/easyloggingpp/easylogging++.cc
${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/FaissIO.cpp
${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/IndexParameter.cpp
${INDEX_SOURCE_DIR}/knowhere/knowhere/adapter/Structure.cpp
${INDEX_SOURCE_DIR}/knowhere/knowhere/adapter/ArrowAdapter.cpp
${INDEX_SOURCE_DIR}/knowhere/knowhere/adapter/VectorAdapter.cpp
${INDEX_SOURCE_DIR}/knowhere/knowhere/common/Exception.cpp
${INDEX_SOURCE_DIR}/knowhere/knowhere/common/Timer.cpp
${INDEX_SOURCE_DIR}/unittest/utils.cpp

View File

@ -13,7 +13,6 @@ if (KNOWHERE_GPU_VERSION)
set(depend_libs
faiss hdf5
arrow ${ARROW_LIB_DIR}/libjemalloc_pic.a
)
if (FAISS_WITH_MKL)
set(depend_libs ${depend_libs}

View File

@ -8,7 +8,6 @@ if (KNOWHERE_GPU_VERSION)
set(depend_libs
faiss
arrow ${ARROW_LIB_DIR}/libjemalloc_pic.a
)
if (FAISS_WITH_MKL)
set(depend_libs ${depend_libs}

View File

@ -18,7 +18,6 @@
#include <gtest/gtest.h>
#include <iostream>
#include "knowhere/adapter/Structure.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexIDMAP.h"
#ifdef MILVUS_GPU_VERSION

View File

@ -24,6 +24,7 @@
#include <faiss/gpu/GpuIndexIVFFlat.h>
#endif
#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
#include "knowhere/common/Timer.h"
@ -165,6 +166,7 @@ TEST_P(IVFTest, ivf_serialize) {
}
}
// TODO(linxj): deprecated
#ifdef MILVUS_GPU_VERSION
TEST_P(IVFTest, clone_test) {
assert(!xb.empty());
@ -182,8 +184,8 @@ TEST_P(IVFTest, clone_test) {
// PrintResult(result, nq, k);
auto AssertEqual = [&](knowhere::DatasetPtr p1, knowhere::DatasetPtr p2) {
auto ids_p1 = p1->ids();
auto ids_p2 = p2->ids();
auto ids_p1 = p1->Get<int64_t*>(knowhere::meta::IDS);
auto ids_p2 = p2->Get<int64_t*>(knowhere::meta::IDS);
for (int i = 0; i < nq * k; ++i) {
EXPECT_EQ(*((int64_t*)(ids_p2) + i), *((int64_t*)(ids_p1) + i));

View File

@ -19,9 +19,8 @@
#include <iostream>
#include <sstream>
#include "knowhere/adapter/SptagAdapter.h"
#include "knowhere/adapter/Structure.h"
#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexSPTAG.h"
#include "knowhere/index/vector_index/helpers/Definitions.h"
@ -76,10 +75,8 @@ TEST_P(SPTAGTest, sptag_basic) {
AssertAnns(result, nq, k);
{
// auto ids = result->array()[0];
// auto dists = result->array()[1];
auto ids = result->ids();
auto dists = result->dist();
auto ids = result->Get<int64_t*>(knowhere::meta::IDS);
auto dist = result->Get<float*>(knowhere::meta::DISTANCE);
std::stringstream ss_id;
std::stringstream ss_dist;
@ -88,7 +85,7 @@ TEST_P(SPTAGTest, sptag_basic) {
// ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
// ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
ss_id << *((int64_t*)(ids) + i * k + j) << " ";
ss_dist << *((float*)(dists) + i * k + j) << " ";
ss_dist << *((float*)(dist) + i * k + j) << " ";
}
ss_id << std::endl;
ss_dist << std::endl;

View File

@ -16,6 +16,7 @@
// under the License.
#include "unittest/utils.h"
#include "knowhere/adapter/VectorAdapter.h"
#include <gtest/gtest.h>
#include <memory>
@ -120,38 +121,27 @@ FileIOWriter::operator()(void* ptr, size_t size) {
}
knowhere::DatasetPtr
generate_dataset(int64_t nb, int64_t dim, float* xb, int64_t* ids) {
std::vector<int64_t> shape{nb, dim};
auto tensor = knowhere::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape);
std::vector<knowhere::TensorPtr> tensors{tensor};
std::vector<knowhere::FieldPtr> tensor_fields{knowhere::ConstructFloatField("data")};
auto tensor_schema = std::make_shared<knowhere::Schema>(tensor_fields);
auto id_array = knowhere::ConstructInt64Array((uint8_t*)ids, nb * sizeof(int64_t));
std::vector<knowhere::ArrayPtr> arrays{id_array};
std::vector<knowhere::FieldPtr> array_fields{knowhere::ConstructInt64Field("id")};
auto array_schema = std::make_shared<knowhere::Schema>(tensor_fields);
auto dataset =
std::make_shared<knowhere::Dataset>(std::move(arrays), array_schema, std::move(tensors), tensor_schema);
return dataset;
generate_dataset(int64_t nb, int64_t dim, const float* xb, const int64_t* ids) {
auto ret_ds = std::make_shared<knowhere::Dataset>();
ret_ds->Set(knowhere::meta::ROWS, nb);
ret_ds->Set(knowhere::meta::DIM, dim);
ret_ds->Set(knowhere::meta::TENSOR, xb);
ret_ds->Set(knowhere::meta::IDS, ids);
return ret_ds;
}
knowhere::DatasetPtr
generate_query_dataset(int64_t nb, int64_t dim, float* xb) {
std::vector<int64_t> shape{nb, dim};
auto tensor = knowhere::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape);
std::vector<knowhere::TensorPtr> tensors{tensor};
std::vector<knowhere::FieldPtr> tensor_fields{knowhere::ConstructFloatField("data")};
auto tensor_schema = std::make_shared<knowhere::Schema>(tensor_fields);
auto dataset = std::make_shared<knowhere::Dataset>(std::move(tensors), tensor_schema);
return dataset;
generate_query_dataset(int64_t nb, int64_t dim, const float* xb) {
auto ret_ds = std::make_shared<knowhere::Dataset>();
ret_ds->Set(knowhere::meta::ROWS, nb);
ret_ds->Set(knowhere::meta::DIM, dim);
ret_ds->Set(knowhere::meta::TENSOR, xb);
return ret_ds;
}
void
AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->ids();
auto ids = result->Get<int64_t*>(knowhere::meta::IDS);
for (auto i = 0; i < nq; i++) {
EXPECT_EQ(i, *((int64_t*)(ids) + i * k));
// EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
@ -160,8 +150,8 @@ AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
void
PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
auto ids = result->ids();
auto dists = result->dist();
auto ids = result->Get<int64_t*>(knowhere::meta::IDS);
auto dist = result->Get<float*>(knowhere::meta::DISTANCE);
std::stringstream ss_id;
std::stringstream ss_dist;
@ -170,7 +160,7 @@ PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
// ss_id << *(ids->data()->GetValues<int64_t>(1, i * k + j)) << " ";
// ss_dist << *(dists->data()->GetValues<float>(1, i * k + j)) << " ";
ss_id << *((int64_t*)(ids) + i * k + j) << " ";
ss_dist << *((float*)(dists) + i * k + j) << " ";
ss_dist << *((float*)(dist) + i * k + j) << " ";
}
ss_id << std::endl;
ss_dist << std::endl;

View File

@ -23,7 +23,7 @@
#include <string>
#include <vector>
#include "knowhere/adapter/Structure.h"
#include "knowhere/common/Dataset.h"
#include "knowhere/common/Log.h"
class DataGen {
@ -63,10 +63,10 @@ extern void
InitLog();
knowhere::DatasetPtr
generate_dataset(int64_t nb, int64_t dim, float* xb, int64_t* ids);
generate_dataset(int64_t nb, int64_t dim, const float* xb, const int64_t* ids);
knowhere::DatasetPtr
generate_query_dataset(int64_t nb, int64_t dim, float* xb);
generate_query_dataset(int64_t nb, int64_t dim, const float* xb);
void
AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k);

View File

@ -16,42 +16,30 @@
// under the License.
#include "wrapper/DataTransfer.h"
#include "knowhere/adapter/VectorAdapter.h"
#include <memory>
#include <utility>
#include <vector>
namespace milvus {
namespace engine {
knowhere::DatasetPtr
GenDatasetWithIds(const int64_t& nb, const int64_t& dim, const float* xb, const int64_t* ids) {
std::vector<int64_t> shape{nb, dim};
auto tensor = knowhere::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape);
std::vector<knowhere::TensorPtr> tensors{tensor};
std::vector<knowhere::FieldPtr> tensor_fields{knowhere::ConstructFloatField("data")};
auto tensor_schema = std::make_shared<knowhere::Schema>(tensor_fields);
auto id_array = knowhere::ConstructInt64Array((uint8_t*)ids, nb * sizeof(int64_t));
std::vector<knowhere::ArrayPtr> arrays{id_array};
std::vector<knowhere::FieldPtr> array_fields{knowhere::ConstructInt64Field("id")};
auto array_schema = std::make_shared<knowhere::Schema>(tensor_fields);
auto dataset =
std::make_shared<knowhere::Dataset>(std::move(arrays), array_schema, std::move(tensors), tensor_schema);
return dataset;
auto ret_ds = std::make_shared<knowhere::Dataset>();
ret_ds->Set(knowhere::meta::ROWS, nb);
ret_ds->Set(knowhere::meta::DIM, dim);
ret_ds->Set(knowhere::meta::TENSOR, xb);
ret_ds->Set(knowhere::meta::IDS, ids);
return ret_ds;
}
knowhere::DatasetPtr
GenDataset(const int64_t& nb, const int64_t& dim, const float* xb) {
std::vector<int64_t> shape{nb, dim};
auto tensor = knowhere::ConstructFloatTensor((uint8_t*)xb, nb * dim * sizeof(float), shape);
std::vector<knowhere::TensorPtr> tensors{tensor};
std::vector<knowhere::FieldPtr> tensor_fields{knowhere::ConstructFloatField("data")};
auto tensor_schema = std::make_shared<knowhere::Schema>(tensor_fields);
auto dataset = std::make_shared<knowhere::Dataset>(std::move(tensors), tensor_schema);
return dataset;
auto ret_ds = std::make_shared<knowhere::Dataset>();
ret_ds->Set(knowhere::meta::ROWS, nb);
ret_ds->Set(knowhere::meta::DIM, dim);
ret_ds->Set(knowhere::meta::TENSOR, xb);
return ret_ds;
}
} // namespace engine

View File

@ -17,7 +17,7 @@
#pragma once
#include "knowhere/adapter/Structure.h"
#include "knowhere/common/Dataset.h"
namespace milvus {
namespace engine {

View File

@ -17,6 +17,7 @@
#include "wrapper/VecImpl.h"
#include "DataTransfer.h"
#include "knowhere/adapter/VectorAdapter.h"
#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/IndexIDMAP.h"
#include "utils/Log.h"
@ -86,9 +87,6 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i
Config search_cfg = cfg;
auto res = index_->Search(dataset, search_cfg);
// auto ids_array = res->array()[0];
// auto dis_array = res->array()[1];
//{
// auto& ids = ids_array;
// auto& dists = dis_array;
@ -110,10 +108,12 @@ VecIndexImpl::Search(const int64_t& nq, const float* xq, float* dist, int64_t* i
// auto p_dist = dis_array->data()->GetValues<float>(1, 0);
// TODO(linxj): avoid copy here.
memcpy(ids, res->ids(), sizeof(int64_t) * nq * k);
memcpy(dist, res->dist(), sizeof(float) * nq * k);
free(res->ids());
free(res->dist());
auto res_ids = res->Get<int64_t*>(knowhere::meta::IDS);
auto res_dist = res->Get<float*>(knowhere::meta::DISTANCE);
memcpy(ids, res_ids, sizeof(int64_t) * nq * k);
memcpy(dist, res_dist, sizeof(float) * nq * k);
free(res_ids);
free(res_dist);
} catch (knowhere::KnowhereException& e) {
WRAPPER_LOG_ERROR << e.what();
return Status(KNOWHERE_UNEXPECTED_ERROR, e.what());
@ -200,7 +200,7 @@ VecIndexImpl::GetDeviceId() {
#endif
}
float*
const float*
BFIndex::GetRawVectors() {
auto raw_index = std::dynamic_pointer_cast<knowhere::IDMAP>(index_);
if (raw_index) {
@ -209,7 +209,7 @@ BFIndex::GetRawVectors() {
return nullptr;
}
int64_t*
const int64_t*
BFIndex::GetRawIds() {
return std::static_pointer_cast<knowhere::IDMAP>(index_)->GetRawIds();
}

View File

@ -86,14 +86,14 @@ class BFIndex : public VecIndexImpl {
ErrorCode
Build(const Config& cfg);
float*
const float*
GetRawVectors();
Status
BuildAll(const int64_t& nb, const float* xb, const int64_t* ids, const Config& cfg, const int64_t& nt,
const float* xt) override;
int64_t*
const int64_t*
GetRawIds();
};