mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
Annoy support (#1746)
* add annoy source code Signed-off-by: shengjun.li <shengjun.li@zilliz.com> * add annoy knowhere Signed-off-by: shengjun.li <shengjun.li@zilliz.com> * annoy local gtest passed Signed-off-by: lichengming <chengming.li@zilliz.com> * fix lint error and update changelog Signed-off-by: lichengming <chengming.li@zilliz.com> * fix compile error Signed-off-by: cmli <chengming.li@zilliz.com> * Update connect timeout in test cases Signed-off-by: zw <zw@milvus.io> * fix some potential bugs Signed-off-by: cmli <chengming.li@zilliz.com> * retry ci Signed-off-by: cmli <chengming.li@zilliz.com> * rerun ci! Signed-off-by: cmli <chengming.li@zilliz.com> * fix errors tested by c++ sdk Signed-off-by: cmli <chengming.li@zilliz.com> * fix lint error Signed-off-by: cmli <chengming.li@zilliz.com> Co-authored-by: shengjun.li <shengjun.li@zilliz.com> Co-authored-by: lichengming <chengming.li@zilliz.com> Co-authored-by: zw <zw@milvus.io>
This commit is contained in:
parent
23e2780309
commit
310d5d70bc
@ -33,6 +33,7 @@ Please mark all change in change log and use the issue from GitHub
|
||||
- \#1756 Fix memory exhausted during searching
|
||||
|
||||
## Feature
|
||||
- \#261 Integrate ANNOY into Milvus
|
||||
- \#1603 BinaryFlat add 2 Metric: Substructure and Superstructure
|
||||
- \#1660 IVF PQ CPU support deleted vectors searching
|
||||
- \#1661 HNSW support deleted vectors searching
|
||||
|
||||
@ -21,3 +21,4 @@
|
||||
| aws-sdk-cpp | [Apache 2.0](https://github.com/aws/aws-sdk-cpp/blob/master/LICENSE) |
|
||||
| SPTAG | [MIT](https://github.com/microsoft/SPTAG/blob/master/LICENSE) |
|
||||
| hnswlib | [Apache 2.0](https://github.com/nmslib/hnswlib/blob/master/LICENSE) |
|
||||
| annoy | [Apache 2.0](https://github.com/spotify/annoy/blob/master/LICENSE) |
|
||||
|
||||
@ -291,6 +291,7 @@ DBImpl::GetTableInfo(const std::string& table_id, TableInfo& table_info) {
|
||||
{(int32_t)engine::EngineType::FAISS_IVFFLAT, "IVFFLAT"},
|
||||
{(int32_t)engine::EngineType::FAISS_IVFSQ8, "IVFSQ8"},
|
||||
{(int32_t)engine::EngineType::NSG_MIX, "NSG"},
|
||||
{(int32_t)engine::EngineType::ANNOY, "ANNOY"},
|
||||
{(int32_t)engine::EngineType::FAISS_IVFSQ8H, "IVFSQ8H"},
|
||||
{(int32_t)engine::EngineType::FAISS_PQ, "PQ"},
|
||||
{(int32_t)engine::EngineType::SPTAG_KDT, "KDT"},
|
||||
|
||||
@ -35,7 +35,8 @@ enum class EngineType {
|
||||
FAISS_BIN_IDMAP,
|
||||
FAISS_BIN_IVFFLAT,
|
||||
HNSW,
|
||||
MAX_VALUE = HNSW,
|
||||
ANNOY,
|
||||
MAX_VALUE = ANNOY,
|
||||
};
|
||||
|
||||
enum class MetricType {
|
||||
|
||||
@ -216,6 +216,10 @@ ExecutionEngineImpl::CreatetVecIndex(EngineType type) {
|
||||
index = vec_index_factory.CreateVecIndex(knowhere::IndexEnum::INDEX_HNSW, mode);
|
||||
break;
|
||||
}
|
||||
case EngineType::ANNOY: {
|
||||
index = vec_index_factory.CreateVecIndex(knowhere::IndexEnum::INDEX_ANNOY, mode);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
ENGINE_LOG_ERROR << "Unsupported index type " << (int)type;
|
||||
return nullptr;
|
||||
|
||||
@ -50,6 +50,7 @@ set(index_srcs
|
||||
knowhere/index/vector_index/IndexSPTAG.cpp
|
||||
knowhere/index/vector_index/IndexType.cpp
|
||||
knowhere/index/vector_index/VecIndexFactory.cpp
|
||||
knowhere/index/vector_index/IndexAnnoy.cpp
|
||||
)
|
||||
|
||||
set(depend_libs
|
||||
|
||||
@ -297,5 +297,21 @@ BinIVFConfAdapter::CheckTrain(Config& oricfg, const IndexMode mode) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ANNOYConfAdapter::CheckTrain(Config& oricfg, const IndexMode mode) {
|
||||
static int64_t MIN_NTREES = 0;
|
||||
// too large of n_trees takes much time, if there is real requirement, change this threshold.
|
||||
static int64_t MAX_NTREES = 16384;
|
||||
|
||||
CheckIntByRange(knowhere::IndexParams::n_trees, MIN_NTREES, MAX_NTREES);
|
||||
|
||||
return ConfAdapter::CheckTrain(oricfg, mode);
|
||||
}
|
||||
|
||||
bool
|
||||
ANNOYConfAdapter::CheckSearch(Config& oricfg, const IndexType type, const IndexMode mode) {
|
||||
return ConfAdapter::CheckSearch(oricfg, type, mode);
|
||||
}
|
||||
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
||||
|
||||
@ -84,5 +84,14 @@ class HNSWConfAdapter : public ConfAdapter {
|
||||
CheckSearch(Config& oricfg, const IndexType type, const IndexMode mode) override;
|
||||
};
|
||||
|
||||
class ANNOYConfAdapter : public ConfAdapter {
|
||||
public:
|
||||
bool
|
||||
CheckTrain(Config& oricfg, const IndexMode mode) override;
|
||||
|
||||
bool
|
||||
CheckSearch(Config& oricfg, const IndexType type, const IndexMode mode) override;
|
||||
};
|
||||
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
||||
|
||||
@ -46,6 +46,7 @@ AdapterMgr::RegisterAdapter() {
|
||||
REGISTER_CONF_ADAPTER(ConfAdapter, IndexEnum::INDEX_SPTAG_KDT_RNT, sptag_kdt_adapter);
|
||||
REGISTER_CONF_ADAPTER(ConfAdapter, IndexEnum::INDEX_SPTAG_BKT_RNT, sptag_bkt_adapter);
|
||||
REGISTER_CONF_ADAPTER(HNSWConfAdapter, IndexEnum::INDEX_HNSW, hnsw_adapter);
|
||||
REGISTER_CONF_ADAPTER(ANNOYConfAdapter, IndexEnum::INDEX_ANNOY, annoy_adapter);
|
||||
}
|
||||
|
||||
} // namespace knowhere
|
||||
|
||||
@ -0,0 +1,172 @@
|
||||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#include "knowhere/index/vector_index/IndexAnnoy.h"
|
||||
|
||||
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include <string>
#include <utility>
#include <vector>
|
||||
|
||||
#include "hnswlib/hnswalg.h"
|
||||
#include "hnswlib/space_ip.h"
|
||||
#include "hnswlib/space_l2.h"
|
||||
#include "knowhere/common/Exception.h"
|
||||
#include "knowhere/common/Log.h"
|
||||
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
|
||||
#include "knowhere/index/vector_index/helpers/FaissIO.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace knowhere {
|
||||
|
||||
/**
 * Pack the index into a BinarySet made of three entries:
 *   "annoy_metric_type" - the raw characters of metric_type_ (no terminator),
 *   "annoy_dim"         - the value returned by Dim(), sizeof(uint64_t) bytes,
 *   "annoy_index_data"  - a copy of the buffer exposed by the annoy index.
 *
 * @param config unused
 * @throws via KNOWHERE_THROW_MSG when no index has been built or loaded yet
 */
BinarySet
IndexAnnoy::Serialize(const Config& config) {
    if (!index_) {
        KNOWHERE_THROW_MSG("index not initialize or trained");
    }

    // Duplicate `len` bytes starting at `src` into a shared buffer released with delete[].
    auto clone_bytes = [](const void* src, size_t len) {
        std::shared_ptr<uint8_t> buffer(new uint8_t[len], [](uint8_t* raw) { delete[] raw; });
        memcpy(buffer.get(), src, len);
        return buffer;
    };

    const auto metric_len = metric_type_.length();
    auto metric_blob = clone_bytes(metric_type_.data(), metric_len);

    auto dim_value = Dim();
    auto dim_blob = clone_bytes(&dim_value, sizeof(uint64_t));

    const auto tree_len = index_->get_index_length();
    auto tree_blob = clone_bytes(index_->get_index(), (size_t)tree_len);

    BinarySet res_set;
    res_set.Append("annoy_metric_type", metric_blob, metric_len);
    res_set.Append("annoy_dim", dim_blob, sizeof(uint64_t));
    res_set.Append("annoy_index_data", tree_blob, tree_len);
    return res_set;
}
|
||||
|
||||
void
|
||||
IndexAnnoy::Load(const BinarySet& index_binary) {
|
||||
auto metric_type = index_binary.GetByName("annoy_metric_type");
|
||||
metric_type_.resize((size_t)metric_type->size + 1);
|
||||
memcpy(metric_type_.data(), metric_type->data.get(), (size_t)metric_type->size);
|
||||
|
||||
auto dim_data = index_binary.GetByName("annoy_dim");
|
||||
uint64_t dim;
|
||||
memcpy(&dim, dim_data->data.get(), (size_t)dim_data->size);
|
||||
|
||||
if (metric_type_ == Metric::L2) {
|
||||
index_ = std::make_shared<AnnoyIndex<int64_t, float, ::Euclidean, ::Kiss64Random>>(dim);
|
||||
} else if (metric_type_ == Metric::IP) {
|
||||
index_ = std::make_shared<AnnoyIndex<int64_t, float, ::DotProduct, ::Kiss64Random>>(dim);
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("metric not supported " + metric_type_);
|
||||
}
|
||||
|
||||
auto index_data = index_binary.GetByName("annoy_index_data");
|
||||
char* p = nullptr;
|
||||
if (!index_->load_index(index_data->data.get(), index_data->size, &p)) {
|
||||
std::string error_msg(p);
|
||||
free(p);
|
||||
KNOWHERE_THROW_MSG(error_msg);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IndexAnnoy::BuildAll(const DatasetPtr& dataset_ptr, const Config& config) {
|
||||
if (index_) {
|
||||
// it is builded all
|
||||
return;
|
||||
}
|
||||
|
||||
GETTENSORWITHIDS(dataset_ptr)
|
||||
|
||||
metric_type_ = config[Metric::TYPE];
|
||||
if (metric_type_ == Metric::L2) {
|
||||
index_ = std::make_shared<AnnoyIndex<int64_t, float, ::Euclidean, ::Kiss64Random>>(dim);
|
||||
} else if (metric_type_ == Metric::IP) {
|
||||
index_ = std::make_shared<AnnoyIndex<int64_t, float, ::DotProduct, ::Kiss64Random>>(dim);
|
||||
} else {
|
||||
KNOWHERE_THROW_MSG("metric not supported " + metric_type_);
|
||||
}
|
||||
|
||||
for (int i = 0; i < rows; ++i) {
|
||||
index_->add_item(p_ids[i], (const float*)p_data + dim * i);
|
||||
}
|
||||
|
||||
index_->build(config[IndexParams::n_trees].get<int64_t>());
|
||||
}
|
||||
|
||||
/**
 * Run a top-k search for every query vector in `dataset_ptr`.
 *
 * Reads k from config[meta::TOPK] and annoy's search budget from
 * config[IndexParams::search_k]. The result Dataset carries two malloc'ed
 * arrays of rows * k entries under meta::IDS and meta::DISTANCE
 * (presumably released by the consumer of the Dataset - confirm against callers).
 *
 * When annoy returns fewer than k neighbours for a query, the remaining slots
 * of that row are filled with id -1 and an infinite distance instead of
 * whatever happened to be in memory.
 *
 * @throws via KNOWHERE_THROW_MSG when no index is available or the result
 *         buffers cannot be allocated
 */
DatasetPtr
IndexAnnoy::Query(const DatasetPtr& dataset_ptr, const Config& config) {
    if (!index_) {
        KNOWHERE_THROW_MSG("index not initialize or trained");
    }

    // The macro provides rows, dim and p_data used below.
    GETTENSOR(dataset_ptr)
    auto k = config[meta::TOPK].get<int64_t>();
    auto search_k = config[IndexParams::search_k].get<int64_t>();
    auto all_num = rows * k;
    auto p_id = (int64_t*)malloc(all_num * sizeof(int64_t));
    auto p_dist = (float*)malloc(all_num * sizeof(float));
    if (all_num > 0 && (p_id == nullptr || p_dist == nullptr)) {
        free(p_id);
        free(p_dist);
        KNOWHERE_THROW_MSG("failed to allocate memory for annoy query result");
    }

    faiss::ConcurrentBitsetPtr blacklist = nullptr;
    GetBlacklist(blacklist);

#pragma omp parallel for
    for (int64_t i = 0; i < rows; ++i) {
        std::vector<int64_t> result;
        result.reserve(k);
        std::vector<float> distances;
        distances.reserve(k);
        index_->get_nns_by_vector((const float*)p_data + i * dim, k, search_k, &result, &distances, blacklist);

        // Copy only what annoy actually produced, never more than k entries.
        const int64_t produced = (int64_t)std::min(result.size(), distances.size());
        const int64_t found = std::min<int64_t>(k, produced);
        int64_t* row_ids = p_id + k * i;
        float* row_dist = p_dist + k * i;
        std::copy_n(result.begin(), found, row_ids);
        std::copy_n(distances.begin(), found, row_dist);

        // Mark the unfilled tail of the row as "no neighbour".
        for (int64_t slot = found; slot < k; ++slot) {
            row_ids[slot] = -1;
            row_dist[slot] = std::numeric_limits<float>::infinity();
        }
    }

    auto ret_ds = std::make_shared<Dataset>();
    ret_ds->Set(meta::IDS, p_id);
    ret_ds->Set(meta::DISTANCE, p_dist);
    return ret_ds;
}
|
||||
|
||||
int64_t
|
||||
IndexAnnoy::Count() {
|
||||
if (!index_) {
|
||||
KNOWHERE_THROW_MSG("index not initialize");
|
||||
}
|
||||
|
||||
return index_->get_n_items();
|
||||
}
|
||||
|
||||
int64_t
|
||||
IndexAnnoy::Dim() {
|
||||
if (!index_) {
|
||||
KNOWHERE_THROW_MSG("index not initialize");
|
||||
}
|
||||
|
||||
return index_->get_dim();
|
||||
}
|
||||
|
||||
int64_t
|
||||
IndexAnnoy::IndexSize() {
|
||||
if (index_size_ != -1) {
|
||||
return index_size_;
|
||||
}
|
||||
|
||||
return index_size_ = Dim() * Count() * sizeof(float);
|
||||
}
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
||||
@ -0,0 +1,74 @@
|
||||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
#include "annoy/src/annoylib.h"
|
||||
#include "annoy/src/kissrandom.h"
|
||||
|
||||
#include "knowhere/common/Exception.h"
|
||||
#include "knowhere/index/vector_index/VecIndex.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace knowhere {
|
||||
|
||||
class IndexAnnoy : public VecIndex {
|
||||
public:
|
||||
IndexAnnoy() {
|
||||
index_type_ = IndexEnum::INDEX_ANNOY;
|
||||
}
|
||||
|
||||
BinarySet
|
||||
Serialize(const Config& config = Config()) override;
|
||||
|
||||
void
|
||||
Load(const BinarySet& index_binary) override;
|
||||
|
||||
void
|
||||
BuildAll(const DatasetPtr& dataset_ptr, const Config& config) override;
|
||||
|
||||
void
|
||||
Train(const DatasetPtr& dataset_ptr, const Config& config) override {
|
||||
KNOWHERE_THROW_MSG("Annoy not support build item dynamically, please invoke BuildAll interface.");
|
||||
}
|
||||
|
||||
void
|
||||
Add(const DatasetPtr& dataset_ptr, const Config& config) override {
|
||||
KNOWHERE_THROW_MSG("Annoy not support add item dynamically, please invoke BuildAll interface.");
|
||||
}
|
||||
|
||||
void
|
||||
AddWithoutIds(const DatasetPtr&, const Config&) override {
|
||||
KNOWHERE_THROW_MSG("Incremental index is not supported");
|
||||
}
|
||||
|
||||
DatasetPtr
|
||||
Query(const DatasetPtr& dataset_ptr, const Config& config) override;
|
||||
|
||||
int64_t
|
||||
Count() override;
|
||||
|
||||
int64_t
|
||||
Dim() override;
|
||||
|
||||
int64_t
|
||||
IndexSize() override;
|
||||
|
||||
private:
|
||||
MetricType metric_type_;
|
||||
std::shared_ptr<AnnoyIndexInterface<int64_t, float>> index_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace knowhere
|
||||
} // namespace milvus
|
||||
@ -34,6 +34,7 @@ static std::unordered_map<int32_t, std::string> old_index_type_str_map = {
|
||||
{(int32_t)OldIndexType::SPTAG_KDT_RNT_CPU, IndexEnum::INDEX_SPTAG_KDT_RNT},
|
||||
{(int32_t)OldIndexType::SPTAG_BKT_RNT_CPU, IndexEnum::INDEX_SPTAG_BKT_RNT},
|
||||
{(int32_t)OldIndexType::HNSW, IndexEnum::INDEX_HNSW},
|
||||
{(int32_t)OldIndexType::ANNOY, IndexEnum::INDEX_ANNOY},
|
||||
{(int32_t)OldIndexType::FAISS_BIN_IDMAP, IndexEnum::INDEX_FAISS_BIN_IDMAP},
|
||||
{(int32_t)OldIndexType::FAISS_BIN_IVFLAT_CPU, IndexEnum::INDEX_FAISS_BIN_IVFFLAT},
|
||||
};
|
||||
@ -49,6 +50,7 @@ static std::unordered_map<std::string, int32_t> str_old_index_type_map = {
|
||||
{IndexEnum::INDEX_SPTAG_KDT_RNT, (int32_t)OldIndexType::SPTAG_KDT_RNT_CPU},
|
||||
{IndexEnum::INDEX_SPTAG_BKT_RNT, (int32_t)OldIndexType::SPTAG_BKT_RNT_CPU},
|
||||
{IndexEnum::INDEX_HNSW, (int32_t)OldIndexType::HNSW},
|
||||
{IndexEnum::INDEX_ANNOY, (int32_t)OldIndexType::ANNOY},
|
||||
{IndexEnum::INDEX_FAISS_BIN_IDMAP, (int32_t)OldIndexType::FAISS_BIN_IDMAP},
|
||||
{IndexEnum::INDEX_FAISS_BIN_IVFFLAT, (int32_t)OldIndexType::FAISS_BIN_IVFLAT_CPU},
|
||||
};
|
||||
|
||||
@ -34,6 +34,7 @@ enum class OldIndexType {
|
||||
FAISS_IVFPQ_MIX,
|
||||
SPTAG_BKT_RNT_CPU,
|
||||
HNSW,
|
||||
ANNOY,
|
||||
FAISS_BIN_IDMAP = 100,
|
||||
FAISS_BIN_IVFLAT_CPU = 101,
|
||||
};
|
||||
@ -54,6 +55,7 @@ constexpr const char* INDEX_NSG = "NSG";
|
||||
constexpr const char* INDEX_SPTAG_KDT_RNT = "SPTAG_KDT_RNT";
|
||||
constexpr const char* INDEX_SPTAG_BKT_RNT = "SPTAG_BKT_RNT";
|
||||
constexpr const char* INDEX_HNSW = "HNSW";
|
||||
constexpr const char* INDEX_ANNOY = "ANNOY";
|
||||
} // namespace IndexEnum
|
||||
|
||||
enum class IndexMode { MODE_CPU = 0, MODE_GPU = 1 };
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
|
||||
#include "knowhere/common/Exception.h"
|
||||
#include "knowhere/common/Log.h"
|
||||
#include "knowhere/index/vector_index/IndexAnnoy.h"
|
||||
#include "knowhere/index/vector_index/IndexBinaryIDMAP.h"
|
||||
#include "knowhere/index/vector_index/IndexBinaryIVF.h"
|
||||
#include "knowhere/index/vector_index/IndexHNSW.h"
|
||||
@ -78,6 +79,8 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
|
||||
return std::make_shared<knowhere::CPUSPTAGRNG>("BKT");
|
||||
} else if (type == IndexEnum::INDEX_HNSW) {
|
||||
return std::make_shared<knowhere::IndexHNSW>();
|
||||
} else if (type == IndexEnum::INDEX_ANNOY) {
|
||||
return std::make_shared<knowhere::IndexAnnoy>();
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -44,6 +44,10 @@ constexpr const char* candidate = "candidate_pool_size";
|
||||
constexpr const char* efConstruction = "efConstruction";
|
||||
constexpr const char* M = "M";
|
||||
constexpr const char* ef = "ef";
|
||||
|
||||
// Annoy Params
|
||||
constexpr const char* n_trees = "n_trees";
|
||||
constexpr const char* search_k = "search_k";
|
||||
} // namespace IndexParams
|
||||
|
||||
namespace Metric {
|
||||
|
||||
202
core/src/index/thirdparty/annoy/LICENSE
vendored
Normal file
202
core/src/index/thirdparty/annoy/LICENSE
vendored
Normal file
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
15
core/src/index/thirdparty/annoy/RELEASE.md
vendored
Normal file
15
core/src/index/thirdparty/annoy/RELEASE.md
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
How to release
|
||||
--------------
|
||||
|
||||
1. Make sure you're on master. `git checkout master && git fetch && git reset --hard origin/master`
|
||||
1. Update `setup.py` to the newest version, `git add setup.py && git commit -m "version 1.2.3"`
|
||||
1. `python setup.py sdist bdist_wheel`
|
||||
1. `git tag -a v1.2.3 -m "version 1.2.3"`
|
||||
1. `git push --tags origin master` to push the latest version to GitHub
|
||||
1. Go to https://github.com/spotify/annoy/releases and click "Draft a new release"
|
||||
1. `twine upload dist/annoy-1.2.3*`
|
||||
|
||||
TODO
|
||||
----
|
||||
|
||||
* Wheel
|
||||
14
core/src/index/thirdparty/annoy/examples/mmap_test.py
vendored
Normal file
14
core/src/index/thirdparty/annoy/examples/mmap_test.py
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
"""Round-trip example: build a tiny index, save it, load it back and query it."""
from annoy import AnnoyIndex

DIMENSION = 3
METRIC = 'angular'
INDEX_PATH = 'test.tree'

# Build an index over the three unit vectors and write it to disk.
a = AnnoyIndex(DIMENSION, METRIC)
a.add_item(0, [1, 0, 0])
a.add_item(1, [0, 1, 0])
a.add_item(2, [0, 0, 1])
a.build(-1)
a.save(INDEX_PATH)

# Load the saved file into a second index. The metric is passed explicitly so it
# matches the one used for building instead of relying on an implicit default.
b = AnnoyIndex(DIMENSION, METRIC)
b.load(INDEX_PATH)

# Ask for more neighbours than there are items in the index.
print(b.get_nns_by_item(0, 100))
print(b.get_nns_by_vector([1.0, 0.5, 0.5], 100))
|
||||
176
core/src/index/thirdparty/annoy/examples/precision_test.cpp
vendored
Normal file
176
core/src/index/thirdparty/annoy/examples/precision_test.cpp
vendored
Normal file
@ -0,0 +1,176 @@
|
||||
/*
|
||||
* precision_test.cpp
|
||||
|
||||
*
|
||||
* Created on: Jul 13, 2016
|
||||
* Author: Claudio Sanhueza
|
||||
* Contact: csanhuezalobos@gmail.com
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include "../src/kissrandom.h"
|
||||
#include "../src/annoylib.h"
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <random>
|
||||
|
||||
|
||||
/**
 * Builds an angular Annoy index over `n` random Gaussian vectors of dimension
 * `f`, saves it to "precision.tree", and then prints a running precision /
 * average-latency report for several result-list sizes.
 *
 * For each of 1000 randomly chosen items the K=10 neighbours found with
 * search_k = n are used as the reference set; each `limit` query is then
 * intersected with that set.
 *
 * @param f  number of features per vector (must be > 0)
 * @param n  number of vectors to index (must be > 0)
 * @return 0 on success, 1 when `f` or `n` is not positive
 */
int precision(int f=40, int n=1000000){
    typedef std::chrono::high_resolution_clock Clock;

    // A non-positive n would make the random item selection below undefined.
    if (f <= 0 || n <= 0) {
        std::cerr << "num_features and num_nodes must both be positive" << std::endl;
        return 1;
    }

    Clock::time_point t_start, t_end;

    std::default_random_engine generator;
    std::normal_distribution<double> distribution(0.0, 1.0);

    //******************************************************
    //Building the tree
    AnnoyIndex<int, double, Angular, Kiss32Random> t(f);

    std::cout << "Building index ... be patient !!" << std::endl;
    std::cout << "\"Trees that are slow to grow bear the best fruit\" (Moliere)" << std::endl;

    // One scratch buffer reused for every item: the previous per-item malloc
    // was never freed.
    std::vector<double> vec(f);
    for (int i = 0; i < n; ++i) {
        for (int z = 0; z < f; ++z) {
            vec[z] = distribution(generator);
        }

        t.add_item(i, vec.data());

        std::cout << "Loading objects ...\t object: " << i + 1 << "\tProgress:"
                  << std::fixed << std::setprecision(2)
                  << (double)(i + 1) / (double)n * 100 << "%\r";
    }
    std::cout << std::endl;

    std::cout << "Building index num_trees = 2 * num_features ...";
    t_start = Clock::now();
    t.build(2 * f);
    t_end = Clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::seconds>(t_end - t_start).count();
    std::cout << " Done in " << duration << " secs." << std::endl;

    std::cout << "Saving index ...";
    t.save("precision.tree");
    std::cout << " Done" << std::endl;

    //******************************************************
    const std::vector<int> limits = {10, 100, 1000, 10000};
    const int K = 10;
    const int prec_n = 1000;

    std::map<int, double> prec_sum;
    std::map<int, double> time_sum;  // accumulated query time, in seconds

    //init precision and timers map
    for (int limit : limits) {
        prec_sum[limit] = 0.0;
        time_sum[limit] = 0.0;
    }

    std::uniform_int_distribution<int> pick_item(0, n - 1);
    std::vector<int> closest;
    std::vector<int> toplist;
    std::vector<int> intersection;

    // doing the work
    for (int i = 0; i < prec_n; ++i) {
        //select a random node
        const int j = pick_item(generator);

        std::cout << "finding nbs for " << j << std::endl;

        // getting the K closest (results are appended, so start from empty)
        closest.clear();
        t.get_nns_by_item(j, K, n, &closest, nullptr);
        std::sort(closest.begin(), closest.end());

        for (int limit : limits) {
            toplist.clear();

            t_start = Clock::now();
            t.get_nns_by_item(j, limit, -1, &toplist, nullptr); //search_k defaults to "n_trees * n" if not provided.
            t_end = Clock::now();
            // Fractional seconds: whole-millisecond counts truncate fast queries to 0.
            const double seconds = std::chrono::duration<double>(t_end - t_start).count();

            //intersecting results
            std::sort(toplist.begin(), toplist.end());
            intersection.resize(std::min(closest.size(), toplist.size()));
            std::vector<int>::iterator it_set = std::set_intersection(
                closest.begin(), closest.end(),
                toplist.begin(), toplist.end(),
                intersection.begin());
            intersection.resize(it_set - intersection.begin());

            // storing metrics
            const double hitrate = static_cast<double>(intersection.size()) / K;
            prec_sum[limit] += hitrate;
            time_sum[limit] += seconds;
        }

        //print resulting metrics
        for (int limit : limits) {
            std::cout << "limit: " << limit
                      << "\tprecision: " << std::fixed << std::setprecision(2)
                      << (100.0 * prec_sum[limit] / (i + 1))
                      << "% \tavg. time: " << std::fixed << std::setprecision(6)
                      << (time_sum[limit] / (i + 1)) << "s" << std::endl;
        }
    }

    std::cout << "\nDone" << std::endl;
    return 0;
}
|
||||
|
||||
|
||||
/** Prints the command-line usage summary of the precision example to stdout. */
void help(){
    static const char* const usage_lines[] = {
        "Annoy Precision C++ example",
        "Usage:",
        "(default) ./precision",
        "(using parameters) ./precision num_features num_nodes",
    };

    for (const char* line : usage_lines) {
        std::cout << line << std::endl;
    }
    // Trailing blank line separates the usage text from whatever follows.
    std::cout << std::endl;
}
|
||||
|
||||
/**
 * Echoes the parameters the precision example is about to run with.
 *
 * @param f  number of features per vector
 * @param n  number of vectors (nodes) to index
 */
void feedback(int f, int n){
    // Fixed the misspelling "Runing" in the banner.
    std::cout << "Running precision example with:" << std::endl;
    std::cout << "num. features: " << f << std::endl;
    std::cout << "num. nodes: " << n << std::endl;
    std::cout << std::endl;
}
|
||||
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int f, n;
|
||||
|
||||
|
||||
if(argc == 1){
|
||||
f = 40;
|
||||
n = 1000000;
|
||||
|
||||
feedback(f,n);
|
||||
|
||||
precision(40, 1000000);
|
||||
}
|
||||
else if(argc == 3){
|
||||
|
||||
f = atoi(argv[1]);
|
||||
n = atoi(argv[2]);
|
||||
|
||||
feedback(f,n);
|
||||
|
||||
precision(f, n);
|
||||
}
|
||||
else {
|
||||
help();
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
46
core/src/index/thirdparty/annoy/examples/precision_test.py
vendored
Normal file
46
core/src/index/thirdparty/annoy/examples/precision_test.py
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
from __future__ import print_function
|
||||
import random, time
|
||||
from annoy import AnnoyIndex
|
||||
|
||||
try:
    xrange
except NameError:
    # Python 3 compat
    xrange = range

n, f = 100000, 40

# Index n random Gaussian vectors of dimension f and persist the index.
t = AnnoyIndex(f, 'angular')
for i in xrange(n):
    v = [random.gauss(0, 1) for z in xrange(f)]
    t.add_item(i, v)

t.build(2 * f)
t.save('test.tree')

limits = [10, 100, 1000, 10000]
k = 10
prec_n = 1000
# Running totals per search_k limit.
prec_sum = dict.fromkeys(limits, 0.0)
time_sum = dict.fromkeys(limits, 0.0)

for i in xrange(prec_n):
    j = random.randrange(0, n)

    # Reference answer: k neighbours found with search_k = n.
    closest = set(t.get_nns_by_item(j, k, n))
    for limit in limits:
        t0 = time.time()
        toplist = t.get_nns_by_item(j, k, limit)
        T = time.time() - t0

        found = len(closest.intersection(toplist))
        hitrate = 1.0 * found / k
        prec_sum[limit] += hitrate
        time_sum[limit] += T

    # Report the running averages after every sampled item.
    samples = i + 1
    for limit in limits:
        avg_precision = 100.0 * prec_sum[limit] / samples
        avg_time = time_sum[limit] / samples
        print('limit: %-9d precision: %6.2f%% avg time: %.6fs'
              % (limit, avg_precision, avg_time))
|
||||
7
core/src/index/thirdparty/annoy/examples/s_compile_cpp.sh
vendored
Executable file
7
core/src/index/thirdparty/annoy/examples/s_compile_cpp.sh
vendored
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Compiles the Annoy precision example (precision_test.cpp) into ./precision_test.
# Exits with the compiler's status so a failed build is not reported as "Done".

echo "compiling precision example..."
if g++ precision_test.cpp -o precision_test -std=c++11; then
    echo "Done"
else
    status=$?
    echo "Compilation failed" >&2
    exit "$status"
fi
|
||||
10
core/src/index/thirdparty/annoy/examples/simple_test.py
vendored
Normal file
10
core/src/index/thirdparty/annoy/examples/simple_test.py
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
from annoy import AnnoyIndex
|
||||
|
||||
# Index the three unit vectors of a 3-D space under the angular metric.
a = AnnoyIndex(3, 'angular')
for item, vector in enumerate([[1, 0, 0], [0, 1, 0], [0, 0, 1]]):
    a.add_item(item, vector)
a.build(-1)

# Ask for more neighbours than exist, first by item id and then by raw vector.
neighbours_of_item = a.get_nns_by_item(0, 100)
print(neighbours_of_item)
neighbours_of_vector = a.get_nns_by_vector([1.0, 0.5, 0.5], 100)
print(neighbours_of_vector)
|
||||
92
core/src/index/thirdparty/annoy/src/annoygomodule.h
vendored
Normal file
92
core/src/index/thirdparty/annoy/src/annoygomodule.h
vendored
Normal file
@ -0,0 +1,92 @@
|
||||
#include "annoylib.h"
|
||||
#include "kissrandom.h"
|
||||
|
||||
namespace GoAnnoy {
|
||||
|
||||
class AnnoyIndex {
|
||||
protected:
|
||||
::AnnoyIndexInterface<int32_t, float> *ptr;
|
||||
|
||||
int f;
|
||||
|
||||
public:
|
||||
~AnnoyIndex() {
|
||||
delete ptr;
|
||||
};
|
||||
void addItem(int item, const float* w) {
|
||||
ptr->add_item(item, w);
|
||||
};
|
||||
void build(int q) {
|
||||
ptr->build(q);
|
||||
};
|
||||
bool save(const char* filename, bool prefault) {
|
||||
return ptr->save(filename, prefault);
|
||||
};
|
||||
bool save(const char* filename) {
|
||||
return ptr->save(filename, true);
|
||||
};
|
||||
void unload() {
|
||||
ptr->unload();
|
||||
};
|
||||
bool load(const char* filename, bool prefault) {
|
||||
return ptr->load(filename, prefault);
|
||||
};
|
||||
bool load(const char* filename) {
|
||||
return ptr->load(filename, true);
|
||||
};
|
||||
float getDistance(int i, int j) {
|
||||
return ptr->get_distance(i, j);
|
||||
};
|
||||
void getNnsByItem(int item, int n, int search_k, vector<int32_t>* result, vector<float>* distances) {
|
||||
ptr->get_nns_by_item(item, n, search_k, result, distances);
|
||||
};
|
||||
void getNnsByVector(const float* w, int n, int search_k, vector<int32_t>* result, vector<float>* distances) {
|
||||
ptr->get_nns_by_vector(w, n, search_k, result, distances);
|
||||
};
|
||||
void getNnsByItem(int item, int n, int search_k, vector<int32_t>* result) {
|
||||
ptr->get_nns_by_item(item, n, search_k, result, NULL);
|
||||
};
|
||||
void getNnsByVector(const float* w, int n, int search_k, vector<int32_t>* result) {
|
||||
ptr->get_nns_by_vector(w, n, search_k, result, NULL);
|
||||
};
|
||||
|
||||
int getNItems() {
|
||||
return (int)ptr->get_n_items();
|
||||
};
|
||||
void verbose(bool v) {
|
||||
ptr->verbose(v);
|
||||
};
|
||||
void getItem(int item, vector<float> *v) {
|
||||
v->resize(this->f);
|
||||
ptr->get_item(item, &v->front());
|
||||
};
|
||||
bool onDiskBuild(const char* filename) {
|
||||
return ptr->on_disk_build(filename);
|
||||
};
|
||||
};
|
||||
|
||||
class AnnoyIndexAngular : public AnnoyIndex
|
||||
{
|
||||
public:
|
||||
AnnoyIndexAngular(int f) {
|
||||
ptr = new ::AnnoyIndex<int32_t, float, ::Angular, ::Kiss64Random>(f);
|
||||
this->f = f;
|
||||
}
|
||||
};
|
||||
|
||||
class AnnoyIndexEuclidean : public AnnoyIndex {
|
||||
public:
|
||||
AnnoyIndexEuclidean(int f) {
|
||||
ptr = new ::AnnoyIndex<int32_t, float, ::Euclidean, ::Kiss64Random>(f);
|
||||
this->f = f;
|
||||
}
|
||||
};
|
||||
|
||||
class AnnoyIndexManhattan : public AnnoyIndex {
|
||||
public:
|
||||
AnnoyIndexManhattan(int f) {
|
||||
ptr = new ::AnnoyIndex<int32_t, float, ::Manhattan, ::Kiss64Random>(f);
|
||||
this->f = f;
|
||||
}
|
||||
};
|
||||
}
|
||||
96
core/src/index/thirdparty/annoy/src/annoygomodule.i
vendored
Normal file
96
core/src/index/thirdparty/annoy/src/annoygomodule.i
vendored
Normal file
@ -0,0 +1,96 @@
|
||||
%module annoyindex
|
||||
|
||||
%{
|
||||
#include "annoygomodule.h"
|
||||
%}
|
||||
|
||||
|
||||
// const float *
// A Go []float32 argument is copied into a temporary vector<float> whose
// storage is handed to the wrapped function.
%typemap(gotype) (const float *)  "[]float32"

%typemap(in) (const float *)
%{
    float *v;
    vector<float> w;
    v = (float *)$input.array;
    // Bulk copy instead of one push_back per element.
    w.assign(v, v + $input.len);
    // Indexing an empty vector is undefined; pass NULL for a zero-length slice.
    $1 = w.empty() ? NULL : &w[0];
%}
|
||||
|
||||
// vector<int32_t> *
// Output parameter: the wrapper allocates a scratch vector for the call and
// afterwards copies its contents into the Go slice behind the *[]int argument.
%typemap(gotype) (vector<int32_t> *)  "*[]int"

%typemap(in) (vector<int32_t> *)
%{
  $1 = new vector<int32_t>();
%}

%typemap(freearg) (vector<int32_t> *)
%{
  delete $1;
%}

%typemap(argout) (vector<int32_t> *)
%{
  {
    $input->len = $1->size();
    $input->cap = $1->size();
    // The buffer is sized from the Go-side length that was just stored.
    intgo *copied = (intgo *)malloc($input->len * sizeof(intgo));
    $input->array = copied;
    for (int idx = 0; idx < $1->size(); idx++) {
      copied[idx] = (intgo)(*$1)[idx];
    }
  }
%}
|
||||
|
||||
|
||||
// vector<float> *
// Output parameter: the wrapper allocates a scratch vector for the call and
// afterwards copies its contents into the Go slice behind the *[]float32 argument.
%typemap(gotype) (vector<float> *)  "*[]float32"

%typemap(in) (vector<float> *)
%{
  $1 = new vector<float>();
%}

%typemap(freearg) (vector<float> *)
%{
  delete $1;
%}

%typemap(argout) (vector<float> *)
%{
  {
    $input->len = $1->size();
    $input->cap = $1->size();
    // The buffer is sized from the Go-side length that was just stored.
    float *copied = (float *)malloc($input->len * sizeof(float));
    $input->array = copied;
    for (int idx = 0; idx < $1->size(); idx++) {
      copied[idx] = (float)(*$1)[idx];
    }
  }
%}
|
||||
|
||||
|
||||
// const char *
// A Go string is copied into a zero-filled buffer one byte longer than the
// string, which is released again after the call.
%typemap(gotype) (const char *)  "string"

%typemap(in) (const char *)
%{
  {
    _gostring_ go_text = (_gostring_)$input;
    // calloc zero-fills, so the extra byte acts as the terminator.
    $1 = (char *)calloc((go_text.n + 1), sizeof(char));
    strncpy($1, go_text.p, go_text.n);
  }
%}

%typemap(freearg) (const char *)
%{
  free($1);
%}
|
||||
|
||||
|
||||
/* Let's just grab the original header file here */
|
||||
%include "annoygomodule.h"
|
||||
|
||||
%feature("notabstract") GoAnnoyIndexAngular;
|
||||
%feature("notabstract") GoAnnoyIndexEuclidean;
|
||||
%feature("notabstract") GoAnnoyIndexManhattan;
|
||||
|
||||
|
||||
|
||||
1377
core/src/index/thirdparty/annoy/src/annoylib.h
vendored
Normal file
1377
core/src/index/thirdparty/annoy/src/annoylib.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
318
core/src/index/thirdparty/annoy/src/annoyluamodule.cc
vendored
Normal file
318
core/src/index/thirdparty/annoy/src/annoyluamodule.cc
vendored
Normal file
@ -0,0 +1,318 @@
|
||||
// Copyright (c) 2016 Boris Nagaev
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy of
|
||||
// the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations under
|
||||
// the License.
|
||||
|
||||
#include <cstring>
|
||||
#include <typeinfo>
|
||||
|
||||
#include <lua.hpp>
|
||||
|
||||
#include "annoylib.h"
|
||||
#include "kissrandom.h"
|
||||
|
||||
#if LUA_VERSION_NUM == 501
|
||||
#define compat_setfuncs(L, funcs) luaL_register(L, NULL, funcs)
|
||||
#define compat_rawlen lua_objlen
|
||||
#else
|
||||
#define compat_setfuncs(L, funcs) luaL_setfuncs(L, funcs, 0)
|
||||
#define compat_rawlen lua_rawlen
|
||||
#endif
|
||||
|
||||
template<typename Distance>
|
||||
class LuaAnnoy {
|
||||
public:
|
||||
typedef int32_t AnnoyS;
|
||||
typedef float AnnoyT;
|
||||
typedef AnnoyIndex<AnnoyS, AnnoyT, Distance, Kiss64Random> Impl;
|
||||
typedef LuaAnnoy<Distance> ThisClass;
|
||||
|
||||
class LuaArrayProxy {
|
||||
public:
|
||||
LuaArrayProxy(lua_State* L, int object, int f)
|
||||
: L_(L)
|
||||
, object_(object)
|
||||
{
|
||||
luaL_checktype(L, object, LUA_TTABLE);
|
||||
int v_len = compat_rawlen(L, object);
|
||||
luaL_argcheck(L, v_len == f, object, "Length of v != f");
|
||||
}
|
||||
|
||||
double operator[](int index) const {
|
||||
lua_rawgeti(L_, object_, index + 1);
|
||||
double result = lua_tonumber(L_, -1);
|
||||
lua_pop(L_, 1);
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
lua_State* L_;
|
||||
int object_;
|
||||
};
|
||||
|
||||
static void toVector(lua_State* L, int object, int f, AnnoyT* dst) {
|
||||
LuaArrayProxy proxy(L, object, f);
|
||||
for (int i = 0; i < f; i++) {
|
||||
dst[i] = proxy[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Vector>
|
||||
static void pushVector(lua_State* L, const Vector& v) {
|
||||
lua_createtable(L, v.size(), 0);
|
||||
for (int j = 0; j < v.size(); j++) {
|
||||
lua_pushnumber(L, v[j]);
|
||||
lua_rawseti(L, -2, j + 1);
|
||||
}
|
||||
}
|
||||
|
||||
static const char* typeAsString() {
|
||||
return typeid(Impl).name();
|
||||
}
|
||||
|
||||
static Impl* getAnnoy(lua_State* L, int object) {
|
||||
return reinterpret_cast<Impl*>(
|
||||
luaL_checkudata(L, object, typeAsString())
|
||||
);
|
||||
}
|
||||
|
||||
static int getItemIndex(lua_State* L, int object, int size = -1) {
|
||||
int item = luaL_checkinteger(L, object);
|
||||
luaL_argcheck(L, item >= 0, object, "Index must be >= 0");
|
||||
if (size != -1) {
|
||||
luaL_argcheck(L, item < size, object, "Index must be < size");
|
||||
}
|
||||
return item;
|
||||
}
|
||||
|
||||
static int gc(lua_State* L) {
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
self->~Impl();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tostring(lua_State* L) {
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
lua_pushfstring(
|
||||
L,
|
||||
"annoy.AnnoyIndex object (%dx%d, %s distance)",
|
||||
self->get_n_items(), self->get_f(), Distance::name()
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int add_item(lua_State* L) {
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
int item = getItemIndex(L, 2);
|
||||
self->add_item_impl(item, LuaArrayProxy(L, 3, self->get_f()));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int build(lua_State* L) {
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
int n_trees = luaL_checkinteger(L, 2);
|
||||
self->build(n_trees);
|
||||
lua_pushboolean(L, true);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int on_disk_build(lua_State* L) {
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
const char* filename = luaL_checkstring(L, 2);
|
||||
self->on_disk_build(filename);
|
||||
lua_pushboolean(L, true);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int save(lua_State* L) {
|
||||
int nargs = lua_gettop(L);
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
const char* filename = luaL_checkstring(L, 2);
|
||||
bool prefault = true;
|
||||
if (nargs >= 3) {
|
||||
prefault = lua_toboolean(L, 3);
|
||||
}
|
||||
self->save(filename, prefault);
|
||||
lua_pushboolean(L, true);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int load(lua_State* L) {
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
int nargs = lua_gettop(L);
|
||||
const char* filename = luaL_checkstring(L, 2);
|
||||
bool prefault = true;
|
||||
if (nargs >= 3) {
|
||||
prefault = lua_toboolean(L, 3);
|
||||
}
|
||||
if (!self->load(filename, prefault)) {
|
||||
return luaL_error(L, "Can't load file: %s", filename);
|
||||
}
|
||||
lua_pushboolean(L, true);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int unload(lua_State* L) {
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
self->unload();
|
||||
lua_pushboolean(L, true);
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct Searcher {
|
||||
std::vector<AnnoyS> result;
|
||||
std::vector<AnnoyT> distances;
|
||||
Impl* self;
|
||||
int n;
|
||||
int search_k;
|
||||
bool include_distances;
|
||||
|
||||
Searcher(lua_State* L) {
|
||||
int nargs = lua_gettop(L);
|
||||
self = getAnnoy(L, 1);
|
||||
n = luaL_checkinteger(L, 3);
|
||||
search_k = -1;
|
||||
if (nargs >= 4) {
|
||||
search_k = luaL_checkinteger(L, 4);
|
||||
}
|
||||
include_distances = false;
|
||||
if (nargs >= 5) {
|
||||
include_distances = lua_toboolean(L, 5);
|
||||
}
|
||||
}
|
||||
|
||||
int pushResults(lua_State* L) {
|
||||
pushVector(L, result);
|
||||
if (include_distances) {
|
||||
pushVector(L, distances);
|
||||
}
|
||||
return include_distances ? 2 : 1;
|
||||
}
|
||||
};
|
||||
|
||||
static int get_nns_by_item(lua_State* L) {
|
||||
Searcher s(L);
|
||||
int item = getItemIndex(L, 2, s.self->get_n_items());
|
||||
s.self->get_nns_by_item(item, s.n, s.search_k, &s.result,
|
||||
s.include_distances ? &s.distances : NULL);
|
||||
return s.pushResults(L);
|
||||
}
|
||||
|
||||
static int get_nns_by_vector(lua_State* L) {
|
||||
Searcher s(L);
|
||||
std::vector<AnnoyT> _vec(s.self->get_f());
|
||||
AnnoyT* vec = &(_vec[0]);
|
||||
toVector(L, 2, s.self->get_f(), vec);
|
||||
s.self->get_nns_by_vector(vec, s.n, s.search_k, &s.result,
|
||||
s.include_distances ? &s.distances : NULL);
|
||||
return s.pushResults(L);
|
||||
}
|
||||
|
||||
static int get_item_vector(lua_State* L) {
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
int item = getItemIndex(L, 2, self->get_n_items());
|
||||
std::vector<AnnoyT> _vec(self->get_f());
|
||||
AnnoyT* vec = &(_vec[0]);
|
||||
self->get_item(item, vec);
|
||||
pushVector(L, _vec);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int get_distance(lua_State* L) {
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
int i = getItemIndex(L, 2, self->get_n_items());
|
||||
int j = getItemIndex(L, 3, self->get_n_items());
|
||||
AnnoyT distance = self->get_distance(i, j);
|
||||
lua_pushnumber(L, distance);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int get_n_items(lua_State* L) {
|
||||
Impl* self = getAnnoy(L, 1);
|
||||
lua_pushnumber(L, self->get_n_items());
|
||||
return 1;
|
||||
}
|
||||
|
||||
static const luaL_Reg* getMetatable() {
|
||||
static const luaL_Reg funcs[] = {
|
||||
{"__gc", &ThisClass::gc},
|
||||
{"__tostring", &ThisClass::tostring},
|
||||
{NULL, NULL},
|
||||
};
|
||||
return funcs;
|
||||
}
|
||||
|
||||
static const luaL_Reg* getMethods() {
|
||||
static const luaL_Reg funcs[] = {
|
||||
{"add_item", &ThisClass::add_item},
|
||||
{"build", &ThisClass::build},
|
||||
{"save", &ThisClass::save},
|
||||
{"load", &ThisClass::load},
|
||||
{"unload", &ThisClass::unload},
|
||||
{"get_nns_by_item", &ThisClass::get_nns_by_item},
|
||||
{"get_nns_by_vector", &ThisClass::get_nns_by_vector},
|
||||
{"get_item_vector", &ThisClass::get_item_vector},
|
||||
{"get_distance", &ThisClass::get_distance},
|
||||
{"get_n_items", &ThisClass::get_n_items},
|
||||
{"on_disk_build", &ThisClass::on_disk_build},
|
||||
{NULL, NULL},
|
||||
};
|
||||
return funcs;
|
||||
}
|
||||
|
||||
static void createNew(lua_State* L, int f) {
|
||||
void* self = lua_newuserdata(L, sizeof(Impl));
|
||||
if (luaL_newmetatable(L, typeAsString())) {
|
||||
compat_setfuncs(L, getMetatable());
|
||||
lua_newtable(L);
|
||||
compat_setfuncs(L, getMethods());
|
||||
lua_setfield(L, -2, "__index");
|
||||
}
|
||||
new (self) Impl(f);
|
||||
lua_setmetatable(L, -2);
|
||||
}
|
||||
};
|
||||
|
||||
// annoy.AnnoyIndex(f [, metric="angular"]): pushes a new index userdata for
// the requested metric, or raises a Lua error for an unknown metric name.
static int lua_an_make(lua_State* L) {
  const int f = luaL_checkinteger(L, 1);
  const char* metric = (lua_gettop(L) >= 2) ? luaL_checkstring(L, 2) : "angular";

  if (strcmp(metric, "angular") == 0) {
    LuaAnnoy<Angular>::createNew(L, f);
    return 1;
  }
  if (strcmp(metric, "euclidean") == 0) {
    LuaAnnoy<Euclidean>::createNew(L, f);
    return 1;
  }
  if (strcmp(metric, "manhattan") == 0) {
    LuaAnnoy<Manhattan>::createNew(L, f);
    return 1;
  }
  return luaL_error(L, "Unknown metric: %s", metric);
}
|
||||
|
||||
static const luaL_Reg LUA_ANNOY_FUNCS[] = {
|
||||
{"AnnoyIndex", lua_an_make},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
int luaopen_annoy(lua_State* L) {
|
||||
lua_newtable(L);
|
||||
compat_setfuncs(L, LUA_ANNOY_FUNCS);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// vim: tabstop=2 shiftwidth=2
|
||||
632
core/src/index/thirdparty/annoy/src/annoymodule.cc
vendored
Normal file
632
core/src/index/thirdparty/annoy/src/annoymodule.cc
vendored
Normal file
@ -0,0 +1,632 @@
|
||||
// Copyright (c) 2013 Spotify AB
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy of
|
||||
// the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations under
|
||||
// the License.
|
||||
|
||||
#include "annoylib.h"
|
||||
#include "kissrandom.h"
|
||||
#include "Python.h"
|
||||
#include "structmember.h"
|
||||
#include <exception>
|
||||
#if defined(_MSC_VER) && _MSC_VER == 1500
|
||||
typedef signed __int32 int32_t;
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
#define AVX_INFO "Using 512-bit AVX instructions"
|
||||
#elif defined(USE_AVX128)
|
||||
#define AVX_INFO "Using 128-bit AVX instructions"
|
||||
#else
|
||||
#define AVX_INFO "Not using AVX instructions"
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define COMPILER_INFO "Compiled using MSC"
|
||||
#elif defined(__GNUC__)
|
||||
#define COMPILER_INFO "Compiled on GCC"
|
||||
#else
|
||||
#define COMPILER_INFO "Compiled on unknown platform"
|
||||
#endif
|
||||
|
||||
#define ANNOY_DOC (COMPILER_INFO ". " AVX_INFO ".")
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
#define IS_PY3K
|
||||
#endif
|
||||
|
||||
#ifndef Py_TYPE
|
||||
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
|
||||
#endif
|
||||
|
||||
#ifdef IS_PY3K
|
||||
#define PyInt_FromLong PyLong_FromLong
|
||||
#endif
|
||||
|
||||
|
||||
template class AnnoyIndexInterface<int32_t, float>;
|
||||
|
||||
class HammingWrapper : public AnnoyIndexInterface<int32_t, float> {
  // Adapter exposing a Hamming-distance index through the float interface,
  // using composition. Float vectors are treated as bit vectors (value > 0.5
  // means the bit is set) and packed into uint64_t words.
  // This is questionable from a performance point of view. Should reconsider this solution.
private:
  typedef AnnoyIndex<int32_t, uint64_t, Hamming, Kiss64Random> PackedIndex;

  int32_t _f_external, _f_internal;  // bits per vector / 64-bit words per vector
  PackedIndex _index;

  // Packs _f_external floats from `src` into _f_internal words at `dst`.
  void _pack(const float* src, uint64_t* dst) const {
    for (int32_t word = 0; word < _f_internal; word++) {
      dst[word] = 0;
    }
    for (int32_t bit = 0; bit < _f_external; bit++) {
      if (src[bit] > 0.5) {
        dst[bit / 64] |= (uint64_t)1 << (bit % 64);
      }
    }
  }

  // Expands the packed words at `src` into _f_external floats (0 or 1) at `dst`.
  void _unpack(const uint64_t* src, float* dst) const {
    for (int32_t bit = 0; bit < _f_external; bit++) {
      const uint64_t word = src[bit / 64];
      dst[bit] = (word >> (bit % 64)) & 1;
    }
  }

public:
  HammingWrapper(int f) : _f_external(f), _f_internal((f + 63) / 64), _index((f + 63) / 64) {}

  bool add_item(int32_t item, const float* w, char**error) {
    vector<uint64_t> packed(_f_internal, 0);
    _pack(w, &packed[0]);
    return _index.add_item(item, &packed[0], error);
  }

  bool build(int q, char** error) {
    return _index.build(q, error);
  }

  bool unbuild(char** error) {
    return _index.unbuild(error);
  }

  bool save(const char* filename, bool prefault, char** error) {
    return _index.save(filename, prefault, error);
  }

  void unload() {
    _index.unload();
  }

  bool load(const char* filename, bool prefault, char** error) {
    return _index.load(filename, prefault, error);
  }

  float get_distance(int32_t i, int32_t j) const {
    return _index.get_distance(i, j);
  }

  void get_nns_by_item(int32_t item, size_t n, int search_k, vector<int32_t>* result, vector<float>* distances) const {
    if (!distances) {
      _index.get_nns_by_item(item, n, search_k, result, NULL);
      return;
    }
    // The packed index reports integer distances; widen them to float.
    vector<uint64_t> raw_distances;
    _index.get_nns_by_item(item, n, search_k, result, &raw_distances);
    distances->insert(distances->begin(), raw_distances.begin(), raw_distances.end());
  }

  void get_nns_by_vector(const float* w, size_t n, int search_k, vector<int32_t>* result, vector<float>* distances) const {
    vector<uint64_t> packed(_f_internal, 0);
    _pack(w, &packed[0]);
    if (!distances) {
      _index.get_nns_by_vector(&packed[0], n, search_k, result, NULL);
      return;
    }
    vector<uint64_t> raw_distances;
    _index.get_nns_by_vector(&packed[0], n, search_k, result, &raw_distances);
    distances->insert(distances->begin(), raw_distances.begin(), raw_distances.end());
  }

  int32_t get_n_items() const {
    return _index.get_n_items();
  }

  int32_t get_n_trees() const {
    return _index.get_n_trees();
  }

  void verbose(bool v) {
    _index.verbose(v);
  }

  void get_item(int32_t item, float* v) const {
    vector<uint64_t> packed(_f_internal, 0);
    _index.get_item(item, &packed[0]);
    _unpack(&packed[0], v);
  }

  void set_seed(int q) {
    _index.set_seed(q);
  }

  bool on_disk_build(const char* filename, char** error) {
    return _index.on_disk_build(filename, error);
  }
};
|
||||
|
||||
// annoy python object
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
int f;
|
||||
AnnoyIndexInterface<int32_t, float>* ptr;
|
||||
} py_annoy;
|
||||
|
||||
|
||||
// tp_new for AnnoyIndex(f, metric=None): allocates the Python object and
// creates the index implementation that matches `metric`.
// Returns NULL with a Python exception set on failure; the partially
// constructed object is released on every error path.
static PyObject *
py_an_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) {
  py_annoy *self = (py_annoy *)type->tp_alloc(type, 0);
  if (self == NULL) {
    return NULL;
  }
  // Make sure the deallocator sees a defined pointer if we bail out early.
  self->ptr = NULL;
  const char *metric = NULL;

  static char const * kwlist[] = {"f", "metric", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, &self->f, &metric)) {
    Py_DECREF(self);
    return NULL;
  }
  if (!metric) {
    // This keeps coming up, see #368 etc
    // PyErr_WarnEx reports failure when the warning was turned into an exception.
    if (PyErr_WarnEx(PyExc_FutureWarning, "The default argument for metric will be removed "
                     "in future version of Annoy. Please pass metric='angular' explicitly.", 1) < 0) {
      Py_DECREF(self);
      return NULL;
    }
    self->ptr = new AnnoyIndex<int32_t, float, Angular, Kiss64Random>(self->f);
  } else if (!strcmp(metric, "angular")) {
    self->ptr = new AnnoyIndex<int32_t, float, Angular, Kiss64Random>(self->f);
  } else if (!strcmp(metric, "euclidean")) {
    self->ptr = new AnnoyIndex<int32_t, float, Euclidean, Kiss64Random>(self->f);
  } else if (!strcmp(metric, "manhattan")) {
    self->ptr = new AnnoyIndex<int32_t, float, Manhattan, Kiss64Random>(self->f);
  } else if (!strcmp(metric, "hamming")) {
    self->ptr = new HammingWrapper(self->f);
  } else if (!strcmp(metric, "dot")) {
    self->ptr = new AnnoyIndex<int32_t, float, DotProduct, Kiss64Random>(self->f);
  } else {
    PyErr_SetString(PyExc_ValueError, "No such metric");
    Py_DECREF(self);
    return NULL;
  }

  return (PyObject *)self;
}
|
||||
|
||||
|
||||
static int
|
||||
py_an_init(py_annoy *self, PyObject *args, PyObject *kwargs) {
|
||||
// Seems to be needed for Python 3
|
||||
const char *metric = NULL;
|
||||
int f;
|
||||
static char const * kwlist[] = {"f", "metric", NULL};
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, &f, &metric))
|
||||
return (int) NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
py_an_dealloc(py_annoy* self) {
|
||||
delete self->ptr;
|
||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
|
||||
// Attributes exposed directly from the C struct: `f`, the vector dimension.
static PyMemberDef py_annoy_members[] = {
  {(char*)"f", T_INT, offsetof(py_annoy, f), 0, (char*)""},
  {NULL, 0, 0, 0, NULL}  /* Sentinel */
};
|
||||
|
||||
|
||||
// AnnoyIndex.load(fn, prefault=False): loads an index file.
// Returns True, or NULL with IOError set when the underlying load fails.
static PyObject *
py_an_load(py_annoy *self, PyObject *args, PyObject *kwargs) {
  char *filename;
  char *error = NULL;          // stays NULL if the callee does not fill it in
  unsigned char prefault = 0;  // the "b" format unit stores an unsigned char
  if (!self->ptr) {
    // Returning NULL without an exception would surface as a SystemError.
    PyErr_SetString(PyExc_RuntimeError, "Annoy index is not initialized");
    return NULL;
  }
  static char const * kwlist[] = {"fn", "prefault", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|b", (char**)kwlist, &filename, &prefault))
    return NULL;

  if (!self->ptr->load(filename, prefault != 0, &error)) {
    PyErr_SetString(PyExc_IOError, error ? error : "Unable to load index");
    free(error);
    return NULL;
  }
  Py_RETURN_TRUE;
}
|
||||
|
||||
|
||||
// AnnoyIndex.save(fn, prefault=False): writes the index to a file.
// Returns True, or NULL with IOError set when the underlying save fails.
static PyObject *
py_an_save(py_annoy *self, PyObject *args, PyObject *kwargs) {
  char *filename;
  char *error = NULL;          // stays NULL if the callee does not fill it in
  unsigned char prefault = 0;  // the "b" format unit stores an unsigned char
  if (!self->ptr) {
    // Returning NULL without an exception would surface as a SystemError.
    PyErr_SetString(PyExc_RuntimeError, "Annoy index is not initialized");
    return NULL;
  }
  static char const * kwlist[] = {"fn", "prefault", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|b", (char**)kwlist, &filename, &prefault))
    return NULL;

  if (!self->ptr->save(filename, prefault != 0, &error)) {
    PyErr_SetString(PyExc_IOError, error ? error : "Unable to save index");
    free(error);
    return NULL;
  }
  Py_RETURN_TRUE;
}
|
||||
|
||||
|
||||
// Converts a search result into Python objects.
// Returns a list of item ids, or an (ids, distances) tuple of two lists when
// include_distances is non-zero. Returns NULL with the Python exception set
// if any allocation fails; nothing is leaked in that case.
PyObject*
get_nns_to_python(const vector<int32_t>& result, const vector<float>& distances, int include_distances) {
  PyObject* l = PyList_New(result.size());
  if (l == NULL)
    return NULL;
  for (size_t i = 0; i < result.size(); i++) {
    PyObject* id = PyInt_FromLong(result[i]);
    if (id == NULL) {
      Py_DECREF(l);
      return NULL;
    }
    PyList_SetItem(l, i, id);  // steals the reference to id
  }
  if (!include_distances)
    return l;

  PyObject* d = PyList_New(distances.size());
  if (d == NULL) {
    Py_DECREF(l);
    return NULL;
  }
  for (size_t i = 0; i < distances.size(); i++) {
    PyObject* dist = PyFloat_FromDouble(distances[i]);
    if (dist == NULL) {
      Py_DECREF(l);
      Py_DECREF(d);
      return NULL;
    }
    PyList_SetItem(d, i, dist);  // steals the reference to dist
  }

  PyObject* t = PyTuple_New(2);
  if (t == NULL) {
    Py_DECREF(l);
    Py_DECREF(d);
    return NULL;
  }
  PyTuple_SetItem(t, 0, l);  // the tuple takes ownership of both lists
  PyTuple_SetItem(t, 1, d);

  return t;
}
|
||||
|
||||
|
||||
// Validates an item id. Negative ids are always rejected; ids at or beyond the
// current item count are rejected unless `building` is true.
// On rejection an IndexError is set and false is returned.
bool check_constraints(py_annoy *self, int32_t item, bool building) {
  if (item < 0) {
    PyErr_SetString(PyExc_IndexError, "Item index can not be negative");
    return false;
  }

  const bool past_the_end = !building && item >= self->ptr->get_n_items();
  if (past_the_end) {
    PyErr_SetString(PyExc_IndexError, "Item index larger than the largest item index");
    return false;
  }

  return true;
}
|
||||
|
||||
// Python method `get_nns_by_item(i, n, search_k=-1, include_distances=0)`.
//
// Validates `i` with check_constraints(), runs the query with the GIL released,
// and converts the result with get_nns_to_python(). Returns NULL without
// setting an exception here when the wrapped index pointer is null.
static PyObject*
py_an_get_nns_by_item(py_annoy *self, PyObject *args, PyObject *kwargs) {
  int32_t item;
  int32_t n;
  int32_t search_k = -1;
  int32_t include_distances = 0;

  if (!self->ptr)
    return NULL;

  static char const * kwlist[] = {"i", "n", "search_k", "include_distances", NULL};
  const int parsed = PyArg_ParseTupleAndKeywords(args, kwargs, "ii|ii", (char**)kwlist,
                                                 &item, &n, &search_k, &include_distances);
  if (!parsed)
    return NULL;

  if (!check_constraints(self, item, false))
    return NULL;

  vector<int32_t> neighbours;
  vector<float> dists;
  // Distances are only collected when the caller asked for them.
  vector<float>* dists_out = include_distances ? &dists : NULL;

  Py_BEGIN_ALLOW_THREADS;
  self->ptr->get_nns_by_item(item, n, search_k, &neighbours, dists_out);
  Py_END_ALLOW_THREADS;

  return get_nns_to_python(neighbours, dists, include_distances);
}
|
||||
|
||||
|
||||
bool
|
||||
convert_list_to_vector(PyObject* v, int f, vector<float>* w) {
|
||||
if (PyObject_Size(v) == -1) {
|
||||
char buf[256];
|
||||
snprintf(buf, 256, "Expected an iterable, got an object of type \"%s\"", v->ob_type->tp_name);
|
||||
PyErr_SetString(PyExc_ValueError, buf);
|
||||
return false;
|
||||
}
|
||||
if (PyObject_Size(v) != f) {
|
||||
char buf[128];
|
||||
snprintf(buf, 128, "Vector has wrong length (expected %d, got %ld)", f, PyObject_Size(v));
|
||||
PyErr_SetString(PyExc_IndexError, buf);
|
||||
return false;
|
||||
}
|
||||
for (int z = 0; z < f; z++) {
|
||||
PyObject *key = PyInt_FromLong(z);
|
||||
PyObject *pf = PyObject_GetItem(v, key);
|
||||
(*w)[z] = PyFloat_AsDouble(pf);
|
||||
Py_DECREF(key);
|
||||
Py_DECREF(pf);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Python method `get_nns_by_vector(vector, n, search_k=-1, include_distances=0)`.
//
// Converts `vector` to `self->f` floats with convert_list_to_vector(), runs the
// query with the GIL released, and converts the result with
// get_nns_to_python(). Returns NULL without setting an exception here when the
// wrapped index pointer is null.
static PyObject*
py_an_get_nns_by_vector(py_annoy *self, PyObject *args, PyObject *kwargs) {
  PyObject* py_vector;
  int32_t n;
  int32_t search_k = -1;
  int32_t include_distances = 0;

  if (!self->ptr)
    return NULL;

  static char const * kwlist[] = {"vector", "n", "search_k", "include_distances", NULL};
  const int parsed = PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|ii", (char**)kwlist,
                                                 &py_vector, &n, &search_k, &include_distances);
  if (!parsed)
    return NULL;

  vector<float> query(self->f);
  if (!convert_list_to_vector(py_vector, self->f, &query))
    return NULL;

  vector<int32_t> neighbours;
  vector<float> dists;
  // Distances are only collected when the caller asked for them.
  vector<float>* dists_out = include_distances ? &dists : NULL;

  Py_BEGIN_ALLOW_THREADS;
  self->ptr->get_nns_by_vector(&query[0], n, search_k, &neighbours, dists_out);
  Py_END_ALLOW_THREADS;

  return get_nns_to_python(neighbours, dists, include_distances);
}
|
||||
|
||||
|
||||
// Python method `get_item_vector(i)`: returns the stored vector of item `i`
// as a new list of `self->f` floats.
//
// Returns NULL with an exception set when argument parsing or the index check
// fails, or when a Python object cannot be allocated. Returns NULL without
// setting an exception here when the wrapped index pointer is null.
static PyObject*
py_an_get_item_vector(py_annoy *self, PyObject *args) {
  int32_t item;
  if (!self->ptr)
    return NULL;
  if (!PyArg_ParseTuple(args, "i", &item))
    return NULL;

  if (!check_constraints(self, item, false)) {
    return NULL;
  }

  vector<float> v(self->f);
  self->ptr->get_item(item, &v[0]);

  PyObject* l = PyList_New(self->f);
  if (l == NULL)
    return NULL;
  for (int z = 0; z < self->f; z++) {
    PyObject* value = PyFloat_FromDouble(v[z]);
    if (value == NULL) {
      Py_DECREF(l);  // drop the partially filled list
      return NULL;
    }
    PyList_SetItem(l, z, value);  // steals the reference to `value`
  }

  return l;
}
|
||||
|
||||
|
||||
// Python method `add_item(i, vector)`: stores `vector` under item index `i`.
//
// The index is validated in "building" mode (only negativity is checked) and
// the vector must have exactly `self->f` elements. Returns None on success;
// raises Exception with the message from add_item() when it reports failure.
// Returns NULL without setting an exception here when the wrapped index
// pointer is null.
static PyObject*
py_an_add_item(py_annoy *self, PyObject *args, PyObject* kwargs) {
  PyObject* py_vector;
  int32_t item;

  if (!self->ptr)
    return NULL;

  static char const * kwlist[] = {"i", "vector", NULL};
  const int parsed = PyArg_ParseTupleAndKeywords(args, kwargs, "iO", (char**)kwlist, &item, &py_vector);
  if (!parsed)
    return NULL;

  if (!check_constraints(self, item, true))
    return NULL;

  vector<float> values(self->f);
  if (!convert_list_to_vector(py_vector, self->f, &values))
    return NULL;

  char* error;
  const bool added = self->ptr->add_item(item, &values[0], &error);
  if (!added) {
    PyErr_SetString(PyExc_Exception, error);
    free(error);
    return NULL;
  }

  Py_RETURN_NONE;
}
|
||||
|
||||
// Python method `on_disk_build(fn)`: forwards the file name to
// on_disk_build() on the wrapped index.
//
// Returns True on success; raises IOError with the reported message on
// failure. Returns NULL without setting an exception here when the wrapped
// index pointer is null.
static PyObject *
py_an_on_disk_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
  char *filename;
  char *error;

  if (!self->ptr)
    return NULL;

  static char const * kwlist[] = {"fn", NULL};
  const int parsed = PyArg_ParseTupleAndKeywords(args, kwargs, "s", (char**)kwlist, &filename);
  if (!parsed)
    return NULL;

  const bool ok = self->ptr->on_disk_build(filename, &error);
  if (!ok) {
    PyErr_SetString(PyExc_IOError, error);
    free(error);
    return NULL;
  }
  Py_RETURN_TRUE;
}
|
||||
|
||||
// Python method `build(n_trees)`: calls build() on the wrapped index with the
// GIL released.
//
// Returns True on success; raises Exception with the reported message on
// failure. Returns NULL without setting an exception here when the wrapped
// index pointer is null.
static PyObject *
py_an_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
  int n_trees;

  if (!self->ptr)
    return NULL;

  static char const * kwlist[] = {"n_trees", NULL};
  const int parsed = PyArg_ParseTupleAndKeywords(args, kwargs, "i", (char**)kwlist, &n_trees);
  if (!parsed)
    return NULL;

  bool built;
  char* error;
  Py_BEGIN_ALLOW_THREADS;
  built = self->ptr->build(n_trees, &error);
  Py_END_ALLOW_THREADS;

  // The exception is raised only after the GIL has been re-acquired.
  if (!built) {
    PyErr_SetString(PyExc_Exception, error);
    free(error);
    return NULL;
  }

  Py_RETURN_TRUE;
}
|
||||
|
||||
|
||||
// Python method `unbuild()`: calls unbuild() on the wrapped index.
//
// Returns True on success; raises Exception with the reported message on
// failure. Returns NULL without setting an exception here when the wrapped
// index pointer is null.
static PyObject *
py_an_unbuild(py_annoy *self) {
  if (!self->ptr)
    return NULL;

  char* error;
  const bool ok = self->ptr->unbuild(&error);
  if (!ok) {
    PyErr_SetString(PyExc_Exception, error);
    free(error);
    return NULL;
  }

  Py_RETURN_TRUE;
}
|
||||
|
||||
|
||||
// Python method `unload()`: calls unload() on the wrapped index and returns
// True. Returns NULL without setting an exception here when the wrapped index
// pointer is null.
static PyObject *
py_an_unload(py_annoy *self) {
  if (self->ptr) {
    self->ptr->unload();
    Py_RETURN_TRUE;
  }
  return NULL;
}
|
||||
|
||||
|
||||
// Python method `get_distance(i, j)`: returns the distance between two stored
// items as a Python float.
//
// Both indices are validated with check_constraints() (`i` first; `j` is only
// checked when `i` passed). Returns NULL without setting an exception here
// when the wrapped index pointer is null.
static PyObject *
py_an_get_distance(py_annoy *self, PyObject *args) {
  int32_t first, second;

  if (!self->ptr)
    return NULL;
  if (!PyArg_ParseTuple(args, "ii", &first, &second))
    return NULL;

  if (!check_constraints(self, first, false))
    return NULL;
  if (!check_constraints(self, second, false))
    return NULL;

  const double distance = self->ptr->get_distance(first, second);
  return PyFloat_FromDouble(distance);
}
|
||||
|
||||
|
||||
// Python method `get_n_items()`: returns get_n_items() of the wrapped index as
// a Python integer. Returns NULL without setting an exception here when the
// wrapped index pointer is null.
static PyObject *
py_an_get_n_items(py_annoy *self) {
  if (!self->ptr)
    return NULL;

  const int32_t count = self->ptr->get_n_items();
  return PyInt_FromLong(count);
}
|
||||
|
||||
// Python method `get_n_trees()`: returns get_n_trees() of the wrapped index as
// a Python integer. Returns NULL without setting an exception here when the
// wrapped index pointer is null.
static PyObject *
py_an_get_n_trees(py_annoy *self) {
  if (!self->ptr)
    return NULL;

  const int32_t count = self->ptr->get_n_trees();
  return PyInt_FromLong(count);
}
|
||||
|
||||
// Python method `verbose(v)`: parses one integer and passes its truth value to
// verbose() on the wrapped index. Returns True. Returns NULL without setting
// an exception here when the wrapped index pointer is null.
static PyObject *
py_an_verbose(py_annoy *self, PyObject *args) {
  int flag;

  if (!self->ptr)
    return NULL;
  if (!PyArg_ParseTuple(args, "i", &flag))
    return NULL;

  const bool enabled = (flag != 0);
  self->ptr->verbose(enabled);

  Py_RETURN_TRUE;
}
|
||||
|
||||
|
||||
// Python method `set_seed(seed)`: parses one integer and passes it to
// set_seed() on the wrapped index. Returns None. Returns NULL without setting
// an exception here when the wrapped index pointer is null.
static PyObject *
py_an_set_seed(py_annoy *self, PyObject *args) {
  int seed;

  if (!self->ptr)
    return NULL;
  if (!PyArg_ParseTuple(args, "i", &seed))
    return NULL;

  self->ptr->set_seed(seed);

  Py_RETURN_NONE;
}
|
||||
|
||||
|
||||
// Method table for the Annoy type: Python-visible name, C entry point, calling
// convention flags and docstring. Long docstrings are written as adjacent
// string literals, which the compiler concatenates into a single string.
static PyMethodDef AnnoyMethods[] = {
  {"load", (PyCFunction)py_an_load, METH_VARARGS | METH_KEYWORDS,
   "Loads (mmaps) an index from disk."},
  {"save", (PyCFunction)py_an_save, METH_VARARGS | METH_KEYWORDS,
   "Saves the index to disk."},
  {"get_nns_by_item", (PyCFunction)py_an_get_nns_by_item, METH_VARARGS | METH_KEYWORDS,
   "Returns the `n` closest items to item `i`.\n\n"
   ":param search_k: the query will inspect up to `search_k` nodes.\n"
   "`search_k` gives you a run-time tradeoff between better accuracy and speed.\n"
   "`search_k` defaults to `n_trees * n` if not provided.\n\n"
   ":param include_distances: If `True`, this function will return a\n"
   "2 element tuple of lists. The first list contains the `n` closest items.\n"
   "The second list contains the corresponding distances."},
  {"get_nns_by_vector", (PyCFunction)py_an_get_nns_by_vector, METH_VARARGS | METH_KEYWORDS,
   "Returns the `n` closest items to vector `vector`.\n\n"
   ":param search_k: the query will inspect up to `search_k` nodes.\n"
   "`search_k` gives you a run-time tradeoff between better accuracy and speed.\n"
   "`search_k` defaults to `n_trees * n` if not provided.\n\n"
   ":param include_distances: If `True`, this function will return a\n"
   "2 element tuple of lists. The first list contains the `n` closest items.\n"
   "The second list contains the corresponding distances."},
  {"get_item_vector", (PyCFunction)py_an_get_item_vector, METH_VARARGS,
   "Returns the vector for item `i` that was previously added."},
  {"add_item", (PyCFunction)py_an_add_item, METH_VARARGS | METH_KEYWORDS,
   "Adds item `i` (any nonnegative integer) with vector `v`.\n\n"
   "Note that it will allocate memory for `max(i)+1` items."},
  {"on_disk_build", (PyCFunction)py_an_on_disk_build, METH_VARARGS | METH_KEYWORDS,
   "Build will be performed with storage on disk instead of RAM."},
  {"build", (PyCFunction)py_an_build, METH_VARARGS | METH_KEYWORDS,
   "Builds a forest of `n_trees` trees.\n\n"
   "More trees give higher precision when querying. After calling `build`,\n"
   "no more items can be added."},
  {"unbuild", (PyCFunction)py_an_unbuild, METH_NOARGS,
   "Unbuilds the tree in order to allows adding new items.\n\n"
   "build() has to be called again afterwards in order to\n"
   "run queries."},
  {"unload", (PyCFunction)py_an_unload, METH_NOARGS,
   "Unloads an index from disk."},
  {"get_distance", (PyCFunction)py_an_get_distance, METH_VARARGS,
   "Returns the distance between items `i` and `j`."},
  {"get_n_items", (PyCFunction)py_an_get_n_items, METH_NOARGS,
   "Returns the number of items in the index."},
  {"get_n_trees", (PyCFunction)py_an_get_n_trees, METH_NOARGS,
   "Returns the number of trees in the index."},
  {"verbose", (PyCFunction)py_an_verbose, METH_VARARGS,
   ""},
  {"set_seed", (PyCFunction)py_an_set_seed, METH_VARARGS,
   "Sets the seed of Annoy's random number generator."},
  {NULL, NULL, 0, NULL}  /* Sentinel */
};
|
||||
|
||||
|
||||
// Type object for the Python class exposed as "annoy.Annoy". Slots are filled
// positionally; only the name, size, destructor, flags, doc, methods, members,
// init and new slots are populated, every other slot is left zero.
// The slot-name comments follow the names used by the original initializer.
static PyTypeObject PyAnnoyType = {
  PyVarObject_HEAD_INIT(NULL, 0)
  "annoy.Annoy",                              /* tp_name */
  sizeof(py_annoy),                           /* tp_basicsize */
  0,                                          /* tp_itemsize */
  (destructor)py_an_dealloc,                  /* tp_dealloc */
  0,                                          /* tp_print */
  0,                                          /* tp_getattr */
  0,                                          /* tp_setattr */
  0,                                          /* tp_compare */
  0,                                          /* tp_repr */
  0,                                          /* tp_as_number */
  0,                                          /* tp_as_sequence */
  0,                                          /* tp_as_mapping */
  0,                                          /* tp_hash */
  0,                                          /* tp_call */
  0,                                          /* tp_str */
  0,                                          /* tp_getattro */
  0,                                          /* tp_setattro */
  0,                                          /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
  ANNOY_DOC,                                  /* tp_doc */
  0,                                          /* tp_traverse */
  0,                                          /* tp_clear */
  0,                                          /* tp_richcompare */
  0,                                          /* tp_weaklistoffset */
  0,                                          /* tp_iter */
  0,                                          /* tp_iternext */
  AnnoyMethods,                               /* tp_methods */
  py_annoy_members,                           /* tp_members */
  0,                                          /* tp_getset */
  0,                                          /* tp_base */
  0,                                          /* tp_dict */
  0,                                          /* tp_descr_get */
  0,                                          /* tp_descr_set */
  0,                                          /* tp_dictoffset */
  (initproc)py_an_init,                       /* tp_init */
  0,                                          /* tp_alloc */
  py_an_new,                                  /* tp_new */
};
|
||||
|
||||
// Module-level function table: the module defines no free functions, so the
// table holds only the terminating sentinel entry.
static PyMethodDef module_methods[] = {
  {NULL, NULL, 0, NULL}  /* Sentinel */
};
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
// Module definition passed to PyModule_Create() in create_module(); only
// compiled for Python 3.
static struct PyModuleDef moduledef = {
  PyModuleDef_HEAD_INIT,
  "annoylib",       /* m_name */
  ANNOY_DOC,        /* m_doc */
  -1,               /* m_size */
  module_methods,   /* m_methods */
  NULL,             /* m_reload */
  NULL,             /* m_traverse */
  NULL,             /* m_clear */
  NULL,             /* m_free */
};
#endif
|
||||
|
||||
// Creates the `annoylib` module object and registers the Annoy type on it
// under the attribute name "Annoy".
//
// Returns the module, or NULL with a Python exception set when the type cannot
// be readied, the module cannot be created, or the type cannot be added.
PyObject *create_module(void) {
  PyObject *m;

  if (PyType_Ready(&PyAnnoyType) < 0)
    return NULL;

#if PY_MAJOR_VERSION >= 3
  m = PyModule_Create(&moduledef);
#else
  m = Py_InitModule("annoylib", module_methods);
#endif

  if (m == NULL)
    return NULL;

  Py_INCREF(&PyAnnoyType);
  // PyModule_AddObject only steals the reference on success, so on failure the
  // extra reference taken above has to be released here.
  if (PyModule_AddObject(m, "Annoy", (PyObject *)&PyAnnoyType) < 0) {
    Py_DECREF(&PyAnnoyType);
#if PY_MAJOR_VERSION >= 3
    // PyModule_Create returns a new reference; Py_InitModule (Python 2)
    // returns a borrowed one and must not be released.
    Py_DECREF(m);
#endif
    return NULL;
  }
  return m;
}
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
PyMODINIT_FUNC PyInit_annoylib(void) {
|
||||
return create_module(); // it should return moudule object in py3
|
||||
}
|
||||
#else
|
||||
PyMODINIT_FUNC initannoylib(void) {
|
||||
create_module();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// vim: tabstop=2 shiftwidth=2
|
||||
106
core/src/index/thirdparty/annoy/src/kissrandom.h
vendored
Normal file
106
core/src/index/thirdparty/annoy/src/kissrandom.h
vendored
Normal file
@ -0,0 +1,106 @@
|
||||
#ifndef KISSRANDOM_H
|
||||
#define KISSRANDOM_H
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER == 1500
|
||||
typedef unsigned __int32 uint32_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
// KISS = "keep it simple, stupid", but high quality random number generator
|
||||
// http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf -> "Use a good RNG and build it into your code"
|
||||
// http://mathforum.org/kb/message.jspa?messageID=6627731
|
||||
// https://de.wikipedia.org/wiki/KISS_(Zufallszahlengenerator)
|
||||
|
||||
// 32 bit KISS
|
||||
// 32-bit KISS generator: each draw sums a linear congruential generator (x),
// a xor-shift generator (y) and a multiply-with-carry generator (z, c).
struct Kiss32Random {
  uint32_t x;
  uint32_t y;
  uint32_t z;
  uint32_t c;

  // seed must be != 0
  Kiss32Random(uint32_t seed = 123456789)
      : x(seed), y(362436000), z(521288629), c(7654321) {}

  // Advances all three component generators and returns their sum (mod 2^32).
  uint32_t kiss() {
    // Linear congruence generator
    x = 69069 * x + 12345;

    // Xor shift
    y ^= y << 13;
    y ^= y >> 17;
    y ^= y << 5;

    // Multiply-with-carry: high word becomes the carry, low word the state
    const uint64_t product = 698769069ULL * z + c;
    c = static_cast<uint32_t>(product >> 32);
    z = static_cast<uint32_t>(product);

    return x + y + z;
  }

  // Draw random 0 or 1
  inline int flip() {
    const uint32_t draw = kiss();
    return static_cast<int>(draw & 1);
  }

  // Draw random integer between 0 and n-1 where n is at most the number of data points you have
  inline size_t index(size_t n) {
    const uint32_t draw = kiss();
    return draw % n;
  }

  // Replaces only the LCG state `x`; y, z and c keep their current values.
  inline void set_seed(uint32_t seed) {
    x = seed;
  }
};
|
||||
|
||||
// 64 bit KISS. Use this if you have more than about 2^24 data points ("big data" ;) )
|
||||
// 64-bit KISS generator: each draw sums a multiply-with-carry generator (x, c),
// a xor-shift generator (y) and a linear congruential generator (z).
struct Kiss64Random {
  uint64_t x;
  uint64_t y;
  uint64_t z;
  uint64_t c;

  // seed must be != 0
  Kiss64Random(uint64_t seed = 1234567890987654321ULL) {
    x = seed;
    y = 362436362436362436ULL;
    z = 1066149217761810ULL;
    c = 123456123456123456ULL;
  }

  // Advances all three component generators and returns their sum (mod 2^64).
  uint64_t kiss() {
    // Linear congruence generator
    z = 6906969069LL*z+1234567;

    // Xor shift
    y ^= (y<<13);
    y ^= (y>>17);
    y ^= (y<<43);

    // Multiply-with-carry (uint128_t t = (2^58 + 1) * x + c; c = t >> 64; x = (uint64_t) t)
    uint64_t t = (x<<58)+c;
    c = (x>>6);
    x += t;
    c += (x<t);  // carry out of the 64-bit addition above

    return x + y + z;
  }

  // Draw random 0 or 1
  inline int flip() {
    return kiss() & 1;
  }

  // Draw random integer between 0 and n-1 where n is at most the number of data points you have
  inline size_t index(size_t n) {
    return kiss() % n;
  }

  // Replaces only the state word `x`; y, z and c keep their current values.
  // Takes the full 64-bit seed so that set_seed(s) leaves `x` exactly as the
  // constructor does for the same `s` (a 32-bit parameter dropped the upper half).
  inline void set_seed(uint64_t seed) {
    x = seed;
  }
};
|
||||
|
||||
#endif
|
||||
// vim: tabstop=2 shiftwidth=2
|
||||
238
core/src/index/thirdparty/annoy/src/mman.h
vendored
Normal file
238
core/src/index/thirdparty/annoy/src/mman.h
vendored
Normal file
@ -0,0 +1,238 @@
|
||||
|
||||
// This is from https://code.google.com/p/mman-win32/
|
||||
//
|
||||
// Licensed under MIT
|
||||
|
||||
#ifndef _MMAN_WIN32_H
|
||||
#define _MMAN_WIN32_H
|
||||
|
||||
#ifndef _WIN32_WINNT // Allow use of features specific to Windows XP or later.
|
||||
#define _WIN32_WINNT 0x0501 // Change this to the appropriate value to target other versions of Windows.
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <windows.h>
|
||||
#include <errno.h>
|
||||
#include <io.h>
|
||||
|
||||
#define PROT_NONE 0
|
||||
#define PROT_READ 1
|
||||
#define PROT_WRITE 2
|
||||
#define PROT_EXEC 4
|
||||
|
||||
#define MAP_FILE 0
|
||||
#define MAP_SHARED 1
|
||||
#define MAP_PRIVATE 2
|
||||
#define MAP_TYPE 0xf
|
||||
#define MAP_FIXED 0x10
|
||||
#define MAP_ANONYMOUS 0x20
|
||||
#define MAP_ANON MAP_ANONYMOUS
|
||||
|
||||
#define MAP_FAILED ((void *)-1)
|
||||
|
||||
/* Flags for msync. */
|
||||
#define MS_ASYNC 1
|
||||
#define MS_SYNC 2
|
||||
#define MS_INVALIDATE 4
|
||||
|
||||
#ifndef FILE_MAP_EXECUTE
|
||||
#define FILE_MAP_EXECUTE 0x0020
|
||||
#endif
|
||||
|
||||
/* Translates a Win32 error code into the value stored in errno by the
 * wrappers in this header. The mapping is not implemented: the Win32 code is
 * returned unchanged (zero stays zero) and `deferr` is ignored. */
static int __map_mman_error(const DWORD err, const int deferr)
{
    (void)deferr;
    //TODO: implement
    return (err != 0) ? (int)err : 0;
}
|
||||
|
||||
/* Maps POSIX PROT_* bits to a Win32 page-protection constant.
 * PROT_NONE yields 0; otherwise the result is chosen from the EXEC and WRITE
 * bits only (PROT_READ does not influence the choice). */
static DWORD __map_mmap_prot_page(const int prot)
{
    if (prot == PROT_NONE)
        return 0;

    const int writable = (prot & PROT_WRITE) != 0;

    if ((prot & PROT_EXEC) != 0)
        return writable ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;

    return writable ? PAGE_READWRITE : PAGE_READONLY;
}
|
||||
|
||||
/* Maps POSIX PROT_* bits to the FILE_MAP_* access mask passed to
 * MapViewOfFile. PROT_NONE yields 0; otherwise each PROT bit that is set
 * contributes its FILE_MAP_* counterpart. */
static DWORD __map_mmap_prot_file(const int prot)
{
    DWORD access = 0;

    if (prot != PROT_NONE)
    {
        if ((prot & PROT_READ) != 0)
            access |= FILE_MAP_READ;
        if ((prot & PROT_WRITE) != 0)
            access |= FILE_MAP_WRITE;
        if ((prot & PROT_EXEC) != 0)
            access |= FILE_MAP_EXECUTE;
    }

    return access;
}
|
||||
|
||||
inline void* mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
|
||||
{
|
||||
HANDLE fm, h;
|
||||
|
||||
void * map = MAP_FAILED;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable: 4293)
|
||||
#endif
|
||||
|
||||
const DWORD dwFileOffsetLow = (sizeof(off_t) <= sizeof(DWORD)) ?
|
||||
(DWORD)off : (DWORD)(off & 0xFFFFFFFFL);
|
||||
const DWORD dwFileOffsetHigh = (sizeof(off_t) <= sizeof(DWORD)) ?
|
||||
(DWORD)0 : (DWORD)((off >> 32) & 0xFFFFFFFFL);
|
||||
const DWORD protect = __map_mmap_prot_page(prot);
|
||||
const DWORD desiredAccess = __map_mmap_prot_file(prot);
|
||||
|
||||
const off_t maxSize = off + (off_t)len;
|
||||
|
||||
const DWORD dwMaxSizeLow = (sizeof(off_t) <= sizeof(DWORD)) ?
|
||||
(DWORD)maxSize : (DWORD)(maxSize & 0xFFFFFFFFL);
|
||||
const DWORD dwMaxSizeHigh = (sizeof(off_t) <= sizeof(DWORD)) ?
|
||||
(DWORD)0 : (DWORD)((maxSize >> 32) & 0xFFFFFFFFL);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
errno = 0;
|
||||
|
||||
if (len == 0
|
||||
/* Unsupported flag combinations */
|
||||
|| (flags & MAP_FIXED) != 0
|
||||
/* Usupported protection combinations */
|
||||
|| prot == PROT_EXEC)
|
||||
{
|
||||
errno = EINVAL;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
h = ((flags & MAP_ANONYMOUS) == 0) ?
|
||||
(HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE;
|
||||
|
||||
if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE)
|
||||
{
|
||||
errno = EBADF;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL);
|
||||
|
||||
if (fm == NULL)
|
||||
{
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len);
|
||||
|
||||
CloseHandle(fm);
|
||||
|
||||
if (map == NULL)
|
||||
{
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
return MAP_FAILED;
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
inline int munmap(void *addr, size_t len)
|
||||
{
|
||||
if (UnmapViewOfFile(addr))
|
||||
return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
inline int mprotect(void *addr, size_t len, int prot)
|
||||
{
|
||||
DWORD newProtect = __map_mmap_prot_page(prot);
|
||||
DWORD oldProtect = 0;
|
||||
|
||||
if (VirtualProtect(addr, len, newProtect, &oldProtect))
|
||||
return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
inline int msync(void *addr, size_t len, int flags)
|
||||
{
|
||||
if (FlushViewOfFile(addr, len))
|
||||
return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
inline int mlock(const void *addr, size_t len)
|
||||
{
|
||||
if (VirtualLock((LPVOID)addr, len))
|
||||
return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
inline int munlock(const void *addr, size_t len)
|
||||
{
|
||||
if (VirtualUnlock((LPVOID)addr, len))
|
||||
return 0;
|
||||
|
||||
errno = __map_mman_error(GetLastError(), EPERM);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if !defined(__MINGW32__)
|
||||
inline int ftruncate(int fd, unsigned int size) {
|
||||
if (fd < 0) {
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
|
||||
HANDLE h = (HANDLE)_get_osfhandle(fd);
|
||||
unsigned int cur = SetFilePointer(h, 0, NULL, FILE_CURRENT);
|
||||
if (cur == ~0 || SetFilePointer(h, size, NULL, FILE_BEGIN) == ~0 || !SetEndOfFile(h)) {
|
||||
int error = GetLastError();
|
||||
switch (GetLastError()) {
|
||||
case ERROR_INVALID_HANDLE:
|
||||
errno = EBADF;
|
||||
break;
|
||||
default:
|
||||
errno = EIO;
|
||||
break;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@ -89,6 +89,16 @@ if (NOT TARGET test_idmap)
|
||||
endif ()
|
||||
target_link_libraries(test_idmap ${depend_libs} ${unittest_libs} ${basic_libs})
|
||||
|
||||
#<ANNOY-TEST>
|
||||
set(annoy_srcs
|
||||
${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/IndexAnnoy.cpp
|
||||
)
|
||||
|
||||
if (NOT TARGET test_annoy)
|
||||
add_executable(test_annoy test_annoy.cpp ${annoy_srcs} ${util_srcs})
|
||||
endif ()
|
||||
target_link_libraries(test_annoy ${depend_libs} ${unittest_libs} ${basic_libs})
|
||||
|
||||
#<HNSW-TEST>
|
||||
set(hnsw_srcs
|
||||
${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/IndexHNSW.cpp
|
||||
@ -144,6 +154,7 @@ install(TARGETS test_idmap DESTINATION unittest)
|
||||
install(TARGETS test_binaryidmap DESTINATION unittest)
|
||||
install(TARGETS test_sptag DESTINATION unittest)
|
||||
install(TARGETS test_knowhere_common DESTINATION unittest)
|
||||
install(TARGETS test_annoy DESTINATION unittest)
|
||||
|
||||
if (KNOWHERE_GPU_VERSION)
|
||||
install(TARGETS test_gpuresource DESTINATION unittest)
|
||||
|
||||
221
core/src/index/unittest/test_annoy.cpp
Normal file
221
core/src/index/unittest/test_annoy.cpp
Normal file
@ -0,0 +1,221 @@
|
||||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "knowhere/common/Exception.h"
|
||||
#include "knowhere/index/vector_index/IndexAnnoy.h"
|
||||
|
||||
#include "unittest/utils.h"
|
||||
|
||||
using ::testing::Combine;
|
||||
using ::testing::TestWithParam;
|
||||
using ::testing::Values;
|
||||
|
||||
int
|
||||
main() {
|
||||
int64_t d = 64; // dimension
|
||||
int64_t nb = 10000; // database size
|
||||
int64_t nq = 10; // 10000; // nb of queries
|
||||
faiss::ConcurrentBitsetPtr bitset = std::make_shared<faiss::ConcurrentBitset>(nb);
|
||||
|
||||
int64_t* ids = new int64_t[nb];
|
||||
float* xb = new float[d * nb];
|
||||
float* xq = new float[d * nq];
|
||||
|
||||
for (int i = 0; i < nb; i++) {
|
||||
for (int j = 0; j < d; j++) xb[d * i + j] = (float)drand48();
|
||||
xb[d * i] += i / 1000.;
|
||||
ids[i] = i;
|
||||
}
|
||||
printf("gen xb and ids done! \n");
|
||||
|
||||
// srand((unsigned)time(NULL));
|
||||
auto random_seed = (unsigned)time(NULL);
|
||||
printf("delete ids: \n");
|
||||
for (int i = 0; i < nq; i++) {
|
||||
auto tmp = rand_r(&random_seed) % nb;
|
||||
printf("%d\n", tmp);
|
||||
// std::cout << "before delete, test result: " << bitset->test(tmp) << std::endl;
|
||||
bitset->set(tmp);
|
||||
// std::cout << "after delete, test result: " << bitset->test(tmp) << std::endl;
|
||||
for (int j = 0; j < d; j++) xq[d * i + j] = xb[d * tmp + j];
|
||||
// xq[d * i] += i / 1000.;
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
int k = 4;
|
||||
int n_trees = 5;
|
||||
int search_k = 100;
|
||||
milvus::knowhere::IndexAnnoy index;
|
||||
milvus::knowhere::DatasetPtr base_dataset = generate_dataset(nb, d, (const void*)xb, ids);
|
||||
|
||||
milvus::knowhere::Config base_conf{
|
||||
{milvus::knowhere::meta::DIM, d},
|
||||
{milvus::knowhere::meta::TOPK, k},
|
||||
{milvus::knowhere::IndexParams::n_trees, n_trees},
|
||||
{milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2},
|
||||
};
|
||||
milvus::knowhere::DatasetPtr query_dataset = generate_query_dataset(nq, d, (const void*)xq);
|
||||
milvus::knowhere::Config query_conf{
|
||||
{milvus::knowhere::meta::DIM, d},
|
||||
{milvus::knowhere::meta::TOPK, k},
|
||||
{milvus::knowhere::IndexParams::search_k, search_k},
|
||||
};
|
||||
|
||||
index.BuildAll(base_dataset, base_conf);
|
||||
|
||||
printf("------------sanity check----------------\n");
|
||||
{ // sanity check
|
||||
auto res = index.Query(query_dataset, query_conf);
|
||||
printf("Query done!\n");
|
||||
const int64_t* I = res->Get<int64_t*>(milvus::knowhere::meta::IDS);
|
||||
float* D = res->Get<float*>(milvus::knowhere::meta::DISTANCE);
|
||||
|
||||
printf("I=\n");
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("D=\n");
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (int j = 0; j < k; j++) printf("%7g ", D[i * k + j]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
printf("---------------search xq-------------\n");
|
||||
{ // search xq
|
||||
auto res = index.Query(query_dataset, query_conf);
|
||||
const int64_t* I = res->Get<int64_t*>(milvus::knowhere::meta::IDS);
|
||||
|
||||
printf("I=\n");
|
||||
for (int i = 0; i < nq; i++) {
|
||||
for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
printf("----------------search xq with delete------------\n");
|
||||
{ // search xq with delete
|
||||
index.SetBlacklist(bitset);
|
||||
auto res = index.Query(query_dataset, query_conf);
|
||||
auto I = res->Get<int64_t*>(milvus::knowhere::meta::IDS);
|
||||
|
||||
printf("I=\n");
|
||||
for (int i = 0; i < nq; i++) {
|
||||
for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
delete[] xb;
|
||||
delete[] xq;
|
||||
delete[] ids;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
class AnnoyTest : public DataGen, public TestWithParam<std::string> {
|
||||
protected:
|
||||
void
|
||||
SetUp() override {
|
||||
IndexType = GetParam();
|
||||
std::cout << "IndexType from GetParam() is: " << IndexType << std::endl;
|
||||
Generate(128, 1000, 5);
|
||||
index_ = std::make_shared<milvus::knowhere::IndexAnnoy>();
|
||||
conf = milvus::knowhere::Config{
|
||||
{milvus::knowhere::meta::DIM, dim},
|
||||
{milvus::knowhere::meta::TOPK, 1},
|
||||
{milvus::knowhere::IndexParams::n_trees, 4},
|
||||
{milvus::knowhere::IndexParams::search_k, 100},
|
||||
{milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2},
|
||||
};
|
||||
|
||||
// Init_with_default();
|
||||
}
|
||||
|
||||
protected:
|
||||
milvus::knowhere::Config conf;
|
||||
std::shared_ptr<milvus::knowhere::IndexAnnoy> index_ = nullptr;
|
||||
std::string IndexType;
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(AnnoyParameters, AnnoyTest, Values(""));
|
||||
|
||||
TEST_P(AnnoyTest, annoy_basic) {
|
||||
assert(!xb.empty());
|
||||
|
||||
// index_->Train(base_dataset, conf);
|
||||
index_->BuildAll(base_dataset, conf);
|
||||
auto result = index_->Query(query_dataset, conf);
|
||||
AssertAnns(result, nq, k);
|
||||
|
||||
{
|
||||
auto ids = result->Get<int64_t*>(milvus::knowhere::meta::IDS);
|
||||
auto dist = result->Get<float*>(milvus::knowhere::meta::DISTANCE);
|
||||
|
||||
std::stringstream ss_id;
|
||||
std::stringstream ss_dist;
|
||||
for (auto i = 0; i < nq; i++) {
|
||||
for (auto j = 0; j < k; ++j) {
|
||||
// ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
|
||||
// ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
|
||||
ss_id << *((int64_t*)(ids) + i * k + j) << " ";
|
||||
ss_dist << *((float*)(dist) + i * k + j) << " ";
|
||||
}
|
||||
ss_id << std::endl;
|
||||
ss_dist << std::endl;
|
||||
}
|
||||
std::cout << "id\n" << ss_id.str() << std::endl;
|
||||
std::cout << "dist\n" << ss_dist.str() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(AnnoyTest, annoy_delete) {
|
||||
assert(!xb.empty());
|
||||
|
||||
// index_->Train(base_dataset, conf);
|
||||
index_->BuildAll(base_dataset, conf);
|
||||
// index_->Add(base_dataset, conf);
|
||||
faiss::ConcurrentBitsetPtr bitset = std::make_shared<faiss::ConcurrentBitset>(nb);
|
||||
for (auto i = 0; i < nq; ++ i) {
|
||||
bitset->set(i);
|
||||
|
||||
auto result = index_->Query(query_dataset, conf);
|
||||
AssertAnns(result, nq, k);
|
||||
|
||||
{
|
||||
auto ids = result->Get<int64_t*>(milvus::knowhere::meta::IDS);
|
||||
auto dist = result->Get<float*>(milvus::knowhere::meta::DISTANCE);
|
||||
|
||||
std::stringstream ss_id;
|
||||
std::stringstream ss_dist;
|
||||
for (auto i = 0; i < nq; i++) {
|
||||
for (auto j = 0; j < k; ++j) {
|
||||
// ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
|
||||
// ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
|
||||
ss_id << *((int64_t*)(ids) + i * k + j) << " ";
|
||||
ss_dist << *((float*)(dist) + i * k + j) << " ";
|
||||
}
|
||||
ss_id << std::endl;
|
||||
ss_dist << std::endl;
|
||||
}
|
||||
std::cout << "id\n" << ss_id.str() << std::endl;
|
||||
std::cout << "dist\n" << ss_dist.str() << std::endl;
|
||||
} }
|
||||
}
|
||||
*/
|
||||
@ -71,6 +71,7 @@ DeleteByIDRequest::OnExecute() {
|
||||
if (table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IDMAP &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_BIN_IDMAP &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::HNSW &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::ANNOY &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFFLAT &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_BIN_IVFFLAT &&
|
||||
table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFSQ8 &&
|
||||
|
||||
@ -38,7 +38,7 @@ class TestConnect:
|
||||
if not connect.connected():
|
||||
milvus = get_milvus(args["handler"])
|
||||
uri_value = "tcp://%s:%s" % (args["ip"], args["port"])
|
||||
milvus.connect(uri=uri_value)
|
||||
milvus.connect(uri=uri_value, timeout=5)
|
||||
res = milvus.disconnect()
|
||||
with pytest.raises(Exception) as e:
|
||||
res = milvus.disconnect()
|
||||
@ -181,9 +181,8 @@ class TestConnect:
|
||||
'''
|
||||
milvus = get_milvus(args["handler"])
|
||||
uri_value = "tcp://%s:%s" % (args["ip"], args["port"])
|
||||
milvus.connect(uri=uri_value)
|
||||
|
||||
milvus.connect(uri=uri_value)
|
||||
milvus.connect(uri=uri_value, timeout=5)
|
||||
milvus.connect(uri=uri_value, timeout=5)
|
||||
assert milvus.connected()
|
||||
|
||||
def test_connect_disconnect_repeatedly_once(self, args):
|
||||
@ -209,10 +208,10 @@ class TestConnect:
|
||||
times = 10
|
||||
milvus = get_milvus(args["handler"])
|
||||
uri_value = "tcp://%s:%s" % (args["ip"], args["port"])
|
||||
milvus.connect(uri=uri_value)
|
||||
milvus.connect(uri=uri_value, timeout=5)
|
||||
for i in range(times):
|
||||
milvus.disconnect()
|
||||
milvus.connect(uri=uri_value)
|
||||
milvus.connect(uri=uri_value, timeout=5)
|
||||
assert milvus.connected()
|
||||
|
||||
# TODO: enable
|
||||
|
||||
@ -851,7 +851,7 @@ class TestSearchBase:
|
||||
'store_raw_vector': False}
|
||||
# create collection
|
||||
milvus = get_milvus(args["handler"])
|
||||
milvus.connect(uri=uri)
|
||||
milvus.connect(uri=uri, timeout=5)
|
||||
milvus.create_collection(param)
|
||||
vectors, ids = self.init_data(milvus, collection, nb=nb)
|
||||
query_vecs = vectors[nb//2:nb]
|
||||
@ -864,7 +864,7 @@ class TestSearchBase:
|
||||
|
||||
for i in range(threads_num):
|
||||
milvus = get_milvus(args["handler"])
|
||||
milvus.connect(uri=uri)
|
||||
milvus.connect(uri=uri, timeout=5)
|
||||
t = threading.Thread(target=search, args=(milvus, ))
|
||||
threads.append(t)
|
||||
t.start()
|
||||
@ -932,7 +932,7 @@ class TestSearchBase:
|
||||
'metric_type': MetricType.L2}
|
||||
# create collection
|
||||
milvus = get_milvus(args["handler"])
|
||||
milvus.connect(uri=uri)
|
||||
milvus.connect(uri=uri, timeout=5)
|
||||
milvus.create_collection(param)
|
||||
status, ids = milvus.add_vectors(collection, vectors)
|
||||
assert status.OK()
|
||||
@ -973,7 +973,7 @@ class TestSearchBase:
|
||||
'metric_type': MetricType.L2}
|
||||
# create collection
|
||||
milvus = get_milvus(args["handler"])
|
||||
milvus.connect(uri=uri)
|
||||
milvus.connect(uri=uri, timeout=5)
|
||||
milvus.create_collection(param)
|
||||
status, ids = milvus.add_vectors(collection, vectors)
|
||||
assert status.OK()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user