From 310d5d70bce99d59adefa0b983ce4a5aaf619da9 Mon Sep 17 00:00:00 2001
From: op-hunter <ophunter52@gmail.com>
Date: Fri, 27 Mar 2020 09:52:31 +0800
Subject: [PATCH] Annoy support (#1746)

* add annoy source code

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* add annoy knowhere

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* annoy local gtest passed

Signed-off-by: lichengming <chengming.li@zilliz.com>

* fix lint error and update changelog

Signed-off-by: lichengming <chengming.li@zilliz.com>

* fix compile error

Signed-off-by: cmli <chengming.li@zilliz.com>

* Update connect timeout in test cases

Signed-off-by: zw <zw@milvus.io>

* fix some potential bugs

Signed-off-by: cmli <chengming.li@zilliz.com>

* retry ci

Signed-off-by: cmli <chengming.li@zilliz.com>

* rerun ci!

Signed-off-by: cmli <chengming.li@zilliz.com>

* fix errors tested by c++ sdk

Signed-off-by: cmli <chengming.li@zilliz.com>

* fix lint error

Signed-off-by: cmli <chengming.li@zilliz.com>

Co-authored-by: shengjun.li <shengjun.li@zilliz.com>
Co-authored-by: lichengming <chengming.li@zilliz.com>
Co-authored-by: zw <zw@milvus.io>
---
 CHANGELOG.md                                  |    1 +
 NOTICE.md                                     |    1 +
 core/src/db/DBImpl.cpp                        |    1 +
 core/src/db/engine/ExecutionEngine.h          |    3 +-
 core/src/db/engine/ExecutionEngineImpl.cpp    |    4 +
 core/src/index/knowhere/CMakeLists.txt        |    1 +
 .../index/vector_index/ConfAdapter.cpp        |   16 +
 .../knowhere/index/vector_index/ConfAdapter.h |    9 +
 .../index/vector_index/ConfAdapterMgr.cpp     |    1 +
 .../index/vector_index/IndexAnnoy.cpp         |  172 ++
 .../knowhere/index/vector_index/IndexAnnoy.h  |   74 +
 .../knowhere/index/vector_index/IndexType.cpp |    2 +
 .../knowhere/index/vector_index/IndexType.h   |    2 +
 .../index/vector_index/VecIndexFactory.cpp    |    3 +
 .../vector_index/helpers/IndexParameter.h     |    4 +
 core/src/index/thirdparty/annoy/LICENSE       |  202 +++
 core/src/index/thirdparty/annoy/RELEASE.md    |   15 +
 .../thirdparty/annoy/examples/mmap_test.py    |   14 +
 .../annoy/examples/precision_test.cpp         |  176 +++
 .../annoy/examples/precision_test.py          |   46 +
 .../annoy/examples/s_compile_cpp.sh           |    7 +
 .../thirdparty/annoy/examples/simple_test.py  |   10 +
 .../thirdparty/annoy/src/annoygomodule.h      |   92 ++
 .../thirdparty/annoy/src/annoygomodule.i      |   96 ++
 .../src/index/thirdparty/annoy/src/annoylib.h | 1377 +++++++++++++++++
 .../thirdparty/annoy/src/annoyluamodule.cc    |  318 ++++
 .../index/thirdparty/annoy/src/annoymodule.cc |  632 ++++++++
 .../index/thirdparty/annoy/src/kissrandom.h   |  106 ++
 core/src/index/thirdparty/annoy/src/mman.h    |  238 +++
 core/src/index/unittest/CMakeLists.txt        |   11 +
 core/src/index/unittest/test_annoy.cpp        |  221 +++
 .../delivery/request/DeleteByIDRequest.cpp    |    1 +
 tests/milvus_python_test/test_connect.py      |   11 +-
 .../milvus_python_test/test_search_vectors.py |    8 +-
 34 files changed, 3864 insertions(+), 11 deletions(-)
 create mode 100644 core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.cpp
 create mode 100644 core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.h
 create mode 100644 core/src/index/thirdparty/annoy/LICENSE
 create mode 100644 core/src/index/thirdparty/annoy/RELEASE.md
 create mode 100644 core/src/index/thirdparty/annoy/examples/mmap_test.py
 create mode 100644 core/src/index/thirdparty/annoy/examples/precision_test.cpp
 create mode 100644 core/src/index/thirdparty/annoy/examples/precision_test.py
 create mode 100755 core/src/index/thirdparty/annoy/examples/s_compile_cpp.sh
 create mode 100644 core/src/index/thirdparty/annoy/examples/simple_test.py
 create mode 100644 core/src/index/thirdparty/annoy/src/annoygomodule.h
 create mode 100644 core/src/index/thirdparty/annoy/src/annoygomodule.i
 create mode 100644 core/src/index/thirdparty/annoy/src/annoylib.h
 create mode 100644 core/src/index/thirdparty/annoy/src/annoyluamodule.cc
 create mode 100644 core/src/index/thirdparty/annoy/src/annoymodule.cc
 create mode 100644 core/src/index/thirdparty/annoy/src/kissrandom.h
 create mode 100644 core/src/index/thirdparty/annoy/src/mman.h
 create mode 100644 core/src/index/unittest/test_annoy.cpp

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9832f15217..8e064e6788 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,7 @@ Please mark all change in change log and use the issue from GitHub
 -   \#1756 Fix memory exhausted during searching 
 
 ## Feature
+-   \#261  Integrate ANNOY into Milvus
 -   \#1603 BinaryFlat add 2 Metric: Substructure and Superstructure
 -   \#1660 IVF PQ CPU support deleted vectors searching
 -   \#1661 HNSW support deleted vectors searching
diff --git a/NOTICE.md b/NOTICE.md
index fb898419e7..b7cf8653e6 100644
--- a/NOTICE.md
+++ b/NOTICE.md
@@ -21,3 +21,4 @@
 | aws-sdk-cpp      | [Apache 2.0](https://github.com/aws/aws-sdk-cpp/blob/master/LICENSE)                                                           |
 | SPTAG            | [MIT](https://github.com/microsoft/SPTAG/blob/master/LICENSE)                                                                  |
 | hnswlib          | [Apache 2.0](https://github.com/nmslib/hnswlib/blob/master/LICENSE)                                                            |
+| annoy            | [Apache 2.0](https://github.com/spotify/annoy/blob/master/LICENSE)                                                             |
diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp
index cbb70b9466..fca45d369b 100644
--- a/core/src/db/DBImpl.cpp
+++ b/core/src/db/DBImpl.cpp
@@ -291,6 +291,7 @@ DBImpl::GetTableInfo(const std::string& table_id, TableInfo& table_info) {
         {(int32_t)engine::EngineType::FAISS_IVFFLAT, "IVFFLAT"},
         {(int32_t)engine::EngineType::FAISS_IVFSQ8, "IVFSQ8"},
         {(int32_t)engine::EngineType::NSG_MIX, "NSG"},
+        {(int32_t)engine::EngineType::ANNOY, "ANNOY"},
         {(int32_t)engine::EngineType::FAISS_IVFSQ8H, "IVFSQ8H"},
         {(int32_t)engine::EngineType::FAISS_PQ, "PQ"},
         {(int32_t)engine::EngineType::SPTAG_KDT, "KDT"},
diff --git a/core/src/db/engine/ExecutionEngine.h b/core/src/db/engine/ExecutionEngine.h
index e7739d4d53..56f829960d 100644
--- a/core/src/db/engine/ExecutionEngine.h
+++ b/core/src/db/engine/ExecutionEngine.h
@@ -35,7 +35,8 @@ enum class EngineType {
     FAISS_BIN_IDMAP,
     FAISS_BIN_IVFFLAT,
     HNSW,
-    MAX_VALUE = HNSW,
+    ANNOY,
+    MAX_VALUE = ANNOY,
 };
 
 enum class MetricType {
diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp
index 3864fe514c..9fcd8863de 100644
--- a/core/src/db/engine/ExecutionEngineImpl.cpp
+++ b/core/src/db/engine/ExecutionEngineImpl.cpp
@@ -216,6 +216,10 @@ ExecutionEngineImpl::CreatetVecIndex(EngineType type) {
             index = vec_index_factory.CreateVecIndex(knowhere::IndexEnum::INDEX_HNSW, mode);
             break;
         }
+        case EngineType::ANNOY: {
+            index = vec_index_factory.CreateVecIndex(knowhere::IndexEnum::INDEX_ANNOY, mode);
+            break;
+        }
         default: {
             ENGINE_LOG_ERROR << "Unsupported index type " << (int)type;
             return nullptr;
diff --git a/core/src/index/knowhere/CMakeLists.txt b/core/src/index/knowhere/CMakeLists.txt
index 02add8ef97..b6aff8a894 100644
--- a/core/src/index/knowhere/CMakeLists.txt
+++ b/core/src/index/knowhere/CMakeLists.txt
@@ -50,6 +50,7 @@ set(index_srcs
         knowhere/index/vector_index/IndexSPTAG.cpp
         knowhere/index/vector_index/IndexType.cpp
         knowhere/index/vector_index/VecIndexFactory.cpp
+        knowhere/index/vector_index/IndexAnnoy.cpp
         )
 
 set(depend_libs
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp
index a2df3d1c78..0f69887715 100644
--- a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp
+++ b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.cpp
@@ -297,5 +297,21 @@ BinIVFConfAdapter::CheckTrain(Config& oricfg, const IndexMode mode) {
     return true;
 }
 
+bool
+ANNOYConfAdapter::CheckTrain(Config& oricfg, const IndexMode mode) {
+    static int64_t MIN_NTREES = 0;
+    // A very large n_trees makes index building slow; raise this cap only if there is a real need for it.
+    static int64_t MAX_NTREES = 16384;
+    // NOTE(review): CheckIntByRange is defined outside this hunk; presumably it rejects out-of-range n_trees.
+    CheckIntByRange(knowhere::IndexParams::n_trees, MIN_NTREES, MAX_NTREES);
+
+    return ConfAdapter::CheckTrain(oricfg, mode);
+}
+
+bool
+ANNOYConfAdapter::CheckSearch(Config& oricfg, const IndexType type, const IndexMode mode) {
+    return ConfAdapter::CheckSearch(oricfg, type, mode);  // delegates fully; nothing Annoy-specific is checked
+}
+
 }  // namespace knowhere
 }  // namespace milvus
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.h b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.h
index a46da52cbf..2ff6ebe3d3 100644
--- a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.h
+++ b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapter.h
@@ -84,5 +84,14 @@ class HNSWConfAdapter : public ConfAdapter {
     CheckSearch(Config& oricfg, const IndexType type, const IndexMode mode) override;
 };
 
+class ANNOYConfAdapter : public ConfAdapter {  // config validator for the ANNOY index type
+ public:
+    bool
+    CheckTrain(Config& oricfg, const IndexMode mode) override;  // build-time checks (n_trees range)
+
+    bool
+    CheckSearch(Config& oricfg, const IndexType type, const IndexMode mode) override;  // search-time checks
+};
+
 }  // namespace knowhere
 }  // namespace milvus
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapterMgr.cpp b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapterMgr.cpp
index 91cd5d05be..2de15290e2 100644
--- a/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapterMgr.cpp
+++ b/core/src/index/knowhere/knowhere/index/vector_index/ConfAdapterMgr.cpp
@@ -46,6 +46,7 @@ AdapterMgr::RegisterAdapter() {
     REGISTER_CONF_ADAPTER(ConfAdapter, IndexEnum::INDEX_SPTAG_KDT_RNT, sptag_kdt_adapter);
     REGISTER_CONF_ADAPTER(ConfAdapter, IndexEnum::INDEX_SPTAG_BKT_RNT, sptag_bkt_adapter);
     REGISTER_CONF_ADAPTER(HNSWConfAdapter, IndexEnum::INDEX_HNSW, hnsw_adapter);
+    REGISTER_CONF_ADAPTER(ANNOYConfAdapter, IndexEnum::INDEX_ANNOY, annoy_adapter);
 }
 
 }  // namespace knowhere
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.cpp
new file mode 100644
index 0000000000..4adb9f62c5
--- /dev/null
+++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.cpp
@@ -0,0 +1,173 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#include "knowhere/index/vector_index/IndexAnnoy.h"
+
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "hnswlib/hnswalg.h"
+#include "hnswlib/space_ip.h"
+#include "hnswlib/space_l2.h"
+#include "knowhere/common/Exception.h"
+#include "knowhere/common/Log.h"
+#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
+#include "knowhere/index/vector_index/helpers/FaissIO.h"
+
+namespace milvus {
+namespace knowhere {
+
+BinarySet
+IndexAnnoy::Serialize(const Config& config) {
+    if (!index_) {
+        KNOWHERE_THROW_MSG("index not initialize or trained");
+    }
+
+    // Copies len bytes from src into a new array whose lifetime is managed by the returned shared_ptr.
+    auto make_blob = [](const void* src, size_t len) {
+        std::shared_ptr<uint8_t> blob(new uint8_t[len], [](uint8_t* buf) { delete[] buf; });
+        memcpy(blob.get(), src, len);
+        return blob;
+    };
+
+    // Three entries are written: metric name (no trailing '\0'), dimension (8 raw bytes), index bytes.
+    const auto metric_len = metric_type_.length();
+    auto metric_blob = make_blob(metric_type_.data(), metric_len);
+    auto dim = Dim();
+    auto dim_blob = make_blob(&dim, sizeof(uint64_t));
+    auto index_length = index_->get_index_length();
+    auto index_blob = make_blob(index_->get_index(), (size_t)index_length);
+
+    BinarySet res_set;
+    res_set.Append("annoy_metric_type", metric_blob, metric_len);
+    res_set.Append("annoy_dim", dim_blob, sizeof(uint64_t));
+    res_set.Append("annoy_index_data", index_blob, index_length);
+    return res_set;
+}
+
+void
+IndexAnnoy::Load(const BinarySet& index_binary) {
+    // Restore the metric name with exactly its stored length so the comparisons below can match it.
+    auto metric_type = index_binary.GetByName("annoy_metric_type");
+    metric_type_.assign(reinterpret_cast<const char*>(metric_type->data.get()), (size_t)metric_type->size);
+
+    auto dim_data = index_binary.GetByName("annoy_dim");
+    uint64_t dim = 0;
+    memcpy(&dim, dim_data->data.get(), std::min(sizeof(dim), (size_t)dim_data->size));  // never write past dim
+    // Create an empty index for the stored metric; any metric other than L2/IP is rejected.
+    if (metric_type_ == Metric::L2) {
+        index_ = std::make_shared<AnnoyIndex<int64_t, float, ::Euclidean, ::Kiss64Random>>(dim);
+    } else if (metric_type_ == Metric::IP) {
+        index_ = std::make_shared<AnnoyIndex<int64_t, float, ::DotProduct, ::Kiss64Random>>(dim);
+    } else {
+        KNOWHERE_THROW_MSG("metric not supported " + metric_type_);
+    }
+    // On failure load_index hands back an error string in p; it is copied and then released with free().
+    auto index_data = index_binary.GetByName("annoy_index_data");
+    char* p = nullptr;
+    if (!index_->load_index(index_data->data.get(), index_data->size, &p)) {
+        std::string error_msg(p);
+        free(p);
+        KNOWHERE_THROW_MSG(error_msg);
+    }
+}
+
+void
+IndexAnnoy::BuildAll(const DatasetPtr& dataset_ptr, const Config& config) {
+    if (index_) {
+        // The index has already been built; calling BuildAll again is a no-op.
+        return;
+    }
+    // NOTE(review): GETTENSORWITHIDS is defined elsewhere; it presumably declares dim, rows, p_data and p_ids.
+    GETTENSORWITHIDS(dataset_ptr)
+
+    metric_type_ = config[Metric::TYPE];
+    if (metric_type_ == Metric::L2) {
+        index_ = std::make_shared<AnnoyIndex<int64_t, float, ::Euclidean, ::Kiss64Random>>(dim);
+    } else if (metric_type_ == Metric::IP) {
+        index_ = std::make_shared<AnnoyIndex<int64_t, float, ::DotProduct, ::Kiss64Random>>(dim);
+    } else {
+        KNOWHERE_THROW_MSG("metric not supported " + metric_type_);
+    }
+    // Add every row under its id from p_ids; row i starts at offset dim * i in the float tensor.
+    for (int i = 0; i < rows; ++i) {
+        index_->add_item(p_ids[i], (const float*)p_data + dim * i);
+    }
+    // Build the index with the configured n_trees.
+    index_->build(config[IndexParams::n_trees].get<int64_t>());
+}
+
+DatasetPtr
+IndexAnnoy::Query(const DatasetPtr& dataset_ptr, const Config& config) {
+    if (!index_) {
+        KNOWHERE_THROW_MSG("index not initialize or trained");
+    }
+    // Top-k search for every query row (GETTENSOR is expected to declare dim, rows and p_data).
+    GETTENSOR(dataset_ptr)
+    auto k = config[meta::TOPK].get<int64_t>();
+    auto search_k = config[IndexParams::search_k].get<int64_t>();
+    auto all_num = rows * k;
+    auto p_id = (int64_t*)malloc(all_num * sizeof(int64_t));
+    auto p_dist = (float*)malloc(all_num * sizeof(float));
+    faiss::ConcurrentBitsetPtr blacklist = nullptr;
+    GetBlacklist(blacklist);
+
+#pragma omp parallel for
+    for (unsigned int i = 0; i < rows; ++i) {
+        std::vector<int64_t> result;
+        result.reserve(k);
+        std::vector<float> distances;
+        distances.reserve(k);
+        index_->get_nns_by_vector((const float*)p_data + i * dim, k, search_k, &result, &distances, blacklist);
+        result.resize(k, -1);  // fewer than k hits are possible; pad so the copies below stay in bounds
+        distances.resize(k, std::numeric_limits<float>::infinity());
+        memcpy(p_id + k * i, result.data(), k * sizeof(int64_t));
+        memcpy(p_dist + k * i, distances.data(), k * sizeof(float));
+    }
+    auto ret_ds = std::make_shared<Dataset>();
+    ret_ds->Set(meta::IDS, p_id);
+    ret_ds->Set(meta::DISTANCE, p_dist);
+    return ret_ds;
+}
+
+int64_t
+IndexAnnoy::Count() {
+    if (!index_) {
+        KNOWHERE_THROW_MSG("index not initialize");
+    }
+    // Item count as reported by the underlying Annoy index.
+    return index_->get_n_items();
+}
+
+int64_t
+IndexAnnoy::Dim() {
+    if (!index_) {
+        KNOWHERE_THROW_MSG("index not initialize");
+    }
+    // Vector dimension as reported by the underlying Annoy index.
+    return index_->get_dim();
+}
+
+int64_t
+IndexAnnoy::IndexSize() {
+    if (index_size_ != -1) {
+        return index_size_;
+    }
+    // Not cached yet (-1): store dim * count * sizeof(float), i.e. the size of the raw vectors, and return it.
+    return index_size_ = Dim() * Count() * sizeof(float);
+}
+}  // namespace knowhere
+}  // namespace milvus
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.h
new file mode 100644
index 0000000000..cbef69c5a4
--- /dev/null
+++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexAnnoy.h
@@ -0,0 +1,74 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+
+#include "annoy/src/annoylib.h"
+#include "annoy/src/kissrandom.h"
+
+#include "knowhere/common/Exception.h"
+#include "knowhere/index/vector_index/VecIndex.h"
+
+namespace milvus {
+namespace knowhere {
+
+class IndexAnnoy : public VecIndex {  // VecIndex implementation backed by an Annoy index
+ public:
+    IndexAnnoy() {
+        index_type_ = IndexEnum::INDEX_ANNOY;
+    }
+    // Packs the metric name, dimension and index bytes into a BinarySet; throws if no index exists.
+    BinarySet
+    Serialize(const Config& config = Config()) override;
+    // Rebuilds the index from a BinarySet written by Serialize(); throws on an unsupported metric.
+    void
+    Load(const BinarySet& index_binary) override;
+    // Builds the whole index from a dataset in one step; does nothing if an index already exists.
+    void
+    BuildAll(const DatasetPtr& dataset_ptr, const Config& config) override;
+    // Separate training is not supported: always throws.
+    void
+    Train(const DatasetPtr& dataset_ptr, const Config& config) override {
+        KNOWHERE_THROW_MSG("Annoy not support build item dynamically, please invoke BuildAll interface.");
+    }
+    // Adding items after construction is not supported: always throws.
+    void
+    Add(const DatasetPtr& dataset_ptr, const Config& config) override {
+        KNOWHERE_THROW_MSG("Annoy not support add item dynamically, please invoke BuildAll interface.");
+    }
+    // Adding items without ids is not supported: always throws.
+    void
+    AddWithoutIds(const DatasetPtr&, const Config&) override {
+        KNOWHERE_THROW_MSG("Incremental index is not supported");
+    }
+    // Top-k search for every query row of the dataset; throws if no index exists.
+    DatasetPtr
+    Query(const DatasetPtr& dataset_ptr, const Config& config) override;
+    // Number of items in the index; throws if no index exists.
+    int64_t
+    Count() override;
+    // Vector dimension of the index; throws if no index exists.
+    int64_t
+    Dim() override;
+    // Cached size estimate computed as Dim() * Count() * sizeof(float).
+    int64_t
+    IndexSize() override;
+
+ private:
+    MetricType metric_type_;  // metric name, set in BuildAll() or restored in Load()
+    std::shared_ptr<AnnoyIndexInterface<int64_t, float>> index_ = nullptr;  // null until BuildAll() or Load()
+};
+
+}  // namespace knowhere
+}  // namespace milvus
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexType.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexType.cpp
index 27ec3444d1..bb7aec4ed2 100644
--- a/core/src/index/knowhere/knowhere/index/vector_index/IndexType.cpp
+++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexType.cpp
@@ -34,6 +34,7 @@ static std::unordered_map<int32_t, std::string> old_index_type_str_map = {
     {(int32_t)OldIndexType::SPTAG_KDT_RNT_CPU, IndexEnum::INDEX_SPTAG_KDT_RNT},
     {(int32_t)OldIndexType::SPTAG_BKT_RNT_CPU, IndexEnum::INDEX_SPTAG_BKT_RNT},
     {(int32_t)OldIndexType::HNSW, IndexEnum::INDEX_HNSW},
+    {(int32_t)OldIndexType::ANNOY, IndexEnum::INDEX_ANNOY},
     {(int32_t)OldIndexType::FAISS_BIN_IDMAP, IndexEnum::INDEX_FAISS_BIN_IDMAP},
     {(int32_t)OldIndexType::FAISS_BIN_IVFLAT_CPU, IndexEnum::INDEX_FAISS_BIN_IVFFLAT},
 };
@@ -49,6 +50,7 @@ static std::unordered_map<std::string, int32_t> str_old_index_type_map = {
     {IndexEnum::INDEX_SPTAG_KDT_RNT, (int32_t)OldIndexType::SPTAG_KDT_RNT_CPU},
     {IndexEnum::INDEX_SPTAG_BKT_RNT, (int32_t)OldIndexType::SPTAG_BKT_RNT_CPU},
     {IndexEnum::INDEX_HNSW, (int32_t)OldIndexType::HNSW},
+    {IndexEnum::INDEX_ANNOY, (int32_t)OldIndexType::ANNOY},
     {IndexEnum::INDEX_FAISS_BIN_IDMAP, (int32_t)OldIndexType::FAISS_BIN_IDMAP},
     {IndexEnum::INDEX_FAISS_BIN_IVFFLAT, (int32_t)OldIndexType::FAISS_BIN_IVFLAT_CPU},
 };
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexType.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexType.h
index 1f2b26866d..fcb1dbd430 100644
--- a/core/src/index/knowhere/knowhere/index/vector_index/IndexType.h
+++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexType.h
@@ -34,6 +34,7 @@ enum class OldIndexType {
     FAISS_IVFPQ_MIX,
     SPTAG_BKT_RNT_CPU,
     HNSW,
+    ANNOY,
     FAISS_BIN_IDMAP = 100,
     FAISS_BIN_IVFLAT_CPU = 101,
 };
@@ -54,6 +55,7 @@ constexpr const char* INDEX_NSG = "NSG";
 constexpr const char* INDEX_SPTAG_KDT_RNT = "SPTAG_KDT_RNT";
 constexpr const char* INDEX_SPTAG_BKT_RNT = "SPTAG_BKT_RNT";
 constexpr const char* INDEX_HNSW = "HNSW";
+constexpr const char* INDEX_ANNOY = "ANNOY";
 }  // namespace IndexEnum
 
 enum class IndexMode { MODE_CPU = 0, MODE_GPU = 1 };
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/VecIndexFactory.cpp b/core/src/index/knowhere/knowhere/index/vector_index/VecIndexFactory.cpp
index 5a512870ed..8e3119ecac 100644
--- a/core/src/index/knowhere/knowhere/index/vector_index/VecIndexFactory.cpp
+++ b/core/src/index/knowhere/knowhere/index/vector_index/VecIndexFactory.cpp
@@ -13,6 +13,7 @@
 
 #include "knowhere/common/Exception.h"
 #include "knowhere/common/Log.h"
+#include "knowhere/index/vector_index/IndexAnnoy.h"
 #include "knowhere/index/vector_index/IndexBinaryIDMAP.h"
 #include "knowhere/index/vector_index/IndexBinaryIVF.h"
 #include "knowhere/index/vector_index/IndexHNSW.h"
@@ -78,6 +79,8 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
         return std::make_shared<knowhere::CPUSPTAGRNG>("BKT");
     } else if (type == IndexEnum::INDEX_HNSW) {
         return std::make_shared<knowhere::IndexHNSW>();
+    } else if (type == IndexEnum::INDEX_ANNOY) {
+        return std::make_shared<knowhere::IndexAnnoy>();
     } else {
         return nullptr;
     }
diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h b/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h
index b9f6b361fe..b37988d881 100644
--- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h
+++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h
@@ -44,6 +44,10 @@ constexpr const char* candidate = "candidate_pool_size";
 constexpr const char* efConstruction = "efConstruction";
 constexpr const char* M = "M";
 constexpr const char* ef = "ef";
+
+// Annoy Params
+constexpr const char* n_trees = "n_trees";
+constexpr const char* search_k = "search_k";
 }  // namespace IndexParams
 
 namespace Metric {
diff --git a/core/src/index/thirdparty/annoy/LICENSE b/core/src/index/thirdparty/annoy/LICENSE
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/core/src/index/thirdparty/annoy/RELEASE.md b/core/src/index/thirdparty/annoy/RELEASE.md
new file mode 100644
index 0000000000..c3a1147ce9
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/RELEASE.md
@@ -0,0 +1,15 @@
+How to release
+--------------
+
+1. Make sure you're on master. `git checkout master && git fetch && git reset --hard origin/master`
+1. Update `setup.py` to the newest version, `git add setup.py && git commit -m "version 1.2.3"`
+1. `python setup.py sdist bdist_wheel`
+1. `git tag -a v1.2.3 -m "version 1.2.3"`
+1. `git push --tags origin master` to push the latest version to GitHub
+1. Go to https://github.com/spotify/annoy/releases and click "Draft a new release"
+1. `twine upload dist/annoy-1.2.3*`
+
+TODO
+----
+
+* Wheel
diff --git a/core/src/index/thirdparty/annoy/examples/mmap_test.py b/core/src/index/thirdparty/annoy/examples/mmap_test.py
new file mode 100644
index 0000000000..4f86e86713
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/examples/mmap_test.py
@@ -0,0 +1,14 @@
+from annoy import AnnoyIndex
+
+a = AnnoyIndex(3, 'angular')
+a.add_item(0, [1, 0, 0])
+a.add_item(1, [0, 1, 0])
+a.add_item(2, [0, 0, 1])
+a.build(-1)
+a.save('test.tree')
+
+b = AnnoyIndex(3)
+b.load('test.tree')
+
+print(b.get_nns_by_item(0, 100))
+print(b.get_nns_by_vector([1.0, 0.5, 0.5], 100))
diff --git a/core/src/index/thirdparty/annoy/examples/precision_test.cpp b/core/src/index/thirdparty/annoy/examples/precision_test.cpp
new file mode 100644
index 0000000000..2c006487c9
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/examples/precision_test.cpp
@@ -0,0 +1,176 @@
+/*
+ * precision_test.cpp
+
+ *
+ *  Created on: Jul 13, 2016
+ *      Author: Claudio Sanhueza
+ *      Contact: csanhuezalobos@gmail.com
+ */
+
+#include <iostream>
+#include <iomanip>
+#include "../src/kissrandom.h"
+#include "../src/annoylib.h"
+#include <chrono>
+#include <algorithm>
+#include <map>
+#include <random>
+
+
+// Benchmark driver: indexes n random f-dimensional Gaussian vectors under the
+// angular metric, builds 2*f trees, saves the index to "precision.tree", then
+// for prec_n random items intersects result lists of several sizes with a
+// K-item reference list and prints running hit rate and average query time.
+// Always returns 0.
+int precision(int f=40, int n=1000000){
+	std::chrono::high_resolution_clock::time_point t_start, t_end;
+
+	std::default_random_engine generator;
+	std::normal_distribution<double> distribution(0.0, 1.0);
+
+	//******************************************************
+	//Building the tree
+	AnnoyIndex<int, double, Angular, Kiss32Random> t = AnnoyIndex<int, double, Angular, Kiss32Random>(f);
+
+	std::cout << "Building index ... be patient !!" << std::endl;
+	std::cout << "\"Trees that are slow to grow bear the best fruit\" (Moliere)" << std::endl;
+
+	// One scratch buffer serves every item. The previous code malloc()'d a new
+	// buffer per item and never freed it, leaking n * f doubles.
+	std::vector<double> vec(f);
+	for(int i=0; i<n; ++i){
+		for(int z=0; z<f; ++z){
+			vec[z] = (distribution(generator));
+		}
+
+		t.add_item(i, vec.data());
+
+		std::cout << "Loading objects ...\t object: "<< i+1 << "\tProgress:"<< std::fixed << std::setprecision(2) << (double) i / (double)(n + 1) * 100 << "%\r";
+	}
+	std::cout << std::endl;
+	std::cout << "Building index num_trees = 2 * num_features ...";
+	t_start = std::chrono::high_resolution_clock::now();
+	t.build(2 * f);
+	t_end = std::chrono::high_resolution_clock::now();
+	auto duration = std::chrono::duration_cast<std::chrono::seconds>( t_end - t_start ).count();
+	std::cout << " Done in "<< duration << " secs." << std::endl;
+
+	std::cout << "Saving index ...";
+	t.save("precision.tree");
+	std::cout << " Done" << std::endl;
+
+	//******************************************************
+	std::vector<int> limits = {10, 100, 1000, 10000};
+	int K=10;
+	int prec_n = 1000;
+
+	std::map<int, double> prec_sum;
+	std::map<int, double> time_sum;
+	std::vector<int> closest;
+
+	//init precision and timers map
+	for(std::vector<int>::iterator it = limits.begin(); it!=limits.end(); ++it){
+		prec_sum[(*it)] = 0.0;
+		time_sum[(*it)] = 0.0;
+	}
+
+	// doing the work
+	for(int i=0; i<prec_n; ++i){
+
+		//select a random node
+		int j = rand() % n;
+
+		std::cout << "finding nbs for " << j << std::endl;
+
+		// getting the K closest
+		t.get_nns_by_item(j, K, n, &closest, nullptr);
+
+		std::vector<int> toplist;
+		std::vector<int> intersection;
+
+		for(std::vector<int>::iterator limit = limits.begin(); limit!=limits.end(); ++limit){
+
+			t_start = std::chrono::high_resolution_clock::now();
+			t.get_nns_by_item(j, (*limit), (size_t) -1, &toplist, nullptr); //search_k defaults to "n_trees * n" if not provided.
+			t_end = std::chrono::high_resolution_clock::now();
+			auto duration = std::chrono::duration_cast<std::chrono::milliseconds>( t_end - t_start ).count();
+
+			//intersecting results
+			std::sort(closest.begin(), closest.end(), std::less<int>());
+			std::sort(toplist.begin(), toplist.end(), std::less<int>());
+			intersection.resize(std::max(closest.size(), toplist.size()));
+			std::vector<int>::iterator it_set = std::set_intersection(closest.begin(), closest.end(), toplist.begin(), toplist.end(), intersection.begin());
+			intersection.resize(it_set-intersection.begin());
+
+			// storing metrics
+			int found = intersection.size();
+			double hitrate = found / (double) K;
+			prec_sum[(*limit)] += hitrate;
+
+			time_sum[(*limit)] += duration;
+
+			//deallocate memory
+			vector<int>().swap(intersection);
+			vector<int>().swap(toplist);
+		}
+
+		//print resulting metrics
+		// time_sum accumulates milliseconds, so scale by 1e-03 to print seconds
+		for(std::vector<int>::iterator limit = limits.begin(); limit!=limits.end(); ++limit){
+			std::cout << "limit: " << (*limit) << "\tprecision: "<< std::fixed << std::setprecision(2) << (100.0 * prec_sum[(*limit)] / (i + 1)) << "% \tavg. time: "<< std::fixed<< std::setprecision(6) << (time_sum[(*limit)] / (i + 1)) * 1e-03 << "s" << std::endl;
+		}
+
+		closest.clear(); vector<int>().swap(closest);
+
+	}
+
+	std::cout << "\nDone" << std::endl;
+	return 0;
+}
+
+
+void help(){
+	// Write the command-line synopsis of this example to stdout.
+	std::cout << "Annoy Precision C++ example" << std::endl << "Usage:" << std::endl
+	          << "(default)		./precision" << std::endl
+	          << "(using parameters)	./precision num_features num_nodes" << std::endl
+	          << std::endl;
+}
+
+void feedback(int f, int n){
+	// Echo the effective benchmark parameters before the run starts.
+	std::cout << "Runing precision example with:" << std::endl
+	          << "num. features: " << f << std::endl
+	          << "num. nodes: " << n << std::endl << std::endl;
+}
+
+
+// Entry point: no arguments runs the defaults (40 features, 1,000,000 nodes);
+// two arguments are read as num_features and num_nodes. Any other argument
+// count, or a non-positive value, prints the usage text and exits with failure.
+int main(int argc, char **argv) {
+	int f = 40;
+	int n = 1000000;
+
+	if(argc == 3){
+		f = atoi(argv[1]);
+		n = atoi(argv[2]);
+	}
+	else if(argc != 1){
+		help();
+		return EXIT_FAILURE;
+	}
+
+	// atoi() returns 0 for non-numeric text, and precision() evaluates
+	// rand() % n, so n == 0 would divide by zero; reject such input up front.
+	if(f <= 0 || n <= 0){
+		help();
+		return EXIT_FAILURE;
+	}
+
+	feedback(f,n);
+
+	precision(f, n);
+
+	return EXIT_SUCCESS;
+}
diff --git a/core/src/index/thirdparty/annoy/examples/precision_test.py b/core/src/index/thirdparty/annoy/examples/precision_test.py
new file mode 100644
index 0000000000..d179e6b9ba
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/examples/precision_test.py
@@ -0,0 +1,46 @@
+from __future__ import print_function
+import random, time
+from annoy import AnnoyIndex
+
+try:
+    xrange
+except NameError:
+    # Python 3 compat
+    xrange = range
+
+n, f = 100000, 40
+
+t = AnnoyIndex(f, 'angular')
+for i in xrange(n):
+    v = []
+    for z in xrange(f):
+        v.append(random.gauss(0, 1))
+    t.add_item(i, v)
+
+t.build(2 * f)
+t.save('test.tree')
+
+limits = [10, 100, 1000, 10000]
+k = 10
+prec_sum = {}
+prec_n = 1000
+time_sum = {}
+
+for i in xrange(prec_n):
+    j = random.randrange(0, n)
+        
+    closest = set(t.get_nns_by_item(j, k, n))
+    for limit in limits:
+        t0 = time.time()
+        toplist = t.get_nns_by_item(j, k, limit)
+        T = time.time() - t0
+            
+        found = len(closest.intersection(toplist))
+        hitrate = 1.0 * found / k
+        prec_sum[limit] = prec_sum.get(limit, 0.0) + hitrate
+        time_sum[limit] = time_sum.get(limit, 0.0) + T
+
+for limit in limits:
+    print('limit: %-9d precision: %6.2f%% avg time: %.6fs'
+          % (limit, 100.0 * prec_sum[limit] / (i + 1),
+             time_sum[limit] / (i + 1)))
diff --git a/core/src/index/thirdparty/annoy/examples/s_compile_cpp.sh b/core/src/index/thirdparty/annoy/examples/s_compile_cpp.sh
new file mode 100755
index 0000000000..687a6082b2
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/examples/s_compile_cpp.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Compiles the Annoy precision example found in the current directory.
+set -e  # abort on compiler failure instead of still reporting "Done"
+
+echo "compiling precision example..."
+g++ precision_test.cpp -o precision_test -std=c++11
+echo "Done"
diff --git a/core/src/index/thirdparty/annoy/examples/simple_test.py b/core/src/index/thirdparty/annoy/examples/simple_test.py
new file mode 100644
index 0000000000..27e0343a26
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/examples/simple_test.py
@@ -0,0 +1,10 @@
+from annoy import AnnoyIndex
+
+a = AnnoyIndex(3, 'angular')
+a.add_item(0, [1, 0, 0])
+a.add_item(1, [0, 1, 0])
+a.add_item(2, [0, 0, 1])
+a.build(-1)
+
+print(a.get_nns_by_item(0, 100))
+print(a.get_nns_by_vector([1.0, 0.5, 0.5], 100))
diff --git a/core/src/index/thirdparty/annoy/src/annoygomodule.h b/core/src/index/thirdparty/annoy/src/annoygomodule.h
new file mode 100644
index 0000000000..005ed06558
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/src/annoygomodule.h
@@ -0,0 +1,92 @@
+#include "annoylib.h"
+#include "kissrandom.h"
+
+namespace GoAnnoy {
+
+// Thin wrapper that exposes an Annoy index to Go through SWIG. This base class
+// owns the underlying index through `ptr`; the metric subclasses create it.
+class AnnoyIndex {
+ protected:
+  ::AnnoyIndexInterface<int32_t, float> *ptr;  // owned; deleted in the destructor
+  int f;  // vector dimensionality, used to size getItem() output
+
+ public:
+  AnnoyIndex() : ptr(NULL), f(0) {}  // no index yet, so the destructor's delete is a no-op
+  // Virtual so a subclass can be destroyed through an AnnoyIndex pointer.
+  virtual ~AnnoyIndex() { delete ptr; }
+  void addItem(int item, const float* w) {
+    ptr->add_item(item, w);
+  }
+  void build(int q) {
+    ptr->build(q);
+  }
+  bool save(const char* filename, bool prefault) {
+    return ptr->save(filename, prefault);
+  }
+  bool save(const char* filename) {  // same as save(filename, true)
+    return ptr->save(filename, true);
+  }
+  void unload() {
+    ptr->unload();
+  }
+  bool load(const char* filename, bool prefault) {
+    return ptr->load(filename, prefault);
+  }
+  bool load(const char* filename) {  // same as load(filename, true)
+    return ptr->load(filename, true);
+  }
+  float getDistance(int i, int j) {
+    return ptr->get_distance(i, j);
+  }
+  void getNnsByItem(int item, int n, int search_k, vector<int32_t>* result, vector<float>* distances) {
+    ptr->get_nns_by_item(item, n, search_k, result, distances);
+  }
+  void getNnsByVector(const float* w, int n, int search_k, vector<int32_t>* result, vector<float>* distances) {
+    ptr->get_nns_by_vector(w, n, search_k, result, distances);
+  }
+  void getNnsByItem(int item, int n, int search_k, vector<int32_t>* result) {  // ids only, no distances
+    ptr->get_nns_by_item(item, n, search_k, result, NULL);
+  }
+  void getNnsByVector(const float* w, int n, int search_k, vector<int32_t>* result) {  // ids only, no distances
+    ptr->get_nns_by_vector(w, n, search_k, result, NULL);
+  }
+  int getNItems() {
+    return (int)ptr->get_n_items();
+  }
+  void verbose(bool v) {
+    ptr->verbose(v);
+  }
+  void getItem(int item, vector<float> *v) {
+    v->resize(this->f);  // make room for all f components before the index writes them
+    ptr->get_item(item, &v->front());
+  }
+  bool onDiskBuild(const char* filename) {
+    return ptr->on_disk_build(filename);
+  }
+};
+
+// Go-facing Annoy index that uses the angular metric.
+class AnnoyIndexAngular : public AnnoyIndex {
+ public:
+  AnnoyIndexAngular(int f) {
+    this->f = f;  // remembered so getItem() can size its output
+    ptr = new ::AnnoyIndex<int32_t, float, ::Angular, ::Kiss64Random>(f);
+  }
+};
+
+class AnnoyIndexEuclidean : public AnnoyIndex {  // Go-facing index, Euclidean metric
+ public:
+  AnnoyIndexEuclidean(int f) {
+    this->f = f;  // remembered so getItem() can size its output
+    ptr = new ::AnnoyIndex<int32_t, float, ::Euclidean, ::Kiss64Random>(f);
+  }
+};
+
+class AnnoyIndexManhattan : public AnnoyIndex {  // Go-facing index, Manhattan metric
+ public:
+  AnnoyIndexManhattan(int f) {
+    this->f = f;  // remembered so getItem() can size its output
+    ptr = new ::AnnoyIndex<int32_t, float, ::Manhattan, ::Kiss64Random>(f);
+  }
+};
+}
diff --git a/core/src/index/thirdparty/annoy/src/annoygomodule.i b/core/src/index/thirdparty/annoy/src/annoygomodule.i
new file mode 100644
index 0000000000..9882cbeb2c
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/src/annoygomodule.i
@@ -0,0 +1,96 @@
+%module annoyindex
+
+%{
+#include "annoygomodule.h"
+%}
+
+
+// const float *
+%typemap(gotype) (const float *)  "[]float32"
+
+%typemap(in) (const float *)
+%{
+    // Copy the incoming Go slice into a C++ vector and pass the callee a
+    // pointer to that copy.
+    float *v = (float *)$input.array;
+    vector<float> w(v, v + $input.len);
+    // &w[0] is undefined on an empty vector, so an empty slice is passed as
+    // NULL instead.
+    $1 = w.empty() ? NULL : &w[0];
+%}
+
+// vector<int32_t> * : result parameter, exposed to Go as *[]int
+%typemap(gotype) (vector<int32_t> *)  "*[]int"
+
+%typemap(in) (vector<int32_t> *)
+%{
+  $1 = new vector<int32_t>();  // fresh empty vector; the incoming Go slice contents are not read
+%}
+
+%typemap(freearg) (vector<int32_t> *)
+%{
+  delete $1;  // releases the vector allocated by the "in" typemap
+%}
+
+%typemap(argout) (vector<int32_t> *)
+%{
+  {
+    $input->len = $1->size();
+    $input->cap = $1->size();
+    $input->array = malloc($input->len * sizeof(intgo));  // NOTE(review): result is not checked for NULL
+    for (int i = 0; i < $1->size(); i++) {
+        ((intgo *)$input->array)[i] = (intgo)(*$1)[i];  // convert each int32_t id to intgo
+    }
+  }
+%}
+
+
+// vector<float> * : result parameter, exposed to Go as *[]float32
+%typemap(gotype) (vector<float> *)  "*[]float32"
+
+%typemap(in) (vector<float> *)
+%{
+  $1 = new vector<float>();  // fresh empty vector; the incoming Go slice contents are not read
+%}
+
+%typemap(freearg) (vector<float> *)
+%{
+  delete $1;  // releases the vector allocated by the "in" typemap
+%}
+
+%typemap(argout) (vector<float> *)
+%{
+  {
+    $input->len = $1->size();
+    $input->cap = $1->size();
+    $input->array = malloc($input->len * sizeof(float));  // NOTE(review): result is not checked for NULL
+    for (int i = 0; i < $1->size(); i++) {
+        ((float *)$input->array)[i] = (float)(*$1)[i];  // copy each value into the new array
+    }
+  }
+%}
+
+
+%typemap(gotype) (const char *) "string"
+
+%typemap(in) (const char *)
+%{
+  $1 = (char *)calloc((((_gostring_)$input).n + 1), sizeof(char));  // n + 1 zeroed bytes, so the copy below stays NUL-terminated
+  strncpy($1, (((_gostring_)$input).p), ((_gostring_)$input).n);  // NOTE(review): the calloc result is not checked for NULL
+%}
+
+%typemap(freearg) (const char *)
+%{
+  free($1);  // releases the temporary C string made by the "in" typemap
+%}
+
+
+/* Let's just grab the original header file here */
+%include "annoygomodule.h"
+
+%feature("notabstract") GoAnnoyIndexAngular;
+%feature("notabstract") GoAnnoyIndexEuclidean;
+%feature("notabstract") GoAnnoyIndexManhattan;
+
+
+
diff --git a/core/src/index/thirdparty/annoy/src/annoylib.h b/core/src/index/thirdparty/annoy/src/annoylib.h
new file mode 100644
index 0000000000..eebfa78d62
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/src/annoylib.h
@@ -0,0 +1,1377 @@
+// Copyright (c) 2013 Spotify AB
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+
+#ifndef ANNOYLIB_H
+#define ANNOYLIB_H
+
+#include <stdio.h>
+#include <sys/stat.h>
+#ifndef _MSC_VER
+#include <unistd.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stddef.h>
+
+#if defined(_MSC_VER) && _MSC_VER == 1500
+typedef unsigned char     uint8_t;
+typedef signed __int32    int32_t;
+typedef unsigned __int64  uint64_t;
+typedef signed __int64    int64_t;
+#else
+#include <stdint.h>
+#endif
+
+#if defined(_MSC_VER) || defined(__MINGW32__)
+ // a bit hacky, but override some definitions to support 64 bit
+ #define off_t int64_t
+ #define lseek_getsize(fd) _lseeki64(fd, 0, SEEK_END)
+ #ifndef NOMINMAX
+  #define NOMINMAX
+ #endif
+ #include "mman.h"
+ #include <windows.h>
+#else
+ #include <sys/mman.h>
+ #define lseek_getsize(fd) lseek(fd, 0, SEEK_END)
+#endif
+
+#include <cerrno>
+#include <string.h>
+#include <math.h>
+#include <vector>
+#include <algorithm>
+#include <queue>
+#include <limits>
+
+#ifdef _MSC_VER
+// Needed for Visual Studio to disable runtime checks for mempcy
+#pragma runtime_checks("s", off)
+#endif
+
+// This allows others to supply their own logger / error printer without
+// requiring Annoy to import their headers. See RcppAnnoy for a use case.
+#ifndef __ERROR_PRINTER_OVERRIDE__
+  #define showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); }
+#else
+  #define showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); }
+#endif
+
+// Portable alloc definition, cf Writing R Extensions, Section 1.6.4
+#ifdef __GNUC__
+  // Includes GCC, clang and Intel compilers
+  # undef alloca
+  # define alloca(x) __builtin_alloca((x))
+#elif defined(__sun) || defined(_AIX)
+  // this is necessary (and sufficient) for Solaris 10 and AIX 6:
+  # include <alloca.h>
+#endif
+
+inline void set_error_from_errno(char **error, const char* msg) {
+  showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
+  if (error) {
+    *error = (char *)malloc(256);  // TODO: win doesn't support snprintf
+    sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
+  }
+}
+
+inline void set_error_from_string(char **error, const char* msg) {
+  showUpdate("%s\n", msg);
+  if (error) {
+    *error = (char *)malloc(strlen(msg) + 1);
+    strcpy(*error, msg);
+  }
+}
+
+// We let the v array in the Node struct take whatever space is needed, so this is a mostly insignificant number.
+// Compilers need *some* size defined for the v array, and some memory checking tools will flag for buffer overruns if this is set too low.
+#define V_ARRAY_SIZE 65536
+
+#ifndef _MSC_VER
+#define popcount __builtin_popcountll
+#else // See #293, #358
+#define isnan(x) _isnan(x)
+#define popcount cole_popcount
+#endif
+
+#if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__)  // See #402
+#define USE_AVX512
+#elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__)
+#define USE_AVX
+#else
+#endif
+
+#if defined(USE_AVX) || defined(USE_AVX512)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#elif defined(__GNUC__)
+#include <x86intrin.h>
+#endif
+#endif
+// Not a SIMD header: include it unconditionally, not only on AVX + GCC builds.
+#include <src/index/thirdparty/faiss/utils/ConcurrentBitset.h>
+
+
+using std::vector;
+using std::pair;
+using std::numeric_limits;
+using std::make_pair;
+
+inline void* remap_memory(void* _ptr, int _fd, size_t old_size, size_t new_size) {  // change a mapping from old_size to new_size bytes; returns the resulting address
+#ifdef __linux__
+  _ptr = mremap(_ptr, old_size, new_size, MREMAP_MAYMOVE);  // the mapping may move; _fd is unused on this path
+#else
+  munmap(_ptr, old_size);  // no mremap on this path: drop the old mapping, then map _fd again from offset 0
+#ifdef MAP_POPULATE
+  _ptr = mmap(_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0);
+#else
+  _ptr = mmap(_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0);
+#endif
+#endif
+  return _ptr;  // NOTE(review): the mapping call's result is returned unchecked, including any failure value
+}
+
+namespace {
+
+template<typename S, typename Node>
+inline Node* get_node_ptr(const void* _nodes, const size_t _s, const S i) {
+  return (Node*)&((uint8_t *)_nodes)[_s * i];  // address of the i-th slot, slots being _s bytes apart
+}
+
+// Inner product of two length-f arrays, accumulated in index order using T
+// arithmetic; yields 0 when f <= 0.
+template<typename T>
+inline T dot(const T* x, const T* y, int f) {
+  T acc = 0;
+  for (int k = 0; k < f; ++k) {
+    acc += x[k] * y[k];
+  }
+  return acc;
+}
+
+template<typename T>
+inline T manhattan_distance(const T* x, const T* y, int f) {
+  T total = 0.0;  // L1 distance: running sum of |x[k] - y[k]|
+  for (int left = f; left > 0; --left, ++x, ++y)
+    total += fabs(*x - *y);
+  return total;
+}
+
+template<typename T>
+inline T euclidean_distance(const T* x, const T* y, int f) {
+  // Squared L2 distance (no square root is taken here). Differences are
+  // squared term by term instead of expanding into dot products, which
+  // would risk catastrophic cancellation (#314).
+  T sum_sq = 0.0;
+  for (int k = 0; k < f; ++k) {
+    const T delta = x[k] - y[k];
+    sum_sq += delta * delta;
+  }
+  return sum_sq;
+}
+
+#ifdef USE_AVX
+// Horizontal sum: adds the eight floats of a 256-bit vector into one scalar.
+inline float hsum256_ps_avx(__m256 v) {
+  const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(v, 1), _mm256_castps256_ps128(v));  // upper half + lower half -> 4 partial sums
+  const __m128 x64 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128));  // fold lanes 2,3 onto lanes 0,1 -> 2 partial sums
+  const __m128 x32 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55));  // lane 0 += lane 1
+  return _mm_cvtss_f32(x32);  // the total now sits in lane 0
+}
+
+template<>
+inline float dot<float>(const float* x, const float *y, int f) {  // AVX inner product for float: 8 lanes per step, scalar tail
+  float result = 0;
+  if (f > 7) {
+    __m256 d = _mm256_setzero_ps();  // eight running partial sums
+    for (; f > 7; f -= 8) {
+      d = _mm256_add_ps(d, _mm256_mul_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y)));  // unaligned loads, multiply, accumulate
+      x += 8;
+      y += 8;
+    }
+    // Collapse the eight partial sums into the scalar result.
+    result += hsum256_ps_avx(d);
+  }
+  // Handle the remaining (fewer than 8) elements one at a time.
+  for (; f > 0; f--) {
+    result += *x * *y;
+    x++;
+    y++;
+  }
+  return result;
+}
+
+template<>
+inline float manhattan_distance<float>(const float* x, const float* y, int f) {  // AVX L1 distance for float: 8 lanes per step, scalar tail
+  float result = 0;
+  int i = f;  // elements still to process
+  if (f > 7) {
+    __m256 manhattan = _mm256_setzero_ps();  // eight running partial sums
+    __m256 minus_zero = _mm256_set1_ps(-0.0f);  // only the sign bit set in every lane
+    for (; i > 7; i -= 8) {
+      const __m256 x_minus_y = _mm256_sub_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y));
+      const __m256 distance = _mm256_andnot_ps(minus_zero, x_minus_y); // Absolute value of x_minus_y (forces sign bit to zero)
+      manhattan = _mm256_add_ps(manhattan, distance);
+      x += 8;
+      y += 8;
+    }
+    // Collapse the eight partial sums into the scalar result.
+    result = hsum256_ps_avx(manhattan);
+  }
+  // Handle the remaining (fewer than 8) elements one at a time.
+  for (; i > 0; i--) {
+    result += fabsf(*x - *y);
+    x++;
+    y++;
+  }
+  return result;
+}
+
+template<>
+inline float euclidean_distance<float>(const float* x, const float* y, int f) {  // AVX squared L2 distance for float: 8 lanes per step, scalar tail
+  float result=0;
+  if (f > 7) {
+    __m256 d = _mm256_setzero_ps();  // eight running partial sums
+    for (; f > 7; f -= 8) {
+      const __m256 diff = _mm256_sub_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y));
+      d = _mm256_add_ps(d, _mm256_mul_ps(diff, diff)); // no support for fmadd in AVX...
+      x += 8;
+      y += 8;
+    }
+    // Collapse the eight partial sums of squared differences into the result.
+    result = hsum256_ps_avx(d);
+  }
+  // Handle the remaining (fewer than 8) elements one at a time.
+  for (; f > 0; f--) {
+    float tmp = *x - *y;
+    result += tmp * tmp;
+    x++;
+    y++;
+  }
+  return result;
+}
+
+#endif
+
+#ifdef USE_AVX512
+template<>
+inline float dot<float>(const float* x, const float *y, int f) {  // AVX-512 inner product for float: 16 lanes per step, scalar tail
+  float result = 0;
+  if (f > 15) {
+    __m512 d = _mm512_setzero_ps();  // sixteen running partial sums
+    for (; f > 15; f -= 16) {
+      //AVX512F includes FMA
+      d = _mm512_fmadd_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y), d);  // d += x * y, fused
+      x += 16;
+      y += 16;
+    }
+    // Collapse the sixteen partial sums into the scalar result.
+    result += _mm512_reduce_add_ps(d);
+  }
+  // Handle the remaining (fewer than 16) elements one at a time.
+  for (; f > 0; f--) {
+    result += *x * *y;
+    x++;
+    y++;
+  }
+  return result;
+}
+
+template<>
+inline float manhattan_distance<float>(const float* x, const float* y, int f) {  // AVX-512 L1 distance for float: 16 lanes per step, scalar tail
+  float result = 0;
+  int i = f;  // elements still to process
+  if (f > 15) {
+    __m512 manhattan = _mm512_setzero_ps();  // sixteen running partial sums
+    for (; i > 15; i -= 16) {
+      const __m512 x_minus_y = _mm512_sub_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y));
+      manhattan = _mm512_add_ps(manhattan, _mm512_abs_ps(x_minus_y));  // accumulate |x - y| per lane
+      x += 16;
+      y += 16;
+    }
+    // Collapse the sixteen partial sums into the scalar result.
+    result = _mm512_reduce_add_ps(manhattan);
+  }
+  // Handle the remaining (fewer than 16) elements one at a time.
+  for (; i > 0; i--) {
+    result += fabsf(*x - *y);
+    x++;
+    y++;
+  }
+  return result;
+}
+
+template<>
+inline float euclidean_distance<float>(const float* x, const float* y, int f) {  // AVX-512 squared L2 distance for float: 16 lanes per step, scalar tail
+  float result=0;
+  if (f > 15) {
+    __m512 d = _mm512_setzero_ps();  // sixteen running partial sums
+    for (; f > 15; f -= 16) {
+      const __m512 diff = _mm512_sub_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y));
+      d = _mm512_fmadd_ps(diff, diff, d);  // d += diff * diff, fused
+      x += 16;
+      y += 16;
+    }
+    // Collapse the sixteen partial sums of squared differences into the result.
+    result = _mm512_reduce_add_ps(d);
+  }
+  // Handle the remaining (fewer than 16) elements one at a time.
+  for (; f > 0; f--) {
+    float tmp = *x - *y;
+    result += tmp * tmp;
+    x++;
+    y++;
+  }
+  return result;
+}
+
+#endif
+
+ 
+template<typename T>
+inline T get_norm(T* v, int f) {  // Euclidean length of the f-element vector v
+  return sqrt(dot(v, v, f));  // sqrt(v . v)
+}
+
+template<typename T, typename Random, typename Distance, typename Node>
+inline void two_means(const vector<Node*>& nodes, int f, Random& random, bool cosine, Node* p, Node* q) {
+  /*
+    This algorithm is a huge heuristic. Empirically it works really well, but I
+    can't motivate it well. The basic idea is to keep two centroids (p and q) and
+    assign sampled points to the nearer one. Each centroid's distance is weighted
+    by the number of points already assigned to it, which balances the two groups.
+  */
+  static int iteration_steps = 200;
+  size_t count = nodes.size();
+  // Seed the centroids with two distinct random nodes.
+  size_t i = random.index(count);
+  size_t j = random.index(count-1);  // NOTE(review): looks like this expects count >= 2 - confirm with callers
+  j += (j >= i); // ensure that i != j
+  // p and q start as copies of the two seed nodes.
+  Distance::template copy_node<T, Node>(p, nodes[i], f);
+  Distance::template copy_node<T, Node>(q, nodes[j], f);
+  // In cosine mode the centroids are kept on the unit sphere.
+  if (cosine) { Distance::template normalize<T, Node>(p, f); Distance::template normalize<T, Node>(q, f); }
+  Distance::init_node(p, f);
+  Distance::init_node(q, f);
+  // ic / jc: number of points folded into p / q so far (the seed counts as one).
+  int ic = 1, jc = 1;
+  for (int l = 0; l < iteration_steps; l++) {
+    size_t k = random.index(count);  // sample one node per iteration
+    T di = ic * Distance::distance(p, nodes[k], f),
+      dj = jc * Distance::distance(q, nodes[k], f);
+    T norm = cosine ? get_norm(nodes[k]->v, f) : 1;
+    if (!(norm > T(0))) {  // skips zero-norm vectors; written this way so a NaN norm is skipped too
+      continue;
+    }
+    if (di < dj) {
+      for (int z = 0; z < f; z++)
+        p->v[z] = (p->v[z] * ic + nodes[k]->v[z] / norm) / (ic + 1);  // running mean including the new point
+      Distance::init_node(p, f);
+      ic++;
+    } else if (dj < di) {  // an exact tie (di == dj) updates neither centroid
+      for (int z = 0; z < f; z++)
+        q->v[z] = (q->v[z] * jc + nodes[k]->v[z] / norm) / (jc + 1);  // running mean including the new point
+      Distance::init_node(q, f);
+      jc++;
+    }
+  }
+}
+} // namespace
+
+struct Base {
+  // No-op hook over the whole node array; metrics needing a global pre-processing pass override it.
+  template<typename T, typename S, typename Node>
+  static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) {
+  }
+
+  // No-op hook for giving metric-specific node fields sane defaults.
+  template<typename Node>
+  static inline void zero_value(Node* dest) {
+  }
+
+  // Copies the f vector components of source into dest.
+  template<typename T, typename Node>
+  static inline void copy_node(Node* dest, const Node* source, const int f) {
+    memcpy(dest->v, source->v, sizeof(T) * f);
+  }
+
+  // Scales node->v to unit length in place; a vector whose norm is not
+  // positive is left untouched.
+  template<typename T, typename Node>
+  static inline void normalize(Node* node, int f) {
+    const T length = get_norm(node->v, f);
+    if (!(length > 0)) return;
+    for (int z = 0; z < f; z++) node->v[z] /= length;
+  }
+};
+
+struct Angular : Base {
+  template<typename S, typename T>
+  struct Node {
+    /*
+     * We store a binary tree where each node has two things
+     * - A vector associated with it
+     * - Two children
+     * All nodes occupy the same amount of memory
+     * All nodes with n_descendants == 1 are leaf nodes.
+     * A memory optimization is that for nodes with 2 <= n_descendants <= K,
+     * we skip the vector. Instead we store a list of all descendants. K is
+     * determined by the number of items that fits in the space of the vector.
+     * For nodes with n_descendants == 1 the vector is a data point.
+     * For nodes with n_descendants > K the vector is the normal of the split plane.
+     * Note that we can't really do sizeof(Node<S, T>) because we cheat and allocate
+     * more memory to be able to fit the vector outside
+     */
+    S n_descendants;
+    union {
+      S children[2]; // Will possibly store more than 2
+      T norm;  // squared norm of v, as written by init_node(); shares storage with children
+    };
+    T v[V_ARRAY_SIZE];
+  };
+  template<typename S, typename T>
+  static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
+    // want to calculate (a/|a| - b/|b|)^2
+    // = a^2 / a^2 + b^2 / b^2 - 2ab/|a||b|
+    // = 2 - 2cos
+    T pp = x->norm ? x->norm : dot(x->v, x->v, f); // For backwards compatibility reasons, we need to fall back and compute the norm here
+    T qq = y->norm ? y->norm : dot(y->v, y->v, f);
+    T pq = dot(x->v, y->v, f);
+    T ppqq = pp * qq;  // |x|^2 * |y|^2
+    if (ppqq > 0) return 2.0 - 2.0 * pq / sqrt(ppqq);
+    else return 2.0; // a zero-length vector is treated as cos == 0
+  }
+  template<typename S, typename T>
+  static inline T margin(const Node<S, T>* n, const T* y, int f) {  // dot product of y with the vector stored in n
+    return dot(n->v, y, f);
+  }
+  template<typename S, typename T, typename Random>
+  static inline bool side(const Node<S, T>* n, const T* y, int f, Random& random) {  // true when the margin is positive
+    T dot = margin(n, y, f);
+    if (dot != 0)
+      return (dot > 0);
+    else
+      return (bool)random.flip();  // zero margin: pick a side at random
+  }
+  template<typename S, typename T, typename Random>
+  static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
+    Node<S, T>* p = (Node<S, T>*)alloca(s);  // scratch centroid of s bytes, obtained from alloca
+    Node<S, T>* q = (Node<S, T>*)alloca(s);
+    two_means<T, Random, Angular, Node<S, T> >(nodes, f, random, true, p, q);
+    for (int z = 0; z < f; z++)
+      n->v[z] = p->v[z] - q->v[z];  // n->v becomes the normalized difference of the two centroids
+    Base::normalize<T, Node<S, T> >(n, f);
+  }
+  template<typename T>
+  static inline T normalized_distance(T distance) {
+    // Used when requesting distances from Python layer
+    // Turns out sometimes the squared distance is -0.0
+    // so we have to make sure it's a positive number.
+    return sqrt(std::max(distance, T(0)));
+  }
+  template<typename T>
+  static inline T pq_distance(T distance, T margin, int child_nr) {  // min(distance, margin), with margin negated for child 0
+    if (child_nr == 0)
+      margin = -margin;
+    return std::min(distance, margin);
+  }
+  template<typename T>
+  static inline T pq_initial_value() {  // returns +infinity for T
+    return numeric_limits<T>::infinity();
+  }
+  template<typename S, typename T>
+  static inline void init_node(Node<S, T>* n, int f) {  // caches the squared norm of n->v in n->norm
+    n->norm = dot(n->v, n->v, f);
+  }
+  static const char* name() {
+    return "angular";
+  }
+};
+
+
+struct DotProduct : Angular {
+  template<typename S, typename T>
+  struct Node {
+    /*
+     * This is an extension of the Angular node with an extra attribute for the scaled norm.
+     */
+    S n_descendants;
+    S children[2]; // Will possibly store more than 2
+    T dot_factor;  // extra component; preprocess() sets it to sqrt(max_norm^2 - norm^2)
+    T v[V_ARRAY_SIZE];
+  };
+  // Metric name.
+  static const char* name() {
+    return "dot";
+  }
+  template<typename S, typename T>
+  static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {  // negated inner product of the two vectors (dot_factor is not included)
+    return -dot(x->v, y->v, f);
+  }
+  // Resets the extra component of a node.
+  template<typename Node>
+  static inline void zero_value(Node* dest) {
+    dest->dot_factor = 0;
+  }
+  // Deliberately empty: this Node has no norm field, unlike Angular::Node.
+  template<typename S, typename T>
+  static inline void init_node(Node<S, T>* n, int f) {
+  }
+  // Copies the f vector components and the extra dot_factor component.
+  template<typename T, typename Node>
+  static inline void copy_node(Node* dest, const Node* source, const int f) {
+    memcpy(dest->v, source->v, f * sizeof(T));
+    dest->dot_factor = source->dot_factor;
+  }
+  // Sets n to the normalized difference of the two two_means centroids, dot_factor included.
+  template<typename S, typename T, typename Random>
+  static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
+    Node<S, T>* p = (Node<S, T>*)alloca(s);  // scratch centroid of s bytes, obtained from alloca
+    Node<S, T>* q = (Node<S, T>*)alloca(s);
+    DotProduct::zero_value(p);
+    DotProduct::zero_value(q);
+    two_means<T, Random, DotProduct, Node<S, T> >(nodes, f, random, true, p, q);
+    for (int z = 0; z < f; z++)
+      n->v[z] = p->v[z] - q->v[z];
+    n->dot_factor = p->dot_factor - q->dot_factor;
+    DotProduct::normalize<T, Node<S, T> >(n, f);
+  }
+  // Scales v and dot_factor so the (f + 1)-component vector has unit length; a non-positive norm leaves the node unchanged.
+  template<typename T, typename Node>
+  static inline void normalize(Node* node, int f) {
+    T norm = sqrt(dot(node->v, node->v, f) + pow(node->dot_factor, 2));
+    if (norm > 0) {
+      for (int z = 0; z < f; z++)
+        node->v[z] /= norm;
+      node->dot_factor /= norm;
+    }
+  }
+  // Dot product of y with n->v, plus the square of n's dot_factor.
+  template<typename S, typename T>
+  static inline T margin(const Node<S, T>* n, const T* y, int f) {
+    return dot(n->v, y, f) + (n->dot_factor * n->dot_factor);
+  }
+  // True when the margin is positive; a zero margin picks a side at random.
+  template<typename S, typename T, typename Random>
+  static inline bool side(const Node<S, T>* n, const T* y, int f, Random& random) {
+    T dot = margin(n, y, f);
+    if (dot != 0)
+      return (dot > 0);
+    else
+      return (bool)random.flip();
+  }
+  // Undoes the negation applied by distance(), giving back the inner product.
+  template<typename T>
+  static inline T normalized_distance(T distance) {
+    return -distance;
+  }
+  // Fills in dot_factor for the first node_count nodes of the array.
+  template<typename T, typename S, typename Node>
+  static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) {
+    // This uses a method from Microsoft Research for transforming inner product spaces to cosine/angular-compatible spaces.
+    // (Bachrach et al., 2014, see https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/XboxInnerProduct.pdf)
+    // The node array is walked three times; dot_factor temporarily holds each node's norm.
+    // Step one: compute the norm of each vector and park it in dot_factor (NaN is stored as 0)
+    for (S i = 0; i < node_count; i++) {
+      Node* node = get_node_ptr<S, Node>(nodes, _s, i);
+      T norm = sqrt(dot(node->v, node->v, f));
+      if (isnan(norm)) norm = 0;
+      node->dot_factor = norm;
+    }
+    // (dot_factor of every node now holds that node's norm)
+    // Step two: find the maximum norm
+    T max_norm = 0;
+    for (S i = 0; i < node_count; i++) {
+      Node* node = get_node_ptr<S, Node>(nodes, _s, i);
+      if (node->dot_factor > max_norm) {
+        max_norm = node->dot_factor;
+      }
+    }
+    // (max_norm stays 0 when there are no nodes or every norm is 0)
+    // Step three: set each vector's extra dimension to sqrt(max_norm^2 - norm^2)
+    for (S i = 0; i < node_count; i++) {
+      Node* node = get_node_ptr<S, Node>(nodes, _s, i);
+      T node_norm = node->dot_factor;
+      // The node with the largest norm ends up with dot_factor == 0.
+      T dot_factor = sqrt(pow(max_norm, static_cast<T>(2.0)) - pow(node_norm, static_cast<T>(2.0)));
+      if (isnan(dot_factor)) dot_factor = 0;
+      // Overwrite the parked norm with the final extra component.
+      node->dot_factor = dot_factor;
+    }
+  }
+};
+
+// Hamming metric over bit-packed vectors: every element of Node::v holds
+// sizeof(T) * 8 bits. Split nodes store the index of the splitting bit in v[0].
+struct Hamming : Base {
+  template<typename S, typename T>
+  struct Node {
+    S n_descendants;
+    S children[2];
+    T v[V_ARRAY_SIZE];
+  };
+
+  // Number of random bit positions tried before create_split falls back to
+  // scanning every bit.
+  static const size_t max_iterations = 20;
+
+  // Priority used when descending into child `child_nr`: the parent's
+  // priority, reduced by one when the margin does not match that child.
+  template<typename T>
+  static inline T pq_distance(T distance, T margin, int child_nr) {
+    const bool other_side = (margin != (unsigned int) child_nr);
+    return distance - other_side;
+  }
+
+  // Starting priority for tree roots.
+  template<typename T>
+  static inline T pq_initial_value() {
+    return numeric_limits<T>::max();
+  }
+
+  // Portable bit count.
+  // Note: Only used with MSVC 9, which lacks intrinsics and fails to
+  // calculate std::bitset::count for v > 32bit. Uses the generalized
+  // approach by Eric Cole.
+  // See https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64
+  template<typename T>
+  static inline int cole_popcount(T v) {
+    v = v - ((v >> 1) & (T)~(T)0/3);
+    v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);
+    v = (v + (v >> 4)) & (T)~(T)0/255*15;
+    const T folded = (T)(v * ((T)~(T)0/255));
+    return folded >> (sizeof(T) - 1) * 8;
+  }
+
+  // Number of differing bits between the first f words of x and y.
+  template<typename S, typename T>
+  static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
+    size_t differing = 0;
+    int word = 0;
+    while (word < f) {
+      differing += popcount(x->v[word] ^ y->v[word]);
+      ++word;
+    }
+    return differing;
+  }
+
+  // Tests the bit of y selected by n->v[0]; bits are numbered from the most
+  // significant bit of each word.
+  template<typename S, typename T>
+  static inline bool margin(const Node<S, T>* n, const T* y, int f) {
+    static const size_t n_bits = sizeof(T) * 8;
+    const T word = n->v[0] / n_bits;
+    const auto mask = (static_cast<T>(1) << (n_bits - 1 - (n->v[0] % n_bits)));
+    return (y[word] & mask) != 0;
+  }
+
+  // The side of a vector is simply the value of the splitting bit.
+  template<typename S, typename T, typename Random>
+  static inline bool side(const Node<S, T>* n, const T* y, int f, Random& random) {
+    return margin(n, y, f);
+  }
+
+  // Picks a splitting bit for `nodes` and stores its index in n->v[0].
+  // Random positions are tried first; if none of them separates the nodes,
+  // every bit position is tried in order. When no bit separates the nodes
+  // the last position tried is left in n->v[0].
+  template<typename S, typename T, typename Random>
+  static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
+    const int dim = f * 8 * sizeof(T);
+    const size_t total = nodes.size();
+
+    // True when the bit currently stored in n->v[0] leaves at least one node
+    // on each side.
+    auto separates = [&]() -> bool {
+      size_t set_count = 0;
+      for (size_t k = 0; k < total; ++k) {
+        if (margin(n, nodes[k]->v, f))
+          ++set_count;
+      }
+      return set_count > 0 && set_count < total;
+    };
+
+    // choose random position to split at
+    for (size_t attempt = 0; attempt < max_iterations; ++attempt) {
+      n->v[0] = random.index(dim);
+      if (separates())
+        return;
+    }
+
+    // brute-force search for splitting coordinate
+    for (int bit = 0; bit < dim; ++bit) {
+      n->v[0] = bit;
+      if (separates())
+        return;
+    }
+  }
+
+  // Hamming distances are reported unchanged.
+  template<typename T>
+  static inline T normalized_distance(T distance) {
+    return distance;
+  }
+
+  // Hamming nodes need no per-node initialisation.
+  template<typename S, typename T>
+  static inline void init_node(Node<S, T>* n, int f) {
+  }
+
+  static const char* name() {
+    return "hamming";
+  }
+};
+
+
+// Shared pieces of the hyperplane-based metrics (Euclidean, Manhattan):
+// node layout, margin/side computation and search-queue priorities.
+struct Minkowski : Base {
+  template<typename S, typename T>
+  struct Node {
+    S n_descendants;
+    T a; // need an extra constant term to determine the offset of the plane
+    S children[2];
+    T v[V_ARRAY_SIZE];
+  };
+
+  // Signed offset of y from the hyperplane (normal n->v, offset n->a).
+  template<typename S, typename T>
+  static inline T margin(const Node<S, T>* n, const T* y, int f) {
+    return dot(n->v, y, f) + n->a;
+  }
+
+  // Positive margin selects side 1, negative side 0; a zero margin is
+  // decided by a coin flip.
+  template<typename S, typename T, typename Random>
+  static inline bool side(const Node<S, T>* n, const T* y, int f, Random& random) {
+    const T signed_margin = margin(n, y, f);
+    if (signed_margin == 0)
+      return (bool)random.flip();
+    return (signed_margin > 0);
+  }
+
+  // Priority for descending into child `child_nr`: the smaller of the
+  // parent's priority and the margin, with the margin's sign flipped for
+  // child 0.
+  template<typename T>
+  static inline T pq_distance(T distance, T margin, int child_nr) {
+    const T oriented = (child_nr == 0) ? static_cast<T>(-margin) : margin;
+    return std::min(distance, oriented);
+  }
+
+  // Starting priority for tree roots.
+  template<typename T>
+  static inline T pq_initial_value() {
+    return numeric_limits<T>::infinity();
+  }
+};
+
+
+// Euclidean metric built on the Minkowski hyperplane machinery.
+struct Euclidean : Minkowski {
+  // Raw distance between two nodes as computed by euclidean_distance();
+  // normalized_distance() takes the square root before it is reported.
+  template<typename S, typename T>
+  static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
+    return euclidean_distance(x->v, y->v, f);
+  }
+
+  // Builds the splitting hyperplane for `nodes` into n: two_means() yields
+  // two centroids, the normal is their normalised difference, and the offset
+  // n->a places the plane through the midpoint between them.
+  template<typename S, typename T, typename Random>
+  static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
+    // Scratch centroids live on the stack for the duration of this call.
+    Node<S, T>* first = (Node<S, T>*)alloca(s);
+    Node<S, T>* second = (Node<S, T>*)alloca(s);
+    two_means<T, Random, Euclidean, Node<S, T> >(nodes, f, random, false, first, second);
+
+    for (int d = 0; d < f; ++d) {
+      n->v[d] = first->v[d] - second->v[d];
+    }
+    Base::normalize<T, Node<S, T> >(n, f);
+
+    n->a = 0.0;
+    for (int d = 0; d < f; ++d) {
+      n->a += -n->v[d] * (first->v[d] + second->v[d]) / 2;
+    }
+  }
+
+  // Reported distance: square root of the raw value, clamped at zero first.
+  template<typename T>
+  static inline T normalized_distance(T distance) {
+    const T clamped = std::max(distance, T(0));
+    return sqrt(clamped);
+  }
+
+  // Euclidean nodes need no per-node initialisation.
+  template<typename S, typename T>
+  static inline void init_node(Node<S, T>* n, int f) {
+  }
+
+  static const char* name() {
+    return "euclidean";
+  }
+
+};
+
+// Manhattan metric built on the Minkowski hyperplane machinery.
+struct Manhattan : Minkowski {
+  // Distance between two nodes as computed by manhattan_distance().
+  template<typename S, typename T>
+  static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
+    return manhattan_distance(x->v, y->v, f);
+  }
+
+  // Builds the splitting hyperplane for `nodes` into n: two_means() yields
+  // two centroids, the normal is their normalised difference, and the offset
+  // n->a places the plane through the midpoint between them.
+  template<typename S, typename T, typename Random>
+  static inline void create_split(const vector<Node<S, T>*>& nodes, int f, size_t s, Random& random, Node<S, T>* n) {
+    // Scratch centroids live on the stack for the duration of this call.
+    Node<S, T>* first = (Node<S, T>*)alloca(s);
+    Node<S, T>* second = (Node<S, T>*)alloca(s);
+    two_means<T, Random, Manhattan, Node<S, T> >(nodes, f, random, false, first, second);
+
+    for (int d = 0; d < f; ++d) {
+      n->v[d] = first->v[d] - second->v[d];
+    }
+    Base::normalize<T, Node<S, T> >(n, f);
+
+    n->a = 0.0;
+    for (int d = 0; d < f; ++d) {
+      n->a += -n->v[d] * (first->v[d] + second->v[d]) / 2;
+    }
+  }
+
+  // Reported distance: the raw value clamped at zero.
+  template<typename T>
+  static inline T normalized_distance(T distance) {
+    const T floor_value = T(0);
+    return std::max(distance, floor_value);
+  }
+
+  // Manhattan nodes need no per-node initialisation.
+  template<typename S, typename T>
+  static inline void init_node(Node<S, T>* n, int f) {
+  }
+
+  static const char* name() {
+    return "manhattan";
+  }
+};
+
+// Abstract, metric-independent view of an index. S is the item/node id type
+// and T the vector component type. AnnoyIndex below implements this interface
+// for a concrete distance and random generator.
+template<typename S, typename T>
+class AnnoyIndexInterface {
+ public:
+  // Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL
+  virtual ~AnnoyIndexInterface() {};
+  // Stores vector w under id `item`.
+  virtual bool add_item(S item, const T* w, char** error=NULL) = 0;
+  // Builds the forest; q is the number of trees (AnnoyIndex treats -1 as "choose automatically").
+  virtual bool build(int q, char** error=NULL) = 0;
+  // Discards the built trees so that the index can be built again.
+  virtual bool unbuild(char** error=NULL) = 0;
+  // Writes a built index to `filename`.
+  virtual bool save(const char* filename, bool prefault=false, char** error=NULL) = 0;
+  // Releases the index data and resets the index to its empty state.
+  virtual void unload() = 0;
+  // Loads an index from a file written by save().
+  virtual bool load(const char* filename, bool prefault=false, char** error=NULL) = 0;
+  // Loads an index from an in-memory buffer of index_size bytes.
+  virtual bool load_index(const unsigned char* index_data, const int64_t& index_size, char** error = NULL) = 0;
+  // Distance between stored items i and j.
+  virtual T get_distance(S i, S j) const = 0;
+  // Nearest-neighbour queries, by stored item or by explicit vector. Up to n
+  // ids are appended to *result and, when `distances` is non-NULL, the matching
+  // distances to *distances. Ids for which bitset->test() is true are skipped.
+  virtual void get_nns_by_item(S item, size_t n, int search_k, vector<S>* result, vector<T>* distances,
+                               faiss::ConcurrentBitsetPtr bitset = nullptr) const = 0;
+  virtual void get_nns_by_vector(const T* w, size_t n, int search_k, vector<S>* result, vector<T>* distances,
+                               faiss::ConcurrentBitsetPtr bitset = nullptr) const = 0;
+  // Number of items in the index.
+  virtual S get_n_items() const = 0;
+  // Number of components per vector.
+  virtual S get_dim() const = 0;
+  // Number of trees in the forest.
+  virtual S get_n_trees() const = 0;
+  // Size in bytes, and start address, of the raw node storage.
+  virtual int64_t get_index_length() const = 0;
+  virtual void* get_index() const = 0;
+  // Enables or disables progress output.
+  virtual void verbose(bool v) = 0;
+  // Copies the vector stored for `item` into v.
+  virtual void get_item(S item, T* v) const = 0;
+  // Seeds the random generator used while building.
+  virtual void set_seed(int q) = 0;
+  // Switches the index to building directly inside the file `filename`.
+  virtual bool on_disk_build(const char* filename, char** error=NULL) = 0;
+};
+
+template<typename S, typename T, typename Distance, typename Random>
+  class AnnoyIndex : public AnnoyIndexInterface<S, T> {
+  /*
+   * We use random projection to build a forest of binary trees of all items.
+   * Basically just split the hyperspace into two sides by a hyperplane,
+   * then recursively split each of those subtrees etc.
+   * We create a tree like this q times. The default q is determined automatically
+   * in such a way that we at most use 2x as much memory as the vectors take.
+   */
+public:
+  typedef Distance D;
+  typedef typename D::template Node<S, T> Node;
+
+protected:
+  const int _f;
+  size_t _s;
+  S _n_items;
+  Random _random;
+  void* _nodes; // Could either be mmapped, or point to a memory buffer that we reallocate
+  S _n_nodes;
+  S _nodes_size;
+  vector<S> _roots;
+  S _K;
+  bool _loaded;
+  bool _verbose;
+  int _fd;
+  bool _on_disk;
+  bool _built;
+public:
+
+   // Creates an empty index for vectors with f components.
+   AnnoyIndex(int f)
+    : _f(f),
+      _s(offsetof(Node, v) + _f * sizeof(T)), // Size of each node
+      _random(),
+      _verbose(false),
+      _built(false) {
+    // Max number of descendants to fit into node
+    const size_t id_bytes = (size_t) (_s - offsetof(Node, children));
+    _K = (S) (id_bytes / sizeof(S));
+    // Reset everything
+    reinitialize();
+  }
+  // Releases whatever node storage the index still holds.
+  ~AnnoyIndex()
+  {
+    unload();
+  }
+
+  // Number of components per vector, as passed to the constructor.
+  int get_f() const {
+    const int components = _f;
+    return components;
+  }
+
+  // Stores the vector w (read as _f components) under id `item`.
+  // Forwards to add_item_impl(); returns false if the index is loaded.
+  bool add_item(S item, const T* w, char** error=NULL) {
+    const bool added = add_item_impl(item, w, error);
+    return added;
+  }
+
+  // Generic form of add_item(): W only has to support w[z] for z in [0, _f).
+  // Grows the node storage to hold `item`, writes a fresh single-item node
+  // there and raises _n_items when `item` lies beyond it.
+  // Fails (returns false and sets *error) when the index has been loaded.
+  template<typename W>
+  bool add_item_impl(S item, const W& w, char** error=NULL) {
+    if (_loaded) {
+      set_error_from_string(error, "You can't add an item to a loaded index");
+      return false;
+    }
+
+    _allocate_size(item + 1);
+    Node* slot = _get(item);
+    D::zero_value(slot);
+
+    // A freshly added item has one descendant (itself) and no children.
+    slot->n_descendants = 1;
+    slot->children[0] = 0;
+    slot->children[1] = 0;
+
+    int z = 0;
+    while (z < _f) {
+      slot->v[z] = w[z];
+      ++z;
+    }
+
+    D::init_node(slot, _f);
+
+    if (_n_items <= item)
+      _n_items = item + 1;
+
+    return true;
+  }
+    
+  // Switches the index to on-disk building: `file` is created (or truncated),
+  // sized for one node and memory-mapped read/write as the node storage.
+  //
+  // Returns true on success. On failure *error is set (when error is
+  // non-NULL), the descriptor opened here is closed again and the index is
+  // not switched into on-disk mode, so no later call operates on an invalid
+  // descriptor or mapping.
+  bool on_disk_build(const char* file, char** error=NULL) {
+    _fd = open(file, O_RDWR | O_CREAT | O_TRUNC, (int) 0600);
+    if (_fd == -1) {
+      set_error_from_errno(error, "Unable to open");
+      _fd = 0;
+      return false;
+    }
+
+    const S initial_nodes = 1;
+    if (ftruncate(_fd, _s * initial_nodes) == -1) {
+      // Record errno before close() can overwrite it.
+      set_error_from_errno(error, "Unable to truncate");
+      close(_fd);
+      _fd = 0;
+      return false;
+    }
+
+#ifdef MAP_POPULATE
+    void* mapped = mmap(0, _s * initial_nodes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0);
+#else
+    void* mapped = mmap(0, _s * initial_nodes, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0);
+#endif
+    if (mapped == MAP_FAILED) {
+      // mmap reports failure with MAP_FAILED, not NULL; never keep that as _nodes.
+      set_error_from_errno(error, "Unable to mmap");
+      close(_fd);
+      _fd = 0;
+      return false;
+    }
+
+    // Commit the new state only once every step has succeeded.
+    _nodes = (Node*) mapped;
+    _nodes_size = initial_nodes;
+    _on_disk = true;
+    return true;
+  }
+    
+  // Builds the forest over all items added so far.
+  //
+  // q is the number of trees. With q == -1 trees are added until the node
+  // count reaches twice the item count. Fails (returns false, sets *error)
+  // when the index is loaded or already built, or when the final truncation
+  // of an on-disk index fails.
+  bool build(int q, char** error=NULL) {
+    if (_loaded) {
+      set_error_from_string(error, "You can't build a loaded index");
+      return false;
+    }
+    if (_built) {
+      set_error_from_string(error, "You can't build a built index");
+      return false;
+    }
+
+    D::template preprocess<T, S, Node>(_nodes, _s, _n_items, _f);
+
+    _n_nodes = _n_items;
+    for (;;) {
+      const bool enough = (q == -1) ? (_n_nodes >= _n_items * 2)
+                                    : (_roots.size() >= (size_t)q);
+      if (enough)
+        break;
+      if (_verbose) showUpdate("pass %zd...\n", _roots.size());
+
+      // Every tree is built over all items that actually exist.
+      vector<S> indices;
+      for (S item = 0; item < _n_items; ++item) {
+        if (_get(item)->n_descendants >= 1) // Issue #223
+          indices.push_back(item);
+      }
+
+      const S root = _make_tree(indices, true);
+      _roots.push_back(root);
+    }
+
+    // Also, copy the roots into the last segment of the array
+    // This way we can load them faster without reading the whole file
+    _allocate_size(_n_nodes + (S)_roots.size());
+    for (size_t r = 0; r < _roots.size(); ++r) {
+      memcpy(_get(_n_nodes + (S)r), _get(_roots[r]), _s);
+    }
+    _n_nodes += _roots.size();
+
+    if (_verbose) showUpdate("has %d nodes\n", _n_nodes);
+
+    if (!_on_disk) {
+      _built = true;
+      return true;
+    }
+
+    // On-disk index: shrink the mapping and the file to the nodes in use.
+    _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * _n_nodes);
+    if (ftruncate(_fd, _s * _n_nodes)) {
+      // TODO: this probably creates an index in a corrupt state... not sure what to do
+      set_error_from_errno(error, "Unable to truncate");
+      return false;
+    }
+    _nodes_size = _n_nodes;
+    _built = true;
+    return true;
+  }
+  
+  // Forgets the built trees: clears the roots, resets the node count to the
+  // item count and marks the index as not built.
+  // Not allowed on a loaded index (returns false and sets *error).
+  bool unbuild(char** error=NULL) {
+    if (!_loaded) {
+      _built = false;
+      _n_nodes = _n_items;
+      _roots.clear();
+      return true;
+    }
+
+    set_error_from_string(error, "You can't unbuild a loaded index");
+    return false;
+  }
+
+  // Writes a built index to `filename` and then re-opens it through load(),
+  // so that afterwards the index is backed by the saved file.
+  //
+  // An index built on disk is already stored in its file; in that case
+  // nothing is written and true is returned.
+  // Returns false and sets *error (when non-NULL) if the index has not been
+  // built, or if opening, writing, closing or re-loading the file fails.
+  bool save(const char* filename, bool prefault=false, char** error=NULL) {
+    if (!_built) {
+      set_error_from_string(error, "You can't save an index that hasn't been built");
+      return false;
+    }
+    if (_on_disk) {
+      return true;
+    }
+
+    // Delete file if it already exists (See issue #335)
+    unlink(filename);
+
+    FILE *f = fopen(filename, "wb");
+    if (f == NULL) {
+      set_error_from_errno(error, "Unable to open");
+      return false;
+    }
+
+    if (fwrite(_nodes, _s, _n_nodes, f) != (size_t) _n_nodes) {
+      // Record errno first, then close the stream so it is not leaked.
+      set_error_from_errno(error, "Unable to write");
+      fclose(f);
+      return false;
+    }
+
+    if (fclose(f) == EOF) {
+      set_error_from_errno(error, "Unable to close");
+      return false;
+    }
+
+    unload();
+    return load(filename, prefault, error);
+  }
+
+  // Puts all storage bookkeeping back into the "empty index" state.
+  // Only resets fields; it does not release anything (see unload()).
+  void reinitialize() {
+    // Storage handles.
+    _nodes = NULL;
+    _fd = 0;
+    _on_disk = false;
+    _loaded = false;
+
+    // Counters and tree roots.
+    _n_items = 0;
+    _n_nodes = 0;
+    _nodes_size = 0;
+    _roots.clear();
+  }
+
+  // Releases the node storage and resets the index to its empty state.
+  // With an open descriptor the storage is a mapping, otherwise a heap
+  // buffer.
+  void unload() {
+    if (_fd) {
+      // we have mmapped data; an on-disk build maps _nodes_size nodes,
+      // a loaded file maps _n_nodes nodes
+      const size_t mapped_bytes = _on_disk ? (_s * _nodes_size) : (_n_nodes * _s);
+      close(_fd);
+      munmap(_nodes, mapped_bytes);
+    } else if (_nodes) {
+      // We have heap allocated data
+      free(_nodes);
+    }
+
+    reinitialize();
+    if (_verbose) showUpdate("unloaded\n");
+  }
+
+  // Memory-maps the index file `filename` read-only and reconstructs the
+  // list of tree roots from the nodes at the end of the file.
+  //
+  // prefault requests MAP_POPULATE where the platform defines it.
+  // Returns true on success. On any failure *error is set (when non-NULL),
+  // the descriptor opened here is closed again, _fd is reset to 0 and false
+  // is returned, so that a later unload() does not act on a stale descriptor
+  // or an invalid mapping.
+  bool load(const char* filename, bool prefault=false, char** error=NULL) {
+    _fd = open(filename, O_RDONLY, (int)0400);
+    if (_fd == -1) {
+      set_error_from_errno(error, "Unable to open");
+      _fd = 0;
+      return false;
+    }
+    off_t size = lseek_getsize(_fd);
+    if (size == -1) {
+      set_error_from_errno(error, "Unable to get size");
+      close(_fd);
+      _fd = 0;
+      return false;
+    } else if (size == 0) {
+      set_error_from_errno(error, "Size of file is zero");
+      close(_fd);
+      _fd = 0;
+      return false;
+    } else if (size % _s) {
+      // Something is fishy with this index!
+      set_error_from_errno(error, "Index size is not a multiple of vector size");
+      close(_fd);
+      _fd = 0;
+      return false;
+    }
+
+    int flags = MAP_SHARED;
+    if (prefault) {
+#ifdef MAP_POPULATE
+      flags |= MAP_POPULATE;
+#else
+      showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform");
+#endif
+    }
+    void* mapped = mmap(0, size, PROT_READ, flags, _fd, 0);
+    if (mapped == MAP_FAILED) {
+      // mmap reports failure with MAP_FAILED, not NULL; never keep that as _nodes.
+      set_error_from_errno(error, "Unable to mmap");
+      close(_fd);
+      _fd = 0;
+      return false;
+    }
+    _nodes = (Node*)mapped;
+    _n_nodes = (S)(size / _s);
+
+    // Find the roots by scanning the end of the file and taking the nodes with most descendants
+    _roots.clear();
+    S m = -1;
+    for (S i = _n_nodes - 1; i >= 0; i--) {
+      S k = _get(i)->n_descendants;
+      if (m == -1 || k == m) {
+        _roots.push_back(i);
+        m = k;
+      } else {
+        break;
+      }
+    }
+    // hacky fix: since the last root precedes the copy of all roots, delete it
+    if (_roots.size() > 1 && _get(_roots.front())->children[0] == _get(_roots.back())->children[0])
+      _roots.pop_back();
+    _loaded = true;
+    _built = true;
+    _n_items = m;
+    if (_verbose) showUpdate("found %lu roots with degree %d\n", _roots.size(), m);
+    return true;
+  }
+
+  // Loads an index from an in-memory image of index_size bytes (the same
+  // layout that save() writes to a file). The image is copied into a heap
+  // buffer owned by the index, then the tree roots are reconstructed from
+  // the nodes at the end of the image.
+  //
+  // Returns true on success. Returns false and sets *error (when non-NULL)
+  // if the size is negative, zero or not a multiple of the node size, if
+  // index_data is NULL, or if the buffer cannot be allocated; in those cases
+  // the index is left unchanged.
+  bool load_index(const unsigned char* index_data, const int64_t& index_size, char** error = NULL) {
+    if (index_size < 0) {
+      set_error_from_errno(error, "Unable to get size");
+      return false;
+    } else if (index_size == 0) {
+      set_error_from_errno(error, "Size of file is zero");
+      return false;
+    } else if (index_size % _s) {
+      // Something is fishy with this index!
+      set_error_from_errno(error, "Index size is not a multiple of vector size");
+      return false;
+    }
+    if (index_data == NULL) {
+      set_error_from_string(error, "Index data is null");
+      return false;
+    }
+
+    const S n_nodes = (S)(index_size / _s);
+    void* buffer = malloc(_s * n_nodes);
+    if (buffer == NULL) {
+      set_error_from_errno(error, "Unable to allocate memory");
+      return false;
+    }
+    memcpy(buffer, index_data, (size_t)index_size);
+
+    _nodes = (Node*)buffer;
+    _n_nodes = n_nodes;
+    // The heap buffer holds exactly the loaded nodes; keep the capacity in sync.
+    _nodes_size = n_nodes;
+
+    // Find the roots by scanning the end of the file and taking the nodes with most descendants
+    _roots.clear();
+    S m = -1;
+    for (S i = _n_nodes - 1; i >= 0; i--) {
+      S k = _get(i)->n_descendants;
+      if (m == -1 || k == m) {
+        _roots.push_back(i);
+        m = k;
+      } else {
+        break;
+      }
+    }
+    // hacky fix: since the last root precedes the copy of all roots, delete it
+    if (_roots.size() > 1 && _get(_roots.front())->children[0] == _get(_roots.back())->children[0])
+      _roots.pop_back();
+    _loaded = true;
+    _built = true;
+    _n_items = m;
+    if (_verbose) showUpdate("found %lu roots with degree %d\n", _roots.size(), m);
+    return true;
+  }
+
+  // Distance between stored items i and j, in the metric's reported
+  // (normalized) form. The ids are not range-checked.
+  T get_distance(S i, S j) const {
+    const Node* lhs = _get(i);
+    const Node* rhs = _get(j);
+    return D::normalized_distance(D::distance(lhs, rhs, _f));
+  }
+
+  // Nearest-neighbour query using the vector stored for `item` as the query.
+  // Up to n ids are appended to *result and, if `distances` is non-NULL,
+  // the matching distances to *distances. Ids for which bitset->test() is
+  // true are skipped.
+  //
+  // `bitset` defaults to nullptr, matching the declaration in
+  // AnnoyIndexInterface, so that calls made through an AnnoyIndex pointer may
+  // omit it just like calls made through the interface.
+  void get_nns_by_item(S item, size_t n, int search_k, vector<S>* result, vector<T>* distances,
+                       faiss::ConcurrentBitsetPtr bitset = nullptr) const {
+    // TODO: handle OOB
+    const Node* m = _get(item);
+    _get_all_nns(m->v, n, search_k, result, distances, bitset);
+  }
+
+  // Nearest-neighbour query for the explicit vector w (_f components).
+  // Up to n ids are appended to *result and, if `distances` is non-NULL,
+  // the matching distances to *distances. Ids for which bitset->test() is
+  // true are skipped.
+  //
+  // `bitset` defaults to nullptr, matching the declaration in
+  // AnnoyIndexInterface, so that calls made through an AnnoyIndex pointer may
+  // omit it just like calls made through the interface.
+  void get_nns_by_vector(const T* w, size_t n, int search_k, vector<S>* result, vector<T>* distances,
+                         faiss::ConcurrentBitsetPtr bitset = nullptr) const {
+    _get_all_nns(w, n, search_k, result, distances, bitset);
+  }
+
+  // Current value of the item counter (_n_items).
+  S get_n_items() const {
+    const S count = _n_items;
+    return count;
+  }
+
+  // Number of components per vector, converted to the id type S.
+  S get_dim() const {
+    const S dim = _f;
+    return dim;
+  }
+
+  // Number of trees, i.e. the number of recorded roots.
+  S get_n_trees() const {
+    const S trees = (S)_roots.size();
+    return trees;
+  }
+
+  // Size in bytes of the valid index data starting at get_index():
+  // one node of _s bytes for each of the _n_nodes nodes in use.
+  //
+  // This deliberately uses the number of nodes in use rather than the
+  // allocated capacity (_nodes_size). The capacity can include zero-filled
+  // spare nodes after an in-memory build, which must not be part of an image
+  // handed to load_index(), and it is not maintained by load().
+  int64_t get_index_length() const {
+    return (int64_t)_s * _n_nodes;
+  }
+
+  // Start address of the raw node storage (mapped or heap-allocated).
+  void* get_index() const {
+    void* const storage = _nodes;
+    return storage;
+  }
+
+  // Turns the progress messages printed through showUpdate() on or off.
+  void verbose(bool v)
+  {
+    _verbose = v;
+  }
+
+  // Copies the _f components stored for `item` into the caller's buffer v.
+  void get_item(S item, T* v) const {
+    // TODO: handle OOB
+    const Node* source = _get(item);
+    const size_t n_bytes = (_f) * sizeof(T);
+    memcpy(v, source->v, n_bytes);
+  }
+
+  // Re-seeds the random generator used by the index.
+  void set_seed(int seed)
+  {
+    _random.set_seed(seed);
+  }
+
+protected:
+  // Makes sure the node storage can hold at least n nodes.
+  // When it has to grow, the new capacity is the larger of n and
+  // (old capacity + 1) * 1.3. On-disk storage is grown by extending and
+  // remapping the file; heap storage by realloc, with the added nodes
+  // zero-filled.
+  void _allocate_size(S n) {
+    if (n <= _nodes_size)
+      return;
+
+    const double reallocation_factor = 1.3;
+    const S grown = (S) ((_nodes_size + 1) * reallocation_factor);
+    const S new_nodes_size = std::max(n, grown);
+    void* const old = _nodes;
+
+    if (_on_disk) {
+      const int rc = ftruncate(_fd, _s * new_nodes_size);
+      if (_verbose && rc) showUpdate("File truncation error\n");
+      _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * new_nodes_size);
+    } else {
+      _nodes = realloc(_nodes, _s * new_nodes_size);
+      char* const fresh_begin = (char *) _nodes + (_nodes_size * _s) / sizeof(char);
+      memset(fresh_begin, 0, (new_nodes_size - _nodes_size) * _s);
+    }
+
+    _nodes_size = new_nodes_size;
+    if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes);
+  }
+
+  // Pointer to node number i inside the node storage (no bounds check).
+  inline Node* _get(const S i) const {
+    Node* const node = get_node_ptr<S, Node>(_nodes, _s, i);
+    return node;
+  }
+
+  // Recursively builds the subtree containing the items in `indices` and
+  // returns the index of the node that represents it. `is_root` marks the
+  // top-level call made by build() for each tree.
+  S _make_tree(const vector<S >& indices, bool is_root) {
+    // The basic rule is that if we have <= _K items, then it's a leaf node, otherwise it's a split node.
+    // There's some regrettable complications caused by the problem that root nodes have to be "special":
+    // 1. We identify root nodes by the arguable logic that _n_items == n->n_descendants, regardless of how many descendants they actually have
+    // 2. Root nodes with only 1 child need to be a "dummy" parent
+    // 3. Due to the _n_items "hack", we need to be careful with the cases where _n_items <= _K or _n_items > _K
+
+    // A single item below the root needs no node of its own: the item is the subtree.
+    if (indices.size() == 1 && !is_root)
+      return indices[0];
+
+    // Leaf case: append a new node that stores the item ids directly in its
+    // children array.
+    if (indices.size() <= (size_t)_K && (!is_root || (size_t)_n_items <= (size_t)_K || indices.size() == 1)) {
+      _allocate_size(_n_nodes + 1);
+      S item = _n_nodes++;
+      Node* m = _get(item);
+      m->n_descendants = is_root ? _n_items : (S)indices.size();
+
+      // Using std::copy instead of a loop seems to resolve issues #3 and #13,
+      // probably because gcc 4.8 goes overboard with optimizations.
+      // Using memcpy instead of std::copy for MSVC compatibility. #235
+      // Only copy when necessary to avoid crash in MSVC 9. #293
+      if (!indices.empty())
+        memcpy(m->children, &indices[0], indices.size() * sizeof(S));
+      return item;
+    }
+
+    // Split case: collect the nodes of all items so the metric can pick a split.
+    vector<Node*> children;
+    for (size_t i = 0; i < indices.size(); i++) {
+      S j = indices[i];
+      Node* n = _get(j);
+      if (n)
+        children.push_back(n);
+    }
+
+    vector<S> children_indices[2];
+    // The split node is assembled in stack memory and only copied into the
+    // node storage at the very end, after the recursive calls below (which
+    // may grow the storage through _allocate_size).
+    Node* m = (Node*)alloca(_s);
+    D::create_split(children, _f, _s, _random, m);
+
+    // Distribute the items over the two sides of the split.
+    for (size_t i = 0; i < indices.size(); i++) {
+      S j = indices[i];
+      Node* n = _get(j);
+      if (n) {
+        bool side = D::side(m, n->v, _f, _random);
+        children_indices[side].push_back(j);
+      } else {
+        showUpdate("No node for index %d?\n", j);
+      }
+    }
+
+    // If we didn't find a hyperplane, just randomize sides as a last option
+    while (children_indices[0].size() == 0 || children_indices[1].size() == 0) {
+      if (_verbose)
+        showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n",
+          children_indices[0].size(), children_indices[1].size());
+      if (_verbose && indices.size() > 100000)
+        showUpdate("Failed splitting %lu items\n", indices.size());
+
+      children_indices[0].clear();
+      children_indices[1].clear();
+
+      // Set the vector to 0.0
+      for (int z = 0; z < _f; z++)
+        m->v[z] = 0;
+
+      for (size_t i = 0; i < indices.size(); i++) {
+        S j = indices[i];
+        // Just randomize...
+        children_indices[_random.flip()].push_back(j);
+      }
+    }
+
+    // flip is 1 when side 0 is the larger side, so that side^flip visits the
+    // smaller side first in the loop below.
+    int flip = (children_indices[0].size() > children_indices[1].size());
+
+    m->n_descendants = is_root ? _n_items : (S)indices.size();
+    for (int side = 0; side < 2; side++) {
+      // run _make_tree for the smallest child first (for cache locality)
+      m->children[side^flip] = _make_tree(children_indices[side^flip], false);
+    }
+
+    // Finally append the finished split node to the node storage.
+    _allocate_size(_n_nodes + 1);
+    S item = _n_nodes++;
+    memcpy(_get(item), m, _s);
+
+    return item;
+  }
+
+  // Core nearest-neighbour search shared by get_nns_by_item() and
+  // get_nns_by_vector().
+  //
+  // v         query vector (_f components)
+  // n         maximum number of results to return
+  // search_k  number of candidates to collect before ranking; a value <= 0
+  //           selects n * (number of trees)
+  // result    receives the ids of the closest candidates, closest first
+  // distances if non-NULL, receives the matching normalized distances
+  // bitset    if non-null, ids for which bitset->test() is true are skipped
+  void _get_all_nns(const T* v, size_t n, int search_k, vector<S>* result, vector<T>* distances,
+                    faiss::ConcurrentBitsetPtr bitset) const {
+    // Wrap the query in a temporary stack node so that the metric's
+    // node-based distance function can be used on it.
+    Node* v_node = (Node *)alloca(_s);
+    D::template zero_value<Node>(v_node);
+    memcpy(v_node->v, v, sizeof(T) * _f);
+    D::init_node(v_node, _f);
+
+    // Queue of (priority, node index); the entry with the largest priority is
+    // taken first.
+    std::priority_queue<pair<T, S> > q;
+
+    if (search_k <= 0) {
+      search_k = n * _roots.size();
+    }
+
+    // Start from the root of every tree.
+    for (size_t i = 0; i < _roots.size(); i++) {
+      q.push(make_pair(Distance::template pq_initial_value<T>(), _roots[i]));
+    }
+
+    // Collect candidate ids until search_k have been gathered or the queue is
+    // exhausted.
+    std::vector<S> nns;
+    while (nns.size() < (size_t)search_k && !q.empty()) {
+      const pair<T, S>& top = q.top();
+      T d = top.first;
+      S i = top.second;
+      Node* nd = _get(i);
+      q.pop();
+      if (nd->n_descendants == 1 && i < _n_items) { // raw data
+        if (bitset == nullptr || !bitset->test((faiss::ConcurrentBitset::id_type_t)i))
+          nns.push_back(i);
+      } else if (nd->n_descendants <= _K) {
+        // Leaf node: its children array holds the item ids themselves.
+        const S* dst = nd->children;
+        for (auto ii = 0; ii < nd->n_descendants; ++ ii) {
+          if (bitset == nullptr || !bitset->test((faiss::ConcurrentBitset::id_type_t)dst[ii]))
+            nns.push_back(dst[ii]);
+//            nns.insert(nns.end(), dst, &dst[nd->n_descendants]);
+        }
+      } else {
+        // Split node: queue both children with priorities derived from the
+        // query's margin to the split.
+        T margin = D::margin(nd, v, _f);
+        q.push(make_pair(D::pq_distance(d, margin, 1), static_cast<S>(nd->children[1])));
+        q.push(make_pair(D::pq_distance(d, margin, 0), static_cast<S>(nd->children[0])));
+      }
+    }
+
+    // Get distances for all items
+    // To avoid calculating distance multiple times for any items, sort by id
+    std::sort(nns.begin(), nns.end());
+    vector<pair<T, S> > nns_dist;
+    S last = -1;
+    for (size_t i = 0; i < nns.size(); i++) {
+      S j = nns[i];
+      if (j == last)
+        continue;
+      last = j;
+      if (_get(j)->n_descendants == 1)  // This is only to guard a really obscure case, #284
+        nns_dist.push_back(make_pair(D::distance(v_node, _get(j), _f), j));
+    }
+
+    // Only the p closest candidates need to be in sorted order.
+    size_t m = nns_dist.size();
+    size_t p = n < m ? n : m; // Return this many items
+    std::partial_sort(nns_dist.begin(), nns_dist.begin() + p, nns_dist.end());
+    for (size_t i = 0; i < p; i++) {
+      if (distances)
+        distances->push_back(D::normalized_distance(nns_dist[i].first));
+      result->push_back(nns_dist[i].second);
+    }
+  }
+};
+
+#endif
+// vim: tabstop=2 shiftwidth=2
diff --git a/core/src/index/thirdparty/annoy/src/annoyluamodule.cc b/core/src/index/thirdparty/annoy/src/annoyluamodule.cc
new file mode 100644
index 0000000000..76fec7c9e0
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/src/annoyluamodule.cc
@@ -0,0 +1,318 @@
+// Copyright (c) 2016 Boris Nagaev
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include <cstring>
+#include <typeinfo>
+
+#include <lua.hpp>
+
+#include "annoylib.h"
+#include "kissrandom.h"
+
+#if LUA_VERSION_NUM == 501
+#define compat_setfuncs(L, funcs) luaL_register(L, NULL, funcs)
+#define compat_rawlen lua_objlen
+#else
+#define compat_setfuncs(L, funcs) luaL_setfuncs(L, funcs, 0)
+#define compat_rawlen lua_rawlen
+#endif
+
+template<typename Distance>
+class LuaAnnoy {
+public:
+  typedef int32_t AnnoyS;
+  typedef float AnnoyT;
+  typedef AnnoyIndex<AnnoyS, AnnoyT, Distance, Kiss64Random> Impl;
+  typedef LuaAnnoy<Distance> ThisClass;
+
+  // Read-only, zero-based view of a Lua table that is used as a vector.
+  // The constructor raises a Lua argument error unless the stack value at
+  // `object` is a table of exactly f elements.
+  class LuaArrayProxy {
+  public:
+    LuaArrayProxy(lua_State* L, int object, int f)
+      : L_(L), object_(object)
+    {
+      luaL_checktype(L, object, LUA_TTABLE);
+      const int table_len = compat_rawlen(L, object);
+      luaL_argcheck(L, table_len == f, object, "Length of v != f");
+    }
+
+    // Element `index` (zero-based) of the table, read as a number.
+    double operator[](int index) const {
+      // Lua tables are one-based.
+      lua_rawgeti(L_, object_, index + 1);
+      const double value = lua_tonumber(L_, -1);
+      lua_pop(L_, 1);
+      return value;
+    }
+
+  private:
+    lua_State* L_;
+    int object_;
+  };
+
+  // Copies the f numbers of the Lua table at stack index `object` into dst.
+  // Raises a Lua error if the value is not a table of length f.
+  static void toVector(lua_State* L, int object, int f, AnnoyT* dst) {
+    const LuaArrayProxy source(L, object, f);
+    int pos = 0;
+    while (pos < f) {
+      dst[pos] = source[pos];
+      ++pos;
+    }
+  }
+
+  // Pushes a new Lua table holding the elements of v at positions 1..size.
+  template <typename Vector>
+  static void pushVector(lua_State* L, const Vector& v) {
+    lua_createtable(L, v.size(), 0);
+    int slot = 1;
+    for (typename Vector::const_iterator it = v.begin(); it != v.end(); ++it, ++slot) {
+      lua_pushnumber(L, *it);
+      lua_rawseti(L, -2, slot);
+    }
+  }
+
+  // Name under which the metatable for this index type is registered.
+  static const char* typeAsString() {
+    const char* const type_name = typeid(Impl).name();
+    return type_name;
+  }
+
+  // Returns the index stored in the userdata at stack index `object`;
+  // luaL_checkudata raises a Lua error if the value has a different type.
+  static Impl* getAnnoy(lua_State* L, int object) {
+    void* const userdata = luaL_checkudata(L, object, typeAsString());
+    return static_cast<Impl*>(userdata);
+  }
+
+  // Reads the integer argument at stack index `object` as an item index.
+  // Raises a Lua argument error when it is negative or, if `size` is given
+  // (not -1), when it is not below `size`.
+  static int getItemIndex(lua_State* L, int object, int size = -1) {
+    const int item = luaL_checkinteger(L, object);
+    luaL_argcheck(L, item >= 0, object, "Index must be >= 0");
+    const bool has_upper_bound = (size != -1);
+    if (has_upper_bound) {
+      luaL_argcheck(L, item < size, object, "Index must be < size");
+    }
+    return item;
+  }
+
+  // __gc metamethod: runs the destructor of the index that was constructed
+  // in place inside the userdata (see createNew).
+  static int gc(lua_State* L) {
+    Impl* const index = getAnnoy(L, 1);
+    index->~Impl();
+    return 0;
+  }
+
+  // __tostring metamethod: pushes a short description with the item count,
+  // the vector length and the metric name.
+  static int tostring(lua_State* L) {
+    Impl* const index = getAnnoy(L, 1);
+    lua_pushfstring(L, "annoy.AnnoyIndex object (%dx%d, %s distance)",
+                    index->get_n_items(), index->get_f(), Distance::name());
+    return 1;
+  }
+
+  // index:add_item(item, vector) - stores the Lua table `vector` under the
+  // non-negative id `item`. Returns nothing to Lua.
+  static int add_item(lua_State* L) {
+    Impl* const index = getAnnoy(L, 1);
+    const int item = getItemIndex(L, 2);
+    const LuaArrayProxy vector_arg(L, 3, index->get_f());
+    index->add_item_impl(item, vector_arg);
+    return 0;
+  }
+
+  // index:build(n_trees) - builds the forest with the given number of trees.
+  // Returns true to Lua on success and raises a Lua error when the underlying
+  // build() reports failure (the same convention load() uses), instead of
+  // reporting success unconditionally.
+  static int build(lua_State* L) {
+    Impl* self = getAnnoy(L, 1);
+    int n_trees = luaL_checkinteger(L, 2);
+    if (!self->build(n_trees)) {
+      return luaL_error(L, "Can't build index");
+    }
+    lua_pushboolean(L, true);
+    return 1;
+  }
+
+  // index:on_disk_build(filename) - switches the index to building inside
+  // the given file. Returns true to Lua on success and raises a Lua error
+  // when the underlying on_disk_build() reports failure (the same convention
+  // load() uses), instead of reporting success unconditionally.
+  static int on_disk_build(lua_State* L) {
+    Impl* self = getAnnoy(L, 1);
+    const char* filename = luaL_checkstring(L, 2);
+    if (!self->on_disk_build(filename)) {
+      return luaL_error(L, "Can't build on disk in file: %s", filename);
+    }
+    lua_pushboolean(L, true);
+    return 1;
+  }
+
+  // index:save(filename [, prefault]) - writes the index to a file; prefault
+  // defaults to true. Returns true to Lua on success and raises a Lua error
+  // when the underlying save() reports failure (the same convention load()
+  // uses), instead of reporting success unconditionally.
+  static int save(lua_State* L) {
+    int nargs = lua_gettop(L);
+    Impl* self = getAnnoy(L, 1);
+    const char* filename = luaL_checkstring(L, 2);
+    bool prefault = true;
+    if (nargs >= 3) {
+      prefault = lua_toboolean(L, 3);
+    }
+    if (!self->save(filename, prefault)) {
+      return luaL_error(L, "Can't save file: %s", filename);
+    }
+    lua_pushboolean(L, true);
+    return 1;
+  }
+
+  // index:load(filename [, prefault]) - loads an index from a file; prefault
+  // defaults to true. Returns true to Lua, or raises a Lua error when the
+  // file cannot be loaded.
+  static int load(lua_State* L) {
+    Impl* const index = getAnnoy(L, 1);
+    const int nargs = lua_gettop(L);
+    const char* filename = luaL_checkstring(L, 2);
+    const bool prefault = (nargs >= 3) ? (lua_toboolean(L, 3) != 0) : true;
+    const bool loaded = index->load(filename, prefault);
+    if (!loaded) {
+      return luaL_error(L, "Can't load file: %s", filename);
+    }
+    lua_pushboolean(L, true);
+    return 1;
+  }
+
+  // index:unload() - releases the index data. Always returns true to Lua.
+  static int unload(lua_State* L) {
+    Impl* const index = getAnnoy(L, 1);
+    index->unload();
+    lua_pushboolean(L, true);
+    return 1;
+  }
+
+  // Parses the arguments shared by the two nearest-neighbour calls
+  // (self, <query>, n [, search_k [, include_distances]]) and holds the
+  // output vectors until they are pushed back to Lua.
+  struct Searcher {
+    std::vector<AnnoyS> result;
+    std::vector<AnnoyT> distances;
+    Impl* self;
+    int n;
+    int search_k;
+    bool include_distances;
+
+    // Argument 2 (the query itself) is read by the caller.
+    Searcher(lua_State* L) {
+      const int nargs = lua_gettop(L);
+      self = getAnnoy(L, 1);
+      n = luaL_checkinteger(L, 3);
+      if (nargs >= 4) {
+        search_k = luaL_checkinteger(L, 4);
+      } else {
+        search_k = -1;
+      }
+      if (nargs >= 5) {
+        include_distances = lua_toboolean(L, 5);
+      } else {
+        include_distances = false;
+      }
+    }
+
+    // Pushes the id table and, when requested, the distance table.
+    // Returns the number of values pushed.
+    int pushResults(lua_State* L) {
+      pushVector(L, result);
+      if (!include_distances) {
+        return 1;
+      }
+      pushVector(L, distances);
+      return 2;
+    }
+  };
+
+  // index:get_nns_by_item(item, n [, search_k [, include_distances]])
+  // Returns a table of neighbour ids and, when include_distances is set, a
+  // second table with the distances. `item` must be a valid item index.
+  static int get_nns_by_item(lua_State* L) {
+    Searcher s(L);
+    int item = getItemIndex(L, 2, s.self->get_n_items());
+    // The index method takes a sixth bitset argument that has no default
+    // when called through the concrete index type; nullptr means "no filter".
+    s.self->get_nns_by_item(item, s.n, s.search_k, &s.result,
+        s.include_distances ? &s.distances : NULL, nullptr);
+    return s.pushResults(L);
+  }
+
+  // index:get_nns_by_vector(vector, n [, search_k [, include_distances]])
+  // Returns a table of neighbour ids and, when include_distances is set, a
+  // second table with the distances. `vector` must be a table whose length
+  // equals the index's vector length.
+  static int get_nns_by_vector(lua_State* L) {
+    Searcher s(L);
+    std::vector<AnnoyT> _vec(s.self->get_f());
+    AnnoyT* vec = &(_vec[0]);
+    toVector(L, 2, s.self->get_f(), vec);
+    // The index method takes a sixth bitset argument that has no default
+    // when called through the concrete index type; nullptr means "no filter".
+    s.self->get_nns_by_vector(vec, s.n, s.search_k, &s.result,
+        s.include_distances ? &s.distances : NULL, nullptr);
+    return s.pushResults(L);
+  }
+
+  // index:get_item_vector(item) - returns the stored vector of `item` as a
+  // Lua table. `item` must be a valid item index.
+  static int get_item_vector(lua_State* L) {
+    Impl* const index = getAnnoy(L, 1);
+    const int item = getItemIndex(L, 2, index->get_n_items());
+    std::vector<AnnoyT> buffer(index->get_f());
+    index->get_item(item, &(buffer[0]));
+    pushVector(L, buffer);
+    return 1;
+  }
+
+  // index:get_distance(i, j) - returns the distance between two stored
+  // items. Both arguments must be valid item indices.
+  static int get_distance(lua_State* L) {
+    Impl* const index = getAnnoy(L, 1);
+    const int first = getItemIndex(L, 2, index->get_n_items());
+    const int second = getItemIndex(L, 3, index->get_n_items());
+    const AnnoyT dist = index->get_distance(first, second);
+    lua_pushnumber(L, dist);
+    return 1;
+  }
+
+  // index:get_n_items() - returns the number of items as a Lua number.
+  static int get_n_items(lua_State* L) {
+    Impl* const index = getAnnoy(L, 1);
+    lua_pushnumber(L, index->get_n_items());
+    return 1;
+  }
+
+  // Metamethods installed on the userdata's metatable (NULL-terminated).
+  static const luaL_Reg* getMetatable() {
+    static const luaL_Reg metamethods[] = {
+      {"__gc", &ThisClass::gc},
+      {"__tostring", &ThisClass::tostring},
+      {NULL, NULL},
+    };
+    return metamethods;
+  }
+
+  // Methods exposed to Lua through the metatable's __index table
+  // (NULL-terminated).
+  static const luaL_Reg* getMethods() {
+    static const luaL_Reg methods[] = {
+      {"add_item", &ThisClass::add_item},
+      {"build", &ThisClass::build},
+      {"save", &ThisClass::save},
+      {"load", &ThisClass::load},
+      {"unload", &ThisClass::unload},
+      {"get_nns_by_item", &ThisClass::get_nns_by_item},
+      {"get_nns_by_vector", &ThisClass::get_nns_by_vector},
+      {"get_item_vector", &ThisClass::get_item_vector},
+      {"get_distance", &ThisClass::get_distance},
+      {"get_n_items", &ThisClass::get_n_items},
+      {"on_disk_build", &ThisClass::on_disk_build},
+      {NULL, NULL},
+    };
+    return methods;
+  }
+
+  // Pushes a new index object for vectors of length f onto the Lua stack.
+  // The index is constructed in place inside a Lua userdata; the metatable
+  // (metamethods plus an __index table with the methods) is populated the
+  // first time this index type is created.
+  static void createNew(lua_State* L, int f) {
+    void* const storage = lua_newuserdata(L, sizeof(Impl));
+    const bool first_use = luaL_newmetatable(L, typeAsString());
+    if (first_use) {
+      compat_setfuncs(L, getMetatable());
+      lua_newtable(L);
+      compat_setfuncs(L, getMethods());
+      lua_setfield(L, -2, "__index");
+    }
+    new (storage) Impl(f);
+    lua_setmetatable(L, -2);
+  }
+};
+
+// annoy.AnnoyIndex(f [, metric]) - creates an index for vectors of length f.
+// metric is "angular" (the default), "euclidean" or "manhattan"; any other
+// name raises a Lua error.
+static int lua_an_make(lua_State* L) {
+  const int f = luaL_checkinteger(L, 1);
+  const char* metric = "angular";
+  if (lua_gettop(L) >= 2) {
+    metric = luaL_checkstring(L, 2);
+  }
+
+  if (strcmp(metric, "angular") == 0) {
+    LuaAnnoy<Angular>::createNew(L, f);
+  } else if (strcmp(metric, "euclidean") == 0) {
+    LuaAnnoy<Euclidean>::createNew(L, f);
+  } else if (strcmp(metric, "manhattan") == 0) {
+    LuaAnnoy<Manhattan>::createNew(L, f);
+  } else {
+    return luaL_error(L, "Unknown metric: %s", metric);
+  }
+  return 1;
+}
+
+// Functions placed in the module table by luaopen_annoy (NULL-terminated):
+// the single constructor function AnnoyIndex.
+static const luaL_Reg LUA_ANNOY_FUNCS[] = {
+  {"AnnoyIndex", lua_an_make},
+  {NULL, NULL},
+};
+
+// Module entry point with C linkage: pushes a new table filled with the
+// functions from LUA_ANNOY_FUNCS and returns it as the module.
+extern "C" int luaopen_annoy(lua_State* L) {
+  lua_newtable(L);
+  compat_setfuncs(L, LUA_ANNOY_FUNCS);
+  return 1;
+}
+
+// vim: tabstop=2 shiftwidth=2
diff --git a/core/src/index/thirdparty/annoy/src/annoymodule.cc b/core/src/index/thirdparty/annoy/src/annoymodule.cc
new file mode 100644
index 0000000000..f15a0cc692
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/src/annoymodule.cc
@@ -0,0 +1,632 @@
+// Copyright (c) 2013 Spotify AB
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include "annoylib.h"
+#include "kissrandom.h"
+#include "Python.h"
+#include "structmember.h"
+#include <exception>
+#if defined(_MSC_VER) && _MSC_VER == 1500
+typedef signed __int32    int32_t;
+#else
+#include <stdint.h>
+#endif
+
+
+#if defined(USE_AVX512)
+#define AVX_INFO "Using 512-bit AVX instructions"
+#elif defined(USE_AVX128)
+#define AVX_INFO "Using 128-bit AVX instructions"
+#else
+#define AVX_INFO "Not using AVX instructions"
+#endif
+
+#if defined(_MSC_VER)
+#define COMPILER_INFO "Compiled using MSC"
+#elif defined(__GNUC__)
+#define COMPILER_INFO "Compiled on GCC"
+#else
+#define COMPILER_INFO "Compiled on unknown platform"
+#endif
+
+#define ANNOY_DOC (COMPILER_INFO ". " AVX_INFO ".")
+
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#endif
+
+#ifndef Py_TYPE
+    #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
+#endif
+
+#ifdef IS_PY3K
+    #define PyInt_FromLong PyLong_FromLong 
+#endif
+
+
+template class AnnoyIndexInterface<int32_t, float>;
+
+// Hamming-distance index exposed through the float-based AnnoyIndexInterface, using composition:
+// every run of 64 input floats is packed into one uint64_t (a float > 0.5 counts as a set bit).
+// Repacking on each call is questionable from a performance point of view; worth reconsidering.
+class HammingWrapper : public AnnoyIndexInterface<int32_t, float> {
+private:
+  int32_t _f_external, _f_internal;  // floats per vector as seen by callers / packed words per vector
+  AnnoyIndex<int32_t, uint64_t, Hamming, Kiss64Random> _index;
+  // Packs _f_external floats from src into _f_internal words at dst.
+  void _pack(const float* src, uint64_t* dst) const {
+    for (int32_t word = 0; word < _f_internal; word++) {
+      dst[word] = 0;
+      for (int32_t bit = 0; bit < 64 && word * 64 + bit < _f_external; bit++)
+        dst[word] |= (uint64_t)(src[word * 64 + bit] > 0.5) << bit;
+    }
+  }
+  // Expands packed words back into _f_external floats, each 0 or 1.
+  void _unpack(const uint64_t* src, float* dst) const {
+    for (int32_t pos = 0; pos < _f_external; pos++)
+      dst[pos] = (src[pos / 64] >> (pos % 64)) & 1;
+  }
+public:
+  HammingWrapper(int f) : _f_external(f), _f_internal((f + 63) / 64), _index((f + 63) / 64) {}
+  bool add_item(int32_t item, const float* w, char**error) {
+    vector<uint64_t> packed(_f_internal, 0);
+    _pack(w, &packed[0]);
+    return _index.add_item(item, &packed[0], error);
+  }
+  bool build(int q, char** error) { return _index.build(q, error); }
+  bool unbuild(char** error) { return _index.unbuild(error); }
+  bool save(const char* filename, bool prefault, char** error) { return _index.save(filename, prefault, error); }
+  void unload() { _index.unload(); }
+  bool load(const char* filename, bool prefault, char** error) { return _index.load(filename, prefault, error); }
+  float get_distance(int32_t i, int32_t j) const { return _index.get_distance(i, j); }
+  void get_nns_by_item(int32_t item, size_t n, int search_k, vector<int32_t>* result, vector<float>* distances) const {
+    if (!distances) {
+      _index.get_nns_by_item(item, n, search_k, result, NULL);
+      return;
+    }
+    vector<uint64_t> raw;  // integer distances from the inner index, converted to float on insert
+    _index.get_nns_by_item(item, n, search_k, result, &raw);
+    distances->insert(distances->begin(), raw.begin(), raw.end());
+  }
+  void get_nns_by_vector(const float* w, size_t n, int search_k, vector<int32_t>* result, vector<float>* distances) const {
+    vector<uint64_t> packed(_f_internal, 0);
+    _pack(w, &packed[0]);
+    if (!distances) {
+      _index.get_nns_by_vector(&packed[0], n, search_k, result, NULL);
+      return;
+    }
+    vector<uint64_t> raw;
+    _index.get_nns_by_vector(&packed[0], n, search_k, result, &raw);
+    distances->insert(distances->begin(), raw.begin(), raw.end());
+  }
+  int32_t get_n_items() const { return _index.get_n_items(); }
+  int32_t get_n_trees() const { return _index.get_n_trees(); }
+  void verbose(bool v) { _index.verbose(v); }
+  void get_item(int32_t item, float* v) const {
+    vector<uint64_t> packed(_f_internal, 0);
+    _index.get_item(item, &packed[0]);
+    _unpack(&packed[0], v);
+  }
+  void set_seed(int q) { _index.set_seed(q); }
+  bool on_disk_build(const char* filename, char** error) { return _index.on_disk_build(filename, error); }
+};
+
+// annoy python object: instance layout used by PyAnnoyType (tp_basicsize is sizeof(py_annoy))
+typedef struct {
+  PyObject_HEAD
+  int f;  // parsed from the constructor's "f" argument; exposed to Python through py_annoy_members
+  AnnoyIndexInterface<int32_t, float>* ptr;  // index created in py_an_new and deleted in py_an_dealloc
+} py_annoy;
+
+
+// tp_new for annoy.Annoy(f, metric=...): allocates the object plus the C++ index for `metric`.
+// Fixes: every failure after tp_alloc now releases `self`, and a failed PyErr_WarnEx is propagated.
+static PyObject *py_an_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) {
+  py_annoy *self = (py_annoy *)type->tp_alloc(type, 0);
+  if (self == NULL) return NULL;
+  const char *metric = NULL;
+  static char const * kwlist[] = {"f", "metric", NULL};
+  bool ok = PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, &self->f, &metric) != 0;
+  if (ok && !metric) {
+    // This keeps coming up, see #368 etc
+    ok = PyErr_WarnEx(PyExc_FutureWarning, "The default argument for metric will be removed "
+                      "in future version of Annoy. Please pass metric='angular' explicitly.", 1) >= 0;
+    metric = "angular";  // the documented default; the branch below builds the index
+  }
+  if (!ok) {
+    // the argument parser or the warning machinery has already set the exception
+  } else if (!strcmp(metric, "angular")) {
+    self->ptr = new AnnoyIndex<int32_t, float, Angular, Kiss64Random>(self->f);
+  } else if (!strcmp(metric, "euclidean")) {
+    self->ptr = new AnnoyIndex<int32_t, float, Euclidean, Kiss64Random>(self->f);
+  } else if (!strcmp(metric, "manhattan")) {
+    self->ptr = new AnnoyIndex<int32_t, float, Manhattan, Kiss64Random>(self->f);
+  } else if (!strcmp(metric, "hamming")) {
+    self->ptr = new HammingWrapper(self->f);
+  } else if (!strcmp(metric, "dot")) {
+    self->ptr = new AnnoyIndex<int32_t, float, DotProduct, Kiss64Random>(self->f);
+  } else {
+    PyErr_SetString(PyExc_ValueError, "No such metric");
+    ok = false;
+  }
+  if (!ok) { Py_DECREF(self); return NULL; }  // ptr was never assigned here (default tp_alloc zero-fills)
+  return (PyObject *)self;
+}
+
+
+// tp_init slot. The index is already built in py_an_new; this only re-parses (f, metric) so that
+// wrong arguments are rejected here too. Returns 0 on success and -1 with an exception set.
+static int py_an_init(py_annoy *self, PyObject *args, PyObject *kwargs) {
+  const char *metric = NULL;  // parsed but intentionally unused
+  int f;
+  static char const * kwlist[] = {"f", "metric", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, &f, &metric))
+    return -1;  // was `(int) NULL`, i.e. 0, which reported success while an exception was pending
+  return 0;
+}
+
+
+static void py_an_dealloc(py_annoy* self) {  // tp_dealloc: destroy the index, then free the object
+  PyTypeObject* const type = Py_TYPE(self);
+  delete self->ptr;  // deleting a NULL pointer is a no-op
+  type->tp_free((PyObject*)self);
+}
+
+
+static PyMemberDef py_annoy_members[] = {  // data attributes of the type; installed via tp_members in PyAnnoyType
+  {(char*)"f", T_INT, offsetof(py_annoy, f), 0,  // attribute "f" is backed by the int field py_annoy::f, flags 0
+   (char*)""},
+  {NULL}	/* Sentinel */
+};
+
+
+// load(fn, prefault=False): has the index load file `fn`. Returns True, or NULL with IOError
+// set to the message reported by the index.
+static PyObject *py_an_load(py_annoy *self, PyObject *args, PyObject *kwargs) {
+  char *filename, *error;
+  // "b" stores an unsigned char; parsing it straight into a bool was a type mismatch.
+  unsigned char prefault = 0;
+  if (!self->ptr) return NULL;
+  static char const * kwlist[] = {"fn", "prefault", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|b", (char**)kwlist, &filename, &prefault))
+    return NULL;
+  if (!self->ptr->load(filename, prefault != 0, &error)) {
+    PyErr_SetString(PyExc_IOError, error);
+    free(error);
+    return NULL;
+  }
+  Py_RETURN_TRUE;
+}
+
+
+// save(fn, prefault=False): has the index save itself to file `fn`. Returns True, or NULL with
+// IOError set to the message reported by the index.
+static PyObject *py_an_save(py_annoy *self, PyObject *args, PyObject *kwargs) {
+  char *filename, *error;
+  // "b" stores an unsigned char; parsing it straight into a bool was a type mismatch.
+  unsigned char prefault = 0;
+  if (!self->ptr) return NULL;
+  static char const * kwlist[] = {"fn", "prefault", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|b", (char**)kwlist, &filename, &prefault))
+    return NULL;
+  if (!self->ptr->save(filename, prefault != 0, &error)) {
+    PyErr_SetString(PyExc_IOError, error);
+    free(error);
+    return NULL;
+  }
+  Py_RETURN_TRUE;
+}
+
+
+PyObject*
+get_nns_to_python(const vector<int32_t>& result, const vector<float>& distances, int include_distances) {
+  PyObject* l = PyList_New(result.size());
+  for (size_t i = 0; i < result.size(); i++)
+    PyList_SetItem(l, i, PyInt_FromLong(result[i]));
+  if (!include_distances)
+    return l;
+
+  PyObject* d = PyList_New(distances.size());
+  for (size_t i = 0; i < distances.size(); i++)
+    PyList_SetItem(d, i, PyFloat_FromDouble(distances[i]));
+
+  PyObject* t = PyTuple_New(2);
+  PyTuple_SetItem(t, 0, l);
+  PyTuple_SetItem(t, 1, d);
+
+  return t;
+}
+
+
+// Validates an item index: negative values are always rejected and, unless `building` is set,
+// so are values >= get_n_items(). Returns true if acceptable, else sets IndexError and returns false.
+bool check_constraints(py_annoy *self, int32_t item, bool building) {
+  const char* problem = NULL;
+  if (item < 0)
+    problem = "Item index can not be negative";
+  else if (!building && item >= self->ptr->get_n_items())
+    problem = "Item index larger than the largest item index";
+  if (problem) PyErr_SetString(PyExc_IndexError, problem);
+  return problem == NULL;
+}
+
+// get_nns_by_item(i, n, search_k=-1, include_distances=0): neighbours of stored item `i`.
+// Returns the structure built by get_nns_to_python, or NULL with an exception set.
+static PyObject* py_an_get_nns_by_item(py_annoy *self, PyObject *args, PyObject *kwargs) {
+  if (!self->ptr)
+    return NULL;
+  int32_t item, n;
+  int32_t search_k = -1;
+  int32_t include_distances = 0;
+  static char const * kwlist[] = {"i", "n", "search_k", "include_distances", NULL};
+  const int parsed = PyArg_ParseTupleAndKeywords(args, kwargs, "ii|ii", (char**)kwlist,
+                                                 &item, &n, &search_k, &include_distances);
+  if (!parsed || !check_constraints(self, item, false))
+    return NULL;
+  vector<int32_t> result;
+  vector<float> distances;
+  vector<float>* const wanted = include_distances ? &distances : NULL;
+  // Only C++ data is handed to the search, so the GIL is released around it.
+  Py_BEGIN_ALLOW_THREADS;
+  self->ptr->get_nns_by_item(item, n, search_k, &result, wanted);
+  Py_END_ALLOW_THREADS;
+
+  return get_nns_to_python(result, distances, include_distances);
+}
+
+
+// Copies the `f` items of the Python object `v` (fetched as v[0] .. v[f-1]) into *w, which the
+// caller has already sized to `f`. Returns false with a Python exception set on any failure.
+bool convert_list_to_vector(PyObject* v, int f, vector<float>* w) {
+  const Py_ssize_t length = PyObject_Size(v);  // queried once; -1 means v has no len()
+  if (length == -1) {
+    PyErr_Format(PyExc_ValueError, "Expected an iterable, got an object of type \"%s\"", v->ob_type->tp_name);
+    return false;
+  }
+  if (length != f) {
+    PyErr_Format(PyExc_IndexError, "Vector has wrong length (expected %d, got %zd)", f, length);
+    return false;
+  }
+  for (int z = 0; z < f; z++) {
+    PyObject *key = PyInt_FromLong(z);
+    PyObject *pf = key ? PyObject_GetItem(v, key) : NULL;
+    Py_XDECREF(key);
+    if (pf == NULL) return false;  // v[z] raised (e.g. v is a set); NULL used to be dereferenced
+    (*w)[z] = PyFloat_AsDouble(pf);
+    Py_DECREF(pf);
+    if (PyErr_Occurred()) return false;  // item not convertible to float; error used to be ignored
+  }
+  return true;
+}
+
+// get_nns_by_vector(vector, n, search_k=-1, include_distances=0): neighbours of a query vector
+// that must hold exactly self->f items. Returns get_nns_to_python's result, or NULL on error.
+static PyObject* py_an_get_nns_by_vector(py_annoy *self, PyObject *args, PyObject *kwargs) {
+  if (!self->ptr)
+    return NULL;
+  PyObject* query;
+  int32_t n;
+  int32_t search_k = -1;
+  int32_t include_distances = 0;
+  static char const * kwlist[] = {"vector", "n", "search_k", "include_distances", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|ii", (char**)kwlist, &query, &n, &search_k, &include_distances))
+    return NULL;
+  vector<float> coords(self->f);
+  if (!convert_list_to_vector(query, self->f, &coords))
+    return NULL;
+  vector<int32_t> result;
+  vector<float> distances;
+  vector<float>* const wanted = include_distances ? &distances : NULL;
+  // Only C++ data is handed to the search, so the GIL is released around it.
+  Py_BEGIN_ALLOW_THREADS;
+  self->ptr->get_nns_by_vector(&coords[0], n, search_k, &result, wanted);
+  Py_END_ALLOW_THREADS;
+
+  return get_nns_to_python(result, distances, include_distances);
+}
+
+
+// get_item_vector(i): the stored vector of item `i` as a new list of self->f floats, or NULL with an
+// exception set. Allocations are now checked; a NULL list or float used to reach PyList_SetItem.
+static PyObject* py_an_get_item_vector(py_annoy *self, PyObject *args) {
+  int32_t item;
+  if (!self->ptr) return NULL;
+  if (!PyArg_ParseTuple(args, "i", &item) || !check_constraints(self, item, false))
+    return NULL;
+  vector<float> v(self->f);
+  self->ptr->get_item(item, &v[0]);
+  PyObject* l = PyList_New(self->f);
+  for (int z = 0; l != NULL && z < self->f; z++) {
+    PyObject* value = PyFloat_FromDouble(v[z]);
+    if (value == NULL) {
+      Py_DECREF(l);  // drop the partially filled list
+      l = NULL;      // ends the loop; NULL is returned with the error set
+    } else {
+      PyList_SetItem(l, z, value);  // steals the reference to value
+    }
+  }
+  return l;
+}
+
+
+// add_item(i, vector): stores `vector` (exactly self->f items) under the non-negative index `i`.
+// Returns None; on failure returns NULL with an exception set (the index's own message is
+// raised as a plain Exception).
+static PyObject* py_an_add_item(py_annoy *self, PyObject *args, PyObject* kwargs) {
+  if (!self->ptr)
+    return NULL;
+  PyObject* values;
+  int32_t item;
+  static char const * kwlist[] = {"i", "vector", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "iO", (char**)kwlist, &item, &values))
+    return NULL;
+  // building == true: only negative indices are rejected here.
+  if (!check_constraints(self, item, true))
+    return NULL;
+  vector<float> coords(self->f);
+  if (!convert_list_to_vector(values, self->f, &coords))
+    return NULL;
+
+  char* error;
+  const bool added = self->ptr->add_item(item, &coords[0], &error);
+  if (!added) {
+    PyErr_SetString(PyExc_Exception, error);
+    free(error);  // released with free() once the exception has been set from it
+    return NULL;
+  }
+  Py_RETURN_NONE;
+}
+
+// on_disk_build(fn): forwards to the index's on_disk_build with file name `fn`.
+// Returns True, or NULL with IOError set to the message reported by the index.
+static PyObject *py_an_on_disk_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
+  if (!self->ptr) return NULL;
+  char *filename;
+  static char const * kwlist[] = {"fn", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", (char**)kwlist, &filename))
+    return NULL;
+  char *error;
+  if (self->ptr->on_disk_build(filename, &error)) {
+    Py_RETURN_TRUE;
+  }
+  PyErr_SetString(PyExc_IOError, error);
+  free(error);
+  return NULL;
+}
+
+// build(n_trees): runs the index's build() with the GIL released for the duration of the call.
+// Returns True, or NULL with an Exception carrying the message reported by the index.
+static PyObject *py_an_build(py_annoy *self, PyObject *args, PyObject *kwargs) {
+  if (!self->ptr)
+    return NULL;
+  int n_trees;
+  static char const * kwlist[] = {"n_trees", NULL};
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i", (char**)kwlist, &n_trees))
+    return NULL;
+
+  char* error;
+  bool built;
+  Py_BEGIN_ALLOW_THREADS;
+  built = self->ptr->build(n_trees, &error);
+  Py_END_ALLOW_THREADS;
+  if (built) {
+    Py_RETURN_TRUE;
+  }
+  PyErr_SetString(PyExc_Exception, error);
+  free(error);
+  return NULL;
+}
+
+
+// unbuild(): forwards to the index's unbuild(). Returns True, or NULL with an Exception
+// carrying the message reported by the index.
+static PyObject *py_an_unbuild(py_annoy *self) {
+  if (!self->ptr)
+    return NULL;
+  char* error;
+  const bool undone = self->ptr->unbuild(&error);
+  if (undone) {
+    Py_RETURN_TRUE;
+  }
+  PyErr_SetString(PyExc_Exception, error);
+  free(error);
+  return NULL;
+}
+
+
+// unload(): forwards to the index's unload() and always reports True afterwards.
+static PyObject *py_an_unload(py_annoy *self) {
+  AnnoyIndexInterface<int32_t, float>* const index = self->ptr;
+  if (index == NULL)
+    return NULL;
+
+  index->unload();
+  Py_RETURN_TRUE;
+}
+
+
+// get_distance(i, j): distance between two stored items, as a Python float.
+// Both indices go through check_constraints; returns NULL with an exception set on error.
+static PyObject *py_an_get_distance(py_annoy *self, PyObject *args) {
+  if (!self->ptr)
+    return NULL;
+  int32_t first, second;
+  if (!PyArg_ParseTuple(args, "ii", &first, &second))
+    return NULL;
+  // `i` is validated before `j`, so an invalid `i` decides which error is raised.
+  if (!check_constraints(self, first, false))
+    return NULL;
+  if (!check_constraints(self, second, false))
+    return NULL;
+  return PyFloat_FromDouble(self->ptr->get_distance(first, second));
+}
+
+
+// get_n_items(): the value of the index's get_n_items() as a Python integer.
+static PyObject *py_an_get_n_items(py_annoy *self) {
+  if (!self->ptr)
+    return NULL;
+
+  const long count = self->ptr->get_n_items();
+  return PyInt_FromLong(count);
+}
+
+// get_n_trees(): the value of the index's get_n_trees() as a Python integer.
+static PyObject *py_an_get_n_trees(py_annoy *self) {
+  if (!self->ptr)
+    return NULL;
+
+  const long count = self->ptr->get_n_trees();
+  return PyInt_FromLong(count);
+}
+
+// verbose(flag): passes the integer `flag`, converted to bool, to the index's verbose().
+// Returns True, or NULL with an exception set when the argument cannot be parsed.
+static PyObject *py_an_verbose(py_annoy *self, PyObject *args) {
+  if (!self->ptr)
+    return NULL;
+  int flag;
+  if (!PyArg_ParseTuple(args, "i", &flag))
+    return NULL;
+
+  self->ptr->verbose(flag != 0);
+  Py_RETURN_TRUE;
+}
+
+
+// set_seed(seed): hands the integer `seed` to the index's set_seed().
+// Returns None, or NULL with an exception set when the argument cannot be parsed.
+static PyObject *py_an_set_seed(py_annoy *self, PyObject *args) {
+  if (!self->ptr)
+    return NULL;
+
+  int seed;
+  if (!PyArg_ParseTuple(args, "i", &seed))
+    return NULL;
+  self->ptr->set_seed(seed);
+  Py_RETURN_NONE;
+}
+
+
+static PyMethodDef AnnoyMethods[] = {  // method table of the Annoy type: {python name, C function, calling convention, docstring}
+  {"load",	(PyCFunction)py_an_load, METH_VARARGS | METH_KEYWORDS, "Loads (mmaps) an index from disk."},
+  {"save",	(PyCFunction)py_an_save, METH_VARARGS | METH_KEYWORDS, "Saves the index to disk."},
+  {"get_nns_by_item",(PyCFunction)py_an_get_nns_by_item, METH_VARARGS | METH_KEYWORDS, "Returns the `n` closest items to item `i`.\n\n:param search_k: the query will inspect up to `search_k` nodes.\n`search_k` gives you a run-time tradeoff between better accuracy and speed.\n`search_k` defaults to `n_trees * n` if not provided.\n\n:param include_distances: If `True`, this function will return a\n2 element tuple of lists. The first list contains the `n` closest items.\nThe second list contains the corresponding distances."},
+  {"get_nns_by_vector",(PyCFunction)py_an_get_nns_by_vector, METH_VARARGS | METH_KEYWORDS, "Returns the `n` closest items to vector `vector`.\n\n:param search_k: the query will inspect up to `search_k` nodes.\n`search_k` gives you a run-time tradeoff between better accuracy and speed.\n`search_k` defaults to `n_trees * n` if not provided.\n\n:param include_distances: If `True`, this function will return a\n2 element tuple of lists. The first list contains the `n` closest items.\nThe second list contains the corresponding distances."},
+  {"get_item_vector",(PyCFunction)py_an_get_item_vector, METH_VARARGS, "Returns the vector for item `i` that was previously added."},
+  {"add_item",(PyCFunction)py_an_add_item, METH_VARARGS | METH_KEYWORDS, "Adds item `i` (any nonnegative integer) with vector `v`.\n\nNote that it will allocate memory for `max(i)+1` items."},
+  {"on_disk_build",(PyCFunction)py_an_on_disk_build, METH_VARARGS | METH_KEYWORDS, "Build will be performed with storage on disk instead of RAM."},
+  {"build",(PyCFunction)py_an_build, METH_VARARGS | METH_KEYWORDS, "Builds a forest of `n_trees` trees.\n\nMore trees give higher precision when querying. After calling `build`,\nno more items can be added."},
+  {"unbuild",(PyCFunction)py_an_unbuild, METH_NOARGS, "Unbuilds the tree in order to allows adding new items.\n\nbuild() has to be called again afterwards in order to\nrun queries."},
+  {"unload",(PyCFunction)py_an_unload, METH_NOARGS, "Unloads an index from disk."},
+  {"get_distance",(PyCFunction)py_an_get_distance, METH_VARARGS, "Returns the distance between items `i` and `j`."},
+  {"get_n_items",(PyCFunction)py_an_get_n_items, METH_NOARGS, "Returns the number of items in the index."},
+  {"get_n_trees",(PyCFunction)py_an_get_n_trees, METH_NOARGS, "Returns the number of trees in the index."},
+  {"verbose",(PyCFunction)py_an_verbose, METH_VARARGS, ""},
+  {"set_seed",(PyCFunction)py_an_set_seed, METH_VARARGS, "Sets the seed of Annoy's random number generator."},
+  {NULL, NULL, 0, NULL}		 /* Sentinel */
+};
+
+
+static PyTypeObject PyAnnoyType = {  // type object for annoy.Annoy; slots are positional, 0 leaves a slot unset
+  PyVarObject_HEAD_INIT(NULL, 0)
+  "annoy.Annoy",          /*tp_name*/
+  sizeof(py_annoy),       /*tp_basicsize*/
+  0,                      /*tp_itemsize*/
+  (destructor)py_an_dealloc, /*tp_dealloc*/
+  0,                      /*tp_print*/
+  0,                      /*tp_getattr*/
+  0,                      /*tp_setattr*/
+  0,                      /*tp_compare*/
+  0,                      /*tp_repr*/
+  0,                      /*tp_as_number*/
+  0,                      /*tp_as_sequence*/
+  0,                      /*tp_as_mapping*/
+  0,                      /*tp_hash */
+  0,                      /*tp_call*/
+  0,                      /*tp_str*/
+  0,                      /*tp_getattro*/
+  0,                      /*tp_setattro*/
+  0,                      /*tp_as_buffer*/
+  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+  ANNOY_DOC,              /* tp_doc */
+  0,                      /* tp_traverse */
+  0,                      /* tp_clear */
+  0,                      /* tp_richcompare */
+  0,                      /* tp_weaklistoffset */
+  0,                      /* tp_iter */
+  0,                      /* tp_iternext */
+  AnnoyMethods,           /* tp_methods */
+  py_annoy_members,       /* tp_members */
+  0,                      /* tp_getset */
+  0,                      /* tp_base */
+  0,                      /* tp_dict */
+  0,                      /* tp_descr_get */
+  0,                      /* tp_descr_set */
+  0,                      /* tp_dictoffset */
+  (initproc)py_an_init,   /* tp_init */
+  0,                      /* tp_alloc */
+  py_an_new,              /* tp_new */
+};
+
+static PyMethodDef module_methods[] = {  // module-level function table: empty apart from the sentinel
+  {NULL}	/* Sentinel */
+};
+
+#if PY_MAJOR_VERSION >= 3
+  static struct PyModuleDef moduledef = {  // Python 3 only: definition passed to PyModule_Create in create_module
+    PyModuleDef_HEAD_INIT,
+    "annoylib",          /* m_name */
+    ANNOY_DOC,           /* m_doc */
+    -1,                  /* m_size */
+    module_methods,      /* m_methods */
+    NULL,                /* m_reload */
+    NULL,                /* m_traverse */
+    NULL,                /* m_clear */
+    NULL,                /* m_free */
+  };
+#endif
+
+// Readies PyAnnoyType, creates the "annoylib" module for the Python major version being
+// compiled against, and publishes the type on it as "Annoy". Returns NULL on failure.
+PyObject *create_module(void) {
+  if (PyType_Ready(&PyAnnoyType) < 0)
+    return NULL;
+
+#if PY_MAJOR_VERSION >= 3
+  PyObject *module = PyModule_Create(&moduledef);
+#else
+  PyObject *module = Py_InitModule("annoylib", module_methods);
+#endif
+  if (module != NULL) {
+    // PyModule_AddObject takes over one reference on success, hence the extra INCREF
+    // on the statically allocated type object.
+    Py_INCREF(&PyAnnoyType);
+    PyModule_AddObject(module, "Annoy", (PyObject *)&PyAnnoyType);
+  }
+  return module;
+}
+
+#if PY_MAJOR_VERSION >= 3
+  PyMODINIT_FUNC PyInit_annoylib(void) {  // Python 3 entry point of the extension module
+    return create_module();      // Python 3 expects the module object (NULL on failure) to be returned
+  }
+#else
+  PyMODINIT_FUNC initannoylib(void) {  // Python 2 entry point; the result of create_module is not returned
+    create_module();
+  }
+#endif
+
+
+// vim: tabstop=2 shiftwidth=2
diff --git a/core/src/index/thirdparty/annoy/src/kissrandom.h b/core/src/index/thirdparty/annoy/src/kissrandom.h
new file mode 100644
index 0000000000..9e40110f3e
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/src/kissrandom.h
@@ -0,0 +1,106 @@
+#ifndef KISSRANDOM_H
+#define KISSRANDOM_H
+
+#if defined(_MSC_VER) && _MSC_VER == 1500
+typedef unsigned __int32    uint32_t;
+typedef unsigned __int64    uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+// KISS = "keep it simple, stupid", but high quality random number generator
+// http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf -> "Use a good RNG and build it into your code"
+// http://mathforum.org/kb/message.jspa?messageID=6627731
+// https://de.wikipedia.org/wiki/KISS_(Zufallszahlengenerator)
+
+// 32 bit KISS
+struct Kiss32Random {
+  // State of the three component generators; `c` is the multiply-with-carry carry word.
+  uint32_t x, y, z, c;
+
+  // seed must be != 0
+  Kiss32Random(uint32_t seed = 123456789)
+    : x(seed), y(362436000), z(521288629), c(7654321) {}
+
+  // Advances all three generators by one step and returns the sum of their states.
+  uint32_t kiss() {
+    // Linear congruence generator
+    x = 69069 * x + 12345;
+
+    // Xor shift
+    y ^= y << 13;
+    y ^= y >> 17;
+    y ^= y << 5;
+
+    // Multiply-with-carry: the low half becomes the new z, the high half the new carry
+    const uint64_t product = 698769069ULL * z + c;
+    z = (uint32_t) product;
+    c = product >> 32;
+    return x + y + z;
+  }
+
+  // Draw random 0 or 1
+  inline int flip() {
+    const uint32_t draw = kiss();
+    return draw & 1;
+  }
+
+  // Draw random integer between 0 and n-1 where n is at most the number of data points you have
+  inline size_t index(size_t n) {
+    const uint32_t draw = kiss();
+    return draw % n;
+  }
+
+  // Replaces only x; y, z and c keep whatever state they currently have.
+  inline void set_seed(uint32_t seed) {
+    x = seed;
+  }
+};
+
+// 64 bit KISS. Use this if you have more than about 2^24 data points ("big data" ;) )
+struct Kiss64Random {
+  uint64_t x;
+  uint64_t y;
+  uint64_t z;
+  uint64_t c;
+
+  // seed must be != 0
+  Kiss64Random(uint64_t seed = 1234567890987654321ULL) {
+    x = seed;
+    y = 362436362436362436ULL;
+    z = 1066149217761810ULL;
+    c = 123456123456123456ULL;
+  }
+
+  uint64_t kiss() {
+    // Linear congruence generator
+    z = 6906969069LL*z+1234567;
+    // Xor shift
+    y ^= (y<<13);
+    y ^= (y>>17);
+    y ^= (y<<43);
+    // Multiply-with-carry (uint128_t t = (2^58 + 1) * x + c; c = t >> 64; x = (uint64_t) t)
+    uint64_t t = (x<<58)+c;
+    c = (x>>6);
+    x += t;
+    c += (x<t);
+
+    return x + y + z;
+  }
+  inline int flip() {
+    // Draw random 0 or 1
+    return kiss() & 1;
+  }
+  inline size_t index(size_t n) {
+    // Draw random integer between 0 and n-1 where n is at most the number of data points you have
+    return kiss() % n;
+  }
+  // Takes the full 64-bit seed, matching the constructor. The parameter used to be uint32_t,
+  // which silently dropped the upper half; unsigned seeds below 2^32 behave exactly as before.
+  inline void set_seed(uint64_t seed) {
+    x = seed;
+  }
+};
+
+#endif
+// vim: tabstop=2 shiftwidth=2
diff --git a/core/src/index/thirdparty/annoy/src/mman.h b/core/src/index/thirdparty/annoy/src/mman.h
new file mode 100644
index 0000000000..113d46d3ee
--- /dev/null
+++ b/core/src/index/thirdparty/annoy/src/mman.h
@@ -0,0 +1,238 @@
+
+// This is from https://code.google.com/p/mman-win32/
+// 
+// Licensed under MIT
+
+#ifndef _MMAN_WIN32_H
+#define _MMAN_WIN32_H
+
+#ifndef _WIN32_WINNT		// Allow use of features specific to Windows XP or later.                   
+#define _WIN32_WINNT 0x0501	// Change this to the appropriate value to target other versions of Windows.
+#endif						
+
+#include <sys/types.h>
+#include <windows.h>
+#include <errno.h>
+#include <io.h>
+
+#define PROT_NONE       0
+#define PROT_READ       1
+#define PROT_WRITE      2
+#define PROT_EXEC       4
+
+#define MAP_FILE        0
+#define MAP_SHARED      1
+#define MAP_PRIVATE     2
+#define MAP_TYPE        0xf
+#define MAP_FIXED       0x10
+#define MAP_ANONYMOUS   0x20
+#define MAP_ANON        MAP_ANONYMOUS
+
+#define MAP_FAILED      ((void *)-1)
+
+/* Flags for msync. */
+#define MS_ASYNC        1
+#define MS_SYNC         2
+#define MS_INVALIDATE   4
+
+#ifndef FILE_MAP_EXECUTE
+#define FILE_MAP_EXECUTE    0x0020
+#endif
+
+// Converts a GetLastError() code into the value stored in errno by the wrappers in this header.
+// No real translation exists yet: `err` is passed through unchanged and `deferr` is unused.
+static int __map_mman_error(const DWORD err, const int deferr)
+{
+    //TODO: implement
+    return (err == 0) ? 0 : err;
+}
+
+// Translates POSIX PROT_* bits into the page-protection constant passed to CreateFileMapping
+// and VirtualProtect. PROT_NONE yields 0. PROT_READ is never inspected: any other request is
+// at least readable, PROT_WRITE upgrades it to read/write, and PROT_EXEC selects the
+// executable flavour of whichever of the two applies.
+static DWORD __map_mmap_prot_page(const int prot)
+{
+    if (prot == PROT_NONE)
+        return 0;
+
+    const int writable = (prot & PROT_WRITE) != 0;
+    const int executable = (prot & PROT_EXEC) != 0;
+
+    if (executable)
+    {
+        return writable ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
+    }
+
+    // not executable
+    return writable ? PAGE_READWRITE : PAGE_READONLY;
+}
+
+// Translates POSIX PROT_* bits into the FILE_MAP_* access mask handed to MapViewOfFile.
+// Read, write and exec each contribute their own flag; PROT_NONE gives an empty mask.
+static DWORD __map_mmap_prot_file(const int prot)
+{
+    // PROT_NONE is 0, so none of the bit tests below can match it and the mask stays 0.
+    DWORD access = 0;
+
+    if (prot & PROT_READ)
+        access |= FILE_MAP_READ;
+    if (prot & PROT_WRITE)
+        access |= FILE_MAP_WRITE;
+    if (prot & PROT_EXEC)
+        access |= FILE_MAP_EXECUTE;
+
+    return access;
+}
+
+inline void* mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)  // POSIX-style mmap on CreateFileMapping/MapViewOfFile; `addr` is never read
+{
+    HANDLE fm, h;
+    
+    void * map = MAP_FAILED;
+    
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4293)
+#endif
+
+    const DWORD dwFileOffsetLow = (sizeof(off_t) <= sizeof(DWORD)) ?  // offset split into low/high DWORD halves for MapViewOfFile
+                    (DWORD)off : (DWORD)(off & 0xFFFFFFFFL);
+    const DWORD dwFileOffsetHigh = (sizeof(off_t) <= sizeof(DWORD)) ?
+                    (DWORD)0 : (DWORD)((off >> 32) & 0xFFFFFFFFL);
+    const DWORD protect = __map_mmap_prot_page(prot);
+    const DWORD desiredAccess = __map_mmap_prot_file(prot);
+
+    const off_t maxSize = off + (off_t)len;  // size given to CreateFileMapping: end of the requested window
+
+    const DWORD dwMaxSizeLow = (sizeof(off_t) <= sizeof(DWORD)) ? 
+                    (DWORD)maxSize : (DWORD)(maxSize & 0xFFFFFFFFL);
+    const DWORD dwMaxSizeHigh = (sizeof(off_t) <= sizeof(DWORD)) ?
+                    (DWORD)0 : (DWORD)((maxSize >> 32) & 0xFFFFFFFFL);
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+    errno = 0;
+    
+    if (len == 0 
+        /* Unsupported flag combinations */
+        || (flags & MAP_FIXED) != 0
+        /* Unsupported protection combinations */
+        || prot == PROT_EXEC)
+    {
+        errno = EINVAL;
+        return MAP_FAILED;
+    }
+    
+    h = ((flags & MAP_ANONYMOUS) == 0) ?  // anonymous mappings pass INVALID_HANDLE_VALUE instead of a file handle
+                    (HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE;
+
+    if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE)
+    {
+        errno = EBADF;
+        return MAP_FAILED;
+    }
+
+    fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL);
+
+    if (fm == NULL)
+    {
+        errno = __map_mman_error(GetLastError(), EPERM);
+        return MAP_FAILED;
+    }
+  
+    map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len);
+
+    CloseHandle(fm);  // the mapping handle is closed right away; only the view pointer is returned
+  
+    if (map == NULL)
+    {
+        errno = __map_mman_error(GetLastError(), EPERM);
+        return MAP_FAILED;
+    }
+
+    return map;
+}
+
+// POSIX munmap() on top of UnmapViewOfFile; `len` is not used.
+inline int munmap(void *addr, size_t len)
+{
+    if (!UnmapViewOfFile(addr)) {
+        errno = __map_mman_error(GetLastError(), EPERM);
+        return -1;
+    }
+    return 0;
+}
+
+// POSIX mprotect() on top of VirtualProtect, using the page constant chosen by
+// __map_mmap_prot_page. Returns 0, or -1 with errno set from GetLastError().
+inline int mprotect(void *addr, size_t len, int prot)
+{
+    DWORD previous = 0;  // out-parameter of VirtualProtect; its value is not used afterwards
+    const DWORD wanted = __map_mmap_prot_page(prot);
+    if (!VirtualProtect(addr, len, wanted, &previous)) {
+        errno = __map_mman_error(GetLastError(), EPERM);
+        return -1;
+    }
+    return 0;
+}
+
+// POSIX msync() on top of FlushViewOfFile; `flags` is not consulted.
+inline int msync(void *addr, size_t len, int flags)
+{
+    if (!FlushViewOfFile(addr, len)) {
+        errno = __map_mman_error(GetLastError(), EPERM);
+        return -1;
+    }
+    return 0;
+}
+
+// POSIX mlock() on top of VirtualLock. Returns 0, or -1 with errno set from GetLastError().
+inline int mlock(const void *addr, size_t len)
+{
+    if (!VirtualLock((LPVOID)addr, len)) {
+        errno = __map_mman_error(GetLastError(), EPERM);
+        return -1;
+    }
+    return 0;
+}
+
+// POSIX munlock() on top of VirtualUnlock. Returns 0, or -1 with errno set from GetLastError().
+inline int munlock(const void *addr, size_t len)
+{
+    if (!VirtualUnlock((LPVOID)addr, len)) {
+        errno = __map_mman_error(GetLastError(), EPERM);
+        return -1;
+    }
+    return 0;
+}
+
+#if !defined(__MINGW32__)
+// ftruncate() replacement for Windows toolchains other than MinGW: sets the end of file `fd`
+// at `size` bytes. Returns 0 on success, or -1 with errno set to EBADF (negative fd or
+// invalid handle) or EIO (any other failure).
+// NOTE(review): the file pointer is left at `size`; `cur` is read but never restored — confirm
+// no caller relies on the offset being preserved.
+inline int ftruncate(int fd, unsigned int size) {
+    if (fd < 0) {
+        errno = EBADF;
+        return -1;
+    }
+
+    HANDLE h = (HANDLE)_get_osfhandle(fd);
+    unsigned int cur = SetFilePointer(h, 0, NULL, FILE_CURRENT);
+    if (cur == ~0 || SetFilePointer(h, size, NULL, FILE_BEGIN) == ~0 || !SetEndOfFile(h)) {
+        // Capture the error code once and map that copy; it used to be stored in an unused
+        // local while the switch below it called GetLastError() a second time.
+        const DWORD error = GetLastError();
+        errno = (error == ERROR_INVALID_HANDLE) ? EBADF : EIO;
+        return -1;
+    }
+
+    return 0;
+}
+#endif
+
+#endif 
diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt
index 234b75394c..b9ce7ee834 100644
--- a/core/src/index/unittest/CMakeLists.txt
+++ b/core/src/index/unittest/CMakeLists.txt
@@ -89,6 +89,16 @@ if (NOT TARGET test_idmap)
 endif ()
 target_link_libraries(test_idmap ${depend_libs} ${unittest_libs} ${basic_libs})
 
+#<ANNOY-TEST>
+set(annoy_srcs  # extra sources compiled directly into the test_annoy executable
+        ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/IndexAnnoy.cpp
+        )
+
+if (NOT TARGET test_annoy)  # create the executable only if no such target exists yet
+    add_executable(test_annoy test_annoy.cpp ${annoy_srcs} ${util_srcs})
+endif ()
+target_link_libraries(test_annoy ${depend_libs} ${unittest_libs} ${basic_libs})
+
 #<HNSW-TEST>
 set(hnsw_srcs
         ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/IndexHNSW.cpp
@@ -144,6 +154,7 @@ install(TARGETS test_idmap DESTINATION unittest)
 install(TARGETS test_binaryidmap DESTINATION unittest)
 install(TARGETS test_sptag DESTINATION unittest)
 install(TARGETS test_knowhere_common DESTINATION unittest)
+install(TARGETS test_annoy DESTINATION unittest)
 
 if (KNOWHERE_GPU_VERSION)
     install(TARGETS test_gpuresource DESTINATION unittest)
diff --git a/core/src/index/unittest/test_annoy.cpp b/core/src/index/unittest/test_annoy.cpp
new file mode 100644
index 0000000000..77dc49d19a
--- /dev/null
+++ b/core/src/index/unittest/test_annoy.cpp
@@ -0,0 +1,221 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#include <gtest/gtest.h>
+#include <src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h>
+#include <iostream>
+#include <sstream>
+
+#include "knowhere/common/Exception.h"
+#include "knowhere/index/vector_index/IndexAnnoy.h"
+
+#include "unittest/utils.h"
+
+using ::testing::Combine;
+using ::testing::TestWithParam;
+using ::testing::Values;
+
+// Manual smoke test for IndexAnnoy: builds an index over random vectors and prints query results,
+// first without and then with a bitset of deleted ids. Nothing is asserted; it returns 0 when done.
+int
+main() {
+    int64_t d = 64;      // dimension
+    int64_t nb = 10000;  // database size
+    int64_t nq = 10;     // number of queries
+    faiss::ConcurrentBitsetPtr bitset = std::make_shared<faiss::ConcurrentBitset>(nb);
+
+    int64_t* ids = new int64_t[nb];
+    float* xb = new float[d * nb];
+    float* xq = new float[d * nq];
+
+    for (int i = 0; i < nb; i++) {
+        for (int j = 0; j < d; j++) xb[d * i + j] = (float)drand48();
+        xb[d * i] += i / 1000.;
+        ids[i] = i;
+    }
+    printf("gen xb and ids done! \n");
+
+    auto random_seed = (unsigned)time(NULL);
+    printf("delete ids: \n");
+    for (int i = 0; i < nq; i++) {
+        // rand_r() % nb has type int64_t (nb is int64_t), so print it as long long rather than with "%d".
+        const int64_t tmp = rand_r(&random_seed) % nb;
+        printf("%lld\n", static_cast<long long>(tmp));
+        bitset->set(tmp);
+        // Query i is a copy of the base vector whose id was just set in bitset.
+        for (int j = 0; j < d; j++) xq[d * i + j] = xb[d * tmp + j];
+    }
+    printf("\n");
+
+    int k = 4;
+    int n_trees = 5;
+    int search_k = 100;
+    milvus::knowhere::IndexAnnoy index;
+    milvus::knowhere::DatasetPtr base_dataset = generate_dataset(nb, d, (const void*)xb, ids);
+
+    milvus::knowhere::Config base_conf{
+        {milvus::knowhere::meta::DIM, d},
+        {milvus::knowhere::meta::TOPK, k},
+        {milvus::knowhere::IndexParams::n_trees, n_trees},
+        {milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2},
+    };
+    milvus::knowhere::DatasetPtr query_dataset = generate_query_dataset(nq, d, (const void*)xq);
+    milvus::knowhere::Config query_conf{
+        {milvus::knowhere::meta::DIM, d},
+        {milvus::knowhere::meta::TOPK, k},
+        {milvus::knowhere::IndexParams::search_k, search_k},
+    };
+
+    index.BuildAll(base_dataset, base_conf);
+
+    printf("------------sanity check----------------\n");
+    {  // sanity check: print ids and distances of the first 5 queries (relies on nq >= 5)
+        auto res = index.Query(query_dataset, query_conf);
+        printf("Query done!\n");
+        const int64_t* I = res->Get<int64_t*>(milvus::knowhere::meta::IDS);
+        float* D = res->Get<float*>(milvus::knowhere::meta::DISTANCE);
+
+        printf("I=\n");
+        for (int i = 0; i < 5; i++) {
+            for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]);
+            printf("\n");
+        }
+
+        printf("D=\n");
+        for (int i = 0; i < 5; i++) {
+            for (int j = 0; j < k; j++) printf("%7g ", D[i * k + j]);
+            printf("\n");
+        }
+    }
+
+    printf("---------------search xq-------------\n");
+    {  // search xq: print the ids returned for every query
+        auto res = index.Query(query_dataset, query_conf);
+        const int64_t* I = res->Get<int64_t*>(milvus::knowhere::meta::IDS);
+
+        printf("I=\n");
+        for (int i = 0; i < nq; i++) {
+            for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]);
+            printf("\n");
+        }
+    }
+
+    printf("----------------search xq with delete------------\n");
+    {  // search xq with delete: same query after handing bitset to SetBlacklist()
+        index.SetBlacklist(bitset);
+        auto res = index.Query(query_dataset, query_conf);
+        auto I = res->Get<int64_t*>(milvus::knowhere::meta::IDS);
+
+        printf("I=\n");
+        for (int i = 0; i < nq; i++) {
+            for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]);
+            printf("\n");
+        }
+    }
+
+    delete[] xb;
+    delete[] xq;
+    delete[] ids;
+
+    return 0;
+}
+
+/*
+class AnnoyTest : public DataGen, public TestWithParam<std::string> {
+ protected:
+    void
+    SetUp() override {
+        IndexType = GetParam();
+        std::cout << "IndexType from GetParam() is: " << IndexType << std::endl;
+        Generate(128, 1000, 5);
+        index_ = std::make_shared<milvus::knowhere::IndexAnnoy>();
+        conf = milvus::knowhere::Config{
+            {milvus::knowhere::meta::DIM, dim},
+            {milvus::knowhere::meta::TOPK, 1},
+            {milvus::knowhere::IndexParams::n_trees, 4},
+            {milvus::knowhere::IndexParams::search_k, 100},
+            {milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2},
+        };
+
+//        Init_with_default();
+    }
+
+ protected:
+    milvus::knowhere::Config conf;
+    std::shared_ptr<milvus::knowhere::IndexAnnoy> index_ = nullptr;
+    std::string IndexType;
+};
+
+INSTANTIATE_TEST_CASE_P(AnnoyParameters, AnnoyTest, Values(""));
+
+TEST_P(AnnoyTest, annoy_basic) {
+    assert(!xb.empty());
+
+//    index_->Train(base_dataset, conf);
+    index_->BuildAll(base_dataset, conf);
+    auto result = index_->Query(query_dataset, conf);
+    AssertAnns(result, nq, k);
+
+    {
+        auto ids = result->Get<int64_t*>(milvus::knowhere::meta::IDS);
+        auto dist = result->Get<float*>(milvus::knowhere::meta::DISTANCE);
+
+        std::stringstream ss_id;
+        std::stringstream ss_dist;
+        for (auto i = 0; i < nq; i++) {
+            for (auto j = 0; j < k; ++j) {
+                // ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
+                // ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
+                ss_id << *((int64_t*)(ids) + i * k + j) << " ";
+                ss_dist << *((float*)(dist) + i * k + j) << " ";
+            }
+            ss_id << std::endl;
+            ss_dist << std::endl;
+        }
+        std::cout << "id\n" << ss_id.str() << std::endl;
+        std::cout << "dist\n" << ss_dist.str() << std::endl;
+    }
+}
+
+TEST_P(AnnoyTest, annoy_delete) {
+    assert(!xb.empty());
+
+//    index_->Train(base_dataset, conf);
+    index_->BuildAll(base_dataset, conf);
+    // index_->Add(base_dataset, conf);
+    faiss::ConcurrentBitsetPtr bitset = std::make_shared<faiss::ConcurrentBitset>(nb);
+    for (auto i = 0; i < nq; ++ i) {
+        bitset->set(i);
+    }
+    auto result = index_->Query(query_dataset, conf);
+    AssertAnns(result, nq, k);
+
+    {
+        auto ids = result->Get<int64_t*>(milvus::knowhere::meta::IDS);
+        auto dist = result->Get<float*>(milvus::knowhere::meta::DISTANCE);
+
+        std::stringstream ss_id;
+        std::stringstream ss_dist;
+        for (auto i = 0; i < nq; i++) {
+            for (auto j = 0; j < k; ++j) {
+                // ss_id << *ids->data()->GetValues<int64_t>(1, i * k + j) << " ";
+                // ss_dist << *dists->data()->GetValues<float>(1, i * k + j) << " ";
+                ss_id << *((int64_t*)(ids) + i * k + j) << " ";
+                ss_dist << *((float*)(dist) + i * k + j) << " ";
+            }
+            ss_id << std::endl;
+            ss_dist << std::endl;
+        }
+        std::cout << "id\n" << ss_id.str() << std::endl;
+        std::cout << "dist\n" << ss_dist.str() << std::endl;
+    }
+}
+*/
diff --git a/core/src/server/delivery/request/DeleteByIDRequest.cpp b/core/src/server/delivery/request/DeleteByIDRequest.cpp
index 9262c8386a..92bd066d42 100644
--- a/core/src/server/delivery/request/DeleteByIDRequest.cpp
+++ b/core/src/server/delivery/request/DeleteByIDRequest.cpp
@@ -71,6 +71,7 @@ DeleteByIDRequest::OnExecute() {
         if (table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IDMAP &&
             table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_BIN_IDMAP &&
             table_schema.engine_type_ != (int32_t)engine::EngineType::HNSW &&
+            table_schema.engine_type_ != (int32_t)engine::EngineType::ANNOY &&
             table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFFLAT &&
             table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_BIN_IVFFLAT &&
             table_schema.engine_type_ != (int32_t)engine::EngineType::FAISS_IVFSQ8 &&
diff --git a/tests/milvus_python_test/test_connect.py b/tests/milvus_python_test/test_connect.py
index efc0f92359..14704b657d 100644
--- a/tests/milvus_python_test/test_connect.py
+++ b/tests/milvus_python_test/test_connect.py
@@ -38,7 +38,7 @@ class TestConnect:
         if not connect.connected():
             milvus = get_milvus(args["handler"])
             uri_value = "tcp://%s:%s" % (args["ip"], args["port"])
-            milvus.connect(uri=uri_value)
+            milvus.connect(uri=uri_value, timeout=5)
             res = milvus.disconnect()
             with pytest.raises(Exception) as e:
                 res = milvus.disconnect()
@@ -181,9 +181,8 @@ class TestConnect:
         '''
         milvus = get_milvus(args["handler"])
         uri_value = "tcp://%s:%s" % (args["ip"], args["port"])
-        milvus.connect(uri=uri_value)
-        
-        milvus.connect(uri=uri_value)
+        milvus.connect(uri=uri_value, timeout=5)
+        milvus.connect(uri=uri_value, timeout=5)
         assert milvus.connected()
 
     def test_connect_disconnect_repeatedly_once(self, args):
@@ -209,10 +208,10 @@ class TestConnect:
         times = 10
         milvus = get_milvus(args["handler"])
         uri_value = "tcp://%s:%s" % (args["ip"], args["port"])
-        milvus.connect(uri=uri_value)
+        milvus.connect(uri=uri_value, timeout=5)
         for i in range(times):
             milvus.disconnect()
-            milvus.connect(uri=uri_value)
+            milvus.connect(uri=uri_value, timeout=5)
         assert milvus.connected()
 
     # TODO: enable
diff --git a/tests/milvus_python_test/test_search_vectors.py b/tests/milvus_python_test/test_search_vectors.py
index 2033715692..21a482d830 100644
--- a/tests/milvus_python_test/test_search_vectors.py
+++ b/tests/milvus_python_test/test_search_vectors.py
@@ -851,7 +851,7 @@ class TestSearchBase:
                  'store_raw_vector': False}
         # create collection
         milvus = get_milvus(args["handler"])
-        milvus.connect(uri=uri)
+        milvus.connect(uri=uri, timeout=5)
         milvus.create_collection(param)
         vectors, ids = self.init_data(milvus, collection, nb=nb)
         query_vecs = vectors[nb//2:nb]
@@ -864,7 +864,7 @@ class TestSearchBase:
 
         for i in range(threads_num):
             milvus = get_milvus(args["handler"])
-            milvus.connect(uri=uri)
+            milvus.connect(uri=uri, timeout=5)
             t = threading.Thread(target=search, args=(milvus, ))
             threads.append(t)
             t.start()
@@ -932,7 +932,7 @@ class TestSearchBase:
                      'metric_type': MetricType.L2}
             # create collection
             milvus = get_milvus(args["handler"])
-            milvus.connect(uri=uri)
+            milvus.connect(uri=uri, timeout=5)
             milvus.create_collection(param)
             status, ids = milvus.add_vectors(collection, vectors)
             assert status.OK()
@@ -973,7 +973,7 @@ class TestSearchBase:
                      'metric_type': MetricType.L2}
             # create collection
             milvus = get_milvus(args["handler"])
-            milvus.connect(uri=uri)
+            milvus.connect(uri=uri, timeout=5)
             milvus.create_collection(param)
             status, ids = milvus.add_vectors(collection, vectors)
             assert status.OK()