Merge remote-tracking branch 'official/0.6.0' into issue-548

xiaojun.lin 2019-11-30 18:04:22 +08:00
commit 6b84ec5f79
577 changed files with 100359 additions and 532 deletions

View File

@ -1,7 +1,7 @@
---
name: "\U0001F41B Bug report"
about: Create a bug report to help us improve Milvus
title: "[BUG]"
title: ''
labels: ''
assignees: ''

View File

@ -1,7 +1,7 @@
---
name: "\U0001F4DD Documentation request"
about: Report incorrect or needed documentation
title: "[DOC]"
title: ''
labels: ''
assignees: ''

View File

@ -1,7 +1,7 @@
---
name: "\U0001F680 Feature request"
about: Suggest an idea for Milvus
title: "[FEATURE]"
title: ''
labels: ''
assignees: ''

View File

@ -1,7 +1,7 @@
---
name: "\U0001F914 General question"
about: Ask a general question about Milvus
title: "[QUESTION]"
title: ''
labels: ''
assignees: ''

View File

@ -21,13 +21,26 @@ Please mark all changes in the changelog and use the ticket from JIRA.
- \#440 - Server cannot startup with gpu_resource_config.enable=false in GPU version
- \#458 - Index data is not compatible between 0.5 and 0.6
- \#465 - Server hang caused by searching with nsg index
- \#485 - Increase code coverage rate
- \#486 - GPU not used during index building
- \#497 - CPU-version search performance decreased
- \#504 - The code coverage rate of core/src/scheduler/optimizer is too low
- \#509 - IVF_PQ index build trapped in an infinite loop caused by invalid params
- \#513 - Unittest DELETE_BY_RANGE sometimes failed
- \#523 - Erase file data from cache once the file is marked as deleted
- \#527 - faiss benchmark not compatible with faiss 1.6.0
- \#530 - BuildIndex stops when building index and searching simultaneously
- \#532 - Assign value to `table_name` from conftest shell
- \#533 - NSG build failed with MetricType Inner Product
- \#543 - Client raises exception in shards when search result is empty
- \#545 - Avoid infinite loop in build index thread when error occurs
- \#547 - NSG build failed using GPU edition if gpu_enable is set to false
- \#548 - NSG search accuracy is too low
- \#552 - Server down while building index type IVF_PQ using GPU edition
- \#561 - Milvus server should report exception/error message or terminate on MySQL metadata backend error
- \#599 - Build index log is incorrect
- \#602 - Optimizer specify wrong gpu_id
- \#606 - No log generated during building index with CPU
## Feature
- \#12 - Pure CPU version for Milvus
@ -36,25 +49,32 @@ Please mark all changes in the changelog and use the ticket from JIRA.
- \#226 - Experimental shards middleware for Milvus
- \#227 - Support new index types SPTAG-KDT and SPTAG-BKT
- \#346 - Support build index with multiple gpu
- \#420 - Update shards merge part to match v0.5.3
- \#488 - Add log in scheduler/optimizer
- \#502 - C++ SDK support IVFPQ and SPTAG
- \#560 - Add version in server config file
- \#605 - Print more messages when server starts
## Improvement
- \#255 - Add detailed version of ivfsq8 test report
- \#260 - C++ SDK README
- \#266 - RPC request source code refactor
- \#274 - Log the time cost during preloading data
- \#275 - Rename C++ SDK IndexType
- \#284 - Change C++ SDK to shared library
- \#306 - Use int64 for all config integer
- \#310 - Add Q&A for 'protocol https not supported or disabled in libcurl' issue
- \#314 - Add Find FAISS in CMake
- \#322 - Add option to enable / disable prometheus
- \#354 - Build migration scripts into milvus docker image
- \#358 - Add more information in build.sh and install.md
- \#404 - Add virtual method Init() in Pass abstract class
- \#409 - Add a Fallback pass in optimizer
- \#433 - C++ SDK query result is not easy to use
- \#449 - Add ShowPartitions example for C++ SDK
- \#470 - Small raw files should not be built into index
- \#584 - Integrate internal FAISS
- \#611 - Remove MILVUS_CPU_VERSION
## Task

View File

@ -69,6 +69,8 @@ Below is a list of Milvus contributors. We greatly appreciate your contributions
- [Milvus test reports](https://github.com/milvus-io/milvus/tree/master/docs)
- [Milvus FAQ](https://www.milvus.io/docs/en/faq/operational_faq/)
- [Milvus Medium](https://medium.com/@milvusio)
- [Milvus CSDN](https://zilliz.blog.csdn.net/)
@ -79,4 +81,4 @@ Below is a list of Milvus contributors. We greatly appreciate your contributions
## License
[Apache License 2.0](LICENSE)
[Apache License 2.0](LICENSE)

View File

@ -69,6 +69,8 @@ Milvus provides stable [Python](https://github.com/milvus-io/pymilvus), [Java](
- [Milvus test reports](https://github.com/milvus-io/milvus/tree/master/docs)
- [Milvus FAQ](https://www.milvus.io/docs/zh-CN/faq/operational_faq/)
- [Milvus Medium](https://medium.com/@milvusio)
- [Milvus CSDN](https://zilliz.blog.csdn.net/)

View File

@ -61,6 +61,8 @@ To run the C++ sample code, use the following command
- [Milvus test reports](https://github.com/milvus-io/milvus/tree/master/docs)
- [Milvus FAQ](https://www.milvus.io/docs/en/faq/operational_faq/)
- [Milvus Medium](https://medium.com/@milvusio)
- [Milvus CSDN](https://zilliz.blog.csdn.net/)
@ -72,4 +74,4 @@ To run the C++ sample code, use the following command
## License
[Apache License 2.0](LICENSE)
[Apache License 2.0](LICENSE)

View File

@ -17,7 +17,7 @@ pipeline {
}
parameters{
choice choices: ['Release', 'Debug'], description: '', name: 'BUILD_TYPE'
choice choices: ['Release', 'Debug'], description: 'Build Type', name: 'BUILD_TYPE'
string defaultValue: 'registry.zilliz.com', description: 'DOCKER REGISTRY URL', name: 'DOKCER_REGISTRY_URL', trim: true
string defaultValue: 'ba070c98-c8cc-4f7c-b657-897715f359fc', description: 'DOCKER CREDENTIALS ID', name: 'DOCKER_CREDENTIALS_ID', trim: true
string defaultValue: 'http://192.168.1.202/artifactory/milvus', description: 'JFROG ARTFACTORY URL', name: 'JFROG_ARTFACTORY_URL', trim: true
@ -27,9 +27,8 @@ pipeline {
environment {
PROJECT_NAME = "milvus"
LOWER_BUILD_TYPE = params.BUILD_TYPE.toLowerCase()
SEMVER = "${BRANCH_NAME}"
JOBNAMES = env.JOB_NAME.split('/')
PIPELINE_NAME = "${JOBNAMES[0]}"
SEMVER = "${BRANCH_NAME.contains('/') ? BRANCH_NAME.substring(BRANCH_NAME.lastIndexOf('/') + 1) : BRANCH_NAME}"
PIPELINE_NAME = "${env.JOB_NAME.contains('/') ? env.JOB_NAME.getAt(0..(env.JOB_NAME.indexOf('/') - 1)) : env.JOB_NAME}"
}
stages {
@ -102,7 +101,7 @@ pipeline {
stages {
stage('Publish') {
steps {
container('publish-images'){
container('publish-images') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/publishImages.groovy"
}

View File

@ -0,0 +1,477 @@
#!/usr/bin/env groovy
pipeline {
agent none
options {
timestamps()
}
parameters{
choice choices: ['Release', 'Debug'], description: 'Build Type', name: 'BUILD_TYPE'
string defaultValue: 'registry.zilliz.com', description: 'DOCKER REGISTRY URL', name: 'DOKCER_REGISTRY_URL', trim: true
string defaultValue: 'a54e38ef-c424-4ea9-9224-b25fc20e3924', description: 'DOCKER CREDENTIALS ID', name: 'DOCKER_CREDENTIALS_ID', trim: true
string defaultValue: 'http://192.168.1.201/artifactory/milvus', description: 'JFROG ARTFACTORY URL', name: 'JFROG_ARTFACTORY_URL', trim: true
string defaultValue: '76fd48ab-2b8e-4eed-834d-2eefd23bb3a6', description: 'JFROG CREDENTIALS ID', name: 'JFROG_CREDENTIALS_ID', trim: true
}
environment {
PROJECT_NAME = "milvus"
LOWER_BUILD_TYPE = params.BUILD_TYPE.toLowerCase()
SEMVER = "${BRANCH_NAME.contains('/') ? BRANCH_NAME.substring(BRANCH_NAME.lastIndexOf('/') + 1) : BRANCH_NAME}"
PIPELINE_NAME = "${env.JOB_NAME.contains('/') ? env.JOB_NAME.getAt(0..(env.JOB_NAME.indexOf('/') - 1)) : env.JOB_NAME}"
}
stages {
stage("Ubuntu 18.04 x86_64") {
environment {
OS_NAME = "ubuntu18.04"
CPU_ARCH = "amd64"
}
parallel {
stage ("GPU Version") {
environment {
BINRARY_VERSION = "gpu"
PACKAGE_VERSION = VersionNumber([
versionNumberString : '${SEMVER}-gpu-${OS_NAME}-${CPU_ARCH}-${LOWER_BUILD_TYPE}-${BUILD_DATE_FORMATTED, "yyyyMMdd"}-${BUILDS_TODAY}'
]);
DOCKER_VERSION = "${SEMVER}-gpu-${OS_NAME}-${LOWER_BUILD_TYPE}"
}
stages {
stage("Run Build") {
agent {
kubernetes {
label "${env.BINRARY_VERSION}-build"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
name: milvus-gpu-build-env
labels:
app: milvus
componet: gpu-build-env
spec:
containers:
- name: milvus-gpu-build-env
image: registry.zilliz.com/milvus/milvus-gpu-build-env:v0.6.0-ubuntu18.04
env:
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: BUILD_ENV_IMAGE_ID
value: "da9023b0f858f072672f86483a869aa87e90a5140864f89e5a012ec766d96dea"
command:
- cat
tty: true
resources:
limits:
memory: "24Gi"
cpu: "8.0"
nvidia.com/gpu: 1
requests:
memory: "16Gi"
cpu: "4.0"
- name: milvus-mysql
image: mysql:5.6
env:
- name: MYSQL_ROOT_PASSWORD
value: 123456
ports:
- containerPort: 3306
name: mysql
"""
}
}
stages {
stage('Build') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/build.groovy"
}
}
}
}
stage('Code Coverage') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/internalCoverage.groovy"
}
}
}
}
stage('Upload Package') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/package.groovy"
}
}
}
}
}
}
stage("Publish docker images") {
agent {
kubernetes {
label "${env.BINRARY_VERSION}-publish"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: publish
componet: docker
spec:
containers:
- name: publish-images
image: registry.zilliz.com/library/docker:v1.0.0
securityContext:
privileged: true
command:
- cat
tty: true
volumeMounts:
- name: docker-sock
mountPath: /var/run/docker.sock
volumes:
- name: docker-sock
hostPath:
path: /var/run/docker.sock
"""
}
}
stages {
stage('Publish') {
steps {
container('publish-images') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/publishImages.groovy"
}
}
}
}
}
}
stage("Deploy to Development") {
environment {
FROMAT_SEMVER = "${env.SEMVER}".replaceAll("\\.", "-")
HELM_RELEASE_NAME = "${env.PIPELINE_NAME}-${env.FROMAT_SEMVER}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION}".toLowerCase()
}
agent {
kubernetes {
label "${env.BINRARY_VERSION}-dev-test"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: milvus
componet: test-env
spec:
containers:
- name: milvus-test-env
image: registry.zilliz.com/milvus/milvus-test-env:v0.1
command:
- cat
tty: true
volumeMounts:
- name: kubeconf
mountPath: /root/.kube/
readOnly: true
volumes:
- name: kubeconf
secret:
secretName: test-cluster-config
"""
}
}
stages {
stage("Deploy to Dev") {
steps {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/deploySingle2Dev.groovy"
}
}
}
}
stage("Dev Test") {
steps {
container('milvus-test-env') {
script {
boolean isNightlyTest = isTimeTriggeredBuild()
if (isNightlyTest) {
load "${env.WORKSPACE}/ci/jenkins/step/singleDevNightlyTest.groovy"
} else {
load "${env.WORKSPACE}/ci/jenkins/step/singleDevTest.groovy"
}
}
}
}
}
stage ("Cleanup Dev") {
steps {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy"
}
}
}
}
}
post {
unsuccessful {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy"
}
}
}
}
}
}
}
stage ("CPU Version") {
environment {
BINRARY_VERSION = "cpu"
PACKAGE_VERSION = VersionNumber([
versionNumberString : '${SEMVER}-cpu-${OS_NAME}-${CPU_ARCH}-${LOWER_BUILD_TYPE}-${BUILD_DATE_FORMATTED, "yyyyMMdd"}-${BUILDS_TODAY}'
]);
DOCKER_VERSION = "${SEMVER}-cpu-${OS_NAME}-${LOWER_BUILD_TYPE}"
}
stages {
stage("Run Build") {
agent {
kubernetes {
label "${env.BINRARY_VERSION}-build"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
name: milvus-cpu-build-env
labels:
app: milvus
componet: cpu-build-env
spec:
containers:
- name: milvus-cpu-build-env
image: registry.zilliz.com/milvus/milvus-cpu-build-env:v0.6.0-ubuntu18.04
env:
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: BUILD_ENV_IMAGE_ID
value: "23476391bec80c64f10d44a6370c73c71f011a6b95114b10ff82a60e771e11c7"
command:
- cat
tty: true
resources:
limits:
memory: "24Gi"
cpu: "8.0"
requests:
memory: "16Gi"
cpu: "4.0"
- name: milvus-mysql
image: mysql:5.6
env:
- name: MYSQL_ROOT_PASSWORD
value: 123456
ports:
- containerPort: 3306
name: mysql
"""
}
}
stages {
stage('Build') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/build.groovy"
}
}
}
}
stage('Code Coverage') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/internalCoverage.groovy"
}
}
}
}
stage('Upload Package') {
steps {
container("milvus-${env.BINRARY_VERSION}-build-env") {
script {
load "${env.WORKSPACE}/ci/jenkins/step/package.groovy"
}
}
}
}
}
}
stage("Publish docker images") {
agent {
kubernetes {
label "${env.BINRARY_VERSION}-publish"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: publish
componet: docker
spec:
containers:
- name: publish-images
image: registry.zilliz.com/library/docker:v1.0.0
securityContext:
privileged: true
command:
- cat
tty: true
volumeMounts:
- name: docker-sock
mountPath: /var/run/docker.sock
volumes:
- name: docker-sock
hostPath:
path: /var/run/docker.sock
"""
}
}
stages {
stage('Publish') {
steps {
container('publish-images'){
script {
load "${env.WORKSPACE}/ci/jenkins/step/publishImages.groovy"
}
}
}
}
}
}
stage("Deploy to Development") {
environment {
FROMAT_SEMVER = "${env.SEMVER}".replaceAll("\\.", "-")
HELM_RELEASE_NAME = "${env.PIPELINE_NAME}-${env.FROMAT_SEMVER}-${env.BUILD_NUMBER}-single-${env.BINRARY_VERSION}".toLowerCase()
}
agent {
kubernetes {
label "${env.BINRARY_VERSION}-dev-test"
defaultContainer 'jnlp'
yaml """
apiVersion: v1
kind: Pod
metadata:
labels:
app: milvus
componet: test-env
spec:
containers:
- name: milvus-test-env
image: registry.zilliz.com/milvus/milvus-test-env:v0.1
command:
- cat
tty: true
volumeMounts:
- name: kubeconf
mountPath: /root/.kube/
readOnly: true
volumes:
- name: kubeconf
secret:
secretName: test-cluster-config
"""
}
}
stages {
stage("Deploy to Dev") {
steps {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/deploySingle2Dev.groovy"
}
}
}
}
stage("Dev Test") {
steps {
container('milvus-test-env') {
script {
boolean isNightlyTest = isTimeTriggeredBuild()
if (isNightlyTest) {
load "${env.WORKSPACE}/ci/jenkins/step/singleDevNightlyTest.groovy"
} else {
load "${env.WORKSPACE}/ci/jenkins/step/singleDevTest.groovy"
}
}
}
}
}
stage ("Cleanup Dev") {
steps {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy"
}
}
}
}
}
post {
unsuccessful {
container('milvus-test-env') {
script {
load "${env.WORKSPACE}/ci/jenkins/step/cleanupSingleDev.groovy"
}
}
}
}
}
}
}
}
}
}
}
boolean isTimeTriggeredBuild() {
if (currentBuild.getBuildCauses('hudson.triggers.TimerTrigger$TimerTriggerCause').size() != 0) {
return true
}
return false
}
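For reference, the SEMVER and PIPELINE_NAME expressions above strip the folder prefix from a multibranch branch name (keeping the segment after the last '/') and take the first segment of the job path (before the first '/'). A minimal standalone sketch of the same string logic, written in C++ purely for illustration (the pipeline itself evaluates these in Groovy):

#include <iostream>
#include <string>

// Mirrors SEMVER: keep the segment after the last '/' when one is present.
std::string semver(const std::string& branch) {
    auto pos = branch.rfind('/');
    return pos == std::string::npos ? branch : branch.substr(pos + 1);
}

// Mirrors PIPELINE_NAME: keep the segment before the first '/' when present.
std::string pipelineName(const std::string& job) {
    auto pos = job.find('/');
    return pos == std::string::npos ? job : job.substr(0, pos);
}

int main() {
    std::cout << semver("0.6.0") << "\n";              // "0.6.0"
    std::cout << semver("feature/0.6.0") << "\n";      // "0.6.0"
    std::cout << pipelineName("milvus/0.6.0") << "\n"; // "milvus"
}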

View File

@ -21,10 +21,10 @@ spec:
tty: true
resources:
limits:
memory: "32Gi"
memory: "12Gi"
cpu: "8.0"
requests:
memory: "16Gi"
memory: "8Gi"
cpu: "4.0"
- name: milvus-mysql
image: mysql:5.6

View File

@ -21,11 +21,11 @@ spec:
tty: true
resources:
limits:
memory: "32Gi"
memory: "12Gi"
cpu: "8.0"
nvidia.com/gpu: 1
requests:
memory: "16Gi"
memory: "8Gi"
cpu: "4.0"
- name: milvus-mysql
image: mysql:5.6

View File

@ -3,9 +3,9 @@ timeout(time: 60, unit: 'MINUTES') {
withCredentials([usernamePassword(credentialsId: "${params.JFROG_CREDENTIALS_ID}", usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
def checkResult = sh(script: "./check_ccache.sh -l ${params.JFROG_ARTFACTORY_URL}/ccache", returnStatus: true)
if ("${env.BINRARY_VERSION}" == "gpu") {
sh ". ./before-install.sh && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -g -u -c"
sh ". ./before-install.sh && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -g -x -u -c"
} else {
sh ". ./before-install.sh && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -m -u -c"
sh ". ./before-install.sh && ./build.sh -t ${params.BUILD_TYPE} -o /opt/milvus -l -u -c"
}
sh "./update_ccache.sh -l ${params.JFROG_ARTFACTORY_URL}/ccache -u ${USERNAME} -p ${PASSWORD}"
}

View File

@ -0,0 +1,6 @@
timeout(time: 30, unit: 'MINUTES') {
dir ("ci/scripts") {
sh "./coverage.sh -o /opt/milvus -u root -p 123456 -t \$POD_IP"
}
}

View File

@ -41,12 +41,12 @@ if [[ -z "${ARTIFACTORY_URL}" || "${ARTIFACTORY_URL}" == "" ]];then
exit 1
fi
for BRANCH_NAME in ${BRANCH_NAMES}
do
echo "fetching ${BRANCH_NAME}/ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz"
wget -q --method HEAD "${ARTIFACTORY_URL}/${BRANCH_NAME}/ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz"
check_ccache() {
BRANCH=$1
echo "fetching ${BRANCH}/ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz"
wget -q --method HEAD "${ARTIFACTORY_URL}/${BRANCH}/ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz"
if [[ $? == 0 ]];then
wget "${ARTIFACTORY_URL}/${BRANCH_NAME}/ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz" && \
wget -q "${ARTIFACTORY_URL}/${BRANCH}/ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz" && \
mkdir -p ${CCACHE_DIRECTORY} && \
tar zxf ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz -C ${CCACHE_DIRECTORY} && \
rm ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz
@ -55,6 +55,18 @@ do
exit 0
fi
fi
}
if [[ -n "${CHANGE_BRANCH}" && "${BRANCH_NAME}" =~ "PR-" ]];then
check_ccache ${CHANGE_BRANCH}
check_ccache ${BRANCH_NAME}
fi
for CURRENT_BRANCH in ${BRANCH_NAMES}
do
if [[ "${CURRENT_BRANCH}" != "HEAD" ]];then
check_ccache ${CURRENT_BRANCH}
fi
done
echo "could not download cache" && exit 1

View File

@ -54,14 +54,18 @@ fi
PACKAGE_FILE="ccache-${OS_NAME}-${CODE_NAME}-${BUILD_ENV_DOCKER_IMAGE_ID}.tar.gz"
REMOTE_PACKAGE_PATH="${ARTIFACTORY_URL}/${BRANCH_NAME}"
echo "Updating ccache package file: ${PACKAGE_FILE}"
tar zcf ./${PACKAGE_FILE} -C ${HOME}/.ccache .
echo "Uploading ccache package file ${PACKAGE_FILE} to ${REMOTE_PACKAGE_PATH}"
curl -u${ARTIFACTORY_USER}:${ARTIFACTORY_PASSWORD} -T ${PACKAGE_FILE} ${REMOTE_PACKAGE_PATH}/${PACKAGE_FILE}
if [[ $? == 0 ]];then
echo "Uploading ccache package file success !"
exit 0
else
echo "Uploading ccache package file fault !"
exit 1
ccache --show-stats
if [[ "${BRANCH_NAME}" != "HEAD" ]];then
echo "Updating ccache package file: ${PACKAGE_FILE}"
tar zcf ./${PACKAGE_FILE} -C ${HOME}/.ccache .
echo "Uploading ccache package file ${PACKAGE_FILE} to ${REMOTE_PACKAGE_PATH}"
curl -u${ARTIFACTORY_USER}:${ARTIFACTORY_PASSWORD} -T ${PACKAGE_FILE} ${REMOTE_PACKAGE_PATH}/${PACKAGE_FILE}
if [[ $? == 0 ]];then
echo "Uploading ccache package file success !"
exit 0
else
echo "Uploading ccache package file fault !"
exit 1
fi
fi

View File

@ -35,15 +35,15 @@ if (NOT DEFINED CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build.")
endif ()
set (GIT_BRANCH_NAME_REGEX "[0-9]+\\.[0-9]+\\.[0-9]")
set(GIT_BRANCH_NAME_REGEX "[0-9]+\\.[0-9]+\\.[0-9]")
MACRO(GET_GIT_BRANCH_NAME GIT_BRANCH_NAME)
execute_process(COMMAND sh "-c" "git log --decorate | head -n 1 | sed 's/.*(\\(.*\\))/\\1/' | sed 's/.*, //' | sed 's=[a-zA-Z]*\/==g'"
OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
if(NOT GIT_BRANCH_NAME MATCHES "${GIT_BRANCH_NAME_REGEX}")
if (NOT GIT_BRANCH_NAME MATCHES "${GIT_BRANCH_NAME_REGEX}")
execute_process(COMMAND "git" rev-parse --abbrev-ref HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
endif ()
if(NOT GIT_BRANCH_NAME MATCHES "${GIT_BRANCH_NAME_REGEX}")
if (NOT GIT_BRANCH_NAME MATCHES "${GIT_BRANCH_NAME_REGEX}")
execute_process(COMMAND "git" symbolic-ref --short -q HEAD HEAD OUTPUT_VARIABLE ${GIT_BRANCH_NAME})
endif ()
ENDMACRO(GET_GIT_BRANCH_NAME)
@ -79,7 +79,7 @@ if (MILVUS_VERSION_MAJOR STREQUAL ""
OR MILVUS_VERSION_PATCH STREQUAL "")
message(WARNING "Failed to determine Milvus version from git branch name")
set(MILVUS_VERSION "0.6.0")
endif()
endif ()
message(STATUS "Build version = ${MILVUS_VERSION}")
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/version.h.in ${CMAKE_CURRENT_SOURCE_DIR}/src/version.h @ONLY)
@ -141,7 +141,11 @@ if (MILVUS_USE_CCACHE)
endif (CCACHE_FOUND)
endif ()
set(MILVUS_CPU_VERSION false)
if (CUSTOMIZATION)
set(MILVUS_GPU_VERSION ON)
add_compile_definitions(CUSTOMIZATION)
endif ()
if (MILVUS_GPU_VERSION)
message(STATUS "Building Milvus GPU version")
add_compile_definitions("MILVUS_GPU_VERSION")
@ -150,8 +154,6 @@ if (MILVUS_GPU_VERSION)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -fPIC -std=c++11 -D_FORCE_INLINES --expt-extended-lambda")
else ()
message(STATUS "Building Milvus CPU version")
set(MILVUS_CPU_VERSION true)
add_compile_definitions("MILVUS_CPU_VERSION")
endif ()
if (MILVUS_WITH_PROMETHEUS)
@ -170,10 +172,6 @@ else ()
endif ()
endif ()
if (CUSTOMIZATION)
add_definitions(-DCUSTOMIZATION)
endif (CUSTOMIZATION)
config_summary()
add_subdirectory(src)
@ -187,7 +185,7 @@ endif ()
add_custom_target(Clean-All COMMAND ${CMAKE_BUILD_TOOL} clean)
if ("${MILVUS_DB_PATH}" STREQUAL "")
set(MILVUS_DB_PATH "/tmp/milvus")
set(MILVUS_DB_PATH "${CMAKE_INSTALL_PREFIX}")
endif ()
if (MILVUS_GPU_VERSION)
@ -204,6 +202,11 @@ install(DIRECTORY scripts/
GROUP_EXECUTE GROUP_READ
WORLD_EXECUTE WORLD_READ
FILES_MATCHING PATTERN "*.sh")
install(DIRECTORY scripts/migration
DESTINATION scripts
FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
GROUP_EXECUTE GROUP_READ
WORLD_EXECUTE WORLD_READ)
install(FILES
conf/server_config.yaml
conf/log_config.conf

View File

@ -41,10 +41,12 @@ macro(define_option_string name description default)
endmacro()
#----------------------------------------------------------------------
set_option_category("GPU version")
set_option_category("Milvus Build Option")
define_option(MILVUS_GPU_VERSION "Build GPU version" OFF)
define_option(CUSTOMIZATION "Build with customized FAISS library" OFF)
#----------------------------------------------------------------------
set_option_category("Thirdparty")

View File

@ -1,5 +1,7 @@
# Default values are used when you make no changes to the following parameters.
version: 0.1 # config version
server_config:
address: 0.0.0.0 # milvus server ip address (IPv4)
port: 19530 # milvus server port, must in range [1025, 65534]

View File

@ -1,5 +1,7 @@
# Default values are used when you make no changes to the following parameters.
version: 0.1 # config version
server_config:
address: 0.0.0.0 # milvus server ip address (IPv4)
port: 19530 # milvus server port, must in range [1025, 65534]

View File

@ -99,8 +99,8 @@ Cache<ItemObj>::insert(const std::string& key, const ItemObj& item) {
std::lock_guard<std::mutex> lock(mutex_);
lru_.put(key, item);
SERVER_LOG_DEBUG << "Insert " << key << " size:" << item->Size() << " bytes into cache, usage: " << usage_
<< " bytes";
SERVER_LOG_DEBUG << "Insert " << key << " size: " << item->Size() << " bytes into cache, usage: " << usage_
<< " bytes," << " capacity: " << capacity_ << " bytes";
}
}
@ -115,7 +115,8 @@ Cache<ItemObj>::erase(const std::string& key) {
const ItemObj& old_item = lru_.get(key);
usage_ -= old_item->Size();
SERVER_LOG_DEBUG << "Erase " << key << " size: " << old_item->Size();
SERVER_LOG_DEBUG << "Erase " << key << " size: " << old_item->Size() << " bytes from cache, usage: " << usage_
<< " bytes," << " capacity: " << capacity_ << " bytes";
lru_.erase(key);
}

View File

@ -41,6 +41,7 @@
#include <iostream>
#include <set>
#include <thread>
#include <utility>
namespace milvus {
namespace engine {
@ -51,6 +52,8 @@ constexpr uint64_t METRIC_ACTION_INTERVAL = 1;
constexpr uint64_t COMPACT_ACTION_INTERVAL = 1;
constexpr uint64_t INDEX_ACTION_INTERVAL = 1;
constexpr uint64_t INDEX_FAILED_RETRY_TIME = 1;
static const Status SHUTDOWN_ERROR = Status(DB_ERROR, "Milvus server is shut down!");
void
@ -112,7 +115,7 @@ DBImpl::Stop() {
bg_timer_thread_.join();
if (options_.mode_ != DBOptions::MODE::CLUSTER_READONLY) {
meta_ptr_->CleanUp();
meta_ptr_->CleanUpShadowFiles();
}
// ENGINE_LOG_TRACE << "DB service stop";
@ -179,7 +182,7 @@ DBImpl::PreloadTable(const std::string& table_id) {
return SHUTDOWN_ERROR;
}
// get all table files from parent table
// step 1: get all table files from parent table
meta::DatesT dates;
std::vector<size_t> ids;
meta::TableFilesSchema files_array;
@ -188,7 +191,7 @@ DBImpl::PreloadTable(const std::string& table_id) {
return status;
}
// get files from partition tables
// step 2: get files from partition tables
std::vector<meta::TableSchema> partiton_array;
status = meta_ptr_->ShowPartitions(table_id, partiton_array);
for (auto& schema : partiton_array) {
@ -200,6 +203,10 @@ DBImpl::PreloadTable(const std::string& table_id) {
int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
int64_t available_size = cache_total - cache_usage;
// step 3: load file one by one
ENGINE_LOG_DEBUG << "Begin pre-load table:" + table_id + ", totally " << files_array.size()
<< " files need to be pre-loaded";
TimeRecorderAuto rc("Pre-load table:" + table_id);
for (auto& file : files_array) {
ExecutionEnginePtr engine = EngineFactory::Build(file.dimension_, file.location_, (EngineType)file.engine_type_,
(MetricType)file.metric_type_, file.nlist_);
@ -210,10 +217,12 @@ DBImpl::PreloadTable(const std::string& table_id) {
size += engine->PhysicalSize();
if (size > available_size) {
ENGINE_LOG_DEBUG << "Pre-load canceled since cache almost full";
return Status(SERVER_CACHE_FULL, "Cache is full");
} else {
try {
// step 1: load index
std::string msg = "Pre-loaded file: " + file.file_id_ + " size: " + std::to_string(file.file_size_);
TimeRecorderAuto rc_1(msg);
engine->Load(true);
} catch (std::exception& ex) {
std::string msg = "Pre-load table encounter exception: " + std::string(ex.what());
@ -361,6 +370,7 @@ DBImpl::CreateIndex(const std::string& table_id, const TableIndex& index) {
WaitMergeFileFinish();
// step 4: wait and build index
status = CleanFailedIndexFileOfTable(table_id);
status = BuildTableIndexRecursively(table_id, index);
return status;
@ -777,11 +787,18 @@ DBImpl::BackgroundCompaction(std::set<std::string> table_ids) {
meta_ptr_->Archive();
int ttl = 5 * meta::M_SEC; // default: file will be deleted after 5 minutes
if (options_.mode_ == DBOptions::MODE::CLUSTER_WRITABLE) {
ttl = meta::D_SEC;
{
uint64_t ttl = 10 * meta::SECOND; // default: file data will be erased from cache after a few seconds
meta_ptr_->CleanUpCacheWithTTL(ttl);
}
{
uint64_t ttl = 5 * meta::M_SEC; // default: file will be deleted after a few minutes
if (options_.mode_ == DBOptions::MODE::CLUSTER_WRITABLE) {
ttl = meta::D_SEC;
}
meta_ptr_->CleanUpFilesWithTTL(ttl);
}
meta_ptr_->CleanUpFilesWithTTL(ttl);
// ENGINE_LOG_TRACE << " Background compaction thread exit";
}
@ -821,22 +838,35 @@ DBImpl::BackgroundBuildIndex() {
std::unique_lock<std::mutex> lock(build_index_mutex_);
meta::TableFilesSchema to_index_files;
meta_ptr_->FilesToIndex(to_index_files);
Status status;
Status status = IgnoreFailedIndexFiles(to_index_files);
if (!to_index_files.empty()) {
scheduler::BuildIndexJobPtr job = std::make_shared<scheduler::BuildIndexJob>(meta_ptr_, options_);
// step 2: put build index task to scheduler
std::vector<std::pair<scheduler::BuildIndexJobPtr, scheduler::TableFileSchemaPtr>> job2file_map;
for (auto& file : to_index_files) {
scheduler::BuildIndexJobPtr job = std::make_shared<scheduler::BuildIndexJob>(meta_ptr_, options_);
scheduler::TableFileSchemaPtr file_ptr = std::make_shared<meta::TableFileSchema>(file);
job->AddToIndexFiles(file_ptr);
scheduler::JobMgrInst::GetInstance()->Put(job);
job2file_map.push_back(std::make_pair(job, file_ptr));
}
scheduler::JobMgrInst::GetInstance()->Put(job);
job->WaitBuildIndexFinish();
if (!job->GetStatus().ok()) {
Status status = job->GetStatus();
ENGINE_LOG_ERROR << "Building index failed: " << status.ToString();
for (auto iter = job2file_map.begin(); iter != job2file_map.end(); ++iter) {
scheduler::BuildIndexJobPtr job = iter->first;
meta::TableFileSchema& file_schema = *(iter->second.get());
job->WaitBuildIndexFinish();
if (!job->GetStatus().ok()) {
Status status = job->GetStatus();
ENGINE_LOG_ERROR << "Building index job " << job->id() << " failed: " << status.ToString();
MarkFailedIndexFile(file_schema);
} else {
MarkSucceedIndexFile(file_schema);
ENGINE_LOG_DEBUG << "Building index job " << job->id() << " succeed.";
}
}
ENGINE_LOG_DEBUG << "Background build index thread finished";
}
// ENGINE_LOG_TRACE << "Background build index thread exit";
@ -904,6 +934,7 @@ DBImpl::DropTableRecursively(const std::string& table_id, const meta::DatesT& da
if (dates.empty()) {
status = mem_mgr_->EraseMemVector(table_id); // not allow insert
status = meta_ptr_->DropTable(table_id); // soft delete table
CleanFailedIndexFileOfTable(table_id);
// scheduler will determine when to delete table files
auto nres = scheduler::ResMgrInst::GetInstance()->GetNumOfComputeResource();
@ -982,6 +1013,8 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex
std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10 * 1000, times * 100)));
GetFilesToBuildIndex(table_id, file_types, table_files);
times++;
IgnoreFailedIndexFiles(table_files);
}
// build index for partition
@ -994,12 +1027,27 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex
}
}
// failed to build index for some files, return error
std::vector<std::string> failed_files;
GetFailedIndexFileOfTable(table_id, failed_files);
if (!failed_files.empty()) {
std::string msg = "Failed to build index for " + std::to_string(failed_files.size()) +
((failed_files.size() == 1) ? " file" : " files");
#ifdef MILVUS_GPU_VERSION
msg += ", file size is too large or gpu memory is not enough.";
#else
msg += ", please double check index parameters.";
#endif
return Status(DB_ERROR, msg);
}
return Status::OK();
}
Status
DBImpl::DropTableIndexRecursively(const std::string& table_id) {
ENGINE_LOG_DEBUG << "Drop index for table: " << table_id;
CleanFailedIndexFileOfTable(table_id);
auto status = meta_ptr_->DropTableIndex(table_id);
if (!status.ok()) {
return status;
@ -1042,5 +1090,86 @@ DBImpl::GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_c
return Status::OK();
}
Status
DBImpl::CleanFailedIndexFileOfTable(const std::string& table_id) {
std::lock_guard<std::mutex> lck(index_failed_mutex_);
index_failed_files_.erase(table_id); // rebuild failed index files for this table
return Status::OK();
}
Status
DBImpl::GetFailedIndexFileOfTable(const std::string& table_id, std::vector<std::string>& failed_files) {
failed_files.clear();
std::lock_guard<std::mutex> lck(index_failed_mutex_);
auto iter = index_failed_files_.find(table_id);
if (iter != index_failed_files_.end()) {
FileID2FailedTimes& failed_map = iter->second;
for (auto it_file = failed_map.begin(); it_file != failed_map.end(); ++it_file) {
failed_files.push_back(it_file->first);
}
}
return Status::OK();
}
Status
DBImpl::MarkFailedIndexFile(const meta::TableFileSchema& file) {
std::lock_guard<std::mutex> lck(index_failed_mutex_);
auto iter = index_failed_files_.find(file.table_id_);
if (iter == index_failed_files_.end()) {
FileID2FailedTimes failed_files;
failed_files.insert(std::make_pair(file.file_id_, 1));
index_failed_files_.insert(std::make_pair(file.table_id_, failed_files));
} else {
auto it_failed_files = iter->second.find(file.file_id_);
if (it_failed_files != iter->second.end()) {
it_failed_files->second++;
} else {
iter->second.insert(std::make_pair(file.file_id_, 1));
}
}
return Status::OK();
}
Status
DBImpl::MarkSucceedIndexFile(const meta::TableFileSchema& file) {
std::lock_guard<std::mutex> lck(index_failed_mutex_);
auto iter = index_failed_files_.find(file.table_id_);
if (iter != index_failed_files_.end()) {
iter->second.erase(file.file_id_);
}
return Status::OK();
}
Status
DBImpl::IgnoreFailedIndexFiles(meta::TableFilesSchema& table_files) {
std::lock_guard<std::mutex> lck(index_failed_mutex_);
// there could be failed files belonging to different tables.
// some files may have failed several times; no need to build index for those files.
// this avoids an infinite loop in the build index operation
for (auto it_file = table_files.begin(); it_file != table_files.end();) {
auto it_failed_files = index_failed_files_.find((*it_file).table_id_);
if (it_failed_files != index_failed_files_.end()) {
auto it_failed_file = it_failed_files->second.find((*it_file).file_id_);
if (it_failed_file != it_failed_files->second.end()) {
if (it_failed_file->second >= INDEX_FAILED_RETRY_TIME) {
it_file = table_files.erase(it_file);
continue;
}
}
}
++it_file;
}
return Status::OK();
}
} // namespace engine
} // namespace milvus
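The index_failed_files_ bookkeeping above (a table-id to file-id to failure-count map) is what breaks the build-index dead loop from #545: each file now gets its own BuildIndexJob, failures are counted per file, and any file whose count reaches INDEX_FAILED_RETRY_TIME is filtered out of the next to-index list. A minimal standalone sketch of that retry-limit idea, with simplified names rather than the actual Milvus classes:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

constexpr uint64_t kIndexFailedRetryTime = 1;  // plays the role of INDEX_FAILED_RETRY_TIME

using FileID2FailedTimes = std::map<std::string, uint64_t>;
std::map<std::string, FileID2FailedTimes> index_failed_files;  // table id -> file id -> failures

void MarkFailed(const std::string& table, const std::string& file) {
    ++index_failed_files[table][file];  // operator[] default-initializes the count to 0
}

// Drop files that already failed too often, mirroring IgnoreFailedIndexFiles().
void IgnoreFailed(const std::string& table, std::vector<std::string>& files) {
    auto it_table = index_failed_files.find(table);
    if (it_table == index_failed_files.end()) {
        return;
    }
    for (auto it = files.begin(); it != files.end();) {
        auto it_file = it_table->second.find(*it);
        if (it_file != it_table->second.end() && it_file->second >= kIndexFailedRetryTime) {
            it = files.erase(it);  // retried enough; never queue it again
        } else {
            ++it;
        }
    }
}

int main() {
    std::vector<std::string> to_index = {"file_1", "file_2"};
    MarkFailed("tbl", "file_1");   // file_1 reaches the retry limit (limit is 1)
    IgnoreFailed("tbl", to_index);
    for (const auto& f : to_index) {
        std::cout << f << "\n";    // only file_2 remains
    }
}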

View File

@ -25,6 +25,7 @@
#include <atomic>
#include <condition_variable>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <set>
@ -35,8 +36,6 @@
namespace milvus {
namespace engine {
class Env;
namespace meta {
class Meta;
}
@ -179,6 +178,21 @@ class DBImpl : public DB {
Status
GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_count);
Status
CleanFailedIndexFileOfTable(const std::string& table_id);
Status
GetFailedIndexFileOfTable(const std::string& table_id, std::vector<std::string>& failed_files);
Status
MarkFailedIndexFile(const meta::TableFileSchema& file);
Status
MarkSucceedIndexFile(const meta::TableFileSchema& file);
Status
IgnoreFailedIndexFiles(meta::TableFilesSchema& table_files);
private:
const DBOptions options_;
@ -200,7 +214,11 @@ class DBImpl : public DB {
std::list<std::future<void>> index_thread_results_;
std::mutex build_index_mutex_;
}; // DBImpl
std::mutex index_failed_mutex_;
using FileID2FailedTimes = std::map<std::string, uint64_t>;
using Table2FailedFiles = std::map<std::string, FileID2FailedTimes>;
Table2FailedFiles index_failed_files_; // file id mapping to failed times
}; // DBImpl
} // namespace engine
} // namespace milvus

View File

@ -154,7 +154,9 @@ GetTableFilePath(const DBMetaOptions& options, meta::TableFileSchema& table_file
}
std::string msg = "Table file doesn't exist: " + file_path;
ENGINE_LOG_ERROR << msg << " in path: " << options.path_ << " for table: " << table_file.table_id_;
if (table_file.file_size_ > 0) { // no need to report an error for an empty file
ENGINE_LOG_ERROR << msg << " in path: " << options.path_ << " for table: " << table_file.table_id_;
}
return Status(DB_ERROR, msg);
}

View File

@ -77,8 +77,8 @@ class ExecutionEngine {
virtual Status
CopyToCpu() = 0;
virtual std::shared_ptr<ExecutionEngine>
Clone() = 0;
// virtual std::shared_ptr<ExecutionEngine>
// Clone() = 0;
virtual Status
Merge(const std::string& location) = 0;

View File

@ -93,18 +93,18 @@ ExecutionEngineImpl::CreatetVecIndex(EngineType type) {
break;
}
case EngineType::FAISS_IVFFLAT: {
#ifdef MILVUS_CPU_VERSION
index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU);
#else
#ifdef MILVUS_GPU_VERSION
index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_MIX);
#else
index = GetVecIndexFactory(IndexType::FAISS_IVFFLAT_CPU);
#endif
break;
}
case EngineType::FAISS_IVFSQ8: {
#ifdef MILVUS_CPU_VERSION
index = GetVecIndexFactory(IndexType::FAISS_IVFSQ8_CPU);
#else
#ifdef MILVUS_GPU_VERSION
index = GetVecIndexFactory(IndexType::FAISS_IVFSQ8_MIX);
#else
index = GetVecIndexFactory(IndexType::FAISS_IVFSQ8_CPU);
#endif
break;
}
@ -112,15 +112,17 @@ ExecutionEngineImpl::CreatetVecIndex(EngineType type) {
index = GetVecIndexFactory(IndexType::NSG_MIX);
break;
}
#ifdef CUSTOMIZATION
case EngineType::FAISS_IVFSQ8H: {
index = GetVecIndexFactory(IndexType::FAISS_IVFSQ8_HYBRID);
break;
}
#endif
case EngineType::FAISS_PQ: {
#ifdef MILVUS_CPU_VERSION
index = GetVecIndexFactory(IndexType::FAISS_IVFPQ_CPU);
#else
#ifdef MILVUS_GPU_VERSION
index = GetVecIndexFactory(IndexType::FAISS_IVFPQ_MIX);
#else
index = GetVecIndexFactory(IndexType::FAISS_IVFPQ_CPU);
#endif
break;
}
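These hunks invert the preprocessor guard from #ifdef MILVUS_CPU_VERSION to #ifdef MILVUS_GPU_VERSION, matching the CMake change that removed MILVUS_CPU_VERSION, so the CPU index becomes the default and the *_MIX variants are selected only in GPU builds. A minimal sketch of the resulting compile-time dispatch (illustrative only):

#include <iostream>
#include <string>

// After the change, GPU builds pick the MIX index and everything else falls
// back to the CPU index; compile with -DMILVUS_GPU_VERSION to flip the branch.
std::string SelectIvfFlatIndex() {
#ifdef MILVUS_GPU_VERSION
    return "FAISS_IVFFLAT_MIX";
#else
    return "FAISS_IVFFLAT_CPU";
#endif
}

int main() {
    std::cout << SelectIvfFlatIndex() << "\n";
}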
@ -257,6 +259,11 @@ ExecutionEngineImpl::PhysicalSize() const {
Status
ExecutionEngineImpl::Serialize() {
auto status = write_index(index_, location_);
// here we reset the index size to the file size,
// since for some index types (such as SQ8) the data size becomes smaller after serialization
index_->set_size(PhysicalSize());
return status;
}
@ -410,18 +417,18 @@ ExecutionEngineImpl::CopyToCpu() {
return Status::OK();
}
ExecutionEnginePtr
ExecutionEngineImpl::Clone() {
if (index_ == nullptr) {
ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to clone";
return nullptr;
}
auto ret = std::make_shared<ExecutionEngineImpl>(dim_, location_, index_type_, metric_type_, nlist_);
ret->Init();
ret->index_ = index_->Clone();
return ret;
}
// ExecutionEnginePtr
// ExecutionEngineImpl::Clone() {
// if (index_ == nullptr) {
// ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to clone";
// return nullptr;
// }
//
// auto ret = std::make_shared<ExecutionEngineImpl>(dim_, location_, index_type_, metric_type_, nlist_);
// ret->Init();
// ret->index_ = index_->Clone();
// return ret;
//}
Status
ExecutionEngineImpl::Merge(const std::string& location) {
@ -604,6 +611,9 @@ ExecutionEngineImpl::Init() {
server::Config& config = server::Config::GetInstance();
std::vector<int64_t> gpu_ids;
Status s = config.GetGpuResourceConfigBuildIndexResources(gpu_ids);
if (!s.ok()) {
gpu_num_ = knowhere::INVALID_VALUE;
}
for (auto id : gpu_ids) {
if (gpu_num_ == id) {
return Status::OK();

View File

@ -64,8 +64,8 @@ class ExecutionEngineImpl : public ExecutionEngine {
Status
CopyToCpu() override;
ExecutionEnginePtr
Clone() override;
// ExecutionEnginePtr
// Clone() override;
Status
Merge(const std::string& location) override;

View File

@ -118,9 +118,13 @@ class Meta {
Archive() = 0;
virtual Status
CleanUp() = 0;
CleanUpShadowFiles() = 0;
virtual Status CleanUpFilesWithTTL(uint16_t) = 0;
virtual Status
CleanUpCacheWithTTL(uint64_t seconds) = 0;
virtual Status
CleanUpFilesWithTTL(uint64_t seconds) = 0;
virtual Status
DropAll() = 0;
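One likely motivation for widening CleanUpFilesWithTTL's parameter from uint16_t to uint64_t: DBImpl passes meta::D_SEC in cluster-writable mode, and assuming D_SEC is one day in seconds (86400, consistent with M_SEC acting as a minute in the five-minute default above), that value does not fit in 16 bits and would silently truncate. A short demonstration of the truncation:

#include <cstdint>
#include <iostream>

int main() {
    uint64_t day_sec = 24 * 3600;  // 86400: a one-day TTL in seconds
    uint16_t truncated = static_cast<uint16_t>(day_sec);  // the old parameter type
    std::cout << day_sec << " -> " << truncated << "\n";  // 86400 -> 20864
}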

View File

@ -20,6 +20,7 @@
#include "db/IDGenerator.h"
#include "db/Utils.h"
#include "metrics/Metrics.h"
#include "utils/CommonUtil.h"
#include "utils/Exception.h"
#include "utils/Log.h"
#include "utils/StringHelpFunctions.h"
@ -289,45 +290,50 @@ MySQLMetaImpl::Initialize() {
// step 4: validate to avoid open old version schema
ValidateMetaSchema();
// step 5: create meta tables
try {
if (mode_ != DBOptions::MODE::CLUSTER_READONLY) {
CleanUp();
}
// step 5: clean shadow files
if (mode_ != DBOptions::MODE::CLUSTER_READONLY) {
CleanUpShadowFiles();
}
{
mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_);
// step 6: try connect mysql server
mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_);
if (connectionPtr == nullptr) {
return Status(DB_ERROR, "Failed to connect to meta server(mysql)");
}
if (connectionPtr == nullptr) {
std::string msg = "Failed to connect MySQL meta server: " + uri;
ENGINE_LOG_ERROR << msg;
throw Exception(DB_INVALID_META_URI, msg);
}
if (!connectionPtr->thread_aware()) {
ENGINE_LOG_ERROR << "MySQL++ wasn't built with thread awareness! Can't run without it.";
return Status(DB_ERROR, "MySQL++ wasn't built with thread awareness! Can't run without it.");
}
mysqlpp::Query InitializeQuery = connectionPtr->query();
if (!connectionPtr->thread_aware()) {
std::string msg =
"Failed to initialize MySQL meta backend: MySQL client component wasn't built with thread awareness";
ENGINE_LOG_ERROR << msg;
throw Exception(DB_INVALID_META_URI, msg);
}
InitializeQuery << "CREATE TABLE IF NOT EXISTS " << TABLES_SCHEMA.name() << " ("
<< TABLES_SCHEMA.ToString() + ");";
// step 7: create meta table Tables
mysqlpp::Query InitializeQuery = connectionPtr->query();
ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str();
InitializeQuery << "CREATE TABLE IF NOT EXISTS " << TABLES_SCHEMA.name() << " (" << TABLES_SCHEMA.ToString() + ");";
if (!InitializeQuery.exec()) {
return HandleException("Initialization Error", InitializeQuery.error());
}
ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str();
InitializeQuery << "CREATE TABLE IF NOT EXISTS " << TABLEFILES_SCHEMA.name() << " ("
<< TABLEFILES_SCHEMA.ToString() + ");";
if (!InitializeQuery.exec()) {
std::string msg = "Failed to create meta table 'Tables' in MySQL";
ENGINE_LOG_ERROR << msg;
throw Exception(DB_META_TRANSACTION_FAILED, msg);
}
ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str();
// step 8: create meta table TableFiles
InitializeQuery << "CREATE TABLE IF NOT EXISTS " << TABLEFILES_SCHEMA.name() << " ("
<< TABLEFILES_SCHEMA.ToString() + ");";
if (!InitializeQuery.exec()) {
return HandleException("Initialization Error", InitializeQuery.error());
}
} // Scoped Connection
} catch (std::exception& e) {
return HandleException("GENERAL ERROR DURING INITIALIZATION", e.what());
ENGINE_LOG_DEBUG << "MySQLMetaImpl::Initialize: " << InitializeQuery.str();
if (!InitializeQuery.exec()) {
std::string msg = "Failed to create meta table 'TableFiles' in MySQL";
ENGINE_LOG_ERROR << msg;
throw Exception(DB_META_TRANSACTION_FAILED, msg);
}
return Status::OK();
@ -1609,10 +1615,35 @@ MySQLMetaImpl::FilesByType(const std::string& table_id, const std::vector<int>&
}
}
ENGINE_LOG_DEBUG << "Table " << table_id << " currently has raw files:" << raw_count
<< " new files:" << new_count << " new_merge files:" << new_merge_count
<< " new_index files:" << new_index_count << " to_index files:" << to_index_count
<< " index files:" << index_count << " backup files:" << backup_count;
std::string msg = "Get table files by type.";
for (int file_type : file_types) {
switch (file_type) {
case (int)TableFileSchema::RAW:
msg = msg + " raw files:" + std::to_string(raw_count);
break;
case (int)TableFileSchema::NEW:
msg = msg + " new files:" + std::to_string(new_count);
break;
case (int)TableFileSchema::NEW_MERGE:
msg = msg + " new_merge files:" + std::to_string(new_merge_count);
break;
case (int)TableFileSchema::NEW_INDEX:
msg = msg + " new_index files:" + std::to_string(new_index_count);
break;
case (int)TableFileSchema::TO_INDEX:
msg = msg + " to_index files:" + std::to_string(to_index_count);
break;
case (int)TableFileSchema::INDEX:
msg = msg + " index files:" + std::to_string(index_count);
break;
case (int)TableFileSchema::BACKUP:
msg = msg + " backup files:" + std::to_string(backup_count);
break;
default:
break;
}
}
ENGINE_LOG_DEBUG << msg;
}
} catch (std::exception& e) {
return HandleException("GENERAL ERROR WHEN GET FILE BY TYPE", e.what());
@ -1710,7 +1741,7 @@ MySQLMetaImpl::Size(uint64_t& result) {
}
Status
MySQLMetaImpl::CleanUp() {
MySQLMetaImpl::CleanUpShadowFiles() {
try {
mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_);
@ -1752,7 +1783,49 @@ MySQLMetaImpl::CleanUp() {
}
Status
MySQLMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) {
MySQLMetaImpl::CleanUpCacheWithTTL(uint64_t seconds) {
auto now = utils::GetMicroSecTimeStamp();
// erase deleted/backup files from cache
try {
server::MetricCollector metric;
mysqlpp::ScopedConnection connectionPtr(*mysql_connection_pool_, safe_grab_);
if (connectionPtr == nullptr) {
return Status(DB_ERROR, "Failed to connect to meta server(mysql)");
}
mysqlpp::Query cleanUpFilesWithTTLQuery = connectionPtr->query();
cleanUpFilesWithTTLQuery << "SELECT id, table_id, file_id, date"
<< " FROM " << META_TABLEFILES << " WHERE file_type IN ("
<< std::to_string(TableFileSchema::TO_DELETE) << ","
<< std::to_string(TableFileSchema::BACKUP) << ")"
<< " AND updated_time < " << std::to_string(now - seconds * US_PS) << ";";
mysqlpp::StoreQueryResult res = cleanUpFilesWithTTLQuery.store();
TableFileSchema table_file;
std::vector<std::string> idsToDelete;
for (auto& resRow : res) {
table_file.id_ = resRow["id"]; // implicit conversion
resRow["table_id"].to_string(table_file.table_id_);
resRow["file_id"].to_string(table_file.file_id_);
table_file.date_ = resRow["date"];
utils::GetTableFilePath(options_, table_file);
server::CommonUtil::EraseFromCache(table_file.location_);
}
} catch (std::exception& e) {
return HandleException("GENERAL ERROR WHEN CLEANING UP FILES WITH TTL", e.what());
}
return Status::OK();
}
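Both meta backends select erasable files with the predicate updated_time < now - seconds * US_PS. Since utils::GetMicroSecTimeStamp appears to return microseconds, the TTL given in seconds has to be scaled by US_PS, which the comparison implies is microseconds-per-second (1,000,000; an assumption, not confirmed by this diff). A worked sketch of the cutoff arithmetic:

#include <chrono>
#include <cstdint>
#include <iostream>

constexpr uint64_t kUsPerSec = 1000 * 1000;  // assumed value of meta's US_PS

int main() {
    using namespace std::chrono;
    // Stand-in for utils::GetMicroSecTimeStamp(): wall-clock time in microseconds.
    uint64_t now =
        duration_cast<microseconds>(system_clock::now().time_since_epoch()).count();
    uint64_t ttl_seconds = 10;  // the default passed in by BackgroundCompaction
    uint64_t cutoff = now - ttl_seconds * kUsPerSec;
    // A file with updated_time below the cutoff was last touched more than
    // ttl_seconds ago and is eligible to be erased from the cache.
    uint64_t updated_time = now - 15 * kUsPerSec;
    std::cout << std::boolalpha << (updated_time < cutoff) << "\n";  // true
}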
Status
MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds) {
auto now = utils::GetMicroSecTimeStamp();
std::set<std::string> table_ids;

View File

@ -117,10 +117,13 @@ class MySQLMetaImpl : public Meta {
Size(uint64_t& result) override;
Status
CleanUp() override;
CleanUpShadowFiles() override;
Status
CleanUpFilesWithTTL(uint16_t seconds) override;
CleanUpCacheWithTTL(uint64_t seconds) override;
Status
CleanUpFilesWithTTL(uint64_t seconds) override;
Status
DropAll() override;

View File

@ -20,6 +20,7 @@
#include "db/IDGenerator.h"
#include "db/Utils.h"
#include "metrics/Metrics.h"
#include "utils/CommonUtil.h"
#include "utils/Exception.h"
#include "utils/Log.h"
#include "utils/StringHelpFunctions.h"
@ -154,7 +155,7 @@ SqliteMetaImpl::Initialize() {
ConnectorPtr->open_forever(); // thread safe option
ConnectorPtr->pragma.journal_mode(journal_mode::WAL); // WAL => write ahead log
CleanUp();
CleanUpShadowFiles();
return Status::OK();
}
@ -1156,10 +1157,34 @@ SqliteMetaImpl::FilesByType(const std::string& table_id,
table_files.emplace_back(file_schema);
}
ENGINE_LOG_DEBUG << "Table " << table_id << " currently has raw files:" << raw_count
<< " new files:" << new_count << " new_merge files:" << new_merge_count
<< " new_index files:" << new_index_count << " to_index files:" << to_index_count
<< " index files:" << index_count << " backup files:" << backup_count;
std::string msg = "Get table files by type.";
for (int file_type : file_types) {
switch (file_type) {
case (int)TableFileSchema::RAW:
msg = msg + " raw files:" + std::to_string(raw_count);
break;
case (int)TableFileSchema::NEW:
msg = msg + " new files:" + std::to_string(new_count);
break;
case (int)TableFileSchema::NEW_MERGE:
msg = msg + " new_merge files:" + std::to_string(new_merge_count);
break;
case (int)TableFileSchema::NEW_INDEX:
msg = msg + " new_index files:" + std::to_string(new_index_count);
break;
case (int)TableFileSchema::TO_INDEX:
msg = msg + " to_index files:" + std::to_string(to_index_count);
break;
case (int)TableFileSchema::INDEX:
msg = msg + " index files:" + std::to_string(index_count);
break;
case (int)TableFileSchema::BACKUP:
msg = msg + " backup files:" + std::to_string(backup_count);
break;
default:
break;
}
}
ENGINE_LOG_DEBUG << msg;
}
} catch (std::exception& e) {
return HandleException("Encounter exception when check non index files", e.what());
@ -1231,7 +1256,7 @@ SqliteMetaImpl::Size(uint64_t& result) {
}
Status
SqliteMetaImpl::CleanUp() {
SqliteMetaImpl::CleanUpShadowFiles() {
try {
server::MetricCollector metric;
@ -1269,7 +1294,51 @@ SqliteMetaImpl::CleanUp() {
}
Status
SqliteMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) {
SqliteMetaImpl::CleanUpCacheWithTTL(uint64_t seconds) {
auto now = utils::GetMicroSecTimeStamp();
// erase deleted/backup files from cache
try {
server::MetricCollector metric;
// multi-threaded calls to sqlite update may throw exceptions ('bad logic', etc.), so we add a lock here
std::lock_guard<std::mutex> meta_lock(meta_mutex_);
std::vector<int> file_types = {
(int)TableFileSchema::TO_DELETE,
(int)TableFileSchema::BACKUP,
};
auto files = ConnectorPtr->select(columns(&TableFileSchema::id_,
&TableFileSchema::table_id_,
&TableFileSchema::file_id_,
&TableFileSchema::date_),
where(
in(&TableFileSchema::file_type_, file_types)
and
c(&TableFileSchema::updated_time_)
< now - seconds * US_PS));
for (auto& file : files) {
TableFileSchema table_file;
table_file.id_ = std::get<0>(file);
table_file.table_id_ = std::get<1>(file);
table_file.file_id_ = std::get<2>(file);
table_file.date_ = std::get<3>(file);
utils::GetTableFilePath(options_, table_file);
server::CommonUtil::EraseFromCache(table_file.location_);
}
} catch (std::exception& e) {
return HandleException("Encounter exception when clean cache", e.what());
}
return Status::OK();
}
Status
SqliteMetaImpl::CleanUpFilesWithTTL(uint64_t seconds) {
auto now = utils::GetMicroSecTimeStamp();
std::set<std::string> table_ids;

View File

@ -117,10 +117,13 @@ class SqliteMetaImpl : public Meta {
Archive() override;
Status
CleanUp() override;
CleanUpShadowFiles() override;
Status
CleanUpFilesWithTTL(uint16_t seconds) override;
CleanUpCacheWithTTL(uint64_t seconds) override;
Status
CleanUpFilesWithTTL(uint64_t seconds) override;
Status
DropAll() override;

View File

@ -1,6 +0,0 @@
We manually changed the following APIs in "milvus.pb.h":
add_vector_data()
add_row_id_array()
add_ids()
add_distances()
If the proto files need to be generated again, remember to re-apply the changes to these APIs.

View File

@ -72,6 +72,11 @@ include(ExternalProject)
include(DefineOptionsCore)
include(BuildUtilsCore)
if (CUSTOMIZATION)
set(MILVUS_GPU_VERSION ON)
add_compile_definitions(CUSTOMIZATION)
endif ()
set(KNOWHERE_CPU_VERSION false)
if (MILVUS_GPU_VERSION OR KNOWHERE_GPU_VERSION)
message(STATUS "Building Knowhere GPU version")

View File

@ -49,6 +49,8 @@ else ()
define_option(KNOWHERE_GPU_VERSION "Build GPU version" OFF)
endif ()
define_option(CUSTOMIZATION "Build with customized FAISS library" OFF)
#----------------------------------------------------------------------
set_option_category("Thirdparty")

View File

@ -225,11 +225,11 @@ foreach (_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
set(${_LIB_NAME} "${_LIB_VERSION}")
endforeach ()
set(FAISS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/faiss)
if (DEFINED ENV{FAISS_SOURCE_URL})
set(FAISS_SOURCE_URL "$ENV{FAISS_SOURCE_URL}")
else ()
set(FAISS_SOURCE_URL "https://github.com/JinHai-CN/faiss/archive/${FAISS_VERSION}.tar.gz")
set(FAISS_MD5 "b02c1a53234f5acc9bea1b0c55524f50")
endif ()
if (DEFINED ENV{KNOWHERE_ARROW_URL})
@ -708,7 +708,7 @@ macro(build_faiss)
set(FAISS_CONFIGURE_ARGS
"--prefix=${FAISS_PREFIX}"
"CFLAGS=${EP_C_FLAGS}"
"CXXFLAGS=${EP_CXX_FLAGS}"
"CXXFLAGS=${EP_CXX_FLAGS} -mavx2 -mf16c"
--without-python)
if (FAISS_WITH_MKL)
@ -737,12 +737,12 @@ macro(build_faiss)
set(FAISS_COMPUTE_TYPE "gpu")
else ()
set(FAISS_COMPUTE_TYPE "cpu")
endif()
endif ()
if (FAISS_WITH_MKL)
set(FAISS_CACHE_PACKAGE_NAME "faiss_${FAISS_COMPUTE_TYPE}_mkl_${FAISS_COMBINE_MD5}.tar.gz")
else ()
set(FAISS_CACHE_PACKAGE_NAME "faiss_${FAISS_COMPUTE_TYPE}_openblas_${FAISS_COMBINE_MD5}.tar.gz")
endif()
endif ()
set(FAISS_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${FAISS_CACHE_PACKAGE_NAME}")
set(FAISS_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${FAISS_CACHE_PACKAGE_NAME}")
@ -779,21 +779,41 @@ macro(build_faiss)
endif ()
endif ()
else ()
externalproject_add(faiss_ep
URL
${FAISS_SOURCE_URL}
${EP_LOG_OPTIONS}
CONFIGURE_COMMAND
"./configure"
${FAISS_CONFIGURE_ARGS}
BUILD_COMMAND
${MAKE} ${MAKE_BUILD_ARGS} all
BUILD_IN_SOURCE
1
INSTALL_COMMAND
${MAKE} install
BUILD_BYPRODUCTS
${FAISS_STATIC_LIB})
if (CUSTOMIZATION)
externalproject_add(faiss_ep
DOWNLOAD_COMMAND
""
SOURCE_DIR
${FAISS_SOURCE_DIR}
${EP_LOG_OPTIONS}
CONFIGURE_COMMAND
"./configure"
${FAISS_CONFIGURE_ARGS}
BUILD_COMMAND
${MAKE} ${MAKE_BUILD_ARGS} all
BUILD_IN_SOURCE
1
INSTALL_COMMAND
${MAKE} install
BUILD_BYPRODUCTS
${FAISS_STATIC_LIB})
else ()
externalproject_add(faiss_ep
URL
${FAISS_SOURCE_URL}
${EP_LOG_OPTIONS}
CONFIGURE_COMMAND
"./configure"
${FAISS_CONFIGURE_ARGS}
BUILD_COMMAND
${MAKE} ${MAKE_BUILD_ARGS} all
BUILD_IN_SOURCE
1
INSTALL_COMMAND
${MAKE} install
BUILD_BYPRODUCTS
${FAISS_STATIC_LIB})
endif ()
if (NOT FAISS_WITH_MKL)
ExternalProject_Add_StepDependencies(faiss_ep build openblas_ep lapack_ep)

View File

@ -47,16 +47,16 @@ GPUIDMAP::CopyGpuToCpu(const Config& config) {
return std::make_shared<IDMAP>(new_index);
}
VectorIndexPtr
GPUIDMAP::Clone() {
auto cpu_idx = CopyGpuToCpu(Config());
if (auto idmap = std::dynamic_pointer_cast<IDMAP>(cpu_idx)) {
return idmap->CopyCpuToGpu(gpu_id_, Config());
} else {
KNOWHERE_THROW_MSG("IndexType not Support GpuClone");
}
}
// VectorIndexPtr
// GPUIDMAP::Clone() {
// auto cpu_idx = CopyGpuToCpu(Config());
//
// if (auto idmap = std::dynamic_pointer_cast<IDMAP>(cpu_idx)) {
// return idmap->CopyCpuToGpu(gpu_id_, Config());
// } else {
// KNOWHERE_THROW_MSG("IndexType not Support GpuClone");
// }
//}
BinarySet
GPUIDMAP::SerializeImpl() {

View File

@ -41,8 +41,8 @@ class GPUIDMAP : public IDMAP, public GPUIndex {
int64_t*
GetRawIds() override;
VectorIndexPtr
Clone() override;
// VectorIndexPtr
// Clone() override;
VectorIndexPtr
CopyGpuToGpu(const int64_t& device_id, const Config& config) override;

View File

@ -158,11 +158,11 @@ GPUIVF::CopyGpuToCpu(const Config& config) {
}
}
VectorIndexPtr
GPUIVF::Clone() {
auto cpu_idx = CopyGpuToCpu(Config());
return knowhere::cloner::CopyCpuToGpu(cpu_idx, gpu_id_, Config());
}
// VectorIndexPtr
// GPUIVF::Clone() {
// auto cpu_idx = CopyGpuToCpu(Config());
// return knowhere::cloner::CopyCpuToGpu(cpu_idx, gpu_id_, Config());
//}
VectorIndexPtr
GPUIVF::CopyGpuToGpu(const int64_t& device_id, const Config& config) {

View File

@ -75,8 +75,8 @@ class GPUIVF : public IVF, public GPUIndex {
VectorIndexPtr
CopyGpuToGpu(const int64_t& device_id, const Config& config) override;
VectorIndexPtr
Clone() final;
// VectorIndexPtr
// Clone() final;
protected:
void

View File

@ -184,15 +184,15 @@ IDMAP::Train(const Config& config) {
index_.reset(index);
}
VectorIndexPtr
IDMAP::Clone() {
std::lock_guard<std::mutex> lk(mutex_);
auto clone_index = faiss::clone_index(index_.get());
std::shared_ptr<faiss::Index> new_index;
new_index.reset(clone_index);
return std::make_shared<IDMAP>(new_index);
}
// VectorIndexPtr
// IDMAP::Clone() {
// std::lock_guard<std::mutex> lk(mutex_);
//
// auto clone_index = faiss::clone_index(index_.get());
// std::shared_ptr<faiss::Index> new_index;
// new_index.reset(clone_index);
// return std::make_shared<IDMAP>(new_index);
//}
VectorIndexPtr
IDMAP::CopyCpuToGpu(const int64_t& device_id, const Config& config) {

View File

@ -47,8 +47,8 @@ class IDMAP : public VectorIndex, public FaissBaseIndex {
int64_t
Count() override;
VectorIndexPtr
Clone() override;
// VectorIndexPtr
// Clone() override;
int64_t
Dimension() override;

View File

@ -256,20 +256,20 @@ IVF::CopyCpuToGpu(const int64_t& device_id, const Config& config) {
#endif
}
VectorIndexPtr
IVF::Clone() {
std::lock_guard<std::mutex> lk(mutex_);
auto clone_index = faiss::clone_index(index_.get());
std::shared_ptr<faiss::Index> new_index;
new_index.reset(clone_index);
return Clone_impl(new_index);
}
VectorIndexPtr
IVF::Clone_impl(const std::shared_ptr<faiss::Index>& index) {
return std::make_shared<IVF>(index);
}
// VectorIndexPtr
// IVF::Clone() {
// std::lock_guard<std::mutex> lk(mutex_);
//
// auto clone_index = faiss::clone_index(index_.get());
// std::shared_ptr<faiss::Index> new_index;
// new_index.reset(clone_index);
// return Clone_impl(new_index);
//}
//
// VectorIndexPtr
// IVF::Clone_impl(const std::shared_ptr<faiss::Index>& index) {
// return std::make_shared<IVF>(index);
//}
void
IVF::Seal() {

View File

@ -38,8 +38,8 @@ class IVF : public VectorIndex, public FaissBaseIndex {
explicit IVF(std::shared_ptr<faiss::Index> index) : FaissBaseIndex(std::move(index)) {
}
VectorIndexPtr
Clone() override;
// VectorIndexPtr
// Clone() override;
IndexModelPtr
Train(const DatasetPtr& dataset, const Config& config) override;
@ -81,8 +81,8 @@ class IVF : public VectorIndex, public FaissBaseIndex {
virtual std::shared_ptr<faiss::IVFSearchParameters>
GenParams(const Config& config);
virtual VectorIndexPtr
Clone_impl(const std::shared_ptr<faiss::Index>& index);
// virtual VectorIndexPtr
// Clone_impl(const std::shared_ptr<faiss::Index>& index);
virtual void
search_impl(int64_t n, const float* data, int64_t k, float* distances, int64_t* labels, const Config& cfg);

View File

@ -63,10 +63,10 @@ IVFPQ::GenParams(const Config& config) {
return params;
}
VectorIndexPtr
IVFPQ::Clone_impl(const std::shared_ptr<faiss::Index>& index) {
return std::make_shared<IVFPQ>(index);
}
// VectorIndexPtr
// IVFPQ::Clone_impl(const std::shared_ptr<faiss::Index>& index) {
// return std::make_shared<IVFPQ>(index);
//}
VectorIndexPtr
IVFPQ::CopyCpuToGpu(const int64_t& device_id, const Config& config) {

View File

@ -41,8 +41,8 @@ class IVFPQ : public IVF {
std::shared_ptr<faiss::IVFSearchParameters>
GenParams(const Config& config) override;
VectorIndexPtr
Clone_impl(const std::shared_ptr<faiss::Index>& index) override;
// VectorIndexPtr
// Clone_impl(const std::shared_ptr<faiss::Index>& index) override;
};
} // namespace knowhere

View File

@ -54,10 +54,10 @@ IVFSQ::Train(const DatasetPtr& dataset, const Config& config) {
return std::make_shared<IVFIndexModel>(ret_index);
}
VectorIndexPtr
IVFSQ::Clone_impl(const std::shared_ptr<faiss::Index>& index) {
return std::make_shared<IVFSQ>(index);
}
// VectorIndexPtr
// IVFSQ::Clone_impl(const std::shared_ptr<faiss::Index>& index) {
// return std::make_shared<IVFSQ>(index);
//}
VectorIndexPtr
IVFSQ::CopyCpuToGpu(const int64_t& device_id, const Config& config) {

View File

@ -38,8 +38,8 @@ class IVFSQ : public IVF {
CopyCpuToGpu(const int64_t& device_id, const Config& config) override;
protected:
VectorIndexPtr
Clone_impl(const std::shared_ptr<faiss::Index>& index) override;
// VectorIndexPtr
// Clone_impl(const std::shared_ptr<faiss::Index>& index) override;
};
} // namespace knowhere

View File

@ -20,12 +20,13 @@
#include "knowhere/common/Exception.h"
#include "knowhere/common/Timer.h"
#ifdef MILVUS_GPU_VERSION
#include "knowhere/index/vector_index/IndexGPUIVF.h"
#include "knowhere/index/vector_index/IndexGPUIDMAP.h"
#include "knowhere/index/vector_index/IndexGPUIVF.h"
#include "knowhere/index/vector_index/helpers/Cloner.h"
#endif
#include "knowhere/index/vector_index/IndexIVF.h"
#include "knowhere/index/vector_index/IndexIDMAP.h"
#include "knowhere/index/vector_index/nsg/NSG.h"
#include "knowhere/index/vector_index/nsg/NSGIO.h"
@ -118,23 +119,32 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) {
build_cfg->CheckValid(); // throw exception
}
// TODO(linxj): dev IndexFactory, support more IndexType
auto idmap = std::make_shared<IDMAP>();
idmap->Train(config);
idmap->AddWithoutId(dataset, config);
Graph knng;
float* raw_data = idmap->GetRawVectors();
#ifdef MILVUS_GPU_VERSION
// auto preprocess_index = std::make_shared<GPUIVF>(build_cfg->gpu_id);
if (build_cfg->gpu_id == knowhere::INVALID_VALUE) {
auto preprocess_index = std::make_shared<IVF>();
auto model = preprocess_index->Train(dataset, config);
preprocess_index->set_index_model(model);
preprocess_index->Add(dataset, config);
preprocess_index->GenGraph(raw_data, build_cfg->knng, knng, config);
} else {
// TODO(linxj): use ivf instead?
auto gpu_idx = cloner::CopyCpuToGpu(idmap, build_cfg->gpu_id, config);
auto gpu_idmap = std::dynamic_pointer_cast<GPUIDMAP>(gpu_idx);
gpu_idmap->GenGraph(raw_data, build_cfg->knng, knng, config);
}
#else
auto preprocess_index = std::make_shared<IVF>();
auto model = preprocess_index->Train(dataset, config);
preprocess_index->set_index_model(model);
preprocess_index->AddWithoutIds(dataset, config);
preprocess_index->GenGraph(raw_data, build_cfg->knng, knng, config);
#endif
auto preprocess_index = std::make_shared<IDMAP>();
preprocess_index->Train(config);
preprocess_index->AddWithoutId(dataset, config);
float* raw_data = preprocess_index->GetRawVectors();
auto xx = cloner::CopyCpuToGpu(preprocess_index, 0, config);
auto ss = std::dynamic_pointer_cast<GPUIDMAP>(xx);
Graph knng;
ss->GenGraph(raw_data, build_cfg->knng, knng, config);
GETTENSOR(dataset)
algo::BuildParams b_params;
b_params.candidate_pool_size = build_cfg->candidate_pool_size;
b_params.out_degree = build_cfg->out_degree;
@ -143,6 +153,7 @@ NSG::Train(const DatasetPtr& dataset, const Config& config) {
auto array = dataset->array()[0];
auto p_ids = array->data()->GetValues<int64_t>(1, 0);
GETTENSOR(dataset)
index_ = std::make_shared<algo::NsgIndex>(dim, rows);
index_->SetKnnGraph(knng);
index_->Build_with_ids(rows, (float*)p_data, (int64_t*)p_ids, b_params);
@ -164,10 +175,10 @@ NSG::Dimension() {
return index_->dimension;
}
VectorIndexPtr
NSG::Clone() {
KNOWHERE_THROW_MSG("not support");
}
// VectorIndexPtr
// NSG::Clone() {
// KNOWHERE_THROW_MSG("not support");
//}
void
NSG::Seal() {

View File

@ -49,8 +49,8 @@ class NSG : public VectorIndex {
Count() override;
int64_t
Dimension() override;
VectorIndexPtr
Clone() override;
// VectorIndexPtr
// Clone() override;
void
Seal() override;

View File

@ -210,6 +210,9 @@ CPUSPTAGRNG::Load(const BinarySet& binary_set) {
IndexModelPtr
CPUSPTAGRNG::Train(const DatasetPtr& origin, const Config& train_config) {
SetParameters(train_config);
if (train_config != nullptr) {
train_config->CheckValid(); // throw exception
}
DatasetPtr dataset = origin->Clone();
// if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine
@ -295,6 +298,9 @@ CPUSPTAGRNG::SetParameters(const Config& config) {
DatasetPtr
CPUSPTAGRNG::Search(const DatasetPtr& dataset, const Config& config) {
SetParameters(config);
if (config != nullptr) {
config->CheckValid(); // throw exception
}
auto tensor = dataset->tensor()[0];
auto p = (float*)tensor->raw_mutable_data();
for (auto i = 0; i < 10; ++i) {
@ -325,10 +331,10 @@ CPUSPTAGRNG::Dimension() {
return index_ptr_->GetFeatureDim();
}
VectorIndexPtr
CPUSPTAGRNG::Clone() {
KNOWHERE_THROW_MSG("not support");
}
// VectorIndexPtr
// CPUSPTAGRNG::Clone() {
// KNOWHERE_THROW_MSG("not support");
//}
void
CPUSPTAGRNG::Seal() {

View File

@ -36,8 +36,8 @@ class CPUSPTAGRNG : public VectorIndex {
BinarySet
Serialize() override;
VectorIndexPtr
Clone() override;
// VectorIndexPtr
// Clone() override;
void
Load(const BinarySet& index_array) override;

View File

@ -49,8 +49,8 @@ class VectorIndex : public Index {
Seal() = 0;
// TODO(linxj): Deprecated
virtual VectorIndexPtr
Clone() = 0;
// virtual VectorIndexPtr
// Clone() = 0;
virtual int64_t
Count() = 0;

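
Since `Clone()` is commented out of the `VectorIndex` interface here (it was already marked deprecated), the call sites shown earlier copy indexes through the cloner helpers instead. A minimal sketch of that pattern, reusing the `cloner::CopyCpuToGpu` helper and the `VectorIndexPtr`/`Config` types exactly as they appear in the hunks above; the wrapper name `to_gpu` is hypothetical:

```c++
#include "knowhere/index/vector_index/helpers/Cloner.h"

// Hypothetical convenience wrapper: cross-device copy via the cloner
// helper used in the NSG hunk above, rather than VectorIndex::Clone().
VectorIndexPtr
to_gpu(const VectorIndexPtr& cpu_index, int64_t gpu_id, const Config& config) {
    // CopyCpuToGpu returns a new GPU-resident index; the CPU index is untouched.
    return cloner::CopyCpuToGpu(cpu_index, gpu_id, config);
}
```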
View File

@ -180,10 +180,10 @@ struct SPTAGCfg : public Cfg {
SPTAGCfg() = default;
bool
CheckValid() override {
return true;
};
// bool
// CheckValid() override {
// return true;
// };
};
using SPTAGConfig = std::shared_ptr<SPTAGCfg>;

View File

@ -0,0 +1 @@
sift1M

View File

@ -0,0 +1,21 @@
*.swp
*.swo
*.o
*.a
*.dSYM
*.so
*.dylib
*.pyc
*~
.DS_Store
depend
/config.*
/aclocal.m4
/autom4te.cache/
/makefile.inc
/bin/
/c_api/bin/
/c_api/gpu/bin/
/tests/test
/tests/gtest/
include/

View File

@ -0,0 +1,719 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
/*
* implementation of Hyper-parameter auto-tuning
*/
#include <faiss/AutoTune.h>
#include <cmath>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/utils.h>
#include <faiss/utils/random.h>
#include <faiss/IndexFlat.h>
#include <faiss/VectorTransform.h>
#include <faiss/IndexPreTransform.h>
#include <faiss/IndexLSH.h>
#include <faiss/IndexPQ.h>
#include <faiss/IndexIVF.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexIVFPQR.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/MetaIndexes.h>
#include <faiss/IndexScalarQuantizer.h>
#include <faiss/IndexHNSW.h>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/IndexBinaryHNSW.h>
#include <faiss/IndexBinaryIVF.h>
namespace faiss {
AutoTuneCriterion::AutoTuneCriterion (idx_t nq, idx_t nnn):
nq (nq), nnn (nnn), gt_nnn (0)
{}
void AutoTuneCriterion::set_groundtruth (
int gt_nnn, const float *gt_D_in, const idx_t *gt_I_in)
{
this->gt_nnn = gt_nnn;
if (gt_D_in) { // allow null for this, as it is often not used
gt_D.resize (nq * gt_nnn);
memcpy (gt_D.data(), gt_D_in, sizeof (gt_D[0]) * nq * gt_nnn);
}
gt_I.resize (nq * gt_nnn);
memcpy (gt_I.data(), gt_I_in, sizeof (gt_I[0]) * nq * gt_nnn);
}
OneRecallAtRCriterion::OneRecallAtRCriterion (idx_t nq, idx_t R):
AutoTuneCriterion(nq, R), R(R)
{}
double OneRecallAtRCriterion::evaluate(const float* /*D*/, const idx_t* I)
const {
FAISS_THROW_IF_NOT_MSG(
(gt_I.size() == gt_nnn * nq && gt_nnn >= 1 && nnn >= R),
"ground truth not initialized");
idx_t n_ok = 0;
for (idx_t q = 0; q < nq; q++) {
idx_t gt_nn = gt_I[q * gt_nnn];
const idx_t* I_line = I + q * nnn;
for (int i = 0; i < R; i++) {
if (I_line[i] == gt_nn) {
n_ok++;
break;
}
}
}
return n_ok / double(nq);
}
IntersectionCriterion::IntersectionCriterion (idx_t nq, idx_t R):
AutoTuneCriterion(nq, R), R(R)
{}
double IntersectionCriterion::evaluate(const float* /*D*/, const idx_t* I)
const {
FAISS_THROW_IF_NOT_MSG(
(gt_I.size() == gt_nnn * nq && gt_nnn >= R && nnn >= R),
"ground truth not initialized");
int64_t n_ok = 0;
#pragma omp parallel for reduction(+: n_ok)
for (idx_t q = 0; q < nq; q++) {
n_ok += ranklist_intersection_size (
R, &gt_I [q * gt_nnn],
R, I + q * nnn);
}
return n_ok / double (nq * R);
}
/***************************************************************
* OperatingPoints
***************************************************************/
OperatingPoints::OperatingPoints ()
{
clear();
}
void OperatingPoints::clear ()
{
all_pts.clear();
optimal_pts.clear();
/// default point: doing nothing gives 0 performance and takes 0 time
OperatingPoint op = {0, 0, "", -1};
optimal_pts.push_back(op);
}
/// add a performance measure
bool OperatingPoints::add (double perf, double t, const std::string & key,
size_t cno)
{
OperatingPoint op = {perf, t, key, int64_t(cno)};
all_pts.push_back (op);
if (perf == 0) {
return false; // no method for 0 accuracy is faster than doing nothing
}
std::vector<OperatingPoint> & a = optimal_pts;
if (perf > a.back().perf) {
// keep unconditionally
a.push_back (op);
} else if (perf == a.back().perf) {
if (t < a.back ().t) {
a.back() = op;
} else {
return false;
}
} else {
int i;
// stricto sensu this should be a bisection
for (i = 0; i < a.size(); i++) {
if (a[i].perf >= perf) break;
}
assert (i < a.size());
if (t < a[i].t) {
if (a[i].perf == perf) {
a[i] = op;
} else {
a.insert (a.begin() + i, op);
}
} else {
return false;
}
}
{ // remove non-optimal points from array
int i = a.size() - 1;
while (i > 0) {
if (a[i].t < a[i - 1].t)
a.erase (a.begin() + (i - 1));
i--;
}
}
return true;
}
int OperatingPoints::merge_with (const OperatingPoints &other,
const std::string & prefix)
{
int n_add = 0;
for (int i = 0; i < other.all_pts.size(); i++) {
const OperatingPoint & op = other.all_pts[i];
if (add (op.perf, op.t, prefix + op.key, op.cno))
n_add++;
}
return n_add;
}
/// get time required to obtain a given performance measure
double OperatingPoints::t_for_perf (double perf) const
{
const std::vector<OperatingPoint> & a = optimal_pts;
if (perf > a.back().perf) return 1e50;
int i0 = -1, i1 = a.size() - 1;
while (i0 + 1 < i1) {
int imed = (i0 + i1 + 1) / 2;
if (a[imed].perf < perf) i0 = imed;
else i1 = imed;
}
return a[i1].t;
}
void OperatingPoints::all_to_gnuplot (const char *fname) const
{
FILE *f = fopen(fname, "w");
if (!f) {
fprintf (stderr, "cannot open %s", fname);
perror("");
abort();
}
for (int i = 0; i < all_pts.size(); i++) {
const OperatingPoint & op = all_pts[i];
fprintf (f, "%g %g %s\n", op.perf, op.t, op.key.c_str());
}
fclose(f);
}
void OperatingPoints::optimal_to_gnuplot (const char *fname) const
{
FILE *f = fopen(fname, "w");
if (!f) {
fprintf (stderr, "cannot open %s", fname);
perror("");
abort();
}
double prev_perf = 0.0;
for (int i = 0; i < optimal_pts.size(); i++) {
const OperatingPoint & op = optimal_pts[i];
fprintf (f, "%g %g\n", prev_perf, op.t);
fprintf (f, "%g %g %s\n", op.perf, op.t, op.key.c_str());
prev_perf = op.perf;
}
fclose(f);
}
void OperatingPoints::display (bool only_optimal) const
{
const std::vector<OperatingPoint> &pts =
only_optimal ? optimal_pts : all_pts;
printf("Tested %ld operating points, %ld of which are optimal:\n",
all_pts.size(), optimal_pts.size());
for (int i = 0; i < pts.size(); i++) {
const OperatingPoint & op = pts[i];
const char *star = "";
if (!only_optimal) {
for (int j = 0; j < optimal_pts.size(); j++) {
if (op.cno == optimal_pts[j].cno) {
star = "*";
break;
}
}
}
printf ("cno=%ld key=%s perf=%.4f t=%.3f %s\n",
op.cno, op.key.c_str(), op.perf, op.t, star);
}
}
/***************************************************************
* ParameterSpace
***************************************************************/
ParameterSpace::ParameterSpace ():
verbose (1), n_experiments (500),
batchsize (1<<30), thread_over_batches (false),
min_test_duration (0)
{
}
/* not keeping this constructor as inheritors will call the parent
initialize()
*/
#if 0
ParameterSpace::ParameterSpace (Index *index):
verbose (1), n_experiments (500),
batchsize (1<<30), thread_over_batches (false)
{
initialize(index);
}
#endif
size_t ParameterSpace::n_combinations () const
{
size_t n = 1;
for (int i = 0; i < parameter_ranges.size(); i++)
n *= parameter_ranges[i].values.size();
return n;
}
/// get string representation of the combination
std::string ParameterSpace::combination_name (size_t cno) const {
char buf[1000], *wp = buf;
*wp = 0;
for (int i = 0; i < parameter_ranges.size(); i++) {
const ParameterRange & pr = parameter_ranges[i];
size_t j = cno % pr.values.size();
cno /= pr.values.size();
wp += snprintf (
wp, buf + 1000 - wp, "%s%s=%g", i == 0 ? "" : ",",
pr.name.c_str(), pr.values[j]);
}
return std::string (buf);
}
bool ParameterSpace::combination_ge (size_t c1, size_t c2) const
{
for (int i = 0; i < parameter_ranges.size(); i++) {
int nval = parameter_ranges[i].values.size();
size_t j1 = c1 % nval;
size_t j2 = c2 % nval;
if (!(j1 >= j2)) return false;
c1 /= nval;
c2 /= nval;
}
return true;
}
#define DC(classname) \
const classname *ix = dynamic_cast<const classname *>(index)
static void init_pq_ParameterRange (const ProductQuantizer & pq,
ParameterRange & pr)
{
if (pq.code_size % 4 == 0) {
// Polysemous not supported for code sizes that are not a
// multiple of 4
for (int i = 2; i <= pq.code_size * 8 / 2; i+= 2)
pr.values.push_back(i);
}
pr.values.push_back (pq.code_size * 8);
}
ParameterRange &ParameterSpace::add_range(const char * name)
{
for (auto & pr : parameter_ranges) {
if (pr.name == name) {
return pr;
}
}
parameter_ranges.push_back (ParameterRange ());
parameter_ranges.back ().name = name;
return parameter_ranges.back ();
}
/// initialize with reasonable parameters for the index
void ParameterSpace::initialize (const Index * index)
{
if (DC (IndexPreTransform)) {
index = ix->index;
}
if (DC (IndexRefineFlat)) {
ParameterRange & pr = add_range("k_factor_rf");
for (int i = 0; i <= 6; i++) {
pr.values.push_back (1 << i);
}
index = ix->base_index;
}
if (DC (IndexPreTransform)) {
index = ix->index;
}
if (DC (IndexIVF)) {
{
ParameterRange & pr = add_range("nprobe");
for (int i = 0; i < 13; i++) {
size_t nprobe = 1 << i;
if (nprobe >= ix->nlist) break;
pr.values.push_back (nprobe);
}
}
if (dynamic_cast<const IndexHNSW*>(ix->quantizer)) {
ParameterRange & pr = add_range("efSearch");
for (int i = 2; i <= 9; i++) {
pr.values.push_back (1 << i);
}
}
}
if (DC (IndexPQ)) {
ParameterRange & pr = add_range("ht");
init_pq_ParameterRange (ix->pq, pr);
}
if (DC (IndexIVFPQ)) {
ParameterRange & pr = add_range("ht");
init_pq_ParameterRange (ix->pq, pr);
}
if (DC (IndexIVF)) {
const MultiIndexQuantizer *miq =
dynamic_cast<const MultiIndexQuantizer *> (ix->quantizer);
if (miq) {
ParameterRange & pr_max_codes = add_range("max_codes");
for (int i = 8; i < 20; i++) {
pr_max_codes.values.push_back (1 << i);
}
pr_max_codes.values.push_back (
std::numeric_limits<double>::infinity()
);
}
}
if (DC (IndexIVFPQR)) {
ParameterRange & pr = add_range("k_factor");
for (int i = 0; i <= 6; i++) {
pr.values.push_back (1 << i);
}
}
if (dynamic_cast<const IndexHNSW*>(index)) {
ParameterRange & pr = add_range("efSearch");
for (int i = 2; i <= 9; i++) {
pr.values.push_back (1 << i);
}
}
}
#undef DC
// non-const version
#define DC(classname) classname *ix = dynamic_cast<classname *>(index)
/// set a combination of parameters on an index
void ParameterSpace::set_index_parameters (Index *index, size_t cno) const
{
for (int i = 0; i < parameter_ranges.size(); i++) {
const ParameterRange & pr = parameter_ranges[i];
size_t j = cno % pr.values.size();
cno /= pr.values.size();
double val = pr.values [j];
set_index_parameter (index, pr.name, val);
}
}
/// set a combination of parameters on an index
void ParameterSpace::set_index_parameters (
Index *index, const char *description_in) const
{
char description[strlen(description_in) + 1];
char *ptr;
memcpy (description, description_in, strlen(description_in) + 1);
for (char *tok = strtok_r (description, " ,", &ptr);
tok;
tok = strtok_r (nullptr, " ,", &ptr)) {
char name[100];
double val;
int ret = sscanf (tok, "%100[^=]=%lf", name, &val);
FAISS_THROW_IF_NOT_FMT (
ret == 2, "could not interpret parameters %s", tok);
set_index_parameter (index, name, val);
}
}
void ParameterSpace::set_index_parameter (
Index * index, const std::string & name, double val) const
{
if (verbose > 1)
printf(" set %s=%g\n", name.c_str(), val);
if (name == "verbose") {
index->verbose = int(val);
// and fall through to also enable it on sub-indexes
}
if (DC (IndexPreTransform)) {
set_index_parameter (ix->index, name, val);
return;
}
if (DC (IndexShards)) {
// call on all sub-indexes
auto fn =
[this, name, val](int, Index* subIndex) {
set_index_parameter(subIndex, name, val);
};
ix->runOnIndex(fn);
return;
}
if (DC (IndexReplicas)) {
// call on all sub-indexes
auto fn =
[this, name, val](int, Index* subIndex) {
set_index_parameter(subIndex, name, val);
};
ix->runOnIndex(fn);
return;
}
if (DC (IndexRefineFlat)) {
if (name == "k_factor_rf") {
ix->k_factor = int(val);
return;
}
// otherwise it is for the sub-index
set_index_parameter (&ix->refine_index, name, val);
return;
}
if (name == "verbose") {
index->verbose = int(val);
return; // last verbose that we could find
}
if (name == "nprobe") {
if (DC (IndexIDMap)) {
set_index_parameter (ix->index, name, val);
return;
} else if (DC (IndexIVF)) {
ix->nprobe = int(val);
return;
}
}
if (name == "ht") {
if (DC (IndexPQ)) {
if (val >= ix->pq.code_size * 8) {
ix->search_type = IndexPQ::ST_PQ;
} else {
ix->search_type = IndexPQ::ST_polysemous;
ix->polysemous_ht = int(val);
}
return;
} else if (DC (IndexIVFPQ)) {
if (val >= ix->pq.code_size * 8) {
ix->polysemous_ht = 0;
} else {
ix->polysemous_ht = int(val);
}
return;
}
}
if (name == "k_factor") {
if (DC (IndexIVFPQR)) {
ix->k_factor = val;
return;
}
}
if (name == "max_codes") {
if (DC (IndexIVF)) {
ix->max_codes = std::isfinite(val) ? size_t(val) : 0;
return;
}
}
if (name == "efSearch") {
if (DC (IndexHNSW)) {
ix->hnsw.efSearch = int(val);
return;
}
if (DC (IndexIVF)) {
if (IndexHNSW *cq =
dynamic_cast<IndexHNSW *>(ix->quantizer)) {
cq->hnsw.efSearch = int(val);
return;
}
}
}
FAISS_THROW_FMT ("ParameterSpace::set_index_parameter:"
"could not set parameter %s",
name.c_str());
}
void ParameterSpace::display () const
{
printf ("ParameterSpace, %ld parameters, %ld combinations:\n",
parameter_ranges.size (), n_combinations ());
for (int i = 0; i < parameter_ranges.size(); i++) {
const ParameterRange & pr = parameter_ranges[i];
printf (" %s: ", pr.name.c_str ());
char sep = '[';
for (int j = 0; j < pr.values.size(); j++) {
printf ("%c %g", sep, pr.values [j]);
sep = ',';
}
printf ("]\n");
}
}
void ParameterSpace::update_bounds (size_t cno, const OperatingPoint & op,
double *upper_bound_perf,
double *lower_bound_t) const
{
if (combination_ge (cno, op.cno)) {
if (op.t > *lower_bound_t) *lower_bound_t = op.t;
}
if (combination_ge (op.cno, cno)) {
if (op.perf < *upper_bound_perf) *upper_bound_perf = op.perf;
}
}
void ParameterSpace::explore (Index *index,
size_t nq, const float *xq,
const AutoTuneCriterion & crit,
OperatingPoints * ops) const
{
FAISS_THROW_IF_NOT_MSG (nq == crit.nq,
"criterion does not have the same nb of queries");
size_t n_comb = n_combinations ();
if (n_experiments == 0) {
for (size_t cno = 0; cno < n_comb; cno++) {
set_index_parameters (index, cno);
std::vector<Index::idx_t> I(nq * crit.nnn);
std::vector<float> D(nq * crit.nnn);
double t0 = getmillisecs ();
index->search (nq, xq, crit.nnn, D.data(), I.data());
double t_search = (getmillisecs() - t0) / 1e3;
double perf = crit.evaluate (D.data(), I.data());
bool keep = ops->add (perf, t_search, combination_name (cno), cno);
if (verbose)
printf(" %ld/%ld: %s perf=%.3f t=%.3f s %s\n", cno, n_comb,
combination_name (cno).c_str(), perf, t_search,
keep ? "*" : "");
}
return;
}
int n_exp = n_experiments;
if (n_exp > n_comb) n_exp = n_comb;
FAISS_THROW_IF_NOT (n_comb == 1 || n_exp > 2);
std::vector<int> perm (n_comb);
// make sure the slowest and fastest experiment are run
perm[0] = 0;
if (n_comb > 1) {
perm[1] = n_comb - 1;
rand_perm (&perm[2], n_comb - 2, 1234);
for (int i = 2; i < perm.size(); i++) perm[i] ++;
}
for (size_t xp = 0; xp < n_exp; xp++) {
size_t cno = perm[xp];
if (verbose)
printf(" %ld/%d: cno=%ld %s ", xp, n_exp, cno,
combination_name (cno).c_str());
{
double lower_bound_t = 0.0;
double upper_bound_perf = 1.0;
for (int i = 0; i < ops->all_pts.size(); i++) {
update_bounds (cno, ops->all_pts[i],
&upper_bound_perf, &lower_bound_t);
}
double best_t = ops->t_for_perf (upper_bound_perf);
if (verbose)
printf ("bounds [perf<=%.3f t>=%.3f] %s",
upper_bound_perf, lower_bound_t,
best_t <= lower_bound_t ? "skip\n" : "");
if (best_t <= lower_bound_t) continue;
}
set_index_parameters (index, cno);
std::vector<Index::idx_t> I(nq * crit.nnn);
std::vector<float> D(nq * crit.nnn);
double t0 = getmillisecs ();
int nrun = 0;
double t_search;
do {
if (thread_over_batches) {
#pragma omp parallel for
for (size_t q0 = 0; q0 < nq; q0 += batchsize) {
size_t q1 = q0 + batchsize;
if (q1 > nq) q1 = nq;
index->search (q1 - q0, xq + q0 * index->d,
crit.nnn,
D.data() + q0 * crit.nnn,
I.data() + q0 * crit.nnn);
}
} else {
for (size_t q0 = 0; q0 < nq; q0 += batchsize) {
size_t q1 = q0 + batchsize;
if (q1 > nq) q1 = nq;
index->search (q1 - q0, xq + q0 * index->d,
crit.nnn,
D.data() + q0 * crit.nnn,
I.data() + q0 * crit.nnn);
}
}
nrun ++;
t_search = (getmillisecs() - t0) / 1e3;
} while (t_search < min_test_duration);
t_search /= nrun;
double perf = crit.evaluate (D.data(), I.data());
bool keep = ops->add (perf, t_search, combination_name (cno), cno);
if (verbose)
printf(" perf %.3f t %.3f (%d runs) %s\n",
perf, t_search, nrun,
keep ? "*" : "");
}
}
} // namespace faiss

View File

@ -0,0 +1,212 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_AUTO_TUNE_H
#define FAISS_AUTO_TUNE_H
#include <vector>
#include <unordered_map>
#include <stdint.h>
#include <faiss/Index.h>
#include <faiss/IndexBinary.h>
namespace faiss {
/**
* Evaluation criterion. Returns a performance measure in [0,1],
* higher is better.
*/
struct AutoTuneCriterion {
typedef Index::idx_t idx_t;
idx_t nq; ///< nb of queries this criterion is evaluated on
idx_t nnn; ///< nb of NNs that the query should request
idx_t gt_nnn; ///< nb of GT NNs required to evaluate criterion
std::vector<float> gt_D; ///< Ground-truth distances (size nq * gt_nnn)
std::vector<idx_t> gt_I; ///< Ground-truth indexes (size nq * gt_nnn)
AutoTuneCriterion (idx_t nq, idx_t nnn);
/** Initializes the gt_D and gt_I vectors. Must be called before evaluating
*
* @param gt_D_in size nq * gt_nnn
* @param gt_I_in size nq * gt_nnn
*/
void set_groundtruth (int gt_nnn, const float *gt_D_in,
const idx_t *gt_I_in);
/** Evaluate the criterion.
*
* @param D size nq * nnn
* @param I size nq * nnn
* @return the criterion, between 0 and 1. Larger is better.
*/
virtual double evaluate (const float *D, const idx_t *I) const = 0;
virtual ~AutoTuneCriterion () {}
};
struct OneRecallAtRCriterion: AutoTuneCriterion {
idx_t R;
OneRecallAtRCriterion (idx_t nq, idx_t R);
double evaluate(const float* D, const idx_t* I) const override;
~OneRecallAtRCriterion() override {}
};
struct IntersectionCriterion: AutoTuneCriterion {
idx_t R;
IntersectionCriterion (idx_t nq, idx_t R);
double evaluate(const float* D, const idx_t* I) const override;
~IntersectionCriterion() override {}
};
/**
* Maintains a list of experimental results. Each operating point is a
* (perf, t, key) triplet, where higher perf and lower t is
* better. The key field is an arbitrary identifier for the operating point
*/
struct OperatingPoint {
double perf; ///< performance measure (output of a Criterion)
double t; ///< corresponding execution time (ms)
std::string key; ///< key that identifies this op pt
int64_t cno; ///< integer identifier
};
struct OperatingPoints {
/// all operating points
std::vector<OperatingPoint> all_pts;
/// optimal operating points, sorted by perf
std::vector<OperatingPoint> optimal_pts;
// begins with a single operating point: t=0, perf=0
OperatingPoints ();
/// add operating points from other to this, with a prefix to the keys
int merge_with (const OperatingPoints &other,
const std::string & prefix = "");
void clear ();
/// add a performance measure. Return whether it is an optimal point
bool add (double perf, double t, const std::string & key, size_t cno = 0);
/// get time required to obtain a given performance measure
double t_for_perf (double perf) const;
/// easy-to-read output
void display (bool only_optimal = true) const;
/// output to a format easy to digest by gnuplot
void all_to_gnuplot (const char *fname) const;
void optimal_to_gnuplot (const char *fname) const;
};
/// possible values of a parameter, sorted from least to most expensive/accurate
struct ParameterRange {
std::string name;
std::vector<double> values;
};
/** Uses a-priori knowledge on the Faiss indexes to extract tunable parameters.
*/
struct ParameterSpace {
/// all tunable parameters
std::vector<ParameterRange> parameter_ranges;
// exploration parameters
/// verbosity during exploration
int verbose;
/// nb of experiments during optimization (0 = try all combinations)
int n_experiments;
/// maximum number of queries to submit at a time.
size_t batchsize;
/// use multithreading over batches (useful to benchmark
/// independent single-searches)
bool thread_over_batches;
/// run tests several times until they reach at least this
/// duration (to avoid jittering in MT mode)
double min_test_duration;
ParameterSpace ();
/// nb of combinations, = product of values sizes
size_t n_combinations () const;
/// returns whether combinations c1 >= c2 in the tuple sense
bool combination_ge (size_t c1, size_t c2) const;
/// get string representation of the combination
std::string combination_name (size_t cno) const;
/// print a description on stdout
void display () const;
/// add a new parameter (or return it if it exists)
ParameterRange &add_range(const char * name);
/// initialize with reasonable parameters for the index
virtual void initialize (const Index * index);
/// set a combination of parameters on an index
void set_index_parameters (Index *index, size_t cno) const;
/// set a combination of parameters described by a string
void set_index_parameters (Index *index, const char *param_string) const;
/// set one of the parameters
virtual void set_index_parameter (
Index * index, const std::string & name, double val) const;
/** find an upper bound on the performance and a lower bound on t
* for configuration cno given another operating point op */
void update_bounds (size_t cno, const OperatingPoint & op,
double *upper_bound_perf,
double *lower_bound_t) const;
/** explore operating points
* @param index index to run on
* @param xq query vectors (size nq * index.d)
* @param crit selection criterion
* @param ops resulting operating points
*/
void explore (Index *index,
size_t nq, const float *xq,
const AutoTuneCriterion & crit,
OperatingPoints * ops) const;
virtual ~ParameterSpace () {}
};
} // namespace faiss
#endif
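
Putting the pieces of this header together, a minimal auto-tuning sketch; it assumes a trained `index` plus query vectors `xq` and ground-truth ids `gt_I` (size `nq`, e.g. from an exact search) computed elsewhere:

```c++
#include <faiss/AutoTune.h>

// Sketch: explore the tunable parameters of a trained index and print
// the optimal (performance, time) operating points.
void tune(faiss::Index* index, size_t nq, const float* xq,
          const faiss::Index::idx_t* gt_I) {
    faiss::OneRecallAtRCriterion crit(nq, 1);            // 1-recall@1
    crit.set_groundtruth(1, /*gt_D_in=*/nullptr, gt_I);  // distances may be null

    faiss::ParameterSpace ps;
    ps.initialize(index);    // derive ranges (nprobe, ht, ...) from the index type
    faiss::OperatingPoints ops;
    ps.explore(index, nq, xq, crit, &ops);
    ops.display();           // only the optimal points by default
}
```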

View File

@ -0,0 +1,2 @@
# Code of Conduct
Facebook has adopted a Code of Conduct that we expect project participants to adhere to. Please [read the full text](https://code.fb.com/codeofconduct) so that you can understand what actions will and will not be tolerated.

View File

@ -0,0 +1,53 @@
# Contributing to Faiss
We want to make contributing to this project as easy and transparent as
possible.
## Our Development Process
We mainly develop Faiss within Facebook. Sometimes, we will sync the
github version of Faiss with the internal state.
## Pull Requests
We welcome pull requests that add significant value to Faiss. If you plan to do
a major development and contribute it back to Faiss, please contact us first before
putting too much effort into it.
1. Fork the repo and create your branch from `master`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
There is a Facebook internal test suite for Faiss, and we need to run
all changes to Faiss through it.
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## Coding Style
* 4 or 2 spaces for indentation in C++ (no tabs)
* 80 character line length (both for C++ and Python)
* C++ language level: C++11
## License
By contributing to Faiss, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.

View File

@ -0,0 +1,261 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/Clustering.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <faiss/utils/utils.h>
#include <faiss/utils/random.h>
#include <faiss/utils/distances.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexFlat.h>
namespace faiss {
ClusteringParameters::ClusteringParameters ():
niter(25),
nredo(1),
verbose(false),
spherical(false),
int_centroids(false),
update_index(false),
frozen_centroids(false),
min_points_per_centroid(39),
max_points_per_centroid(256),
seed(1234)
{}
// 39 corresponds to 10000 / 256 -> to avoid warnings on PQ tests with randu10k
Clustering::Clustering (int d, int k):
d(d), k(k) {}
Clustering::Clustering (int d, int k, const ClusteringParameters &cp):
ClusteringParameters (cp), d(d), k(k) {}
static double imbalance_factor (int n, int k, int64_t *assign) {
std::vector<int> hist(k, 0);
for (int i = 0; i < n; i++)
hist[assign[i]]++;
double tot = 0, uf = 0;
for (int i = 0 ; i < k ; i++) {
tot += hist[i];
uf += hist[i] * (double) hist[i];
}
uf = uf * k / (tot * tot);
return uf;
}
void Clustering::post_process_centroids ()
{
if (spherical) {
fvec_renorm_L2 (d, k, centroids.data());
}
if (int_centroids) {
for (size_t i = 0; i < centroids.size(); i++)
centroids[i] = roundf (centroids[i]);
}
}
void Clustering::train (idx_t nx, const float *x_in, Index & index) {
FAISS_THROW_IF_NOT_FMT (nx >= k,
"Number of training points (%ld) should be at least "
"as large as number of clusters (%ld)", nx, k);
double t0 = getmillisecs();
// yes it is the user's responsibility, but it may spare us some
// hard-to-debug reports.
for (size_t i = 0; i < nx * d; i++) {
FAISS_THROW_IF_NOT_MSG (finite (x_in[i]),
"input contains NaN's or Inf's");
}
const float *x = x_in;
ScopeDeleter<float> del1;
if (nx > k * max_points_per_centroid) {
if (verbose)
printf("Sampling a subset of %ld / %ld for training\n",
k * max_points_per_centroid, nx);
std::vector<int> perm (nx);
rand_perm (perm.data (), nx, seed);
nx = k * max_points_per_centroid;
float * x_new = new float [nx * d];
for (idx_t i = 0; i < nx; i++)
memcpy (x_new + i * d, x + perm[i] * d, sizeof(x_new[0]) * d);
x = x_new;
del1.set (x);
} else if (nx < k * min_points_per_centroid) {
fprintf (stderr,
"WARNING clustering %ld points to %ld centroids: "
"please provide at least %ld training points\n",
nx, k, idx_t(k) * min_points_per_centroid);
}
if (nx == k) {
if (verbose) {
printf("Number of training points (%ld) same as number of "
"clusters, just copying\n", nx);
}
// this is a corner case, just copy training set to clusters
centroids.resize (d * k);
memcpy (centroids.data(), x_in, sizeof (*x_in) * d * k);
index.reset();
index.add(k, x_in);
return;
}
if (verbose)
printf("Clustering %d points in %ldD to %ld clusters, "
"redo %d times, %d iterations\n",
int(nx), d, k, nredo, niter);
idx_t * assign = new idx_t[nx];
ScopeDeleter<idx_t> del (assign);
float * dis = new float[nx];
ScopeDeleter<float> del2(dis);
// for redo
float best_err = HUGE_VALF;
std::vector<float> best_obj;
std::vector<float> best_centroids;
// support input centroids
FAISS_THROW_IF_NOT_MSG (
centroids.size() % d == 0,
"size of provided input centroids not a multiple of dimension");
size_t n_input_centroids = centroids.size() / d;
if (verbose && n_input_centroids > 0) {
printf (" Using %zd centroids provided as input (%sfrozen)\n",
n_input_centroids, frozen_centroids ? "" : "not ");
}
double t_search_tot = 0;
if (verbose) {
printf(" Preprocessing in %.2f s\n",
(getmillisecs() - t0) / 1000.);
}
t0 = getmillisecs();
for (int redo = 0; redo < nredo; redo++) {
if (verbose && nredo > 1) {
printf("Outer iteration %d / %d\n", redo, nredo);
}
// initialize remaining centroids with random points from the dataset
centroids.resize (d * k);
std::vector<int> perm (nx);
rand_perm (perm.data(), nx, seed + 1 + redo * 15486557L);
for (int i = n_input_centroids; i < k ; i++)
memcpy (&centroids[i * d], x + perm[i] * d,
d * sizeof (float));
post_process_centroids ();
if (index.ntotal != 0) {
index.reset();
}
if (!index.is_trained) {
index.train (k, centroids.data());
}
index.add (k, centroids.data());
float err = 0;
for (int i = 0; i < niter; i++) {
double t0s = getmillisecs();
index.search (nx, x, 1, dis, assign);
InterruptCallback::check();
t_search_tot += getmillisecs() - t0s;
err = 0;
for (int j = 0; j < nx; j++)
err += dis[j];
obj.push_back (err);
int nsplit = km_update_centroids (
x, centroids.data(),
assign, d, k, nx, frozen_centroids ? n_input_centroids : 0);
if (verbose) {
printf (" Iteration %d (%.2f s, search %.2f s): "
"objective=%g imbalance=%.3f nsplit=%d \r",
i, (getmillisecs() - t0) / 1000.0,
t_search_tot / 1000,
err, imbalance_factor (nx, k, assign),
nsplit);
fflush (stdout);
}
post_process_centroids ();
index.reset ();
if (update_index)
index.train (k, centroids.data());
assert (index.ntotal == 0);
index.add (k, centroids.data());
InterruptCallback::check ();
}
if (verbose) printf("\n");
if (nredo > 1) {
if (err < best_err) {
if (verbose)
printf ("Objective improved: keep new clusters\n");
best_centroids = centroids;
best_obj = obj;
best_err = err;
}
index.reset ();
}
}
if (nredo > 1) {
centroids = best_centroids;
obj = best_obj;
index.reset();
index.add(k, best_centroids.data());
}
}
float kmeans_clustering (size_t d, size_t n, size_t k,
const float *x,
float *centroids)
{
Clustering clus (d, k);
clus.verbose = d * n * k > (1L << 30);
// display logs if > 1Gflop per iteration
IndexFlatL2 index (d);
clus.train (n, x, index);
memcpy(centroids, clus.centroids.data(), sizeof(*centroids) * d * k);
return clus.obj.back();
}
} // namespace faiss

View File

@ -0,0 +1,101 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_CLUSTERING_H
#define FAISS_CLUSTERING_H
#include <faiss/Index.h>
#include <vector>
namespace faiss {
/** Class for the clustering parameters. Can be passed to the
* constructor of the Clustering object.
*/
struct ClusteringParameters {
int niter; ///< clustering iterations
int nredo; ///< redo clustering this many times and keep best
bool verbose;
bool spherical; ///< do we want normalized centroids?
bool int_centroids; ///< round centroids coordinates to integer
bool update_index; ///< update index after each iteration?
bool frozen_centroids; ///< use the centroids provided as input and do not change them during iterations
int min_points_per_centroid; ///< otherwise you get a warning
int max_points_per_centroid; ///< to limit size of dataset
int seed; ///< seed for the random number generator
/// sets reasonable defaults
ClusteringParameters ();
};
/** clustering based on assignment - centroid update iterations
*
* The clustering is based on an Index object that assigns training
* points to the centroids. Therefore, at each iteration the centroids
* are added to the index.
*
* On output, the centroids table is set to the latest version
* of the centroids and they are also added to the index. If the
* centroids table is not empty on input, it is also used for
* initialization.
*
* To do several clusterings, just call train() several times on
* different training sets, clearing the centroid table in between.
*/
struct Clustering: ClusteringParameters {
typedef Index::idx_t idx_t;
size_t d; ///< dimension of the vectors
size_t k; ///< nb of centroids
/// centroids (k * d)
std::vector<float> centroids;
/// objective values (sum of distances reported by index) over
/// iterations
std::vector<float> obj;
/// the only mandatory parameters are k and d
Clustering (int d, int k);
Clustering (int d, int k, const ClusteringParameters &cp);
/// Index is used during the assignment stage
virtual void train (idx_t n, const float * x, faiss::Index & index);
/// Post-process the centroids after each centroid update.
/// includes optional L2 normalization and nearest integer rounding
void post_process_centroids ();
virtual ~Clustering() {}
};
/** simplified interface
*
* @param d dimension of the data
* @param n nb of training vectors
* @param k nb of output centroids
* @param x training set (size n * d)
* @param centroids output centroids (size k * d)
* @return final quantization error
*/
float kmeans_clustering (size_t d, size_t n, size_t k,
const float *x,
float *centroids);
}
#endif
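
For orientation, a self-contained sketch of the simplified interface declared above; the sizes and random training data are illustrative only:

```c++
#include <faiss/Clustering.h>
#include <random>
#include <vector>

int main() {
    size_t d = 64, n = 10000, k = 16;
    std::vector<float> x(n * d), centroids(k * d);

    // illustrative random training data
    std::mt19937 rng(123);
    std::uniform_real_distribution<float> u(0.0f, 1.0f);
    for (auto& v : x) v = u(rng);

    // runs Clustering::train with an IndexFlatL2 under the hood
    float err = faiss::kmeans_clustering(d, n, k, x.data(), centroids.data());
    // err is the final objective (sum of distances), i.e. clus.obj.back()
    return err >= 0 ? 0 : 1;
}
```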

View File

@ -0,0 +1,29 @@
FROM nvidia/cuda:8.0-devel-centos7
# Install MKL
RUN yum-config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo
RUN rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
RUN yum install -y intel-mkl-2019.3-062
ENV LD_LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH
ENV LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LIBRARY_PATH
ENV LD_PRELOAD /usr/lib64/libgomp.so.1:/opt/intel/mkl/lib/intel64/libmkl_def.so:\
/opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:\
/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_gnu_thread.so
# Install necessary build tools
RUN yum install -y gcc-c++ make swig3
# Install necessary headers/libs
RUN yum install -y python-devel numpy
COPY . /opt/faiss
WORKDIR /opt/faiss
# --with-cuda=/usr/local/cuda-8.0
RUN ./configure --prefix=/usr --libdir=/usr/lib64 --without-cuda
RUN make -j $(nproc)
RUN make -C python
RUN make test
RUN make install
RUN make -C demos demo_ivfpq_indexing && ./demos/demo_ivfpq_indexing

View File

@ -0,0 +1,353 @@
[//]: # "**********************************************************"
[//]: # "** INSTALL file for Faiss (Fair AI Similarity Search) **"
[//]: # "**********************************************************"
INSTALL file for Faiss (Fair AI Similarity Search)
==================================================
Install via Conda
-----------------
The easiest way to install FAISS is from Anaconda. We regularly push stable releases to the pytorch conda channel.
Currently we support faiss-cpu on both Linux and OSX. We also provide faiss-gpu compiled with CUDA8/CUDA9/CUDA10 on Linux systems.
You can easily install it by
```
# CPU version only
conda install faiss-cpu -c pytorch
# GPU version
conda install faiss-gpu cudatoolkit=8.0 -c pytorch # For CUDA8
conda install faiss-gpu cudatoolkit=9.0 -c pytorch # For CUDA9
conda install faiss-gpu cudatoolkit=10.0 -c pytorch # For CUDA10
```
Compile from source
-------------------
The Faiss compilation works in 2 steps:
1. compile the C++ core and examples
2. compile the Python interface
Step 2 depends on step 1.
It is also possible to build a pure C interface. This optional process is
described separately (please see the [C interface installation file](c_api/INSTALL.md)).
General compilation instructions
================================
TL;DR: `./configure && make (&& make install)` for the C++ library, and then `cd python; make && make install` for the python interface.
1. `./configure`
This generates the system-dependent configuration for the `Makefile`, stored in
a file called `makefile.inc`.
A few useful options:
- `./configure --without-cuda` in order to build the CPU part only.
- `./configure --with-cuda=/path/to/cuda-10.1` in order to hint to the path of
the cudatoolkit.
- `./configure --with-cuda-arch="-gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_72,code=sm_72"` for specifying which GPU architectures to build against.
- `./configure --with-python=/path/to/python3.7` in order to build a python
interface for a different python than the default one.
- `LDFLAGS=-L/path_to_mkl/lib/ ./configure` so that configure detects the MKL BLAS implementation. Note that this may require setting the LD_LIBRARY_PATH at runtime.
2. `make`
This builds the C++ library (the whole library if a suitable cuda toolkit was
found, or the CPU part only otherwise).
3. `make install` (optional)
This installs the headers and libraries.
4. `make -C python` (or `make py`)
This builds the python interface.
5. `make -C python install`
This installs the python library.
Faiss has been tested only on x86_64 machines on Linux and Mac OS.
Faiss requires a C++ compiler that understands:
- the Intel intrinsics for SSE instructions,
- the GCC intrinsic for the popcount instruction,
- basic OpenMP.
There are a few examples for makefile.inc in the example_makefiles/
subdirectory. There are also indications for specific configurations in the
troubleshooting section of the wiki.
https://github.com/facebookresearch/faiss/wiki/Troubleshooting
Faiss comes as a .a archive that can be linked with executables or
dynamic libraries (useful for the Python wrapper).
BLAS/Lapack
-----------
The only variables that need to be configured for the C++ Faiss are
the BLAS/Lapack flags (a linear algebra software package). It needs a
flag telling whether BLAS/Lapack uses 32 or 64 bit integers and the
linking flags. Faiss uses the Fortran 77 interface of BLAS/Lapack and
thus does not need an include path.
There are several BLAS implementations, depending on the OS and
machine. To have reasonable performance, the BLAS library should be
multithreaded. See the example makefile.inc's for hints and examples
on how to set the flags, or simply run the configure script:
`./configure`
To check that the link flags are correct, and verify whether the
implementation uses 32 or 64 bit integers, you can
`make misc/test_blas`
and run
`./misc/test_blas`
Testing Faiss
-------------
A basic usage example is in
`demos/demo_ivfpq_indexing`
which you can build by calling
`make -C demos demo_ivfpq_indexing`
It makes a small index, stores it and performs some searches. A normal
runtime is around 20s. With a fast machine and Intel MKL's BLAS it
runs in 2.5s.
To run the whole test suite:
`make test` (for the CPU part)
`make test_gpu` (for the GPU part)
A real-life benchmark
---------------------
A bit longer example runs and evaluates Faiss on the SIFT1M
dataset. To run it, please download the ANN_SIFT1M dataset from
http://corpus-texmex.irisa.fr/
and unzip it to the subdirectory `sift1M` at the root of the source
directory for this repository.
Then compile and run the following (after ensuring you have installed faiss):
```
make demos
./demos/demo_sift1M
```
This is a demonstration of the high-level auto-tuning API. You can try
setting a different index_key to find the indexing structure that
gives the best performance.
The Python interface
======================================
The Python interface is compiled with
`make -C python` (or `make py`)
How it works
------------
The Python interface is provided via SWIG (Simplified Wrapper and
Interface Generator) and an additional level of manual wrappers (in python/faiss.py).
SWIG generates two wrapper files: a Python file (`python/swigfaiss.py`) and a
C++ file that must be compiled to a dynamic library (`python/_swigfaiss.so`).
Testing the Python wrapper
--------------------------
Often, a successful compile does not mean that the library works,
because missing symbols are detected only at runtime. You should be
able to load the Faiss dynamic library:
`python -c "import faiss"`
In case of failure, it reports the first missing symbol. To see all
missing symbols (on Linux), use
`ldd -r _swigfaiss.so`
Sometimes, problems (e.g. with BLAS libraries) appear only when actually
calling a BLAS function. A simple way to check this:
```python
python -c "import faiss, numpy
faiss.Kmeans(10, 20).train(numpy.random.rand(1000, 10).astype('float32'))
"
```
Real-life test
--------------
The following script extends the demo_sift1M test to several types of
indexes. This must be run from the root of the source directory for this
repository:
```
mkdir tmp # graphs of the output will be written here
PYTHONPATH=. python demos/demo_auto_tune.py
```
It will cycle through a few types of indexes and find optimal
operating points. You can play around with the types of indexes.
Step 3: Compiling the GPU implementation
========================================
The GPU version is a superset of the CPU version. In addition it
requires the cuda compiler and related libraries (Cublas).
The nvcc-specific flags to pass to the compiler, based on your desired
compute capability, can be customized by providing `--with-cuda-arch` to
`./configure`. Only compute capability 3.5+ is supported. For example, we enable
by default:
```
-gencode=arch=compute_35,code=compute_35
-gencode=arch=compute_52,code=compute_52
-gencode=arch=compute_60,code=compute_60
-gencode=arch=compute_61,code=compute_61
-gencode=arch=compute_70,code=compute_70
-gencode=arch=compute_75,code=compute_75
```
However, look at https://developer.nvidia.com/cuda-gpus to determine
what compute capability you need to use, and replace our gencode
specifications with the one(s) you need.
Most other flags are related to the C++11 compiler used by nvcc to
compile the actual C++ code. They are normally just passed through by
nvcc, except for some that are not recognized and should be
escaped by prefixing them with -Xcompiler. Likewise, link flags that are
prefixed with -Wl, should be passed with -Xlinker.
You may want to add `-j 10` to use 10 threads during compile.
Testing the GPU implementation
------------------------------
Compile the example with
`make -C gpu/test demo_ivfpq_indexing_gpu`
This produces the GPU code equivalent to the CPU
demo_ivfpq_indexing. It also shows how to translate indexes from/to
the GPU.
Python example with GPU support
-------------------------------
The auto-tuning example above also runs on the GPU. Edit
`demos/demo_auto_tune.py` at line 100 with the values
```python
keys_to_test = keys_gpu
use_gpu = True
```
and you can run
```
export PYTHONPATH=.
python demos/demo_auto_tune.py
```
to test the GPU code.
Docker instructions
===================
For using GPU capabilities of Faiss, you'll need to run "nvidia-docker"
rather than "docker". Make sure that docker
(https://docs.docker.com/engine/installation/) and nvidia-docker
(https://github.com/NVIDIA/nvidia-docker) are installed on your system.
To build the "faiss" image, run
`nvidia-docker build -t faiss .`
or if you don't want/need to clone the sources, just run
`nvidia-docker build -t faiss github.com/facebookresearch/faiss`
If you want to run the tests during the docker build, uncomment the
last 3 "RUN" steps in the Dockerfile. But you might want to run the
tests by yourself, so just run
`nvidia-docker run -ti --name faiss faiss bash`
and run what you want. If you need a dataset (like sift1M), download it
inside the created container or, better, mount a directory from the host:
`nvidia-docker run -ti --name faiss -v /my/host/data/folder/ann_dataset/sift/:/opt/faiss/sift1M faiss bash`
How to use Faiss in your own projects
=====================================
C++
---
The makefile generates a static and a dynamic library
```
libfaiss.a
libfaiss.so (or libfaiss.dylib)
```
the executable should be linked to one of these. If you use
the static version (.a), add the LDFLAGS used in the Makefile.
For binary-only distributions, the headers should be under
a `faiss/` directory, so that they can be included as
```c++
#include <faiss/IndexIVFPQ.h>
#include <faiss/gpu/GpuIndexFlat.h>
```
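
As a quick smoke test of the installed library, a minimal sketch (link it against libfaiss plus your BLAS flags as described above; the exact link line depends on your makefile.inc):

```c++
#include <faiss/IndexFlat.h>
#include <cstdio>
#include <cstdlib>
#include <vector>

int main() {
    int d = 64, nb = 1000, k = 4;
    std::vector<float> xb(nb * d);
    for (size_t i = 0; i < xb.size(); i++) xb[i] = drand48();

    faiss::IndexFlatL2 index(d);   // exact L2 search, no training required
    index.add(nb, xb.data());

    std::vector<faiss::Index::idx_t> I(k);
    std::vector<float> D(k);
    index.search(1, xb.data(), k, D.data(), I.data());
    printf("nearest id: %ld (should be 0, the query itself)\n", (long)I[0]);
    return 0;
}
```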
Python
------
To import Faiss in your own Python project, you need the files
```
__init__.py
swigfaiss.py
_swigfaiss.so
```
to be present in a `faiss/` directory visible in the PYTHONPATH or in the
current directory.
Then Faiss can be used in python with
```python
import faiss
```

View File

@ -0,0 +1,344 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IVFlib.h>
#include <memory>
#include <faiss/IndexPreTransform.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/utils.h>
namespace faiss { namespace ivflib {
void check_compatible_for_merge (const Index * index0,
const Index * index1)
{
const faiss::IndexPreTransform *pt0 =
dynamic_cast<const faiss::IndexPreTransform *>(index0);
if (pt0) {
const faiss::IndexPreTransform *pt1 =
dynamic_cast<const faiss::IndexPreTransform *>(index1);
FAISS_THROW_IF_NOT_MSG (pt1, "both indexes should be pretransforms");
FAISS_THROW_IF_NOT (pt0->chain.size() == pt1->chain.size());
for (int i = 0; i < pt0->chain.size(); i++) {
FAISS_THROW_IF_NOT (typeid(pt0->chain[i]) == typeid(pt1->chain[i]));
}
index0 = pt0->index;
index1 = pt1->index;
}
FAISS_THROW_IF_NOT (typeid(index0) == typeid(index1));
FAISS_THROW_IF_NOT (index0->d == index1->d &&
index0->metric_type == index1->metric_type);
const faiss::IndexIVF *ivf0 = dynamic_cast<const faiss::IndexIVF *>(index0);
if (ivf0) {
const faiss::IndexIVF *ivf1 =
dynamic_cast<const faiss::IndexIVF *>(index1);
FAISS_THROW_IF_NOT (ivf1);
ivf0->check_compatible_for_merge (*ivf1);
}
// TODO: check other index types as thoroughly
}
const IndexIVF * extract_index_ivf (const Index * index)
{
if (auto *pt =
dynamic_cast<const IndexPreTransform *>(index)) {
index = pt->index;
}
auto *ivf = dynamic_cast<const IndexIVF *>(index);
FAISS_THROW_IF_NOT (ivf);
return ivf;
}
IndexIVF * extract_index_ivf (Index * index) {
return const_cast<IndexIVF*> (extract_index_ivf ((const Index*)(index)));
}
void merge_into(faiss::Index *index0, faiss::Index *index1, bool shift_ids) {
check_compatible_for_merge (index0, index1);
IndexIVF * ivf0 = extract_index_ivf (index0);
IndexIVF * ivf1 = extract_index_ivf (index1);
ivf0->merge_from (*ivf1, shift_ids ? ivf0->ntotal : 0);
// useful for IndexPreTransform
index0->ntotal = ivf0->ntotal;
index1->ntotal = ivf1->ntotal;
}
void search_centroid(faiss::Index *index,
const float* x, int n,
idx_t* centroid_ids)
{
std::unique_ptr<float[]> del;
if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
x = index_pre->apply_chain(n, x);
del.reset((float*)x);
index = index_pre->index;
}
faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
assert(index_ivf);
index_ivf->quantizer->assign(n, x, centroid_ids);
}
void search_and_return_centroids(faiss::Index *index,
size_t n,
const float* xin,
long k,
float *distances,
idx_t* labels,
idx_t* query_centroid_ids,
idx_t* result_centroid_ids)
{
const float *x = xin;
std::unique_ptr<float []> del;
if (auto index_pre = dynamic_cast<faiss::IndexPreTransform*>(index)) {
x = index_pre->apply_chain(n, x);
del.reset((float*)x);
index = index_pre->index;
}
faiss::IndexIVF* index_ivf = dynamic_cast<faiss::IndexIVF*>(index);
assert(index_ivf);
size_t nprobe = index_ivf->nprobe;
std::vector<idx_t> cent_nos (n * nprobe);
std::vector<float> cent_dis (n * nprobe);
index_ivf->quantizer->search(
n, x, nprobe, cent_dis.data(), cent_nos.data());
if (query_centroid_ids) {
for (size_t i = 0; i < n; i++)
query_centroid_ids[i] = cent_nos[i * nprobe];
}
index_ivf->search_preassigned (n, x, k,
cent_nos.data(), cent_dis.data(),
distances, labels, true);
for (size_t i = 0; i < n * k; i++) {
idx_t label = labels[i];
if (label < 0) {
if (result_centroid_ids)
result_centroid_ids[i] = -1;
} else {
long list_no = label >> 32;
long list_index = label & 0xffffffff;
if (result_centroid_ids)
result_centroid_ids[i] = list_no;
labels[i] = index_ivf->invlists->get_single_id(list_no, list_index);
}
}
}
SlidingIndexWindow::SlidingIndexWindow (Index *index): index (index) {
n_slice = 0;
IndexIVF* index_ivf = const_cast<IndexIVF*>(extract_index_ivf (index));
ils = dynamic_cast<ArrayInvertedLists *> (index_ivf->invlists);
nlist = ils->nlist;
FAISS_THROW_IF_NOT_MSG (ils,
"only supports indexes with ArrayInvertedLists");
sizes.resize(nlist);
}
template<class T>
static void shift_and_add (std::vector<T> & dst,
size_t remove,
const std::vector<T> & src)
{
if (remove > 0)
memmove (dst.data(), dst.data() + remove,
(dst.size() - remove) * sizeof (T));
size_t insert_point = dst.size() - remove;
dst.resize (insert_point + src.size());
memcpy (dst.data() + insert_point, src.data (), src.size() * sizeof(T));
}
template<class T>
static void remove_from_begin (std::vector<T> & v,
size_t remove)
{
if (remove > 0)
v.erase (v.begin(), v.begin() + remove);
}
void SlidingIndexWindow::step(const Index *sub_index, bool remove_oldest) {
FAISS_THROW_IF_NOT_MSG (!remove_oldest || n_slice > 0,
"cannot remove slice: there is none");
const ArrayInvertedLists *ils2 = nullptr;
if(sub_index) {
check_compatible_for_merge (index, sub_index);
ils2 = dynamic_cast<const ArrayInvertedLists*>(
extract_index_ivf (sub_index)->invlists);
FAISS_THROW_IF_NOT_MSG (ils2, "supports only ArrayInvertedLists");
}
IndexIVF *index_ivf = extract_index_ivf (index);
if (remove_oldest && ils2) {
for (int i = 0; i < nlist; i++) {
std::vector<size_t> & sizesi = sizes[i];
size_t amount_to_remove = sizesi[0];
index_ivf->ntotal += ils2->ids[i].size() - amount_to_remove;
shift_and_add (ils->ids[i], amount_to_remove, ils2->ids[i]);
shift_and_add (ils->codes[i], amount_to_remove * ils->code_size,
ils2->codes[i]);
for (int j = 0; j + 1 < n_slice; j++) {
sizesi[j] = sizesi[j + 1] - amount_to_remove;
}
sizesi[n_slice - 1] = ils->ids[i].size();
}
} else if (ils2) {
for (int i = 0; i < nlist; i++) {
index_ivf->ntotal += ils2->ids[i].size();
shift_and_add (ils->ids[i], 0, ils2->ids[i]);
shift_and_add (ils->codes[i], 0, ils2->codes[i]);
sizes[i].push_back(ils->ids[i].size());
}
n_slice++;
} else if (remove_oldest) {
for (int i = 0; i < nlist; i++) {
size_t amount_to_remove = sizes[i][0];
index_ivf->ntotal -= amount_to_remove;
remove_from_begin (ils->ids[i], amount_to_remove);
remove_from_begin (ils->codes[i],
amount_to_remove * ils->code_size);
for (int j = 0; j + 1 < n_slice; j++) {
sizes[i][j] = sizes[i][j + 1] - amount_to_remove;
}
sizes[i].pop_back ();
}
n_slice--;
} else {
FAISS_THROW_MSG ("nothing to do???");
}
index->ntotal = index_ivf->ntotal;
}
// Get a subset of inverted lists [i0, i1). Works on IndexIVF's and
// IndexIVF's embedded in a IndexPreTransform
ArrayInvertedLists *
get_invlist_range (const Index *index, long i0, long i1)
{
const IndexIVF *ivf = extract_index_ivf (index);
FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
const InvertedLists *src = ivf->invlists;
ArrayInvertedLists * il = new ArrayInvertedLists(i1 - i0, src->code_size);
for (long i = i0; i < i1; i++) {
il->add_entries(i - i0, src->list_size(i),
InvertedLists::ScopedIds (src, i).get(),
InvertedLists::ScopedCodes (src, i).get());
}
return il;
}
void set_invlist_range (Index *index, long i0, long i1,
ArrayInvertedLists * src)
{
IndexIVF *ivf = extract_index_ivf (index);
FAISS_THROW_IF_NOT (0 <= i0 && i0 <= i1 && i1 <= ivf->nlist);
ArrayInvertedLists *dst = dynamic_cast<ArrayInvertedLists *>(ivf->invlists);
FAISS_THROW_IF_NOT_MSG (dst, "only ArrayInvertedLists supported");
FAISS_THROW_IF_NOT (src->nlist == i1 - i0 &&
dst->code_size == src->code_size);
size_t ntotal = index->ntotal;
for (long i = i0 ; i < i1; i++) {
ntotal -= dst->list_size (i);
ntotal += src->list_size (i - i0);
std::swap (src->codes[i - i0], dst->codes[i]);
std::swap (src->ids[i - i0], dst->ids[i]);
}
ivf->ntotal = index->ntotal = ntotal;
}
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params,
size_t *nb_dis_ptr)
{
FAISS_THROW_IF_NOT (params);
const float *prev_x = x;
ScopeDeleter<float> del;
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
x = ip->apply_chain (n, x);
if (x != prev_x) {
del.set(x);
}
index = ip->index;
}
std::vector<idx_t> Iq(params->nprobe * n);
std::vector<float> Dq(params->nprobe * n);
const IndexIVF *index_ivf = dynamic_cast<const IndexIVF *>(index);
FAISS_THROW_IF_NOT (index_ivf);
double t0 = getmillisecs();
index_ivf->quantizer->search(n, x, params->nprobe,
Dq.data(), Iq.data());
double t1 = getmillisecs();
indexIVF_stats.quantization_time += t1 - t0;
if (nb_dis_ptr) {
size_t nb_dis = 0;
const InvertedLists *il = index_ivf->invlists;
for (idx_t i = 0; i < n * params->nprobe; i++) {
if (Iq[i] >= 0) {
nb_dis += il->list_size(Iq[i]);
}
}
*nb_dis_ptr = nb_dis;
}
index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
distances, labels,
false, params);
double t2 = getmillisecs();
indexIVF_stats.search_time += t2 - t1;
}
} } // namespace faiss::ivflib
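The helpers above can be combined to shard inverted lists between indexes and to search with per-call settings. A minimal sketch, assuming two IndexIVFFlat indexes built on the same trained quantizer; the function names, sizes, and the nprobe value are illustrative, not part of this diff:
#include <faiss/IVFlib.h>
#include <faiss/IndexIVFFlat.h>
// Copy the codes of lists [0, half) from src into dst.
void copy_first_half (faiss::IndexIVFFlat &src, faiss::IndexIVFFlat &dst) {
    long half = src.nlist / 2;
    // copy lists [0, half) out of src ...
    faiss::ArrayInvertedLists *il =
        faiss::ivflib::get_invlist_range (&src, 0, half);
    // ... and swap them into dst; dst's ntotal is recomputed internally
    faiss::ivflib::set_invlist_range (&dst, 0, half, il);
    delete il; // now holds dst's previous lists for [0, half)
}
// Search with a caller-chosen nprobe, without mutating the index.
void query_with_nprobe (const faiss::Index *index,
                        faiss::Index::idx_t n, const float *xq,
                        faiss::Index::idx_t k,
                        float *D, faiss::Index::idx_t *I) {
    faiss::IVFSearchParameters params;
    params.nprobe = 64;   // probe more lists than the index default
    params.max_codes = 0; // 0 = no limit on the number of codes scanned
    size_t ndis = 0;      // receives the number of distances computed
    faiss::ivflib::search_with_parameters (index, n, xq, k, D, I,
                                           &params, &ndis);
}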

132
core/src/index/thirdparty/faiss/IVFlib.h vendored Normal file

@ -0,0 +1,132 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_IVFLIB_H
#define FAISS_IVFLIB_H
/** Since IVF (inverted file) indexes are so useful for
* large-scale use cases, we group a few functions related to them in
* this small library. Most functions work both on IndexIVFs and
* IndexIVFs embedded within an IndexPreTransform.
*/
#include <vector>
#include <faiss/IndexIVF.h>
namespace faiss { namespace ivflib {
/** check if two indexes have the same parameters and are trained in
* the same way, otherwise throw. */
void check_compatible_for_merge (const Index * index1,
const Index * index2);
/** get an IndexIVF from an index. The index may be an IndexIVF or
* some wrapper class that encloses an IndexIVF
*
* throws an exception if this is not the case.
*/
const IndexIVF * extract_index_ivf (const Index * index);
IndexIVF * extract_index_ivf (Index * index);
/** Merge index1 into index0. Works on IndexIVF's and IndexIVF's
* embedded in an IndexPreTransform. On output, index1 is empty.
*
* @param shift_ids: translate the ids from index1 to index0->prev_ntotal
*/
void merge_into(Index *index0, Index *index1, bool shift_ids);
typedef Index::idx_t idx_t;
/* Returns the cluster each vector belongs to.
*
* @param index Index, which should be an IVF index
* (otherwise there are no clusters)
* @param x object descriptors for which the centroids should be found,
* size n * d
* @param centroid_ids
* cluster id each object belongs to, size n
*/
void search_centroid(Index *index,
const float* x, int n,
idx_t* centroid_ids);
/* Performs a search and also returns the centroid ids of the queries and of the results.
*
* @param index Index, which should be an IVF index
* (otherwise there are no clusters)
* @param query_centroid_ids
* centroid ids corresponding to the query vectors (size n)
* @param result_centroid_ids
* centroid ids corresponding to the results (size n * k)
* other arguments are the same as the standard search function
*/
void search_and_return_centroids(Index *index,
size_t n,
const float* xin,
long k,
float *distances,
idx_t* labels,
idx_t* query_centroid_ids,
idx_t* result_centroid_ids);
/** A set of IndexIVFs concatenated together in a FIFO fashion.
* At each "step", the oldest index slice is removed and a new index is added.
*/
struct SlidingIndexWindow {
/// common index that contains the sliding window
Index * index;
/// InvertedLists of index
ArrayInvertedLists *ils;
/// number of slices currently in index
int n_slice;
/// same as index->nlist
size_t nlist;
/// cumulative list sizes at each slice
std::vector<std::vector<size_t> > sizes;
/// index should be initially empty and trained
SlidingIndexWindow (Index *index);
/** Add one index to the current index and remove the oldest one.
*
* @param sub_index slice to swap in (can be NULL)
* @param remove_oldest if true, remove the oldest slices */
void step(const Index *sub_index, bool remove_oldest);
};
/// Get a subset of inverted lists [i0, i1)
ArrayInvertedLists * get_invlist_range (const Index *index,
long i0, long i1);
/// Set a subset of inverted lists
void set_invlist_range (Index *index, long i0, long i1,
ArrayInvertedLists * src);
// search an IndexIVF, possibly embedded in an IndexPreTransform with
// given parameters. Optionally returns the number of distances
// computed
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params,
size_t *nb_dis = nullptr);
} } // namespace faiss::ivflib
#endif
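A sketch of the sliding-window pattern implemented above, assuming the caller provides a trained, initially empty IVF index plus one pre-built index per time slice; all names and the slice limit are illustrative:
#include <faiss/IVFlib.h>
#include <vector>
// Keep at most max_slices time slices inside one IVF index.
void slide (faiss::Index *main_index,                 // trained, initially empty
            const std::vector<faiss::Index*> &slices, // one index per time step
            int max_slices) {
    faiss::ivflib::SlidingIndexWindow window (main_index);
    for (size_t i = 0; i < slices.size(); i++) {
        bool remove_oldest = window.n_slice >= max_slices;
        window.step (slices[i], remove_oldest);       // swap in slice i
    }
}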

171
core/src/index/thirdparty/faiss/Index.cpp vendored Normal file

@ -0,0 +1,171 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/Index.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/distances.h>
#include <cstring>
namespace faiss {
Index::~Index ()
{
}
void Index::train(idx_t /*n*/, const float* /*x*/) {
// does nothing by default
}
void Index::range_search (idx_t , const float *, float,
RangeSearchResult *) const
{
FAISS_THROW_MSG ("range search not implemented");
}
void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k)
{
float * distances = new float[n * k];
ScopeDeleter<float> del(distances);
search (n, x, k, distances, labels);
}
void Index::add_with_ids(
idx_t /*n*/,
const float* /*x*/,
const idx_t* /*xids*/) {
FAISS_THROW_MSG ("add_with_ids not implemented for this type of index");
}
size_t Index::remove_ids(const IDSelector& /*sel*/) {
FAISS_THROW_MSG ("remove_ids not implemented for this type of index");
return -1;
}
void Index::reconstruct (idx_t, float * ) const {
FAISS_THROW_MSG ("reconstruct not implemented for this type of index");
}
void Index::reconstruct_n (idx_t i0, idx_t ni, float *recons) const {
for (idx_t i = 0; i < ni; i++) {
reconstruct (i0 + i, recons + i * d);
}
}
void Index::search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const {
search (n, x, k, distances, labels);
for (idx_t i = 0; i < n; ++i) {
for (idx_t j = 0; j < k; ++j) {
idx_t ij = i * k + j;
idx_t key = labels[ij];
float* reconstructed = recons + ij * d;
if (key < 0) {
// Fill with NaNs
memset(reconstructed, -1, sizeof(*reconstructed) * d);
} else {
reconstruct (key, reconstructed);
}
}
}
}
void Index::compute_residual (const float * x,
float * residual, idx_t key) const {
reconstruct (key, residual);
for (size_t i = 0; i < d; i++) {
residual[i] = x[i] - residual[i];
}
}
void Index::compute_residual_n (idx_t n, const float* xs,
float* residuals,
const idx_t* keys) const {
#pragma omp parallel for
for (idx_t i = 0; i < n; ++i) {
compute_residual(&xs[i * d], &residuals[i * d], keys[i]);
}
}
size_t Index::sa_code_size () const
{
FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}
void Index::sa_encode (idx_t, const float *,
uint8_t *) const
{
FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}
void Index::sa_decode (idx_t, const uint8_t *,
float *) const
{
FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}
namespace {
// storage that explicitly reconstructs vectors before computing distances
struct GenericDistanceComputer : DistanceComputer {
size_t d;
const Index& storage;
std::vector<float> buf;
const float *q;
explicit GenericDistanceComputer(const Index& storage)
: storage(storage) {
d = storage.d;
buf.resize(d * 2);
}
float operator () (idx_t i) override {
storage.reconstruct(i, buf.data());
return fvec_L2sqr(q, buf.data(), d);
}
float symmetric_dis(idx_t i, idx_t j) override {
storage.reconstruct(i, buf.data());
storage.reconstruct(j, buf.data() + d);
return fvec_L2sqr(buf.data() + d, buf.data(), d);
}
void set_query(const float *x) override {
q = x;
}
};
} // namespace
DistanceComputer * Index::get_distance_computer() const {
if (metric_type == METRIC_L2) {
return new GenericDistanceComputer(*this);
} else {
FAISS_THROW_MSG ("get_distance_computer() not implemented");
}
}
}
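A short sketch of how the reconstruction-based DistanceComputer above is consumed; DistanceComputer itself is declared in impl/AuxIndexStructures.h:
#include <faiss/Index.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <memory>
// Squared L2 distance between a query and stored vector i.
float distance_to_entry (const faiss::Index &index, const float *query,
                         faiss::Index::idx_t i) {
    std::unique_ptr<faiss::DistanceComputer> dc (index.get_distance_computer());
    dc->set_query (query); // the query pointer is kept, not copied
    return (*dc) (i);      // reconstructs vector i, then computes fvec_L2sqr
}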

261
core/src/index/thirdparty/faiss/Index.h vendored Normal file

@ -0,0 +1,261 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_H
#define FAISS_INDEX_H
#include <cstdio>
#include <typeinfo>
#include <string>
#include <sstream>
#define FAISS_VERSION_MAJOR 1
#define FAISS_VERSION_MINOR 6
#define FAISS_VERSION_PATCH 0
/**
* @namespace faiss
*
* Throughout the library, vectors are provided as float * pointers.
* Most algorithms can be optimized when several vectors are processed
* (added/searched) together in a batch. In this case, they are passed
* in as a matrix. When n vectors of size d are provided as float * x,
* component j of vector i is
*
* x[ i * d + j ]
*
* where 0 <= i < n and 0 <= j < d. In other words, matrices are
* always compact. When specifying the size of the matrix, we call it
* an n*d matrix, which implies a row-major storage.
*/
namespace faiss {
/// Some algorithms support both an inner product version and an L2 search version.
enum MetricType {
METRIC_INNER_PRODUCT = 0, ///< maximum inner product search
METRIC_L2 = 1, ///< squared L2 search
METRIC_L1, ///< L1 (aka cityblock)
METRIC_Linf, ///< infinity distance
METRIC_Lp, ///< L_p distance, p is given by metric_arg
/// some additional metrics defined in scipy.spatial.distance
METRIC_Canberra = 20,
METRIC_BrayCurtis,
METRIC_JensenShannon,
};
/// Forward declarations see AuxIndexStructures.h
struct IDSelector;
struct RangeSearchResult;
struct DistanceComputer;
/** Abstract structure for an index
*
* Supports adding vectors and searching them.
*
* Currently only asymmetric queries are supported:
* database-to-database queries are not implemented.
*/
struct Index {
using idx_t = int64_t; ///< all indices are this type
using component_t = float;
using distance_t = float;
int d; ///< vector dimension
idx_t ntotal; ///< total nb of indexed vectors
bool verbose; ///< verbosity level
/// set if the Index does not require training, or if training is
/// done already
bool is_trained;
/// type of metric this index uses for search
MetricType metric_type;
float metric_arg; ///< argument of the metric type
explicit Index (idx_t d = 0, MetricType metric = METRIC_L2):
d(d),
ntotal(0),
verbose(false),
is_trained(true),
metric_type (metric),
metric_arg(0) {}
virtual ~Index ();
/** Perform training on a representative set of vectors
*
* @param n nb of training vectors
* @param x training vectors, size n * d
*/
virtual void train(idx_t n, const float* x);
/** Add n vectors of dimension d to the index.
*
* Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
* This function slices the input vectors in chunks smaller than
* blocksize_add and calls add_core.
* @param x input matrix, size n * d
*/
virtual void add (idx_t n, const float *x) = 0;
/** Same as add, but stores xids instead of sequential ids.
*
* The default implementation fails with an assertion, as it is
* not supported by all indexes.
*
* @param xids if non-null, ids to store for the vectors (size n)
*/
virtual void add_with_ids (idx_t n, const float * x, const idx_t *xids);
/** query n vectors of dimension d to the index.
*
* return at most k vectors. If there are not enough results for a
* query, the result array is padded with -1s.
*
* @param x input vectors to search, size n * d
* @param labels output labels of the NNs, size n*k
* @param distances output pairwise distances, size n*k
*/
virtual void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const = 0;
/** query n vectors of dimension d to the index.
*
* return all vectors with distance < radius. Note that many
* indexes do not implement the range_search (only the k-NN search
* is mandatory).
*
* @param x input vectors to search, size n * d
* @param radius search radius
* @param result result table
*/
virtual void range_search (idx_t n, const float *x, float radius,
RangeSearchResult *result) const;
/** return the indexes of the k vectors closest to the query x.
*
* This function is identical to search but only returns labels of neighbors.
* @param x input vectors to search, size n * d
* @param labels output labels of the NNs, size n*k
*/
void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1);
/// removes all elements from the database.
virtual void reset() = 0;
/** removes IDs from the index. Not supported by all
* indexes. Returns the number of elements removed.
*/
virtual size_t remove_ids (const IDSelector & sel);
/** Reconstruct a stored vector (or an approximation if lossy coding)
*
* this function may not be defined for some indexes
* @param key id of the vector to reconstruct
* @param recons reconstructed vector (size d)
*/
virtual void reconstruct (idx_t key, float * recons) const;
/** Reconstruct vectors i0 to i0 + ni - 1
*
* this function may not be defined for some indexes
* @param recons reconstructed vectors (size ni * d)
*/
virtual void reconstruct_n (idx_t i0, idx_t ni, float *recons) const;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* If there are not enough results for a query, the resulting arrays
* are padded with -1s.
*
* @param recons reconstructed vectors size (n, k, d)
**/
virtual void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const;
/** Computes a residual vector after indexing encoding.
*
* The residual vector is the difference between a vector and the
* reconstruction that can be decoded from its representation in
* the index. The residual can be used for multiple-stage indexing
* methods, like IndexIVF's methods.
*
* @param x input vector, size d
* @param residual output residual vector, size d
* @param key encoded index, as returned by search and assign
*/
virtual void compute_residual (const float * x,
float * residual, idx_t key) const;
/** Computes a residual vector after indexing encoding (batch form).
* Equivalent to calling compute_residual for each vector.
*
* The residual vector is the difference between a vector and the
* reconstruction that can be decoded from its representation in
* the index. The residual can be used for multiple-stage indexing
* methods, like IndexIVF's methods.
*
* @param n number of vectors
* @param xs input vectors, size (n x d)
* @param residuals output residual vectors, size (n x d)
* @param keys encoded index, as returned by search and assign
*/
virtual void compute_residual_n (idx_t n, const float* xs,
float* residuals,
const idx_t* keys) const;
/** Get a DistanceComputer (defined in AuxIndexStructures) object
* for this kind of index.
*
* DistanceComputer is implemented for indexes that support random
* access of their vectors.
*/
virtual DistanceComputer * get_distance_computer() const;
/* The standalone codec interface */
/** size of the produced codes in bytes */
virtual size_t sa_code_size () const;
/** encode a set of vectors
*
* @param n number of vectors
* @param x input vectors, size n * d
* @param bytes output encoded vectors, size n * sa_code_size()
*/
virtual void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const;
/** encode a set of vectors
*
* @param n number of vectors
* @param bytes input encoded vectors, size n * sa_code_size()
* @param x output vectors, size n * d
*/
virtual void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const;
};
}
#endif
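A minimal, self-contained sketch of this API using IndexFlatL2 from the same library, making the row-major x[i * d + j] layout and the label convention concrete; the data values are illustrative:
#include <faiss/IndexFlat.h>
#include <vector>
int main () {
    int d = 4;                                 // vector dimension
    faiss::IndexFlatL2 index (d);              // is_trained == true from the start
    std::vector<float> xb = {                  // 2 vectors, row-major
        0, 0, 0, 0,
        1, 1, 1, 1,
    };
    index.add (2, xb.data());                  // labels 0 and 1
    std::vector<float> xq = {0, 0, 0, 1};      // one query vector
    int k = 2;
    std::vector<float> D (k);
    std::vector<faiss::Index::idx_t> I (k);
    index.search (1, xq.data(), k, D.data(), I.data());
    // I = {0, 1}, D = {1, 3}: squared L2 distances, nearest first
    return 0;
}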

437
core/src/index/thirdparty/faiss/Index2Layer.cpp vendored Normal file

@ -0,0 +1,437 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/Index2Layer.h>
#include <cmath>
#include <cstdio>
#include <cassert>
#include <stdint.h>
#ifdef __SSE__
#include <immintrin.h>
#endif
#include <algorithm>
#include <faiss/IndexIVFPQ.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/IndexFlat.h>
#include <faiss/utils/distances.h>
/*
#include <faiss/utils/Heap.h>
#include <faiss/Clustering.h>
#include <faiss/utils/hamming.h>
*/
namespace faiss {
using idx_t = Index::idx_t;
/*************************************
* Index2Layer implementation
*************************************/
Index2Layer::Index2Layer (Index * quantizer, size_t nlist,
int M, int nbit,
MetricType metric):
Index (quantizer->d, metric),
q1 (quantizer, nlist),
pq (quantizer->d, M, nbit)
{
is_trained = false;
for (int nbyte = 0; nbyte < 7; nbyte++) {
if ((1L << (8 * nbyte)) >= nlist) {
code_size_1 = nbyte;
break;
}
}
code_size_2 = pq.code_size;
code_size = code_size_1 + code_size_2;
}
Index2Layer::Index2Layer ()
{
code_size = code_size_1 = code_size_2 = 0;
}
Index2Layer::~Index2Layer ()
{}
void Index2Layer::train(idx_t n, const float* x)
{
if (verbose) {
printf ("training level-1 quantizer %ld vectors in %dD\n",
n, d);
}
q1.train_q1 (n, x, verbose, metric_type);
if (verbose) {
printf("computing residuals\n");
}
const float * x_in = x;
x = fvecs_maybe_subsample (
d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
x, verbose, pq.cp.seed);
ScopeDeleter<float> del_x (x_in == x ? nullptr : x);
std::vector<idx_t> assign(n); // assignment to coarse centroids
q1.quantizer->assign (n, x, assign.data());
std::vector<float> residuals(n * d);
for (idx_t i = 0; i < n; i++) {
q1.quantizer->compute_residual (
x + i * d, residuals.data() + i * d, assign[i]);
}
if (verbose)
printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n",
pq.M, pq.ksub, n, d);
pq.verbose = verbose;
pq.train (n, residuals.data());
is_trained = true;
}
void Index2Layer::add(idx_t n, const float* x)
{
idx_t bs = 32768;
if (n > bs) {
for (idx_t i0 = 0; i0 < n; i0 += bs) {
idx_t i1 = std::min(i0 + bs, n);
if (verbose) {
printf("Index2Layer::add: adding %ld:%ld / %ld\n",
i0, i1, n);
}
add (i1 - i0, x + i0 * d);
}
return;
}
std::vector<idx_t> codes1 (n);
q1.quantizer->assign (n, x, codes1.data());
std::vector<float> residuals(n * d);
for (idx_t i = 0; i < n; i++) {
q1.quantizer->compute_residual (
x + i * d, residuals.data() + i * d, codes1[i]);
}
std::vector<uint8_t> codes2 (n * code_size_2);
pq.compute_codes (residuals.data(), codes2.data(), n);
codes.resize ((ntotal + n) * code_size);
uint8_t *wp = &codes[ntotal * code_size];
{
int i = 0x11223344;
const char *ip = (char*)&i;
FAISS_THROW_IF_NOT_MSG (ip[0] == 0x44,
"works only on a little-endian CPU");
}
// copy to output table
for (idx_t i = 0; i < n; i++) {
memcpy (wp, &codes1[i], code_size_1);
wp += code_size_1;
memcpy (wp, &codes2[i * code_size_2], code_size_2);
wp += code_size_2;
}
ntotal += n;
}
void Index2Layer::search(
idx_t /*n*/,
const float* /*x*/,
idx_t /*k*/,
float* /*distances*/,
idx_t* /*labels*/) const {
FAISS_THROW_MSG("not implemented");
}
void Index2Layer::reconstruct_n(idx_t i0, idx_t ni, float* recons) const
{
float recons1[d];
FAISS_THROW_IF_NOT (i0 >= 0 && i0 + ni <= ntotal);
const uint8_t *rp = &codes[i0 * code_size];
for (idx_t i = 0; i < ni; i++) {
idx_t key = 0;
memcpy (&key, rp, code_size_1);
q1.quantizer->reconstruct (key, recons1);
rp += code_size_1;
pq.decode (rp, recons);
for (idx_t j = 0; j < d; j++) {
recons[j] += recons1[j];
}
rp += code_size_2;
recons += d;
}
}
void Index2Layer::transfer_to_IVFPQ (IndexIVFPQ & other) const
{
FAISS_THROW_IF_NOT (other.nlist == q1.nlist);
FAISS_THROW_IF_NOT (other.code_size == code_size_2);
FAISS_THROW_IF_NOT (other.ntotal == 0);
const uint8_t *rp = codes.data();
for (idx_t i = 0; i < ntotal; i++) {
idx_t key = 0;
memcpy (&key, rp, code_size_1);
rp += code_size_1;
other.invlists->add_entry (key, i, rp);
rp += code_size_2;
}
other.ntotal = ntotal;
}
void Index2Layer::reconstruct(idx_t key, float* recons) const
{
reconstruct_n (key, 1, recons);
}
void Index2Layer::reset()
{
ntotal = 0;
codes.clear ();
}
namespace {
struct Distance2Level : DistanceComputer {
size_t d;
const Index2Layer& storage;
std::vector<float> buf;
const float *q;
const float *pq_l1_tab, *pq_l2_tab;
explicit Distance2Level(const Index2Layer& storage)
: storage(storage) {
d = storage.d;
FAISS_ASSERT(storage.pq.dsub == 4);
pq_l2_tab = storage.pq.centroids.data();
buf.resize(2 * d);
}
float symmetric_dis(idx_t i, idx_t j) override {
storage.reconstruct(i, buf.data());
storage.reconstruct(j, buf.data() + d);
return fvec_L2sqr(buf.data() + d, buf.data(), d);
}
void set_query(const float *x) override {
q = x;
}
};
// well optimized for xNN+PQNN
struct DistanceXPQ4 : Distance2Level {
int M, k;
explicit DistanceXPQ4(const Index2Layer& storage)
: Distance2Level (storage) {
const IndexFlat *quantizer =
dynamic_cast<IndexFlat*> (storage.q1.quantizer);
FAISS_ASSERT(quantizer);
M = storage.pq.M;
pq_l1_tab = quantizer->xb.data();
}
float operator () (idx_t i) override {
#ifdef __SSE__
const uint8_t *code = storage.codes.data() + i * storage.code_size;
long key = 0;
memcpy (&key, code, storage.code_size_1);
code += storage.code_size_1;
// walking pointers
const float *qa = q;
const __m128 *l1_t = (const __m128 *)(pq_l1_tab + d * key);
const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab;
__m128 accu = _mm_setzero_ps();
for (int m = 0; m < M; m++) {
__m128 qi = _mm_loadu_ps(qa);
__m128 recons = l1_t[m] + pq_l2_t[*code++];
__m128 diff = qi - recons;
accu += diff * diff;
pq_l2_t += 256;
qa += 4;
}
accu = _mm_hadd_ps (accu, accu);
accu = _mm_hadd_ps (accu, accu);
return _mm_cvtss_f32 (accu);
#else
FAISS_THROW_MSG("not implemented for non-x64 platforms");
#endif
}
};
// well optimized for 2xNN+PQNN
struct Distance2xXPQ4 : Distance2Level {
int M_2, mi_nbits;
explicit Distance2xXPQ4(const Index2Layer& storage)
: Distance2Level(storage) {
const MultiIndexQuantizer *mi =
dynamic_cast<MultiIndexQuantizer*> (storage.q1.quantizer);
FAISS_ASSERT(mi);
FAISS_ASSERT(storage.pq.M % 2 == 0);
M_2 = storage.pq.M / 2;
mi_nbits = mi->pq.nbits;
pq_l1_tab = mi->pq.centroids.data();
}
float operator () (idx_t i) override {
const uint8_t *code = storage.codes.data() + i * storage.code_size;
long key01 = 0;
memcpy (&key01, code, storage.code_size_1);
code += storage.code_size_1;
#ifdef __SSE__
// walking pointers
const float *qa = q;
const __m128 *pq_l1_t = (const __m128 *)pq_l1_tab;
const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab;
__m128 accu = _mm_setzero_ps();
for (int mi_m = 0; mi_m < 2; mi_m++) {
long l1_idx = key01 & ((1L << mi_nbits) - 1);
const __m128 * pq_l1 = pq_l1_t + M_2 * l1_idx;
for (int m = 0; m < M_2; m++) {
__m128 qi = _mm_loadu_ps(qa);
__m128 recons = pq_l1[m] + pq_l2_t[*code++];
__m128 diff = qi - recons;
accu += diff * diff;
pq_l2_t += 256;
qa += 4;
}
pq_l1_t += M_2 << mi_nbits;
key01 >>= mi_nbits;
}
accu = _mm_hadd_ps (accu, accu);
accu = _mm_hadd_ps (accu, accu);
return _mm_cvtss_f32 (accu);
#else
FAISS_THROW_MSG("not implemented for non-x64 platforms");
#endif
}
};
} // namespace
DistanceComputer * Index2Layer::get_distance_computer() const {
#ifdef __SSE__
const MultiIndexQuantizer *mi =
dynamic_cast<MultiIndexQuantizer*> (q1.quantizer);
if (mi && pq.M % 2 == 0 && pq.dsub == 4) {
return new Distance2xXPQ4(*this);
}
const IndexFlat *fl =
dynamic_cast<IndexFlat*> (q1.quantizer);
if (fl && pq.dsub == 4) {
return new DistanceXPQ4(*this);
}
#endif
return Index::get_distance_computer();
}
/* The standalone codec interface */
size_t Index2Layer::sa_code_size () const
{
return code_size;
}
void Index2Layer::sa_encode (idx_t n, const float *x, uint8_t *bytes) const
{
FAISS_THROW_IF_NOT (is_trained);
std::unique_ptr<int64_t []> list_nos (new int64_t [n]);
q1.quantizer->assign (n, x, list_nos.get());
std::vector<float> residuals(n * d);
for (idx_t i = 0; i < n; i++) {
q1.quantizer->compute_residual (
x + i * d, residuals.data() + i * d, list_nos[i]);
}
pq.compute_codes (residuals.data(), bytes, n);
for (idx_t i = n - 1; i >= 0; i--) {
uint8_t * code = bytes + i * code_size;
memmove (code + code_size_1,
bytes + i * code_size_2, code_size_2);
q1.encode_listno (list_nos[i], code);
}
}
void Index2Layer::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
{
#pragma omp parallel
{
std::vector<float> residual (d);
#pragma omp for
for (size_t i = 0; i < n; i++) {
const uint8_t *code = bytes + i * code_size;
int64_t list_no = q1.decode_listno (code);
float *xi = x + i * d;
pq.decode (code + code_size_1, xi);
q1.quantizer->reconstruct (list_no, residual.data());
for (size_t j = 0; j < d; j++) {
xi[j] += residual[j];
}
}
}
}
} // namespace faiss

85
core/src/index/thirdparty/faiss/Index2Layer.h vendored Normal file

@ -0,0 +1,85 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include <faiss/IndexPQ.h>
#include <faiss/IndexIVF.h>
namespace faiss {
struct IndexIVFPQ;
/** Same as an IndexIVFPQ without the inverted lists: codes are stored sequentially
*
* The class is mainly intended to store encoded vectors that can be
* accessed randomly; the search function is not implemented.
*/
struct Index2Layer: Index {
/// first level quantizer
Level1Quantizer q1;
/// second level quantizer is always a PQ
ProductQuantizer pq;
/// Codes. Size ntotal * code_size.
std::vector<uint8_t> codes;
/// size of the code for the first level (ceil(log8(q1.nlist)))
size_t code_size_1;
/// size of the code for the second level
size_t code_size_2;
/// code_size_1 + code_size_2
size_t code_size;
Index2Layer (Index * quantizer, size_t nlist,
int M, int nbit = 8,
MetricType metric = METRIC_L2);
Index2Layer ();
~Index2Layer ();
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
/// not implemented
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct(idx_t key, float* recons) const override;
void reset() override;
DistanceComputer * get_distance_computer() const override;
/// transfer the flat codes to an IVFPQ index
void transfer_to_IVFPQ(IndexIVFPQ & other) const;
/* The standalone codec interface */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x, uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes, float *x) const override;
};
} // namespace faiss
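A usage sketch for the class declared above; since search() is not implemented, Index2Layer is used here purely as a random-access code store. The nlist and M values are illustrative:
#include <faiss/Index2Layer.h>
#include <faiss/IndexFlat.h>
#include <vector>
void demo (const float *xtrain, size_t ntrain,
           const float *xb, size_t nb, int d) {
    faiss::IndexFlatL2 quantizer (d);       // first-level (coarse) quantizer
    faiss::Index2Layer index (&quantizer, /*nlist=*/256, /*M=*/d / 4);
    index.train (ntrain, xtrain);           // trains q1, then the PQ on residuals
    index.add (nb, xb);                     // codes are stored sequentially
    std::vector<float> approx (d);
    index.reconstruct (0, approx.data());   // coarse centroid + decoded residual
}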

77
core/src/index/thirdparty/faiss/IndexBinary.cpp vendored Normal file

@ -0,0 +1,77 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexBinary.h>
#include <faiss/impl/FaissAssert.h>
#include <cstring>
namespace faiss {
IndexBinary::~IndexBinary() {}
void IndexBinary::train(idx_t, const uint8_t *) {
// Does nothing by default.
}
void IndexBinary::range_search(idx_t, const uint8_t *, int,
RangeSearchResult *) const {
FAISS_THROW_MSG("range search not implemented");
}
void IndexBinary::assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k) {
int *distances = new int[n * k];
ScopeDeleter<int> del(distances);
search(n, x, k, distances, labels);
}
void IndexBinary::add_with_ids(idx_t, const uint8_t *, const idx_t *) {
FAISS_THROW_MSG("add_with_ids not implemented for this type of index");
}
size_t IndexBinary::remove_ids(const IDSelector&) {
FAISS_THROW_MSG("remove_ids not implemented for this type of index");
return 0;
}
void IndexBinary::reconstruct(idx_t, uint8_t *) const {
FAISS_THROW_MSG("reconstruct not implemented for this type of index");
}
void IndexBinary::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
for (idx_t i = 0; i < ni; i++) {
reconstruct(i0 + i, recons + i * d);
}
}
void IndexBinary::search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels,
uint8_t *recons) const {
search(n, x, k, distances, labels);
for (idx_t i = 0; i < n; ++i) {
for (idx_t j = 0; j < k; ++j) {
idx_t ij = i * k + j;
idx_t key = labels[ij];
uint8_t *reconstructed = recons + ij * d;
if (key < 0) {
// Fill with 0xff bytes to mark missing results (binary codes, not floats)
memset(reconstructed, -1, sizeof(*reconstructed) * d);
} else {
reconstruct(key, reconstructed);
}
}
}
}
void IndexBinary::display() const {
printf("Index: %s -> %ld elements\n", typeid (*this).name(), ntotal);
}
} // namespace faiss

163
core/src/index/thirdparty/faiss/IndexBinary.h vendored Normal file

@ -0,0 +1,163 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_BINARY_H
#define FAISS_INDEX_BINARY_H
#include <cstdio>
#include <typeinfo>
#include <string>
#include <sstream>
#include <faiss/impl/FaissAssert.h>
#include <faiss/Index.h>
namespace faiss {
/// Forward declarations see AuxIndexStructures.h
struct IDSelector;
struct RangeSearchResult;
/** Abstract structure for a binary index.
*
* Supports adding vectors and searching them.
*
* All queries are symmetric because there is no distinction between codes and
* vectors.
*/
struct IndexBinary {
using idx_t = Index::idx_t; ///< all indices are this type
using component_t = uint8_t;
using distance_t = int32_t;
int d; ///< vector dimension
int code_size; ///< number of bytes per vector ( = d / 8 )
idx_t ntotal; ///< total nb of indexed vectors
bool verbose; ///< verbosity level
/// set if the Index does not require training, or if training is done already
bool is_trained;
/// type of metric this index uses for search
MetricType metric_type;
explicit IndexBinary(idx_t d = 0, MetricType metric = METRIC_L2)
: d(d),
code_size(d / 8),
ntotal(0),
verbose(false),
is_trained(true),
metric_type(metric) {
FAISS_THROW_IF_NOT(d % 8 == 0);
}
virtual ~IndexBinary();
/** Perform training on a representative set of vectors.
*
* @param n nb of training vectors
* @param x training vectors, size n * d / 8
*/
virtual void train(idx_t n, const uint8_t *x);
/** Add n vectors of dimension d to the index.
*
* Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
* @param x input matrix, size n * d / 8
*/
virtual void add(idx_t n, const uint8_t *x) = 0;
/** Same as add, but stores xids instead of sequential ids.
*
* The default implementation fails with an assertion, as it is
* not supported by all indexes.
*
* @param xids if non-null, ids to store for the vectors (size n)
*/
virtual void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids);
/** Query n vectors of dimension d to the index.
*
* return at most k vectors. If there are not enough results for a
* query, the result array is padded with -1s.
*
* @param x input vectors to search, size n * d / 8
* @param labels output labels of the NNs, size n*k
* @param distances output pairwise distances, size n*k
*/
virtual void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const = 0;
/** Query n vectors of dimension d to the index.
*
* return all vectors with distance < radius. Note that many
* indexes do not implement the range_search (only the k-NN search
* is mandatory).
*
* @param x input vectors to search, size n * d / 8
* @param radius search radius
* @param result result table
*/
virtual void range_search(idx_t n, const uint8_t *x, int radius,
RangeSearchResult *result) const;
/** Return the indexes of the k vectors closest to the query x.
*
* This function is identical to search but only returns labels of neighbors.
* @param x input vectors to search, size n * d / 8
* @param labels output labels of the NNs, size n*k
*/
void assign(idx_t n, const uint8_t *x, idx_t *labels, idx_t k = 1);
/// Removes all elements from the database.
virtual void reset() = 0;
/** Removes IDs from the index. Not supported by all indexes.
*/
virtual size_t remove_ids(const IDSelector& sel);
/** Reconstruct a stored vector.
*
* This function may not be defined for some indexes.
* @param key id of the vector to reconstruct
* @param recons reconstructed vector (size d / 8)
*/
virtual void reconstruct(idx_t key, uint8_t *recons) const;
/** Reconstruct vectors i0 to i0 + ni - 1.
*
* This function may not be defined for some indexes.
* @param recons reconstructed vectors (size ni * d / 8)
*/
virtual void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* If there are not enough results for a query, the resulting array
* is padded with -1s.
*
* @param recons reconstructed vectors size (n, k, d)
**/
virtual void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels,
uint8_t *recons) const;
/** Display the actual class name and some more info. */
void display() const;
};
} // namespace faiss
#endif // FAISS_INDEX_BINARY_H
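The d / 8 packing convention used throughout this interface can be made explicit with a small helper. This is a sketch of the bit layout (least-significant bit first within each byte, matching the library's binary/real conversion utilities), not a function from the library:
#include <cstdint>
#include <vector>
// Pack d bits (d % 8 == 0) into d / 8 bytes, LSB-first within each byte.
std::vector<uint8_t> pack_bits (const std::vector<bool> &bits) {
    std::vector<uint8_t> codes (bits.size() / 8, 0);
    for (size_t j = 0; j < bits.size(); j++) {
        if (bits[j]) codes[j / 8] |= uint8_t (1u << (j % 8));
    }
    return codes;
}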

83
core/src/index/thirdparty/faiss/IndexBinaryFlat.cpp vendored Normal file

@ -0,0 +1,83 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexBinaryFlat.h>
#include <cstring>
#include <faiss/utils/hamming.h>
#include <faiss/utils/utils.h>
#include <faiss/utils/Heap.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
IndexBinaryFlat::IndexBinaryFlat(idx_t d)
: IndexBinary(d) {}
void IndexBinaryFlat::add(idx_t n, const uint8_t *x) {
xb.insert(xb.end(), x, x + n * code_size);
ntotal += n;
}
void IndexBinaryFlat::reset() {
xb.clear();
ntotal = 0;
}
void IndexBinaryFlat::search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const {
const idx_t block_size = query_batch_size;
for (idx_t s = 0; s < n; s += block_size) {
idx_t nn = block_size;
if (s + block_size > n) {
nn = n - s;
}
if (use_heap) {
// We see the distances and labels as heaps.
int_maxheap_array_t res = {
size_t(nn), size_t(k), labels + s * k, distances + s * k
};
hammings_knn_hc(&res, x + s * code_size, xb.data(), ntotal, code_size,
/* ordered = */ true);
} else {
hammings_knn_mc(x + s * code_size, xb.data(), nn, ntotal, k, code_size,
distances + s * k, labels + s * k);
}
}
}
size_t IndexBinaryFlat::remove_ids(const IDSelector& sel) {
idx_t j = 0;
for (idx_t i = 0; i < ntotal; i++) {
if (sel.is_member(i)) {
// should be removed
} else {
if (i > j) {
memmove(&xb[code_size * j], &xb[code_size * i], sizeof(xb[0]) * code_size);
}
j++;
}
}
long nremove = ntotal - j;
if (nremove > 0) {
ntotal = j;
xb.resize(ntotal * code_size);
}
return nremove;
}
void IndexBinaryFlat::reconstruct(idx_t key, uint8_t *recons) const {
memcpy(recons, &(xb[code_size * key]), sizeof(*recons) * code_size);
}
} // namespace faiss

54
core/src/index/thirdparty/faiss/IndexBinaryFlat.h vendored Normal file

@ -0,0 +1,54 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef INDEX_BINARY_FLAT_H
#define INDEX_BINARY_FLAT_H
#include <vector>
#include <faiss/IndexBinary.h>
namespace faiss {
/** Index that stores the full vectors and performs exhaustive search. */
struct IndexBinaryFlat : IndexBinary {
/// database vectors, size ntotal * d / 8
std::vector<uint8_t> xb;
/** Select between using a heap or counting to select the k smallest values
* when scanning the database.
*/
bool use_heap = true;
size_t query_batch_size = 32;
explicit IndexBinaryFlat(idx_t d);
void add(idx_t n, const uint8_t *x) override;
void reset() override;
void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void reconstruct(idx_t key, uint8_t *recons) const override;
/** Remove some ids. Note that because of the indexing structure,
* the semantics of this operation are different from the usual ones:
* the new ids are shifted. */
size_t remove_ids(const IDSelector& sel) override;
IndexBinaryFlat() {}
};
} // namespace faiss
#endif // INDEX_BINARY_FLAT_H
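A minimal usage sketch for the flat binary index declared above; the data values are illustrative:
#include <faiss/IndexBinaryFlat.h>
#include <vector>
int main () {
    int d = 64;                                  // bits per vector -> 8 bytes
    faiss::IndexBinaryFlat index (d);
    std::vector<uint8_t> xb (3 * d / 8, 0);      // three codes, all zero
    xb[0] = 0x01;                                // vector 0 differs in one bit
    index.add (3, xb.data());
    std::vector<uint8_t> xq (d / 8, 0);          // all-zero query
    int k = 3;
    std::vector<int32_t> D (k);
    std::vector<faiss::IndexBinary::idx_t> I (k);
    index.search (1, xq.data(), k, D.data(), I.data());
    // Hamming distances D = {0, 0, 1}: vectors 1 and 2 match exactly
    return 0;
}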

78
core/src/index/thirdparty/faiss/IndexBinaryFromFloat.cpp vendored Normal file

@ -0,0 +1,78 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexBinaryFromFloat.h>
#include <memory>
#include <faiss/utils/utils.h>
namespace faiss {
IndexBinaryFromFloat::IndexBinaryFromFloat() {}
IndexBinaryFromFloat::IndexBinaryFromFloat(Index *index)
: IndexBinary(index->d),
index(index),
own_fields(false) {
is_trained = index->is_trained;
ntotal = index->ntotal;
}
IndexBinaryFromFloat::~IndexBinaryFromFloat() {
if (own_fields) {
delete index;
}
}
void IndexBinaryFromFloat::add(idx_t n, const uint8_t *x) {
constexpr idx_t bs = 32768;
std::unique_ptr<float[]> xf(new float[bs * d]);
for (idx_t b = 0; b < n; b += bs) {
idx_t bn = std::min(bs, n - b);
binary_to_real(bn * d, x + b * code_size, xf.get());
index->add(bn, xf.get());
}
ntotal = index->ntotal;
}
void IndexBinaryFromFloat::reset() {
index->reset();
ntotal = index->ntotal;
}
void IndexBinaryFromFloat::search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const {
constexpr idx_t bs = 32768;
std::unique_ptr<float[]> xf(new float[bs * d]);
std::unique_ptr<float[]> df(new float[bs * k]);
for (idx_t b = 0; b < n; b += bs) {
idx_t bn = std::min(bs, n - b);
binary_to_real(bn * d, x + b * code_size, xf.get());
index->search(bn, xf.get(), k, df.get(), labels + b * k);
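// binary_to_real maps each bit to +/-1, so the squared L2 distance between
// two such vectors is exactly 4x their Hamming distance; dividing by 4
// recovers integer Hamming distances.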
for (int i = 0; i < bn * k; ++i) {
distances[b * k + i] = int32_t(std::round(df[i] / 4.0));
}
}
}
void IndexBinaryFromFloat::train(idx_t n, const uint8_t *x) {
std::unique_ptr<float[]> xf(new float[n * d]);
binary_to_real(n * d, x, xf.get());
index->train(n, xf.get());
is_trained = true;
ntotal = index->ntotal;
}
} // namespace faiss

52
core/src/index/thirdparty/faiss/IndexBinaryFromFloat.h vendored Normal file

@ -0,0 +1,52 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_BINARY_FROM_FLOAT_H
#define FAISS_INDEX_BINARY_FROM_FLOAT_H
#include <faiss/IndexBinary.h>
namespace faiss {
struct Index;
/** IndexBinary backed by a float Index.
*
* Supports adding vectors and searching them.
*
* All queries are symmetric because there is no distinction between codes and
* vectors.
*/
struct IndexBinaryFromFloat : IndexBinary {
Index *index = nullptr;
bool own_fields = false; ///< Whether object owns the index pointer.
IndexBinaryFromFloat();
explicit IndexBinaryFromFloat(Index *index);
~IndexBinaryFromFloat();
void add(idx_t n, const uint8_t *x) override;
void reset() override;
void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void train(idx_t n, const uint8_t *x) override;
};
} // namespace faiss
#endif // FAISS_INDEX_BINARY_FROM_FLOAT_H
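A small sketch of wrapping a float index behind this binary interface; the wrapper expands bits to +/-1 floats before forwarding calls. Names are illustrative:
#include <faiss/IndexBinaryFromFloat.h>
#include <faiss/IndexFlat.h>
faiss::IndexBinaryFromFloat *wrap_flat (int d) {   // d = number of bits
    auto *flat = new faiss::IndexFlatL2 (d);       // underlying float index
    auto *bin = new faiss::IndexBinaryFromFloat (flat);
    bin->own_fields = true;                        // wrapper now deletes flat
    return bin;
}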

325
core/src/index/thirdparty/faiss/IndexBinaryHNSW.cpp vendored Normal file

@ -0,0 +1,325 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexBinaryHNSW.h>
#include <memory>
#include <cstdlib>
#include <cassert>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <omp.h>
#include <unordered_set>
#include <queue>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdint.h>
#include <faiss/utils/random.h>
#include <faiss/utils/Heap.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/utils/hamming.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
/**************************************************************
* add / search blocks of descriptors
**************************************************************/
namespace {
void hnsw_add_vertices(IndexBinaryHNSW& index_hnsw,
size_t n0,
size_t n, const uint8_t *x,
bool verbose,
bool preset_levels = false) {
HNSW& hnsw = index_hnsw.hnsw;
size_t ntotal = n0 + n;
double t0 = getmillisecs();
if (verbose) {
printf("hnsw_add_vertices: adding %ld elements on top of %ld "
"(preset_levels=%d)\n",
n, n0, int(preset_levels));
}
int max_level = hnsw.prepare_level_tab(n, preset_levels);
if (verbose) {
printf(" max_level = %d\n", max_level);
}
std::vector<omp_lock_t> locks(ntotal);
for(int i = 0; i < ntotal; i++) {
omp_init_lock(&locks[i]);
}
// add vectors from highest to lowest level
std::vector<int> hist;
std::vector<int> order(n);
{ // make buckets with vectors of the same level
// build histogram
for (int i = 0; i < n; i++) {
HNSW::storage_idx_t pt_id = i + n0;
int pt_level = hnsw.levels[pt_id] - 1;
while (pt_level >= hist.size()) {
hist.push_back(0);
}
hist[pt_level] ++;
}
// accumulate
std::vector<int> offsets(hist.size() + 1, 0);
for (int i = 0; i < hist.size() - 1; i++) {
offsets[i + 1] = offsets[i] + hist[i];
}
// bucket sort
for (int i = 0; i < n; i++) {
HNSW::storage_idx_t pt_id = i + n0;
int pt_level = hnsw.levels[pt_id] - 1;
order[offsets[pt_level]++] = pt_id;
}
}
{ // perform add
RandomGenerator rng2(789);
int i1 = n;
for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) {
int i0 = i1 - hist[pt_level];
if (verbose) {
printf("Adding %d elements at level %d\n",
i1 - i0, pt_level);
}
// random permutation to get rid of dataset order bias
for (int j = i0; j < i1; j++) {
std::swap(order[j], order[j + rng2.rand_int(i1 - j)]);
}
#pragma omp parallel
{
VisitedTable vt (ntotal);
std::unique_ptr<DistanceComputer> dis(
index_hnsw.get_distance_computer()
);
int prev_display = verbose && omp_get_thread_num() == 0 ? 0 : -1;
#pragma omp for schedule(dynamic)
for (int i = i0; i < i1; i++) {
HNSW::storage_idx_t pt_id = order[i];
dis->set_query((float *)(x + (pt_id - n0) * index_hnsw.code_size));
hnsw.add_with_locks(*dis, pt_level, pt_id, locks, vt);
if (prev_display >= 0 && i - i0 > prev_display + 10000) {
prev_display = i - i0;
printf(" %d / %d\r", i - i0, i1 - i0);
fflush(stdout);
}
}
}
i1 = i0;
}
FAISS_ASSERT(i1 == 0);
}
if (verbose) {
printf("Done in %.3f ms\n", getmillisecs() - t0);
}
for(int i = 0; i < ntotal; i++)
omp_destroy_lock(&locks[i]);
}
} // anonymous namespace
/**************************************************************
* IndexBinaryHNSW implementation
**************************************************************/
IndexBinaryHNSW::IndexBinaryHNSW()
{
is_trained = true;
}
IndexBinaryHNSW::IndexBinaryHNSW(int d, int M)
: IndexBinary(d),
hnsw(M),
own_fields(true),
storage(new IndexBinaryFlat(d))
{
is_trained = true;
}
IndexBinaryHNSW::IndexBinaryHNSW(IndexBinary *storage, int M)
: IndexBinary(storage->d),
hnsw(M),
own_fields(false),
storage(storage)
{
is_trained = true;
}
IndexBinaryHNSW::~IndexBinaryHNSW() {
if (own_fields) {
delete storage;
}
}
void IndexBinaryHNSW::train(idx_t n, const uint8_t *x)
{
// hnsw structure does not require training
storage->train(n, x);
is_trained = true;
}
void IndexBinaryHNSW::search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const
{
#pragma omp parallel
{
VisitedTable vt(ntotal);
std::unique_ptr<DistanceComputer> dis(get_distance_computer());
#pragma omp for
for(idx_t i = 0; i < n; i++) {
idx_t *idxi = labels + i * k;
float *simi = (float *)(distances + i * k);
dis->set_query((float *)(x + i * code_size));
maxheap_heapify(k, simi, idxi);
hnsw.search(*dis, k, idxi, simi, vt);
maxheap_reorder(k, simi, idxi);
}
}
#pragma omp parallel for
for (int i = 0; i < n * k; ++i) {
distances[i] = std::round(((float *)distances)[i]);
}
}
void IndexBinaryHNSW::add(idx_t n, const uint8_t *x)
{
FAISS_THROW_IF_NOT(is_trained);
int n0 = ntotal;
storage->add(n, x);
ntotal = storage->ntotal;
hnsw_add_vertices(*this, n0, n, x, verbose,
hnsw.levels.size() == ntotal);
}
void IndexBinaryHNSW::reset()
{
hnsw.reset();
storage->reset();
ntotal = 0;
}
void IndexBinaryHNSW::reconstruct(idx_t key, uint8_t *recons) const
{
storage->reconstruct(key, recons);
}
namespace {
template<class HammingComputer>
struct FlatHammingDis : DistanceComputer {
const int code_size;
const uint8_t *b;
size_t ndis;
HammingComputer hc;
float operator () (idx_t i) override {
ndis++;
return hc.hamming(b + i * code_size);
}
float symmetric_dis(idx_t i, idx_t j) override {
return HammingComputerDefault(b + j * code_size, code_size)
.hamming(b + i * code_size);
}
explicit FlatHammingDis(const IndexBinaryFlat& storage)
: code_size(storage.code_size),
b(storage.xb.data()),
ndis(0),
hc() {}
// NOTE: Pointers are cast from float in order to reuse the floating-point
// DistanceComputer.
void set_query(const float *x) override {
hc.set((uint8_t *)x, code_size);
}
~FlatHammingDis() override {
#pragma omp critical
{
hnsw_stats.ndis += ndis;
}
}
};
} // namespace
DistanceComputer *IndexBinaryHNSW::get_distance_computer() const {
IndexBinaryFlat *flat_storage = dynamic_cast<IndexBinaryFlat *>(storage);
FAISS_ASSERT(flat_storage != nullptr);
switch(code_size) {
case 4:
return new FlatHammingDis<HammingComputer4>(*flat_storage);
case 8:
return new FlatHammingDis<HammingComputer8>(*flat_storage);
case 16:
return new FlatHammingDis<HammingComputer16>(*flat_storage);
case 20:
return new FlatHammingDis<HammingComputer20>(*flat_storage);
case 32:
return new FlatHammingDis<HammingComputer32>(*flat_storage);
case 64:
return new FlatHammingDis<HammingComputer64>(*flat_storage);
default:
if (code_size % 8 == 0) {
return new FlatHammingDis<HammingComputerM8>(*flat_storage);
} else if (code_size % 4 == 0) {
return new FlatHammingDis<HammingComputerM4>(*flat_storage);
}
}
return new FlatHammingDis<HammingComputerDefault>(*flat_storage);
}
} // namespace faiss

56
core/src/index/thirdparty/faiss/IndexBinaryHNSW.h vendored Normal file

@ -0,0 +1,56 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <faiss/impl/HNSW.h>
#include <faiss/IndexBinaryFlat.h>
#include <faiss/utils/utils.h>
namespace faiss {
/** The HNSW index is a normal random-access index with an HNSW
* link structure built on top */
struct IndexBinaryHNSW : IndexBinary {
typedef HNSW::storage_idx_t storage_idx_t;
// the link structure
HNSW hnsw;
// the sequential storage
bool own_fields;
IndexBinary *storage;
explicit IndexBinaryHNSW();
explicit IndexBinaryHNSW(int d, int M = 32);
explicit IndexBinaryHNSW(IndexBinary *storage, int M = 32);
~IndexBinaryHNSW() override;
DistanceComputer *get_distance_computer() const;
void add(idx_t n, const uint8_t *x) override;
/// Trains the storage if needed
void train(idx_t n, const uint8_t* x) override;
/// entry point for search
void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void reconstruct(idx_t key, uint8_t* recons) const override;
void reset() override;
};
} // namespace faiss
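A usage sketch for the binary HNSW index declared above; M, efConstruction, and k are illustrative values:
#include <faiss/IndexBinaryHNSW.h>
#include <vector>
void build_and_query (const uint8_t *xb, size_t nb,
                      const uint8_t *xq, size_t nq, int d) {
    faiss::IndexBinaryHNSW index (d, /*M=*/16);  // owns a flat storage index
    index.hnsw.efConstruction = 40;              // graph construction effort
    index.add (nb, xb);                          // builds links level by level
    int k = 10;
    std::vector<int32_t> D (nq * k);
    std::vector<faiss::IndexBinary::idx_t> I (nq * k);
    index.search (nq, xq, k, D.data(), I.data());
}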

671
core/src/index/thirdparty/faiss/IndexBinaryIVF.cpp vendored Normal file

@ -0,0 +1,671 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved
// -*- c++ -*-
#include <faiss/IndexBinaryIVF.h>
#include <cstdio>
#include <memory>
#include <faiss/utils/hamming.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexFlat.h>
namespace faiss {
IndexBinaryIVF::IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist)
: IndexBinary(d),
invlists(new ArrayInvertedLists(nlist, code_size)),
own_invlists(true),
nprobe(1),
max_codes(0),
maintain_direct_map(false),
quantizer(quantizer),
nlist(nlist),
own_fields(false),
clustering_index(nullptr)
{
FAISS_THROW_IF_NOT (d == quantizer->d);
is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
cp.niter = 10;
}
IndexBinaryIVF::IndexBinaryIVF()
: invlists(nullptr),
own_invlists(false),
nprobe(1),
max_codes(0),
maintain_direct_map(false),
quantizer(nullptr),
nlist(0),
own_fields(false),
clustering_index(nullptr)
{}
void IndexBinaryIVF::add(idx_t n, const uint8_t *x) {
add_with_ids(n, x, nullptr);
}
void IndexBinaryIVF::add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids) {
add_core(n, x, xids, nullptr);
}
void IndexBinaryIVF::add_core(idx_t n, const uint8_t *x, const idx_t *xids,
const idx_t *precomputed_idx) {
FAISS_THROW_IF_NOT(is_trained);
assert(invlists);
FAISS_THROW_IF_NOT_MSG(!(maintain_direct_map && xids),
"cannot have direct map and add with ids");
const idx_t * idx;
std::unique_ptr<idx_t[]> scoped_idx;
if (precomputed_idx) {
idx = precomputed_idx;
} else {
scoped_idx.reset(new idx_t[n]);
quantizer->assign(n, x, scoped_idx.get());
idx = scoped_idx.get();
}
long n_add = 0;
for (size_t i = 0; i < n; i++) {
idx_t id = xids ? xids[i] : ntotal + i;
idx_t list_no = idx[i];
if (list_no < 0)
continue;
const uint8_t *xi = x + i * code_size;
size_t offset = invlists->add_entry(list_no, id, xi);
if (maintain_direct_map)
direct_map.push_back(list_no << 32 | offset);
n_add++;
}
if (verbose) {
printf("IndexBinaryIVF::add_with_ids: added %ld / %ld vectors\n",
n_add, n);
}
ntotal += n_add;
}
void IndexBinaryIVF::make_direct_map(bool new_maintain_direct_map) {
// nothing to do
if (new_maintain_direct_map == maintain_direct_map)
return;
if (new_maintain_direct_map) {
direct_map.resize(ntotal, -1);
for (size_t key = 0; key < nlist; key++) {
size_t list_size = invlists->list_size(key);
const idx_t *idlist = invlists->get_ids(key);
for (size_t ofs = 0; ofs < list_size; ofs++) {
FAISS_THROW_IF_NOT_MSG(0 <= idlist[ofs] && idlist[ofs] < ntotal,
"direct map supported only for seuquential ids");
direct_map[idlist[ofs]] = key << 32 | ofs;
}
}
} else {
direct_map.clear();
}
maintain_direct_map = new_maintain_direct_map;
}
void IndexBinaryIVF::search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const {
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
double t0 = getmillisecs();
quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
indexIVF_stats.quantization_time += getmillisecs() - t0;
t0 = getmillisecs();
invlists->prefetch_lists(idx.get(), n * nprobe);
search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
distances, labels, false);
indexIVF_stats.search_time += getmillisecs() - t0;
}
void IndexBinaryIVF::reconstruct(idx_t key, uint8_t *recons) const {
FAISS_THROW_IF_NOT_MSG(direct_map.size() == ntotal,
"direct map is not initialized");
idx_t list_no = direct_map[key] >> 32;
idx_t offset = direct_map[key] & 0xffffffff;
reconstruct_from_offset(list_no, offset, recons);
}
void IndexBinaryIVF::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
for (idx_t list_no = 0; list_no < nlist; list_no++) {
size_t list_size = invlists->list_size(list_no);
const Index::idx_t *idlist = invlists->get_ids(list_no);
for (idx_t offset = 0; offset < list_size; offset++) {
idx_t id = idlist[offset];
if (!(id >= i0 && id < i0 + ni)) {
continue;
}
uint8_t *reconstructed = recons + (id - i0) * d;
reconstruct_from_offset(list_no, offset, reconstructed);
}
}
}
void IndexBinaryIVF::search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels,
uint8_t *recons) const {
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
invlists->prefetch_lists(idx.get(), n * nprobe);
// search_preassigned() with `store_pairs` enabled to obtain the list_no
// and offset into `codes` for reconstruction
search_preassigned(n, x, k, idx.get(), coarse_dis.get(),
distances, labels, /* store_pairs */true);
for (idx_t i = 0; i < n; ++i) {
for (idx_t j = 0; j < k; ++j) {
idx_t ij = i * k + j;
idx_t key = labels[ij];
uint8_t *reconstructed = recons + ij * d;
if (key < 0) {
// Fill with 0xff bytes to mark missing results (binary codes, not floats)
memset(reconstructed, -1, sizeof(*reconstructed) * d);
} else {
int list_no = key >> 32;
int offset = key & 0xffffffff;
// Update label to the actual id
labels[ij] = invlists->get_single_id(list_no, offset);
reconstruct_from_offset(list_no, offset, reconstructed);
}
}
}
}
void IndexBinaryIVF::reconstruct_from_offset(idx_t list_no, idx_t offset,
uint8_t *recons) const {
memcpy(recons, invlists->get_single_code(list_no, offset), code_size);
}
void IndexBinaryIVF::reset() {
direct_map.clear();
invlists->reset();
ntotal = 0;
}
size_t IndexBinaryIVF::remove_ids(const IDSelector& sel) {
FAISS_THROW_IF_NOT_MSG(!maintain_direct_map,
"direct map remove not implemented");
std::vector<idx_t> toremove(nlist);
#pragma omp parallel for
for (idx_t i = 0; i < nlist; i++) {
idx_t l0 = invlists->list_size (i), l = l0, j = 0;
const idx_t *idsi = invlists->get_ids(i);
while (j < l) {
if (sel.is_member(idsi[j])) {
l--;
invlists->update_entry(
i, j,
invlists->get_single_id(i, l),
invlists->get_single_code(i, l));
} else {
j++;
}
}
toremove[i] = l0 - l;
}
// this will not run well in parallel on ondisk because of possible shrinks
size_t nremove = 0;
for (idx_t i = 0; i < nlist; i++) {
if (toremove[i] > 0) {
nremove += toremove[i];
invlists->resize(
i, invlists->list_size(i) - toremove[i]);
}
}
ntotal -= nremove;
return nremove;
}
void IndexBinaryIVF::train(idx_t n, const uint8_t *x) {
if (verbose) {
printf("Training quantizer\n");
}
if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
if (verbose) {
printf("IVF quantizer does not need training.\n");
}
} else {
if (verbose) {
printf("Training quantizer on %ld vectors in %dD\n", n, d);
}
Clustering clus(d, nlist, cp);
quantizer->reset();
std::unique_ptr<float[]> x_f(new float[n * d]);
binary_to_real(n * d, x, x_f.get());
IndexFlatL2 index_tmp(d);
if (clustering_index && verbose) {
printf("using clustering_index of dimension %d to do the clustering\n",
clustering_index->d);
}
clus.train(n, x_f.get(), clustering_index ? *clustering_index : index_tmp);
std::unique_ptr<uint8_t[]> x_b(new uint8_t[clus.k * code_size]);
real_to_binary(d * clus.k, clus.centroids.data(), x_b.get());
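// real_to_binary thresholds each centroid component at zero; with the +/-1
// inputs produced by binary_to_real, a bit stays set iff most vectors
// assigned to that centroid had it set.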
quantizer->add(clus.k, x_b.get());
quantizer->is_trained = true;
}
is_trained = true;
}
void IndexBinaryIVF::merge_from(IndexBinaryIVF &other, idx_t add_id) {
// minimal sanity checks
FAISS_THROW_IF_NOT(other.d == d);
FAISS_THROW_IF_NOT(other.nlist == nlist);
FAISS_THROW_IF_NOT(other.code_size == code_size);
FAISS_THROW_IF_NOT_MSG((!maintain_direct_map &&
!other.maintain_direct_map),
"direct map copy not implemented");
FAISS_THROW_IF_NOT_MSG(typeid (*this) == typeid (other),
"can only merge indexes of the same type");
invlists->merge_from (other.invlists, add_id);
ntotal += other.ntotal;
other.ntotal = 0;
}
void IndexBinaryIVF::replace_invlists(InvertedLists *il, bool own) {
FAISS_THROW_IF_NOT(il->nlist == nlist &&
il->code_size == code_size);
if (own_invlists) {
delete invlists;
}
invlists = il;
own_invlists = own;
}
namespace {
using idx_t = Index::idx_t;
template<class HammingComputer, bool store_pairs>
struct IVFBinaryScannerL2: BinaryInvertedListScanner {
HammingComputer hc;
size_t code_size;
IVFBinaryScannerL2 (size_t code_size): code_size (code_size)
{}
void set_query (const uint8_t *query_vector) override {
hc.set (query_vector, code_size);
}
idx_t list_no;
void set_list (idx_t list_no, uint8_t /* coarse_dis */) override {
this->list_no = list_no;
}
uint32_t distance_to_code (const uint8_t *code) const override {
return hc.hamming (code);
}
size_t scan_codes (size_t n,
const uint8_t *codes,
const idx_t *ids,
int32_t *simi, idx_t *idxi,
size_t k) const override
{
using C = CMax<int32_t, idx_t>;
size_t nup = 0;
for (size_t j = 0; j < n; j++) {
uint32_t dis = hc.hamming (codes);
if (dis < simi[0]) {
heap_pop<C> (k, simi, idxi);
idx_t id = store_pairs ? (list_no << 32 | j) : ids[j];
heap_push<C> (k, simi, idxi, dis, id);
nup++;
}
codes += code_size;
}
return nup;
}
};
template <bool store_pairs>
BinaryInvertedListScanner *select_IVFBinaryScannerL2 (size_t code_size) {
switch (code_size) {
#define HANDLE_CS(cs) \
case cs: \
return new IVFBinaryScannerL2<HammingComputer ## cs, store_pairs> (cs);
HANDLE_CS(4);
HANDLE_CS(8);
HANDLE_CS(16);
HANDLE_CS(20);
HANDLE_CS(32);
HANDLE_CS(64);
#undef HANDLE_CS
default:
if (code_size % 8 == 0) {
return new IVFBinaryScannerL2<HammingComputerM8,
store_pairs> (code_size);
} else if (code_size % 4 == 0) {
return new IVFBinaryScannerL2<HammingComputerM4,
store_pairs> (code_size);
} else {
return new IVFBinaryScannerL2<HammingComputerDefault,
store_pairs> (code_size);
}
}
}
void search_knn_hamming_heap(const IndexBinaryIVF& ivf,
size_t n,
const uint8_t *x,
idx_t k,
const idx_t *keys,
const int32_t * coarse_dis,
int32_t *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params)
{
long nprobe = params ? params->nprobe : ivf.nprobe;
long max_codes = params ? params->max_codes : ivf.max_codes;
MetricType metric_type = ivf.metric_type;
// almost verbatim copy from IndexIVF::search_preassigned
size_t nlistv = 0, ndis = 0, nheap = 0;
using HeapForIP = CMin<int32_t, idx_t>;
using HeapForL2 = CMax<int32_t, idx_t>;
#pragma omp parallel if(n > 1) reduction(+: nlistv, ndis, nheap)
{
std::unique_ptr<BinaryInvertedListScanner> scanner
(ivf.get_InvertedListScanner (store_pairs));
#pragma omp for
for (size_t i = 0; i < n; i++) {
const uint8_t *xi = x + i * ivf.code_size;
scanner->set_query(xi);
const idx_t * keysi = keys + i * nprobe;
int32_t * simi = distances + k * i;
idx_t * idxi = labels + k * i;
if (metric_type == METRIC_INNER_PRODUCT) {
heap_heapify<HeapForIP> (k, simi, idxi);
} else {
heap_heapify<HeapForL2> (k, simi, idxi);
}
size_t nscan = 0;
for (size_t ik = 0; ik < nprobe; ik++) {
idx_t key = keysi[ik]; /* select the list */
if (key < 0) {
// not enough centroids for multiprobe
continue;
}
FAISS_THROW_IF_NOT_FMT
(key < (idx_t) ivf.nlist,
"Invalid key=%ld at ik=%ld nlist=%ld\n",
key, ik, ivf.nlist);
scanner->set_list (key, coarse_dis[i * nprobe + ik]);
nlistv++;
size_t list_size = ivf.invlists->list_size(key);
InvertedLists::ScopedCodes scodes (ivf.invlists, key);
std::unique_ptr<InvertedLists::ScopedIds> sids;
const Index::idx_t * ids = nullptr;
if (!store_pairs) {
sids.reset (new InvertedLists::ScopedIds (ivf.invlists, key));
ids = sids->get();
}
nheap += scanner->scan_codes (list_size, scodes.get(),
ids, simi, idxi, k);
nscan += list_size;
if (max_codes && nscan >= max_codes)
break;
}
ndis += nscan;
if (metric_type == METRIC_INNER_PRODUCT) {
heap_reorder<HeapForIP> (k, simi, idxi);
} else {
heap_reorder<HeapForL2> (k, simi, idxi);
}
} // parallel for
} // parallel
indexIVF_stats.nq += n;
indexIVF_stats.nlist += nlistv;
indexIVF_stats.ndis += ndis;
indexIVF_stats.nheap_updates += nheap;
}
template<class HammingComputer, bool store_pairs>
void search_knn_hamming_count(const IndexBinaryIVF& ivf,
size_t nx,
const uint8_t *x,
const idx_t *keys,
int k,
int32_t *distances,
idx_t *labels,
const IVFSearchParameters *params) {
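// Counting variant of the search: rather than maintaining a heap, each
// Hamming distance (at most d) indexes into one of d + 1 buckets holding
// up to k ids; results are then read off in increasing distance order,
// padding with -1 when fewer than k hits are found.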
const int nBuckets = ivf.d + 1;
std::vector<int> all_counters(nx * nBuckets, 0);
std::unique_ptr<idx_t[]> all_ids_per_dis(new idx_t[nx * nBuckets * k]);
long nprobe = params ? params->nprobe : ivf.nprobe;
long max_codes = params ? params->max_codes : ivf.max_codes;
std::vector<HCounterState<HammingComputer>> cs;
for (size_t i = 0; i < nx; ++i) {
cs.push_back(HCounterState<HammingComputer>(
all_counters.data() + i * nBuckets,
all_ids_per_dis.get() + i * nBuckets * k,
x + i * ivf.code_size,
ivf.d,
k
));
}
size_t nlistv = 0, ndis = 0;
#pragma omp parallel for reduction(+: nlistv, ndis)
for (size_t i = 0; i < nx; i++) {
const idx_t * keysi = keys + i * nprobe;
HCounterState<HammingComputer>& csi = cs[i];
size_t nscan = 0;
for (size_t ik = 0; ik < nprobe; ik++) {
idx_t key = keysi[ik]; /* select the list */
if (key < 0) {
// not enough centroids for multiprobe
continue;
}
FAISS_THROW_IF_NOT_FMT (
key < (idx_t) ivf.nlist,
"Invalid key=%ld at ik=%ld nlist=%ld\n",
key, ik, ivf.nlist);
nlistv++;
size_t list_size = ivf.invlists->list_size(key);
InvertedLists::ScopedCodes scodes (ivf.invlists, key);
const uint8_t *list_vecs = scodes.get();
const Index::idx_t *ids = store_pairs
? nullptr
: ivf.invlists->get_ids(key);
for (size_t j = 0; j < list_size; j++) {
const uint8_t * yj = list_vecs + ivf.code_size * j;
idx_t id = store_pairs ? (key << 32 | j) : ids[j];
csi.update_counter(yj, id);
}
if (ids)
ivf.invlists->release_ids (key, ids);
nscan += list_size;
if (max_codes && nscan >= max_codes)
break;
}
ndis += nscan;
int nres = 0;
for (int b = 0; b < nBuckets && nres < k; b++) {
for (int l = 0; l < csi.counters[b] && nres < k; l++) {
labels[i * k + nres] = csi.ids_per_dis[b * k + l];
distances[i * k + nres] = b;
nres++;
}
}
while (nres < k) {
labels[i * k + nres] = -1;
distances[i * k + nres] = std::numeric_limits<int32_t>::max();
++nres;
}
}
indexIVF_stats.nq += nx;
indexIVF_stats.nlist += nlistv;
indexIVF_stats.ndis += ndis;
}
template<bool store_pairs>
void search_knn_hamming_count_1 (
const IndexBinaryIVF& ivf,
size_t nx,
const uint8_t *x,
const idx_t *keys,
int k,
int32_t *distances,
idx_t *labels,
const IVFSearchParameters *params) {
switch (ivf.code_size) {
#define HANDLE_CS(cs) \
case cs: \
search_knn_hamming_count<HammingComputer ## cs, store_pairs>( \
ivf, nx, x, keys, k, distances, labels, params); \
break;
HANDLE_CS(4);
HANDLE_CS(8);
HANDLE_CS(16);
HANDLE_CS(20);
HANDLE_CS(32);
HANDLE_CS(64);
#undef HANDLE_CS
default:
if (ivf.code_size % 8 == 0) {
search_knn_hamming_count<HammingComputerM8, store_pairs>
(ivf, nx, x, keys, k, distances, labels, params);
} else if (ivf.code_size % 4 == 0) {
search_knn_hamming_count<HammingComputerM4, store_pairs>
(ivf, nx, x, keys, k, distances, labels, params);
} else {
search_knn_hamming_count<HammingComputerDefault, store_pairs>
(ivf, nx, x, keys, k, distances, labels, params);
}
break;
}
}
} // namespace
BinaryInvertedListScanner *IndexBinaryIVF::get_InvertedListScanner
(bool store_pairs) const
{
if (store_pairs) {
return select_IVFBinaryScannerL2<true> (code_size);
} else {
return select_IVFBinaryScannerL2<false> (code_size);
}
}
void IndexBinaryIVF::search_preassigned(idx_t n, const uint8_t *x, idx_t k,
const idx_t *idx,
const int32_t * coarse_dis,
int32_t *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params
) const {
if (use_heap) {
search_knn_hamming_heap (*this, n, x, k, idx, coarse_dis,
distances, labels, store_pairs,
params);
} else {
if (store_pairs) {
search_knn_hamming_count_1<true>
(*this, n, x, idx, k, distances, labels, params);
} else {
search_knn_hamming_count_1<false>
(*this, n, x, idx, k, distances, labels, params);
}
}
}
IndexBinaryIVF::~IndexBinaryIVF() {
if (own_invlists) {
delete invlists;
}
if (own_fields) {
delete quantizer;
}
}
} // namespace faiss

View File

@ -0,0 +1,211 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_BINARY_IVF_H
#define FAISS_INDEX_BINARY_IVF_H
#include <vector>
#include <faiss/IndexBinary.h>
#include <faiss/IndexIVF.h>
#include <faiss/Clustering.h>
#include <faiss/utils/Heap.h>
namespace faiss {
struct BinaryInvertedListScanner;
/** Index based on an inverted file (IVF)
*
* In the inverted file, the quantizer (an IndexBinary instance) provides a
* quantization index for each vector to be added. The quantization
* index maps to a list (aka inverted list or posting list), where the
* id of the vector is stored.
*
* Otherwise the object is similar to the IndexIVF
*/
struct IndexBinaryIVF : IndexBinary {
/// Access to the actual data
InvertedLists *invlists;
bool own_invlists;
size_t nprobe; ///< number of probes at query time
size_t max_codes; ///< max nb of codes to visit to do a query
/** Select between using a heap or counting to select the k smallest values
* when scanning inverted lists.
*/
bool use_heap = true;
/// map for direct access to the elements. Enables reconstruct().
bool maintain_direct_map;
std::vector<idx_t> direct_map;
IndexBinary *quantizer; ///< quantizer that maps vectors to inverted lists
size_t nlist; ///< number of possible key values
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
Index *clustering_index; ///< to override index used during clustering
/** The Inverted file takes a quantizer (an IndexBinary) on input,
* which implements the function mapping a vector to a list
* identifier. The pointer is borrowed: the quantizer should not
* be deleted while the IndexBinaryIVF is in use.
*/
IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist);
IndexBinaryIVF();
~IndexBinaryIVF() override;
void reset() override;
/// Trains the quantizer
void train(idx_t n, const uint8_t *x) override;
void add(idx_t n, const uint8_t *x) override;
void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids) override;
/// same as add_with_ids, with precomputed coarse quantizer
void add_core (idx_t n, const uint8_t * x, const idx_t *xids,
const idx_t *precomputed_idx);
/** Search a set of vectors that are pre-quantized by the IVF
* quantizer. Fill in the corresponding heaps with the query
* results. search() calls this.
*
* @param n nb of vectors to query
* @param x query vectors, size nx * d
* @param assign coarse quantization indices, size nx * nprobe
* @param centroid_dis
* distances to coarse centroids, size nx * nprobe
* @param distance
* output distances, size n * k
* @param labels output labels, size n * k
* @param store_pairs store inv list index + inv list offset
* (packed in upper/lower 32 bits of the result)
* instead of ids (used for reranking)
* @param params used to override the object's search parameters
*/
void search_preassigned(idx_t n, const uint8_t *x, idx_t k,
const idx_t *assign,
const int32_t *centroid_dis,
int32_t *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const;
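/* Illustrative note (not in the original header): when store_pairs is
 * set, a returned label packs the probe location and can be decoded as
 *   idx_t list_no = label >> 32;          // inverted list index
 *   idx_t offset  = label & 0xffffffff;   // offset within that list
 */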
virtual BinaryInvertedListScanner *get_InvertedListScanner (
bool store_pairs=false) const;
/** assign the vectors, then call search_preassigned */
virtual void search(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels) const override;
void reconstruct(idx_t key, uint8_t *recons) const override;
/** Reconstruct a subset of the indexed vectors.
*
* Overrides default implementation to bypass reconstruct() which requires
* direct_map to be maintained.
*
* @param i0 first vector to reconstruct
* @param ni nb of vectors to reconstruct
* @param recons output array of reconstructed vectors, size ni * d / 8
*/
void reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const override;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* Overrides default implementation to avoid having to maintain direct_map
* and instead fetch the code offsets through the `store_pairs` flag in
* search_preassigned().
*
* @param recons reconstructed vectors size (n, k, d / 8)
*/
void search_and_reconstruct(idx_t n, const uint8_t *x, idx_t k,
int32_t *distances, idx_t *labels,
uint8_t *recons) const override;
/** Reconstruct a vector given the location in terms of (inv list index +
* inv list offset) instead of the id.
*
* Useful for reconstructing when the direct_map is not maintained and
* the inv list offset is computed by search_preassigned() with
* `store_pairs` set.
*/
virtual void reconstruct_from_offset(idx_t list_no, idx_t offset,
uint8_t* recons) const;
/// Dataset manipulation functions
size_t remove_ids(const IDSelector& sel) override;
/** moves the entries from another dataset to self. On output,
* other is empty. add_id is added to all moved ids (for
* sequential ids, this would be this->ntotal */
virtual void merge_from(IndexBinaryIVF& other, idx_t add_id);
size_t get_list_size(size_t list_no) const
{ return invlists->list_size(list_no); }
/** initialize a direct map
*
* @param new_maintain_direct_map if true, create a direct map,
* else clear it
*/
void make_direct_map(bool new_maintain_direct_map=true);
void replace_invlists(InvertedLists *il, bool own=false);
};
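/* Usage sketch (illustrative, not part of the original header; assumes
 * IndexBinaryFlat from <faiss/IndexBinaryFlat.h> as coarse quantizer):
 *
 *   size_t d = 256;                              // bits, multiple of 8
 *   faiss::IndexBinaryFlat coarse(d);
 *   faiss::IndexBinaryIVF index(&coarse, d, 64); // nlist = 64
 *   index.train(nt, xt);            // xt: nt * (d / 8) uint8_t codes
 *   index.add(nb, xb);
 *   index.nprobe = 4;               // visit 4 inverted lists per query
 *   index.search(nq, xq, k, distances, labels);  // int32_t distances
 */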
struct BinaryInvertedListScanner {
using idx_t = Index::idx_t;
/// from now on we handle this query.
virtual void set_query (const uint8_t *query_vector) = 0;
/// following codes come from this inverted list
virtual void set_list (idx_t list_no, uint8_t coarse_dis) = 0;
/// compute a single query-to-code distance
virtual uint32_t distance_to_code (const uint8_t *code) const = 0;
/** compute the distances to codes. (distances, labels) should be
* organized as a min- or max-heap
*
* @param n number of codes to scan
* @param codes codes to scan (n * code_size)
* @param ids corresponding ids (ignored if store_pairs)
* @param distances heap distances (size k)
* @param labels heap labels (size k)
* @param k heap size
*/
virtual size_t scan_codes (size_t n,
const uint8_t *codes,
const idx_t *ids,
int32_t *distances, idx_t *labels,
size_t k) const = 0;
virtual ~BinaryInvertedListScanner () {}
};
} // namespace faiss
#endif // FAISS_INDEX_BINARY_IVF_H

View File

@ -0,0 +1,508 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexFlat.h>
#include <cstring>
#include <faiss/utils/distances.h>
#include <faiss/utils/extra_distances.h>
#include <faiss/utils/utils.h>
#include <faiss/utils/Heap.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
IndexFlat::IndexFlat (idx_t d, MetricType metric):
Index(d, metric)
{
}
void IndexFlat::add (idx_t n, const float *x) {
xb.insert(xb.end(), x, x + n * d);
ntotal += n;
}
void IndexFlat::reset() {
xb.clear();
ntotal = 0;
}
void IndexFlat::search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const
{
// we see the distances and labels as heaps
if (metric_type == METRIC_INNER_PRODUCT) {
float_minheap_array_t res = {
size_t(n), size_t(k), labels, distances};
knn_inner_product (x, xb.data(), d, n, ntotal, &res);
} else if (metric_type == METRIC_L2) {
float_maxheap_array_t res = {
size_t(n), size_t(k), labels, distances};
knn_L2sqr (x, xb.data(), d, n, ntotal, &res);
} else {
float_maxheap_array_t res = {
size_t(n), size_t(k), labels, distances};
knn_extra_metrics (x, xb.data(), d, n, ntotal,
metric_type, metric_arg,
&res);
}
}
void IndexFlat::range_search (idx_t n, const float *x, float radius,
RangeSearchResult *result) const
{
switch (metric_type) {
case METRIC_INNER_PRODUCT:
range_search_inner_product (x, xb.data(), d, n, ntotal,
radius, result);
break;
case METRIC_L2:
range_search_L2sqr (x, xb.data(), d, n, ntotal, radius, result);
break;
default:
FAISS_THROW_MSG("metric type not supported");
}
}
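/* Usage sketch (illustrative): results land in a RangeSearchResult,
 * whose lims array delimits each query's hits:
 *
 *   faiss::RangeSearchResult res(nq);
 *   index.range_search(nq, xq, radius, &res);
 *   for (size_t j = res.lims[0]; j < res.lims[1]; j++)   // query 0
 *       printf("id %ld dist %g\n", (long)res.labels[j], res.distances[j]);
 */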
void IndexFlat::compute_distance_subset (
idx_t n,
const float *x,
idx_t k,
float *distances,
const idx_t *labels) const
{
switch (metric_type) {
case METRIC_INNER_PRODUCT:
fvec_inner_products_by_idx (
distances,
x, xb.data(), labels, d, n, k);
break;
case METRIC_L2:
fvec_L2sqr_by_idx (
distances,
x, xb.data(), labels, d, n, k);
break;
default:
FAISS_THROW_MSG("metric type not supported");
}
}
size_t IndexFlat::remove_ids (const IDSelector & sel)
{
idx_t j = 0;
for (idx_t i = 0; i < ntotal; i++) {
if (sel.is_member (i)) {
// should be removed
} else {
if (i > j) {
memmove (&xb[d * j], &xb[d * i], sizeof(xb[0]) * d);
}
j++;
}
}
size_t nremove = ntotal - j;
if (nremove > 0) {
ntotal = j;
xb.resize (ntotal * d);
}
return nremove;
}
namespace {
struct FlatL2Dis : DistanceComputer {
size_t d;
Index::idx_t nb;
const float *q;
const float *b;
size_t ndis;
float operator () (idx_t i) override {
ndis++;
return fvec_L2sqr(q, b + i * d, d);
}
float symmetric_dis(idx_t i, idx_t j) override {
return fvec_L2sqr(b + j * d, b + i * d, d);
}
explicit FlatL2Dis(const IndexFlat& storage, const float *q = nullptr)
: d(storage.d),
nb(storage.ntotal),
q(q),
b(storage.xb.data()),
ndis(0) {}
void set_query(const float *x) override {
q = x;
}
};
struct FlatIPDis : DistanceComputer {
size_t d;
Index::idx_t nb;
const float *q;
const float *b;
size_t ndis;
float operator () (idx_t i) override {
ndis++;
return fvec_inner_product (q, b + i * d, d);
}
float symmetric_dis(idx_t i, idx_t j) override {
return fvec_inner_product (b + j * d, b + i * d, d);
}
explicit FlatIPDis(const IndexFlat& storage, const float *q = nullptr)
: d(storage.d),
nb(storage.ntotal),
q(q),
b(storage.xb.data()),
ndis(0) {}
void set_query(const float *x) override {
q = x;
}
};
} // namespace
DistanceComputer * IndexFlat::get_distance_computer() const {
if (metric_type == METRIC_L2) {
return new FlatL2Dis(*this);
} else if (metric_type == METRIC_INNER_PRODUCT) {
return new FlatIPDis(*this);
} else {
return get_extra_distance_computer (d, metric_type, metric_arg,
ntotal, xb.data());
}
}
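/* Usage sketch (illustrative): a DistanceComputer gives random access
 * to query-to-database distances without running a full search:
 *
 *   std::unique_ptr<faiss::DistanceComputer> dc(
 *       index.get_distance_computer());
 *   dc->set_query(xq);          // query vector, size d
 *   float d0 = (*dc)(0);        // distance from xq to stored vector 0
 */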
void IndexFlat::reconstruct (idx_t key, float * recons) const
{
memcpy (recons, &(xb[key * d]), sizeof(*recons) * d);
}
/* The standalone codec interface */
size_t IndexFlat::sa_code_size () const
{
return sizeof(float) * d;
}
void IndexFlat::sa_encode (idx_t n, const float *x, uint8_t *bytes) const
{
memcpy (bytes, x, sizeof(float) * d * n);
}
void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const
{
memcpy (x, bytes, sizeof(float) * d * n);
}
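/* Illustrative round trip (not in the original source): for IndexFlat
 * the codec is a plain memcpy, so encoding is lossless:
 *
 *   faiss::IndexFlatL2 index(d);
 *   std::vector<uint8_t> code(index.sa_code_size());  // 4 * d bytes
 *   index.sa_encode(1, x, code.data());
 *   index.sa_decode(1, code.data(), x_out);           // x_out == x
 */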
/***************************************************
* IndexFlatL2BaseShift
***************************************************/
IndexFlatL2BaseShift::IndexFlatL2BaseShift (idx_t d, size_t nshift, const float *shift):
IndexFlatL2 (d), shift (nshift)
{
memcpy (this->shift.data(), shift, sizeof(float) * nshift);
}
void IndexFlatL2BaseShift::search (
idx_t n,
const float *x,
idx_t k,
float *distances,
idx_t *labels) const
{
FAISS_THROW_IF_NOT (shift.size() == ntotal);
float_maxheap_array_t res = {
size_t(n), size_t(k), labels, distances};
knn_L2sqr_base_shift (x, xb.data(), d, n, ntotal, &res, shift.data());
}
/***************************************************
* IndexRefineFlat
***************************************************/
IndexRefineFlat::IndexRefineFlat (Index *base_index):
Index (base_index->d, base_index->metric_type),
refine_index (base_index->d, base_index->metric_type),
base_index (base_index), own_fields (false),
k_factor (1)
{
is_trained = base_index->is_trained;
FAISS_THROW_IF_NOT_MSG (base_index->ntotal == 0,
"base_index should be empty in the beginning");
}
IndexRefineFlat::IndexRefineFlat () {
base_index = nullptr;
own_fields = false;
k_factor = 1;
}
void IndexRefineFlat::train (idx_t n, const float *x)
{
base_index->train (n, x);
is_trained = true;
}
void IndexRefineFlat::add (idx_t n, const float *x) {
FAISS_THROW_IF_NOT (is_trained);
base_index->add (n, x);
refine_index.add (n, x);
ntotal = refine_index.ntotal;
}
void IndexRefineFlat::reset ()
{
base_index->reset ();
refine_index.reset ();
ntotal = 0;
}
namespace {
typedef faiss::Index::idx_t idx_t;
template<class C>
static void reorder_2_heaps (
idx_t n,
idx_t k, idx_t *labels, float *distances,
idx_t k_base, const idx_t *base_labels, const float *base_distances)
{
#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
idx_t *idxo = labels + i * k;
float *diso = distances + i * k;
const idx_t *idxi = base_labels + i * k_base;
const float *disi = base_distances + i * k_base;
heap_heapify<C> (k, diso, idxo, disi, idxi, k);
if (k_base != k) { // add remaining elements
heap_addn<C> (k, diso, idxo, disi + k, idxi + k, k_base - k);
}
heap_reorder<C> (k, diso, idxo);
}
}
} // namespace
void IndexRefineFlat::search (
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const
{
FAISS_THROW_IF_NOT (is_trained);
idx_t k_base = idx_t (k * k_factor);
idx_t * base_labels = labels;
float * base_distances = distances;
ScopeDeleter<idx_t> del1;
ScopeDeleter<float> del2;
if (k != k_base) {
base_labels = new idx_t [n * k_base];
del1.set (base_labels);
base_distances = new float [n * k_base];
del2.set (base_distances);
}
base_index->search (n, x, k_base, base_distances, base_labels);
for (int i = 0; i < n * k_base; i++)
assert (base_labels[i] >= -1 &&
base_labels[i] < ntotal);
// compute refined distances
refine_index.compute_distance_subset (
n, x, k_base, base_distances, base_labels);
// sort and store result
if (metric_type == METRIC_L2) {
typedef CMax <float, idx_t> C;
reorder_2_heaps<C> (
n, k, labels, distances,
k_base, base_labels, base_distances);
} else if (metric_type == METRIC_INNER_PRODUCT) {
typedef CMin <float, idx_t> C;
reorder_2_heaps<C> (
n, k, labels, distances,
k_base, base_labels, base_distances);
} else {
FAISS_THROW_MSG("Metric type not supported");
}
}
IndexRefineFlat::~IndexRefineFlat ()
{
if (own_fields) delete base_index;
}
/***************************************************
* IndexFlat1D
***************************************************/
IndexFlat1D::IndexFlat1D (bool continuous_update):
IndexFlatL2 (1),
continuous_update (continuous_update)
{
}
/// if not continuous_update, call this between the last add and
/// the first search
void IndexFlat1D::update_permutation ()
{
perm.resize (ntotal);
if (ntotal < 1000000) {
fvec_argsort (ntotal, xb.data(), (size_t*)perm.data());
} else {
fvec_argsort_parallel (ntotal, xb.data(), (size_t*)perm.data());
}
}
void IndexFlat1D::add (idx_t n, const float *x)
{
IndexFlatL2::add (n, x);
if (continuous_update)
update_permutation();
}
void IndexFlat1D::reset()
{
IndexFlatL2::reset();
perm.clear();
}
void IndexFlat1D::search (
idx_t n,
const float *x,
idx_t k,
float *distances,
idx_t *labels) const
{
FAISS_THROW_IF_NOT_MSG (perm.size() == ntotal,
"Call update_permutation before search");
#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
float q = x[i]; // query
float *D = distances + i * k;
idx_t *I = labels + i * k;
// binary search
idx_t i0 = 0, i1 = ntotal;
idx_t wp = 0;
if (xb[perm[i0]] > q) {
i1 = 0;
goto finish_right;
}
if (xb[perm[i1 - 1]] <= q) {
i0 = i1 - 1;
goto finish_left;
}
while (i0 + 1 < i1) {
idx_t imed = (i0 + i1) / 2;
if (xb[perm[imed]] <= q) i0 = imed;
else i1 = imed;
}
// query is between xb[perm[i0]] and xb[perm[i1]]
// expand to nearest neighbors
while (wp < k) {
float xleft = xb[perm[i0]];
float xright = xb[perm[i1]];
if (q - xleft < xright - q) {
D[wp] = q - xleft;
I[wp] = perm[i0];
i0--; wp++;
if (i0 < 0) { goto finish_right; }
} else {
D[wp] = xright - q;
I[wp] = perm[i1];
i1++; wp++;
if (i1 >= ntotal) { goto finish_left; }
}
}
goto done;
finish_right:
// grow to the right from i1
while (wp < k) {
if (i1 < ntotal) {
D[wp] = xb[perm[i1]] - q;
I[wp] = perm[i1];
i1++;
} else {
D[wp] = std::numeric_limits<float>::infinity();
I[wp] = -1;
}
wp++;
}
goto done;
finish_left:
// grow to the left from i0
while (wp < k) {
if (i0 >= 0) {
D[wp] = q - xb[perm[i0]];
I[wp] = perm[i0];
i0--;
} else {
D[wp] = std::numeric_limits<float>::infinity();
I[wp] = -1;
}
wp++;
}
done: ;
}
}
} // namespace faiss

View File

@ -0,0 +1,175 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef INDEX_FLAT_H
#define INDEX_FLAT_H
#include <vector>
#include <faiss/Index.h>
namespace faiss {
/** Index that stores the full vectors and performs exhaustive search */
struct IndexFlat: Index {
/// database vectors, size ntotal * d
std::vector<float> xb;
explicit IndexFlat (idx_t d, MetricType metric = METRIC_L2);
void add(idx_t n, const float* x) override;
void reset() override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void range_search(
idx_t n,
const float* x,
float radius,
RangeSearchResult* result) const override;
void reconstruct(idx_t key, float* recons) const override;
/** compute distance with a subset of vectors
*
* @param x query vectors, size n * d
* @param labels indices of the vectors that should be compared
* for each query vector, size n * k
* @param distances
* corresponding output distances, size n * k
*/
void compute_distance_subset (
idx_t n,
const float *x,
idx_t k,
float *distances,
const idx_t *labels) const;
/** remove some ids. NB: because of the structure of the
* index, the semantics of this operation are
* different from the usual ones: the new ids are shifted */
size_t remove_ids(const IDSelector& sel) override;
IndexFlat () {}
DistanceComputer * get_distance_computer() const override;
/* The standalone codec interface (just memcopies in this case) */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
};
struct IndexFlatIP:IndexFlat {
explicit IndexFlatIP (idx_t d): IndexFlat (d, METRIC_INNER_PRODUCT) {}
IndexFlatIP () {}
};
struct IndexFlatL2:IndexFlat {
explicit IndexFlatL2 (idx_t d): IndexFlat (d, METRIC_L2) {}
IndexFlatL2 () {}
};
// same as an IndexFlatL2 but a value is subtracted from each distance
struct IndexFlatL2BaseShift: IndexFlatL2 {
std::vector<float> shift;
IndexFlatL2BaseShift (idx_t d, size_t nshift, const float *shift);
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
};
/** Index that queries in a base_index (a fast one) and refines the
* results with an exact search, hopefully improving the results.
*/
struct IndexRefineFlat: Index {
/// storage for full vectors
IndexFlat refine_index;
/// faster index to pre-select the vectors that should be filtered
Index *base_index;
bool own_fields; ///< should the base index be deallocated?
/// factor between k requested in search and the k requested from
/// the base_index (should be >= 1)
float k_factor;
explicit IndexRefineFlat (Index *base_index);
IndexRefineFlat ();
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
void reset() override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
~IndexRefineFlat() override;
};
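/* Usage sketch (illustrative, not part of the original header; assumes
 * IndexPQ from <faiss/IndexPQ.h> as the fast base index):
 *
 *   faiss::IndexPQ base(d, 8, 8);        // coarse but compact
 *   faiss::IndexRefineFlat index(&base);
 *   index.k_factor = 4;                  // rerank 4 * k base candidates
 *   index.train(nt, xt);
 *   index.add(nb, xb);                   // stored in both indexes
 *   index.search(nq, xq, k, distances, labels);
 */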
/// optimized version for 1D "vectors"
struct IndexFlat1D:IndexFlatL2 {
bool continuous_update; ///< is the permutation updated continuously?
std::vector<idx_t> perm; ///< sorted database indices
explicit IndexFlat1D (bool continuous_update=true);
/// if not continuous_update, call this between the last add and
/// the first search
void update_permutation ();
void add(idx_t n, const float* x) override;
void reset() override;
/// Warn: the distances returned are L1 not L2
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
};
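/* Usage sketch (illustrative): with continuous_update == false the
 * sorted permutation must be rebuilt once after the last add:
 *
 *   faiss::IndexFlat1D index(false);
 *   index.add(nb, xb);              // xb: nb scalar values
 *   index.update_permutation();     // mandatory before searching
 *   index.search(nq, xq, k, distances, labels);  // L1 distances
 */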
}
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,170 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include <faiss/impl/HNSW.h>
#include <faiss/IndexFlat.h>
#include <faiss/IndexPQ.h>
#include <faiss/IndexScalarQuantizer.h>
#include <faiss/utils/utils.h>
namespace faiss {
struct IndexHNSW;
struct ReconstructFromNeighbors {
typedef Index::idx_t idx_t;
typedef HNSW::storage_idx_t storage_idx_t;
const IndexHNSW & index;
size_t M; // number of neighbors
size_t k; // number of codebook entries
size_t nsq; // number of subvectors
size_t code_size;
int k_reorder; // nb to reorder. -1 = all
std::vector<float> codebook; // size nsq * k * (M + 1)
std::vector<uint8_t> codes; // size ntotal * code_size
size_t ntotal;
size_t d, dsub; // derived values
explicit ReconstructFromNeighbors(const IndexHNSW& index,
size_t k=256, size_t nsq=1);
/// codes must be added in the correct order and the IndexHNSW
/// must be populated and sorted
void add_codes(size_t n, const float *x);
size_t compute_distances(size_t n, const idx_t *shortlist,
const float *query, float *distances) const;
/// called by add_codes
void estimate_code(const float *x, storage_idx_t i, uint8_t *code) const;
/// called by compute_distances
void reconstruct(storage_idx_t i, float *x, float *tmp) const;
void reconstruct_n(storage_idx_t n0, storage_idx_t ni, float *x) const;
/// get the (M+1)-by-d table of neighbor coordinates for vector i
void get_neighbor_table(storage_idx_t i, float *out) const;
};
/** The HNSW index is a normal random-access index with a HNSW
* link structure built on top */
struct IndexHNSW : Index {
typedef HNSW::storage_idx_t storage_idx_t;
// the link structure
HNSW hnsw;
// the sequential storage
bool own_fields;
Index *storage;
ReconstructFromNeighbors *reconstruct_from_neighbors;
explicit IndexHNSW (int d = 0, int M = 32);
explicit IndexHNSW (Index *storage, int M = 32);
~IndexHNSW() override;
void add(idx_t n, const float *x) override;
/// Trains the storage if needed
void train(idx_t n, const float* x) override;
/// entry point for search
void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
void reconstruct(idx_t key, float* recons) const override;
void reset () override;
void shrink_level_0_neighbors(int size);
/** Perform search only on level 0, given the starting points for
* each vertex.
*
* @param search_type 1:perform one search per nprobe, 2: enqueue
* all entry points
*/
void search_level_0(idx_t n, const float *x, idx_t k,
const storage_idx_t *nearest, const float *nearest_d,
float *distances, idx_t *labels, int nprobe = 1,
int search_type = 1) const;
/// alternative graph building
void init_level_0_from_knngraph(
int k, const float *D, const idx_t *I);
/// alternative graph building
void init_level_0_from_entry_points(
int npt, const storage_idx_t *points,
const storage_idx_t *nearests);
// reorder links from nearest to farthest
void reorder_links();
void link_singletons();
};
/** Flat index topped with a HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWFlat : IndexHNSW {
IndexHNSWFlat();
IndexHNSWFlat(int d, int M);
};
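/* Usage sketch (illustrative; efSearch is the standard HNSW
 * search-time beam width):
 *
 *   faiss::IndexHNSWFlat index(d, 32);   // M = 32 links per node
 *   index.add(nb, xb);                   // graph is built during add
 *   index.hnsw.efSearch = 64;            // wider beam, better recall
 *   index.search(nq, xq, k, distances, labels);
 */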
/** PQ index topped with a HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWPQ : IndexHNSW {
IndexHNSWPQ();
IndexHNSWPQ(int d, int pq_m, int M);
void train(idx_t n, const float* x) override;
};
/** SQ index topped with a HNSW structure to access elements
* more efficiently.
*/
struct IndexHNSWSQ : IndexHNSW {
IndexHNSWSQ();
IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M);
};
/** 2-level code structure with fast random access
*/
struct IndexHNSW2Level : IndexHNSW {
IndexHNSW2Level();
IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M);
void flip_to_ivf();
/// entry point for search
void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
};
} // namespace faiss

View File

@ -0,0 +1,966 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVF.h>
#include <omp.h>
#include <cstdio>
#include <memory>
#include <iostream>
#include <faiss/utils/utils.h>
#include <faiss/utils/hamming.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexFlat.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
using ScopedIds = InvertedLists::ScopedIds;
using ScopedCodes = InvertedLists::ScopedCodes;
/*****************************************
* Level1Quantizer implementation
******************************************/
Level1Quantizer::Level1Quantizer (Index * quantizer, size_t nlist):
quantizer (quantizer),
nlist (nlist),
quantizer_trains_alone (0),
own_fields (false),
clustering_index (nullptr)
{
// here we set a low # iterations because this is typically used
// for large clusterings (nb this is not used for the MultiIndex,
// for which quantizer_trains_alone = true)
cp.niter = 10;
}
Level1Quantizer::Level1Quantizer ():
quantizer (nullptr),
nlist (0),
quantizer_trains_alone (0), own_fields (false),
clustering_index (nullptr)
{}
Level1Quantizer::~Level1Quantizer ()
{
if (own_fields) {
if(quantizer == quantizer_backup) {
if(quantizer != nullptr) {
delete quantizer;
}
} else {
if(quantizer != nullptr) {
delete quantizer;
}
if(quantizer_backup != nullptr) {
delete quantizer_backup;
}
}
quantizer = nullptr;
quantizer_backup = nullptr;
}
}
void Level1Quantizer::train_q1 (size_t n, const float *x, bool verbose, MetricType metric_type)
{
size_t d = quantizer->d;
if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
if (verbose)
printf ("IVF quantizer does not need training.\n");
} else if (quantizer_trains_alone == 1) {
if (verbose)
printf ("IVF quantizer trains alone...\n");
quantizer->train (n, x);
quantizer->verbose = verbose;
FAISS_THROW_IF_NOT_MSG (quantizer->ntotal == nlist,
"nlist not consistent with quantizer size");
} else if (quantizer_trains_alone == 0) {
if (verbose)
printf ("Training level-1 quantizer on %ld vectors in %ldD\n",
n, d);
Clustering clus (d, nlist, cp);
quantizer->reset();
if (clustering_index) {
clus.train (n, x, *clustering_index);
quantizer->add (nlist, clus.centroids.data());
} else {
clus.train (n, x, *quantizer);
}
quantizer->is_trained = true;
} else if (quantizer_trains_alone == 2) {
if (verbose)
printf (
"Training L2 quantizer on %ld vectors in %ldD%s\n",
n, d,
clustering_index ? "(user provided index)" : "");
FAISS_THROW_IF_NOT (metric_type == METRIC_L2);
Clustering clus (d, nlist, cp);
if (!clustering_index) {
IndexFlatL2 assigner (d);
clus.train(n, x, assigner);
} else {
clus.train(n, x, *clustering_index);
}
if (verbose)
printf ("Adding centroids to quantizer\n");
quantizer->add (nlist, clus.centroids.data());
}
}
size_t Level1Quantizer::coarse_code_size () const
{
size_t nl = nlist - 1;
size_t nbyte = 0;
while (nl > 0) {
nbyte ++;
nl >>= 8;
}
return nbyte;
}
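/* Worked example (illustrative): nlist = 1024 gives nl = 1023, which
 * needs two iterations (1023 >> 8 == 3, 3 >> 8 == 0), so list ids are
 * encoded on 2 bytes; nlist = 256 fits on a single byte. */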
void Level1Quantizer::encode_listno (Index::idx_t list_no, uint8_t *code) const
{
// little endian
size_t nl = nlist - 1;
while (nl > 0) {
*code++ = list_no & 0xff;
list_no >>= 8;
nl >>= 8;
}
}
Index::idx_t Level1Quantizer::decode_listno (const uint8_t *code) const
{
size_t nl = nlist - 1;
int64_t list_no = 0;
int nbit = 0;
while (nl > 0) {
list_no |= int64_t(*code++) << nbit;
nbit += 8;
nl >>= 8;
}
FAISS_THROW_IF_NOT (list_no >= 0 && list_no < nlist);
return list_no;
}
/*****************************************
* IndexIVF implementation
******************************************/
IndexIVF::IndexIVF (Index * quantizer, size_t d,
size_t nlist, size_t code_size,
MetricType metric):
Index (d, metric),
Level1Quantizer (quantizer, nlist),
invlists (new ArrayInvertedLists (nlist, code_size)),
own_invlists (true),
code_size (code_size),
nprobe (1),
max_codes (0),
parallel_mode (0),
maintain_direct_map (false)
{
FAISS_THROW_IF_NOT (d == quantizer->d);
is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
// Spherical by default if the metric is inner_product
if (metric_type == METRIC_INNER_PRODUCT) {
cp.spherical = true;
}
}
IndexIVF::IndexIVF ():
invlists (nullptr), own_invlists (false),
code_size (0),
nprobe (1), max_codes (0), parallel_mode (0),
maintain_direct_map (false)
{}
void IndexIVF::add (idx_t n, const float * x)
{
add_with_ids (n, x, nullptr);
}
void IndexIVF::add_with_ids (idx_t n, const float * x, const idx_t *xids)
{
// do some blocking to avoid excessive allocs
idx_t bs = 65536;
if (n > bs) {
for (idx_t i0 = 0; i0 < n; i0 += bs) {
idx_t i1 = std::min (n, i0 + bs);
if (verbose) {
printf(" IndexIVF::add_with_ids %ld:%ld\n", i0, i1);
}
add_with_ids (i1 - i0, x + i0 * d,
xids ? xids + i0 : nullptr);
}
return;
}
FAISS_THROW_IF_NOT (is_trained);
std::unique_ptr<idx_t []> idx(new idx_t[n]);
quantizer->assign (n, x, idx.get());
size_t nadd = 0, nminus1 = 0;
for (size_t i = 0; i < n; i++) {
if (idx[i] < 0) nminus1++;
}
std::unique_ptr<uint8_t []> flat_codes(new uint8_t [n * code_size]);
encode_vectors (n, x, idx.get(), flat_codes.get());
#pragma omp parallel reduction(+: nadd)
{
int nt = omp_get_num_threads();
int rank = omp_get_thread_num();
// each thread takes care of a subset of lists
for (size_t i = 0; i < n; i++) {
idx_t list_no = idx [i];
if (list_no >= 0 && list_no % nt == rank) {
idx_t id = xids ? xids[i] : ntotal + i;
invlists->add_entry (list_no, id,
flat_codes.get() + i * code_size);
nadd++;
}
}
}
if (verbose) {
printf(" added %ld / %ld vectors (%ld -1s)\n", nadd, n, nminus1);
}
ntotal += n;
}
void IndexIVF::to_readonly() {
if (is_readonly()) return;
auto readonly_lists = this->invlists->to_readonly();
if (!readonly_lists) return;
this->replace_invlists(readonly_lists, true);
}
bool IndexIVF::is_readonly() const {
return this->invlists->is_readonly();
}
void IndexIVF::backup_quantizer() {
this->quantizer_backup = quantizer;
}
void IndexIVF::restore_quantizer() {
if(this->quantizer_backup != nullptr) {
quantizer = this->quantizer_backup;
}
}
void IndexIVF::make_direct_map (bool new_maintain_direct_map)
{
// nothing to do
if (new_maintain_direct_map == maintain_direct_map)
return;
if (new_maintain_direct_map) {
direct_map.resize (ntotal, -1);
for (size_t key = 0; key < nlist; key++) {
size_t list_size = invlists->list_size (key);
ScopedIds idlist (invlists, key);
for (long ofs = 0; ofs < list_size; ofs++) {
FAISS_THROW_IF_NOT_MSG (
0 <= idlist [ofs] && idlist[ofs] < ntotal,
"direct map supported only for seuquential ids");
direct_map [idlist [ofs]] = key << 32 | ofs;
}
}
} else {
direct_map.clear ();
}
maintain_direct_map = new_maintain_direct_map;
}
void IndexIVF::search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const
{
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
double t0 = getmillisecs();
quantizer->search (n, x, nprobe, coarse_dis.get(), idx.get());
indexIVF_stats.quantization_time += getmillisecs() - t0;
t0 = getmillisecs();
invlists->prefetch_lists (idx.get(), n * nprobe);
search_preassigned (n, x, k, idx.get(), coarse_dis.get(),
distances, labels, false);
indexIVF_stats.search_time += getmillisecs() - t0;
}
void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *keys,
const float *coarse_dis ,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params) const
{
long nprobe = params ? params->nprobe : this->nprobe;
long max_codes = params ? params->max_codes : this->max_codes;
size_t nlistv = 0, ndis = 0, nheap = 0;
using HeapForIP = CMin<float, idx_t>;
using HeapForL2 = CMax<float, idx_t>;
bool interrupt = false;
// don't start parallel section if single query
bool do_parallel =
parallel_mode == 0 ? n > 1 :
parallel_mode == 1 ? nprobe > 1 :
nprobe * n > 1;
#pragma omp parallel if(do_parallel) reduction(+: nlistv, ndis, nheap)
{
InvertedListScanner *scanner = get_InvertedListScanner(store_pairs);
ScopeDeleter1<InvertedListScanner> del(scanner);
/*****************************************************
* Depending on parallel_mode, there are two possible ways
* to organize the search. Here we define local functions
* that are in common between the two
******************************************************/
// initialize + reorder a result heap
auto init_result = [&](float *simi, idx_t *idxi) {
if (metric_type == METRIC_INNER_PRODUCT) {
heap_heapify<HeapForIP> (k, simi, idxi);
} else {
heap_heapify<HeapForL2> (k, simi, idxi);
}
};
auto reorder_result = [&] (float *simi, idx_t *idxi) {
if (metric_type == METRIC_INNER_PRODUCT) {
heap_reorder<HeapForIP> (k, simi, idxi);
} else {
heap_reorder<HeapForL2> (k, simi, idxi);
}
};
// single list scan using the current scanner (with query
// set properly) and storing results in simi and idxi
auto scan_one_list = [&] (idx_t key, float coarse_dis_i,
float *simi, idx_t *idxi) {
if (key < 0) {
// not enough centroids for multiprobe
return (size_t)0;
}
FAISS_THROW_IF_NOT_FMT (key < (idx_t) nlist,
"Invalid key=%ld nlist=%ld\n",
key, nlist);
size_t list_size = invlists->list_size(key);
// don't waste time on empty lists
if (list_size == 0) {
return (size_t)0;
}
scanner->set_list (key, coarse_dis_i);
nlistv++;
InvertedLists::ScopedCodes scodes (invlists, key);
std::unique_ptr<InvertedLists::ScopedIds> sids;
const Index::idx_t * ids = nullptr;
if (!store_pairs) {
sids.reset (new InvertedLists::ScopedIds (invlists, key));
ids = sids->get();
}
nheap += scanner->scan_codes (list_size, scodes.get(),
ids, simi, idxi, k);
return list_size;
};
/****************************************************
* Actual loops, depending on parallel_mode
****************************************************/
if (parallel_mode == 0) {
#pragma omp for
for (size_t i = 0; i < n; i++) {
if (interrupt) {
continue;
}
// loop over queries
scanner->set_query (x + i * d);
float * simi = distances + i * k;
idx_t * idxi = labels + i * k;
init_result (simi, idxi);
long nscan = 0;
// loop over probes
for (size_t ik = 0; ik < nprobe; ik++) {
nscan += scan_one_list (
keys [i * nprobe + ik],
coarse_dis[i * nprobe + ik],
simi, idxi
);
if (max_codes && nscan >= max_codes) {
break;
}
}
ndis += nscan;
reorder_result (simi, idxi);
if (InterruptCallback::is_interrupted ()) {
interrupt = true;
}
} // parallel for
} else if (parallel_mode == 1) {
std::vector <idx_t> local_idx (k);
std::vector <float> local_dis (k);
for (size_t i = 0; i < n; i++) {
scanner->set_query (x + i * d);
init_result (local_dis.data(), local_idx.data());
#pragma omp for schedule(dynamic)
for (size_t ik = 0; ik < nprobe; ik++) {
ndis += scan_one_list
(keys [i * nprobe + ik],
coarse_dis[i * nprobe + ik],
local_dis.data(), local_idx.data());
// can't do the test on max_codes
}
// merge thread-local results
float * simi = distances + i * k;
idx_t * idxi = labels + i * k;
#pragma omp single
init_result (simi, idxi);
#pragma omp barrier
#pragma omp critical
{
if (metric_type == METRIC_INNER_PRODUCT) {
heap_addn<HeapForIP>
(k, simi, idxi,
local_dis.data(), local_idx.data(), k);
} else {
heap_addn<HeapForL2>
(k, simi, idxi,
local_dis.data(), local_idx.data(), k);
}
}
#pragma omp barrier
#pragma omp single
reorder_result (simi, idxi);
}
} else {
FAISS_THROW_FMT ("parallel_mode %d not supported\n",
parallel_mode);
}
} // parallel section
if (interrupt) {
FAISS_THROW_MSG ("computation interrupted");
}
indexIVF_stats.nq += n;
indexIVF_stats.nlist += nlistv;
indexIVF_stats.ndis += ndis;
indexIVF_stats.nheap_updates += nheap;
}
void IndexIVF::range_search (idx_t nx, const float *x, float radius,
RangeSearchResult *result) const
{
std::unique_ptr<idx_t[]> keys (new idx_t[nx * nprobe]);
std::unique_ptr<float []> coarse_dis (new float[nx * nprobe]);
double t0 = getmillisecs();
quantizer->search (nx, x, nprobe, coarse_dis.get (), keys.get ());
indexIVF_stats.quantization_time += getmillisecs() - t0;
t0 = getmillisecs();
invlists->prefetch_lists (keys.get(), nx * nprobe);
range_search_preassigned (nx, x, radius, keys.get (), coarse_dis.get (),
result);
indexIVF_stats.search_time += getmillisecs() - t0;
}
void IndexIVF::range_search_preassigned (
idx_t nx, const float *x, float radius,
const idx_t *keys, const float *coarse_dis,
RangeSearchResult *result) const
{
size_t nlistv = 0, ndis = 0;
bool store_pairs = false;
std::vector<RangeSearchPartialResult *> all_pres (omp_get_max_threads());
#pragma omp parallel reduction(+: nlistv, ndis)
{
RangeSearchPartialResult pres(result);
std::unique_ptr<InvertedListScanner> scanner
(get_InvertedListScanner(store_pairs));
FAISS_THROW_IF_NOT (scanner.get ());
all_pres[omp_get_thread_num()] = &pres;
// prepare the list scanning function
auto scan_list_func = [&](size_t i, size_t ik, RangeQueryResult &qres) {
idx_t key = keys[i * nprobe + ik]; /* select the list */
if (key < 0) return;
FAISS_THROW_IF_NOT_FMT (
key < (idx_t) nlist,
"Invalid key=%ld at ik=%ld nlist=%ld\n",
key, ik, nlist);
const size_t list_size = invlists->list_size(key);
if (list_size == 0) return;
InvertedLists::ScopedCodes scodes (invlists, key);
InvertedLists::ScopedIds ids (invlists, key);
scanner->set_list (key, coarse_dis[i * nprobe + ik]);
nlistv++;
ndis += list_size;
scanner->scan_codes_range (list_size, scodes.get(),
ids.get(), radius, qres);
};
if (parallel_mode == 0) {
#pragma omp for
for (size_t i = 0; i < nx; i++) {
scanner->set_query (x + i * d);
RangeQueryResult & qres = pres.new_result (i);
for (size_t ik = 0; ik < nprobe; ik++) {
scan_list_func (i, ik, qres);
}
}
} else if (parallel_mode == 1) {
for (size_t i = 0; i < nx; i++) {
scanner->set_query (x + i * d);
RangeQueryResult & qres = pres.new_result (i);
#pragma omp for schedule(dynamic)
for (size_t ik = 0; ik < nprobe; ik++) {
scan_list_func (i, ik, qres);
}
}
} else if (parallel_mode == 2) {
std::vector<RangeQueryResult *> all_qres (nx);
RangeQueryResult *qres = nullptr;
#pragma omp for schedule(dynamic)
for (size_t iik = 0; iik < nx * nprobe; iik++) {
size_t i = iik / nprobe;
size_t ik = iik % nprobe;
if (qres == nullptr || qres->qno != i) {
FAISS_ASSERT (!qres || i > qres->qno);
qres = &pres.new_result (i);
scanner->set_query (x + i * d);
}
scan_list_func (i, ik, *qres);
}
} else {
FAISS_THROW_FMT ("parallel_mode %d not supported\n", parallel_mode);
}
if (parallel_mode == 0) {
pres.finalize ();
} else {
#pragma omp barrier
#pragma omp single
RangeSearchPartialResult::merge (all_pres, false);
#pragma omp barrier
}
}
indexIVF_stats.nq += nx;
indexIVF_stats.nlist += nlistv;
indexIVF_stats.ndis += ndis;
}
InvertedListScanner *IndexIVF::get_InvertedListScanner (
bool /*store_pairs*/) const
{
return nullptr;
}
void IndexIVF::reconstruct (idx_t key, float* recons) const
{
FAISS_THROW_IF_NOT_MSG (direct_map.size() == ntotal,
"direct map is not initialized");
FAISS_THROW_IF_NOT_MSG (key >= 0 && key < direct_map.size(),
"invalid key");
idx_t list_no = direct_map[key] >> 32;
idx_t offset = direct_map[key] & 0xffffffff;
reconstruct_from_offset (list_no, offset, recons);
}
void IndexIVF::reconstruct_n (idx_t i0, idx_t ni, float* recons) const
{
FAISS_THROW_IF_NOT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
for (idx_t list_no = 0; list_no < nlist; list_no++) {
size_t list_size = invlists->list_size (list_no);
ScopedIds idlist (invlists, list_no);
for (idx_t offset = 0; offset < list_size; offset++) {
idx_t id = idlist[offset];
if (!(id >= i0 && id < i0 + ni)) {
continue;
}
float* reconstructed = recons + (id - i0) * d;
reconstruct_from_offset (list_no, offset, reconstructed);
}
}
}
/* standalone codec interface */
size_t IndexIVF::sa_code_size () const
{
size_t coarse_size = coarse_code_size();
return code_size + coarse_size;
}
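/* Worked example (illustrative): with nlist = 1024 the coarse part
 * takes 2 bytes (see coarse_code_size above), so an IVF index with
 * 32-byte vector codes reports sa_code_size() == 34. */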
void IndexIVF::sa_encode (idx_t n, const float *x,
uint8_t *bytes) const
{
FAISS_THROW_IF_NOT (is_trained);
std::unique_ptr<int64_t []> idx (new int64_t [n]);
quantizer->assign (n, x, idx.get());
encode_vectors (n, x, idx.get(), bytes, true);
}
void IndexIVF::search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const
{
idx_t * idx = new idx_t [n * nprobe];
ScopeDeleter<idx_t> del (idx);
float * coarse_dis = new float [n * nprobe];
ScopeDeleter<float> del2 (coarse_dis);
quantizer->search (n, x, nprobe, coarse_dis, idx);
invlists->prefetch_lists (idx, n * nprobe);
// search_preassigned() with `store_pairs` enabled to obtain the list_no
// and offset into `codes` for reconstruction
search_preassigned (n, x, k, idx, coarse_dis,
distances, labels, true /* store_pairs */);
for (idx_t i = 0; i < n; ++i) {
for (idx_t j = 0; j < k; ++j) {
idx_t ij = i * k + j;
idx_t key = labels[ij];
float* reconstructed = recons + ij * d;
if (key < 0) {
// Fill with NaNs
memset(reconstructed, -1, sizeof(*reconstructed) * d);
} else {
int list_no = key >> 32;
int offset = key & 0xffffffff;
// Update label to the actual id
labels[ij] = invlists->get_single_id (list_no, offset);
reconstruct_from_offset (list_no, offset, reconstructed);
}
}
}
}
void IndexIVF::reconstruct_from_offset(
int64_t /*list_no*/,
int64_t /*offset*/,
float* /*recons*/) const {
FAISS_THROW_MSG ("reconstruct_from_offset not implemented");
}
void IndexIVF::reset ()
{
direct_map.clear ();
invlists->reset ();
ntotal = 0;
}
size_t IndexIVF::remove_ids (const IDSelector & sel)
{
FAISS_THROW_IF_NOT_MSG (!maintain_direct_map,
"direct map remove not implemented");
std::vector<idx_t> toremove(nlist);
#pragma omp parallel for
for (idx_t i = 0; i < nlist; i++) {
idx_t l0 = invlists->list_size (i), l = l0, j = 0;
ScopedIds idsi (invlists, i);
while (j < l) {
if (sel.is_member (idsi[j])) {
l--;
invlists->update_entry (
i, j,
invlists->get_single_id (i, l),
ScopedCodes (invlists, i, l).get());
} else {
j++;
}
}
toremove[i] = l0 - l;
}
// this will not run well in parallel on ondisk because of possible shrinks
size_t nremove = 0;
for (idx_t i = 0; i < nlist; i++) {
if (toremove[i] > 0) {
nremove += toremove[i];
invlists->resize(
i, invlists->list_size(i) - toremove[i]);
}
}
ntotal -= nremove;
return nremove;
}
void IndexIVF::train (idx_t n, const float *x)
{
if (verbose)
printf ("Training level-1 quantizer\n");
train_q1 (n, x, verbose, metric_type);
if (verbose)
printf ("Training IVF residual\n");
train_residual (n, x);
is_trained = true;
}
void IndexIVF::train_residual(idx_t /*n*/, const float* /*x*/) {
if (verbose)
printf("IndexIVF: no residual training\n");
// does nothing by default
}
void IndexIVF::check_compatible_for_merge (const IndexIVF &other) const
{
// minimal sanity checks
FAISS_THROW_IF_NOT (other.d == d);
FAISS_THROW_IF_NOT (other.nlist == nlist);
FAISS_THROW_IF_NOT (other.code_size == code_size);
FAISS_THROW_IF_NOT_MSG (typeid (*this) == typeid (other),
"can only merge indexes of the same type");
}
void IndexIVF::merge_from (IndexIVF &other, idx_t add_id)
{
check_compatible_for_merge (other);
FAISS_THROW_IF_NOT_MSG ((!maintain_direct_map &&
!other.maintain_direct_map),
"direct map copy not implemented");
invlists->merge_from (other.invlists, add_id);
ntotal += other.ntotal;
other.ntotal = 0;
}
void IndexIVF::replace_invlists (InvertedLists *il, bool own)
{
if (own_invlists) {
delete invlists;
}
// FAISS_THROW_IF_NOT (ntotal == 0);
if (il) {
FAISS_THROW_IF_NOT (il->nlist == nlist &&
il->code_size == code_size);
}
invlists = il;
own_invlists = own;
}
void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type,
idx_t a1, idx_t a2) const
{
FAISS_THROW_IF_NOT (nlist == other.nlist);
FAISS_THROW_IF_NOT (code_size == other.code_size);
FAISS_THROW_IF_NOT (!other.maintain_direct_map);
FAISS_THROW_IF_NOT_FMT (
subset_type == 0 || subset_type == 1 || subset_type == 2,
"subset type %d not implemented", subset_type);
size_t accu_n = 0;
size_t accu_a1 = 0;
size_t accu_a2 = 0;
InvertedLists *oivf = other.invlists;
for (idx_t list_no = 0; list_no < nlist; list_no++) {
size_t n = invlists->list_size (list_no);
ScopedIds ids_in (invlists, list_no);
if (subset_type == 0) {
for (idx_t i = 0; i < n; i++) {
idx_t id = ids_in[i];
if (a1 <= id && id < a2) {
oivf->add_entry (list_no,
invlists->get_single_id (list_no, i),
ScopedCodes (invlists, list_no, i).get());
other.ntotal++;
}
}
} else if (subset_type == 1) {
for (idx_t i = 0; i < n; i++) {
idx_t id = ids_in[i];
if (id % a1 == a2) {
oivf->add_entry (list_no,
invlists->get_single_id (list_no, i),
ScopedCodes (invlists, list_no, i).get());
other.ntotal++;
}
}
} else if (subset_type == 2) {
// see what is allocated to a1 and to a2
size_t next_accu_n = accu_n + n;
size_t next_accu_a1 = next_accu_n * a1 / ntotal;
size_t i1 = next_accu_a1 - accu_a1;
size_t next_accu_a2 = next_accu_n * a2 / ntotal;
size_t i2 = next_accu_a2 - accu_a2;
for (idx_t i = i1; i < i2; i++) {
oivf->add_entry (list_no,
invlists->get_single_id (list_no, i),
ScopedCodes (invlists, list_no, i).get());
}
other.ntotal += i2 - i1;
accu_a1 = next_accu_a1;
accu_a2 = next_accu_a2;
}
accu_n += n;
}
FAISS_ASSERT(accu_n == ntotal);
}
void
IndexIVF::dump() {
    for (size_t i = 0; i < invlists->nlist; ++i) {
        auto numVecs = invlists->list_size(i);
        // scoped accessors so ids/codes are released on all code paths
        ScopedIds ids (invlists, i);
        ScopedCodes codes (invlists, i);
        size_t code_size = invlists->code_size;
        std::cout << "Bucket ID: " << i << ", code size: " << code_size
                  << ", number of vectors: " << numVecs << std::endl;
        if (code_size == 8) {
            // codes stored as one uint8 component per dimension
            for (size_t j = 0; j < numVecs; ++j) {
                std::cout << ids[j] << ": " << std::endl;
                for (int k = 0; k < this->d; ++k) {
                    printf("%u ", (uint8_t)(codes.get()[j * d + k]));
                }
                std::cout << std::endl;
            }
        }
        std::cout << "Bucket End." << std::endl;
    }
}
IndexIVF::~IndexIVF()
{
if (own_invlists) {
delete invlists;
}
}
void IndexIVFStats::reset()
{
memset ((void*)this, 0, sizeof (*this));
}
IndexIVFStats indexIVF_stats;
void InvertedListScanner::scan_codes_range (size_t ,
const uint8_t *,
const idx_t *,
float ,
RangeQueryResult &) const
{
FAISS_THROW_MSG ("scan_codes_range not implemented");
}
} // namespace faiss

View File

@ -0,0 +1,363 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVF_H
#define FAISS_INDEX_IVF_H
#include <vector>
#include <stdint.h>
#include <faiss/Index.h>
#include <faiss/InvertedLists.h>
#include <faiss/Clustering.h>
#include <faiss/utils/Heap.h>
namespace faiss {
/** Encapsulates a quantizer object for the IndexIVF
*
* The class isolates the fields that are independent of the storage
* of the lists (especially training)
*/
struct Level1Quantizer {
Index * quantizer = nullptr; ///< quantizer that maps vectors to inverted lists
Index * quantizer_backup = nullptr; ///< quantizer for backup
size_t nlist; ///< number of possible key values
/**
* = 0: use the quantizer as index in a kmeans training
* = 1: just pass on the training set to the train() of the quantizer
* = 2: kmeans training on a flat index + add the centroids to the quantizer
*/
char quantizer_trains_alone;
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
Index *clustering_index; ///< to override index used during clustering
/// Trains the quantizer and calls train_residual to train sub-quantizers
void train_q1 (size_t n, const float *x, bool verbose,
MetricType metric_type);
/// compute the number of bytes required to store list ids
size_t coarse_code_size () const;
void encode_listno (Index::idx_t list_no, uint8_t *code) const;
Index::idx_t decode_listno (const uint8_t *code) const;
Level1Quantizer (Index * quantizer, size_t nlist);
Level1Quantizer ();
~Level1Quantizer ();
};
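/* Illustrative summary (not in the original header) of how train_q1()
 * in IndexIVF.cpp interprets quantizer_trains_alone:
 *   0: k-means over the training set; the nlist centroids end up in
 *      `quantizer` (clustering runs on `clustering_index` when set);
 *   1: the training set is handed directly to quantizer->train();
 *   2: k-means runs on a temporary IndexFlatL2 (or `clustering_index`)
 *      and only the resulting centroids are added to `quantizer`. */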
struct IVFSearchParameters {
size_t nprobe; ///< number of probes at query time
size_t max_codes; ///< max nb of codes to visit to do a query
virtual ~IVFSearchParameters () {}
};
struct InvertedListScanner;
/** Index based on a inverted file (IVF)
*
* In the inverted file, the quantizer (an Index instance) provides a
* quantization index for each vector to be added. The quantization
* index maps to a list (aka inverted list or posting list), where the
* id of the vector is stored.
*
* The inverted list object is required only after training. If none is
* set externally, an ArrayInvertedLists is used automatically.
*
* At search time, the vector to be searched is also quantized, and
* only the list corresponding to the quantization index is
* searched. This speeds up the search by making it
* non-exhaustive. This can be relaxed using multi-probe search: a few
* (nprobe) quantization indices are selected and several inverted
* lists are visited.
*
* Sub-classes implement a post-filtering of the index that refines
* the distance estimation from the query to database vectors.
*/
struct IndexIVF: Index, Level1Quantizer {
/// Access to the actual data
InvertedLists *invlists;
bool own_invlists;
size_t code_size; ///< code size per vector in bytes
size_t nprobe; ///< number of probes at query time
size_t max_codes; ///< max nb of codes to visit to do a query
/** Parallel mode determines how queries are parallelized with OpenMP
*
* 0 (default): parallelize over queries
* 1: parallelize over inverted lists
* 2: parallelize over both
*/
int parallel_mode;
/// map for direct access to the elements. Enables reconstruct().
bool maintain_direct_map;
std::vector <idx_t> direct_map;
/** The Inverted file takes a quantizer (an Index) on input,
* which implements the function mapping a vector to a list
* identifier. The pointer is borrowed: the quantizer should not
* be deleted while the IndexIVF is in use.
*/
IndexIVF (Index * quantizer, size_t d,
size_t nlist, size_t code_size,
MetricType metric = METRIC_L2);
void reset() override;
/// Trains the quantizer and calls train_residual to train sub-quantizers
void train(idx_t n, const float* x) override;
/// Calls add_with_ids with NULL ids
void add(idx_t n, const float* x) override;
/// default implementation that calls encode_vectors
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
/** Encodes a set of vectors as they would appear in the inverted lists
*
* @param list_nos inverted list ids as returned by the
* quantizer (size n). -1s are ignored.
* @param codes output codes, size n * code_size
* @param include_listno
* include the list ids in the code (in this case add
* ceil(log256(nlist)) bytes to the code size)
*/
virtual void encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listno = false) const = 0;
/// Sub-classes that encode the residuals can train their encoders here
/// does nothing by default
virtual void train_residual (idx_t n, const float *x);
/** search a set of vectors that are pre-quantized by the IVF
* quantizer. Fill in the corresponding heaps with the query
* results. The default implementation uses InvertedListScanners
* to do the search.
*
* @param n nb of vectors to query
* @param x query vectors, size nx * d
* @param assign coarse quantization indices, size nx * nprobe
* @param centroid_dis
* distances to coarse centroids, size nx * nprobe
* @param distance
* output distances, size n * k
* @param labels output labels, size n * k
* @param store_pairs store inv list index + inv list offset
* (packed in upper/lower 32 bits of the result)
* instead of ids (used for reranking)
* @param params used to override the object's search parameters
*/
virtual void search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const;
/** assign the vectors, then call search_preassigned */
void search (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
void range_search (idx_t n, const float* x, float radius,
RangeSearchResult* result) const override;
void range_search_preassigned(idx_t nx, const float *x, float radius,
const idx_t *keys, const float *coarse_dis,
RangeSearchResult *result) const;
/// get a scanner for this index (store_pairs means ignore labels)
virtual InvertedListScanner *get_InvertedListScanner (
bool store_pairs=false) const;
void reconstruct (idx_t key, float* recons) const override;
/** Reconstruct a subset of the indexed vectors.
*
* Overrides default implementation to bypass reconstruct() which requires
* direct_map to be maintained.
*
* @param i0 first vector to reconstruct
* @param ni nb of vectors to reconstruct
* @param recons output array of reconstructed vectors, size ni * d
*/
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
/** Similar to search, but also reconstructs the stored vectors (or an
* approximation in the case of lossy coding) for the search results.
*
* Overrides default implementation to avoid having to maintain direct_map
* and instead fetch the code offsets through the `store_pairs` flag in
* search_preassigned().
*
* @param recons reconstructed vectors size (n, k, d)
*/
void search_and_reconstruct (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
float *recons) const override;
/** Reconstruct a vector given the location in terms of (inv list index +
* inv list offset) instead of the id.
*
* Useful for reconstructing when the direct_map is not maintained and
* the inv list offset is computed by search_preassigned() with
* `store_pairs` set.
*/
virtual void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const;
/// Dataset manipulation functions
size_t remove_ids(const IDSelector& sel) override;
 /** check that the two indexes are compatible (i.e., they are
 * trained in the same way and have the same
 * parameters). Otherwise throw. */
void check_compatible_for_merge (const IndexIVF &other) const;
 /** moves the entries from another dataset to self. On output,
 * other is empty. add_id is added to all moved ids (for
 * sequential ids, this would be this->ntotal) */
virtual void merge_from (IndexIVF &other, idx_t add_id);
/** copy a subset of the entries index to the other index
*
* if subset_type == 0: copies ids in [a1, a2)
* if subset_type == 1: copies ids if id % a1 == a2
* if subset_type == 2: copies inverted lists such that a1
* elements are left before and a2 elements are after
*/
virtual void copy_subset_to (IndexIVF & other, int subset_type,
idx_t a1, idx_t a2) const;
virtual void to_readonly();
virtual bool is_readonly() const;
virtual void backup_quantizer();
virtual void restore_quantizer();
~IndexIVF() override;
size_t get_list_size (size_t list_no) const
{ return invlists->list_size(list_no); }
 /** initialize a direct map
*
* @param new_maintain_direct_map if true, create a direct map,
* else clear it
*/
void make_direct_map (bool new_maintain_direct_map=true);
/// replace the inverted lists, old one is deallocated if own_invlists
void replace_invlists (InvertedLists *il, bool own=false);
/* The standalone codec interface (except sa_decode that is specific) */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void dump();
IndexIVF ();
};
struct RangeQueryResult;
/** Object that handles a query. The inverted lists to scan are
* provided externally. The object has a lot of state, but
* distance_to_code and scan_codes can be called in multiple
* threads */
struct InvertedListScanner {
using idx_t = Index::idx_t;
/// from now on we handle this query.
virtual void set_query (const float *query_vector) = 0;
/// following codes come from this inverted list
virtual void set_list (idx_t list_no, float coarse_dis) = 0;
/// compute a single query-to-code distance
virtual float distance_to_code (const uint8_t *code) const = 0;
/** scan a set of codes, compute distances to current query and
* update heap of results if necessary.
*
* @param n number of codes to scan
* @param codes codes to scan (n * code_size)
* @param ids corresponding ids (ignored if store_pairs)
* @param distances heap distances (size k)
* @param labels heap labels (size k)
* @param k heap size
* @return number of heap updates performed
*/
virtual size_t scan_codes (size_t n,
const uint8_t *codes,
const idx_t *ids,
float *distances, idx_t *labels,
size_t k) const = 0;
/** scan a set of codes, compute distances to current query and
* update results if distances are below radius
*
* (default implementation fails) */
virtual void scan_codes_range (size_t n,
const uint8_t *codes,
const idx_t *ids,
float radius,
RangeQueryResult &result) const;
virtual ~InvertedListScanner () {}
};
struct IndexIVFStats {
size_t nq; // nb of queries run
size_t nlist; // nb of inverted lists scanned
 size_t ndis; // nb of distances computed
size_t nheap_updates; // nb of times the heap was updated
double quantization_time; // time spent quantizing vectors (in ms)
double search_time; // time spent searching lists (in ms)
IndexIVFStats () {reset (); }
void reset ();
};
// global var that collects them all
extern IndexIVFStats indexIVF_stats;
} // namespace faiss
#endif
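/* Editor's sketch (not part of the header above) of driving the
 * InvertedListScanner API by hand for one inverted list. It assumes a
 * trained, populated IndexIVFFlat `index` and a single query `q`; the
 * function name is illustrative only. */
#include <faiss/IndexIVFFlat.h>
#include <faiss/utils/Heap.h>
#include <memory>

void scan_one_list_by_hand (const faiss::IndexIVFFlat &index,
                            const float *q, faiss::Index::idx_t list_no,
                            size_t k, float *dis, faiss::Index::idx_t *ids)
{
    std::unique_ptr<faiss::InvertedListScanner> scanner (
        index.get_InvertedListScanner ());
    scanner->set_query (q);                 // from now on, handle q
    scanner->set_list (list_no, 0.0f);      // coarse distance unused by IVFFlat
    faiss::maxheap_heapify (k, dis, ids);   // empty max-heap: dis[0] = worst
    faiss::InvertedLists::ScopedCodes codes (index.invlists, list_no);
    faiss::InvertedLists::ScopedIds lids (index.invlists, list_no);
    scanner->scan_codes (index.invlists->list_size (list_no),
                         codes.get (), lids.get (), dis, ids, k);
    faiss::maxheap_reorder (k, dis, ids);   // sort results best-first
}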

View File

@ -0,0 +1,502 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVFFlat.h>
#include <cstdio>
#include <faiss/IndexFlat.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
/*****************************************
* IndexIVFFlat implementation
******************************************/
IndexIVFFlat::IndexIVFFlat (Index * quantizer,
size_t d, size_t nlist, MetricType metric):
IndexIVF (quantizer, d, nlist, sizeof(float) * d, metric)
{
code_size = sizeof(float) * d;
}
void IndexIVFFlat::add_with_ids (idx_t n, const float * x, const idx_t *xids)
{
add_core (n, x, xids, nullptr);
}
void IndexIVFFlat::add_core (idx_t n, const float * x, const int64_t *xids,
const int64_t *precomputed_idx)
{
FAISS_THROW_IF_NOT (is_trained);
assert (invlists);
FAISS_THROW_IF_NOT_MSG (!(maintain_direct_map && xids),
"cannot have direct map and add with ids");
const int64_t * idx;
ScopeDeleter<int64_t> del;
if (precomputed_idx) {
idx = precomputed_idx;
} else {
int64_t * idx0 = new int64_t [n];
del.set (idx0);
quantizer->assign (n, x, idx0);
idx = idx0;
}
int64_t n_add = 0;
for (size_t i = 0; i < n; i++) {
int64_t id = xids ? xids[i] : ntotal + i;
int64_t list_no = idx [i];
if (list_no < 0)
continue;
const float *xi = x + i * d;
size_t offset = invlists->add_entry (
list_no, id, (const uint8_t*) xi);
if (maintain_direct_map)
direct_map.push_back (list_no << 32 | offset);
n_add++;
}
if (verbose) {
printf("IndexIVFFlat::add_core: added %ld / %ld vectors\n",
n_add, n);
}
ntotal += n;
}
void IndexIVFFlat::encode_vectors(idx_t n, const float* x,
const idx_t * list_nos,
uint8_t * codes,
bool include_listnos) const
{
if (!include_listnos) {
memcpy (codes, x, code_size * n);
} else {
size_t coarse_size = coarse_code_size ();
for (size_t i = 0; i < n; i++) {
int64_t list_no = list_nos [i];
uint8_t *code = codes + i * (code_size + coarse_size);
const float *xi = x + i * d;
if (list_no >= 0) {
encode_listno (list_no, code);
memcpy (code + coarse_size, xi, code_size);
} else {
memset (code, 0, code_size + coarse_size);
}
}
}
}
void IndexIVFFlat::sa_decode (idx_t n, const uint8_t *bytes,
float *x) const
{
size_t coarse_size = coarse_code_size ();
for (size_t i = 0; i < n; i++) {
const uint8_t *code = bytes + i * (code_size + coarse_size);
float *xi = x + i * d;
memcpy (xi, code + coarse_size, code_size);
}
}
namespace {
template<MetricType metric, class C>
struct IVFFlatScanner: InvertedListScanner {
size_t d;
bool store_pairs;
IVFFlatScanner(size_t d, bool store_pairs):
d(d), store_pairs(store_pairs) {}
const float *xi;
void set_query (const float *query) override {
this->xi = query;
}
idx_t list_no;
void set_list (idx_t list_no, float /* coarse_dis */) override {
this->list_no = list_no;
}
float distance_to_code (const uint8_t *code) const override {
const float *yj = (float*)code;
float dis = metric == METRIC_INNER_PRODUCT ?
fvec_inner_product (xi, yj, d) : fvec_L2sqr (xi, yj, d);
return dis;
}
size_t scan_codes (size_t list_size,
const uint8_t *codes,
const idx_t *ids,
float *simi, idx_t *idxi,
size_t k) const override
{
const float *list_vecs = (const float*)codes;
size_t nup = 0;
for (size_t j = 0; j < list_size; j++) {
const float * yj = list_vecs + d * j;
float dis = metric == METRIC_INNER_PRODUCT ?
fvec_inner_product (xi, yj, d) : fvec_L2sqr (xi, yj, d);
if (C::cmp (simi[0], dis)) {
heap_pop<C> (k, simi, idxi);
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
heap_push<C> (k, simi, idxi, dis, id);
nup++;
}
}
return nup;
}
void scan_codes_range (size_t list_size,
const uint8_t *codes,
const idx_t *ids,
float radius,
RangeQueryResult & res) const override
{
const float *list_vecs = (const float*)codes;
for (size_t j = 0; j < list_size; j++) {
const float * yj = list_vecs + d * j;
float dis = metric == METRIC_INNER_PRODUCT ?
fvec_inner_product (xi, yj, d) : fvec_L2sqr (xi, yj, d);
if (C::cmp (radius, dis)) {
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
res.add (dis, id);
}
}
}
};
} // anonymous namespace
InvertedListScanner* IndexIVFFlat::get_InvertedListScanner
(bool store_pairs) const
{
if (metric_type == METRIC_INNER_PRODUCT) {
return new IVFFlatScanner<
METRIC_INNER_PRODUCT, CMin<float, int64_t> > (d, store_pairs);
} else if (metric_type == METRIC_L2) {
return new IVFFlatScanner<
METRIC_L2, CMax<float, int64_t> >(d, store_pairs);
} else {
FAISS_THROW_MSG("metric type not supported");
}
return nullptr;
}
void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x)
{
FAISS_THROW_IF_NOT (maintain_direct_map);
FAISS_THROW_IF_NOT (is_trained);
std::vector<idx_t> assign (n);
quantizer->assign (n, x, assign.data());
for (size_t i = 0; i < n; i++) {
idx_t id = new_ids[i];
FAISS_THROW_IF_NOT_MSG (0 <= id && id < ntotal,
"id to update out of range");
{ // remove old one
int64_t dm = direct_map[id];
int64_t ofs = dm & 0xffffffff;
int64_t il = dm >> 32;
size_t l = invlists->list_size (il);
if (ofs != l - 1) { // move l - 1 to ofs
int64_t id2 = invlists->get_single_id (il, l - 1);
direct_map[id2] = (il << 32) | ofs;
invlists->update_entry (il, ofs, id2,
invlists->get_single_code (il, l - 1));
}
invlists->resize (il, l - 1);
}
{ // insert new one
int64_t il = assign[i];
size_t l = invlists->list_size (il);
int64_t dm = (il << 32) | l;
direct_map[id] = dm;
invlists->add_entry (il, id, (const uint8_t*)(x + i * d));
}
}
}
void IndexIVFFlat::reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const
{
memcpy (recons, invlists->get_single_code (list_no, offset), code_size);
}
/*****************************************
* IndexIVFFlatDedup implementation
******************************************/
IndexIVFFlatDedup::IndexIVFFlatDedup (
Index * quantizer, size_t d, size_t nlist_,
MetricType metric_type):
IndexIVFFlat (quantizer, d, nlist_, metric_type)
{}
void IndexIVFFlatDedup::train(idx_t n, const float* x)
{
std::unordered_map<uint64_t, idx_t> map;
float * x2 = new float [n * d];
ScopeDeleter<float> del (x2);
int64_t n2 = 0;
for (int64_t i = 0; i < n; i++) {
uint64_t hash = hash_bytes((uint8_t *)(x + i * d), code_size);
if (map.count(hash) &&
!memcmp (x2 + map[hash] * d, x + i * d, code_size)) {
// is duplicate, skip
} else {
map [hash] = n2;
memcpy (x2 + n2 * d, x + i * d, code_size);
n2 ++;
}
}
if (verbose) {
printf ("IndexIVFFlatDedup::train: train on %ld points after dedup "
"(was %ld points)\n", n2, n);
}
IndexIVFFlat::train (n2, x2);
}
void IndexIVFFlatDedup::add_with_ids(
idx_t na, const float* x, const idx_t* xids)
{
FAISS_THROW_IF_NOT (is_trained);
assert (invlists);
FAISS_THROW_IF_NOT_MSG (
!maintain_direct_map,
"IVFFlatDedup not implemented with direct_map");
int64_t * idx = new int64_t [na];
ScopeDeleter<int64_t> del (idx);
quantizer->assign (na, x, idx);
int64_t n_add = 0, n_dup = 0;
 // TODO make an omp loop with this
for (size_t i = 0; i < na; i++) {
idx_t id = xids ? xids[i] : ntotal + i;
int64_t list_no = idx [i];
if (list_no < 0) {
continue;
}
const float *xi = x + i * d;
// search if there is already an entry with that id
InvertedLists::ScopedCodes codes (invlists, list_no);
int64_t n = invlists->list_size (list_no);
int64_t offset = -1;
for (int64_t o = 0; o < n; o++) {
if (!memcmp (codes.get() + o * code_size,
xi, code_size)) {
offset = o;
break;
}
}
if (offset == -1) { // not found
invlists->add_entry (list_no, id, (const uint8_t*) xi);
} else {
// mark equivalence
idx_t id2 = invlists->get_single_id (list_no, offset);
std::pair<idx_t, idx_t> pair (id2, id);
instances.insert (pair);
n_dup ++;
}
n_add++;
}
if (verbose) {
printf("IndexIVFFlat::add_with_ids: added %ld / %ld vectors"
" (out of which %ld are duplicates)\n",
n_add, na, n_dup);
}
ntotal += n_add;
}
void IndexIVFFlatDedup::search_preassigned (
idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params) const
{
FAISS_THROW_IF_NOT_MSG (
!store_pairs, "store_pairs not supported in IVFDedup");
IndexIVFFlat::search_preassigned (n, x, k, assign, centroid_dis,
distances, labels, false,
params);
std::vector <idx_t> labels2 (k);
std::vector <float> dis2 (k);
for (int64_t i = 0; i < n; i++) {
idx_t *labels1 = labels + i * k;
float *dis1 = distances + i * k;
int64_t j = 0;
for (; j < k; j++) {
if (instances.find (labels1[j]) != instances.end ()) {
// a duplicate: special handling
break;
}
}
if (j < k) {
// there are duplicates, special handling
int64_t j0 = j;
int64_t rp = j;
while (j < k) {
auto range = instances.equal_range (labels1[rp]);
float dis = dis1[rp];
labels2[j] = labels1[rp];
dis2[j] = dis;
j ++;
for (auto it = range.first; j < k && it != range.second; ++it) {
labels2[j] = it->second;
dis2[j] = dis;
j++;
}
rp++;
}
memcpy (labels1 + j0, labels2.data() + j0,
sizeof(labels1[0]) * (k - j0));
memcpy (dis1 + j0, dis2.data() + j0,
sizeof(dis2[0]) * (k - j0));
}
}
}
size_t IndexIVFFlatDedup::remove_ids(const IDSelector& sel)
{
std::unordered_map<idx_t, idx_t> replace;
std::vector<std::pair<idx_t, idx_t> > toadd;
for (auto it = instances.begin(); it != instances.end(); ) {
if (sel.is_member(it->first)) {
// then we erase this entry
if (!sel.is_member(it->second)) {
// if the second is not erased
if (replace.count(it->first) == 0) {
replace[it->first] = it->second;
} else { // remember we should add an element
std::pair<idx_t, idx_t> new_entry (
replace[it->first], it->second);
toadd.push_back(new_entry);
}
}
it = instances.erase(it);
} else {
if (sel.is_member(it->second)) {
it = instances.erase(it);
} else {
++it;
}
}
}
instances.insert (toadd.begin(), toadd.end());
// mostly copied from IndexIVF.cpp
FAISS_THROW_IF_NOT_MSG (!maintain_direct_map,
"direct map remove not implemented");
std::vector<int64_t> toremove(nlist);
#pragma omp parallel for
for (int64_t i = 0; i < nlist; i++) {
int64_t l0 = invlists->list_size (i), l = l0, j = 0;
InvertedLists::ScopedIds idsi (invlists, i);
while (j < l) {
if (sel.is_member (idsi[j])) {
if (replace.count(idsi[j]) == 0) {
l--;
invlists->update_entry (
i, j,
invlists->get_single_id (i, l),
InvertedLists::ScopedCodes (invlists, i, l).get());
} else {
invlists->update_entry (
i, j,
replace[idsi[j]],
InvertedLists::ScopedCodes (invlists, i, j).get());
j++;
}
} else {
j++;
}
}
toremove[i] = l0 - l;
}
 // this will not run well in parallel with on-disk inverted lists
 // because of possible shrinks
int64_t nremove = 0;
for (int64_t i = 0; i < nlist; i++) {
if (toremove[i] > 0) {
nremove += toremove[i];
invlists->resize(
i, invlists->list_size(i) - toremove[i]);
}
}
ntotal -= nremove;
return nremove;
}
void IndexIVFFlatDedup::range_search(
idx_t ,
const float* ,
float ,
RangeSearchResult* ) const
{
FAISS_THROW_MSG ("not implemented");
}
void IndexIVFFlatDedup::update_vectors (int , idx_t *, const float *)
{
FAISS_THROW_MSG ("not implemented");
}
void IndexIVFFlatDedup::reconstruct_from_offset (
int64_t , int64_t , float* ) const
{
FAISS_THROW_MSG ("not implemented");
}
} // namespace faiss
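/* Editor's sketch (not library code) of the 64-bit packing that add_core,
 * update_vectors and store_pairs results above all rely on: the
 * inverted-list number occupies the upper 32 bits, the offset within
 * that list the lower 32 bits. */
#include <cassert>
#include <cstdint>

int main () {
    int64_t list_no = 123, offset = 456;
    int64_t packed = list_no << 32 | offset;   // as in add_core above
    assert (packed >> 32 == list_no);          // recover the list number
    assert ((packed & 0xffffffff) == offset);  // recover the offset
    return 0;
}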

View File

@ -0,0 +1,118 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVF_FLAT_H
#define FAISS_INDEX_IVF_FLAT_H
#include <unordered_map>
#include <stdint.h>
#include <faiss/IndexIVF.h>
namespace faiss {
/** Inverted file with stored vectors. Here the inverted file
* pre-selects the vectors to be searched, but they are not otherwise
* encoded, the code array just contains the raw float entries.
*/
struct IndexIVFFlat: IndexIVF {
IndexIVFFlat (
Index * quantizer, size_t d, size_t nlist_,
MetricType = METRIC_L2);
/// same as add_with_ids, with precomputed coarse quantizer
virtual void add_core (idx_t n, const float * x, const int64_t *xids,
const int64_t *precomputed_idx);
/// implemented for all IndexIVF* classes
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
void encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos=false) const override;
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
const override;
/** Update a subset of vectors.
*
* The index must have a direct_map
*
* @param nv nb of vectors to update
* @param idx vector indices to update, size nv
* @param v vectors of new values, size nv*d
*/
virtual void update_vectors (int nv, idx_t *idx, const float *v);
void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
IndexIVFFlat () {}
};
struct IndexIVFFlatDedup: IndexIVFFlat {
/** Maps ids stored in the index to the ids of vectors that are
* the same. When a vector is unique, it does not appear in the
* instances map */
std::unordered_multimap <idx_t, idx_t> instances;
IndexIVFFlatDedup (
Index * quantizer, size_t d, size_t nlist_,
MetricType = METRIC_L2);
/// also dedups the training set
void train(idx_t n, const float* x) override;
/// implemented for all IndexIVF* classes
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
void search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const override;
size_t remove_ids(const IDSelector& sel) override;
/// not implemented
void range_search(
idx_t n,
const float* x,
float radius,
RangeSearchResult* result) const override;
/// not implemented
void update_vectors (int nv, idx_t *idx, const float *v) override;
/// not implemented
void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const override;
IndexIVFFlatDedup () {}
};
} // namespace faiss
#endif
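/* Editor's sketch of the end-to-end IndexIVFFlat workflow declared above,
 * on random data; dataset sizes, nlist and nprobe are illustrative. */
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <random>
#include <vector>

int main () {
    int d = 64; size_t nb = 10000, nlist = 100;
    std::vector<float> xb (nb * d);
    std::mt19937 rng (42);
    std::uniform_real_distribution<float> u (0.0f, 1.0f);
    for (float &v : xb) v = u (rng);
    faiss::IndexFlatL2 quantizer (d);              // coarse quantizer
    faiss::IndexIVFFlat index (&quantizer, d, nlist);
    index.train (nb, xb.data ());                  // k-means the centroids
    index.add (nb, xb.data ());
    index.nprobe = 8;                              // multi-probe: visit 8 lists
    int k = 5;
    std::vector<float> dis (k);
    std::vector<faiss::Index::idx_t> ids (k);
    index.search (1, xb.data (), k, dis.data (), ids.data ());
    return 0;
}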

File diff suppressed because it is too large

View File

@ -0,0 +1,161 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVFPQ_H
#define FAISS_INDEX_IVFPQ_H
#include <vector>
#include <faiss/IndexIVF.h>
#include <faiss/IndexPQ.h>
namespace faiss {
struct IVFPQSearchParameters: IVFSearchParameters {
size_t scan_table_threshold; ///< use table computation or on-the-fly?
int polysemous_ht; ///< Hamming thresh for polysemous filtering
~IVFPQSearchParameters () {}
};
/** Inverted file with Product Quantizer encoding. Each residual
* vector is encoded as a product quantizer code.
*/
struct IndexIVFPQ: IndexIVF {
bool by_residual; ///< Encode residual or plain vector?
ProductQuantizer pq; ///< produces the codes
bool do_polysemous_training; ///< reorder PQ centroids after training?
PolysemousTraining *polysemous_training; ///< if NULL, use default
// search-time parameters
size_t scan_table_threshold; ///< use table computation or on-the-fly?
int polysemous_ht; ///< Hamming thresh for polysemous filtering
/** Precompute table that speed up query preprocessing at some
* memory cost
* =-1: force disable
* =0: decide heuristically (default: use tables only if they are
* < precomputed_tables_max_bytes)
* =1: tables that work for all quantizers (size 256 * nlist * M)
* =2: specific version for MultiIndexQuantizer (much more compact)
*/
int use_precomputed_table; ///< if by_residual, build precompute tables
static size_t precomputed_table_max_bytes;
 /// if use_precomputed_table
/// size nlist * pq.M * pq.ksub
std::vector <float> precomputed_table;
IndexIVFPQ (
Index * quantizer, size_t d, size_t nlist,
size_t M, size_t nbits_per_idx);
void add_with_ids(idx_t n, const float* x, const idx_t* xids = nullptr)
override;
void encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos = false) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
/// same as add_core, also:
/// - output 2nd level residuals if residuals_2 != NULL
/// - use precomputed list numbers if precomputed_idx != NULL
void add_core_o (idx_t n, const float *x,
const idx_t *xids, float *residuals_2,
const idx_t *precomputed_idx = nullptr);
/// trains the product quantizer
void train_residual(idx_t n, const float* x) override;
/// same as train_residual, also output 2nd level residuals
void train_residual_o (idx_t n, const float *x, float *residuals_2);
void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const override;
/** Find exact duplicates in the dataset.
*
* the duplicates are returned in pre-allocated arrays (see the
* max sizes).
*
 * @param lims limits between groups of duplicates
 * (max size ntotal / 2 + 1)
 * @param ids ids[lims[i]] : ids[lims[i+1]-1] is a group of
* duplicates (max size ntotal)
* @return n number of groups found
*/
size_t find_duplicates (idx_t *ids, size_t *lims) const;
 // map a vector to a binary code knowing the index
void encode (idx_t key, const float * x, uint8_t * code) const;
/** Encode multiple vectors
*
* @param n nb vectors to encode
* @param keys posting list ids for those vectors (size n)
* @param x vectors (size n * d)
* @param codes output codes (size n * code_size)
* @param compute_keys if false, assume keys are precomputed,
* otherwise compute them
*/
void encode_multiple (size_t n, idx_t *keys,
const float * x, uint8_t * codes,
bool compute_keys = false) const;
/// inverse of encode_multiple
void decode_multiple (size_t n, const idx_t *keys,
const uint8_t * xcodes, float * x) const;
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
const override;
/// build precomputed table
void precompute_table ();
IndexIVFPQ ();
};
/// statistics are robust to internal threading, but not if
/// IndexIVFPQ::search_preassigned is called by multiple threads
struct IndexIVFPQStats {
size_t nrefine; // nb of refines (IVFPQR)
size_t n_hamming_pass;
// nb of passed Hamming distance tests (for polysemous)
// timings measured with the CPU RTC
// on all threads
size_t search_cycles;
size_t refine_cycles; // only for IVFPQR
IndexIVFPQStats () {reset (); }
void reset ();
};
// global var that collects them all
extern IndexIVFPQStats indexIVFPQ_stats;
} // namespace faiss
#endif
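/* Editor's sketch of constructing the IndexIVFPQ declared above
 * (parameter values are illustrative). With M = 8 sub-quantizers of
 * 8 bits each, every stored vector is encoded in code_size = M = 8
 * bytes, plus its entry in an inverted list. */
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>

int main () {
    int d = 128;
    faiss::IndexFlatL2 quantizer (d);
    faiss::IndexIVFPQ index (&quantizer, d, /*nlist=*/1024,
                             /*M=*/8, /*nbits_per_idx=*/8);
    // train()/add()/search() then follow the usual IndexIVF workflow;
    // precompute_table() trades memory for faster residual distances.
    return 0;
}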

View File

@ -0,0 +1,219 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVFPQR.h>
#include <faiss/utils/Heap.h>
#include <faiss/utils/utils.h>
#include <faiss/utils/distances.h>
#include <faiss/impl/FaissAssert.h>
namespace faiss {
/*****************************************
* IndexIVFPQR implementation
******************************************/
IndexIVFPQR::IndexIVFPQR (
Index * quantizer, size_t d, size_t nlist,
size_t M, size_t nbits_per_idx,
size_t M_refine, size_t nbits_per_idx_refine):
IndexIVFPQ (quantizer, d, nlist, M, nbits_per_idx),
refine_pq (d, M_refine, nbits_per_idx_refine),
k_factor (4)
{
by_residual = true;
}
IndexIVFPQR::IndexIVFPQR ():
k_factor (1)
{
by_residual = true;
}
void IndexIVFPQR::reset()
{
IndexIVFPQ::reset();
refine_codes.clear();
}
void IndexIVFPQR::train_residual (idx_t n, const float *x)
{
float * residual_2 = new float [n * d];
ScopeDeleter <float> del(residual_2);
train_residual_o (n, x, residual_2);
if (verbose)
printf ("training %zdx%zd 2nd level PQ quantizer on %ld %dD-vectors\n",
refine_pq.M, refine_pq.ksub, n, d);
refine_pq.cp.max_points_per_centroid = 1000;
refine_pq.cp.verbose = verbose;
refine_pq.train (n, residual_2);
}
void IndexIVFPQR::add_with_ids (idx_t n, const float *x, const idx_t *xids) {
add_core (n, x, xids, nullptr);
}
void IndexIVFPQR::add_core (idx_t n, const float *x, const idx_t *xids,
const idx_t *precomputed_idx) {
float * residual_2 = new float [n * d];
ScopeDeleter <float> del(residual_2);
idx_t n0 = ntotal;
add_core_o (n, x, xids, residual_2, precomputed_idx);
refine_codes.resize (ntotal * refine_pq.code_size);
refine_pq.compute_codes (
residual_2, &refine_codes[n0 * refine_pq.code_size], n);
}
#define TIC t0 = get_cycles()
#define TOC get_cycles () - t0
void IndexIVFPQR::search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *idx,
const float *L1_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params
) const
{
uint64_t t0;
TIC;
size_t k_coarse = long(k * k_factor);
idx_t *coarse_labels = new idx_t [k_coarse * n];
ScopeDeleter<idx_t> del1 (coarse_labels);
{ // query with quantizer levels 1 and 2.
float *coarse_distances = new float [k_coarse * n];
ScopeDeleter<float> del(coarse_distances);
IndexIVFPQ::search_preassigned (
n, x, k_coarse,
idx, L1_dis, coarse_distances, coarse_labels,
true, params);
}
indexIVFPQ_stats.search_cycles += TOC;
TIC;
// 3rd level refinement
size_t n_refine = 0;
#pragma omp parallel reduction(+ : n_refine)
{
// tmp buffers
float *residual_1 = new float [2 * d];
ScopeDeleter<float> del (residual_1);
float *residual_2 = residual_1 + d;
#pragma omp for
for (idx_t i = 0; i < n; i++) {
const float *xq = x + i * d;
const idx_t * shortlist = coarse_labels + k_coarse * i;
float * heap_sim = distances + k * i;
idx_t * heap_ids = labels + k * i;
maxheap_heapify (k, heap_sim, heap_ids);
for (int j = 0; j < k_coarse; j++) {
idx_t sl = shortlist[j];
if (sl == -1) continue;
int list_no = sl >> 32;
int ofs = sl & 0xffffffff;
assert (list_no >= 0 && list_no < nlist);
assert (ofs >= 0 && ofs < invlists->list_size (list_no));
// 1st level residual
quantizer->compute_residual (xq, residual_1, list_no);
// 2nd level residual
const uint8_t * l2code =
invlists->get_single_code (list_no, ofs);
pq.decode (l2code, residual_2);
for (int l = 0; l < d; l++)
residual_2[l] = residual_1[l] - residual_2[l];
// 3rd level residual's approximation
idx_t id = invlists->get_single_id (list_no, ofs);
assert (0 <= id && id < ntotal);
refine_pq.decode (&refine_codes [id * refine_pq.code_size],
residual_1);
float dis = fvec_L2sqr (residual_1, residual_2, d);
if (dis < heap_sim[0]) {
maxheap_pop (k, heap_sim, heap_ids);
idx_t id_or_pair = store_pairs ? sl : id;
maxheap_push (k, heap_sim, heap_ids, dis, id_or_pair);
}
n_refine ++;
}
maxheap_reorder (k, heap_sim, heap_ids);
}
}
indexIVFPQ_stats.nrefine += n_refine;
indexIVFPQ_stats.refine_cycles += TOC;
}
void IndexIVFPQR::reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const
{
IndexIVFPQ::reconstruct_from_offset (list_no, offset, recons);
idx_t id = invlists->get_single_id (list_no, offset);
assert (0 <= id && id < ntotal);
std::vector<float> r3(d);
refine_pq.decode (&refine_codes [id * refine_pq.code_size], r3.data());
for (int i = 0; i < d; ++i) {
recons[i] += r3[i];
}
}
void IndexIVFPQR::merge_from (IndexIVF &other_in, idx_t add_id)
{
IndexIVFPQR *other = dynamic_cast<IndexIVFPQR *> (&other_in);
FAISS_THROW_IF_NOT(other);
IndexIVF::merge_from (other_in, add_id);
refine_codes.insert (refine_codes.end(),
other->refine_codes.begin(),
other->refine_codes.end());
other->refine_codes.clear();
}
size_t IndexIVFPQR::remove_ids(const IDSelector& /*sel*/) {
FAISS_THROW_MSG("not implemented");
return 0;
}
} // namespace faiss

View File

@ -0,0 +1,65 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include <faiss/IndexIVFPQ.h>
namespace faiss {
/** Index with an additional level of PQ refinement */
struct IndexIVFPQR: IndexIVFPQ {
ProductQuantizer refine_pq; ///< 3rd level quantizer
std::vector <uint8_t> refine_codes; ///< corresponding codes
/// factor between k requested in search and the k requested from the IVFPQ
float k_factor;
IndexIVFPQR (
Index * quantizer, size_t d, size_t nlist,
size_t M, size_t nbits_per_idx,
size_t M_refine, size_t nbits_per_idx_refine);
void reset() override;
size_t remove_ids(const IDSelector& sel) override;
/// trains the two product quantizers
void train_residual(idx_t n, const float* x) override;
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
/// same as add_with_ids, but optionally use the precomputed list ids
void add_core (idx_t n, const float *x, const idx_t *xids,
const idx_t *precomputed_idx = nullptr);
void reconstruct_from_offset (int64_t list_no, int64_t offset,
float* recons) const override;
void merge_from (IndexIVF &other, idx_t add_id) override;
void search_preassigned (idx_t n, const float *x, idx_t k,
const idx_t *assign,
const float *centroid_dis,
float *distances, idx_t *labels,
bool store_pairs,
const IVFSearchParameters *params=nullptr
) const override;
IndexIVFPQR();
};
} // namespace faiss
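/* Editor's sketch of the refinement index declared above: the IVFPQ
 * stage retrieves k * k_factor candidates and the 3rd-level PQ re-ranks
 * them down to k; larger k_factor raises accuracy at extra decode cost.
 * All parameter values are illustrative. */
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQR.h>

int main () {
    int d = 128;
    faiss::IndexFlatL2 quantizer (d);
    faiss::IndexIVFPQR index (&quantizer, d, /*nlist=*/1024,
                              /*M=*/8, /*nbits_per_idx=*/8,
                              /*M_refine=*/16, /*nbits_per_idx_refine=*/8);
    index.k_factor = 8;    // re-rank a shortlist of 8*k candidates
    return 0;
}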

View File

@ -0,0 +1,331 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexIVFSpectralHash.h>
#include <memory>
#include <algorithm>
#include <stdint.h>
#include <faiss/utils/hamming.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/VectorTransform.h>
namespace faiss {
IndexIVFSpectralHash::IndexIVFSpectralHash (
Index * quantizer, size_t d, size_t nlist,
int nbit, float period):
IndexIVF (quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
nbit (nbit), period (period), threshold_type (Thresh_global)
{
FAISS_THROW_IF_NOT (code_size % 4 == 0);
RandomRotationMatrix *rr = new RandomRotationMatrix (d, nbit);
rr->init (1234);
vt = rr;
own_fields = true;
is_trained = false;
}
IndexIVFSpectralHash::IndexIVFSpectralHash():
IndexIVF(), vt(nullptr), own_fields(false),
nbit(0), period(0), threshold_type(Thresh_global)
{}
IndexIVFSpectralHash::~IndexIVFSpectralHash ()
{
if (own_fields) {
delete vt;
}
}
namespace {
float median (size_t n, float *x) {
std::sort(x, x + n);
if (n % 2 == 1) {
return x [n / 2];
} else {
return (x [n / 2 - 1] + x [n / 2]) / 2;
}
}
}
void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
{
if (!vt->is_trained) {
vt->train (n, x);
}
if (threshold_type == Thresh_global) {
// nothing to do
return;
} else if (threshold_type == Thresh_centroid ||
threshold_type == Thresh_centroid_half) {
// convert all centroids with vt
std::vector<float> centroids (nlist * d);
quantizer->reconstruct_n (0, nlist, centroids.data());
trained.resize(nlist * nbit);
vt->apply_noalloc (nlist, centroids.data(), trained.data());
if (threshold_type == Thresh_centroid_half) {
for (size_t i = 0; i < nlist * nbit; i++) {
trained[i] -= 0.25 * period;
}
}
return;
}
// otherwise train medians
// assign
std::unique_ptr<idx_t []> idx (new idx_t [n]);
quantizer->assign (n, x, idx.get());
std::vector<size_t> sizes(nlist + 1);
for (size_t i = 0; i < n; i++) {
FAISS_THROW_IF_NOT (idx[i] >= 0);
sizes[idx[i]]++;
}
size_t ofs = 0;
for (int j = 0; j < nlist; j++) {
size_t o0 = ofs;
ofs += sizes[j];
sizes[j] = o0;
}
// transform
std::unique_ptr<float []> xt (vt->apply (n, x));
// transpose + reorder
std::unique_ptr<float []> xo (new float[n * nbit]);
for (size_t i = 0; i < n; i++) {
size_t idest = sizes[idx[i]]++;
for (size_t j = 0; j < nbit; j++) {
xo[idest + n * j] = xt[i * nbit + j];
}
}
trained.resize (n * nbit);
// compute medians
#pragma omp parallel for
for (int i = 0; i < nlist; i++) {
size_t i0 = i == 0 ? 0 : sizes[i - 1];
size_t i1 = sizes[i];
for (int j = 0; j < nbit; j++) {
float *xoi = xo.get() + i0 + n * j;
if (i0 == i1) { // nothing to train
trained[i * nbit + j] = 0.0;
} else if (i1 == i0 + 1) {
trained[i * nbit + j] = xoi[0];
} else {
trained[i * nbit + j] = median(i1 - i0, xoi);
}
}
}
}
namespace {
void binarize_with_freq(size_t nbit, float freq,
const float *x, const float *c,
uint8_t *codes)
{
memset (codes, 0, (nbit + 7) / 8);
for (size_t i = 0; i < nbit; i++) {
float xf = (x[i] - c[i]);
int xi = int(floor(xf * freq));
int bit = xi & 1;
codes[i >> 3] |= bit << (i & 7);
}
}
} // anonymous namespace
void IndexIVFSpectralHash::encode_vectors(idx_t n, const float* x_in,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos) const
{
FAISS_THROW_IF_NOT (is_trained);
float freq = 2.0 / period;
FAISS_THROW_IF_NOT_MSG (!include_listnos, "listnos encoding not supported");
// transform with vt
std::unique_ptr<float []> x (vt->apply (n, x_in));
#pragma omp parallel
{
std::vector<float> zero (nbit);
// each thread takes care of a subset of lists
#pragma omp for
for (size_t i = 0; i < n; i++) {
int64_t list_no = list_nos [i];
if (list_no >= 0) {
const float *c;
if (threshold_type == Thresh_global) {
c = zero.data();
} else {
c = trained.data() + list_no * nbit;
}
binarize_with_freq (nbit, freq,
x.get() + i * nbit, c,
codes + i * code_size) ;
}
}
}
}
namespace {
template<class HammingComputer>
struct IVFScanner: InvertedListScanner {
// copied from index structure
const IndexIVFSpectralHash *index;
size_t code_size;
size_t nbit;
bool store_pairs;
float period, freq;
std::vector<float> q;
std::vector<float> zero;
std::vector<uint8_t> qcode;
HammingComputer hc;
using idx_t = Index::idx_t;
IVFScanner (const IndexIVFSpectralHash * index,
bool store_pairs):
index (index),
code_size(index->code_size),
nbit(index->nbit),
store_pairs(store_pairs),
period(index->period), freq(2.0 / index->period),
q(nbit), zero(nbit), qcode(code_size),
hc(qcode.data(), code_size)
{
}
void set_query (const float *query) override {
FAISS_THROW_IF_NOT(query);
FAISS_THROW_IF_NOT(q.size() == nbit);
index->vt->apply_noalloc (1, query, q.data());
if (index->threshold_type ==
IndexIVFSpectralHash::Thresh_global) {
binarize_with_freq
(nbit, freq, q.data(), zero.data(), qcode.data());
hc.set (qcode.data(), code_size);
}
}
idx_t list_no;
void set_list (idx_t list_no, float /*coarse_dis*/) override {
this->list_no = list_no;
if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
const float *c = index->trained.data() + list_no * nbit;
binarize_with_freq (nbit, freq, q.data(), c, qcode.data());
hc.set (qcode.data(), code_size);
}
}
float distance_to_code (const uint8_t *code) const final {
return hc.hamming (code);
}
size_t scan_codes (size_t list_size,
const uint8_t *codes,
const idx_t *ids,
float *simi, idx_t *idxi,
size_t k) const override
{
size_t nup = 0;
for (size_t j = 0; j < list_size; j++) {
float dis = hc.hamming (codes);
if (dis < simi [0]) {
maxheap_pop (k, simi, idxi);
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
maxheap_push (k, simi, idxi, dis, id);
nup++;
}
codes += code_size;
}
return nup;
}
void scan_codes_range (size_t list_size,
const uint8_t *codes,
const idx_t *ids,
float radius,
RangeQueryResult & res) const override
{
for (size_t j = 0; j < list_size; j++) {
float dis = hc.hamming (codes);
if (dis < radius) {
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
res.add (dis, id);
}
codes += code_size;
}
}
};
} // anonymous namespace
InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner
(bool store_pairs) const
{
switch (code_size) {
#define HANDLE_CODE_SIZE(cs) \
case cs: \
return new IVFScanner<HammingComputer ## cs> (this, store_pairs)
HANDLE_CODE_SIZE(4);
HANDLE_CODE_SIZE(8);
HANDLE_CODE_SIZE(16);
HANDLE_CODE_SIZE(20);
HANDLE_CODE_SIZE(32);
HANDLE_CODE_SIZE(64);
#undef HANDLE_CODE_SIZE
default:
if (code_size % 8 == 0) {
return new IVFScanner<HammingComputerM8>(this, store_pairs);
} else if (code_size % 4 == 0) {
return new IVFScanner<HammingComputerM4>(this, store_pairs);
} else {
FAISS_THROW_MSG("not supported");
}
}
}
} // namespace faiss
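/* Editor's sketch (standalone, not library code) of the periodic
 * binarization in binarize_with_freq above: with freq = 2 / period the
 * bit is the parity of floor((x - c) * freq), so it flips every half
 * period. */
#include <cmath>
#include <cstdio>

int main () {
    float period = 4.0f, freq = 2.0f / period, c = 0.0f;
    for (float x : {0.5f, 1.5f, 2.5f, 3.5f}) {
        int bit = int (std::floor ((x - c) * freq)) & 1;
        std::printf ("x=%.1f -> bit %d\n", x, bit);   // prints 0, 0, 1, 1
    }
    return 0;
}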

View File

@ -0,0 +1,75 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_IVFSH_H
#define FAISS_INDEX_IVFSH_H
#include <vector>
#include <faiss/IndexIVF.h>
namespace faiss {
struct VectorTransform;
/** Inverted list that stores binary codes of size nbit. Before the
* binary conversion, the dimension of the vectors is transformed from
* dim d into dim nbit by vt (a random rotation by default).
*
 * A threshold determined by threshold_type is subtracted from each
 * coordinate, which is then split into intervals of size period. Half
 * of each interval is a 0 bit, the other half a 1.
*/
struct IndexIVFSpectralHash: IndexIVF {
VectorTransform *vt; // transformation from d to nbit dim
bool own_fields;
int nbit;
float period;
enum ThresholdType {
Thresh_global,
Thresh_centroid,
Thresh_centroid_half,
Thresh_median
};
ThresholdType threshold_type;
// size nlist * nbit or 0 if Thresh_global
std::vector<float> trained;
IndexIVFSpectralHash (Index * quantizer, size_t d, size_t nlist,
int nbit, float period);
IndexIVFSpectralHash ();
void train_residual(idx_t n, const float* x) override;
void encode_vectors(idx_t n, const float* x,
const idx_t *list_nos,
uint8_t * codes,
bool include_listnos = false) const override;
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
const override;
~IndexIVFSpectralHash () override;
};
} // namespace faiss
#endif
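/* Editor's sketch of constructing the spectral-hash IVF declared above
 * (all parameter values are illustrative). nbit = 32 gives a 4-byte
 * binary code, satisfying the code_size % 4 == 0 constraint of the
 * constructor. */
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFSpectralHash.h>

int main () {
    int d = 64, nbit = 32;
    faiss::IndexFlatL2 quantizer (d);
    faiss::IndexIVFSpectralHash index (&quantizer, d, /*nlist=*/256,
                                       nbit, /*period=*/8.0f);
    index.threshold_type = faiss::IndexIVFSpectralHash::Thresh_centroid;
    // train()/add()/search() then follow the usual IndexIVF workflow.
    return 0;
}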

View File

@ -0,0 +1,225 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexLSH.h>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <faiss/utils/utils.h>
#include <faiss/utils/hamming.h>
#include <faiss/impl/FaissAssert.h>
namespace faiss {
/***************************************************************
* IndexLSH
***************************************************************/
IndexLSH::IndexLSH (idx_t d, int nbits, bool rotate_data, bool train_thresholds):
Index(d), nbits(nbits), rotate_data(rotate_data),
train_thresholds (train_thresholds), rrot(d, nbits)
{
is_trained = !train_thresholds;
bytes_per_vec = (nbits + 7) / 8;
if (rotate_data) {
rrot.init(5);
} else {
FAISS_THROW_IF_NOT (d >= nbits);
}
}
IndexLSH::IndexLSH ():
nbits (0), bytes_per_vec(0), rotate_data (false), train_thresholds (false)
{
}
const float * IndexLSH::apply_preprocess (idx_t n, const float *x) const
{
float *xt = nullptr;
if (rotate_data) {
 // also applies the bias if it exists
xt = rrot.apply (n, x);
} else if (d != nbits) {
assert (nbits < d);
xt = new float [nbits * n];
float *xp = xt;
for (idx_t i = 0; i < n; i++) {
const float *xl = x + i * d;
for (int j = 0; j < nbits; j++)
*xp++ = xl [j];
}
}
if (train_thresholds) {
if (xt == NULL) {
xt = new float [nbits * n];
memcpy (xt, x, sizeof(*x) * n * nbits);
}
float *xp = xt;
for (idx_t i = 0; i < n; i++)
for (int j = 0; j < nbits; j++)
*xp++ -= thresholds [j];
}
return xt ? xt : x;
}
void IndexLSH::train (idx_t n, const float *x)
{
if (train_thresholds) {
thresholds.resize (nbits);
train_thresholds = false;
const float *xt = apply_preprocess (n, x);
ScopeDeleter<float> del (xt == x ? nullptr : xt);
train_thresholds = true;
float * transposed_x = new float [n * nbits];
ScopeDeleter<float> del2 (transposed_x);
for (idx_t i = 0; i < n; i++)
for (idx_t j = 0; j < nbits; j++)
transposed_x [j * n + i] = xt [i * nbits + j];
for (idx_t i = 0; i < nbits; i++) {
float *xi = transposed_x + i * n;
// std::nth_element
std::sort (xi, xi + n);
if (n % 2 == 1)
thresholds [i] = xi [n / 2];
else
thresholds [i] = (xi [n / 2 - 1] + xi [n / 2]) / 2;
}
}
is_trained = true;
}
void IndexLSH::add (idx_t n, const float *x)
{
FAISS_THROW_IF_NOT (is_trained);
codes.resize ((ntotal + n) * bytes_per_vec);
sa_encode (n, x, &codes[ntotal * bytes_per_vec]);
ntotal += n;
}
void IndexLSH::search (
idx_t n,
const float *x,
idx_t k,
float *distances,
idx_t *labels) const
{
FAISS_THROW_IF_NOT (is_trained);
const float *xt = apply_preprocess (n, x);
ScopeDeleter<float> del (xt == x ? nullptr : xt);
uint8_t * qcodes = new uint8_t [n * bytes_per_vec];
ScopeDeleter<uint8_t> del2 (qcodes);
fvecs2bitvecs (xt, qcodes, nbits, n);
int * idistances = new int [n * k];
ScopeDeleter<int> del3 (idistances);
int_maxheap_array_t res = { size_t(n), size_t(k), labels, idistances};
hammings_knn_hc (&res, qcodes, codes.data(),
ntotal, bytes_per_vec, true);
// convert distances to floats
for (int i = 0; i < k * n; i++)
distances[i] = idistances[i];
}
void IndexLSH::transfer_thresholds (LinearTransform *vt) {
if (!train_thresholds) return;
FAISS_THROW_IF_NOT (nbits == vt->d_out);
if (!vt->have_bias) {
vt->b.resize (nbits, 0);
vt->have_bias = true;
}
for (int i = 0; i < nbits; i++)
vt->b[i] -= thresholds[i];
train_thresholds = false;
thresholds.clear();
}
void IndexLSH::reset() {
codes.clear();
ntotal = 0;
}
size_t IndexLSH::sa_code_size () const
{
return bytes_per_vec;
}
void IndexLSH::sa_encode (idx_t n, const float *x,
uint8_t *bytes) const
{
FAISS_THROW_IF_NOT (is_trained);
const float *xt = apply_preprocess (n, x);
ScopeDeleter<float> del (xt == x ? nullptr : xt);
fvecs2bitvecs (xt, bytes, nbits, n);
}
void IndexLSH::sa_decode (idx_t n, const uint8_t *bytes,
float *x) const
{
float *xt = x;
ScopeDeleter<float> del;
if (rotate_data || nbits != d) {
xt = new float [n * nbits];
del.set(xt);
}
bitvecs2fvecs (bytes, xt, nbits, n);
if (train_thresholds) {
float *xp = xt;
for (idx_t i = 0; i < n; i++) {
for (int j = 0; j < nbits; j++) {
*xp++ += thresholds [j];
}
}
}
if (rotate_data) {
rrot.reverse_transform (n, xt, x);
} else if (nbits != d) {
for (idx_t i = 0; i < n; i++) {
memcpy (x + i * d, xt + i * nbits,
nbits * sizeof(xt[0]));
}
}
}
} // namespace faiss

View File

@ -0,0 +1,87 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef INDEX_LSH_H
#define INDEX_LSH_H
#include <vector>
#include <faiss/Index.h>
#include <faiss/VectorTransform.h>
namespace faiss {
/** The sign of each vector component is put in a binary signature */
struct IndexLSH:Index {
typedef unsigned char uint8_t;
int nbits; ///< nb of bits per vector
 int bytes_per_vec; ///< nb of bytes per encoded vector
bool rotate_data; ///< whether to apply a random rotation to input
bool train_thresholds; ///< whether we train thresholds or use 0
RandomRotationMatrix rrot; ///< optional random rotation
std::vector <float> thresholds; ///< thresholds to compare with
/// encoded dataset
std::vector<uint8_t> codes;
IndexLSH (
idx_t d, int nbits,
bool rotate_data = true,
bool train_thresholds = false);
/** Preprocesses and resizes the input to the size required to
* binarize the data
*
* @param x input vectors, size n * d
 * @return output vectors, size n * nbits. May be the same pointer
* as x, otherwise it should be deleted by the caller
*/
const float *apply_preprocess (idx_t n, const float *x) const;
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void reset() override;
/// transfer the thresholds to a pre-processing stage (and unset
/// train_thresholds)
void transfer_thresholds (LinearTransform * vt);
~IndexLSH() override {}
IndexLSH ();
/* standalone codec interface */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
};
}
#endif
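/* Editor's sketch of the IndexLSH declared above: each vector becomes an
 * nbits-long binary signature and search ranks by Hamming distance
 * (returned as floats). Data and sizes are illustrative. */
#include <faiss/IndexLSH.h>
#include <random>
#include <vector>

int main () {
    int d = 64, nbits = 256;
    faiss::IndexLSH index (d, nbits);      // rotate_data = true by default
    size_t nb = 1000;
    std::vector<float> xb (nb * d);
    std::mt19937 rng (1);
    std::normal_distribution<float> g;
    for (float &v : xb) v = g (rng);
    index.add (nb, xb.data ());            // is_trained is already true here
    int k = 4;
    std::vector<float> dis (k);
    std::vector<faiss::Index::idx_t> ids (k);
    index.search (1, xb.data (), k, dis.data (), ids.data ());
    return 0;
}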

View File

@ -0,0 +1,143 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/IndexLattice.h>
#include <faiss/utils/hamming.h> // for the bitstring routines
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/distances.h>
namespace faiss {
IndexLattice::IndexLattice (idx_t d, int nsq, int scale_nbit, int r2):
Index (d),
nsq (nsq),
dsq (d / nsq),
zn_sphere_codec (dsq, r2),
scale_nbit (scale_nbit)
{
FAISS_THROW_IF_NOT (d % nsq == 0);
lattice_nbit = 0;
while (!( ((uint64_t)1 << lattice_nbit) >= zn_sphere_codec.nv)) {
lattice_nbit++;
}
int total_nbit = (lattice_nbit + scale_nbit) * nsq;
code_size = (total_nbit + 7) / 8;
is_trained = false;
}
void IndexLattice::train(idx_t n, const float* x)
{
// compute ranges per sub-block
trained.resize (nsq * 2);
float * mins = trained.data();
float * maxs = trained.data() + nsq;
for (int sq = 0; sq < nsq; sq++) {
mins[sq] = HUGE_VAL;
maxs[sq] = -1;
}
for (idx_t i = 0; i < n; i++) {
for (int sq = 0; sq < nsq; sq++) {
float norm2 = fvec_norm_L2sqr (x + i * d + sq * dsq, dsq);
if (norm2 > maxs[sq]) maxs[sq] = norm2;
if (norm2 < mins[sq]) mins[sq] = norm2;
}
}
for (int sq = 0; sq < nsq; sq++) {
mins[sq] = sqrtf (mins[sq]);
maxs[sq] = sqrtf (maxs[sq]);
}
is_trained = true;
}
/* The standalone codec interface */
size_t IndexLattice::sa_code_size () const
{
return code_size;
}
void IndexLattice::sa_encode (idx_t n, const float *x, uint8_t *codes) const
{
const float * mins = trained.data();
const float * maxs = mins + nsq;
int64_t sc = int64_t(1) << scale_nbit;
#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
BitstringWriter wr(codes + i * code_size, code_size);
const float *xi = x + i * d;
for (int j = 0; j < nsq; j++) {
float nj =
(sqrtf(fvec_norm_L2sqr(xi, dsq)) - mins[j])
* sc / (maxs[j] - mins[j]);
if (nj < 0) nj = 0;
if (nj >= sc) nj = sc - 1;
wr.write((int64_t)nj, scale_nbit);
wr.write(zn_sphere_codec.encode(xi), lattice_nbit);
xi += dsq;
}
}
}
void IndexLattice::sa_decode (idx_t n, const uint8_t *codes, float *x) const
{
const float * mins = trained.data();
const float * maxs = mins + nsq;
float sc = int64_t(1) << scale_nbit;
float r = sqrtf(zn_sphere_codec.r2);
#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
BitstringReader rd(codes + i * code_size, code_size);
float *xi = x + i * d;
for (int j = 0; j < nsq; j++) {
float norm =
(rd.read (scale_nbit) + 0.5) *
(maxs[j] - mins[j]) / sc + mins[j];
norm /= r;
zn_sphere_codec.decode (rd.read (lattice_nbit), xi);
for (int l = 0; l < dsq; l++) {
xi[l] *= norm;
}
xi += dsq;
}
}
}
void IndexLattice::add(idx_t , const float* )
{
FAISS_THROW_MSG("not implemented");
}
void IndexLattice::search(idx_t , const float* , idx_t ,
float* , idx_t* ) const
{
FAISS_THROW_MSG("not implemented");
}
void IndexLattice::reset()
{
FAISS_THROW_MSG("not implemented");
}
} // namespace faiss

View File

@ -0,0 +1,68 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_LATTICE_H
#define FAISS_INDEX_LATTICE_H
#include <vector>
#include <faiss/IndexIVF.h>
#include <faiss/impl/lattice_Zn.h>
namespace faiss {
/** Index that encodes a vector with a series of Zn lattice quantizers
*/
struct IndexLattice: Index {
/// number of sub-vectors
int nsq;
/// dimension of sub-vectors
size_t dsq;
/// the lattice quantizer
ZnSphereCodecAlt zn_sphere_codec;
/// nb bits used to encode the scale, per subvector
int scale_nbit, lattice_nbit;
/// total, in bytes
size_t code_size;
/// mins and maxes of the vector norms, per subquantizer
std::vector<float> trained;
IndexLattice (idx_t d, int nsq, int scale_nbit, int r2);
void train(idx_t n, const float* x) override;
/* The standalone codec interface */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
/// not implemented
void add(idx_t n, const float* x) override;
void search(idx_t n, const float* x, idx_t k,
float* distances, idx_t* labels) const override;
void reset() override;
};
} // namespace faiss
#endif
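/* Editor's sketch of IndexLattice used as a standalone codec, its main
 * role here (add/search/reset throw "not implemented"). The parameter
 * choices (sub-vector dim 32, squared radius r2 = 10, 4 scale bits) are
 * illustrative assumptions. */
#include <faiss/IndexLattice.h>
#include <cstdint>
#include <random>
#include <vector>

int main () {
    int d = 64, nsq = 2, scale_nbit = 4, r2 = 10;
    faiss::IndexLattice codec (d, nsq, scale_nbit, r2);
    size_t n = 1000;
    std::vector<float> x (n * d);
    std::mt19937 rng (0);
    std::normal_distribution<float> g;
    for (float &v : x) v = g (rng);
    codec.train (n, x.data ());            // per-sub-vector norm ranges
    std::vector<uint8_t> codes (n * codec.sa_code_size ());
    codec.sa_encode (n, x.data (), codes.data ());
    std::vector<float> x2 (n * d);
    codec.sa_decode (n, codes.data (), x2.data ());  // lossy round-trip
    return 0;
}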

File diff suppressed because it is too large

View File

@ -0,0 +1,199 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INDEX_PQ_H
#define FAISS_INDEX_PQ_H
#include <stdint.h>
#include <vector>
#include <faiss/Index.h>
#include <faiss/impl/ProductQuantizer.h>
#include <faiss/impl/PolysemousTraining.h>
namespace faiss {
/** Index based on a product quantizer. Stored vectors are
* approximated by PQ codes. */
struct IndexPQ: Index {
/// The product quantizer used to encode the vectors
ProductQuantizer pq;
/// Codes. Size ntotal * pq.code_size
std::vector<uint8_t> codes;
/** Constructor.
*
* @param d dimensionality of the input vectors
* @param M number of subquantizers
* @param nbits number of bit per subvector index
*/
IndexPQ (int d, ///< dimensionality of the input vectors
size_t M, ///< number of subquantizers
size_t nbits, ///< number of bit per subvector index
MetricType metric = METRIC_L2);
IndexPQ ();
void train(idx_t n, const float* x) override;
void add(idx_t n, const float* x) override;
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels) const override;
void reset() override;
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
void reconstruct(idx_t key, float* recons) const override;
size_t remove_ids(const IDSelector& sel) override;
/* The standalone codec interface */
size_t sa_code_size () const override;
void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const override;
void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const override;
DistanceComputer * get_distance_computer() const override;
/******************************************************
* Polysemous codes implementation
******************************************************/
bool do_polysemous_training; ///< false = standard PQ
/// parameters used for the polysemous training
PolysemousTraining polysemous_training;
/// how to perform the search in search_core
enum Search_type_t {
ST_PQ, ///< asymmetric product quantizer (default)
ST_HE, ///< Hamming distance on codes
ST_generalized_HE, ///< nb of same codes
ST_SDC, ///< symmetric product quantizer (SDC)
ST_polysemous, ///< HE filter (using ht) + PQ combination
ST_polysemous_generalize, ///< Filter on generalized Hamming
};
Search_type_t search_type;
// just encode the sign of the components, instead of using the PQ encoder
// used only for the queries
bool encode_signs;
/// Hamming threshold used for polysemy
int polysemous_ht;
// actual polysemous search
void search_core_polysemous (idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const;
/// prepare query for a polysemous search, but instead of
/// computing the result, just get the histogram of Hamming
/// distances. May be computed on a provided dataset if xb != NULL
/// @param dist_histogram (M * nbits + 1)
void hamming_distance_histogram (idx_t n, const float *x,
idx_t nb, const float *xb,
int64_t *dist_histogram);
/** compute pairwise distances between queries and database
*
* @param n nb of query vectors
* @param x query vector, size n * d
* @param dis output distances, size n * ntotal
*/
void hamming_distance_table (idx_t n, const float *x,
int32_t *dis) const;
};
/// statistics are robust to internal threading, but not if
/// IndexPQ::search is called by multiple threads
struct IndexPQStats {
size_t nq; // nb of queries run
size_t ncode; // nb of codes visited
size_t n_hamming_pass; // nb of passed Hamming distance tests (for polysemy)
IndexPQStats () {reset (); }
void reset ();
};
extern IndexPQStats indexPQ_stats;
/** Quantizer where centroids are virtual: they are the Cartesian
* product of sub-centroids. */
struct MultiIndexQuantizer: Index {
ProductQuantizer pq;
MultiIndexQuantizer (int d, ///< dimension of the input vectors
size_t M, ///< number of subquantizers
size_t nbits); ///< number of bit per subvector index
void train(idx_t n, const float* x) override;
void search(
idx_t n, const float* x, idx_t k,
float* distances, idx_t* labels) const override;
/// add and reset will crash at runtime
void add(idx_t n, const float* x) override;
void reset() override;
MultiIndexQuantizer () {}
void reconstruct(idx_t key, float* recons) const override;
};
/** MultiIndexQuantizer where the PQ assignment is performed by sub-indexes
*/
struct MultiIndexQuantizer2: MultiIndexQuantizer {
/// M Indexes on d / M dimensions
std::vector<Index*> assign_indexes;
bool own_fields;
MultiIndexQuantizer2 (
int d, size_t M, size_t nbits,
Index **indexes);
MultiIndexQuantizer2 (
int d, size_t nbits,
Index *assign_index_0,
Index *assign_index_1);
void train(idx_t n, const float* x) override;
void search(
idx_t n, const float* x, idx_t k,
float* distances, idx_t* labels) const override;
};
} // namespace faiss
#endif
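/* Editor's sketch of the plain IndexPQ declared above: vectors are stored
 * as M-byte PQ codes and searched with asymmetric distances (ST_PQ, the
 * default). Sizes and the commented polysemous settings are illustrative. */
#include <faiss/IndexPQ.h>
#include <random>
#include <vector>

int main () {
    int d = 64; size_t M = 8, nbits = 8, nb = 5000;
    faiss::IndexPQ index (d, M, nbits);
    std::vector<float> xb (nb * d);
    std::mt19937 rng (7);
    std::normal_distribution<float> g;
    for (float &v : xb) v = g (rng);
    index.train (nb, xb.data ());          // trains the product quantizer
    index.add (nb, xb.data ());
    // polysemous filtering would additionally require, before train():
    //   index.do_polysemous_training = true;
    // and at search time:
    //   index.search_type = faiss::IndexPQ::ST_polysemous;
    //   index.polysemous_ht = 54;         // Hamming threshold
    int k = 4;
    std::vector<float> dis (k);
    std::vector<faiss::Index::idx_t> ids (k);
    index.search (1, xb.data (), k, dis.data (), ids.data ());
    return 0;
}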

Some files were not shown because too many files have changed in this diff