From 635434126c8b7a6372ee0a46a653cdc517ecdf5d Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 17:44:57 +0800 Subject: [PATCH 01/11] #89 add faiss benchmark Former-commit-id: 12f9741900e36bb22cd8b7839f16174fd5d0c6f9 --- core/src/index/unittest/CMakeLists.txt | 1 + .../unittest/faiss_benchmark/CMakeLists.txt | 24 + .../faiss_benchmark/faiss_benchmark_test.cpp | 546 ++++++++++++++++++ 3 files changed, 571 insertions(+) create mode 100644 core/src/index/unittest/faiss_benchmark/CMakeLists.txt create mode 100644 core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt index f840b28e28..2e84908cd7 100644 --- a/core/src/index/unittest/CMakeLists.txt +++ b/core/src/index/unittest/CMakeLists.txt @@ -86,5 +86,6 @@ install(TARGETS test_gpuresource DESTINATION unittest) install(TARGETS test_customized_index DESTINATION unittest) #add_subdirectory(faiss_ori) +#add_subdirectory(faiss_benchmark) add_subdirectory(test_nsg) diff --git a/core/src/index/unittest/faiss_benchmark/CMakeLists.txt b/core/src/index/unittest/faiss_benchmark/CMakeLists.txt new file mode 100644 index 0000000000..556364b68a --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/CMakeLists.txt @@ -0,0 +1,24 @@ +include_directories(${INDEX_SOURCE_DIR}/thirdparty) +include_directories(${INDEX_SOURCE_DIR}/include) +include_directories(/usr/local/cuda/include) +include_directories(/usr/local/hdf5/include) + +link_directories(/usr/local/cuda/lib64) +link_directories(/usr/local/hdf5/lib) + +set(unittest_libs + gtest gmock gtest_main gmock_main) + +set(depend_libs + faiss openblas lapack hdf5 + arrow ${ARROW_PREFIX}/lib/libjemalloc_pic.a + ) + +set(basic_libs + cudart cublas + gomp gfortran pthread + ) + +add_executable(test_faiss_benchmark faiss_benchmark_test.cpp) +target_link_libraries(test_faiss_benchmark ${depend_libs} ${unittest_libs} ${basic_libs}) +install(TARGETS test_faiss_benchmark 
DESTINATION unittest) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp new file mode 100644 index 0000000000..5ece23c7aa --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -0,0 +1,546 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +/***************************************************** + * To run this test, please download the HDF5 from + * https://support.hdfgroup.org/ftp/HDF5/releases/ + * and install it to /usr/local/hdf5 . 
+ *****************************************************/ + +double elapsed() { + struct timeval tv; + gettimeofday(&tv, nullptr); + return tv.tv_sec + tv.tv_usec * 1e-6; +} + +void* hdf5_read(const char *file_name, + const char *dataset_name, + H5T_class_t dataset_class, + size_t &d_out, + size_t &n_out) { + hid_t file, dataset, datatype, dataspace, memspace; + H5T_class_t t_class; /* data type class */ + H5T_order_t order; /* data order */ + size_t size; /* size of the data element stored in file */ + hsize_t dimsm[3]; /* memory space dimensions */ + hsize_t dims_out[2]; /* dataset dimensions */ + hsize_t count[2]; /* size of the hyperslab in the file */ + hsize_t offset[2]; /* hyperslab offset in the file */ + hsize_t count_out[3]; /* size of the hyperslab in memory */ + hsize_t offset_out[3]; /* hyperslab offset in memory */ + int rank; + void* data_out; /* output buffer */ + + /* Open the file and the dataset. */ + file = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); + dataset = H5Dopen2(file, dataset_name, H5P_DEFAULT); + + /* + * Get datatype and dataspace handles and then query + * dataset class, order, size, rank and dimensions. + */ + datatype = H5Dget_type(dataset); /* datatype handle */ + t_class = H5Tget_class(datatype); + assert(t_class == dataset_class || !"Illegal dataset class type"); + + order = H5Tget_order(datatype); + switch (order) { + case H5T_ORDER_LE: + printf("Little endian order \n"); + break; + case H5T_ORDER_BE: + printf("Big endian order \n"); + break; + default: + printf("Illegal endian order \n"); + break; + } + + size = H5Tget_size(datatype); + printf("Data size is %d \n", (int)size); + + dataspace = H5Dget_space(dataset); /* dataspace handle */ + rank = H5Sget_simple_extent_ndims(dataspace); + H5Sget_simple_extent_dims(dataspace, dims_out, NULL); + n_out = dims_out[0]; + d_out = dims_out[1]; + printf("rank %d, dimensions %lu x %lu \n", rank, n_out, d_out); + + /* Define hyperslab in the dataset. 
*/ + offset[0] = offset[1] = 0; + count[0] = dims_out[0]; + count[1] = dims_out[1]; + H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL); + + /* Define the memory dataspace. */ + dimsm[0] = dims_out[0]; + dimsm[1] = dims_out[1]; + dimsm[2] = 1; + memspace = H5Screate_simple(3, dimsm, NULL); + + /* Define memory hyperslab. */ + offset_out[0] = offset_out[1] = offset_out[2] = 0; + count_out[0] = dims_out[0]; + count_out[1] = dims_out[1]; + count_out[2] = 1; + H5Sselect_hyperslab(memspace, H5S_SELECT_SET, offset_out, NULL, count_out, NULL); + + /* Read data from hyperslab in the file into the hyperslab in memory and display. */ + switch (t_class) { + case H5T_INTEGER: + data_out = new int[dims_out[0] * dims_out[1]]; + H5Dread(dataset, H5T_NATIVE_INT, memspace, dataspace, H5P_DEFAULT, data_out); + break; + case H5T_FLOAT: + data_out = new float[dims_out[0] * dims_out[1]]; + H5Dread(dataset, H5T_NATIVE_FLOAT, memspace, dataspace, H5P_DEFAULT, data_out); + break; + default: + printf("Illegal dataset class type\n"); + break; + } + + /* Close/release resources. 
*/ + H5Tclose(datatype); + H5Dclose(dataset); + H5Sclose(dataspace); + H5Sclose(memspace); + H5Fclose(file); + + return data_out; +} + +std::string get_index_file_name(const std::string& ann_test_name, + const std::string& index_key, + int32_t data_loops) { + size_t pos = index_key.find_first_of(',', 0); + std::string file_name = ann_test_name; + file_name = file_name + "_" + index_key.substr(0, pos) + "_" + index_key.substr(pos+1); + file_name = file_name + "_" + std::to_string(data_loops) + ".index"; + return file_name; +} + +bool parse_ann_test_name(const std::string& ann_test_name, + size_t &dim, + faiss::MetricType &metric_type) { + size_t pos1, pos2; + + if (ann_test_name.empty()) return false; + + pos1 = ann_test_name.find_first_of('-', 0); + if (pos1 == std::string::npos) return false; + pos2 = ann_test_name.find_first_of('-', pos1 + 1); + if (pos2 == std::string::npos) return false; + + dim = std::stoi(ann_test_name.substr(pos1+1, pos2-pos1-1)); + std::string metric_str = ann_test_name.substr(pos2+1); + if (metric_str == "angular") { + metric_type = faiss::METRIC_INNER_PRODUCT; + } else if (metric_str == "euclidean") { + metric_type = faiss::METRIC_L2; + } else { + return false; + } + + return true; +} + +void test_ann_hdf5(const std::string& ann_test_name, + const std::string& index_key, + int32_t index_add_loops, + const std::vector& nprobes) { + double t0 = elapsed(); + + const std::string ann_file_name = ann_test_name + ".hdf5"; + + faiss::MetricType metric_type; + size_t dim; + + if (!parse_ann_test_name(ann_test_name, dim, metric_type)) { + printf("Invalid ann test name: %s\n", ann_test_name.c_str()); + return; + } + + faiss::Index * index; + size_t d; + + std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); + try { + index = faiss::read_index(index_file_name.c_str()); + d = dim; + } + catch (...) 
{ + printf("Cannot read index file: %s\n", index_file_name.c_str()); + + printf ("[%.3f s] Loading train set\n", elapsed() - t0); + + size_t nb; + float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + assert(d == dim || !"dataset does not have correct dimension"); + + printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", + elapsed() - t0, index_key.c_str(), d); + + index = faiss::index_factory(d, index_key.c_str(), metric_type); + + printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + + index->train(nb, xb); + + printf ("[%.3f s] Loading database\n", elapsed() - t0); + + // add index multiple times to get ~1G data set + for (int i = 0; i < index_add_loops; i++) { + printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + index->add(nb, xb); + } + + faiss::write_index(index, index_file_name.c_str()); + + delete [] xb; + } + + size_t nq; + float *xq; + { + printf ("[%.3f s] Loading queries\n", elapsed() - t0); + + size_t d2; + xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); + assert(d == d2 || !"query does not have same dimension as train set"); + } + + size_t k; // nb of results per query in the GT + faiss::Index::idx_t *gt; // nq * k matrix of ground-truth nearest-neighbors + { + printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + + // load ground-truth and convert int to long + size_t nq2; + int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + assert(nq2 == nq || !"incorrect nb of ground truth entries"); + + gt = new faiss::Index::idx_t[k * nq]; + for(int i = 0; i < k * nq; i++) { + gt[i] = gt_int[i]; + } + delete [] gt_int; + } + + for (auto nprobe : nprobes) { + + faiss::ParameterSpace params; + + printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); + + std::string nprobe_str = "nprobe=" + std::to_string(nprobe); + params.set_index_parameters(index, 
nprobe_str.c_str()); + + // output buffers +#if 1 + const size_t NQ = 1000, K = 1000; + faiss::Index::idx_t *I = new faiss::Index::idx_t[NQ * K]; + float *D = new float[NQ * K]; + + printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf ("====================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + double t_start = elapsed(), t_end; + + index->search(t_nq, xq, t_k, D, I); + + t_end = elapsed(); + + // k = 100 for ground truth + int hit = 0; + for (int i = 0; i < t_nq; i++) { + // count the num of results exist in ground truth result set + // consider: each result replicates DATA_LOOPS times + for (int j_c = 0; j_c < k; j_c++) { + int r_c = I[i * t_k + j_c]; + for (int j_g = 0; j_g < k/index_add_loops; j_g++) { + if (gt[i * k + j_g] == r_c) { + hit++; + continue; + } + } + } + } + printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + } + } + printf ("====================================================\n"); +#else + printf ("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); + + faiss::Index::idx_t *I = new faiss::Index::idx_t[nq * k]; + float *D = new float[nq * k]; + + index->search(nq, xq, k, D, I); + + printf ("[%.3f s] Compute recalls\n", elapsed() - t0); + + // evaluate result by hand. 
+ int n_1 = 0, n_10 = 0, n_100 = 0; + for(int i = 0; i < nq; i++) { + int gt_nn = gt[i * k]; + for(int j = 0; j < k; j++) { + if (I[i * k + j] == gt_nn) { + if(j < 1) n_1++; + if(j < 10) n_10++; + if(j < 100) n_100++; + } + } + } + printf("R@1 = %.4f\n", n_1 / float(nq)); + printf("R@10 = %.4f\n", n_10 / float(nq)); + printf("R@100 = %.4f\n", n_100 / float(nq)); +#endif + + printf ("[%.3f s] Search test done\n\n", elapsed() - t0); + + delete [] I; + delete [] D; + } + + delete [] xq; + delete [] gt; + delete index; +} + +#ifdef CUSTOMIZATION +void test_ivfsq8h_gpu(const std::string& ann_test_name, + int32_t index_add_loops, + const std::vector& nprobes){ + double t0 = elapsed(); + + const std::string ann_file_name = ann_test_name + ".hdf5"; + + faiss::MetricType metric_type; + size_t dim; + + if (!parse_ann_test_name(ann_test_name, dim, metric_type)) { + printf("Invalid ann test name: %s\n", ann_test_name.c_str()); + return; + } + + faiss::distance_compute_blas_threshold = 800; + faiss::gpu::StandardGpuResources res; + + const std::string index_key = "IVF16384,SQ8Hybrid"; + + faiss::Index* cpu_index = nullptr; + size_t d; + + std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); + try{ + cpu_index = faiss::read_index(index_file_name.c_str()); + d = dim; + } + catch (...){ + printf("Cannot read index file: %s\n", index_file_name.c_str()); + + printf ("[%.3f s] Loading train set\n", elapsed() - t0); + + size_t nb; + float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + assert(d == dim || !"dataset does not have correct dimension"); + + printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); + + faiss::Index *ori_index = faiss::index_factory(d, index_key.c_str(), metric_type); + + auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index); + + printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + + device_index->train(nb, xb); + + printf 
("[%.3f s] Loading database\n", elapsed() - t0); + + for (int i = 0; i < index_add_loops; i++) { + printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + device_index->add(nb, xb); + } + + cpu_index = faiss::gpu::index_gpu_to_cpu(device_index); + faiss::write_index(cpu_index, index_file_name.c_str()); + + delete []xb; + } + + faiss::IndexIVF *cpu_ivf_index = dynamic_cast(cpu_index); + if(cpu_ivf_index != nullptr) { + cpu_ivf_index->to_readonly(); + } + + faiss::gpu::GpuClonerOptions option; + option.allInGpu = true; + + faiss::IndexComposition index_composition; + index_composition.index = cpu_index; + index_composition.quantizer = nullptr; + index_composition.mode = 1; + + auto index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + delete index; + + size_t nq; + float *xq; + { + printf ("[%.3f s] Loading queries\n", elapsed() - t0); + + size_t d2; + xq = (float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); + assert(d == d2 || !"query does not have same dimension as train set"); + } + + size_t k; + faiss::Index::idx_t *gt; + { + printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + + size_t nq2; + int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + assert(nq2 == nq || !"incorrect nb of ground truth entries"); + + gt = new faiss::Index::idx_t[k * nq]; + for (unsigned long i = 0; i < k * nq; ++i) { + gt[i] = gt_int[i]; + } + delete []gt_int; + } + + for (auto nprobe : nprobes){ + printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", + elapsed() - t0, nprobe); + + auto ivf_index = dynamic_cast(cpu_index); + ivf_index->nprobe = nprobe; + + auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); + if(is_gpu_flat_index == nullptr) { + delete ivf_index->quantizer; + ivf_index->quantizer = index_composition.quantizer; + } + + const size_t NQ = 1000, K = 1000; + long *I = new faiss::Index::idx_t[NQ * K]; + float *D = 
new float[NQ * K]; + + printf ("\n%s %ld\n", index_key.c_str(), nprobe); + printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf ("====================================================\n"); + + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + double t_start = elapsed(), t_end; + + cpu_index->search(t_nq, xq, t_k, D, I); + + t_end = elapsed(); + + // k = 100 for ground truth + int hit = 0; + for (unsigned long i = 0; i < t_nq; i++) { + // count the num of results exist in ground truth result set + // consider: each result replicates DATA_LOOPS times + for (unsigned long j_c = 0; j_c < k; j_c++) { + int r_c = I[i * t_k + j_c]; + for (unsigned long j_g = 0; j_g < k/index_add_loops; j_g++) { + if (gt[i * k + j_g] == r_c) { + hit++; + continue; + } + } + } + } + printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + } + } + printf ("====================================================\n"); + + printf ("[%.3f s] Search test done\n\n", elapsed() - t0); + + delete [] I; + delete [] D; + } + + delete [] xq; + delete [] gt; + delete cpu_index; +} +#endif + +/************************************************************************************ + * https://github.com/erikbern/ann-benchmarks + * + * Dataset Dimensions Train_size Test_size Neighbors Distance Download + * Fashion- + * MNIST 784 60,000 10,000 100 Euclidean HDF5 (217MB) + * GIST 960 1,000,000 1,000 100 Euclidean HDF5 (3.6GB) + * GloVe 100 1,183,514 10,000 100 Angular HDF5 (463MB) + * GloVe 200 1,183,514 10,000 100 Angular HDF5 (918MB) + * MNIST 784 60,000 10,000 100 Euclidean HDF5 (217MB) + * NYTimes 256 290,000 10,000 100 Angular HDF5 (301MB) + * SIFT 128 1,000,000 10,000 100 Euclidean HDF5 (501MB) +*************************************************************************************/ + +TEST(FAISSTEST, 
sift1m_L2) { + test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); +#ifdef CUSTOMIZATION + test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); +#endif + + test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); +#ifdef CUSTOMIZATION + test_ivfsq8h_gpu("glove-200-angular", 2, {128, 1024}); +#endif +} + From 52ca4c4ae2578e270ca4b41a6f8f1f4e1b2a68d9 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 17:46:27 +0800 Subject: [PATCH 02/11] update test parameter Former-commit-id: 0871f4f0acd87280b5327caf4f14dae5b0d5e165 --- .../src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 5ece23c7aa..5f787a415c 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -540,7 +540,7 @@ TEST(FAISSTEST, sift1m_L2) { test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); #ifdef CUSTOMIZATION - test_ivfsq8h_gpu("glove-200-angular", 2, {128, 1024}); + test_ivfsq8h_gpu("glove-200-angular", 2, {8, 128}); #endif } From 4c7d590eb22323d1ef0f6a474a1f332d5c4ebd66 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 18:24:35 +0800 Subject: [PATCH 03/11] 9 update unittest name Former-commit-id: 4bd87de1ceee872b9de13c5e5c65bab7291eb9c7 --- .../index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 5f787a415c..f1dc060825 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -469,10 +469,8 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, long *I = new faiss::Index::idx_t[NQ * K]; float *D = new float[NQ * K]; - printf ("\n%s %ld\n", index_key.c_str(), nprobe); printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); printf ("====================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} double t_start = elapsed(), t_end; @@ -528,7 +526,7 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, * SIFT 128 1,000,000 10,000 100 Euclidean HDF5 (501MB) *************************************************************************************/ -TEST(FAISSTEST, sift1m_L2) { +TEST(FAISSTEST, BENCHMARK) { test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); From 82ddcf8340b84a1eb2f4ea0209554a006f4dac6a Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Wed, 23 Oct 2019 18:39:43 +0800 Subject: [PATCH 04/11] #89 display quant/search time Former-commit-id: 563141ab22274ca0e3e84253df8a79af58c16eca --- .../faiss_benchmark/faiss_benchmark_test.cpp | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index f1dc060825..5d63d63003 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ 
b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -282,9 +282,12 @@ void test_ann_hdf5(const std::string& ann_test_name, float *D = new float[NQ * K]; printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("====================================================\n"); + printf ("============================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + double t_start = elapsed(), t_end; index->search(t_nq, xq, t_k, D, I); @@ -306,11 +309,14 @@ void test_ann_hdf5(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), + faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, + (hit / float(t_nq * k / index_add_loops))); } } - printf ("====================================================\n"); + printf ("============================================================================================\n"); #else printf ("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); @@ -470,9 +476,12 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, float *D = new float[NQ * K]; printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("====================================================\n"); + printf ("============================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + 
faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + double t_start = elapsed(), t_end; cpu_index->search(t_nq, xq, t_k, D, I); @@ -494,11 +503,14 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs, R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", + t_nq, t_k, (t_end - t_start), + faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, + (hit / float(t_nq * k / index_add_loops))); } } - printf ("====================================================\n"); + printf ("============================================================================================\n"); printf ("[%.3f s] Search test done\n\n", elapsed() - t0); From ec022c330d264ec6fcbd28ef28e613b4d3804b45 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 09:33:09 +0800 Subject: [PATCH 05/11] #89 update unittest Former-commit-id: f9b518f2961f3c7da30a76a53a49da8403208a0b --- .../index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 5d63d63003..3d60574231 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -541,15 +541,15 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, TEST(FAISSTEST, BENCHMARK) { test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); #ifdef CUSTOMIZATION + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); 
test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); #endif test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); #ifdef CUSTOMIZATION + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); test_ivfsq8h_gpu("glove-200-angular", 2, {8, 128}); #endif } From 4f5906b9bc6316523f2de708b2d3e6cb448aec2d Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 09:44:01 +0800 Subject: [PATCH 06/11] #89 update SQ8Hybrid-gpu log Former-commit-id: c2e70121ee65ed044c059ac3948b3412353e829e --- .../src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 3d60574231..0c7cb97807 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -475,7 +475,7 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, long *I = new faiss::Index::idx_t[NQ * K]; float *D = new float[NQ * K]; - printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf ("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); printf ("============================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} From d3d6077eb2daedb92b1a33a2e281e6445afd548f Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 10:24:02 +0800 Subject: [PATCH 07/11] #89 update unittest parameter Former-commit-id: 4692890b67109edefbd0cc0a0a5a628f6433306d --- .../src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index 0c7cb97807..ed00e74a98 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -550,7 +550,7 @@ TEST(FAISSTEST, BENCHMARK) { test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); #ifdef CUSTOMIZATION test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); - test_ivfsq8h_gpu("glove-200-angular", 2, {8, 128}); + test_ivfsq8h_gpu("glove-200-angular", 1, {8, 128}); #endif } From 65b46de1ac7629948cd27a2ba00c5872b48b2f8b Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Thu, 24 Oct 2019 10:56:12 +0800 Subject: [PATCH 08/11] #89 code format Former-commit-id: ced158f26d9c18e38c7afb84ad17fdb6f9057259 --- .../faiss_benchmark/faiss_benchmark_test.cpp | 282 +++++++++--------- 1 file changed, 136 insertions(+), 146 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index ed00e74a98..d1db0e9049 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -17,28 +17,28 @@ #include +#include #include #include #include -#include #include #include #include #include -#include #include #include +#include #include #include #include -#include #include #include #include #include +#include /***************************************************** * To run this test, please download the HDF5 from @@ -46,29 +46,27 @@ * and install it to /usr/local/hdf5 . 
*****************************************************/ -double elapsed() { +double +elapsed() { struct timeval tv; gettimeofday(&tv, nullptr); return tv.tv_sec + tv.tv_usec * 1e-6; } -void* hdf5_read(const char *file_name, - const char *dataset_name, - H5T_class_t dataset_class, - size_t &d_out, - size_t &n_out) { - hid_t file, dataset, datatype, dataspace, memspace; - H5T_class_t t_class; /* data type class */ - H5T_order_t order; /* data order */ - size_t size; /* size of the data element stored in file */ - hsize_t dimsm[3]; /* memory space dimensions */ - hsize_t dims_out[2]; /* dataset dimensions */ - hsize_t count[2]; /* size of the hyperslab in the file */ - hsize_t offset[2]; /* hyperslab offset in the file */ - hsize_t count_out[3]; /* size of the hyperslab in memory */ - hsize_t offset_out[3]; /* hyperslab offset in memory */ - int rank; - void* data_out; /* output buffer */ +void* +hdf5_read(const char* file_name, const char* dataset_name, H5T_class_t dataset_class, size_t& d_out, size_t& n_out) { + hid_t file, dataset, datatype, dataspace, memspace; + H5T_class_t t_class; /* data type class */ + H5T_order_t order; /* data order */ + size_t size; /* size of the data element stored in file */ + hsize_t dimsm[3]; /* memory space dimensions */ + hsize_t dims_out[2]; /* dataset dimensions */ + hsize_t count[2]; /* size of the hyperslab in the file */ + hsize_t offset[2]; /* hyperslab offset in the file */ + hsize_t count_out[3]; /* size of the hyperslab in memory */ + hsize_t offset_out[3]; /* hyperslab offset in memory */ + int rank; + void* data_out; /* output buffer */ /* Open the file and the dataset. */ file = H5Fopen(file_name, H5F_ACC_RDONLY, H5P_DEFAULT); @@ -78,7 +76,7 @@ void* hdf5_read(const char *file_name, * Get datatype and dataspace handles and then query * dataset class, order, size, rank and dimensions. 
*/ - datatype = H5Dget_type(dataset); /* datatype handle */ + datatype = H5Dget_type(dataset); /* datatype handle */ t_class = H5Tget_class(datatype); assert(t_class == dataset_class || !"Illegal dataset class type"); @@ -95,11 +93,11 @@ void* hdf5_read(const char *file_name, break; } - size = H5Tget_size(datatype); + size = H5Tget_size(datatype); printf("Data size is %d \n", (int)size); - dataspace = H5Dget_space(dataset); /* dataspace handle */ - rank = H5Sget_simple_extent_ndims(dataspace); + dataspace = H5Dget_space(dataset); /* dataspace handle */ + rank = H5Sget_simple_extent_ndims(dataspace); H5Sget_simple_extent_dims(dataspace, dims_out, NULL); n_out = dims_out[0]; d_out = dims_out[1]; @@ -107,8 +105,8 @@ void* hdf5_read(const char *file_name, /* Define hyperslab in the dataset. */ offset[0] = offset[1] = 0; - count[0] = dims_out[0]; - count[1] = dims_out[1]; + count[0] = dims_out[0]; + count[1] = dims_out[1]; H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL); /* Define the memory dataspace. */ @@ -119,9 +117,9 @@ void* hdf5_read(const char *file_name, /* Define memory hyperslab. */ offset_out[0] = offset_out[1] = offset_out[2] = 0; - count_out[0] = dims_out[0]; - count_out[1] = dims_out[1]; - count_out[2] = 1; + count_out[0] = dims_out[0]; + count_out[1] = dims_out[1]; + count_out[2] = 1; H5Sselect_hyperslab(memspace, H5S_SELECT_SET, offset_out, NULL, count_out, NULL); /* Read data from hyperslab in the file into the hyperslab in memory and display. 
*/ @@ -149,30 +147,31 @@ void* hdf5_read(const char *file_name, return data_out; } -std::string get_index_file_name(const std::string& ann_test_name, - const std::string& index_key, - int32_t data_loops) { +std::string +get_index_file_name(const std::string& ann_test_name, const std::string& index_key, int32_t data_loops) { size_t pos = index_key.find_first_of(',', 0); std::string file_name = ann_test_name; - file_name = file_name + "_" + index_key.substr(0, pos) + "_" + index_key.substr(pos+1); + file_name = file_name + "_" + index_key.substr(0, pos) + "_" + index_key.substr(pos + 1); file_name = file_name + "_" + std::to_string(data_loops) + ".index"; return file_name; } -bool parse_ann_test_name(const std::string& ann_test_name, - size_t &dim, - faiss::MetricType &metric_type) { +bool +parse_ann_test_name(const std::string& ann_test_name, size_t& dim, faiss::MetricType& metric_type) { size_t pos1, pos2; - if (ann_test_name.empty()) return false; + if (ann_test_name.empty()) + return false; pos1 = ann_test_name.find_first_of('-', 0); - if (pos1 == std::string::npos) return false; + if (pos1 == std::string::npos) + return false; pos2 = ann_test_name.find_first_of('-', pos1 + 1); - if (pos2 == std::string::npos) return false; + if (pos2 == std::string::npos) + return false; - dim = std::stoi(ann_test_name.substr(pos1+1, pos2-pos1-1)); - std::string metric_str = ann_test_name.substr(pos2+1); + dim = std::stoi(ann_test_name.substr(pos1 + 1, pos2 - pos1 - 1)); + std::string metric_str = ann_test_name.substr(pos2 + 1); if (metric_str == "angular") { metric_type = faiss::METRIC_INNER_PRODUCT; } else if (metric_str == "euclidean") { @@ -184,10 +183,9 @@ bool parse_ann_test_name(const std::string& ann_test_name, return true; } -void test_ann_hdf5(const std::string& ann_test_name, - const std::string& index_key, - int32_t index_add_loops, - const std::vector& nprobes) { +void +test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, int32_t 
index_add_loops, + const std::vector& nprobes) { double t0 = elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -200,77 +198,74 @@ void test_ann_hdf5(const std::string& ann_test_name, return; } - faiss::Index * index; + faiss::Index* index; size_t d; std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); try { index = faiss::read_index(index_file_name.c_str()); d = dim; - } - catch (...) { + } catch (...) { printf("Cannot read index file: %s\n", index_file_name.c_str()); - printf ("[%.3f s] Loading train set\n", elapsed() - t0); + printf("[%.3f s] Loading train set\n", elapsed() - t0); size_t nb; - float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); assert(d == dim || !"dataset does not have correct dimension"); - printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", - elapsed() - t0, index_key.c_str(), d); + printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); index = faiss::index_factory(d, index_key.c_str(), metric_type); - printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); index->train(nb, xb); - printf ("[%.3f s] Loading database\n", elapsed() - t0); + printf("[%.3f s] Loading database\n", elapsed() - t0); // add index multiple times to get ~1G data set for (int i = 0; i < index_add_loops; i++) { - printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + printf("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); index->add(nb, xb); } faiss::write_index(index, index_file_name.c_str()); - delete [] xb; + delete[] xb; } size_t nq; - float *xq; + float* xq; { - printf ("[%.3f s] Loading queries\n", elapsed() - t0); + printf("[%.3f s] Loading queries\n", elapsed() - t0); size_t d2; xq = (float*)hdf5_read(ann_file_name.c_str(), "test", 
H5T_FLOAT, d2, nq); assert(d == d2 || !"query does not have same dimension as train set"); } - size_t k; // nb of results per query in the GT - faiss::Index::idx_t *gt; // nq * k matrix of ground-truth nearest-neighbors + size_t k; // nb of results per query in the GT + faiss::Index::idx_t* gt; // nq * k matrix of ground-truth nearest-neighbors { - printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); // load ground-truth and convert int to long size_t nq2; - int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + int* gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); assert(nq2 == nq || !"incorrect nb of ground truth entries"); gt = new faiss::Index::idx_t[k * nq]; - for(int i = 0; i < k * nq; i++) { + for (int i = 0; i < k * nq; i++) { gt[i] = gt_int[i]; } - delete [] gt_int; + delete[] gt_int; } for (auto nprobe : nprobes) { - faiss::ParameterSpace params; - printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); + printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); std::string nprobe_str = "nprobe=" + std::to_string(nprobe); params.set_index_parameters(index, nprobe_str.c_str()); @@ -278,13 +273,13 @@ void test_ann_hdf5(const std::string& ann_test_name, // output buffers #if 1 const size_t NQ = 1000, K = 1000; - faiss::Index::idx_t *I = new faiss::Index::idx_t[NQ * K]; - float *D = new float[NQ * K]; + faiss::Index::idx_t* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; - printf ("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("============================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 
10) { // k = {100, 1000} + printf("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("============================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; @@ -301,7 +296,7 @@ void test_ann_hdf5(const std::string& ann_test_name, // consider: each result replicates DATA_LOOPS times for (int j_c = 0; j_c < k; j_c++) { int r_c = I[i * t_k + j_c]; - for (int j_g = 0; j_g < k/index_add_loops; j_g++) { + for (int j_g = 0; j_g < k / index_add_loops; j_g++) { if (gt[i * k + j_g] == r_c) { hit++; continue; @@ -309,33 +304,34 @@ void test_ann_hdf5(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), - faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, - (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, + (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); } } - printf ("============================================================================================\n"); + printf("============================================================================================\n"); #else - printf ("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); + printf("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); - faiss::Index::idx_t *I = new faiss::Index::idx_t[nq * k]; - float *D = new float[nq * k]; + faiss::Index::idx_t* I = new faiss::Index::idx_t[nq * k]; + float* D = new float[nq * k]; index->search(nq, xq, k, D, I); - printf 
("[%.3f s] Compute recalls\n", elapsed() - t0); + printf("[%.3f s] Compute recalls\n", elapsed() - t0); // evaluate result by hand. int n_1 = 0, n_10 = 0, n_100 = 0; - for(int i = 0; i < nq; i++) { + for (int i = 0; i < nq; i++) { int gt_nn = gt[i * k]; - for(int j = 0; j < k; j++) { + for (int j = 0; j < k; j++) { if (I[i * k + j] == gt_nn) { - if(j < 1) n_1++; - if(j < 10) n_10++; - if(j < 100) n_100++; + if (j < 1) + n_1++; + if (j < 10) + n_10++; + if (j < 100) + n_100++; } } } @@ -344,21 +340,20 @@ void test_ann_hdf5(const std::string& ann_test_name, printf("R@100 = %.4f\n", n_100 / float(nq)); #endif - printf ("[%.3f s] Search test done\n\n", elapsed() - t0); + printf("[%.3f s] Search test done\n\n", elapsed() - t0); - delete [] I; - delete [] D; + delete[] I; + delete[] D; } - delete [] xq; - delete [] gt; + delete[] xq; + delete[] gt; delete index; } #ifdef CUSTOMIZATION -void test_ivfsq8h_gpu(const std::string& ann_test_name, - int32_t index_add_loops, - const std::vector& nprobes){ +void +test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, const std::vector& nprobes) { double t0 = elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -380,44 +375,43 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, size_t d; std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops); - try{ + try { cpu_index = faiss::read_index(index_file_name.c_str()); d = dim; - } - catch (...){ + } catch (...) 
{ printf("Cannot read index file: %s\n", index_file_name.c_str()); - printf ("[%.3f s] Loading train set\n", elapsed() - t0); + printf("[%.3f s] Loading train set\n", elapsed() - t0); - size_t nb; - float *xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); + size_t nb; + float* xb = (float*)hdf5_read(ann_file_name.c_str(), "train", H5T_FLOAT, d, nb); assert(d == dim || !"dataset does not have correct dimension"); - printf ("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); + printf("[%.3f s] Preparing index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d); - faiss::Index *ori_index = faiss::index_factory(d, index_key.c_str(), metric_type); + faiss::Index* ori_index = faiss::index_factory(d, index_key.c_str(), metric_type); auto device_index = faiss::gpu::index_cpu_to_gpu(&res, 0, ori_index); - printf ("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); + printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb); device_index->train(nb, xb); - printf ("[%.3f s] Loading database\n", elapsed() - t0); + printf("[%.3f s] Loading database\n", elapsed() - t0); for (int i = 0; i < index_add_loops; i++) { - printf ("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); + printf("[%.3f s] Indexing database, size %ld*%ld\n", elapsed() - t0, nb, d); device_index->add(nb, xb); } cpu_index = faiss::gpu::index_gpu_to_cpu(device_index); faiss::write_index(cpu_index, index_file_name.c_str()); - delete []xb; + delete[] xb; } - faiss::IndexIVF *cpu_ivf_index = dynamic_cast(cpu_index); - if(cpu_ivf_index != nullptr) { + faiss::IndexIVF* cpu_ivf_index = dynamic_cast(cpu_index); + if (cpu_ivf_index != nullptr) { cpu_ivf_index->to_readonly(); } @@ -433,9 +427,9 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, delete index; size_t nq; - float *xq; + float* xq; { - printf ("[%.3f s] Loading queries\n", elapsed() - t0); + printf("[%.3f s] Loading queries\n", elapsed() - t0); size_t d2; xq = 
(float*)hdf5_read(ann_file_name.c_str(), "test", H5T_FLOAT, d2, nq); @@ -443,42 +437,41 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, } size_t k; - faiss::Index::idx_t *gt; + faiss::Index::idx_t* gt; { - printf ("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); + printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq); size_t nq2; - int *gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); + int* gt_int = (int*)hdf5_read(ann_file_name.c_str(), "neighbors", H5T_INTEGER, k, nq2); assert(nq2 == nq || !"incorrect nb of ground truth entries"); gt = new faiss::Index::idx_t[k * nq]; for (unsigned long i = 0; i < k * nq; ++i) { gt[i] = gt_int[i]; } - delete []gt_int; + delete[] gt_int; } - for (auto nprobe : nprobes){ - printf ("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", - elapsed() - t0, nprobe); + for (auto nprobe : nprobes) { + printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); - auto ivf_index = dynamic_cast(cpu_index); + auto ivf_index = dynamic_cast(cpu_index); ivf_index->nprobe = nprobe; auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); - if(is_gpu_flat_index == nullptr) { + if (is_gpu_flat_index == nullptr) { delete ivf_index->quantizer; ivf_index->quantizer = index_composition.quantizer; } const size_t NQ = 1000, K = 1000; - long *I = new faiss::Index::idx_t[NQ * K]; - float *D = new float[NQ * K]; + long* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; - printf ("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf ("============================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + printf("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), 
nprobe); + printf("============================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; @@ -495,7 +488,7 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, // consider: each result replicates DATA_LOOPS times for (unsigned long j_c = 0; j_c < k; j_c++) { int r_c = I[i * t_k + j_c]; - for (unsigned long j_g = 0; j_g < k/index_add_loops; j_g++) { + for (unsigned long j_g = 0; j_g < k / index_add_loops; j_g++) { if (gt[i * k + j_g] == r_c) { hit++; continue; @@ -503,23 +496,21 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, } } } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", - t_nq, t_k, (t_end - t_start), - faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, - (hit / float(t_nq * k / index_add_loops))); + printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, + (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, + faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); } } - printf ("============================================================================================\n"); + printf("============================================================================================\n"); - printf ("[%.3f s] Search test done\n\n", elapsed() - t0); + printf("[%.3f s] Search test done\n\n", elapsed() - t0); - delete [] I; - delete [] D; + delete[] I; + delete[] D; } - delete [] xq; - delete [] gt; + delete[] xq; + delete[] gt; delete cpu_index; } #endif @@ -536,21 +527,20 @@ void test_ivfsq8h_gpu(const std::string& ann_test_name, * MNIST 784 60,000 10,000 100 Euclidean HDF5 (217MB) * NYTimes 256 290,000 10,000 100 Angular HDF5 (301MB) 
* SIFT 128 1,000,000 10,000 100 Euclidean HDF5 (501MB) -*************************************************************************************/ + *************************************************************************************/ TEST(FAISSTEST, BENCHMARK) { - test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); #ifdef CUSTOMIZATION test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); #endif - test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); #ifdef CUSTOMIZATION test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); test_ivfsq8h_gpu("glove-200-angular", 1, {8, 128}); #endif } - From 7f6092b6baf8d529e762f508fbac9feff80b77b4 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 28 Oct 2019 19:34:38 +0800 Subject: [PATCH 09/11] #89 add SQ8Hybrid MIX test and pure-GPU test Former-commit-id: 25b5f419d3deedd4ec39c014ea47822a83a0af38 --- .../faiss_benchmark/faiss_benchmark_test.cpp | 228 +++++++++++------- 1 file changed, 143 insertions(+), 85 deletions(-) diff --git a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp index d1db0e9049..bb50198f92 100644 --- a/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp +++ b/core/src/index/unittest/faiss_benchmark/faiss_benchmark_test.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -183,9 +184,31 @@ parse_ann_test_name(const std::string& ann_test_name, size_t& dim, faiss::Metric 
return true; } +int32_t +GetResultHitCount(const faiss::Index::idx_t* ground_index, const faiss::Index::idx_t* index, size_t ground_k, size_t k, + size_t nq, int32_t index_add_loops) { + assert(ground_k <= k); + int hit = 0; + for (int i = 0; i < nq; i++) { + // count the num of results exist in ground truth result set + // each result replicates INDEX_ADD_LOOPS times + for (int j_c = 0; j_c < ground_k; j_c++) { + int r_c = index[i * k + j_c]; + int j_g = 0; + for (; j_g < ground_k / index_add_loops; j_g++) { + if (ground_index[i * ground_k + j_g] == r_c) { + hit++; + continue; + } + } + } + } + return hit; +} + void test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, int32_t index_add_loops, - const std::vector& nprobes) { + const std::vector& nprobes, int32_t search_loops) { double t0 = elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -265,8 +288,6 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in for (auto nprobe : nprobes) { faiss::ParameterSpace params; - printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); - std::string nprobe_str = "nprobe=" + std::to_string(nprobe); params.set_index_parameters(index, nprobe_str.c_str()); @@ -277,39 +298,28 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in float* D = new float[NQ * K]; printf("\n%s | %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf("============================================================================================\n"); + printf("======================================================================================\n"); for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} faiss::indexIVF_stats.quantization_time = 0.0; faiss::indexIVF_stats.search_time = 0.0; double t_start = elapsed(), t_end; - - index->search(t_nq, xq, 
t_k, D, I); - + for (int i = 0; i < search_loops; i++) { + index->search(t_nq, xq, t_k, D, I); + } t_end = elapsed(); // k = 100 for ground truth - int hit = 0; - for (int i = 0; i < t_nq; i++) { - // count the num of results exist in ground truth result set - // consider: each result replicates DATA_LOOPS times - for (int j_c = 0; j_c < k; j_c++) { - int r_c = I[i * t_k + j_c]; - for (int j_g = 0; j_g < k / index_add_loops; j_g++) { - if (gt[i * k + j_g] == r_c) { - hit++; - continue; - } - } - } - } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, - (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); + int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); + + printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, t_k, + (t_end - t_start) / search_loops, faiss::indexIVF_stats.quantization_time / 1000 / search_loops, + faiss::indexIVF_stats.search_time / 1000 / search_loops, + (hit / float(t_nq * k / index_add_loops))); } } - printf("============================================================================================\n"); + printf("======================================================================================\n"); #else printf("[%.3f s] Perform a search on %ld queries\n", elapsed() - t0, nq); @@ -353,7 +363,8 @@ test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, in #ifdef CUSTOMIZATION void -test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, const std::vector& nprobes) { +test_ivfsq8h(const std::string& ann_test_name, int32_t index_add_loops, const std::vector& nprobes, + bool pure_gpu_mode, int32_t search_loops) { double t0 = elapsed(); const std::string ann_file_name = ann_test_name + ".hdf5"; @@ -423,9 +434,18 @@ test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, 
cons index_composition.quantizer = nullptr; index_composition.mode = 1; + double copy_time = elapsed(); auto index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); delete index; + if (pure_gpu_mode) { + index_composition.mode = 2; // 0: all data, 1: copy quantizer, 2: copy data + index = faiss::gpu::index_cpu_to_gpu(&res, 0, &index_composition, &option); + } + + copy_time = elapsed() - copy_time; + printf("[%.3f s] Copy quantizer completed, cost %f s\n", elapsed() - t0, copy_time); + size_t nq; float* xq; { @@ -446,67 +466,98 @@ test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, cons assert(nq2 == nq || !"incorrect nb of ground truth entries"); gt = new faiss::Index::idx_t[k * nq]; - for (unsigned long i = 0; i < k * nq; ++i) { + for (uint64_t i = 0; i < k * nq; ++i) { gt[i] = gt_int[i]; } delete[] gt_int; } - for (auto nprobe : nprobes) { - printf("[%.3f s] Setting parameter configuration 'nprobe=%lu' on index\n", elapsed() - t0, nprobe); + const size_t NQ = 1000, K = 1000; + if (!pure_gpu_mode) { + for (auto nprobe : nprobes) { + auto ivf_index = dynamic_cast(cpu_index); + ivf_index->nprobe = nprobe; - auto ivf_index = dynamic_cast(cpu_index); - ivf_index->nprobe = nprobe; - - auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); - if (is_gpu_flat_index == nullptr) { - delete ivf_index->quantizer; - ivf_index->quantizer = index_composition.quantizer; - } - - const size_t NQ = 1000, K = 1000; - long* I = new faiss::Index::idx_t[NQ * K]; - float* D = new float[NQ * K]; - - printf("\n%s | %s-gpu | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); - printf("============================================================================================\n"); - for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} - for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} - faiss::indexIVF_stats.quantization_time = 0.0; - faiss::indexIVF_stats.search_time = 0.0; - - double 
t_start = elapsed(), t_end; - - cpu_index->search(t_nq, xq, t_k, D, I); - - t_end = elapsed(); - - // k = 100 for ground truth - int hit = 0; - for (unsigned long i = 0; i < t_nq; i++) { - // count the num of results exist in ground truth result set - // consider: each result replicates DATA_LOOPS times - for (unsigned long j_c = 0; j_c < k; j_c++) { - int r_c = I[i * t_k + j_c]; - for (unsigned long j_g = 0; j_g < k / index_add_loops; j_g++) { - if (gt[i * k + j_g] == r_c) { - hit++; - continue; - } - } - } - } - printf("nq = %4ld, k = %4ld, elapse = %fs (quant = %fs, search = %fs), R@ = %.4f\n", t_nq, t_k, - (t_end - t_start), faiss::indexIVF_stats.quantization_time / 1000, - faiss::indexIVF_stats.search_time / 1000, (hit / float(t_nq * k / index_add_loops))); + auto is_gpu_flat_index = dynamic_cast(ivf_index->quantizer); + if (is_gpu_flat_index == nullptr) { + delete ivf_index->quantizer; + ivf_index->quantizer = index_composition.quantizer; } + + int64_t* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; + + printf("\n%s | %s-MIX | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("======================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + + double t_start = elapsed(), t_end; + for (int32_t i = 0; i < search_loops; i++) { + cpu_index->search(t_nq, xq, t_k, D, I); + } + t_end = elapsed(); + + // k = 100 for ground truth + int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); + + printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, + t_k, (t_end - t_start) / search_loops, + faiss::indexIVF_stats.quantization_time / 1000 / search_loops, + faiss::indexIVF_stats.search_time / 1000 / search_loops, + 
(hit / float(t_nq * k / index_add_loops))); + } + } + printf("======================================================================================\n"); + + printf("[%.3f s] Search test done\n\n", elapsed() - t0); + + delete[] I; + delete[] D; } - printf("============================================================================================\n"); + } else { + std::shared_ptr gpu_index_ivf_ptr = std::shared_ptr(index); - printf("[%.3f s] Search test done\n\n", elapsed() - t0); + for (auto nprobe : nprobes) { + faiss::gpu::GpuIndexIVFSQHybrid* gpu_index_ivf_hybrid = + dynamic_cast(gpu_index_ivf_ptr.get()); + gpu_index_ivf_hybrid->setNumProbes(nprobe); - delete[] I; - delete[] D; + int64_t* I = new faiss::Index::idx_t[NQ * K]; + float* D = new float[NQ * K]; + + printf("\n%s | %s-GPU | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(), nprobe); + printf("======================================================================================\n"); + for (size_t t_nq = 10; t_nq <= NQ; t_nq *= 10) { // nq = {10, 100, 1000} + for (size_t t_k = 100; t_k <= K; t_k *= 10) { // k = {100, 1000} + faiss::indexIVF_stats.quantization_time = 0.0; + faiss::indexIVF_stats.search_time = 0.0; + + double t_start = elapsed(), t_end; + for (int32_t i = 0; i < search_loops; i++) { + gpu_index_ivf_ptr->search(nq, xq, k, D, I); + } + t_end = elapsed(); + + // k = 100 for ground truth + int32_t hit = GetResultHitCount(gt, I, k, t_k, t_nq, index_add_loops); + + printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, + t_k, (t_end - t_start) / search_loops, + faiss::indexIVF_stats.quantization_time / 1000 / search_loops, + faiss::indexIVF_stats.search_time / 1000 / search_loops, + (hit / float(t_nq * k / index_add_loops))); + } + } + printf("======================================================================================\n"); + + printf("[%.3f s] Search test done\n\n", elapsed() - t0); + + delete[] I; + delete[] D; + } } delete[] xq; @@ 
-530,17 +581,24 @@ test_ivfsq8h_gpu(const std::string& ann_test_name, int32_t index_add_loops, cons *************************************************************************************/ TEST(FAISSTEST, BENCHMARK) { - test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", 2, {8, 128}); - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", 2, {8, 128}); + std::vector param_nprobes = {8, 128}; + const int32_t SEARCH_LOOPS = 5; + const int32_t SIFT_INSERT_LOOPS = 2; // insert twice to get ~1G data set + const int32_t GLOVE_INSERT_LOOPS = 1; + + test_ann_hdf5("sift-128-euclidean", "IVF4096,Flat", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #ifdef CUSTOMIZATION - test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", 2, {8, 128}); - test_ivfsq8h_gpu("sift-128-euclidean", 2, {8, 128}); + test_ann_hdf5("sift-128-euclidean", "IVF16384,SQ8Hybrid", SIFT_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ivfsq8h("sift-128-euclidean", SIFT_INSERT_LOOPS, param_nprobes, false, SEARCH_LOOPS); + test_ivfsq8h("sift-128-euclidean", SIFT_INSERT_LOOPS, param_nprobes, true, SEARCH_LOOPS); #endif - test_ann_hdf5("glove-200-angular", "IVF4096,Flat", 1, {8, 128}); - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF4096,Flat", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); #ifdef CUSTOMIZATION - test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", 1, {8, 128}); - test_ivfsq8h_gpu("glove-200-angular", 1, {8, 128}); + test_ann_hdf5("glove-200-angular", "IVF16384,SQ8Hybrid", GLOVE_INSERT_LOOPS, param_nprobes, SEARCH_LOOPS); + test_ivfsq8h("glove-200-angular", GLOVE_INSERT_LOOPS, param_nprobes, false, SEARCH_LOOPS); + test_ivfsq8h("glove-200-angular", GLOVE_INSERT_LOOPS, param_nprobes, true, SEARCH_LOOPS); #endif } From 
97ae8a780ca0009e7f1749a339f50cb954cc4fa2 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 28 Oct 2019 19:55:43 +0800 Subject: [PATCH 10/11] #89 add README.txt Former-commit-id: a84501ce6c2c94275819ace9e7d7a4afc14fbeca --- core/src/index/unittest/faiss_benchmark/README.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 core/src/index/unittest/faiss_benchmark/README.txt diff --git a/core/src/index/unittest/faiss_benchmark/README.txt b/core/src/index/unittest/faiss_benchmark/README.txt new file mode 100644 index 0000000000..81114d8381 --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/README.txt @@ -0,0 +1,13 @@ +To run this FAISS benchmark, please follow these steps: + +1. Download the HDF5 from: + https://support.hdfgroup.org/ftp/HDF5/releases/ + and install to /usr/local/hdf5 + +2. Download HDF5 data files from: + https://github.com/erikbern/ann-benchmarks + +3. Put HDF5 data files into the same directory with test binary + +4. Run the test binary + From b439da8a36dcfa2f63867cb70786859fe992dcf3 Mon Sep 17 00:00:00 2001 From: "yudong.cai" Date: Mon, 28 Oct 2019 20:17:25 +0800 Subject: [PATCH 11/11] #89 update README.md Former-commit-id: 0b85b430c0d4a69e0470e916a76adda16f96c12b --- .../index/unittest/faiss_benchmark/README.md | 25 +++++++++++++++++++ .../index/unittest/faiss_benchmark/README.txt | 13 ---------- 2 files changed, 25 insertions(+), 13 deletions(-) create mode 100644 core/src/index/unittest/faiss_benchmark/README.md delete mode 100644 core/src/index/unittest/faiss_benchmark/README.txt diff --git a/core/src/index/unittest/faiss_benchmark/README.md b/core/src/index/unittest/faiss_benchmark/README.md new file mode 100644 index 0000000000..c451ac13b0 --- /dev/null +++ b/core/src/index/unittest/faiss_benchmark/README.md @@ -0,0 +1,25 @@ +### To run this FAISS benchmark, please follow these steps: + +#### Step 1: +Download the HDF5 source from: + https://support.hdfgroup.org/ftp/HDF5/releases/ +and build/install to
"/usr/local/hdf5". + +#### Step 2: +Download HDF5 data files from: + https://github.com/erikbern/ann-benchmarks + +#### Step 3: +Update 'milvus/core/src/index/unittest/CMakeLists.txt', +uncomment "#add_subdirectory(faiss_benchmark)". + +#### Step 4: +Build Milvus with unittest enabled: "./build.sh -t Release -u", +binary 'test_faiss_benchmark' will be generated. + +#### Step 5: +Put the HDF5 data files into the same directory as the binary 'test_faiss_benchmark'. + +#### Step 6: +Run the test binary 'test_faiss_benchmark'. + diff --git a/core/src/index/unittest/faiss_benchmark/README.txt b/core/src/index/unittest/faiss_benchmark/README.txt deleted file mode 100644 index 81114d8381..0000000000 --- a/core/src/index/unittest/faiss_benchmark/README.txt +++ /dev/null @@ -1,13 +0,0 @@ -To run this FAISS benchmark, please follow these steps: - -1. Download the HDF5 from: - https://support.hdfgroup.org/ftp/HDF5/releases/ - and install to /usr/local/hdf5 - -2. Download HDF5 data files from: - https://github.com/erikbern/ann-benchmarks - -3. Put HDF5 data files into the same directory with test binary - -4. Run the test binary -