diff --git a/CHANGELOG.md b/CHANGELOG.md index 62c82df007..a8b243546e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,12 +17,14 @@ Please mark all change in change log and use the ticket from JIRA. - \#399 - Create partition should be failed if partition tag existed - \#412 - Message returned is confused when partition created with null partition name - \#416 - Drop the same partition success repeatally +- \#440 - Query API in customization still uses old version ## Feature - \#12 - Pure CPU version for Milvus - \#77 - Support table partition - \#127 - Support new Index type IVFPQ - \#226 - Experimental shards middleware for Milvus +- \#227 - Support new index types SPTAG-KDT and SPTAG-BKT - \#346 - Support build index with multiple gpu ## Improvement @@ -38,6 +40,7 @@ Please mark all change in change log and use the ticket from JIRA. - \#358 - Add more information in build.sh and install.md - \#404 - Add virtual method Init() in Pass abstract class - \#409 - Add a Fallback pass in optimizer +- \#433 - C++ SDK query result is not easy to use ## Task diff --git a/ci/jenkins/Jenkinsfile b/ci/jenkins/Jenkinsfile index bf7a70d11a..8d3953b112 100644 --- a/ci/jenkins/Jenkinsfile +++ b/ci/jenkins/Jenkinsfile @@ -50,7 +50,7 @@ pipeline { } stages { - stage("Run GPU Version Build") { + stage("Run Build") { agent { kubernetes { label "${BINRARY_VERSION}-build" @@ -60,7 +60,7 @@ pipeline { } stages { - stage('GPU Version Build') { + stage('Build') { steps { container('milvus-build-env') { script { @@ -69,7 +69,7 @@ pipeline { } } } - stage('GPU Version Code Coverage') { + stage('Code Coverage') { steps { container('milvus-build-env') { script { @@ -78,7 +78,7 @@ pipeline { } } } - stage('Upload GPU Version Package') { + stage('Upload Package') { steps { container('milvus-build-env') { script { @@ -90,7 +90,7 @@ pipeline { } } - stage("Publish GPU Version docker images") { + stage("Publish docker images") { agent { kubernetes { label "${BINRARY_VERSION}-publish" @@ -100,7 +100,7 @@ pipeline { } stages { - stage('Publish GPU Version') { + stage('Publish') { steps { container('publish-images'){ script { @@ -112,7 +112,7 @@ pipeline { } } - stage("Deploy GPU Version to Development") { + stage("Deploy to Development") { agent { kubernetes { label "${BINRARY_VERSION}-dev-test" @@ -122,7 +122,7 @@ pipeline { } stages { - stage("Deploy GPU Version to Dev") { + stage("Deploy to Dev") { steps { container('milvus-test-env') { script { @@ -132,7 +132,7 @@ pipeline { } } - stage("GPU Version Dev Test") { + stage("Dev Test") { steps { container('milvus-test-env') { script { @@ -147,7 +147,7 @@ pipeline { } } - stage ("Cleanup GPU Version Dev") { + stage ("Cleanup Dev") { steps { container('milvus-test-env') { script { @@ -180,7 +180,7 @@ pipeline { } stages { - stage("Run CPU Version Build") { + stage("Run Build") { agent { kubernetes { label "${BINRARY_VERSION}-build" @@ -190,7 +190,7 @@ pipeline { } stages { - stage('Build CPU Version') { + stage('Build') { steps { container('milvus-build-env') { script { @@ -199,7 +199,7 @@ pipeline { } } } - stage('CPU Version Code Coverage') { + stage('Code Coverage') { steps { container('milvus-build-env') { script { @@ -208,7 +208,7 @@ pipeline { } } } - stage('Upload CPU Version Package') { + stage('Upload Package') { steps { container('milvus-build-env') { script { @@ -220,7 +220,7 @@ pipeline { } } - stage("Publish CPU Version docker images") { + stage("Publish docker images") { agent { kubernetes { label "${BINRARY_VERSION}-publish" @@ -230,7 +230,7 @@ pipeline { } stages { - stage('Publish CPU Version') { + stage('Publish') { steps { container('publish-images'){ script { @@ -242,7 +242,7 @@ pipeline { } } - stage("Deploy CPU Version to Development") { + stage("Deploy to Development") { agent { kubernetes { label "${BINRARY_VERSION}-dev-test" @@ -252,7 +252,7 @@ pipeline { } stages { - stage("Deploy CPU Version to Dev") { + stage("Deploy to Dev") { steps { container('milvus-test-env') { script { @@ -262,7 +262,7 @@ pipeline { } } - stage("CPU Version Dev Test") { + stage("Dev Test") { steps { container('milvus-test-env') { script { @@ -277,7 +277,7 @@ pipeline { } } - stage ("Cleanup CPU Version Dev") { + stage ("Cleanup Dev") { steps { container('milvus-test-env') { script { diff --git a/ci/scripts/coverage.sh b/ci/scripts/coverage.sh index dcd460eaeb..cece6f0dcc 100755 --- a/ci/scripts/coverage.sh +++ b/ci/scripts/coverage.sh @@ -116,7 +116,7 @@ for test in `ls ${DIR_UNITTEST}`; do if [ $? -ne 0 ]; then echo ${args} echo ${DIR_UNITTEST}/${test} "run failed" - exit -1 + exit 1 fi done @@ -143,7 +143,7 @@ ${LCOV_CMD} -r "${FILE_INFO_OUTPUT}" -o "${FILE_INFO_OUTPUT_NEW}" \ if [ $? -ne 0 ]; then echo "gen ${FILE_INFO_OUTPUT_NEW} failed" - exit -2 + exit 2 fi # gen html report diff --git a/core/migration/README.md b/core/scripts/migration/README.md similarity index 100% rename from core/migration/README.md rename to core/scripts/migration/README.md diff --git a/core/migration/mysql_4_to_6.sql b/core/scripts/migration/mysql_4_to_6.sql similarity index 100% rename from core/migration/mysql_4_to_6.sql rename to core/scripts/migration/mysql_4_to_6.sql diff --git a/core/scripts/migration/mysql_6_to_4.sql b/core/scripts/migration/mysql_6_to_4.sql new file mode 100644 index 0000000000..96c60e0280 --- /dev/null +++ b/core/scripts/migration/mysql_6_to_4.sql @@ -0,0 +1,3 @@ +alter table Tables drop column owner_table; +alter table Tables drop column partition_tag; +alter table Tables drop column version; diff --git a/core/migration/sqlite_4_to_6.sql b/core/scripts/migration/sqlite_4_to_6.sql similarity index 100% rename from core/migration/sqlite_4_to_6.sql rename to core/scripts/migration/sqlite_4_to_6.sql diff --git a/core/scripts/migration/sqlite_6_to_4.sql b/core/scripts/migration/sqlite_6_to_4.sql new file mode 100644 index 0000000000..686d276f46 --- /dev/null +++ b/core/scripts/migration/sqlite_6_to_4.sql @@ -0,0 +1,7 @@ +CREATE TABLE 'TempTables' ( 'id' INTEGER PRIMARY KEY NOT NULL , 'table_id' TEXT UNIQUE NOT NULL , 'state' INTEGER NOT NULL , 'dimension' INTEGER NOT NULL , 'created_on' INTEGER NOT NULL , 'flag' INTEGER DEFAULT 0 NOT NULL , 'index_file_size' INTEGER NOT NULL , 'engine_type' INTEGER NOT NULL , 'nlist' INTEGER NOT NULL , 'metric_type' INTEGER NOT NULL); + +INSERT INTO TempTables SELECT id, table_id, state, dimension, created_on, flag, index_file_size, engine_type, nlist, metric_type FROM Tables; + +DROP TABLE Tables; + +ALTER TABLE TempTables RENAME TO Tables; diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 7b0103f52e..dd230ce0d1 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -84,12 +84,12 @@ DBImpl::Start() { return Status::OK(); } - ENGINE_LOG_TRACE << "DB service start"; + // ENGINE_LOG_TRACE << "DB service start"; shutting_down_.store(false, std::memory_order_release); // for distribute version, some nodes are read only if (options_.mode_ != DBOptions::MODE::CLUSTER_READONLY) { - ENGINE_LOG_TRACE << "StartTimerTasks"; + // ENGINE_LOG_TRACE << "StartTimerTasks"; bg_timer_thread_ = std::thread(&DBImpl::BackgroundTimerTask, this); } @@ -114,7 +114,7 @@ DBImpl::Stop() { meta_ptr_->CleanUp(); } - ENGINE_LOG_TRACE << "DB service stop"; + // ENGINE_LOG_TRACE << "DB service stop"; return Status::OK(); } @@ -558,7 +558,7 @@ DBImpl::StartMetricTask() { return; } - ENGINE_LOG_TRACE << "Start metric task"; + // ENGINE_LOG_TRACE << "Start metric task"; server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL); int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); @@ -584,7 +584,7 @@ DBImpl::StartMetricTask() { server::Metrics::GetInstance().GPUTemperature(); server::Metrics::GetInstance().CPUTemperature(); - ENGINE_LOG_TRACE << "Metric task finished"; + // ENGINE_LOG_TRACE << "Metric task finished"; } Status @@ -756,7 +756,7 @@ DBImpl::BackgroundMergeFiles(const std::string& table_id) { void DBImpl::BackgroundCompaction(std::set table_ids) { - ENGINE_LOG_TRACE << "Background compaction thread start"; + // ENGINE_LOG_TRACE << " Background compaction thread start"; Status status; for (auto& table_id : table_ids) { @@ -779,7 +779,7 @@ DBImpl::BackgroundCompaction(std::set table_ids) { } meta_ptr_->CleanUpFilesWithTTL(ttl); - ENGINE_LOG_TRACE << "Background compaction thread exit"; + // ENGINE_LOG_TRACE << " Background compaction thread exit"; } void @@ -812,7 +812,7 @@ DBImpl::StartBuildIndexTask(bool force) { void DBImpl::BackgroundBuildIndex() { - ENGINE_LOG_TRACE << "Background build index thread start"; + // ENGINE_LOG_TRACE << "Background build index thread start"; std::unique_lock lock(build_index_mutex_); meta::TableFilesSchema to_index_files; @@ -835,7 +835,7 @@ DBImpl::BackgroundBuildIndex() { } } - ENGINE_LOG_TRACE << "Background build index thread exit"; + // ENGINE_LOG_TRACE << "Background build index thread exit"; } Status diff --git a/core/src/db/engine/ExecutionEngine.h b/core/src/db/engine/ExecutionEngine.h index 86a014cf66..c8784e8a90 100644 --- a/core/src/db/engine/ExecutionEngine.h +++ b/core/src/db/engine/ExecutionEngine.h @@ -35,7 +35,9 @@ enum class EngineType { NSG_MIX, FAISS_IVFSQ8H, FAISS_PQ, - MAX_VALUE = FAISS_PQ, + SPTAG_KDT, + SPTAG_BKT, + MAX_VALUE = SPTAG_BKT, }; enum class MetricType { diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index 973d8b5912..ca307b90fc 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -124,6 +124,14 @@ ExecutionEngineImpl::CreatetVecIndex(EngineType type) { #endif break; } + case EngineType::SPTAG_KDT: { + index = GetVecIndexFactory(IndexType::SPTAG_KDT_RNT_CPU); + break; + } + case EngineType::SPTAG_BKT: { + index = GetVecIndexFactory(IndexType::SPTAG_BKT_RNT_CPU); + break; + } default: { ENGINE_LOG_ERROR << "Unsupported index type"; return nullptr; diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index c046bc3a56..1a22a9d2be 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -733,7 +733,16 @@ macro(build_faiss) if (USE_JFROG_CACHE STREQUAL "ON") string(MD5 FAISS_COMBINE_MD5 "${FAISS_MD5}${LAPACK_MD5}${OPENBLAS_MD5}") - set(FAISS_CACHE_PACKAGE_NAME "faiss_${FAISS_COMBINE_MD5}.tar.gz") + if (KNOWHERE_GPU_VERSION) + set(FAISS_COMPUTE_TYPE "gpu") + else () + set(FAISS_COMPUTE_TYPE "cpu") + endif() + if (FAISS_WITH_MKL) + set(FAISS_CACHE_PACKAGE_NAME "faiss_${FAISS_COMPUTE_TYPE}_mkl_${FAISS_COMBINE_MD5}.tar.gz") + else () + set(FAISS_CACHE_PACKAGE_NAME "faiss_${FAISS_COMPUTE_TYPE}_openblas_${FAISS_COMBINE_MD5}.tar.gz") + endif() set(FAISS_CACHE_URL "${JFROG_ARTFACTORY_CACHE_URL}/${FAISS_CACHE_PACKAGE_NAME}") set(FAISS_CACHE_PACKAGE_PATH "${THIRDPARTY_PACKAGE_CACHE}/${FAISS_CACHE_PACKAGE_NAME}") diff --git a/core/src/index/knowhere/CMakeLists.txt b/core/src/index/knowhere/CMakeLists.txt index 5f8e4d6970..285461bdef 100644 --- a/core/src/index/knowhere/CMakeLists.txt +++ b/core/src/index/knowhere/CMakeLists.txt @@ -30,10 +30,10 @@ set(external_srcs set(index_srcs knowhere/index/preprocessor/Normalize.cpp - knowhere/index/vector_index/IndexKDT.cpp + knowhere/index/vector_index/IndexSPTAG.cpp knowhere/index/vector_index/IndexIDMAP.cpp knowhere/index/vector_index/IndexIVF.cpp - knowhere/index/vector_index/helpers/KDTParameterMgr.cpp + knowhere/index/vector_index/helpers/SPTAGParameterMgr.cpp knowhere/index/vector_index/IndexNSG.cpp knowhere/index/vector_index/nsg/NSG.cpp knowhere/index/vector_index/nsg/NSGIO.cpp diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexKDT.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexKDT.cpp deleted file mode 100644 index 1bd45075e3..0000000000 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexKDT.cpp +++ /dev/null @@ -1,180 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include -#include -#include - -#undef mkdir - -#include "knowhere/index/vector_index/IndexKDT.h" -#include "knowhere/index/vector_index/helpers/Definitions.h" -//#include "knowhere/index/preprocessor/normalize.h" -#include "knowhere/adapter/SptagAdapter.h" -#include "knowhere/common/Exception.h" -#include "knowhere/index/vector_index/helpers/KDTParameterMgr.h" - -namespace knowhere { - -BinarySet -CPUKDTRNG::Serialize() { - std::vector index_blobs; - std::vector index_len; - - // TODO(zirui): dev - // index_ptr_->SaveIndexToMemory(index_blobs, index_len); - BinarySet binary_set; - - // - // auto sample = std::make_shared(); - // sample.reset(static_cast(index_blobs[0])); - // auto tree = std::make_shared(); - // tree.reset(static_cast(index_blobs[1])); - // auto graph = std::make_shared(); - // graph.reset(static_cast(index_blobs[2])); - // auto metadata = std::make_shared(); - // metadata.reset(static_cast(index_blobs[3])); - // - // binary_set.Append("samples", sample, index_len[0]); - // binary_set.Append("tree", tree, index_len[1]); - // binary_set.Append("graph", graph, index_len[2]); - // binary_set.Append("metadata", metadata, index_len[3]); - return binary_set; -} - -void -CPUKDTRNG::Load(const BinarySet& binary_set) { - // TODO(zirui): dev - - // std::vector index_blobs; - // - // auto samples = binary_set.GetByName("samples"); - // index_blobs.push_back(samples->data.get()); - // - // auto tree = binary_set.GetByName("tree"); - // index_blobs.push_back(tree->data.get()); - // - // auto graph = binary_set.GetByName("graph"); - // index_blobs.push_back(graph->data.get()); - // - // auto metadata = binary_set.GetByName("metadata"); - // index_blobs.push_back(metadata->data.get()); - // - // index_ptr_->LoadIndexFromMemory(index_blobs); -} - -// PreprocessorPtr -// CPUKDTRNG::BuildPreprocessor(const DatasetPtr &dataset, const Config &config) { -// return std::make_shared(); -//} - -IndexModelPtr -CPUKDTRNG::Train(const DatasetPtr& origin, const Config& train_config) { - SetParameters(train_config); - DatasetPtr dataset = origin->Clone(); - - // if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine - // && preprocessor_) { - // preprocessor_->Preprocess(dataset); - //} - - auto vectorset = ConvertToVectorSet(dataset); - auto metaset = ConvertToMetadataSet(dataset); - index_ptr_->BuildIndex(vectorset, metaset); - - // TODO: return IndexModelPtr - return nullptr; -} - -void -CPUKDTRNG::Add(const DatasetPtr& origin, const Config& add_config) { - SetParameters(add_config); - DatasetPtr dataset = origin->Clone(); - - // if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine - // && preprocessor_) { - // preprocessor_->Preprocess(dataset); - //} - - auto vectorset = ConvertToVectorSet(dataset); - auto metaset = ConvertToMetadataSet(dataset); - index_ptr_->AddIndex(vectorset, metaset); -} - -void -CPUKDTRNG::SetParameters(const Config& config) { - for (auto& para : KDTParameterMgr::GetInstance().GetKDTParameters()) { - // auto value = config.get_with_default(para.first, para.second); - index_ptr_->SetParameter(para.first, para.second); - } -} - -DatasetPtr -CPUKDTRNG::Search(const DatasetPtr& dataset, const Config& config) { - SetParameters(config); - auto tensor = dataset->tensor()[0]; - auto p = (float*)tensor->raw_mutable_data(); - for (auto i = 0; i < 10; ++i) { - for (auto j = 0; j < 10; ++j) { - std::cout << p[i * 10 + j] << " "; - } - std::cout << std::endl; - } - std::vector query_results = ConvertToQueryResult(dataset, config); - -#pragma omp parallel for - for (auto i = 0; i < query_results.size(); ++i) { - auto target = (float*)query_results[i].GetTarget(); - std::cout << target[0] << ", " << target[1] << ", " << target[2] << std::endl; - index_ptr_->SearchIndex(query_results[i]); - } - - return ConvertToDataset(query_results); -} - -int64_t -CPUKDTRNG::Count() { - index_ptr_->GetNumSamples(); -} - -int64_t -CPUKDTRNG::Dimension() { - index_ptr_->GetFeatureDim(); -} - -VectorIndexPtr -CPUKDTRNG::Clone() { - KNOWHERE_THROW_MSG("not support"); -} - -void -CPUKDTRNG::Seal() { - // do nothing -} - -// TODO(linxj): -BinarySet -CPUKDTRNGIndexModel::Serialize() { -} - -void -CPUKDTRNGIndexModel::Load(const BinarySet& binary) { -} - -} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp b/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp new file mode 100644 index 0000000000..17a93fdcc7 --- /dev/null +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp @@ -0,0 +1,348 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include +#include +#include + +#undef mkdir + +#include "knowhere/adapter/SptagAdapter.h" +#include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/IndexSPTAG.h" +#include "knowhere/index/vector_index/helpers/Definitions.h" +#include "knowhere/index/vector_index/helpers/SPTAGParameterMgr.h" + +namespace knowhere { + +CPUSPTAGRNG::CPUSPTAGRNG(const std::string& IndexType) { + if (IndexType == "KDT") { + index_ptr_ = SPTAG::VectorIndex::CreateInstance(SPTAG::IndexAlgoType::KDT, SPTAG::VectorValueType::Float); + index_ptr_->SetParameter("DistCalcMethod", "L2"); + index_type_ = SPTAG::IndexAlgoType::KDT; + } else { + index_ptr_ = SPTAG::VectorIndex::CreateInstance(SPTAG::IndexAlgoType::BKT, SPTAG::VectorValueType::Float); + index_ptr_->SetParameter("DistCalcMethod", "L2"); + index_type_ = SPTAG::IndexAlgoType::BKT; + } +} + +BinarySet +CPUSPTAGRNG::Serialize() { + std::string index_config; + std::vector index_blobs; + + std::shared_ptr> buffersize = index_ptr_->CalculateBufferSize(); + std::vector res(buffersize->size() + 1); + for (uint64_t i = 1; i < res.size(); i++) { + res[i] = new char[buffersize->at(i - 1)]; + auto ptr = &res[i][0]; + index_blobs.emplace_back(SPTAG::ByteArray((std::uint8_t*)ptr, buffersize->at(i - 1), false)); + } + + index_ptr_->SaveIndex(index_config, index_blobs); + + size_t length = index_config.length(); + char* cstr = new char[length]; + snprintf(cstr, length, "%s", index_config.c_str()); + + BinarySet binary_set; + auto sample = std::make_shared(); + sample.reset(static_cast(index_blobs[0].Data())); + auto tree = std::make_shared(); + tree.reset(static_cast(index_blobs[1].Data())); + auto graph = std::make_shared(); + graph.reset(static_cast(index_blobs[2].Data())); + auto deleteid = std::make_shared(); + deleteid.reset(static_cast(index_blobs[3].Data())); + auto metadata1 = std::make_shared(); + metadata1.reset(static_cast(index_blobs[4].Data())); + auto metadata2 = std::make_shared(); + metadata2.reset(static_cast(index_blobs[5].Data())); + auto config = std::make_shared(); + config.reset(static_cast((void*)cstr)); + + binary_set.Append("samples", sample, index_blobs[0].Length()); + binary_set.Append("tree", tree, index_blobs[1].Length()); + binary_set.Append("deleteid", deleteid, index_blobs[3].Length()); + binary_set.Append("metadata1", metadata1, index_blobs[4].Length()); + binary_set.Append("metadata2", metadata2, index_blobs[5].Length()); + binary_set.Append("config", config, length); + binary_set.Append("graph", graph, index_blobs[2].Length()); + + // MemoryIOWriter writer; + // size_t len = 0; + // for (int i = 0; i < 6; ++i) { + // len = index_blobs[i].Length(); + // assert(len != 0); + // writer(&len, sizeof(size_t), 1); + // writer(index_blobs[i].Data(), len, 1); + // len = 0; + // } + // writer(&length, sizeof(size_t), 1); + // writer(cstr, length, 1); + // auto data = std::make_shared(); + // data.reset(writer.data_); + // BinarySet binary_set; + // binary_set.Append("sptag", data, writer.total); + + // MemoryIOWriter writer; + // size_t len = 0; + // for (int i = 0; i < 6; ++i) { + // if (i == 2) continue; + // len = index_blobs[i].Length(); + // assert(len != 0); + // writer(&len, sizeof(size_t), 1); + // writer(index_blobs[i].Data(), len, 1); + // len = 0; + // } + // writer(&length, sizeof(size_t), 1); + // writer(cstr, length, 1); + // auto data = std::make_shared(); + // data.reset(writer.data_); + // BinarySet binary_set; + // binary_set.Append("sptag", data, writer.total); + // auto graph = std::make_shared(); + // graph.reset(static_cast(index_blobs[2].Data())); + // binary_set.Append("graph", graph, index_blobs[2].Length()); + + return binary_set; +} + +void +CPUSPTAGRNG::Load(const BinarySet& binary_set) { + std::string index_config; + std::vector index_blobs; + + auto samples = binary_set.GetByName("samples"); + index_blobs.push_back(SPTAG::ByteArray(samples->data.get(), samples->size, false)); + + auto tree = binary_set.GetByName("tree"); + index_blobs.push_back(SPTAG::ByteArray(tree->data.get(), tree->size, false)); + + auto graph = binary_set.GetByName("graph"); + index_blobs.push_back(SPTAG::ByteArray(graph->data.get(), graph->size, false)); + + auto deleteid = binary_set.GetByName("deleteid"); + index_blobs.push_back(SPTAG::ByteArray(deleteid->data.get(), deleteid->size, false)); + + auto metadata1 = binary_set.GetByName("metadata1"); + index_blobs.push_back(SPTAG::ByteArray(metadata1->data.get(), metadata1->size, false)); + + auto metadata2 = binary_set.GetByName("metadata2"); + index_blobs.push_back(SPTAG::ByteArray(metadata2->data.get(), metadata2->size, false)); + + auto config = binary_set.GetByName("config"); + index_config = reinterpret_cast(config->data.get()); + + // std::vector index_blobs; + // auto data = binary_set.GetByName("sptag"); + // MemoryIOReader reader; + // reader.total = data->size; + // reader.data_ = data->data.get(); + // size_t len = 0; + // for (int i = 0; i < 6; ++i) { + // reader(&len, sizeof(size_t), 1); + // assert(len != 0); + // auto binary = new uint8_t[len]; + // reader(binary, len, 1); + // index_blobs.emplace_back(SPTAG::ByteArray(binary, len, true)); + // len = 0; + // } + // reader(&len, sizeof(size_t), 1); + // assert(len != 0); + // auto config = new char[len]; + // reader(config, len, 1); + // std::string index_config = config; + // delete[] config; + + // std::vector index_blobs; + // auto data = binary_set.GetByName("sptag"); + // MemoryIOReader reader; + // reader.total = data->size; + // reader.data_ = data->data.get(); + // size_t len = 0; + // for (int i = 0; i < 6; ++i) { + // if (i == 2) { + // auto graph = binary_set.GetByName("graph"); + // index_blobs.emplace_back(SPTAG::ByteArray(graph->data.get(), graph->size, false)); + // continue; + // } + // reader(&len, sizeof(size_t), 1); + // assert(len != 0); + // auto binary = new uint8_t[len]; + // reader(binary, len, 1); + // index_blobs.emplace_back(SPTAG::ByteArray(binary, len, true)); + // len = 0; + // } + // reader(&len, sizeof(size_t), 1); + // assert(len != 0); + // auto config = new char[len]; + // reader(config, len, 1); + // std::string index_config = config; + // delete[] config; + index_ptr_->LoadIndex(index_config, index_blobs); +} + +// PreprocessorPtr +// CPUKDTRNG::BuildPreprocessor(const DatasetPtr &dataset, const Config &config) { +// return std::make_shared(); +//} + +IndexModelPtr +CPUSPTAGRNG::Train(const DatasetPtr& origin, const Config& train_config) { + SetParameters(train_config); + DatasetPtr dataset = origin->Clone(); + + // if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine + // && preprocessor_) { + // preprocessor_->Preprocess(dataset); + //} + + auto vectorset = ConvertToVectorSet(dataset); + auto metaset = ConvertToMetadataSet(dataset); + index_ptr_->BuildIndex(vectorset, metaset); + + // TODO: return IndexModelPtr + return nullptr; +} + +void +CPUSPTAGRNG::Add(const DatasetPtr& origin, const Config& add_config) { + // SetParameters(add_config); + // DatasetPtr dataset = origin->Clone(); + // + // // if (index_ptr_->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine + // // && preprocessor_) { + // // preprocessor_->Preprocess(dataset); + // //} + // + // auto vectorset = ConvertToVectorSet(dataset); + // auto metaset = ConvertToMetadataSet(dataset); + // index_ptr_->AddIndex(vectorset, metaset); +} + +void +CPUSPTAGRNG::SetParameters(const Config& config) { +#define Assign(param_name, str_name) \ + conf->param_name == INVALID_VALUE ? index_ptr_->SetParameter(str_name, std::to_string(build_cfg->param_name)) \ + : index_ptr_->SetParameter(str_name, std::to_string(conf->param_name)) + + if (index_type_ == SPTAG::IndexAlgoType::KDT) { + auto conf = std::dynamic_pointer_cast(config); + auto build_cfg = SPTAGParameterMgr::GetInstance().GetKDTParameters(); + + Assign(kdtnumber, "KDTNumber"); + Assign(numtopdimensionkdtsplit, "NumTopDimensionKDTSplit"); + Assign(samples, "Samples"); + Assign(tptnumber, "TPTNumber"); + Assign(tptleafsize, "TPTLeafSize"); + Assign(numtopdimensiontptsplit, "NumTopDimensionTPTSplit"); + Assign(neighborhoodsize, "NeighborhoodSize"); + Assign(graphneighborhoodscale, "GraphNeighborhoodScale"); + Assign(graphcefscale, "GraphCEFScale"); + Assign(refineiterations, "RefineIterations"); + Assign(cef, "CEF"); + Assign(maxcheckforrefinegraph, "MaxCheckForRefineGraph"); + Assign(numofthreads, "NumberOfThreads"); + Assign(maxcheck, "MaxCheck"); + Assign(thresholdofnumberofcontinuousnobetterpropagation, "ThresholdOfNumberOfContinuousNoBetterPropagation"); + Assign(numberofinitialdynamicpivots, "NumberOfInitialDynamicPivots"); + Assign(numberofotherdynamicpivots, "NumberOfOtherDynamicPivots"); + } else { + auto conf = std::dynamic_pointer_cast(config); + auto build_cfg = SPTAGParameterMgr::GetInstance().GetBKTParameters(); + + Assign(bktnumber, "BKTNumber"); + Assign(bktkmeansk, "BKTKMeansK"); + Assign(bktleafsize, "BKTLeafSize"); + Assign(samples, "Samples"); + Assign(tptnumber, "TPTNumber"); + Assign(tptleafsize, "TPTLeafSize"); + Assign(numtopdimensiontptsplit, "NumTopDimensionTPTSplit"); + Assign(neighborhoodsize, "NeighborhoodSize"); + Assign(graphneighborhoodscale, "GraphNeighborhoodScale"); + Assign(graphcefscale, "GraphCEFScale"); + Assign(refineiterations, "RefineIterations"); + Assign(cef, "CEF"); + Assign(maxcheckforrefinegraph, "MaxCheckForRefineGraph"); + Assign(numofthreads, "NumberOfThreads"); + Assign(maxcheck, "MaxCheck"); + Assign(thresholdofnumberofcontinuousnobetterpropagation, "ThresholdOfNumberOfContinuousNoBetterPropagation"); + Assign(numberofinitialdynamicpivots, "NumberOfInitialDynamicPivots"); + Assign(numberofotherdynamicpivots, "NumberOfOtherDynamicPivots"); + } +} + +DatasetPtr +CPUSPTAGRNG::Search(const DatasetPtr& dataset, const Config& config) { + SetParameters(config); + auto tensor = dataset->tensor()[0]; + auto p = (float*)tensor->raw_mutable_data(); + for (auto i = 0; i < 10; ++i) { + for (auto j = 0; j < 10; ++j) { + std::cout << p[i * 10 + j] << " "; + } + std::cout << std::endl; + } + std::vector query_results = ConvertToQueryResult(dataset, config); + +#pragma omp parallel for + for (auto i = 0; i < query_results.size(); ++i) { + auto target = (float*)query_results[i].GetTarget(); + std::cout << target[0] << ", " << target[1] << ", " << target[2] << std::endl; + index_ptr_->SearchIndex(query_results[i]); + } + + return ConvertToDataset(query_results); +} + +int64_t +CPUSPTAGRNG::Count() { + return index_ptr_->GetNumSamples(); +} + +int64_t +CPUSPTAGRNG::Dimension() { + return index_ptr_->GetFeatureDim(); +} + +VectorIndexPtr +CPUSPTAGRNG::Clone() { + KNOWHERE_THROW_MSG("not support"); +} + +void +CPUSPTAGRNG::Seal() { + return; // do nothing +} + +BinarySet +CPUSPTAGRNGIndexModel::Serialize() { + // KNOWHERE_THROW_MSG("not support"); // not support +} + +void +CPUSPTAGRNGIndexModel::Load(const BinarySet& binary) { + // KNOWHERE_THROW_MSG("not support"); // not support +} + +} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/IndexKDT.h b/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.h similarity index 83% rename from core/src/index/knowhere/knowhere/index/vector_index/IndexKDT.h rename to core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.h index f6d436995b..01380ce943 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/IndexKDT.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/IndexSPTAG.h @@ -18,33 +18,37 @@ #pragma once #include + #include #include +#include + #include "VectorIndex.h" #include "knowhere/index/IndexModel.h" namespace knowhere { -class CPUKDTRNG : public VectorIndex { +class CPUSPTAGRNG : public VectorIndex { public: - CPUKDTRNG() { - index_ptr_ = SPTAG::VectorIndex::CreateInstance(SPTAG::IndexAlgoType::KDT, SPTAG::VectorValueType::Float); - index_ptr_->SetParameter("DistCalcMethod", "L2"); - } + explicit CPUSPTAGRNG(const std::string& IndexType); public: BinarySet Serialize() override; + VectorIndexPtr Clone() override; + void Load(const BinarySet& index_array) override; public: // PreprocessorPtr // BuildPreprocessor(const DatasetPtr &dataset, const Config &config) override; + int64_t Count() override; + int64_t Dimension() override; @@ -56,6 +60,7 @@ class CPUKDTRNG : public VectorIndex { DatasetPtr Search(const DatasetPtr& dataset, const Config& config) override; + void Seal() override; @@ -66,11 +71,12 @@ class CPUKDTRNG : public VectorIndex { private: PreprocessorPtr preprocessor_; std::shared_ptr index_ptr_; + SPTAG::IndexAlgoType index_type_; }; -using CPUKDTRNGPtr = std::shared_ptr; +using CPUSPTAGRNGPtr = std::shared_ptr; -class CPUKDTRNGIndexModel : public IndexModel { +class CPUSPTAGRNGIndexModel : public IndexModel { public: BinarySet Serialize() override; @@ -82,6 +88,6 @@ class CPUKDTRNGIndexModel : public IndexModel { std::shared_ptr index_; }; -using CPUKDTRNGIndexModelPtr = std::shared_ptr; +using CPUSPTAGRNGIndexModelPtr = std::shared_ptr; } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h b/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h index b931790b04..e30088ecdf 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h @@ -42,6 +42,32 @@ constexpr int64_t DEFAULT_OUT_DEGREE = INVALID_VALUE; constexpr int64_t DEFAULT_CANDIDATE_SISE = INVALID_VALUE; constexpr int64_t DEFAULT_NNG_K = INVALID_VALUE; +// SPTAG Config +constexpr int64_t DEFAULT_SAMPLES = INVALID_VALUE; +constexpr int64_t DEFAULT_TPTNUMBER = INVALID_VALUE; +constexpr int64_t DEFAULT_TPTLEAFSIZE = INVALID_VALUE; +constexpr int64_t DEFAULT_NUMTOPDIMENSIONTPTSPLIT = INVALID_VALUE; +constexpr int64_t DEFAULT_NEIGHBORHOODSIZE = INVALID_VALUE; +constexpr int64_t DEFAULT_GRAPHNEIGHBORHOODSCALE = INVALID_VALUE; +constexpr int64_t DEFAULT_GRAPHCEFSCALE = INVALID_VALUE; +constexpr int64_t DEFAULT_REFINEITERATIONS = INVALID_VALUE; +constexpr int64_t DEFAULT_CEF = INVALID_VALUE; +constexpr int64_t DEFAULT_MAXCHECKFORREFINEGRAPH = INVALID_VALUE; +constexpr int64_t DEFAULT_NUMOFTHREADS = INVALID_VALUE; +constexpr int64_t DEFAULT_MAXCHECK = INVALID_VALUE; +constexpr int64_t DEFAULT_THRESHOLDOFNUMBEROFCONTINUOUSNOBETTERPROPAGATION = INVALID_VALUE; +constexpr int64_t DEFAULT_NUMBEROFINITIALDYNAMICPIVOTS = INVALID_VALUE; +constexpr int64_t DEFAULT_NUMBEROFOTHERDYNAMICPIVOTS = INVALID_VALUE; + +// KDT Config +constexpr int64_t DEFAULT_KDTNUMBER = INVALID_VALUE; +constexpr int64_t DEFAULT_NUMTOPDIMENSIONKDTSPLIT = INVALID_VALUE; + +// BKT Config +constexpr int64_t DEFAULT_BKTNUMBER = INVALID_VALUE; +constexpr int64_t DEFAULT_BKTKMEANSK = INVALID_VALUE; +constexpr int64_t DEFAULT_BKTLEAFSIZE = INVALID_VALUE; + struct IVFCfg : public Cfg { int64_t nlist = DEFAULT_NLIST; int64_t nprobe = DEFAULT_NPROBE; @@ -135,8 +161,57 @@ struct NSGCfg : public IVFCfg { }; using NSGConfig = std::shared_ptr; -struct KDTCfg : public Cfg { - int64_t tptnubmber = -1; +struct SPTAGCfg : public Cfg { + int64_t samples = DEFAULT_SAMPLES; + int64_t tptnumber = DEFAULT_TPTNUMBER; + int64_t tptleafsize = DEFAULT_TPTLEAFSIZE; + int64_t numtopdimensiontptsplit = DEFAULT_NUMTOPDIMENSIONTPTSPLIT; + int64_t neighborhoodsize = DEFAULT_NEIGHBORHOODSIZE; + int64_t graphneighborhoodscale = DEFAULT_GRAPHNEIGHBORHOODSCALE; + int64_t graphcefscale = DEFAULT_GRAPHCEFSCALE; + int64_t refineiterations = DEFAULT_REFINEITERATIONS; + int64_t cef = DEFAULT_CEF; + int64_t maxcheckforrefinegraph = DEFAULT_MAXCHECKFORREFINEGRAPH; + int64_t numofthreads = DEFAULT_NUMOFTHREADS; + int64_t maxcheck = DEFAULT_MAXCHECK; + int64_t thresholdofnumberofcontinuousnobetterpropagation = DEFAULT_THRESHOLDOFNUMBEROFCONTINUOUSNOBETTERPROPAGATION; + int64_t numberofinitialdynamicpivots = DEFAULT_NUMBEROFINITIALDYNAMICPIVOTS; + int64_t numberofotherdynamicpivots = DEFAULT_NUMBEROFOTHERDYNAMICPIVOTS; + + SPTAGCfg() = default; + + bool + CheckValid() override { + return true; + }; }; +using SPTAGConfig = std::shared_ptr; + +struct KDTCfg : public SPTAGCfg { + int64_t kdtnumber = DEFAULT_KDTNUMBER; + int64_t numtopdimensionkdtsplit = DEFAULT_NUMTOPDIMENSIONKDTSPLIT; + + KDTCfg() = default; + + bool + CheckValid() override { + return true; + }; +}; +using KDTConfig = std::shared_ptr; + +struct BKTCfg : public SPTAGCfg { + int64_t bktnumber = DEFAULT_BKTNUMBER; + int64_t bktkmeansk = DEFAULT_BKTKMEANSK; + int64_t bktleafsize = DEFAULT_BKTLEAFSIZE; + + BKTCfg() = default; + + bool + CheckValid() override { + return true; + }; +}; +using BKTConfig = std::shared_ptr; } // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.cpp b/core/src/index/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.cpp deleted file mode 100644 index 19bf070dba..0000000000 --- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include "knowhere/index/vector_index/helpers/KDTParameterMgr.h" - -namespace knowhere { - -const std::vector& -KDTParameterMgr::GetKDTParameters() { - return kdt_parameters_; -} - -KDTParameterMgr::KDTParameterMgr() { - kdt_parameters_ = std::vector{ - {"KDTNumber", "1"}, - {"NumTopDimensionKDTSplit", "5"}, - {"NumSamplesKDTSplitConsideration", "100"}, - - {"TPTNumber", "1"}, - {"TPTLeafSize", "2000"}, - {"NumTopDimensionTPTSplit", "5"}, - - {"NeighborhoodSize", "32"}, - {"GraphNeighborhoodScale", "2"}, - {"GraphCEFScale", "2"}, - {"RefineIterations", "0"}, - {"CEF", "1000"}, - {"MaxCheckForRefineGraph", "10000"}, - - {"NumberOfThreads", "1"}, - - {"MaxCheck", "8192"}, - {"ThresholdOfNumberOfContinuousNoBetterPropagation", "3"}, - {"NumberOfInitialDynamicPivots", "50"}, - {"NumberOfOtherDynamicPivots", "4"}, - }; -} - -} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/SPTAGParameterMgr.cpp b/core/src/index/knowhere/knowhere/index/vector_index/helpers/SPTAGParameterMgr.cpp new file mode 100644 index 0000000000..836f204c77 --- /dev/null +++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/SPTAGParameterMgr.cpp @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "knowhere/index/vector_index/helpers/SPTAGParameterMgr.h" + +namespace knowhere { + +const KDTConfig& +SPTAGParameterMgr::GetKDTParameters() { + return kdt_config_; +} + +const BKTConfig& +SPTAGParameterMgr::GetBKTParameters() { + return bkt_config_; +} + +SPTAGParameterMgr::SPTAGParameterMgr() { + kdt_config_ = std::make_shared(); + kdt_config_->kdtnumber = 1; + kdt_config_->numtopdimensionkdtsplit = 5; + kdt_config_->samples = 100; + kdt_config_->tptnumber = 1; + kdt_config_->tptleafsize = 2000; + kdt_config_->numtopdimensiontptsplit = 5; + kdt_config_->neighborhoodsize = 32; + kdt_config_->graphneighborhoodscale = 2; + kdt_config_->graphcefscale = 2; + kdt_config_->refineiterations = 0; + kdt_config_->cef = 1000; + kdt_config_->maxcheckforrefinegraph = 10000; + kdt_config_->numofthreads = 1; + kdt_config_->maxcheck = 8192; + kdt_config_->thresholdofnumberofcontinuousnobetterpropagation = 3; + kdt_config_->numberofinitialdynamicpivots = 50; + kdt_config_->numberofotherdynamicpivots = 4; + + bkt_config_ = std::make_shared(); + bkt_config_->bktnumber = 1; + bkt_config_->bktkmeansk = 32; + bkt_config_->bktleafsize = 8; + bkt_config_->samples = 100; + bkt_config_->tptnumber = 1; + bkt_config_->tptleafsize = 2000; + bkt_config_->numtopdimensiontptsplit = 5; + bkt_config_->neighborhoodsize = 32; + bkt_config_->graphneighborhoodscale = 2; + bkt_config_->graphcefscale = 2; + bkt_config_->refineiterations = 0; + bkt_config_->cef = 1000; + bkt_config_->maxcheckforrefinegraph = 10000; + bkt_config_->numofthreads = 1; + bkt_config_->maxcheck = 8192; + bkt_config_->thresholdofnumberofcontinuousnobetterpropagation = 3; + bkt_config_->numberofinitialdynamicpivots = 50; + bkt_config_->numberofotherdynamicpivots = 4; +} + +} // namespace knowhere diff --git a/core/src/index/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.h b/core/src/index/knowhere/knowhere/index/vector_index/helpers/SPTAGParameterMgr.h similarity index 66% rename from core/src/index/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.h rename to core/src/index/knowhere/knowhere/index/vector_index/helpers/SPTAGParameterMgr.h index fe90761e17..6a6f7c48d1 100644 --- a/core/src/index/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.h +++ b/core/src/index/knowhere/knowhere/index/vector_index/helpers/SPTAGParameterMgr.h @@ -22,31 +22,40 @@ #include #include +#include +#include "IndexParameter.h" + namespace knowhere { -using KDTParameter = std::pair; +using KDTConfig = std::shared_ptr; +using BKTConfig = std::shared_ptr; -class KDTParameterMgr { +class SPTAGParameterMgr { public: - const std::vector& + const KDTConfig& GetKDTParameters(); + const BKTConfig& + GetBKTParameters(); + public: - static KDTParameterMgr& + static SPTAGParameterMgr& GetInstance() { - static KDTParameterMgr instance; + static SPTAGParameterMgr instance; return instance; } - KDTParameterMgr(const KDTParameterMgr&) = delete; - KDTParameterMgr& - operator=(const KDTParameterMgr&) = delete; + SPTAGParameterMgr(const SPTAGParameterMgr&) = delete; + + SPTAGParameterMgr& + operator=(const SPTAGParameterMgr&) = delete; private: - KDTParameterMgr(); + SPTAGParameterMgr(); private: - std::vector kdt_parameters_; + KDTConfig kdt_config_; + BKTConfig bkt_config_; }; } // namespace knowhere diff --git a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/Dataset.h b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/Dataset.h index d00ea45365..0208f6d983 100644 --- a/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/Dataset.h +++ b/core/src/index/thirdparty/SPTAG/AnnService/inc/Core/Common/Dataset.h @@ -195,7 +195,7 @@ namespace SPTAG C = *((DimensionType*)pDataPointsMemFile); pDataPointsMemFile += sizeof(DimensionType); - Initialize(R, C, (T*)pDataPointsMemFile); + Initialize(R, C, (T*)pDataPointsMemFile, false); std::cout << "Load " << name << " (" << R << ", " << C << ") Finish!" << std::endl; return true; } diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt index d3ba84e557..93ae63a9ec 100644 --- a/core/src/index/unittest/CMakeLists.txt +++ b/core/src/index/unittest/CMakeLists.txt @@ -82,17 +82,17 @@ if (NOT TARGET test_idmap) endif () target_link_libraries(test_idmap ${depend_libs} ${unittest_libs} ${basic_libs}) -# -set(kdt_srcs +# +set(sptag_srcs ${INDEX_SOURCE_DIR}/knowhere/knowhere/adapter/SptagAdapter.cpp ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/preprocessor/Normalize.cpp - ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/KDTParameterMgr.cpp - ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/IndexKDT.cpp + ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/helpers/SPTAGParameterMgr.cpp + ${INDEX_SOURCE_DIR}/knowhere/knowhere/index/vector_index/IndexSPTAG.cpp ) -if (NOT TARGET test_kdt) - add_executable(test_kdt test_kdt.cpp ${kdt_srcs} ${util_srcs}) +if (NOT TARGET test_sptag) + add_executable(test_sptag test_sptag.cpp ${sptag_srcs} ${util_srcs}) endif () -target_link_libraries(test_kdt +target_link_libraries(test_sptag SPTAGLibStatic ${depend_libs} ${unittest_libs} ${basic_libs}) @@ -106,7 +106,7 @@ endif () install(TARGETS test_ivf DESTINATION unittest) install(TARGETS test_idmap DESTINATION unittest) -install(TARGETS test_kdt DESTINATION unittest) +install(TARGETS test_sptag DESTINATION unittest) if (KNOWHERE_GPU_VERSION) install(TARGETS test_gpuresource DESTINATION unittest) install(TARGETS test_customized_index DESTINATION unittest) diff --git a/core/src/index/unittest/test_kdt.cpp b/core/src/index/unittest/test_kdt.cpp deleted file mode 100644 index bbc7dcf94c..0000000000 --- a/core/src/index/unittest/test_kdt.cpp +++ /dev/null @@ -1,144 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include -#include - -#include "knowhere/adapter/SptagAdapter.h" -#include "knowhere/adapter/Structure.h" -#include "knowhere/common/Exception.h" -#include "knowhere/index/vector_index/IndexKDT.h" -#include "knowhere/index/vector_index/helpers/Definitions.h" - -#include "unittest/utils.h" - -using ::testing::Combine; -using ::testing::TestWithParam; -using ::testing::Values; - -class KDTTest : public DataGen, public ::testing::Test { - protected: - void - SetUp() override { - Generate(96, 1000, 10); - index_ = std::make_shared(); - - auto tempconf = std::make_shared(); - tempconf->tptnubmber = 1; - tempconf->k = 10; - conf = tempconf; - - Init_with_default(); - } - - protected: - knowhere::Config conf; - std::shared_ptr index_ = nullptr; -}; - -// TODO(lxj): add test about count() and dimension() -TEST_F(KDTTest, kdt_basic) { - assert(!xb.empty()); - - auto preprocessor = index_->BuildPreprocessor(base_dataset, conf); - index_->set_preprocessor(preprocessor); - - auto model = index_->Train(base_dataset, conf); - index_->set_index_model(model); - index_->Add(base_dataset, conf); - auto result = index_->Search(query_dataset, conf); - AssertAnns(result, nq, k); - - { - // auto ids = result->array()[0]; - // auto dists = result->array()[1]; - auto ids = result->ids(); - auto dists = result->dist(); - - std::stringstream ss_id; - std::stringstream ss_dist; - for (auto i = 0; i < nq; i++) { - for (auto j = 0; j < k; ++j) { - ss_id << *((int64_t*)(ids) + i * k + j) << " "; - ss_dist << *((float*)(dists) + i * k + j) << " "; - // ss_id << *ids->data()->GetValues(1, i * k + j) << " "; - // ss_dist << *dists->data()->GetValues(1, i * k + j) << " "; - } - ss_id << std::endl; - ss_dist << std::endl; - } - std::cout << "id\n" << ss_id.str() << std::endl; - std::cout << "dist\n" << ss_dist.str() << std::endl; - } -} - -// TODO(zirui): enable test -// TEST_F(KDTTest, kdt_serialize) { -// assert(!xb.empty()); -// -// auto preprocessor = index_->BuildPreprocessor(base_dataset, conf); -// index_->set_preprocessor(preprocessor); -// -// auto model = index_->Train(base_dataset, conf); -// // index_->Add(base_dataset, conf); -// auto binaryset = index_->Serialize(); -// auto new_index = std::make_shared(); -// new_index->Load(binaryset); -// auto result = new_index->Search(query_dataset, conf); -// AssertAnns(result, nq, k); -// PrintResult(result, nq, k); -// ASSERT_EQ(new_index->Count(), nb); -// ASSERT_EQ(new_index->Dimension(), dim); -// ASSERT_THROW({ new_index->Clone(); }, knowhere::KnowhereException); -// ASSERT_NO_THROW({ new_index->Seal(); }); -// -// { -// int fileno = 0; -// const std::string& base_name = "/tmp/kdt_serialize_test_bin_"; -// std::vector filename_list; -// std::vector> meta_list; -// for (auto& iter : binaryset.binary_map_) { -// const std::string& filename = base_name + std::to_string(fileno); -// FileIOWriter writer(filename); -// writer(iter.second->data.get(), iter.second->size); -// -// meta_list.emplace_back(std::make_pair(iter.first, iter.second->size)); -// filename_list.push_back(filename); -// ++fileno; -// } -// -// knowhere::BinarySet load_data_list; -// for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { -// auto bin_size = meta_list[i].second; -// FileIOReader reader(filename_list[i]); -// -// auto load_data = new uint8_t[bin_size]; -// reader(load_data, bin_size); -// auto data = std::make_shared(); -// data.reset(load_data); -// load_data_list.Append(meta_list[i].first, data, bin_size); -// } -// -// auto new_index = std::make_shared(); -// new_index->Load(load_data_list); -// auto result = new_index->Search(query_dataset, conf); -// AssertAnns(result, nq, k); -// PrintResult(result, nq, k); -// } -//} diff --git a/core/src/index/unittest/test_sptag.cpp b/core/src/index/unittest/test_sptag.cpp new file mode 100644 index 0000000000..d472dab0d7 --- /dev/null +++ b/core/src/index/unittest/test_sptag.cpp @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include + +#include "knowhere/adapter/SptagAdapter.h" +#include "knowhere/adapter/Structure.h" +#include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/IndexSPTAG.h" +#include "knowhere/index/vector_index/helpers/Definitions.h" + +#include "unittest/utils.h" + +using ::testing::Combine; +using ::testing::TestWithParam; +using ::testing::Values; + +class SPTAGTest : public DataGen, public TestWithParam { + protected: + void + SetUp() override { + IndexType = GetParam(); + Generate(128, 100, 5); + index_ = std::make_shared(IndexType); + if (IndexType == "KDT") { + auto tempconf = std::make_shared(); + tempconf->tptnumber = 1; + tempconf->k = 10; + conf = tempconf; + } else { + auto tempconf = std::make_shared(); + tempconf->tptnumber = 1; + tempconf->k = 10; + conf = tempconf; + } + + Init_with_default(); + } + + protected: + knowhere::Config conf; + std::shared_ptr index_ = nullptr; + std::string IndexType; +}; + +INSTANTIATE_TEST_CASE_P(SPTAGParameters, SPTAGTest, Values("KDT", "BKT")); + +// TODO(lxj): add test about count() and dimension() +TEST_P(SPTAGTest, sptag_basic) { + assert(!xb.empty()); + + auto preprocessor = index_->BuildPreprocessor(base_dataset, conf); + index_->set_preprocessor(preprocessor); + + auto model = index_->Train(base_dataset, conf); + index_->set_index_model(model); + index_->Add(base_dataset, conf); + auto result = index_->Search(query_dataset, conf); + AssertAnns(result, nq, k); + + { + // auto ids = result->array()[0]; + // auto dists = result->array()[1]; + auto ids = result->ids(); + auto dists = result->dist(); + + std::stringstream ss_id; + std::stringstream ss_dist; + for (auto i = 0; i < nq; i++) { + for (auto j = 0; j < k; ++j) { + // ss_id << *ids->data()->GetValues(1, i * k + j) << " "; + // ss_dist << *dists->data()->GetValues(1, i * k + j) << " "; + ss_id << *((int64_t*)(ids) + i * k + j) << " "; + ss_dist << *((float*)(dists) + i * k + j) << " "; + } + ss_id << std::endl; + ss_dist << std::endl; + } + std::cout << "id\n" << ss_id.str() << std::endl; + std::cout << "dist\n" << ss_dist.str() << std::endl; + } +} + +TEST_P(SPTAGTest, sptag_serialize) { + assert(!xb.empty()); + + auto preprocessor = index_->BuildPreprocessor(base_dataset, conf); + index_->set_preprocessor(preprocessor); + + auto model = index_->Train(base_dataset, conf); + + index_->Add(base_dataset, conf); + auto binaryset = index_->Serialize(); + auto new_index = std::make_shared(IndexType); + new_index->Load(binaryset); + auto result = new_index->Search(query_dataset, conf); + AssertAnns(result, nq, k); + PrintResult(result, nq, k); + ASSERT_EQ(new_index->Count(), nb); + ASSERT_EQ(new_index->Dimension(), dim); + // ASSERT_THROW({ new_index->Clone(); }, knowhere::KnowhereException); + // ASSERT_NO_THROW({ new_index->Seal(); }); + + { + int fileno = 0; + const std::string& base_name = "/tmp/sptag_serialize_test_bin_"; + std::vector filename_list; + std::vector> meta_list; + for (auto& iter : binaryset.binary_map_) { + const std::string& filename = base_name + std::to_string(fileno); + FileIOWriter writer(filename); + writer(iter.second->data.get(), iter.second->size); + + meta_list.emplace_back(std::make_pair(iter.first, iter.second->size)); + filename_list.push_back(filename); + ++fileno; + } + + knowhere::BinarySet load_data_list; + for (int i = 0; i < filename_list.size() && i < meta_list.size(); ++i) { + auto bin_size = meta_list[i].second; + FileIOReader reader(filename_list[i]); + + auto load_data = new uint8_t[bin_size]; + reader(load_data, bin_size); + auto data = std::make_shared(); + data.reset(load_data); + load_data_list.Append(meta_list[i].first, data, bin_size); + } + + auto new_index = std::make_shared(IndexType); + new_index->Load(load_data_list); + auto result = new_index->Search(query_dataset, conf); + AssertAnns(result, nq, k); + PrintResult(result, nq, k); + } +} diff --git a/core/src/index/unittest/utils.cpp b/core/src/index/unittest/utils.cpp index 2556b60fad..11dad4a8b9 100644 --- a/core/src/index/unittest/utils.cpp +++ b/core/src/index/unittest/utils.cpp @@ -160,15 +160,17 @@ AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) { void PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) { - auto ids = result->array()[0]; - auto dists = result->array()[1]; + auto ids = result->ids(); + auto dists = result->dist(); std::stringstream ss_id; std::stringstream ss_dist; - for (auto i = 0; i < 10; i++) { + for (auto i = 0; i < nq; i++) { for (auto j = 0; j < k; ++j) { - ss_id << *(ids->data()->GetValues(1, i * k + j)) << " "; - ss_dist << *(dists->data()->GetValues(1, i * k + j)) << " "; + // ss_id << *(ids->data()->GetValues(1, i * k + j)) << " "; + // ss_dist << *(dists->data()->GetValues(1, i * k + j)) << " "; + ss_id << *((int64_t*)(ids) + i * k + j) << " "; + ss_dist << *((float*)(dists) + i * k + j) << " "; } ss_id << std::endl; ss_dist << std::endl; diff --git a/core/src/scheduler/action/PushTaskToNeighbour.cpp b/core/src/scheduler/action/PushTaskToNeighbour.cpp index f49f1d871f..562041cee1 100644 --- a/core/src/scheduler/action/PushTaskToNeighbour.cpp +++ b/core/src/scheduler/action/PushTaskToNeighbour.cpp @@ -106,41 +106,6 @@ Action::SpecifiedResourceLabelTaskScheduler(const ResourceMgrPtr& res_mgr, Resou std::shared_ptr event) { auto task_item = event->task_table_item_; auto task = event->task_table_item_->task; - // if (resource->type() == ResourceType::DISK) { - // // step 1: calculate shortest path per resource, from disk to compute resource - // auto compute_resources = res_mgr->GetComputeResources(); - // std::vector> paths; - // std::vector transport_costs; - // for (auto& res : compute_resources) { - // std::vector path; - // uint64_t transport_cost = ShortestPath(resource, res, res_mgr, path); - // transport_costs.push_back(transport_cost); - // paths.emplace_back(path); - // } - // if (task->job_.lock()->type() == JobType::BUILD) { - // // step2: Read device id in config - // // get build index gpu resource - // server::Config& config = server::Config::GetInstance(); - // int32_t build_index_gpu; - // Status stat = config.GetResourceConfigIndexBuildDevice(build_index_gpu); - // - // bool find_gpu_res = false; - // if (res_mgr->GetResource(ResourceType::GPU, build_index_gpu) != nullptr) { - // for (uint64_t i = 0; i < compute_resources.size(); ++i) { - // if (compute_resources[i]->name() == - // res_mgr->GetResource(ResourceType::GPU, build_index_gpu)->name()) { - // find_gpu_res = true; - // Path task_path(paths[i], paths[i].size() - 1); - // task->path() = task_path; - // break; - // } - // } - // } - // if (not find_gpu_res) { - // task->path() = Path(paths[0], paths[0].size() - 1); - // } - // } - // } if (resource->name() == task->path().Last()) { resource->WakeupExecutor(); diff --git a/core/src/scheduler/task/BuildIndexTask.cpp b/core/src/scheduler/task/BuildIndexTask.cpp index d8602c141e..f561fa947d 100644 --- a/core/src/scheduler/task/BuildIndexTask.cpp +++ b/core/src/scheduler/task/BuildIndexTask.cpp @@ -146,8 +146,7 @@ XBuildIndexTask::Execute() { status = meta_ptr->UpdateTableFile(table_file); ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete"; - std::cout << "ERROR: failed to build index, index file is too large or gpu memory is not enough" - << std::endl; + ENGINE_LOG_ERROR << "Failed to build index, index file is too large or gpu memory is not enough"; build_index_job->BuildIndexDone(to_index_id_); build_index_job->GetStatus() = Status(DB_ERROR, msg); @@ -179,8 +178,8 @@ XBuildIndexTask::Execute() { status = meta_ptr->UpdateTableFile(table_file); ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete"; - std::cout << "ERROR: failed to persist index file: " << table_file.location_ - << ", possible out of disk space" << std::endl; + ENGINE_LOG_ERROR << "Failed to persist index file: " << table_file.location_ + << ", possible out of disk space"; build_index_job->BuildIndexDone(to_index_id_); build_index_job->GetStatus() = Status(DB_ERROR, msg); diff --git a/core/src/sdk/examples/partition/src/ClientTest.cpp b/core/src/sdk/examples/partition/src/ClientTest.cpp index 775e1f6d60..a12a7ff50e 100644 --- a/core/src/sdk/examples/partition/src/ClientTest.cpp +++ b/core/src/sdk/examples/partition/src/ClientTest.cpp @@ -148,6 +148,7 @@ ClientTest::Test(const std::string& address, const std::string& port) { } { // wait unit build index finish + milvus_sdk::TimeRecorder rc("Create index"); std::cout << "Wait until create all index done" << std::endl; milvus::IndexParam index1 = BuildIndexParam(); milvus_sdk::Utils::PrintIndexParam(index1); diff --git a/core/src/sdk/examples/simple/src/ClientTest.cpp b/core/src/sdk/examples/simple/src/ClientTest.cpp index dfa5e2219e..016c9eceac 100644 --- a/core/src/sdk/examples/simple/src/ClientTest.cpp +++ b/core/src/sdk/examples/simple/src/ClientTest.cpp @@ -150,6 +150,7 @@ ClientTest::Test(const std::string& address, const std::string& port) { } { // wait unit build index finish + milvus_sdk::TimeRecorder rc("Create index"); std::cout << "Wait until create all index done" << std::endl; milvus::IndexParam index1 = BuildIndexParam(); milvus_sdk::Utils::PrintIndexParam(index1); diff --git a/core/src/sdk/examples/utils/Utils.cpp b/core/src/sdk/examples/utils/Utils.cpp index da5e854e9b..fa373cd498 100644 --- a/core/src/sdk/examples/utils/Utils.cpp +++ b/core/src/sdk/examples/utils/Utils.cpp @@ -157,18 +157,20 @@ void Utils::PrintSearchResult(const std::vector>& search_record_array, const milvus::TopKQueryResult& topk_query_result) { BLOCK_SPLITER - size_t nq = topk_query_result.row_num; - size_t topk = topk_query_result.ids.size() / nq; - std::cout << "Returned result count: " << nq * topk << std::endl; + std::cout << "Returned result count: " << topk_query_result.size() << std::endl; - int32_t index = 0; - for (size_t i = 0; i < nq; i++) { - auto search_id = search_record_array[index].first; - index++; - std::cout << "No." << index << " vector " << search_id << " top " << topk << " search result:" << std::endl; + if (topk_query_result.size() != search_record_array.size()) { + std::cout << "ERROR: Returned result count dones equal nq" << std::endl; + return; + } + + for (size_t i = 0; i < topk_query_result.size(); i++) { + const milvus::QueryResult& one_result = topk_query_result[i]; + size_t topk = one_result.ids.size(); + auto search_id = search_record_array[i].first; + std::cout << "No." << i << " vector " << search_id << " top " << topk << " search result:" << std::endl; for (size_t j = 0; j < topk; j++) { - size_t idx = i * topk + j; - std::cout << "\t" << topk_query_result.ids[idx] << "\t" << topk_query_result.distances[idx] << std::endl; + std::cout << "\t" << one_result.ids[j] << "\t" << one_result.distances[j] << std::endl; } } BLOCK_SPLITER @@ -178,12 +180,11 @@ void Utils::CheckSearchResult(const std::vector>& search_record_array, const milvus::TopKQueryResult& topk_query_result) { BLOCK_SPLITER - size_t nq = topk_query_result.row_num; - size_t result_k = topk_query_result.ids.size() / nq; - int64_t index = 0; + size_t nq = topk_query_result.size(); for (size_t i = 0; i < nq; i++) { - auto result_id = topk_query_result.ids[i * result_k]; - auto search_id = search_record_array[index++].first; + const milvus::QueryResult& one_result = topk_query_result[i]; + auto search_id = search_record_array[i].first; + int64_t result_id = one_result.ids[0]; if (result_id != search_id) { std::cout << "The top 1 result is wrong: " << result_id << " vs. " << search_id << std::endl; } else { @@ -198,9 +199,7 @@ Utils::DoSearch(std::shared_ptr conn, const std::string& tab const std::vector& partiton_tags, int64_t top_k, int64_t nprobe, const std::vector>& search_record_array, milvus::TopKQueryResult& topk_query_result) { - topk_query_result.distances.clear(); - topk_query_result.ids.clear(); - topk_query_result.row_num = 0; + topk_query_result.clear(); std::vector query_range_array; milvus::Range rg; diff --git a/core/src/sdk/grpc/ClientProxy.cpp b/core/src/sdk/grpc/ClientProxy.cpp index 4a9c319b4d..fd19281343 100644 --- a/core/src/sdk/grpc/ClientProxy.cpp +++ b/core/src/sdk/grpc/ClientProxy.cpp @@ -250,12 +250,17 @@ ClientProxy::Search(const std::string& table_name, const std::vectorSearch(result, search_param); // step 4: convert result array - topk_query_result.row_num = result.row_num(); - topk_query_result.ids.resize(result.ids().size()); - memcpy(topk_query_result.ids.data(), result.ids().data(), result.ids().size() * sizeof(int64_t)); - topk_query_result.distances.resize(result.distances().size()); - memcpy(topk_query_result.distances.data(), result.distances().data(), - result.distances().size() * sizeof(float)); + topk_query_result.reserve(result.row_num()); + int64_t nq = result.row_num(); + int64_t topk = result.ids().size() / nq; + for (int64_t i = 0; i < result.row_num(); i++) { + milvus::QueryResult one_result; + one_result.ids.resize(topk); + one_result.distances.resize(topk); + memcpy(one_result.ids.data(), result.ids().data() + topk * i, topk * sizeof(int64_t)); + memcpy(one_result.distances.data(), result.distances().data() + topk * i, topk * sizeof(float)); + topk_query_result.emplace_back(one_result); + } return status; } catch (std::exception& ex) { diff --git a/core/src/sdk/include/MilvusApi.h b/core/src/sdk/include/MilvusApi.h index 0ec37fa9a4..9fa98deb40 100644 --- a/core/src/sdk/include/MilvusApi.h +++ b/core/src/sdk/include/MilvusApi.h @@ -81,11 +81,11 @@ struct RowRecord { /** * @brief TopK query result */ -struct TopKQueryResult { - int64_t row_num; +struct QueryResult { std::vector ids; std::vector distances; }; +using TopKQueryResult = std::vector; /** * @brief index parameters diff --git a/core/src/wrapper/ConfAdapter.cpp b/core/src/wrapper/ConfAdapter.cpp index 0ab38d3394..aa4b3c12b8 100644 --- a/core/src/wrapper/ConfAdapter.cpp +++ b/core/src/wrapper/ConfAdapter.cpp @@ -201,5 +201,35 @@ NSGConfAdapter::MatchSearch(const TempMetaConf& metaconf, const IndexType& type) return conf; } +knowhere::Config +SPTAGKDTConfAdapter::Match(const TempMetaConf& metaconf) { + auto conf = std::make_shared(); + conf->d = metaconf.dim; + conf->metric_type = metaconf.metric_type; + return conf; +} + +knowhere::Config +SPTAGKDTConfAdapter::MatchSearch(const TempMetaConf& metaconf, const IndexType& type) { + auto conf = std::make_shared(); + conf->k = metaconf.k; + return conf; +} + +knowhere::Config +SPTAGBKTConfAdapter::Match(const TempMetaConf& metaconf) { + auto conf = std::make_shared(); + conf->d = metaconf.dim; + conf->metric_type = metaconf.metric_type; + return conf; +} + +knowhere::Config +SPTAGBKTConfAdapter::MatchSearch(const TempMetaConf& metaconf, const IndexType& type) { + auto conf = std::make_shared(); + conf->k = metaconf.k; + return conf; +} + } // namespace engine } // namespace milvus diff --git a/core/src/wrapper/ConfAdapter.h b/core/src/wrapper/ConfAdapter.h index 85637a4969..46fc27eb3b 100644 --- a/core/src/wrapper/ConfAdapter.h +++ b/core/src/wrapper/ConfAdapter.h @@ -94,5 +94,23 @@ class NSGConfAdapter : public IVFConfAdapter { MatchSearch(const TempMetaConf& metaconf, const IndexType& type) final; }; +class SPTAGKDTConfAdapter : public ConfAdapter { + public: + knowhere::Config + Match(const TempMetaConf& metaconf) override; + + knowhere::Config + MatchSearch(const TempMetaConf& metaconf, const IndexType& type) override; +}; + +class SPTAGBKTConfAdapter : public ConfAdapter { + public: + knowhere::Config + Match(const TempMetaConf& metaconf) override; + + knowhere::Config + MatchSearch(const TempMetaConf& metaconf, const IndexType& type) override; +}; + } // namespace engine } // namespace milvus diff --git a/core/src/wrapper/ConfAdapterMgr.cpp b/core/src/wrapper/ConfAdapterMgr.cpp index d0eba04529..cf58c0110f 100644 --- a/core/src/wrapper/ConfAdapterMgr.cpp +++ b/core/src/wrapper/ConfAdapterMgr.cpp @@ -56,6 +56,9 @@ AdapterMgr::RegisterAdapter() { REGISTER_CONF_ADAPTER(IVFPQConfAdapter, IndexType::FAISS_IVFPQ_MIX, ivfpq_mix); REGISTER_CONF_ADAPTER(NSGConfAdapter, IndexType::NSG_MIX, nsg_mix); + + REGISTER_CONF_ADAPTER(SPTAGKDTConfAdapter, IndexType::SPTAG_KDT_RNT_CPU, sptag_kdt); + REGISTER_CONF_ADAPTER(SPTAGBKTConfAdapter, IndexType::SPTAG_BKT_RNT_CPU, sptag_bkt); } } // namespace engine diff --git a/core/src/wrapper/VecImpl.cpp b/core/src/wrapper/VecImpl.cpp index e7967cbf59..74e9e94a2f 100644 --- a/core/src/wrapper/VecImpl.cpp +++ b/core/src/wrapper/VecImpl.cpp @@ -21,17 +21,19 @@ #include "knowhere/index/vector_index/IndexIDMAP.h" #include "utils/Log.h" #include "wrapper/WrapperException.h" +#include "wrapper/gpu/GPUVecImpl.h" #ifdef MILVUS_GPU_VERSION -#include -#include +#include "knowhere/index/vector_index/IndexGPUIVF.h" +#include "knowhere/index/vector_index/IndexIVFSQHybrid.h" +#include "knowhere/index/vector_index/helpers/Cloner.h" #endif /* * no parameter check in this layer. - * only responible for index combination + * only responsible for index combination */ namespace milvus { diff --git a/core/src/wrapper/VecIndex.cpp b/core/src/wrapper/VecIndex.cpp index 81a13c60f9..75f75fb983 100644 --- a/core/src/wrapper/VecIndex.cpp +++ b/core/src/wrapper/VecIndex.cpp @@ -22,8 +22,8 @@ #include "knowhere/index/vector_index/IndexIVF.h" #include "knowhere/index/vector_index/IndexIVFPQ.h" #include "knowhere/index/vector_index/IndexIVFSQ.h" -#include "knowhere/index/vector_index/IndexKDT.h" #include "knowhere/index/vector_index/IndexNSG.h" +#include "knowhere/index/vector_index/IndexSPTAG.h" #include "utils/Log.h" #ifdef MILVUS_GPU_VERSION @@ -128,7 +128,11 @@ GetVecIndexFactory(const IndexType& type, const Config& cfg) { break; } case IndexType::SPTAG_KDT_RNT_CPU: { - index = std::make_shared(); + index = std::make_shared("KDT"); + break; + } + case IndexType::SPTAG_BKT_RNT_CPU: { + index = std::make_shared("BKT"); break; } case IndexType::FAISS_IVFSQ8_CPU: { diff --git a/core/src/wrapper/VecIndex.h b/core/src/wrapper/VecIndex.h index efe01a25d7..e69655b087 100644 --- a/core/src/wrapper/VecIndex.h +++ b/core/src/wrapper/VecIndex.h @@ -49,6 +49,7 @@ enum class IndexType { FAISS_IVFSQ8_HYBRID, // only support build on gpu. NSG_MIX, FAISS_IVFPQ_MIX, + SPTAG_BKT_RNT_CPU, }; class VecIndex; @@ -139,6 +140,9 @@ write_index(VecIndexPtr index, const std::string& location); extern VecIndexPtr read_index(const std::string& location); +VecIndexPtr +read_index(const std::string& location, knowhere::BinarySet& index_binary); + extern VecIndexPtr GetVecIndexFactory(const IndexType& type, const Config& cfg = Config()); diff --git a/core/unittest/db/test_db.cpp b/core/unittest/db/test_db.cpp index 343e924e8e..217fbe429e 100644 --- a/core/unittest/db/test_db.cpp +++ b/core/unittest/db/test_db.cpp @@ -305,24 +305,30 @@ TEST_F(DBTest, SEARCH_TEST) { // test FAISS_IVFSQ8H optimizer index.engine_type_ = (int)milvus::engine::EngineType::FAISS_IVFSQ8H; db_->CreateIndex(TABLE_NAME, index); // wait until build index finish + std::vector partition_tag; + milvus::engine::ResultIds result_ids; + milvus::engine::ResultDistances result_dists; { - milvus::engine::QueryResults results; - stat = db_->Query(TABLE_NAME, k, nq, 10, xq.data(), results); + result_ids.clear(); + result_dists.clear(); + stat = db_->Query(TABLE_NAME, partition_tag, k, nq, 10, xq.data(), result_ids, result_dists); ASSERT_TRUE(stat.ok()); } { - milvus::engine::QueryResults large_nq_results; - stat = db_->Query(TABLE_NAME, k, 200, 10, xq.data(), large_nq_results); + result_ids.clear(); + result_dists.clear(); + stat = db_->Query(TABLE_NAME, partition_tag, k, 200, 10, xq.data(), result_ids, result_dists); ASSERT_TRUE(stat.ok()); } { // search by specify index file milvus::engine::meta::DatesT dates; std::vector file_ids = {"1", "2", "3", "4", "5", "6"}; - milvus::engine::QueryResults results; - stat = db_->Query(TABLE_NAME, file_ids, k, nq, 10, xq.data(), dates, results); + result_ids.clear(); + result_dists.clear(); + stat = db_->QueryByFileID(TABLE_NAME, file_ids, k, nq, 10, xq.data(), dates, result_ids, result_dists); ASSERT_TRUE(stat.ok()); } diff --git a/core/unittest/wrapper/test_wrapper.cpp b/core/unittest/wrapper/test_wrapper.cpp index a07fafc7b8..4019c0f63c 100644 --- a/core/unittest/wrapper/test_wrapper.cpp +++ b/core/unittest/wrapper/test_wrapper.cpp @@ -16,28 +16,29 @@ // under the License. #include "easyloggingpp/easylogging++.h" -#include "wrapper/VecIndex.h" #ifdef MILVUS_GPU_VERSION #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h" #endif #include "knowhere/index/vector_index/helpers/IndexParameter.h" +#include "wrapper/VecIndex.h" #include "wrapper/utils.h" #include INITIALIZE_EASYLOGGINGPP +using ::testing::Combine; using ::testing::TestWithParam; using ::testing::Values; -using ::testing::Combine; class KnowhereWrapperTest - : public DataGenBase, - public TestWithParam<::std::tuple> { + : public DataGenBase, + public TestWithParam<::std::tuple> { protected: - void SetUp() override { + void + SetUp() override { #ifdef MILVUS_GPU_VERSION knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, PINMEM, TEMPMEM, RESNUM); #endif @@ -58,7 +59,8 @@ class KnowhereWrapperTest searchconf = ParamGenerator::GetInstance().GenSearchConf(index_type, tempconf); } - void TearDown() override { + void + TearDown() override { #ifdef MILVUS_GPU_VERSION knowhere::FaissGpuResourceMgr::GetInstance().Free(); #endif @@ -71,22 +73,20 @@ class KnowhereWrapperTest knowhere::Config searchconf; }; -INSTANTIATE_TEST_CASE_P(WrapperParam, KnowhereWrapperTest, - Values( - //["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] - +INSTANTIATE_TEST_CASE_P( + WrapperParam, KnowhereWrapperTest, + Values( + //["Index type", "Generator type", "dim", "nb", "nq", "k", "build config", "search config"] #ifdef MILVUS_GPU_VERSION std::make_tuple(milvus::engine::IndexType::FAISS_IVFFLAT_GPU, "Default", DIM, NB, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFFLAT_MIX, "Default", 64, 1000, 10, 10), - // std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_GPU, "Default", DIM, NB, - // 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_GPU, "Default", DIM, NB, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_MIX, "Default", DIM, NB, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFPQ_MIX, "Default", 64, 1000, 10, 10), - -// std::make_tuple(IndexType::NSG_MIX, "Default", 128, 250000, 10, 10), + // std::make_tuple(milvus::engine::IndexType::NSG_MIX, "Default", 128, 250000, 10, 10), #endif - // std::make_tuple(IndexType::SPTAG_KDT_RNT_CPU, "Default", 128, 250000, 10, 10), + // std::make_tuple(milvus::engine::IndexType::SPTAG_KDT_RNT_CPU, "Default", 128, 100, 10, 10), + // std::make_tuple(milvus::engine::IndexType::SPTAG_BKT_RNT_CPU, "Default", 128, 100, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IDMAP, "Default", 64, 1000, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFFLAT_CPU, "Default", 64, 1000, 10, 10), std::make_tuple(milvus::engine::IndexType::FAISS_IVFSQ8_CPU, "Default", DIM, NB, 10, 10))); diff --git a/docs/README.md b/docs/README.md index 64409d8240..f5f964de0d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,3 +9,15 @@ The tests are run on [SIFT1B dataset](http://corpus-texmex.irisa.fr/), and provi - Recall: The fraction of the total amount of relevant instances that were actually retrieved. Test variables are `nq` and `topk`. + +## Test reports + +The following is a list of existing test reports: + +- [IVF_SQ8](test_report/milvus_ivfsq8_test_report_detailed_version.md) +- [IVF_SQ8H](test_report/milvus_ivfsq8h_test_report_detailed_version.md) + +To read the CN version of these reports: + +- [IVF_SQ8_cn](test_report/milvus_ivfsq8_test_report_detailed_version_cn.md) +- [IVF_SQ8H_cn](test_report/milvus_ivfsq8h_test_report_detailed_version_cn.md)