From 02a8a07322ffcd7b8a5574a7c6a73dc574957ac4 Mon Sep 17 00:00:00 2001 From: zhagnlu <1542303831@qq.com> Date: Thu, 20 Oct 2022 10:23:27 +0800 Subject: [PATCH] Fix BucketExists bug in minio chunk manager (#19548) (#19854) Signed-off-by: zhagnlu Signed-off-by: zhagnlu Co-authored-by: zhagnlu --- internal/core/src/storage/CMakeLists.txt | 50 +++++++------- .../core/src/storage/DiskFileManagerImpl.cpp | 30 +------- .../core/src/storage/LocalChunkManager.cpp | 1 + .../core/src/storage/MinioChunkManager.cpp | 41 ++++++++--- internal/core/src/storage/MinioChunkManager.h | 3 +- .../core/thirdparty/aws_sdk/CMakeLists.txt | 68 ++++++++++++------- internal/core/unittest/CMakeLists.txt | 4 +- 7 files changed, 106 insertions(+), 91 deletions(-) diff --git a/internal/core/src/storage/CMakeLists.txt b/internal/core/src/storage/CMakeLists.txt index 47acb32e0d..00d587136e 100644 --- a/internal/core/src/storage/CMakeLists.txt +++ b/internal/core/src/storage/CMakeLists.txt @@ -6,7 +6,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -14,47 +14,47 @@ # See the License for the specific language governing permissions and # limitations under the License. -option( EMBEDDED_MILVUS "Enable embedded Milvus" OFF ) +option(EMBEDDED_MILVUS "Enable embedded Milvus" OFF) -if ( EMBEDDED_MILVUS ) - add_compile_definitions( EMBEDDED_MILVUS ) +if(EMBEDDED_MILVUS) + add_compile_definitions(EMBEDDED_MILVUS) endif() milvus_add_pkg_config("milvus_storage") set(STORAGE_FILES - parquet_c.cpp - PayloadStream.cpp - DataCodec.cpp - Util.cpp - PayloadReader.cpp - PayloadWriter.cpp - FieldData.cpp - IndexData.cpp - InsertData.cpp - Event.cpp - storage_c.cpp) + parquet_c.cpp + PayloadStream.cpp + DataCodec.cpp + Util.cpp + PayloadReader.cpp + PayloadWriter.cpp + FieldData.cpp + IndexData.cpp + InsertData.cpp + Event.cpp + storage_c.cpp) -if ( BUILD_DISK_ANN STREQUAL "ON" ) +if(BUILD_DISK_ANN STREQUAL "ON") set(STORAGE_FILES - ${STORAGE_FILES} - LocalChunkManager.cpp - MinioChunkManager.cpp - DiskFileManagerImpl.cpp) -endif () + ${STORAGE_FILES} + LocalChunkManager.cpp + MinioChunkManager.cpp + DiskFileManagerImpl.cpp) +endif() add_library(milvus_storage SHARED ${STORAGE_FILES}) find_package(Boost REQUIRED COMPONENTS filesystem) -if ( BUILD_DISK_ANN STREQUAL "ON" ) - target_link_libraries( milvus_storage PUBLIC milvus_common Boost::filesystem aws-cpp-sdk-s3 pthread) +if(BUILD_DISK_ANN STREQUAL "ON") + target_link_libraries(milvus_storage PUBLIC milvus_common Boost::filesystem aws-cpp-sdk-s3 pthread) else() - target_link_libraries( milvus_storage PUBLIC milvus_common Boost::filesystem pthread) + target_link_libraries(milvus_storage PUBLIC milvus_common Boost::filesystem pthread) endif() if(NOT CMAKE_INSTALL_PREFIX) set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}) endif() -install(TARGETS milvus_storage DESTINATION "${CMAKE_INSTALL_LIBDIR}") +install(TARGETS milvus_storage DESTINATION "${CMAKE_INSTALL_LIBDIR}") diff --git a/internal/core/src/storage/DiskFileManagerImpl.cpp b/internal/core/src/storage/DiskFileManagerImpl.cpp index 75503771b5..e38a6007ee 100644 --- a/internal/core/src/storage/DiskFileManagerImpl.cpp +++ b/internal/core/src/storage/DiskFileManagerImpl.cpp @@ -184,34 +184,8 @@ DiskFileManagerImpl::GetLocalRawDataObjectPrefix() { bool DiskFileManagerImpl::RemoveFile(const std::string& file) noexcept { - // remove local file - bool localExist = false; - auto& local_chunk_manager = LocalChunkManager::GetInstance(); - FILEMANAGER_TRY - localExist = local_chunk_manager.Exist(file); - FILEMANAGER_CATCH - FILEMANAGER_END - if (localExist) { - FILEMANAGER_TRY - local_chunk_manager.Remove(file); - FILEMANAGER_CATCH - FILEMANAGER_END - } - - // remove according remote file - std::string remoteFile = ""; - bool remoteExist = false; - FILEMANAGER_TRY - remoteExist = rcm_->Exist(remoteFile); - FILEMANAGER_CATCH - FILEMANAGER_END - if (remoteExist) { - FILEMANAGER_TRY - rcm_->Remove(file); - FILEMANAGER_CATCH - FILEMANAGER_END - } - return true; + // TODO: implement this interface + return false; } std::optional diff --git a/internal/core/src/storage/LocalChunkManager.cpp b/internal/core/src/storage/LocalChunkManager.cpp index c2fcf5c3ff..6c79061210 100644 --- a/internal/core/src/storage/LocalChunkManager.cpp +++ b/internal/core/src/storage/LocalChunkManager.cpp @@ -18,6 +18,7 @@ #include "Exception.h" #include +#include #include #include diff --git a/internal/core/src/storage/MinioChunkManager.cpp b/internal/core/src/storage/MinioChunkManager.cpp index 27b801cee6..e2788e86c0 100644 --- a/internal/core/src/storage/MinioChunkManager.cpp +++ b/internal/core/src/storage/MinioChunkManager.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ #define S3NoSuchBucket "NoSuchBucket" namespace milvus::storage { +std::atomic MinioChunkManager::init_count_(0); + /** * @brief convert std::string to Aws::String * because Aws has String type internally @@ -66,7 +69,10 @@ ConvertFromAwsString(const Aws::String& aws_str) { MinioChunkManager::MinioChunkManager(const StorageConfig& storage_config) : default_bucket_name_(storage_config.bucket_name) { - Aws::InitAPI(sdk_options_); + const size_t initCount = init_count_++; + if (initCount == 0) { + Aws::InitAPI(sdk_options_); + } Aws::Client::ClientConfiguration config; config.endpointOverride = ConvertToAwsString(storage_config.address); @@ -105,7 +111,10 @@ MinioChunkManager::MinioChunkManager(const StorageConfig& storage_config) } MinioChunkManager::~MinioChunkManager() { - Aws::ShutdownAPI(sdk_options_); + const size_t initCount = --init_count_; + if (initCount == 0) { + Aws::ShutdownAPI(sdk_options_); + } client_.reset(); } @@ -146,17 +155,31 @@ MinioChunkManager::Write(const std::string& filepath, void* buf, uint64_t size) bool MinioChunkManager::BucketExists(const std::string& bucket_name) { - auto outcome = client_->ListBuckets(); + // auto outcome = client_->ListBuckets(); + + // if (!outcome.IsSuccess()) { + // THROWS3ERROR(BucketExists); + // } + // for (auto&& b : outcome.GetResult().GetBuckets()) { + // if (ConvertFromAwsString(b.GetName()) == bucket_name) { + // return true; + // } + // } + Aws::S3::Model::HeadBucketRequest request; + request.SetBucket(bucket_name.c_str()); + + auto outcome = client_->HeadBucket(request); if (!outcome.IsSuccess()) { - THROWS3ERROR(BucketExists); - } - for (auto&& b : outcome.GetResult().GetBuckets()) { - if (ConvertFromAwsString(b.GetName()) == bucket_name) { - return true; + auto error = outcome.GetError(); + if (!error.GetExceptionName().empty()) { + std::stringstream err_msg; + err_msg << "Error: BucketExists: " << error.GetExceptionName() + " - " + error.GetMessage(); + throw S3ErrorException(err_msg.str()); } + return false; } - return false; + return true; } std::vector diff --git a/internal/core/src/storage/MinioChunkManager.h b/internal/core/src/storage/MinioChunkManager.h index 5be2c1f33c..e1261d9349 100644 --- a/internal/core/src/storage/MinioChunkManager.h +++ b/internal/core/src/storage/MinioChunkManager.h @@ -114,7 +114,8 @@ class MinioChunkManager : public RemoteChunkManager { ListObjects(const char* bucket_name, const char* prefix = NULL); private: - Aws::SDKOptions sdk_options_; + const Aws::SDKOptions sdk_options_; + static std::atomic init_count_; std::shared_ptr client_; std::string default_bucket_name_; }; diff --git a/internal/core/thirdparty/aws_sdk/CMakeLists.txt b/internal/core/thirdparty/aws_sdk/CMakeLists.txt index d169611219..1206e78eb3 100644 --- a/internal/core/thirdparty/aws_sdk/CMakeLists.txt +++ b/internal/core/thirdparty/aws_sdk/CMakeLists.txt @@ -1,4 +1,4 @@ -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- # Copyright (C) 2019-2020 Zilliz. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance @@ -9,46 +9,63 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under the License. -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- -set ( AWS_SDK_VERSION "1.8.186" ) -set ( AWS_SDK_SOURCE_URL - "https://github.com/aws/aws-sdk-cpp/archive/refs/tags/${AWS_SDK_VERSION}.tar.gz" ) -set ( AWS_SDK_MD5 "ef4351b0969474cb85f39bc9f1975eb5" ) +set(AWS_SDK_VERSION "1.8.186") -macro ( build_aws_sdk_s3 ) - message( STATUS "Building AWS-SDK-${AWS_SDK_VERSION} from source" ) +macro(build_aws_sdk_s3) + message(STATUS "Building AWS-SDK-${AWS_SDK_VERSION} from source") - set ( AWS_SDK_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX} ) + set(AWS_SDK_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) - set( AWS_SDK_BUILD_COMMAND make ) - set( AWS_SDK_INSTALL_COMMAND make install ) + set(AWS_SDK_BUILD_COMMAND make) + set(AWS_SDK_INSTALL_COMMAND make install) - set (AWS_SDK_S3_CMAKE_ARGS + set(AWS_SDK_S3_CMAKE_ARGS "-DCMAKE_BUILD_TYPE=Release" "-DBUILD_ONLY=s3" - "-DENABLE_TESTING=OFF" + "-DENABLE_TESTING=OFF" "-DAUTORUN_UNIT_TESTS=OFF" - "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}" ) + "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}") ExternalProject_Add(aws_sdk_s3_ep - URL ${AWS_SDK_SOURCE_URL} - URL_MD5 ${AWS_SDK_MD5} - BINARY_DIR aws-s3-bin - PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/aws-sdk-subbuild - BUILD_COMMAND ${AWS_SDK_BUILD_COMMAND} - INSTALL_COMMAND ${AWS_SDK_INSTALL_COMMAND} - CMAKE_ARGS ${AWS_SDK_S3_CMAKE_ARGS} + + GIT_REPOSITORY https://github.com/aws/aws-sdk-cpp.git + GIT_TAG ${AWS_SDK_VERSION} + + # BINARY_DIR aws-s3-bin + PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/aws-sdk-subbuild + BUILD_IN_SOURCE 1 + #PATCH_COMMAND sh prefetch_crt_dependency.sh + LIST_SEPARATOR "|" + BUILD_COMMAND ${AWS_SDK_BUILD_COMMAND} + INSTALL_COMMAND ${AWS_SDK_INSTALL_COMMAND} + CMAKE_ARGS ${AWS_SDK_S3_CMAKE_ARGS} ) + add_library(aws-cpp-sdk-core SHARED IMPORTED) + set_target_properties(aws-cpp-sdk-core + PROPERTIES + IMPORTED_GLOBAL TRUE + IMPORTED_LOCATION ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-core${CMAKE_SHARED_LIBRARY_SUFFIX} + INTERFACE_INCLUDE_DIRECTORIES ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) + add_dependencies(aws-cpp-sdk-core aws_sdk_s3_ep) + add_library(aws-cpp-sdk-s3 SHARED IMPORTED) set_target_properties(aws-cpp-sdk-s3 - PROPERTIES - IMPORTED_GLOBAL TRUE - IMPORTED_LOCATION ${AWS_SDK_INSTALL_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-s3${CMAKE_SHARED_LIBRARY_SUFFIX} - INTERFACE_INCLUDE_DIRECTORIES ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) + PROPERTIES + IMPORTED_GLOBAL TRUE + IMPORTED_LOCATION ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-s3${CMAKE_SHARED_LIBRARY_SUFFIX} + INTERFACE_INCLUDE_DIRECTORIES ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) add_dependencies(aws-cpp-sdk-s3 aws_sdk_s3_ep) + # add_library(aws-cpp-sdk-sts SHARED IMPORTED) + # set_target_properties(aws-cpp-sdk-sts + # PROPERTIES + # IMPORTED_GLOBAL TRUE + # IMPORTED_LOCATION ${AWS_SDK_INSTALL_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-sts${CMAKE_SHARED_LIBRARY_SUFFIX} + # INTERFACE_INCLUDE_DIRECTORIES ${AWS_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) + # add_dependencies(aws-cpp-sdk-sts aws_sdk_s3_ep) get_target_property(S3_IMPORTED_LOCATION aws-cpp-sdk-s3 IMPORTED_LOCATION) get_target_property(S3_INTERFACE_INCLUDE_DIRECTORIES aws-cpp-sdk-s3 INTERFACE_INCLUDE_DIRECTORIES) message("AWS_SDK_INSTALL_PREFIX: ${AWS_SDK_INSTALL_PREFIX}") @@ -57,5 +74,4 @@ macro ( build_aws_sdk_s3 ) message("S3_INTERFACE_INCLUDE_DIRECTORIES: ${S3_INTERFACE_INCLUDE_DIRECTORIES}") endmacro() - build_aws_sdk_s3() diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 43057c1d4b..bfaa342ec5 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -52,8 +52,8 @@ set(MILVUS_TEST_FILES if ( BUILD_DISK_ANN STREQUAL "ON" ) set(MILVUS_TEST_FILES ${MILVUS_TEST_FILES} -# test_minio_chunk_manager.cpp -# test_disk_file_manager_test.cpp + #test_minio_chunk_manager.cpp + #test_disk_file_manager_test.cpp test_local_chunk_manager.cpp ) endif()