diff --git a/internal/core/src/storage/CMakeLists.txt b/internal/core/src/storage/CMakeLists.txt index 2e48a47799..652670d845 100644 --- a/internal/core/src/storage/CMakeLists.txt +++ b/internal/core/src/storage/CMakeLists.txt @@ -51,7 +51,7 @@ add_library(milvus_storage SHARED ${STORAGE_FILES}) find_package(Boost REQUIRED COMPONENTS filesystem) if(BUILD_DISK_ANN STREQUAL "ON") - target_link_libraries(milvus_storage PUBLIC milvus_common Boost::filesystem aws-cpp-sdk-s3 pthread) + target_link_libraries(milvus_storage PUBLIC milvus_common Boost::filesystem aws-cpp-sdk-s3 google_cloud_cpp_storage google_cloud_cpp_common google_cloud_cpp_rest_internal pthread) else() target_link_libraries(milvus_storage PUBLIC milvus_common Boost::filesystem pthread) endif() diff --git a/internal/core/src/storage/MinioChunkManager.cpp b/internal/core/src/storage/MinioChunkManager.cpp index 94965a45dd..179fc1edc2 100644 --- a/internal/core/src/storage/MinioChunkManager.cpp +++ b/internal/core/src/storage/MinioChunkManager.cpp @@ -69,10 +69,18 @@ ConvertFromAwsString(const Aws::String& aws_str) { } void -MinioChunkManager::InitSDKAPI() { +MinioChunkManager::InitSDKAPI(RemoteStorageType type) { std::scoped_lock lock{client_mutex_}; const size_t initCount = init_count_++; if (initCount == 0) { + if (type == RemoteStorageType::GOOGLE_CLOUD) { + sdk_options_.httpOptions.httpClientFactory_create_fn = []() { + // auto credentials = google::cloud::oauth2_internal::GOOGLE_CLOUD_CPP_NS::GoogleDefaultCredentials(); + auto credentials = + std::make_shared(); + return Aws::MakeShared(GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG, credentials); + }; + } Aws::InitAPI(sdk_options_); } } @@ -86,20 +94,8 @@ MinioChunkManager::ShutdownSDKAPI() { } } -MinioChunkManager::MinioChunkManager(const StorageConfig& storage_config) - : default_bucket_name_(storage_config.bucket_name) { - InitSDKAPI(); - Aws::Client::ClientConfiguration config; - config.endpointOverride = ConvertToAwsString(storage_config.address); - - if (storage_config.useSSL) { - config.scheme = Aws::Http::Scheme::HTTPS; - config.verifySSL = true; - } else { - config.scheme = Aws::Http::Scheme::HTTP; - config.verifySSL = false; - } - +void +MinioChunkManager::BuildS3Client(const StorageConfig& storage_config, const Aws::Client::ClientConfiguration& config) { if (storage_config.useIAM) { auto provider = std::make_shared(); auto aws_credentials = provider->GetAWSCredentials(); @@ -118,6 +114,46 @@ MinioChunkManager::MinioChunkManager(const StorageConfig& storage_config) ConvertToAwsString(storage_config.access_key_value)), config, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false); } +} + +void +MinioChunkManager::BuildGoogleCloudClient(const StorageConfig& storage_config, + const Aws::Client::ClientConfiguration& config) { + if (storage_config.useIAM) { + // Using S3 client instead of google client because of compatible protocol + client_ = std::make_shared(config, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false); + } else { + throw std::runtime_error("google cloud only support iam mode now"); + } +} + +MinioChunkManager::MinioChunkManager(const StorageConfig& storage_config) + : default_bucket_name_(storage_config.bucket_name) { + RemoteStorageType storageType; + if (storage_config.address.find("google") != std::string::npos) { + storageType = RemoteStorageType::GOOGLE_CLOUD; + } else { + storageType = RemoteStorageType::S3; + } + + InitSDKAPI(storageType); + + Aws::Client::ClientConfiguration config; + config.endpointOverride = ConvertToAwsString(storage_config.address); + if (storage_config.useSSL) { + config.scheme = Aws::Http::Scheme::HTTPS; + config.verifySSL = true; + } else { + config.scheme = Aws::Http::Scheme::HTTP; + config.verifySSL = false; + } + + if (storageType == RemoteStorageType::S3) { + BuildS3Client(storage_config, config); + } else if (storageType == RemoteStorageType::GOOGLE_CLOUD) { + BuildGoogleCloudClient(storage_config, config); + } // TODO ::BucketExist and CreateBucket func not work, should be fixed // index node has already tried to create bucket when receive index task if bucket not exist diff --git a/internal/core/src/storage/MinioChunkManager.h b/internal/core/src/storage/MinioChunkManager.h index 580b1bba76..0322dc8038 100644 --- a/internal/core/src/storage/MinioChunkManager.h +++ b/internal/core/src/storage/MinioChunkManager.h @@ -17,7 +17,19 @@ #pragma once #include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include #include #include #include @@ -30,6 +42,8 @@ namespace milvus::storage { +enum class RemoteStorageType { S3 = 0, GOOGLE_CLOUD = 1 }; + /** * @brief This MinioChunkManager is responsible for read and write file in S3. */ @@ -113,12 +127,16 @@ class MinioChunkManager : public RemoteChunkManager { std::vector ListObjects(const char* bucket_name, const char* prefix = NULL); void - InitSDKAPI(); + InitSDKAPI(RemoteStorageType type); void ShutdownSDKAPI(); + void + BuildS3Client(const StorageConfig& storage_config, const Aws::Client::ClientConfiguration& config); + void + BuildGoogleCloudClient(const StorageConfig& storage_config, const Aws::Client::ClientConfiguration& config); private: - const Aws::SDKOptions sdk_options_; + Aws::SDKOptions sdk_options_; static std::atomic init_count_; static std::mutex client_mutex_; std::shared_ptr client_; @@ -127,4 +145,50 @@ class MinioChunkManager : public RemoteChunkManager { using MinioChunkManagerPtr = std::unique_ptr; +static const char* GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG = "GoogleHttpClientFactory"; + +class GoogleHttpClientFactory : public Aws::Http::HttpClientFactory { + public: + explicit GoogleHttpClientFactory(std::shared_ptr credentials) { + credentials_ = credentials; + } + + void + SetCredentials(std::shared_ptr credentials) { + credentials_ = credentials; + } + + std::shared_ptr + CreateHttpClient(const Aws::Client::ClientConfiguration& clientConfiguration) const override { + return Aws::MakeShared(GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG, clientConfiguration); + } + + std::shared_ptr + CreateHttpRequest(const Aws::String& uri, + Aws::Http::HttpMethod method, + const Aws::IOStreamFactory& streamFactory) const override { + return CreateHttpRequest(Aws::Http::URI(uri), method, streamFactory); + } + + std::shared_ptr + CreateHttpRequest(const Aws::Http::URI& uri, + Aws::Http::HttpMethod method, + const Aws::IOStreamFactory& streamFactory) const override { + auto request = Aws::MakeShared(GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG, + uri, method); + request->SetResponseStreamFactory(streamFactory); + auto auth_header = credentials_->AuthorizationHeader(); + if (!auth_header.ok()) { + throw std::runtime_error("get authorization failed, errcode:" + + StatusCodeToString(auth_header.status().code())); + } + request->SetHeaderValue(auth_header->first.c_str(), auth_header->second.c_str()); + + return request; + } + + private: + std::shared_ptr credentials_; +}; + } // namespace milvus::storage diff --git a/internal/core/thirdparty/CMakeLists.txt b/internal/core/thirdparty/CMakeLists.txt index 31fe6cfcdd..55750f3710 100644 --- a/internal/core/thirdparty/CMakeLists.txt +++ b/internal/core/thirdparty/CMakeLists.txt @@ -69,6 +69,11 @@ if ( LINUX AND BUILD_DISK_ANN STREQUAL "ON" ) add_subdirectory( aws_sdk ) endif() +# ******************************* Thirdparty google cloud sdk ******************************** +if ( LINUX AND BUILD_DISK_ANN STREQUAL "ON" ) + add_subdirectory( google_cloud_sdk ) +endif() + # ******************************* Thirdparty marisa ******************************** # TODO: support win. if ( LINUX OR APPLE) diff --git a/internal/core/thirdparty/aws_sdk/CMakeLists.txt b/internal/core/thirdparty/aws_sdk/CMakeLists.txt index 1206e78eb3..f48ceec7e3 100644 --- a/internal/core/thirdparty/aws_sdk/CMakeLists.txt +++ b/internal/core/thirdparty/aws_sdk/CMakeLists.txt @@ -36,7 +36,8 @@ macro(build_aws_sdk_s3) # BINARY_DIR aws-s3-bin PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/aws-sdk-subbuild BUILD_IN_SOURCE 1 - #PATCH_COMMAND sh prefetch_crt_dependency.sh + + # PATCH_COMMAND sh prefetch_crt_dependency.sh LIST_SEPARATOR "|" BUILD_COMMAND ${AWS_SDK_BUILD_COMMAND} INSTALL_COMMAND ${AWS_SDK_INSTALL_COMMAND} diff --git a/internal/core/thirdparty/google_cloud_sdk/CMakeLists.txt b/internal/core/thirdparty/google_cloud_sdk/CMakeLists.txt new file mode 100644 index 0000000000..bfcdd8ead7 --- /dev/null +++ b/internal/core/thirdparty/google_cloud_sdk/CMakeLists.txt @@ -0,0 +1,142 @@ +# ------------------------------------------------------------------------------- +# Copyright (C) 2019-2020 Zilliz. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under the License. +# ------------------------------------------------------------------------------- + +set(GOOGLE_SDK_VERSION "v2.5.0") +set(GOOGLE_CRC32_VERSION "1.1.2") +set(GOOGLE_GRPC_VERSION "v1.50.1") +set(GOOGLE_ABSEIL_VERSION "20220623.1") + +macro(build_google_sdk_s3) + message(STATUS "Building GOOGLE_ABSEIL-${GOOGLE_ABSEIL_VERSION} from source") + + set(GOOGLE_ABSEIL_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) + + set(GOOGLE_ABSEIL_BUILD_COMMAND make -j $(nproc)) + set(GOOGLE_ABSEIL_INSTALL_COMMAND make install) + set(GOOGLE_PATCH_COMMAND sed -i "s/^#define ABSL_OPTION_USE_\\(.*\\) 2/#define ABSL_OPTION_USE_\\1 0/" "absl/base/options.h") + + set(GOOGLE_ABSEIL_CMAKE_ARGS + "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}" + "-DABSL_BUILD_TESTING=OFF" + "-DBUILD_SHARED_LIBS=yes") + + ExternalProject_Add(google_abseil_ep + GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git + GIT_TAG ${GOOGLE_ABSEIL_VERSION} + PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/google-abseil-subbuild + PATCH_COMMAND ${GOOGLE_PATCH_COMMAND} + BUILD_COMMAND ${GOOGLE_ABSEIL_BUILD_COMMAND} + INSTALL_COMMAND ${GOOGLE_ABSEIL_INSTALL_COMMAND} + CMAKE_ARGS ${GOOGLE_ABSEIL_CMAKE_ARGS} + ) + + message(STATUS "Building GOOGLE_CRC32-${GOOGLE_CRC32_VERSION} from source") + + set(GOOGLE_CRC32_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) + + set(GOOGLE_CRC32_BUILD_COMMAND make -j $(nproc)) + set(GOOGLE_CRC32_INSTALL_COMMAND make install) + + set(GOOGLE_CRC32_CMAKE_ARGS + "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}" + "-DBUILD_SHARED_LIBS=yes" + "-DCRC32C_BUILD_TESTS=OFF" + "-DCRC32C_BUILD_BENCHMARKS=OFF" + "-DCRC32C_USE_GLOG=OFF") + + ExternalProject_Add(google_crc32_ep + GIT_REPOSITORY https://github.com/google/crc32c.git + GIT_TAG ${GOOGLE_CRC32_VERSION} + PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/google-crc32-subbuild + BUILD_COMMAND ${GOOGLE_CRC32_BUILD_COMMAND} + INSTALL_COMMAND ${GOOGLE_CRC32_INSTALL_COMMAND} + CMAKE_ARGS ${GOOGLE_CRC32_CMAKE_ARGS} + ) + + message(STATUS "Building GOOGLE_JSON-${NLOHMANN_JSON_VERSION} from source") + + set(GOOGLE_JSON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) + + set(GOOGLE_JSON_BUILD_COMMAND make -j $(nproc)) + set(GOOGLE_JSON_INSTALL_COMMAND make install) + + set(GOOGLE_JSON_CMAKE_ARGS + "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}" + "-DBUILD_SHARED_LIBS=yes" + "-DBUILD_TESTING=OFF" + "-DJSON_BuildTests=OFF") + + ExternalProject_Add(google_json_ep + GIT_REPOSITORY https://github.com/nlohmann/json.git + GIT_TAG ${GOOGLE_JSON_VERSION} + PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/google-json-subbuild + BUILD_COMMAND ${GOOGLE_JSON_BUILD_COMMAND} + INSTALL_COMMAND ${GOOGLE_JSON_INSTALL_COMMAND} + CMAKE_ARGS ${GOOGLE_JSON_CMAKE_ARGS} + ) + + message(STATUS "Building GOOGLE_SDK-${GOOGLE_SDK_VERSION} from source") + + set(GOOGLE_SDK_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) + + set(GOOGLE_SDK_BUILD_COMMAND make) + set(GOOGLE_SDK_INSTALL_COMMAND make install) + + set(GOOGLE_SDK_CMAKE_ARGS + "-DCMAKE_BUILD_TYPE=Release" + "-DBUILD_DEPS=OFF" + "-DBUILD_TESTING=OFF" + "-DBUILD_SHARED_LIBS=ON" + "-DGOOGLE_CLOUD_CPP_ENABLE=storage" + "-DGOOGLE_CLOUD_CPP_ENABLE_EXAMPLES=OFF" + "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}") + + ExternalProject_Add(google_sdk_ep + GIT_REPOSITORY https://github.com/googleapis/google-cloud-cpp.git + GIT_TAG ${GOOGLE_SDK_VERSION} + PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/google-sdk-subbuild + BUILD_IN_SOURCE 1 + BUILD_COMMAND ${GOOGLE_SDK_BUILD_COMMAND} + INSTALL_COMMAND ${GOOGLE_SDK_INSTALL_COMMAND} + CMAKE_ARGS ${GOOGLE_SDK_CMAKE_ARGS} + ) + add_dependencies(google_sdk_ep google_abseil_ep) + add_dependencies(google_sdk_ep google_json_ep) + add_dependencies(google_sdk_ep google_crc32_ep) + + add_library(google_cloud_cpp_storage SHARED IMPORTED) + set_target_properties(google_cloud_cpp_storage + PROPERTIES + IMPORTED_GLOBAL TRUE + IMPORTED_LOCATION ${GOOGLE_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}google_cloud_cpp_storage${CMAKE_SHARED_LIBRARY_SUFFIX} + INTERFACE_INCLUDE_DIRECTORIES ${GOOGLE_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) + add_dependencies(google_cloud_cpp_storage google_sdk_ep) + + add_library(google_cloud_cpp_common SHARED IMPORTED) + set_target_properties(google_cloud_cpp_common + PROPERTIES + IMPORTED_GLOBAL TRUE + IMPORTED_LOCATION ${GOOGLE_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}google_cloud_cpp_common${CMAKE_SHARED_LIBRARY_SUFFIX} + INTERFACE_INCLUDE_DIRECTORIES ${GOOGLE_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) + add_dependencies(google_cloud_cpp_common google_sdk_ep) + + add_library(google_cloud_cpp_rest_internal SHARED IMPORTED) + set_target_properties(google_cloud_cpp_rest_internal + PROPERTIES + IMPORTED_GLOBAL TRUE + IMPORTED_LOCATION ${GOOGLE_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}google_cloud_cpp_rest_internal${CMAKE_SHARED_LIBRARY_SUFFIX} + INTERFACE_INCLUDE_DIRECTORIES ${GOOGLE_SDK_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) + add_dependencies(google_cloud_cpp_rest_internal google_sdk_ep) +endmacro() + +build_google_sdk_s3() diff --git a/internal/core/unittest/test_minio_chunk_manager.cpp b/internal/core/unittest/test_minio_chunk_manager.cpp index d9259f864c..31fe6820f4 100644 --- a/internal/core/unittest/test_minio_chunk_manager.cpp +++ b/internal/core/unittest/test_minio_chunk_manager.cpp @@ -37,6 +37,36 @@ class MinioChunkManagerTest : public testing::Test { MinioChunkManagerPtr chunk_manager_; }; +StorageConfig +get_google_cloud_storage_config() { + auto endpoint = "storage.googleapis.com:443"; + auto accessKey = ""; + auto accessValue = ""; + auto rootPath = "files"; + auto useSSL = true; + auto useIam = true; + auto iamEndPoint = ""; + auto bucketName = "gcp-zilliz-infra-test"; + + return StorageConfig{endpoint, bucketName, accessKey, accessValue, rootPath, "minio", iamEndPoint, useSSL, useIam}; +} + +class GoogleChunkManagerTest : public testing::Test { + public: + GoogleChunkManagerTest() { + } + ~GoogleChunkManagerTest() { + } + + virtual void + SetUp() { + chunk_manager_ = std::make_unique(get_google_cloud_storage_config()); + } + + protected: + MinioChunkManagerPtr chunk_manager_; +}; + TEST_F(MinioChunkManagerTest, BucketPositive) { string testBucketName = "test-bucket"; chunk_manager_->SetBucketName(testBucketName); @@ -218,3 +248,52 @@ TEST_F(MinioChunkManagerTest, ListWithPrefixPositive) { chunk_manager_->Remove(path3); chunk_manager_->DeleteBucket(testBucketName); } + +TEST_F(GoogleChunkManagerTest, ReadPositive) { + string testBucketName = "gcp-zilliz-infra-test"; + chunk_manager_->SetBucketName(testBucketName); + EXPECT_EQ(chunk_manager_->GetBucketName(), testBucketName); + + // if (!chunk_manager_->BucketExists(testBucketName)) { + // chunk_manager_->CreateBucket(testBucketName); + // } + + uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23}; + string path = "1/4/6"; + chunk_manager_->Write(path, data, sizeof(data)); + bool exist = chunk_manager_->Exist(path); + EXPECT_EQ(exist, true); + auto size = chunk_manager_->Size(path); + EXPECT_EQ(size, 5); + + uint8_t readdata[20] = {0}; + size = chunk_manager_->Read(path, readdata, 20); + EXPECT_EQ(size, 5); + EXPECT_EQ(readdata[0], 0x17); + EXPECT_EQ(readdata[1], 0x32); + EXPECT_EQ(readdata[2], 0x45); + EXPECT_EQ(readdata[3], 0x34); + EXPECT_EQ(readdata[4], 0x23); + + size = chunk_manager_->Read(path, readdata, 3); + EXPECT_EQ(size, 3); + EXPECT_EQ(readdata[0], 0x17); + EXPECT_EQ(readdata[1], 0x32); + EXPECT_EQ(readdata[2], 0x45); + + uint8_t dataWithNULL[] = {0x17, 0x32, 0x00, 0x34, 0x23}; + chunk_manager_->Write(path, dataWithNULL, sizeof(dataWithNULL)); + exist = chunk_manager_->Exist(path); + EXPECT_EQ(exist, true); + size = chunk_manager_->Size(path); + EXPECT_EQ(size, 5); + size = chunk_manager_->Read(path, readdata, 20); + EXPECT_EQ(size, 5); + EXPECT_EQ(readdata[0], 0x17); + EXPECT_EQ(readdata[1], 0x32); + EXPECT_EQ(readdata[2], 0x00); + EXPECT_EQ(readdata[3], 0x34); + EXPECT_EQ(readdata[4], 0x23); + + chunk_manager_->Remove(path); +} diff --git a/scripts/install_deps.sh b/scripts/install_deps.sh index 5d58bb3f55..a9612b200c 100755 --- a/scripts/install_deps.sh +++ b/scripts/install_deps.sh @@ -53,6 +53,7 @@ function install_linux_deps() { echo "Error Install Dependencies ..." exit 1 fi + } function install_mac_deps() { diff --git a/scripts/run_cpp_codecov.sh b/scripts/run_cpp_codecov.sh index 012874164a..337c3e600a 100755 --- a/scripts/run_cpp_codecov.sh +++ b/scripts/run_cpp_codecov.sh @@ -29,6 +29,7 @@ ROOT_DIR="$( cd -P "$( dirname "$SOURCE" )/.." && pwd )" MILVUS_CORE_DIR="${ROOT_DIR}/internal/core/" MILVUS_CORE_UNITTEST_DIR="${MILVUS_CORE_DIR}/output/unittest/" +MILVUS_CORE_LIB_DIR="${MILVUS_CORE_DIR}/output/lib/" echo "ROOT_DIR = ${ROOT_DIR}" echo "MILVUS_CORE_DIR = ${MILVUS_CORE_DIR}" @@ -58,6 +59,10 @@ if [ $? -ne 0 ]; then exit -1 fi +if [ -d "${MILVUS_CORE_LIB_DIR}" ]; then + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MILVUS_CORE_LIB_DIR} +fi + # run unittest for test in `ls ${MILVUS_CORE_UNITTEST_DIR}`; do echo "Running cpp unittest: ${MILVUS_CORE_UNITTEST_DIR}/$test"