// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/EasyAssert.h" #include "common/Exception.h" #include "storage/ChunkManager.h" #include "storage/Types.h" #include "log/Log.h" namespace milvus::storage { enum class RemoteStorageType { S3 = 0, GOOGLE_CLOUD = 1, ALIYUN_CLOUD = 2, }; template static std::string S3ErrorMessage(const std::string& func, const Aws::S3::S3Error& err, const std::string& fmt_string, Args&&... args) { std::ostringstream oss; const auto& message = fmt::format(fmt_string, std::forward(args)...); oss << "Error in " << func << "[errcode:" << int(err.GetResponseCode()) << ", exception:" << err.GetExceptionName() << ", errmessage:" << err.GetMessage() << ", params:" << message << "]"; return oss.str(); } template static SegcoreError ThrowS3Error(const std::string& func, const Aws::S3::S3Error& err, const std::string& fmt_string, Args&&... args) { std::string error_message = S3ErrorMessage(func, err, fmt_string, args...); LOG_WARN(error_message); throw SegcoreError(S3Error, error_message); } static bool IsNotFound(const Aws::S3::S3Errors& s3err) { return (s3err == Aws::S3::S3Errors::NO_SUCH_KEY || s3err == Aws::S3::S3Errors::RESOURCE_NOT_FOUND); } /** * @brief user defined aws logger, redirect aws log to segcore log */ class AwsLogger : public Aws::Utils::Logging::FormattedLogSystem { public: AwsLogger(Aws::Utils::Logging::LogLevel log_level) : Aws::Utils::Logging::FormattedLogSystem(log_level) { } virtual ~AwsLogger() { } virtual void Flush() override { } protected: virtual void ProcessFormattedStatement(Aws::String&& statement) override; }; /** * @brief This MinioChunkManager is responsible for read and write file in S3. */ class MinioChunkManager : public ChunkManager { public: MinioChunkManager() { } explicit MinioChunkManager(const StorageConfig& storage_config); MinioChunkManager(const MinioChunkManager&); MinioChunkManager& operator=(const MinioChunkManager&); public: virtual ~MinioChunkManager(); virtual bool Exist(const std::string& filepath); virtual uint64_t Size(const std::string& filepath); virtual uint64_t Read(const std::string& filepath, uint64_t offset, void* buf, uint64_t len) { ThrowInfo(NotImplemented, GetName() + "Read with offset not implement"); } virtual void Write(const std::string& filepath, uint64_t offset, void* buf, uint64_t len) { ThrowInfo(NotImplemented, GetName() + "Write with offset not implement"); } virtual uint64_t Read(const std::string& filepath, void* buf, uint64_t len); virtual void Write(const std::string& filepath, void* buf, uint64_t len); virtual std::vector ListWithPrefix(const std::string& filepath = ""); virtual void Remove(const std::string& filepath); virtual std::string GetName() const { return "MinioChunkManager"; } virtual std::string GetRootPath() const { return remote_root_path_; } virtual std::string GetBucketName() const { return default_bucket_name_; } inline void SetBucketName(const std::string& bucket_name) { default_bucket_name_ = bucket_name; } bool BucketExists(const std::string& bucket_name); bool CreateBucket(const std::string& bucket_name); bool DeleteBucket(const std::string& bucket_name); std::vector ListBuckets(); public: bool ObjectExists(const std::string& bucket_name, const std::string& object_name); uint64_t GetObjectSize(const std::string& bucket_name, const std::string& object_name); bool DeleteObject(const std::string& bucket_name, const std::string& object_name); bool PutObjectBuffer(const std::string& bucket_name, const std::string& object_name, void* buf, uint64_t size); uint64_t GetObjectBuffer(const std::string& bucket_name, const std::string& object_name, void* buf, uint64_t size); std::vector ListObjects(const std::string& bucket_name, const std::string& prefix = ""); void InitSDKAPIDefault(const std::string& log_level); void InitSDKAPI(RemoteStorageType type, bool useIAM, const std::string& log_level); // Precheck whether client is configure ready or not. void PreCheck(const StorageConfig& storage_config); void ShutdownSDKAPI(); void BuildS3Client(const StorageConfig& storage_config, const Aws::Client::ClientConfiguration& config); void BuildAliyunCloudClient(const StorageConfig& storage_config, const Aws::Client::ClientConfiguration& config); void BuildGoogleCloudClient(const StorageConfig& storage_config, const Aws::Client::ClientConfiguration& config); protected: void BuildAccessKeyClient(const StorageConfig& storage_config, const Aws::Client::ClientConfiguration& config); Aws::SDKOptions sdk_options_; static std::atomic init_count_; static std::mutex client_mutex_; std::shared_ptr client_; std::string default_bucket_name_; std::string remote_root_path_; }; class AwsChunkManager : public MinioChunkManager { public: explicit AwsChunkManager(const StorageConfig& storage_config); virtual std::string GetName() const { return "AwsChunkManager"; } }; class GcpChunkManager : public MinioChunkManager { public: explicit GcpChunkManager(const StorageConfig& storage_config); virtual std::string GetName() const { return "GcpChunkManager"; } }; class AliyunChunkManager : public MinioChunkManager { public: explicit AliyunChunkManager(const StorageConfig& storage_config); virtual std::string GetName() const { return "AliyunChunkManager"; } }; class TencentCloudChunkManager : public MinioChunkManager { public: explicit TencentCloudChunkManager(const StorageConfig& storage_config); virtual std::string GetName() const { return "TencentCloudChunkManager"; } }; using MinioChunkManagerPtr = std::unique_ptr; static const char* GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG = "GoogleHttpClientFactory"; class GoogleHttpClientFactory : public Aws::Http::HttpClientFactory { public: explicit GoogleHttpClientFactory( std::shared_ptr credentials) { credentials_ = credentials; } void SetCredentials(std::shared_ptr credentials) { credentials_ = credentials; } std::shared_ptr CreateHttpClient(const Aws::Client::ClientConfiguration& clientConfiguration) const override { return Aws::MakeShared( GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG, clientConfiguration); } std::shared_ptr CreateHttpRequest( const Aws::String& uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory& streamFactory) const override { return CreateHttpRequest(Aws::Http::URI(uri), method, streamFactory); } std::shared_ptr CreateHttpRequest( const Aws::Http::URI& uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory& streamFactory) const override { auto request = Aws::MakeShared( GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG, uri, method); request->SetResponseStreamFactory(streamFactory); auto auth_header = credentials_->AuthorizationHeader(); if (!auth_header.ok()) { ThrowInfo( S3Error, fmt::format("get authorization failed, errcode: {}", StatusCodeToString(auth_header.status().code()))); } request->SetHeaderValue(auth_header->first.c_str(), auth_header->second.c_str()); return request; } private: std::shared_ptr credentials_; }; } // namespace milvus::storage