From 4feb3fa7c67cfa420a58eee3c762946cdb21b467 Mon Sep 17 00:00:00 2001 From: PowderLi <135960789+PowderLi@users.noreply.github.com> Date: Tue, 19 Sep 2023 10:01:23 +0800 Subject: [PATCH] support azure (#26398) Signed-off-by: PowderLi --- .env | 1 + .github/workflows/code-checker.yaml | 4 +- .github/workflows/main.yaml | 10 +- docker-compose.yml | 8 + go.mod | 8 + go.sum | 21 +- internal/core/src/common/type_c.h | 1 + internal/core/src/indexbuilder/index_c.cpp | 2 + .../core/src/storage/AzureChunkManager.cpp | 155 +++ internal/core/src/storage/AzureChunkManager.h | 144 +++ internal/core/src/storage/CMakeLists.txt | 31 +- internal/core/src/storage/ChunkManager.h | 5 +- internal/core/src/storage/ChunkManagers.cpp | 163 +++ .../core/src/storage/MinioChunkManager.cpp | 43 + internal/core/src/storage/MinioChunkManager.h | 33 +- internal/core/src/storage/Types.h | 1 + internal/core/src/storage/Util.cpp | 46 +- .../AzureBlobChunkManager.cpp | 244 +++++ .../AzureBlobChunkManager.h | 78 ++ .../storage/azure-blob-storage/CMakeLists.txt | 29 + .../cmake-modules/AzureVcpkg.cmake | 169 +++ .../src/storage/azure-blob-storage/vcpkg.json | 8 + internal/core/src/storage/storage_c.cpp | 2 + internal/core/unittest/CMakeLists.txt | 10 + .../unittest/test_azure_chunk_manager.cpp | 288 ++++++ .../unittest/test_remote_chunk_manager.cpp | 277 +++++ internal/datacoord/garbage_collector.go | 15 +- internal/datacoord/garbage_collector_test.go | 44 +- internal/storage/azure_object_storage.go | 143 +++ internal/storage/azure_object_storage_test.go | 167 +++ internal/storage/factory.go | 4 +- internal/storage/minio_chunk_manager.go | 14 +- internal/storage/minio_object_storage.go | 149 +++ internal/storage/minio_object_storage_test.go | 171 +++ internal/storage/remote_chunk_manager.go | 458 +++++++++ internal/storage/remote_chunk_manager_test.go | 973 ++++++++++++++++++ .../util/indexcgowrapper/build_index_info.go | 3 + internal/util/initcore/init_core.go | 3 + pkg/util/paramtable/http_param.go | 2 +- scripts/azure_build.sh | 9 + scripts/core_build.sh | 36 +- scripts/install_deps.sh | 2 +- 42 files changed, 3915 insertions(+), 59 deletions(-) create mode 100644 internal/core/src/storage/AzureChunkManager.cpp create mode 100644 internal/core/src/storage/AzureChunkManager.h create mode 100644 internal/core/src/storage/ChunkManagers.cpp create mode 100644 internal/core/src/storage/azure-blob-storage/AzureBlobChunkManager.cpp create mode 100644 internal/core/src/storage/azure-blob-storage/AzureBlobChunkManager.h create mode 100644 internal/core/src/storage/azure-blob-storage/CMakeLists.txt create mode 100644 internal/core/src/storage/azure-blob-storage/cmake-modules/AzureVcpkg.cmake create mode 100644 internal/core/src/storage/azure-blob-storage/vcpkg.json create mode 100644 internal/core/unittest/test_azure_chunk_manager.cpp create mode 100644 internal/core/unittest/test_remote_chunk_manager.cpp create mode 100644 internal/storage/azure_object_storage.go create mode 100644 internal/storage/azure_object_storage_test.go create mode 100644 internal/storage/minio_object_storage.go create mode 100644 internal/storage/minio_object_storage_test.go create mode 100644 internal/storage/remote_chunk_manager.go create mode 100644 internal/storage/remote_chunk_manager_test.go create mode 100644 scripts/azure_build.sh diff --git a/.env b/.env index 1a853ce7b8..47d2634bc6 100644 --- a/.env +++ b/.env @@ -8,3 +8,4 @@ LATEST_GPU_DATE_VERSION=20230317-a1c7b0c MINIO_ADDRESS=minio:9000 PULSAR_ADDRESS=pulsar://pulsar:6650 
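+# Note: the AZURITE_CONNECTION_STRING added below points at the local Azurite
+# emulator; devstoreaccount1 and its AccountKey are Azurite's publicly
+# documented development-storage defaults, not real credentials.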
ETCD_ENDPOINTS=etcd:2379 +AZURITE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite:10000/devstoreaccount1;" \ No newline at end of file diff --git a/.github/workflows/code-checker.yaml b/.github/workflows/code-checker.yaml index cc15910fd7..d4f4416745 100644 --- a/.github/workflows/code-checker.yaml +++ b/.github/workflows/code-checker.yaml @@ -53,7 +53,7 @@ jobs: uses: actions/cache@v3 with: path: .docker/amd64-ubuntu20.04-go-mod - key: ubuntu20.04-go-mod-${{ hashFiles('**/go.sum') }} + key: ubuntu20.04-go-mod-${{ hashFiles('go.sum, */go.sum') }} restore-keys: ubuntu20.04-go-mod- - name: Cache Conan Packages uses: pat-s/always-upload-cache@v3 @@ -98,7 +98,7 @@ jobs: uses: actions/cache@v3 with: path: .docker/amd64-amazonlinux2023-go-mod - key: amazonlinux2023-go-mod-${{ hashFiles('**/go.sum') }} + key: amazonlinux2023-go-mod-${{ hashFiles('go.sum, */go.sum') }} restore-keys: amazonlinux2023-go-mod- - name: Cache Conan Packages uses: pat-s/always-upload-cache@v3 diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 79509edd62..78b7a80d79 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -123,6 +123,12 @@ jobs: path: .docker/amd64-ubuntu${{ matrix.ubuntu }}-conan key: ubuntu${{ matrix.ubuntu }}-conan-${{ hashFiles('internal/core/conanfile.*') }} restore-keys: ubuntu${{ matrix.ubuntu }}-conan- + - name: Start Service + shell: bash + run: | + docker-compose up -d azurite +# - name: 'Setup upterm session' +# uses: lhotari/action-upterm@v1 - name: UT run: | chmod +x build/builder.sh @@ -166,7 +172,9 @@ jobs: - name: Start Service shell: bash run: | - docker-compose up -d pulsar etcd minio + docker-compose up -d pulsar etcd minio azurite +# - name: 'Setup upterm session' +# uses: lhotari/action-upterm@v1 - name: UT run: | chmod +x build/builder.sh diff --git a/docker-compose.yml b/docker-compose.yml index 6166fe8c7b..c8b0b8f7a1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,6 +28,7 @@ services: ETCD_ENDPOINTS: ${ETCD_ENDPOINTS} MINIO_ADDRESS: ${MINIO_ADDRESS} CONAN_USER_HOME: /home/milvus + AZURE_STORAGE_CONNECTION_STRING: ${AZURITE_CONNECTION_STRING} volumes: &builder-volumes - .:/go/src/github.com/milvus-io/milvus:delegated - ${DOCKER_VOLUME_DIRECTORY:-.docker}/${IMAGE_ARCH}-${OS_NAME}-ccache:/ccache:delegated @@ -39,6 +40,7 @@ services: - etcd - minio - pulsar + - azurite # Command command: &builder-command > /bin/bash -c " @@ -64,6 +66,7 @@ services: ETCD_ENDPOINTS: ${ETCD_ENDPOINTS} MINIO_ADDRESS: ${MINIO_ADDRESS} CONAN_USER_HOME: /home/milvus + AZURE_STORAGE_CONNECTION_STRING: ${AZURITE_CONNECTION_STRING} volumes: &builder-volumes-gpu - .:/go/src/github.com/milvus-io/milvus:delegated - ${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}/${OS_NAME}-ccache:/ccache:delegated @@ -75,6 +78,7 @@ services: - etcd - minio - pulsar + - azurite # Command command: &builder-command-gpu > /bin/bash -c " @@ -110,6 +114,10 @@ services: timeout: 20s retries: 3 + azurite: + image: mcr.microsoft.com/azure-storage/azurite + command: azurite-blob --blobHost 0.0.0.0 + jaeger: image: jaegertracing/all-in-one:latest diff --git a/go.mod b/go.mod index d96ef11e09..34628e8d32 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,10 @@ module github.com/milvus-io/milvus go 1.18 require ( + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0 + github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 // indirect + 
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0 + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0 github.com/aliyun/credentials-go v1.2.7 github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20210826220005-b48c857c3a0e github.com/antonmedv/expr v1.8.9 @@ -62,6 +66,7 @@ require ( github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.1 // indirect github.com/AthenZ/athenz v1.10.39 // indirect + github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 // indirect github.com/DataDog/zstd v1.5.0 // indirect github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible // indirect @@ -110,6 +115,7 @@ require ( github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect github.com/godbus/dbus/v5 v5.0.4 // indirect github.com/golang-jwt/jwt v3.2.2+incompatible // indirect + github.com/golang-jwt/jwt/v4 v4.5.0 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/flatbuffers v2.0.5+incompatible // indirect github.com/google/uuid v1.3.0 // indirect @@ -126,6 +132,7 @@ require ( github.com/klauspost/cpuid/v2 v2.2.4 // indirect github.com/kr/pretty v0.3.0 // indirect github.com/kr/text v0.2.0 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect github.com/leodido/go-urn v1.2.4 // indirect github.com/linkedin/goavro/v2 v2.11.1 // indirect github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect @@ -159,6 +166,7 @@ require ( github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 // indirect github.com/pingcap/kvproto v0.0.0-20221129023506-621ec37aac7a // indirect github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 // indirect + github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect diff --git a/go.sum b/go.sum index 81016e82fc..61993185c5 100644 --- a/go.sum +++ b/go.sum @@ -50,6 +50,17 @@ github.com/99designs/keyring v1.2.1/go.mod h1:fc+wB5KTk9wQ9sDx0kFXB3A0MaeGHM9AwR github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= github.com/AthenZ/athenz v1.10.39 h1:mtwHTF/v62ewY2Z5KWhuZgVXftBej1/Tn80zx4DcawY= github.com/AthenZ/athenz v1.10.39/go.mod h1:3Tg8HLsiQZp81BJY58JBeU2BR6B/H4/0MQGfCwhHNEA= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0 h1:8q4SaHjFsClSvuVne0ID/5Ka8u3fcIHyqkLjcFpNRHQ= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0/go.mod h1:bjGvMhVMb+EEm3VRNQawDMUyMMjo+S5ewNjflkep/0Q= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0 h1:vcYCAze6p19qBW7MhZybIsqD8sMV8js0NyQM8JDnVtg= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.0/go.mod h1:OQeznEEkTZ9OrhHJoDD8ZDq51FHgXjqtP9z6bEwBq9U= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 h1:sXr+ck84g/ZlZUOZiNELInmMgOsuGwdjjVkEIde0OtY= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0/go.mod h1:okt5dMMTOFjX/aovMlrjvvXoPMBVSPzk9185BT0+eZM= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.2.0 h1:Ma67P/GGprNwsslzEH6+Kb8nybI8jpDTm4Wmzu2ReK8= +github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0 h1:nVocQV40OQne5613EeLayJiRAJuKlBGy+m22qWG+WRg= +github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.1.0/go.mod h1:7QJP7dr2wznCMeqIrhMgWGf7XpAQnVrJqDm9nvV3Cu4= 
+github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0 h1:OBhqkivkhkMqLPymWEppkm7vgPQY2XsHoEkaMQ0AdZY= +github.com/AzureAD/microsoft-authentication-library-for-go v1.0.0/go.mod h1:kgDmCTgBzIEPFElEF+FK0SdjAor06dRq2Go927dnQ6o= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v1.2.1 h1:9F2/+DoOYIOksmaJFPw1tGFy1eDnIJXg+UHjuD8lTak= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= @@ -111,8 +122,6 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/bigsheeper/milvus-proto/go-api/v2 v2.0.0-20230906082705-4e84b4cf314b h1:OPGVqhJrJMOAUJeEuboKGTIsrllhJb2+ZgQMDBEdbS0= -github.com/bigsheeper/milvus-proto/go-api/v2 v2.0.0-20230906082705-4e84b4cf314b/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek= github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= github.com/bits-and-blooms/bloom/v3 v3.0.1 h1:Inlf0YXbgehxVjMPmCGv86iMCKMGPPrPSHtBF5yRHwA= @@ -203,6 +212,7 @@ github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUn github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/dimfeld/httptreemux v5.0.1+incompatible h1:Qj3gVcDNoOthBAqftuD596rm4wg/adLLz5xh5CmpiCA= github.com/dimfeld/httptreemux v5.0.1+incompatible/go.mod h1:rbUlSV+CCpv/SuqUTP/8Bk2O3LyUV436/yaRGkhP6Z0= +github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= @@ -320,6 +330,8 @@ github.com/gogo/status v1.1.0/go.mod h1:BFv9nrluPLmrS0EmGVvLaPNmRosr9KapBYd5/hpY github.com/golang-jwt/jwt v3.2.1+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= +github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg= +github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.0.0 h1:nfP3RFugxnNRyKgeWd4oI1nYvXpxrx8ck8ZrcizshdQ= @@ -532,6 +544,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kris-nova/logger v0.0.0-20181127235838-fd0d87064b06 h1:vN4d3jSss3ExzUn2cE0WctxztfOgiKvMKnDrydBsg00= github.com/kris-nova/lolgopher v0.0.0-20180921204813-313b3abb0d9b h1:xYEM2oBUhBEhQjrV+KJ9lEWDWYZoNVZUaBF++Wyljq4= +github.com/kylelemons/godebug v1.1.0 
h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/labstack/echo/v4 v4.5.0/go.mod h1:czIriw4a0C1dFun+ObrXp7ok03xON0N1awStJ6ArI7Y= github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= @@ -681,6 +695,8 @@ github.com/pingcap/kvproto v0.0.0-20221129023506-621ec37aac7a h1:LzIZsQpXQlj8yF7 github.com/pingcap/kvproto v0.0.0-20221129023506-621ec37aac7a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY= github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= +github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 h1:KoWmjvw+nsYOo29YJK9vDA65RGE3NrOnUtO7a+RF9HU= +github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8/go.mod h1:HKlIX3XHQyzLZPlr7++PzdhaXEj94dEiJgZDTsxEqUI= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -1192,6 +1208,7 @@ golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210819135213-f52c844e1c1c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/internal/core/src/common/type_c.h b/internal/core/src/common/type_c.h index fdc2a1fcd9..085b6e8a36 100644 --- a/internal/core/src/common/type_c.h +++ b/internal/core/src/common/type_c.h @@ -78,6 +78,7 @@ typedef struct CStorageConfig { const char* access_key_value; const char* root_path; const char* storage_type; + const char* cloud_provider; const char* iam_endpoint; const char* log_level; const char* region; diff --git a/internal/core/src/indexbuilder/index_c.cpp b/internal/core/src/indexbuilder/index_c.cpp index 33ded49fe6..107909f071 100644 --- a/internal/core/src/indexbuilder/index_c.cpp +++ b/internal/core/src/indexbuilder/index_c.cpp @@ -294,6 +294,8 @@ NewBuildIndexInfo(CBuildIndexInfo* c_build_index_info, storage_config.root_path = std::string(c_storage_config.root_path); storage_config.storage_type = std::string(c_storage_config.storage_type); + storage_config.cloud_provider = + std::string(c_storage_config.cloud_provider); storage_config.iam_endpoint = std::string(c_storage_config.iam_endpoint); storage_config.useSSL = c_storage_config.useSSL; diff --git a/internal/core/src/storage/AzureChunkManager.cpp b/internal/core/src/storage/AzureChunkManager.cpp new file mode 100644 index 0000000000..dcb33bd77a --- /dev/null +++ b/internal/core/src/storage/AzureChunkManager.cpp @@ -0,0 +1,155 @@ +// Licensed to the LF AI & 
Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sstream>
+#include <string>
+#include "common/EasyAssert.h"
+#include "storage/AzureChunkManager.h"
+
+namespace milvus {
+namespace storage {
+
+AzureChunkManager::AzureChunkManager(const StorageConfig& storage_config)
+    : default_bucket_name_(storage_config.bucket_name),
+      path_prefix_(storage_config.root_path) {
+    client_ = std::make_shared<azure::AzureBlobChunkManager>(
+        storage_config.access_key_id,
+        storage_config.access_key_value,
+        storage_config.address,
+        storage_config.useIAM);
+}
+
+AzureChunkManager::~AzureChunkManager() {
+}
+
+uint64_t
+AzureChunkManager::Size(const std::string& filepath) {
+    return GetObjectSize(default_bucket_name_, filepath);
+}
+
+bool
+AzureChunkManager::Exist(const std::string& filepath) {
+    return ObjectExists(default_bucket_name_, filepath);
+}
+
+void
+AzureChunkManager::Remove(const std::string& filepath) {
+    DeleteObject(default_bucket_name_, filepath);
+}
+
+std::vector<std::string>
+AzureChunkManager::ListWithPrefix(const std::string& filepath) {
+    return ListObjects(default_bucket_name_.c_str(), filepath.c_str());
+}
+
+uint64_t
+AzureChunkManager::Read(const std::string& filepath, void* buf, uint64_t size) {
+    if (!ObjectExists(default_bucket_name_, filepath)) {
+        std::stringstream err_msg;
+        err_msg << "object('" << default_bucket_name_ << "', " << filepath
+                << "') not exists";
+        throw SegcoreError(ObjectNotExist, err_msg.str());
+    }
+    return GetObjectBuffer(default_bucket_name_, filepath, buf, size);
+}
+
+void
+AzureChunkManager::Write(const std::string& filepath,
+                         void* buf,
+                         uint64_t size) {
+    PutObjectBuffer(default_bucket_name_, filepath, buf, size);
+}
+
+bool
+AzureChunkManager::BucketExists(const std::string& bucket_name) {
+    return client_->BucketExists(bucket_name);
+}
+
+std::vector<std::string>
+AzureChunkManager::ListBuckets() {
+    return client_->ListBuckets();
+}
+
+bool
+AzureChunkManager::CreateBucket(const std::string& bucket_name) {
+    try {
+        client_->CreateBucket(bucket_name);
+    } catch (std::exception& e) {
+        throw SegcoreError(BucketInvalid, e.what());
+    }
+    return true;
+}
+
+bool
+AzureChunkManager::DeleteBucket(const std::string& bucket_name) {
+    try {
+        client_->DeleteBucket(bucket_name);
+    } catch (std::exception& e) {
+        throw SegcoreError(BucketInvalid, e.what());
+    }
+    return true;
+}
+
+bool
+AzureChunkManager::ObjectExists(const std::string& bucket_name,
+                                const std::string& object_name) {
+    return client_->ObjectExists(bucket_name, object_name);
+}
+
+int64_t
+AzureChunkManager::GetObjectSize(const std::string& bucket_name,
+                                 const std::string& object_name) {
+    try {
+        return client_->GetObjectSize(bucket_name, object_name);
+    } catch (std::exception& e) {
+        throw SegcoreError(ObjectNotExist, e.what());
+    }
+}
+
+bool
+AzureChunkManager::DeleteObject(const std::string& bucket_name,
+                                const std::string& object_name) {
+    try {
+        client_->DeleteObject(bucket_name, object_name);
+    } catch (std::exception& e) {
+        throw SegcoreError(ObjectNotExist, e.what());
+    }
+    return true;
+}
+
+bool
+AzureChunkManager::PutObjectBuffer(const std::string& bucket_name,
+                                   const std::string& object_name,
+                                   void* buf,
+                                   uint64_t size) {
+    return client_->PutObjectBuffer(bucket_name, object_name, buf, size);
+}
+
+uint64_t
+AzureChunkManager::GetObjectBuffer(const std::string& bucket_name,
+                                   const std::string& object_name,
+                                   void* buf,
+                                   uint64_t size) {
+    return client_->GetObjectBuffer(bucket_name, object_name, buf, size);
+}
+
+std::vector<std::string>
+AzureChunkManager::ListObjects(const char* bucket_name, const char* prefix) {
+    return client_->ListObjects(bucket_name, prefix);
+}
+
+}  // namespace storage
+}  // namespace milvus
diff --git a/internal/core/src/storage/AzureChunkManager.h b/internal/core/src/storage/AzureChunkManager.h
new file mode 100644
index 0000000000..dc4c6ab5e4
--- /dev/null
+++ b/internal/core/src/storage/AzureChunkManager.h
@@ -0,0 +1,144 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include "storage/azure-blob-storage/AzureBlobChunkManager.h"
+#include "storage/ChunkManager.h"
+#include "storage/Types.h"
+
+namespace milvus {
+namespace storage {
+
+/**
+ * @brief This AzureChunkManager is responsible for reading from and writing
+ * to Azure Blob Storage.
+ */
+class AzureChunkManager : public ChunkManager {
+ public:
+    explicit AzureChunkManager(const StorageConfig& storage_config);
+
+    AzureChunkManager(const AzureChunkManager&);
+    AzureChunkManager&
+    operator=(const AzureChunkManager&);
+
+ public:
+    virtual ~AzureChunkManager();
+
+    virtual bool
+    Exist(const std::string& filepath);
+
+    virtual uint64_t
+    Size(const std::string& filepath);
+
+    virtual uint64_t
+    Read(const std::string& filepath,
+         uint64_t offset,
+         void* buf,
+         uint64_t len) {
+        throw SegcoreError(NotImplemented,
+                           GetName() + "Read with offset not implemented");
+    }
+
+    virtual void
+    Write(const std::string& filepath,
+          uint64_t offset,
+          void* buf,
+          uint64_t len) {
+        throw SegcoreError(NotImplemented,
+                           GetName() + "Write with offset not implemented");
+    }
+
+    virtual uint64_t
+    Read(const std::string& filepath, void* buf, uint64_t len);
+
+    virtual void
+    Write(const std::string& filepath, void* buf, uint64_t len);
+
+    virtual std::vector<std::string>
+    ListWithPrefix(const std::string& filepath);
+
+    virtual void
+    Remove(const std::string& filepath);
+
+    virtual std::string
+    GetName() const {
+        return "AzureChunkManager";
+    }
+
+    virtual std::string
+    GetRootPath() const {
+        return path_prefix_;
+    }
+
+    inline std::string
+    GetBucketName() {
+        return default_bucket_name_;
+    }
+
+    inline void
+    SetBucketName(const std::string& bucket_name) {
+        default_bucket_name_ = bucket_name;
+    }
+
+    bool
+    BucketExists(const std::string& bucket_name);
+
+    bool
+    CreateBucket(const std::string& bucket_name);
+
+    bool
+    DeleteBucket(const std::string& bucket_name);
+
+    std::vector<std::string>
+    ListBuckets();
+
+ public:
+    bool
+    ObjectExists(const std::string& bucket_name,
+                 const std::string& object_name);
+    int64_t
+    GetObjectSize(const std::string& bucket_name,
+                  const std::string& object_name);
+    bool
+    DeleteObject(const std::string& bucket_name,
+                 const std::string& object_name);
+    bool
+    PutObjectBuffer(const std::string& bucket_name,
+                    const std::string& object_name,
+                    void* buf,
+                    uint64_t size);
+    uint64_t
+    GetObjectBuffer(const std::string& bucket_name,
+                    const std::string& object_name,
+                    void* buf,
+                    uint64_t size);
+    std::vector<std::string>
+    ListObjects(const char* bucket_name, const char* prefix = nullptr);
+
+ private:
+    std::shared_ptr<azure::AzureBlobChunkManager> client_;
+    std::string default_bucket_name_;
+    std::string path_prefix_;
+};
+
+using AzureChunkManagerPtr = std::unique_ptr<AzureChunkManager>;
+
+}  // namespace storage
+}  // namespace milvus
diff --git a/internal/core/src/storage/CMakeLists.txt b/internal/core/src/storage/CMakeLists.txt
index 3fa5f9708b..2e698161b9 100644
--- a/internal/core/src/storage/CMakeLists.txt
+++ b/internal/core/src/storage/CMakeLists.txt
@@ -22,7 +22,18 @@ endif()
 
 milvus_add_pkg_config("milvus_storage")
 
+if (DEFINED AZURE_BUILD_DIR)
+    add_definitions(-DAZURE_BUILD_DIR)
+    include_directories(azure-blob-storage)
+    include_directories("${AZURE_BUILD_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include")
+    set(STORAGE_FILES
+        ${STORAGE_FILES}
+        AzureChunkManager.cpp
+    )
+endif()
+
 set(STORAGE_FILES
+    ${STORAGE_FILES}
     parquet_c.cpp
     PayloadStream.cpp
     DataCodec.cpp
@@ -37,6 +48,7 @@ set(STORAGE_FILES
     ThreadPool.cpp
     storage_c.cpp
     MinioChunkManager.cpp
+    ChunkManagers.cpp
     AliyunSTSClient.cpp
     AliyunCredentialsProvider.cpp
     MemFileManagerImpl.cpp
@@ -47,10 +59,19 @@
 add_library(milvus_storage SHARED ${STORAGE_FILES})
 
-target_link_libraries(milvus_storage PUBLIC
-    milvus_common
-    pthread
-    ${CONAN_LIBS}
-    )
+if (DEFINED AZURE_BUILD_DIR)
+    target_link_libraries(milvus_storage PUBLIC
+        "-L${AZURE_BUILD_DIR} -lblob-chunk-manager"
+        milvus_common
+        pthread
+        ${CONAN_LIBS}
+        )
+else ()
+    target_link_libraries(milvus_storage PUBLIC
+        milvus_common
+        pthread
+        ${CONAN_LIBS}
+        )
+endif()
 
 install(TARGETS milvus_storage DESTINATION "${CMAKE_INSTALL_LIBDIR}")
diff --git a/internal/core/src/storage/ChunkManager.h b/internal/core/src/storage/ChunkManager.h
index c80fef4c68..bec5addd2a 100644
--- a/internal/core/src/storage/ChunkManager.h
+++ b/internal/core/src/storage/ChunkManager.h
@@ -124,10 +124,11 @@ class ChunkManager {
 
 using ChunkManagerPtr = std::shared_ptr<ChunkManager>;
 
-enum ChunkManagerType : int8_t {
-    None_CM = 0,
+enum class ChunkManagerType : int8_t {
+    None = 0,
     Local = 1,
     Minio = 2,
+    Remote = 3,
 };
 
 extern std::map<std::string, ChunkManagerType> ChunkManagerType_Map;
diff --git a/internal/core/src/storage/ChunkManagers.cpp b/internal/core/src/storage/ChunkManagers.cpp
new file mode 100644
index 0000000000..b6f844f1a3
--- /dev/null
+++ b/internal/core/src/storage/ChunkManagers.cpp
@@ -0,0 +1,163 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <aws/core/auth/AWSCredentials.h>
+#include <aws/core/auth/AWSCredentialsProviderChain.h>
+#include <aws/core/auth/STSCredentialsProvider.h>
+#include <aws/core/client/DefaultRetryStrategy.h>
+#include <aws/core/http/HttpClientFactory.h>
+#include <aws/core/http/standard/StandardHttpRequest.h>
+#include <aws/core/http/standard/StandardHttpResponse.h>
+#include <aws/core/utils/logging/ConsoleLogSystem.h>
+#include <aws/s3/S3Client.h>
+#include <google/cloud/internal/oauth2_compute_engine_credentials.h>
+
+#include "storage/MinioChunkManager.h"
+#include "storage/AliyunSTSClient.h"
+#include "storage/AliyunCredentialsProvider.h"
+#include "common/EasyAssert.h"
+#include "log/Log.h"
+#include "signal.h"
+
+namespace milvus::storage {
+
+Aws::String
+ConvertToAwsString(const std::string& str) {
+    return Aws::String(str.c_str(), str.size());
+}
+
+Aws::Client::ClientConfiguration
+generateConfig(const StorageConfig& storage_config) {
+    // The ClientConfiguration default constructor will take a long time.
+    // For more details, please refer to https://github.com/aws/aws-sdk-cpp/issues/1440
+    static Aws::Client::ClientConfiguration g_config;
+    Aws::Client::ClientConfiguration config = g_config;
+    config.endpointOverride = ConvertToAwsString(storage_config.address);
+
+    if (storage_config.useSSL) {
+        config.scheme = Aws::Http::Scheme::HTTPS;
+        config.verifySSL = true;
+    } else {
+        config.scheme = Aws::Http::Scheme::HTTP;
+        config.verifySSL = false;
+    }
+
+    if (!storage_config.region.empty()) {
+        config.region = ConvertToAwsString(storage_config.region);
+    }
+    return config;
+}
+
+AwsChunkManager::AwsChunkManager(const StorageConfig& storage_config) {
+    default_bucket_name_ = storage_config.bucket_name;
+
+    InitSDKAPIDefault(storage_config.log_level);
+
+    Aws::Client::ClientConfiguration config = generateConfig(storage_config);
+    if (storage_config.useIAM) {
+        auto provider =
+            std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
+        auto aws_credentials = provider->GetAWSCredentials();
+        AssertInfo(!aws_credentials.GetAWSAccessKeyId().empty(),
+                   "if use iam, access key id should not be empty");
+        AssertInfo(!aws_credentials.GetAWSSecretKey().empty(),
+                   "if use iam, secret key should not be empty");
+        AssertInfo(!aws_credentials.GetSessionToken().empty(),
+                   "if use iam, token should not be empty");
+
+        client_ = std::make_shared<Aws::S3::S3Client>(
+            provider,
+            config,
+            Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
+            storage_config.useVirtualHost);
+    } else {
+        BuildAccessKeyClient(storage_config, config);
+    }
+
+    LOG_SEGCORE_INFO_ << "init AwsChunkManager with parameter[endpoint: '"
+                      << storage_config.address << "', default_bucket_name:'"
+                      << storage_config.bucket_name << "', use_secure:'"
+                      << std::boolalpha << storage_config.useSSL << "']";
+}
+
+GcpChunkManager::GcpChunkManager(const StorageConfig& storage_config) {
+    default_bucket_name_ = storage_config.bucket_name;
+
+    if (storage_config.useIAM) {
+        sdk_options_.httpOptions.httpClientFactory_create_fn = []() {
+            auto credentials = std::make_shared<
+                google::cloud::oauth2_internal::GOOGLE_CLOUD_CPP_NS::
+                    ComputeEngineCredentials>();
+            return Aws::MakeShared<GoogleHttpClientFactory>(
+                GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG, credentials);
+        };
+    }
+
+    InitSDKAPIDefault(storage_config.log_level);
+
+    Aws::Client::ClientConfiguration config = generateConfig(storage_config);
+    if (storage_config.useIAM) {
+        // Using S3 client instead of google client because of compatible protocol
+        client_ = std::make_shared<Aws::S3::S3Client>(
+            config,
+            Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
+            storage_config.useVirtualHost);
+    } else {
+        BuildAccessKeyClient(storage_config, config);
+    }
+
+    LOG_SEGCORE_INFO_ << "init GcpChunkManager with parameter[endpoint: '"
+                      << storage_config.address << "', default_bucket_name:'"
+                      << storage_config.bucket_name << "', use_secure:'"
+                      << std::boolalpha << storage_config.useSSL << "']";
+}
+
+AliyunChunkManager::AliyunChunkManager(const StorageConfig& storage_config) {
+    default_bucket_name_ = storage_config.bucket_name;
+
+    InitSDKAPIDefault(storage_config.log_level);
+
+    Aws::Client::ClientConfiguration config = generateConfig(storage_config);
+    if (storage_config.useIAM) {
+        auto aliyun_provider = Aws::MakeShared<
+            Aws::Auth::AliyunSTSAssumeRoleWebIdentityCredentialsProvider>(
+            "AliyunSTSAssumeRoleWebIdentityCredentialsProvider");
+        auto aliyun_credentials = aliyun_provider->GetAWSCredentials();
+        AssertInfo(!aliyun_credentials.GetAWSAccessKeyId().empty(),
+                   "if use iam, access key id should not be empty");
+        AssertInfo(!aliyun_credentials.GetAWSSecretKey().empty(),
+                   "if use iam, secret key should not be empty");
+        AssertInfo(!aliyun_credentials.GetSessionToken().empty(),
+                   "if use iam, token should not be empty");
+        client_ = std::make_shared<Aws::S3::S3Client>(
+            aliyun_provider,
+            config,
+            Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
+            storage_config.useVirtualHost);
+    } else {
+        BuildAccessKeyClient(storage_config, config);
+    }
+
+    LOG_SEGCORE_INFO_ << "init AliyunChunkManager with parameter[endpoint: '"
+                      << storage_config.address << "', default_bucket_name:'"
+                      << storage_config.bucket_name << "', use_secure:'"
+                      << std::boolalpha << storage_config.useSSL << "']";
+}
+
+}  // namespace milvus::storage
diff --git a/internal/core/src/storage/MinioChunkManager.cpp b/internal/core/src/storage/MinioChunkManager.cpp
index e3a683d411..2be1cbd80e 100644
--- a/internal/core/src/storage/MinioChunkManager.cpp
+++ b/internal/core/src/storage/MinioChunkManager.cpp
@@ -150,6 +150,49 @@ MinioChunkManager::InitSDKAPI(RemoteStorageType type,
     }
 }
 
+void
+MinioChunkManager::InitSDKAPIDefault(const std::string& log_level_str) {
+    std::scoped_lock lock{client_mutex_};
+    const size_t initCount = init_count_++;
+    if (initCount == 0) {
+        // sdk_options_.httpOptions.installSigPipeHandler = true;
+        struct sigaction psa;
+        memset(&psa, 0, sizeof psa);
+        psa.sa_handler = SwallowHandler;
+        psa.sa_flags = psa.sa_flags | SA_ONSTACK;
+        sigaction(SIGPIPE, &psa, 0);
+        // block multiple SIGPIPE concurrently processing
+        sigemptyset(&psa.sa_mask);
+        sigaddset(&psa.sa_mask, SIGPIPE);
+        sigaction(SIGPIPE, &psa, 0);
+        LOG_SEGCORE_INFO_ << "init aws with log level:" << log_level_str;
+        auto get_aws_log_level = [](const std::string& level_str) {
+            Aws::Utils::Logging::LogLevel level =
+                Aws::Utils::Logging::LogLevel::Off;
+            if (level_str == "fatal") {
+                level = Aws::Utils::Logging::LogLevel::Fatal;
+            } else if (level_str == "error") {
+                level = Aws::Utils::Logging::LogLevel::Error;
+            } else if (level_str == "warn") {
+                level = Aws::Utils::Logging::LogLevel::Warn;
+            } else if (level_str == "info") {
+                level = Aws::Utils::Logging::LogLevel::Info;
+            } else if (level_str == "debug") {
+                level = Aws::Utils::Logging::LogLevel::Debug;
+            } else if (level_str == "trace") {
+                level = Aws::Utils::Logging::LogLevel::Trace;
+            }
+            return level;
+        };
+        auto log_level = get_aws_log_level(log_level_str);
+        sdk_options_.loggingOptions.logLevel = log_level;
+        sdk_options_.loggingOptions.logger_create_fn = [log_level]() {
+            return std::make_shared<AwsLogger>(log_level);
+        };
+        Aws::InitAPI(sdk_options_);
+    }
+}
+
 void
 MinioChunkManager::ShutdownSDKAPI() {
     std::scoped_lock lock{client_mutex_};
diff --git a/internal/core/src/storage/MinioChunkManager.h b/internal/core/src/storage/MinioChunkManager.h
index 1cfc4b896e..275c739cd5 100644
--- a/internal/core/src/storage/MinioChunkManager.h
+++ b/internal/core/src/storage/MinioChunkManager.h
@@ -69,6 +69,8 @@ class AwsLogger : public Aws::Utils::Logging::FormattedLogSystem {
  */
 class MinioChunkManager : public ChunkManager {
  public:
+    MinioChunkManager() {
+    }
     explicit MinioChunkManager(const StorageConfig& storage_config);
 
     MinioChunkManager(const MinioChunkManager&);
@@ -169,6 +171,8 @@ class MinioChunkManager : public ChunkManager {
     std::vector<std::string>
     ListObjects(const char* bucket_name, const char* prefix = nullptr);
 
+    void
+    InitSDKAPIDefault(const std::string& log_level);
     void
     InitSDKAPI(RemoteStorageType type,
                bool useIAM,
@@ -185,7 +189,7 @@ class MinioChunkManager : public ChunkManager {
     BuildGoogleCloudClient(const StorageConfig& storage_config,
                            const Aws::Client::ClientConfiguration& config);
 
- private:
+ protected:
     void
     BuildAccessKeyClient(const StorageConfig& storage_config,
                          const Aws::Client::ClientConfiguration& config);
@@ -198,6 +202,33 @@ class MinioChunkManager : public ChunkManager {
     std::string remote_root_path_;
 };
 
+class AwsChunkManager : public MinioChunkManager {
+ public:
+    explicit AwsChunkManager(const StorageConfig& storage_config);
+    virtual std::string
+    GetName() const {
+        return "AwsChunkManager";
+    }
+};
+
+class GcpChunkManager : public MinioChunkManager {
+ public:
+    explicit GcpChunkManager(const StorageConfig& storage_config);
+    virtual std::string
+    GetName() const {
+        return "GcpChunkManager";
+    }
+};
+
+class AliyunChunkManager : public MinioChunkManager {
+ public:
+    explicit AliyunChunkManager(const StorageConfig& storage_config);
+    virtual std::string
+    GetName() const {
+        return "AliyunChunkManager";
+    }
+};
+
 using MinioChunkManagerPtr = std::unique_ptr<MinioChunkManager>;
 
 static const char* GOOGLE_CLIENT_FACTORY_ALLOCATION_TAG =
diff --git a/internal/core/src/storage/Types.h b/internal/core/src/storage/Types.h
index e79c8f34ec..0f0ac1aefe 100644
--- a/internal/core/src/storage/Types.h
+++ b/internal/core/src/storage/Types.h
@@ -88,6 +88,7 @@ struct StorageConfig {
     std::string access_key_value = "minioadmin";
     std::string root_path = "files";
     std::string storage_type = "minio";
+    std::string cloud_provider = "aws";
     std::string iam_endpoint = "";
     std::string log_level = "error";
     std::string region = "";
diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp
index a19dde62f1..68f33cf191 100644
--- a/internal/core/src/storage/Util.cpp
+++ b/internal/core/src/storage/Util.cpp
@@ -1,3 +1,4 @@
+
 // Licensed to the LF AI & Data foundation under one
 // or more contributor license agreements. See the NOTICE file
 // distributed with this work for additional information
@@ -21,6 +22,9 @@
 #include "common/EasyAssert.h"
 #include "common/Consts.h"
 #include "fmt/format.h"
+#ifdef AZURE_BUILD_DIR
+#include "storage/AzureChunkManager.h"
+#endif
 #include "storage/FieldData.h"
 #include "storage/InsertData.h"
 #include "storage/FieldDataInterface.h"
@@ -33,7 +37,23 @@ namespace milvus::storage {
 
 std::map<std::string, ChunkManagerType> ChunkManagerType_Map = {
-    {"local", ChunkManagerType::Local}, {"minio", ChunkManagerType::Minio}};
+    {"local", ChunkManagerType::Local},
+    {"minio", ChunkManagerType::Minio},
+    {"remote", ChunkManagerType::Remote}};
+
+enum class CloudProviderType : int8_t {
+    UNKNOWN = 0,
+    AWS = 1,
+    GCP = 2,
+    ALIYUN = 3,
+    AZURE = 4,
+};
+
+std::map<std::string, CloudProviderType> CloudProviderType_Map = {
+    {"aws", CloudProviderType::AWS},
+    {"gcp", CloudProviderType::GCP},
+    {"aliyun", CloudProviderType::ALIYUN},
+    {"azure", CloudProviderType::AZURE}};
 
 StorageType
 ReadMediumType(BinlogReaderPtr reader) {
@@ -561,6 +581,30 @@ CreateChunkManager(const StorageConfig& storage_config) {
         case ChunkManagerType::Minio: {
             return std::make_shared<MinioChunkManager>(storage_config);
         }
+        case ChunkManagerType::Remote: {
+            auto cloud_provider_type =
+                CloudProviderType_Map[storage_config.cloud_provider];
+            switch (cloud_provider_type) {
+                case CloudProviderType::AWS: {
+                    return std::make_shared<AwsChunkManager>(storage_config);
+                }
+                case CloudProviderType::GCP: {
+                    return std::make_shared<GcpChunkManager>(storage_config);
+                }
+                case CloudProviderType::ALIYUN: {
+                    return std::make_shared<AliyunChunkManager>(
+                        storage_config);
+                }
+#ifdef AZURE_BUILD_DIR
+                case CloudProviderType::AZURE: {
+                    return std::make_shared<AzureChunkManager>(storage_config);
+                }
+#endif
+                default: {
+                    return std::make_shared<MinioChunkManager>(storage_config);
+                }
+            }
+        }
+
         default: {
             PanicCodeInfo(
                 ConfigInvalid,
diff --git a/internal/core/src/storage/azure-blob-storage/AzureBlobChunkManager.cpp b/internal/core/src/storage/azure-blob-storage/AzureBlobChunkManager.cpp
new file mode 100644
index 0000000000..f29120de04
--- /dev/null
+++ b/internal/core/src/storage/azure-blob-storage/AzureBlobChunkManager.cpp
@@ -0,0 +1,244 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <azure/identity/workload_identity_credential.hpp>
+#include <iostream>
+#include <sstream>
+#include "AzureBlobChunkManager.h"
+
+namespace azure {
+
+std::string
+GetTenantId() {
+    return std::getenv("AZURE_TENANT_ID");
+}
+std::string
+GetClientId() {
+    return std::getenv("AZURE_CLIENT_ID");
+}
+std::string
+GetTokenFilePath() {
+    return std::getenv("AZURE_FEDERATED_TOKEN_FILE");
+}
+std::string
+GetConnectionString(const std::string& access_key_id,
+                    const std::string& access_key_value,
+                    const std::string& address) {
+    char const* tmp = getenv("AZURE_STORAGE_CONNECTION_STRING");
+    if (tmp != NULL) {
+        std::string envConnectionString(tmp);
+        if (!envConnectionString.empty()) {
+            return envConnectionString;
+        }
+    }
+    return "DefaultEndpointsProtocol=https;AccountName=" + access_key_id +
+           ";AccountKey=" + access_key_value + ";EndpointSuffix=" + address;
+}
+
+AzureBlobChunkManager::AzureBlobChunkManager(
+    const std::string& access_key_id,
+    const std::string& access_key_value,
+    const std::string& address,
+    bool useIAM) {
+    if (useIAM) {
+        auto workloadIdentityCredential =
+            std::make_shared<Azure::Identity::WorkloadIdentityCredential>(
+                GetTenantId(), GetClientId(), GetTokenFilePath());
+        client_ = std::make_shared<Azure::Storage::Blobs::BlobServiceClient>(
+            "https://" + access_key_id + ".blob." + address + "/",
+            workloadIdentityCredential);
+    } else {
+        client_ = std::make_shared<Azure::Storage::Blobs::BlobServiceClient>(
+            Azure::Storage::Blobs::BlobServiceClient::
+                CreateFromConnectionString(GetConnectionString(
+                    access_key_id, access_key_value, address)));
+    }
+}
+
+AzureBlobChunkManager::~AzureBlobChunkManager() {
+}
+
+bool
+AzureBlobChunkManager::BucketExists(const std::string& bucket_name) {
+    std::vector<std::string> buckets;
+    for (auto containerPage = client_->ListBlobContainers();
+         containerPage.HasPage();
+         containerPage.MoveToNextPage()) {
+        for (auto& container : containerPage.BlobContainers) {
+            if (container.Name == bucket_name) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+std::vector<std::string>
+AzureBlobChunkManager::ListBuckets() {
+    std::vector<std::string> buckets;
+    for (auto containerPage = client_->ListBlobContainers();
+         containerPage.HasPage();
+         containerPage.MoveToNextPage()) {
+        for (auto& container : containerPage.BlobContainers) {
+            buckets.emplace_back(container.Name);
+        }
+    }
+    return buckets;
+}
+
+void
+AzureBlobChunkManager::CreateBucket(const std::string& bucket_name) {
+    client_->GetBlobContainerClient(bucket_name).Create();
+}
+
+void
+AzureBlobChunkManager::DeleteBucket(const std::string& bucket_name) {
+    client_->GetBlobContainerClient(bucket_name).Delete();
+}
+
+bool
+AzureBlobChunkManager::ObjectExists(const std::string& bucket_name,
+                                    const std::string& object_name) {
+    for (auto blobPage =
+             client_->GetBlobContainerClient(bucket_name).ListBlobs();
+         blobPage.HasPage();
+         blobPage.MoveToNextPage()) {
+        for (auto& blob : blobPage.Blobs) {
+            if (blob.Name == object_name) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+int64_t
+AzureBlobChunkManager::GetObjectSize(const std::string& bucket_name,
+                                     const std::string& object_name) {
+    for (auto blobPage =
+             client_->GetBlobContainerClient(bucket_name).ListBlobs();
+         blobPage.HasPage();
+         blobPage.MoveToNextPage()) {
+        for (auto& blob : blobPage.Blobs) {
+            if (blob.Name == object_name) {
+                return blob.BlobSize;
+            }
+        }
+    }
+    std::stringstream err_msg;
+    err_msg << "object('" << bucket_name << "', " << object_name
+            << "') not exists";
+    throw std::runtime_error(err_msg.str());
+}
+
+void
+AzureBlobChunkManager::DeleteObject(const std::string& bucket_name,
+                                    const std::string& object_name) {
+    client_->GetBlobContainerClient(bucket_name)
+        .GetBlockBlobClient(object_name)
+        .Delete();
+}
+
+bool
+AzureBlobChunkManager::PutObjectBuffer(const std::string& bucket_name,
+                                       const std::string& object_name,
+                                       void* buf,
+                                       uint64_t size) {
+    std::vector<uint8_t> str(static_cast<uint8_t*>(buf),
+                             static_cast<uint8_t*>(buf) + size);
+    client_->GetBlobContainerClient(bucket_name)
+        .GetBlockBlobClient(object_name)
+        .UploadFrom(str.data(), str.size());
+    return true;
+}
+
+uint64_t
+AzureBlobChunkManager::GetObjectBuffer(const std::string& bucket_name,
+                                       const std::string& object_name,
+                                       void* buf,
+                                       uint64_t size) {
+    Azure::Storage::Blobs::DownloadBlobOptions downloadOptions;
+    downloadOptions.Range = Azure::Core::Http::HttpRange();
+    downloadOptions.Range.Value().Offset = 0;
+    downloadOptions.Range.Value().Length = size;
+    auto downloadResponse = client_->GetBlobContainerClient(bucket_name)
+                                .GetBlockBlobClient(object_name)
+                                .Download(downloadOptions);
+    std::vector<uint8_t> str =
+        downloadResponse.Value.BodyStream->ReadToEnd();
+    memcpy(static_cast<uint8_t*>(buf), &str[0], str.size() * sizeof(str[0]));
+    return str.size();
+}
+
+std::vector<std::string>
+AzureBlobChunkManager::ListObjects(const char* bucket_name,
+                                   const char* prefix) {
+    std::vector<std::string> objects_vec;
+    for (auto blobPage =
+             client_->GetBlobContainerClient(bucket_name).ListBlobs();
+         blobPage.HasPage();
+         blobPage.MoveToNextPage()) {
+        for (auto& blob : blobPage.Blobs) {
+            if (blob.Name.rfind(prefix, 0) == 0) {
+                objects_vec.emplace_back(blob.Name);
+            }
+        }
+    }
+    return objects_vec;
+}
+
+}  // namespace azure
+
+int
+main() {
+    const char* containerName = "default";
+    const char* blobName = "sample-blob";
+    using namespace azure;
+    AzureBlobChunkManager chunkManager = AzureBlobChunkManager("", "", "");
+    std::vector<std::string> buckets = chunkManager.ListBuckets();
+    for (const auto& bucket : buckets) {
+        std::cout << bucket << std::endl;
+    }
+    std::vector<std::string> objects =
+        chunkManager.ListObjects(containerName, blobName);
+    for (const auto& object : objects) {
+        std::cout << object << std::endl;
+    }
+    std::cout << chunkManager.GetObjectSize(containerName, blobName)
+              << std::endl;
+    std::cout << chunkManager.ObjectExists(containerName, blobName)
+              << std::endl;
+    std::cout << chunkManager.ObjectExists(containerName, "blobName")
+              << std::endl;
+    std::cout << chunkManager.BucketExists(containerName) << std::endl;
+    char buffer[1024 * 1024];
+    chunkManager.GetObjectBuffer(containerName, blobName, buffer, 1024 * 1024);
+    std::cout << buffer << std::endl;
+
+    char msg[12];
+    memcpy(msg, "Azure hello!", 12);
+    if (!chunkManager.ObjectExists(containerName, "blobName")) {
+        chunkManager.PutObjectBuffer(containerName, "blobName", msg, 12);
+    }
+    char buffer0[1024 * 1024];
+    chunkManager.GetObjectBuffer(
+        containerName, "blobName", buffer0, 1024 * 1024);
+    std::cout << buffer0 << std::endl;
+    chunkManager.DeleteObject(containerName, "blobName");
+    chunkManager.CreateBucket("sample-container1");
+    chunkManager.DeleteBucket("sample-container1");
+    exit(EXIT_SUCCESS);
+}
\ No newline at end of file
diff --git a/internal/core/src/storage/azure-blob-storage/AzureBlobChunkManager.h b/internal/core/src/storage/azure-blob-storage/AzureBlobChunkManager.h
new file mode 100644
index 0000000000..3ff19ad9f0
--- /dev/null
+++ b/internal/core/src/storage/azure-blob-storage/AzureBlobChunkManager.h
@@ -0,0 +1,78 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <azure/storage/blobs.hpp>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+#include "azure/storage/common/storage_exception.hpp"
+
+namespace azure {
+/**
+ * @brief This AzureBlobChunkManager is responsible for reading from and
+ * writing to Azure Blob Storage.
+ */
+class AzureBlobChunkManager {
+ public:
+    explicit AzureBlobChunkManager(const std::string& access_key_id,
+                                   const std::string& access_key_value,
+                                   const std::string& address,
+                                   bool useIAM = false);
+
+    AzureBlobChunkManager(const AzureBlobChunkManager&);
+    AzureBlobChunkManager&
+    operator=(const AzureBlobChunkManager&);
+
+ public:
+    virtual ~AzureBlobChunkManager();
+
+    bool
+    BucketExists(const std::string& bucket_name);
+    void
+    CreateBucket(const std::string& bucket_name);
+    void
+    DeleteBucket(const std::string& bucket_name);
+    std::vector<std::string>
+    ListBuckets();
+    bool
+    ObjectExists(const std::string& bucket_name,
+                 const std::string& object_name);
+    int64_t
+    GetObjectSize(const std::string& bucket_name,
+                  const std::string& object_name);
+    void
+    DeleteObject(const std::string& bucket_name,
+                 const std::string& object_name);
+    bool
+    PutObjectBuffer(const std::string& bucket_name,
+                    const std::string& object_name,
+                    void* buf,
+                    uint64_t size);
+    uint64_t
+    GetObjectBuffer(const std::string& bucket_name,
+                    const std::string& object_name,
+                    void* buf,
+                    uint64_t size);
+    std::vector<std::string>
+    ListObjects(const char* bucket_name, const char* prefix = nullptr);
+
+ private:
+    std::shared_ptr<Azure::Storage::Blobs::BlobServiceClient> client_;
+};
+
+}  // namespace azure
diff --git a/internal/core/src/storage/azure-blob-storage/CMakeLists.txt b/internal/core/src/storage/azure-blob-storage/CMakeLists.txt
new file mode 100644
index 0000000000..91c2cc3471
--- /dev/null
+++ b/internal/core/src/storage/azure-blob-storage/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+cmake_minimum_required (VERSION 3.12)
+set(CMAKE_CXX_STANDARD 17)
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake-modules")
+message("${CMAKE_CURRENT_SOURCE_DIR}")
+include(AzureVcpkg)
+az_vcpkg_integrate()
+
+project(azure-blob-storage)
+
+find_program(NUGET_EXE NAMES nuget)
+
+if(NOT NUGET_EXE)
+    message(FATAL "CMake could not find the nuget command line tool.
Please install it from https://www.nuget.org/downloads!") +else() + exec_program(${NUGET_EXE} + ARGS install "Microsoft.Attestation.Client" -Version 0.1.181 -ExcludeVersion -OutputDirectory ${CMAKE_BINARY_DIR}/packages) +endif() + +find_package(azure-storage-blobs-cpp CONFIG REQUIRED) +find_package(azure-identity-cpp CONFIG REQUIRED) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-return-type -Wno-pedantic") +add_library(blob-chunk-manager SHARED AzureBlobChunkManager.cpp) +target_link_libraries(blob-chunk-manager PRIVATE Azure::azure-identity Azure::azure-storage-blobs) + +install(TARGETS blob-chunk-manager DESTINATION "${CMAKE_INSTALL_LIBDIR}") + diff --git a/internal/core/src/storage/azure-blob-storage/cmake-modules/AzureVcpkg.cmake b/internal/core/src/storage/azure-blob-storage/cmake-modules/AzureVcpkg.cmake new file mode 100644 index 0000000000..c49a433e59 --- /dev/null +++ b/internal/core/src/storage/azure-blob-storage/cmake-modules/AzureVcpkg.cmake @@ -0,0 +1,169 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +# We need to know an absolute path to our repo root to do things like referencing ./LICENSE.txt file. +set(AZ_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}/..") + +macro(az_vcpkg_integrate) + message("Vcpkg integrate step.") + # AUTO CMAKE_TOOLCHAIN_FILE: + # User can call `cmake -DCMAKE_TOOLCHAIN_FILE="path_to_the_toolchain"` as the most specific scenario. + # As the last alternative (default case), Azure SDK will automatically clone VCPKG folder and set toolchain from there. + if(NOT DEFINED CMAKE_TOOLCHAIN_FILE) + message("CMAKE_TOOLCHAIN_FILE is not defined. Define it for the user.") + # Set AZURE_SDK_DISABLE_AUTO_VCPKG env var to avoid Azure SDK from cloning and setting VCPKG automatically + # This option delegate package's dependencies installation to user. + if(NOT DEFINED ENV{AZURE_SDK_DISABLE_AUTO_VCPKG}) + message("AZURE_SDK_DISABLE_AUTO_VCPKG is not defined. Fetch a local copy of vcpkg.") + # GET VCPKG FROM SOURCE + # User can set env var AZURE_SDK_VCPKG_COMMIT to pick the VCPKG commit to fetch + set(VCPKG_COMMIT_STRING 71d875654e32ee216b0b7e0dc684e589dffa1b1c) # default SDK tested commit + if(DEFINED ENV{AZURE_SDK_VCPKG_COMMIT}) + message("AZURE_SDK_VCPKG_COMMIT is defined. Using that instead of the default.") + set(VCPKG_COMMIT_STRING "$ENV{AZURE_SDK_VCPKG_COMMIT}") # default SDK tested commit + endif() + message("Vcpkg commit string used: ${VCPKG_COMMIT_STRING}") + include(FetchContent) + FetchContent_Declare( + vcpkg + GIT_REPOSITORY https://github.com/milvus-io/vcpkg.git + GIT_TAG ${VCPKG_COMMIT_STRING} + ) + FetchContent_GetProperties(vcpkg) + # make sure to pull vcpkg only once. 
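+    # (FetchContent_GetProperties sets vcpkg_POPULATED once the repository has
+    # been fetched, so the guard below keeps repeated configure runs from
+    # cloning vcpkg again.)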
+ if(NOT vcpkg_POPULATED) + FetchContent_Populate(vcpkg) + endif() + # use the vcpkg source path + set(CMAKE_TOOLCHAIN_FILE "${vcpkg_SOURCE_DIR}/scripts/buildsystems/vcpkg.cmake" CACHE STRING "") + endif() + endif() + + # enable triplet customization + if(DEFINED ENV{VCPKG_DEFAULT_TRIPLET} AND NOT DEFINED VCPKG_TARGET_TRIPLET) + set(VCPKG_TARGET_TRIPLET "$ENV{VCPKG_DEFAULT_TRIPLET}" CACHE STRING "") + endif() +endmacro() + +macro(az_vcpkg_portfile_prep targetName fileName contentToRemove) + # with sdk//vcpkg/ + file(READ "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/${fileName}" fileContents) + + # Windows -> Unix line endings + string(FIND fileContents "\r\n" crLfPos) + + if (crLfPos GREATER -1) + string(REPLACE "\r\n" "\n" fileContents ${fileContents}) + endif() + + # remove comment header + string(REPLACE "${contentToRemove}" "" fileContents ${fileContents}) + + # undo Windows -> Unix line endings (if applicable) + if (crLfPos GREATER -1) + string(REPLACE "\n" "\r\n" fileContents ${fileContents}) + endif() + unset(crLfPos) + + # output to an intermediate location + file (WRITE "${CMAKE_BINARY_DIR}/vcpkg_prep/${targetName}/${fileName}" ${fileContents}) + unset(fileContents) + + # Produce the files to help with the vcpkg release. + # Go to the /out/build//vcpkg directory, and copy (merge) "ports" folder to the vcpkg repo. + # Then, update the portfile.cmake file SHA512 from "1" to the actual hash (a good way to do it is to uninstall a package, + # clean vcpkg/downloads, vcpkg/buildtrees, run "vcpkg install ", and get the SHA from the error message). + configure_file( + "${CMAKE_BINARY_DIR}/vcpkg_prep/${targetName}/${fileName}" + "${CMAKE_BINARY_DIR}/vcpkg/ports/${targetName}-cpp/${fileName}" + @ONLY + ) +endmacro() + +macro(az_vcpkg_export targetName macroNamePart dllImportExportHeaderPath) + foreach(vcpkgFile "vcpkg.json" "portfile.cmake") + az_vcpkg_portfile_prep( + "${targetName}" + "${vcpkgFile}" + "# Copyright (c) Microsoft Corporation.\n# Licensed under the MIT License.\n\n" + ) + endforeach() + + # Standard names for folders such as "bin", "lib", "include". We could hardcode, but some other libs use it too (curl). + include(GNUInstallDirs) + + # When installing, copy our "inc" directory (headers) to "include" directory at the install location. + install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/inc/azure/" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/azure") + + # Copy license as "copyright" (vcpkg dictates naming and location). + install(FILES "${AZ_ROOT_DIR}/LICENSE.txt" DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" RENAME "copyright") + + # Indicate where to install targets. Mirrors what other ports do. 
+ install( + TARGETS "${targetName}" + EXPORT "${targetName}-cppTargets" + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} # DLLs (if produced by build) go to "/bin" + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} # static .lib files + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} # .lib files for DLL build + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} # headers + ) + + # If building a Windows DLL, patch the dll_import_export.hpp + if(WIN32 AND BUILD_SHARED_LIBS) + add_compile_definitions(AZ_${macroNamePart}_BEING_BUILT) + target_compile_definitions(${targetName} PUBLIC AZ_${macroNamePart}_DLL) + + set(AZ_${macroNamePart}_DLL_INSTALLED_AS_PACKAGE "*/ + 1 /*") + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/inc/${dllImportExportHeaderPath}" + "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderPath}" + @ONLY + ) + unset(AZ_${macroNamePart}_DLL_INSTALLED_AS_PACKAGE) + + get_filename_component(dllImportExportHeaderDir ${dllImportExportHeaderPath} DIRECTORY) + install( + FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderPath}" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderDir}" + ) + unset(dllImportExportHeaderDir) + endif() + + # Export the targets file itself. + install( + EXPORT "${targetName}-cppTargets" + DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" + NAMESPACE Azure:: # Not the C++ namespace, but a namespace in terms of cmake. + FILE "${targetName}-cppTargets.cmake" + ) + + # configure_package_config_file(), write_basic_package_version_file() + include(CMakePackageConfigHelpers) + + # Produce package config file. + configure_package_config_file( + "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/Config.cmake.in" + "${targetName}-cppConfig.cmake" + INSTALL_DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" + PATH_VARS + CMAKE_INSTALL_LIBDIR) + + # Produce version file. + write_basic_package_version_file( + "${targetName}-cppConfigVersion.cmake" + VERSION ${AZ_LIBRARY_VERSION} # the version that we extracted from package_version.hpp + COMPATIBILITY SameMajorVersion + ) + + # Install package config and version files. + install( + FILES + "${CMAKE_CURRENT_BINARY_DIR}/${targetName}-cppConfig.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/${targetName}-cppConfigVersion.cmake" + DESTINATION + "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" # to shares/ + ) + + # Export all the installs above as package. 
+ export(PACKAGE "${targetName}-cpp") +endmacro() diff --git a/internal/core/src/storage/azure-blob-storage/vcpkg.json b/internal/core/src/storage/azure-blob-storage/vcpkg.json new file mode 100644 index 0000000000..ac0d797d5d --- /dev/null +++ b/internal/core/src/storage/azure-blob-storage/vcpkg.json @@ -0,0 +1,8 @@ +{ + "name": "azure-blob-storage", + "version-string": "1.0.0", + "dependencies": [ + "azure-identity-cpp", + "azure-storage-blobs-cpp" + ] +} diff --git a/internal/core/src/storage/storage_c.cpp b/internal/core/src/storage/storage_c.cpp index ef60cd6ace..2dcea75c20 100644 --- a/internal/core/src/storage/storage_c.cpp +++ b/internal/core/src/storage/storage_c.cpp @@ -62,6 +62,8 @@ InitRemoteChunkManagerSingleton(CStorageConfig c_storage_config) { storage_config.root_path = std::string(c_storage_config.root_path); storage_config.storage_type = std::string(c_storage_config.storage_type); + storage_config.cloud_provider = + std::string(c_storage_config.cloud_provider); storage_config.iam_endpoint = std::string(c_storage_config.iam_endpoint); storage_config.log_level = std::string(c_storage_config.log_level); diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index ef721fef09..8267d01e01 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -74,6 +74,16 @@ if (LINUX OR APPLE) ) endif() +if (DEFINED AZURE_BUILD_DIR) + set(MILVUS_TEST_FILES + ${MILVUS_TEST_FILES} + test_azure_chunk_manager.cpp + #need update aws-sdk-cpp, see more from https://github.com/aws/aws-sdk-cpp/issues/2119 + #test_remote_chunk_manager.cpp + ) + include_directories("${AZURE_BUILD_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include") +endif() + if (LINUX) message( STATUS "Building Milvus Unit Test on Linux") option(USE_ASAN "Whether to use AddressSanitizer" OFF) diff --git a/internal/core/unittest/test_azure_chunk_manager.cpp b/internal/core/unittest/test_azure_chunk_manager.cpp new file mode 100644 index 0000000000..71e9a78d82 --- /dev/null +++ b/internal/core/unittest/test_azure_chunk_manager.cpp @@ -0,0 +1,288 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. 
See the License for the specific language governing permissions and limitations under the License
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+
+#include "common/EasyAssert.h"
+#include "storage/AzureChunkManager.h"
+#include "storage/Util.h"
+
+using namespace std;
+using namespace milvus;
+using namespace milvus::storage;
+
+StorageConfig
+get_default_storage_config() {
+ auto endpoint = "core.windows.net";
+ auto accessKey = "devstoreaccount1";
+ auto accessValue = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==";
+ auto rootPath = "files";
+ auto useSSL = false;
+ auto useIam = false;
+ auto iamEndPoint = "";
+ auto bucketName = "a-bucket";
+
+ return StorageConfig{endpoint,
+ bucketName,
+ accessKey,
+ accessValue,
+ rootPath,
+ "remote",
+ "azure",
+ iamEndPoint,
+ "error",
+ "",
+ useSSL,
+ useIam};
+}
+
+class AzureChunkManagerTest : public testing::Test {
+ public:
+ AzureChunkManagerTest() {
+ }
+ ~AzureChunkManagerTest() {
+ }
+
+ virtual void
+ SetUp() {
+ configs_ = get_default_storage_config();
+ chunk_manager_ = make_unique<AzureChunkManager>(configs_);
+ chunk_manager_ptr_ = CreateChunkManager(configs_);
+ }
+
+ protected:
+ AzureChunkManagerPtr chunk_manager_;
+ ChunkManagerPtr chunk_manager_ptr_;
+ StorageConfig configs_;
+};
+
+TEST_F(AzureChunkManagerTest, BasicFunctions) {
+ EXPECT_TRUE(chunk_manager_->GetName() == "AzureChunkManager");
+ EXPECT_TRUE(chunk_manager_ptr_->GetName() == "AzureChunkManager");
+ EXPECT_TRUE(chunk_manager_->GetRootPath() == "files");
+
+ string path = "test";
+ uint8_t readdata[20] = {0};
+ try {
+ chunk_manager_->Read(path, 0, readdata, sizeof(readdata));
+ } catch (SegcoreError& e) {
+ EXPECT_TRUE(string(e.what()).find("Read") != string::npos);
+ }
+ try {
+ chunk_manager_->Write(path, 0, readdata, sizeof(readdata));
+ } catch (SegcoreError& e) {
+ EXPECT_TRUE(string(e.what()).find("Write") != string::npos);
+ }
+}
+
+TEST_F(AzureChunkManagerTest, BucketPositive) {
+ string testBucketName = "test-bucket";
+ bool exist = chunk_manager_->BucketExists(testBucketName);
+ EXPECT_EQ(exist, false);
+ chunk_manager_->CreateBucket(testBucketName);
+ exist = chunk_manager_->BucketExists(testBucketName);
+ EXPECT_EQ(exist, true);
+ vector<string> buckets = chunk_manager_->ListBuckets();
+ EXPECT_EQ(buckets[0], testBucketName);
+ chunk_manager_->DeleteBucket(testBucketName);
+}
+
+TEST_F(AzureChunkManagerTest, BucketNegtive) {
+ string testBucketName = "test-bucket-ng";
+ try {
+ chunk_manager_->DeleteBucket(testBucketName);
+ } catch (SegcoreError& e) {
+ EXPECT_TRUE(string(e.what()).find("not") != string::npos);
+ }
+
+ // create already exist bucket
+ chunk_manager_->CreateBucket(testBucketName);
+ try {
+ chunk_manager_->CreateBucket(testBucketName);
+ } catch (SegcoreError& e) {
+ EXPECT_TRUE(string(e.what()).find("exists") != string::npos);
+ }
+ chunk_manager_->DeleteBucket(testBucketName);
+}
+
+TEST_F(AzureChunkManagerTest, ObjectExist) {
+ string testBucketName = configs_.bucket_name;
+ string objPath = "1/3";
+ if (!chunk_manager_->BucketExists(testBucketName)) {
+ chunk_manager_->CreateBucket(testBucketName);
+ }
+
+ bool exist = chunk_manager_->Exist(objPath);
+ EXPECT_EQ(exist, false);
+ chunk_manager_->DeleteBucket(testBucketName);
+}
+
+TEST_F(AzureChunkManagerTest, WritePositive) {
+ string testBucketName = configs_.bucket_name;
+ EXPECT_EQ(chunk_manager_->GetBucketName(), testBucketName);
+
+ if (!chunk_manager_->BucketExists(testBucketName)) {
+ chunk_manager_->CreateBucket(testBucketName);
+ }
+ auto has_bucket = 
chunk_manager_->BucketExists(testBucketName); + uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23}; + string path = "1"; + chunk_manager_->Write(path, data, sizeof(data)); + + bool exist = chunk_manager_->Exist(path); + EXPECT_EQ(exist, true); + + auto size = chunk_manager_->Size(path); + EXPECT_EQ(size, 5); + + int datasize = 10000; + uint8_t* bigdata = new uint8_t[datasize]; + srand((unsigned)time(NULL)); + for (int i = 0; i < datasize; ++i) { + bigdata[i] = rand() % 256; + } + chunk_manager_->Write(path, bigdata, datasize); + size = chunk_manager_->Size(path); + EXPECT_EQ(size, datasize); + delete[] bigdata; + + chunk_manager_->Remove(path); + chunk_manager_->DeleteBucket(testBucketName); +} + +TEST_F(AzureChunkManagerTest, ReadPositive) { + string testBucketName = configs_.bucket_name; + EXPECT_EQ(chunk_manager_->GetBucketName(), testBucketName); + + if (!chunk_manager_->BucketExists(testBucketName)) { + chunk_manager_->CreateBucket(testBucketName); + } + uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23}; + string path = "1/4/6"; + chunk_manager_->Write(path, data, sizeof(data)); + bool exist = chunk_manager_->Exist(path); + EXPECT_EQ(exist, true); + auto size = chunk_manager_->Size(path); + EXPECT_EQ(size, sizeof(data)); + + uint8_t readdata[20] = {0}; + size = chunk_manager_->Read(path, readdata, sizeof(data)); + EXPECT_EQ(size, sizeof(data)); + EXPECT_EQ(readdata[0], 0x17); + EXPECT_EQ(readdata[1], 0x32); + EXPECT_EQ(readdata[2], 0x45); + EXPECT_EQ(readdata[3], 0x34); + EXPECT_EQ(readdata[4], 0x23); + + size = chunk_manager_->Read(path, readdata, 3); + EXPECT_EQ(size, 3); + EXPECT_EQ(readdata[0], 0x17); + EXPECT_EQ(readdata[1], 0x32); + EXPECT_EQ(readdata[2], 0x45); + + uint8_t dataWithNULL[] = {0x17, 0x32, 0x00, 0x34, 0x23}; + chunk_manager_->Write(path, dataWithNULL, sizeof(dataWithNULL)); + exist = chunk_manager_->Exist(path); + EXPECT_EQ(exist, true); + size = chunk_manager_->Size(path); + EXPECT_EQ(size, sizeof(dataWithNULL)); + size = chunk_manager_->Read(path, readdata, sizeof(dataWithNULL)); + EXPECT_EQ(size, sizeof(dataWithNULL)); + EXPECT_EQ(readdata[0], 0x17); + EXPECT_EQ(readdata[1], 0x32); + EXPECT_EQ(readdata[2], 0x00); + EXPECT_EQ(readdata[3], 0x34); + EXPECT_EQ(readdata[4], 0x23); + + chunk_manager_->Remove(path); + + try { + chunk_manager_->Read(path, readdata, sizeof(dataWithNULL)); + } catch (SegcoreError& e) { + EXPECT_TRUE(string(e.what()).find("exists") != string::npos); + } + + chunk_manager_->DeleteBucket(testBucketName); +} + +TEST_F(AzureChunkManagerTest, RemovePositive) { + string testBucketName = configs_.bucket_name; + EXPECT_EQ(chunk_manager_->GetBucketName(), testBucketName); + + if (!chunk_manager_->BucketExists(testBucketName)) { + chunk_manager_->CreateBucket(testBucketName); + } + uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23}; + string path = "1/7/8"; + chunk_manager_->Write(path, data, sizeof(data)); + + bool exist = chunk_manager_->Exist(path); + EXPECT_EQ(exist, true); + + chunk_manager_->Remove(path); + + exist = chunk_manager_->Exist(path); + EXPECT_EQ(exist, false); + + try { + chunk_manager_->Remove(path); + } catch (SegcoreError& e) { + EXPECT_TRUE(string(e.what()).find("not") != string::npos); + } + + try { + chunk_manager_->Size(path); + } catch (SegcoreError& e) { + EXPECT_TRUE(string(e.what()).find("not") != string::npos); + } + + chunk_manager_->DeleteBucket(testBucketName); +} + +TEST_F(AzureChunkManagerTest, ListWithPrefixPositive) { + string testBucketName = configs_.bucket_name; + EXPECT_EQ(chunk_manager_->GetBucketName(), 
testBucketName);
+
+ if (!chunk_manager_->BucketExists(testBucketName)) {
+ chunk_manager_->CreateBucket(testBucketName);
+ }
+
+ string path1 = "1/7/8";
+ string path2 = "1/7/4";
+ string path3 = "1/4/8";
+ uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
+ chunk_manager_->Write(path1, data, sizeof(data));
+ chunk_manager_->Write(path2, data, sizeof(data));
+ chunk_manager_->Write(path3, data, sizeof(data));
+
+ vector<string> objs = chunk_manager_->ListWithPrefix("1/7");
+ EXPECT_EQ(objs.size(), 2);
+ sort(objs.begin(), objs.end());
+ EXPECT_EQ(objs[0], "1/7/4");
+ EXPECT_EQ(objs[1], "1/7/8");
+
+ objs = chunk_manager_->ListWithPrefix("//1/7");
+ EXPECT_EQ(objs.size(), 0);
+
+ objs = chunk_manager_->ListWithPrefix("1");
+ EXPECT_EQ(objs.size(), 3);
+ sort(objs.begin(), objs.end());
+ EXPECT_EQ(objs[0], "1/4/8");
+ EXPECT_EQ(objs[1], "1/7/4");
+
+ chunk_manager_->Remove(path1);
+ chunk_manager_->Remove(path2);
+ chunk_manager_->Remove(path3);
+ chunk_manager_->DeleteBucket(testBucketName);
+}
diff --git a/internal/core/unittest/test_remote_chunk_manager.cpp b/internal/core/unittest/test_remote_chunk_manager.cpp
new file mode 100644
index 0000000000..77f3ec9498
--- /dev/null
+++ b/internal/core/unittest/test_remote_chunk_manager.cpp
@@ -0,0 +1,277 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+
+#include "storage/MinioChunkManager.h"
+#include "storage/Util.h"
+
+using namespace std;
+using namespace milvus;
+using namespace milvus::storage;
+
+const string
+get_default_bucket_name() {
+ return "a-bucket";
+}
+
+StorageConfig
+get_default_remote_storage_config() {
+ StorageConfig storage_config;
+ storage_config.storage_type = "remote";
+ storage_config.address = "localhost:9000";
+ char const* tmp = getenv("MINIO_ADDRESS");
+ if (tmp != NULL) {
+ storage_config.address = string(tmp);
+ }
+ storage_config.bucket_name = get_default_bucket_name();
+ storage_config.access_key_id = "minioadmin";
+ storage_config.access_key_value = "minioadmin";
+ storage_config.root_path = "files";
+ storage_config.storage_type = "remote";
+ storage_config.cloud_provider = "";
+ storage_config.useSSL = false;
+ storage_config.useIAM = false;
+ return storage_config;
+}
+
+class RemoteChunkManagerTest : public testing::Test {
+ public:
+ RemoteChunkManagerTest() {
+ }
+ ~RemoteChunkManagerTest() {
+ }
+
+ virtual void
+ SetUp() {
+ configs_ = get_default_remote_storage_config();
+ aws_chunk_manager_ = make_unique<AwsChunkManager>(configs_);
+ chunk_manager_ptr_ = CreateChunkManager(configs_);
+ }
+
+ protected:
+ std::unique_ptr<AwsChunkManager> aws_chunk_manager_;
+ ChunkManagerPtr chunk_manager_ptr_;
+ StorageConfig configs_;
+};
+
+TEST_F(RemoteChunkManagerTest, BasicFunctions) {
+ EXPECT_TRUE(aws_chunk_manager_->GetName() == "AwsChunkManager");
+ EXPECT_TRUE(chunk_manager_ptr_->GetName() == "MinioChunkManager");
+
+ ChunkManagerPtr the_chunk_manager_;
+ configs_.cloud_provider = "aws";
+ the_chunk_manager_ = CreateChunkManager(configs_);
+ 
EXPECT_TRUE(the_chunk_manager_->GetName() == "AwsChunkManager"); + + configs_.cloud_provider = "gcp"; + the_chunk_manager_ = CreateChunkManager(configs_); + EXPECT_TRUE(the_chunk_manager_->GetName() == "GcpChunkManager"); + + configs_.cloud_provider = "aliyun"; + the_chunk_manager_ = CreateChunkManager(configs_); + EXPECT_TRUE(the_chunk_manager_->GetName() == "AliyunChunkManager"); + +#ifdef AZURE_BUILD_DIR + configs_.cloud_provider = "azure"; + the_chunk_manager_ = CreateChunkManager(configs_); + EXPECT_TRUE(the_chunk_manager_->GetName() == "AzureChunkManager"); +#endif + + configs_.cloud_provider = ""; +} + +TEST_F(RemoteChunkManagerTest, BucketPositive) { + string testBucketName = get_default_bucket_name(); + aws_chunk_manager_->SetBucketName(testBucketName); + bool exist = aws_chunk_manager_->BucketExists(testBucketName); + EXPECT_EQ(exist, false); + aws_chunk_manager_->CreateBucket(testBucketName); + exist = aws_chunk_manager_->BucketExists(testBucketName); + EXPECT_EQ(exist, true); + aws_chunk_manager_->DeleteBucket(testBucketName); +} + +TEST_F(RemoteChunkManagerTest, BucketNegtive) { + string testBucketName = get_default_bucket_name(); + aws_chunk_manager_->SetBucketName(testBucketName); + aws_chunk_manager_->DeleteBucket(testBucketName); + + // create already exist bucket + aws_chunk_manager_->CreateBucket(testBucketName); + try { + aws_chunk_manager_->CreateBucket(testBucketName); + } catch (SegcoreError& e) { + EXPECT_TRUE(std::string(e.what()).find("exists") != + string::npos); + } + aws_chunk_manager_->DeleteBucket(testBucketName); +} + +TEST_F(RemoteChunkManagerTest, ObjectExist) { + string testBucketName = get_default_bucket_name(); + string objPath = "1/3"; + aws_chunk_manager_->SetBucketName(testBucketName); + if (!aws_chunk_manager_->BucketExists(testBucketName)) { + aws_chunk_manager_->CreateBucket(testBucketName); + } + + bool exist = aws_chunk_manager_->Exist(objPath); + EXPECT_EQ(exist, false); + exist = chunk_manager_ptr_->Exist(objPath); + EXPECT_EQ(exist, false); + aws_chunk_manager_->DeleteBucket(testBucketName); +} + +TEST_F(RemoteChunkManagerTest, WritePositive) { + string testBucketName = get_default_bucket_name(); + aws_chunk_manager_->SetBucketName(testBucketName); + EXPECT_EQ(aws_chunk_manager_->GetBucketName(), testBucketName); + + if (!aws_chunk_manager_->BucketExists(testBucketName)) { + aws_chunk_manager_->CreateBucket(testBucketName); + } + uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23}; + string path = "1"; + aws_chunk_manager_->Write(path, data, sizeof(data)); + + bool exist = aws_chunk_manager_->Exist(path); + EXPECT_EQ(exist, true); + + auto size = aws_chunk_manager_->Size(path); + EXPECT_EQ(size, 5); + + int datasize = 10000; + uint8_t* bigdata = new uint8_t[datasize]; + srand((unsigned)time(NULL)); + for (int i = 0; i < datasize; ++i) { + bigdata[i] = rand() % 256; + } + aws_chunk_manager_->Write(path, bigdata, datasize); + size = aws_chunk_manager_->Size(path); + EXPECT_EQ(size, datasize); + delete[] bigdata; + + aws_chunk_manager_->Remove(path); + aws_chunk_manager_->DeleteBucket(testBucketName); +} + +TEST_F(RemoteChunkManagerTest, ReadPositive) { + string testBucketName = get_default_bucket_name(); + aws_chunk_manager_->SetBucketName(testBucketName); + EXPECT_EQ(aws_chunk_manager_->GetBucketName(), testBucketName); + + if (!aws_chunk_manager_->BucketExists(testBucketName)) { + aws_chunk_manager_->CreateBucket(testBucketName); + } + uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23}; + string path = "1/4/6"; + aws_chunk_manager_->Write(path, 
data, sizeof(data));
+ bool exist = aws_chunk_manager_->Exist(path);
+ EXPECT_EQ(exist, true);
+ auto size = aws_chunk_manager_->Size(path);
+ EXPECT_EQ(size, sizeof(data));
+
+ uint8_t readdata[20] = {0};
+ size = aws_chunk_manager_->Read(path, readdata, sizeof(data));
+ EXPECT_EQ(size, sizeof(data));
+ EXPECT_EQ(readdata[0], 0x17);
+ EXPECT_EQ(readdata[1], 0x32);
+ EXPECT_EQ(readdata[2], 0x45);
+ EXPECT_EQ(readdata[3], 0x34);
+ EXPECT_EQ(readdata[4], 0x23);
+
+ size = aws_chunk_manager_->Read(path, readdata, 3);
+ EXPECT_EQ(size, 3);
+ EXPECT_EQ(readdata[0], 0x17);
+ EXPECT_EQ(readdata[1], 0x32);
+ EXPECT_EQ(readdata[2], 0x45);
+
+ uint8_t dataWithNULL[] = {0x17, 0x32, 0x00, 0x34, 0x23};
+ aws_chunk_manager_->Write(path, dataWithNULL, sizeof(dataWithNULL));
+ exist = aws_chunk_manager_->Exist(path);
+ EXPECT_EQ(exist, true);
+ size = aws_chunk_manager_->Size(path);
+ EXPECT_EQ(size, sizeof(dataWithNULL));
+ size = aws_chunk_manager_->Read(path, readdata, sizeof(dataWithNULL));
+ EXPECT_EQ(size, sizeof(dataWithNULL));
+ EXPECT_EQ(readdata[0], 0x17);
+ EXPECT_EQ(readdata[1], 0x32);
+ EXPECT_EQ(readdata[2], 0x00);
+ EXPECT_EQ(readdata[3], 0x34);
+ EXPECT_EQ(readdata[4], 0x23);
+
+ aws_chunk_manager_->Remove(path);
+ aws_chunk_manager_->DeleteBucket(testBucketName);
+}
+
+TEST_F(RemoteChunkManagerTest, RemovePositive) {
+ string testBucketName = get_default_bucket_name();
+ aws_chunk_manager_->SetBucketName(testBucketName);
+ EXPECT_EQ(aws_chunk_manager_->GetBucketName(), testBucketName);
+
+ if (!aws_chunk_manager_->BucketExists(testBucketName)) {
+ aws_chunk_manager_->CreateBucket(testBucketName);
+ }
+ uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
+ string path = "1/7/8";
+ aws_chunk_manager_->Write(path, data, sizeof(data));
+
+ bool exist = aws_chunk_manager_->Exist(path);
+ EXPECT_EQ(exist, true);
+
+ aws_chunk_manager_->Remove(path);
+
+ exist = aws_chunk_manager_->Exist(path);
+ EXPECT_EQ(exist, false);
+
+ aws_chunk_manager_->DeleteBucket(testBucketName);
+}
+
+TEST_F(RemoteChunkManagerTest, ListWithPrefixPositive) {
+ string testBucketName = get_default_bucket_name();
+ aws_chunk_manager_->SetBucketName(testBucketName);
+ EXPECT_EQ(aws_chunk_manager_->GetBucketName(), testBucketName);
+
+ if (!aws_chunk_manager_->BucketExists(testBucketName)) {
+ aws_chunk_manager_->CreateBucket(testBucketName);
+ }
+
+ string path1 = "1/7/8";
+ string path2 = "1/7/4";
+ string path3 = "1/4/8";
+ uint8_t data[5] = {0x17, 0x32, 0x45, 0x34, 0x23};
+ aws_chunk_manager_->Write(path1, data, sizeof(data));
+ aws_chunk_manager_->Write(path2, data, sizeof(data));
+ aws_chunk_manager_->Write(path3, data, sizeof(data));
+
+ vector<string> objs = aws_chunk_manager_->ListWithPrefix("1/7");
+ EXPECT_EQ(objs.size(), 2);
+ std::sort(objs.begin(), objs.end());
+ EXPECT_EQ(objs[0], "1/7/4");
+ EXPECT_EQ(objs[1], "1/7/8");
+
+ objs = aws_chunk_manager_->ListWithPrefix("//1/7");
+ EXPECT_EQ(objs.size(), 2);
+
+ objs = aws_chunk_manager_->ListWithPrefix("1");
+ EXPECT_EQ(objs.size(), 3);
+ std::sort(objs.begin(), objs.end());
+ EXPECT_EQ(objs[0], "1/4/8");
+ EXPECT_EQ(objs[1], "1/7/4");
+
+ aws_chunk_manager_->Remove(path1);
+ aws_chunk_manager_->Remove(path2);
+ aws_chunk_manager_->Remove(path3);
+ aws_chunk_manager_->DeleteBucket(testBucketName);
+}
diff --git a/internal/datacoord/garbage_collector.go b/internal/datacoord/garbage_collector.go
index 5fb69fa935..7b8e587593 100644
--- a/internal/datacoord/garbage_collector.go
+++ b/internal/datacoord/garbage_collector.go
@@ -37,13 +37,6 @@ import (
"github.com/milvus-io/milvus/pkg/util/typeutil" ) -const ( - //TODO silverxia change to configuration - insertLogPrefix = `insert_log` - statsLogPrefix = `stats_log` - deltaLogPrefix = `delta_log` -) - // GcOption garbage collection options type GcOption struct { cli storage.ChunkManager // client @@ -143,9 +136,9 @@ func (gc *garbageCollector) scan() { // walk only data cluster related prefixes prefixes := make([]string, 0, 3) - prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), insertLogPrefix)) - prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), statsLogPrefix)) - prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), deltaLogPrefix)) + prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentInsertLogPath)) + prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentStatslogPath)) + prefixes = append(prefixes, path.Join(gc.option.cli.RootPath(), common.SegmentDeltaLogPath)) var removedKeys []string for _, prefix := range prefixes { @@ -175,7 +168,7 @@ func (gc *garbageCollector) scan() { continue } - if strings.Contains(prefix, statsLogPrefix) && + if strings.Contains(prefix, common.SegmentInsertLogPath) && segmentMap.Contain(segmentID) { valid++ continue diff --git a/internal/datacoord/garbage_collector_test.go b/internal/datacoord/garbage_collector_test.go index eb62d6d597..a8aac6fd3a 100644 --- a/internal/datacoord/garbage_collector_test.go +++ b/internal/datacoord/garbage_collector_test.go @@ -26,6 +26,8 @@ import ( "testing" "time" + "github.com/milvus-io/milvus/pkg/common" + "github.com/cockroachdb/errors" minio "github.com/minio/minio-go/v7" "github.com/minio/minio-go/v7/pkg/credentials" @@ -122,9 +124,9 @@ func Test_garbageCollector_scan(t *testing.T) { }) gc.scan() - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta) validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others) gc.close() }) @@ -139,9 +141,9 @@ func Test_garbageCollector_scan(t *testing.T) { }) gc.scan() - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta) validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others) gc.close() @@ -164,9 +166,9 @@ func Test_garbageCollector_scan(t *testing.T) { }) gc.start() gc.scan() - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, 
insertLogPrefix), inserts) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta) validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others) gc.close() @@ -191,9 +193,9 @@ func Test_garbageCollector_scan(t *testing.T) { dropTolerance: 0, }) gc.clearEtcd() - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:]) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:]) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:]) validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others) gc.close() @@ -211,9 +213,9 @@ func Test_garbageCollector_scan(t *testing.T) { gc.clearEtcd() // bad path shall remains since datacoord cannot determine file is garbage or not if path is not valid - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:2]) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:2]) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:2]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2]) validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others) gc.close() @@ -231,9 +233,9 @@ func Test_garbageCollector_scan(t *testing.T) { gc.scan() // bad path shall remains since datacoord cannot determine file is garbage or not if path is not valid - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, insertLogPrefix), inserts[1:2]) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, statsLogPrefix), stats[1:2]) - validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, deltaLogPrefix), delta[1:2]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentInsertLogPath), inserts[1:2]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentStatslogPath), stats[1:2]) + validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, common.SegmentDeltaLogPath), delta[1:2]) validateMinioPrefixElements(t, cli.Client, bucketName, path.Join(rootPath, `indexes`), others) gc.close() @@ -280,14 +282,14 @@ func initUtOSSEnv(bucket, root string, 
n int) (mcm *storage.MinioChunkManager, i token = path.Join(strconv.Itoa(1+i), strconv.Itoa(10+i), strconv.Itoa(100+i), funcutil.RandomString(8), funcutil.RandomString(8)) } // insert - filePath := path.Join(root, insertLogPrefix, token) + filePath := path.Join(root, common.SegmentInsertLogPath, token) info, err := cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{}) if err != nil { return nil, nil, nil, nil, nil, err } inserts = append(inserts, info.Key) // stats - filePath = path.Join(root, statsLogPrefix, token) + filePath = path.Join(root, common.SegmentStatslogPath, token) info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{}) if err != nil { return nil, nil, nil, nil, nil, err @@ -300,7 +302,7 @@ func initUtOSSEnv(bucket, root string, n int) (mcm *storage.MinioChunkManager, i } else { token = path.Join(strconv.Itoa(1+i), strconv.Itoa(10+i), strconv.Itoa(100+i), funcutil.RandomString(8)) } - filePath = path.Join(root, deltaLogPrefix, token) + filePath = path.Join(root, common.SegmentDeltaLogPath, token) info, err = cli.PutObject(context.TODO(), bucket, filePath, reader, int64(len(content)), minio.PutObjectOptions{}) if err != nil { return nil, nil, nil, nil, nil, err diff --git a/internal/storage/azure_object_storage.go b/internal/storage/azure_object_storage.go new file mode 100644 index 0000000000..ef08362ba1 --- /dev/null +++ b/internal/storage/azure_object_storage.go @@ -0,0 +1,143 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
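+
+// azure_object_storage.go adapts Azure Blob Storage to the ObjectStorage
+// interface defined in remote_chunk_manager.go: a container plays the role of
+// a bucket and a block blob the role of an object.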
+ +package storage + +import ( + "context" + "fmt" + "io" + "os" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" + + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" + "github.com/milvus-io/milvus/pkg/util/retry" + + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/service" +) + +type AzureObjectStorage struct { + *service.Client +} + +func newAzureObjectStorageWithConfig(ctx context.Context, c *config) (*AzureObjectStorage, error) { + var client *service.Client + var err error + if c.useIAM { + cred, credErr := azidentity.NewWorkloadIdentityCredential(&azidentity.WorkloadIdentityCredentialOptions{ + ClientID: os.Getenv("AZURE_CLIENT_ID"), + TenantID: os.Getenv("AZURE_TENANT_ID"), + TokenFilePath: os.Getenv("AZURE_FEDERATED_TOKEN_FILE"), + }) + if credErr != nil { + return nil, credErr + } + client, err = service.NewClient("https://"+c.accessKeyID+".blob."+c.address+"/", cred, &service.ClientOptions{}) + } else { + connectionString := os.Getenv("AZURE_STORAGE_CONNECTION_STRING") + if connectionString == "" { + connectionString = "DefaultEndpointsProtocol=https;AccountName=" + c.accessKeyID + + ";AccountKey=" + c.secretAccessKeyID + ";EndpointSuffix=" + c.address + } + client, err = service.NewClientFromConnectionString(connectionString, &service.ClientOptions{}) + } + if err != nil { + return nil, err + } + if c.bucketName == "" { + return nil, fmt.Errorf("invalid bucket name") + } + // check valid in first query + checkBucketFn := func() error { + _, err := client.NewContainerClient(c.bucketName).GetProperties(ctx, &container.GetPropertiesOptions{}) + if err != nil { + switch err := err.(type) { + case *azcore.ResponseError: + if c.createBucket && err.ErrorCode == string(bloberror.ContainerNotFound) { + _, createErr := client.NewContainerClient(c.bucketName).Create(ctx, &azblob.CreateContainerOptions{}) + if createErr != nil { + return createErr + } + return nil + } + } + } + return err + } + err = retry.Do(ctx, checkBucketFn, retry.Attempts(CheckBucketRetryAttempts)) + if err != nil { + return nil, err + } + return &AzureObjectStorage{Client: client}, nil +} + +func (AzureObjectStorage *AzureObjectStorage) GetObject(ctx context.Context, bucketName, objectName string, offset int64, size int64) (FileReader, error) { + opts := azblob.DownloadStreamOptions{} + if offset > 0 { + opts.Range = azblob.HTTPRange{ + Offset: offset, + Count: size, + } + } + object, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).DownloadStream(ctx, &opts) + if err != nil { + return nil, err + } + return object.Body, nil +} + +func (AzureObjectStorage *AzureObjectStorage) PutObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64) error { + _, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).UploadStream(ctx, reader, &azblob.UploadStreamOptions{}) + return err +} + +func (AzureObjectStorage *AzureObjectStorage) StatObject(ctx context.Context, bucketName, objectName string) (int64, error) { + info, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).GetProperties(ctx, &blob.GetPropertiesOptions{}) + if err == nil { + return *info.ContentLength, err + } + return 0, err +} + 
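+// A minimal usage sketch (hypothetical caller, not part of this file):
+//
+//   store, err := newAzureObjectStorageWithConfig(ctx, cfg)
+//   if err != nil { ... }
+//   payload := []byte("hello")
+//   err = store.PutObject(ctx, "a-bucket", "files/1", bytes.NewReader(payload), int64(len(payload)))
+//   size, err := store.StatObject(ctx, "a-bucket", "files/1") // size == 5 on success
+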
+func (AzureObjectStorage *AzureObjectStorage) ListObjects(ctx context.Context, bucketName string, prefix string, recursive bool) (map[string]time.Time, error) {
+ var pager = AzureObjectStorage.Client.NewContainerClient(bucketName).NewListBlobsFlatPager(&azblob.ListBlobsFlatOptions{
+ Prefix: &prefix,
+ })
+ objects := map[string]time.Time{}
+ // walk every page: a single NextPage call returns at most one page of results
+ for pager.More() {
+ pageResp, err := pager.NextPage(ctx)
+ if err != nil {
+ return nil, err
+ }
+ for _, blob := range pageResp.Segment.BlobItems {
+ objects[*blob.Name] = *blob.Properties.LastModified
+ }
+ }
+ return objects, nil
+}
+
+func (AzureObjectStorage *AzureObjectStorage) RemoveObject(ctx context.Context, bucketName, objectName string) error {
+ _, err := AzureObjectStorage.Client.NewContainerClient(bucketName).NewBlockBlobClient(objectName).Delete(ctx, &blob.DeleteOptions{})
+ return err
+}
diff --git a/internal/storage/azure_object_storage_test.go b/internal/storage/azure_object_storage_test.go
new file mode 100644
index 0000000000..a2538465c7
--- /dev/null
+++ b/internal/storage/azure_object_storage_test.go
@@ -0,0 +1,167 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
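+
+// These tests assume a reachable blob endpoint: either a real Azure account or
+// the Azurite emulator configured through AZURE_STORAGE_CONNECTION_STRING (the
+// docker-compose `azurite` service).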
+ +package storage + +import ( + "bytes" + "context" + "io" + "os" + "testing" + + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAzureObjectStorage(t *testing.T) { + ctx := context.Background() + bucketName := Params.MinioCfg.BucketName.GetValue() + config := config{ + bucketName: bucketName, + createBucket: true, + useIAM: false, + cloudProvider: "azure", + } + + t.Run("test initialize", func(t *testing.T) { + var err error + config.bucketName = "" + _, err = newAzureObjectStorageWithConfig(ctx, &config) + assert.Error(t, err) + config.bucketName = bucketName + _, err = newAzureObjectStorageWithConfig(ctx, &config) + assert.Equal(t, err, nil) + }) + + t.Run("test load", func(t *testing.T) { + + testCM, err := newAzureObjectStorageWithConfig(ctx, &config) + assert.Equal(t, err, nil) + defer testCM.DeleteContainer(ctx, config.bucketName, &azblob.DeleteContainerOptions{}) + + prepareTests := []struct { + key string + value []byte + }{ + {"abc", []byte("123")}, + {"abcd", []byte("1234")}, + {"key_1", []byte("111")}, + {"key_2", []byte("222")}, + {"key_3", []byte("333")}, + } + + for _, test := range prepareTests { + err := testCM.PutObject(ctx, config.bucketName, test.key, bytes.NewReader(test.value), int64(len(test.value))) + require.NoError(t, err) + } + + loadTests := []struct { + isvalid bool + loadKey string + expectedValue []byte + + description string + }{ + {true, "abc", []byte("123"), "load valid key abc"}, + {true, "abcd", []byte("1234"), "load valid key abcd"}, + {true, "key_1", []byte("111"), "load valid key key_1"}, + {true, "key_2", []byte("222"), "load valid key key_2"}, + {true, "key_3", []byte("333"), "load valid key key_3"}, + {false, "key_not_exist", []byte(""), "load invalid key key_not_exist"}, + {false, "/", []byte(""), "load leading slash"}, + } + + for _, test := range loadTests { + t.Run(test.description, func(t *testing.T) { + if test.isvalid { + got, err := testCM.GetObject(ctx, config.bucketName, test.loadKey, 0, 1024) + assert.NoError(t, err) + contentData, err := io.ReadAll(got) + assert.NoError(t, err) + assert.Equal(t, len(contentData), len(test.expectedValue)) + assert.Equal(t, test.expectedValue, contentData) + statSize, err := testCM.StatObject(ctx, config.bucketName, test.loadKey) + assert.NoError(t, err) + assert.Equal(t, statSize, int64(len(contentData))) + _, err = testCM.GetObject(ctx, config.bucketName, test.loadKey, 1, 1023) + assert.NoError(t, err) + } else { + if test.loadKey == "/" { + got, err := testCM.GetObject(ctx, config.bucketName, test.loadKey, 0, 1024) + assert.Error(t, err) + assert.Empty(t, got) + return + } + got, err := testCM.GetObject(ctx, config.bucketName, test.loadKey, 0, 1024) + assert.Error(t, err) + assert.Empty(t, got) + } + }) + } + + loadWithPrefixTests := []struct { + isvalid bool + prefix string + expectedValue [][]byte + + description string + }{ + {true, "abc", [][]byte{[]byte("123"), []byte("1234")}, "load with valid prefix abc"}, + {true, "key_", [][]byte{[]byte("111"), []byte("222"), []byte("333")}, "load with valid prefix key_"}, + {true, "prefix", [][]byte{}, "load with valid but not exist prefix prefix"}, + } + + for _, test := range loadWithPrefixTests { + t.Run(test.description, func(t *testing.T) { + gotk, err := testCM.ListObjects(ctx, config.bucketName, test.prefix, false) + assert.NoError(t, err) + assert.Equal(t, len(test.expectedValue), len(gotk)) + for key := range gotk { + err := testCM.RemoveObject(ctx, 
config.bucketName, key) + assert.NoError(t, err) + } + }) + } + + }) + + t.Run("test useIAM", func(t *testing.T) { + var err error + config.useIAM = true + _, err = newAzureObjectStorageWithConfig(ctx, &config) + assert.Error(t, err) + os.Setenv("AZURE_CLIENT_ID", "00000000-0000-0000-0000-00000000000") + os.Setenv("AZURE_TENANT_ID", "00000000-0000-0000-0000-00000000000") + os.Setenv("AZURE_FEDERATED_TOKEN_FILE", "/var/run/secrets/tokens/azure-identity-token") + _, err = newAzureObjectStorageWithConfig(ctx, &config) + assert.Error(t, err) + config.useIAM = false + }) + + t.Run("test key secret", func(t *testing.T) { + var err error + connectionString := os.Getenv("AZURE_STORAGE_CONNECTION_STRING") + os.Setenv("AZURE_STORAGE_CONNECTION_STRING", "") + config.accessKeyID = "devstoreaccount1" + config.secretAccessKeyID = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + config.address = "core.windows.net" + _, err = newAzureObjectStorageWithConfig(ctx, &config) + assert.Error(t, err) + os.Setenv("AZURE_STORAGE_CONNECTION_STRING", connectionString) + }) +} diff --git a/internal/storage/factory.go b/internal/storage/factory.go index 293e32fe89..1fb0387642 100644 --- a/internal/storage/factory.go +++ b/internal/storage/factory.go @@ -17,7 +17,7 @@ func NewChunkManagerFactoryWithParam(params *paramtable.ComponentParam) *ChunkMa if params.CommonCfg.StorageType.GetValue() == "local" { return NewChunkManagerFactory("local", RootPath(params.LocalStorageCfg.Path.GetValue())) } - return NewChunkManagerFactory("minio", + return NewChunkManagerFactory(params.CommonCfg.StorageType.GetValue(), RootPath(params.MinioCfg.RootPath.GetValue()), Address(params.MinioCfg.Address.GetValue()), AccessKeyID(params.MinioCfg.AccessKeyID.GetValue()), @@ -49,6 +49,8 @@ func (f *ChunkManagerFactory) newChunkManager(ctx context.Context, engine string return NewLocalChunkManager(RootPath(f.config.rootPath)), nil case "minio": return newMinioChunkManagerWithConfig(ctx, f.config) + case "remote": + return NewRemoteChunkManager(ctx, f.config) default: return nil, errors.New("no chunk manager implemented with engine: " + engine) } diff --git a/internal/storage/minio_chunk_manager.go b/internal/storage/minio_chunk_manager.go index 635d6fee97..be983f05d5 100644 --- a/internal/storage/minio_chunk_manager.go +++ b/internal/storage/minio_chunk_manager.go @@ -40,20 +40,20 @@ import ( "golang.org/x/sync/errgroup" ) -var ( - ErrNoSuchKey = errors.New("NoSuchKey") -) +const NoSuchKey = "NoSuchKey" -const ( - CloudProviderGCP = "gcp" - CloudProviderAWS = "aws" - CloudProviderAliyun = "aliyun" +var ( + ErrNoSuchKey = errors.New(NoSuchKey) ) func WrapErrNoSuchKey(key string) error { return fmt.Errorf("%w(key=%s)", ErrNoSuchKey, key) } +func IsErrNoSuchKey(err error) bool { + return strings.HasPrefix(err.Error(), NoSuchKey) +} + var CheckBucketRetryAttempts uint = 20 // MinioChunkManager is responsible for read and write data stored in minio. diff --git a/internal/storage/minio_object_storage.go b/internal/storage/minio_object_storage.go new file mode 100644 index 0000000000..a66e3b32ca --- /dev/null +++ b/internal/storage/minio_object_storage.go @@ -0,0 +1,149 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package storage
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "time"
+
+ "github.com/milvus-io/milvus/internal/storage/aliyun"
+ "github.com/milvus-io/milvus/internal/storage/gcp"
+ "github.com/milvus-io/milvus/pkg/util/retry"
+ "github.com/minio/minio-go/v7/pkg/credentials"
+
+ "github.com/milvus-io/milvus/pkg/log"
+ minio "github.com/minio/minio-go/v7"
+ "go.uber.org/zap"
+)
+
+type MinioObjectStorage struct {
+ *minio.Client
+}
+
+func newMinioObjectStorageWithConfig(ctx context.Context, c *config) (*MinioObjectStorage, error) {
+ var creds *credentials.Credentials
+ var newMinioFn = minio.New
+ var bucketLookupType = minio.BucketLookupAuto
+
+ switch c.cloudProvider {
+ case CloudProviderAliyun:
+ // auto doesn't work for aliyun, so we set to dns deliberately
+ bucketLookupType = minio.BucketLookupDNS
+ if c.useIAM {
+ newMinioFn = aliyun.NewMinioClient
+ } else {
+ creds = credentials.NewStaticV4(c.accessKeyID, c.secretAccessKeyID, "")
+ }
+ case CloudProviderGCP:
+ newMinioFn = gcp.NewMinioClient
+ if !c.useIAM {
+ creds = credentials.NewStaticV2(c.accessKeyID, c.secretAccessKeyID, "")
+ }
+ default: // aws, minio
+ if c.useIAM {
+ creds = credentials.NewIAM("")
+ } else {
+ creds = credentials.NewStaticV4(c.accessKeyID, c.secretAccessKeyID, "")
+ }
+ }
+ minioOpts := &minio.Options{
+ BucketLookup: bucketLookupType,
+ Creds: creds,
+ Secure: c.useSSL,
+ }
+ minIOClient, err := newMinioFn(c.address, minioOpts)
+ // nil options or an invalidly formatted endpoint fail immediately; no need to retry
+ if err != nil {
+ return nil, err
+ }
+ var bucketExists bool
+ // check valid in first query
+ checkBucketFn := func() error {
+ bucketExists, err = minIOClient.BucketExists(ctx, c.bucketName)
+ if err != nil {
+ log.Warn("failed to check blob bucket exist", zap.String("bucket", c.bucketName), zap.Error(err))
+ return err
+ }
+ if !bucketExists {
+ if c.createBucket {
+ log.Info("blob bucket does not exist, creating bucket.", zap.Any("bucket name", c.bucketName))
+ err := minIOClient.MakeBucket(ctx, c.bucketName, minio.MakeBucketOptions{})
+ if err != nil {
+ log.Warn("failed to create blob bucket", zap.String("bucket", c.bucketName), zap.Error(err))
+ return err
+ }
+ } else {
+ return fmt.Errorf("bucket %s does not exist", c.bucketName)
+ }
+ }
+ return nil
+ }
+ err = retry.Do(ctx, checkBucketFn, retry.Attempts(CheckBucketRetryAttempts))
+ if err != nil {
+ return nil, err
+ }
+
+ return &MinioObjectStorage{minIOClient}, nil
+}
+
+func (minioObjectStorage *MinioObjectStorage) GetObject(ctx context.Context, bucketName, objectName string, offset int64, size int64) (FileReader, error) {
+ opts := minio.GetObjectOptions{}
+ if offset > 0 {
+ err := opts.SetRange(offset, offset+size-1)
+ if err != nil {
+ log.Warn("failed to set range", zap.String("bucket", bucketName), zap.String("path", objectName), zap.Error(err))
+ return nil, err
+ }
+ }
+ object, err := minioObjectStorage.Client.GetObject(ctx, bucketName, objectName, opts)
+ if err != nil {
+ return nil, err + } + return object, nil +} + +func (minioObjectStorage *MinioObjectStorage) PutObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64) error { + _, err := minioObjectStorage.Client.PutObject(ctx, bucketName, objectName, reader, objectSize, minio.PutObjectOptions{}) + return err +} + +func (minioObjectStorage *MinioObjectStorage) StatObject(ctx context.Context, bucketName, objectName string) (int64, error) { + info, err := minioObjectStorage.Client.StatObject(ctx, bucketName, objectName, minio.StatObjectOptions{}) + return info.Size, err +} + +func (minioObjectStorage *MinioObjectStorage) ListObjects(ctx context.Context, bucketName string, prefix string, recursive bool) (map[string]time.Time, error) { + res := minioObjectStorage.Client.ListObjects(ctx, bucketName, minio.ListObjectsOptions{ + Prefix: prefix, + Recursive: recursive, + }) + + objects := map[string]time.Time{} + for object := range res { + if !recursive && object.Err != nil { + return map[string]time.Time{}, object.Err + } + objects[object.Key] = object.LastModified + } + return objects, nil +} + +func (minioObjectStorage *MinioObjectStorage) RemoveObject(ctx context.Context, bucketName, objectName string) error { + return minioObjectStorage.Client.RemoveObject(ctx, bucketName, objectName, minio.RemoveObjectOptions{}) +} diff --git a/internal/storage/minio_object_storage_test.go b/internal/storage/minio_object_storage_test.go new file mode 100644 index 0000000000..b3ea6bb68d --- /dev/null +++ b/internal/storage/minio_object_storage_test.go @@ -0,0 +1,171 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
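+
+// These tests assume a MinIO server reachable at Params.MinioCfg.Address (the
+// docker-compose `minio` service).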
+ +package storage + +import ( + "bytes" + "context" + "io" + "testing" + + "github.com/minio/minio-go/v7" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMinioObjectStorage(t *testing.T) { + ctx := context.Background() + config := config{ + address: Params.MinioCfg.Address.GetValue(), + accessKeyID: Params.MinioCfg.AccessKeyID.GetValue(), + secretAccessKeyID: Params.MinioCfg.SecretAccessKey.GetValue(), + rootPath: Params.MinioCfg.RootPath.GetValue(), + + bucketName: Params.MinioCfg.BucketName.GetValue(), + createBucket: true, + useIAM: false, + cloudProvider: "minio", + } + + t.Run("test initialize", func(t *testing.T) { + var err error + bucketName := config.bucketName + config.bucketName = "" + _, err = newMinioObjectStorageWithConfig(ctx, &config) + assert.Error(t, err) + config.bucketName = bucketName + _, err = newMinioObjectStorageWithConfig(ctx, &config) + assert.Equal(t, err, nil) + }) + + t.Run("test load", func(t *testing.T) { + + testCM, err := newMinioObjectStorageWithConfig(ctx, &config) + assert.Equal(t, err, nil) + defer testCM.RemoveBucket(ctx, config.bucketName) + + prepareTests := []struct { + key string + value []byte + }{ + {"abc", []byte("123")}, + {"abcd", []byte("1234")}, + {"key_1", []byte("111")}, + {"key_2", []byte("222")}, + {"key_3", []byte("333")}, + } + + for _, test := range prepareTests { + err := testCM.PutObject(ctx, config.bucketName, test.key, bytes.NewReader(test.value), int64(len(test.value))) + require.NoError(t, err) + } + + loadTests := []struct { + isvalid bool + loadKey string + expectedValue []byte + + description string + }{ + {true, "abc", []byte("123"), "load valid key abc"}, + {true, "abcd", []byte("1234"), "load valid key abcd"}, + {true, "key_1", []byte("111"), "load valid key key_1"}, + {true, "key_2", []byte("222"), "load valid key key_2"}, + {true, "key_3", []byte("333"), "load valid key key_3"}, + {false, "key_not_exist", []byte(""), "load invalid key key_not_exist"}, + {false, "/", []byte(""), "load leading slash"}, + } + + for _, test := range loadTests { + t.Run(test.description, func(t *testing.T) { + if test.isvalid { + got, err := testCM.GetObject(ctx, config.bucketName, test.loadKey, 0, 1024) + assert.NoError(t, err) + contentData, err := io.ReadAll(got) + assert.NoError(t, err) + assert.Equal(t, len(contentData), len(test.expectedValue)) + assert.Equal(t, test.expectedValue, contentData) + statSize, err := testCM.StatObject(ctx, config.bucketName, test.loadKey) + assert.NoError(t, err) + assert.Equal(t, statSize, int64(len(contentData))) + _, err = testCM.GetObject(ctx, config.bucketName, test.loadKey, 1, 1023) + assert.NoError(t, err) + } else { + got, err := testCM.GetObject(ctx, config.bucketName, test.loadKey, 0, 1024) + assert.NoError(t, err) + _, err = io.ReadAll(got) + errResponse := minio.ToErrorResponse(err) + if test.loadKey == "/" { + assert.Equal(t, errResponse.Code, "XMinioInvalidObjectName") + } else { + assert.Equal(t, errResponse.Code, "NoSuchKey") + } + } + }) + } + + loadWithPrefixTests := []struct { + isvalid bool + prefix string + expectedValue [][]byte + + description string + }{ + {true, "abc", [][]byte{[]byte("123"), []byte("1234")}, "load with valid prefix abc"}, + {true, "key_", [][]byte{[]byte("111"), []byte("222"), []byte("333")}, "load with valid prefix key_"}, + {true, "prefix", [][]byte{}, "load with valid but not exist prefix prefix"}, + } + + for _, test := range loadWithPrefixTests { + t.Run(test.description, func(t *testing.T) { + gotk, err := 
testCM.ListObjects(ctx, config.bucketName, test.prefix, false) + assert.NoError(t, err) + assert.Equal(t, len(test.expectedValue), len(gotk)) + for key := range gotk { + err := testCM.RemoveObject(ctx, config.bucketName, key) + assert.NoError(t, err) + } + }) + } + + }) + + t.Run("test useIAM", func(t *testing.T) { + var err error + config.useIAM = true + _, err = newMinioObjectStorageWithConfig(ctx, &config) + assert.Error(t, err) + config.useIAM = false + }) + + t.Run("test cloud provider", func(t *testing.T) { + var err error + cloudProvider := config.cloudProvider + config.cloudProvider = "aliyun" + config.useIAM = true + _, err = newMinioObjectStorageWithConfig(ctx, &config) + assert.Error(t, err) + config.useIAM = false + _, err = newMinioObjectStorageWithConfig(ctx, &config) + assert.Error(t, err) + config.cloudProvider = "gcp" + _, err = newMinioObjectStorageWithConfig(ctx, &config) + assert.NoError(t, err) + config.cloudProvider = cloudProvider + }) +} diff --git a/internal/storage/remote_chunk_manager.go b/internal/storage/remote_chunk_manager.go new file mode 100644 index 0000000000..c2ab2b5576 --- /dev/null +++ b/internal/storage/remote_chunk_manager.go @@ -0,0 +1,458 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "bytes" + "container/list" + "context" + "io" + "strings" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror" + + "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/metrics" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/timerecord" + minio "github.com/minio/minio-go/v7" + "go.uber.org/zap" + "golang.org/x/exp/mmap" + "golang.org/x/sync/errgroup" +) + +const ( + CloudProviderGCP = "gcp" + CloudProviderAWS = "aws" + CloudProviderAliyun = "aliyun" + + CloudProviderAzure = "azure" +) + +type ObjectStorage interface { + GetObject(ctx context.Context, bucketName, objectName string, offset int64, size int64) (FileReader, error) + PutObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64) error + StatObject(ctx context.Context, bucketName, objectName string) (int64, error) + ListObjects(ctx context.Context, bucketName string, prefix string, recursive bool) (map[string]time.Time, error) + RemoveObject(ctx context.Context, bucketName, objectName string) error +} + +// RemoteChunkManager is responsible for read and write data stored in minio. 
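+// More precisely, it delegates object I/O to an ObjectStorage implementation
+// selected by config.cloudProvider: Azure Blob Storage for "azure", otherwise
+// a MinIO-compatible S3 client.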
+type RemoteChunkManager struct {
+ client ObjectStorage
+
+ // ctx context.Context
+ bucketName string
+ rootPath string
+}
+
+var _ ChunkManager = (*RemoteChunkManager)(nil)
+
+func NewRemoteChunkManager(ctx context.Context, c *config) (*RemoteChunkManager, error) {
+ var client ObjectStorage
+ var err error
+ if c.cloudProvider == CloudProviderAzure {
+ client, err = newAzureObjectStorageWithConfig(ctx, c)
+ } else {
+ client, err = newMinioObjectStorageWithConfig(ctx, c)
+ }
+ if err != nil {
+ return nil, err
+ }
+ mcm := &RemoteChunkManager{
+ client: client,
+ bucketName: c.bucketName,
+ rootPath: strings.TrimLeft(c.rootPath, "/"),
+ }
+ log.Info("remote chunk manager init success.", zap.String("remote", c.cloudProvider), zap.String("bucketname", c.bucketName), zap.String("root", mcm.RootPath()))
+ return mcm, nil
+}
+
+// RootPath returns the remote root path.
+func (mcm *RemoteChunkManager) RootPath() string {
+ return mcm.rootPath
+}
+
+// Path returns the path of the remote data if it exists.
+func (mcm *RemoteChunkManager) Path(ctx context.Context, filePath string) (string, error) {
+ exist, err := mcm.Exist(ctx, filePath)
+ if err != nil {
+ return "", err
+ }
+ if !exist {
+ return "", errors.New("remote file cannot be found with filePath:" + filePath)
+ }
+ return filePath, nil
+}
+
+// Reader returns a FileReader for the remote data if it exists.
+func (mcm *RemoteChunkManager) Reader(ctx context.Context, filePath string) (FileReader, error) {
+ reader, err := mcm.getObject(ctx, mcm.bucketName, filePath, int64(0), int64(0))
+ if err != nil {
+ log.Warn("failed to get object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
+ return nil, err
+ }
+ return reader, nil
+}
+
+func (mcm *RemoteChunkManager) Size(ctx context.Context, filePath string) (int64, error) {
+ objectInfo, err := mcm.getObjectSize(ctx, mcm.bucketName, filePath)
+ if err != nil {
+ log.Warn("failed to stat object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
+ return 0, err
+ }
+
+ return objectInfo, nil
+}
+
+// Write writes the data to remote storage.
+func (mcm *RemoteChunkManager) Write(ctx context.Context, filePath string, content []byte) error {
+ err := mcm.putObject(ctx, mcm.bucketName, filePath, bytes.NewReader(content), int64(len(content)))
+
+ if err != nil {
+ log.Warn("failed to put object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
+ return err
+ }
+
+ metrics.PersistentDataKvSize.WithLabelValues(metrics.DataPutLabel).Observe(float64(len(content)))
+ return nil
+}
+
+// MultiWrite saves multiple objects, the path is the key of @kvs.
+// The object value is the value of @kvs.
+func (mcm *RemoteChunkManager) MultiWrite(ctx context.Context, kvs map[string][]byte) error {
+ var el error
+ for key, value := range kvs {
+ err := mcm.Write(ctx, key, value)
+ if err != nil {
+ el = merr.Combine(el, errors.Wrapf(err, "failed to write %s", key))
+ }
+ }
+ return el
+}
+
+// Exist checks whether chunk is saved to remote storage.
+func (mcm *RemoteChunkManager) Exist(ctx context.Context, filePath string) (bool, error) {
+ _, err := mcm.getObjectSize(ctx, mcm.bucketName, filePath)
+ if err != nil {
+ if IsErrNoSuchKey(err) {
+ return false, nil
+ }
+ log.Warn("failed to stat object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
+ return false, err
+ }
+ return true, nil
+}
+
+// Read reads the remote storage data if it exists.
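+// It fetches the whole object via getObject and maps the provider's
+// NoSuchKey error to ErrNoSuchKey.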
+func (mcm *RemoteChunkManager) Read(ctx context.Context, filePath string) ([]byte, error) {
+	object, err := mcm.getObject(ctx, mcm.bucketName, filePath, int64(0), int64(0))
+	if err != nil {
+		log.Warn("failed to get object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
+		return nil, err
+	}
+	defer object.Close()
+
+	// Prefetch object data
+	var empty []byte
+	_, err = object.Read(empty)
+	if err != nil {
+		errResponse := minio.ToErrorResponse(err)
+		if errResponse.Code == "NoSuchKey" {
+			return nil, WrapErrNoSuchKey(filePath)
+		}
+		log.Warn("failed to read object", zap.String("path", filePath), zap.Error(err))
+		return nil, err
+	}
+	size, err := mcm.getObjectSize(ctx, mcm.bucketName, filePath)
+	if err != nil {
+		log.Warn("failed to stat object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
+		return nil, err
+	}
+	data, err := Read(object, size)
+	if err != nil {
+		errResponse := minio.ToErrorResponse(err)
+		if errResponse.Code == "NoSuchKey" {
+			return nil, WrapErrNoSuchKey(filePath)
+		}
+		log.Warn("failed to read object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
+		return nil, err
+	}
+	metrics.PersistentDataKvSize.WithLabelValues(metrics.DataGetLabel).Observe(float64(size))
+	return data, nil
+}
+
+func (mcm *RemoteChunkManager) MultiRead(ctx context.Context, keys []string) ([][]byte, error) {
+	var el error
+	var objectsValues [][]byte
+	for _, key := range keys {
+		objectValue, err := mcm.Read(ctx, key)
+		if err != nil {
+			el = merr.Combine(el, errors.Wrapf(err, "failed to read %s", key))
+		}
+		objectsValues = append(objectsValues, objectValue)
+	}
+
+	return objectsValues, el
+}
+
+func (mcm *RemoteChunkManager) ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error) {
+	objectsKeys, _, err := mcm.ListWithPrefix(ctx, prefix, true)
+	if err != nil {
+		return nil, nil, err
+	}
+	objectsValues, err := mcm.MultiRead(ctx, objectsKeys)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	return objectsKeys, objectsValues, nil
+}
+
+func (mcm *RemoteChunkManager) Mmap(ctx context.Context, filePath string) (*mmap.ReaderAt, error) {
+	return nil, errors.New("this method has not been implemented")
+}
+
+// ReadAt reads data at the specified position from remote storage if it exists.
+func (mcm *RemoteChunkManager) ReadAt(ctx context.Context, filePath string, off int64, length int64) ([]byte, error) {
+	if off < 0 || length < 0 {
+		return nil, io.EOF
+	}
+
+	object, err := mcm.getObject(ctx, mcm.bucketName, filePath, off, length)
+	if err != nil {
+		log.Warn("failed to get object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
+		return nil, err
+	}
+	defer object.Close()
+
+	data, err := Read(object, length)
+	if err != nil {
+		errResponse := minio.ToErrorResponse(err)
+		if errResponse.Code == "NoSuchKey" {
+			return nil, WrapErrNoSuchKey(filePath)
+		}
+		log.Warn("failed to read object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
+		return nil, err
+	}
+	metrics.PersistentDataKvSize.WithLabelValues(metrics.DataGetLabel).Observe(float64(length))
+	return data, nil
+}
+
+// Remove deletes an object with @key.
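+// The batch variants below differ in their failure handling: MultiRemove
+// keeps going on per-key failures and returns the combined error, while
+// RemoveWithPrefix issues up to 10 concurrent deletions per batch and stops
+// at the first batch that fails.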
+func (mcm *RemoteChunkManager) Remove(ctx context.Context, filePath string) error {
+	err := mcm.removeObject(ctx, mcm.bucketName, filePath)
+	if err != nil {
+		log.Warn("failed to remove object", zap.String("bucket", mcm.bucketName), zap.String("path", filePath), zap.Error(err))
+		return err
+	}
+	return nil
+}
+
+// MultiRemove deletes the objects with @keys.
+func (mcm *RemoteChunkManager) MultiRemove(ctx context.Context, keys []string) error {
+	var el error
+	for _, key := range keys {
+		err := mcm.Remove(ctx, key)
+		if err != nil {
+			el = merr.Combine(el, errors.Wrapf(err, "failed to remove %s", key))
+		}
+	}
+	return el
+}
+
+// RemoveWithPrefix removes all objects with the same prefix @prefix from remote storage.
+func (mcm *RemoteChunkManager) RemoveWithPrefix(ctx context.Context, prefix string) error {
+	objects, err := mcm.listObjects(ctx, mcm.bucketName, prefix, true)
+	if err != nil {
+		return err
+	}
+	removeKeys := make([]string, 0)
+	for key := range objects {
+		removeKeys = append(removeKeys, key)
+	}
+	i := 0
+	maxGoroutine := 10
+	for i < len(removeKeys) {
+		runningGroup, groupCtx := errgroup.WithContext(ctx)
+		for j := 0; j < maxGoroutine && i < len(removeKeys); j++ {
+			key := removeKeys[i]
+			runningGroup.Go(func() error {
+				err := mcm.removeObject(groupCtx, mcm.bucketName, key)
+				if err != nil {
+					log.Warn("failed to remove object", zap.String("path", key), zap.Error(err))
+					return err
+				}
+				return nil
+			})
+			i++
+		}
+		if err := runningGroup.Wait(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// ListWithPrefix returns objects with the provided prefix.
+// By default, if `recursive` is false, only objects at the same path level as the prefix are returned.
+// Say the storage has the following objects: [a, ab, a/b, ab/c].
+// Calling `ListWithPrefix` with `prefix` = a && `recursive` = false will only return [a, ab].
+// If the caller needs all objects without level limitation, `recursive` shall be true.
+func (mcm *RemoteChunkManager) ListWithPrefix(ctx context.Context, prefix string, recursive bool) ([]string, []time.Time, error) {
+
+	// cannot use ListObjects(ctx, bucketName, Opt{Prefix:prefix, Recursive:true})
+	// if the bucket has lots of objects under the provided path
+	// recursive = true may time out while recursively browsing the objects.
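+	// Instead, prefixes are walked level by level with a queue: listing a
+	// hypothetical prefix "a/" that contains [a/b, a/c/, a/c/d] first yields
+	// [a/b, a/c/]; the pseudo-directory "a/c/" (trailing "/") is re-enqueued
+	// and expanded on a later iteration when recursive is true.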
+	// See also: https://github.com/milvus-io/milvus/issues/19095
+
+	var objectsKeys []string
+	var modTimes []time.Time
+
+	tasks := list.New()
+	tasks.PushBack(prefix)
+	for tasks.Len() > 0 {
+		e := tasks.Front()
+		pre := e.Value.(string)
+		tasks.Remove(e)
+
+		// TODO add concurrent call if performance matters
+		// only return current level per call
+		objects, err := mcm.listObjects(ctx, mcm.bucketName, pre, false)
+
+		if err != nil {
+			return nil, nil, err
+		}
+
+		for object, lastModified := range objects {
+
+			// with trailing "/", object is a "directory"
+			if strings.HasSuffix(object, "/") && recursive {
+				// enqueue when recursive is true
+				if object != pre {
+					tasks.PushBack(object)
+				}
+				continue
+			}
+			objectsKeys = append(objectsKeys, object)
+			modTimes = append(modTimes, lastModified)
+		}
+	}
+
+	return objectsKeys, modTimes, nil
+}
+
+func (mcm *RemoteChunkManager) getObject(ctx context.Context, bucketName, objectName string,
+	offset int64, size int64) (FileReader, error) {
+	start := timerecord.NewTimeRecorder("getObject")
+
+	reader, err := mcm.client.GetObject(ctx, bucketName, objectName, offset, size)
+	metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataGetLabel, metrics.TotalLabel).Inc()
+	if err == nil && reader != nil {
+		metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataGetLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
+		metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataGetLabel, metrics.SuccessLabel).Inc()
+	} else {
+		metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataGetLabel, metrics.FailLabel).Inc()
+	}
+
+	switch err := err.(type) {
+	case *azcore.ResponseError:
+		if err.ErrorCode == string(bloberror.BlobNotFound) {
+			return nil, WrapErrNoSuchKey(objectName)
+		}
+	case minio.ErrorResponse:
+		if err.Code == "NoSuchKey" {
+			return nil, WrapErrNoSuchKey(objectName)
+		}
+	}
+
+	return reader, err
+}
+
+func (mcm *RemoteChunkManager) putObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64) error {
+	start := timerecord.NewTimeRecorder("putObject")
+
+	err := mcm.client.PutObject(ctx, bucketName, objectName, reader, objectSize)
+	metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataPutLabel, metrics.TotalLabel).Inc()
+	if err == nil {
+		metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataPutLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
+		metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataPutLabel, metrics.SuccessLabel).Inc()
+	} else {
+		metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataPutLabel, metrics.FailLabel).Inc()
+	}
+
+	return err
+}
+
+func (mcm *RemoteChunkManager) getObjectSize(ctx context.Context, bucketName, objectName string) (int64, error) {
+	start := timerecord.NewTimeRecorder("getObjectSize")
+
+	info, err := mcm.client.StatObject(ctx, bucketName, objectName)
+	metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataStatLabel, metrics.TotalLabel).Inc()
+	if err == nil {
+		metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataStatLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
+		metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataStatLabel, metrics.SuccessLabel).Inc()
+	} else {
+		metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataStatLabel, metrics.FailLabel).Inc()
+	}
+
+	switch err := err.(type) {
+	case *azcore.ResponseError:
+		if err.ErrorCode == string(bloberror.BlobNotFound) {
+			return info, WrapErrNoSuchKey(objectName)
+		}
+	case minio.ErrorResponse:
+		if err.Code == "NoSuchKey" {
+			return info, WrapErrNoSuchKey(objectName)
+		}
+	}
+
+	return info, err
+}
+
+func (mcm *RemoteChunkManager) listObjects(ctx context.Context, bucketName string, prefix string, recursive bool) (map[string]time.Time, error) {
+	start := timerecord.NewTimeRecorder("listObjects")
+
+	res, err := mcm.client.ListObjects(ctx, bucketName, prefix, recursive)
+	metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.TotalLabel).Inc()
+	if err == nil {
+		metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataListLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
+		metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.SuccessLabel).Inc()
+	} else {
+		log.Warn("failed to list with prefix", zap.String("bucket", mcm.bucketName), zap.String("prefix", prefix), zap.Error(err))
+		metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataListLabel, metrics.FailLabel).Inc()
+	}
+	return res, err
+}
+
+func (mcm *RemoteChunkManager) removeObject(ctx context.Context, bucketName, objectName string) error {
+	start := timerecord.NewTimeRecorder("removeObject")
+
+	err := mcm.client.RemoveObject(ctx, bucketName, objectName)
+	metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataRemoveLabel, metrics.TotalLabel).Inc()
+	if err == nil {
+		metrics.PersistentDataRequestLatency.WithLabelValues(metrics.DataRemoveLabel).Observe(float64(start.ElapseSpan().Milliseconds()))
+		metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataRemoveLabel, metrics.SuccessLabel).Inc()
+	} else {
+		metrics.PersistentDataOpCounter.WithLabelValues(metrics.DataRemoveLabel, metrics.FailLabel).Inc()
+	}
+
+	return err
+}
diff --git a/internal/storage/remote_chunk_manager_test.go b/internal/storage/remote_chunk_manager_test.go
new file mode 100644
index 0000000000..be8ecbf7d5
--- /dev/null
+++ b/internal/storage/remote_chunk_manager_test.go
@@ -0,0 +1,973 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package storage
+
+import (
+	"context"
+	"path"
+	"testing"
+
+	"github.com/cockroachdb/errors"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TODO: NewRemoteChunkManager is deprecated. Rewrite this unittest.
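+// The helpers below construct chunk managers through NewChunkManagerFactory
+// with the "remote" engine and differ only in the cloudProvider option, so
+// the same suite runs against both the MinIO-compatible backend and Azure
+// Blob Storage. A typical call, with placeholder bucket and root:
+//
+//	cm, _ := newAzureChunkManager(ctx, "a-bucket", "unit-test-root")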
+func newMinioChunkManager(ctx context.Context, bucketName string, rootPath string) (ChunkManager, error) { + return newRemoteChunkManager(ctx, "minio", bucketName, rootPath) +} + +func newAzureChunkManager(ctx context.Context, bucketName string, rootPath string) (ChunkManager, error) { + return newRemoteChunkManager(ctx, "azure", bucketName, rootPath) +} + +func newRemoteChunkManager(ctx context.Context, cloudProvider string, bucketName string, rootPath string) (ChunkManager, error) { + factory := NewChunkManagerFactory("remote", + RootPath(rootPath), + Address(Params.MinioCfg.Address.GetValue()), + AccessKeyID(Params.MinioCfg.AccessKeyID.GetValue()), + SecretAccessKeyID(Params.MinioCfg.SecretAccessKey.GetValue()), + UseSSL(Params.MinioCfg.UseSSL.GetAsBool()), + BucketName(bucketName), + UseIAM(Params.MinioCfg.UseIAM.GetAsBool()), + CloudProvider(cloudProvider), + IAMEndpoint(Params.MinioCfg.IAMEndpoint.GetValue()), + CreateBucket(true)) + return factory.NewPersistentStorageChunkManager(ctx) +} + +func TestInitRemoteChunkManager(t *testing.T) { + ctx := context.Background() + client, err := NewRemoteChunkManager(ctx, &config{ + bucketName: Params.MinioCfg.BucketName.GetValue(), + createBucket: true, + useIAM: false, + cloudProvider: "azure", + }) + assert.NoError(t, err) + assert.NotNil(t, client) +} + +func TestMinioChunkManager(t *testing.T) { + testBucket := Params.MinioCfg.BucketName.GetValue() + + configRoot := Params.MinioCfg.RootPath.GetValue() + + testMinIOKVRoot := path.Join(configRoot, "milvus-minio-ut-root") + + t.Run("test load", func(t *testing.T) { + testLoadRoot := path.Join(testMinIOKVRoot, "test_load") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newMinioChunkManager(ctx, testBucket, testLoadRoot) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testLoadRoot) + + assert.Equal(t, testLoadRoot, testCM.RootPath()) + + prepareTests := []struct { + key string + value []byte + }{ + {"abc", []byte("123")}, + {"abcd", []byte("1234")}, + {"key_1", []byte("111")}, + {"key_2", []byte("222")}, + {"key_3", []byte("333")}, + } + + for _, test := range prepareTests { + err = testCM.Write(ctx, path.Join(testLoadRoot, test.key), test.value) + require.NoError(t, err) + } + + loadTests := []struct { + isvalid bool + loadKey string + expectedValue []byte + + description string + }{ + {true, "abc", []byte("123"), "load valid key abc"}, + {true, "abcd", []byte("1234"), "load valid key abcd"}, + {true, "key_1", []byte("111"), "load valid key key_1"}, + {true, "key_2", []byte("222"), "load valid key key_2"}, + {true, "key_3", []byte("333"), "load valid key key_3"}, + {false, "key_not_exist", []byte(""), "load invalid key key_not_exist"}, + {false, "/", []byte(""), "load leading slash"}, + } + + for _, test := range loadTests { + t.Run(test.description, func(t *testing.T) { + if test.isvalid { + got, err := testCM.Read(ctx, path.Join(testLoadRoot, test.loadKey)) + assert.NoError(t, err) + assert.Equal(t, test.expectedValue, got) + } else { + if test.loadKey == "/" { + got, err := testCM.Read(ctx, test.loadKey) + assert.Error(t, err) + assert.Empty(t, got) + return + } + got, err := testCM.Read(ctx, path.Join(testLoadRoot, test.loadKey)) + assert.Error(t, err) + assert.Empty(t, got) + } + }) + } + + loadWithPrefixTests := []struct { + isvalid bool + prefix string + expectedValue [][]byte + + description string + }{ + {true, "abc", [][]byte{[]byte("123"), []byte("1234")}, "load with valid prefix abc"}, + {true, "key_", 
[][]byte{[]byte("111"), []byte("222"), []byte("333")}, "load with valid prefix key_"}, + {true, "prefix", [][]byte{}, "load with valid but not exist prefix prefix"}, + } + + for _, test := range loadWithPrefixTests { + t.Run(test.description, func(t *testing.T) { + gotk, gotv, err := testCM.ReadWithPrefix(ctx, path.Join(testLoadRoot, test.prefix)) + assert.NoError(t, err) + assert.Equal(t, len(test.expectedValue), len(gotk)) + assert.Equal(t, len(test.expectedValue), len(gotv)) + assert.ElementsMatch(t, test.expectedValue, gotv) + }) + } + + multiLoadTests := []struct { + isvalid bool + multiKeys []string + + expectedValue [][]byte + description string + }{ + {false, []string{"key_1", "key_not_exist"}, [][]byte{[]byte("111"), nil}, "multiload 1 exist 1 not"}, + {true, []string{"abc", "key_3"}, [][]byte{[]byte("123"), []byte("333")}, "multiload 2 exist"}, + } + + for _, test := range multiLoadTests { + t.Run(test.description, func(t *testing.T) { + for i := range test.multiKeys { + test.multiKeys[i] = path.Join(testLoadRoot, test.multiKeys[i]) + } + if test.isvalid { + got, err := testCM.MultiRead(ctx, test.multiKeys) + assert.NoError(t, err) + assert.Equal(t, test.expectedValue, got) + } else { + got, err := testCM.MultiRead(ctx, test.multiKeys) + assert.Error(t, err) + assert.Equal(t, test.expectedValue, got) + } + }) + } + }) + + t.Run("test MultiSave", func(t *testing.T) { + testMultiSaveRoot := path.Join(testMinIOKVRoot, "test_multisave") + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newMinioChunkManager(ctx, testBucket, testMultiSaveRoot) + assert.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testMultiSaveRoot) + + err = testCM.Write(ctx, path.Join(testMultiSaveRoot, "key_1"), []byte("111")) + assert.NoError(t, err) + + kvs := map[string][]byte{ + path.Join(testMultiSaveRoot, "key_1"): []byte("123"), + path.Join(testMultiSaveRoot, "key_2"): []byte("456"), + } + + err = testCM.MultiWrite(ctx, kvs) + assert.NoError(t, err) + + val, err := testCM.Read(ctx, path.Join(testMultiSaveRoot, "key_1")) + assert.NoError(t, err) + assert.Equal(t, []byte("123"), val) + + reader, err := testCM.Reader(ctx, path.Join(testMultiSaveRoot, "key_1")) + assert.NoError(t, err) + reader.Close() + }) + + t.Run("test Remove", func(t *testing.T) { + testRemoveRoot := path.Join(testMinIOKVRoot, "test_remove") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newMinioChunkManager(ctx, testBucket, testRemoveRoot) + assert.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testRemoveRoot) + + prepareTests := []struct { + k string + v []byte + }{ + {"key_1", []byte("123")}, + {"key_2", []byte("456")}, + {"mkey_1", []byte("111")}, + {"mkey_2", []byte("222")}, + {"mkey_3", []byte("333")}, + {"key_prefix_1", []byte("111")}, + {"key_prefix_2", []byte("222")}, + {"key_prefix_3", []byte("333")}, + } + + for _, test := range prepareTests { + k := path.Join(testRemoveRoot, test.k) + err = testCM.Write(ctx, k, test.v) + require.NoError(t, err) + } + + removeTests := []struct { + removeKey string + valueBeforeRemove []byte + + description string + }{ + {"key_1", []byte("123"), "remove key_1"}, + {"key_2", []byte("456"), "remove key_2"}, + } + + for _, test := range removeTests { + t.Run(test.description, func(t *testing.T) { + k := path.Join(testRemoveRoot, test.removeKey) + v, err := testCM.Read(ctx, k) + require.NoError(t, err) + require.Equal(t, test.valueBeforeRemove, v) + + err = testCM.Remove(ctx, k) + assert.NoError(t, 
err) + + v, err = testCM.Read(ctx, k) + require.Error(t, err) + require.Empty(t, v) + }) + } + + multiRemoveTest := []string{ + path.Join(testRemoveRoot, "mkey_1"), + path.Join(testRemoveRoot, "mkey_2"), + path.Join(testRemoveRoot, "mkey_3"), + } + + lv, err := testCM.MultiRead(ctx, multiRemoveTest) + require.NoError(t, err) + require.ElementsMatch(t, [][]byte{[]byte("111"), []byte("222"), []byte("333")}, lv) + + err = testCM.MultiRemove(ctx, multiRemoveTest) + assert.NoError(t, err) + + for _, k := range multiRemoveTest { + v, err := testCM.Read(ctx, k) + assert.Error(t, err) + assert.Empty(t, v) + } + + removeWithPrefixTest := []string{ + path.Join(testRemoveRoot, "key_prefix_1"), + path.Join(testRemoveRoot, "key_prefix_2"), + path.Join(testRemoveRoot, "key_prefix_3"), + } + removePrefix := path.Join(testRemoveRoot, "key_prefix") + + lv, err = testCM.MultiRead(ctx, removeWithPrefixTest) + require.NoError(t, err) + require.ElementsMatch(t, [][]byte{[]byte("111"), []byte("222"), []byte("333")}, lv) + + err = testCM.RemoveWithPrefix(ctx, removePrefix) + assert.NoError(t, err) + + for _, k := range removeWithPrefixTest { + v, err := testCM.Read(ctx, k) + assert.Error(t, err) + assert.Empty(t, v) + } + }) + + t.Run("test ReadAt", func(t *testing.T) { + testLoadPartialRoot := path.Join(testMinIOKVRoot, "load_partial") + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newMinioChunkManager(ctx, testBucket, testLoadPartialRoot) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testLoadPartialRoot) + + key := path.Join(testLoadPartialRoot, "TestMinIOKV_LoadPartial_key") + value := []byte("TestMinIOKV_LoadPartial_value") + + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + var off, length int64 + var partial []byte + + off, length = 1, 1 + partial, err = testCM.ReadAt(ctx, key, off, length) + assert.NoError(t, err) + assert.ElementsMatch(t, partial, value[off:off+length]) + + off, length = 0, int64(len(value)) + partial, err = testCM.ReadAt(ctx, key, off, length) + assert.NoError(t, err) + assert.ElementsMatch(t, partial, value[off:off+length]) + + // error case + off, length = 5, -2 + _, err = testCM.ReadAt(ctx, key, off, length) + assert.Error(t, err) + + off, length = -1, 2 + _, err = testCM.ReadAt(ctx, key, off, length) + assert.Error(t, err) + + off, length = 1, -2 + _, err = testCM.ReadAt(ctx, key, off, length) + assert.Error(t, err) + + err = testCM.Remove(ctx, key) + assert.NoError(t, err) + off, length = 1, 1 + _, err = testCM.ReadAt(ctx, key, off, length) + assert.Error(t, err) + }) + + t.Run("test Size", func(t *testing.T) { + testGetSizeRoot := path.Join(testMinIOKVRoot, "get_size") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newMinioChunkManager(ctx, testBucket, testGetSizeRoot) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testGetSizeRoot) + + key := path.Join(testGetSizeRoot, "TestMinIOKV_GetSize_key") + value := []byte("TestMinIOKV_GetSize_value") + + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + size, err := testCM.Size(ctx, key) + assert.NoError(t, err) + assert.Equal(t, size, int64(len(value))) + + key2 := path.Join(testGetSizeRoot, "TestMemoryKV_GetSize_key2") + + size, err = testCM.Size(ctx, key2) + assert.Error(t, err) + assert.Equal(t, int64(0), size) + }) + + t.Run("test Path", func(t *testing.T) { + testGetPathRoot := path.Join(testMinIOKVRoot, "get_path") + ctx, cancel := context.WithCancel(context.Background()) + 
defer cancel() + + testCM, err := newMinioChunkManager(ctx, testBucket, testGetPathRoot) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testGetPathRoot) + + key := path.Join(testGetPathRoot, "TestMinIOKV_GetSize_key") + value := []byte("TestMinIOKV_GetSize_value") + + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + p, err := testCM.Path(ctx, key) + assert.NoError(t, err) + assert.Equal(t, p, key) + + key2 := path.Join(testGetPathRoot, "TestMemoryKV_GetSize_key2") + + p, err = testCM.Path(ctx, key2) + assert.Error(t, err) + assert.Equal(t, p, "") + }) + + t.Run("test Mmap", func(t *testing.T) { + testMmapRoot := path.Join(testMinIOKVRoot, "mmap") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newMinioChunkManager(ctx, testBucket, testMmapRoot) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testMmapRoot) + + key := path.Join(testMmapRoot, "TestMinIOKV_GetSize_key") + value := []byte("TestMinIOKV_GetSize_value") + + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + r, err := testCM.Mmap(ctx, key) + assert.Error(t, err) + assert.Nil(t, r) + + }) + + t.Run("test Prefix", func(t *testing.T) { + testPrefix := path.Join(testMinIOKVRoot, "prefix") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newMinioChunkManager(ctx, testBucket, testPrefix) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testPrefix) + + pathB := path.Join("a", "b") + + key := path.Join(testPrefix, pathB) + value := []byte("a") + + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + pathC := path.Join("a", "c") + key = path.Join(testPrefix, pathC) + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + pathPrefix := path.Join(testPrefix, "a") + r, m, err := testCM.ListWithPrefix(ctx, pathPrefix, true) + assert.NoError(t, err) + assert.Equal(t, len(r), 2) + assert.Equal(t, len(m), 2) + + key = path.Join(testPrefix, "b", "b", "b") + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + key = path.Join(testPrefix, "b", "a", "b") + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + key = path.Join(testPrefix, "bc", "a", "b") + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + dirs, mods, err := testCM.ListWithPrefix(ctx, testPrefix+"/", true) + assert.NoError(t, err) + assert.Equal(t, 5, len(dirs)) + assert.Equal(t, 5, len(mods)) + + dirs, mods, err = testCM.ListWithPrefix(ctx, path.Join(testPrefix, "b"), true) + assert.NoError(t, err) + assert.Equal(t, 3, len(dirs)) + assert.Equal(t, 3, len(mods)) + + testCM.RemoveWithPrefix(ctx, testPrefix) + r, m, err = testCM.ListWithPrefix(ctx, pathPrefix, true) + assert.NoError(t, err) + assert.Equal(t, 0, len(r)) + assert.Equal(t, 0, len(m)) + + // test wrong prefix + b := make([]byte, 2048) + pathWrong := path.Join(testPrefix, string(b)) + _, _, err = testCM.ListWithPrefix(ctx, pathWrong, true) + assert.Error(t, err) + }) + + t.Run("test NoSuchKey", func(t *testing.T) { + testPrefix := path.Join(testMinIOKVRoot, "nokey") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newMinioChunkManager(ctx, testBucket, testPrefix) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testPrefix) + + key := "a" + + _, err = testCM.Read(ctx, key) + assert.Error(t, err) + assert.True(t, errors.Is(err, ErrNoSuchKey)) + + file, err := testCM.Reader(ctx, key) + assert.NoError(t, err) // todo + file.Close() + + _, err = 
testCM.ReadAt(ctx, key, 100, 1) + assert.Error(t, err) + assert.True(t, errors.Is(err, ErrNoSuchKey)) + }) +} + +func TestAzureChunkManager(t *testing.T) { + testBucket := Params.MinioCfg.BucketName.GetValue() + + configRoot := Params.MinioCfg.RootPath.GetValue() + + testMinIOKVRoot := path.Join(configRoot, "milvus-minio-ut-root") + + t.Run("test load", func(t *testing.T) { + testLoadRoot := path.Join(testMinIOKVRoot, "test_load") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newAzureChunkManager(ctx, testBucket, testLoadRoot) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testLoadRoot) + + assert.Equal(t, testLoadRoot, testCM.RootPath()) + + prepareTests := []struct { + key string + value []byte + }{ + {"abc", []byte("123")}, + {"abcd", []byte("1234")}, + {"key_1", []byte("111")}, + {"key_2", []byte("222")}, + {"key_3", []byte("333")}, + } + + for _, test := range prepareTests { + err = testCM.Write(ctx, path.Join(testLoadRoot, test.key), test.value) + require.NoError(t, err) + } + + loadTests := []struct { + isvalid bool + loadKey string + expectedValue []byte + + description string + }{ + {true, "abc", []byte("123"), "load valid key abc"}, + {true, "abcd", []byte("1234"), "load valid key abcd"}, + {true, "key_1", []byte("111"), "load valid key key_1"}, + {true, "key_2", []byte("222"), "load valid key key_2"}, + {true, "key_3", []byte("333"), "load valid key key_3"}, + {false, "key_not_exist", []byte(""), "load invalid key key_not_exist"}, + {false, "/", []byte(""), "load leading slash"}, + } + + for _, test := range loadTests { + t.Run(test.description, func(t *testing.T) { + if test.isvalid { + got, err := testCM.Read(ctx, path.Join(testLoadRoot, test.loadKey)) + assert.NoError(t, err) + assert.Equal(t, test.expectedValue, got) + } else { + if test.loadKey == "/" { + got, err := testCM.Read(ctx, test.loadKey) + assert.Error(t, err) + assert.Empty(t, got) + return + } + got, err := testCM.Read(ctx, path.Join(testLoadRoot, test.loadKey)) + assert.Error(t, err) + assert.Empty(t, got) + } + }) + } + + loadWithPrefixTests := []struct { + isvalid bool + prefix string + expectedValue [][]byte + + description string + }{ + {true, "abc", [][]byte{[]byte("123"), []byte("1234")}, "load with valid prefix abc"}, + {true, "key_", [][]byte{[]byte("111"), []byte("222"), []byte("333")}, "load with valid prefix key_"}, + {true, "prefix", [][]byte{}, "load with valid but not exist prefix prefix"}, + } + + for _, test := range loadWithPrefixTests { + t.Run(test.description, func(t *testing.T) { + gotk, gotv, err := testCM.ReadWithPrefix(ctx, path.Join(testLoadRoot, test.prefix)) + assert.NoError(t, err) + assert.Equal(t, len(test.expectedValue), len(gotk)) + assert.Equal(t, len(test.expectedValue), len(gotv)) + assert.ElementsMatch(t, test.expectedValue, gotv) + }) + } + + multiLoadTests := []struct { + isvalid bool + multiKeys []string + + expectedValue [][]byte + description string + }{ + {false, []string{"key_1", "key_not_exist"}, [][]byte{[]byte("111"), nil}, "multiload 1 exist 1 not"}, + {true, []string{"abc", "key_3"}, [][]byte{[]byte("123"), []byte("333")}, "multiload 2 exist"}, + } + + for _, test := range multiLoadTests { + t.Run(test.description, func(t *testing.T) { + for i := range test.multiKeys { + test.multiKeys[i] = path.Join(testLoadRoot, test.multiKeys[i]) + } + if test.isvalid { + got, err := testCM.MultiRead(ctx, test.multiKeys) + assert.NoError(t, err) + assert.Equal(t, test.expectedValue, got) + } else { + got, err := 
testCM.MultiRead(ctx, test.multiKeys) + assert.Error(t, err) + assert.Equal(t, test.expectedValue, got) + } + }) + } + }) + + t.Run("test MultiSave", func(t *testing.T) { + testMultiSaveRoot := path.Join(testMinIOKVRoot, "test_multisave") + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newAzureChunkManager(ctx, testBucket, testMultiSaveRoot) + assert.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testMultiSaveRoot) + + err = testCM.Write(ctx, path.Join(testMultiSaveRoot, "key_1"), []byte("111")) + assert.NoError(t, err) + + kvs := map[string][]byte{ + path.Join(testMultiSaveRoot, "key_1"): []byte("123"), + path.Join(testMultiSaveRoot, "key_2"): []byte("456"), + } + + err = testCM.MultiWrite(ctx, kvs) + assert.NoError(t, err) + + val, err := testCM.Read(ctx, path.Join(testMultiSaveRoot, "key_1")) + assert.NoError(t, err) + assert.Equal(t, []byte("123"), val) + + reader, err := testCM.Reader(ctx, path.Join(testMultiSaveRoot, "key_1")) + assert.NoError(t, err) + reader.Close() + }) + + t.Run("test Remove", func(t *testing.T) { + testRemoveRoot := path.Join(testMinIOKVRoot, "test_remove") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newAzureChunkManager(ctx, testBucket, testRemoveRoot) + assert.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testRemoveRoot) + + prepareTests := []struct { + k string + v []byte + }{ + {"key_1", []byte("123")}, + {"key_2", []byte("456")}, + {"mkey_1", []byte("111")}, + {"mkey_2", []byte("222")}, + {"mkey_3", []byte("333")}, + {"key_prefix_1", []byte("111")}, + {"key_prefix_2", []byte("222")}, + {"key_prefix_3", []byte("333")}, + } + + for _, test := range prepareTests { + k := path.Join(testRemoveRoot, test.k) + err = testCM.Write(ctx, k, test.v) + require.NoError(t, err) + } + + removeTests := []struct { + removeKey string + valueBeforeRemove []byte + + description string + }{ + {"key_1", []byte("123"), "remove key_1"}, + {"key_2", []byte("456"), "remove key_2"}, + } + + for _, test := range removeTests { + t.Run(test.description, func(t *testing.T) { + k := path.Join(testRemoveRoot, test.removeKey) + v, err := testCM.Read(ctx, k) + require.NoError(t, err) + require.Equal(t, test.valueBeforeRemove, v) + + err = testCM.Remove(ctx, k) + assert.NoError(t, err) + + v, err = testCM.Read(ctx, k) + require.Error(t, err) + require.Empty(t, v) + }) + } + + multiRemoveTest := []string{ + path.Join(testRemoveRoot, "mkey_1"), + path.Join(testRemoveRoot, "mkey_2"), + path.Join(testRemoveRoot, "mkey_3"), + } + + lv, err := testCM.MultiRead(ctx, multiRemoveTest) + require.NoError(t, err) + require.ElementsMatch(t, [][]byte{[]byte("111"), []byte("222"), []byte("333")}, lv) + + err = testCM.MultiRemove(ctx, multiRemoveTest) + assert.NoError(t, err) + + for _, k := range multiRemoveTest { + v, err := testCM.Read(ctx, k) + assert.Error(t, err) + assert.Empty(t, v) + } + + removeWithPrefixTest := []string{ + path.Join(testRemoveRoot, "key_prefix_1"), + path.Join(testRemoveRoot, "key_prefix_2"), + path.Join(testRemoveRoot, "key_prefix_3"), + } + removePrefix := path.Join(testRemoveRoot, "key_prefix") + + lv, err = testCM.MultiRead(ctx, removeWithPrefixTest) + require.NoError(t, err) + require.ElementsMatch(t, [][]byte{[]byte("111"), []byte("222"), []byte("333")}, lv) + + err = testCM.RemoveWithPrefix(ctx, removePrefix) + assert.NoError(t, err) + + for _, k := range removeWithPrefixTest { + v, err := testCM.Read(ctx, k) + assert.Error(t, err) + assert.Empty(t, v) + } + }) + + 
t.Run("test ReadAt", func(t *testing.T) { + testLoadPartialRoot := path.Join(testMinIOKVRoot, "load_partial") + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newAzureChunkManager(ctx, testBucket, testLoadPartialRoot) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testLoadPartialRoot) + + key := path.Join(testLoadPartialRoot, "TestMinIOKV_LoadPartial_key") + value := []byte("TestMinIOKV_LoadPartial_value") + + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + var off, length int64 + var partial []byte + + off, length = 1, 1 + partial, err = testCM.ReadAt(ctx, key, off, length) + assert.NoError(t, err) + assert.ElementsMatch(t, partial, value[off:off+length]) + + off, length = 0, int64(len(value)) + partial, err = testCM.ReadAt(ctx, key, off, length) + assert.NoError(t, err) + assert.ElementsMatch(t, partial, value[off:off+length]) + + // error case + off, length = 5, -2 + _, err = testCM.ReadAt(ctx, key, off, length) + assert.Error(t, err) + + off, length = -1, 2 + _, err = testCM.ReadAt(ctx, key, off, length) + assert.Error(t, err) + + off, length = 1, -2 + _, err = testCM.ReadAt(ctx, key, off, length) + assert.Error(t, err) + + err = testCM.Remove(ctx, key) + assert.NoError(t, err) + off, length = 1, 1 + _, err = testCM.ReadAt(ctx, key, off, length) + assert.Error(t, err) + }) + + t.Run("test Size", func(t *testing.T) { + testGetSizeRoot := path.Join(testMinIOKVRoot, "get_size") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newAzureChunkManager(ctx, testBucket, testGetSizeRoot) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testGetSizeRoot) + + key := path.Join(testGetSizeRoot, "TestMinIOKV_GetSize_key") + value := []byte("TestMinIOKV_GetSize_value") + + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + size, err := testCM.Size(ctx, key) + assert.NoError(t, err) + assert.Equal(t, size, int64(len(value))) + + key2 := path.Join(testGetSizeRoot, "TestMemoryKV_GetSize_key2") + + size, err = testCM.Size(ctx, key2) + assert.Error(t, err) + assert.Equal(t, int64(0), size) + }) + + t.Run("test Path", func(t *testing.T) { + testGetPathRoot := path.Join(testMinIOKVRoot, "get_path") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newAzureChunkManager(ctx, testBucket, testGetPathRoot) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testGetPathRoot) + + key := path.Join(testGetPathRoot, "TestMinIOKV_GetSize_key") + value := []byte("TestMinIOKV_GetSize_value") + + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + p, err := testCM.Path(ctx, key) + assert.NoError(t, err) + assert.Equal(t, p, key) + + key2 := path.Join(testGetPathRoot, "TestMemoryKV_GetSize_key2") + + p, err = testCM.Path(ctx, key2) + assert.Error(t, err) + assert.Equal(t, p, "") + }) + + t.Run("test Mmap", func(t *testing.T) { + testMmapRoot := path.Join(testMinIOKVRoot, "mmap") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newAzureChunkManager(ctx, testBucket, testMmapRoot) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testMmapRoot) + + key := path.Join(testMmapRoot, "TestMinIOKV_GetSize_key") + value := []byte("TestMinIOKV_GetSize_value") + + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + r, err := testCM.Mmap(ctx, key) + assert.Error(t, err) + assert.Nil(t, r) + + }) + + t.Run("test Prefix", func(t *testing.T) { + 
testPrefix := path.Join(testMinIOKVRoot, "prefix") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newAzureChunkManager(ctx, testBucket, testPrefix) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testPrefix) + + pathB := path.Join("a", "b") + + key := path.Join(testPrefix, pathB) + value := []byte("a") + + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + pathC := path.Join("a", "c") + key = path.Join(testPrefix, pathC) + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + pathPrefix := path.Join(testPrefix, "a") + r, m, err := testCM.ListWithPrefix(ctx, pathPrefix, true) + assert.NoError(t, err) + assert.Equal(t, len(r), 2) + assert.Equal(t, len(m), 2) + + key = path.Join(testPrefix, "b", "b", "b") + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + key = path.Join(testPrefix, "b", "a", "b") + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + + key = path.Join(testPrefix, "bc", "a", "b") + err = testCM.Write(ctx, key, value) + assert.NoError(t, err) + dirs, mods, err := testCM.ListWithPrefix(ctx, testPrefix+"/", true) + assert.NoError(t, err) + assert.Equal(t, 5, len(dirs)) + assert.Equal(t, 5, len(mods)) + + dirs, mods, err = testCM.ListWithPrefix(ctx, path.Join(testPrefix, "b"), true) + assert.NoError(t, err) + assert.Equal(t, 3, len(dirs)) + assert.Equal(t, 3, len(mods)) + + testCM.RemoveWithPrefix(ctx, testPrefix) + r, m, err = testCM.ListWithPrefix(ctx, pathPrefix, true) + assert.NoError(t, err) + assert.Equal(t, 0, len(r)) + assert.Equal(t, 0, len(m)) + + // test wrong prefix + b := make([]byte, 2048) + pathWrong := path.Join(testPrefix, string(b)) + _, _, err = testCM.ListWithPrefix(ctx, pathWrong, true) + assert.Error(t, err) + }) + + t.Run("test NoSuchKey", func(t *testing.T) { + testPrefix := path.Join(testMinIOKVRoot, "nokey") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + testCM, err := newAzureChunkManager(ctx, testBucket, testPrefix) + require.NoError(t, err) + defer testCM.RemoveWithPrefix(ctx, testPrefix) + + key := "a" + + _, err = testCM.Read(ctx, key) + assert.Error(t, err) + assert.True(t, errors.Is(err, ErrNoSuchKey)) + + _, err = testCM.Reader(ctx, key) + assert.Error(t, err) + assert.True(t, errors.Is(err, ErrNoSuchKey)) + + _, err = testCM.ReadAt(ctx, key, 100, 1) + assert.Error(t, err) + assert.True(t, errors.Is(err, ErrNoSuchKey)) + }) +} diff --git a/internal/util/indexcgowrapper/build_index_info.go b/internal/util/indexcgowrapper/build_index_info.go index c38cb99532..e8feaf7ec8 100644 --- a/internal/util/indexcgowrapper/build_index_info.go +++ b/internal/util/indexcgowrapper/build_index_info.go @@ -48,6 +48,7 @@ func NewBuildIndexInfo(config *indexpb.StorageConfig) (*BuildIndexInfo, error) { cAccessValue := C.CString(config.SecretAccessKey) cRootPath := C.CString(config.RootPath) cStorageType := C.CString(config.StorageType) + cCloudProvider := C.CString(config.CloudProvider) cIamEndPoint := C.CString(config.IAMEndpoint) cRegion := C.CString(config.Region) defer C.free(unsafe.Pointer(cAddress)) @@ -56,6 +57,7 @@ func NewBuildIndexInfo(config *indexpb.StorageConfig) (*BuildIndexInfo, error) { defer C.free(unsafe.Pointer(cAccessValue)) defer C.free(unsafe.Pointer(cRootPath)) defer C.free(unsafe.Pointer(cStorageType)) + defer C.free(unsafe.Pointer(cCloudProvider)) defer C.free(unsafe.Pointer(cIamEndPoint)) defer C.free(unsafe.Pointer(cRegion)) storageConfig := C.CStorageConfig{ @@ -65,6 +67,7 @@ func 
NewBuildIndexInfo(config *indexpb.StorageConfig) (*BuildIndexInfo, error) {
 		access_key_value: cAccessValue,
 		root_path:        cRootPath,
 		storage_type:     cStorageType,
+		cloud_provider:   cCloudProvider,
 		iam_endpoint:     cIamEndPoint,
 		useSSL:           C.bool(config.UseSSL),
 		useIAM:           C.bool(config.UseIAM),
diff --git a/internal/util/initcore/init_core.go b/internal/util/initcore/init_core.go
index fb4413596c..296557f9ce 100644
--- a/internal/util/initcore/init_core.go
+++ b/internal/util/initcore/init_core.go
@@ -62,6 +62,7 @@ func InitRemoteChunkManager(params *paramtable.ComponentParam) error {
 	cAccessValue := C.CString(params.MinioCfg.SecretAccessKey.GetValue())
 	cRootPath := C.CString(params.MinioCfg.RootPath.GetValue())
 	cStorageType := C.CString(params.CommonCfg.StorageType.GetValue())
+	cCloudProvider := C.CString(params.MinioCfg.CloudProvider.GetValue())
 	cIamEndPoint := C.CString(params.MinioCfg.IAMEndpoint.GetValue())
 	cLogLevel := C.CString(params.MinioCfg.LogLevel.GetValue())
 	cRegion := C.CString(params.MinioCfg.Region.GetValue())
@@ -71,6 +72,7 @@ func InitRemoteChunkManager(params *paramtable.ComponentParam) error {
 	defer C.free(unsafe.Pointer(cAccessValue))
 	defer C.free(unsafe.Pointer(cRootPath))
 	defer C.free(unsafe.Pointer(cStorageType))
+	defer C.free(unsafe.Pointer(cCloudProvider))
 	defer C.free(unsafe.Pointer(cIamEndPoint))
 	defer C.free(unsafe.Pointer(cLogLevel))
 	defer C.free(unsafe.Pointer(cRegion))
@@ -81,6 +83,7 @@ func InitRemoteChunkManager(params *paramtable.ComponentParam) error {
 		access_key_value: cAccessValue,
 		root_path:        cRootPath,
 		storage_type:     cStorageType,
+		cloud_provider:   cCloudProvider,
 		iam_endpoint:     cIamEndPoint,
 		useSSL:           C.bool(params.MinioCfg.UseSSL.GetAsBool()),
 		useIAM:           C.bool(params.MinioCfg.UseIAM.GetAsBool()),
diff --git a/pkg/util/paramtable/http_param.go b/pkg/util/paramtable/http_param.go
index 46c2abf64d..ea04befbab 100644
--- a/pkg/util/paramtable/http_param.go
+++ b/pkg/util/paramtable/http_param.go
@@ -27,7 +27,7 @@ func (p *httpConfig) init(base *BaseTable) {
 
 	p.Port = ParamItem{
 		Key:          "proxy.http.port",
-		Version:      "2.1.0",
+		Version:      "2.3.0",
 		Doc:          "high-level restful api",
 		PanicIfEmpty: false,
 		Export:       true,
diff --git a/scripts/azure_build.sh b/scripts/azure_build.sh
new file mode 100644
index 0000000000..d05fb69818
--- /dev/null
+++ b/scripts/azure_build.sh
@@ -0,0 +1,9 @@
+ROOT_DIR=$1
+
+AZURE_CMAKE_CMD="cmake \
+-DCMAKE_INSTALL_LIBDIR=${ROOT_DIR}/internal/core/output/lib \
+${ROOT_DIR}/internal/core/src/storage/azure-blob-storage"
+echo ${AZURE_CMAKE_CMD}
+${AZURE_CMAKE_CMD}
+
+make && make install
\ No newline at end of file
diff --git a/scripts/core_build.sh b/scripts/core_build.sh
index a510e01cfd..2386552bfe 100755
--- a/scripts/core_build.sh
+++ b/scripts/core_build.sh
@@ -106,7 +106,7 @@ USE_ASAN="OFF"
 OPEN_SIMD="OFF"
 USE_DYNAMIC_SIMD="OFF"
 
-while getopts "p:d:t:s:f:n:i:y:a:ulrcghzmeb" arg; do
+while getopts "p:d:t:s:f:n:i:y:a:ulrcghzmebZ" arg; do
   case $arg in
     f)
       CUSTOM_THIRDPARTY_PATH=$OPTARG
@@ -167,6 +167,9 @@ while getopts "p:d:t:s:f:n:i:y:a:ulrcghzmeb" arg; do
     y)
       USE_DYNAMIC_SIMD=$OPTARG
       ;;
+    Z)
+      BUILD_WITHOUT_AZURE="on"
+      ;;
     h) # help
       echo "
 
@@ -185,6 +188,7 @@ parameter:
 -s: build with CUDA arch(default:DEFAULT), for example '-gencode=compute_61,code=sm_61;-gencode=compute_75,code=sm_75'
 -b: build embedded milvus(default: OFF)
 -a: build milvus with AddressSanitizer(default: false)
+-Z: build milvus without azure-sdk-for-cpp, so Azure Blob storage cannot be used
 -h: help
 
 usage:
@@ -199,6 +203,28 @@
 esac
 done
 
+if [ -z "$BUILD_WITHOUT_AZURE" ]; then
+    
AZURE_BUILD_DIR="${ROOT_DIR}/cmake_build/azure" + if [ ! -d ${AZURE_BUILD_DIR} ]; then + mkdir -p ${AZURE_BUILD_DIR} + fi + pushd ${AZURE_BUILD_DIR} + env bash ${ROOT_DIR}/scripts/azure_build.sh ${ROOT_DIR} + cat vcpkg-bootstrap.log # need to remove + popd + SYSTEM_NAME=$(uname -s) + if [[ ${SYSTEM_NAME} == "Darwin" ]]; then + SYSTEM_NAME="osx" + elif [[ ${SYSTEM_NAME} == "Linux" ]]; then + SYSTEM_NAME="linux" + fi + ARCHITECTURE=$(uname -m) + if [[ ${ARCHITECTURE} == "x86_64" ]]; then + ARCHITECTURE="x64" + fi + VCPKG_TARGET_TRIPLET=${ARCHITECTURE}-${SYSTEM_NAME} +fi + if [[ ! -d ${BUILD_OUTPUT_DIR} ]]; then mkdir ${BUILD_OUTPUT_DIR} fi @@ -265,8 +291,12 @@ ${CMAKE_EXTRA_ARGS} \ -DUSE_ASAN=${USE_ASAN} \ -DOPEN_SIMD=${OPEN_SIMD} \ -DUSE_DYNAMIC_SIMD=${USE_DYNAMIC_SIMD} --DCPU_ARCH=${CPU_ARCH} \ -${CPP_SRC_DIR}" +-DCPU_ARCH=${CPU_ARCH} " +if [ -z "$BUILD_WITHOUT_AZURE" ]; then +CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \ +-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} " +fi +CMAKE_CMD=${CMAKE_CMD}"${CPP_SRC_DIR}" echo "CC $CC" echo ${CMAKE_CMD} diff --git a/scripts/install_deps.sh b/scripts/install_deps.sh index 01b7226bda..308967b96f 100755 --- a/scripts/install_deps.sh +++ b/scripts/install_deps.sh @@ -56,7 +56,7 @@ function install_linux_deps() { function install_mac_deps() { sudo xcode-select --install > /dev/null 2>&1 - brew install libomp ninja cmake llvm@15 ccache grep pkg-config + brew install libomp ninja cmake llvm@15 ccache grep pkg-config zip unzip export PATH="/usr/local/opt/grep/libexec/gnubin:$PATH" brew update && brew upgrade && brew cleanup