mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-07 01:28:27 +08:00
feat: introduce third-party milvus-storage (#39418)
related: https://github.com/milvus-io/milvus/issues/39173 Signed-off-by: shaoting-huang <shaoting.huang@zilliz.com>
This commit is contained in:
parent
f32830e016
commit
c4ae9f4ece
@ -307,7 +307,8 @@ ${CMAKE_EXTRA_ARGS} \
|
|||||||
-DUSE_DYNAMIC_SIMD=${USE_DYNAMIC_SIMD} \
|
-DUSE_DYNAMIC_SIMD=${USE_DYNAMIC_SIMD} \
|
||||||
-DCPU_ARCH=${CPU_ARCH} \
|
-DCPU_ARCH=${CPU_ARCH} \
|
||||||
-DINDEX_ENGINE=${INDEX_ENGINE} \
|
-DINDEX_ENGINE=${INDEX_ENGINE} \
|
||||||
-DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} "
|
-DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} \
|
||||||
|
-DENABLE_AZURE_FS=${ENABLE_AZURE_FS} "
|
||||||
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
|
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
|
||||||
CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \
|
CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \
|
||||||
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} "
|
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} "
|
||||||
|
|||||||
18
go.mod
18
go.mod
@ -18,7 +18,7 @@ require (
|
|||||||
github.com/gin-gonic/gin v1.9.1
|
github.com/gin-gonic/gin v1.9.1
|
||||||
github.com/go-playground/validator/v10 v10.14.0
|
github.com/go-playground/validator/v10 v10.14.0
|
||||||
github.com/gofrs/flock v0.8.1
|
github.com/gofrs/flock v0.8.1
|
||||||
github.com/golang/protobuf v1.5.4 // indirect
|
github.com/golang/protobuf v1.5.4
|
||||||
github.com/google/btree v1.1.2
|
github.com/google/btree v1.1.2
|
||||||
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
|
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
|
||||||
github.com/klauspost/compress v1.17.9
|
github.com/klauspost/compress v1.17.9
|
||||||
@ -101,9 +101,9 @@ require (
|
|||||||
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible // indirect
|
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible // indirect
|
||||||
github.com/alibabacloud-go/debug v0.0.0-20190504072949-9472017b5c68 // indirect
|
github.com/alibabacloud-go/debug v0.0.0-20190504072949-9472017b5c68 // indirect
|
||||||
github.com/alibabacloud-go/tea v1.1.8 // indirect
|
github.com/alibabacloud-go/tea v1.1.8 // indirect
|
||||||
github.com/andybalholm/brotli v1.0.4 // indirect
|
github.com/andybalholm/brotli v1.1.0 // indirect
|
||||||
github.com/apache/pulsar-client-go v0.6.1-0.20210728062540-29414db801a7 // indirect
|
github.com/apache/pulsar-client-go v0.6.1-0.20210728062540-29414db801a7 // indirect
|
||||||
github.com/apache/thrift v0.18.1 // indirect
|
github.com/apache/thrift v0.19.0 // indirect
|
||||||
github.com/ardielle/ardielle-go v1.5.2 // indirect
|
github.com/ardielle/ardielle-go v1.5.2 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 // indirect
|
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 // indirect
|
||||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 // indirect
|
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 // indirect
|
||||||
@ -158,7 +158,7 @@ require (
|
|||||||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
||||||
github.com/golang/mock v1.6.0 // indirect
|
github.com/golang/mock v1.6.0 // indirect
|
||||||
github.com/golang/snappy v0.0.4 // indirect
|
github.com/golang/snappy v0.0.4 // indirect
|
||||||
github.com/google/flatbuffers v2.0.8+incompatible // indirect
|
github.com/google/flatbuffers v24.3.25+incompatible // indirect
|
||||||
github.com/google/s2a-go v0.1.7 // indirect
|
github.com/google/s2a-go v0.1.7 // indirect
|
||||||
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
|
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
|
||||||
github.com/googleapis/gax-go/v2 v2.12.5 // indirect
|
github.com/googleapis/gax-go/v2 v2.12.5 // indirect
|
||||||
@ -205,7 +205,7 @@ require (
|
|||||||
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
|
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
|
||||||
github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 // indirect
|
github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 // indirect
|
||||||
github.com/pierrec/lz4 v2.5.2+incompatible // indirect
|
github.com/pierrec/lz4 v2.5.2+incompatible // indirect
|
||||||
github.com/pierrec/lz4/v4 v4.1.18 // indirect
|
github.com/pierrec/lz4/v4 v4.1.21 // indirect
|
||||||
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect
|
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect
|
||||||
github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 // indirect
|
github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 // indirect
|
||||||
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 // indirect
|
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 // indirect
|
||||||
@ -260,13 +260,13 @@ require (
|
|||||||
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
|
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
|
||||||
go.uber.org/automaxprocs v1.5.3 // indirect
|
go.uber.org/automaxprocs v1.5.3 // indirect
|
||||||
golang.org/x/arch v0.3.0 // indirect
|
golang.org/x/arch v0.3.0 // indirect
|
||||||
golang.org/x/mod v0.17.0 // indirect
|
golang.org/x/mod v0.18.0 // indirect
|
||||||
golang.org/x/sys v0.28.0 // indirect
|
golang.org/x/sys v0.28.0 // indirect
|
||||||
golang.org/x/term v0.27.0 // indirect
|
golang.org/x/term v0.27.0 // indirect
|
||||||
golang.org/x/time v0.5.0 // indirect
|
golang.org/x/time v0.5.0 // indirect
|
||||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
|
golang.org/x/tools v0.22.0 // indirect
|
||||||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
|
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
|
||||||
gonum.org/v1/gonum v0.11.0 // indirect
|
gonum.org/v1/gonum v0.14.0 // indirect
|
||||||
google.golang.org/genproto v0.0.0-20240624140628-dc46fd24d27d // indirect
|
google.golang.org/genproto v0.0.0-20240624140628-dc46fd24d27d // indirect
|
||||||
google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect
|
google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf // indirect
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf // indirect
|
||||||
|
|||||||
32
go.sum
32
go.sum
@ -104,14 +104,14 @@ github.com/alibabacloud-go/tea v1.1.8 h1:vFF0707fqjGiQTxrtMnIXRjOCvQXf49CuDVRtTo
|
|||||||
github.com/alibabacloud-go/tea v1.1.8/go.mod h1:/tmnEaQMyb4Ky1/5D+SE1BAsa5zj/KeGOFfwYm3N/p4=
|
github.com/alibabacloud-go/tea v1.1.8/go.mod h1:/tmnEaQMyb4Ky1/5D+SE1BAsa5zj/KeGOFfwYm3N/p4=
|
||||||
github.com/aliyun/credentials-go v1.2.7 h1:gLtFylxLZ1TWi1pStIt1O6a53GFU1zkNwjtJir2B4ow=
|
github.com/aliyun/credentials-go v1.2.7 h1:gLtFylxLZ1TWi1pStIt1O6a53GFU1zkNwjtJir2B4ow=
|
||||||
github.com/aliyun/credentials-go v1.2.7/go.mod h1:/KowD1cfGSLrLsH28Jr8W+xwoId0ywIy5lNzDz6O1vw=
|
github.com/aliyun/credentials-go v1.2.7/go.mod h1:/KowD1cfGSLrLsH28Jr8W+xwoId0ywIy5lNzDz6O1vw=
|
||||||
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
|
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
|
||||||
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
|
||||||
github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q=
|
github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q=
|
||||||
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
|
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
|
||||||
github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ=
|
github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ=
|
||||||
github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw=
|
github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw=
|
||||||
github.com/apache/thrift v0.18.1 h1:lNhK/1nqjbwbiOPDBPFJVKxgDEGSepKuTh6OLiXW8kg=
|
github.com/apache/thrift v0.19.0 h1:sOqkWPzMj7w6XaYbJQG7m4sGqVolaW/0D28Ln7yPzMk=
|
||||||
github.com/apache/thrift v0.18.1/go.mod h1:rdQn/dCcDKEWjjylUeueum4vQEjG2v8v2PqriUnbr+I=
|
github.com/apache/thrift v0.19.0/go.mod h1:SUALL216IiaOw2Oy+5Vs9lboJ/t9g40C+G07Dc0QC1I=
|
||||||
github.com/ardielle/ardielle-go v1.5.2 h1:TilHTpHIQJ27R1Tl/iITBzMwiUGSlVfiVhwDNGM3Zj4=
|
github.com/ardielle/ardielle-go v1.5.2 h1:TilHTpHIQJ27R1Tl/iITBzMwiUGSlVfiVhwDNGM3Zj4=
|
||||||
github.com/ardielle/ardielle-go v1.5.2/go.mod h1:I4hy1n795cUhaVt/ojz83SNVCYIGsAFAONtv2Dr7HUI=
|
github.com/ardielle/ardielle-go v1.5.2/go.mod h1:I4hy1n795cUhaVt/ojz83SNVCYIGsAFAONtv2Dr7HUI=
|
||||||
github.com/ardielle/ardielle-tools v1.5.4/go.mod h1:oZN+JRMnqGiIhrzkRN9l26Cej9dEx4jeNG6A+AdkShk=
|
github.com/ardielle/ardielle-tools v1.5.4/go.mod h1:oZN+JRMnqGiIhrzkRN9l26Cej9dEx4jeNG6A+AdkShk=
|
||||||
@ -427,8 +427,8 @@ github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ
|
|||||||
github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA=
|
github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA=
|
||||||
github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU=
|
github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU=
|
||||||
github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
|
github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
|
||||||
github.com/google/flatbuffers v2.0.8+incompatible h1:ivUb1cGomAB101ZM1T0nOiWz9pSrTMoa9+EiY7igmkM=
|
github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI=
|
||||||
github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
|
||||||
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
||||||
github.com/google/go-cmp v0.2.1-0.20190312032427-6f77996f0c42/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
github.com/google/go-cmp v0.2.1-0.20190312032427-6f77996f0c42/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||||
@ -742,8 +742,8 @@ github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2
|
|||||||
github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI=
|
github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI=
|
||||||
github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI=
|
github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI=
|
||||||
github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
|
github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
|
||||||
github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ=
|
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
|
||||||
github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
|
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
|
||||||
github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
|
github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
|
||||||
github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
|
github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
|
||||||
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTmyFqUwr+jcCvpVkK7sumiz+ko5H9eq4=
|
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTmyFqUwr+jcCvpVkK7sumiz+ko5H9eq4=
|
||||||
@ -1122,8 +1122,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
|||||||
golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||||
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||||
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||||
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
|
golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0=
|
||||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
@ -1367,19 +1367,19 @@ golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
|||||||
golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||||
golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||||
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
|
golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA=
|
||||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c=
|
||||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk=
|
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
||||||
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
|
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||||
gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
|
gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
|
||||||
gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
|
gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
|
||||||
gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
|
gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
|
||||||
gonum.org/v1/gonum v0.11.0 h1:f1IJhK4Km5tBJmaiJXtk/PkL4cdVX6J+tGiM187uT5E=
|
gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0=
|
||||||
gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA=
|
gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU=
|
||||||
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
|
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
|
||||||
gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
|
gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
|
||||||
gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
|
gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
|
||||||
|
|||||||
@ -13,7 +13,7 @@ class MilvusConan(ConanFile):
|
|||||||
"lz4/1.9.4#c5afb86edd69ac0df30e3a9e192e43db",
|
"lz4/1.9.4#c5afb86edd69ac0df30e3a9e192e43db",
|
||||||
"snappy/1.1.9#0519333fef284acd04806243de7d3070",
|
"snappy/1.1.9#0519333fef284acd04806243de7d3070",
|
||||||
"lzo/2.10#9517fc1bcc4d4cc229a79806003a1baa",
|
"lzo/2.10#9517fc1bcc4d4cc229a79806003a1baa",
|
||||||
"arrow/15.0.0#0456d916ff25d509e0724c5b219b4c45",
|
"arrow/17.0.0#8cea917a6e06ca17c28411966d6fcdd7",
|
||||||
"openssl/3.1.2#02594c4c0a6e2b4feb3cd15119993597",
|
"openssl/3.1.2#02594c4c0a6e2b4feb3cd15119993597",
|
||||||
"aws-sdk-cpp/1.9.234#28d6d2c175975900ce292bafe8022c88",
|
"aws-sdk-cpp/1.9.234#28d6d2c175975900ce292bafe8022c88",
|
||||||
"googleapis/cci.20221108#65604e1b3b9a6b363044da625b201a2a",
|
"googleapis/cci.20221108#65604e1b3b9a6b363044da625b201a2a",
|
||||||
@ -72,6 +72,7 @@ class MilvusConan(ConanFile):
|
|||||||
"aws-sdk-cpp:transfer": False,
|
"aws-sdk-cpp:transfer": False,
|
||||||
"gtest:build_gmock": False,
|
"gtest:build_gmock": False,
|
||||||
"boost:without_locale": False,
|
"boost:without_locale": False,
|
||||||
|
"boost:without_test": True,
|
||||||
"glog:with_gflags": True,
|
"glog:with_gflags": True,
|
||||||
"glog:shared": True,
|
"glog:shared": True,
|
||||||
"prometheus-cpp:with_pull": False,
|
"prometheus-cpp:with_pull": False,
|
||||||
|
|||||||
@ -32,6 +32,7 @@ include_directories(
|
|||||||
${SIMDJSON_INCLUDE_DIR}
|
${SIMDJSON_INCLUDE_DIR}
|
||||||
${TANTIVY_INCLUDE_DIR}
|
${TANTIVY_INCLUDE_DIR}
|
||||||
${CONAN_INCLUDE_DIRS}
|
${CONAN_INCLUDE_DIRS}
|
||||||
|
${MILVUS_STORAGE_INCLUDE_DIR}
|
||||||
)
|
)
|
||||||
|
|
||||||
add_subdirectory( pb )
|
add_subdirectory( pb )
|
||||||
@ -73,6 +74,7 @@ set(LINK_TARGETS
|
|||||||
simdjson
|
simdjson
|
||||||
tantivy_binding
|
tantivy_binding
|
||||||
knowhere
|
knowhere
|
||||||
|
milvus-storage
|
||||||
${OpenMP_CXX_FLAGS}
|
${OpenMP_CXX_FLAGS}
|
||||||
${CONAN_LIBS})
|
${CONAN_LIBS})
|
||||||
|
|
||||||
|
|||||||
97
internal/core/src/segcore/packed_reader_c.cpp
Normal file
97
internal/core/src/segcore/packed_reader_c.cpp
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "segcore/packed_reader_c.h"
|
||||||
|
#include "milvus-storage/packed/reader.h"
|
||||||
|
#include "milvus-storage/common/log.h"
|
||||||
|
#include "milvus-storage/filesystem/fs.h"
|
||||||
|
#include "milvus-storage/common/config.h"
|
||||||
|
|
||||||
|
#include <arrow/c/bridge.h>
|
||||||
|
#include <arrow/filesystem/filesystem.h>
|
||||||
|
#include <arrow/status.h>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
int
|
||||||
|
NewPackedReader(const char* path,
|
||||||
|
struct ArrowSchema* schema,
|
||||||
|
const int64_t buffer_size,
|
||||||
|
CPackedReader* c_packed_reader) {
|
||||||
|
try {
|
||||||
|
auto truePath = std::string(path);
|
||||||
|
auto factory = std::make_shared<milvus_storage::FileSystemFactory>();
|
||||||
|
auto conf = milvus_storage::StorageConfig();
|
||||||
|
conf.uri = "file:///tmp/";
|
||||||
|
auto trueFs = factory->BuildFileSystem(conf, &truePath).value();
|
||||||
|
auto trueSchema = arrow::ImportSchema(schema).ValueOrDie();
|
||||||
|
std::set<int> needed_columns;
|
||||||
|
for (int i = 0; i < trueSchema->num_fields(); i++) {
|
||||||
|
needed_columns.emplace(i);
|
||||||
|
}
|
||||||
|
auto reader = std::make_unique<milvus_storage::PackedRecordBatchReader>(
|
||||||
|
*trueFs, path, trueSchema, needed_columns, buffer_size);
|
||||||
|
*c_packed_reader = reader.release();
|
||||||
|
return 0;
|
||||||
|
} catch (std::exception& e) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
ReadNext(CPackedReader c_packed_reader,
|
||||||
|
CArrowArray* out_array,
|
||||||
|
CArrowSchema* out_schema) {
|
||||||
|
try {
|
||||||
|
auto packed_reader =
|
||||||
|
static_cast<milvus_storage::PackedRecordBatchReader*>(
|
||||||
|
c_packed_reader);
|
||||||
|
std::shared_ptr<arrow::RecordBatch> record_batch;
|
||||||
|
auto status = packed_reader->ReadNext(&record_batch);
|
||||||
|
if (!status.ok()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (record_batch == nullptr) {
|
||||||
|
// end of file
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
std::unique_ptr<ArrowArray> arr = std::make_unique<ArrowArray>();
|
||||||
|
std::unique_ptr<ArrowSchema> schema =
|
||||||
|
std::make_unique<ArrowSchema>();
|
||||||
|
auto status = arrow::ExportRecordBatch(
|
||||||
|
*record_batch, arr.get(), schema.get());
|
||||||
|
if (!status.ok()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
*out_array = arr.release();
|
||||||
|
*out_schema = schema.release();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
} catch (std::exception& e) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
CloseReader(CPackedReader c_packed_reader) {
|
||||||
|
try {
|
||||||
|
auto packed_reader =
|
||||||
|
static_cast<milvus_storage::PackedRecordBatchReader*>(
|
||||||
|
c_packed_reader);
|
||||||
|
delete packed_reader;
|
||||||
|
return 0;
|
||||||
|
} catch (std::exception& e) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
64
internal/core/src/segcore/packed_reader_c.h
Normal file
64
internal/core/src/segcore/packed_reader_c.h
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <arrow/c/abi.h>
|
||||||
|
|
||||||
|
typedef void* CPackedReader;
|
||||||
|
typedef void* CArrowArray;
|
||||||
|
typedef void* CArrowSchema;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Open a packed reader to read needed columns in the specified path.
|
||||||
|
*
|
||||||
|
* @param path The root path of the packed files to read.
|
||||||
|
* @param schema The original schema of data.
|
||||||
|
* @param buffer_size The max buffer size of the packed reader.
|
||||||
|
* @param c_packed_reader The output pointer of the packed reader.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
NewPackedReader(const char* path,
|
||||||
|
struct ArrowSchema* schema,
|
||||||
|
const int64_t buffer_size,
|
||||||
|
CPackedReader* c_packed_reader);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Read the next record batch from the packed reader.
|
||||||
|
* By default, the maximum return batch is 1024 rows.
|
||||||
|
*
|
||||||
|
* @param c_packed_reader The packed reader to read.
|
||||||
|
* @param out_array The output pointer of the arrow array.
|
||||||
|
* @param out_schema The output pointer of the arrow schema.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
ReadNext(CPackedReader c_packed_reader,
|
||||||
|
CArrowArray* out_array,
|
||||||
|
CArrowSchema* out_schema);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Close the packed reader and release the resources.
|
||||||
|
*
|
||||||
|
* @param c_packed_reader The packed reader to close.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
CloseReader(CPackedReader c_packed_reader);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
81
internal/core/src/segcore/packed_writer_c.cpp
Normal file
81
internal/core/src/segcore/packed_writer_c.cpp
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "segcore/packed_writer_c.h"
|
||||||
|
#include "milvus-storage/packed/writer.h"
|
||||||
|
#include "milvus-storage/common/log.h"
|
||||||
|
#include "milvus-storage/common/config.h"
|
||||||
|
#include "milvus-storage/filesystem/fs.h"
|
||||||
|
|
||||||
|
#include <arrow/c/bridge.h>
|
||||||
|
#include <arrow/filesystem/filesystem.h>
|
||||||
|
|
||||||
|
int
|
||||||
|
NewPackedWriter(const char* path,
|
||||||
|
struct ArrowSchema* schema,
|
||||||
|
const int64_t buffer_size,
|
||||||
|
CPackedWriter* c_packed_writer) {
|
||||||
|
try {
|
||||||
|
auto truePath = std::string(path);
|
||||||
|
auto factory = std::make_shared<milvus_storage::FileSystemFactory>();
|
||||||
|
auto conf = milvus_storage::StorageConfig();
|
||||||
|
conf.uri = "file:///tmp/";
|
||||||
|
auto trueFs = factory->BuildFileSystem(conf, &truePath).value();
|
||||||
|
auto trueSchema = arrow::ImportSchema(schema).ValueOrDie();
|
||||||
|
auto writer = std::make_unique<milvus_storage::PackedRecordBatchWriter>(
|
||||||
|
buffer_size, trueSchema, trueFs, truePath, conf);
|
||||||
|
|
||||||
|
*c_packed_writer = writer.release();
|
||||||
|
return 0;
|
||||||
|
} catch (std::exception& e) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
WriteRecordBatch(CPackedWriter c_packed_writer,
|
||||||
|
struct ArrowArray* array,
|
||||||
|
struct ArrowSchema* schema) {
|
||||||
|
try {
|
||||||
|
auto packed_writer =
|
||||||
|
static_cast<milvus_storage::PackedRecordBatchWriter*>(
|
||||||
|
c_packed_writer);
|
||||||
|
auto record_batch =
|
||||||
|
arrow::ImportRecordBatch(array, schema).ValueOrDie();
|
||||||
|
auto status = packed_writer->Write(record_batch);
|
||||||
|
if (!status.ok()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
} catch (std::exception& e) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
CloseWriter(CPackedWriter c_packed_writer) {
|
||||||
|
try {
|
||||||
|
auto packed_writer =
|
||||||
|
static_cast<milvus_storage::PackedRecordBatchWriter*>(
|
||||||
|
c_packed_writer);
|
||||||
|
auto status = packed_writer->Close();
|
||||||
|
delete packed_writer;
|
||||||
|
if (!status.ok()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
} catch (std::exception& e) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
41
internal/core/src/segcore/packed_writer_c.h
Normal file
41
internal/core/src/segcore/packed_writer_c.h
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <arrow/c/abi.h>
|
||||||
|
|
||||||
|
typedef void* CPackedWriter;
|
||||||
|
|
||||||
|
int
|
||||||
|
NewPackedWriter(const char* path,
|
||||||
|
struct ArrowSchema* schema,
|
||||||
|
const int64_t buffer_size,
|
||||||
|
CPackedWriter* c_packed_writer);
|
||||||
|
|
||||||
|
int
|
||||||
|
WriteRecordBatch(CPackedWriter c_packed_writer,
|
||||||
|
struct ArrowArray* array,
|
||||||
|
struct ArrowSchema* schema);
|
||||||
|
|
||||||
|
int
|
||||||
|
CloseWriter(CPackedWriter c_packed_writer);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
@ -43,7 +43,7 @@ PayloadReader::init(std::shared_ptr<arrow::io::BufferReader> input,
|
|||||||
// Configure general Parquet reader settings
|
// Configure general Parquet reader settings
|
||||||
auto reader_properties = parquet::ReaderProperties(pool);
|
auto reader_properties = parquet::ReaderProperties(pool);
|
||||||
reader_properties.set_buffer_size(4096 * 4);
|
reader_properties.set_buffer_size(4096 * 4);
|
||||||
reader_properties.enable_buffered_stream();
|
// reader_properties.enable_buffered_stream();
|
||||||
|
|
||||||
// Configure Arrow-specific Parquet reader settings
|
// Configure Arrow-specific Parquet reader settings
|
||||||
auto arrow_reader_props = parquet::ArrowReaderProperties();
|
auto arrow_reader_props = parquet::ArrowReaderProperties();
|
||||||
|
|||||||
1
internal/core/thirdparty/CMakeLists.txt
vendored
1
internal/core/thirdparty/CMakeLists.txt
vendored
@ -45,3 +45,4 @@ if (LINUX)
|
|||||||
add_subdirectory(jemalloc)
|
add_subdirectory(jemalloc)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
add_subdirectory(milvus-storage)
|
||||||
51
internal/core/thirdparty/milvus-storage/CMakeLists.txt
vendored
Normal file
51
internal/core/thirdparty/milvus-storage/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
#-------------------------------------------------------------------------------
|
||||||
|
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||||
|
# or implied. See the License for the specific language governing permissions and limitations under the License.
|
||||||
|
#-------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Update milvus-storage_VERSION for the first occurrence
|
||||||
|
milvus_add_pkg_config("milvus-storage")
|
||||||
|
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
|
||||||
|
set( milvus-storage_VERSION 7475494 )
|
||||||
|
set( GIT_REPOSITORY "https://github.com/milvus-io/milvus-storage.git")
|
||||||
|
message(STATUS "milvus-storage repo: ${GIT_REPOSITORY}")
|
||||||
|
message(STATUS "milvus-storage version: ${milvus-storage_VERSION}")
|
||||||
|
|
||||||
|
message(STATUS "Building milvus-storage-${milvus-storage_SOURCE_VER} from source")
|
||||||
|
message(STATUS ${CMAKE_BUILD_TYPE})
|
||||||
|
|
||||||
|
if ( ENABLE_AZURE_FS STREQUAL "ON" )
|
||||||
|
set(WITH_AZURE_FS ON CACHE BOOL "" FORCE )
|
||||||
|
else ()
|
||||||
|
set(WITH_AZURE_FS OFF CACHE BOOL "" FORCE )
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
set( CMAKE_PREFIX_PATH ${CONAN_BOOST_ROOT} )
|
||||||
|
FetchContent_Declare(
|
||||||
|
milvus-storage
|
||||||
|
GIT_REPOSITORY ${GIT_REPOSITORY}
|
||||||
|
GIT_TAG ${milvus-storage_VERSION}
|
||||||
|
SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-src
|
||||||
|
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-build
|
||||||
|
SOURCE_SUBDIR cpp
|
||||||
|
DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} )
|
||||||
|
|
||||||
|
FetchContent_GetProperties( milvus-storage )
|
||||||
|
if ( NOT milvus-storage_POPULATED )
|
||||||
|
FetchContent_Populate( milvus-storage )
|
||||||
|
|
||||||
|
# Adding the following target:
|
||||||
|
# milvus-storage
|
||||||
|
add_subdirectory( ${milvus-storage_SOURCE_DIR}/cpp
|
||||||
|
${milvus-storage_BINARY_DIR} )
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set( MILVUS_STORAGE_INCLUDE_DIR ${milvus-storage_SOURCE_DIR}/cpp/include CACHE INTERNAL "Path to milvus-storage include directory" )
|
||||||
9
internal/core/thirdparty/milvus-storage/milvus-storage.pc.in
vendored
Normal file
9
internal/core/thirdparty/milvus-storage/milvus-storage.pc.in
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||||
|
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||||
|
|
||||||
|
Name: Milvus Storage
|
||||||
|
Description: Storage modules for Milvus
|
||||||
|
Version: @MILVUS_VERSION@
|
||||||
|
|
||||||
|
Libs: -L${libdir} -lmilvus-storage
|
||||||
|
Cflags: -I${includedir}
|
||||||
@ -16,6 +16,7 @@ include_directories(
|
|||||||
${SIMDJSON_INCLUDE_DIR}
|
${SIMDJSON_INCLUDE_DIR}
|
||||||
${TANTIVY_INCLUDE_DIR}
|
${TANTIVY_INCLUDE_DIR}
|
||||||
${CONAN_INCLUDE_DIRS}
|
${CONAN_INCLUDE_DIRS}
|
||||||
|
${MILVUS_STORAGE_INCLUDE_DIR}
|
||||||
)
|
)
|
||||||
|
|
||||||
add_definitions(-DMILVUS_TEST_SEGCORE_YAML_PATH="${CMAKE_SOURCE_DIR}/unittest/test_utils/test_segcore.yaml")
|
add_definitions(-DMILVUS_TEST_SEGCORE_YAML_PATH="${CMAKE_SOURCE_DIR}/unittest/test_utils/test_segcore.yaml")
|
||||||
@ -157,6 +158,7 @@ if (LINUX)
|
|||||||
gtest
|
gtest
|
||||||
milvus_core
|
milvus_core
|
||||||
knowhere
|
knowhere
|
||||||
|
milvus-storage
|
||||||
)
|
)
|
||||||
install(TARGETS index_builder_test DESTINATION unittest)
|
install(TARGETS index_builder_test DESTINATION unittest)
|
||||||
endif()
|
endif()
|
||||||
@ -169,6 +171,7 @@ target_link_libraries(all_tests
|
|||||||
gtest
|
gtest
|
||||||
milvus_core
|
milvus_core
|
||||||
knowhere
|
knowhere
|
||||||
|
milvus-storage
|
||||||
)
|
)
|
||||||
|
|
||||||
install(TARGETS all_tests DESTINATION unittest)
|
install(TARGETS all_tests DESTINATION unittest)
|
||||||
|
|||||||
7
internal/storagev2/OWNERS
Normal file
7
internal/storagev2/OWNERS
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
reviewers:
|
||||||
|
- tedxu
|
||||||
|
- shaoting-huang
|
||||||
|
- sunby
|
||||||
|
|
||||||
|
approvers:
|
||||||
|
- maintainers
|
||||||
80
internal/storagev2/common/arrowutil/arrow_util.go
Normal file
80
internal/storagev2/common/arrowutil/arrow_util.go
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package arrowutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/memory"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet/file"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet/pqarrow"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
)
|
||||||
|
|
||||||
|
func MakeArrowFileReader(fs fs.Fs, filePath string) (*pqarrow.FileReader, error) {
|
||||||
|
f, err := fs.OpenFile(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
parquetReader, err := file.NewParquetReader(f)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return pqarrow.NewFileReader(parquetReader, pqarrow.ArrowReadProperties{BatchSize: constant.ReadBatchSize}, memory.DefaultAllocator)
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeArrowRecordReader(reader *pqarrow.FileReader, opts *options.ReadOptions) (array.RecordReader, error) {
|
||||||
|
var rowGroupsIndices []int
|
||||||
|
var columnIndices []int
|
||||||
|
metadata := reader.ParquetReader().MetaData()
|
||||||
|
for _, c := range opts.Columns {
|
||||||
|
columnIndices = append(columnIndices, metadata.Schema.ColumnIndexByName(c))
|
||||||
|
}
|
||||||
|
for _, f := range opts.Filters {
|
||||||
|
columnIndices = append(columnIndices, metadata.Schema.ColumnIndexByName(f.GetColumnName()))
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < len(metadata.RowGroups); i++ {
|
||||||
|
rg := metadata.RowGroup(i)
|
||||||
|
var canIgnored bool
|
||||||
|
for _, filter := range opts.Filters {
|
||||||
|
columnIndex := rg.Schema.ColumnIndexByName(filter.GetColumnName())
|
||||||
|
columnChunk, err := rg.ColumnChunk(columnIndex)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
columnStats, err := columnChunk.Statistics()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if columnStats == nil || !columnStats.HasMinMax() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if filter.CheckStatistics(columnStats) {
|
||||||
|
canIgnored = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !canIgnored {
|
||||||
|
rowGroupsIndices = append(rowGroupsIndices, i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return reader.GetRecordReader(context.TODO(), columnIndices, rowGroupsIndices)
|
||||||
|
}
|
||||||
31
internal/storagev2/common/constant/constant.go
Normal file
31
internal/storagev2/common/constant/constant.go
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package constant
|
||||||
|
|
||||||
|
const (
|
||||||
|
ReadBatchSize = 1024
|
||||||
|
ManifestTempFileSuffix = ".manifest.tmp"
|
||||||
|
ManifestFileSuffix = ".manifest"
|
||||||
|
ManifestDir = "versions"
|
||||||
|
BlobDir = "blobs"
|
||||||
|
ParquetDataFileSuffix = ".parquet"
|
||||||
|
OffsetFieldName = "__offset"
|
||||||
|
VectorDataDir = "vector"
|
||||||
|
ScalarDataDir = "scalar"
|
||||||
|
DeleteDataDir = "delete"
|
||||||
|
LatestManifestVersion = -1
|
||||||
|
|
||||||
|
EndpointOverride = "endpoint_override"
|
||||||
|
)
|
||||||
27
internal/storagev2/common/errors/errors.go
Normal file
27
internal/storagev2/common/errors/errors.go
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package errors
|
||||||
|
|
||||||
|
import "github.com/cockroachdb/errors"
|
||||||
|
|
||||||
|
var (
|
||||||
|
ErrSchemaIsNil = errors.New("schema is nil")
|
||||||
|
ErrBlobAlreadyExist = errors.New("blob already exist")
|
||||||
|
ErrBlobNotExist = errors.New("blob not exist")
|
||||||
|
ErrSchemaNotMatch = errors.New("schema not match")
|
||||||
|
ErrColumnNotExist = errors.New("column not exist")
|
||||||
|
ErrInvalidPath = errors.New("invalid path")
|
||||||
|
ErrNoEndpoint = errors.New("no endpoint is specified")
|
||||||
|
)
|
||||||
70
internal/storagev2/common/log/field.go
Normal file
70
internal/storagev2/common/log/field.go
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package log
|
||||||
|
|
||||||
|
import "go.uber.org/zap"
|
||||||
|
|
||||||
|
var (
|
||||||
|
// not lint
|
||||||
|
Skip = zap.Skip
|
||||||
|
Binary = zap.Binary
|
||||||
|
Bool = zap.Bool
|
||||||
|
Boolp = zap.Boolp
|
||||||
|
ByteString = zap.ByteString
|
||||||
|
Complex128 = zap.Complex128
|
||||||
|
Complex128p = zap.Complex128p
|
||||||
|
Complex64 = zap.Complex64
|
||||||
|
Complex64p = zap.Complex64p
|
||||||
|
Float64 = zap.Float64
|
||||||
|
Float64p = zap.Float64p
|
||||||
|
Float32 = zap.Float32
|
||||||
|
Float32p = zap.Float32p
|
||||||
|
Int = zap.Int
|
||||||
|
Intp = zap.Intp
|
||||||
|
Int64 = zap.Int64
|
||||||
|
Int64p = zap.Int64p
|
||||||
|
Int32 = zap.Int32
|
||||||
|
Int32p = zap.Int32p
|
||||||
|
Int16 = zap.Int16
|
||||||
|
Int16p = zap.Int16p
|
||||||
|
Int8 = zap.Int8
|
||||||
|
Int8p = zap.Int8p
|
||||||
|
String = zap.String
|
||||||
|
Stringp = zap.Stringp
|
||||||
|
Uint = zap.Uint
|
||||||
|
Uintp = zap.Uintp
|
||||||
|
Uint64 = zap.Uint64
|
||||||
|
Uint64p = zap.Uint64p
|
||||||
|
Uint32 = zap.Uint32
|
||||||
|
Uint32p = zap.Uint32p
|
||||||
|
Uint16 = zap.Uint16
|
||||||
|
Uint16p = zap.Uint16p
|
||||||
|
Uint8 = zap.Uint8
|
||||||
|
Uint8p = zap.Uint8p
|
||||||
|
Uintptr = zap.Uintptr
|
||||||
|
Uintptrp = zap.Uintptrp
|
||||||
|
Reflect = zap.Reflect
|
||||||
|
Namespace = zap.Namespace
|
||||||
|
Stringer = zap.Stringer
|
||||||
|
Time = zap.Time
|
||||||
|
Timep = zap.Timep
|
||||||
|
Stack = zap.Stack
|
||||||
|
StackSkip = zap.StackSkip
|
||||||
|
Duration = zap.Duration
|
||||||
|
Durationp = zap.Durationp
|
||||||
|
Object = zap.Object
|
||||||
|
Inline = zap.Inline
|
||||||
|
Any = zap.Any
|
||||||
|
)
|
||||||
106
internal/storagev2/common/log/log.go
Normal file
106
internal/storagev2/common/log/log.go
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package log
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"go.uber.org/zap"
|
||||||
|
"go.uber.org/zap/zapcore"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Level = zapcore.Level
|
||||||
|
|
||||||
|
const (
|
||||||
|
DebugLevel = zapcore.DebugLevel
|
||||||
|
InfoLevel = zapcore.InfoLevel
|
||||||
|
WarnLevel = zapcore.WarnLevel
|
||||||
|
ErrorLevel = zapcore.ErrorLevel
|
||||||
|
PanicLevel = zapcore.PanicLevel
|
||||||
|
FatalLevel = zapcore.FatalLevel
|
||||||
|
)
|
||||||
|
|
||||||
|
type Logger struct {
|
||||||
|
l *zap.Logger
|
||||||
|
al *zap.AtomicLevel
|
||||||
|
}
|
||||||
|
|
||||||
|
func New(out io.Writer, level Level) *Logger {
|
||||||
|
if out == nil {
|
||||||
|
out = os.Stderr
|
||||||
|
}
|
||||||
|
|
||||||
|
al := zap.NewAtomicLevelAt(level)
|
||||||
|
cfg := zap.NewDevelopmentEncoderConfig()
|
||||||
|
|
||||||
|
core := zapcore.NewCore(
|
||||||
|
zapcore.NewConsoleEncoder(cfg),
|
||||||
|
zapcore.AddSync(out),
|
||||||
|
al,
|
||||||
|
)
|
||||||
|
return &Logger{l: zap.New(core, zap.AddCaller(), zap.AddCallerSkip(2)), al: &al}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Logger) SetLevel(level Level) {
|
||||||
|
if l.al != nil {
|
||||||
|
l.al.SetLevel(level)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type Field = zap.Field
|
||||||
|
|
||||||
|
func (l *Logger) Debug(msg string, fields ...Field) {
|
||||||
|
l.l.Debug(msg, fields...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Logger) Info(msg string, fields ...Field) {
|
||||||
|
l.l.Info(msg, fields...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Logger) Warn(msg string, fields ...Field) {
|
||||||
|
l.l.Warn(msg, fields...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Logger) Error(msg string, fields ...Field) {
|
||||||
|
l.l.Error(msg, fields...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Logger) Panic(msg string, fields ...Field) {
|
||||||
|
l.l.Panic(msg, fields...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Logger) Fatal(msg string, fields ...Field) {
|
||||||
|
l.l.Fatal(msg, fields...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Logger) Sync() error {
|
||||||
|
return l.l.Sync()
|
||||||
|
}
|
||||||
|
|
||||||
|
var std = New(os.Stderr, DebugLevel)
|
||||||
|
|
||||||
|
func Default() *Logger { return std }
|
||||||
|
func ReplaceDefault(l *Logger) { std = l }
|
||||||
|
func SetLevel(level Level) { std.SetLevel(level) }
|
||||||
|
|
||||||
|
func Debug(msg string, fields ...Field) { std.Debug(msg, fields...) }
|
||||||
|
func Info(msg string, fields ...Field) { std.Info(msg, fields...) }
|
||||||
|
func Warn(msg string, fields ...Field) { std.Warn(msg, fields...) }
|
||||||
|
func Error(msg string, fields ...Field) { std.Error(msg, fields...) }
|
||||||
|
func Panic(msg string, fields ...Field) { std.Panic(msg, fields...) }
|
||||||
|
func Fatal(msg string, fields ...Field) { std.Fatal(msg, fields...) }
|
||||||
|
|
||||||
|
func Sync() error { return std.Sync() }
|
||||||
33
internal/storagev2/common/log/log_test.go
Normal file
33
internal/storagev2/common/log/log_test.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package log
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLogger(t *testing.T) {
|
||||||
|
defer Sync()
|
||||||
|
Info("Testing")
|
||||||
|
Debug("Testing")
|
||||||
|
Warn("Testing")
|
||||||
|
Error("Testing")
|
||||||
|
defer func() {
|
||||||
|
if err := recover(); err != nil {
|
||||||
|
Debug("logPanic recover")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
Panic("Testing")
|
||||||
|
}
|
||||||
34
internal/storagev2/common/log/options.go
Normal file
34
internal/storagev2/common/log/options.go
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package log
|
||||||
|
|
||||||
|
import "go.uber.org/zap"
|
||||||
|
|
||||||
|
type Option = zap.Option
|
||||||
|
|
||||||
|
var (
|
||||||
|
WrapCore = zap.WrapCore
|
||||||
|
Hooks = zap.Hooks
|
||||||
|
Fields = zap.Fields
|
||||||
|
ErrorOutput = zap.ErrorOutput
|
||||||
|
Development = zap.Development
|
||||||
|
AddCaller = zap.AddCaller
|
||||||
|
WithCaller = zap.WithCaller
|
||||||
|
AddCallerSkip = zap.AddCallerSkip
|
||||||
|
AddStacktrace = zap.AddStacktrace
|
||||||
|
IncreaseLevel = zap.IncreaseLevel
|
||||||
|
WithFatalHook = zap.WithFatalHook
|
||||||
|
WithClock = zap.WithClock
|
||||||
|
)
|
||||||
404
internal/storagev2/common/utils/utils.go
Normal file
404
internal/storagev2/common/utils/utils.go
Normal file
@ -0,0 +1,404 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/endian"
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
|
"github.com/google/uuid"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||||
|
"github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||||
|
)
|
||||||
|
|
||||||
|
var ErrInvalidArgument = errors.New("invalid argument")
|
||||||
|
|
||||||
|
func ToProtobufType(dataType arrow.Type) (storagev2pb.LogicType, error) {
|
||||||
|
typeId := int(dataType)
|
||||||
|
if typeId < 0 || typeId >= int(storagev2pb.LogicType_MAX_ID) {
|
||||||
|
return storagev2pb.LogicType_NA, fmt.Errorf("parse data type %v: %w", dataType, ErrInvalidArgument)
|
||||||
|
}
|
||||||
|
return storagev2pb.LogicType(typeId), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func ToProtobufMetadata(metadata *arrow.Metadata) (*storagev2pb.KeyValueMetadata, error) {
|
||||||
|
keys := metadata.Keys()
|
||||||
|
values := metadata.Values()
|
||||||
|
return &storagev2pb.KeyValueMetadata{Keys: keys, Values: values}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func ToProtobufDataType(dataType arrow.DataType) (*storagev2pb.DataType, error) {
|
||||||
|
protoType := &storagev2pb.DataType{}
|
||||||
|
err := SetTypeValues(protoType, dataType)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
logicType, err := ToProtobufType(dataType.ID())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
protoType.LogicType = logicType
|
||||||
|
|
||||||
|
if len(GetFields(dataType)) > 0 {
|
||||||
|
for _, field := range GetFields(dataType) {
|
||||||
|
fieldCopy := field
|
||||||
|
protoFieldType, err := ToProtobufField(&fieldCopy)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
protoType.Children = append(protoType.Children, protoFieldType)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return protoType, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetFields TODO CHECK MORE TYPES
|
||||||
|
func GetFields(dataType arrow.DataType) []arrow.Field {
|
||||||
|
switch dataType.ID() {
|
||||||
|
case arrow.LIST:
|
||||||
|
listType, _ := dataType.(*arrow.ListType)
|
||||||
|
return listType.Fields()
|
||||||
|
case arrow.STRUCT:
|
||||||
|
structType, _ := dataType.(*arrow.StructType)
|
||||||
|
return structType.Fields()
|
||||||
|
case arrow.MAP:
|
||||||
|
mapType, _ := dataType.(*arrow.MapType)
|
||||||
|
return mapType.Fields()
|
||||||
|
case arrow.FIXED_SIZE_LIST:
|
||||||
|
listType, _ := dataType.(*arrow.FixedSizeListType)
|
||||||
|
return listType.Fields()
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func ToProtobufField(field *arrow.Field) (*storagev2pb.Field, error) {
|
||||||
|
protoField := &storagev2pb.Field{}
|
||||||
|
protoField.Name = field.Name
|
||||||
|
protoField.Nullable = field.Nullable
|
||||||
|
|
||||||
|
if field.Metadata.Len() != 0 {
|
||||||
|
fieldMetadata, err := ToProtobufMetadata(&field.Metadata)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("convert to protobuf field: %w", err)
|
||||||
|
}
|
||||||
|
protoField.Metadata = fieldMetadata
|
||||||
|
}
|
||||||
|
|
||||||
|
dataType, err := ToProtobufDataType(field.Type)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("convert to protobuf field: %w", err)
|
||||||
|
}
|
||||||
|
protoField.DataType = dataType
|
||||||
|
return protoField, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func SetTypeValues(protoType *storagev2pb.DataType, dataType arrow.DataType) error {
|
||||||
|
switch dataType.ID() {
|
||||||
|
case arrow.FIXED_SIZE_BINARY:
|
||||||
|
realType, ok := dataType.(*arrow.FixedSizeBinaryType)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("convert to fixed size binary type: %w", ErrInvalidArgument)
|
||||||
|
}
|
||||||
|
fixedSizeBinaryType := &storagev2pb.FixedSizeBinaryType{}
|
||||||
|
fixedSizeBinaryType.ByteWidth = int32(realType.ByteWidth)
|
||||||
|
protoType.TypeRelatedValues = &storagev2pb.DataType_FixedSizeBinaryType{FixedSizeBinaryType: fixedSizeBinaryType}
|
||||||
|
case arrow.FIXED_SIZE_LIST:
|
||||||
|
realType, ok := dataType.(*arrow.FixedSizeListType)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("convert to fixed size list type: %w", ErrInvalidArgument)
|
||||||
|
}
|
||||||
|
fixedSizeListType := &storagev2pb.FixedSizeListType{}
|
||||||
|
fixedSizeListType.ListSize = realType.Len()
|
||||||
|
protoType.TypeRelatedValues = &storagev2pb.DataType_FixedSizeListType{FixedSizeListType: fixedSizeListType}
|
||||||
|
case arrow.DICTIONARY:
|
||||||
|
realType, ok := dataType.(*arrow.DictionaryType)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("convert to dictionary type: %w", ErrInvalidArgument)
|
||||||
|
}
|
||||||
|
dictionaryType := &storagev2pb.DictionaryType{}
|
||||||
|
indexType, err := ToProtobufDataType(realType.IndexType)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
dictionaryType.IndexType = indexType
|
||||||
|
valueType, err := ToProtobufDataType(realType.ValueType)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
dictionaryType.ValueType = valueType
|
||||||
|
dictionaryType.Ordered = realType.Ordered
|
||||||
|
protoType.TypeRelatedValues = &storagev2pb.DataType_DictionaryType{DictionaryType: dictionaryType}
|
||||||
|
|
||||||
|
case arrow.MAP:
|
||||||
|
realType, ok := dataType.(*arrow.MapType)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("convert to map type: %w", ErrInvalidArgument)
|
||||||
|
}
|
||||||
|
mapType := &storagev2pb.MapType{}
|
||||||
|
mapType.KeysSorted = realType.KeysSorted
|
||||||
|
protoType.TypeRelatedValues = &storagev2pb.DataType_MapType{MapType: mapType}
|
||||||
|
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func ToProtobufSchema(schema *arrow.Schema) (*storagev2pb.ArrowSchema, error) {
|
||||||
|
protoSchema := &storagev2pb.ArrowSchema{}
|
||||||
|
for _, field := range schema.Fields() {
|
||||||
|
fieldCopy := field
|
||||||
|
protoField, err := ToProtobufField(&fieldCopy)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
protoSchema.Fields = append(protoSchema.Fields, protoField)
|
||||||
|
}
|
||||||
|
if schema.Endianness() == endian.LittleEndian {
|
||||||
|
protoSchema.Endianness = storagev2pb.Endianness_Little
|
||||||
|
} else if schema.Endianness() == endian.BigEndian {
|
||||||
|
protoSchema.Endianness = storagev2pb.Endianness_Big
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO FIX ME: golang proto not support proto_schema->mutable_metadata()->add_keys(key);
|
||||||
|
if schema.HasMetadata() && !schema.HasMetadata() {
|
||||||
|
for _, key := range schema.Metadata().Keys() {
|
||||||
|
protoKeyValue := protoSchema.GetMetadata()
|
||||||
|
protoKeyValue.Keys = append(protoKeyValue.Keys, key)
|
||||||
|
}
|
||||||
|
for _, value := range schema.Metadata().Values() {
|
||||||
|
protoKeyValue := protoSchema.GetMetadata()
|
||||||
|
protoKeyValue.Values = append(protoKeyValue.Values, value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return protoSchema, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func FromProtobufSchema(schema *storagev2pb.ArrowSchema) (*arrow.Schema, error) {
|
||||||
|
fields := make([]arrow.Field, 0, len(schema.Fields))
|
||||||
|
for _, field := range schema.Fields {
|
||||||
|
tmp, err := FromProtobufField(field)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
fields = append(fields, *tmp)
|
||||||
|
}
|
||||||
|
tmp, err := FromProtobufKeyValueMetadata(schema.Metadata)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
newSchema := arrow.NewSchema(fields, tmp)
|
||||||
|
return newSchema, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func FromProtobufField(field *storagev2pb.Field) (*arrow.Field, error) {
|
||||||
|
datatype, err := FromProtobufDataType(field.DataType)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata, err := FromProtobufKeyValueMetadata(field.GetMetadata())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &arrow.Field{Name: field.Name, Type: datatype, Nullable: field.Nullable, Metadata: *metadata}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func FromProtobufKeyValueMetadata(metadata *storagev2pb.KeyValueMetadata) (*arrow.Metadata, error) {
|
||||||
|
keys := make([]string, 0)
|
||||||
|
values := make([]string, 0)
|
||||||
|
if metadata != nil {
|
||||||
|
keys = metadata.Keys
|
||||||
|
values = metadata.Values
|
||||||
|
}
|
||||||
|
newMetadata := arrow.NewMetadata(keys, values)
|
||||||
|
return &newMetadata, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FromProtobufDataType converts a protobuf-encoded DataType into the
// corresponding arrow.DataType. Nested types (list, struct, dictionary,
// map, fixed-size list) recurse through FromProtobufField over
// dataType.Children. Unknown logic types yield an ErrInvalidArgument-wrapped
// error.
//
// NOTE(review): the nested cases index Children[0]/Children[1] without a
// length check — a malformed message would panic; confirm inputs are always
// validated upstream.
func FromProtobufDataType(dataType *storagev2pb.DataType) (arrow.DataType, error) {
	switch dataType.LogicType {
	case storagev2pb.LogicType_NA:
		return &arrow.NullType{}, nil
	case storagev2pb.LogicType_BOOL:
		return &arrow.BooleanType{}, nil
	case storagev2pb.LogicType_UINT8:
		return &arrow.Uint8Type{}, nil
	case storagev2pb.LogicType_INT8:
		return &arrow.Int8Type{}, nil
	case storagev2pb.LogicType_UINT16:
		return &arrow.Uint16Type{}, nil
	case storagev2pb.LogicType_INT16:
		return &arrow.Int16Type{}, nil
	case storagev2pb.LogicType_UINT32:
		return &arrow.Uint32Type{}, nil
	case storagev2pb.LogicType_INT32:
		return &arrow.Int32Type{}, nil
	case storagev2pb.LogicType_UINT64:
		return &arrow.Uint64Type{}, nil
	case storagev2pb.LogicType_INT64:
		return &arrow.Int64Type{}, nil
	case storagev2pb.LogicType_HALF_FLOAT:
		return &arrow.Float16Type{}, nil
	case storagev2pb.LogicType_FLOAT:
		return &arrow.Float32Type{}, nil
	case storagev2pb.LogicType_DOUBLE:
		return &arrow.Float64Type{}, nil
	case storagev2pb.LogicType_STRING:
		return &arrow.StringType{}, nil
	case storagev2pb.LogicType_BINARY:
		return &arrow.BinaryType{}, nil

	case storagev2pb.LogicType_LIST:
		// Children[0] carries the list's element field.
		fieldType, err := FromProtobufField(dataType.Children[0])
		if err != nil {
			return nil, err
		}
		listType := arrow.ListOf(fieldType.Type)
		return listType, nil

	case storagev2pb.LogicType_STRUCT:
		fields := make([]arrow.Field, 0, len(dataType.Children))
		for _, child := range dataType.Children {
			field, err := FromProtobufField(child)
			if err != nil {
				return nil, err
			}
			fields = append(fields, *field)
		}
		structType := arrow.StructOf(fields...)
		return structType, nil

	case storagev2pb.LogicType_DICTIONARY:
		// Children[0] is the index (key) type, Children[1] the value type.
		keyType, err := FromProtobufField(dataType.Children[0])
		if err != nil {
			return nil, err
		}
		valueType, err := FromProtobufField(dataType.Children[1])
		if err != nil {
			return nil, err
		}
		dictType := &arrow.DictionaryType{
			IndexType: keyType.Type,
			ValueType: valueType.Type,
		}
		return dictType, nil

	case storagev2pb.LogicType_MAP:
		fieldType, err := FromProtobufField(dataType.Children[0])
		if err != nil {
			return nil, err
		}
		// TODO FIX ME: uses the same child type for both key and value;
		// a proper map should read a separate value type.
		return arrow.MapOf(fieldType.Type, fieldType.Type), nil

	case storagev2pb.LogicType_FIXED_SIZE_BINARY:

		sizeBinaryType := arrow.FixedSizeBinaryType{ByteWidth: int(dataType.GetFixedSizeBinaryType().ByteWidth)}
		return &sizeBinaryType, nil

	case storagev2pb.LogicType_FIXED_SIZE_LIST:
		fieldType, err := FromProtobufField(dataType.Children[0])
		if err != nil {
			return nil, err
		}
		fixedSizeListType := arrow.FixedSizeListOf(int32(int(dataType.GetFixedSizeListType().ListSize)), fieldType.Type)
		return fixedSizeListType, nil

	default:
		return nil, fmt.Errorf("parse protobuf datatype: %w", ErrInvalidArgument)
	}
}
|
||||||
|
|
||||||
|
func GetNewParquetFilePath(path string) string {
|
||||||
|
scalarFileId := uuid.New()
|
||||||
|
path = filepath.Join(path, scalarFileId.String()+constant.ParquetDataFileSuffix)
|
||||||
|
return path
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetManifestFilePath(path string, version int64) string {
|
||||||
|
path = filepath.Join(path, constant.ManifestDir, strconv.FormatInt(version, 10)+constant.ManifestFileSuffix)
|
||||||
|
return path
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetManifestTmpFilePath(path string, version int64) string {
|
||||||
|
path = filepath.Join(path, constant.ManifestDir, strconv.FormatInt(version, 10)+constant.ManifestTempFileSuffix)
|
||||||
|
return path
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetBlobFilePath(path string) string {
|
||||||
|
blobId := uuid.New()
|
||||||
|
return filepath.Join(GetBlobDir(path), blobId.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetManifestDir(path string) string {
|
||||||
|
path = filepath.Join(path, constant.ManifestDir)
|
||||||
|
return path
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetVectorDataDir returns path's vector-data directory.
func GetVectorDataDir(path string) string {
	return filepath.Join(path, constant.VectorDataDir)
}

// GetScalarDataDir returns path's scalar-data directory.
func GetScalarDataDir(path string) string {
	return filepath.Join(path, constant.ScalarDataDir)
}

// GetBlobDir returns path's blob directory.
func GetBlobDir(path string) string {
	return filepath.Join(path, constant.BlobDir)
}

// GetDeleteDataDir returns path's delete-data directory.
func GetDeleteDataDir(path string) string {
	return filepath.Join(path, constant.DeleteDataDir)
}
|
||||||
|
|
||||||
|
func ParseVersionFromFileName(path string) int64 {
|
||||||
|
pos := strings.Index(path, constant.ManifestFileSuffix)
|
||||||
|
if pos == -1 || !strings.HasSuffix(path, constant.ManifestFileSuffix) {
|
||||||
|
log.Warn("manifest file suffix not match", log.String("path", path))
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
version := path[0:pos]
|
||||||
|
versionInt, err := strconv.ParseInt(version, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
log.Error("parse version from file name error", log.String("path", path), log.String("version", version))
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
return versionInt
|
||||||
|
}
|
||||||
|
|
||||||
|
func ProjectSchema(sc *arrow.Schema, columns []string) *arrow.Schema {
|
||||||
|
var fields []arrow.Field
|
||||||
|
for _, field := range sc.Fields() {
|
||||||
|
for _, column := range columns {
|
||||||
|
if field.Name == column {
|
||||||
|
fields = append(fields, field)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return arrow.NewSchema(fields, nil)
|
||||||
|
}
|
||||||
22
internal/storagev2/docs/layout.md
Normal file
22
internal/storagev2/docs/layout.md
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
|
||||||
|
|
||||||
|
**storage layer interface**: supplies storage readers/writers configured with read options. Maintains storage metadata and handles atomic reads/writes across multiple files (possibly of different formats) on disk.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**File Reader/Writer interface**: receives data and read options from the upper layer and converts the raw data into our defined data format.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**File Format Reader/Writer**: file format reader/writer (eg. parquet/raw/others like orc).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**File system interface**: support different file system (eg. in-memory, aws, minio, posix, windows).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
41
internal/storagev2/file/blob/blob.go
Normal file
41
internal/storagev2/file/blob/blob.go
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package blob
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Blob describes a named binary payload stored in a single file.
type Blob struct {
	Name string // logical blob name
	Size int64  // blob size — presumably bytes; confirm against the writer
	File string // path of the file holding the blob
}
|
||||||
|
|
||||||
|
func (b Blob) ToProtobuf() *storagev2pb.Blob {
|
||||||
|
blob := &storagev2pb.Blob{}
|
||||||
|
blob.Name = b.Name
|
||||||
|
blob.Size = b.Size
|
||||||
|
blob.File = b.File
|
||||||
|
return blob
|
||||||
|
}
|
||||||
|
|
||||||
|
func FromProtobuf(blob *storagev2pb.Blob) Blob {
|
||||||
|
return Blob{
|
||||||
|
Name: blob.Name,
|
||||||
|
Size: blob.Size,
|
||||||
|
File: blob.File,
|
||||||
|
}
|
||||||
|
}
|
||||||
45
internal/storagev2/file/fragment/deletefragment.go
Normal file
45
internal/storagev2/file/fragment/deletefragment.go
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fragment
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
type (
	// pkType is the dynamically typed primary-key value used as the map key
	// for deleted rows.
	pkType any
	// DeleteFragmentVector is a collection of DeleteFragments.
	DeleteFragmentVector []DeleteFragment
	// DeleteFragment associates primary keys with int64 values for one
	// fragment — presumably deleted row offsets; confirm once Make is
	// implemented.
	DeleteFragment struct {
		id     int64
		schema *schema.Schema
		fs     fs.Fs
		data   map[pkType][]int64
	}
)
|
||||||
|
|
||||||
|
// NewDeleteFragment constructs an empty DeleteFragment with the given id,
// schema and filesystem.
func NewDeleteFragment(id int64, schema *schema.Schema, fs fs.Fs) *DeleteFragment {
	return &DeleteFragment{
		id:     id,
		schema: schema,
		fs:     fs,
		data:   make(map[pkType][]int64),
	}
}

// Make builds a DeleteFragment from frag using filesystem f and schema s.
// TODO: not implemented yet; calling it panics.
func Make(f fs.Fs, s *schema.Schema, frag Fragment) DeleteFragment {
	// TODO: implement
	panic("implement me")
}
|
||||||
76
internal/storagev2/file/fragment/fragment.go
Normal file
76
internal/storagev2/file/fragment/fragment.go
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fragment
|
||||||
|
|
||||||
|
import "github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||||
|
|
||||||
|
// FragmentType discriminates the kind of a fragment.
type FragmentType int32

const (
	// NOTE(review): the k-prefix is C++ style, not Go convention; kept
	// because these package-level names may be referenced elsewhere.
	kUnknown FragmentType = 0
	kData    FragmentType = 1
	kDelete  FragmentType = 2
)
|
||||||
|
|
||||||
|
// Fragment groups a set of data files under one fragment id.
type Fragment struct {
	fragmentId int64
	files      []string
}

// FragmentVector is a collection of fragments.
type FragmentVector []Fragment
||||||
|
func ToFilesVector(fragments []Fragment) []string {
|
||||||
|
files := make([]string, 0)
|
||||||
|
for _, fragment := range fragments {
|
||||||
|
files = append(files, fragment.files...)
|
||||||
|
}
|
||||||
|
return files
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFragment() Fragment {
|
||||||
|
return Fragment{
|
||||||
|
files: make([]string, 0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddFile appends file to the fragment's file list.
func (f *Fragment) AddFile(file string) {
	f.files = append(f.files, file)
}

// Files returns the fragment's file list (the underlying slice, not a copy).
func (f *Fragment) Files() []string {
	return f.files
}

// FragmentId returns the fragment's id.
func (f *Fragment) FragmentId() int64 {
	return f.fragmentId
}

// SetFragmentId sets the fragment's id.
func (f *Fragment) SetFragmentId(fragmentId int64) {
	f.fragmentId = fragmentId
}
|
||||||
|
|
||||||
|
func (f *Fragment) ToProtobuf() *storagev2pb.Fragment {
|
||||||
|
fragment := &storagev2pb.Fragment{}
|
||||||
|
fragment.Id = f.fragmentId
|
||||||
|
fragment.Files = append(fragment.Files, f.files...)
|
||||||
|
return fragment
|
||||||
|
}
|
||||||
|
|
||||||
|
func FromProtobuf(fragment *storagev2pb.Fragment) Fragment {
|
||||||
|
newFragment := NewFragment()
|
||||||
|
newFragment.SetFragmentId(fragment.GetId())
|
||||||
|
newFragment.files = append(newFragment.files, fragment.Files...)
|
||||||
|
return newFragment
|
||||||
|
}
|
||||||
84
internal/storagev2/filter/conjunction_filter.go
Normal file
84
internal/storagev2/filter/conjunction_filter.go
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package filter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet/metadata"
|
||||||
|
"github.com/bits-and-blooms/bitset"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ConjunctionAndFilter drops a row when ANY child filter drops it: Apply
// lets every child OR its dropped-row bits into the shared bitset.
type ConjunctionAndFilter struct {
	filters    []Filter
	columnName string
}

// GetColumnName returns the column this filter targets.
func (f *ConjunctionAndFilter) GetColumnName() string {
	return f.columnName
}
|
||||||
|
|
||||||
|
// FIXME: should have 3 cases.
// 1. all records satisfy the filter, this group dont need to check filter again.
// 2. no record satisfies the filter.
// 3. some records satisfy the filter, this group should check filter again.

// CheckStatistics reports whether the row group can be skipped: true as
// soon as any child filter's statistics check excludes the group.
func (f *ConjunctionAndFilter) CheckStatistics(stats metadata.TypedStatistics) bool {
	for _, filter := range f.filters {
		if filter.CheckStatistics(stats) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// Type identifies this filter as an AND conjunction.
func (f *ConjunctionAndFilter) Type() FilterType {
	return And
}

// Apply runs every child filter over colData; each child accumulates its
// dropped rows into the shared filterBitSet (set bit = dropped row).
func (f *ConjunctionAndFilter) Apply(colData arrow.Array, filterBitSet *bitset.BitSet) {
	for i := 0; i < len(f.filters); i++ {
		f.filters[i].Apply(colData, filterBitSet)
	}
}
|
||||||
|
|
||||||
|
// ConjunctionOrFilter keeps a row when ANY child filter keeps it, i.e. a
// row is dropped only if every child filter drops it.
type ConjunctionOrFilter struct {
	filters []Filter
}

// CheckStatistics reports whether the row group can be skipped: only when
// every child filter's statistics check excludes it.
func (f *ConjunctionOrFilter) CheckStatistics(stats metadata.TypedStatistics) bool {
	for _, filter := range f.filters {
		if !filter.CheckStatistics(stats) {
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
|
func (f *ConjunctionOrFilter) Apply(colData arrow.Array, filterBitSet *bitset.BitSet) {
|
||||||
|
orBitSet := bitset.New(filterBitSet.Len())
|
||||||
|
for i := 1; i < len(f.filters); i++ {
|
||||||
|
childBitSet := filterBitSet.Clone()
|
||||||
|
f.filters[i].Apply(colData, childBitSet)
|
||||||
|
orBitSet.Intersection(childBitSet)
|
||||||
|
}
|
||||||
|
filterBitSet.Union(orBitSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type identifies this filter as an OR conjunction.
func (f *ConjunctionOrFilter) Type() FilterType {
	return Or
}

// NewConjunctionAndFilter builds an AND conjunction over the given filters.
func NewConjunctionAndFilter(filters ...Filter) *ConjunctionAndFilter {
	return &ConjunctionAndFilter{filters: filters}
}
|
||||||
151
internal/storagev2/filter/constant_filter.go
Normal file
151
internal/storagev2/filter/constant_filter.go
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package filter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet/metadata"
|
||||||
|
"github.com/bits-and-blooms/bitset"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ConstantFilter compares a single column against a constant value.
type ConstantFilter struct {
	cmpType    ComparisonType // comparison operator ("column cmpType value")
	value      interface{}    // constant operand; type-asserted to the column's concrete type at use
	columnName string         // target column
}

// GetColumnName returns the column this filter targets.
func (f *ConstantFilter) GetColumnName() string {
	return f.columnName
}
|
||||||
|
|
||||||
|
// CheckStatistics reports whether the row group's min/max statistics prove
// that no row can pass this filter, i.e. true means the caller may skip the
// group. Only int32/int64/float/double statistics are inspected; any other
// type, or missing min/max, conservatively returns false.
func (f *ConstantFilter) CheckStatistics(stats metadata.TypedStatistics) bool {
	// FIXME: value may be int8/uint8/...., we should encapsulate the value type, now we just do type assertion for prototype
	switch stats.Type() {
	case parquet.Types.Int32:
		i32stats := stats.(*metadata.Int32Statistics)
		if i32stats.HasMinMax() {
			return checkStats(f.value.(int32), i32stats.Min(), i32stats.Max(), f.cmpType)
		}
	case parquet.Types.Int64:
		i64stats := stats.(*metadata.Int64Statistics)
		if i64stats.HasMinMax() {
			return checkStats(f.value.(int64), i64stats.Min(), i64stats.Max(), f.cmpType)
		}
	case parquet.Types.Float:
		floatstats := stats.(*metadata.Float32Statistics)
		if floatstats.HasMinMax() {
			return checkStats(f.value.(float32), floatstats.Min(), floatstats.Max(), f.cmpType)
		}
	case parquet.Types.Double:
		doublestats := stats.(*metadata.Float64Statistics)
		if doublestats.HasMinMax() {
			return checkStats(f.value.(float64), doublestats.Min(), doublestats.Max(), f.cmpType)
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// comparableValue constrains the statistic value types checkStats supports.
type comparableValue interface {
	int32 | int64 | float32 | float64
}

// checkStats reports whether the range [min, max] proves that NO row can
// satisfy the comparison "column cmpType value", i.e. the row group may be
// skipped. Note the inverted sense: true = prune. This mirrors
// checkColumn's inverted per-row semantics.
func checkStats[T comparableValue](value, min, max T, cmpType ComparisonType) bool {
	switch cmpType {
	case Equal:
		// value outside [min, max]: no row can equal it.
		return value < min || value > max
	case NotEqual:
		// min == max == value: every row equals value, none can differ.
		return value == min && value == max
	case LessThan:
		// no row is strictly below value.
		return value <= min
	case LessThanOrEqual:
		return value < min
	case GreaterThan:
		// no row is strictly above value.
		return value >= max
	case GreaterThanOrEqual:
		return value > max
	default:
		return false
	}
}
|
||||||
|
|
||||||
|
// Apply marks in filterBitSet every row of colData that fails the
// comparison against the constant (set bit = dropped row). The constant is
// type-asserted to match the column's concrete arrow type; a mismatched
// constant type panics, and unsupported column types are silently ignored.
func (f *ConstantFilter) Apply(colData arrow.Array, filterBitSet *bitset.BitSet) {
	switch data := colData.(type) {
	case *array.Int8:
		filterColumn(f.value.(int8), data.Int8Values(), f.cmpType, filterBitSet)
	case *array.Uint8:
		filterColumn(f.value.(uint8), data.Uint8Values(), f.cmpType, filterBitSet)
	case *array.Int16:
		filterColumn(f.value.(int16), data.Int16Values(), f.cmpType, filterBitSet)
	case *array.Uint16:
		filterColumn(f.value.(uint16), data.Uint16Values(), f.cmpType, filterBitSet)
	case *array.Int32:
		filterColumn(f.value.(int32), data.Int32Values(), f.cmpType, filterBitSet)
	case *array.Uint32:
		filterColumn(f.value.(uint32), data.Uint32Values(), f.cmpType, filterBitSet)
	case *array.Int64:
		filterColumn(f.value.(int64), data.Int64Values(), f.cmpType, filterBitSet)
	case *array.Uint64:
		filterColumn(f.value.(uint64), data.Uint64Values(), f.cmpType, filterBitSet)
	case *array.Float32:
		filterColumn(f.value.(float32), data.Float32Values(), f.cmpType, filterBitSet)
	case *array.Float64:
		filterColumn(f.value.(float64), data.Float64Values(), f.cmpType, filterBitSet)
	}
}
|
||||||
|
|
||||||
|
// comparableColumnType constrains the column element types the constant
// filter can compare.
type comparableColumnType interface {
	int8 | uint8 | int16 | uint16 | int32 | uint32 | int64 | uint64 | float32 | float64
}

// filterColumn marks in filterBitSet every element of targets that FAILS
// the comparison against value (set bit = dropped row).
func filterColumn[T comparableColumnType](value T, targets []T, cmpType ComparisonType, filterBitSet *bitset.BitSet) {
	for i, target := range targets {
		if checkColumn(value, target, cmpType) {
			filterBitSet.Set(uint(i))
		}
	}
}
|
||||||
|
|
||||||
|
// checkColumn reports whether target should be DROPPED under the comparison
// "target cmpType value". Note the inverted returns: true means the row
// fails the filter (e.g. Equal returns true when target differs from value,
// so only equal rows are kept).
func checkColumn[T comparableColumnType](value, target T, cmpType ComparisonType) bool {
	switch cmpType {
	case Equal:
		return value != target // keep rows where target == value
	case NotEqual:
		return value == target // keep rows where target != value
	case LessThan:
		return value <= target // keep rows where target < value
	case LessThanOrEqual:
		return value < target // keep rows where target <= value
	case GreaterThan:
		return value >= target // keep rows where target > value
	case GreaterThanOrEqual:
		return value > target // keep rows where target >= value
	default:
		return false
	}
}
|
||||||
|
|
||||||
|
// Type identifies this filter as a constant comparison.
func (f *ConstantFilter) Type() FilterType {
	return Constant
}

// NewConstantFilter builds a filter comparing column columnName against the
// constant value using cmpType. The value's concrete type must match the
// column's arrow type when Apply runs.
func NewConstantFilter(cmpType ComparisonType, columnName string, value interface{}) *ConstantFilter {
	return &ConstantFilter{
		cmpType:    cmpType,
		columnName: columnName,
		value:      value,
	}
}
|
||||||
48
internal/storagev2/filter/filter.go
Normal file
48
internal/storagev2/filter/filter.go
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package filter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet/metadata"
|
||||||
|
"github.com/bits-and-blooms/bitset"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FilterType identifies the concrete kind of a Filter.
type FilterType int8

const (
	And FilterType = iota
	Or
	Constant
	Range
)

// Filter prunes rows and row groups during reads. In the bitset passed to
// Apply, a set bit marks a row to drop.
type Filter interface {
	// CheckStatistics reports whether the column statistics prove the row
	// group can be skipped entirely.
	CheckStatistics(metadata.TypedStatistics) bool
	// Type identifies the concrete filter kind.
	Type() FilterType
	// Apply marks rows of colData that fail the filter in filterBitSet.
	Apply(colData arrow.Array, filterBitSet *bitset.BitSet)
	// GetColumnName returns the column this filter targets.
	GetColumnName() string
}

// ComparisonType enumerates the comparison operators a ConstantFilter
// supports.
type ComparisonType int8

const (
	Equal ComparisonType = iota
	NotEqual
	LessThan
	LessThanOrEqual
	GreaterThan
	GreaterThanOrEqual
)
|
||||||
220
internal/storagev2/io/format/parquet/file_reader.go
Normal file
220
internal/storagev2/io/format/parquet/file_reader.go
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package parquet
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/memory"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet/file"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet/metadata"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet/pqarrow"
|
||||||
|
"github.com/bits-and-blooms/bitset"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/filter"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FileReader reads arrow records from a parquet file, applying the read
// options' column projection and row filters.
type FileReader struct {
	reader    *pqarrow.FileReader
	options   *options.ReadOptions
	recReader pqarrow.RecordReader // lazily initialized by initRecReader on first Read
}
|
||||||
|
|
||||||
|
// Read returns the next batch of records with the configured filters
// applied.
// When the Reader reaches the end of the underlying stream, it returns (nil, io.EOF)
func (r *FileReader) Read() (arrow.Record, error) {
	if r.recReader == nil {
		// lazy init
		if err := r.initRecReader(); err != nil {
			return nil, err
		}
	}
	rec, err := r.recReader.Read()
	if err != nil {
		return nil, err
	}

	return applyFilters(rec, r.options.Filters), nil
}
|
||||||
|
|
||||||
|
func applyFilters(rec arrow.Record, filters map[string]filter.Filter) arrow.Record {
|
||||||
|
filterBitSet := bitset.New(uint(rec.NumRows()))
|
||||||
|
for col, f := range filters {
|
||||||
|
colIndices := rec.Schema().FieldIndices(col)
|
||||||
|
if len(colIndices) == 0 {
|
||||||
|
panic("column not found")
|
||||||
|
}
|
||||||
|
colIndex := colIndices[0]
|
||||||
|
arr := rec.Column(colIndex)
|
||||||
|
f.Apply(arr, filterBitSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
if filterBitSet.None() {
|
||||||
|
return rec
|
||||||
|
}
|
||||||
|
|
||||||
|
var cols []arrow.Array
|
||||||
|
for i := 0; i < int(rec.NumCols()); i++ {
|
||||||
|
col := rec.Column(i)
|
||||||
|
switch t := col.(type) {
|
||||||
|
case *array.Int8:
|
||||||
|
builder := array.NewInt8Builder(memory.DefaultAllocator)
|
||||||
|
filtered := filterRecord(t.Int8Values(), filterBitSet)
|
||||||
|
builder.AppendValues(filtered, nil)
|
||||||
|
cols = append(cols, builder.NewArray())
|
||||||
|
case *array.Uint8:
|
||||||
|
builder := array.NewUint8Builder(memory.DefaultAllocator)
|
||||||
|
filtered := filterRecord(t.Uint8Values(), filterBitSet)
|
||||||
|
builder.AppendValues(filtered, nil)
|
||||||
|
cols = append(cols, builder.NewArray())
|
||||||
|
case *array.Int16:
|
||||||
|
builder := array.NewInt16Builder(memory.DefaultAllocator)
|
||||||
|
filtered := filterRecord(t.Int16Values(), filterBitSet)
|
||||||
|
builder.AppendValues(filtered, nil)
|
||||||
|
cols = append(cols, builder.NewArray())
|
||||||
|
case *array.Uint16:
|
||||||
|
builder := array.NewUint16Builder(memory.DefaultAllocator)
|
||||||
|
filtered := filterRecord(t.Uint16Values(), filterBitSet)
|
||||||
|
builder.AppendValues(filtered, nil)
|
||||||
|
cols = append(cols, builder.NewArray())
|
||||||
|
case *array.Int32:
|
||||||
|
builder := array.NewInt32Builder(memory.DefaultAllocator)
|
||||||
|
filtered := filterRecord(t.Int32Values(), filterBitSet)
|
||||||
|
builder.AppendValues(filtered, nil)
|
||||||
|
cols = append(cols, builder.NewArray())
|
||||||
|
case *array.Uint32:
|
||||||
|
builder := array.NewUint32Builder(memory.DefaultAllocator)
|
||||||
|
filtered := filterRecord(t.Uint32Values(), filterBitSet)
|
||||||
|
builder.AppendValues(filtered, nil)
|
||||||
|
cols = append(cols, builder.NewArray())
|
||||||
|
case *array.Int64:
|
||||||
|
builder := array.NewInt64Builder(memory.DefaultAllocator)
|
||||||
|
filtered := filterRecord(t.Int64Values(), filterBitSet)
|
||||||
|
builder.AppendValues(filtered, nil)
|
||||||
|
cols = append(cols, builder.NewArray())
|
||||||
|
case *array.Uint64:
|
||||||
|
builder := array.NewUint64Builder(memory.DefaultAllocator)
|
||||||
|
filtered := filterRecord(t.Uint64Values(), filterBitSet)
|
||||||
|
builder.AppendValues(filtered, nil)
|
||||||
|
cols = append(cols, builder.NewArray())
|
||||||
|
default:
|
||||||
|
panic("unsupported type")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return array.NewRecord(rec.Schema(), cols, int64(cols[0].Len()))
|
||||||
|
}
|
||||||
|
|
||||||
|
// comparableColumnType constrains the column element types filterRecord can
// rebuild.
type comparableColumnType interface {
	int8 | uint8 | int16 | uint16 | int32 | uint32 | int64 | uint64 | float32 | float64
}

// filterRecord returns the elements of targets whose bit is NOT set in
// filterBitSet, i.e. the rows that survive filtering. The bitset length is
// assumed to match len(targets).
func filterRecord[T comparableColumnType](targets []T, filterBitSet *bitset.BitSet) []T {
	var res []T
	for i := 0; i < int(filterBitSet.Len()); i++ {
		if !filterBitSet.Test(uint(i)) {
			res = append(res, targets[i])
		}
	}
	return res
}
|
||||||
|
|
||||||
|
func (r *FileReader) initRecReader() error {
|
||||||
|
var (
|
||||||
|
filters map[string]filter.Filter = r.options.Filters
|
||||||
|
columns []string = r.options.Columns
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
rowGroupNum int = r.reader.ParquetReader().NumRowGroups()
|
||||||
|
fileMetaData *metadata.FileMetaData = r.reader.ParquetReader().MetaData()
|
||||||
|
)
|
||||||
|
|
||||||
|
var rowGroups []int
|
||||||
|
var colIndices []int
|
||||||
|
// filters check column statistics
|
||||||
|
x1:
|
||||||
|
for i := 0; i < rowGroupNum; i++ {
|
||||||
|
rowGroupMetaData := fileMetaData.RowGroup(i)
|
||||||
|
for col, filter := range filters {
|
||||||
|
if checkColumnStats(rowGroupMetaData, col, filter) {
|
||||||
|
// ignore the row group
|
||||||
|
break x1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rowGroups = append(rowGroups, i)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, col := range columns {
|
||||||
|
colIndex := fileMetaData.Schema.Root().FieldIndexByName(col)
|
||||||
|
if colIndex == -1 {
|
||||||
|
panic("column not found")
|
||||||
|
}
|
||||||
|
colIndices = append(colIndices, colIndex)
|
||||||
|
}
|
||||||
|
|
||||||
|
recReader, err := r.reader.GetRecordReader(context.TODO(), colIndices, rowGroups)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
r.recReader = recReader
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkColumnStats reports whether the statistics of column col in the given
// row group satisfy f.CheckStatistics.
//
// NOTE(review): the call site in initRecReader treats a true result as
// "this row group can be skipped" — confirm against the filter.Filter
// contract.
//
// It panics if col is not in the row group's schema or if the column-chunk
// metadata cannot be loaded. Missing statistics yield false ("cannot prune").
func checkColumnStats(rowGroupMetaData *metadata.RowGroupMetaData, col string, f filter.Filter) bool {
	colIndex := rowGroupMetaData.Schema.Root().FieldIndexByName(col)
	if colIndex == -1 {
		panic("column not found")
	}
	colMetaData, err := rowGroupMetaData.ColumnChunk(colIndex)
	if err != nil {
		panic(err)
	}

	// Statistics are optional in parquet metadata; without them the row
	// group cannot be ruled out.
	stats, err := colMetaData.Statistics()
	if err != nil || stats == nil {
		return false
	}
	return f.CheckStatistics(stats)
}
|
||||||
|
|
||||||
|
func (r *FileReader) Close() error {
|
||||||
|
if r.recReader != nil {
|
||||||
|
r.recReader.Release()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFileReader(fs fs.Fs, filePath string, options *options.ReadOptions) (*FileReader, error) {
|
||||||
|
f, err := fs.OpenFile(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
parquetReader, err := file.NewParquetReader(f)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
reader, err := pqarrow.NewFileReader(parquetReader, pqarrow.ArrowReadProperties{BatchSize: constant.ReadBatchSize}, memory.DefaultAllocator)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &FileReader{reader: reader, options: options}, nil
|
||||||
|
}
|
||||||
61
internal/storagev2/io/format/parquet/file_writer.go
Normal file
61
internal/storagev2/io/format/parquet/file_writer.go
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package parquet
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet/pqarrow"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/format"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Compile-time check that FileWriter satisfies format.Writer.
var _ format.Writer = (*FileWriter)(nil)

// FileWriter writes arrow records to a parquet file and tracks how many
// rows have been written.
type FileWriter struct {
	writer *pqarrow.FileWriter
	// count is the total number of rows written so far; exposed via Count.
	count int64
}
|
||||||
|
|
||||||
|
func (f *FileWriter) Write(record arrow.Record) error {
|
||||||
|
if err := f.writer.Write(record); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
f.count += record.NumRows()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count returns the total number of rows written so far.
func (f *FileWriter) Count() int64 {
	return f.count
}

// Close finalizes the parquet footer by closing the underlying writer.
func (f *FileWriter) Close() error {
	return f.writer.Close()
}
|
||||||
|
|
||||||
|
func NewFileWriter(schema *arrow.Schema, fs fs.Fs, filePath string) (*FileWriter, error) {
|
||||||
|
file, err := fs.OpenFile(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
w, err := pqarrow.NewFileWriter(schema, file, parquet.NewWriterProperties(), pqarrow.DefaultWriterProps())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &FileWriter{writer: w}, nil
|
||||||
|
}
|
||||||
24
internal/storagev2/io/format/reader.go
Normal file
24
internal/storagev2/io/format/reader.go
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package format
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Reader is a minimal columnar-format reader: Read returns the next record
// until the source is exhausted, and Close releases any held resources.
type Reader interface {
	Read() (arrow.Record, error)
	Close() error
}
|
||||||
23
internal/storagev2/io/format/writer.go
Normal file
23
internal/storagev2/io/format/writer.go
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package format
|
||||||
|
|
||||||
|
import "github.com/apache/arrow/go/v12/arrow"
|
||||||
|
|
||||||
|
// Writer is a minimal columnar-format writer: Write appends a record,
// Count reports the rows written so far, and Close finalizes the output.
type Writer interface {
	Write(record arrow.Record) error
	Count() int64
	Close() error
}
|
||||||
40
internal/storagev2/io/fs/factory.go
Normal file
40
internal/storagev2/io/fs/factory.go
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/url"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Factory struct{}
|
||||||
|
|
||||||
|
func (f *Factory) Create(fsType options.FsType, uri *url.URL) (Fs, error) {
|
||||||
|
switch fsType {
|
||||||
|
case options.InMemory:
|
||||||
|
return NewMemoryFs(), nil
|
||||||
|
case options.LocalFS:
|
||||||
|
return NewLocalFs(uri), nil
|
||||||
|
case options.S3:
|
||||||
|
return NewMinioFs(uri)
|
||||||
|
default:
|
||||||
|
panic("unknown fs type")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFsFactory() *Factory {
|
||||||
|
return &Factory{}
|
||||||
|
}
|
||||||
25
internal/storagev2/io/fs/file/file.go
Normal file
25
internal/storagev2/io/fs/file/file.go
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package file
|
||||||
|
|
||||||
|
import "io"
|
||||||
|
|
||||||
|
// File is the abstract file handle used by the storage layer. It combines
// the stdlib io interfaces needed by both the parquet read path (ReaderAt,
// Seeker, Reader) and the write path (Writer), plus Closer.
type File interface {
	io.Writer
	io.ReaderAt
	io.Seeker
	io.Reader
	io.Closer
}
|
||||||
52
internal/storagev2/io/fs/file/local_file.go
Normal file
52
internal/storagev2/io/fs/file/local_file.go
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package file
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// EOF re-exports io.EOF for convenience of this package's callers.
var EOF = io.EOF

// LocalFile adapts an OS file to the File interface.
// NOTE(review): the os.File is stored by value (a copy of the struct the
// caller passed in). This works because os.File wraps an internal pointer,
// but storing *os.File directly would be more conventional — confirm intent.
type LocalFile struct {
	file os.File
}
|
||||||
|
|
||||||
|
// Read reads from the current file offset.
func (l *LocalFile) Read(p []byte) (n int, err error) {
	return l.file.Read(p)
}

// Write writes at the current file offset.
func (l *LocalFile) Write(p []byte) (n int, err error) {
	return l.file.Write(p)
}

// ReadAt reads len(p) bytes starting at offset off without moving the
// file offset.
func (l *LocalFile) ReadAt(p []byte, off int64) (n int, err error) {
	return l.file.ReadAt(p, off)
}

// Seek sets the offset for the next Read or Write, interpreted per whence.
func (l *LocalFile) Seek(offset int64, whence int) (int64, error) {
	return l.file.Seek(offset, whence)
}

// Close closes the underlying OS file.
func (l *LocalFile) Close() error {
	return l.file.Close()
}
|
||||||
|
|
||||||
|
// NewLocalFile wraps an already-opened *os.File in a LocalFile.
func NewLocalFile(f *os.File) *LocalFile {
	return &LocalFile{
		file: *f,
	}
}
|
||||||
116
internal/storagev2/io/fs/file/memory_file.go
Normal file
116
internal/storagev2/io/fs/file/memory_file.go
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package file
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
|
)
|
||||||
|
|
||||||
|
// errInvalid is returned for negative/overflowing offsets and bad Seek
// arguments, mirroring os.ErrInvalid semantics.
var errInvalid = errors.New("invalid argument")

// MemoryFile is an in-memory implementation of the File interface backed by
// a growable byte slice. The zero value is an empty, usable file.
type MemoryFile struct {
	b []byte // file contents
	i int    // current offset used by Read, Write and Seek
}

// Close is a no-op; the contents remain available via Bytes.
func (f *MemoryFile) Close() error {
	return nil
}

// Read copies bytes from the current offset into p and advances the offset.
// It returns io.EOF once the offset has reached the end of the buffer.
func (f *MemoryFile) Read(p []byte) (n int, err error) {
	if f.i >= len(f.b) {
		return 0, io.EOF
	}
	n = copy(p, f.b[f.i:])
	f.i += n
	return n, nil
}

// Write writes b at the current offset, growing the buffer as needed, and
// advances the offset past the written bytes.
func (f *MemoryFile) Write(b []byte) (int, error) {
	n, err := f.writeAt(b, int64(f.i))
	f.i += n
	return n, err
}

// writeAt writes b at offset off, zero-filling any gap between the current
// end of the buffer and off.
func (f *MemoryFile) writeAt(b []byte, off int64) (int, error) {
	if off < 0 || int64(int(off)) < off {
		return 0, errInvalid
	}
	if off > int64(len(f.b)) {
		f.truncate(off)
	}
	n := copy(f.b[off:], b)
	f.b = append(f.b, b[n:]...)
	return len(b), nil
}

// truncate resizes the buffer to n bytes, zero-filling when growing.
func (f *MemoryFile) truncate(n int64) error {
	switch {
	case n < 0 || int64(int(n)) < n:
		return errInvalid
	case n <= int64(len(f.b)):
		f.b = f.b[:n]
		return nil
	default:
		f.b = append(f.b, make([]byte, int(n)-len(f.b))...)
		return nil
	}
}

// ReadAt copies bytes starting at offset off into b. Per the io.ReaderAt
// contract it does NOT move the offset used by Read/Write/Seek (the
// previous implementation advanced it, which corrupted interleaved
// Read/ReadAt sequences). io.EOF is returned when fewer than len(b) bytes
// are available at off.
func (f *MemoryFile) ReadAt(b []byte, off int64) (n int, err error) {
	if off < 0 || int64(int(off)) < off {
		return 0, errInvalid
	}
	if off > int64(len(f.b)) {
		return 0, io.EOF
	}
	n = copy(b, f.b[off:])
	if n < len(b) {
		return n, io.EOF
	}
	return n, nil
}

// Seek sets the offset for the next Read or Write, interpreted per whence
// (io.SeekStart, io.SeekCurrent or io.SeekEnd).
func (f *MemoryFile) Seek(offset int64, whence int) (int64, error) {
	var abs int64
	switch whence {
	case io.SeekStart:
		abs = offset
	case io.SeekCurrent:
		abs = int64(f.i) + offset
	case io.SeekEnd:
		abs = int64(len(f.b)) + offset
	default:
		return 0, errInvalid
	}
	if abs < 0 {
		return 0, errInvalid
	}
	f.i = int(abs)
	return abs, nil
}

// Bytes returns the full contents of the file without copying.
func (f *MemoryFile) Bytes() []byte {
	return f.b
}

// NewMemoryFile creates a MemoryFile whose initial contents are b (used
// directly, not copied).
func NewMemoryFile(b []byte) *MemoryFile {
	return &MemoryFile{
		b: b,
	}
}
|
||||||
73
internal/storagev2/io/fs/file/minio_file.go
Normal file
73
internal/storagev2/io/fs/file/minio_file.go
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package file
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/minio/minio-go/v7"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Compile-time check that MinioFile satisfies the File interface.
var _ File = (*MinioFile)(nil)

// MinioFile is a File backed by a MinIO/S3 object. Reads are served by the
// embedded *minio.Object (nil when the object does not exist yet, see
// NewMinioFile); writes are buffered in memory and uploaded in a single
// PutObject call on Close.
type MinioFile struct {
	*minio.Object
	writer     *MemoryFile // write buffer, flushed on Close
	client     *minio.Client
	fileName   string
	bucketName string
}
|
||||||
|
|
||||||
|
// Write appends b to the in-memory write buffer; nothing is sent to the
// object store until Close.
func (f *MinioFile) Write(b []byte) (int, error) {
	return f.writer.Write(b)
}

// Close uploads the buffered writes (if any) as the full object content.
// Closing a file that was never written is a no-op.
// NOTE(review): the embedded read Object is not closed here — confirm
// whether its underlying connection should also be released.
func (f *MinioFile) Close() error {
	if len(f.writer.b) == 0 {
		return nil
	}
	_, err := f.client.PutObject(context.TODO(), f.bucketName, f.fileName, bytes.NewReader(f.writer.b), int64(len(f.writer.b)), minio.PutObjectOptions{})
	return err
}
|
||||||
|
|
||||||
|
// NewMinioFile opens bucketName/fileName for reading and buffered writing.
//
// If the object does not exist yet (a "NoSuchKey" Stat response), a
// write-only handle is returned whose embedded read Object is nil; such a
// handle must only be written, since read calls would dereference nil.
// Any other Stat error is returned unchanged.
func NewMinioFile(client *minio.Client, fileName string, bucketName string) (*MinioFile, error) {
	_, err := client.StatObject(context.TODO(), bucketName, fileName, minio.StatObjectOptions{})
	if err != nil {
		eresp := minio.ToErrorResponse(err)
		if eresp.Code != "NoSuchKey" {
			return nil, err
		}
		// Object absent: hand back a buffer-only handle for writing.
		return &MinioFile{
			writer:     NewMemoryFile(nil),
			client:     client,
			fileName:   fileName,
			bucketName: bucketName,
		}, nil
	}

	// Object exists: attach a reader for it as well.
	object, err := client.GetObject(context.TODO(), bucketName, fileName, minio.GetObjectOptions{})
	if err != nil {
		return nil, err
	}

	return &MinioFile{
		Object:     object,
		writer:     NewMemoryFile(nil),
		client:     client,
		fileName:   fileName,
		bucketName: bucketName,
	}, nil
}
|
||||||
34
internal/storagev2/io/fs/fs.go
Normal file
34
internal/storagev2/io/fs/fs.go
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Fs abstracts the filesystem operations needed by the storage layer, with
// implementations for local disk, in-memory (tests) and MinIO/S3 backends.
type Fs interface {
	// OpenFile opens (creating if necessary) the file at path.
	OpenFile(path string) (file.File, error)
	// Rename moves src to dst.
	Rename(src string, dst string) error
	// DeleteFile removes the file at path.
	DeleteFile(path string) error
	// CreateDir creates the directory at path (a no-op on flat stores).
	CreateDir(path string) error
	// List enumerates entries under path.
	List(path string) ([]FileEntry, error)
	// ReadFile reads the entire file at path into memory.
	ReadFile(path string) ([]byte, error)
	// Exist reports whether path exists.
	Exist(path string) (bool, error)
	// Path returns the root path this filesystem was created with.
	Path() string
	// MkdirAll creates dir and missing parents with permission bits i.
	MkdirAll(dir string, i int) error
}

// FileEntry is a single result of Fs.List.
type FileEntry struct {
	Path string
}
|
||||||
42
internal/storagev2/io/fs/fs_util.go
Normal file
42
internal/storagev2/io/fs/fs_util.go
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/url"
|
||||||
|
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
)
|
||||||
|
|
||||||
|
var ErrInvalidFsType = errors.New("invalid fs type")
|
||||||
|
|
||||||
|
func BuildFileSystem(uri string) (Fs, error) {
|
||||||
|
parsedURI, err := url.Parse(uri)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("build file system with uri %s: %w", uri, err)
|
||||||
|
}
|
||||||
|
switch parsedURI.Scheme {
|
||||||
|
case "file":
|
||||||
|
return NewFsFactory().Create(options.LocalFS, parsedURI)
|
||||||
|
case "s3":
|
||||||
|
return NewFsFactory().Create(options.S3, parsedURI)
|
||||||
|
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("build file system with uri %s: %w", uri, ErrInvalidFsType)
|
||||||
|
}
|
||||||
|
}
|
||||||
95
internal/storagev2/io/fs/local_fs.go
Normal file
95
internal/storagev2/io/fs/local_fs.go
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LocalFS implements Fs on the local disk.
type LocalFS struct {
	path string // root path taken from the "file://" URI
}
|
||||||
|
|
||||||
|
// MkdirAll creates dir and any missing parents with permission bits i.
func (l *LocalFS) MkdirAll(dir string, i int) error {
	return os.MkdirAll(dir, os.FileMode(i))
}
|
||||||
|
|
||||||
|
func (l *LocalFS) OpenFile(path string) (file.File, error) {
|
||||||
|
// Extract the directory from the path
|
||||||
|
dir := filepath.Dir(path)
|
||||||
|
// Create the directory (including all necessary parent directories)
|
||||||
|
err := os.MkdirAll(dir, os.ModePerm)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
open, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0o666)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return file.NewLocalFile(open), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rename renames (moves) a file. If newpath already exists and is not a directory, Rename replaces it.
func (l *LocalFS) Rename(src string, dst string) error {
	return os.Rename(src, dst)
}

// DeleteFile removes the file at path.
func (l *LocalFS) DeleteFile(path string) error {
	return os.Remove(path)
}
|
||||||
|
|
||||||
|
func (l *LocalFS) CreateDir(path string) error {
|
||||||
|
err := os.MkdirAll(path, os.ModePerm)
|
||||||
|
if err != nil && !os.IsExist(err) {
|
||||||
|
log.Error(err.Error())
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *LocalFS) List(path string) ([]FileEntry, error) {
|
||||||
|
entries, err := os.ReadDir(path)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(err.Error())
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ret := make([]FileEntry, 0, len(entries))
|
||||||
|
for _, entry := range entries {
|
||||||
|
ret = append(ret, FileEntry{Path: filepath.Join(path, entry.Name())})
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadFile reads the entire file at path into memory.
func (l *LocalFS) ReadFile(path string) ([]byte, error) {
	return os.ReadFile(path)
}

// Exist is not yet implemented for the local filesystem.
func (l *LocalFS) Exist(path string) (bool, error) {
	panic("not implemented")
}

// Path returns the root path this filesystem was created with.
func (l *LocalFS) Path() string {
	return l.path
}

// NewLocalFs creates a LocalFS rooted at the path component of uri.
func NewLocalFs(uri *url.URL) *LocalFS {
	return &LocalFS{uri.Path}
}
|
||||||
78
internal/storagev2/io/fs/memory_fs.go
Normal file
78
internal/storagev2/io/fs/memory_fs.go
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MemoryFs is an in-memory Fs keyed by full path.
// It is not safe for concurrent use (plain map, no locking).
type MemoryFs struct {
	files map[string]*file.MemoryFile
}
|
||||||
|
|
||||||
|
// MkdirAll is not supported by the in-memory filesystem.
func (m *MemoryFs) MkdirAll(dir string, i int) error {
	// TODO implement me
	panic("implement me")
}

// List is not supported by the in-memory filesystem.
func (m *MemoryFs) List(path string) ([]FileEntry, error) {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
// OpenFile returns a handle for path, creating an empty entry if absent.
//
// NOTE(review): for an existing path a NEW MemoryFile is built around the
// stored bytes, so the returned handle has its own offset, and once its
// backing slice reallocates on append, writes no longer reach the stored
// entry. Confirm whether writes to a reopened file are expected to persist.
func (m *MemoryFs) OpenFile(path string) (file.File, error) {
	if f, ok := m.files[path]; ok {
		return file.NewMemoryFile(f.Bytes()), nil
	}
	f := file.NewMemoryFile(nil)
	m.files[path] = f
	return f, nil
}
|
||||||
|
|
||||||
|
func (m *MemoryFs) Rename(path string, path2 string) error {
|
||||||
|
if _, ok := m.files[path]; !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
m.files[path2] = m.files[path]
|
||||||
|
delete(m.files, path)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteFile removes path from the store; deleting a missing path is a no-op.
func (m *MemoryFs) DeleteFile(path string) error {
	delete(m.files, path)
	return nil
}

// CreateDir is a no-op: the in-memory store is flat.
func (m *MemoryFs) CreateDir(path string) error {
	return nil
}
|
||||||
|
|
||||||
|
// ReadFile is not supported by the in-memory filesystem.
func (m *MemoryFs) ReadFile(path string) ([]byte, error) {
	panic("implement me")
}

// Exist is not supported by the in-memory filesystem.
func (m *MemoryFs) Exist(path string) (bool, error) {
	panic("not implemented")
}

// Path is not supported by the in-memory filesystem.
func (m *MemoryFs) Path() string {
	panic("not implemented")
}
|
||||||
|
|
||||||
|
// NewMemoryFs creates an empty in-memory filesystem.
func NewMemoryFs() *MemoryFs {
	return &MemoryFs{
		files: make(map[string]*file.MemoryFile),
	}
}
|
||||||
201
internal/storagev2/io/fs/minio_fs.go
Normal file
201
internal/storagev2/io/fs/minio_fs.go
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/url"
|
||||||
|
"path"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/minio/minio-go/v7"
|
||||||
|
"github.com/minio/minio-go/v7/pkg/credentials"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/errors"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MinioFs implements Fs on a MinIO/S3-compatible object store. Paths passed
// to its methods are combined "<bucket>/<key>" strings (see getRealPath),
// while bucketName/path record the root taken from the constructor URI.
type MinioFs struct {
	client     *minio.Client
	bucketName string
	path       string
}
|
||||||
|
|
||||||
|
// MkdirAll is not supported: object stores have no real directories.
func (fs *MinioFs) MkdirAll(dir string, i int) error {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
func (fs *MinioFs) OpenFile(path string) (file.File, error) {
|
||||||
|
err, bucket, path := getRealPath(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return file.NewMinioFile(fs.client, path, bucket)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rename moves an object by copying it to dst and then deleting src; both
// arguments are "<bucket>/<key>" paths.
//
// The copy+delete pair is not atomic: a failure to delete the source is
// only logged (the copy has already succeeded), so callers may transiently
// observe both objects.
func (fs *MinioFs) Rename(src string, dst string) error {
	err, dstBucket, dst := getRealPath(dst)
	if err != nil {
		return err
	}
	err, srcBucket, src := getRealPath(src)
	if err != nil {
		return err
	}
	_, err = fs.client.CopyObject(context.TODO(), minio.CopyDestOptions{Bucket: dstBucket, Object: dst}, minio.CopySrcOptions{Bucket: srcBucket, Object: src})
	if err != nil {
		return err
	}
	err = fs.client.RemoveObject(context.TODO(), srcBucket, src, minio.RemoveObjectOptions{})
	if err != nil {
		log.Warn("failed to remove source object", log.String("source", src))
	}
	return nil
}
|
||||||
|
|
||||||
|
func (fs *MinioFs) DeleteFile(path string) error {
|
||||||
|
err, bucket, path := getRealPath(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return fs.client.RemoveObject(context.TODO(), bucket, path, minio.RemoveObjectOptions{})
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateDir is a no-op: object stores have no directories to create.
func (fs *MinioFs) CreateDir(path string) error {
	return nil
}
|
||||||
|
|
||||||
|
// List recursively enumerates all objects under the "<bucket>/<key>" prefix
// and returns their paths re-joined with the bucket name.
func (fs *MinioFs) List(prefix string) ([]FileEntry, error) {
	err, bucket, prefix := getRealPath(prefix)
	if err != nil {
		return nil, err
	}
	ret := make([]FileEntry, 0)
	for objInfo := range fs.client.ListObjects(context.TODO(), bucket, minio.ListObjectsOptions{Prefix: prefix, Recursive: true}) {
		if objInfo.Err != nil {
			log.Warn("list object error", zap.Error(objInfo.Err))
			return nil, objInfo.Err
		}
		ret = append(ret, FileEntry{Path: path.Join(bucket, objInfo.Key)})
	}
	return ret, nil
}
|
||||||
|
|
||||||
|
func (fs *MinioFs) ReadFile(path string) ([]byte, error) {
|
||||||
|
err, bucket, path := getRealPath(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
obj, err := fs.client.GetObject(context.TODO(), bucket, path, minio.GetObjectOptions{})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
stat, err := obj.Stat()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
buf := make([]byte, stat.Size)
|
||||||
|
n, err := obj.Read(buf)
|
||||||
|
if err != nil && err != io.EOF {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if n != int(stat.Size) {
|
||||||
|
return nil, fmt.Errorf("failed to read full file, expect: %d, actual: %d", stat.Size, n)
|
||||||
|
}
|
||||||
|
return buf, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exist reports whether the object at the "<bucket>/<key>" path exists.
// A "NoSuchKey" response maps to (false, nil); any other Stat error is
// returned to the caller.
func (fs *MinioFs) Exist(path string) (bool, error) {
	err, bucket, path := getRealPath(path)
	if err != nil {
		return false, err
	}
	_, err = fs.client.StatObject(context.TODO(), bucket, path, minio.StatObjectOptions{})
	if err != nil {
		resp := minio.ToErrorResponse(err)
		if resp.Code == "NoSuchKey" {
			return false, nil
		}
		return false, err
	}
	return true, nil
}
|
||||||
|
|
||||||
|
// Path returns "<bucket>/<root-path>" as configured by the constructor URI.
func (fs *MinioFs) Path() string {
	return path.Join(fs.bucketName, strings.TrimPrefix(fs.path, "/"))
}
|
||||||
|
|
||||||
|
// NewMinioFs builds a MinioFs from a URI of the form
// s3://username:password@bucket/path?endpoint_override=localhost%3A9000.
//
// The endpoint comes from the endpoint_override query parameter (required);
// credentials come from the URI userinfo. The bucket is created if it does
// not already exist.
func NewMinioFs(uri *url.URL) (*MinioFs, error) {
	accessKey := uri.User.Username()
	secretAccessKey, set := uri.User.Password()
	if !set {
		// Proceed anyway: an empty secret may be valid for anonymous access.
		log.Warn("secret access key not set")
	}

	endpoints, ok := uri.Query()[constant.EndpointOverride]
	if !ok || len(endpoints) == 0 {
		return nil, errors.ErrNoEndpoint
	}

	cli, err := minio.New(endpoints[0], &minio.Options{
		BucketLookup: minio.BucketLookupAuto,
		Creds:        credentials.NewStaticV4(accessKey, secretAccessKey, ""),
	})
	if err != nil {
		return nil, err
	}

	bucket := uri.Host
	path := uri.Path

	log.Info("minio fs infos", zap.String("endpoint", endpoints[0]), zap.String("bucket", bucket), zap.String("path", path))

	// Ensure the target bucket exists, creating it on first use.
	exist, err := cli.BucketExists(context.TODO(), bucket)
	if err != nil {
		return nil, err
	}

	if !exist {
		if err = cli.MakeBucket(context.TODO(), bucket, minio.MakeBucketOptions{}); err != nil {
			return nil, err
		}
	}

	return &MinioFs{
		client:     cli,
		bucketName: bucket,
		path:       path,
	}, nil
}
|
||||||
|
|
||||||
|
// getRealPath splits a combined "bucket/key" path into its bucket and object
// key components.
//
// The error is deliberately returned first to match the existing call sites
// (err, bucket, key := getRealPath(path)).
//
// The input must not start with '/' and must contain at least one '/'.
func getRealPath(path string) (error, string, string) {
	if strings.HasPrefix(path, "/") {
		// Error strings follow Go convention: lowercase, no punctuation.
		return fmt.Errorf("invalid path %q: should not start with '/'", path), "", ""
	}
	words := strings.SplitN(path, "/", 2)
	if len(words) != 2 {
		return fmt.Errorf("invalid path %q: should contain at least one '/'", path), "", ""
	}
	return nil, words[0], words[1]
}
|
||||||
95
internal/storagev2/packed/arrow/c/abi.h
Normal file
95
internal/storagev2/packed/arrow/c/abi.h
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef ARROW_C_DATA_INTERFACE
|
||||||
|
#define ARROW_C_DATA_INTERFACE
|
||||||
|
|
||||||
|
#define ARROW_FLAG_DICTIONARY_ORDERED 1
|
||||||
|
#define ARROW_FLAG_NULLABLE 2
|
||||||
|
#define ARROW_FLAG_MAP_KEYS_SORTED 4
|
||||||
|
|
||||||
|
// ArrowSchema describes the type layout of an Arrow array in the C data
// interface. The layout of this struct is part of the Arrow ABI and must not
// be changed. The producer owns all memory; consumers dispose of it by
// calling `release` exactly once.
struct ArrowSchema {
  // Array type description
  const char* format;
  const char* name;
  const char* metadata;
  int64_t flags;
  int64_t n_children;
  struct ArrowSchema** children;
  struct ArrowSchema* dictionary;

  // Release callback (NULL once the structure has been released or moved)
  void (*release)(struct ArrowSchema*);
  // Opaque producer-specific data
  void* private_data;
};
|
||||||
|
|
||||||
|
// ArrowArray carries the buffers of an Arrow array in the C data interface.
// The layout of this struct is part of the Arrow ABI and must not be changed.
// The producer owns all buffers; consumers dispose of them by calling
// `release` exactly once.
struct ArrowArray {
  // Array data description
  int64_t length;
  int64_t null_count;
  int64_t offset;
  int64_t n_buffers;
  int64_t n_children;
  const void** buffers;
  struct ArrowArray** children;
  struct ArrowArray* dictionary;

  // Release callback (NULL once the structure has been released or moved)
  void (*release)(struct ArrowArray*);
  // Opaque producer-specific data
  void* private_data;
};
|
||||||
|
|
||||||
|
#endif // ARROW_C_DATA_INTERFACE
|
||||||
|
|
||||||
|
#ifndef ARROW_C_STREAM_INTERFACE
|
||||||
|
#define ARROW_C_STREAM_INTERFACE
|
||||||
|
|
||||||
|
// ArrowArrayStream is the C stream interface: a pull-style iterator over
// Arrow record batches sharing one schema. The layout is part of the Arrow
// ABI and must not be changed.
struct ArrowArrayStream {
  // Callback to get the stream type
  // (will be the same for all arrays in the stream).
  //
  // Return value: 0 if successful, an `errno`-compatible error code otherwise.
  //
  // If successful, the ArrowSchema must be released independently from the stream.
  int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);

  // Callback to get the next array
  // (if no error and the array is released, the stream has ended)
  //
  // Return value: 0 if successful, an `errno`-compatible error code otherwise.
  //
  // If successful, the ArrowArray must be released independently from the stream.
  int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);

  // Callback to get optional detailed error information.
  // This must only be called if the last stream operation failed
  // with a non-0 return code.
  //
  // Return value: pointer to a null-terminated character array describing
  // the last error, or NULL if no description is available.
  //
  // The returned pointer is only valid until the next operation on this stream
  // (including release).
  const char* (*get_last_error)(struct ArrowArrayStream*);

  // Release callback: release the stream's own resources.
  // Note that arrays returned by `get_next` must be individually released.
  void (*release)(struct ArrowArrayStream*);

  // Opaque producer-specific data
  void* private_data;
};
|
||||||
|
|
||||||
|
#endif // ARROW_C_STREAM_INTERFACE
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
115
internal/storagev2/packed/arrow/c/helpers.h
Normal file
115
internal/storagev2/packed/arrow/c/helpers.h
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "arrow/c/abi.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Query whether the C schema is released
/// (a NULL release callback marks a released or moved structure).
static inline int
ArrowSchemaIsReleased(const struct ArrowSchema* schema) {
  return schema->release == NULL;
}
|
||||||
|
|
||||||
|
/// Mark the C schema released (for use in release callbacks).
/// Only NULLs the callback pointer; it does not free any resources.
static inline void
ArrowSchemaMarkReleased(struct ArrowSchema* schema) {
  schema->release = NULL;
}
|
||||||
|
|
||||||
|
/// Move the C schema from `src` to `dest`
///
/// Note `dest` must *not* point to a valid schema already, otherwise there
/// will be a memory leak.
static inline void
ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) {
  assert(dest != src);
  assert(!ArrowSchemaIsReleased(src));
  memcpy(dest, src, sizeof(struct ArrowSchema));
  // src is marked released so its release callback is never invoked;
  // dest now owns the resources.
  ArrowSchemaMarkReleased(src);
}
|
||||||
|
|
||||||
|
/// Release the C schema, if necessary, by calling its release callback.
/// Safe to call on an already-released schema (no-op).
static inline void
ArrowSchemaRelease(struct ArrowSchema* schema) {
  if (!ArrowSchemaIsReleased(schema)) {
    schema->release(schema);
    // The release callback is required to mark the schema released.
    assert(ArrowSchemaIsReleased(schema));
  }
}
|
||||||
|
|
||||||
|
/// Query whether the C array is released
/// (a NULL release callback marks a released or moved structure).
static inline int
ArrowArrayIsReleased(const struct ArrowArray* array) {
  return array->release == NULL;
}
|
||||||
|
|
||||||
|
/// Mark the C array released (for use in release callbacks).
/// Only NULLs the callback pointer; it does not free any buffers.
static inline void
ArrowArrayMarkReleased(struct ArrowArray* array) {
  array->release = NULL;
}
|
||||||
|
|
||||||
|
/// Move the C array from `src` to `dest`
///
/// Note `dest` must *not* point to a valid array already, otherwise there
/// will be a memory leak.
static inline void
ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) {
  assert(dest != src);
  assert(!ArrowArrayIsReleased(src));
  memcpy(dest, src, sizeof(struct ArrowArray));
  // src is marked released so its release callback is never invoked;
  // dest now owns the buffers.
  ArrowArrayMarkReleased(src);
}
|
||||||
|
|
||||||
|
/// Release the C array, if necessary, by calling its release callback.
/// Safe to call on an already-released array (no-op).
static inline void
ArrowArrayRelease(struct ArrowArray* array) {
  if (!ArrowArrayIsReleased(array)) {
    array->release(array);
    // The release callback is required to mark the array released.
    assert(ArrowArrayIsReleased(array));
  }
}
|
||||||
|
|
||||||
|
/// Query whether the C array stream is released
/// (a NULL release callback marks a released or moved structure).
static inline int
ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) {
  return stream->release == NULL;
}
|
||||||
|
|
||||||
|
/// Mark the C array stream released (for use in release callbacks).
/// Only NULLs the callback pointer; it does not free any resources.
static inline void
ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) {
  stream->release = NULL;
}
|
||||||
|
|
||||||
|
/// Move the C array stream from `src` to `dest`
///
/// Note `dest` must *not* point to a valid stream already, otherwise there
/// will be a memory leak.
static inline void
ArrowArrayStreamMove(struct ArrowArrayStream* src,
                     struct ArrowArrayStream* dest) {
  assert(dest != src);
  assert(!ArrowArrayStreamIsReleased(src));
  memcpy(dest, src, sizeof(struct ArrowArrayStream));
  // src is marked released so its release callback is never invoked;
  // dest now owns the stream resources.
  ArrowArrayStreamMarkReleased(src);
}
|
||||||
|
|
||||||
|
/// Release the C array stream, if necessary, by calling its release callback.
/// Safe to call on an already-released stream (no-op).
static inline void
ArrowArrayStreamRelease(struct ArrowArrayStream* stream) {
  if (!ArrowArrayStreamIsReleased(stream)) {
    stream->release(stream);
    // The release callback is required to mark the stream released.
    assert(ArrowArrayStreamIsReleased(stream));
  }
}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
84
internal/storagev2/packed/packed_reader.go
Normal file
84
internal/storagev2/packed/packed_reader.go
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package packed
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo pkg-config: milvus_core
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "segcore/packed_reader_c.h"
|
||||||
|
#include "arrow/c/abi.h"
|
||||||
|
#include "arrow/c/helpers.h"
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/cdata"
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewPackedReader opens a packed-format reader over the files at path.
//
// The Go arrow schema is exported through the Arrow C data interface and
// handed to the C++ reader; bufferSize bounds internal buffering in bytes.
// A non-zero status from the C side is surfaced as an error.
func NewPackedReader(path string, schema *arrow.Schema, bufferSize int) (*PackedReader, error) {
	var cas cdata.CArrowSchema
	cdata.ExportArrowSchema(schema, &cas)
	// cas and C.struct_ArrowSchema share the same ABI layout, so the pointer
	// cast is safe to pass across cgo.
	cSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(&cas))

	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))

	cBufferSize := C.int64_t(bufferSize)

	var cPackedReader C.CPackedReader
	status := C.NewPackedReader(cPath, cSchema, cBufferSize, &cPackedReader)
	if status != 0 {
		return nil, fmt.Errorf("failed to new packed reader: %s, status: %d", path, status)
	}
	return &PackedReader{cPackedReader: cPackedReader, schema: schema}, nil
}
|
||||||
|
|
||||||
|
// ReadNext fetches the next record batch from the C++ reader.
//
// It returns (nil, nil) at end of stream. The C-side array/schema are
// imported via cdata.ImportCRecordBatch; callers should Release() the
// returned record when done (presumably ownership of the C buffers transfers
// to the record — TODO confirm against cdata docs).
func (pr *PackedReader) ReadNext() (arrow.Record, error) {
	var cArr C.CArrowArray
	var cSchema C.CArrowSchema
	status := C.ReadNext(pr.cPackedReader, &cArr, &cSchema)
	if status != 0 {
		return nil, fmt.Errorf("ReadNext failed with error code %d", status)
	}

	if cArr == nil {
		return nil, nil // end of stream, no more records to read
	}

	// Convert ArrowArray to Go RecordBatch using cdata
	goCArr := (*cdata.CArrowArray)(unsafe.Pointer(cArr))
	goCSchema := (*cdata.CArrowSchema)(unsafe.Pointer(cSchema))
	recordBatch, err := cdata.ImportCRecordBatch(goCArr, goCSchema)
	if err != nil {
		return nil, fmt.Errorf("failed to convert ArrowArray to Record: %w", err)
	}

	// Return the RecordBatch as an arrow.Record
	return recordBatch, nil
}
|
||||||
|
|
||||||
|
// Close releases the underlying C++ reader. The PackedReader must not be
// used after Close returns.
func (pr *PackedReader) Close() error {
	status := C.CloseReader(pr.cPackedReader)
	if status != 0 {
		return errors.New("PackedReader: failed to close file")
	}
	return nil
}
|
||||||
156
internal/storagev2/packed/packed_test.go
Normal file
156
internal/storagev2/packed/packed_test.go
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package packed
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/memory"
|
||||||
|
"github.com/stretchr/testify/suite"
|
||||||
|
"golang.org/x/exp/rand"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestPackedReadAndWrite runs the packed read/write test suite.
func TestPackedReadAndWrite(t *testing.T) {
	suite.Run(t, new(PackedTestSuite))
}
|
||||||
|
|
||||||
|
// PackedTestSuite holds the shared schema and a small prebuilt record used
// by the packed read/write tests; both are rebuilt per test in SetupTest.
type PackedTestSuite struct {
	suite.Suite
	schema *arrow.Schema // three columns: int32 "a", int64 "b", string "c"
	rec    arrow.Record  // three-row record built in SetupTest
}
|
||||||
|
|
||||||
|
func (suite *PackedTestSuite) SetupTest() {
|
||||||
|
schema := arrow.NewSchema([]arrow.Field{
|
||||||
|
{Name: "a", Type: arrow.PrimitiveTypes.Int32},
|
||||||
|
{Name: "b", Type: arrow.PrimitiveTypes.Int64},
|
||||||
|
{Name: "c", Type: arrow.BinaryTypes.String},
|
||||||
|
}, nil)
|
||||||
|
suite.schema = schema
|
||||||
|
|
||||||
|
b := array.NewRecordBuilder(memory.DefaultAllocator, schema)
|
||||||
|
defer b.Release()
|
||||||
|
for idx := range schema.Fields() {
|
||||||
|
switch idx {
|
||||||
|
case 0:
|
||||||
|
b.Field(idx).(*array.Int32Builder).AppendValues(
|
||||||
|
[]int32{int32(1), int32(2), int32(3)}, nil,
|
||||||
|
)
|
||||||
|
case 1:
|
||||||
|
b.Field(idx).(*array.Int64Builder).AppendValues(
|
||||||
|
[]int64{int64(4), int64(5), int64(6)}, nil,
|
||||||
|
)
|
||||||
|
case 2:
|
||||||
|
b.Field(idx).(*array.StringBuilder).AppendValues(
|
||||||
|
[]string{"a", "b", "c"}, nil,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rec := b.NewRecord()
|
||||||
|
suite.rec = rec
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestPackedOneFile writes the suite's 3-row record 100 times into a packed
// file under /tmp and verifies a single ReadNext returns all 300 rows.
func (suite *PackedTestSuite) TestPackedOneFile() {
	batches := 100

	path := "/tmp"
	bufferSize := 10 * 1024 * 1024 // 10MB
	pw, err := NewPackedWriter(path, suite.schema, bufferSize)
	suite.NoError(err)
	for i := 0; i < batches; i++ {
		err = pw.WriteRecordBatch(suite.rec)
		suite.NoError(err)
	}
	err = pw.Close()
	suite.NoError(err)

	reader, err := NewPackedReader(path, suite.schema, bufferSize)
	suite.NoError(err)
	rr, err := reader.ReadNext()
	suite.NoError(err)
	// NOTE(review): rr is dereferenced without a nil check; ReadNext returns
	// nil at end of stream, so this would panic on an empty file. Acceptable
	// here since data was just written, but worth confirming. The reader is
	// also never Close()d — TODO confirm whether that leaks C-side state.
	defer rr.Release()
	suite.Equal(int64(3*batches), rr.NumRows())
}
|
||||||
|
|
||||||
|
// TestPackedMultiFiles writes 1000 batches of a 30-row record (with ~1 KB
// random strings to force multiple files) and verifies that iterating
// ReadNext until end of stream yields every row back.
func (suite *PackedTestSuite) TestPackedMultiFiles() {
	batches := 1000

	b := array.NewRecordBuilder(memory.DefaultAllocator, suite.schema)
	strLen := 1000 // length of each random string value
	arrLen := 30   // rows per batch
	defer b.Release()
	for idx := range suite.schema.Fields() {
		switch idx {
		case 0:
			values := make([]int32, arrLen)
			for i := 0; i < arrLen; i++ {
				values[i] = int32(i + 1)
			}
			b.Field(idx).(*array.Int32Builder).AppendValues(values, nil)
		case 1:
			values := make([]int64, arrLen)
			for i := 0; i < arrLen; i++ {
				values[i] = int64(i + 1)
			}
			b.Field(idx).(*array.Int64Builder).AppendValues(values, nil)
		case 2:
			values := make([]string, arrLen)
			for i := 0; i < arrLen; i++ {
				values[i] = randomString(strLen)
			}
			b.Field(idx).(*array.StringBuilder).AppendValues(values, nil)
		}
	}
	rec := b.NewRecord()
	defer rec.Release()
	path := "/tmp"
	bufferSize := 10 * 1024 * 1024 // 10MB
	pw, err := NewPackedWriter(path, suite.schema, bufferSize)
	suite.NoError(err)
	for i := 0; i < batches; i++ {
		err = pw.WriteRecordBatch(rec)
		suite.NoError(err)
	}
	err = pw.Close()
	suite.NoError(err)

	reader, err := NewPackedReader(path, suite.schema, bufferSize)
	suite.NoError(err)
	var rows int64 = 0
	var rr arrow.Record
	// Drain the stream: ReadNext returns a nil record at end of file.
	for {
		rr, err = reader.ReadNext()
		suite.NoError(err)
		if rr == nil {
			// end of file
			break
		}

		rows += rr.NumRows()
	}

	suite.Equal(int64(arrLen*batches), rows)
}
|
||||||
|
|
||||||
|
// randomString returns a random alphanumeric string of the given length.
func randomString(length int) string {
	const charset = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	buf := make([]byte, length)
	for i := 0; i < length; i++ {
		buf[i] = charset[rand.Intn(len(charset))]
	}
	return string(buf)
}
|
||||||
77
internal/storagev2/packed/packed_writer.go
Normal file
77
internal/storagev2/packed/packed_writer.go
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package packed
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo pkg-config: milvus_core
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "segcore/packed_writer_c.h"
|
||||||
|
#include "arrow/c/abi.h"
|
||||||
|
#include "arrow/c/helpers.h"
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/cdata"
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewPackedWriter creates a packed-format writer rooted at path.
//
// The Go arrow schema is exported through the Arrow C data interface and
// handed to the C++ writer; bufferSize bounds internal buffering in bytes.
// A non-zero status from the C side is surfaced as an error.
func NewPackedWriter(path string, schema *arrow.Schema, bufferSize int) (*PackedWriter, error) {
	var cas cdata.CArrowSchema
	cdata.ExportArrowSchema(schema, &cas)
	// cas and C.struct_ArrowSchema share the same ABI layout, so the pointer
	// cast is safe to pass across cgo.
	cSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(&cas))

	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))

	cBufferSize := C.int64_t(bufferSize)

	var cPackedWriter C.CPackedWriter
	status := C.NewPackedWriter(cPath, cSchema, cBufferSize, &cPackedWriter)
	if status != 0 {
		return nil, fmt.Errorf("failed to new packed writer: %s, status: %d", path, status)
	}
	return &PackedWriter{cPackedWriter: cPackedWriter}, nil
}
|
||||||
|
|
||||||
|
// WriteRecordBatch appends one record batch to the packed file set.
//
// The record is exported through the Arrow C data interface and handed to
// the C++ writer (presumably the C side consumes/releases the exported
// structures — TODO confirm against packed_writer_c.h).
func (pw *PackedWriter) WriteRecordBatch(recordBatch arrow.Record) error {
	var caa cdata.CArrowArray
	var cas cdata.CArrowSchema

	cdata.ExportArrowRecordBatch(recordBatch, &caa, &cas)

	cArr := (*C.struct_ArrowArray)(unsafe.Pointer(&caa))
	cSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(&cas))

	status := C.WriteRecordBatch(pw.cPackedWriter, cArr, cSchema)
	if status != 0 {
		return errors.New("PackedWriter: failed to write record batch")
	}

	return nil
}
|
||||||
|
|
||||||
|
// Close flushes and releases the underlying C++ writer. The PackedWriter
// must not be used after Close returns.
func (pw *PackedWriter) Close() error {
	status := C.CloseWriter(pw.cPackedWriter)
	if status != 0 {
		return errors.New("PackedWriter: failed to close file")
	}
	return nil
}
|
||||||
46
internal/storagev2/packed/type.go
Normal file
46
internal/storagev2/packed/type.go
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package packed
|
||||||
|
|
||||||
|
/*
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "arrow/c/abi.h"
|
||||||
|
#include "arrow/c/helpers.h"
|
||||||
|
#include "segcore/packed_reader_c.h"
|
||||||
|
#include "segcore/packed_writer_c.h"
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/cdata"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PackedWriter wraps the cgo handle to the C++ packed-format writer.
type PackedWriter struct {
	cPackedWriter C.CPackedWriter // opaque handle owned by the C side
}
|
||||||
|
|
||||||
|
// PackedReader wraps the cgo handle to the C++ packed-format reader.
type PackedReader struct {
	cPackedReader C.CPackedReader   // opaque handle owned by the C side
	arr           *cdata.CArrowArray // scratch for C data interface imports
	schema        *arrow.Schema      // Go-side schema the reader was opened with
}
|
||||||
|
|
||||||
|
// Aliases for the raw C data interface structs, for use at cgo boundaries.
type (
	// CArrowSchema is the C Data Interface for ArrowSchemas
	CArrowSchema = C.struct_ArrowSchema
	// CArrowArray is the C Data Interface object for Arrow Arrays as defined in abi.h
	CArrowArray = C.struct_ArrowArray
)
|
||||||
65
internal/storagev2/reader/commonreader/delete_reader.go
Normal file
65
internal/storagev2/reader/commonreader/delete_reader.go
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package commonreader
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DeleteReader is intended to filter rows covered by delete fragments out of
// an underlying record reader. All iterator methods are unimplemented stubs.
type DeleteReader struct {
	recordReader    array.RecordReader            // source of raw records
	schemaOptions   *schema.SchemaOptions         // schema-level options (e.g. key columns)
	deleteFragments fragment.DeleteFragmentVector // deletes to apply while reading
	options         *options.ReadOptions          // read-time options
}
|
||||||
|
|
||||||
|
// Retain is part of array.RecordReader; not implemented yet.
func (d DeleteReader) Retain() {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
// Release is part of array.RecordReader; not implemented yet.
func (d DeleteReader) Release() {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
// Schema is part of array.RecordReader; not implemented yet.
func (d DeleteReader) Schema() *arrow.Schema {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
// Next is part of array.RecordReader; not implemented yet.
func (d DeleteReader) Next() bool {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
// Record is part of array.RecordReader; not implemented yet.
func (d DeleteReader) Record() arrow.Record {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
// Err is part of array.RecordReader; not implemented yet.
func (d DeleteReader) Err() error {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
func NewDeleteReader(recordReader array.RecordReader, schemaOptions *schema.SchemaOptions, deleteFragments fragment.DeleteFragmentVector, options *options.ReadOptions) *DeleteReader {
|
||||||
|
return &DeleteReader{recordReader: recordReader, schemaOptions: schemaOptions, deleteFragments: deleteFragments, options: options}
|
||||||
|
}
|
||||||
84
internal/storagev2/reader/commonreader/filter_reader.go
Normal file
84
internal/storagev2/reader/commonreader/filter_reader.go
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package commonreader
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FilterReader is intended to apply the read options' filters to batches
// pulled from an underlying record reader. Filtering is not implemented yet.
type FilterReader struct {
	recordReader               array.RecordReader   // source of raw record batches
	option                     *options.ReadOptions // carries the filters to apply
	currentFilteredBatchReader array.RecordReader   // in-progress filtered batch, if any
}
|
||||||
|
|
||||||
|
// Retain is part of array.RecordReader; not implemented yet.
func (r *FilterReader) Retain() {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
// Release is part of array.RecordReader; not implemented yet.
func (r *FilterReader) Release() {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
// Schema is part of array.RecordReader; not implemented yet.
func (r *FilterReader) Schema() *arrow.Schema {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
// Record is part of array.RecordReader; not implemented yet.
func (r *FilterReader) Record() arrow.Record {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
// Err is part of array.RecordReader; not implemented yet.
func (r *FilterReader) Err() error {
	// TODO implement me
	panic("implement me")
}
|
||||||
|
|
||||||
|
func MakeFilterReader(recordReader array.RecordReader, option *options.ReadOptions) *FilterReader {
|
||||||
|
return &FilterReader{
|
||||||
|
recordReader: recordReader,
|
||||||
|
option: option,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *FilterReader) Next() bool {
|
||||||
|
//for {
|
||||||
|
// if r.currentFilteredBatchReader != nil {
|
||||||
|
// filteredBatch := r.currentFilteredBatchReader.Next()
|
||||||
|
// if err != nil {
|
||||||
|
// return false
|
||||||
|
// }
|
||||||
|
// if filteredBatch == nil {
|
||||||
|
// r.currentFilteredBatchReader = nil
|
||||||
|
// continue
|
||||||
|
// }
|
||||||
|
// return filteredBatch, nil
|
||||||
|
// }
|
||||||
|
// err := r.NextFilteredBatchReader()
|
||||||
|
// if err != nil {
|
||||||
|
// return nil
|
||||||
|
// }
|
||||||
|
// if r.currentFilteredBatchReader == nil {
|
||||||
|
// return nil
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
return false
|
||||||
|
}
|
||||||
35
internal/storagev2/reader/commonreader/projection_reader.go
Normal file
35
internal/storagev2/reader/commonreader/projection_reader.go
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package commonreader
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ProjectionReader exposes only a projected subset of columns from an
// underlying record reader.
type ProjectionReader struct {
	// NOTE(review): the embedded RecordReader is never assigned by
	// NewProjectionReader, so interface method calls would hit a nil
	// embedded value — confirm intended behavior.
	array.RecordReader
	reader  array.RecordReader   // underlying source reader
	options *options.ReadOptions // carries the projected column list
	schema  *arrow.Schema        // schema restricted to options.Columns
}
|
||||||
|
|
||||||
|
// NewProjectionReader wraps reader so that only the columns listed in
// options.Columns are exposed; the projected schema is computed via
// utils.ProjectSchema.
func NewProjectionReader(reader array.RecordReader, options *options.ReadOptions, schema *arrow.Schema) array.RecordReader {
	projectionSchema := utils.ProjectSchema(schema, options.Columns)
	return &ProjectionReader{reader: reader, options: options, schema: projectionSchema}
}
|
||||||
@ -0,0 +1,49 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package recordreader
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FilterQueryRecordReader is a placeholder for a reader that evaluates
// arbitrary filters across both scalar and vector fragments. It is not
// implemented yet.
type FilterQueryRecordReader struct {
	// TODO implement me
	ref             int64
	schema          *schema.Schema
	options         *options.ReadOptions
	fs              fs.Fs
	scalarFragment  fragment.FragmentVector
	vectorFragment  fragment.FragmentVector
	deleteFragments fragment.DeleteFragmentVector
	record          arrow.Record
}

// NewFilterQueryReader is intended to construct a FilterQueryRecordReader.
// It currently panics: any query routed here by MakeRecordReader will
// crash until the implementation lands.
func NewFilterQueryReader(
	s *schema.Schema,
	options *options.ReadOptions,
	f fs.Fs,
	scalarFragment fragment.FragmentVector,
	vectorFragment fragment.FragmentVector,
	deleteFragments fragment.DeleteFragmentVector,
) array.RecordReader {
	// TODO implement me
	panic("implement me")
}
|
||||||
@ -0,0 +1,77 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package recordreader
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MergeRecordReader is a placeholder reader intended to merge rows from
// scalar and vector fragments (joined on the PK/version columns). Every
// method currently panics; MakeRecordReader can still select it, so that
// path will crash until this is implemented.
type MergeRecordReader struct {
	ref             int64
	schema          *schema.Schema
	options         *options.ReadOptions
	fs              fs.Fs
	scalarFragments fragment.FragmentVector
	vectorFragments fragment.FragmentVector
	deleteFragments fragment.DeleteFragmentVector
	record          arrow.Record
}

// Retain is not implemented yet.
func (m MergeRecordReader) Retain() {
	// TODO implement me
	panic("implement me")
}

// Release is not implemented yet.
func (m MergeRecordReader) Release() {
	// TODO implement me
	panic("implement me")
}

// Schema is not implemented yet.
func (m MergeRecordReader) Schema() *arrow.Schema {
	// TODO implement me
	panic("implement me")
}

// Next is not implemented yet.
func (m MergeRecordReader) Next() bool {
	// TODO implement me
	panic("implement me")
}

// Record is not implemented yet.
func (m MergeRecordReader) Record() arrow.Record {
	// TODO implement me
	panic("implement me")
}

// Err is not implemented yet.
func (m MergeRecordReader) Err() error {
	// TODO implement me
	panic("implement me")
}

// NewMergeRecordReader is intended to construct a MergeRecordReader; it
// currently panics.
func NewMergeRecordReader(
	s *schema.Schema,
	options *options.ReadOptions,
	f fs.Fs,
	scalarFragment fragment.FragmentVector,
	vectorFragment fragment.FragmentVector,
	deleteFragments fragment.DeleteFragmentVector,
) *MergeRecordReader {
	// TODO implement me
	panic("implement me")
}
|
||||||
@ -0,0 +1,119 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package recordreader
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync/atomic"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
"github.com/apache/arrow/go/v12/parquet/pqarrow"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/arrowutil"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MultiFilesSequentialReader streams records from a list of parquet files
// one after another, presenting them to callers as a single arrow
// RecordReader.
type MultiFilesSequentialReader struct {
	fs         fs.Fs
	schema     *arrow.Schema
	files      []string // flattened file paths gathered from all fragments
	nextPos    int      // index of the next file to open
	options    *options.ReadOptions
	currReader array.RecordReader // reader over the currently open file; nil between files
	err        error              // first error encountered; sticky
	ref        int64              // reference count, manipulated via sync/atomic
}
|
||||||
|
|
||||||
|
func (m *MultiFilesSequentialReader) Retain() {
|
||||||
|
atomic.AddInt64(&m.ref, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MultiFilesSequentialReader) Release() {
|
||||||
|
if atomic.AddInt64(&m.ref, -1) == 0 {
|
||||||
|
if m.currReader != nil {
|
||||||
|
m.currReader.Release()
|
||||||
|
m.currReader = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MultiFilesSequentialReader) Schema() *arrow.Schema {
|
||||||
|
return m.schema
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MultiFilesSequentialReader) Next() bool {
|
||||||
|
for {
|
||||||
|
if m.currReader == nil {
|
||||||
|
if m.nextPos >= len(m.files) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
m.nextReader()
|
||||||
|
if m.err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
m.nextPos++
|
||||||
|
}
|
||||||
|
if m.currReader.Next() {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if m.currReader.Err() != nil {
|
||||||
|
m.err = m.currReader.Err()
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if m.currReader != nil {
|
||||||
|
m.currReader.Release()
|
||||||
|
m.currReader = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MultiFilesSequentialReader) Record() arrow.Record {
|
||||||
|
if m.currReader != nil {
|
||||||
|
return m.currReader.Record()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MultiFilesSequentialReader) Err() error {
|
||||||
|
return m.err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MultiFilesSequentialReader) nextReader() {
|
||||||
|
var fileReader *pqarrow.FileReader
|
||||||
|
fileReader, m.err = arrowutil.MakeArrowFileReader(m.fs, m.files[m.nextPos])
|
||||||
|
if m.err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m.currReader, m.err = arrowutil.MakeArrowRecordReader(fileReader, m.options)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewMultiFilesSequentialReader(fs fs.Fs, fragments fragment.FragmentVector, schema *arrow.Schema, options *options.ReadOptions) *MultiFilesSequentialReader {
|
||||||
|
files := make([]string, 0, len(fragments))
|
||||||
|
for _, f := range fragments {
|
||||||
|
files = append(files, f.Files()...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &MultiFilesSequentialReader{
|
||||||
|
fs: fs,
|
||||||
|
schema: schema,
|
||||||
|
options: options,
|
||||||
|
files: files,
|
||||||
|
nextPos: 0,
|
||||||
|
ref: 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
93
internal/storagev2/reader/recordreader/record_reader.go
Normal file
93
internal/storagev2/reader/recordreader/record_reader.go
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package recordreader
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/filter"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/manifest"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MakeRecordReader picks a concrete reader implementation for the given
// manifest/schema/filter combination:
//   - only scalar (or only vector/PK/version) columns involved -> plain
//     ScanRecordReader over the corresponding fragment set;
//   - filters restricted to the primary-key and version columns -> merge
//     reader over both fragment sets;
//   - anything else -> generic filter-query reader.
//
// NOTE(review): the guard below checks len(options.Filters) but the
// predicate inspects options.FiltersV2 — if those two fields can diverge,
// this branch may be taken or skipped inconsistently. Confirm which field
// is authoritative.
func MakeRecordReader(
	m *manifest.Manifest,
	s *schema.Schema,
	f fs.Fs,
	deleteFragments fragment.DeleteFragmentVector,
	options *options.ReadOptions,
) array.RecordReader {
	// Columns the query actually touches: the explicit output columns plus
	// every column referenced by a filter.
	relatedColumns := make([]string, 0)
	relatedColumns = append(relatedColumns, options.Columns...)

	for _, filter := range options.Filters {
		relatedColumns = append(relatedColumns, filter.GetColumnName())
	}

	scalarData := m.GetScalarFragments()
	vectorData := m.GetVectorFragments()

	onlyScalar := onlyContainScalarColumns(s, relatedColumns)
	onlyVector := onlyContainVectorColumns(s, relatedColumns)

	if onlyScalar || onlyVector {
		// Single-sided scan: choose the fragment set that covers the query.
		var dataFragments fragment.FragmentVector
		if onlyScalar {
			dataFragments = scalarData
		} else {
			dataFragments = vectorData
		}
		return NewScanRecordReader(s, options, f, dataFragments, deleteFragments)
	}
	if len(options.Filters) > 0 && filtersOnlyContainPKAndVersion(s, options.FiltersV2) {
		return NewMergeRecordReader(s, options, f, scalarData, vectorData, deleteFragments)
	}
	return NewFilterQueryReader(s, options, f, scalarData, vectorData, deleteFragments)
}
|
||||||
|
|
||||||
|
func onlyContainVectorColumns(schema *schema.Schema, relatedColumns []string) bool {
|
||||||
|
for _, column := range relatedColumns {
|
||||||
|
if schema.Options().VectorColumn != column && schema.Options().PrimaryColumn != column && schema.Options().VersionColumn != column {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func onlyContainScalarColumns(schema *schema.Schema, relatedColumns []string) bool {
|
||||||
|
for _, column := range relatedColumns {
|
||||||
|
if schema.Options().VectorColumn == column {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func filtersOnlyContainPKAndVersion(s *schema.Schema, filters []filter.Filter) bool {
|
||||||
|
for _, f := range filters {
|
||||||
|
if f.GetColumnName() != s.Options().PrimaryColumn &&
|
||||||
|
f.GetColumnName() != s.Options().VersionColumn {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeScanDeleteReader(manifest *manifest.Manifest, fs fs.Fs) array.RecordReader {
|
||||||
|
return NewMultiFilesSequentialReader(fs, manifest.GetDeleteFragments(), manifest.GetSchema().DeleteSchema(), options.NewReadOptions())
|
||||||
|
}
|
||||||
151
internal/storagev2/reader/recordreader/scan_record.go
Normal file
151
internal/storagev2/reader/recordreader/scan_record.go
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package recordreader
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"sync/atomic"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/format"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/format/parquet"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/reader/commonreader"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ScanRecordReader performs a plain sequential scan over one set of data
// fragments (scalar or vector), yielding records file by file.
type ScanRecordReader struct {
	ref             int64 // reference count, manipulated via sync/atomic
	schema          *schema.Schema
	options         *options.ReadOptions
	fs              fs.Fs
	dataFragments   fragment.FragmentVector
	deleteFragments fragment.DeleteFragmentVector // only applied via MakeInnerReader, not by Next
	rec             arrow.Record                  // batch returned by the most recent Next
	curReader       format.Reader                 // reader over the currently open file
	reader          array.RecordReader            // NOTE(review): appears unused in this chunk — confirm
	nextPos         int                           // index of the next data file to open
	err             error                         // sticky first error
}
|
||||||
|
|
||||||
|
func NewScanRecordReader(
|
||||||
|
s *schema.Schema,
|
||||||
|
options *options.ReadOptions,
|
||||||
|
f fs.Fs,
|
||||||
|
dataFragments fragment.FragmentVector,
|
||||||
|
deleteFragments fragment.DeleteFragmentVector,
|
||||||
|
) *ScanRecordReader {
|
||||||
|
return &ScanRecordReader{
|
||||||
|
ref: 1,
|
||||||
|
schema: s,
|
||||||
|
options: options,
|
||||||
|
fs: f,
|
||||||
|
dataFragments: dataFragments,
|
||||||
|
deleteFragments: deleteFragments,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ScanRecordReader) Schema() *arrow.Schema {
|
||||||
|
return utils.ProjectSchema(r.schema.Schema(), r.options.OutputColumns())
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ScanRecordReader) Retain() {
|
||||||
|
atomic.AddInt64(&r.ref, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ScanRecordReader) Release() {
|
||||||
|
if atomic.AddInt64(&r.ref, -1) == 0 {
|
||||||
|
if r.rec != nil {
|
||||||
|
r.rec.Release()
|
||||||
|
r.rec = nil
|
||||||
|
}
|
||||||
|
if r.curReader != nil {
|
||||||
|
r.curReader.Close()
|
||||||
|
r.curReader = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ScanRecordReader) Next() bool {
|
||||||
|
datafiles := fragment.ToFilesVector(r.dataFragments)
|
||||||
|
log.Debug("ScanRecordReader Next", zap.Any("datafiles", datafiles))
|
||||||
|
if r.rec != nil {
|
||||||
|
r.rec.Release()
|
||||||
|
r.rec = nil
|
||||||
|
}
|
||||||
|
for {
|
||||||
|
if r.curReader == nil {
|
||||||
|
if r.nextPos >= len(datafiles) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// FIXME: nil options
|
||||||
|
reader, err := parquet.NewFileReader(r.fs, datafiles[r.nextPos], r.options)
|
||||||
|
if err != nil {
|
||||||
|
r.err = err
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
r.nextPos++
|
||||||
|
r.curReader = reader
|
||||||
|
}
|
||||||
|
|
||||||
|
rec, err := r.curReader.Read()
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
r.curReader.Close()
|
||||||
|
r.curReader = nil
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// if error occurs in the middle of reading, return false
|
||||||
|
r.curReader.Close()
|
||||||
|
r.curReader = nil
|
||||||
|
r.err = err
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if rec.NumRows() == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
r.rec = rec
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ScanRecordReader) Record() arrow.Record {
|
||||||
|
return r.rec
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ScanRecordReader) Err() error {
|
||||||
|
return r.err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ScanRecordReader) MakeInnerReader() array.RecordReader {
|
||||||
|
// TODO implement me
|
||||||
|
reader := NewMultiFilesSequentialReader(r.fs, r.dataFragments, r.Schema(), r.options)
|
||||||
|
|
||||||
|
filterReader := commonreader.MakeFilterReader(reader, r.options)
|
||||||
|
|
||||||
|
deleteReader := commonreader.NewDeleteReader(filterReader, r.schema.Options(), r.deleteFragments, r.options)
|
||||||
|
|
||||||
|
res := commonreader.NewProjectionReader(deleteReader, r.options, r.schema.Schema())
|
||||||
|
return res
|
||||||
|
}
|
||||||
98
internal/storagev2/storage/lock/lock_manager.go
Normal file
98
internal/storagev2/storage/lock/lock_manager.go
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package lock
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LockManager serializes manifest updates between concurrent writers.
type LockManager interface {
	// Acquire the lock, wait until the lock is available, return the version to be modified or use the newest version
	Acquire() (version int64, useLatestVersion bool, err error)
	// Release the lock, accepts the new allocated manifest version and success state of operations between Acquire and Release as parameters
	Release(version int64, success bool) error
}

// EmptyLockManager is a no-op LockManager: Acquire always succeeds and
// directs the caller to the latest manifest version; Release does nothing.
// Suitable when only a single writer can exist.
type EmptyLockManager struct{}

// Acquire always grants the "lock" and points at the latest version.
func (h *EmptyLockManager) Acquire() (version int64, useLatestVersion bool, err error) {
	return constant.LatestManifestVersion, true, nil
}

// Release is a no-op and never fails.
func (h *EmptyLockManager) Release(_ int64, _ bool) error {
	return nil
}
|
||||||
|
|
||||||
|
type MemoryLockManager struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
locks map[int64]bool
|
||||||
|
nextVersion int64
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewMemoryLockManager() *MemoryLockManager {
|
||||||
|
return &MemoryLockManager{
|
||||||
|
mu: sync.Mutex{},
|
||||||
|
locks: make(map[int64]bool),
|
||||||
|
nextVersion: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MemoryLockManager) Acquire() (version int64, useLatestVersion bool, err error) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
|
||||||
|
version = m.nextVersion
|
||||||
|
|
||||||
|
if m.locks[version] {
|
||||||
|
log.Warn("lock is already acquired", zap.Int64("version", version))
|
||||||
|
return version, false, errors.New("lock is already acquired")
|
||||||
|
}
|
||||||
|
|
||||||
|
if version == constant.LatestManifestVersion {
|
||||||
|
useLatestVersion = true
|
||||||
|
} else {
|
||||||
|
useLatestVersion = false
|
||||||
|
}
|
||||||
|
m.locks[version] = true
|
||||||
|
log.Info("acquire lock", zap.Int64("version", version), zap.Bool("useLatestVersion", useLatestVersion))
|
||||||
|
|
||||||
|
return version, useLatestVersion, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MemoryLockManager) Release(version int64, success bool) error {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
|
||||||
|
realVersion := int64(0)
|
||||||
|
realVersion = version - 1
|
||||||
|
if !m.locks[realVersion] {
|
||||||
|
return errors.New("lock is already released or does not exist")
|
||||||
|
}
|
||||||
|
m.locks[realVersion] = false
|
||||||
|
log.Info("release lock", zap.Int64("version", realVersion), zap.Bool("success", success))
|
||||||
|
if success {
|
||||||
|
m.nextVersion = version
|
||||||
|
} else {
|
||||||
|
m.nextVersion = constant.LatestManifestVersion
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
80
internal/storagev2/storage/manifest/commit.go
Normal file
80
internal/storagev2/storage/manifest/commit.go
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package manifest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/lock"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ManifestCommit accumulates manifest mutation operations and applies them
// atomically under the configured lock manager.
type ManifestCommit struct {
	ops  []ManifestCommitOp // operations applied in order at Commit time
	lock lock.LockManager
	rw   ManifestReaderWriter
}

// AddOp appends one or more operations to be applied by Commit.
//
// NOTE(review): AddOp uses a pointer receiver while Commit uses a value
// receiver — calling AddOp through a copy will not persist the ops; keep
// callers on an addressable ManifestCommit.
func (m *ManifestCommit) AddOp(op ...ManifestCommitOp) {
	m.ops = append(m.ops, op...)
}
|
||||||
|
|
||||||
|
func (m ManifestCommit) Commit() (manifest *Manifest, err error) {
|
||||||
|
ver, latest, err := m.lock.Acquire()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var version int64
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
if err2 := m.lock.Release(-1, false); err2 != nil {
|
||||||
|
err = err2
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
err = m.lock.Release(version, true)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
var base *Manifest
|
||||||
|
if latest {
|
||||||
|
base, err = m.rw.Read(constant.LatestManifestVersion)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
base.version++
|
||||||
|
} else {
|
||||||
|
base, err = m.rw.Read(ver)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
maxVersion, err := m.rw.MaxVersion()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
base.version = maxVersion + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, op := range m.ops {
|
||||||
|
op.commit(base)
|
||||||
|
}
|
||||||
|
version = base.version
|
||||||
|
|
||||||
|
err = m.rw.Write(base)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return base, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewManifestCommit(lock lock.LockManager, rw ManifestReaderWriter) ManifestCommit {
|
||||||
|
return ManifestCommit{nil, lock, rw}
|
||||||
|
}
|
||||||
68
internal/storagev2/storage/manifest/commit_op.go
Normal file
68
internal/storagev2/storage/manifest/commit_op.go
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package manifest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/errors"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/blob"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ManifestCommitOp is a single mutation applied to a manifest during
// ManifestCommit.Commit.
type ManifestCommitOp interface {
	commit(manifest *Manifest) error
}

// AddScalarFragmentOp registers a scalar-column fragment in the manifest.
type AddScalarFragmentOp struct {
	ScalarFragment fragment.Fragment
}

// commit stamps the fragment with the manifest's current version and
// appends it to the scalar fragment list.
func (op AddScalarFragmentOp) commit(manifest *Manifest) error {
	op.ScalarFragment.SetFragmentId(manifest.Version())
	manifest.AddScalarFragment(op.ScalarFragment)
	return nil
}

// AddVectorFragmentOp registers a vector-column fragment in the manifest.
type AddVectorFragmentOp struct {
	VectorFragment fragment.Fragment
}

// commit stamps the fragment with the manifest's current version and
// appends it to the vector fragment list.
func (op AddVectorFragmentOp) commit(manifest *Manifest) error {
	op.VectorFragment.SetFragmentId(manifest.Version())
	manifest.AddVectorFragment(op.VectorFragment)
	return nil
}

// AddDeleteFragmentOp registers a delete fragment in the manifest.
type AddDeleteFragmentOp struct {
	DeleteFragment fragment.Fragment
}

// commit stamps the fragment with the manifest's current version and
// appends it to the delete fragment list.
func (op AddDeleteFragmentOp) commit(manifest *Manifest) error {
	op.DeleteFragment.SetFragmentId(manifest.Version())
	manifest.AddDeleteFragment(op.DeleteFragment)
	return nil
}

// AddBlobOp attaches a named blob to the manifest. When Replace is false
// the op fails if a blob with the same name already exists.
type AddBlobOp struct {
	Replace bool
	Blob    blob.Blob
}

// commit adds the blob, enforcing name uniqueness unless Replace is set.
func (op AddBlobOp) commit(manifest *Manifest) error {
	if !op.Replace && manifest.HasBlob(op.Blob.Name) {
		return errors.ErrBlobAlreadyExist
	}
	manifest.AddBlob(op.Blob)
	return nil
}
|
||||||
243
internal/storagev2/storage/manifest/manifest.go
Normal file
243
internal/storagev2/storage/manifest/manifest.go
Normal file
@ -0,0 +1,243 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package manifest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"google.golang.org/protobuf/proto"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/blob"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs/file"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||||
|
"github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Manifest is the versioned catalog of a storage-v2 space: its schema, the
// scalar/vector/delete fragment lists, and any attached blobs.
//
// NOTE(review): ScalarFragments is exported while the sibling fragment
// fields are not — confirm whether external packages access it directly
// before unexporting for consistency.
type Manifest struct {
	schema          *schema.Schema
	ScalarFragments fragment.FragmentVector
	vectorFragments fragment.FragmentVector
	deleteFragments fragment.FragmentVector
	blobs           []blob.Blob
	version         int64
}
|
||||||
|
|
||||||
|
// NewManifest returns an empty manifest bound to the given schema.
func NewManifest(schema *schema.Schema) *Manifest {
	return &Manifest{
		schema: schema,
	}
}

// Init returns a manifest carrying a fresh, field-less arrow schema with
// default schema options.
func Init() *Manifest {
	return &Manifest{
		schema: schema.NewSchema(arrow.NewSchema(nil, nil), schema.DefaultSchemaOptions()),
	}
}

// Copy returns a shallow copy of the manifest. The schema pointer and the
// fragment/blob slices share storage with the receiver, so treat the copy
// as read-only or be aware of slice-aliasing effects when appending.
func (m *Manifest) Copy() *Manifest {
	copied := *m
	return &copied
}
|
||||||
|
|
||||||
|
// GetSchema returns the manifest's schema.
func (m *Manifest) GetSchema() *schema.Schema {
	return m.schema
}

// AddScalarFragment appends a scalar fragment to the manifest.
func (m *Manifest) AddScalarFragment(fragment fragment.Fragment) {
	m.ScalarFragments = append(m.ScalarFragments, fragment)
}

// AddVectorFragment appends a vector fragment to the manifest.
func (m *Manifest) AddVectorFragment(fragment fragment.Fragment) {
	m.vectorFragments = append(m.vectorFragments, fragment)
}

// AddDeleteFragment appends a delete fragment to the manifest.
func (m *Manifest) AddDeleteFragment(fragment fragment.Fragment) {
	m.deleteFragments = append(m.deleteFragments, fragment)
}

// GetScalarFragments returns the scalar fragment list (not a copy).
func (m *Manifest) GetScalarFragments() fragment.FragmentVector {
	return m.ScalarFragments
}

// GetVectorFragments returns the vector fragment list (not a copy).
func (m *Manifest) GetVectorFragments() fragment.FragmentVector {
	return m.vectorFragments
}

// GetDeleteFragments returns the delete fragment list (not a copy).
func (m *Manifest) GetDeleteFragments() fragment.FragmentVector {
	return m.deleteFragments
}

// Version returns the manifest's version number.
func (m *Manifest) Version() int64 {
	return m.version
}

// SetVersion overrides the manifest's version number.
func (m *Manifest) SetVersion(version int64) {
	m.version = version
}

// GetBlobs returns the attached blobs (not a copy).
func (m *Manifest) GetBlobs() []blob.Blob {
	return m.blobs
}
|
||||||
|
|
||||||
|
func (m *Manifest) ToProtobuf() (*storagev2pb.Manifest, error) {
|
||||||
|
manifest := &storagev2pb.Manifest{}
|
||||||
|
manifest.Version = m.version
|
||||||
|
for _, vectorFragment := range m.vectorFragments {
|
||||||
|
manifest.VectorFragments = append(manifest.VectorFragments, vectorFragment.ToProtobuf())
|
||||||
|
}
|
||||||
|
for _, scalarFragment := range m.ScalarFragments {
|
||||||
|
manifest.ScalarFragments = append(manifest.ScalarFragments, scalarFragment.ToProtobuf())
|
||||||
|
}
|
||||||
|
for _, deleteFragment := range m.deleteFragments {
|
||||||
|
manifest.DeleteFragments = append(manifest.DeleteFragments, deleteFragment.ToProtobuf())
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, blob := range m.blobs {
|
||||||
|
manifest.Blobs = append(manifest.Blobs, blob.ToProtobuf())
|
||||||
|
}
|
||||||
|
|
||||||
|
schemaProto, err := m.schema.ToProtobuf()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
manifest.Schema = schemaProto
|
||||||
|
|
||||||
|
return manifest, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manifest) FromProtobuf(manifest *storagev2pb.Manifest) error {
|
||||||
|
err := m.schema.FromProtobuf(manifest.Schema)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, vectorFragment := range manifest.VectorFragments {
|
||||||
|
m.vectorFragments = append(m.vectorFragments, fragment.FromProtobuf(vectorFragment))
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, scalarFragment := range manifest.ScalarFragments {
|
||||||
|
m.ScalarFragments = append(m.ScalarFragments, fragment.FromProtobuf(scalarFragment))
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, deleteFragment := range manifest.DeleteFragments {
|
||||||
|
m.deleteFragments = append(m.deleteFragments, fragment.FromProtobuf(deleteFragment))
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, b := range manifest.Blobs {
|
||||||
|
m.blobs = append(m.blobs, blob.FromProtobuf(b))
|
||||||
|
}
|
||||||
|
|
||||||
|
m.version = manifest.Version
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func WriteManifestFile(manifest *Manifest, output file.File) error {
|
||||||
|
protoManifest, err := manifest.ToProtobuf()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
bytes, err := proto.Marshal(protoManifest)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("write manifest file: %w", err)
|
||||||
|
}
|
||||||
|
write, err := output.Write(bytes)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("write manifest file: %w", err)
|
||||||
|
}
|
||||||
|
if write != len(bytes) {
|
||||||
|
return fmt.Errorf("failed to write whole file, expect: %v, actual: %v", len(bytes), write)
|
||||||
|
}
|
||||||
|
if err = output.Close(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manifest) HasBlob(name string) bool {
|
||||||
|
for _, b := range m.blobs {
|
||||||
|
if b.Name == name {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manifest) AddBlob(blob blob.Blob) {
|
||||||
|
m.blobs = append(m.blobs, blob)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manifest) RemoveBlobIfExist(name string) {
|
||||||
|
idx := -1
|
||||||
|
for i, b := range m.blobs {
|
||||||
|
if b.Name == name {
|
||||||
|
idx = i
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m.blobs = append(m.blobs[0:idx], m.blobs[idx+1:]...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manifest) GetBlob(name string) (blob.Blob, bool) {
|
||||||
|
for _, b := range m.blobs {
|
||||||
|
if b.Name == name {
|
||||||
|
return b, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return blob.Blob{}, false
|
||||||
|
}
|
||||||
|
|
||||||
|
func ParseFromFile(f fs.Fs, path string) (*Manifest, error) {
|
||||||
|
manifest := Init()
|
||||||
|
manifestProto := &storagev2pb.Manifest{}
|
||||||
|
|
||||||
|
buf, err := f.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
err = proto.Unmarshal(buf, manifestProto)
|
||||||
|
if err != nil {
|
||||||
|
log.Error("Failed to unmarshal manifest proto", log.String("err", err.Error()))
|
||||||
|
return nil, fmt.Errorf("parse from file: %w", err)
|
||||||
|
}
|
||||||
|
err = manifest.FromProtobuf(manifestProto)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return manifest, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO REMOVE BELOW CODE
|
||||||
|
|
||||||
|
// DataFile describes a single data file by path.
// NOTE: this type is slated for removal (see the TODO above it).
type DataFile struct {
	path string
	cols []string // not read anywhere in this file; kept for compatibility
}

// Path returns the path of this data file.
func (d *DataFile) Path() string {
	return d.path
}

// NewDataFile creates a DataFile for path with an empty column list.
func NewDataFile(path string) *DataFile {
	return &DataFile{path: path}
}
|
||||||
119
internal/storagev2/storage/manifest/reader_writer.go
Normal file
119
internal/storagev2/storage/manifest/reader_writer.go
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package manifest
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ErrManifestNotFound is returned when no manifest file matching the
// requested version exists under the manifest directory.
var ErrManifestNotFound = errors.New("manifest not found")

// ManifestReaderWriter reads and writes versioned manifest files rooted at a
// fixed directory on the given filesystem.
type ManifestReaderWriter struct {
	fs   fs.Fs  // filesystem used for all manifest I/O
	root string // space root path; manifests live under its manifest dir
}
|
||||||
|
|
||||||
|
func findAllManifest(fs fs.Fs, path string) ([]fs.FileEntry, error) {
|
||||||
|
files, err := fs.List(path)
|
||||||
|
log.Debug("list all manifest:", log.Any("files", files))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return files, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rw ManifestReaderWriter) Read(version int64) (*Manifest, error) {
|
||||||
|
manifests, err := findAllManifest(rw.fs, utils.GetManifestDir(rw.root))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var maxVersionManifest string
|
||||||
|
var maxVersion int64 = -1
|
||||||
|
for _, m := range manifests {
|
||||||
|
ver := utils.ParseVersionFromFileName(filepath.Base(m.Path))
|
||||||
|
if ver == -1 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if version != constant.LatestManifestVersion {
|
||||||
|
if ver == version {
|
||||||
|
return ParseFromFile(rw.fs, m.Path)
|
||||||
|
}
|
||||||
|
} else if ver > maxVersion {
|
||||||
|
maxVersion = ver
|
||||||
|
maxVersionManifest = m.Path
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if maxVersion != -1 {
|
||||||
|
return ParseFromFile(rw.fs, maxVersionManifest)
|
||||||
|
}
|
||||||
|
return nil, ErrManifestNotFound
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rw ManifestReaderWriter) MaxVersion() (int64, error) {
|
||||||
|
manifests, err := findAllManifest(rw.fs, utils.GetManifestDir(rw.root))
|
||||||
|
if err != nil {
|
||||||
|
return -1, err
|
||||||
|
}
|
||||||
|
var max int64 = -1
|
||||||
|
for _, m := range manifests {
|
||||||
|
ver := utils.ParseVersionFromFileName(filepath.Base(m.Path))
|
||||||
|
if ver == -1 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if ver > max {
|
||||||
|
max = ver
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if max == -1 {
|
||||||
|
return -1, ErrManifestNotFound
|
||||||
|
}
|
||||||
|
return max, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write persists m as the manifest file for m.Version(): the serialized
// manifest is first written to a temporary path, then renamed to its final
// versioned path so readers never observe a partially written manifest.
//
// NOTE(review): if WriteManifestFile or Rename fails, the temporary file is
// left behind — confirm whether cleanup is expected here.
func (rw ManifestReaderWriter) Write(m *Manifest) error {
	tmpManifestFilePath := utils.GetManifestTmpFilePath(rw.root, m.Version())
	manifestFilePath := utils.GetManifestFilePath(rw.root, m.Version())
	log.Debug("path", log.String("tmpManifestFilePath", tmpManifestFilePath), log.String("manifestFilePath", manifestFilePath))
	// Write into the temp file first; WriteManifestFile closes output.
	output, err := rw.fs.OpenFile(tmpManifestFilePath)
	if err != nil {
		return fmt.Errorf("open file error: %w", err)
	}
	if err = WriteManifestFile(m, output); err != nil {
		return err
	}
	err = rw.fs.Rename(tmpManifestFilePath, manifestFilePath)
	if err != nil {
		return fmt.Errorf("rename file error: %w", err)
	}
	log.Debug("save manifest file success", log.String("path", manifestFilePath))
	return nil
}
|
||||||
|
|
||||||
|
func NewManifestReaderWriter(fs fs.Fs, root string) ManifestReaderWriter {
|
||||||
|
return ManifestReaderWriter{fs, root}
|
||||||
|
}
|
||||||
144
internal/storagev2/storage/options/options.go
Normal file
144
internal/storagev2/storage/options/options.go
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package options
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/filter"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/lock"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Options configures opening a Space.
type Options struct {
	Schema      *schema.Schema   // optional
	Version     int64            // optional
	LockManager lock.LockManager // optional, no lock manager as default
}

// SpaceOptionsBuilder incrementally assembles an Options value.
type SpaceOptionsBuilder struct {
	options Options
}

// SetSchema sets the space schema and returns the builder for chaining.
func (b *SpaceOptionsBuilder) SetSchema(schema *schema.Schema) *SpaceOptionsBuilder {
	b.options.Schema = schema
	return b
}

// SetVersion sets the manifest version to open and returns the builder.
func (b *SpaceOptionsBuilder) SetVersion(version int64) *SpaceOptionsBuilder {
	b.options.Version = version
	return b
}

// SetLockManager sets the lock manager and returns the builder.
func (b *SpaceOptionsBuilder) SetLockManager(lockManager lock.LockManager) *SpaceOptionsBuilder {
	b.options.LockManager = lockManager
	return b
}

// Reset restores the builder to an empty Options with an EmptyLockManager.
// NOTE(review): unlike NewSpaceOptionBuilder, Reset leaves Version at its
// zero value rather than constant.LatestManifestVersion — confirm whether
// this asymmetry is intended.
func (b *SpaceOptionsBuilder) Reset() {
	b.options = Options{LockManager: &lock.EmptyLockManager{}}
}

// Build returns the accumulated Options value.
func (b *SpaceOptionsBuilder) Build() Options { return b.options }

// NewSpaceOptionBuilder returns a builder defaulting to the latest manifest
// version and an EmptyLockManager.
func NewSpaceOptionBuilder() *SpaceOptionsBuilder {
	return &SpaceOptionsBuilder{
		options: Options{
			Version:     constant.LatestManifestVersion,
			LockManager: &lock.EmptyLockManager{},
		},
	}
}

// DefaultOptions returns a zero-valued Options.
// NOTE(review): the zero Options has a nil LockManager and Version 0, unlike
// the builder defaults above — verify callers account for this.
func DefaultOptions() *Options {
	return &Options{}
}
|
||||||
|
|
||||||
|
type WriteOptions struct {
|
||||||
|
MaxRecordPerFile int64
|
||||||
|
}
|
||||||
|
|
||||||
|
var DefaultWriteOptions = WriteOptions{
|
||||||
|
MaxRecordPerFile: 1024,
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewWriteOption() *WriteOptions {
|
||||||
|
return &WriteOptions{
|
||||||
|
MaxRecordPerFile: 1024,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type FsType int8
|
||||||
|
|
||||||
|
const (
|
||||||
|
InMemory FsType = iota
|
||||||
|
LocalFS
|
||||||
|
S3
|
||||||
|
)
|
||||||
|
|
||||||
|
type SpaceOptions struct {
|
||||||
|
Fs FsType
|
||||||
|
VectorColumns []string
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Change to FilterSet type
|
||||||
|
type FilterSet []filter.Filter
|
||||||
|
|
||||||
|
var version int64 = math.MaxInt64
|
||||||
|
|
||||||
|
type ReadOptions struct {
|
||||||
|
// Filters map[string]filter.Filter
|
||||||
|
Filters map[string]filter.Filter
|
||||||
|
FiltersV2 FilterSet
|
||||||
|
Columns []string
|
||||||
|
ManifestVersion int64
|
||||||
|
version int64
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewReadOptions() *ReadOptions {
|
||||||
|
return &ReadOptions{
|
||||||
|
Filters: make(map[string]filter.Filter),
|
||||||
|
FiltersV2: make(FilterSet, 0),
|
||||||
|
Columns: make([]string, 0),
|
||||||
|
ManifestVersion: constant.LatestManifestVersion,
|
||||||
|
version: math.MaxInt64,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *ReadOptions) AddFilter(filter filter.Filter) {
|
||||||
|
o.Filters[filter.GetColumnName()] = filter
|
||||||
|
o.FiltersV2 = append(o.FiltersV2, filter)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *ReadOptions) AddColumn(column string) {
|
||||||
|
o.Columns = append(o.Columns, column)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *ReadOptions) SetColumns(columns []string) {
|
||||||
|
o.Columns = columns
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *ReadOptions) SetVersion(version int64) {
|
||||||
|
o.version = version
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *ReadOptions) GetVersion() int64 {
|
||||||
|
return o.version
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *ReadOptions) OutputColumns() []string {
|
||||||
|
return o.Columns
|
||||||
|
}
|
||||||
150
internal/storagev2/storage/schema/schema.go
Normal file
150
internal/storagev2/storage/schema/schema.go
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package schema
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/constant"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||||
|
"github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Schema is a wrapper of arrow schema. Alongside the full arrow schema it
// caches three derived views (scalar, vector, delete) built by the
// Build*Schema methods, plus the options naming the special columns.
type Schema struct {
	schema       *arrow.Schema // the complete user-provided schema
	scalarSchema *arrow.Schema // non-vector columns plus an offset column; set by BuildScalarSchema
	vectorSchema *arrow.Schema // vector/primary/version columns; set by BuildVectorSchema
	deleteSchema *arrow.Schema // primary and version columns; set by BuildDeleteSchema

	options *SchemaOptions // names of the primary/version/vector columns
}

// Schema returns the full underlying arrow schema.
func (s *Schema) Schema() *arrow.Schema {
	return s.schema
}

// Options returns the schema options (special column names).
func (s *Schema) Options() *SchemaOptions {
	return s.options
}
|
||||||
|
|
||||||
|
func NewSchema(schema *arrow.Schema, options *SchemaOptions) *Schema {
|
||||||
|
return &Schema{
|
||||||
|
schema: schema,
|
||||||
|
options: options,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Schema) Validate() error {
|
||||||
|
err := s.options.Validate(s.schema)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = s.BuildScalarSchema()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = s.BuildVectorSchema()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = s.BuildDeleteSchema()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Schema) ScalarSchema() *arrow.Schema {
|
||||||
|
return s.scalarSchema
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Schema) VectorSchema() *arrow.Schema {
|
||||||
|
return s.vectorSchema
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Schema) DeleteSchema() *arrow.Schema {
|
||||||
|
return s.deleteSchema
|
||||||
|
}
|
||||||
|
|
||||||
|
// FromProtobuf restores the schema and its options from protobuf form and
// rebuilds the derived scalar/vector/delete schemas.
//
// NOTE(review): the errors returned by the three Build*Schema calls are
// ignored here — e.g. BuildDeleteSchema fails when no version column is set,
// a configuration that SchemaOptions.Validate permits. Confirm whether these
// errors should be propagated before changing this.
func (s *Schema) FromProtobuf(schema *storagev2pb.Schema) error {
	schemaType, err := utils.FromProtobufSchema(schema.ArrowSchema)
	if err != nil {
		return err
	}

	s.schema = schemaType
	s.options.FromProtobuf(schema.GetSchemaOptions())
	s.BuildScalarSchema()
	s.BuildVectorSchema()
	s.BuildDeleteSchema()
	return nil
}
|
||||||
|
|
||||||
|
func (s *Schema) ToProtobuf() (*storagev2pb.Schema, error) {
|
||||||
|
schema := &storagev2pb.Schema{}
|
||||||
|
arrowSchema, err := utils.ToProtobufSchema(s.schema)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
schema.ArrowSchema = arrowSchema
|
||||||
|
schema.SchemaOptions = s.options.ToProtobuf()
|
||||||
|
return schema, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Schema) BuildScalarSchema() error {
|
||||||
|
fields := make([]arrow.Field, 0, len(s.schema.Fields()))
|
||||||
|
for _, field := range s.schema.Fields() {
|
||||||
|
if field.Name == s.options.VectorColumn {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fields = append(fields, field)
|
||||||
|
}
|
||||||
|
offsetFiled := arrow.Field{Name: constant.OffsetFieldName, Type: arrow.DataType(&arrow.Int64Type{})}
|
||||||
|
fields = append(fields, offsetFiled)
|
||||||
|
s.scalarSchema = arrow.NewSchema(fields, nil)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Schema) BuildVectorSchema() error {
|
||||||
|
fields := make([]arrow.Field, 0, len(s.schema.Fields()))
|
||||||
|
for _, field := range s.schema.Fields() {
|
||||||
|
if field.Name == s.options.VectorColumn ||
|
||||||
|
field.Name == s.options.PrimaryColumn ||
|
||||||
|
field.Name == s.options.VersionColumn {
|
||||||
|
fields = append(fields, field)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.vectorSchema = arrow.NewSchema(fields, nil)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Schema) BuildDeleteSchema() error {
|
||||||
|
pkColumn, ok := s.schema.FieldsByName(s.options.PrimaryColumn)
|
||||||
|
if !ok {
|
||||||
|
return ErrPrimaryColumnNotFound
|
||||||
|
}
|
||||||
|
versionField, ok := s.schema.FieldsByName(s.options.VersionColumn)
|
||||||
|
if !ok {
|
||||||
|
return ErrVersionColumnNotFound
|
||||||
|
}
|
||||||
|
fields := make([]arrow.Field, 0, 2)
|
||||||
|
fields = append(fields, pkColumn[0])
|
||||||
|
fields = append(fields, versionField[0])
|
||||||
|
s.deleteSchema = arrow.NewSchema(fields, nil)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
97
internal/storagev2/storage/schema/schema_option.go
Normal file
97
internal/storagev2/storage/schema/schema_option.go
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package schema
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/pkg/proto/storagev2pb"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// Errors returned by SchemaOptions.Validate and the Schema.Build*Schema
	// helpers when the configured special columns are missing or mistyped.
	ErrPrimaryColumnNotFound = errors.New("primary column not found")
	ErrPrimaryColumnType     = errors.New("primary column is not int64 or string")
	ErrPrimaryColumnEmpty    = errors.New("primary column is empty")
	ErrVersionColumnNotFound = errors.New("version column not found")
	ErrVersionColumnType     = errors.New("version column is not int64")
	ErrVectorColumnNotFound  = errors.New("vector column not found")
	ErrVectorColumnType      = errors.New("vector column is not fixed size binary or fixed size list")
	ErrVectorColumnEmpty     = errors.New("vector column is empty")
)

// SchemaOptions names the special columns of a space schema.
type SchemaOptions struct {
	PrimaryColumn string // required; must be int64 or string (see Validate)
	VersionColumn string // optional; must be int64 when set
	VectorColumn  string // required; must be fixed size binary or fixed size list
}

// DefaultSchemaOptions returns options with all special column names unset;
// such options do not pass Validate until the primary and vector columns are
// configured.
func DefaultSchemaOptions() *SchemaOptions {
	return &SchemaOptions{
		PrimaryColumn: "",
		VersionColumn: "",
		VectorColumn:  "",
	}
}
|
||||||
|
|
||||||
|
func (o *SchemaOptions) ToProtobuf() *storagev2pb.SchemaOptions {
|
||||||
|
options := &storagev2pb.SchemaOptions{}
|
||||||
|
options.PrimaryColumn = o.PrimaryColumn
|
||||||
|
options.VersionColumn = o.VersionColumn
|
||||||
|
options.VectorColumn = o.VectorColumn
|
||||||
|
return options
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *SchemaOptions) FromProtobuf(options *storagev2pb.SchemaOptions) {
|
||||||
|
o.PrimaryColumn = options.PrimaryColumn
|
||||||
|
o.VersionColumn = options.VersionColumn
|
||||||
|
o.VectorColumn = options.VectorColumn
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *SchemaOptions) Validate(schema *arrow.Schema) error {
|
||||||
|
if o.PrimaryColumn != "" {
|
||||||
|
primaryField, ok := schema.FieldsByName(o.PrimaryColumn)
|
||||||
|
if !ok {
|
||||||
|
return ErrPrimaryColumnNotFound
|
||||||
|
} else if primaryField[0].Type.ID() != arrow.STRING && primaryField[0].Type.ID() != arrow.INT64 {
|
||||||
|
return ErrPrimaryColumnType
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return ErrPrimaryColumnEmpty
|
||||||
|
}
|
||||||
|
if o.VersionColumn != "" {
|
||||||
|
versionField, ok := schema.FieldsByName(o.VersionColumn)
|
||||||
|
if !ok {
|
||||||
|
return ErrVersionColumnNotFound
|
||||||
|
} else if versionField[0].Type.ID() != arrow.INT64 {
|
||||||
|
return ErrVersionColumnType
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if o.VectorColumn != "" {
|
||||||
|
vectorField, b := schema.FieldsByName(o.VectorColumn)
|
||||||
|
if !b {
|
||||||
|
return ErrVectorColumnNotFound
|
||||||
|
} else if vectorField[0].Type.ID() != arrow.FIXED_SIZE_BINARY && vectorField[0].Type.ID() != arrow.FIXED_SIZE_LIST {
|
||||||
|
return ErrVectorColumnType
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return ErrVectorColumnEmpty
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *SchemaOptions) HasVersionColumn() bool {
|
||||||
|
return o.VersionColumn != ""
|
||||||
|
}
|
||||||
53
internal/storagev2/storage/schema/schema_test.go
Normal file
53
internal/storagev2/storage/schema/schema_test.go
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package schema
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Test Schema.Schema
|
||||||
|
func TestBuildSchema(t *testing.T) {
|
||||||
|
pkField := arrow.Field{
|
||||||
|
Name: "pk_field",
|
||||||
|
Type: arrow.DataType(&arrow.Int64Type{}),
|
||||||
|
Nullable: false,
|
||||||
|
}
|
||||||
|
vsField := arrow.Field{
|
||||||
|
Name: "vs_field",
|
||||||
|
Type: arrow.DataType(&arrow.Int64Type{}),
|
||||||
|
Nullable: false,
|
||||||
|
}
|
||||||
|
vecField := arrow.Field{
|
||||||
|
Name: "vec_field",
|
||||||
|
Type: arrow.DataType(&arrow.FixedSizeBinaryType{ByteWidth: 16}),
|
||||||
|
Nullable: false,
|
||||||
|
}
|
||||||
|
fields := []arrow.Field{pkField, vsField, vecField}
|
||||||
|
|
||||||
|
as := arrow.NewSchema(fields, nil)
|
||||||
|
schemaOptions := &SchemaOptions{
|
||||||
|
PrimaryColumn: "pk_field",
|
||||||
|
VersionColumn: "vs_field",
|
||||||
|
VectorColumn: "vec_field",
|
||||||
|
}
|
||||||
|
|
||||||
|
sc := NewSchema(as, schemaOptions)
|
||||||
|
err := sc.Validate()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
}
|
||||||
220
internal/storagev2/storage/space.go
Normal file
220
internal/storagev2/storage/space.go
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package storage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/errors"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/blob"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/filter"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/reader/recordreader"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/lock"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/manifest"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/transaction"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Space is a storage-v2 dataset rooted at path on fs: data fragments and
// blobs described by a manifest, materialized delete fragments, and a lock
// manager used by write transactions.
type Space struct {
	path            string                        // root path of the space on fs
	fs              fs.Fs                         // filesystem backing all space I/O
	deleteFragments fragment.DeleteFragmentVector // delete fragments materialized from the manifest (see init)
	manifest        *manifest.Manifest            // currently loaded manifest snapshot
	lockManager     lock.LockManager              // coordinates concurrent write transactions
}
|
||||||
|
|
||||||
|
func (s *Space) init() error {
|
||||||
|
for _, f := range s.manifest.GetDeleteFragments() {
|
||||||
|
deleteFragment := fragment.Make(s.fs, s.manifest.GetSchema(), f)
|
||||||
|
s.deleteFragments = append(s.deleteFragments, deleteFragment)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewSpace(f fs.Fs, path string, m *manifest.Manifest, lockManager lock.LockManager) *Space {
|
||||||
|
deleteFragments := fragment.DeleteFragmentVector{}
|
||||||
|
return &Space{
|
||||||
|
fs: f,
|
||||||
|
path: path,
|
||||||
|
manifest: m,
|
||||||
|
deleteFragments: deleteFragments,
|
||||||
|
lockManager: lockManager,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Space) NewTransaction() transaction.Transaction {
|
||||||
|
return transaction.NewConcurrentWriteTransaction(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Space) Write(reader array.RecordReader, options *options.WriteOptions) error {
|
||||||
|
return transaction.NewConcurrentWriteTransaction(s).Write(reader, options).Commit()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Space) Delete(reader array.RecordReader) error {
|
||||||
|
return transaction.NewConcurrentWriteTransaction(s).Delete(reader).Commit()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open opened a space or create if the space does not exist.
|
||||||
|
// If space does not exist. schema should not be nullptr, or an error will be returned.
|
||||||
|
// If space exists and version is specified, it will restore to the state at this version,
|
||||||
|
// or it will choose the latest version.
|
||||||
|
func Open(uri string, opt options.Options) (*Space, error) {
|
||||||
|
var f fs.Fs
|
||||||
|
var m *manifest.Manifest
|
||||||
|
var path string
|
||||||
|
f, err := fs.BuildFileSystem(uri)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
path = f.Path()
|
||||||
|
log.Debug("open space", log.String("path", path))
|
||||||
|
|
||||||
|
log.Debug(utils.GetManifestDir(path))
|
||||||
|
// create if not exist
|
||||||
|
if err = f.CreateDir(utils.GetManifestDir(path)); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err = f.CreateDir(utils.GetScalarDataDir(path)); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err = f.CreateDir(utils.GetVectorDataDir(path)); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err = f.CreateDir(utils.GetBlobDir(path)); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err = f.CreateDir(utils.GetDeleteDataDir(path)); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
rw := manifest.NewManifestReaderWriter(f, path)
|
||||||
|
m, err = rw.Read(opt.Version)
|
||||||
|
if err != nil {
|
||||||
|
// create the first manifest file
|
||||||
|
if err == manifest.ErrManifestNotFound {
|
||||||
|
if opt.Schema == nil {
|
||||||
|
log.Error("schema is nil")
|
||||||
|
return nil, errors.ErrSchemaIsNil
|
||||||
|
}
|
||||||
|
if err = opt.Schema.Validate(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
m = manifest.NewManifest(opt.Schema)
|
||||||
|
m.SetVersion(0) // TODO: check if this is necessary
|
||||||
|
if err = rw.Write(m); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
space := NewSpace(f, path, m, opt.LockManager)
|
||||||
|
return space, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Space) readManifest(version int64) error {
|
||||||
|
rw := manifest.NewManifestReaderWriter(s.fs, s.path)
|
||||||
|
manifest, err := rw.Read(version)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
s.manifest = manifest
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read returns a record reader over the space's data at the manifest version
// requested in readOptions. If the cached manifest is missing or its version
// does not match, the manifest is reloaded first. When the schema declares a
// version column, a version filter and the column itself are appended to the
// read options so the reader can resolve row visibility.
func (s *Space) Read(readOptions *options.ReadOptions) (array.RecordReader, error) {
	if s.manifest == nil || readOptions.ManifestVersion != s.manifest.Version() {
		if err := s.readManifest(readOptions.ManifestVersion); err != nil {
			return nil, err
		}
	}
	if s.manifest.GetSchema().Options().HasVersionColumn() {
		// NOTE(review): the filter compares the version column against
		// math.MaxInt64, which matches every row — presumably a placeholder
		// for a real read version; confirm intended semantics upstream.
		f := filter.NewConstantFilter(filter.LessThanOrEqual, s.manifest.GetSchema().Options().VersionColumn, int64(math.MaxInt64))
		readOptions.AddFilter(f)
		readOptions.AddColumn(s.manifest.GetSchema().Options().VersionColumn)
	}
	log.Debug("read", log.Any("readOption", readOptions))

	return recordreader.MakeRecordReader(s.manifest, s.manifest.GetSchema(), s.fs, s.deleteFragments, readOptions), nil
}
|
||||||
|
|
||||||
|
// WriteBlob stores content as a named blob in the space via a single-operation
// concurrent write transaction. If replace is false and the blob already
// exists, the commit fails with ErrBlobAlreadyExist.
func (s *Space) WriteBlob(content []byte, name string, replace bool) error {
	return transaction.NewConcurrentWriteTransaction(s).WriteBlob(content, name, replace).Commit()
}
|
||||||
|
|
||||||
|
func (s *Space) ReadBlob(name string, output []byte) (int, error) {
|
||||||
|
blob, ok := s.manifest.GetBlob(name)
|
||||||
|
if !ok {
|
||||||
|
return -1, errors.ErrBlobNotExist
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := s.fs.OpenFile(blob.File)
|
||||||
|
if err != nil {
|
||||||
|
return -1, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return f.Read(output)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetBlobByteSize returns the size in bytes of the named blob as recorded in
// the current manifest, or errors.ErrBlobNotExist (with -1) when the blob is
// not present.
func (s *Space) GetBlobByteSize(name string) (int64, error) {
	blob, ok := s.manifest.GetBlob(name)
	if !ok {
		return -1, errors.ErrBlobNotExist
	}
	return blob.Size, nil
}
|
||||||
|
|
||||||
|
// GetCurrentVersion returns the version of the space's current manifest.
func (s *Space) GetCurrentVersion() int64 {
	return s.manifest.Version()
}
|
||||||
|
|
||||||
|
// ScanDelete returns a record reader over the space's delete fragments.
func (s *Space) ScanDelete() (array.RecordReader, error) {
	return recordreader.MakeScanDeleteReader(s.manifest, s.fs), nil
}
|
||||||
|
|
||||||
|
// Path returns the root path of the space.
func (s *Space) Path() string {
	return s.path
}
|
||||||
|
|
||||||
|
// Fs returns the filesystem backing the space.
func (s *Space) Fs() fs.Fs {
	return s.fs
}
|
||||||
|
|
||||||
|
// Manifest returns the space's current manifest.
func (s *Space) Manifest() *manifest.Manifest {
	return s.manifest
}
|
||||||
|
|
||||||
|
// SetManifest installs manifest as the space's current manifest, typically
// after a successful transaction commit.
func (s *Space) SetManifest(manifest *manifest.Manifest) {
	s.manifest = manifest
}
|
||||||
|
|
||||||
|
// LockManager returns the lock manager guarding manifest commits on the space.
func (s *Space) LockManager() lock.LockManager {
	return s.lockManager
}
|
||||||
|
|
||||||
|
// SetLockManager replaces the space's lock manager.
func (s *Space) SetLockManager(lockManager lock.LockManager) {
	s.lockManager = lockManager
}
|
||||||
|
|
||||||
|
// StatisticsBlobs returns all blobs recorded in the space's current manifest.
func (s *Space) StatisticsBlobs() []blob.Blob {
	return s.manifest.GetBlobs()
}
|
||||||
327
internal/storagev2/storage/transaction/transaction.go
Normal file
327
internal/storagev2/storage/transaction/transaction.go
Normal file
@ -0,0 +1,327 @@
|
|||||||
|
// Copyright 2023 Zilliz
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package transaction
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/apache/arrow/go/v12/arrow"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/array"
|
||||||
|
"github.com/apache/arrow/go/v12/arrow/memory"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/errors"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/log"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/common/utils"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/blob"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/file/fragment"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/format"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/format/parquet"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/io/fs"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/lock"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/manifest"
|
||||||
|
"github.com/milvus-io/milvus/internal/storagev2/storage/options"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SpaceMeta exposes the metadata a transaction needs from a Space: its root
// path, filesystem, current manifest, and lock manager. SetManifest installs
// a newly committed manifest back on the space.
type SpaceMeta interface {
	Path() string
	Fs() fs.Fs
	Manifest() *manifest.Manifest
	LockManager() lock.LockManager
	SetManifest(manifest *manifest.Manifest)
}
|
||||||
|
|
||||||
|
// Transaction queues write, delete, and blob operations and applies them when
// Commit is called. The mutating methods return the Transaction itself so
// calls can be chained.
type Transaction interface {
	Write(reader array.RecordReader, options *options.WriteOptions) Transaction
	Delete(reader array.RecordReader) Transaction
	WriteBlob(content []byte, name string, replace bool) Transaction
	Commit() error
}
|
||||||
|
|
||||||
|
// ConcurrentWriteTransaction collects operations and commits the manifest
// changes they produce through a commit guarded by the space's lock manager.
type ConcurrentWriteTransaction struct {
	operations []Operation             // queued operations, executed in order on Commit
	commit     manifest.ManifestCommit // accumulates manifest ops emitted by the operations
	space      SpaceMeta               // the space this transaction mutates
}
|
||||||
|
|
||||||
|
func (t *ConcurrentWriteTransaction) Write(reader array.RecordReader, options *options.WriteOptions) Transaction {
|
||||||
|
operation := &WriteOperation{
|
||||||
|
reader: reader,
|
||||||
|
options: options,
|
||||||
|
space: t.space,
|
||||||
|
transaction: t,
|
||||||
|
}
|
||||||
|
t.operations = append(t.operations, operation)
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *ConcurrentWriteTransaction) Delete(reader array.RecordReader) Transaction {
|
||||||
|
operation := &DeleteOperation{
|
||||||
|
reader: reader,
|
||||||
|
space: t.space,
|
||||||
|
transaction: t,
|
||||||
|
}
|
||||||
|
t.operations = append(t.operations, operation)
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *ConcurrentWriteTransaction) WriteBlob(content []byte, name string, replace bool) Transaction {
|
||||||
|
operation := &WriteBlobOperation{
|
||||||
|
content: content,
|
||||||
|
name: name,
|
||||||
|
replace: replace,
|
||||||
|
space: t.space,
|
||||||
|
transaction: t,
|
||||||
|
}
|
||||||
|
t.operations = append(t.operations, operation)
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *ConcurrentWriteTransaction) Commit() error {
|
||||||
|
for _, op := range t.operations {
|
||||||
|
op.Execute()
|
||||||
|
}
|
||||||
|
nxtManifest, err := t.commit.Commit()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
t.space.SetManifest(nxtManifest)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewConcurrentWriteTransaction creates an empty transaction on space whose
// eventual commit goes through the space's lock manager and a manifest
// reader/writer rooted at the space's path.
func NewConcurrentWriteTransaction(space SpaceMeta) *ConcurrentWriteTransaction {
	return &ConcurrentWriteTransaction{
		operations: make([]Operation, 0),
		commit:     manifest.NewManifestCommit(space.LockManager(), manifest.NewManifestReaderWriter(space.Fs(), space.Path())),
		space:      space,
	}
}
|
||||||
|
|
||||||
|
// Operation is a single unit of work queued on a transaction and executed at
// commit time.
type Operation interface {
	Execute() error
}
|
||||||
|
|
||||||
|
// WriteOperation writes a stream of records into scalar and vector parquet
// files and registers the resulting fragments with its transaction's commit.
type WriteOperation struct {
	reader      array.RecordReader           // source records to persist
	options     *options.WriteOptions        // write tuning, e.g. MaxRecordPerFile
	space       SpaceMeta                    // target space (paths, fs, schema)
	transaction *ConcurrentWriteTransaction  // owning transaction receiving manifest ops
}
|
||||||
|
|
||||||
|
// Execute writes the queued records into new scalar and vector parquet files
// and registers the resulting fragments with the transaction's manifest
// commit. It fails with ErrSchemaNotMatch when the reader's schema differs
// from the space schema; an entirely empty reader registers nothing.
func (w *WriteOperation) Execute() error {
	if !w.space.Manifest().GetSchema().Schema().Equal(w.reader.Schema()) {
		return errors.ErrSchemaNotMatch
	}

	// Each record batch is split column-wise into a scalar file and a vector
	// file, tracked by separate fragments.
	scalarSchema, vectorSchema := w.space.Manifest().GetSchema().ScalarSchema(), w.space.Manifest().GetSchema().VectorSchema()
	var (
		scalarWriter format.Writer
		vectorWriter format.Writer
	)
	scalarFragment := fragment.NewFragment()
	vectorFragment := fragment.NewFragment()

	isEmpty := true
	for w.reader.Next() {
		rec := w.reader.Record()

		if rec.NumRows() == 0 {
			continue
		}

		var err error
		// write() lazily opens files and rotates the writer (returning nil)
		// once a file reaches options.MaxRecordPerFile rows.
		scalarWriter, err = w.write(scalarSchema, rec, scalarWriter, &scalarFragment, w.options, true)
		if err != nil {
			return err
		}
		vectorWriter, err = w.write(vectorSchema, rec, vectorWriter, &vectorFragment, w.options, false)
		if err != nil {
			return err
		}
		isEmpty = false
	}
	// NOTE(review): w.reader.Err() is not checked after the loop, so a reader
	// failure is indistinguishable from normal exhaustion — confirm upstream.

	// Flush any writers left open by the final (non-full) files.
	if scalarWriter != nil {
		if err := scalarWriter.Close(); err != nil {
			return err
		}
	}
	if vectorWriter != nil {
		if err := vectorWriter.Close(); err != nil {
			return err
		}
	}

	// Nothing was written: do not register empty fragments.
	if isEmpty {
		return nil
	}

	op1 := manifest.AddScalarFragmentOp{ScalarFragment: scalarFragment}
	op2 := manifest.AddVectorFragmentOp{VectorFragment: vectorFragment}
	w.transaction.commit.AddOp(op1, op2)
	return nil
}
|
||||||
|
|
||||||
|
// write appends rec's columns that belong to schema to the current parquet
// writer, creating a new file (recorded in fragment) when writer is nil, and
// closing the writer once it holds at least opt.MaxRecordPerFile records.
// It returns the writer to use for the next record — nil after a rotation so
// the next call opens a fresh file. When isScalar is true the file goes under
// the scalar data dir and a synthetic per-batch offset column is appended;
// otherwise the vector data dir is used.
func (w *WriteOperation) write(
	schema *arrow.Schema,
	rec arrow.Record,
	writer format.Writer,
	fragment *fragment.Fragment,
	opt *options.WriteOptions,
	isScalar bool,
) (format.Writer, error) {
	// Keep only the record columns that exist in the target sub-schema.
	var columns []arrow.Array
	cols := rec.Columns()
	for k := range cols {
		_, has := schema.FieldsByName(rec.ColumnName(k))
		if has {
			columns = append(columns, cols[k])
		}
	}

	var rootPath string
	if isScalar {
		// add offset column for scalar: row i gets value i, tying each scalar
		// row to its position within this record batch.
		offsetValues := make([]int64, rec.NumRows())
		for i := 0; i < int(rec.NumRows()); i++ {
			offsetValues[i] = int64(i)
		}
		builder := array.NewInt64Builder(memory.DefaultAllocator)
		builder.AppendValues(offsetValues, nil)
		offsetColumn := builder.NewArray()
		// NOTE(review): builder/offsetColumn are never Released; with the
		// default Go allocator the GC reclaims them, but confirm if a
		// counting allocator is ever used.
		columns = append(columns, offsetColumn)
		rootPath = utils.GetScalarDataDir(w.space.Path())
	} else {
		rootPath = utils.GetVectorDataDir(w.space.Path())
	}

	var err error

	record := array.NewRecord(schema, columns, rec.NumRows())

	if writer == nil {
		// Lazily open a new parquet file and remember it in the fragment.
		filePath := utils.GetNewParquetFilePath(rootPath)
		writer, err = parquet.NewFileWriter(schema, w.space.Fs(), filePath)
		if err != nil {
			return nil, err
		}
		fragment.AddFile(filePath)
	}

	err = writer.Write(record)
	if err != nil {
		return nil, err
	}

	// Rotate: close a full file so the next call starts a fresh one.
	if writer.Count() >= opt.MaxRecordPerFile {
		log.Debug("close writer", log.Any("count", writer.Count()))
		err = writer.Close()
		if err != nil {
			return nil, err
		}
		writer = nil
	}

	return writer, nil
}
|
||||||
|
|
||||||
|
// DeleteOperation writes delete records into a parquet file and registers it
// as a delete fragment with its transaction's commit.
type DeleteOperation struct {
	reader      array.RecordReader          // delete records to persist
	space       SpaceMeta                   // target space (paths, fs, schema)
	transaction *ConcurrentWriteTransaction // owning transaction receiving manifest ops
}
|
||||||
|
|
||||||
|
// Execute writes the delete records into a single new parquet file under the
// delete data dir and registers it as a delete fragment. A reader that yields
// no non-empty records adds nothing to the commit.
func (o *DeleteOperation) Execute() error {
	schema := o.space.Manifest().GetSchema().DeleteSchema()
	fragment := fragment.NewFragment()
	var (
		err        error
		writer     format.Writer
		deleteFile string
	)

	for o.reader.Next() {
		rec := o.reader.Record()
		if rec.NumRows() == 0 {
			continue
		}

		// Open the delete file lazily on the first non-empty record so empty
		// readers create no file.
		if writer == nil {
			deleteFile = utils.GetNewParquetFilePath(utils.GetDeleteDataDir(o.space.Path()))
			writer, err = parquet.NewFileWriter(schema, o.space.Fs(), deleteFile)
			if err != nil {
				return err
			}
			fragment.AddFile(deleteFile)
		}

		if err = writer.Write(rec); err != nil {
			return err
		}
	}
	// NOTE(review): o.reader.Err() is not checked after the loop — confirm
	// upstream that reader failures surface through another path.

	if writer != nil {
		if err = writer.Close(); err != nil {
			return err
		}

		op := manifest.AddDeleteFragmentOp{DeleteFragment: fragment}
		o.transaction.commit.AddOp(op)
	}
	return nil
}
|
||||||
|
|
||||||
|
// WriteBlobOperation persists a named blob to a file in the space and
// registers it with its transaction's commit.
type WriteBlobOperation struct {
	content     []byte                      // raw blob bytes to write
	name        string                      // blob name recorded in the manifest
	replace     bool                        // whether an existing blob of the same name may be replaced
	space       SpaceMeta                   // target space (paths, fs, manifest)
	transaction *ConcurrentWriteTransaction // owning transaction receiving manifest ops
}
|
||||||
|
|
||||||
|
func (o *WriteBlobOperation) Execute() error {
|
||||||
|
if !o.replace && o.space.Manifest().HasBlob(o.name) {
|
||||||
|
return errors.ErrBlobAlreadyExist
|
||||||
|
}
|
||||||
|
|
||||||
|
blobFile := utils.GetBlobFilePath(o.space.Path())
|
||||||
|
f, err := o.space.Fs().OpenFile(blobFile)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
n, err := f.Write(o.content)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if n != len(o.content) {
|
||||||
|
return fmt.Errorf("blob not written completely, written %d but expect %d", n, len(o.content))
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = f.Close(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
op := manifest.AddBlobOp{
|
||||||
|
Replace: o.replace,
|
||||||
|
Blob: blob.Blob{
|
||||||
|
Name: o.name,
|
||||||
|
Size: int64(len(o.content)),
|
||||||
|
File: blobFile,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
o.transaction.commit.AddOp(op)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
131
pkg/proto/storagev2.proto
Normal file
131
pkg/proto/storagev2.proto
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
// storagev2.proto describes the on-disk manifest format of the storage v2
// layer: an Arrow-compatible schema description plus the fragment and blob
// inventory of a space at a given version.
syntax = "proto3";

package milvus.proto.storagev2;

option go_package = "github.com/milvus-io/milvus/pkg/proto/storagev2pb";

// LogicType mirrors the Arrow type-id enumeration; values match Arrow's
// Type ids, with unsupported entries kept as comments to preserve numbering.
enum LogicType {
  NA = 0;
  BOOL = 1;
  UINT8 = 2;
  INT8 = 3;
  UINT16 = 4;
  INT16 = 5;
  UINT32 = 6;
  INT32 = 7;
  UINT64 = 8;
  INT64 = 9;
  HALF_FLOAT = 10;
  FLOAT = 11;
  DOUBLE = 12;
  STRING = 13;
  BINARY = 14;
  FIXED_SIZE_BINARY = 15;
  // DATE32 = 16;
  // DATE64 = 17;
  // TIMESTAMP = 18;
  // TIME32 = 19;
  // TIME64 = 20;
  // INTERVAL_MONTHS = 21;
  // INTERVAL_DAY_TIME = 22;
  // DECIMAL128 = 23;
  // option allow_alias = true;
  // DECIMAL = 23; // DECIMAL==DECIMAL128
  // DECIMAL256 = 24;
  LIST = 25;
  STRUCT = 26;
  // SPARSE_UNION = 27;
  // DENSE_UNION = 28;
  DICTIONARY = 29;
  MAP = 30;
  // EXTENSION = 31;
  FIXED_SIZE_LIST = 32;
  // DURATION = 33;
  // LARGE_STRING = 34;
  // LARGE_BINARY = 35;
  // LARGE_LIST = 36;
  // INTERVAL_MONTH_DAY_NANO = 37;
  // RUN_END_ENCODED = 38;
  MAX_ID = 39;
}

// Endianness of the serialized schema data.
enum Endianness {
  Little = 0;
  Big = 1;
}

// Parameters for the FIXED_SIZE_BINARY logic type.
message FixedSizeBinaryType { int32 byte_width = 1; }

// Parameters for the FIXED_SIZE_LIST logic type.
message FixedSizeListType { int32 list_size = 1; }

// Parameters for the DICTIONARY logic type.
message DictionaryType {
  DataType index_type = 1;
  DataType value_type = 2;
  bool ordered = 3;
}

// Parameters for the MAP logic type.
message MapType { bool keys_sorted = 1; }

// DataType is a logic type plus any type-specific parameters and, for nested
// types, its child fields.
message DataType {
  // Extra parameters required by parameterized logic types.
  oneof type_related_values {
    FixedSizeBinaryType fixed_size_binary_type = 1;
    FixedSizeListType fixed_size_list_type = 2;
    DictionaryType dictionary_type = 3;
    MapType map_type = 4;
  }
  LogicType logic_type = 100;
  repeated Field children = 101;
}

// KeyValueMetadata stores parallel key/value string lists (Arrow-style
// metadata); keys[i] pairs with values[i].
message KeyValueMetadata {
  repeated string keys = 1;
  repeated string values = 2;
}

// Field describes one Arrow schema field.
message Field {
  string name = 1;
  bool nullable = 2;
  DataType data_type = 3;
  KeyValueMetadata metadata = 4;
}

// SchemaOptions names the special columns of a space's schema.
message SchemaOptions {
  string primary_column = 1;
  string version_column = 2;
  string vector_column = 3;
}

// ArrowSchema is a serialized Arrow schema.
message ArrowSchema {
  repeated Field fields = 1;
  Endianness endianness = 2;
  KeyValueMetadata metadata = 3;
}

// Schema pairs an Arrow schema with the storage-specific column options.
message Schema {
  ArrowSchema arrow_schema = 1;
  SchemaOptions schema_options = 2;
}

// Options holds space-level settings persisted in the manifest.
message Options { string uri = 1; }

// Manifest is the versioned inventory of a space: its schema plus all
// scalar, vector, and delete fragments and blobs.
message Manifest {
  int64 version = 1;
  Options options = 2;
  Schema schema = 3;
  repeated Fragment scalar_fragments = 4;
  repeated Fragment vector_fragments = 5;
  repeated Fragment delete_fragments = 6;
  repeated Blob blobs = 7;
}

// Fragment is a set of data files sharing one fragment id.
message Fragment {
  int64 id = 1;
  repeated string files = 2;
}

// Blob records a named binary object stored in a file.
message Blob {
  string name = 1;
  int64 size = 2;
  string file = 3;
}
|
||||||
1519
pkg/proto/storagev2pb/storagev2.pb.go
Normal file
1519
pkg/proto/storagev2pb/storagev2.pb.go
Normal file
File diff suppressed because it is too large
Load Diff
@ -101,6 +101,7 @@ USE_ASAN="OFF"
|
|||||||
USE_DYNAMIC_SIMD="ON"
|
USE_DYNAMIC_SIMD="ON"
|
||||||
USE_OPENDAL="OFF"
|
USE_OPENDAL="OFF"
|
||||||
INDEX_ENGINE="KNOWHERE"
|
INDEX_ENGINE="KNOWHERE"
|
||||||
|
ENABLE_AZURE_FS="OFF"
|
||||||
: "${ENABLE_GCP_NATIVE:="OFF"}"
|
: "${ENABLE_GCP_NATIVE:="OFF"}"
|
||||||
|
|
||||||
while getopts "p:d:t:s:f:n:i:y:a:x:o:ulrcghzmebZ" arg; do
|
while getopts "p:d:t:s:f:n:i:y:a:x:o:ulrcghzmebZ" arg; do
|
||||||
@ -257,7 +258,8 @@ ${CMAKE_EXTRA_ARGS} \
|
|||||||
-DCPU_ARCH=${CPU_ARCH} \
|
-DCPU_ARCH=${CPU_ARCH} \
|
||||||
-DUSE_OPENDAL=${USE_OPENDAL} \
|
-DUSE_OPENDAL=${USE_OPENDAL} \
|
||||||
-DINDEX_ENGINE=${INDEX_ENGINE} \
|
-DINDEX_ENGINE=${INDEX_ENGINE} \
|
||||||
-DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} "
|
-DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} \
|
||||||
|
-DENABLE_AZURE_FS=${ENABLE_AZURE_FS} "
|
||||||
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
|
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
|
||||||
CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \
|
CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \
|
||||||
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} "
|
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} "
|
||||||
|
|||||||
@ -62,10 +62,12 @@ mkdir -p ./planpb
|
|||||||
mkdir -p ./workerpb
|
mkdir -p ./workerpb
|
||||||
mkdir -p ./messagespb
|
mkdir -p ./messagespb
|
||||||
mkdir -p ./streamingpb
|
mkdir -p ./streamingpb
|
||||||
|
mkdir -p ./storagev2pb
|
||||||
mkdir -p $ROOT_DIR/cmd/tools/migration/legacy/legacypb
|
mkdir -p $ROOT_DIR/cmd/tools/migration/legacy/legacypb
|
||||||
|
|
||||||
protoc_opt="${PROTOC_BIN} --proto_path=${API_PROTO_DIR} --proto_path=."
|
protoc_opt="${PROTOC_BIN} --proto_path=${API_PROTO_DIR} --proto_path=."
|
||||||
|
|
||||||
|
${protoc_opt} --go_out=paths=source_relative:./storagev2pb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./storagev2pb storagev2.proto || { echo 'generate storagev2.proto failed'; exit 1; }
|
||||||
${protoc_opt} --go_out=paths=source_relative:./etcdpb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./etcdpb etcd_meta.proto || { echo 'generate etcd_meta.proto failed'; exit 1; }
|
${protoc_opt} --go_out=paths=source_relative:./etcdpb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./etcdpb etcd_meta.proto || { echo 'generate etcd_meta.proto failed'; exit 1; }
|
||||||
${protoc_opt} --go_out=paths=source_relative:./indexcgopb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./indexcgopb index_cgo_msg.proto || { echo 'generate index_cgo_msg failed '; exit 1; }
|
${protoc_opt} --go_out=paths=source_relative:./indexcgopb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./indexcgopb index_cgo_msg.proto || { echo 'generate index_cgo_msg failed '; exit 1; }
|
||||||
${protoc_opt} --go_out=paths=source_relative:./cgopb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./cgopb cgo_msg.proto || { echo 'generate cgo_msg failed '; exit 1; }
|
${protoc_opt} --go_out=paths=source_relative:./cgopb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./cgopb cgo_msg.proto || { echo 'generate cgo_msg failed '; exit 1; }
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user