diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 4e67228e61..9128694725 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -521,6 +521,9 @@ common: session: ttl: 30 # ttl value when session granting a lease to register service retryTimes: 30 # retry times when session sending etcd requests + storage: + scheme: "s3" + enablev2: false # preCreatedTopic decides whether using existed topic preCreatedTopic: diff --git a/go.mod b/go.mod index 2df26d50c8..dd66e2f281 100644 --- a/go.mod +++ b/go.mod @@ -21,21 +21,20 @@ require ( github.com/golang/protobuf v1.5.3 github.com/google/btree v1.1.2 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 - github.com/klauspost/compress v1.16.5 + github.com/klauspost/compress v1.16.7 github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20231114080011-9a495865219e github.com/milvus-io/milvus/pkg v0.0.1 - github.com/minio/minio-go/v7 v7.0.56 + github.com/minio/minio-go/v7 v7.0.61 github.com/prometheus/client_golang v1.14.0 github.com/prometheus/client_model v0.3.0 github.com/prometheus/common v0.42.0 - github.com/quasilyte/go-ruleguard/dsl v0.3.22 github.com/samber/lo v1.27.0 github.com/sbinet/npyio v0.6.0 github.com/soheilhy/cmux v0.1.5 github.com/spf13/cast v1.3.1 github.com/spf13/viper v1.8.1 - github.com/stretchr/testify v1.8.3 + github.com/stretchr/testify v1.8.4 github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c github.com/tidwall/gjson v1.14.4 github.com/tikv/client-go/v2 v2.0.4 @@ -45,15 +44,15 @@ require ( go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.38.0 go.opentelemetry.io/otel v1.13.0 go.opentelemetry.io/otel/trace v1.13.0 - go.uber.org/atomic v1.10.0 - go.uber.org/multierr v1.7.0 - go.uber.org/zap v1.20.0 + go.uber.org/atomic v1.11.0 + go.uber.org/multierr v1.11.0 + go.uber.org/zap v1.24.0 golang.org/x/crypto v0.14.0 - golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 - golang.org/x/oauth2 v0.6.0 - 
golang.org/x/sync v0.1.0 + golang.org/x/exp v0.0.0-20230728194245-b0cb94b80691 + golang.org/x/oauth2 v0.8.0 + golang.org/x/sync v0.3.0 golang.org/x/text v0.13.0 - google.golang.org/grpc v1.54.0 + google.golang.org/grpc v1.57.0 google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f stathat.com/c/consistent v1.0.0 ) @@ -61,7 +60,12 @@ require ( require github.com/apache/arrow/go/v12 v12.0.1 require ( - cloud.google.com/go/compute v1.19.0 // indirect + github.com/milvus-io/milvus-storage/go v0.0.0-20231109072809-1cd7b0866092 + github.com/quasilyte/go-ruleguard/dsl v0.3.22 +) + +require ( + cloud.google.com/go/compute v1.20.1 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.1 // indirect @@ -73,7 +77,7 @@ require ( github.com/alibabacloud-go/debug v0.0.0-20190504072949-9472017b5c68 // indirect github.com/alibabacloud-go/tea v1.1.8 // indirect github.com/andybalholm/brotli v1.0.4 // indirect - github.com/apache/thrift v0.16.0 // indirect + github.com/apache/thrift v0.18.1 // indirect github.com/ardielle/ardielle-go v1.5.2 // indirect github.com/benbjohnson/clock v1.1.0 // indirect github.com/benesch/cgosymbolizer v0.0.0-20190515212042-bec6fe6e597b // indirect @@ -92,7 +96,7 @@ require ( github.com/coreos/go-systemd/v22 v22.3.2 // indirect github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect github.com/danieljoos/wincred v1.1.2 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 // indirect github.com/docker/go-units v0.4.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect @@ -129,8 +133,8 @@ require ( github.com/jonboulle/clockwork v0.2.2 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/asmfmt v1.3.2 // indirect - 
github.com/klauspost/cpuid/v2 v2.2.4 // indirect - github.com/kr/pretty v0.3.0 // indirect + github.com/klauspost/cpuid/v2 v2.2.5 // indirect + github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/leodido/go-urn v1.2.4 // indirect @@ -160,7 +164,7 @@ require ( github.com/pelletier/go-toml v1.9.3 // indirect github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/pierrec/lz4 v2.5.2+incompatible // indirect - github.com/pierrec/lz4/v4 v4.1.15 // indirect + github.com/pierrec/lz4/v4 v4.1.18 // indirect github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 // indirect github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 // indirect @@ -168,14 +172,14 @@ require ( github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 // indirect github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/prometheus/procfs v0.9.0 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect - github.com/rogpeppe/go-internal v1.9.0 // indirect + github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/rs/xid v1.5.0 // indirect github.com/shirou/gopsutil/v3 v3.22.9 // indirect - github.com/sirupsen/logrus v1.9.2 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/spf13/afero v1.6.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect @@ -213,17 +217,19 @@ require ( go.opentelemetry.io/proto/otlp v0.19.0 // indirect go.uber.org/automaxprocs v1.5.2 // indirect golang.org/x/arch v0.3.0 // indirect - golang.org/x/mod 
v0.9.0 // indirect + golang.org/x/mod v0.12.0 // indirect golang.org/x/net v0.17.0 // indirect golang.org/x/sys v0.13.0 // indirect golang.org/x/term v0.13.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.7.0 // indirect + golang.org/x/tools v0.11.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect gonum.org/v1/gonum v0.11.0 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633 // indirect - google.golang.org/protobuf v1.30.0 // indirect + google.golang.org/genproto v0.0.0-20230706204954-ccb25ca9f130 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20230629202037-9506855d4529 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230726155614-23370e0ffb3e // indirect + google.golang.org/protobuf v1.31.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect @@ -238,6 +244,7 @@ replace ( github.com/milvus-io/milvus/pkg => ./pkg github.com/streamnative/pulsarctl => github.com/xiaofan-luan/pulsarctl v0.5.1 github.com/tecbot/gorocksdb => github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b // indirect +// github.com/milvus-io/milvus-storage/go => ../milvus-storage/go ) exclude github.com/apache/pulsar-client-go/oauth2 v0.0.0-20211108044248-fe3b7c4e445b diff --git a/go.sum b/go.sum index 045edc35d4..14aacb8163 100644 --- a/go.sum +++ b/go.sum @@ -24,8 +24,8 @@ cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvf cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute v1.19.0 h1:+9zda3WGgW1ZSTlVppLCYFIr48Pa35q1uG2N1itbCEQ= -cloud.google.com/go/compute 
v1.19.0/go.mod h1:rikpw2y+UMidAe9tISo04EHNOIf42RLYF/q8Bs93scU= +cloud.google.com/go/compute v1.20.1 h1:6aKEtlUiwEpJzM001l0yFkpXmUVXaN8W+fbkb2AZNbg= +cloud.google.com/go/compute v1.20.1/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= @@ -98,8 +98,8 @@ github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20210826220005-b48c857c3a0e h1:G github.com/antlr/antlr4/runtime/Go/antlr v0.0.0-20210826220005-b48c857c3a0e/go.mod h1:F7bn7fEU90QkQ3tnmaTx3LTKLEDqnwWODIYppRQ5hnY= github.com/apache/arrow/go/v12 v12.0.1 h1:JsR2+hzYYjgSUkBSaahpqCetqZMr76djX80fF/DiJbg= github.com/apache/arrow/go/v12 v12.0.1/go.mod h1:weuTY7JvTG/HDPtMQxEUp7pU73vkLWMLpY67QwZ/WWw= -github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= -github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= +github.com/apache/thrift v0.18.1 h1:lNhK/1nqjbwbiOPDBPFJVKxgDEGSepKuTh6OLiXW8kg= +github.com/apache/thrift v0.18.1/go.mod h1:rdQn/dCcDKEWjjylUeueum4vQEjG2v8v2PqriUnbr+I= github.com/ardielle/ardielle-go v1.5.2 h1:TilHTpHIQJ27R1Tl/iITBzMwiUGSlVfiVhwDNGM3Zj4= github.com/ardielle/ardielle-go v1.5.2/go.mod h1:I4hy1n795cUhaVt/ojz83SNVCYIGsAFAONtv2Dr7HUI= github.com/ardielle/ardielle-tools v1.5.4/go.mod h1:oZN+JRMnqGiIhrzkRN9l26Cej9dEx4jeNG6A+AdkShk= @@ -164,6 +164,7 @@ github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= 
+github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo= github.com/cockroachdb/datadriven v1.0.2 h1:H9MtNqVoVhvd9nCBwOyDjUEdZCREqbIdCJD93PBm/jA= github.com/cockroachdb/datadriven v1.0.2/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= @@ -199,8 +200,9 @@ github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs github.com/danieljoos/wincred v1.1.2 h1:QLdCxFs1/Yl4zduvBdcHB8goaYk9RARS2SgLLRuAyr0= github.com/danieljoos/wincred v1.1.2/go.mod h1:GijpziifJoIBfYh+S7BbkdUTU4LfM+QnGqR5Vl2tAx0= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= @@ -227,6 +229,7 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.m github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= 
+github.com/envoyproxy/protoc-gen-validate v0.10.1 h1:c0g45+xCJhdgFGw7a5QAfdS4byAbud7miNWJ1WwEVf8= github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c h1:8ISkoahWXwZR41ois5lSJBSVw4D0OV19Ht/JSTzvSv0= github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c/go.mod h1:Yg+htXGokKKdzcwhuNDwVvN+uBxDGXJ7G/VN1d8fa64= @@ -325,8 +328,8 @@ github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOW github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.0.0 h1:nfP3RFugxnNRyKgeWd4oI1nYvXpxrx8ck8ZrcizshdQ= github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= +github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -512,21 +515,22 @@ github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.14.4/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.16.5 h1:IFV2oUNUzZaz+XyusxpLzpzS8Pt5rh0Z16For/djlyI= -github.com/klauspost/compress v1.16.5/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= 
+github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= -github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= +github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -579,6 +583,8 @@ 
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZz github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4= github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20231114080011-9a495865219e h1:IH1WAXwEF8vbwahPdupi4zzRNWViT4B7fZzIjtRLpG4= github.com/milvus-io/milvus-proto/go-api/v2 v2.3.4-0.20231114080011-9a495865219e/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek= +github.com/milvus-io/milvus-storage/go v0.0.0-20231109072809-1cd7b0866092 h1:UYJ7JB+QlMOoFHNdd8mUa3/lV63t9dnBX7ILXmEEWPY= +github.com/milvus-io/milvus-storage/go v0.0.0-20231109072809-1cd7b0866092/go.mod h1:GPETMcTZq1gLY1WA6Na5kiNAKnq8SEMMiVKUZrM3sho= github.com/milvus-io/pulsar-client-go v0.6.10 h1:eqpJjU+/QX0iIhEo3nhOqMNXL+TyInAs1IAHZCrCM/A= github.com/milvus-io/pulsar-client-go v0.6.10/go.mod h1:lQqCkgwDF8YFYjKA+zOheTk1tev2B+bKj5j7+nm8M1w= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= @@ -589,8 +595,8 @@ github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= -github.com/minio/minio-go/v7 v7.0.56 h1:pkZplIEHu8vinjkmhsexcXpWth2tjVLphrTZx6fBVZY= -github.com/minio/minio-go/v7 v7.0.56/go.mod h1:NUDy4A4oXPq1l2yK6LTSvCEzAMeIcoz9lcj5dbzSrRE= +github.com/minio/minio-go/v7 v7.0.61 h1:87c+x8J3jxQ5VUGimV9oHdpjsAvy3fhneEBKuoKEVUI= +github.com/minio/minio-go/v7 v7.0.61/go.mod h1:BTu8FcrEw+HidY0zd/0eny43QnVNkXRPXrLXFuQBHXg= github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= github.com/mitchellh/cli v1.0.0/go.mod 
h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= @@ -665,8 +671,8 @@ github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI= github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= -github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= +github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTmyFqUwr+jcCvpVkK7sumiz+ko5H9eq4= @@ -688,8 +694,9 @@ github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c 
h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= @@ -734,8 +741,9 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o= -github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= @@ -759,8 +767,8 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/sirupsen/logrus v1.9.2 h1:oxx1eChJGI6Uks2ZC4W1zpLlVgqB8ner4EuQwV4Ik1Y= -github.com/sirupsen/logrus v1.9.2/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod 
h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v1.1.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= @@ -810,8 +818,9 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/thoas/go-funk v0.9.1 h1:O549iLZqPpTUQ10ykd26sZhzD+rmR5pWhuElrhbC20M= @@ -935,22 +944,24 @@ go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/atomic v1.10.0 h1:9qC72Qh0+3MqyJbAn8YU5xVq1frD8bn3JtD2oXtafVQ= -go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/automaxprocs v1.5.2 h1:2LxUOGiR3O6tw8ui5sZa2LAaHnsviZdVOUZw4fvbnME= go.uber.org/automaxprocs v1.5.2/go.mod 
h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= -go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= +go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -go.uber.org/multierr v1.7.0 h1:zaiO/rmgFjbmCXdSYJWQcdvOCsthmdaHfr3Gm2Kx4Ec= go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= -go.uber.org/zap v1.20.0 h1:N4oPlghZwYG55MlU6LXk/Zp00FVNE9X9wrYO8CEs4lc= go.uber.org/zap v1.20.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw= +go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= +go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= @@ -984,8 +995,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp 
v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 h1:tnebWN09GYg9OLPss1KXj8txwZc6X6uMr6VFdcGNbHw= -golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= +golang.org/x/exp v0.0.0-20230728194245-b0cb94b80691 h1:/yRP+0AN7mf5DkD3BAI6TOFnd51gEoDEb8o35jIFtgw= +golang.org/x/exp v0.0.0-20230728194245-b0cb94b80691/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -1018,8 +1029,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.9.0 h1:KENHtAZL2y3NLMYZeHY9DW8HW8V+kQyJsY/V9JlKvCs= -golang.org/x/mod v0.9.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1086,8 +1097,8 @@ golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod 
h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.6.0 h1:Lh8GPgSKBfWSwFvtuWOfeI3aAAnbXTSutYxJiOJFgIw= -golang.org/x/oauth2 v0.6.0/go.mod h1:ycmewcwgD4Rpr3eZJLSB4Kyyljb3qDh40vJ8STE5HKw= +golang.org/x/oauth2 v0.8.0 h1:6dkIjl3j3LtZ/O3sTgZTMsLKSftL/B8Zgq4huOIIUu8= +golang.org/x/oauth2 v0.8.0/go.mod h1:yr7u4HXZRm1R1kBWqr/xKNqewf0plRYoB7sla+BCIXE= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1099,8 +1110,9 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1179,8 +1191,8 @@ 
golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -1270,8 +1282,8 @@ golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4= -golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s= +golang.org/x/tools v0.11.0 h1:EMCa6U9S2LtZXLAMoWiR/R8dAQFRqbAitmbJ2UKhoi8= +golang.org/x/tools v0.11.0/go.mod h1:anzJrxPjNtfgiYQYirP2CPGzGLxrH2u2QBhn6Bf3qY8= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -1364,8 +1376,12 @@ google.golang.org/genproto 
v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxH google.golang.org/genproto v0.0.0-20210624195500-8bfb893ecb84/go.mod h1:SzzZ/N+nwJDaO1kznhnlzqS8ocJICar6hYhVyhi++24= google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20220503193339-ba3ae3f07e29/go.mod h1:RAyBrSAP7Fh3Nc84ghnVLDPuV51xc9agzmm4Ph6i0Q4= -google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633 h1:0BOZf6qNozI3pkN3fJLwNubheHJYHhMh91GRFOWWK08= -google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633/go.mod h1:UUQDJDOlWu4KYeJZffbWgBkS1YFobzKbLVfK69pe0Ak= +google.golang.org/genproto v0.0.0-20230706204954-ccb25ca9f130 h1:Au6te5hbKUV8pIYWHqOUZ1pva5qK/rwbIhoXEUB9Lu8= +google.golang.org/genproto v0.0.0-20230706204954-ccb25ca9f130/go.mod h1:O9kGHb51iE/nOGvQaDUuadVYqovW56s5emA88lQnj6Y= +google.golang.org/genproto/googleapis/api v0.0.0-20230629202037-9506855d4529 h1:s5YSX+ZH5b5vS9rnpGymvIyMpLRJizowqDlOuyjXnTk= +google.golang.org/genproto/googleapis/api v0.0.0-20230629202037-9506855d4529/go.mod h1:vHYtlOoi6TsQ3Uk2yxR7NI5z8uoV+3pZtR4jmHIkRig= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230726155614-23370e0ffb3e h1:S83+ibolgyZ0bqz7KEsUOPErxcv4VzlszxY+31OfB/E= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230726155614-23370e0ffb3e/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM= google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= @@ -1394,8 +1410,8 @@ google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzI google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.43.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.46.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= 
-google.golang.org/grpc v1.54.0 h1:EhTqbhiYeixwWQtAEZAxmV9MGqcjEU2mFx52xCzNyag= -google.golang.org/grpc v1.54.0/go.mod h1:PUSEXI6iWghWaB6lXM4knEgpJNu2qUcKfDtNci3EC2g= +google.golang.org/grpc v1.57.0 h1:kfzNeI/klCGD2YPMUlaGNT3pxvYfga7smW3Vth8Zsiw= +google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo= google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f h1:rqzndB2lIQGivcXdTuY3Y9NBvr70X+y77woofSRluec= google.golang.org/grpc/examples v0.0.0-20220617181431-3e7b97febc7f/go.mod h1:gxndsbNG1n4TZcHGgsYEfVGnTxqfEdfiDv6/DADXX9o= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= @@ -1412,8 +1428,8 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= -google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= +google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/avro.v0 v0.0.0-20171217001914-a730b5802183/go.mod h1:FvqrFXt+jCsyQibeRv4xxEJBL5iG2DDW5aeJwzDiq4A= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/datacoord/meta.go b/internal/datacoord/meta.go index 6904bf6ba8..a9432e4ee0 100644 --- a/internal/datacoord/meta.go +++ b/internal/datacoord/meta.go @@ -493,6 +493,20 @@ func CreateL0Operator(collectionID, partitionID, segmentID int64, channel string } } 
+func UpdateStorageVersionOperator(segmentID int64, version int64) UpdateOperator { + return func(modPack *updateSegmentPack) bool { + segment := modPack.meta.GetSegment(segmentID) + if segment == nil { + log.Info("meta update: update storage version - segment not found", + zap.Int64("segmentID", segmentID)) + return false + } + + segment.StorageVersion = version + return true + } +} + // Set status of segment // and record dropped time when change segment status to dropped func UpdateStatusOperator(segmentID int64, status commonpb.SegmentState) UpdateOperator { diff --git a/internal/datacoord/services.go b/internal/datacoord/services.go index 16794f88c4..5b9cb5614f 100644 --- a/internal/datacoord/services.go +++ b/internal/datacoord/services.go @@ -489,6 +489,9 @@ func (s *Server) SaveBinlogPaths(ctx context.Context, req *datapb.SaveBinlogPath // save checkpoints. operators = append(operators, UpdateCheckPointOperator(segmentID, req.GetImporting(), req.GetCheckPoints())) + if Params.CommonCfg.EnableStorageV2.GetAsBool() { + operators = append(operators, UpdateStorageVersionOperator(segmentID, req.GetStorageVersion())) + } // run all operator and update new segment info err := s.meta.UpdateSegmentsInfo(operators...) 
if err != nil { diff --git a/internal/datanode/data_sync_service.go b/internal/datanode/data_sync_service.go index e14933fedd..37eedb1745 100644 --- a/internal/datanode/data_sync_service.go +++ b/internal/datanode/data_sync_service.go @@ -32,6 +32,7 @@ import ( "github.com/milvus-io/milvus/internal/datanode/syncmgr" "github.com/milvus-io/milvus/internal/datanode/writebuffer" "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/util/flowgraph" "github.com/milvus-io/milvus/pkg/log" @@ -124,19 +125,19 @@ func (dsService *dataSyncService) close() { }) } -func getMetaCacheWithTickler(initCtx context.Context, node *DataNode, info *datapb.ChannelWatchInfo, tickler *tickler, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) { +func getMetaCacheWithTickler(initCtx context.Context, node *DataNode, info *datapb.ChannelWatchInfo, tickler *tickler, unflushed, flushed []*datapb.SegmentInfo, storageV2Cache *metacache.StorageV2Cache) (metacache.MetaCache, error) { tickler.setTotal(int32(len(unflushed) + len(flushed))) - return initMetaCache(initCtx, node.chunkManager, info, tickler, unflushed, flushed) + return initMetaCache(initCtx, storageV2Cache, node.chunkManager, info, tickler, unflushed, flushed) } -func getMetaCacheWithEtcdTickler(initCtx context.Context, node *DataNode, info *datapb.ChannelWatchInfo, tickler *etcdTickler, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) { +func getMetaCacheWithEtcdTickler(initCtx context.Context, node *DataNode, info *datapb.ChannelWatchInfo, tickler *etcdTickler, unflushed, flushed []*datapb.SegmentInfo, storageV2Cache *metacache.StorageV2Cache) (metacache.MetaCache, error) { tickler.watch() defer tickler.stop() - return initMetaCache(initCtx, node.chunkManager, info, tickler, unflushed, flushed) + return initMetaCache(initCtx, storageV2Cache, 
node.chunkManager, info, tickler, unflushed, flushed) } -func initMetaCache(initCtx context.Context, chunkManager storage.ChunkManager, info *datapb.ChannelWatchInfo, tickler interface{ inc() }, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) { +func initMetaCache(initCtx context.Context, storageV2Cache *metacache.StorageV2Cache, chunkManager storage.ChunkManager, info *datapb.ChannelWatchInfo, tickler interface{ inc() }, unflushed, flushed []*datapb.SegmentInfo) (metacache.MetaCache, error) { recoverTs := info.GetVchan().GetSeekPosition().GetTimestamp() // tickler will update addSegment progress to watchInfo @@ -154,7 +155,13 @@ func initMetaCache(initCtx context.Context, chunkManager storage.ChunkManager, i segment := item future := getOrCreateIOPool().Submit(func() (any, error) { - stats, err := loadStats(initCtx, chunkManager, info.GetSchema(), segment.GetID(), segment.GetCollectionID(), segment.GetStatslogs(), recoverTs) + var stats []*storage.PkStatistics + var err error + if params.Params.CommonCfg.EnableStorageV2.GetAsBool() { + stats, err = loadStatsV2(storageV2Cache, segment, info.GetSchema()) + } else { + stats, err = loadStats(initCtx, chunkManager, info.GetSchema(), segment.GetID(), segment.GetCollectionID(), segment.GetStatslogs(), recoverTs) + } if err != nil { return nil, err } @@ -188,6 +195,57 @@ func initMetaCache(initCtx context.Context, chunkManager storage.ChunkManager, i return metacache, nil } +func loadStatsV2(storageCache *metacache.StorageV2Cache, segment *datapb.SegmentInfo, schema *schemapb.CollectionSchema) ([]*storage.PkStatistics, error) { + space, err := storageCache.GetOrCreateSpace(segment.ID, writebuffer.SpaceCreatorFunc(segment.ID, schema, storageCache.ArrowSchema())) + if err != nil { + return nil, err + } + + getResult := func(stats []*storage.PrimaryKeyStats) []*storage.PkStatistics { + result := make([]*storage.PkStatistics, 0, len(stats)) + for _, stat := range stats { + pkStat := 
&storage.PkStatistics{ + PkFilter: stat.BF, + MinPK: stat.MinPk, + MaxPK: stat.MaxPk, + } + result = append(result, pkStat) + } + return result + } + + blobs := space.StatisticsBlobs() + deserBlobs := make([]*Blob, 0) + for _, b := range blobs { + if b.Name == storage.CompoundStatsType.LogIdx() { + blobData := make([]byte, b.Size) + _, err = space.ReadBlob(b.Name, blobData) + if err != nil { + return nil, err + } + stats, err := storage.DeserializeStatsList(&Blob{Value: blobData}) + if err != nil { + return nil, err + } + return getResult(stats), nil + } + } + + for _, b := range blobs { + blobData := make([]byte, b.Size) + _, err = space.ReadBlob(b.Name, blobData) + if err != nil { + return nil, err + } + deserBlobs = append(deserBlobs, &Blob{Value: blobData}) + } + stats, err := storage.DeserializeStats(deserBlobs) + if err != nil { + return nil, err + } + return getResult(stats), nil +} + func loadStats(ctx context.Context, chunkManager storage.ChunkManager, schema *schemapb.CollectionSchema, segmentID int64, collectionID int64, statsBinlogs []*datapb.FieldBinlog, ts Timestamp) ([]*storage.PkStatistics, error) { startTs := time.Now() log := log.With(zap.Int64("segmentID", segmentID)) @@ -274,7 +332,7 @@ func loadStats(ctx context.Context, chunkManager storage.ChunkManager, schema *s return result, nil } -func getServiceWithChannel(initCtx context.Context, node *DataNode, info *datapb.ChannelWatchInfo, metacache metacache.MetaCache, unflushed, flushed []*datapb.SegmentInfo) (*dataSyncService, error) { +func getServiceWithChannel(initCtx context.Context, node *DataNode, info *datapb.ChannelWatchInfo, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, unflushed, flushed []*datapb.SegmentInfo) (*dataSyncService, error) { var ( channelName = info.GetVchan().GetChannelName() collectionID = info.GetVchan().GetCollectionID() @@ -295,7 +353,7 @@ func getServiceWithChannel(initCtx context.Context, node *DataNode, info *datapb resendTTCh = make(chan 
resendTTMsg, 100) ) - node.writeBufferManager.Register(channelName, metacache, writebuffer.WithMetaWriter(syncmgr.BrokerMetaWriter(node.broker)), writebuffer.WithIDAllocator(node.allocator)) + node.writeBufferManager.Register(channelName, metacache, storageV2Cache, writebuffer.WithMetaWriter(syncmgr.BrokerMetaWriter(node.broker)), writebuffer.WithIDAllocator(node.allocator)) ctx, cancel := context.WithCancel(node.ctx) ds := &dataSyncService{ ctx: ctx, @@ -391,13 +449,20 @@ func newServiceWithEtcdTickler(initCtx context.Context, node *DataNode, info *da return nil, err } + var storageCache *metacache.StorageV2Cache + if params.Params.CommonCfg.EnableStorageV2.GetAsBool() { + storageCache, err = metacache.NewStorageV2Cache(info.Schema) + if err != nil { + return nil, err + } + } // init channel meta - metaCache, err := getMetaCacheWithEtcdTickler(initCtx, node, info, tickler, unflushedSegmentInfos, flushedSegmentInfos) + metaCache, err := getMetaCacheWithEtcdTickler(initCtx, node, info, tickler, unflushedSegmentInfos, flushedSegmentInfos, storageCache) if err != nil { return nil, err } - return getServiceWithChannel(initCtx, node, info, metaCache, unflushedSegmentInfos, flushedSegmentInfos) + return getServiceWithChannel(initCtx, node, info, metaCache, storageCache, unflushedSegmentInfos, flushedSegmentInfos) } // newDataSyncService gets a dataSyncService, but flowgraphs are not running @@ -415,11 +480,18 @@ func newDataSyncService(initCtx context.Context, node *DataNode, info *datapb.Ch return nil, err } + var storageCache *metacache.StorageV2Cache + if params.Params.CommonCfg.EnableStorageV2.GetAsBool() { + storageCache, err = metacache.NewStorageV2Cache(info.Schema) + if err != nil { + return nil, err + } + } // init metaCache meta - metaCache, err := getMetaCacheWithTickler(initCtx, node, info, tickler, unflushedSegmentInfos, flushedSegmentInfos) + metaCache, err := getMetaCacheWithTickler(initCtx, node, info, tickler, unflushedSegmentInfos, flushedSegmentInfos, 
storageCache) if err != nil { return nil, err } - return getServiceWithChannel(initCtx, node, info, metaCache, unflushedSegmentInfos, flushedSegmentInfos) + return getServiceWithChannel(initCtx, node, info, metaCache, storageCache, unflushedSegmentInfos, flushedSegmentInfos) } diff --git a/internal/datanode/data_sync_service_test.go b/internal/datanode/data_sync_service_test.go index 55a43da1e3..161c674ddf 100644 --- a/internal/datanode/data_sync_service_test.go +++ b/internal/datanode/data_sync_service_test.go @@ -310,7 +310,7 @@ func TestGetChannelWithTickler(t *testing.T) { }, } - metaCache, err := getMetaCacheWithTickler(context.TODO(), node, info, newTickler(), unflushed, flushed) + metaCache, err := getMetaCacheWithTickler(context.TODO(), node, info, newTickler(), unflushed, flushed, nil) assert.NoError(t, err) assert.NotNil(t, metaCache) assert.Equal(t, int64(1), metaCache.Collection()) @@ -404,7 +404,7 @@ func (s *DataSyncServiceSuite) TestStartStop() { return item, ok }) }, nil) - s.wbManager.EXPECT().Register(insertChannelName, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) + s.wbManager.EXPECT().Register(insertChannelName, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) ufs := []*datapb.SegmentInfo{{ CollectionID: collMeta.ID, diff --git a/internal/datanode/metacache/storagev2_cache.go b/internal/datanode/metacache/storagev2_cache.go new file mode 100644 index 0000000000..4b4981cee8 --- /dev/null +++ b/internal/datanode/metacache/storagev2_cache.go @@ -0,0 +1,187 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metacache + +import ( + "sync" + + "github.com/apache/arrow/go/v12/arrow" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + milvus_storage "github.com/milvus-io/milvus-storage/go/storage" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/util/merr" +) + +type StorageV2Cache struct { + arrowSchema *arrow.Schema + spaceMu sync.Mutex + spaces map[int64]*milvus_storage.Space +} + +func (s *StorageV2Cache) ArrowSchema() *arrow.Schema { + return s.arrowSchema +} + +func (s *StorageV2Cache) GetOrCreateSpace(segmentID int64, creator func() (*milvus_storage.Space, error)) (*milvus_storage.Space, error) { + s.spaceMu.Lock() + defer s.spaceMu.Unlock() + space, ok := s.spaces[segmentID] + if ok { + return space, nil + } + space, err := creator() + if err != nil { + return nil, err + } + s.spaces[segmentID] = space + return space, nil +} + +// only for unit test +func (s *StorageV2Cache) SetSpace(segmentID int64, space *milvus_storage.Space) { + s.spaceMu.Lock() + defer s.spaceMu.Unlock() + s.spaces[segmentID] = space +} + +func NewStorageV2Cache(schema *schemapb.CollectionSchema) (*StorageV2Cache, error) { + arrowSchema, err := ConvertToArrowSchema(schema.Fields) + if err != nil { + return nil, err + } + return &StorageV2Cache{ + arrowSchema: arrowSchema, + spaces: make(map[int64]*milvus_storage.Space), + }, nil +} + +func ConvertToArrowSchema(fields []*schemapb.FieldSchema) (*arrow.Schema, error) { + arrowFields := make([]arrow.Field, 0, len(fields)) + for _, field := range fields { + switch field.DataType { 
+ case schemapb.DataType_Bool: + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: arrow.FixedWidthTypes.Boolean, + }) + case schemapb.DataType_Int8: + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: arrow.PrimitiveTypes.Int8, + }) + case schemapb.DataType_Int16: + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: arrow.PrimitiveTypes.Int16, + }) + case schemapb.DataType_Int32: + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: arrow.PrimitiveTypes.Int32, + }) + case schemapb.DataType_Int64: + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: arrow.PrimitiveTypes.Int64, + }) + case schemapb.DataType_Float: + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: arrow.PrimitiveTypes.Float32, + }) + case schemapb.DataType_Double: + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: arrow.PrimitiveTypes.Float64, + }) + case schemapb.DataType_String, schemapb.DataType_VarChar: + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: arrow.BinaryTypes.String, + }) + case schemapb.DataType_Array: + elemType, err := convertToArrowType(field.ElementType) + if err != nil { + return nil, err + } + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: arrow.ListOf(elemType), + }) + case schemapb.DataType_JSON: + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: arrow.BinaryTypes.Binary, + }) + case schemapb.DataType_BinaryVector: + dim, err := storage.GetDimFromParams(field.TypeParams) + if err != nil { + return nil, err + } + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: &arrow.FixedSizeBinaryType{ByteWidth: dim / 8}, + }) + case schemapb.DataType_FloatVector: + dim, err := storage.GetDimFromParams(field.TypeParams) + if err != nil { + return nil, err + } + arrowFields = append(arrowFields, arrow.Field{ + Name: 
field.Name, + Type: &arrow.FixedSizeBinaryType{ByteWidth: dim * 4}, + }) + case schemapb.DataType_Float16Vector: + dim, err := storage.GetDimFromParams(field.TypeParams) + if err != nil { + return nil, err + } + arrowFields = append(arrowFields, arrow.Field{ + Name: field.Name, + Type: &arrow.FixedSizeBinaryType{ByteWidth: dim * 2}, + }) + default: + return nil, merr.WrapErrParameterInvalidMsg("unknown type %v", field.DataType.String()) + } + } + + return arrow.NewSchema(arrowFields, nil), nil +} + +func convertToArrowType(dataType schemapb.DataType) (arrow.DataType, error) { + switch dataType { + case schemapb.DataType_Bool: + return arrow.FixedWidthTypes.Boolean, nil + case schemapb.DataType_Int8: + return arrow.PrimitiveTypes.Int8, nil + case schemapb.DataType_Int16: + return arrow.PrimitiveTypes.Int16, nil + case schemapb.DataType_Int32: + return arrow.PrimitiveTypes.Int32, nil + case schemapb.DataType_Int64: + return arrow.PrimitiveTypes.Int64, nil + case schemapb.DataType_Float: + return arrow.PrimitiveTypes.Float32, nil + case schemapb.DataType_Double: + return arrow.PrimitiveTypes.Float64, nil + case schemapb.DataType_String, schemapb.DataType_VarChar: + return arrow.BinaryTypes.String, nil + default: + return nil, merr.WrapErrParameterInvalidMsg("unknown type %v", dataType.String()) + } +} diff --git a/internal/datanode/metacache/storagev2_cache_test.go b/internal/datanode/metacache/storagev2_cache_test.go new file mode 100644 index 0000000000..ba237326c9 --- /dev/null +++ b/internal/datanode/metacache/storagev2_cache_test.go @@ -0,0 +1,49 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metacache + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" +) + +func TestConvertArrowSchema(t *testing.T) { + fieldSchemas := []*schemapb.FieldSchema{ + {FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool}, + {FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8}, + {FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16}, + {FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32}, + {FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64}, + {FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float}, + {FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double}, + {FieldID: 8, Name: "field7", DataType: schemapb.DataType_String}, + {FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar}, + {FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "128"}}}, + {FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "128"}}}, + {FieldID: 12, Name: "field11", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int64}, + {FieldID: 13, Name: "field12", DataType: schemapb.DataType_JSON}, + {FieldID: 14, Name: "field13", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "128"}}}, + } + + schema, err := ConvertToArrowSchema(fieldSchemas) + assert.NoError(t, 
err) + assert.Equal(t, len(fieldSchemas), len(schema.Fields())) +} diff --git a/internal/datanode/services_test.go b/internal/datanode/services_test.go index 9729f6fd80..794ac26e1f 100644 --- a/internal/datanode/services_test.go +++ b/internal/datanode/services_test.go @@ -243,7 +243,7 @@ func (s *DataNodeServicesSuite) TestFlushSegments() { metaCache := metacache.NewMockMetaCache(s.T()) metaCache.EXPECT().Collection().Return(1).Maybe() metaCache.EXPECT().Schema().Return(schema).Maybe() - s.node.writeBufferManager.Register(dmChannelName, metaCache) + s.node.writeBufferManager.Register(dmChannelName, metaCache, nil) fgservice.metacache.AddSegment(&datapb.SegmentInfo{ ID: segmentID, diff --git a/internal/datanode/syncmgr/key_lock_dispatcher.go b/internal/datanode/syncmgr/key_lock_dispatcher.go index 94e83e0541..493c53c57c 100644 --- a/internal/datanode/syncmgr/key_lock_dispatcher.go +++ b/internal/datanode/syncmgr/key_lock_dispatcher.go @@ -1,11 +1,16 @@ package syncmgr import ( + "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus/pkg/util/conc" "github.com/milvus-io/milvus/pkg/util/lock" ) type Task interface { + SegmentID() int64 + Checkpoint() *msgpb.MsgPosition + StartPosition() *msgpb.MsgPosition + ChannelName() string Run() error } diff --git a/internal/datanode/syncmgr/key_lock_dispatcher_test.go b/internal/datanode/syncmgr/key_lock_dispatcher_test.go index 37dd84df27..25af7d88f6 100644 --- a/internal/datanode/syncmgr/key_lock_dispatcher_test.go +++ b/internal/datanode/syncmgr/key_lock_dispatcher_test.go @@ -6,6 +6,8 @@ import ( "github.com/stretchr/testify/suite" "go.uber.org/atomic" + + "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" ) type mockTask struct { @@ -17,6 +19,11 @@ func (t *mockTask) done() { close(t.ch) } +func (t *mockTask) SegmentID() int64 { panic("no implementation") } +func (t *mockTask) Checkpoint() *msgpb.MsgPosition { panic("no implementation") } +func (t *mockTask) StartPosition() *msgpb.MsgPosition { 
panic("no implementation") } +func (t *mockTask) ChannelName() string { panic("no implementation") } + func (t *mockTask) Run() error { <-t.ch return t.err diff --git a/internal/datanode/syncmgr/meta_writer.go b/internal/datanode/syncmgr/meta_writer.go index 716a935f3a..9acb4fd726 100644 --- a/internal/datanode/syncmgr/meta_writer.go +++ b/internal/datanode/syncmgr/meta_writer.go @@ -20,6 +20,7 @@ import ( // MetaWriter is the interface for SyncManager to write segment sync meta. type MetaWriter interface { UpdateSync(*SyncTask) error + UpdateSyncV2(*SyncTaskV2) error DropChannel(string) error } @@ -135,6 +136,84 @@ func (b *brokerMetaWriter) UpdateSync(pack *SyncTask) error { return nil } +func (b *brokerMetaWriter) UpdateSyncV2(pack *SyncTaskV2) error { + checkPoints := []*datapb.CheckPoint{} + + // only current segment checkpoint info, + segments := pack.metacache.GetSegmentsBy(metacache.WithSegmentIDs(pack.segmentID)) + if len(segments) == 0 { + return merr.WrapErrSegmentNotFound(pack.segmentID) + } + segment := segments[0] + checkPoints = append(checkPoints, &datapb.CheckPoint{ + SegmentID: pack.segmentID, + NumOfRows: segment.FlushedRows() + pack.batchSize, + Position: pack.checkpoint, + }) + + startPos := lo.Map(pack.metacache.GetSegmentsBy(metacache.WithStartPosNotRecorded()), func(info *metacache.SegmentInfo, _ int) *datapb.SegmentStartPosition { + return &datapb.SegmentStartPosition{ + SegmentID: info.SegmentID(), + StartPosition: info.StartPosition(), + } + }) + log.Info("SaveBinlogPath", + zap.Int64("SegmentID", pack.segmentID), + zap.Int64("CollectionID", pack.collectionID), + zap.Any("startPos", startPos), + zap.Any("checkPoints", checkPoints), + zap.String("vChannelName", pack.channelName), + ) + + req := &datapb.SaveBinlogPathsRequest{ + Base: commonpbutil.NewMsgBase( + commonpbutil.WithMsgType(0), + commonpbutil.WithMsgID(0), + commonpbutil.WithSourceID(paramtable.GetNodeID()), + ), + SegmentID: pack.segmentID, + CollectionID: pack.collectionID, + 
+ CheckPoints: checkPoints, + StorageVersion: pack.storageVersion, + + StartPositions: startPos, + Flushed: pack.isFlush, + Dropped: pack.isDrop, + Channel: pack.channelName, + } + err := retry.Do(context.Background(), func() error { + err := b.broker.SaveBinlogPaths(context.Background(), req) + // Segment not found during stale segment flush. Segment might get compacted already. + // Stop retry and still proceed to the end, ignoring this error. + if !pack.isFlush && errors.Is(err, merr.ErrSegmentNotFound) { + log.Warn("stale segment not found, could be compacted", + zap.Int64("segmentID", pack.segmentID)) + log.Warn("failed to SaveBinlogPaths", + zap.Int64("segmentID", pack.segmentID), + zap.Error(err)) + return nil + } + // meta error, datanode handles a virtual channel does not belong here + if errors.IsAny(err, merr.ErrSegmentNotFound, merr.ErrChannelNotFound) { + log.Warn("meta error found, skip sync and start to drop virtual channel", zap.String("channel", pack.channelName)) + return nil + } + + if err != nil { + return err + } + + return nil + }, b.opts...) 
+ if err != nil { + log.Warn("failed to SaveBinlogPaths", + zap.Int64("segmentID", pack.segmentID), + zap.Error(err)) + } + return err +} + func (b *brokerMetaWriter) DropChannel(channelName string) error { err := retry.Do(context.Background(), func() error { status, err := b.broker.DropVirtualChannel(context.Background(), &datapb.DropVirtualChannelRequest{ diff --git a/internal/datanode/syncmgr/meta_writer_test.go b/internal/datanode/syncmgr/meta_writer_test.go index 763c4f2283..8742e8c6b9 100644 --- a/internal/datanode/syncmgr/meta_writer_test.go +++ b/internal/datanode/syncmgr/meta_writer_test.go @@ -60,6 +60,32 @@ func (s *MetaWriterSuite) TestReturnError() { s.Error(err) } +func (s *MetaWriterSuite) TestNormalSaveV2() { + s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil) + + bfs := metacache.NewBloomFilterSet() + seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) + metacache.UpdateNumOfRows(1000)(seg) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + task := NewSyncTaskV2() + task.WithMetaCache(s.metacache) + err := s.writer.UpdateSyncV2(task) + s.NoError(err) +} + +func (s *MetaWriterSuite) TestReturnErrorV2() { + s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(errors.New("mocked")) + + bfs := metacache.NewBloomFilterSet() + seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) + metacache.UpdateNumOfRows(1000)(seg) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + task := NewSyncTaskV2() + task.WithMetaCache(s.metacache) + err := s.writer.UpdateSyncV2(task) + s.Error(err) +} + func TestMetaWriter(t *testing.T) { suite.Run(t, new(MetaWriterSuite)) } diff --git a/internal/datanode/syncmgr/mock_sync_manager.go b/internal/datanode/syncmgr/mock_sync_manager.go index ebf01293d5..9d05ee41a2 100644 --- a/internal/datanode/syncmgr/mock_sync_manager.go +++ b/internal/datanode/syncmgr/mock_sync_manager.go @@ -103,11 +103,11 
@@ func (_c *MockSyncManager_GetEarliestPosition_Call) RunAndReturn(run func(string } // SyncData provides a mock function with given fields: ctx, task -func (_m *MockSyncManager) SyncData(ctx context.Context, task *SyncTask) *conc.Future[error] { +func (_m *MockSyncManager) SyncData(ctx context.Context, task Task) *conc.Future[error] { ret := _m.Called(ctx, task) var r0 *conc.Future[error] - if rf, ok := ret.Get(0).(func(context.Context, *SyncTask) *conc.Future[error]); ok { + if rf, ok := ret.Get(0).(func(context.Context, Task) *conc.Future[error]); ok { r0 = rf(ctx, task) } else { if ret.Get(0) != nil { @@ -125,14 +125,14 @@ type MockSyncManager_SyncData_Call struct { // SyncData is a helper method to define mock.On call // - ctx context.Context -// - task *SyncTask +// - task Task func (_e *MockSyncManager_Expecter) SyncData(ctx interface{}, task interface{}) *MockSyncManager_SyncData_Call { return &MockSyncManager_SyncData_Call{Call: _e.mock.On("SyncData", ctx, task)} } -func (_c *MockSyncManager_SyncData_Call) Run(run func(ctx context.Context, task *SyncTask)) *MockSyncManager_SyncData_Call { +func (_c *MockSyncManager_SyncData_Call) Run(run func(ctx context.Context, task Task)) *MockSyncManager_SyncData_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(*SyncTask)) + run(args[0].(context.Context), args[1].(Task)) }) return _c } @@ -142,7 +142,7 @@ func (_c *MockSyncManager_SyncData_Call) Return(_a0 *conc.Future[error]) *MockSy return _c } -func (_c *MockSyncManager_SyncData_Call) RunAndReturn(run func(context.Context, *SyncTask) *conc.Future[error]) *MockSyncManager_SyncData_Call { +func (_c *MockSyncManager_SyncData_Call) RunAndReturn(run func(context.Context, Task) *conc.Future[error]) *MockSyncManager_SyncData_Call { _c.Call.Return(run) return _c } diff --git a/internal/datanode/syncmgr/sync_manager.go b/internal/datanode/syncmgr/sync_manager.go index 53a75c392e..840398b9b9 100644 --- 
a/internal/datanode/syncmgr/sync_manager.go +++ b/internal/datanode/syncmgr/sync_manager.go @@ -38,7 +38,7 @@ type SyncMeta struct { // it processes the sync tasks inside and changes the meta. type SyncManager interface { // SyncData is the method to submit sync task. - SyncData(ctx context.Context, task *SyncTask) *conc.Future[error] + SyncData(ctx context.Context, task Task) *conc.Future[error] // GetEarliestPosition returns the earliest position (normally start position) of the processing sync task of provided channel. GetEarliestPosition(channel string) *msgpb.MsgPosition // Block allows caller to block tasks of provided segment id. @@ -54,7 +54,7 @@ type syncManager struct { chunkManager storage.ChunkManager allocator allocator.Interface - tasks *typeutil.ConcurrentMap[string, *SyncTask] + tasks *typeutil.ConcurrentMap[string, Task] } func NewSyncManager(parallelTask int, chunkManager storage.ChunkManager, allocator allocator.Interface) (SyncManager, error) { @@ -65,19 +65,24 @@ func NewSyncManager(parallelTask int, chunkManager storage.ChunkManager, allocat keyLockDispatcher: newKeyLockDispatcher[int64](parallelTask), chunkManager: chunkManager, allocator: allocator, - tasks: typeutil.NewConcurrentMap[string, *SyncTask](), + tasks: typeutil.NewConcurrentMap[string, Task](), }, nil } -func (mgr syncManager) SyncData(ctx context.Context, task *SyncTask) *conc.Future[error] { - task.WithAllocator(mgr.allocator).WithChunkManager(mgr.chunkManager) +func (mgr syncManager) SyncData(ctx context.Context, task Task) *conc.Future[error] { + switch t := task.(type) { + case *SyncTask: + t.WithAllocator(mgr.allocator).WithChunkManager(mgr.chunkManager) + case *SyncTaskV2: + t.WithAllocator(mgr.allocator) + } - taskKey := fmt.Sprintf("%d-%d", task.segmentID, task.checkpoint.GetTimestamp()) + taskKey := fmt.Sprintf("%d-%d", task.SegmentID(), task.Checkpoint().GetTimestamp()) mgr.tasks.Insert(taskKey, task) // make sync for same segment execute in sequence // if previous 
sync task is not finished, block here - return mgr.Submit(task.segmentID, task, func(err error) { + return mgr.Submit(task.SegmentID(), task, func(err error) { // remove task from records mgr.tasks.Remove(taskKey) }) @@ -85,13 +90,13 @@ func (mgr syncManager) SyncData(ctx context.Context, task *SyncTask) *conc.Futur func (mgr syncManager) GetEarliestPosition(channel string) *msgpb.MsgPosition { var cp *msgpb.MsgPosition - mgr.tasks.Range(func(_ string, task *SyncTask) bool { - if task.startPosition == nil { + mgr.tasks.Range(func(_ string, task Task) bool { + if task.StartPosition() == nil { return true } - if task.channelName == channel { - if cp == nil || task.startPosition.GetTimestamp() < cp.GetTimestamp() { - cp = task.startPosition + if task.ChannelName() == channel { + if cp == nil || task.StartPosition().GetTimestamp() < cp.GetTimestamp() { + cp = task.StartPosition() } } return true diff --git a/internal/datanode/syncmgr/task.go b/internal/datanode/syncmgr/task.go index 4963fd06f3..e9b386075e 100644 --- a/internal/datanode/syncmgr/task.go +++ b/internal/datanode/syncmgr/task.go @@ -371,3 +371,19 @@ func (t *SyncTask) getInCodec() *storage.InsertCodec { return storage.NewInsertCodecWithSchema(meta) } + +func (t *SyncTask) SegmentID() int64 { + return t.segmentID +} + +func (t *SyncTask) Checkpoint() *msgpb.MsgPosition { + return t.checkpoint +} + +func (t *SyncTask) StartPosition() *msgpb.MsgPosition { + return t.startPosition +} + +func (t *SyncTask) ChannelName() string { + return t.channelName +} diff --git a/internal/datanode/syncmgr/taskv2.go b/internal/datanode/syncmgr/taskv2.go new file mode 100644 index 0000000000..55b8ec75d8 --- /dev/null +++ b/internal/datanode/syncmgr/taskv2.go @@ -0,0 +1,583 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package syncmgr + +import ( + "context" + "math" + "strconv" + + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + "github.com/apache/arrow/go/v12/arrow/memory" + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + milvus_storage "github.com/milvus-io/milvus-storage/go/storage" + "github.com/milvus-io/milvus-storage/go/storage/options" + "github.com/milvus-io/milvus/internal/allocator" + "github.com/milvus-io/milvus/internal/datanode/metacache" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/retry" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +type SyncTaskV2 struct { + *SyncTask + arrowSchema *arrow.Schema + reader array.RecordReader + statsBlob *storage.Blob + deleteReader array.RecordReader + storageVersion int64 + space *milvus_storage.Space + + failureCallback func(err error) +} + +func (t *SyncTaskV2) getLogger() *log.MLogger { + return log.Ctx(context.Background()).With( + zap.Int64("collectionID", t.collectionID), + zap.Int64("partitionID", 
t.partitionID), + zap.Int64("segmentID", t.segmentID), + zap.String("channel", t.channelName), + ) +} + +func (t *SyncTaskV2) handleError(err error) { + if t.failureCallback != nil { + t.failureCallback(err) + } +} + +func (t *SyncTaskV2) Run() error { + log := t.getLogger() + var err error + + infos := t.metacache.GetSegmentsBy(metacache.WithSegmentIDs(t.segmentID)) + if len(infos) == 0 { + log.Warn("failed to sync data, segment not found in metacache") + t.handleError(err) + return merr.WrapErrSegmentNotFound(t.segmentID) + } + + segment := infos[0] + if segment.CompactTo() > 0 { + log.Info("syncing segment compacted, update segment id", zap.Int64("compactTo", segment.CompactTo())) + // update sync task segment id + // it's ok to use compactTo segmentID here, since there shall be no insert for compacted segment + t.segmentID = segment.CompactTo() + } + + if err = t.serializeInsertData(); err != nil { + t.handleError(err) + return err + } + + if err = t.serializeStatsData(); err != nil { + t.handleError(err) + return err + } + + if err = t.serializeDeleteData(); err != nil { + t.handleError(err) + return err + } + + if err = t.writeSpace(); err != nil { + t.handleError(err) + return err + } + + if err = t.writeMeta(); err != nil { + t.handleError(err) + return err + } + + actions := []metacache.SegmentAction{metacache.FinishSyncing(t.batchSize)} + switch { + case t.isDrop: + actions = append(actions, metacache.UpdateState(commonpb.SegmentState_Dropped)) + case t.isFlush: + actions = append(actions, metacache.UpdateState(commonpb.SegmentState_Flushed)) + } + + t.metacache.UpdateSegments(metacache.MergeSegmentAction(actions...), metacache.WithSegmentIDs(t.segmentID)) + + return nil +} + +func (t *SyncTaskV2) serializeInsertData() error { + if t.insertData == nil { + return nil + } + + b := array.NewRecordBuilder(memory.DefaultAllocator, t.arrowSchema) + defer b.Release() + + if err := buildRecord(b, t.insertData, t.schema.Fields); err != nil { + return err + } + + 
rec := b.NewRecord() + defer rec.Release() + + itr, err := array.NewRecordReader(t.arrowSchema, []arrow.Record{rec}) + if err != nil { + return err + } + itr.Retain() + t.reader = itr + return nil +} + +func (t *SyncTaskV2) serializeStatsData() error { + if t.insertData == nil { + return nil + } + + pkField := lo.FindOrElse(t.schema.GetFields(), nil, func(field *schemapb.FieldSchema) bool { return field.GetIsPrimaryKey() }) + if pkField == nil { + return merr.WrapErrServiceInternal("cannot find pk field") + } + fieldID := pkField.GetFieldID() + + stats, rowNum := t.convertInsertData2PkStats(fieldID, pkField.GetDataType()) + + // not flush and not insert data + if !t.isFlush && stats == nil { + return nil + } + if t.isFlush { + return t.serializeMergedPkStats(fieldID, pkField.GetDataType(), stats, rowNum) + } + + return t.serializeSinglePkStats(fieldID, stats, rowNum) +} + +func (t *SyncTaskV2) serializeMergedPkStats(fieldID int64, pkType schemapb.DataType, stats *storage.PrimaryKeyStats, rowNum int64) error { + segments := t.metacache.GetSegmentsBy(metacache.WithSegmentIDs(t.segmentID)) + var statsList []*storage.PrimaryKeyStats + var oldRowNum int64 + for _, segment := range segments { + oldRowNum += segment.NumOfRows() + statsList = append(statsList, lo.Map(segment.GetHistory(), func(pks *storage.PkStatistics, _ int) *storage.PrimaryKeyStats { + return &storage.PrimaryKeyStats{ + FieldID: fieldID, + MaxPk: pks.MaxPK, + MinPk: pks.MinPK, + BF: pks.PkFilter, + PkType: int64(pkType), + } + })...) 
+ } + if stats != nil { + statsList = append(statsList, stats) + } + + blob, err := t.getInCodec().SerializePkStatsList(statsList, oldRowNum+rowNum) + if err != nil { + return err + } + blob.Key = strconv.Itoa(int(storage.CompoundStatsType)) + t.statsBlob = blob + return nil +} + +func (t *SyncTaskV2) serializeSinglePkStats(fieldID int64, stats *storage.PrimaryKeyStats, rowNum int64) error { + blob, err := t.getInCodec().SerializePkStats(stats, rowNum) + if err != nil { + return err + } + + logidx, err := t.allocator.AllocOne() + if err != nil { + return err + } + + blob.Key = strconv.Itoa(int(logidx)) + t.statsBlob = blob + return nil +} + +func (t *SyncTaskV2) serializeDeleteData() error { + if t.deleteData == nil { + return nil + } + + fields := make([]*schemapb.FieldSchema, 0) + pkField := lo.FindOrElse(t.schema.GetFields(), nil, func(field *schemapb.FieldSchema) bool { return field.GetIsPrimaryKey() }) + if pkField == nil { + return merr.WrapErrServiceInternal("cannot find pk field") + } + fields = append(fields, pkField) + tsField := &schemapb.FieldSchema{ + FieldID: common.TimeStampField, + Name: common.TimeStampFieldName, + DataType: schemapb.DataType_Int64, + } + fields = append(fields, tsField) + + schema, err := metacache.ConvertToArrowSchema(fields) + if err != nil { + return err + } + + b := array.NewRecordBuilder(memory.DefaultAllocator, schema) + defer b.Release() + + switch pkField.DataType { + case schemapb.DataType_Int64: + pb := b.Field(0).(*array.Int64Builder) + for _, pk := range t.deleteData.Pks { + pb.Append(pk.GetValue().(int64)) + } + case schemapb.DataType_VarChar: + pb := b.Field(0).(*array.StringBuilder) + for _, pk := range t.deleteData.Pks { + pb.Append(pk.GetValue().(string)) + } + default: + return merr.WrapErrParameterInvalidMsg("unexpected pk type %v", pkField.DataType) + } + + for _, ts := range t.deleteData.Tss { + b.Field(1).(*array.Int64Builder).Append(int64(ts)) + } + + rec := b.NewRecord() + defer rec.Release() + + reader, 
err := array.NewRecordReader(schema, []arrow.Record{rec}) + if err != nil { + return err + } + + t.deleteReader = reader + return nil +} + +func (t *SyncTaskV2) writeSpace() error { + defer func() { + if t.reader != nil { + t.reader.Release() + } + if t.deleteReader != nil { + t.deleteReader.Release() + } + }() + + // url := fmt.Sprintf("s3://%s:%s@%s/%d?endpoint_override=%s", + // params.Params.MinioCfg.AccessKeyID.GetValue(), + // params.Params.MinioCfg.SecretAccessKey.GetValue(), + // params.Params.MinioCfg.BucketName.GetValue(), + // t.segmentID, + // params.Params.MinioCfg.Address.GetValue()) + + // pkSchema, err := typeutil.GetPrimaryFieldSchema(t.schema) + // if err != nil { + // return err + // } + // vecSchema, err := typeutil.GetVectorFieldSchema(t.schema) + // if err != nil { + // return err + // } + // space, err := milvus_storage.Open( + // url, + // options.NewSpaceOptionBuilder(). + // SetSchema(schema.NewSchema( + // t.arrowSchema, + // &schema.SchemaOptions{ + // PrimaryColumn: pkSchema.Name, + // VectorColumn: vecSchema.Name, + // VersionColumn: common.TimeStampFieldName, + // }, + // )). 
+ // Build(), + // ) + // if err != nil { + // return err + // } + txn := t.space.NewTransaction() + if t.reader != nil { + txn.Write(t.reader, &options.DefaultWriteOptions) + } + if t.deleteReader != nil { + txn.Delete(t.deleteReader) + } + if t.statsBlob != nil { + txn.WriteBlob(t.statsBlob.Value, t.statsBlob.Key, false) + } + + return txn.Commit() +} + +func (t *SyncTaskV2) writeMeta() error { + return t.metaWriter.UpdateSyncV2(t) +} + +func buildRecord(b *array.RecordBuilder, data *storage.InsertData, fields []*schemapb.FieldSchema) error { + if data == nil { + log.Info("no buffer data to flush") + return nil + } + for i, field := range fields { + fBuilder := b.Field(i) + switch field.DataType { + case schemapb.DataType_Bool: + fBuilder.(*array.BooleanBuilder).AppendValues(data.Data[field.FieldID].(*storage.BoolFieldData).Data, nil) + case schemapb.DataType_Int8: + fBuilder.(*array.Int8Builder).AppendValues(data.Data[field.FieldID].(*storage.Int8FieldData).Data, nil) + case schemapb.DataType_Int16: + fBuilder.(*array.Int16Builder).AppendValues(data.Data[field.FieldID].(*storage.Int16FieldData).Data, nil) + case schemapb.DataType_Int32: + fBuilder.(*array.Int32Builder).AppendValues(data.Data[field.FieldID].(*storage.Int32FieldData).Data, nil) + case schemapb.DataType_Int64: + fBuilder.(*array.Int64Builder).AppendValues(data.Data[field.FieldID].(*storage.Int64FieldData).Data, nil) + case schemapb.DataType_Float: + fBuilder.(*array.Float32Builder).AppendValues(data.Data[field.FieldID].(*storage.FloatFieldData).Data, nil) + case schemapb.DataType_Double: + fBuilder.(*array.Float64Builder).AppendValues(data.Data[field.FieldID].(*storage.DoubleFieldData).Data, nil) + case schemapb.DataType_VarChar, schemapb.DataType_String: + fBuilder.(*array.StringBuilder).AppendValues(data.Data[field.FieldID].(*storage.StringFieldData).Data, nil) + case schemapb.DataType_Array: + appendListValues(fBuilder.(*array.ListBuilder), data.Data[field.FieldID].(*storage.ArrayFieldData)) + 
case schemapb.DataType_JSON: + fBuilder.(*array.BinaryBuilder).AppendValues(data.Data[field.FieldID].(*storage.JSONFieldData).Data, nil) + case schemapb.DataType_BinaryVector: + vecData := data.Data[field.FieldID].(*storage.BinaryVectorFieldData) + for i := 0; i < len(vecData.Data); i += vecData.Dim / 8 { + fBuilder.(*array.FixedSizeBinaryBuilder).Append(vecData.Data[i : i+vecData.Dim/8]) + } + case schemapb.DataType_FloatVector: + vecData := data.Data[field.FieldID].(*storage.FloatVectorFieldData) + builder := fBuilder.(*array.FixedSizeBinaryBuilder) + dim := vecData.Dim + data := vecData.Data + byteLength := dim * 4 + length := len(data) / dim + + builder.Reserve(length) + bytesData := make([]byte, byteLength) + for i := 0; i < length; i++ { + vec := data[i*dim : (i+1)*dim] + for j := range vec { + bytes := math.Float32bits(vec[j]) + common.Endian.PutUint32(bytesData[j*4:], bytes) + } + builder.Append(bytesData) + } + case schemapb.DataType_Float16Vector: + vecData := data.Data[field.FieldID].(*storage.Float16VectorFieldData) + builder := fBuilder.(*array.FixedSizeBinaryBuilder) + dim := vecData.Dim + data := vecData.Data + byteLength := dim * 2 + length := len(data) / byteLength + + builder.Reserve(length) + for i := 0; i < length; i++ { + builder.Append(data[i*byteLength : (i+1)*byteLength]) + } + + default: + return merr.WrapErrParameterInvalidMsg("unknown type %v", field.DataType.String()) + } + } + + return nil +} + +func appendListValues(builder *array.ListBuilder, data *storage.ArrayFieldData) error { + vb := builder.ValueBuilder() + switch data.ElementType { + case schemapb.DataType_Bool: + for _, data := range data.Data { + builder.Append(true) + vb.(*array.BooleanBuilder).AppendValues(data.GetBoolData().Data, nil) + } + case schemapb.DataType_Int8: + for _, data := range data.Data { + builder.Append(true) + vb.(*array.Int8Builder).AppendValues(castIntArray[int8](data.GetIntData().Data), nil) + } + case schemapb.DataType_Int16: + for _, data := range 
data.Data { + builder.Append(true) + vb.(*array.Int16Builder).AppendValues(castIntArray[int16](data.GetIntData().Data), nil) + } + case schemapb.DataType_Int32: + for _, data := range data.Data { + builder.Append(true) + vb.(*array.Int32Builder).AppendValues(data.GetIntData().Data, nil) + } + case schemapb.DataType_Int64: + for _, data := range data.Data { + builder.Append(true) + vb.(*array.Int64Builder).AppendValues(data.GetLongData().Data, nil) + } + case schemapb.DataType_Float: + for _, data := range data.Data { + builder.Append(true) + vb.(*array.Float32Builder).AppendValues(data.GetFloatData().Data, nil) + } + case schemapb.DataType_Double: + for _, data := range data.Data { + builder.Append(true) + vb.(*array.Float64Builder).AppendValues(data.GetDoubleData().Data, nil) + } + case schemapb.DataType_String, schemapb.DataType_VarChar: + for _, data := range data.Data { + builder.Append(true) + vb.(*array.StringBuilder).AppendValues(data.GetStringData().Data, nil) + } + + default: + return merr.WrapErrParameterInvalidMsg("unknown type %v", data.ElementType.String()) + } + return nil +} + +func castIntArray[T int8 | int16](nums []int32) []T { + ret := make([]T, 0, len(nums)) + for _, n := range nums { + ret = append(ret, T(n)) + } + return ret +} + +func NewSyncTaskV2() *SyncTaskV2 { + return &SyncTaskV2{ + SyncTask: NewSyncTask(), + } +} + +func (t *SyncTaskV2) WithChunkManager(cm storage.ChunkManager) *SyncTaskV2 { + t.chunkManager = cm + return t +} + +func (t *SyncTaskV2) WithAllocator(allocator allocator.Interface) *SyncTaskV2 { + t.allocator = allocator + return t +} + +func (t *SyncTaskV2) WithInsertData(insertData *storage.InsertData) *SyncTaskV2 { + t.insertData = insertData + return t +} + +func (t *SyncTaskV2) WithDeleteData(deleteData *storage.DeleteData) *SyncTaskV2 { + t.deleteData = deleteData + return t +} + +func (t *SyncTaskV2) WithStartPosition(start *msgpb.MsgPosition) *SyncTaskV2 { + t.startPosition = start + return t +} + +func (t 
*SyncTaskV2) WithCheckpoint(cp *msgpb.MsgPosition) *SyncTaskV2 { + t.checkpoint = cp + return t +} + +func (t *SyncTaskV2) WithCollectionID(collID int64) *SyncTaskV2 { + t.collectionID = collID + return t +} + +func (t *SyncTaskV2) WithPartitionID(partID int64) *SyncTaskV2 { + t.partitionID = partID + return t +} + +func (t *SyncTaskV2) WithSegmentID(segID int64) *SyncTaskV2 { + t.segmentID = segID + return t +} + +func (t *SyncTaskV2) WithChannelName(chanName string) *SyncTaskV2 { + t.channelName = chanName + return t +} + +func (t *SyncTaskV2) WithSchema(schema *schemapb.CollectionSchema) *SyncTaskV2 { + t.schema = schema + return t +} + +func (t *SyncTaskV2) WithTimeRange(from, to typeutil.Timestamp) *SyncTaskV2 { + t.tsFrom, t.tsTo = from, to + return t +} + +func (t *SyncTaskV2) WithFlush() *SyncTaskV2 { + t.isFlush = true + return t +} + +func (t *SyncTaskV2) WithDrop() *SyncTaskV2 { + t.isDrop = true + return t +} + +func (t *SyncTaskV2) WithMetaCache(metacache metacache.MetaCache) *SyncTaskV2 { + t.metacache = metacache + return t +} + +func (t *SyncTaskV2) WithMetaWriter(metaWriter MetaWriter) *SyncTaskV2 { + t.metaWriter = metaWriter + return t +} + +func (t *SyncTaskV2) WithWriteRetryOptions(opts ...retry.Option) *SyncTaskV2 { + t.writeRetryOpts = opts + return t +} + +func (t *SyncTaskV2) WithFailureCallback(callback func(error)) *SyncTaskV2 { + t.failureCallback = callback + return t +} + +func (t *SyncTaskV2) WithBatchSize(batchSize int64) *SyncTaskV2 { + t.batchSize = batchSize + return t +} + +func (t *SyncTaskV2) WithSpace(space *milvus_storage.Space) *SyncTaskV2 { + t.space = space + return t +} + +func (t *SyncTaskV2) WithArrowSchema(arrowSchema *arrow.Schema) *SyncTaskV2 { + t.arrowSchema = arrowSchema + return t +} + +func (t *SyncTaskV2) WithLevel(level datapb.SegmentLevel) *SyncTaskV2 { + t.level = level + return t +} diff --git a/internal/datanode/syncmgr/taskv2_test.go b/internal/datanode/syncmgr/taskv2_test.go new file mode 100644 index 
0000000000..ac53a38cd7 --- /dev/null +++ b/internal/datanode/syncmgr/taskv2_test.go @@ -0,0 +1,443 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package syncmgr + +import ( + "fmt" + "math/rand" + "testing" + "time" + + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + "github.com/apache/arrow/go/v12/arrow/memory" + "github.com/samber/lo" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + milvus_storage "github.com/milvus-io/milvus-storage/go/storage" + "github.com/milvus-io/milvus-storage/go/storage/options" + "github.com/milvus-io/milvus-storage/go/storage/schema" + "github.com/milvus-io/milvus/internal/allocator" + "github.com/milvus-io/milvus/internal/datanode/broker" + "github.com/milvus-io/milvus/internal/datanode/metacache" + "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/paramtable" + 
"github.com/milvus-io/milvus/pkg/util/tsoutil" +) + +type SyncTaskSuiteV2 struct { + suite.Suite + + collectionID int64 + partitionID int64 + segmentID int64 + channelName string + + metacache *metacache.MockMetaCache + allocator *allocator.MockGIDAllocator + schema *schemapb.CollectionSchema + arrowSchema *arrow.Schema + broker *broker.MockBroker + + space *milvus_storage.Space +} + +func (s *SyncTaskSuiteV2) SetupSuite() { + paramtable.Get().Init(paramtable.NewBaseTable()) + + s.collectionID = 100 + s.partitionID = 101 + s.segmentID = 1001 + s.channelName = "by-dev-rootcoord-dml_0_100v0" + + s.schema = &schemapb.CollectionSchema{ + Name: "sync_task_test_col", + Fields: []*schemapb.FieldSchema{ + {FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64}, + {FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64}, + { + FieldID: 100, + Name: "pk", + DataType: schemapb.DataType_Int64, + IsPrimaryKey: true, + }, + { + FieldID: 101, + Name: "vector", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + {Key: common.DimKey, Value: "128"}, + }, + }, + }, + } + + arrowSchema, err := metacache.ConvertToArrowSchema(s.schema.Fields) + s.NoError(err) + s.arrowSchema = arrowSchema +} + +func (s *SyncTaskSuiteV2) SetupTest() { + s.allocator = allocator.NewMockGIDAllocator() + s.allocator.AllocF = func(count uint32) (int64, int64, error) { + return time.Now().Unix(), int64(count), nil + } + s.allocator.AllocOneF = func() (allocator.UniqueID, error) { + return time.Now().Unix(), nil + } + + s.broker = broker.NewMockBroker(s.T()) + s.metacache = metacache.NewMockMetaCache(s.T()) + + tmpDir := s.T().TempDir() + space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder(). 
+ SetSchema(schema.NewSchema(s.arrowSchema, &schema.SchemaOptions{ + PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName, + })).Build()) + s.Require().NoError(err) + s.space = space +} + +func (s *SyncTaskSuiteV2) getEmptyInsertBuffer() *storage.InsertData { + buf, err := storage.NewInsertData(s.schema) + s.Require().NoError(err) + + return buf +} + +func (s *SyncTaskSuiteV2) getInsertBuffer() *storage.InsertData { + buf := s.getEmptyInsertBuffer() + + // generate data + for i := 0; i < 10; i++ { + data := make(map[storage.FieldID]any) + data[common.RowIDField] = int64(i + 1) + data[common.TimeStampField] = int64(i + 1) + data[100] = int64(i + 1) + vector := lo.RepeatBy(128, func(_ int) float32 { + return rand.Float32() + }) + data[101] = vector + err := buf.Append(data) + s.Require().NoError(err) + } + return buf +} + +func (s *SyncTaskSuiteV2) getDeleteBuffer() *storage.DeleteData { + buf := &storage.DeleteData{} + for i := 0; i < 10; i++ { + pk := storage.NewInt64PrimaryKey(int64(i + 1)) + ts := tsoutil.ComposeTSByTime(time.Now(), 0) + buf.Append(pk, ts) + } + return buf +} + +func (s *SyncTaskSuiteV2) getDeleteBufferZeroTs() *storage.DeleteData { + buf := &storage.DeleteData{} + for i := 0; i < 10; i++ { + pk := storage.NewInt64PrimaryKey(int64(i + 1)) + buf.Append(pk, 0) + } + return buf +} + +func (s *SyncTaskSuiteV2) getSuiteSyncTask() *SyncTaskV2 { + log.Info("space", zap.Any("space", s.space)) + task := NewSyncTaskV2(). + WithArrowSchema(s.arrowSchema). + WithSpace(s.space). + WithCollectionID(s.collectionID). + WithPartitionID(s.partitionID). + WithSegmentID(s.segmentID). + WithChannelName(s.channelName). + WithSchema(s.schema). + WithAllocator(s.allocator). 
+ WithMetaCache(s.metacache) + + return task +} + +func (s *SyncTaskSuiteV2) TestRunNormal() { + s.broker.EXPECT().SaveBinlogPaths(mock.Anything, mock.Anything).Return(nil) + bfs := metacache.NewBloomFilterSet() + fd, err := storage.NewFieldData(schemapb.DataType_Int64, &schemapb.FieldSchema{ + FieldID: 101, + Name: "ID", + IsPrimaryKey: true, + DataType: schemapb.DataType_Int64, + }) + s.Require().NoError(err) + + ids := []int64{1, 2, 3, 4, 5, 6, 7} + for _, id := range ids { + err = fd.AppendRow(id) + s.Require().NoError(err) + } + + bfs.UpdatePKRange(fd) + seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{}, bfs) + metacache.UpdateNumOfRows(1000)(seg) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() + + s.Run("without_insert_delete", func() { + task := s.getSuiteSyncTask() + task.WithMetaWriter(BrokerMetaWriter(s.broker)) + task.WithTimeRange(50, 100) + task.WithCheckpoint(&msgpb.MsgPosition{ + ChannelName: s.channelName, + MsgID: []byte{1, 2, 3, 4}, + Timestamp: 100, + }) + + err := task.Run() + s.NoError(err) + }) + + s.Run("with_insert_delete_cp", func() { + task := s.getSuiteSyncTask() + task.WithInsertData(s.getInsertBuffer()).WithDeleteData(s.getDeleteBuffer()) + task.WithTimeRange(50, 100) + task.WithMetaWriter(BrokerMetaWriter(s.broker)) + task.WithCheckpoint(&msgpb.MsgPosition{ + ChannelName: s.channelName, + MsgID: []byte{1, 2, 3, 4}, + Timestamp: 100, + }) + + err := task.Run() + s.NoError(err) + }) + + s.Run("with_insert_delete_flush", func() { + task := s.getSuiteSyncTask() + task.WithInsertData(s.getInsertBuffer()).WithDeleteData(s.getDeleteBuffer()) + task.WithFlush() + task.WithDrop() + task.WithMetaWriter(BrokerMetaWriter(s.broker)) + task.WithCheckpoint(&msgpb.MsgPosition{ + ChannelName: s.channelName, + MsgID: []byte{1, 2, 3, 4}, + Timestamp: 100, + }) + + err := task.Run() + s.NoError(err) + }) +} + +func (s 
*SyncTaskSuiteV2) TestBuildRecord() { + fieldSchemas := []*schemapb.FieldSchema{ + {FieldID: 1, Name: "field0", DataType: schemapb.DataType_Bool}, + {FieldID: 2, Name: "field1", DataType: schemapb.DataType_Int8}, + {FieldID: 3, Name: "field2", DataType: schemapb.DataType_Int16}, + {FieldID: 4, Name: "field3", DataType: schemapb.DataType_Int32}, + {FieldID: 5, Name: "field4", DataType: schemapb.DataType_Int64}, + {FieldID: 6, Name: "field5", DataType: schemapb.DataType_Float}, + {FieldID: 7, Name: "field6", DataType: schemapb.DataType_Double}, + {FieldID: 8, Name: "field7", DataType: schemapb.DataType_String}, + {FieldID: 9, Name: "field8", DataType: schemapb.DataType_VarChar}, + {FieldID: 10, Name: "field9", DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}}, + {FieldID: 11, Name: "field10", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}}, + {FieldID: 12, Name: "field11", DataType: schemapb.DataType_JSON}, + {FieldID: 13, Name: "field12", DataType: schemapb.DataType_Float16Vector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "4"}}}, + {FieldID: 14, Name: "field13", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int32}, + {FieldID: 15, Name: "field14", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Bool}, + {FieldID: 16, Name: "field15", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int8}, + {FieldID: 17, Name: "field16", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int16}, + {FieldID: 18, Name: "field17", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int64}, + {FieldID: 19, Name: "field18", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Float}, + {FieldID: 20, Name: "field19", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Double}, + {FieldID: 21, Name: "field20", DataType: schemapb.DataType_Array, 
ElementType: schemapb.DataType_String}, + } + + schema, err := metacache.ConvertToArrowSchema(fieldSchemas) + s.NoError(err) + + b := array.NewRecordBuilder(memory.NewGoAllocator(), schema) + defer b.Release() + + data := &storage.InsertData{ + Data: map[int64]storage.FieldData{ + 1: &storage.BoolFieldData{Data: []bool{true, false}}, + 2: &storage.Int8FieldData{Data: []int8{3, 4}}, + 3: &storage.Int16FieldData{Data: []int16{3, 4}}, + 4: &storage.Int32FieldData{Data: []int32{3, 4}}, + 5: &storage.Int64FieldData{Data: []int64{3, 4}}, + 6: &storage.FloatFieldData{Data: []float32{3, 4}}, + 7: &storage.DoubleFieldData{Data: []float64{3, 4}}, + 8: &storage.StringFieldData{Data: []string{"3", "4"}}, + 9: &storage.StringFieldData{Data: []string{"3", "4"}}, + 10: &storage.BinaryVectorFieldData{Data: []byte{0, 255}, Dim: 8}, + 11: &storage.FloatVectorFieldData{ + Data: []float32{4, 5, 6, 7, 4, 5, 6, 7}, + Dim: 4, + }, + 12: &storage.JSONFieldData{ + Data: [][]byte{ + []byte(`{"batch":2}`), + []byte(`{"key":"world"}`), + }, + }, + 13: &storage.Float16VectorFieldData{ + Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255}, + Dim: 4, + }, + 14: &storage.ArrayFieldData{ + ElementType: schemapb.DataType_Int32, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}}, + }, + }, + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}}, + }, + }, + }, + }, + 15: &storage.ArrayFieldData{ + ElementType: schemapb.DataType_Bool, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_BoolData{ + BoolData: &schemapb.BoolArray{Data: []bool{false, false, false}}, + }, + }, + { + Data: &schemapb.ScalarField_BoolData{ + BoolData: &schemapb.BoolArray{Data: []bool{false, false, false}}, + }, + }, + }, + }, + 16: &storage.ArrayFieldData{ + ElementType: schemapb.DataType_Int8, + Data: []*schemapb.ScalarField{ + { + Data: 
&schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}}, + }, + }, + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}}, + }, + }, + }, + }, + 17: &storage.ArrayFieldData{ + ElementType: schemapb.DataType_Int16, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{3, 2, 1}}, + }, + }, + { + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{Data: []int32{6, 5, 4}}, + }, + }, + }, + }, + 18: &storage.ArrayFieldData{ + ElementType: schemapb.DataType_Int64, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_LongData{ + LongData: &schemapb.LongArray{Data: []int64{3, 2, 1}}, + }, + }, + { + Data: &schemapb.ScalarField_LongData{ + LongData: &schemapb.LongArray{Data: []int64{3, 2, 1}}, + }, + }, + }, + }, + 19: &storage.ArrayFieldData{ + ElementType: schemapb.DataType_Float, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_FloatData{ + FloatData: &schemapb.FloatArray{Data: []float32{3, 2, 1}}, + }, + }, + { + Data: &schemapb.ScalarField_FloatData{ + FloatData: &schemapb.FloatArray{Data: []float32{3, 2, 1}}, + }, + }, + }, + }, + 20: &storage.ArrayFieldData{ + ElementType: schemapb.DataType_Double, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_DoubleData{ + DoubleData: &schemapb.DoubleArray{Data: []float64{3, 2, 1}}, + }, + }, + { + Data: &schemapb.ScalarField_DoubleData{ + DoubleData: &schemapb.DoubleArray{Data: []float64{3, 2, 1}}, + }, + }, + }, + }, + 21: &storage.ArrayFieldData{ + ElementType: schemapb.DataType_String, + Data: []*schemapb.ScalarField{ + { + Data: &schemapb.ScalarField_StringData{ + StringData: &schemapb.StringArray{Data: []string{"a", "b", "c"}}, + }, + }, + { + Data: &schemapb.ScalarField_StringData{ + StringData: &schemapb.StringArray{Data: []string{"a", "b", "c"}}, + }, + }, + }, + }, + }, + } + + err = buildRecord(b, data, fieldSchemas) + 
s.NoError(err) + s.EqualValues(2, b.NewRecord().NumRows()) +} + +func (s *SyncTaskSuiteV2) TestAppendLists() { +} + +func TestSyncTaskV2(t *testing.T) { + suite.Run(t, new(SyncTaskSuiteV2)) +} diff --git a/internal/datanode/writebuffer/bf_write_buffer.go b/internal/datanode/writebuffer/bf_write_buffer.go index a44bb86eb2..2d56977dd2 100644 --- a/internal/datanode/writebuffer/bf_write_buffer.go +++ b/internal/datanode/writebuffer/bf_write_buffer.go @@ -17,9 +17,9 @@ type bfWriteBuffer struct { metacache metacache.MetaCache } -func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) { +func NewBFWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) { return &bfWriteBuffer{ - writeBufferBase: newWriteBufferBase(channel, metacache, syncMgr, option), + writeBufferBase: newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option), syncMgr: syncMgr, }, nil } diff --git a/internal/datanode/writebuffer/bf_write_buffer_test.go b/internal/datanode/writebuffer/bf_write_buffer_test.go index 935466e831..2c59b97e52 100644 --- a/internal/datanode/writebuffer/bf_write_buffer_test.go +++ b/internal/datanode/writebuffer/bf_write_buffer_test.go @@ -1,6 +1,7 @@ package writebuffer import ( + "fmt" "math/rand" "testing" "time" @@ -12,10 +13,14 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + milvus_storage "github.com/milvus-io/milvus-storage/go/storage" + "github.com/milvus-io/milvus-storage/go/storage/options" + "github.com/milvus-io/milvus-storage/go/storage/schema" "github.com/milvus-io/milvus/internal/datanode/broker" "github.com/milvus-io/milvus/internal/datanode/metacache" "github.com/milvus-io/milvus/internal/datanode/syncmgr" 
"github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/mq/msgstream" @@ -25,12 +30,13 @@ import ( type BFWriteBufferSuite struct { suite.Suite - collID int64 - channelName string - collSchema *schemapb.CollectionSchema - syncMgr *syncmgr.MockSyncManager - metacache *metacache.MockMetaCache - broker *broker.MockBroker + collID int64 + channelName string + collSchema *schemapb.CollectionSchema + syncMgr *syncmgr.MockSyncManager + metacache *metacache.MockMetaCache + broker *broker.MockBroker + storageV2Cache *metacache.StorageV2Cache } func (s *BFWriteBufferSuite) SetupSuite() { @@ -141,10 +147,13 @@ func (s *BFWriteBufferSuite) SetupTest() { s.metacache.EXPECT().Schema().Return(s.collSchema).Maybe() s.metacache.EXPECT().Collection().Return(s.collID).Maybe() s.broker = broker.NewMockBroker(s.T()) + var err error + s.storageV2Cache, err = metacache.NewStorageV2Cache(s.collSchema) + s.Require().NoError(err) } func (s *BFWriteBufferSuite) TestBufferData() { - wb, err := NewBFWriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{}) + wb, err := NewBFWriteBuffer(s.channelName, s.metacache, nil, s.syncMgr, &writeBufferOption{}) s.NoError(err) seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) @@ -164,7 +173,77 @@ func (s *BFWriteBufferSuite) TestAutoSync() { paramtable.Get().Save(paramtable.Get().DataNodeCfg.FlushInsertBufferSize.Key, "1") s.Run("normal_auto_sync", func() { - wb, err := NewBFWriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{ + wb, err := NewBFWriteBuffer(s.channelName, s.metacache, nil, s.syncMgr, &writeBufferOption{ + syncPolicies: []SyncPolicy{ + SyncFullBuffer, + GetSyncStaleBufferPolicy(paramtable.Get().DataNodeCfg.SyncPeriod.GetAsDuration(time.Second)), + GetFlushingSegmentsPolicy(s.metacache), + 
}, + }) + s.NoError(err) + + seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) + s.metacache.EXPECT().GetSegmentByID(int64(1002)).Return(seg, true) + s.metacache.EXPECT().GetSegmentIDsBy(mock.Anything).Return([]int64{1002}) + s.metacache.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() + s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() + s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything).Return() + s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything).Return(nil) + + pks, msg := s.composeInsertMsg(1000, 10, 128) + delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) })) + + err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200}) + s.NoError(err) + }) +} + +func (s *BFWriteBufferSuite) TestBufferDataWithStorageV2() { + params.Params.CommonCfg.EnableStorageV2.SwapTempValue("true") + params.Params.CommonCfg.StorageScheme.SwapTempValue("file") + tmpDir := s.T().TempDir() + arrowSchema, err := metacache.ConvertToArrowSchema(s.collSchema.Fields) + s.Require().NoError(err) + space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder(). 
+ SetSchema(schema.NewSchema(arrowSchema, &schema.SchemaOptions{ + PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName, + })).Build()) + s.Require().NoError(err) + s.storageV2Cache.SetSpace(1000, space) + wb, err := NewBFWriteBuffer(s.channelName, s.metacache, s.storageV2Cache, s.syncMgr, &writeBufferOption{}) + s.NoError(err) + + seg := metacache.NewSegmentInfo(&datapb.SegmentInfo{ID: 1000}, metacache.NewBloomFilterSet()) + s.metacache.EXPECT().GetSegmentsBy(mock.Anything, mock.Anything).Return([]*metacache.SegmentInfo{seg}) + s.metacache.EXPECT().GetSegmentByID(int64(1000)).Return(nil, false) + s.metacache.EXPECT().AddSegment(mock.Anything, mock.Anything, mock.Anything).Return() + s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything).Return() + + pks, msg := s.composeInsertMsg(1000, 10, 128) + delMsg := s.composeDeleteMsg(lo.Map(pks, func(id int64, _ int) storage.PrimaryKey { return storage.NewInt64PrimaryKey(id) })) + + err = wb.BufferData([]*msgstream.InsertMsg{msg}, []*msgstream.DeleteMsg{delMsg}, &msgpb.MsgPosition{Timestamp: 100}, &msgpb.MsgPosition{Timestamp: 200}) + s.NoError(err) +} + +func (s *BFWriteBufferSuite) TestAutoSyncWithStorageV2() { + params.Params.CommonCfg.EnableStorageV2.SwapTempValue("true") + paramtable.Get().Save(paramtable.Get().DataNodeCfg.FlushInsertBufferSize.Key, "1") + tmpDir := s.T().TempDir() + arrowSchema, err := metacache.ConvertToArrowSchema(s.collSchema.Fields) + s.Require().NoError(err) + + space, err := milvus_storage.Open(fmt.Sprintf("file:///%s", tmpDir), options.NewSpaceOptionBuilder(). 
+ SetSchema(schema.NewSchema(arrowSchema, &schema.SchemaOptions{ + PrimaryColumn: "pk", VectorColumn: "vector", VersionColumn: common.TimeStampFieldName, + })).Build()) + s.Require().NoError(err) + s.storageV2Cache.SetSpace(1002, space) + + s.Run("normal_auto_sync", func() { + wb, err := NewBFWriteBuffer(s.channelName, s.metacache, s.storageV2Cache, s.syncMgr, &writeBufferOption{ syncPolicies: []SyncPolicy{ SyncFullBuffer, GetSyncStaleBufferPolicy(paramtable.Get().DataNodeCfg.SyncPeriod.GetAsDuration(time.Second)), diff --git a/internal/datanode/writebuffer/l0_write_buffer.go b/internal/datanode/writebuffer/l0_write_buffer.go index 14aeba27b6..36f31bf19c 100644 --- a/internal/datanode/writebuffer/l0_write_buffer.go +++ b/internal/datanode/writebuffer/l0_write_buffer.go @@ -28,14 +28,14 @@ type l0WriteBuffer struct { idAllocator allocator.Interface } -func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) { +func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) { if option.idAllocator == nil { return nil, merr.WrapErrServiceInternal("id allocator is nil when creating l0 write buffer") } return &l0WriteBuffer{ l0Segments: make(map[int64]int64), l0partition: make(map[int64]int64), - writeBufferBase: newWriteBufferBase(channel, metacache, syncMgr, option), + writeBufferBase: newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option), syncMgr: syncMgr, idAllocator: option.idAllocator, }, nil diff --git a/internal/datanode/writebuffer/l0_write_buffer_test.go b/internal/datanode/writebuffer/l0_write_buffer_test.go index 3007e0325b..ac746556f4 100644 --- a/internal/datanode/writebuffer/l0_write_buffer_test.go +++ b/internal/datanode/writebuffer/l0_write_buffer_test.go @@ -145,7 +145,7 @@ func (s *L0WriteBufferSuite) SetupTest() { } func (s 
*L0WriteBufferSuite) TestBufferData() { - wb, err := NewL0WriteBuffer(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{ + wb, err := NewL0WriteBuffer(s.channelName, s.metacache, nil, s.syncMgr, &writeBufferOption{ idAllocator: s.allocator, }) s.NoError(err) diff --git a/internal/datanode/writebuffer/manager.go b/internal/datanode/writebuffer/manager.go index b7035ec4cc..cbc3f8ada2 100644 --- a/internal/datanode/writebuffer/manager.go +++ b/internal/datanode/writebuffer/manager.go @@ -17,7 +17,7 @@ import ( // BufferManager is the interface for WriteBuffer management. type BufferManager interface { // Register adds a WriteBuffer with provided schema & options. - Register(channel string, metacache metacache.MetaCache, opts ...WriteBufferOption) error + Register(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error // FlushSegments notifies writeBuffer corresponding to provided channel to flush segments. FlushSegments(ctx context.Context, channel string, segmentIDs []int64) error // FlushChannel @@ -49,7 +49,7 @@ type bufferManager struct { } // Register a new WriteBuffer for channel. -func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, opts ...WriteBufferOption) error { +func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error { m.mut.Lock() defer m.mut.Unlock() @@ -57,7 +57,7 @@ func (m *bufferManager) Register(channel string, metacache metacache.MetaCache, if ok { return merr.WrapErrChannelReduplicate(channel) } - buf, err := NewWriteBuffer(channel, metacache, m.syncMgr, opts...) + buf, err := NewWriteBuffer(channel, metacache, storageV2Cache, m.syncMgr, opts...) 
if err != nil { return err } diff --git a/internal/datanode/writebuffer/manager_test.go b/internal/datanode/writebuffer/manager_test.go index 48d01563c1..144878a660 100644 --- a/internal/datanode/writebuffer/manager_test.go +++ b/internal/datanode/writebuffer/manager_test.go @@ -69,10 +69,10 @@ func (s *ManagerSuite) SetupTest() { func (s *ManagerSuite) TestRegister() { manager := s.manager - err := manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator)) + err := manager.Register(s.channelName, s.metacache, nil, WithIDAllocator(s.allocator)) s.NoError(err) - err = manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator)) + err = manager.Register(s.channelName, s.metacache, nil, WithIDAllocator(s.allocator)) s.Error(err) s.ErrorIs(err, merr.ErrChannelReduplicate) } @@ -176,7 +176,7 @@ func (s *ManagerSuite) TestRemoveChannel() { }) s.Run("remove_channel", func() { - err := manager.Register(s.channelName, s.metacache, WithIDAllocator(s.allocator)) + err := manager.Register(s.channelName, s.metacache, nil, WithIDAllocator(s.allocator)) s.Require().NoError(err) s.NotPanics(func() { diff --git a/internal/datanode/writebuffer/mock_mananger.go b/internal/datanode/writebuffer/mock_mananger.go index cdd6e22f5f..ac7a501f98 100644 --- a/internal/datanode/writebuffer/mock_mananger.go +++ b/internal/datanode/writebuffer/mock_mananger.go @@ -288,20 +288,20 @@ func (_c *MockBufferManager_NotifyCheckpointUpdated_Call) RunAndReturn(run func( return _c } -// Register provides a mock function with given fields: channel, _a1, opts -func (_m *MockBufferManager) Register(channel string, _a1 metacache.MetaCache, opts ...WriteBufferOption) error { +// Register provides a mock function with given fields: channel, _a1, storageV2Cache, opts +func (_m *MockBufferManager) Register(channel string, _a1 metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption) error { _va := make([]interface{}, len(opts)) for _i := range opts { 
_va[_i] = opts[_i] } var _ca []interface{} - _ca = append(_ca, channel, _a1) + _ca = append(_ca, channel, _a1, storageV2Cache) _ca = append(_ca, _va...) ret := _m.Called(_ca...) var r0 error - if rf, ok := ret.Get(0).(func(string, metacache.MetaCache, ...WriteBufferOption) error); ok { - r0 = rf(channel, _a1, opts...) + if rf, ok := ret.Get(0).(func(string, metacache.MetaCache, *metacache.StorageV2Cache, ...WriteBufferOption) error); ok { + r0 = rf(channel, _a1, storageV2Cache, opts...) } else { r0 = ret.Error(0) } @@ -317,21 +317,22 @@ type MockBufferManager_Register_Call struct { // Register is a helper method to define mock.On call // - channel string // - _a1 metacache.MetaCache +// - storageV2Cache *metacache.StorageV2Cache // - opts ...WriteBufferOption -func (_e *MockBufferManager_Expecter) Register(channel interface{}, _a1 interface{}, opts ...interface{}) *MockBufferManager_Register_Call { +func (_e *MockBufferManager_Expecter) Register(channel interface{}, _a1 interface{}, storageV2Cache interface{}, opts ...interface{}) *MockBufferManager_Register_Call { return &MockBufferManager_Register_Call{Call: _e.mock.On("Register", - append([]interface{}{channel, _a1}, opts...)...)} + append([]interface{}{channel, _a1, storageV2Cache}, opts...)...)} } -func (_c *MockBufferManager_Register_Call) Run(run func(channel string, _a1 metacache.MetaCache, opts ...WriteBufferOption)) *MockBufferManager_Register_Call { +func (_c *MockBufferManager_Register_Call) Run(run func(channel string, _a1 metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, opts ...WriteBufferOption)) *MockBufferManager_Register_Call { _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]WriteBufferOption, len(args)-2) - for i, a := range args[2:] { + variadicArgs := make([]WriteBufferOption, len(args)-3) + for i, a := range args[3:] { if a != nil { variadicArgs[i] = a.(WriteBufferOption) } } - run(args[0].(string), args[1].(metacache.MetaCache), variadicArgs...) 
+ run(args[0].(string), args[1].(metacache.MetaCache), args[2].(*metacache.StorageV2Cache), variadicArgs...) }) return _c } @@ -341,7 +342,7 @@ func (_c *MockBufferManager_Register_Call) Return(_a0 error) *MockBufferManager_ return _c } -func (_c *MockBufferManager_Register_Call) RunAndReturn(run func(string, metacache.MetaCache, ...WriteBufferOption) error) *MockBufferManager_Register_Call { +func (_c *MockBufferManager_Register_Call) RunAndReturn(run func(string, metacache.MetaCache, *metacache.StorageV2Cache, ...WriteBufferOption) error) *MockBufferManager_Register_Call { _c.Call.Return(run) return _c } diff --git a/internal/datanode/writebuffer/write_buffer.go b/internal/datanode/writebuffer/write_buffer.go index 82d33c4616..9c3976f144 100644 --- a/internal/datanode/writebuffer/write_buffer.go +++ b/internal/datanode/writebuffer/write_buffer.go @@ -2,8 +2,10 @@ package writebuffer import ( "context" + "fmt" "sync" + "github.com/apache/arrow/go/v12/arrow" "github.com/samber/lo" "go.uber.org/atomic" "go.uber.org/zap" @@ -11,11 +13,16 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + milvus_storage "github.com/milvus-io/milvus-storage/go/storage" + "github.com/milvus-io/milvus-storage/go/storage/options" + "github.com/milvus-io/milvus-storage/go/storage/schema" "github.com/milvus-io/milvus/internal/datanode/broker" "github.com/milvus-io/milvus/internal/datanode/metacache" "github.com/milvus-io/milvus/internal/datanode/syncmgr" "github.com/milvus-io/milvus/internal/proto/datapb" + "github.com/milvus-io/milvus/internal/querycoordv2/params" "github.com/milvus-io/milvus/internal/storage" + "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/conc" @@ -48,7 +55,7 @@ type WriteBuffer interface { Close(drop bool) } -func 
NewWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, opts ...WriteBufferOption) (WriteBuffer, error) { +func NewWriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, opts ...WriteBufferOption) (WriteBuffer, error) { option := defaultWBOption() option.syncPolicies = append(option.syncPolicies, GetFlushingSegmentsPolicy(metacache)) for _, opt := range opts { @@ -57,9 +64,9 @@ func NewWriteBuffer(channel string, metacache metacache.MetaCache, syncMgr syncm switch option.deletePolicy { case DeletePolicyBFPkOracle: - return NewBFWriteBuffer(channel, metacache, syncMgr, option) + return NewBFWriteBuffer(channel, metacache, storageV2Cache, syncMgr, option) case DeletePolicyL0Delta: - return NewL0WriteBuffer(channel, metacache, syncMgr, option) + return NewL0WriteBuffer(channel, metacache, storageV2Cache, syncMgr, option) default: return nil, merr.WrapErrParameterInvalid("valid delete policy config", option.deletePolicy) } @@ -82,9 +89,11 @@ type writeBufferBase struct { syncPolicies []SyncPolicy checkpoint *msgpb.MsgPosition flushTimestamp *atomic.Uint64 + + storagev2Cache *metacache.StorageV2Cache } -func newWriteBufferBase(channel string, metacache metacache.MetaCache, syncMgr syncmgr.SyncManager, option *writeBufferOption) *writeBufferBase { +func newWriteBufferBase(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) *writeBufferBase { flushTs := atomic.NewUint64(nonFlushTS) flushTsPolicy := GetFlushTsPolicy(flushTs, metacache) option.syncPolicies = append(option.syncPolicies, flushTsPolicy) @@ -99,6 +108,7 @@ func newWriteBufferBase(channel string, metacache metacache.MetaCache, syncMgr s metaCache: metacache, syncPolicies: option.syncPolicies, flushTimestamp: flushTs, + storagev2Cache: storageV2Cache, } } @@ -263,7 +273,42 @@ func (wb *writeBufferBase) bufferDelete(segmentID int64, pks 
[]storage.PrimaryKe return nil } -func (wb *writeBufferBase) getSyncTask(ctx context.Context, segmentID int64) *syncmgr.SyncTask { +func SpaceCreatorFunc(segmentID int64, collSchema *schemapb.CollectionSchema, arrowSchema *arrow.Schema) func() (*milvus_storage.Space, error) { + return func() (*milvus_storage.Space, error) { + url := fmt.Sprintf("%s://%s:%s@%s/%d?endpoint_override=%s", + params.Params.CommonCfg.StorageScheme.GetValue(), + params.Params.MinioCfg.AccessKeyID.GetValue(), + params.Params.MinioCfg.SecretAccessKey.GetValue(), + params.Params.MinioCfg.BucketName.GetValue(), + segmentID, + params.Params.MinioCfg.Address.GetValue()) + + pkSchema, err := typeutil.GetPrimaryFieldSchema(collSchema) + if err != nil { + return nil, err + } + vecSchema, err := typeutil.GetVectorFieldSchema(collSchema) + if err != nil { + return nil, err + } + space, err := milvus_storage.Open( + url, + options.NewSpaceOptionBuilder(). + SetSchema(schema.NewSchema( + arrowSchema, + &schema.SchemaOptions{ + PrimaryColumn: pkSchema.Name, + VectorColumn: vecSchema.Name, + VersionColumn: common.TimeStampFieldName, + }, + )). + Build(), + ) + return space, err + } +} + +func (wb *writeBufferBase) getSyncTask(ctx context.Context, segmentID int64) syncmgr.Task { segmentInfo, ok := wb.metaCache.GetSegmentByID(segmentID) // wb.metaCache.GetSegmentsBy(metacache.WithSegmentIDs(segmentID)) if !ok { log.Ctx(ctx).Warn("segment info not found in meta cache", zap.Int64("segmentID", segmentID)) @@ -280,26 +325,62 @@ func (wb *writeBufferBase) getSyncTask(ctx context.Context, segmentID int64) *sy } wb.metaCache.UpdateSegments(metacache.MergeSegmentAction(actions...), metacache.WithSegmentIDs(segmentID)) - syncTask := syncmgr.NewSyncTask(). - WithInsertData(insert). - WithDeleteData(delta). - WithCollectionID(wb.collectionID). - WithPartitionID(segmentInfo.PartitionID()). - WithChannelName(wb.channelName). - WithSegmentID(segmentID). - WithStartPosition(startPos). - WithLevel(segmentInfo.Level()). 
- WithCheckpoint(wb.checkpoint). - WithSchema(wb.collSchema). - WithBatchSize(batchSize). - WithMetaCache(wb.metaCache). - WithMetaWriter(wb.metaWriter). - WithFailureCallback(func(err error) { - // TODO could change to unsub channel in the future - panic(err) - }) - if segmentInfo.State() == commonpb.SegmentState_Flushing { - syncTask.WithFlush() + var syncTask syncmgr.Task + if params.Params.CommonCfg.EnableStorageV2.GetAsBool() { + arrowSchema := wb.storagev2Cache.ArrowSchema() + space, err := wb.storagev2Cache.GetOrCreateSpace(segmentID, SpaceCreatorFunc(segmentID, wb.collSchema, arrowSchema)) + if err != nil { + log.Warn("failed to get or create space", zap.Error(err)) + return nil + } + + task := syncmgr.NewSyncTaskV2(). + WithInsertData(insert). + WithDeleteData(delta). + WithCollectionID(wb.collectionID). + WithPartitionID(segmentInfo.PartitionID()). + WithChannelName(wb.channelName). + WithSegmentID(segmentID). + WithStartPosition(startPos). + WithLevel(segmentInfo.Level()). + WithCheckpoint(wb.checkpoint). + WithSchema(wb.collSchema). + WithBatchSize(batchSize). + WithMetaCache(wb.metaCache). + WithMetaWriter(wb.metaWriter). + WithArrowSchema(arrowSchema). + WithSpace(space). + WithFailureCallback(func(err error) { + // TODO could change to unsub channel in the future + panic(err) + }) + if segmentInfo.State() == commonpb.SegmentState_Flushing { + task.WithFlush() + } + syncTask = task + } else { + task := syncmgr.NewSyncTask(). + WithInsertData(insert). + WithDeleteData(delta). + WithCollectionID(wb.collectionID). + WithPartitionID(segmentInfo.PartitionID()). + WithChannelName(wb.channelName). + WithSegmentID(segmentID). + WithStartPosition(startPos). + WithLevel(segmentInfo.Level()). + WithCheckpoint(wb.checkpoint). + WithSchema(wb.collSchema). + WithBatchSize(batchSize). + WithMetaCache(wb.metaCache). + WithMetaWriter(wb.metaWriter). 
+ WithFailureCallback(func(err error) { + // TODO could change to unsub channel in the future + panic(err) + }) + if segmentInfo.State() == commonpb.SegmentState_Flushing { + task.WithFlush() + } + syncTask = task } return syncTask @@ -319,7 +400,13 @@ func (wb *writeBufferBase) Close(drop bool) { if syncTask == nil { continue } - syncTask.WithDrop() + switch t := syncTask.(type) { + case *syncmgr.SyncTask: + t.WithDrop() + case *syncmgr.SyncTaskV2: + t.WithDrop() + } + f := wb.syncMgr.SyncData(context.Background(), syncTask) futures = append(futures, f) } diff --git a/internal/datanode/writebuffer/write_buffer_test.go b/internal/datanode/writebuffer/write_buffer_test.go index ec54cc588f..e43f57c7e4 100644 --- a/internal/datanode/writebuffer/write_buffer_test.go +++ b/internal/datanode/writebuffer/write_buffer_test.go @@ -47,7 +47,7 @@ func (s *WriteBufferSuite) SetupTest() { s.metacache = metacache.NewMockMetaCache(s.T()) s.metacache.EXPECT().Schema().Return(s.collSchema).Maybe() s.metacache.EXPECT().Collection().Return(s.collID).Maybe() - s.wb = newWriteBufferBase(s.channelName, s.metacache, s.syncMgr, &writeBufferOption{ + s.wb = newWriteBufferBase(s.channelName, s.metacache, nil, s.syncMgr, &writeBufferOption{ pkStatsFactory: func(vchannel *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }, @@ -55,18 +55,18 @@ func (s *WriteBufferSuite) SetupTest() { } func (s *WriteBufferSuite) TestWriteBufferType() { - wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle)) + wb, err := NewWriteBuffer(s.channelName, s.metacache, nil, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle)) s.NoError(err) _, ok := wb.(*bfWriteBuffer) s.True(ok) - wb, err = NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(allocator.NewMockGIDAllocator())) + wb, err = NewWriteBuffer(s.channelName, s.metacache, nil, s.syncMgr, 
WithDeletePolicy(DeletePolicyL0Delta), WithIDAllocator(allocator.NewMockGIDAllocator())) s.NoError(err) _, ok = wb.(*l0WriteBuffer) s.True(ok) - _, err = NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy("")) + _, err = NewWriteBuffer(s.channelName, s.metacache, nil, s.syncMgr, WithDeletePolicy("")) s.Error(err) } @@ -85,7 +85,7 @@ func (s *WriteBufferSuite) TestFlushSegments() { s.metacache.EXPECT().UpdateSegments(mock.Anything, mock.Anything, mock.Anything) - wb, err := NewWriteBuffer(s.channelName, s.metacache, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle)) + wb, err := NewWriteBuffer(s.channelName, s.metacache, nil, s.syncMgr, WithDeletePolicy(DeletePolicyBFPkOracle)) s.NoError(err) err = wb.FlushSegments(context.Background(), []int64{segmentID}) diff --git a/internal/proto/data_coord.proto b/internal/proto/data_coord.proto index 146b272b61..5a9eec1d75 100644 --- a/internal/proto/data_coord.proto +++ b/internal/proto/data_coord.proto @@ -310,6 +310,7 @@ message SegmentInfo { // For legacy level, it represent old segment before segment level introduced // so segments with Legacy level shall be treated as L1 segment SegmentLevel level = 20; + int64 storage_version = 21; } message SegmentStartPosition { @@ -332,6 +333,7 @@ message SaveBinlogPathsRequest { string channel = 12; // report channel name for verification SegmentLevel seg_level =13; int64 partitionID =14; // report partitionID for create L0 segment + int64 storageVersion = 15; } message CheckPoint { diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 1615dd1e32..c118ba41bc 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -218,8 +218,10 @@ type commonConfig struct { LockSlowLogInfoThreshold ParamItem `refreshable:"true"` LockSlowLogWarnThreshold ParamItem `refreshable:"true"` - TTMsgEnabled ParamItem `refreshable:"true"` - TraceLogMode ParamItem `refreshable:"true"` + StorageScheme 
ParamItem `refreshable:"false"` + EnableStorageV2 ParamItem `refreshable:"false"` + TTMsgEnabled ParamItem `refreshable:"true"` + TraceLogMode ParamItem `refreshable:"true"` } func (p *commonConfig) init(base *BaseTable) { @@ -627,6 +629,20 @@ like the old password verification when updating the credential`, } p.LockSlowLogWarnThreshold.Init(base.mgr) + p.EnableStorageV2 = ParamItem{ + Key: "common.storage.enablev2", + Version: "2.3.1", + DefaultValue: "false", + } + p.EnableStorageV2.Init(base.mgr) + + p.StorageScheme = ParamItem{ + Key: "common.storage.scheme", + Version: "2.3.4", + DefaultValue: "s3", + } + p.StorageScheme.Init(base.mgr) + p.TTMsgEnabled = ParamItem{ Key: "common.ttMsgEnabled", Version: "2.3.2",