feat: compaction to support add field (#40415)

See: #39718

Signed-off-by: Ted Xu <ted.xu@zilliz.com>

This commit is contained in:
parent f4f81b7d0c
commit be86d31ea3
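
Note (illustrative, not part of the diff): a field added to a collection after some segments were already flushed appears in the current schema but has no binlogs in those older segments. The sketch below shows the kind of schema entry the new compaction path has to cope with; the field ID is hypothetical and the import path follows the published milvus proto module, which may differ in your build. Such a field is nullable, so missing values can be back-filled with nulls when old segments are compacted.

package main

// Illustrative sketch only.
import (
    "fmt"

    "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
)

func main() {
    // A field added after segments were flushed: it is nullable, so compaction
    // can fill it with nulls for rows whose binlogs predate the field
    // (see the reader changes later in this diff).
    added := &schemapb.FieldSchema{
        FieldID:  103, // hypothetical field ID
        Name:     "field_string",
        DataType: schemapb.DataType_String,
        Nullable: true,
    }
    fmt.Printf("added field %q nullable=%v\n", added.GetName(), added.GetNullable())
}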
@@ -1,18 +1,5 @@
 run:
-  go: "1.21"
-  skip-dirs:
-    - build
-    - configs
-    - deployments
-    - docs
-    - scripts
-    - internal/core
-    - cmake_build
-    - mmap
-    - data
-    - ci
-  skip-files:
-    - partial_search_test.go
+  go: "1.22"
   build-tags:
     - dynamic
     - test
@@ -51,7 +38,6 @@ linters-settings:
   enable: # add extra linters
     - nilness
   gofumpt:
-    lang-version: "1.18"
     module-path: github.com/milvus-io
   goimports:
     local-prefixes: github.com/milvus-io
@@ -142,6 +128,19 @@ linters-settings:
     #- 'fmt\.Print.*' WIP

 issues:
+  exclude-dirs:
+    - build
+    - configs
+    - deployments
+    - docs
+    - scripts
+    - internal/core
+    - cmake_build
+    - mmap
+    - data
+    - ci
+  exclude-files:
+    - partial_search_test.go
   exclude-use-default: false
   exclude-rules:
     - path: .+_test\.go
@@ -176,6 +175,31 @@ issues:
     - SA1019
     # defer return errors
     - SA5001
+    # TODO: cleanup following exclusions, added on golangci-lint upgrade
+    - sloppyLen
+    - dupSubExpr
+    - assignOp
+    - ifElseChain
+    - elseif
+    - commentFormatting
+    - var-naming
+    - exitAfterDefer
+    - captLocal
+    - singleCaseSwitch
+    - typeSwitchVar
+    - indent-error-flow
+    - appendAssign
+    - deprecatedComment
+    - SA9009
+    - SA1006
+    - S1009
+    - unlambda
+    - dupCase
+    - dupArg
+    - offBy1
+    - unslice
+    # Integer overflow conversion
+    - G115

   # Maximum issues count per one linter. Set to 0 to disable. Default is 50.
   max-issues-per-linter: 0

Makefile

@@ -45,7 +45,7 @@ ifdef USE_OPENDAL
     use_opendal = ${USE_OPENDAL}
 endif
 # golangci-lint
-GOLANGCI_LINT_VERSION := 1.55.2
+GOLANGCI_LINT_VERSION := 1.64.7
 GOLANGCI_LINT_OUTPUT := $(shell $(INSTALL_PATH)/golangci-lint --version 2>/dev/null)
 INSTALL_GOLANGCI_LINT := $(findstring $(GOLANGCI_LINT_VERSION), $(GOLANGCI_LINT_OUTPUT))
 # mockery

@@ -1,15 +1,5 @@
 run:
-  go: "1.21"
-  skip-dirs:
-    - build
-    - configs
-    - deployments
-    - docs
-    - scripts
-    - internal/core
-    - cmake_build
-  skip-files:
-    - partial_search_test.go
+  go: "1.22"

 linters:
   disable-all: true
@@ -42,7 +32,6 @@ linters-settings:
     - prefix(github.com/milvus-io)
   custom-order: true
   gofumpt:
-    lang-version: "1.18"
     module-path: github.com/milvus-io
   goimports:
     local-prefixes: github.com/milvus-io
@@ -129,6 +118,16 @@ linters-settings:
     #- 'fmt\.Print.*' WIP

 issues:
+  exclude-dirs:
+    - build
+    - configs
+    - deployments
+    - docs
+    - scripts
+    - internal/core
+    - cmake_build
+  exclude-files:
+    - partial_search_test.go
   exclude-use-default: false
   exclude-rules:
     - path: .+_test\.go
@@ -161,6 +160,31 @@ issues:
     - SA1019
     # defer return errors
     - SA5001
+    # TODO: cleanup following exclusions, added on golangci-lint upgrade
+    - sloppyLen
+    - dupSubExpr
+    - assignOp
+    - ifElseChain
+    - elseif
+    - commentFormatting
+    - var-naming
+    - exitAfterDefer
+    - captLocal
+    - singleCaseSwitch
+    - typeSwitchVar
+    - indent-error-flow
+    - appendAssign
+    - deprecatedComment
+    - SA9009
+    - SA1006
+    - S1009
+    - unlambda
+    - dupCase
+    - dupArg
+    - offBy1
+    - unslice
+    # Integer overflow conversion
+    - G115

   # Maximum issues count per one linter. Set to 0 to disable. Default is 50.
   max-issues-per-linter: 0

@@ -53,28 +53,11 @@ func mergeSortMultipleSegments(ctx context.Context,
     segmentReaders := make([]storage.RecordReader, len(binlogs))
     segmentFilters := make([]compaction.EntityFilter, len(binlogs))
     for i, s := range binlogs {
-        var binlogBatchCount int
-        for _, b := range s.GetFieldBinlogs() {
-            if b != nil {
-                binlogBatchCount = len(b.GetBinlogs())
-                break
-            }
+        reader, err := storage.NewBinlogRecordReader(ctx, s.GetFieldBinlogs(), plan.GetSchema(), storage.WithDownloader(binlogIO.Download))
+        if err != nil {
+            return nil, err
         }
-
-        if binlogBatchCount == 0 {
-            log.Warn("compacting empty segment", zap.Int64("segmentID", s.GetSegmentID()))
-            continue
-        }
-
-        binlogPaths := make([][]string, binlogBatchCount)
-        for idx := 0; idx < binlogBatchCount; idx++ {
-            var batchPaths []string
-            for _, f := range s.GetFieldBinlogs() {
-                batchPaths = append(batchPaths, f.GetBinlogs()[idx].GetLogPath())
-            }
-            binlogPaths[idx] = batchPaths
-        }
-        segmentReaders[i] = NewSegmentRecordReader(ctx, binlogPaths, binlogIO)
+        segmentReaders[i] = reader
         deltalogPaths := make([]string, 0)
         for _, d := range s.GetDeltalogs() {
             for _, l := range d.GetBinlogs() {

@@ -26,6 +26,7 @@ import (

     "github.com/cockroachdb/errors"
     "github.com/samber/lo"
+    "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/mock"
     "github.com/stretchr/testify/suite"

@@ -346,7 +347,7 @@ func (s *MixCompactionTaskSuite) TestCompactSortedSegmentLackBinlog() {
     deleteTs := tsoutil.ComposeTSByTime(getMilvusBirthday().Add(10*time.Second), 0)
     addedFieldSet := typeutil.NewSet[int64]()
     for _, f := range s.meta.GetSchema().GetFields() {
-        if f.FieldID == common.RowIDField || f.FieldID == common.TimeStampField || f.IsPrimaryKey || typeutil.IsVectorType(f.DataType) {
+        if !f.Nullable {
             continue
         }
         addedFieldSet.Insert(f.FieldID)
@@ -454,7 +455,6 @@ func (s *MixCompactionTaskSuite) TestSplitMergeEntityExpired() {
 }

 func (s *MixCompactionTaskSuite) TestMergeNoExpirationLackBinlog() {
-    s.T().Skip() // Skip added field related tests for now.
     s.initSegBuffer(1, 4)
     deleteTs := tsoutil.ComposeTSByTime(getMilvusBirthday().Add(10*time.Second), 0)
     tests := []struct {
@@ -471,18 +471,16 @@ func (s *MixCompactionTaskSuite) TestMergeNoExpirationLackBinlog() {
     alloc := allocator.NewLocalAllocator(888888, math.MaxInt64)
     addedFieldSet := typeutil.NewSet[int64]()
     for _, f := range s.meta.GetSchema().GetFields() {
-        if f.FieldID == common.RowIDField || f.FieldID == common.TimeStampField || f.IsPrimaryKey || typeutil.IsVectorType(f.DataType) {
+        if !f.Nullable {
             continue
         }
         addedFieldSet.Insert(f.FieldID)
     }
+    assert.NotEmpty(s.T(), addedFieldSet)

     kvs, fBinlogs, err := serializeWrite(context.TODO(), alloc, s.segWriter)
     for fid, binlog := range fBinlogs {
         if addedFieldSet.Contain(fid) {
-            if rand.Intn(2) == 0 {
-                continue
-            }
             for _, k := range binlog.GetBinlogs() {
                 delete(kvs, k.LogPath)
             }
@@ -996,6 +994,7 @@ func genTestCollectionMeta() *etcdpb.CollectionMeta {
                 FieldID:  StringField,
                 Name:     "field_string",
                 DataType: schemapb.DataType_String,
+                Nullable: true,
             },
             {
                 FieldID:  VarCharField,

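Note (illustrative, not part of the diff): the test changes above pick every nullable field as an "added" field and then drop that field's binlogs before upload, so the compacted segment looks as if it was written before the field existed. A hedged sketch of that setup step, built only from identifiers that appear in the hunks above; import paths are assumptions about the repo layout.

package compactor

import (
    "github.com/milvus-io/milvus/pkg/proto/datapb"   // path may vary by version
    "github.com/milvus-io/milvus/pkg/util/typeutil"
)

// dropAddedFieldBinlogs removes the binlog blobs of the chosen "added" fields
// from the key/value map that would be uploaded, so compaction later has to
// back-fill those fields with nulls. Sketch only.
func dropAddedFieldBinlogs(
    kvs map[string][]byte,
    fBinlogs map[int64]*datapb.FieldBinlog,
    addedFieldSet typeutil.Set[int64],
) {
    for fid, binlog := range fBinlogs {
        if !addedFieldSet.Contain(fid) {
            continue
        }
        for _, k := range binlog.GetBinlogs() {
            delete(kvs, k.LogPath)
        }
    }
}
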
@@ -1,31 +0,0 @@
-package compactor
-
-import (
-    "context"
-    "io"
-
-    "github.com/samber/lo"
-
-    binlogIO "github.com/milvus-io/milvus/internal/flushcommon/io"
-    "github.com/milvus-io/milvus/internal/storage"
-)
-
-func NewSegmentRecordReader(ctx context.Context, binlogPaths [][]string, binlogIO binlogIO.BinlogIO) storage.RecordReader {
-    pos := 0
-    return &storage.CompositeBinlogRecordReader{
-        BlobsReader: func() ([]*storage.Blob, error) {
-            if pos >= len(binlogPaths) {
-                return nil, io.EOF
-            }
-            bytesArr, err := binlogIO.Download(ctx, binlogPaths[pos])
-            if err != nil {
-                return nil, err
-            }
-            blobs := lo.Map(bytesArr, func(v []byte, i int) *storage.Blob {
-                return &storage.Blob{Key: binlogPaths[pos][i], Value: v}
-            })
-            pos++
-            return blobs, nil
-        },
-    }
-}

@@ -20,11 +20,8 @@ import (
     "context"
     "fmt"
     "math"
-    "strconv"

-    "github.com/apache/arrow/go/v17/arrow"
     "github.com/apache/arrow/go/v17/arrow/array"
-    "github.com/apache/arrow/go/v17/arrow/memory"
     "github.com/samber/lo"
     "go.uber.org/atomic"
     "go.uber.org/zap"
@@ -369,44 +366,7 @@ func (w *SegmentWriter) WriteRecord(r storage.Record) error {

         w.rowCount.Inc()
     }
-    builders := make([]array.Builder, len(w.sch.Fields))
-    for i, f := range w.sch.Fields {
-        var b array.Builder
-        if r.Column(f.FieldID) == nil {
-            b = array.NewBuilder(memory.DefaultAllocator, storage.MilvusDataTypeToArrowType(f.GetDataType(), 1))
-        } else {
-            b = array.NewBuilder(memory.DefaultAllocator, r.Column(f.FieldID).DataType())
-        }
-        builders[i] = b
-    }
-    for c, builder := range builders {
-        fid := w.sch.Fields[c].FieldID
-        defaultValue := w.sch.Fields[c].GetDefaultValue()
-        for i := 0; i < rows; i++ {
-            if err := storage.AppendValueAt(builder, r.Column(fid), i, defaultValue); err != nil {
-                return err
-            }
-        }
-    }
-    arrays := make([]arrow.Array, len(builders))
-    fields := make([]arrow.Field, len(builders))
-    field2Col := make(map[typeutil.UniqueID]int, len(builders))
-
-    for c, builder := range builders {
-        arrays[c] = builder.NewArray()
-        fid := w.sch.Fields[c].FieldID
-        fields[c] = arrow.Field{
-            Name:     strconv.Itoa(int(fid)),
-            Type:     arrays[c].DataType(),
-            Nullable: true, // No nullable check here.
-        }
-        field2Col[fid] = c
-    }
-
-    rec := storage.NewSimpleArrowRecord(array.NewRecord(arrow.NewSchema(fields, nil), arrays, int64(rows)), field2Col)
-    defer rec.Release()
-    return w.writer.Write(rec)
+    return w.writer.Write(r)
 }

 func (w *SegmentWriter) Write(v *storage.Value) error {

@@ -20,6 +20,7 @@ import (
     "context"
     "fmt"
     sio "io"
+    "sort"

     "github.com/samber/lo"

@@ -36,11 +37,16 @@ const (
     StorageV2 int64 = 2
 )

+type (
+    downloaderFn func(ctx context.Context, paths []string) ([][]byte, error)
+    uploaderFn   func(ctx context.Context, kvs map[string][]byte) error
+)
+
 type rwOptions struct {
     version             int64
     bufferSize          int64
-    downloader          func(ctx context.Context, paths []string) ([][]byte, error)
-    uploader            func(ctx context.Context, kvs map[string][]byte) error
+    downloader          downloaderFn
+    uploader            uploaderFn
     multiPartUploadSize int64
     columnGroups        []storagecommon.ColumnGroup
 }
@@ -90,6 +96,77 @@ func WithColumnGroups(columnGroups []storagecommon.ColumnGroup) RwOption {
     }
 }

+func makeBlobsReader(ctx context.Context, binlogs []*datapb.FieldBinlog, downloader downloaderFn) (ChunkedBlobsReader, error) {
+    if len(binlogs) == 0 {
+        return func() ([]*Blob, error) {
+            return nil, sio.EOF
+        }, nil
+    }
+    sort.Slice(binlogs, func(i, j int) bool {
+        return binlogs[i].FieldID < binlogs[j].FieldID
+    })
+    for _, binlog := range binlogs {
+        sort.Slice(binlog.Binlogs, func(i, j int) bool {
+            return binlog.Binlogs[i].LogID < binlog.Binlogs[j].LogID
+        })
+    }
+    nChunks := len(binlogs[0].Binlogs)
+    chunks := make([][]string, nChunks) // i is chunkid, j is fieldid
+    missingChunks := lo.Map(binlogs, func(binlog *datapb.FieldBinlog, _ int) int {
+        return nChunks - len(binlog.Binlogs)
+    })
+    for i := range nChunks {
+        chunks[i] = make([]string, 0, len(binlogs))
+        for j, binlog := range binlogs {
+            if i >= missingChunks[j] {
+                idx := i - missingChunks[j]
+                chunks[i] = append(chunks[i], binlog.Binlogs[idx].LogPath)
+            }
+        }
+    }
+    // verify if the chunks order is correct.
+    // the zig-zag order should have a (strict) increasing order on logids.
+    // lastLogID := int64(-1)
+    // for _, paths := range chunks {
+    //     lastFieldID := int64(-1)
+    //     for _, path := range paths {
+    //         _, _, _, fieldID, logID, ok := metautil.ParseInsertLogPath(path)
+    //         if !ok {
+    //             return nil, merr.WrapErrIoFailedReason(fmt.Sprintf("malformed log path %s", path))
+    //         }
+    //         if fieldID < lastFieldID {
+    //             return nil, merr.WrapErrIoFailedReason(fmt.Sprintf("unaligned log path %s, fieldID %d less than lastFieldID %d", path, fieldID, lastFieldID))
+    //         }
+    //         if logID < lastLogID {
+    //             return nil, merr.WrapErrIoFailedReason(fmt.Sprintf("unaligned log path %s, logID %d less than lastLogID %d", path, logID, lastLogID))
+    //         }
+    //         lastLogID = logID
+    //         lastFieldID = fieldID
+    //     }
+    // }
+
+    chunkPos := 0
+    return func() ([]*Blob, error) {
+        if chunkPos >= nChunks {
+            return nil, sio.EOF
+        }
+
+        vals, err := downloader(ctx, chunks[chunkPos])
+        if err != nil {
+            return nil, err
+        }
+        blobs := make([]*Blob, 0, len(vals))
+        for i := range vals {
+            blobs = append(blobs, &Blob{
+                Key:   chunks[chunkPos][i],
+                Value: vals[i],
+            })
+        }
+        chunkPos++
+        return blobs, nil
+    }, nil
+}
+
 func NewBinlogRecordReader(ctx context.Context, binlogs []*datapb.FieldBinlog, schema *schemapb.CollectionSchema, option ...RwOption) (RecordReader, error) {
     rwOptions := DefaultRwOptions()
     for _, opt := range option {
@@ -97,28 +174,11 @@ func NewBinlogRecordReader(ctx context.Context, binlogs []*datapb.FieldBinlog, s
     }
     switch rwOptions.version {
     case StorageV1:
-        itr := 0
-        return newCompositeBinlogRecordReader(schema, func() ([]*Blob, error) {
-            if len(binlogs) <= 0 {
-                return nil, sio.EOF
-            }
-            paths := make([]string, len(binlogs))
-            for i, fieldBinlog := range binlogs {
-                if itr >= len(fieldBinlog.GetBinlogs()) {
-                    return nil, sio.EOF
-                }
-                paths[i] = fieldBinlog.GetBinlogs()[itr].GetLogPath()
-            }
-            itr++
-            values, err := rwOptions.downloader(ctx, paths)
-            if err != nil {
-                return nil, err
-            }
-            blobs := lo.Map(values, func(v []byte, i int) *Blob {
-                return &Blob{Key: paths[i], Value: v}
-            })
-            return blobs, nil
-        })
+        blobsReader, err := makeBlobsReader(ctx, binlogs, rwOptions.downloader)
+        if err != nil {
+            return nil, err
+        }
+        return newCompositeBinlogRecordReader(schema, blobsReader)
     case StorageV2:
         if len(binlogs) <= 0 {
             return nil, sio.EOF

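Note (illustrative, not part of the diff): makeBlobsReader above groups binlog paths into per-chunk batches and right-aligns fields that have fewer binlogs than the widest field, which is exactly the shape an added field produces — it only has binlogs for the most recent chunks. The standalone sketch below reproduces that alignment arithmetic on hypothetical paths without calling the unexported function; it uses the maximum binlog count per field where the real code uses the count of the first (lowest field ID) field, so treat it as a model of the rule, not the implementation.

package main

import "fmt"

func main() {
    // Hypothetical paths: fields 100 and 101 have two binlogs each, the added
    // field 102 has only the newest one.
    binlogs := map[int64][]string{
        100: {"x/1/1/1/100/1", "x/1/1/1/100/3"},
        101: {"x/1/1/1/101/2", "x/1/1/1/101/4"},
        102: {"x/1/1/1/102/5"},
    }
    fields := []int64{100, 101, 102}

    nChunks := 0
    for _, paths := range binlogs {
        if len(paths) > nChunks {
            nChunks = len(paths)
        }
    }
    for i := 0; i < nChunks; i++ {
        chunk := []string{}
        for _, f := range fields {
            missing := nChunks - len(binlogs[f]) // leading chunks the field is absent from
            if i >= missing {
                chunk = append(chunk, binlogs[f][i-missing])
            }
        }
        fmt.Println("chunk", i, chunk)
    }
    // Output:
    // chunk 0 [x/1/1/1/100/1 x/1/1/1/101/2]
    // chunk 1 [x/1/1/1/100/3 x/1/1/1/101/4 x/1/1/1/102/5]
}

This matches the expectation encoded in the "test added field" case below: the oldest chunk contains only the pre-existing fields, and the newest chunk contains all of them.
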
@@ -26,6 +26,7 @@ import (
     "time"

     "github.com/samber/lo"
+    "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/mock"
     "github.com/stretchr/testify/suite"
     "google.golang.org/grpc"
@@ -377,3 +378,165 @@ func genCollectionSchemaWithBM25() *schemapb.CollectionSchema {
 func getMilvusBirthday() time.Time {
     return time.Date(2019, time.Month(5), 30, 0, 0, 0, 0, time.UTC)
 }
+
+func Test_makeBlobsReader(t *testing.T) {
+    ctx := context.Background()
+    downloader := func(ctx context.Context, paths []string) ([][]byte, error) {
+        return lo.Map(paths, func(item string, index int) []byte {
+            return []byte{}
+        }), nil
+    }
+
+    tests := []struct {
+        name    string
+        binlogs []*datapb.FieldBinlog
+        want    [][]*Blob
+        wantErr bool
+    }{
+        {
+            name: "test full",
+            binlogs: []*datapb.FieldBinlog{
+                {
+                    FieldID: 100,
+                    Binlogs: []*datapb.Binlog{
+                        {LogPath: "x/1/1/1/100/1"},
+                    },
+                },
+                {
+                    FieldID: 101,
+                    Binlogs: []*datapb.Binlog{
+                        {LogPath: "x/1/1/1/101/2"},
+                    },
+                },
+                {
+                    FieldID: 102,
+                    Binlogs: []*datapb.Binlog{
+                        {LogPath: "x/1/1/1/102/3"},
+                    },
+                },
+            },
+            want: [][]*Blob{
+                {
+                    {
+                        Key:   "x/1/1/1/100/1",
+                        Value: []byte{},
+                    },
+                    {
+                        Key:   "x/1/1/1/101/2",
+                        Value: []byte{},
+                    },
+                    {
+                        Key:   "x/1/1/1/102/3",
+                        Value: []byte{},
+                    },
+                },
+            },
+            wantErr: false,
+        },
+        {
+            name: "test added field",
+            binlogs: []*datapb.FieldBinlog{
+                {
+                    FieldID: 100,
+                    Binlogs: []*datapb.Binlog{
+                        {LogPath: "x/1/1/1/100/1"},
+                        {LogPath: "x/1/1/1/100/3"},
+                    },
+                },
+                {
+                    FieldID: 101,
+                    Binlogs: []*datapb.Binlog{
+                        {LogPath: "x/1/1/1/101/2"},
+                        {LogPath: "x/1/1/1/101/4"},
+                    },
+                },
+                {
+                    FieldID: 102,
+                    Binlogs: []*datapb.Binlog{
+                        {LogPath: "x/1/1/1/102/5"},
+                    },
+                },
+            },
+            want: [][]*Blob{
+                {
+                    {
+                        Key:   "x/1/1/1/100/1",
+                        Value: []byte{},
+                    },
+                    {
+                        Key:   "x/1/1/1/101/2",
+                        Value: []byte{},
+                    },
+                },
+                {
+                    {
+                        Key:   "x/1/1/1/100/3",
+                        Value: []byte{},
+                    },
+                    {
+                        Key:   "x/1/1/1/101/4",
+                        Value: []byte{},
+                    },
+                    {
+                        Key:   "x/1/1/1/102/5",
+                        Value: []byte{},
+                    },
+                },
+            },
+            wantErr: false,
+        },
+        // {
+        //     name: "test error",
+        //     binlogs: []*datapb.FieldBinlog{
+        //         {
+        //             FieldID: 100,
+        //             Binlogs: []*datapb.Binlog{
+        //                 {LogPath: "x/1/1/1/100/1"},
+        //                 {LogPath: "x/1/1/1/100/3"},
+        //             },
+        //         },
+        //         {
+        //             FieldID: 101,
+        //             Binlogs: []*datapb.Binlog{
+        //                 {LogPath: "x/1/1/1/101/2"},
+        //                 {LogPath: "x/1/1/1/101/4"},
+        //             },
+        //         },
+        //         {
+        //             FieldID: 102,
+        //             Binlogs: []*datapb.Binlog{
+        //                 {LogPath: "x/1/1/1/102/5"},
+        //                 {LogPath: "x/1/1/1/102/6"},
+        //             },
+        //         },
+        //     },
+        //     want:    nil,
+        //     wantErr: true,
+        // },
+    }
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            reader, err := makeBlobsReader(ctx, tt.binlogs, downloader)
+            if err != nil {
+                if !tt.wantErr {
+                    t.Errorf("makeBlobsReader() error = %v, wantErr %v", err, tt.wantErr)
+                }
+                return
+            }
+            got := make([][]*Blob, 0)
+            for {
+                bs, err := reader()
+                if err == io.EOF {
+                    break
+                }
+                if err != nil {
+                    assert.Fail(t, err.Error())
+                }
+                got = append(got, bs)
+            }
+            assert.Equal(t, tt.want, got)
+        })
+    }
+}

@@ -49,21 +49,26 @@ type ChunkedBlobsReader func() ([]*Blob, error)

 type CompositeBinlogRecordReader struct {
     BlobsReader ChunkedBlobsReader
+    schema      *schemapb.CollectionSchema
+    index       map[FieldID]int16
+
     brs []*BinlogReader
     rrs []array.RecordReader
-
-    schema *schemapb.CollectionSchema
-    index  map[FieldID]int16
 }

 func (crr *CompositeBinlogRecordReader) iterateNextBatch() error {
     if crr.brs != nil {
         for _, er := range crr.brs {
-            er.Close()
+            if er != nil {
+                er.Close()
+            }
         }
+    }
+    if crr.rrs != nil {
         for _, rr := range crr.rrs {
-            rr.Release()
+            if rr != nil {
+                rr.Release()
+            }
         }
     }
@@ -72,13 +77,10 @@ func (crr *CompositeBinlogRecordReader) iterateNextBatch() error {
         return err
     }

-    if crr.rrs == nil {
-        crr.rrs = make([]array.RecordReader, len(blobs))
-        crr.brs = make([]*BinlogReader, len(blobs))
-        crr.index = make(map[FieldID]int16, len(blobs))
-    }
+    crr.rrs = make([]array.RecordReader, len(crr.schema.Fields))
+    crr.brs = make([]*BinlogReader, len(crr.schema.Fields))

-    for i, b := range blobs {
+    for _, b := range blobs {
         reader, err := NewBinlogReader(b.Value)
         if err != nil {
             return err
@@ -88,12 +90,12 @@ func (crr *CompositeBinlogRecordReader) iterateNextBatch() error {
         if err != nil {
             return err
         }
+        i := crr.index[reader.FieldID]
         rr, err := er.GetArrowRecordReader()
         if err != nil {
             return err
         }
         crr.rrs[i] = rr
-        crr.index[reader.FieldID] = int16(i)
         crr.brs[i] = reader
     }
     return nil
@@ -106,36 +108,45 @@ func (crr *CompositeBinlogRecordReader) Next() (Record, error) {
         }
     }

-    composeRecord := func() (Record, bool) {
-        recs := make([]arrow.Array, len(crr.rrs))
-        for i, rr := range crr.rrs {
-            if ok := rr.Next(); !ok {
-                return nil, false
-            }
-            recs[i] = rr.Record().Column(0)
-        }
-        return &compositeRecord{
-            index: crr.index,
-            recs:  recs,
-        }, true
-    }
-
-    // Try compose records
-    var (
-        r  Record
-        ok bool
-    )
-    r, ok = composeRecord()
-    if !ok {
-        // If failed the first time, try iterate next batch (blob), the error may be io.EOF
-        if err := crr.iterateNextBatch(); err != nil {
-            return nil, err
-        }
-        // If iterate next batch success, try compose again
-        if r, ok = composeRecord(); !ok {
-            // If the next blob is empty, return io.EOF (it's rare).
-            return nil, io.EOF
-        }
-    }
+    composeRecord := func() (Record, error) {
+        recs := make([]arrow.Array, len(crr.schema.Fields))
+        for i, f := range crr.schema.Fields {
+            if crr.rrs[i] != nil {
+                if ok := crr.rrs[i].Next(); !ok {
+                    return nil, io.EOF
+                }
+                recs[i] = crr.rrs[i].Record().Column(0)
+            } else {
+                // If the field is not in the current batch, fill with null array
+                // Note that we're intentionally not filling default value here, because the
+                // deserializer will fill them later.
+                if !f.Nullable {
+                    return nil, merr.WrapErrServiceInternal(fmt.Sprintf("missing field data %s", f.Name))
+                }
+                dim, _ := typeutil.GetDim(f)
+                builder := array.NewBuilder(memory.DefaultAllocator, serdeMap[f.DataType].arrowType(int(dim)))
+                builder.AppendNulls(int(crr.rrs[0].Record().NumRows()))
+                recs[i] = builder.NewArray()
+            }
+        }
+        return &compositeRecord{
+            index: crr.index,
+            recs:  recs,
+        }, nil
+    }
+
+    // Try compose records
+    r, err := composeRecord()
+    if err == io.EOF {
+        // if EOF, try iterate next batch (blob)
+        if err := crr.iterateNextBatch(); err != nil {
+            return nil, err
+        }
+        r, err = composeRecord() // try compose again
+    }
+    if err != nil {
+        return nil, err
+    }
     return r, nil
 }
@@ -170,43 +181,45 @@ func parseBlobKey(blobKey string) (colId FieldID, logId UniqueID) {
 }

 func MakeBlobsReader(blobs []*Blob) ChunkedBlobsReader {
-    blobMap := make(map[FieldID][]*Blob)
-    for _, blob := range blobs {
-        colId, _ := parseBlobKey(blob.Key)
-        if _, exists := blobMap[colId]; !exists {
-            blobMap[colId] = []*Blob{blob}
-        } else {
-            blobMap[colId] = append(blobMap[colId], blob)
-        }
-    }
-    sortedBlobs := make([][]*Blob, 0, len(blobMap))
-    for _, blobsForField := range blobMap {
-        sort.Slice(blobsForField, func(i, j int) bool {
-            _, iLog := parseBlobKey(blobsForField[i].Key)
-            _, jLog := parseBlobKey(blobsForField[j].Key)
-
-            return iLog < jLog
-        })
-        sortedBlobs = append(sortedBlobs, blobsForField)
-    }
+    // sort blobs by log id
+    sort.Slice(blobs, func(i, j int) bool {
+        _, iLog := parseBlobKey(blobs[i].Key)
+        _, jLog := parseBlobKey(blobs[j].Key)
+        return iLog < jLog
+    })
+    var field0 FieldID
+    pivots := make([]int, 0)
+    for i, blob := range blobs {
+        if i == 0 {
+            field0, _ = parseBlobKey(blob.Key)
+            pivots = append(pivots, 0)
+            continue
+        }
+        if fieldID, _ := parseBlobKey(blob.Key); fieldID == field0 {
+            pivots = append(pivots, i)
+        }
+    }
+    pivots = append(pivots, len(blobs)) // append a pivot to the end of the slice
     chunkPos := 0
     return func() ([]*Blob, error) {
-        if len(sortedBlobs) == 0 || chunkPos >= len(sortedBlobs[0]) {
+        if chunkPos >= len(pivots)-1 {
             return nil, io.EOF
         }
-        blobs := make([]*Blob, len(sortedBlobs))
-        for fieldPos := range blobs {
-            blobs[fieldPos] = sortedBlobs[fieldPos][chunkPos]
-        }
+        chunk := blobs[pivots[chunkPos]:pivots[chunkPos+1]]
         chunkPos++
-        return blobs, nil
+        return chunk, nil
     }
 }

 func newCompositeBinlogRecordReader(schema *schemapb.CollectionSchema, blobsReader ChunkedBlobsReader) (*CompositeBinlogRecordReader, error) {
+    index := make(map[FieldID]int16)
+    for i, f := range schema.Fields {
+        index[f.FieldID] = int16(i)
+    }
     return &CompositeBinlogRecordReader{
         schema:      schema,
         BlobsReader: blobsReader,
+        index:       index,
     }, nil
 }
@@ -235,16 +248,12 @@ func ValueDeserializer(r Record, v []*Value, fieldSchema []*schemapb.FieldSchema
         for _, f := range fieldSchema {
             j := f.FieldID
             dt := f.DataType
-            if r.Column(j) == nil {
+            if r.Column(j).IsNull(i) {
                 if f.GetDefaultValue() != nil {
                     m[j] = getDefaultValue(f)
                 } else {
                     m[j] = nil
                 }
-                continue
-            }
-            if r.Column(j).IsNull(i) {
-                m[j] = nil
             } else {
                 d, ok := serdeMap[dt].deserialize(r.Column(j), i)
                 if ok {
@@ -286,7 +295,15 @@ func NewBinlogDeserializeReader(schema *schemapb.CollectionSchema, blobsReader C
 }

 func newDeltalogOneFieldReader(blobs []*Blob) (*DeserializeReaderImpl[*DeleteLog], error) {
-    reader, err := newCompositeBinlogRecordReader(nil, MakeBlobsReader(blobs))
+    reader, err := newCompositeBinlogRecordReader(
+        &schemapb.CollectionSchema{
+            Fields: []*schemapb.FieldSchema{
+                {
+                    DataType: schemapb.DataType_VarChar,
+                },
+            },
+        },
+        MakeBlobsReader(blobs))
     if err != nil {
         return nil, err
     }
@@ -923,7 +940,7 @@ func newDeltalogSerializeWriter(eventWriter *DeltalogStreamWriter, batchSize int
     }
     rws[0] = rw
     compositeRecordWriter := NewCompositeRecordWriter(rws)
-    return NewSerializeRecordWriter[*DeleteLog](compositeRecordWriter, func(v []*DeleteLog) (Record, error) {
+    return NewSerializeRecordWriter(compositeRecordWriter, func(v []*DeleteLog) (Record, error) {
         builder := array.NewBuilder(memory.DefaultAllocator, arrow.BinaryTypes.String)

         for _, vv := range v {

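Note (illustrative, not part of the diff): when a schema field has no reader in the current batch, composeRecord above builds an all-null Arrow array of the batch's row count instead of failing, provided the field is nullable; defaults are applied later by the deserializer. A self-contained sketch of that back-fill step, using only the Arrow Go v17 calls visible in the hunk (array.NewBuilder, AppendNulls, NewArray); the data type and row count are hypothetical.

package main

import (
    "fmt"

    "github.com/apache/arrow/go/v17/arrow"
    "github.com/apache/arrow/go/v17/arrow/array"
    "github.com/apache/arrow/go/v17/arrow/memory"
)

func main() {
    numRows := 4 // hypothetical: rows in the current record batch

    // Back-fill a missing (added, nullable) string field with nulls — the same
    // shape composeRecord produces for fields absent from old binlogs.
    builder := array.NewBuilder(memory.DefaultAllocator, arrow.BinaryTypes.String)
    builder.AppendNulls(numRows)
    arr := builder.NewArray()
    defer arr.Release()

    fmt.Println(arr.Len(), arr.NullN()) // 4 4: every value is null
}
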
@@ -32,6 +32,7 @@ import (
     "github.com/apache/arrow/go/v17/arrow/memory"
     "github.com/apache/arrow/go/v17/parquet/file"
     "github.com/apache/arrow/go/v17/parquet/pqarrow"
+    "github.com/samber/lo"
     "github.com/stretchr/testify/assert"
     "go.uber.org/zap"

@@ -44,7 +45,7 @@ import (

 func TestBinlogDeserializeReader(t *testing.T) {
     t.Run("test empty data", func(t *testing.T) {
-        reader, err := NewBinlogDeserializeReader(nil, func() ([]*Blob, error) {
+        reader, err := NewBinlogDeserializeReader(generateTestSchema(), func() ([]*Blob, error) {
             return nil, io.EOF
         })
         assert.NoError(t, err)
@@ -184,7 +185,7 @@ func TestBinlogSerializeWriter(t *testing.T) {

 func TestBinlogValueWriter(t *testing.T) {
     t.Run("test empty data", func(t *testing.T) {
-        reader, err := NewBinlogDeserializeReader(nil, func() ([]*Blob, error) {
+        reader, err := NewBinlogDeserializeReader(generateTestSchema(), func() ([]*Blob, error) {
             return nil, io.EOF
         })
         assert.NoError(t, err)
@@ -708,3 +709,108 @@ func readDeltaLog(size int, blob *Blob) error {
     }
     return nil
 }
+
+func TestMakeBlobsReader(t *testing.T) {
+    type args struct {
+        blobs []string
+    }
+    tests := []struct {
+        name string
+        args args
+        want [][]string
+    }{
+        {
+            name: "test empty",
+            args: args{
+                blobs: nil,
+            },
+            want: nil,
+        },
+        {
+            name: "test aligned",
+            args: args{
+                blobs: []string{
+                    "x/1/1/1/1/1",
+                    "x/1/1/1/2/2",
+                    "x/1/1/1/3/3",
+                    "x/1/1/1/1/4",
+                    "x/1/1/1/2/5",
+                    "x/1/1/1/3/6",
+                    "x/1/1/1/1/7",
+                    "x/1/1/1/2/8",
+                    "x/1/1/1/3/9",
+                },
+            },
+            want: [][]string{
+                {"x/1/1/1/1/1", "x/1/1/1/2/2", "x/1/1/1/3/3"},
+                {"x/1/1/1/1/4", "x/1/1/1/2/5", "x/1/1/1/3/6"},
+                {"x/1/1/1/1/7", "x/1/1/1/2/8", "x/1/1/1/3/9"},
+            },
+        },
+        {
+            name: "test added field",
+            args: args{
+                blobs: []string{
+                    "x/1/1/1/1/1",
+                    "x/1/1/1/2/2",
+                    "x/1/1/1/1/3",
+                    "x/1/1/1/2/4",
+                    "x/1/1/1/1/5",
+                    "x/1/1/1/2/6",
+                    "x/1/1/1/3/7",
+                },
+            },
+            want: [][]string{
+                {"x/1/1/1/1/1", "x/1/1/1/2/2"},
+                {"x/1/1/1/1/3", "x/1/1/1/2/4"},
+                {"x/1/1/1/1/5", "x/1/1/1/2/6", "x/1/1/1/3/7"},
+            },
+        },
+        {
+            name: "test if there is a hole",
+            args: args{
+                blobs: []string{
+                    "x/1/1/1/1/1",
+                    "x/1/1/1/2/2",
+                    "x/1/1/1/3/3",
+                    "x/1/1/1/1/4",
+                    "x/1/1/1/2/5",
+                    "x/1/1/1/1/6",
+                    "x/1/1/1/2/7",
+                    "x/1/1/1/3/8",
+                },
+            },
+            want: [][]string{
+                {"x/1/1/1/1/1", "x/1/1/1/2/2", "x/1/1/1/3/3"},
+                {"x/1/1/1/1/4", "x/1/1/1/2/5"},
+                {"x/1/1/1/1/6", "x/1/1/1/2/7", "x/1/1/1/3/8"},
+            },
+        },
+    }
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            blobs := lo.Map(tt.args.blobs, func(item string, index int) *Blob {
+                return &Blob{
+                    Key: item,
+                }
+            })
+            reader := MakeBlobsReader(blobs)
+            got := make([][]string, 0)
+            for {
+                bs, err := reader()
+                if err == io.EOF {
+                    break
+                }
+                if err != nil {
+                    assert.Fail(t, err.Error())
+                }
+                got = append(got, lo.Map(bs, func(item *Blob, index int) string {
+                    return item.Key
+                }))
+            }
+            assert.ElementsMatch(t, tt.want, got)
+        })
+    }
+}

@@ -1,15 +1,5 @@
 run:
-  go: "1.21"
-  skip-dirs:
-    - build
-    - configs
-    - deployments
-    - docs
-    - scripts
-    - internal/core
-    - cmake_build
-  skip-files:
-    - partial_search_test.go
+  go: "1.22"

 linters:
   disable-all: true
@@ -42,7 +32,6 @@ linters-settings:
     - prefix(github.com/milvus-io)
   custom-order: true
   gofumpt:
-    lang-version: "1.18"
     module-path: github.com/milvus-io
   goimports:
     local-prefixes: github.com/milvus-io
@@ -129,6 +118,16 @@ linters-settings:
     #- 'fmt\.Print.*' WIP

 issues:
+  exclude-dirs:
+    - build
+    - configs
+    - deployments
+    - docs
+    - scripts
+    - internal/core
+    - cmake_build
+  exclude-files:
+    - partial_search_test.go
   exclude-use-default: false
   exclude-rules:
     - path: .+_test\.go
@@ -161,6 +160,31 @@ issues:
     - SA1019
     # defer return errors
     - SA5001
+    # TODO: cleanup following exclusions, added on golangci-lint upgrade
+    - sloppyLen
+    - dupSubExpr
+    - assignOp
+    - ifElseChain
+    - elseif
+    - commentFormatting
+    - var-naming
+    - exitAfterDefer
+    - captLocal
+    - singleCaseSwitch
+    - typeSwitchVar
+    - indent-error-flow
+    - appendAssign
+    - deprecatedComment
+    - SA9009
+    - SA1006
+    - S1009
+    - unlambda
+    - dupCase
+    - dupArg
+    - offBy1
+    - unslice
+    # Integer overflow conversion
+    - G115

   # Maximum issues count per one linter. Set to 0 to disable. Default is 50.
   max-issues-per-linter: 0