issue: #44358

Implement a complete snapshot management system, covering creation, deletion, listing, description, and restoration across all system components.

Key features:
- Create snapshots for entire collections
- Drop snapshots by name with proper cleanup
- List snapshots with collection filtering
- Describe snapshot details and metadata

Components added/modified:
- Client SDK with full snapshot API support and options
- DataCoord snapshot service with metadata management
- Proxy layer with task-based snapshot operations
- Protocol buffer definitions for snapshot RPCs
- Comprehensive unit tests with the mockey framework
- Integration tests for end-to-end validation

Technical implementation:
- Snapshot metadata storage in etcd with proper indexing
- File-based snapshot data persistence in object storage
- Garbage collection integration for snapshot cleanup
- Error handling and validation across all operations
- Thread-safe operations with proper locking mechanisms

Release notes (coderabbit.ai):
- Core invariant/assumption: snapshots are immutable point-in-time captures identified by (collection, snapshot name/ID). The etcd snapshot metadata is authoritative for lifecycle (PENDING → COMMITTED → DELETING), and per-segment manifests live in object storage (Avro / StorageV2). GC and restore logic must see snapshotRefIndex loaded (snapshotMeta.IsRefIndexLoaded) before reclaiming or relying on segment/index files.
- New capability: a full end-to-end snapshot subsystem — client SDK APIs (Create/Drop/List/Describe/Restore plus restore-job queries), DataCoord SnapshotWriter/Reader (Avro + StorageV2 manifests), snapshotMeta in meta, SnapshotManager orchestration (create/drop/describe/list/restore), copy-segment restore tasks/inspector/checker, the proxy and RPC surface, GC integration, and docs/tests — enabling point-in-time collection snapshots persisted to object storage and restorations orchestrated across components.
- Logic removed/simplified: duplicated recursive compaction/delta-log traversal and ad-hoc lookup code were consolidated behind two focused APIs/owners (Handler.GetDeltaLogFromCompactTo for delta traversal, SnapshotManager/SnapshotReader for snapshot I/O), and MixCoord/coordinator broker paths were converted to thin RPC proxies. This eliminates multiple implementations of the same traversal/lookup, reducing divergence and simplifying responsibility boundaries.
- Why this does NOT introduce data loss or regressions: snapshot create/drop use explicit two-phase semantics (PENDING → COMMIT/DELETING), with SnapshotWriter writing manifests and metadata before commit; GC uses snapshotRefIndex guards and IsRefIndexLoaded/GetSnapshotBySegment/GetSnapshotByIndex checks to avoid removing referenced files; the restore flow pre-allocates job IDs, validates resources (partitions/indexes), rolls back on failure (rollbackRestoreSnapshot), and converts/updates segment/index metadata only after successful copy tasks. Extensive unit and integration tests exercise the pending/deleting/GC/restore/error paths to ensure idempotence and protection against premature deletion.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
852 lines · 26 KiB · Go
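This file is the unit-test suite for the copy-segment path utilities used by snapshot restore. The rule the tests pin down: a binlog path of the form `files/<log_type>/<collectionID>/<partitionID>/<segmentID>/<fieldID>/<logID>` keeps its prefix and suffix while the three ID components are swapped from the source segment to the target segment. The sketch below illustrates only that substitution rule; `rewriteBinlogPath` is a hypothetical helper written for this note, not the production `generateTargetPath`, and it assumes the path layout shown in the test fixtures.

```go
package main

import (
    "fmt"
    "strings"
)

// rewriteBinlogPath is a hypothetical sketch of the ID-substitution rule the
// tests below encode: replace the collection/partition/segment components that
// directly follow the log-type component, leaving everything else intact.
func rewriteBinlogPath(path string, srcColl, srcPart, srcSeg, dstColl, dstPart, dstSeg int64) (string, error) {
    parts := strings.Split(path, "/")
    // Expect at least: root, log type, collection, partition, segment, field, log file.
    if len(parts) < 7 {
        return "", fmt.Errorf("path too short: %s", path)
    }
    switch parts[1] {
    case "insert_log", "delta_log", "stats_log", "bm25_stats":
        // Known binlog log types from the test fixtures.
    default:
        return "", fmt.Errorf("unknown log type in path: %s", path)
    }
    if parts[2] != fmt.Sprint(srcColl) || parts[3] != fmt.Sprint(srcPart) || parts[4] != fmt.Sprint(srcSeg) {
        return "", fmt.Errorf("path %s does not match source IDs", path)
    }
    parts[2], parts[3], parts[4] = fmt.Sprint(dstColl), fmt.Sprint(dstPart), fmt.Sprint(dstSeg)
    return strings.Join(parts, "/"), nil
}

func main() {
    out, err := rewriteBinlogPath("files/insert_log/111/222/333/100/log1.log", 111, 222, 333, 444, 555, 666)
    fmt.Println(out, err) // files/insert_log/444/555/666/100/log1.log <nil>
}
```

Index-file paths follow the same idea but embed the ID triple at different depths per index type, which is what `TestGenerateTargetIndexPath` below exercises.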
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package importv2

import (
    "context"
    "errors"
    "testing"

    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/mock"

    "github.com/milvus-io/milvus/internal/mocks"
    "github.com/milvus-io/milvus/pkg/v2/proto/datapb"
    "github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
)

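// TestGenerateTargetPath verifies that insert/delta/stats/bm25 binlog paths are
// rewritten from the source (collection, partition, segment) IDs to the target
// IDs, and that paths missing a log-type component or with too few components
// are rejected.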
func TestGenerateTargetPath(t *testing.T) {
    source := &datapb.CopySegmentSource{
        CollectionId: 111,
        PartitionId:  222,
        SegmentId:    333,
    }
    target := &datapb.CopySegmentTarget{
        CollectionId: 444,
        PartitionId:  555,
        SegmentId:    666,
    }

    tests := []struct {
        name       string
        sourcePath string
        wantPath   string
        wantErr    bool
    }{
        {
            name:       "insert binlog path",
            sourcePath: "files/insert_log/111/222/333/100/log1.log",
            wantPath:   "files/insert_log/444/555/666/100/log1.log",
            wantErr:    false,
        },
        {
            name:       "delta binlog path",
            sourcePath: "files/delta_log/111/222/333/100/log1.log",
            wantPath:   "files/delta_log/444/555/666/100/log1.log",
            wantErr:    false,
        },
        {
            name:       "stats binlog path",
            sourcePath: "files/stats_log/111/222/333/100/log1.log",
            wantPath:   "files/stats_log/444/555/666/100/log1.log",
            wantErr:    false,
        },
        {
            name:       "bm25 binlog path",
            sourcePath: "files/bm25_stats/111/222/333/100/log1.log",
            wantPath:   "files/bm25_stats/444/555/666/100/log1.log",
            wantErr:    false,
        },
        {
            name:       "invalid path - no log type",
            sourcePath: "files/111/222/333/100/log1.log",
            wantPath:   "",
            wantErr:    true,
        },
        {
            name:       "invalid path - too short",
            sourcePath: "files/insert_log/111",
            wantPath:   "",
            wantErr:    true,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            gotPath, err := generateTargetPath(tt.sourcePath, source, target)
            if tt.wantErr {
                assert.Error(t, err)
            } else {
                assert.NoError(t, err)
                assert.Equal(t, tt.wantPath, gotPath)
            }
        })
    }
}

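// TestGenerateTargetIndexPath covers path rewriting for vector/scalar, text,
// JSON key, and JSON stats index files, each of which embeds the ID triple at a
// different depth, plus rejection of unknown layouts and unsupported index types.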
func TestGenerateTargetIndexPath(t *testing.T) {
    source := &datapb.CopySegmentSource{
        CollectionId: 111,
        PartitionId:  222,
        SegmentId:    333,
    }
    target := &datapb.CopySegmentTarget{
        CollectionId: 444,
        PartitionId:  555,
        SegmentId:    666,
    }

    tests := []struct {
        name       string
        sourcePath string
        indexType  string
        wantPath   string
        wantErr    bool
    }{
        {
            name:       "vector scalar index path",
            sourcePath: "files/index_files/111/222/333/100/1001/1002/scalar_index",
            indexType:  IndexTypeVectorScalar,
            wantPath:   "files/index_files/444/555/666/100/1001/1002/scalar_index",
            wantErr:    false,
        },
        {
            name:       "text index path",
            sourcePath: "files/text_log/123/1/111/222/333/100/index_file",
            indexType:  IndexTypeText,
            wantPath:   "files/text_log/123/1/444/555/666/100/index_file",
            wantErr:    false,
        },
        {
            name:       "json key index path",
            sourcePath: "files/json_key_index_log/123/1/111/222/333/100/index_file",
            indexType:  IndexTypeJSONKey,
            wantPath:   "files/json_key_index_log/123/1/444/555/666/100/index_file",
            wantErr:    false,
        },
        {
            name:       "json stats path - shared_key_index",
            sourcePath: "files/json_stats/2/123/1/111/222/333/100/shared_key_index/index_file",
            indexType:  IndexTypeJSONStats,
            wantPath:   "files/json_stats/2/123/1/444/555/666/100/shared_key_index/index_file",
            wantErr:    false,
        },
        {
            name:       "json stats path - shredding_data",
            sourcePath: "files/json_stats/2/123/1/111/222/333/100/shredding_data/data_file",
            indexType:  IndexTypeJSONStats,
            wantPath:   "files/json_stats/2/123/1/444/555/666/100/shredding_data/data_file",
            wantErr:    false,
        },
        {
            name:       "invalid - keyword not found",
            sourcePath: "files/other_index/111/222/333/100/index",
            indexType:  IndexTypeVectorScalar,
            wantPath:   "",
            wantErr:    true,
        },
        {
            name:       "invalid - path too short",
            sourcePath: "files/index_files/111",
            indexType:  IndexTypeVectorScalar,
            wantPath:   "",
            wantErr:    true,
        },
        {
            name:       "invalid - unsupported index type",
            sourcePath: "files/unknown_index/111/222/333/100/index",
            indexType:  "unknown_type",
            wantPath:   "",
            wantErr:    true,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            gotPath, err := generateTargetIndexPath(tt.sourcePath, source, target, tt.indexType)
            if tt.wantErr {
                assert.Error(t, err)
            } else {
                assert.NoError(t, err)
                assert.Equal(t, tt.wantPath, gotPath)
            }
        })
    }
}

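// TestTransformFieldBinlogs checks that binlog paths are remapped through the
// source-to-target mapping table, that row counts are accumulated only when
// requested (insert logs), and that binlog entries with empty paths are skipped.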
func TestTransformFieldBinlogs(t *testing.T) {
    mappings := map[string]string{
        "files/insert_log/111/222/333/100/log1.log": "files/insert_log/444/555/666/100/log1.log",
        "files/insert_log/111/222/333/101/log2.log": "files/insert_log/444/555/666/101/log2.log",
    }

    srcFieldBinlogs := []*datapb.FieldBinlog{
        {
            FieldID: 100,
            Binlogs: []*datapb.Binlog{
                {
                    EntriesNum:    1000,
                    TimestampFrom: 100,
                    TimestampTo:   200,
                    LogPath:       "files/insert_log/111/222/333/100/log1.log",
                    LogSize:       1024,
                },
            },
        },
        {
            FieldID: 101,
            Binlogs: []*datapb.Binlog{
                {
                    EntriesNum:    2000,
                    TimestampFrom: 150,
                    TimestampTo:   250,
                    LogPath:       "files/insert_log/111/222/333/101/log2.log",
                    LogSize:       2048,
                },
            },
        },
    }

    t.Run("count rows for insert logs", func(t *testing.T) {
        result, totalRows, err := transformFieldBinlogs(srcFieldBinlogs, mappings, true)
        assert.NoError(t, err)
        assert.Equal(t, int64(3000), totalRows)
        assert.Equal(t, 2, len(result))

        // Verify first field binlog
        assert.Equal(t, int64(100), result[0].FieldID)
        assert.Equal(t, 1, len(result[0].Binlogs))
        assert.Equal(t, int64(1000), result[0].Binlogs[0].EntriesNum)
        assert.Equal(t, "files/insert_log/444/555/666/100/log1.log", result[0].Binlogs[0].LogPath)
        assert.Equal(t, int64(1024), result[0].Binlogs[0].LogSize)

        // Verify second field binlog
        assert.Equal(t, int64(101), result[1].FieldID)
        assert.Equal(t, 1, len(result[1].Binlogs))
        assert.Equal(t, int64(2000), result[1].Binlogs[0].EntriesNum)
    })

    t.Run("no row counting for stats logs", func(t *testing.T) {
        result, totalRows, err := transformFieldBinlogs(srcFieldBinlogs, mappings, false)
        assert.NoError(t, err)
        assert.Equal(t, int64(0), totalRows)
        assert.Equal(t, 2, len(result))
    })

    t.Run("skip binlogs with empty path", func(t *testing.T) {
        srcWithEmpty := []*datapb.FieldBinlog{
            {
                FieldID: 100,
                Binlogs: []*datapb.Binlog{
                    {
                        EntriesNum: 1000,
                        LogPath:    "",
                    },
                },
            },
        }
        result, _, err := transformFieldBinlogs(srcWithEmpty, mappings, false)
        assert.NoError(t, err)
        assert.Equal(t, 0, len(result))
    })
}

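// TestCreateFileMappings builds the full source-to-target path mapping across
// every file kind a segment can carry: insert/delta/stats/bm25 binlogs,
// vector/scalar index files, text indexes, and both the legacy (format 1) and
// new (format 2) JSON key stats layouts.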
func TestCreateFileMappings(t *testing.T) {
    source := &datapb.CopySegmentSource{
        CollectionId: 111,
        PartitionId:  222,
        SegmentId:    333,
        InsertBinlogs: []*datapb.FieldBinlog{
            {
                FieldID: 100,
                Binlogs: []*datapb.Binlog{
                    {LogPath: "files/insert_log/111/222/333/100/log1.log"},
                },
            },
        },
        DeltaBinlogs: []*datapb.FieldBinlog{
            {
                FieldID: 100,
                Binlogs: []*datapb.Binlog{
                    {LogPath: "files/delta_log/111/222/333/100/delta1.log"},
                },
            },
        },
        StatsBinlogs: []*datapb.FieldBinlog{
            {
                FieldID: 100,
                Binlogs: []*datapb.Binlog{
                    {LogPath: "files/stats_log/111/222/333/100/stats1.log"},
                },
            },
        },
        Bm25Binlogs: []*datapb.FieldBinlog{
            {
                FieldID: 100,
                Binlogs: []*datapb.Binlog{
                    {LogPath: "files/bm25_stats/111/222/333/100/bm25_1.log"},
                },
            },
        },
        IndexFiles: []*indexpb.IndexFilePathInfo{
            {
                FieldID:        100,
                IndexFilePaths: []string{"files/index_files/111/222/333/100/1001/1002/index1"},
            },
        },
        TextIndexFiles: map[int64]*datapb.TextIndexStats{
            100: {
                FieldID: 100,
                Files:   []string{"files/text_log/123/1/111/222/333/100/text1"},
            },
        },
        JsonKeyIndexFiles: map[int64]*datapb.JsonKeyStats{
            101: {
                FieldID:                101,
                JsonKeyStatsDataFormat: 1, // Legacy format
                Files:                  []string{"files/json_key_index_log/123/1/111/222/333/101/json1"},
            },
            102: {
                FieldID:                102,
                BuildID:                3002,
                Version:                1,
                JsonKeyStatsDataFormat: 2, // New format
                Files: []string{
                    "files/json_stats/2/3002/1/111/222/333/102/shared_key_index/index1",
                    "files/json_stats/2/3002/1/111/222/333/102/shredding_data/data1",
                },
            },
        },
    }

    target := &datapb.CopySegmentTarget{
        CollectionId: 444,
        PartitionId:  555,
        SegmentId:    666,
    }

    t.Run("create all file mappings", func(t *testing.T) {
        mappings, err := createFileMappings(source, target)
        assert.NoError(t, err)
        assert.Equal(t, 9, len(mappings)) // 7 single-file mappings + 2 JSON stats files

        // Verify insert binlog mapping
        assert.Equal(t, "files/insert_log/444/555/666/100/log1.log",
            mappings["files/insert_log/111/222/333/100/log1.log"])

        // Verify delta binlog mapping
        assert.Equal(t, "files/delta_log/444/555/666/100/delta1.log",
            mappings["files/delta_log/111/222/333/100/delta1.log"])

        // Verify stats binlog mapping
        assert.Equal(t, "files/stats_log/444/555/666/100/stats1.log",
            mappings["files/stats_log/111/222/333/100/stats1.log"])

        // Verify BM25 binlog mapping
        assert.Equal(t, "files/bm25_stats/444/555/666/100/bm25_1.log",
            mappings["files/bm25_stats/111/222/333/100/bm25_1.log"])

        // Verify vector/scalar index mapping
        assert.Equal(t, "files/index_files/444/555/666/100/1001/1002/index1",
            mappings["files/index_files/111/222/333/100/1001/1002/index1"])

        // Verify text index mapping
        assert.Equal(t, "files/text_log/123/1/444/555/666/100/text1",
            mappings["files/text_log/123/1/111/222/333/100/text1"])

        // Verify JSON key index mapping (legacy format)
        assert.Equal(t, "files/json_key_index_log/123/1/444/555/666/101/json1",
            mappings["files/json_key_index_log/123/1/111/222/333/101/json1"])

        // Verify JSON stats mapping (new format)
        assert.Equal(t, "files/json_stats/2/3002/1/444/555/666/102/shared_key_index/index1",
            mappings["files/json_stats/2/3002/1/111/222/333/102/shared_key_index/index1"])
        assert.Equal(t, "files/json_stats/2/3002/1/444/555/666/102/shredding_data/data1",
            mappings["files/json_stats/2/3002/1/111/222/333/102/shredding_data/data1"])
    })
}

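// TestCopySegmentAndIndexFiles exercises the top-level copy entry point against
// a mocked ChunkManager, covering both a successful copy (segment and index info
// populated, all copied files reported) and a copy failure (error surfaced,
// nil result).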
func TestCopySegmentAndIndexFiles(t *testing.T) {
    source := &datapb.CopySegmentSource{
        CollectionId: 111,
        PartitionId:  222,
        SegmentId:    333,
        InsertBinlogs: []*datapb.FieldBinlog{
            {
                FieldID: 100,
                Binlogs: []*datapb.Binlog{
                    {
                        EntriesNum: 1000,
                        LogPath:    "files/insert_log/111/222/333/100/12345",
                        LogSize:    1024,
                    },
                },
            },
        },
        IndexFiles: []*indexpb.IndexFilePathInfo{
            {
                FieldID:        100,
                IndexID:        1001,
                BuildID:        1002,
                IndexFilePaths: []string{"files/index_files/111/222/333/100/1001/1002/index1"},
            },
        },
    }

    target := &datapb.CopySegmentTarget{
        CollectionId: 444,
        PartitionId:  555,
        SegmentId:    666,
    }

    t.Run("successful copy", func(t *testing.T) {
        mockCM := mocks.NewChunkManager(t)
        mockCM.EXPECT().Copy(mock.Anything, mock.Anything, mock.Anything).Return(nil).Times(2)

        result, copiedFiles, err := CopySegmentAndIndexFiles(context.Background(), mockCM, source, target, nil)

        assert.NoError(t, err)
        assert.NotNil(t, result)
        assert.Equal(t, int64(666), result.SegmentId)
        assert.Equal(t, int64(1000), result.ImportedRows)
        assert.Equal(t, 1, len(result.Binlogs))
        assert.Equal(t, 1, len(result.IndexInfos))
        assert.Len(t, copiedFiles, 2)
    })

    t.Run("copy failure", func(t *testing.T) {
        mockCM := mocks.NewChunkManager(t)
        mockCM.EXPECT().Copy(mock.Anything, mock.Anything, mock.Anything).
            Return(errors.New("copy failed")).Once()

        result, copiedFiles, err := CopySegmentAndIndexFiles(context.Background(), mockCM, source, target, nil)

        assert.Error(t, err)
        assert.Nil(t, result)
        assert.Contains(t, err.Error(), "failed to copy file")
        assert.Empty(t, copiedFiles)
    })
}

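// TestGenerateSegmentInfoFromSource verifies that the generated segment info
// carries the target segment ID, the accumulated row count, and binlog/statslog
// entries pointing at the remapped target paths.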
func TestGenerateSegmentInfoFromSource(t *testing.T) {
    source := &datapb.CopySegmentSource{
        CollectionId: 111,
        PartitionId:  222,
        SegmentId:    333,
        InsertBinlogs: []*datapb.FieldBinlog{
            {
                FieldID: 100,
                Binlogs: []*datapb.Binlog{
                    {
                        EntriesNum:    1000,
                        TimestampFrom: 100,
                        TimestampTo:   200,
                        LogPath:       "files/insert_log/111/222/333/100/log1.log",
                        LogSize:       1024,
                    },
                },
            },
        },
        StatsBinlogs: []*datapb.FieldBinlog{
            {
                FieldID: 100,
                Binlogs: []*datapb.Binlog{
                    {
                        LogPath: "files/stats_log/111/222/333/100/stats1.log",
                    },
                },
            },
        },
    }

    target := &datapb.CopySegmentTarget{
        CollectionId: 444,
        PartitionId:  555,
        SegmentId:    666,
    }

    mappings := map[string]string{
        "files/insert_log/111/222/333/100/log1.log":  "files/insert_log/444/555/666/100/log1.log",
        "files/stats_log/111/222/333/100/stats1.log": "files/stats_log/444/555/666/100/stats1.log",
    }

    t.Run("generate segment info", func(t *testing.T) {
        segmentInfo, err := generateSegmentInfoFromSource(source, target, mappings)

        assert.NoError(t, err)
        assert.NotNil(t, segmentInfo)
        assert.Equal(t, int64(666), segmentInfo.SegmentID)
        assert.Equal(t, int64(1000), segmentInfo.ImportedRows)
        assert.Equal(t, 1, len(segmentInfo.Binlogs))
        assert.Equal(t, 1, len(segmentInfo.Statslogs))
        assert.Equal(t, "files/insert_log/444/555/666/100/log1.log", segmentInfo.Binlogs[0].Binlogs[0].LogPath)
    })
}

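// TestBuildIndexInfoFromSource checks that vector/scalar, text, and JSON key
// index metadata are rebuilt against the remapped paths while preserving
// field/index/build IDs, sizes, and both JSON key stats data formats.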
func TestBuildIndexInfoFromSource(t *testing.T) {
    source := &datapb.CopySegmentSource{
        CollectionId: 111,
        PartitionId:  222,
        SegmentId:    333,
        IndexFiles: []*indexpb.IndexFilePathInfo{
            {
                FieldID:        100,
                IndexID:        1001,
                BuildID:        1002,
                IndexFilePaths: []string{"files/index_files/111/222/333/100/1001/1002/index1"},
                SerializedSize: 5000,
            },
        },
        TextIndexFiles: map[int64]*datapb.TextIndexStats{
            100: {
                FieldID:    100,
                Version:    1,
                BuildID:    2001,
                Files:      []string{"files/text_log/123/1/111/222/333/100/text1"},
                LogSize:    2048,
                MemorySize: 4096,
            },
        },
        JsonKeyIndexFiles: map[int64]*datapb.JsonKeyStats{
            101: {
                FieldID:                101,
                Version:                1,
                BuildID:                3001,
                JsonKeyStatsDataFormat: 1, // Legacy format
                Files:                  []string{"files/json_key_index_log/123/1/111/222/333/101/json1"},
                MemorySize:             3072,
            },
            102: {
                FieldID:                102,
                Version:                1,
                BuildID:                3002,
                JsonKeyStatsDataFormat: 2, // New format
                Files: []string{
                    "files/json_stats/2/3002/1/111/222/333/102/shared_key_index/index1",
                    "files/json_stats/2/3002/1/111/222/333/102/shredding_data/data1",
                },
                MemorySize: 4096,
            },
        },
    }

    target := &datapb.CopySegmentTarget{
        CollectionId: 444,
        PartitionId:  555,
        SegmentId:    666,
    }

    mappings := map[string]string{
        "files/index_files/111/222/333/100/1001/1002/index1":                "files/index_files/444/555/666/100/1001/1002/index1",
        "files/text_log/123/1/111/222/333/100/text1":                        "files/text_log/123/1/444/555/666/100/text1",
        "files/json_key_index_log/123/1/111/222/333/101/json1":              "files/json_key_index_log/123/1/444/555/666/101/json1",
        "files/json_stats/2/3002/1/111/222/333/102/shared_key_index/index1": "files/json_stats/2/3002/1/444/555/666/102/shared_key_index/index1",
        "files/json_stats/2/3002/1/111/222/333/102/shredding_data/data1":    "files/json_stats/2/3002/1/444/555/666/102/shredding_data/data1",
    }

    t.Run("build all index info", func(t *testing.T) {
        indexInfos, textIndexInfos, jsonKeyIndexInfos := buildIndexInfoFromSource(source, target, mappings)

        // Verify vector/scalar index info
        assert.Equal(t, 1, len(indexInfos))
        assert.NotNil(t, indexInfos[100])
        assert.Equal(t, int64(100), indexInfos[100].FieldId)
        assert.Equal(t, int64(1001), indexInfos[100].IndexId)
        assert.Equal(t, int64(1002), indexInfos[100].BuildId)
        assert.Equal(t, int64(5000), indexInfos[100].IndexSize)
        assert.Equal(t, "files/index_files/444/555/666/100/1001/1002/index1", indexInfos[100].IndexFilePaths[0])

        // Verify text index info
        assert.Equal(t, 1, len(textIndexInfos))
        assert.NotNil(t, textIndexInfos[100])
        assert.Equal(t, int64(100), textIndexInfos[100].FieldID)
        assert.Equal(t, int64(2001), textIndexInfos[100].BuildID)
        assert.Equal(t, "files/text_log/123/1/444/555/666/100/text1", textIndexInfos[100].Files[0])

        // Verify JSON key index info (legacy and new formats)
        assert.Equal(t, 2, len(jsonKeyIndexInfos))

        // Legacy format (data_format = 1)
        assert.NotNil(t, jsonKeyIndexInfos[101])
        assert.Equal(t, int64(101), jsonKeyIndexInfos[101].FieldID)
        assert.Equal(t, int64(3001), jsonKeyIndexInfos[101].BuildID)
        assert.Equal(t, int64(1), jsonKeyIndexInfos[101].JsonKeyStatsDataFormat)
        assert.Equal(t, "files/json_key_index_log/123/1/444/555/666/101/json1", jsonKeyIndexInfos[101].Files[0])

        // New format (data_format = 2)
        assert.NotNil(t, jsonKeyIndexInfos[102])
        assert.Equal(t, int64(102), jsonKeyIndexInfos[102].FieldID)
        assert.Equal(t, int64(3002), jsonKeyIndexInfos[102].BuildID)
        assert.Equal(t, int64(2), jsonKeyIndexInfos[102].JsonKeyStatsDataFormat)
        assert.Equal(t, 2, len(jsonKeyIndexInfos[102].Files))
        assert.Equal(t, "files/json_stats/2/3002/1/444/555/666/102/shared_key_index/index1", jsonKeyIndexInfos[102].Files[0])
        assert.Equal(t, "files/json_stats/2/3002/1/444/555/666/102/shredding_data/data1", jsonKeyIndexInfos[102].Files[1])
    })
}

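// TestCopySegmentAndIndexFiles_ReturnsFileList asserts that the copy entry
// point reports every target file written on success, and at most the files
// copied before the first failure, so callers can clean up partial copies.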
func TestCopySegmentAndIndexFiles_ReturnsFileList(t *testing.T) {
    t.Run("success returns all copied files", func(t *testing.T) {
        cm := mocks.NewChunkManager(t)
        source := &datapb.CopySegmentSource{
            CollectionId: 111,
            PartitionId:  222,
            SegmentId:    333,
            InsertBinlogs: []*datapb.FieldBinlog{
                {
                    FieldID: 1,
                    Binlogs: []*datapb.Binlog{
                        {LogPath: "files/insert_log/111/222/333/1/10001", LogSize: 100},
                        {LogPath: "files/insert_log/111/222/333/1/10002", LogSize: 200},
                    },
                },
            },
        }
        target := &datapb.CopySegmentTarget{
            CollectionId: 444,
            PartitionId:  555,
            SegmentId:    666,
        }

        // Mock successful copies
        cm.EXPECT().Copy(mock.Anything, mock.Anything, mock.Anything).Return(nil).Times(2)

        result, copiedFiles, err := CopySegmentAndIndexFiles(context.Background(), cm, source, target, nil)

        assert.NoError(t, err)
        assert.NotNil(t, result)
        assert.Len(t, copiedFiles, 2)
        assert.Contains(t, copiedFiles, "files/insert_log/444/555/666/1/10001")
        assert.Contains(t, copiedFiles, "files/insert_log/444/555/666/1/10002")
    })

    t.Run("failure returns partial file list", func(t *testing.T) {
        cm := mocks.NewChunkManager(t)
        source := &datapb.CopySegmentSource{
            CollectionId: 111,
            PartitionId:  222,
            SegmentId:    333,
            InsertBinlogs: []*datapb.FieldBinlog{
                {
                    FieldID: 1,
                    Binlogs: []*datapb.Binlog{
                        {LogPath: "files/insert_log/111/222/333/1/10001", LogSize: 100},
                        {LogPath: "files/insert_log/111/222/333/1/10002", LogSize: 200},
                        {LogPath: "files/insert_log/111/222/333/1/10003", LogSize: 300},
                    },
                },
            },
        }
        target := &datapb.CopySegmentTarget{
            CollectionId: 444,
            PartitionId:  555,
            SegmentId:    666,
        }

        // First copy succeeds, second fails
        cm.EXPECT().Copy(mock.Anything, "files/insert_log/111/222/333/1/10001", "files/insert_log/444/555/666/1/10001").Return(nil).Maybe()
        cm.EXPECT().Copy(mock.Anything, "files/insert_log/111/222/333/1/10002", "files/insert_log/444/555/666/1/10002").Return(errors.New("copy failed")).Maybe()

        result, copiedFiles, err := CopySegmentAndIndexFiles(context.Background(), cm, source, target, nil)

        assert.Error(t, err)
        assert.Nil(t, result)
        assert.True(t, len(copiedFiles) <= 1, "should return files copied before failure")
    })
}

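// TestShortenIndexFilePaths verifies that full index file paths are reduced to
// their final path component (the bare file name), including for empty lists
// and already-bare names.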
func TestShortenIndexFilePaths(t *testing.T) {
    tests := []struct {
        name      string
        fullPaths []string
        expected  []string
    }{
        {
            name: "vector/scalar index paths",
            fullPaths: []string{
                "files/index_files/444/555/666/100/1001/1002/scalar_index",
                "files/index_files/444/555/666/100/1001/1002/vector_index",
            },
            expected: []string{"scalar_index", "vector_index"},
        },
        {
            name:      "empty path list",
            fullPaths: []string{},
            expected:  []string{},
        },
        {
            name: "single file name",
            fullPaths: []string{
                "index_file",
            },
            expected: []string{"index_file"},
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            result := shortenIndexFilePaths(tt.fullPaths)
            assert.Equal(t, tt.expected, result)
        })
    }
}

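// TestShortenSingleJsonStatsPath checks that JSON stats paths are shortened to
// the component starting at shared_key_index/ or shredding_data/, that
// already-short paths pass through unchanged, and that paths outside the
// json_stats layout are kept as-is.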
func TestShortenSingleJsonStatsPath(t *testing.T) {
    tests := []struct {
        name         string
        inputPath    string
        expectedPath string
    }{
        {
            name:         "shared_key_index path",
            inputPath:    "files/json_stats/2/123/1/444/555/666/100/shared_key_index/inverted_index_0",
            expectedPath: "shared_key_index/inverted_index_0",
        },
        {
            name:         "shredding_data path",
            inputPath:    "files/json_stats/2/123/1/444/555/666/100/shredding_data/parquet_data_0",
            expectedPath: "shredding_data/parquet_data_0",
        },
        {
            name:         "already shortened - shared_key_index",
            inputPath:    "shared_key_index/inverted_index_0",
            expectedPath: "shared_key_index/inverted_index_0",
        },
        {
            name:         "already shortened - shredding_data",
            inputPath:    "shredding_data/parquet_data_0",
            expectedPath: "shredding_data/parquet_data_0",
        },
        {
            name:         "no keyword - keep as-is",
            inputPath:    "files/other/path/file.json",
            expectedPath: "files/other/path/file.json",
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            result := shortenSingleJsonStatsPath(tt.inputPath)
            assert.Equal(t, tt.expectedPath, result)
        })
    }
}

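// TestShortenJsonStatsPath verifies that shortening a whole JsonKeyStats map
// rewrites every file path while preserving field ID, version, build ID, data
// format, and size metadata.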
func TestShortenJsonStatsPath(t *testing.T) {
    jsonStats := map[int64]*datapb.JsonKeyStats{
        100: {
            FieldID: 100,
            Version: 1,
            BuildID: 123,
            Files: []string{
                "files/json_stats/2/123/1/444/555/666/100/shared_key_index/inverted_index_0",
                "files/json_stats/2/123/1/444/555/666/100/shared_key_index/inverted_index_1",
            },
            JsonKeyStatsDataFormat: 2,
            MemorySize:             1024,
            LogSize:                2048,
        },
        200: {
            FieldID: 200,
            Version: 1,
            BuildID: 456,
            Files: []string{
                "files/json_stats/2/456/1/444/555/666/200/shredding_data/parquet_data_0",
            },
            JsonKeyStatsDataFormat: 2,
            MemorySize:             512,
        },
    }

    result := shortenJsonStatsPath(jsonStats)

    assert.Equal(t, 2, len(result))

    // Check field 100
    assert.NotNil(t, result[100])
    assert.Equal(t, int64(100), result[100].FieldID)
    assert.Equal(t, int64(1), result[100].Version)
    assert.Equal(t, int64(123), result[100].BuildID)
    assert.Equal(t, int64(2), result[100].JsonKeyStatsDataFormat)
    assert.Equal(t, int64(1024), result[100].MemorySize)
    assert.Equal(t, int64(2048), result[100].LogSize)
    assert.Equal(t, 2, len(result[100].Files))
    assert.Equal(t, "shared_key_index/inverted_index_0", result[100].Files[0])
    assert.Equal(t, "shared_key_index/inverted_index_1", result[100].Files[1])

    // Check field 200
    assert.NotNil(t, result[200])
    assert.Equal(t, int64(200), result[200].FieldID)
    assert.Equal(t, int64(1), result[200].Version)
    assert.Equal(t, int64(456), result[200].BuildID)
    assert.Equal(t, int64(2), result[200].JsonKeyStatsDataFormat)
    assert.Equal(t, int64(512), result[200].MemorySize)
    assert.Equal(t, 1, len(result[200].Files))
    assert.Equal(t, "shredding_data/parquet_data_0", result[200].Files[0])
}

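// TestShortenJsonStatsPath_MetaJson covers the special case of a file
// (meta.json) sitting directly under the fieldID directory, which shortens to
// the bare file name.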
func TestShortenJsonStatsPath_MetaJson(t *testing.T) {
    // Test shortening a meta.json path (a file directly under the fieldID directory)
    jsonStats := map[int64]*datapb.JsonKeyStats{
        102: {
            FieldID: 102,
            Version: 1,
            BuildID: 462930163709949539,
            Files: []string{
                "files/json_stats/2/462930163709949539/1/462930163710600038/462930163710600039/462930163710600046/102/meta.json",
            },
            JsonKeyStatsDataFormat: 2,
            MemorySize:             256,
        },
    }

    result := shortenJsonStatsPath(jsonStats)

    assert.Equal(t, 1, len(result))
    assert.NotNil(t, result[102])
    assert.Equal(t, int64(102), result[102].FieldID)
    assert.Equal(t, 1, len(result[102].Files))
    assert.Equal(t, "meta.json", result[102].Files[0])
}

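// TestShortenSingleJsonStatsPath_EdgeCases covers already-shortened inputs, a
// full json_stats path ending in meta.json, and a nested file under the
// fieldID directory.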
func TestShortenSingleJsonStatsPath_EdgeCases(t *testing.T) {
    // Test already shortened meta.json path
    t.Run("already_shortened_meta", func(t *testing.T) {
        result := shortenSingleJsonStatsPath("meta.json")
        assert.Equal(t, "meta.json", result)
    })

    // Test already shortened shared_key_index path
    t.Run("already_shortened_shared_key", func(t *testing.T) {
        result := shortenSingleJsonStatsPath("shared_key_index/inverted_index_0")
        assert.Equal(t, "shared_key_index/inverted_index_0", result)
    })

    // Test full path with meta.json
    t.Run("full_path_meta_json", func(t *testing.T) {
        fullPath := "files/json_stats/2/123/1/444/555/666/100/meta.json"
        result := shortenSingleJsonStatsPath(fullPath)
        assert.Equal(t, "meta.json", result)
    })

    // Test full path with nested file under fieldID
    t.Run("full_path_nested_file", func(t *testing.T) {
        fullPath := "files/json_stats/2/123/1/444/555/666/100/subdir/file.dat"
        result := shortenSingleJsonStatsPath(fullPath)
        assert.Equal(t, "subdir/file.dat", result)
    })
}