milvus/pkg/util/metautil/binlog_test.go
sijie-ni-0214 941c6eaed7
fix: text log loading failure during cluster upgrade (#46697)
issue: https://github.com/milvus-io/milvus/issues/46678

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: text index log keys are canonicalized at the KV
(serialization) boundary: etcd stores compressed, filename-only
entries, while in-memory and runtime consumers must receive full
object-storage keys so the DataNode and QueryNode can load text
indexes directly.

- Logic removed/simplified: the ad-hoc reconstruction of full text-log
paths scattered across components (garbage_collector.getTextLogs,
querynodev2.LoadTextIndex, compactor/index task code) was removed;
consumers now use TextIndexStats.Files as provided (full keys). Path
compression/decompression is centralized in the KV marshal/unmarshal
utilities (metautil.ExtractTextLogFilenames in marshalSegmentInfo and
metautil.BuildTextLogPaths in kv_catalog.listSegments), eliminating the
redundant, inconsistent prefix-rebuilding logic that broke during
rolling upgrades.

- Why this does NOT cause data loss or regressions: before persisting,
marshalSegmentInfo compresses TextStatsLogs.Files to bare filenames
(metautil.ExtractTextLogFilenames) so the stored KV stays compact; on
load, kv_catalog.listSegments calls metautil.BuildTextLogPaths to
restore full paths, with compatibility logic that leaves already-full
keys unchanged (see the sketch after these notes). Every persisted
filename is therefore recoverable as a valid full key, and consumers
receive correct full paths (marshalSegmentInfo → KV write path,
kv_catalog.listSegments → reload path), so no keys are dropped or
malformed.

- Bug fix (refs #46678): resolves text-log loading failures during
cluster upgrades by centralizing path handling at the KV encode/decode
boundary and removing per-component path reconstruction. The immediate
fix is that consumers read TextIndexStats.Files directly and rely on
marshal/unmarshal to perform the compression/expansion, preventing
mixed-format failures during rolling upgrades.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
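
Below is a minimal sketch of the two metautil helpers the notes above refer to, reconstructed from the test expectations in binlog_test.go (shown further down). The sketch function names, the bare-filename compatibility check, and the exact path layout under common.TextIndexPath are illustrative assumptions, not the actual implementation.

```go
// Sketch only: reconstructed from the tests in binlog_test.go, assuming the
// layout rootPath/<text-index prefix>/buildID/version/collectionID/
// partitionID/segmentID/fieldID/filename.
package metautil

import (
	"path"
	"strconv"

	"github.com/milvus-io/milvus/pkg/v2/common"
	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)

// extractTextLogFilenamesSketch: before a segment is marshalled for etcd,
// reduce every text-log entry to its base filename so the stored KV stays
// compact (what ExtractTextLogFilenames is expected to do).
func extractTextLogFilenamesSketch(textStatsLogs map[int64]*datapb.TextIndexStats) {
	for _, stats := range textStatsLogs {
		for i, f := range stats.Files {
			stats.Files[i] = path.Base(f)
		}
	}
}

// buildTextLogPathsSketch: on load, expand bare filenames back to full
// object-storage keys; entries that are already full paths (written by
// older versions) are left untouched (what BuildTextLogPaths is expected
// to do).
func buildTextLogPathsSketch(rootPath string, collectionID, partitionID, segmentID typeutil.UniqueID, textStatsLogs map[int64]*datapb.TextIndexStats) {
	for _, stats := range textStatsLogs {
		for i, f := range stats.Files {
			if f != path.Base(f) {
				// Already a full key from an older version; keep it as-is.
				// The real compatibility check may differ.
				continue
			}
			stats.Files[i] = path.Join(rootPath, common.TextIndexPath,
				strconv.FormatInt(stats.BuildID, 10),
				strconv.FormatInt(stats.Version, 10),
				strconv.FormatInt(int64(collectionID), 10),
				strconv.FormatInt(int64(partitionID), 10),
				strconv.FormatInt(int64(segmentID), 10),
				strconv.FormatInt(stats.FieldID, 10),
				f)
		}
	}
}
```

Keeping the compression in marshalSegmentInfo and the expansion in kv_catalog.listSegments means either on-disk format stays readable during a rolling upgrade: old full keys pass through unchanged, and new bare filenames are rebuilt into full keys before any consumer sees them.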

---------

Signed-off-by: sijie-ni-0214 <sijie.ni@zilliz.com>
2026-01-05 11:19:24 +08:00


// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metautil

import (
	"path"
	"reflect"
	"testing"

	"github.com/milvus-io/milvus/pkg/v2/common"
	"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)

func TestParseInsertLogPath(t *testing.T) {
	type args struct {
		path string
	}
	tests := []struct {
		name             string
		args             args
		wantCollectionID typeutil.UniqueID
		wantPartitionID  typeutil.UniqueID
		wantSegmentID    typeutil.UniqueID
		wantFieldID      typeutil.UniqueID
		wantLogID        typeutil.UniqueID
		wantOk           bool
	}{
		{
			"test parse insert log path",
			args{path: "8a8c3ac2298b12f/insert_log/446266956600703270/446266956600703326/447985737531772787/102/447985737523710526"},
			446266956600703270,
			446266956600703326,
			447985737531772787,
			102,
			447985737523710526,
			true,
		},
		{
			"test parse insert log path negative1",
			args{path: "foobar"},
			0,
			0,
			0,
			0,
			0,
			false,
		},
		{
			"test parse insert log path negative2",
			args{path: "8a8c3ac2298b12f/insert_log/446266956600703270/446266956600703326/447985737531772787/102/foo"},
			0,
			0,
			0,
			0,
			0,
			false,
		},
		{
			"test parse insert log path negative3",
			args{path: "8a8c3ac2298b12f/insert_log/446266956600703270/446266956600703326/447985737531772787/foo/447985737523710526"},
			0,
			0,
			0,
			0,
			0,
			false,
		},
		{
			"test parse insert log path negative4",
			args{path: "8a8c3ac2298b12f/insert_log/446266956600703270/446266956600703326/foo/102/447985737523710526"},
			0,
			0,
			0,
			0,
			0,
			false,
		},
		{
			"test parse insert log path negative5",
			args{path: "8a8c3ac2298b12f/insert_log/446266956600703270/foo/447985737531772787/102/447985737523710526"},
			0,
			0,
			0,
			0,
			0,
			false,
		},
		{
			"test parse insert log path negative6",
			args{path: "8a8c3ac2298b12f/insert_log/foo/446266956600703326/447985737531772787/102/447985737523710526"},
			0,
			0,
			0,
			0,
			0,
			false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotCollectionID, gotPartitionID, gotSegmentID, gotFieldID, gotLogID, gotOk := ParseInsertLogPath(tt.args.path)
			if !reflect.DeepEqual(gotCollectionID, tt.wantCollectionID) {
				t.Errorf("ParseInsertLogPath() gotCollectionID = %v, want %v", gotCollectionID, tt.wantCollectionID)
			}
			if !reflect.DeepEqual(gotPartitionID, tt.wantPartitionID) {
				t.Errorf("ParseInsertLogPath() gotPartitionID = %v, want %v", gotPartitionID, tt.wantPartitionID)
			}
			if !reflect.DeepEqual(gotSegmentID, tt.wantSegmentID) {
				t.Errorf("ParseInsertLogPath() gotSegmentID = %v, want %v", gotSegmentID, tt.wantSegmentID)
			}
			if !reflect.DeepEqual(gotFieldID, tt.wantFieldID) {
				t.Errorf("ParseInsertLogPath() gotFieldID = %v, want %v", gotFieldID, tt.wantFieldID)
			}
			if !reflect.DeepEqual(gotLogID, tt.wantLogID) {
				t.Errorf("ParseInsertLogPath() gotLogID = %v, want %v", gotLogID, tt.wantLogID)
			}
			if gotOk != tt.wantOk {
				t.Errorf("ParseInsertLogPath() gotOk = %v, want %v", gotOk, tt.wantOk)
			}
		})
	}
}

// TestExtractTextLogFilenames verifies that full text-log paths are reduced
// to bare filenames in place before they are persisted.
func TestExtractTextLogFilenames(t *testing.T) {
	textStatsLogs := map[int64]*datapb.TextIndexStats{
		100: {
			FieldID: 100,
			Files: []string{
				"/root/text_log/1/2/10/20/30/100/file1.txt",
				"/root/text_log/1/2/10/20/30/100/file2.txt",
			},
		},
	}

	ExtractTextLogFilenames(textStatsLogs)

	wantFiles := []string{"file1.txt", "file2.txt"}
	if !reflect.DeepEqual(textStatsLogs[100].Files, wantFiles) {
		t.Errorf("ExtractTextLogFilenames() Files = %v, want %v", textStatsLogs[100].Files, wantFiles)
	}
}

// TestBuildTextLogPaths verifies that bare filenames are expanded back into
// full object-storage keys on load, and that already-full paths written by
// older versions are left unchanged.
func TestBuildTextLogPaths(t *testing.T) {
	rootPath := "/root"
	collectionID := typeutil.UniqueID(10)
	partitionID := typeutil.UniqueID(20)
	segmentID := typeutil.UniqueID(30)

	// Test building paths from filenames (new version)
	textStatsLogs := map[int64]*datapb.TextIndexStats{
		100: {
			FieldID: 100,
			BuildID: 1,
			Version: 2,
			Files:   []string{"file1.txt", "file2.txt"},
		},
	}
	BuildTextLogPaths(rootPath, collectionID, partitionID, segmentID, textStatsLogs)
	wantFiles := []string{
		path.Join(rootPath, common.TextIndexPath, "1", "2", "10", "20", "30", "100", "file1.txt"),
		path.Join(rootPath, common.TextIndexPath, "1", "2", "10", "20", "30", "100", "file2.txt"),
	}
	if !reflect.DeepEqual(textStatsLogs[100].Files, wantFiles) {
		t.Errorf("BuildTextLogPaths() Files = %v, want %v", textStatsLogs[100].Files, wantFiles)
	}

	// Test old version compatibility (already full paths)
	fullPath := path.Join(rootPath, common.TextIndexPath, "1", "2", "10", "20", "30", "100", "file3.txt")
	textStatsLogs[100].Files = []string{fullPath}
	BuildTextLogPaths(rootPath, collectionID, partitionID, segmentID, textStatsLogs)
	if textStatsLogs[100].Files[0] != fullPath {
		t.Errorf("BuildTextLogPaths() should keep full path unchanged, got %v", textStatsLogs[100].Files[0])
	}
}