fix: text log loading failure during cluster upgrade (#46697)

issue: https://github.com/milvus-io/milvus/issues/46678

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
- Core invariant: text index log keys are canonicalized at the KV
(serialization) boundary. etcd stores compact, filename-only entries,
while in-memory and runtime consumers must receive full object-storage
keys so the DataNode and QueryNode can load text indexes directly (a
sketch of this roundtrip follows these notes).

- Logic removed/simplified: the ad-hoc reconstruction of full text-log
paths scattered across components (garbage_collector.getTextLogs,
querynodev2 LoadTextIndex, compactor/index task code) was removed;
consumers now use TextIndexStats.Files as provided (full keys). Path
compression/decompression is centralized in the KV marshal/unmarshal
utilities (metautil.ExtractTextLogFilenames in marshalSegmentInfo and
metautil.BuildTextLogPaths in kv_catalog.listSegments), eliminating the
redundant, inconsistent prefix rebuilding that broke during rolling
upgrades.

- Why this does NOT cause data loss or regressions: before persisting,
marshalSegmentInfo compresses TextStatsLogs.Files to filenames
(metautil.ExtractTextLogFilenames) so the stored KV stays compact; on
load, kv_catalog.listSegments calls metautil.BuildTextLogPaths to
restore full paths, with compatibility logic that leaves already-full
keys unchanged. Every persisted filename is therefore recoverable to a
valid full key, and consumers receive correct full paths (see the
marshalSegmentInfo KV write path and the kv_catalog.listSegments reload
path), so no keys are dropped or malformed.

- Bug fix (refs #46678): resolves text-log loading failures during
cluster upgrades by centralizing path handling at KV encode/decode and
removing per-component path reconstruction. The immediate fix is to have
consumers read TextIndexStats.Files directly and rely on
marshal/unmarshal to perform the compression/expansion, preventing
mixed-format failures during rolling upgrades.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
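
For reference, below is a minimal, self-contained sketch of the compress-on-write / expand-on-read roundtrip described above. The type and helper names (`textIndexStats`, `compressFiles`, `expandFiles`) and the hard-coded `"text_log"` prefix are illustrative stand-ins only; the actual implementation is `metautil.ExtractTextLogFilenames` / `metautil.BuildTextLogPaths` operating on `datapb.TextIndexStats`, as shown in the diff below.

```go
package main

import (
	"fmt"
	"path"
	"strconv"
	"strings"
)

// textIndexStats is a simplified stand-in for datapb.TextIndexStats.
type textIndexStats struct {
	FieldID, BuildID, Version int64
	Files                     []string
}

// compressFiles keeps only the base filename of each entry, mirroring what
// the marshal path does before a segment is written to etcd.
func compressFiles(stats *textIndexStats) {
	for i, f := range stats.Files {
		if idx := strings.LastIndex(f, "/"); idx >= 0 {
			stats.Files[i] = f[idx+1:]
		}
	}
}

// expandFiles rebuilds full object-storage keys on load; entries that already
// contain the text_log path segment (written by an older version) are kept
// unchanged.
func expandFiles(rootPath string, collectionID, partitionID, segmentID int64, stats *textIndexStats) {
	prefix := path.Join(
		rootPath, "text_log",
		strconv.FormatInt(stats.BuildID, 10),
		strconv.FormatInt(stats.Version, 10),
		strconv.FormatInt(collectionID, 10),
		strconv.FormatInt(partitionID, 10),
		strconv.FormatInt(segmentID, 10),
		strconv.FormatInt(stats.FieldID, 10),
	)
	for i, f := range stats.Files {
		if !strings.Contains(f, "text_log/") {
			stats.Files[i] = path.Join(prefix, f)
		}
	}
}

func main() {
	stats := &textIndexStats{
		FieldID: 100, BuildID: 1, Version: 2,
		Files: []string{"files/text_log/1/2/10/20/30/100/file1.pos_0"},
	}
	compressFiles(stats)                    // persisted form: ["file1.pos_0"]
	expandFiles("files", 10, 20, 30, stats) // restored form: full key again
	fmt.Println(stats.Files)
}
```

The key design point the sketch mirrors: expansion is tolerant of old-format entries, so a segment persisted by a pre-upgrade node (full paths) and one persisted by an upgraded node (filenames only) both resolve to the same full object-storage keys on load.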

---------

Signed-off-by: sijie-ni-0214 <sijie.ni@zilliz.com>
sijie-ni-0214 2026-01-05 11:19:24 +08:00 committed by GitHub
parent a16d04f5d1
commit 941c6eaed7
9 changed files with 111 additions and 168 deletions

View File

@ -743,7 +743,7 @@ func (gc *garbageCollector) recycleDroppedSegments(ctx context.Context, signal <
binlog.DecompressBinLogs(cloned.SegmentInfo)
logs := getLogs(cloned)
for key := range getTextLogs(cloned, gc) {
for key := range getTextLogs(cloned) {
logs[key] = struct{}{}
}
@ -868,23 +868,11 @@ func getLogs(sinfo *SegmentInfo) map[string]struct{} {
return logs
}
func getTextLogs(sinfo *SegmentInfo, gc *garbageCollector) map[string]struct{} {
func getTextLogs(sinfo *SegmentInfo) map[string]struct{} {
textLogs := make(map[string]struct{})
for _, flog := range sinfo.GetTextStatsLogs() {
// Reconstruct full paths from filenames
// Files stored in TextIndexStats only contain filenames to save space
fullPaths := metautil.BuildTextLogPaths(
gc.option.cli.RootPath(),
flog.GetBuildID(),
flog.GetVersion(),
sinfo.GetCollectionID(),
sinfo.GetPartitionID(),
sinfo.GetID(),
flog.GetFieldID(),
flog.GetFiles(),
)
for _, fullPath := range fullPaths {
textLogs[fullPath] = struct{}{}
for _, file := range flog.GetFiles() {
textLogs[file] = struct{}{}
}
}

View File

@ -275,12 +275,6 @@ func (m *meta) reloadFromKV(ctx context.Context, broker broker.Broker) error {
for _, segments := range collectionSegments {
numSegments += len(segments)
for _, segment := range segments {
// Convert old text log paths (full paths) to filenames to save memory
// This handles backward compatibility during recovery
for _, textStatsLog := range segment.GetTextStatsLogs() {
textStatsLog.Files = metautil.ExtractTextLogFilenames(textStatsLog.GetFiles())
}
// segments from catalog.ListSegments will not have logPath
m.segments.SetSegment(segment.ID, NewSegmentInfo(segment))
metrics.DataCoordNumSegments.WithLabelValues(segment.GetState().String(), segment.GetLevel().String(), getSortStatus(segment.GetIsSorted())).Inc()

View File

@ -491,15 +491,13 @@ func (t *sortCompactionTask) createTextIndex(ctx context.Context,
if err != nil {
return err
}
// Extract only filenames from full paths to save space
filenames := metautil.ExtractTextLogFilenames(lo.Keys(uploaded))
mu.Lock()
textIndexLogs[field.GetFieldID()] = &datapb.TextIndexStats{
FieldID: field.GetFieldID(),
Version: 0,
BuildID: taskID,
Files: filenames,
Files: lo.Keys(uploaded),
}
mu.Unlock()

View File

@ -503,15 +503,13 @@ func (st *statsTask) createTextIndex(ctx context.Context,
if err != nil {
return err
}
// Extract only filenames from full paths to save space
filenames := metautil.ExtractTextLogFilenames(lo.Keys(uploaded))
mu.Lock()
textIndexLogs[field.GetFieldID()] = &datapb.TextIndexStats{
FieldID: field.GetFieldID(),
Version: version,
BuildID: taskID,
Files: filenames,
Files: lo.Keys(uploaded),
}
mu.Unlock()

View File

@ -45,6 +45,7 @@ import (
"github.com/milvus-io/milvus/pkg/v2/util"
"github.com/milvus-io/milvus/pkg/v2/util/etcd"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
"github.com/milvus-io/milvus/pkg/v2/util/metautil"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
@ -133,6 +134,16 @@ func (kc *Catalog) listSegments(ctx context.Context, collectionID int64) ([]*dat
return err
}
// Restore full paths for text index logs (compatible with old version)
// segments from etcd may have filenames only in TextStatsLogs
metautil.BuildTextLogPaths(
kc.ChunkManagerRootPath,
segmentInfo.GetCollectionID(),
segmentInfo.GetPartitionID(),
segmentInfo.GetID(),
segmentInfo.GetTextStatsLogs(),
)
segments = append(segments, segmentInfo)
return nil
}
@ -382,11 +393,11 @@ func (kc *Catalog) SaveDroppedSegmentsInBatch(ctx context.Context, segments []*d
noBinlogsSegment, _, _, _, _ := CloneSegmentWithExcludeBinlogs(s)
// `s` is not mutated above. Also, `noBinlogsSegment` is a cloned version of `s`.
segmentutil.ReCalcRowCount(s, noBinlogsSegment)
segBytes, err := proto.Marshal(noBinlogsSegment)
segBytes, err := marshalSegmentInfo(noBinlogsSegment)
if err != nil {
return fmt.Errorf("failed to marshal segment: %d, err: %w", s.GetID(), err)
}
kvs[key] = string(segBytes)
kvs[key] = segBytes
}
saveFn := func(partialKvs map[string]string) error {

View File

@ -27,6 +27,7 @@ import (
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/util"
"github.com/milvus-io/milvus/pkg/v2/util/metautil"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
@ -225,6 +226,9 @@ func CloneSegmentWithExcludeBinlogs(segment *datapb.SegmentInfo) (*datapb.Segmen
}
func marshalSegmentInfo(segment *datapb.SegmentInfo) (string, error) {
// Compress TextStatsLogs paths to filenames before marshaling to save etcd space
metautil.ExtractTextLogFilenames(segment.GetTextStatsLogs())
segBytes, err := proto.Marshal(segment)
if err != nil {
return "", fmt.Errorf("failed to marshal segment: %d, err: %w", segment.ID, err)

View File

@ -43,7 +43,6 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/metastore/kv/binlog"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querynodev2/pkoracle"
"github.com/milvus-io/milvus/internal/querynodev2/segments/state"
@ -1190,25 +1189,11 @@ func (s *LocalSegment) LoadTextIndex(ctx context.Context, textLogs *datapb.TextI
// Text match index mmap config is based on the raw data mmap.
enableMmap := isDataMmapEnable(f)
// Reconstruct full paths from filenames
// Files stored in TextIndexStats only contain filenames to save space
fullPaths := metautil.BuildTextLogPaths(
binlog.GetRootPath(),
textLogs.GetBuildID(),
textLogs.GetVersion(),
s.Collection(),
s.Partition(),
s.ID(),
textLogs.GetFieldID(),
textLogs.GetFiles(),
)
cgoProto := &indexcgopb.LoadTextIndexInfo{
FieldID: textLogs.GetFieldID(),
Version: textLogs.GetVersion(),
BuildID: textLogs.GetBuildID(),
Files: fullPaths,
Files: textLogs.GetFiles(),
Schema: f,
CollectionID: s.Collection(),
PartitionID: s.Partition(),

View File

@ -6,6 +6,7 @@ import (
"strings"
"github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
@ -94,37 +95,55 @@ func JoinIDPath(ids ...typeutil.UniqueID) string {
}
// ExtractTextLogFilenames extracts only filenames from full paths to save space.
// It takes a slice of full paths and returns a slice of filenames.
func ExtractTextLogFilenames(files []string) []string {
filenames := make([]string, 0, len(files))
for _, fullPath := range files {
idx := strings.LastIndex(fullPath, pathSep)
if idx < 0 {
filenames = append(filenames, fullPath)
} else {
filenames = append(filenames, fullPath[idx+1:])
// It modifies the TextStatsLogs map in place, compressing full paths to filenames.
func ExtractTextLogFilenames(textStatsLogs map[int64]*datapb.TextIndexStats) {
for _, textStats := range textStatsLogs {
if textStats == nil {
continue
}
filenames := make([]string, 0, len(textStats.GetFiles()))
for _, fullPath := range textStats.GetFiles() {
idx := strings.LastIndex(fullPath, pathSep)
if idx < 0 {
filenames = append(filenames, fullPath)
} else {
filenames = append(filenames, fullPath[idx+1:])
}
}
textStats.Files = filenames
}
return filenames
}
// BuildTextLogPaths reconstructs full paths from filenames for text index logs.
// Files stored in TextIndexStats only contain filenames to save space.
func BuildTextLogPaths(rootPath string, buildID, version, collectionID, partitionID, segmentID, fieldID typeutil.UniqueID, filenames []string) []string {
prefix := path.Join(
rootPath,
common.TextIndexPath,
strconv.FormatInt(buildID, 10),
strconv.FormatInt(version, 10),
strconv.FormatInt(collectionID, 10),
strconv.FormatInt(partitionID, 10),
strconv.FormatInt(segmentID, 10),
strconv.FormatInt(fieldID, 10),
)
// This function is compatible with both old version (full paths) and new version (filenames only).
func BuildTextLogPaths(rootPath string, collectionID, partitionID, segmentID typeutil.UniqueID, textStatsLogs map[int64]*datapb.TextIndexStats) {
for _, textStats := range textStatsLogs {
if textStats == nil {
continue
}
prefix := path.Join(
rootPath,
common.TextIndexPath,
strconv.FormatInt(textStats.GetBuildID(), 10),
strconv.FormatInt(textStats.GetVersion(), 10),
strconv.FormatInt(collectionID, 10),
strconv.FormatInt(partitionID, 10),
strconv.FormatInt(segmentID, 10),
strconv.FormatInt(textStats.GetFieldID(), 10),
)
fullPaths := make([]string, 0, len(filenames))
for _, filename := range filenames {
fullPaths = append(fullPaths, path.Join(prefix, filename))
filenames := textStats.GetFiles()
fullPaths := make([]string, 0, len(filenames))
for _, filename := range filenames {
// Check if filename is already a full path (compatible with old version)
// If it contains the text_log path segment, treat it as a full path
if strings.Contains(filename, common.TextIndexPath+pathSep) {
fullPaths = append(fullPaths, filename)
} else {
// New version: filename only, need to join with prefix
fullPaths = append(fullPaths, path.Join(prefix, filename))
}
}
textStats.Files = fullPaths
}
return fullPaths
}

View File

@ -17,10 +17,12 @@
package metautil
import (
"path"
"reflect"
"sort"
"testing"
"github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
@ -141,111 +143,55 @@ func TestParseInsertLogPath(t *testing.T) {
}
func TestExtractTextLogFilenames(t *testing.T) {
tests := []struct {
name string
files []string
want []string
}{
{
name: "test extract filenames from full paths",
files: []string{
"files/text_log/123/0/456/789/101112/131415/test_file.pos_0",
"files/text_log/123/0/456/789/101112/131415/test_file.pos_1",
"files/text_log/123/0/456/789/101112/131416/another_file.pos_0",
},
want: []string{
"test_file.pos_0",
"test_file.pos_1",
"another_file.pos_0",
},
},
{
name: "test extract filename without path",
files: []string{
"filename.txt",
},
want: []string{
"filename.txt",
},
},
{
name: "test empty slice",
files: []string{},
want: []string{},
},
{
name: "test single file",
files: []string{
"root/path/to/file.log",
},
want: []string{
"file.log",
textStatsLogs := map[int64]*datapb.TextIndexStats{
100: {
FieldID: 100,
Files: []string{
"/root/text_log/1/2/10/20/30/100/file1.txt",
"/root/text_log/1/2/10/20/30/100/file2.txt",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := ExtractTextLogFilenames(tt.files)
// Sort both slices for comparison
sort.Strings(got)
sort.Strings(tt.want)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("ExtractTextLogFilenames() = %v, want %v", got, tt.want)
}
})
ExtractTextLogFilenames(textStatsLogs)
wantFiles := []string{"file1.txt", "file2.txt"}
if !reflect.DeepEqual(textStatsLogs[100].Files, wantFiles) {
t.Errorf("ExtractTextLogFilenames() Files = %v, want %v", textStatsLogs[100].Files, wantFiles)
}
}
func TestBuildTextLogPaths(t *testing.T) {
tests := []struct {
name string
rootPath string
buildID typeutil.UniqueID
version typeutil.UniqueID
collectionID typeutil.UniqueID
partitionID typeutil.UniqueID
segmentID typeutil.UniqueID
fieldID typeutil.UniqueID
filenames []string
want []string
}{
{
name: "test build text log paths with multiple files",
rootPath: "files",
buildID: 123,
version: 0,
collectionID: 456,
partitionID: 789,
segmentID: 101112,
fieldID: 131415,
filenames: []string{"test_file.pos_0", "test_file.pos_1", "another_file.pos_0"},
want: []string{
"files/text_log/123/0/456/789/101112/131415/test_file.pos_0",
"files/text_log/123/0/456/789/101112/131415/test_file.pos_1",
"files/text_log/123/0/456/789/101112/131415/another_file.pos_0",
},
},
{
name: "test build text log paths with empty filenames",
rootPath: "files",
buildID: 123,
version: 0,
collectionID: 456,
partitionID: 789,
segmentID: 101112,
fieldID: 131415,
filenames: []string{},
want: []string{},
rootPath := "/root"
collectionID := typeutil.UniqueID(10)
partitionID := typeutil.UniqueID(20)
segmentID := typeutil.UniqueID(30)
// Test building paths from filenames (new version)
textStatsLogs := map[int64]*datapb.TextIndexStats{
100: {
FieldID: 100,
BuildID: 1,
Version: 2,
Files: []string{"file1.txt", "file2.txt"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := BuildTextLogPaths(tt.rootPath, tt.buildID, tt.version, tt.collectionID, tt.partitionID, tt.segmentID, tt.fieldID, tt.filenames)
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("BuildTextLogPaths() = %v, want %v", got, tt.want)
}
})
BuildTextLogPaths(rootPath, collectionID, partitionID, segmentID, textStatsLogs)
wantFiles := []string{
path.Join(rootPath, common.TextIndexPath, "1", "2", "10", "20", "30", "100", "file1.txt"),
path.Join(rootPath, common.TextIndexPath, "1", "2", "10", "20", "30", "100", "file2.txt"),
}
if !reflect.DeepEqual(textStatsLogs[100].Files, wantFiles) {
t.Errorf("BuildTextLogPaths() Files = %v, want %v", textStatsLogs[100].Files, wantFiles)
}
// Test old version compatibility (already full paths)
fullPath := path.Join(rootPath, common.TextIndexPath, "1", "2", "10", "20", "30", "100", "file3.txt")
textStatsLogs[100].Files = []string{fullPath}
BuildTextLogPaths(rootPath, collectionID, partitionID, segmentID, textStatsLogs)
if textStatsLogs[100].Files[0] != fullPath {
t.Errorf("BuildTextLogPaths() should keep full path unchanged, got %v", textStatsLogs[100].Files[0])
}
}