milvus/pkg/util/metautil/binlog.go
sijie-ni-0214 941c6eaed7
fix: text log loading failure during cluster upgrade (#46697)
issue: https://github.com/milvus-io/milvus/issues/46678

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: Text index log keys are canonicalized at KV
(serialization) boundaries — etcd stores compressed filename-only
entries, while in-memory and runtime consumers must receive full
object-storage keys so Datanode/QueryNode can load text indexes
directly.

- Logic removed/simplified: ad-hoc reconstruction of full text-log paths
scattered across components (garbage_collector.getTextLogs,
querynodev2.LoadTextIndex, compactor/index task code) was removed;
consumers now use TextIndexStats.Files as-provided (full keys). Path
compression/decompression was centralized into KV marshal/unmarshal
utilities (metautil.ExtractTextLogFilenames in marshalSegmentInfo and
metautil.BuildTextLogPaths in kv_catalog.listSegments), eliminating
redundant, inconsistent prefix-rebuilding logic that broke during
rolling upgrades.

- Why this does NOT cause data loss or regressions: before persisting,
marshalSegmentInfo compresses TextStatsLogs.Files to filenames
(metautil.ExtractTextLogFilenames) so stored KV remains compact; on
load, kv_catalog.listSegments calls metautil.BuildTextLogPaths to
restore full paths and includes compatibility logic that leaves
already-full keys unchanged. Thus every persisted filename is
recoverable to a valid full key and consumers receive correct full paths
(see marshalSegmentInfo → KV write path and kv_catalog.listSegments →
reload path), preventing dropped or malformed keys.

- Bug fix (refs #46678): resolves text-log loading failures during
cluster upgrades by centralizing path handling at KV encode/decode and
removing per-component path reconstruction — the immediate fix is
changing consumers to read TextIndexStats.Files directly and relying on
marshal/unmarshal to perform compression/expansion, preventing
mixed-format failures during rolling upgrades.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: sijie-ni-0214 <sijie.ni@zilliz.com>
2026-01-05 11:19:24 +08:00

150 lines
4.7 KiB
Go

package metautil
import (
"path"
"strconv"
"strings"
"github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// pathSep is the separator used in this file when splitting log paths into
// components and when locating the filename part of a text log path.
const pathSep = "/"
// BuildInsertLogPath returns the insert log path formed by joining rootPath,
// common.SegmentInsertLogPath and the IDs in the order
// collectionID/partitionID/segmentID/fieldID/logID.
func BuildInsertLogPath(rootPath string, collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID) string {
	return path.Join(
		rootPath,
		common.SegmentInsertLogPath,
		JoinIDPath(collectionID, partitionID, segmentID, fieldID, logID),
	)
}
// ParseInsertLogPath extracts the five trailing numeric components of an
// insert log path, in the order collectionID, partitionID, segmentID, fieldID
// and logID.
//
// The path must contain at least six "/"-separated components (only the last
// five are parsed), and each of those five must be a base-10 int64. If either
// condition fails, all IDs are returned as zero and ok is false.
func ParseInsertLogPath(path string) (collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID, ok bool) {
	const idCount = 5

	parts := strings.Split(path, pathSep)
	if len(parts) < idCount+1 {
		return 0, 0, 0, 0, 0, false
	}

	// Parse the last idCount components; any failure invalidates the whole path.
	var ids [idCount]typeutil.UniqueID
	for i, raw := range parts[len(parts)-idCount:] {
		v, err := strconv.ParseInt(raw, 10, 64)
		if err != nil {
			return 0, 0, 0, 0, 0, false
		}
		ids[i] = v
	}
	return ids[0], ids[1], ids[2], ids[3], ids[4], true
}
// GetSegmentIDFromInsertLogPath returns the segment ID found in logPath, or 0
// if it cannot be parsed.
func GetSegmentIDFromInsertLogPath(logPath string) typeutil.UniqueID {
	// Insert log paths end in .../segmentID/fieldID/logID, so the segment ID
	// is the third component counted from the end.
	const segmentPosFromEnd = 3
	return getSegmentIDFromPath(logPath, segmentPosFromEnd)
}
// BuildStatsLogPath returns the stats log path formed by joining rootPath,
// common.SegmentStatslogPath and the IDs in the order
// collectionID/partitionID/segmentID/fieldID/logID.
func BuildStatsLogPath(rootPath string, collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID) string {
	return path.Join(
		rootPath,
		common.SegmentStatslogPath,
		JoinIDPath(collectionID, partitionID, segmentID, fieldID, logID),
	)
}
// BuildBm25LogPath returns the BM25 log path formed by joining rootPath,
// common.SegmentBm25LogPath and the IDs in the order
// collectionID/partitionID/segmentID/fieldID/logID.
func BuildBm25LogPath(rootPath string, collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID) string {
	return path.Join(
		rootPath,
		common.SegmentBm25LogPath,
		JoinIDPath(collectionID, partitionID, segmentID, fieldID, logID),
	)
}
// GetSegmentIDFromStatsLogPath returns the segment ID found in logPath, or 0
// if it cannot be parsed.
func GetSegmentIDFromStatsLogPath(logPath string) typeutil.UniqueID {
	// Stats log paths end in .../segmentID/fieldID/logID, so the segment ID
	// is the third component counted from the end.
	const segmentPosFromEnd = 3
	return getSegmentIDFromPath(logPath, segmentPosFromEnd)
}
// BuildDeltaLogPath returns the delta log path formed by joining rootPath,
// common.SegmentDeltaLogPath and the IDs in the order
// collectionID/partitionID/segmentID/logID. Delta logs carry no field ID.
func BuildDeltaLogPath(rootPath string, collectionID, partitionID, segmentID, logID typeutil.UniqueID) string {
	return path.Join(
		rootPath,
		common.SegmentDeltaLogPath,
		JoinIDPath(collectionID, partitionID, segmentID, logID),
	)
}
// GetSegmentIDFromDeltaLogPath returns the segment ID found in logPath, or 0
// if it cannot be parsed.
func GetSegmentIDFromDeltaLogPath(logPath string) typeutil.UniqueID {
	// Delta log paths end in .../segmentID/logID, so the segment ID is the
	// second component counted from the end.
	const segmentPosFromEnd = 2
	return getSegmentIDFromPath(logPath, segmentPosFromEnd)
}
// getSegmentIDFromPath splits logPath on "/" and parses the component that is
// segmentIndex positions from the end (1 = last component) as a base-10 int64.
// It returns 0 when the path has fewer than segmentIndex components or when
// the selected component is not a valid integer.
func getSegmentIDFromPath(logPath string, segmentIndex int) typeutil.UniqueID {
	parts := strings.Split(logPath, pathSep)

	pos := len(parts) - segmentIndex
	if pos < 0 {
		return 0
	}

	if id, err := strconv.ParseInt(parts[pos], 10, 64); err == nil {
		return id
	}
	return 0
}
// JoinIDPath formats each ID as a base-10 string and joins them, in order,
// into a slash-separated path. With no IDs it returns the empty string.
func JoinIDPath(ids ...typeutil.UniqueID) string {
	segments := make([]string, len(ids))
	for i := range ids {
		segments[i] = strconv.FormatInt(ids[i], 10)
	}
	return path.Join(segments...)
}
// ExtractTextLogFilenames compresses text index log paths down to bare
// filenames to save space.
//
// The map is modified in place: for every non-nil entry, Files is replaced by
// a new slice in which each element is the portion of the original path after
// its last "/". Entries without a "/" are kept unchanged. Nil map values are
// skipped.
func ExtractTextLogFilenames(textStatsLogs map[int64]*datapb.TextIndexStats) {
	for _, stats := range textStatsLogs {
		if stats == nil {
			continue
		}

		paths := stats.GetFiles()
		names := make([]string, len(paths))
		for i, p := range paths {
			// LastIndex yields -1 when there is no separator, so the slice
			// starts at 0 and keeps the whole string in that case.
			names[i] = p[strings.LastIndex(p, pathSep)+1:]
		}
		stats.Files = names
	}
}
// BuildTextLogPaths expands text index log filenames back into full paths.
//
// The map is modified in place: for every non-nil entry, Files is replaced by
// a new slice. An element that already contains the common.TextIndexPath
// segment followed by "/" is treated as a full path (old format) and kept
// as-is. Any other element is treated as a bare filename (new format) and is
// joined onto
//
//	rootPath/<TextIndexPath>/buildID/version/collectionID/partitionID/segmentID/fieldID
//
// where buildID, version and fieldID come from the entry itself. Nil map
// values are skipped.
func BuildTextLogPaths(rootPath string, collectionID, partitionID, segmentID typeutil.UniqueID, textStatsLogs map[int64]*datapb.TextIndexStats) {
	// Substring whose presence marks an entry as an already-expanded full path.
	fullPathMarker := common.TextIndexPath + pathSep

	for _, stats := range textStatsLogs {
		if stats == nil {
			continue
		}

		dir := path.Join(
			rootPath,
			common.TextIndexPath,
			strconv.FormatInt(stats.GetBuildID(), 10),
			strconv.FormatInt(stats.GetVersion(), 10),
			strconv.FormatInt(collectionID, 10),
			strconv.FormatInt(partitionID, 10),
			strconv.FormatInt(segmentID, 10),
			strconv.FormatInt(stats.GetFieldID(), 10),
		)

		files := stats.GetFiles()
		resolved := make([]string, len(files))
		for i, name := range files {
			if strings.Contains(name, fullPathMarker) {
				// Old format: stored value is already a full path.
				resolved[i] = name
				continue
			}
			// New format: stored value is a filename relative to dir.
			resolved[i] = path.Join(dir, name)
		}
		stats.Files = resolved
	}
}