mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
issue: https://github.com/milvus-io/milvus/issues/46678 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> - Core invariant: Text index log keys are canonicalized at KV (serialization) boundaries — etcd stores compressed filename-only entries, while in-memory and runtime consumers must receive full object-storage keys so Datanode/QueryNode can load text indexes directly. - Logic removed/simplified: ad-hoc reconstruction of full text-log paths scattered across components (garbage_collector.getTextLogs, querynodev2.LoadTextIndex, compactor/index task code) was removed; consumers now use TextIndexStats.Files as-provided (full keys). Path compression/decompression was centralized into KV marshal/unmarshal utilities (metautil.ExtractTextLogFilenames in marshalSegmentInfo and metautil.BuildTextLogPaths in kv_catalog.listSegments), eliminating redundant, inconsistent prefix-rebuilding logic that broke during rolling upgrades. - Why this does NOT cause data loss or regressions: before persist, marshalSegmentInfo compresses TextStatsLogs.Files to filenames (metautil.ExtractTextLogFilenames) so stored KV remains compact; on load, kv_catalog.listSegments calls metautil.BuildTextLogPaths to restore full paths and includes compatibility logic that leaves already-full keys unchanged. Thus every persisted filename is recoverable to a valid full key and consumers receive correct full paths (see marshalSegmentInfo → KV write path and kv_catalog.listSegments → reload path), preventing dropped or malformed keys. - Bug fix (refs #46678): resolves text-log loading failures during cluster upgrades by centralizing path handling at KV encode/decode and removing per-component path reconstruction — the immediate fix is changing consumers to read TextIndexStats.Files directly and relying on marshal/unmarshal to perform compression/expansion, preventing mixed-format failures during rolling upgrades. 
<!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: sijie-ni-0214 <sijie.ni@zilliz.com>
150 lines
4.7 KiB
Go
150 lines
4.7 KiB
Go
package metautil
|
|
|
|
import (
|
|
"path"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/milvus-io/milvus/pkg/v2/common"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
// pathSep is the separator used in this package to split log paths into
// components and to locate the filename portion of a path.
const pathSep = "/"
|
|
|
|
func BuildInsertLogPath(rootPath string, collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID) string {
|
|
k := JoinIDPath(collectionID, partitionID, segmentID, fieldID, logID)
|
|
return path.Join(rootPath, common.SegmentInsertLogPath, k)
|
|
}
|
|
|
|
func ParseInsertLogPath(path string) (collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID, ok bool) {
|
|
infos := strings.Split(path, pathSep)
|
|
l := len(infos)
|
|
if l < 6 {
|
|
ok = false
|
|
return
|
|
}
|
|
var err error
|
|
if collectionID, err = strconv.ParseInt(infos[l-5], 10, 64); err != nil {
|
|
return 0, 0, 0, 0, 0, false
|
|
}
|
|
if partitionID, err = strconv.ParseInt(infos[l-4], 10, 64); err != nil {
|
|
return 0, 0, 0, 0, 0, false
|
|
}
|
|
if segmentID, err = strconv.ParseInt(infos[l-3], 10, 64); err != nil {
|
|
return 0, 0, 0, 0, 0, false
|
|
}
|
|
if fieldID, err = strconv.ParseInt(infos[l-2], 10, 64); err != nil {
|
|
return 0, 0, 0, 0, 0, false
|
|
}
|
|
if logID, err = strconv.ParseInt(infos[l-1], 10, 64); err != nil {
|
|
return 0, 0, 0, 0, 0, false
|
|
}
|
|
ok = true
|
|
return
|
|
}
|
|
|
|
func GetSegmentIDFromInsertLogPath(logPath string) typeutil.UniqueID {
|
|
return getSegmentIDFromPath(logPath, 3)
|
|
}
|
|
|
|
func BuildStatsLogPath(rootPath string, collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID) string {
|
|
k := JoinIDPath(collectionID, partitionID, segmentID, fieldID, logID)
|
|
return path.Join(rootPath, common.SegmentStatslogPath, k)
|
|
}
|
|
|
|
func BuildBm25LogPath(rootPath string, collectionID, partitionID, segmentID, fieldID, logID typeutil.UniqueID) string {
|
|
k := JoinIDPath(collectionID, partitionID, segmentID, fieldID, logID)
|
|
return path.Join(rootPath, common.SegmentBm25LogPath, k)
|
|
}
|
|
|
|
func GetSegmentIDFromStatsLogPath(logPath string) typeutil.UniqueID {
|
|
return getSegmentIDFromPath(logPath, 3)
|
|
}
|
|
|
|
func BuildDeltaLogPath(rootPath string, collectionID, partitionID, segmentID, logID typeutil.UniqueID) string {
|
|
k := JoinIDPath(collectionID, partitionID, segmentID, logID)
|
|
return path.Join(rootPath, common.SegmentDeltaLogPath, k)
|
|
}
|
|
|
|
func GetSegmentIDFromDeltaLogPath(logPath string) typeutil.UniqueID {
|
|
return getSegmentIDFromPath(logPath, 2)
|
|
}
|
|
|
|
func getSegmentIDFromPath(logPath string, segmentIndex int) typeutil.UniqueID {
|
|
infos := strings.Split(logPath, pathSep)
|
|
l := len(infos)
|
|
if l < segmentIndex {
|
|
return 0
|
|
}
|
|
|
|
v, err := strconv.ParseInt(infos[l-segmentIndex], 10, 64)
|
|
if err != nil {
|
|
return 0
|
|
}
|
|
return v
|
|
}
|
|
|
|
// JoinIDPath joins ids to path format.
|
|
func JoinIDPath(ids ...typeutil.UniqueID) string {
|
|
idStr := make([]string, 0, len(ids))
|
|
for _, id := range ids {
|
|
idStr = append(idStr, strconv.FormatInt(id, 10))
|
|
}
|
|
return path.Join(idStr...)
|
|
}
|
|
|
|
// ExtractTextLogFilenames extracts only filenames from full paths to save space.
|
|
// It modifies the TextStatsLogs map in place, compressing full paths to filenames.
|
|
func ExtractTextLogFilenames(textStatsLogs map[int64]*datapb.TextIndexStats) {
|
|
for _, textStats := range textStatsLogs {
|
|
if textStats == nil {
|
|
continue
|
|
}
|
|
filenames := make([]string, 0, len(textStats.GetFiles()))
|
|
for _, fullPath := range textStats.GetFiles() {
|
|
idx := strings.LastIndex(fullPath, pathSep)
|
|
if idx < 0 {
|
|
filenames = append(filenames, fullPath)
|
|
} else {
|
|
filenames = append(filenames, fullPath[idx+1:])
|
|
}
|
|
}
|
|
textStats.Files = filenames
|
|
}
|
|
}
|
|
|
|
// BuildTextLogPaths reconstructs full paths from filenames for text index logs.
|
|
// This function is compatible with both old version (full paths) and new version (filenames only).
|
|
func BuildTextLogPaths(rootPath string, collectionID, partitionID, segmentID typeutil.UniqueID, textStatsLogs map[int64]*datapb.TextIndexStats) {
|
|
for _, textStats := range textStatsLogs {
|
|
if textStats == nil {
|
|
continue
|
|
}
|
|
prefix := path.Join(
|
|
rootPath,
|
|
common.TextIndexPath,
|
|
strconv.FormatInt(textStats.GetBuildID(), 10),
|
|
strconv.FormatInt(textStats.GetVersion(), 10),
|
|
strconv.FormatInt(collectionID, 10),
|
|
strconv.FormatInt(partitionID, 10),
|
|
strconv.FormatInt(segmentID, 10),
|
|
strconv.FormatInt(textStats.GetFieldID(), 10),
|
|
)
|
|
|
|
filenames := textStats.GetFiles()
|
|
fullPaths := make([]string, 0, len(filenames))
|
|
for _, filename := range filenames {
|
|
// Check if filename is already a full path (compatible with old version)
|
|
// If it contains the text_log path segment, treat it as a full path
|
|
if strings.Contains(filename, common.TextIndexPath+pathSep) {
|
|
fullPaths = append(fullPaths, filename)
|
|
} else {
|
|
// New version: filename only, need to join with prefix
|
|
fullPaths = append(fullPaths, path.Join(prefix, filename))
|
|
}
|
|
}
|
|
textStats.Files = fullPaths
|
|
}
|
|
}
|