mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
issue: https://github.com/milvus-io/milvus/issues/46678

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
- Core invariant: Text index log keys are canonicalized at KV (serialization) boundaries — etcd stores compressed filename-only entries, while in-memory and runtime consumers must receive full object-storage keys so Datanode/QueryNode can load text indexes directly.
- Logic removed/simplified: ad-hoc reconstruction of full text-log paths scattered across components (garbage_collector.getTextLogs, querynodev2.LoadTextIndex, compactor/index task code) was removed; consumers now use TextIndexStats.Files as-provided (full keys). Path compression/decompression was centralized into KV marshal/unmarshal utilities (metautil.ExtractTextLogFilenames in marshalSegmentInfo and metautil.BuildTextLogPaths in kv_catalog.listSegments), eliminating redundant, inconsistent prefix-rebuilding logic that broke during rolling upgrades.
- Why this does NOT cause data loss or regressions: before persist, marshalSegmentInfo compresses TextStatsLogs.Files to filenames (metautil.ExtractTextLogFilenames) so stored KV remains compact; on load, kv_catalog.listSegments calls metautil.BuildTextLogPaths to restore full paths and includes compatibility logic that leaves already-full keys unchanged. Thus every persisted filename is recoverable to a valid full key and consumers receive correct full paths (see marshalSegmentInfo → KV write path and kv_catalog.listSegments → reload path), preventing dropped or malformed keys.
- Bug fix (refs #46678): resolves text-log loading failures during cluster upgrades by centralizing path handling at KV encode/decode and removing per-component path reconstruction — the immediate fix is changing consumers to read TextIndexStats.Files directly and relying on marshal/unmarshal to perform compression/expansion, preventing mixed-format failures during rolling upgrades.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: sijie-ni-0214 <sijie.ni@zilliz.com>
198 lines
5.6 KiB
Go
198 lines
5.6 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package metautil
|
|
|
|
import (
|
|
"path"
|
|
"reflect"
|
|
"testing"
|
|
|
|
"github.com/milvus-io/milvus/pkg/v2/common"
|
|
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
func TestParseInsertLogPath(t *testing.T) {
|
|
type args struct {
|
|
path string
|
|
}
|
|
tests := []struct {
|
|
name string
|
|
args args
|
|
wantCollectionID typeutil.UniqueID
|
|
wantPartitionID typeutil.UniqueID
|
|
wantSegmentID typeutil.UniqueID
|
|
wantFieldID typeutil.UniqueID
|
|
wantLogID typeutil.UniqueID
|
|
wantOk bool
|
|
}{
|
|
{
|
|
"test parse insert log path",
|
|
args{path: "8a8c3ac2298b12f/insert_log/446266956600703270/446266956600703326/447985737531772787/102/447985737523710526"},
|
|
446266956600703270,
|
|
446266956600703326,
|
|
447985737531772787,
|
|
102,
|
|
447985737523710526,
|
|
true,
|
|
},
|
|
|
|
{
|
|
"test parse insert log path negative1",
|
|
args{path: "foobar"},
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
false,
|
|
},
|
|
|
|
{
|
|
"test parse insert log path negative2",
|
|
args{path: "8a8c3ac2298b12f/insert_log/446266956600703270/446266956600703326/447985737531772787/102/foo"},
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
false,
|
|
},
|
|
|
|
{
|
|
"test parse insert log path negative3",
|
|
args{path: "8a8c3ac2298b12f/insert_log/446266956600703270/446266956600703326/447985737531772787/foo/447985737523710526"},
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
false,
|
|
},
|
|
|
|
{
|
|
"test parse insert log path negative4",
|
|
args{path: "8a8c3ac2298b12f/insert_log/446266956600703270/446266956600703326/foo/102/447985737523710526"},
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
false,
|
|
},
|
|
|
|
{
|
|
"test parse insert log path negative5",
|
|
args{path: "8a8c3ac2298b12f/insert_log/446266956600703270/foo/447985737531772787/102/447985737523710526"},
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
false,
|
|
},
|
|
|
|
{
|
|
"test parse insert log path negative6",
|
|
args{path: "8a8c3ac2298b12f/insert_log/foo/446266956600703326/447985737531772787/102/447985737523710526"},
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
0,
|
|
false,
|
|
},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
gotCollectionID, gotPartitionID, gotSegmentID, gotFieldID, gotLogID, gotOk := ParseInsertLogPath(tt.args.path)
|
|
if !reflect.DeepEqual(gotCollectionID, tt.wantCollectionID) {
|
|
t.Errorf("ParseInsertLogPath() gotCollectionID = %v, want %v", gotCollectionID, tt.wantCollectionID)
|
|
}
|
|
if !reflect.DeepEqual(gotPartitionID, tt.wantPartitionID) {
|
|
t.Errorf("ParseInsertLogPath() gotPartitionID = %v, want %v", gotPartitionID, tt.wantPartitionID)
|
|
}
|
|
if !reflect.DeepEqual(gotSegmentID, tt.wantSegmentID) {
|
|
t.Errorf("ParseInsertLogPath() gotSegmentID = %v, want %v", gotSegmentID, tt.wantSegmentID)
|
|
}
|
|
if !reflect.DeepEqual(gotFieldID, tt.wantFieldID) {
|
|
t.Errorf("ParseInsertLogPath() gotFieldID = %v, want %v", gotFieldID, tt.wantFieldID)
|
|
}
|
|
if !reflect.DeepEqual(gotLogID, tt.wantLogID) {
|
|
t.Errorf("ParseInsertLogPath() gotLogID = %v, want %v", gotLogID, tt.wantLogID)
|
|
}
|
|
if gotOk != tt.wantOk {
|
|
t.Errorf("ParseInsertLogPath() gotOk = %v, want %v", gotOk, tt.wantOk)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestExtractTextLogFilenames(t *testing.T) {
|
|
textStatsLogs := map[int64]*datapb.TextIndexStats{
|
|
100: {
|
|
FieldID: 100,
|
|
Files: []string{
|
|
"/root/text_log/1/2/10/20/30/100/file1.txt",
|
|
"/root/text_log/1/2/10/20/30/100/file2.txt",
|
|
},
|
|
},
|
|
}
|
|
|
|
ExtractTextLogFilenames(textStatsLogs)
|
|
|
|
wantFiles := []string{"file1.txt", "file2.txt"}
|
|
if !reflect.DeepEqual(textStatsLogs[100].Files, wantFiles) {
|
|
t.Errorf("ExtractTextLogFilenames() Files = %v, want %v", textStatsLogs[100].Files, wantFiles)
|
|
}
|
|
}
|
|
|
|
func TestBuildTextLogPaths(t *testing.T) {
|
|
rootPath := "/root"
|
|
collectionID := typeutil.UniqueID(10)
|
|
partitionID := typeutil.UniqueID(20)
|
|
segmentID := typeutil.UniqueID(30)
|
|
|
|
// Test building paths from filenames (new version)
|
|
textStatsLogs := map[int64]*datapb.TextIndexStats{
|
|
100: {
|
|
FieldID: 100,
|
|
BuildID: 1,
|
|
Version: 2,
|
|
Files: []string{"file1.txt", "file2.txt"},
|
|
},
|
|
}
|
|
|
|
BuildTextLogPaths(rootPath, collectionID, partitionID, segmentID, textStatsLogs)
|
|
|
|
wantFiles := []string{
|
|
path.Join(rootPath, common.TextIndexPath, "1", "2", "10", "20", "30", "100", "file1.txt"),
|
|
path.Join(rootPath, common.TextIndexPath, "1", "2", "10", "20", "30", "100", "file2.txt"),
|
|
}
|
|
if !reflect.DeepEqual(textStatsLogs[100].Files, wantFiles) {
|
|
t.Errorf("BuildTextLogPaths() Files = %v, want %v", textStatsLogs[100].Files, wantFiles)
|
|
}
|
|
|
|
// Test old version compatibility (already full paths)
|
|
fullPath := path.Join(rootPath, common.TextIndexPath, "1", "2", "10", "20", "30", "100", "file3.txt")
|
|
textStatsLogs[100].Files = []string{fullPath}
|
|
BuildTextLogPaths(rootPath, collectionID, partitionID, segmentID, textStatsLogs)
|
|
if textStatsLogs[100].Files[0] != fullPath {
|
|
t.Errorf("BuildTextLogPaths() should keep full path unchanged, got %v", textStatsLogs[100].Files[0])
|
|
}
|
|
}
|