enhance: Remove large segment ID arrays from QueryNode logs (#45719)

issue: #45718

Logging complete segment ID arrays caused excessive log volume (3-6 TB
for 200k segments). Remove arrays from logger fields and keep only
segment counts for observability.
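
As a rough illustration of the pattern (a sketch using go.uber.org/zap, not the exact repo code; segmentIDs is a stand-in for the loaded segment ID slice):

    package main

    import "go.uber.org/zap"

    func main() {
        log := zap.NewExample()
        segmentIDs := []int64{101, 102, 103} // stand-in; in practice this can be ~200k entries

        // Before: the full ID array is attached to the log line.
        log.Info("start loading...", zap.Int64s("requestSegments", segmentIDs))

        // After: only the count is kept for observability.
        log.Info("start loading...", zap.Int("segmentNum", len(segmentIDs)))
    }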

Changes:
- Remove requestSegments/preparedSegments arrays from Load logger
- Remove segmentIDs from BM25 stats logs
- Remove entries structure from sync distribution log

This reduces log volume by 99.99% for large-scale operations.
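
To get a feel for the per-line cost, the following standalone sketch (not part of this commit) JSON-encodes a single entry with the full ID array versus the count-only field; it assumes go.uber.org/zap, and the 200k count and ID values are purely illustrative:

    package main

    import (
        "fmt"

        "go.uber.org/zap"
        "go.uber.org/zap/zapcore"
    )

    func main() {
        // Hypothetical scale from the issue: 200k segment IDs on one log line.
        ids := make([]int64, 200_000)
        for i := range ids {
            ids[i] = 450000000000000000 + int64(i) // arbitrary 18-digit IDs
        }

        enc := zapcore.NewJSONEncoder(zap.NewProductionEncoderConfig())
        entry := zapcore.Entry{Message: "start loading..."}

        withIDs, _ := enc.EncodeEntry(entry, []zapcore.Field{zap.Int64s("requestSegments", ids)})
        withCount, _ := enc.EncodeEntry(entry, []zapcore.Field{zap.Int("segmentNum", len(ids))})

        fmt.Printf("line with ID array: %d bytes\n", withIDs.Len())
        fmt.Printf("line with count:    %d bytes\n", withCount.Len())
    }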

Signed-off-by: Wei Liu <wei.liu@zilliz.com>

2 changed files with 2 additions and 14 deletions

@@ -221,10 +221,6 @@ func (sd *shardDelegator) GetSegmentInfo(readable bool) ([]SnapshotItem, []Segme
// SyncDistribution revises distribution.
func (sd *shardDelegator) SyncDistribution(ctx context.Context, entries ...SegmentEntry) {
log := sd.getLogger(ctx)
log.Info("sync distribution", zap.Any("entries", entries))
sd.distribution.AddDistributions(entries...)
}
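
If some visibility is still wanted on this path, a count-only field in the spirit of the loader changes could look like the sketch below; this is not what the commit does, and it reuses the delegator's logger and the entries argument shown above:

    // Sketch only: record how many entries are synced, never the entries themselves.
    log := sd.getLogger(ctx)
    log.Info("sync distribution", zap.Int("entryNum", len(entries)))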

@@ -278,11 +278,6 @@ func (loader *segmentLoader) Load(ctx context.Context,
infos := loader.prepare(ctx, segmentType, segments...)
defer loader.unregister(infos...)
log = log.With(
zap.Int64s("requestSegments", lo.Map(segments, func(s *querypb.SegmentLoadInfo, _ int) int64 { return s.GetSegmentID() })),
zap.Int64s("preparedSegments", lo.Map(infos, func(s *querypb.SegmentLoadInfo, _ int) int64 { return s.GetSegmentID() })),
)
// continue to wait other task done
log.Info("start loading...", zap.Int("segmentNum", len(segments)), zap.Int("afterFilter", len(infos)))
@@ -633,10 +628,7 @@ func (loader *segmentLoader) LoadBM25Stats(ctx context.Context, collectionID int
return nil, nil
}
segments := lo.Map(infos, func(info *querypb.SegmentLoadInfo, _ int) int64 {
return info.GetSegmentID()
})
log.Info("start loading bm25 stats for remote...", zap.Int64("collectionID", collectionID), zap.Int64s("segmentIDs", segments), zap.Int("segmentNum", segmentNum))
log.Info("start loading bm25 stats for remote...", zap.Int64("collectionID", collectionID), zap.Int("segmentNum", segmentNum))
loadedStats := typeutil.NewConcurrentMap[int64, map[int64]*storage.BM25Stats]()
loadRemoteBM25Func := func(idx int) error {
@@ -661,7 +653,7 @@ func (loader *segmentLoader) LoadBM25Stats(ctx context.Context, collectionID int
err := funcutil.ProcessFuncParallel(segmentNum, segmentNum, loadRemoteBM25Func, "loadRemoteBM25Func")
if err != nil {
// no partial success here
log.Warn("failed to load bm25 stats for remote segment", zap.Int64("collectionID", collectionID), zap.Int64s("segmentIDs", segments), zap.Error(err))
log.Warn("failed to load bm25 stats for remote segment", zap.Int64("collectionID", collectionID), zap.Error(err))
return nil, err
}