issue: #42884

Fixes an issue where delete records for a segment are lost from the delete buffer if `load segment` execution on the delegator is too slow, causing `syncTargetVersion` or other cleanup operations to clear them prematurely.

Changes include:
- Introduced `Pin` and `Unpin` methods in the `DeleteBuffer` interface and its implementations (`doubleCacheBuffer`, `listDeleteBuffer`).
- Added a `pinnedTimestamps` map to track timestamps protected from cleanup by specific segments.
- Modified `LoadSegments` in `shardDelegator` to `Pin` the relevant segments' delete records before loading and `Unpin` them afterwards.
- Added an `isPinned` check in the `UnRegister` and `TryDiscard` methods of `listDeleteBuffer` to skip cleanup when the corresponding timestamps are pinned.
- Added comprehensive unit tests for `Pin`, `Unpin`, and `isPinned`, covering basic, multiple-pin, concurrent, and edge cases.

This ensures the integrity of delete records by preventing their premature removal from the delete buffer during segment loading.

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
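
A minimal sketch of the load-time pattern this change introduces (names such as `loadSegment`, `pinner`, and `checkpointTs` are illustrative; the actual delegator code differs):

```go
package main

import "fmt"

// Minimal stand-in for the DeleteBuffer pin contract.
type pinner interface {
	Pin(ts uint64, segmentID int64)
	Unpin(ts uint64, segmentID int64)
}

// loadSegment brackets a (possibly slow) load with Pin/Unpin so that cleanup
// such as syncTargetVersion -> TryDiscard cannot drop the delete records the
// loading segment still needs to apply.
func loadSegment(buf pinner, segmentID int64, checkpointTs uint64, load func() error) error {
	buf.Pin(checkpointTs, segmentID)         // protect records >= checkpointTs
	defer buf.Unpin(checkpointTs, segmentID) // release once the load finishes
	return load()
}

type noopBuf struct{}

func (noopBuf) Pin(ts uint64, segmentID int64)   { fmt.Println("pin", ts, segmentID) }
func (noopBuf) Unpin(ts uint64, segmentID int64) { fmt.Println("unpin", ts, segmentID) }

func main() {
	_ = loadSegment(noopBuf{}, 1001, 42, func() error { return nil })
}
```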

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package deletebuffer

import (
	"context"
	"sort"
	"sync"

	"github.com/cockroachdb/errors"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus/internal/querynodev2/segments"
	"github.com/milvus-io/milvus/pkg/v2/log"
	"github.com/milvus-io/milvus/pkg/v2/util/tsoutil"
)

var errBufferFull = errors.New("buffer full")

type timed interface {
	Timestamp() uint64
	Size() int64
	EntryNum() int64
}
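
// For illustration only, a hypothetical entry satisfying timed (the concrete
// delete-record type lives elsewhere in this package):
//
//	type exampleEntry struct {
//		ts      uint64
//		entries int64
//		bytes   int64
//	}
//
//	func (e exampleEntry) Timestamp() uint64 { return e.ts }
//	func (e exampleEntry) Size() int64       { return e.bytes }
//	func (e exampleEntry) EntryNum() int64   { return e.entries }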

// DeleteBuffer is the interface for delete buffer.
type DeleteBuffer[T timed] interface {
	Put(T)
	ListAfter(uint64) []T
	SafeTs() uint64
	TryDiscard(uint64)
	// Size returns the current size information of the delete buffer: entryNum and memory size.
	Size() (entryNum, memorySize int64)

	// RegisterL0 registers L0 segments.
	RegisterL0(segments ...segments.Segment)
	// ListL0 lists all registered L0 segments.
	ListL0() []segments.Segment
	// UnRegister cleans delete data up to ts, including both L0 segments and the delete buffer.
	UnRegister(ts uint64)

	// Clear cleans up the whole delete buffer.
	Clear()

	// Pin and Unpin protect the delete records at a specific timestamp from
	// cleanup, on behalf of the given segment.
	Pin(ts uint64, segmentID int64)
	Unpin(ts uint64, segmentID int64)
}
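
// A typical pin/unpin pattern around a slow operation (illustrative sketch;
// the real call site is the delegator's LoadSegments):
//
//	buf.Pin(checkpointTs, segmentID)         // protect records >= checkpointTs
//	defer buf.Unpin(checkpointTs, segmentID) // release once the load completes
//	deltas := buf.ListAfter(checkpointTs)    // cleanup cannot race ahead of us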

// NewDoubleCacheDeleteBuffer creates a DeleteBuffer backed by a fixed-size double cache.
func NewDoubleCacheDeleteBuffer[T timed](startTs uint64, maxSize int64) DeleteBuffer[T] {
	return &doubleCacheBuffer[T]{
		head:             newCacheBlock[T](startTs, maxSize),
		maxSize:          maxSize,
		ts:               startTs,
		l0Segments:       make([]segments.Segment, 0),
		pinnedTimestamps: make(map[uint64]map[int64]struct{}),
	}
}
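
// For example (hypothetical entry type and capacity): a 16 MiB double-cache
// buffer starting from a channel checkpoint timestamp:
//
//	buf := NewDoubleCacheDeleteBuffer[*Item](checkpoint.GetTimestamp(), 16<<20)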

// doubleCacheBuffer implements DeleteBuffer with a fixed-size double cache.
type doubleCacheBuffer[T timed] struct {
	mut        sync.RWMutex
	head, tail *cacheBlock[T]
	maxSize    int64
	ts         uint64

	// maintain the l0 segment list
	l0Segments []segments.Segment

	// track pinned timestamps to prevent cleanup
	// map[timestamp]map[segmentID]struct{} - tracks which segments pin which timestamps
	pinnedTimestamps map[uint64]map[int64]struct{}
}

func (c *doubleCacheBuffer[T]) RegisterL0(segmentList ...segments.Segment) {
	c.mut.Lock()
	defer c.mut.Unlock()

	// Filter out nil segments
	for _, seg := range segmentList {
		if seg != nil {
			c.l0Segments = append(c.l0Segments, seg)
			log.Info("register l0 from delete buffer",
				zap.Int64("segmentID", seg.ID()),
				zap.Time("startPosition", tsoutil.PhysicalTime(seg.StartPosition().GetTimestamp())),
			)
		}
	}
}

func (c *doubleCacheBuffer[T]) ListL0() []segments.Segment {
	c.mut.RLock()
	defer c.mut.RUnlock()
	return c.l0Segments
}

func (c *doubleCacheBuffer[T]) UnRegister(ts uint64) {
	c.mut.Lock()
	defer c.mut.Unlock()
	var newSegments []segments.Segment

	for _, s := range c.l0Segments {
		if s.StartPosition().GetTimestamp() < ts {
			s.Release(context.TODO())
			log.Info("unregister l0 from delete buffer",
				zap.Int64("segmentID", s.ID()),
				zap.Time("startPosition", tsoutil.PhysicalTime(s.StartPosition().GetTimestamp())),
				zap.Time("cleanTs", tsoutil.PhysicalTime(ts)),
			)
			continue
		}
		newSegments = append(newSegments, s)
	}
	c.l0Segments = newSegments
}

func (c *doubleCacheBuffer[T]) Clear() {
	c.mut.Lock()
	defer c.mut.Unlock()

	for _, s := range c.l0Segments {
		s.Release(context.TODO())
	}
	c.l0Segments = nil
	// reset cache block
	c.tail = c.head
	c.head = newCacheBlock[T](c.ts, c.maxSize)
}

func (c *doubleCacheBuffer[T]) SafeTs() uint64 {
	return c.ts
}

// TryDiscard implements DeleteBuffer. It is a no-op for doubleCacheBuffer,
// which reclaims space via eviction in Put instead of explicit discard.
func (c *doubleCacheBuffer[T]) TryDiscard(_ uint64) {
}

// Put implements DeleteBuffer.
func (c *doubleCacheBuffer[T]) Put(entry T) {
	c.mut.Lock()
	defer c.mut.Unlock()

	err := c.head.Put(entry)
	if errors.Is(err, errBufferFull) {
		c.evict(entry.Timestamp(), entry)
	}
}

// ListAfter implements DeleteBuffer.
func (c *doubleCacheBuffer[T]) ListAfter(ts uint64) []T {
	c.mut.RLock()
	defer c.mut.RUnlock()
	var result []T
	if c.tail != nil {
		result = append(result, c.tail.ListAfter(ts)...)
	}
	if c.head != nil {
		result = append(result, c.head.ListAfter(ts)...)
	}
	return result
}

func (c *doubleCacheBuffer[T]) Size() (entryNum int64, memorySize int64) {
	c.mut.RLock()
	defer c.mut.RUnlock()

	if c.head != nil {
		blockNum, blockSize := c.head.Size()
		entryNum += blockNum
		memorySize += blockSize
	}

	if c.tail != nil {
		blockNum, blockSize := c.tail.Size()
		entryNum += blockNum
		memorySize += blockSize
	}

	return entryNum, memorySize
}

// evict rotates the double cache: the current head becomes the new tail (the
// old tail is dropped), a fresh head is created holding entry, and the safe
// timestamp advances to the new tail's headTs.
func (c *doubleCacheBuffer[T]) evict(newTs uint64, entry T) {
	c.tail = c.head
	c.head = &cacheBlock[T]{
		headTs:   newTs,
		maxSize:  c.maxSize / 2,
		size:     entry.Size(),
		entryNum: entry.EntryNum(),
		data:     []T{entry},
	}
	c.ts = c.tail.headTs
}
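
// Note: eviction here is size-driven and does not consult pinnedTimestamps;
// in this implementation pins are bookkeeping only (TryDiscard is a no-op for
// doubleCacheBuffer), so a full head still rotates out the old tail.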

func newCacheBlock[T timed](ts uint64, maxSize int64, elements ...T) *cacheBlock[T] {
	var entryNum, memorySize int64
	for _, element := range elements {
		entryNum += element.EntryNum()
		memorySize += element.Size()
	}
	return &cacheBlock[T]{
		headTs:   ts,
		maxSize:  maxSize,
		data:     elements,
		entryNum: entryNum,
		size:     memorySize,
	}
}

type cacheBlock[T timed] struct {
	mut      sync.RWMutex
	headTs   uint64
	entryNum int64
	size     int64
	maxSize  int64

	data []T
}

// Put adds an entry into the cache block.
// It returns errBufferFull if the block has no remaining capacity.
func (c *cacheBlock[T]) Put(entry T) error {
	c.mut.Lock()
	defer c.mut.Unlock()

	if c.size+entry.Size() > c.maxSize {
		return errBufferFull
	}

	c.data = append(c.data, entry)
	c.size += entry.Size()
	c.entryNum += entry.EntryNum()
	return nil
}

// ListAfter returns the entries whose timestamp is greater than or equal to the provided value.
func (c *cacheBlock[T]) ListAfter(ts uint64) []T {
	c.mut.RLock()
	defer c.mut.RUnlock()
	idx := sort.Search(len(c.data), func(idx int) bool {
		return c.data[idx].Timestamp() >= ts
	})
	// not found
	if idx == len(c.data) {
		return nil
	}
	return c.data[idx:]
}
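
// Note: the binary search above assumes c.data is sorted by Timestamp(), which
// holds as long as entries are Put in non-decreasing timestamp order.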

func (c *cacheBlock[T]) Size() (entryNum, memorySize int64) {
	return c.entryNum, c.size
}

// Pin protects the delete records at the given timestamp from cleanup, on
// behalf of the given segment.
func (c *doubleCacheBuffer[T]) Pin(ts uint64, segmentID int64) {
	c.mut.Lock()
	defer c.mut.Unlock()

	if c.pinnedTimestamps[ts] == nil {
		c.pinnedTimestamps[ts] = make(map[int64]struct{})
	}
	c.pinnedTimestamps[ts][segmentID] = struct{}{}

	log.Info("pin timestamp for segment",
		zap.Uint64("timestamp", ts),
		zap.Int64("segmentID", segmentID),
		zap.Time("physicalTime", tsoutil.PhysicalTime(ts)),
	)
}

// Unpin removes the given segment's protection for the given timestamp.
func (c *doubleCacheBuffer[T]) Unpin(ts uint64, segmentID int64) {
	c.mut.Lock()
	defer c.mut.Unlock()

	if segmentMap, exists := c.pinnedTimestamps[ts]; exists {
		delete(segmentMap, segmentID)
		if len(segmentMap) == 0 {
			delete(c.pinnedTimestamps, ts)
		}
	}

	log.Info("unpin timestamp for segment",
		zap.Uint64("timestamp", ts),
		zap.Int64("segmentID", segmentID),
		zap.Time("physicalTime", tsoutil.PhysicalTime(ts)),
	)
	// Note: doubleCacheBuffer doesn't implement cleanup logic in TryDiscard,
	// so no cleanup is triggered here.
}
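
// The change summary describes an isPinned check used by listDeleteBuffer's
// UnRegister/TryDiscard to skip cleanup while timestamps are pinned. A minimal
// sketch of that idea over the pinnedTimestamps layout used here (the actual
// listDeleteBuffer implementation may differ):
//
//	func isPinned(pinned map[uint64]map[int64]struct{}, cleanTs uint64) bool {
//		for ts := range pinned {
//			if ts <= cleanTs { // a pin at or before cleanTs protects records cleanup would drop
//				return true
//			}
//		}
//		return false
//	}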