milvus/internal/querynodev2/segments/segment_interface.go
congqixia 21ed1fabfd
feat: support reopen segment for data/schema changes (#46359)
issue: #46358

This PR implements segment reopening functionality on query nodes,
enabling the application of data or schema changes to already-loaded
segments without requiring a full reload.

### Core (C++)

**New SegmentLoadInfo class**
(`internal/core/src/segcore/SegmentLoadInfo.h/cpp`):
- Encapsulates segment load configuration with structured access
- Implements `ComputeDiff()` to calculate differences between old and
new load states
- Tracks indexes, binlogs, and column groups that need to be loaded or
dropped
- Provides `ConvertFieldIndexInfoToLoadIndexInfo()` for index loading

**ChunkedSegmentSealedImpl modifications**:
- Added `Reopen(const SegmentLoadInfo&)` method to apply incremental
changes based on computed diff
- Refactored `LoadColumnGroups()` and `LoadColumnGroup()` to support
selective loading via field ID map
- Extracted `LoadBatchIndexes()` and `LoadBatchFieldData()` for reusable
batch loading logic
- Added `LoadManifest()` for manifest-based loading path
- Updated all methods to use `SegmentLoadInfo` wrapper instead of direct
proto access

**SegmentGrowingImpl modifications**:
- Added `Reopen()` stub method for interface compliance

**C API additions** (`segment_c.h/cpp`):
- Added `ReopenSegment()` function exposing reopen to Go layer

### Go Side

**QueryNode handlers** (`internal/querynodev2/`):
- Added `HandleReopen()` in handlers.go
- Added `ReopenSegments()` RPC in services.go

**Segment interface** (`internal/querynodev2/segments/`):
- Extended `Segment` interface with `Reopen()` method
- Implemented `Reopen()` in LocalSegment
- Added `Reopen()` to segment loader

**Segcore wrapper** (`internal/util/segcore/`):
- Added `Reopen()` method in segment.go
- Added `ReopenSegmentRequest` in requests.go

### Proto

- Added new fields to support reopen in `query_coord.proto`

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
2025-12-17 15:49:16 +08:00

121 lines
4.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package segments
import (
"context"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
pkoracle "github.com/milvus-io/milvus/internal/querynodev2/pkoracle"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/segcore"
"github.com/milvus-io/milvus/pkg/v2/proto/datapb"
"github.com/milvus-io/milvus/pkg/v2/proto/querypb"
"github.com/milvus-io/milvus/pkg/v2/proto/segcorepb"
"github.com/milvus-io/milvus/pkg/v2/util/metautil"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// ResourceUsage is used to estimate the resource usage of a sealed segment.
//
// NOTE(review): the units of the size fields are not stated in this file;
// presumably bytes — confirm against the code that fills them in.
type ResourceUsage struct {
	// MemorySize is the estimated memory footprint.
	MemorySize uint64
	// DiskSize is the estimated disk footprint.
	DiskSize uint64
	// MmapFieldCount is a count of mmap fields (presumably the number of
	// fields loaded through mmap — confirm).
	MmapFieldCount int
	// FieldGpuMemorySize holds GPU memory sizes as a list; what each
	// position corresponds to is not visible here — confirm with the producer.
	FieldGpuMemorySize []uint64
}
// Segment is the interface of a segment implementation.
// Some methods can not apply to all segment types, such as LoadInfo, ResourceUsageEstimate.
// Adding more interfaces to represent different segment types would be a better implementation.
type Segment interface {
	// Properties
	ID() int64
	DatabaseName() string
	ResourceGroup() string
	Collection() int64
	Partition() int64
	Shard() metautil.Channel
	Version() int64
	CASVersion(int64, int64) bool
	StartPosition() *msgpb.MsgPosition
	Type() SegmentType
	Level() datapb.SegmentLevel
	IsSorted() bool
	LoadInfo() *querypb.SegmentLoadInfo

	// PinIfNotReleased pins the segment to prevent it from being released.
	PinIfNotReleased() error
	// Unpin unpins the segment to allow it to be released.
	Unpin()

	// Stats related

	// InsertCount returns the number of inserted rows, not affected by deletion.
	InsertCount() int64
	// RowNum returns the number of rows; it is slow, so DO NOT call it in a loop.
	RowNum() int64
	MemSize() int64
	// ResourceUsageEstimate returns the estimated resource usage of the segment.
	ResourceUsageEstimate() ResourceUsage

	// Index related
	GetIndexByID(indexID int64) *IndexedFieldInfo
	GetIndex(fieldID int64) []*IndexedFieldInfo
	ExistIndex(fieldID int64) bool
	Indexes() []*IndexedFieldInfo
	HasRawData(fieldID int64) bool
	DropIndex(ctx context.Context, indexID int64) error

	// Modification related
	Insert(ctx context.Context, rowIDs []int64, timestamps []typeutil.Timestamp, record *segcorepb.InsertRecord) error
	Delete(ctx context.Context, primaryKeys storage.PrimaryKeys, timestamps []typeutil.Timestamp) error
	LoadDeltaData(ctx context.Context, deltaData *storage.DeltaData) error
	LastDeltaTimestamp() uint64
	Load(ctx context.Context) error
	FinishLoad() error
	Release(ctx context.Context, opts ...releaseOption)
	// Reopen takes a new load info for the segment and reports failure via
	// the returned error.
	// NOTE(review): presumably used to apply data or schema changes to an
	// already-loaded segment without a full reload — confirm with the
	// implementations.
	Reopen(ctx context.Context, newLoadInfo *querypb.SegmentLoadInfo) error

	// Bloom filter related
	SetBloomFilter(bf *pkoracle.BloomFilterSet)
	BloomFilterExist() bool
	UpdateBloomFilter(pks []storage.PrimaryKey)
	MayPkExist(lc *storage.LocationsCache) bool
	BatchPkExist(lc *storage.BatchLocationsCache) []bool

	// Get min/max
	GetMinPk() *storage.PrimaryKey
	GetMaxPk() *storage.PrimaryKey

	// BM25 stats
	UpdateBM25Stats(stats map[int64]*storage.BM25Stats)
	GetBM25Stats() map[int64]*storage.BM25Stats

	// Read operations
	Search(ctx context.Context, searchReq *segcore.SearchRequest) (*segcore.SearchResult, error)
	Retrieve(ctx context.Context, plan *segcore.RetrievePlan) (*segcorepb.RetrieveResults, error)
	RetrieveByOffsets(ctx context.Context, plan *segcore.RetrievePlanWithOffsets) (*segcorepb.RetrieveResults, error)

	// Lazy load related
	IsLazyLoad() bool
	ResetIndexesLazyLoad(lazyState bool)
	NeedUpdatedVersion() int64
	RemoveUnusedFieldFiles() error

	// JSON stats info keyed by an int64 (presumably the field ID — confirm).
	GetFieldJSONIndexStats() map[int64]*querypb.JsonStatsInfo
}