milvus/tests/go_client/base/milvus_client.go
wei liu 975c91df16
feat: Add comprehensive snapshot functionality for collections (#44361)
issue: #44358

Implement complete snapshot management system including creation,
deletion, listing, description, and restoration capabilities across all
system components.

Key features:
- Create snapshots for entire collections
- Drop snapshots by name with proper cleanup
- List snapshots with collection filtering
- Describe snapshot details and metadata

Components added/modified:
- Client SDK with full snapshot API support and options
- DataCoord snapshot service with metadata management
- Proxy layer with task-based snapshot operations
- Protocol buffer definitions for snapshot RPCs
- Comprehensive unit tests with mockey framework
- Integration tests for end-to-end validation

Technical implementation:
- Snapshot metadata storage in etcd with proper indexing
- File-based snapshot data persistence in object storage
- Garbage collection integration for snapshot cleanup
- Error handling and validation across all operations
- Thread-safe operations with proper locking mechanisms

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant/assumption: snapshots are immutable point‑in‑time
captures identified by (collection, snapshot name/ID); etcd snapshot
metadata is authoritative for lifecycle (PENDING → COMMITTED → DELETING)
and per‑segment manifests live in object storage (Avro / StorageV2). GC
and restore logic must see snapshotRefIndex loaded
(snapshotMeta.IsRefIndexLoaded) before reclaiming or relying on
segment/index files.

- New capability added: full end‑to‑end snapshot subsystem — client SDK
APIs (Create/Drop/List/Describe/Restore + restore job queries),
DataCoord SnapshotWriter/Reader (Avro + StorageV2 manifests),
snapshotMeta in meta, SnapshotManager orchestration
(create/drop/describe/list/restore), copy‑segment restore
tasks/inspector/checker, proxy & RPC surface, GC integration, and
docs/tests — enabling point‑in‑time collection snapshots persisted to
object storage and restorations orchestrated across components.

- Logic removed/simplified and why: duplicated recursive
compaction/delta‑log traversal and ad‑hoc lookup code were consolidated
behind two focused APIs/owners (Handler.GetDeltaLogFromCompactTo for
delta traversal and SnapshotManager/SnapshotReader for snapshot I/O).
MixCoord/coordinator broker paths were converted to thin RPC proxies.
This eliminates multiple implementations of the same traversal/lookup,
reducing divergence and simplifying responsibility boundaries.

- Why this does NOT introduce data loss or regressions: snapshot
create/drop use explicit two‑phase semantics (PENDING → COMMIT/DELETING)
with SnapshotWriter writing manifests and metadata before commit; GC
uses snapshotRefIndex guards and
IsRefIndexLoaded/GetSnapshotBySegment/GetSnapshotByIndex checks to avoid
removing referenced files; restore flow pre‑allocates job IDs, validates
resources (partitions/indexes), performs rollback on failure
(rollbackRestoreSnapshot), and converts/updates segment/index metadata
only after successful copy tasks. Extensive unit and integration tests
exercise pending/deleting/GC/restore/error paths to ensure idempotence
and protection against premature deletion.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
2026-01-06 10:15:24 +08:00

137 lines
5.2 KiB
Go

package base
import (
"context"
"encoding/json"
"strings"
"time"
"go.uber.org/zap"
"google.golang.org/grpc"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/client/v2/entity"
client "github.com/milvus-io/milvus/client/v2/milvusclient"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// LoggingUnaryInterceptor returns a gRPC unary client interceptor that logs
// the request, the response and the elapsed time of every call it wraps.
//
// Request and response payloads are rendered with encoding/json and truncated
// to a fixed length so a large payload cannot flood the log. Request and
// response lines are emitted at Info level and the latency line at Debug
// level. Calls whose short method name is listed in ratedLogMethods go through
// the rate-limited logger variants instead of the plain ones.
//
// The interceptor never alters the call: the error produced by the invoker is
// returned to the caller unchanged.
func LoggingUnaryInterceptor() grpc.UnaryClientInterceptor {
	// Calls to these methods are logged through the rate-limited loggers.
	ratedLogMethods := typeutil.NewSet("GetFlushState", "GetLoadingProgress", "DescribeIndex")

	// logWithRateLimit picks the rate-limited logger (cost 10) for the listed
	// methods and the plain logger for everything else.
	logWithRateLimit := func(methodShortName string, logFunc func(msg string, fields ...zap.Field),
		logRateFunc func(cost float64, msg string, fields ...zap.Field) bool,
		msg string, fields ...zap.Field,
	) {
		if ratedLogMethods.Contain(methodShortName) {
			logRateFunc(10, msg, fields...)
			return
		}
		logFunc(msg, fields...)
	}

	return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
		const maxLogLength = 300

		// Keep only the part after the last "/"; a method string without any
		// "/" is used as-is (LastIndex returns -1, so the slice starts at 0).
		shortName := method[strings.LastIndex(method, "/")+1:]

		// marshalWithFallback renders v as JSON, truncated to maxLogLength
		// bytes; fallbackMsg is returned when v cannot be marshalled.
		marshalWithFallback := func(v interface{}, fallbackMsg string) string {
			dataJSON, err := json.Marshal(v)
			if err != nil {
				log.Error("Failed to marshal", zap.Error(err))
				return fallbackMsg
			}
			dataStr := string(dataJSON)
			if len(dataStr) > maxLogLength {
				// The cut is made on bytes and may land inside a multi-byte
				// rune; drop the dangling fragment so the log stays valid UTF-8.
				return strings.ToValidUTF8(dataStr[:maxLogLength], "") + "......"
			}
			return dataStr
		}

		reqStr := marshalWithFallback(req, "could not marshal request")
		logWithRateLimit(shortName, log.Info, log.RatedInfo, "Request", zap.String("method", shortName), zap.String("reqs", reqStr))

		// Invoke the actual method and measure how long it takes.
		start := time.Now()
		errResp := invoker(ctx, method, req, reply, cc, opts...)
		cost := time.Since(start)

		respStr := marshalWithFallback(reply, "could not marshal response")
		respFields := []zap.Field{zap.String("method", shortName), zap.String("resp", respStr)}
		if errResp != nil {
			// Surface the failure next to the reply so a failed call is
			// distinguishable from a successful one in the log.
			respFields = append(respFields, zap.Error(errResp))
		}
		logWithRateLimit(shortName, log.Info, log.RatedInfo, "Response", respFields...)
		logWithRateLimit(shortName, log.Debug, log.RatedDebug, "Cost", zap.String("method", shortName), zap.Duration("cost", cost))
		return errResp
	}
}
// MilvusClient wraps the SDK client used by the Go client tests. The SDK
// client is embedded, so its methods are promoted onto MilvusClient; the
// methods declared on MilvusClient in this file delegate to it explicitly.
type MilvusClient struct {
	*client.Client
}
// NewMilvusClient creates the underlying SDK client from cfg and wraps it in a
// MilvusClient.
//
// The logging interceptor from LoggingUnaryInterceptor is appended to
// cfg.DialOptions before the client is created; note that this modifies the
// cfg passed in by the caller.
//
// On failure it returns a nil *MilvusClient together with the error from
// client.New, so callers never receive a wrapper around a nil client.
func NewMilvusClient(ctx context.Context, cfg *client.ClientConfig) (*MilvusClient, error) {
	cfg.DialOptions = append(cfg.DialOptions, grpc.WithUnaryInterceptor(LoggingUnaryInterceptor()))
	mClient, err := client.New(ctx, cfg)
	if err != nil {
		return nil, err
	}
	return &MilvusClient{Client: mClient}, nil
}
// Close delegates to the embedded client's Close and returns its error
// unchanged.
func (mc *MilvusClient) Close(ctx context.Context) error {
	return mc.Client.Close(ctx)
}
// Compact delegates to the embedded client's Compact and returns the
// resulting int64 ID and error unchanged.
func (mc *MilvusClient) Compact(ctx context.Context, option client.CompactOption, callOptions ...grpc.CallOption) (int64, error) {
	return mc.Client.Compact(ctx, option, callOptions...)
}
// GetCompactionState delegates to the embedded client's GetCompactionState
// and returns the reported compaction state and error unchanged.
func (mc *MilvusClient) GetCompactionState(ctx context.Context, option client.GetCompactionStateOption, callOptions ...grpc.CallOption) (entity.CompactionState, error) {
	return mc.Client.GetCompactionState(ctx, option, callOptions...)
}
// -- snapshot --

// CreateSnapshot creates a snapshot for the specified collection by
// delegating to the embedded client; the client's error is returned unchanged.
func (mc *MilvusClient) CreateSnapshot(ctx context.Context, option client.CreateSnapshotOption, callOptions ...grpc.CallOption) error {
	return mc.Client.CreateSnapshot(ctx, option, callOptions...)
}
// DropSnapshot drops a snapshot by name by delegating to the embedded client;
// the client's error is returned unchanged.
func (mc *MilvusClient) DropSnapshot(ctx context.Context, option client.DropSnapshotOption, callOptions ...grpc.CallOption) error {
	return mc.Client.DropSnapshot(ctx, option, callOptions...)
}
// ListSnapshots lists all snapshots for the specified collection, or all
// snapshots if no collection is specified. It delegates to the embedded client
// and returns its string slice and error unchanged.
func (mc *MilvusClient) ListSnapshots(ctx context.Context, option client.ListSnapshotsOption, callOptions ...grpc.CallOption) ([]string, error) {
	return mc.Client.ListSnapshots(ctx, option, callOptions...)
}
// DescribeSnapshot describes a snapshot by name. It delegates to the embedded
// client and returns its DescribeSnapshotResponse and error unchanged.
func (mc *MilvusClient) DescribeSnapshot(ctx context.Context, option client.DescribeSnapshotOption, callOptions ...grpc.CallOption) (*milvuspb.DescribeSnapshotResponse, error) {
	return mc.Client.DescribeSnapshot(ctx, option, callOptions...)
}
// RestoreSnapshot restores a snapshot to a target collection. It delegates to
// the embedded client and returns its int64 result and error unchanged.
func (mc *MilvusClient) RestoreSnapshot(ctx context.Context, option client.RestoreSnapshotOption, callOptions ...grpc.CallOption) (int64, error) {
	out, err := mc.Client.RestoreSnapshot(ctx, option, callOptions...)
	return out, err
}
// GetRestoreSnapshotState gets the state of a restore snapshot job. It
// delegates to the embedded client and returns its RestoreSnapshotInfo and
// error unchanged.
func (mc *MilvusClient) GetRestoreSnapshotState(ctx context.Context, option client.GetRestoreSnapshotStateOption, callOptions ...grpc.CallOption) (*milvuspb.RestoreSnapshotInfo, error) {
	info, err := mc.Client.GetRestoreSnapshotState(ctx, option, callOptions...)
	return info, err
}
// ListRestoreSnapshotJobs lists all restore snapshot jobs. It delegates to the
// embedded client and returns its RestoreSnapshotInfo slice and error
// unchanged.
func (mc *MilvusClient) ListRestoreSnapshotJobs(ctx context.Context, option client.ListRestoreSnapshotJobsOption, callOptions ...grpc.CallOption) ([]*milvuspb.RestoreSnapshotInfo, error) {
	jobs, err := mc.Client.ListRestoreSnapshotJobs(ctx, option, callOptions...)
	return jobs, err
}