mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
issue: #44358

Implement complete snapshot management system including creation, deletion, listing, description, and restoration capabilities across all system components.

Key features:
- Create snapshots for entire collections
- Drop snapshots by name with proper cleanup
- List snapshots with collection filtering
- Describe snapshot details and metadata

Components added/modified:
- Client SDK with full snapshot API support and options
- DataCoord snapshot service with metadata management
- Proxy layer with task-based snapshot operations
- Protocol buffer definitions for snapshot RPCs
- Comprehensive unit tests with mockey framework
- Integration tests for end-to-end validation

Technical implementation:
- Snapshot metadata storage in etcd with proper indexing
- File-based snapshot data persistence in object storage
- Garbage collection integration for snapshot cleanup
- Error handling and validation across all operations
- Thread-safe operations with proper locking mechanisms

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
- Core invariant/assumption: snapshots are immutable point‑in‑time captures identified by (collection, snapshot name/ID); etcd snapshot metadata is authoritative for lifecycle (PENDING → COMMITTED → DELETING) and per‑segment manifests live in object storage (Avro / StorageV2). GC and restore logic must see snapshotRefIndex loaded (snapshotMeta.IsRefIndexLoaded) before reclaiming or relying on segment/index files.
- New capability added: full end‑to‑end snapshot subsystem — client SDK APIs (Create/Drop/List/Describe/Restore + restore job queries), DataCoord SnapshotWriter/Reader (Avro + StorageV2 manifests), snapshotMeta in meta, SnapshotManager orchestration (create/drop/describe/list/restore), copy‑segment restore tasks/inspector/checker, proxy & RPC surface, GC integration, and docs/tests — enabling point‑in‑time collection snapshots persisted to object storage and restorations orchestrated across components.
- Logic removed/simplified and why: duplicated recursive compaction/delta‑log traversal and ad‑hoc lookup code were consolidated behind two focused APIs/owners (Handler.GetDeltaLogFromCompactTo for delta traversal and SnapshotManager/SnapshotReader for snapshot I/O). MixCoord/coordinator broker paths were converted to thin RPC proxies. This eliminates multiple implementations of the same traversal/lookup, reducing divergence and simplifying responsibility boundaries.
- Why this does NOT introduce data loss or regressions: snapshot create/drop use explicit two‑phase semantics (PENDING → COMMIT/DELETING) with SnapshotWriter writing manifests and metadata before commit; GC uses snapshotRefIndex guards and IsRefIndexLoaded/GetSnapshotBySegment/GetSnapshotByIndex checks to avoid removing referenced files; restore flow pre‑allocates job IDs, validates resources (partitions/indexes), performs rollback on failure (rollbackRestoreSnapshot), and converts/updates segment/index metadata only after successful copy tasks. Extensive unit and integration tests exercise pending/deleting/GC/restore/error paths to ensure idempotence and protection against premature deletion.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
137 lines
5.2 KiB
Go
137 lines
5.2 KiB
Go
package base
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"strings"
|
|
"time"
|
|
|
|
"go.uber.org/zap"
|
|
"google.golang.org/grpc"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
|
"github.com/milvus-io/milvus/client/v2/entity"
|
|
client "github.com/milvus-io/milvus/client/v2/milvusclient"
|
|
"github.com/milvus-io/milvus/pkg/v2/log"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
func LoggingUnaryInterceptor() grpc.UnaryClientInterceptor {
|
|
// Limit debug logging for these methods
|
|
ratedLogMethods := typeutil.NewSet("GetFlushState", "GetLoadingProgress", "DescribeIndex")
|
|
|
|
logWithRateLimit := func(methodShortName string, logFunc func(msg string, fields ...zap.Field),
|
|
logRateFunc func(cost float64, msg string, fields ...zap.Field) bool,
|
|
msg string, fields ...zap.Field,
|
|
) {
|
|
if ratedLogMethods.Contain(methodShortName) {
|
|
logRateFunc(10, msg, fields...)
|
|
} else {
|
|
logFunc(msg, fields...)
|
|
}
|
|
}
|
|
|
|
return func(ctx context.Context, method string, req, reply interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
|
|
const maxLogLength = 300
|
|
_method := strings.Split(method, "/")
|
|
_methodShortName := _method[len(_method)-1]
|
|
|
|
// Marshal request
|
|
marshalWithFallback := func(v interface{}, fallbackMsg string) string {
|
|
dataJSON, err := json.Marshal(v)
|
|
if err != nil {
|
|
log.Error("Failed to marshal", zap.Error(err))
|
|
return fallbackMsg
|
|
}
|
|
dataStr := string(dataJSON)
|
|
if len(dataStr) > maxLogLength {
|
|
return dataStr[:maxLogLength] + "......"
|
|
}
|
|
return dataStr
|
|
}
|
|
|
|
reqStr := marshalWithFallback(req, "could not marshal request")
|
|
logWithRateLimit(_methodShortName, log.Info, log.RatedInfo, "Request", zap.String("method", _methodShortName), zap.String("reqs", reqStr))
|
|
|
|
// Invoke the actual method
|
|
start := time.Now()
|
|
errResp := invoker(ctx, method, req, reply, cc, opts...)
|
|
cost := time.Since(start)
|
|
|
|
// Marshal response
|
|
respStr := marshalWithFallback(reply, "could not marshal response")
|
|
logWithRateLimit(_methodShortName, log.Info, log.RatedInfo, "Response", zap.String("method", _methodShortName), zap.String("resp", respStr))
|
|
logWithRateLimit(_methodShortName, log.Debug, log.RatedDebug, "Cost", zap.String("method", _methodShortName), zap.Duration("cost", cost))
|
|
|
|
return errResp
|
|
}
|
|
}
|
|
|
|
type MilvusClient struct {
|
|
*client.Client
|
|
}
|
|
|
|
func NewMilvusClient(ctx context.Context, cfg *client.ClientConfig) (*MilvusClient, error) {
|
|
cfg.DialOptions = append(cfg.DialOptions, grpc.WithUnaryInterceptor(LoggingUnaryInterceptor()))
|
|
mClient, err := client.New(ctx, cfg)
|
|
return &MilvusClient{
|
|
Client: mClient,
|
|
}, err
|
|
}
|
|
|
|
func (mc *MilvusClient) Close(ctx context.Context) error {
|
|
err := mc.Client.Close(ctx)
|
|
return err
|
|
}
|
|
|
|
func (mc *MilvusClient) Compact(ctx context.Context, option client.CompactOption, callOptions ...grpc.CallOption) (int64, error) {
|
|
compactID, err := mc.Client.Compact(ctx, option, callOptions...)
|
|
return compactID, err
|
|
}
|
|
|
|
func (mc *MilvusClient) GetCompactionState(ctx context.Context, option client.GetCompactionStateOption, callOptions ...grpc.CallOption) (entity.CompactionState, error) {
|
|
state, err := mc.Client.GetCompactionState(ctx, option, callOptions...)
|
|
return state, err
|
|
}
|
|
|
|
// -- snapshot --
|
|
|
|
// CreateSnapshot creates a snapshot for the specified collection
|
|
func (mc *MilvusClient) CreateSnapshot(ctx context.Context, option client.CreateSnapshotOption, callOptions ...grpc.CallOption) error {
|
|
err := mc.Client.CreateSnapshot(ctx, option, callOptions...)
|
|
return err
|
|
}
|
|
|
|
// DropSnapshot drops a snapshot by name
|
|
func (mc *MilvusClient) DropSnapshot(ctx context.Context, option client.DropSnapshotOption, callOptions ...grpc.CallOption) error {
|
|
err := mc.Client.DropSnapshot(ctx, option, callOptions...)
|
|
return err
|
|
}
|
|
|
|
// ListSnapshots lists all snapshots for the specified collection or all snapshots if no collection is specified
|
|
func (mc *MilvusClient) ListSnapshots(ctx context.Context, option client.ListSnapshotsOption, callOptions ...grpc.CallOption) ([]string, error) {
|
|
snapshots, err := mc.Client.ListSnapshots(ctx, option, callOptions...)
|
|
return snapshots, err
|
|
}
|
|
|
|
// DescribeSnapshot describes a snapshot by name
|
|
func (mc *MilvusClient) DescribeSnapshot(ctx context.Context, option client.DescribeSnapshotOption, callOptions ...grpc.CallOption) (*milvuspb.DescribeSnapshotResponse, error) {
|
|
resp, err := mc.Client.DescribeSnapshot(ctx, option, callOptions...)
|
|
return resp, err
|
|
}
|
|
|
|
// RestoreSnapshot restores a snapshot to a target collection
|
|
func (mc *MilvusClient) RestoreSnapshot(ctx context.Context, option client.RestoreSnapshotOption, callOptions ...grpc.CallOption) (int64, error) {
|
|
return mc.Client.RestoreSnapshot(ctx, option, callOptions...)
|
|
}
|
|
|
|
// GetRestoreSnapshotState gets the state of a restore snapshot job
|
|
func (mc *MilvusClient) GetRestoreSnapshotState(ctx context.Context, option client.GetRestoreSnapshotStateOption, callOptions ...grpc.CallOption) (*milvuspb.RestoreSnapshotInfo, error) {
|
|
return mc.Client.GetRestoreSnapshotState(ctx, option, callOptions...)
|
|
}
|
|
|
|
// ListRestoreSnapshotJobs lists all restore snapshot jobs
|
|
func (mc *MilvusClient) ListRestoreSnapshotJobs(ctx context.Context, option client.ListRestoreSnapshotJobsOption, callOptions ...grpc.CallOption) ([]*milvuspb.RestoreSnapshotInfo, error) {
|
|
return mc.Client.ListRestoreSnapshotJobs(ctx, option, callOptions...)
|
|
}
|