package replicates import ( "context" "sync" "github.com/cockroachdb/errors" "google.golang.org/protobuf/encoding/protojson" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus/internal/streamingnode/server/wal/recovery" "github.com/milvus-io/milvus/internal/streamingnode/server/wal/utility" "github.com/milvus-io/milvus/internal/util/streamingutil/status" "github.com/milvus-io/milvus/pkg/v2/streaming/util/message" "github.com/milvus-io/milvus/pkg/v2/streaming/util/types" "github.com/milvus-io/milvus/pkg/v2/util/replicateutil" ) // ErrNotHandledByReplicateManager is a special error to indicate that the message should not be handled by the replicate manager. var ErrNotHandledByReplicateManager = errors.New("not handled by replicate manager") // ReplicateManagerRecoverParam is the parameter for recovering the replicate manager. type ReplicateManagerRecoverParam struct { ChannelInfo types.PChannelInfo CurrentClusterID string InitialRecoverSnapshot *recovery.RecoverySnapshot // the initial recover snapshot of the replicate manager. } // RecoverReplicateManager recovers the replicate manager from the initial recover snapshot. // It will recover the replicate manager from the initial recover snapshot. // If the wal is on replicating mode, it will recover the replicate state. func RecoverReplicateManager(param *ReplicateManagerRecoverParam) (ReplicateManager, error) { replicateConfigHelper, err := replicateutil.NewConfigHelper(param.CurrentClusterID, param.InitialRecoverSnapshot.Checkpoint.ReplicateConfig) if err != nil { return nil, newReplicateViolationErrorForConfig(param.InitialRecoverSnapshot.Checkpoint.ReplicateConfig, err) } rm := &replicatesManagerImpl{ mu: sync.Mutex{}, currentClusterID: param.CurrentClusterID, pchannel: param.ChannelInfo, replicateConfigHelper: replicateConfigHelper, } if !rm.isPrimaryRole() { // if current cluster is not the primary role, // recover the secondary state for it. if rm.secondaryState, err = recoverSecondaryState(param); err != nil { return nil, err } } return rm, nil } // replicatesManagerImpl is the implementation of the replicates manager. type replicatesManagerImpl struct { mu sync.Mutex pchannel types.PChannelInfo currentClusterID string replicateConfigHelper *replicateutil.ConfigHelper secondaryState *secondaryState // if the current cluster is not the primary role, it will have secondaryState. } // SwitchReplicateMode switches the replicates manager between replicating mode and non-replicating mode. func (impl *replicatesManagerImpl) SwitchReplicateMode(_ context.Context, msg message.MutableAlterReplicateConfigMessageV2) error { impl.mu.Lock() defer impl.mu.Unlock() newCfg := msg.Header().ReplicateConfiguration newGraph, err := replicateutil.NewConfigHelper(impl.currentClusterID, newCfg) if err != nil { return newReplicateViolationErrorForConfig(newCfg, err) } incomingCurrentClusterConfig := newGraph.GetCurrentCluster() switch incomingCurrentClusterConfig.Role() { case replicateutil.RolePrimary: // drop the replicating state if the current cluster is switched to primary. impl.secondaryState = nil case replicateutil.RoleSecondary: if impl.isPrimaryRole() || impl.secondaryState.SourceClusterID() != incomingCurrentClusterConfig.SourceCluster().GetClusterId() { // Only update the replicating state when the current cluster switch from primary to secondary, // or the source cluster is changed. impl.secondaryState = newSecondaryState( incomingCurrentClusterConfig.SourceCluster().GetClusterId(), incomingCurrentClusterConfig.MustGetSourceChannel(impl.pchannel.Name), ) } } impl.replicateConfigHelper = newGraph return nil } func (impl *replicatesManagerImpl) BeginReplicateMessage(ctx context.Context, msg message.MutableMessage) (g ReplicateAcker, err error) { rh := msg.ReplicateHeader() // some message type like timetick, create segment, flush are generated by wal itself. // it should never be handled by the replicates manager. if msg.MessageType().IsSelfControlled() { if rh != nil { return nil, status.NewIgnoreOperation("wal self-controlled message cannot be replicated") } return nil, ErrNotHandledByReplicateManager } impl.mu.Lock() defer func() { if err != nil { impl.mu.Unlock() } }() switch impl.getRole() { case replicateutil.RolePrimary: if rh != nil { return nil, status.NewReplicateViolation("replicate message cannot be received in primary role") } return nil, ErrNotHandledByReplicateManager case replicateutil.RoleSecondary: if rh == nil { return nil, status.NewReplicateViolation("non-replicate message cannot be received in secondary role") } return impl.beginReplicateMessage(ctx, msg) default: panic("unreachable: invalid role") } } // GetReplicateCheckpoint gets the replicate checkpoint. func (impl *replicatesManagerImpl) GetReplicateCheckpoint() (*utility.ReplicateCheckpoint, error) { impl.mu.Lock() defer impl.mu.Unlock() if impl.isPrimaryRole() { return nil, status.NewReplicateViolation("wal is not a secondary cluster in replicating topology") } return impl.secondaryState.GetCheckpoint(), nil } // beginReplicateMessage begins the replicate message operation. func (impl *replicatesManagerImpl) beginReplicateMessage(ctx context.Context, msg message.MutableMessage) (ReplicateAcker, error) { rh := msg.ReplicateHeader() if rh.ClusterID != impl.secondaryState.SourceClusterID() { return nil, status.NewReplicateViolation("cluster id mismatch, current: %s, expected: %s", rh.ClusterID, impl.secondaryState.SourceClusterID()) } // if the incoming message's time tick is less than the checkpoint's time tick, // it means that the message has been written to the wal, so it can be ignored. // txn message will share same time tick, so we only filter with <, it will be deduplicated by the txnHelper. isTxnBody := msg.TxnContext() != nil && msg.MessageType() != message.MessageTypeBeginTxn if (isTxnBody && rh.TimeTick < impl.secondaryState.GetCheckpoint().TimeTick) || (!isTxnBody && rh.TimeTick <= impl.secondaryState.GetCheckpoint().TimeTick) { return nil, status.NewIgnoreOperation("message is too old, message_id: %s, time_tick: %d, txn: %t, current time tick: %d", rh.MessageID, rh.TimeTick, isTxnBody, impl.secondaryState.GetCheckpoint().TimeTick) } if msg.TxnContext() != nil { return impl.startReplicateTxnMessage(ctx, msg, rh) } return impl.startReplicateNonTxnMessage(ctx, msg, rh) } // startReplicateTxnMessage starts the replicate txn message operation. func (impl *replicatesManagerImpl) startReplicateTxnMessage(_ context.Context, msg message.MutableMessage, rh *message.ReplicateHeader) (ReplicateAcker, error) { txn := msg.TxnContext() switch msg.MessageType() { case message.MessageTypeBeginTxn: if err := impl.secondaryState.StartBegin(txn, rh); err != nil { return nil, err } return replicateAckerImpl(func(err error) { if err == nil { impl.secondaryState.BeginDone(txn) } impl.mu.Unlock() }), nil case message.MessageTypeCommitTxn: if err := impl.secondaryState.StartCommit(txn); err != nil { return nil, err } // only update the checkpoint when the txn is committed. return replicateAckerImpl(func(err error) { if err == nil { impl.secondaryState.CommitDone(txn) impl.secondaryState.PushForwardCheckpoint(rh.TimeTick, rh.LastConfirmedMessageID) } impl.mu.Unlock() }), nil case message.MessageTypeRollbackTxn: panic("unreachable: rollback txn message should never be replicated when wal is on replicating mode") default: if err := impl.secondaryState.AddNewMessage(txn, rh); err != nil { return nil, err } return replicateAckerImpl(func(err error) { if err == nil { impl.secondaryState.AddNewMessageDone(rh) } impl.mu.Unlock() }), nil } } // startReplicateNonTxnMessage starts the replicate non-txn message operation. func (impl *replicatesManagerImpl) startReplicateNonTxnMessage(_ context.Context, _ message.MutableMessage, rh *message.ReplicateHeader) (ReplicateAcker, error) { if impl.secondaryState.CurrentTxn() != nil { return nil, status.NewReplicateViolation( "txn is in progress, so the incoming message must be txn message, current txn: %d", impl.secondaryState.CurrentTxn().TxnID, ) } return replicateAckerImpl(func(err error) { if err == nil { impl.secondaryState.PushForwardCheckpoint(rh.TimeTick, rh.LastConfirmedMessageID) } impl.mu.Unlock() }), nil } // Role returns the role of the current cluster in the replicate topology. func (impl *replicatesManagerImpl) Role() replicateutil.Role { impl.mu.Lock() defer impl.mu.Unlock() return impl.getRole() } // getRole returns the role of the current cluster in the replicate topology. func (impl *replicatesManagerImpl) getRole() replicateutil.Role { if impl.replicateConfigHelper == nil { return replicateutil.RolePrimary } return impl.replicateConfigHelper.MustGetCluster(impl.currentClusterID).Role() } // isPrimaryRole checks if the current cluster is the primary role. func (impl *replicatesManagerImpl) isPrimaryRole() bool { return impl.getRole() == replicateutil.RolePrimary } // newReplicateViolationErrorForConfig creates a new replicate violation error for the given configuration and error. func newReplicateViolationErrorForConfig(cfg *commonpb.ReplicateConfiguration, err error) error { bytes, _ := protojson.Marshal(cfg) return status.NewReplicateViolation("when greating replciate graph, %s, %s", string(bytes), err.Error()) }