mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 09:08:43 +08:00
issue: #44558, #44123 - Update config(A->C) to A and C, config(B) to B on replicate topology (A->B,A->C) can remove the B from replicate topology - Fix some metric error of CDC Signed-off-by: chyezh <chyezh@outlook.com>
191 lines
7.2 KiB
Go
191 lines
7.2 KiB
Go
package streaming
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus/internal/streamingnode/server/wal"
|
|
"github.com/milvus-io/milvus/internal/util/streamingutil/status"
|
|
"github.com/milvus-io/milvus/pkg/v2/streaming/util/message"
|
|
"github.com/milvus-io/milvus/pkg/v2/streaming/util/types"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/funcutil"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/replicateutil"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
var _ ReplicateService = replicateService{}
|
|
|
|
type replicateService struct {
|
|
*walAccesserImpl
|
|
}
|
|
|
|
// Append appends the message into current cluster.
|
|
func (s replicateService) Append(ctx context.Context, rmsg message.ReplicateMutableMessage) (*types.AppendResult, error) {
|
|
rh := rmsg.ReplicateHeader()
|
|
if rh == nil {
|
|
panic("message is not a replicate message")
|
|
}
|
|
|
|
if !s.lifetime.Add(typeutil.LifetimeStateWorking) {
|
|
return nil, ErrWALAccesserClosed
|
|
}
|
|
defer s.lifetime.Done()
|
|
|
|
msg, err := s.overwriteReplicateMessage(ctx, rmsg, rh)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return s.appendToWAL(ctx, msg)
|
|
}
|
|
|
|
func (s replicateService) UpdateReplicateConfiguration(ctx context.Context, config *commonpb.ReplicateConfiguration) error {
|
|
if !s.lifetime.Add(typeutil.LifetimeStateWorking) {
|
|
return ErrWALAccesserClosed
|
|
}
|
|
defer s.lifetime.Done()
|
|
|
|
return s.streamingCoordClient.Assignment().UpdateReplicateConfiguration(ctx, config)
|
|
}
|
|
|
|
func (s replicateService) GetReplicateConfiguration(ctx context.Context) (*replicateutil.ConfigHelper, error) {
|
|
if !s.lifetime.Add(typeutil.LifetimeStateWorking) {
|
|
return nil, ErrWALAccesserClosed
|
|
}
|
|
defer s.lifetime.Done()
|
|
|
|
config, err := s.streamingCoordClient.Assignment().GetReplicateConfiguration(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return config, nil
|
|
}
|
|
|
|
func (s replicateService) GetReplicateCheckpoint(ctx context.Context, channelName string) (*wal.ReplicateCheckpoint, error) {
|
|
if !s.lifetime.Add(typeutil.LifetimeStateWorking) {
|
|
return nil, ErrWALAccesserClosed
|
|
}
|
|
defer s.lifetime.Done()
|
|
|
|
checkpoint, err := s.handlerClient.GetReplicateCheckpoint(ctx, channelName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return checkpoint, nil
|
|
}
|
|
|
|
// overwriteReplicateMessage overwrites the replicate message.
|
|
// because some message such as create collection message write vchannel in its body, so we need to overwrite the message.
|
|
func (s replicateService) overwriteReplicateMessage(ctx context.Context, msg message.ReplicateMutableMessage, rh *message.ReplicateHeader) (message.MutableMessage, error) {
|
|
cfg, err := s.streamingCoordClient.Assignment().GetReplicateConfiguration(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Get target vchannel on current cluster that should be written to
|
|
currentCluster := cfg.GetCluster(s.clusterID)
|
|
if currentCluster.Role() == replicateutil.RolePrimary {
|
|
return nil, status.NewReplicateViolation("primary cluster cannot receive replicate message")
|
|
}
|
|
sourceCluster := cfg.GetCluster(rh.ClusterID)
|
|
if sourceCluster == nil {
|
|
return nil, status.NewReplicateViolation("source cluster %s not found in replicate configuration", rh.ClusterID)
|
|
}
|
|
targetVChannel, err := s.getTargetVChannel(sourceCluster, msg.VChannel())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Get target broadcast vchannels on current cluster that should be written to
|
|
if bh := msg.BroadcastHeader(); bh != nil {
|
|
// broadcast header have vchannels, so we need to overwrite it.
|
|
targetBroadcastVChannels := make([]string, 0, len(bh.VChannels))
|
|
for _, vchannel := range bh.VChannels {
|
|
targetBroadcastVChannel, err := s.getTargetVChannel(sourceCluster, vchannel)
|
|
if err != nil {
|
|
return nil, status.NewReplicateViolation("failed to get target channel, %s", err.Error())
|
|
}
|
|
targetBroadcastVChannels = append(targetBroadcastVChannels, targetBroadcastVChannel)
|
|
}
|
|
msg.OverwriteReplicateVChannel(targetVChannel, targetBroadcastVChannels)
|
|
} else {
|
|
msg.OverwriteReplicateVChannel(targetVChannel)
|
|
}
|
|
|
|
// create collection message will set the vchannel in its body, so we need to overwrite it.
|
|
switch msg.MessageType() {
|
|
case message.MessageTypeCreateCollection:
|
|
if err := s.overwriteCreateCollectionMessage(sourceCluster, msg); err != nil {
|
|
return nil, err
|
|
}
|
|
case message.MessageTypeAlterReplicateConfig:
|
|
if err := s.overwriteAlterReplicateConfigMessage(cfg, msg); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
if funcutil.IsControlChannel(msg.VChannel()) {
|
|
assignments, err := s.streamingCoordClient.Assignment().GetLatestAssignments(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !strings.HasPrefix(msg.VChannel(), assignments.PChannelOfCChannel()) {
|
|
return nil, status.NewReplicateViolation("invalid control channel %s, expected pchannel %s", msg.VChannel(), assignments.PChannelOfCChannel())
|
|
}
|
|
}
|
|
return msg, nil
|
|
}
|
|
|
|
// getTargetVChannel gets the target vchannel of the source vchannel.
|
|
func (s replicateService) getTargetVChannel(sourceCluster *replicateutil.MilvusCluster, sourceVChannel string) (string, error) {
|
|
sourcePChannel := funcutil.ToPhysicalChannel(sourceVChannel)
|
|
targetPChannel, err := sourceCluster.GetTargetChannel(sourcePChannel, s.clusterID)
|
|
if err != nil {
|
|
return "", status.NewReplicateViolation("failed to get target channel, %s", err.Error())
|
|
}
|
|
return strings.Replace(sourceVChannel, sourcePChannel, targetPChannel, 1), nil
|
|
}
|
|
|
|
// overwriteCreateCollectionMessage overwrites the create collection message.
|
|
func (s replicateService) overwriteCreateCollectionMessage(sourceCluster *replicateutil.MilvusCluster, msg message.ReplicateMutableMessage) error {
|
|
createCollectionMsg := message.MustAsMutableCreateCollectionMessageV1(msg)
|
|
body := createCollectionMsg.MustBody()
|
|
for idx, sourcePChannel := range body.PhysicalChannelNames {
|
|
targetPChannel, err := sourceCluster.GetTargetChannel(sourcePChannel, s.clusterID)
|
|
if err != nil {
|
|
return status.NewReplicateViolation("failed to get target channel, %s", err.Error())
|
|
}
|
|
body.PhysicalChannelNames[idx] = targetPChannel
|
|
body.VirtualChannelNames[idx] = strings.Replace(body.VirtualChannelNames[idx], sourcePChannel, targetPChannel, 1)
|
|
}
|
|
createCollectionMsg.OverwriteBody(body)
|
|
return nil
|
|
}
|
|
|
|
// overwriteAlterReplicateConfigMessage overwrites the alter replicate configuration message.
|
|
func (s replicateService) overwriteAlterReplicateConfigMessage(currentReplicateConfig *replicateutil.ConfigHelper, msg message.ReplicateMutableMessage) error {
|
|
alterReplicateConfigMsg := message.MustAsMutableAlterReplicateConfigMessageV2(msg)
|
|
cfg := alterReplicateConfigMsg.Header().ReplicateConfiguration
|
|
_, err := replicateutil.NewConfigHelper(s.clusterID, cfg)
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
if !errors.Is(err, replicateutil.ErrCurrentClusterNotFound) {
|
|
return err
|
|
}
|
|
|
|
// Current cluster not found in the replicate configuration,
|
|
// it means that the current cluster is removed from the replicate topology and become a independent cluster.
|
|
// So we need to overwrite the replicate configuration to make current cluster to be a primary cluster without replicate topology.
|
|
cluster := currentReplicateConfig.GetCurrentCluster()
|
|
alterReplicateConfigMsg.OverwriteHeader(&message.AlterReplicateConfigMessageHeader{
|
|
ReplicateConfiguration: &commonpb.ReplicateConfiguration{
|
|
Clusters: []*commonpb.MilvusCluster{cluster.MilvusCluster},
|
|
},
|
|
})
|
|
return nil
|
|
}
|