yihao.dai 51f69f32d0
feat: Add CDC support (#44124)
This PR implements a new CDC service for Milvus 2.6, providing log-based
cross-cluster replication.

issue: https://github.com/milvus-io/milvus/issues/44123

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
Signed-off-by: chyezh <chyezh@outlook.com>
Co-authored-by: chyezh <chyezh@outlook.com>
2025-09-16 16:32:01 +08:00

122 lines
3.7 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package replicatestream
import (
"github.com/milvus-io/milvus/pkg/v2/metrics"
streamingpb "github.com/milvus-io/milvus/pkg/v2/proto/streamingpb"
message "github.com/milvus-io/milvus/pkg/v2/streaming/util/message"
"github.com/milvus-io/milvus/pkg/v2/util/timerecord"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// ReplicateMetrics is the set of callbacks through which message-replication
// and connection events are reported for metrics collection. The notes on each
// method describe what the implementation in this file (replicateMetrics) does.
type ReplicateMetrics interface {
// StartReplicate registers msg, keyed by its message ID, with a new time
// recorder so that OnConfirmed can later report its latency.
StartReplicate(msg message.ImmutableMessage)
// OnSent adds msg to the replicated-message count and its estimated size to
// the replicated-bytes total.
OnSent(msg message.ImmutableMessage)
// OnConfirmed reports the latency of a msg previously passed to
// StartReplicate and stops tracking it; unknown messages are ignored.
OnConfirmed(msg message.ImmutableMessage)
// OnConnect increments the "connected" connection metric of the target cluster.
OnConnect()
// OnDisconnect moves one connection from "connected" to "disconnected".
OnDisconnect()
// OnReconnect moves one connection from "disconnected" back to "connected"
// and increments the reconnect metric.
OnReconnect()
}
// msgMetrics holds the per-message state kept between StartReplicate and
// OnConfirmed.
type msgMetrics struct {
// tr is created in StartReplicate and read via RecordSpan in OnConfirmed to
// obtain the duration observed as the message's latency.
tr *timerecord.TimeRecorder
}
// replicateMetrics is the ReplicateMetrics implementation returned by
// NewReplicateMetrics.
type replicateMetrics struct {
// replicateInfo supplies the source/target channel names and the target
// cluster ID that are used as metric label values.
replicateInfo *streamingpb.ReplicatePChannelMeta
// msgsMetrics holds an entry for every message passed to StartReplicate that
// has not yet been removed by OnConfirmed.
msgsMetrics *typeutil.ConcurrentMap[string, msgMetrics] // message id -> msgMetrics
}
// NewReplicateMetrics builds a ReplicateMetrics bound to the given replicate
// channel meta. The returned value starts with an empty per-message map;
// replicateInfo is stored as-is and read later for metric label values.
func NewReplicateMetrics(replicateInfo *streamingpb.ReplicatePChannelMeta) ReplicateMetrics {
	rm := new(replicateMetrics)
	rm.replicateInfo = replicateInfo
	rm.msgsMetrics = typeutil.NewConcurrentMap[string, msgMetrics]()
	return rm
}
// StartReplicate begins tracking msg: a new time recorder is stored in
// msgsMetrics under the string form of the message ID, to be consumed by
// OnConfirmed.
func (m *replicateMetrics) StartReplicate(msg message.ImmutableMessage) {
	key := msg.MessageID().String()
	recorder := timerecord.NewTimeRecorder("replicate_msg")
	m.msgsMetrics.Insert(key, msgMetrics{tr: recorder})
}
// OnSent records msg as replicated. Both metrics are labelled with the source
// channel name, the target channel name and the message type:
// CDCReplicatedMessagesTotal is incremented by one and CDCReplicatedBytesTotal
// grows by the message's estimated size.
func (m *replicateMetrics) OnSent(msg message.ImmutableMessage) {
	// Same three label values, in the same order, for both metrics.
	labels := []string{
		m.replicateInfo.GetSourceChannelName(),
		m.replicateInfo.GetTargetChannelName(),
		msg.MessageType().String(),
	}
	metrics.CDCReplicatedMessagesTotal.WithLabelValues(labels...).Inc()
	metrics.CDCReplicatedBytesTotal.WithLabelValues(labels...).Add(float64(msg.EstimateSize()))
}
// OnConfirmed removes the entry that StartReplicate stored for msg and
// observes the recorder's RecordSpan value, in whole milliseconds, on
// CDCReplicateEndToEndLatency labelled with the source and target channel
// names. If no entry exists for the message ID, nothing is recorded.
func (m *replicateMetrics) OnConfirmed(msg message.ImmutableMessage) {
	key := msg.MessageID().String()
	if entry, found := m.msgsMetrics.GetAndRemove(key); found {
		elapsedMs := entry.tr.RecordSpan().Milliseconds()
		metrics.CDCReplicateEndToEndLatency.WithLabelValues(
			m.replicateInfo.GetSourceChannelName(),
			m.replicateInfo.GetTargetChannelName(),
		).Observe(float64(elapsedMs))
	}
}
// OnConnect increments CDCStreamRPCConnections for the target cluster ID under
// the CDCStatusConnected label.
func (m *replicateMetrics) OnConnect() {
	clusterID := m.replicateInfo.GetTargetCluster().GetClusterId()
	connected := metrics.CDCStreamRPCConnections.WithLabelValues(clusterID, metrics.CDCStatusConnected)
	connected.Inc()
}
// OnDisconnect shifts one connection of the target cluster from the
// CDCStatusConnected label to the CDCStatusDisconnected label of
// CDCStreamRPCConnections.
func (m *replicateMetrics) OnDisconnect() {
	target := m.replicateInfo.GetTargetCluster().GetClusterId()
	metrics.CDCStreamRPCConnections.WithLabelValues(target, metrics.CDCStatusConnected).Dec()
	metrics.CDCStreamRPCConnections.WithLabelValues(target, metrics.CDCStatusDisconnected).Inc()
}
// OnReconnect shifts one connection of the target cluster from the
// CDCStatusDisconnected label back to the CDCStatusConnected label of
// CDCStreamRPCConnections, then increments CDCStreamRPCReconnectTimes for that
// cluster.
func (m *replicateMetrics) OnReconnect() {
	target := m.replicateInfo.GetTargetCluster().GetClusterId()
	metrics.CDCStreamRPCConnections.WithLabelValues(target, metrics.CDCStatusDisconnected).Dec()
	metrics.CDCStreamRPCConnections.WithLabelValues(target, metrics.CDCStatusConnected).Inc()
	metrics.CDCStreamRPCReconnectTimes.WithLabelValues(target).Inc()
}