milvus/pkg/util/replicateutil/config_validator.go
yihao.dai 51f69f32d0
feat: Add CDC support (#44124)
This PR implements a new CDC service for Milvus 2.6, providing log-based
cross-cluster replication.

issue: https://github.com/milvus-io/milvus/issues/44123

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
Signed-off-by: chyezh <chyezh@outlook.com>
Co-authored-by: chyezh <chyezh@outlook.com>
2025-09-16 16:32:01 +08:00

244 lines
8.4 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package replicateutil
import (
"fmt"
"net/url"
"strings"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
)
// ReplicateConfigValidator validates a ReplicateConfiguration against the
// replication business rules implemented by its Validate method.
type ReplicateConfigValidator struct {
// currentClusterID is the ID of the cluster performing the validation; it is
// looked up in clusterMap to make sure the configuration includes it.
currentClusterID string
// currentPChannels holds the pchannels of the current cluster; they are
// compared (ignoring order) with the pchannels configured for currentClusterID.
currentPChannels []string
// clusterMap indexes the configured clusters by cluster ID. It is filled by
// validateClusterBasic and read by the later validation steps.
clusterMap map[string]*commonpb.MilvusCluster
// config is the configuration being validated.
config *commonpb.ReplicateConfiguration
}
// NewReplicateConfigValidator returns a validator for config, bound to the
// identity (currentClusterID) and pchannels (currentPChannels) of the cluster
// that runs the validation. The returned validator starts with an empty
// cluster index; no validation is performed until Validate is called.
func NewReplicateConfigValidator(config *commonpb.ReplicateConfiguration, currentClusterID string, currentPChannels []string) *ReplicateConfigValidator {
	return &ReplicateConfigValidator{
		config:           config,
		currentClusterID: currentClusterID,
		currentPChannels: currentPChannels,
		clusterMap:       map[string]*commonpb.MilvusCluster{},
	}
}
// Validate runs every validation step against the configuration and returns
// the first error encountered, or nil when the configuration passes all of
// them. A nil configuration or an empty cluster list is rejected up front.
func (v *ReplicateConfigValidator) Validate() error {
	if v.config == nil {
		return fmt.Errorf("config cannot be nil")
	}

	clusters := v.config.GetClusters()
	if len(clusters) == 0 {
		return fmt.Errorf("clusters list cannot be empty")
	}
	topologies := v.config.GetCrossClusterTopology()

	// The steps are order-dependent: the cluster check builds the cluster
	// index that the relevance and topology checks consult.
	steps := []func() error{
		func() error { return v.validateClusterBasic(clusters) },
		v.validateRelevance,
		func() error { return v.validateTopologyEdgeUniqueness(topologies) },
		func() error { return v.validateTopologyTypeConstraint(topologies) },
	}
	for _, step := range steps {
		if err := step(); err != nil {
			return err
		}
	}
	return nil
}
// validateClusterBasic validates the basic format requirements of every
// MilvusCluster in clusters and rebuilds v.clusterMap (cluster ID -> cluster)
// from them.
//
// For each cluster it checks, in this order:
//   - the entry is not nil;
//   - the cluster ID is non-empty and contains no space, tab, CR or LF;
//   - connection_param is set and its URI is non-empty and accepted by
//     url.ParseRequestURI;
//   - pchannels is non-empty, has no empty or duplicate entries, and every
//     pchannel starts with the cluster ID;
//   - the number of pchannels equals that of the first cluster;
//   - the cluster ID has not been seen earlier in the list.
//
// It returns the first violation found, or nil when all clusters are valid.
// On error, v.clusterMap contains only the clusters accepted before the
// failing one.
func (v *ReplicateConfigValidator) validateClusterBasic(clusters []*commonpb.MilvusCluster) error {
	// Start from a fresh index on every run. Reusing the previous map would
	// make a second validation on the same validator report every cluster as
	// a duplicate, and a nil map (zero-value validator) would panic on write.
	v.clusterMap = make(map[string]*commonpb.MilvusCluster, len(clusters))

	var expectedPchannelCount int
	var firstClusterID string
	for i, cluster := range clusters {
		if cluster == nil {
			return fmt.Errorf("cluster at index %d is nil", i)
		}

		// clusterID validation: non-empty and no whitespace
		clusterID := cluster.GetClusterId()
		if clusterID == "" {
			return fmt.Errorf("cluster at index %d has empty clusterID", i)
		}
		if strings.ContainsAny(clusterID, " \t\n\r") {
			return fmt.Errorf("cluster at index %d has clusterID '%s' containing whitespace characters", i, clusterID)
		}

		// connection_param.uri validation: non-empty and basic URI format
		connParam := cluster.GetConnectionParam()
		if connParam == nil {
			return fmt.Errorf("cluster '%s' has nil connection_param", clusterID)
		}
		uri := connParam.GetUri()
		if uri == "" {
			return fmt.Errorf("cluster '%s' has empty URI", clusterID)
		}
		if _, err := url.ParseRequestURI(uri); err != nil {
			return fmt.Errorf("cluster '%s' has invalid URI format: '%s'", clusterID, uri)
		}

		// pchannels validation: non-empty
		pchannels := cluster.GetPchannels()
		if len(pchannels) == 0 {
			return fmt.Errorf("cluster '%s' has empty pchannels", clusterID)
		}

		// pchannels must be non-empty strings, unique within the cluster and
		// prefixed with the cluster ID.
		seenPchannels := make(map[string]struct{}, len(pchannels))
		for j, pchannel := range pchannels {
			if pchannel == "" {
				return fmt.Errorf("cluster '%s' has empty pchannel at index %d", clusterID, j)
			}
			if _, dup := seenPchannels[pchannel]; dup {
				return fmt.Errorf("cluster '%s' has duplicate pchannel: '%s'", clusterID, pchannel)
			}
			if !strings.HasPrefix(pchannel, clusterID) {
				return fmt.Errorf("cluster '%s' has pchannel '%s' that does not start with clusterID as prefix", clusterID, pchannel)
			}
			seenPchannels[pchannel] = struct{}{}
		}

		// pchannels count consistency across all clusters: the first cluster
		// sets the expected count, every later one must match it.
		if i == 0 {
			expectedPchannelCount = len(pchannels)
			firstClusterID = clusterID
		} else if len(pchannels) != expectedPchannelCount {
			return fmt.Errorf("cluster '%s' has %d pchannels, but expected %d (same as cluster '%s')",
				clusterID, len(pchannels), expectedPchannelCount, firstClusterID)
		}

		// Register the cluster, rejecting IDs already seen in this run.
		if _, exists := v.clusterMap[clusterID]; exists {
			return fmt.Errorf("duplicate clusterID found: '%s'", clusterID)
		}
		v.clusterMap[clusterID] = cluster
	}
	return nil
}
// validateRelevance checks that the configuration concerns the current Milvus
// cluster: the current cluster ID must be present in the cluster index, and
// the pchannels configured for it must equal the current pchannels when order
// is ignored.
func (v *ReplicateConfigValidator) validateRelevance() error {
	self, found := v.clusterMap[v.currentClusterID]
	if !found {
		return fmt.Errorf("current Milvus cluster '%s' must be included in the clusters list", v.currentClusterID)
	}

	configured := self.GetPchannels()
	if equalIgnoreOrder(v.currentPChannels, configured) {
		return nil
	}
	return fmt.Errorf("current pchannels do not match the pchannels in the config, current pchannels: %v, config pchannels: %v", v.currentPChannels, configured)
}
// validateTopologyEdgeUniqueness validates that every topology entry is
// non-nil, that both of its endpoints are clusters present in v.clusterMap,
// and that a given source_clusterID -> target_clusterID pair appears only
// once. It returns the first violation found, or nil (including when
// topologies is empty).
func (v *ReplicateConfigValidator) validateTopologyEdgeUniqueness(topologies []*commonpb.CrossClusterTopology) error {
	if len(topologies) == 0 {
		return nil
	}

	// Edges are keyed by the (source, target) pair itself rather than by a
	// joined "source->target" string: a joined string is ambiguous when a
	// cluster ID contains "->", which would report distinct edges as
	// duplicates.
	type edge struct {
		source string
		target string
	}
	seen := make(map[edge]struct{}, len(topologies))

	for i, topology := range topologies {
		if topology == nil {
			return fmt.Errorf("topology at index %d is nil", i)
		}
		sourceClusterID := topology.GetSourceClusterId()
		targetClusterID := topology.GetTargetClusterId()

		// Validate edge endpoints exist
		if _, exists := v.clusterMap[sourceClusterID]; !exists {
			return fmt.Errorf("topology at index %d references non-existent source cluster: '%s'", i, sourceClusterID)
		}
		if _, exists := v.clusterMap[targetClusterID]; !exists {
			return fmt.Errorf("topology at index %d references non-existent target cluster: '%s'", i, targetClusterID)
		}

		// Edge uniqueness
		key := edge{source: sourceClusterID, target: targetClusterID}
		if _, exists := seen[key]; exists {
			// The human-readable form is only built on the error path.
			edgeKey := fmt.Sprintf("%s->%s", sourceClusterID, targetClusterID)
			return fmt.Errorf("duplicate topology relationship found: '%s'", edgeKey)
		}
		seen[key] = struct{}{}
	}
	return nil
}
// validateTopologyTypeConstraint enforces the only topology shape currently
// supported, a star: exactly one center cluster with no incoming edge and an
// outgoing edge count of (number of clusters - 1), while every other cluster
// has exactly one incoming edge and no outgoing edge. An empty topology list
// is accepted as-is.
func (v *ReplicateConfigValidator) validateTopologyTypeConstraint(topologies []*commonpb.CrossClusterTopology) error {
	if len(topologies) == 0 {
		return nil
	}

	total := len(v.clusterMap)

	// Count incoming and outgoing edges per cluster ID. Clusters without any
	// edge simply read back as zero from the maps.
	incoming := make(map[string]int, total)
	outgoing := make(map[string]int, total)
	for _, edge := range topologies {
		outgoing[edge.GetSourceClusterId()]++
		incoming[edge.GetTargetClusterId()]++
	}

	// Locate the center: the only cluster that feeds all the others and is
	// fed by none.
	center := ""
	for id := range v.clusterMap {
		if incoming[id] != 0 || outgoing[id] != total-1 {
			continue
		}
		if center != "" {
			return fmt.Errorf("multiple center nodes found, only one center node is allowed in star topology")
		}
		center = id
	}
	if center == "" {
		return fmt.Errorf("no center node found, star topology must have exactly one center node")
	}

	// Every remaining cluster must be a leaf: one incoming edge, none outgoing.
	for id := range v.clusterMap {
		if id == center {
			continue
		}
		in, out := incoming[id], outgoing[id]
		if in == 1 && out == 0 {
			continue
		}
		return fmt.Errorf("cluster '%s' does not follow star topology pattern (in-degree=%d, out-degree=%d)",
			id, in, out)
	}
	return nil
}
// equalIgnoreOrder reports whether a and b contain the same strings with the
// same multiplicities, regardless of order. Neither slice is modified.
func equalIgnoreOrder(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}

	// Tally every element of a, then consume the tally with the elements of b.
	remaining := make(map[string]int)
	for i := range a {
		remaining[a[i]]++
	}
	for i := range b {
		left := remaining[b[i]]
		if left == 0 {
			// b holds a value that a lacks, or holds it more often than a does.
			return false
		}
		remaining[b[i]] = left - 1
	}
	// The lengths match and every element of b was paired with one of a.
	return true
}