fix: ddl framework bug patch (#45290)

issue: #45080, #45274, #45285

- LoadCollection doesn't ignore the ignorable request for a false field
array.
- CreateIndex doesn't ignore the ignorable request for a wrong index.
- index meta is not thread safe.
- lost parameter check of DDL.
- DDL Ack scheduler may get stuck and DDL is blocked until the next incoming
DDL.
- lost parameter checker of DDL

---------

Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in:
Zhen Ye 2025-11-04 22:25:33 +08:00 committed by GitHub
parent fa3d4ebfbe
commit a2ce70d252
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 58 additions and 14 deletions

View File

@ -53,6 +53,8 @@ import (
"github.com/milvus-io/milvus/pkg/v2/util/typeutil" "github.com/milvus-io/milvus/pkg/v2/util/typeutil"
) )
var errIndexOperationIgnored = errors.New("index operation ignored")
type indexMeta struct { type indexMeta struct {
ctx context.Context ctx context.Context
catalog metastore.DataCoordCatalog catalog metastore.DataCoordCatalog
@ -375,7 +377,7 @@ func (m *indexMeta) canCreateIndex(req *indexpb.CreateIndexRequest, isJson bool)
if req.IndexName == index.IndexName { if req.IndexName == index.IndexName {
if req.FieldID == index.FieldID && checkParams(index, req) && if req.FieldID == index.FieldID && checkParams(index, req) &&
/*only check json params when it is json index*/ (!isJson || checkIdenticalJson(index, req)) { /*only check json params when it is json index*/ (!isJson || checkIdenticalJson(index, req)) {
return index.IndexID, nil return index.IndexID, errIndexOperationIgnored
} }
errMsg := "at most one distinct index is allowed per field" errMsg := "at most one distinct index is allowed per field"
log.Warn(errMsg, log.Warn(errMsg,
@ -1060,12 +1062,17 @@ func (m *indexMeta) CheckCleanSegmentIndex(buildID UniqueID) (bool, *model.Segme
func (m *indexMeta) getSegmentsIndexStates(collectionID UniqueID, segmentIDs []UniqueID) map[int64]map[int64]*indexpb.SegmentIndexState { func (m *indexMeta) getSegmentsIndexStates(collectionID UniqueID, segmentIDs []UniqueID) map[int64]map[int64]*indexpb.SegmentIndexState {
ret := make(map[int64]map[int64]*indexpb.SegmentIndexState, 0) ret := make(map[int64]map[int64]*indexpb.SegmentIndexState, 0)
m.fieldIndexLock.RLock() m.fieldIndexLock.RLock()
fieldIndexes, ok := m.indexes[collectionID] fieldIndexesMap, ok := m.indexes[collectionID]
if !ok { if !ok {
m.fieldIndexLock.RUnlock() m.fieldIndexLock.RUnlock()
return ret return ret
} }
fieldIndexes := make(map[UniqueID]*model.Index, len(fieldIndexesMap))
for id, index := range fieldIndexesMap {
fieldIndexes[id] = index
}
m.fieldIndexLock.RUnlock() m.fieldIndexLock.RUnlock()
for _, segID := range segmentIDs { for _, segID := range segmentIDs {
ret[segID] = make(map[int64]*indexpb.SegmentIndexState) ret[segID] = make(map[int64]*indexpb.SegmentIndexState)
segIndexInfos, ok := m.segmentIndexes.Get(segID) segIndexInfos, ok := m.segmentIndexes.Get(segID)

View File

@ -134,8 +134,8 @@ func TestMeta_ScalarAutoIndex(t *testing.T) {
}, },
} }
tmpIndexID, err := m.CanCreateIndex(req, false) tmpIndexID, err := m.CanCreateIndex(req, false)
assert.NoError(t, err) assert.ErrorIs(t, err, errIndexOperationIgnored)
assert.Equal(t, int64(indexID), tmpIndexID) assert.Zero(t, tmpIndexID)
}) })
t.Run("user index params not consistent", func(t *testing.T) { t.Run("user index params not consistent", func(t *testing.T) {
@ -203,8 +203,8 @@ func TestMeta_ScalarAutoIndex(t *testing.T) {
}, },
} }
tmpIndexID, err := m.CanCreateIndex(req, false) tmpIndexID, err := m.CanCreateIndex(req, false)
assert.NoError(t, err) assert.ErrorIs(t, err, errIndexOperationIgnored)
assert.Equal(t, int64(indexID), tmpIndexID) assert.Zero(t, tmpIndexID)
newIndexParams := req.GetIndexParams() newIndexParams := req.GetIndexParams()
assert.Equal(t, len(newIndexParams), 1) assert.Equal(t, len(newIndexParams), 1)
assert.Equal(t, newIndexParams[0].Key, common.IndexTypeKey) assert.Equal(t, newIndexParams[0].Key, common.IndexTypeKey)
@ -287,8 +287,8 @@ func TestMeta_CanCreateIndex(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
tmpIndexID, err = m.CanCreateIndex(req, false) tmpIndexID, err = m.CanCreateIndex(req, false)
assert.NoError(t, err) assert.ErrorIs(t, err, errIndexOperationIgnored)
assert.Equal(t, indexID, tmpIndexID) assert.Zero(t, tmpIndexID)
}) })
t.Run("params not consistent", func(t *testing.T) { t.Run("params not consistent", func(t *testing.T) {
@ -326,8 +326,8 @@ func TestMeta_CanCreateIndex(t *testing.T) {
req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}} req.UserIndexParams = []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "AUTOINDEX"}, {Key: common.MetricTypeKey, Value: "COSINE"}}
req.UserAutoindexMetricTypeSpecified = false req.UserAutoindexMetricTypeSpecified = false
tmpIndexID, err = m.CanCreateIndex(req, false) tmpIndexID, err = m.CanCreateIndex(req, false)
assert.NoError(t, err) assert.ErrorIs(t, err, errIndexOperationIgnored)
assert.Equal(t, indexID, tmpIndexID) assert.Zero(t, tmpIndexID)
// req should follow the meta // req should follow the meta
assert.Equal(t, "L2", req.GetUserIndexParams()[1].Value) assert.Equal(t, "L2", req.GetUserIndexParams()[1].Value)
assert.Equal(t, "L2", req.GetIndexParams()[1].Value) assert.Equal(t, "L2", req.GetIndexParams()[1].Value)

View File

@ -21,6 +21,7 @@ import (
"fmt" "fmt"
"math" "math"
"github.com/cockroachdb/errors"
"github.com/samber/lo" "github.com/samber/lo"
"go.uber.org/zap" "go.uber.org/zap"
@ -211,6 +212,14 @@ func (s *Server) CreateIndex(ctx context.Context, req *indexpb.CreateIndexReques
indexID, err := s.meta.indexMeta.CanCreateIndex(req, isJson) indexID, err := s.meta.indexMeta.CanCreateIndex(req, isJson)
if err != nil { if err != nil {
if errors.Is(err, errIndexOperationIgnored) {
log.Info("index already exists",
zap.Int64("collectionID", req.GetCollectionID()),
zap.Int64("fieldID", req.GetFieldID()),
zap.String("indexName", req.GetIndexName()))
metrics.IndexRequestCounter.WithLabelValues(metrics.SuccessLabel).Inc()
return merr.Success(), nil
}
log.Error("Check CanCreateIndex fail", zap.Error(err)) log.Error("Check CanCreateIndex fail", zap.Error(err))
metrics.IndexRequestCounter.WithLabelValues(metrics.FailLabel).Inc() metrics.IndexRequestCounter.WithLabelValues(metrics.FailLabel).Inc()
return merr.Status(err), nil return merr.Status(err), nil

View File

@ -101,7 +101,7 @@ func (job *LoadCollectionJob) Execute() error {
// 2. put load info meta // 2. put load info meta
fieldIndexIDs := make(map[int64]int64, len(req.GetLoadFields())) fieldIndexIDs := make(map[int64]int64, len(req.GetLoadFields()))
fieldIDs := make([]int64, len(req.GetLoadFields())) fieldIDs := make([]int64, 0, len(req.GetLoadFields()))
for _, loadField := range req.GetLoadFields() { for _, loadField := range req.GetLoadFields() {
if loadField.GetIndexId() != 0 { if loadField.GetIndexId() != 0 {
fieldIndexIDs[loadField.GetFieldId()] = loadField.GetIndexId() fieldIndexIDs[loadField.GetFieldId()] = loadField.GetIndexId()

View File

@ -104,8 +104,12 @@ func (c *Core) broadcastAlterCollectionForRenameCollection(ctx context.Context,
} }
func (c *Core) validateEncryption(ctx context.Context, oldDBName string, newDBName string) error { func (c *Core) validateEncryption(ctx context.Context, oldDBName string, newDBName string) error {
// old and new DB names are filled in Prepare, shouldn't be empty here if oldDBName == newDBName {
return nil
}
// Check if renaming across databases with encryption enabled
// old and new DB names are filled in Prepare, shouldn't be empty here
originalDB, err := c.meta.GetDatabaseByName(ctx, oldDBName, typeutil.MaxTimestamp) originalDB, err := c.meta.GetDatabaseByName(ctx, oldDBName, typeutil.MaxTimestamp)
if err != nil { if err != nil {
return fmt.Errorf("failed to get original database: %w", err) return fmt.Errorf("failed to get original database: %w", err)

View File

@ -26,6 +26,10 @@ import (
// broadcastAlterCollectionForAlterCollection broadcasts the put collection message for alter collection. // broadcastAlterCollectionForAlterCollection broadcasts the put collection message for alter collection.
func (c *Core) broadcastAlterCollectionForAlterCollection(ctx context.Context, req *milvuspb.AlterCollectionRequest) error { func (c *Core) broadcastAlterCollectionForAlterCollection(ctx context.Context, req *milvuspb.AlterCollectionRequest) error {
if req.GetCollectionName() == "" {
return merr.WrapErrParameterInvalidMsg("alter collection failed, collection name does not exists")
}
if len(req.GetProperties()) == 0 && len(req.GetDeleteKeys()) == 0 { if len(req.GetProperties()) == 0 && len(req.GetDeleteKeys()) == 0 {
return merr.WrapErrParameterInvalidMsg("no properties or delete keys provided") return merr.WrapErrParameterInvalidMsg("no properties or delete keys provided")
} }
@ -38,6 +42,10 @@ func (c *Core) broadcastAlterCollectionForAlterCollection(ctx context.Context, r
return merr.WrapErrParameterInvalidMsg("can not alter cipher related properties") return merr.WrapErrParameterInvalidMsg("can not alter cipher related properties")
} }
if funcutil.SliceContain(req.GetDeleteKeys(), common.EnableDynamicSchemaKey) {
return merr.WrapErrParameterInvalidMsg("cannot delete key %s, dynamic field schema could support set to true/false", common.EnableDynamicSchemaKey)
}
// Validate timezone // Validate timezone
tz, exist := funcutil.TryGetAttrByKeyFromRepeatedKV(common.TimezoneKey, req.GetProperties()) tz, exist := funcutil.TryGetAttrByKeyFromRepeatedKV(common.TimezoneKey, req.GetProperties())
if exist && !funcutil.IsTimezoneValid(tz) { if exist && !funcutil.IsTimezoneValid(tz) {

View File

@ -18,7 +18,6 @@ package rootcoord
import ( import (
"context" "context"
"strings"
"github.com/cockroachdb/errors" "github.com/cockroachdb/errors"
"github.com/samber/lo" "github.com/samber/lo"
@ -39,7 +38,9 @@ import (
) )
func (c *Core) broadcastAlterDatabase(ctx context.Context, req *rootcoordpb.AlterDatabaseRequest) error { func (c *Core) broadcastAlterDatabase(ctx context.Context, req *rootcoordpb.AlterDatabaseRequest) error {
req.DbName = strings.TrimSpace(req.DbName) if req.GetDbName() == "" {
return merr.WrapErrParameterInvalidMsg("alter database failed, database name does not exists")
}
if req.GetProperties() == nil && req.GetDeleteKeys() == nil { if req.GetProperties() == nil && req.GetDeleteKeys() == nil {
return merr.WrapErrParameterInvalidMsg("alter database with empty properties and delete keys, expected to set either properties or delete keys") return merr.WrapErrParameterInvalidMsg("alter database with empty properties and delete keys, expected to set either properties or delete keys")
} }

View File

@ -83,14 +83,27 @@ func (s *ackCallbackScheduler) background() {
}() }()
s.Logger().Info("ack scheduler background start") s.Logger().Info("ack scheduler background start")
// it's weird to find that FastLock may fail even if there's no resource-key locked,
// also see: #45285
ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
var triggerTicker <-chan time.Time
for { for {
s.triggerAckCallback() s.triggerAckCallback()
if len(s.pendingAckedTasks) > 0 {
// if there's pending tasks, trigger the ack callback after a delay.
triggerTicker = ticker.C
} else {
triggerTicker = nil
}
select { select {
case <-s.notifier.Context().Done(): case <-s.notifier.Context().Done():
return return
case task := <-s.pending: case task := <-s.pending:
s.addBroadcastTask(task) s.addBroadcastTask(task)
case <-s.triggerChan: case <-s.triggerChan:
case <-triggerTicker:
} }
} }
} }

View File

@ -121,4 +121,6 @@ func (s *tombstoneScheduler) triggerGCTombstone() {
return return
} }
} }
// all the tombstones are dropped, reset the tombstones.
s.tombstones = make([]tombstoneItem, 0)
} }