issue: #46033

## Pull Request Summary: Entity-Level TTL Field Support

### Core Invariant and Design

This PR introduces **per-entity TTL (time-to-live) expiration** via a dedicated TIMESTAMPTZ field, as a fine-grained alternative to collection-level TTL. The key invariant is **mutual exclusivity**: collection-level TTL and an entity-level TTL field cannot coexist on the same collection. Validation is enforced at the proxy layer during collection creation/alteration (`validateTTL()` prevents both being set simultaneously).

### What Is Removed and Why

- **Global `EntityExpirationTTL` parameter** removed from config (`configs/milvus.yaml`, `pkg/util/paramtable/component_param.go`). This was the only mechanism for collection-level expiration. The removal is safe because:
  - The collection-level TTL path (the `isEntityExpired(ts)` check) remains intact in the codebase for backward compatibility
  - The TTL field check (`isEntityExpiredByTTLField()`) is a secondary path, invoked only when a TTL field is configured
  - Existing deployments using collection TTL can continue without modification

The global parameter was removed because per-entity TTL control makes a collection-wide setting redundant; the PR chooses one mechanism per collection rather than layering both.

### No Data Loss or Behavior Regression

**TTL filtering logic is additive and safe:**

1. **Collection-level TTL unaffected**: The `isEntityExpired(ts)` check still applies when no TTL field is configured; callers of `EntityFilter.Filtered()` pass `-1` as the TTL expiration timestamp when no field exists, causing `isEntityExpiredByTTLField()` to return false immediately
2. **Null/invalid TTL values treated safely**: Rows with a null TTL or TTL ≤ 0 are marked as "never expire" (using the sentinel value `int64(^uint64(0) >> 1)`) and are preserved across compactions; percentile calculations only include positive TTL values
3. **Query-time filtering automatic**: TTL filtering is transparently added to expression compilation via `AddTTLFieldFilterExpressions()`, which appends `(ttl_field IS NULL OR ttl_field > current_time)` to the filter pipeline. Entities with a null TTL always pass the filter
4. **Compaction triggering granular**: Percentile-based expiration (20%, 40%, 60%, 80%, 100%) allows configurable compaction thresholds via `SingleCompactionRatioThreshold`, preventing premature data deletion

### Capability Added: Per-Entity Expiration with Data Distribution Awareness

Users can now specify a collection property `ttl_field` naming a TIMESTAMPTZ schema field. During data writes, TTL values are collected per segment and percentile quantiles (a 5-value array) are computed and stored in segment metadata. At query time, the TTL field is automatically filtered. At compaction time, segment-level percentiles drive expiration-based compaction decisions, enabling intelligent compaction of segments where a configurable fraction of the data has expired (e.g., compact when 40% of rows are expired, controlled by the threshold ratio).

---------

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
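To make points 2 and 4 above concrete, here is a minimal, self-contained Go sketch of how a never-expire sentinel, a 5-value TTL percentile array, and a threshold ratio could interact. It is not the Milvus implementation: `ttlPercentiles`, `shouldCompactByTTL`, and `neverExpire` are hypothetical names, and the nearest-rank quantile method is an assumption; only the sentinel value `int64(^uint64(0) >> 1)`, the 20/40/60/80/100% layout, and the threshold-ratio idea come from the description above.

```go
package main

import (
	"fmt"
	"math"
	"sort"
	"time"
)

// Illustrative sketch only; not the Milvus implementation.

// neverExpire marks rows whose TTL is null or <= 0 as "never expire":
// int64(^uint64(0) >> 1) is math.MaxInt64.
const neverExpire = int64(^uint64(0) >> 1)

// ttlPercentiles computes nearest-rank 20/40/60/80/100% quantiles over the
// positive TTL timestamps of a segment. Null/invalid TTLs (<= 0) are excluded,
// mirroring "percentile calculations only include positive TTL values".
func ttlPercentiles(ttls []int64) [5]int64 {
	positive := make([]int64, 0, len(ttls))
	for _, ts := range ttls {
		if ts > 0 {
			positive = append(positive, ts)
		}
	}
	var out [5]int64
	if len(positive) == 0 {
		for i := range out {
			out[i] = neverExpire // nothing in this segment ever expires
		}
		return out
	}
	sort.Slice(positive, func(i, j int) bool { return positive[i] < positive[j] })
	for i, q := range []float64{0.2, 0.4, 0.6, 0.8, 1.0} {
		idx := int(math.Ceil(q*float64(len(positive)))) - 1
		out[i] = positive[idx]
	}
	return out
}

// shouldCompactByTTL reports whether at least `ratio` of the segment's expirable
// rows are past their TTL, judged only from the stored quantiles. `ratio` plays
// the role of a SingleCompactionRatioThreshold-style knob.
func shouldCompactByTTL(percentiles [5]int64, now int64, ratio float64) bool {
	for i, q := range []float64{0.2, 0.4, 0.6, 0.8, 1.0} {
		if q >= ratio {
			// If the TTL at this quantile has passed, at least q of the rows expired.
			return percentiles[i] != neverExpire && percentiles[i] <= now
		}
	}
	return false
}

func main() {
	now := time.Now().Unix()
	// Three rows already expired, one expiring later, one null TTL (encoded as 0).
	ttls := []int64{now - 30, now - 20, now - 10, now + 3600, 0}
	p := ttlPercentiles(ttls)
	fmt.Println(shouldCompactByTTL(p, now, 0.4)) // true: >= 40% of expirable rows expired
	fmt.Println(shouldCompactByTTL(p, now, 0.8)) // false: only 75% expired
}
```

With this reading, a threshold ratio of 0.4 triggers compaction once the stored 40th-percentile TTL has passed, i.e. once at least 40% of the segment's expirable rows have expired; rows with null TTL never contribute to the quantiles and are preserved.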
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package rootcoord

import (
	"context"
	"testing"

	"github.com/stretchr/testify/require"
	"google.golang.org/protobuf/proto"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/util/hookutil"
	"github.com/milvus-io/milvus/pkg/v2/common"
	"github.com/milvus-io/milvus/pkg/v2/util"
	"github.com/milvus-io/milvus/pkg/v2/util/funcutil"
	"github.com/milvus-io/milvus/pkg/v2/util/merr"
	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)

func TestDDLCallbacksAlterCollectionProperties(t *testing.T) {
	core := initStreamingSystemAndCore(t)

	ctx := context.Background()
	dbName := "testDB" + funcutil.RandomString(10)
	collectionName := "testCollection" + funcutil.RandomString(10)

	// Cannot alter collection with empty properties and delete keys.
	resp, err := core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
	})
	require.ErrorIs(t, merr.CheckRPCCall(resp, err), merr.ErrParameterInvalid)

	// Cannot alter collection properties and delete keys at the same time.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties:     []*commonpb.KeyValuePair{{Key: common.CollectionReplicaNumber, Value: "1"}},
		DeleteKeys:     []string{common.CollectionReplicaNumber},
	})
	require.ErrorIs(t, merr.CheckRPCCall(resp, err), merr.ErrParameterInvalid)

	// hook-related properties are not allowed to be altered.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties:     []*commonpb.KeyValuePair{{Key: hookutil.EncryptionEnabledKey, Value: "1"}},
	})
	require.ErrorIs(t, merr.CheckRPCCall(resp, err), merr.ErrParameterInvalid)

	// Altering a database that does not exist should return an error.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		DeleteKeys:     []string{common.CollectionReplicaNumber},
	})
	require.ErrorIs(t, merr.CheckRPCCall(resp, err), merr.ErrDatabaseNotFound)

	// Altering a collection that does not exist should return an error.
	resp, err = core.CreateDatabase(ctx, &milvuspb.CreateDatabaseRequest{
		DbName: util.DefaultDBName,
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         util.DefaultDBName,
		CollectionName: collectionName,
		DeleteKeys:     []string{common.CollectionReplicaNumber},
	})
	require.ErrorIs(t, merr.CheckRPCCall(resp, err), merr.ErrCollectionNotFound)

	// alter a property of a collection.
	createCollectionAndAliasForTest(t, ctx, core, dbName, collectionName)
	assertReplicaNumber(t, ctx, core, dbName, collectionName, 1)
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties: []*commonpb.KeyValuePair{
			{Key: common.CollectionReplicaNumber, Value: "2"},
			{Key: common.CollectionResourceGroups, Value: "rg1,rg2"},
		},
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	assertReplicaNumber(t, ctx, core, dbName, collectionName, 2)
	assertResourceGroups(t, ctx, core, dbName, collectionName, []string{"rg1", "rg2"})

	// delete a property of a collection.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		DeleteKeys:     []string{common.CollectionReplicaNumber, common.CollectionResourceGroups},
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	assertReplicaNumber(t, ctx, core, dbName, collectionName, 0)
	assertResourceGroups(t, ctx, core, dbName, collectionName, []string{})

	// alter consistency level and description of a collection.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties: []*commonpb.KeyValuePair{
			{Key: common.ConsistencyLevel, Value: commonpb.ConsistencyLevel_Eventually.String()},
			{Key: common.CollectionDescription, Value: "description2"},
		},
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	assertConsistencyLevel(t, ctx, core, dbName, collectionName, commonpb.ConsistencyLevel_Eventually)
	assertDescription(t, ctx, core, dbName, collectionName, "description2")

	// alter collection should be idempotent.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties: []*commonpb.KeyValuePair{
			{Key: common.ConsistencyLevel, Value: commonpb.ConsistencyLevel_Eventually.String()},
			{Key: common.CollectionDescription, Value: "description2"},
		},
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	assertConsistencyLevel(t, ctx, core, dbName, collectionName, commonpb.ConsistencyLevel_Eventually)
	assertDescription(t, ctx, core, dbName, collectionName, "description2")
	assertSchemaVersion(t, ctx, core, dbName, collectionName, 0) // schema version should not be changed with alter collection properties.

	// updating the dynamic schema property together with other properties should return an error.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties:     []*commonpb.KeyValuePair{{Key: common.EnableDynamicSchemaKey, Value: "true"}, {Key: common.CollectionReplicaNumber, Value: "1"}},
	})
	require.ErrorIs(t, merr.CheckRPCCall(resp, err), merr.ErrParameterInvalid)
}

func TestDDLCallbacksAlterCollectionPropertiesForDynamicField(t *testing.T) {
	core := initStreamingSystemAndCore(t)
	ctx := context.Background()
	dbName := "testDB" + funcutil.RandomString(10)
	collectionName := "testCollection" + funcutil.RandomString(10)

	createCollectionAndAliasForTest(t, ctx, core, dbName, collectionName)

	// updating the dynamic schema property together with other properties should return an error.
	resp, err := core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties:     []*commonpb.KeyValuePair{{Key: common.EnableDynamicSchemaKey, Value: "true"}, {Key: common.CollectionReplicaNumber, Value: "1"}},
	})
	require.ErrorIs(t, merr.CheckRPCCall(resp, err), merr.ErrParameterInvalid)

	// updating the dynamic schema property with an invalid value should return an error.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties:     []*commonpb.KeyValuePair{{Key: common.EnableDynamicSchemaKey, Value: "123123"}},
	})
	require.ErrorIs(t, merr.CheckRPCCall(resp, err), merr.ErrParameterInvalid)

	// enabling the dynamic schema property alone should succeed.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties:     []*commonpb.KeyValuePair{{Key: common.EnableDynamicSchemaKey, Value: "true"}},
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	assertDynamicSchema(t, ctx, core, dbName, collectionName, true)
	assertSchemaVersion(t, ctx, core, dbName, collectionName, 1) // adding the dynamic field should increment the schema version.

	// enabling the dynamic schema property should be idempotent.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties:     []*commonpb.KeyValuePair{{Key: common.EnableDynamicSchemaKey, Value: "true"}},
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	assertDynamicSchema(t, ctx, core, dbName, collectionName, true)
	assertSchemaVersion(t, ctx, core, dbName, collectionName, 1)

	// disabling the dynamic schema property should return an error.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties:     []*commonpb.KeyValuePair{{Key: common.EnableDynamicSchemaKey, Value: "false"}},
	})
	require.ErrorIs(t, merr.CheckRPCCall(resp, err), merr.ErrParameterInvalid)
}

func TestDDLCallbacksAlterCollectionProperties_TTLFieldShouldBroadcastSchema(t *testing.T) {
	core := initStreamingSystemAndCore(t)
	ctx := context.Background()

	dbName := "testDB" + funcutil.RandomString(10)
	collectionName := "testCollectionTTLField" + funcutil.RandomString(10)

	// Create collection with a ttl field.
	resp, err := core.CreateDatabase(ctx, &milvuspb.CreateDatabaseRequest{
		DbName: dbName,
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))

	testSchema := &schemapb.CollectionSchema{
		Name:        collectionName,
		Description: "description",
		AutoID:      false,
		Fields: []*schemapb.FieldSchema{
			{Name: "field1", DataType: schemapb.DataType_Int64},
			{Name: "ttl", DataType: schemapb.DataType_Timestamptz, Nullable: true},
		},
	}
	schemaBytes, err := proto.Marshal(testSchema)
	require.NoError(t, err)
	resp, err = core.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{
		DbName:           dbName,
		CollectionName:   collectionName,
		Properties:       []*commonpb.KeyValuePair{{Key: common.CollectionReplicaNumber, Value: "1"}},
		Schema:           schemaBytes,
		ConsistencyLevel: commonpb.ConsistencyLevel_Bounded,
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	assertSchemaVersion(t, ctx, core, dbName, collectionName, 0)

	// Altering properties to set the ttl field should succeed and should NOT change the schema version in meta.
	resp, err = core.AlterCollection(ctx, &milvuspb.AlterCollectionRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Properties:     []*commonpb.KeyValuePair{{Key: common.CollectionTTLFieldKey, Value: "ttl"}},
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	assertSchemaVersion(t, ctx, core, dbName, collectionName, 0)
}

func createCollectionForTest(t *testing.T, ctx context.Context, core *Core, dbName string, collectionName string) {
	resp, err := core.CreateDatabase(ctx, &milvuspb.CreateDatabaseRequest{
		DbName: dbName,
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	testSchema := &schemapb.CollectionSchema{
		Name:        collectionName,
		Description: "description",
		AutoID:      false,
		Fields: []*schemapb.FieldSchema{
			{
				Name:     "field1",
				DataType: schemapb.DataType_Int64,
			},
		},
	}
	schemaBytes, err := proto.Marshal(testSchema)
	require.NoError(t, err)
	resp, err = core.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{
		DbName:           dbName,
		CollectionName:   collectionName,
		Properties:       []*commonpb.KeyValuePair{{Key: common.CollectionReplicaNumber, Value: "1"}},
		Schema:           schemaBytes,
		ConsistencyLevel: commonpb.ConsistencyLevel_Bounded,
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	assertReplicaNumber(t, ctx, core, dbName, collectionName, 1)
	assertConsistencyLevel(t, ctx, core, dbName, collectionName, commonpb.ConsistencyLevel_Bounded)
	assertDescription(t, ctx, core, dbName, collectionName, "description")
	assertSchemaVersion(t, ctx, core, dbName, collectionName, 0)
}

func createCollectionAndAliasForTest(t *testing.T, ctx context.Context, core *Core, dbName string, collectionName string) {
	createCollectionForTest(t, ctx, core, dbName, collectionName)

	// add an alias to the collection.
	aliasName := collectionName + "_alias"
	resp, err := core.CreateAlias(ctx, &milvuspb.CreateAliasRequest{
		DbName:         dbName,
		CollectionName: collectionName,
		Alias:          aliasName,
	})
	require.NoError(t, merr.CheckRPCCall(resp, err))
	assertReplicaNumber(t, ctx, core, dbName, aliasName, 1)
	assertConsistencyLevel(t, ctx, core, dbName, aliasName, commonpb.ConsistencyLevel_Bounded)
	assertDescription(t, ctx, core, dbName, aliasName, "description")
}

func assertReplicaNumber(t *testing.T, ctx context.Context, core *Core, dbName string, collectionName string, replicaNumber int64) {
	coll, err := core.meta.GetCollectionByName(ctx, dbName, collectionName, typeutil.MaxTimestamp)
	require.NoError(t, err)
	replicaNum, err := common.CollectionLevelReplicaNumber(coll.Properties)
	if replicaNumber == 0 {
		require.Error(t, err)
		return
	}
	require.NoError(t, err)
	require.Equal(t, replicaNumber, replicaNum)
}

func assertResourceGroups(t *testing.T, ctx context.Context, core *Core, dbName string, collectionName string, resourceGroups []string) {
	coll, err := core.meta.GetCollectionByName(ctx, dbName, collectionName, typeutil.MaxTimestamp)
	require.NoError(t, err)
	rgs, err := common.CollectionLevelResourceGroups(coll.Properties)
	if len(resourceGroups) == 0 {
		require.Error(t, err)
		return
	}
	require.NoError(t, err)
	require.ElementsMatch(t, resourceGroups, rgs)
}

func assertConsistencyLevel(t *testing.T, ctx context.Context, core *Core, dbName string, collectionName string, consistencyLevel commonpb.ConsistencyLevel) {
	coll, err := core.meta.GetCollectionByName(ctx, dbName, collectionName, typeutil.MaxTimestamp)
	require.NoError(t, err)
	require.Equal(t, consistencyLevel, coll.ConsistencyLevel)
}

func assertDescription(t *testing.T, ctx context.Context, core *Core, dbName string, collectionName string, description string) {
	coll, err := core.meta.GetCollectionByName(ctx, dbName, collectionName, typeutil.MaxTimestamp)
	require.NoError(t, err)
	require.Equal(t, description, coll.Description)
}

func assertSchemaVersion(t *testing.T, ctx context.Context, core *Core, dbName string, collectionName string, schemaVersion int32) {
	coll, err := core.meta.GetCollectionByName(ctx, dbName, collectionName, typeutil.MaxTimestamp)
	require.NoError(t, err)
	require.Equal(t, schemaVersion, coll.SchemaVersion)
}

func assertDynamicSchema(t *testing.T, ctx context.Context, core *Core, dbName string, collectionName string, dynamicSchema bool) {
	coll, err := core.meta.GetCollectionByName(ctx, dbName, collectionName, typeutil.MaxTimestamp)
	require.NoError(t, err)
	require.Equal(t, dynamicSchema, coll.EnableDynamicField)
	if !dynamicSchema {
		return
	}
	require.Len(t, coll.Fields, 4)
	require.True(t, coll.Fields[len(coll.Fields)-1].IsDynamic)
	require.Equal(t, coll.Fields[len(coll.Fields)-1].DataType, schemapb.DataType_JSON)
	require.Equal(t, coll.Fields[len(coll.Fields)-1].FieldID, int64(101))
}