mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
issue: #46033 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Pull Request Summary: Entity-Level TTL Field Support ### Core Invariant and Design This PR introduces **per-entity TTL (time-to-live) expiration** via a dedicated TIMESTAMPTZ field as a fine-grained alternative to collection-level TTL. The key invariant is **mutual exclusivity**: collection-level TTL and entity-level TTL field cannot coexist on the same collection. Validation is enforced at the proxy layer during collection creation/alteration (`validateTTL()` prevents both being set simultaneously). ### What Is Removed and Why - **Global `EntityExpirationTTL` parameter** removed from config (`configs/milvus.yaml`, `pkg/util/paramtable/component_param.go`). This was the only mechanism for collection-level expiration. The removal is safe because: - The collection-level TTL path (`isEntityExpired(ts)` check) remains intact in the codebase for backward compatibility - TTL field check (`isEntityExpiredByTTLField()`) is a secondary path invoked only when a TTL field is configured - Existing deployments using collection TTL can continue without modification The global parameter was removed specifically because entity-level TTL makes per-entity control redundant with a collection-wide setting, and the PR chooses one mechanism per collection rather than layering both. ### No Data Loss or Behavior Regression **TTL filtering logic is additive and safe:** 1. **Collection-level TTL unaffected**: The `isEntityExpired(ts)` check still applies when no TTL field is configured; callers of `EntityFilter.Filtered()` pass `-1` as the TTL expiration timestamp when no field exists, causing `isEntityExpiredByTTLField()` to return false immediately 2. **Null/invalid TTL values treated safely**: Rows with null TTL or TTL ≤ 0 are marked as "never expire" (using sentinel value `int64(^uint64(0) >> 1)`) and are preserved across compactions; percentile calculations only include positive TTL values 3. **Query-time filtering automatic**: TTL filtering is transparently added to expression compilation via `AddTTLFieldFilterExpressions()`, which appends `(ttl_field IS NULL OR ttl_field > current_time)` to the filter pipeline. Entities with null TTL always pass the filter 4. **Compaction triggering granular**: Percentile-based expiration (20%, 40%, 60%, 80%, 100%) allows configurable compaction thresholds via `SingleCompactionRatioThreshold`, preventing premature data deletion ### Capability Added: Per-Entity Expiration with Data Distribution Awareness Users can now specify a TIMESTAMPTZ collection property `ttl_field` naming a schema field. During data writes, TTL values are collected per segment and percentile quantiles (5-value array) are computed and stored in segment metadata. At query time, the TTL field is automatically filtered. At compaction time, segment-level percentiles drive expiration-based compaction decisions, enabling intelligent compaction of segments where a configurable fraction of data has expired (e.g., compact when 40% of rows are expired, controlled by threshold ratio). <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
293 lines
8.1 KiB
Go
293 lines
8.1 KiB
Go
package common
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
)
|
|
|
|
func TestIsSystemField(t *testing.T) {
|
|
type args struct {
|
|
fieldID int64
|
|
}
|
|
tests := []struct {
|
|
name string
|
|
args args
|
|
want bool
|
|
}{
|
|
{
|
|
args: args{fieldID: StartOfUserFieldID},
|
|
want: false,
|
|
},
|
|
{
|
|
args: args{fieldID: StartOfUserFieldID + 1},
|
|
want: false,
|
|
},
|
|
{
|
|
args: args{fieldID: TimeStampField},
|
|
want: true,
|
|
},
|
|
{
|
|
args: args{fieldID: RowIDField},
|
|
want: true,
|
|
},
|
|
}
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
assert.Equalf(t, tt.want, IsSystemField(tt.args.fieldID), "IsSystemField(%v)", tt.args.fieldID)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestDatabaseProperties(t *testing.T) {
|
|
props := []*commonpb.KeyValuePair{
|
|
{
|
|
Key: DatabaseReplicaNumber,
|
|
Value: "3",
|
|
},
|
|
{
|
|
Key: DatabaseResourceGroups,
|
|
Value: strings.Join([]string{"rg1", "rg2"}, ","),
|
|
},
|
|
}
|
|
|
|
replicaNum, err := DatabaseLevelReplicaNumber(props)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, int64(3), replicaNum)
|
|
|
|
rgs, err := DatabaseLevelResourceGroups(props)
|
|
assert.NoError(t, err)
|
|
assert.Contains(t, rgs, "rg1")
|
|
assert.Contains(t, rgs, "rg2")
|
|
|
|
// test prop not found
|
|
_, err = DatabaseLevelReplicaNumber(nil)
|
|
assert.Error(t, err)
|
|
|
|
_, err = DatabaseLevelResourceGroups(nil)
|
|
assert.Error(t, err)
|
|
|
|
// test invalid prop value
|
|
|
|
props = []*commonpb.KeyValuePair{
|
|
{
|
|
Key: DatabaseReplicaNumber,
|
|
Value: "xxxx",
|
|
},
|
|
{
|
|
Key: DatabaseResourceGroups,
|
|
Value: "",
|
|
},
|
|
}
|
|
_, err = DatabaseLevelReplicaNumber(props)
|
|
assert.Error(t, err)
|
|
|
|
_, err = DatabaseLevelResourceGroups(props)
|
|
assert.Error(t, err)
|
|
}
|
|
|
|
func TestCommonPartitionKeyIsolation(t *testing.T) {
|
|
getProto := func(val string) []*commonpb.KeyValuePair {
|
|
return []*commonpb.KeyValuePair{
|
|
{
|
|
Key: PartitionKeyIsolationKey,
|
|
Value: val,
|
|
},
|
|
}
|
|
}
|
|
|
|
getMp := func(val string) map[string]string {
|
|
return map[string]string{
|
|
PartitionKeyIsolationKey: val,
|
|
}
|
|
}
|
|
|
|
t.Run("pb", func(t *testing.T) {
|
|
props := getProto("true")
|
|
res, err := IsPartitionKeyIsolationKvEnabled(props...)
|
|
assert.NoError(t, err)
|
|
assert.True(t, res)
|
|
|
|
props = getProto("false")
|
|
res, err = IsPartitionKeyIsolationKvEnabled(props...)
|
|
assert.NoError(t, err)
|
|
assert.False(t, res)
|
|
|
|
props = getProto("")
|
|
res, err = IsPartitionKeyIsolationKvEnabled(props...)
|
|
assert.ErrorContains(t, err, "failed to parse partition key isolation")
|
|
assert.False(t, res)
|
|
|
|
props = getProto("invalid")
|
|
res, err = IsPartitionKeyIsolationKvEnabled(props...)
|
|
assert.ErrorContains(t, err, "failed to parse partition key isolation")
|
|
assert.False(t, res)
|
|
})
|
|
|
|
t.Run("map", func(t *testing.T) {
|
|
props := getMp("true")
|
|
res, err := IsPartitionKeyIsolationPropEnabled(props)
|
|
assert.NoError(t, err)
|
|
assert.True(t, res)
|
|
|
|
props = getMp("false")
|
|
res, err = IsPartitionKeyIsolationPropEnabled(props)
|
|
assert.NoError(t, err)
|
|
assert.False(t, res)
|
|
|
|
props = getMp("")
|
|
res, err = IsPartitionKeyIsolationPropEnabled(props)
|
|
assert.ErrorContains(t, err, "failed to parse partition key isolation property")
|
|
assert.False(t, res)
|
|
|
|
props = getMp("invalid")
|
|
res, err = IsPartitionKeyIsolationPropEnabled(props)
|
|
assert.ErrorContains(t, err, "failed to parse partition key isolation property")
|
|
assert.False(t, res)
|
|
})
|
|
}
|
|
|
|
func TestShouldFieldBeLoaded(t *testing.T) {
|
|
type testCase struct {
|
|
tag string
|
|
input []*commonpb.KeyValuePair
|
|
expectOutput bool
|
|
expectError bool
|
|
}
|
|
|
|
testcases := []testCase{
|
|
{tag: "no_params", expectOutput: true},
|
|
{tag: "skipload_true", input: []*commonpb.KeyValuePair{{Key: FieldSkipLoadKey, Value: "true"}}, expectOutput: false},
|
|
{tag: "skipload_false", input: []*commonpb.KeyValuePair{{Key: FieldSkipLoadKey, Value: "false"}}, expectOutput: true},
|
|
{tag: "bad_skip_load_value", input: []*commonpb.KeyValuePair{{Key: FieldSkipLoadKey, Value: "abc"}}, expectError: true},
|
|
}
|
|
|
|
for _, tc := range testcases {
|
|
t.Run(tc.tag, func(t *testing.T) {
|
|
result, err := ShouldFieldBeLoaded(tc.input)
|
|
if tc.expectError {
|
|
assert.Error(t, err)
|
|
} else {
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, tc.expectOutput, result)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestIsEnableDynamicSchema(t *testing.T) {
|
|
type testCase struct {
|
|
tag string
|
|
input []*commonpb.KeyValuePair
|
|
expectFound bool
|
|
expectValue bool
|
|
expectError bool
|
|
}
|
|
|
|
cases := []testCase{
|
|
{tag: "no_params", expectFound: false},
|
|
{tag: "dynamicfield_true", input: []*commonpb.KeyValuePair{{Key: EnableDynamicSchemaKey, Value: "true"}}, expectFound: true, expectValue: true},
|
|
{tag: "dynamicfield_false", input: []*commonpb.KeyValuePair{{Key: EnableDynamicSchemaKey, Value: "false"}}, expectFound: true, expectValue: false},
|
|
{tag: "bad_kv_value", input: []*commonpb.KeyValuePair{{Key: EnableDynamicSchemaKey, Value: "abc"}}, expectFound: true, expectError: true},
|
|
}
|
|
|
|
for _, tc := range cases {
|
|
t.Run(tc.tag, func(t *testing.T) {
|
|
found, value, err := IsEnableDynamicSchema(tc.input)
|
|
if tc.expectError {
|
|
assert.Error(t, err)
|
|
} else {
|
|
assert.NoError(t, err)
|
|
}
|
|
assert.Equal(t, tc.expectFound, found)
|
|
assert.Equal(t, tc.expectValue, value)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestAllocAutoID(t *testing.T) {
|
|
start, end, err := AllocAutoID(func(n uint32) (int64, int64, error) {
|
|
return 100, 110, nil
|
|
}, 10, 1)
|
|
assert.NoError(t, err)
|
|
assert.EqualValues(t, 0b0100, start>>60)
|
|
assert.EqualValues(t, 0b0100, end>>60)
|
|
}
|
|
|
|
func TestFunctionProperty(t *testing.T) {
|
|
assert.False(t, GetCollectionAllowInsertNonBM25FunctionOutputs([]*commonpb.KeyValuePair{}))
|
|
assert.False(t, GetCollectionAllowInsertNonBM25FunctionOutputs(
|
|
[]*commonpb.KeyValuePair{{Key: "other", Value: "test"}}),
|
|
)
|
|
assert.False(t, GetCollectionAllowInsertNonBM25FunctionOutputs(
|
|
[]*commonpb.KeyValuePair{{Key: CollectionAllowInsertNonBM25FunctionOutputs, Value: "false"}}),
|
|
)
|
|
assert.False(t, GetCollectionAllowInsertNonBM25FunctionOutputs(
|
|
[]*commonpb.KeyValuePair{{Key: CollectionAllowInsertNonBM25FunctionOutputs, Value: "test"}}),
|
|
)
|
|
assert.True(t, GetCollectionAllowInsertNonBM25FunctionOutputs(
|
|
[]*commonpb.KeyValuePair{{Key: CollectionAllowInsertNonBM25FunctionOutputs, Value: "true"}}),
|
|
)
|
|
}
|
|
|
|
func TestIsDisableFuncRuntimeCheck(t *testing.T) {
|
|
disable, err := IsDisableFuncRuntimeCheck([]*commonpb.KeyValuePair{}...)
|
|
assert.NoError(t, err)
|
|
assert.False(t, disable)
|
|
disable, err = IsDisableFuncRuntimeCheck([]*commonpb.KeyValuePair{{Key: DisableFuncRuntimeCheck, Value: "False"}}...)
|
|
assert.NoError(t, err)
|
|
assert.False(t, disable)
|
|
disable, err = IsDisableFuncRuntimeCheck([]*commonpb.KeyValuePair{{Key: DisableFuncRuntimeCheck, Value: "True"}}...)
|
|
assert.NoError(t, err)
|
|
assert.True(t, disable)
|
|
disable, err = IsDisableFuncRuntimeCheck([]*commonpb.KeyValuePair{{Key: DisableFuncRuntimeCheck, Value: "Error"}}...)
|
|
assert.Error(t, err)
|
|
assert.False(t, disable)
|
|
}
|
|
|
|
func TestGetCollectionTTL(t *testing.T) {
|
|
type testCase struct {
|
|
tag string
|
|
value string
|
|
expect time.Duration
|
|
expectErr bool
|
|
}
|
|
|
|
cases := []testCase{
|
|
{tag: "normal_case", value: "3600", expect: time.Duration(3600) * time.Second, expectErr: false},
|
|
{tag: "error_value", value: "error value", expectErr: true},
|
|
{tag: "out_of_int64_range", value: "10000000000000000000000000000000000000000000000000000000000000000000000000000", expectErr: true},
|
|
{tag: "negative", value: "-1", expect: -1 * time.Second},
|
|
}
|
|
|
|
for _, tc := range cases {
|
|
t.Run(tc.tag, func(t *testing.T) {
|
|
result, err := GetCollectionTTL([]*commonpb.KeyValuePair{{Key: CollectionTTLConfigKey, Value: tc.value}})
|
|
if tc.expectErr {
|
|
assert.Error(t, err)
|
|
} else {
|
|
assert.EqualValues(t, tc.expect, result)
|
|
}
|
|
result, err = GetCollectionTTLFromMap(map[string]string{CollectionTTLConfigKey: tc.value})
|
|
if tc.expectErr {
|
|
assert.Error(t, err)
|
|
} else {
|
|
assert.EqualValues(t, tc.expect, result)
|
|
}
|
|
})
|
|
}
|
|
|
|
t.Run("not_config", func(t *testing.T) {
|
|
result, err := GetCollectionTTL([]*commonpb.KeyValuePair{})
|
|
assert.NoError(t, err)
|
|
assert.EqualValues(t, -1, result)
|
|
result, err = GetCollectionTTLFromMap(map[string]string{})
|
|
assert.NoError(t, err)
|
|
assert.EqualValues(t, -1, result)
|
|
})
|
|
}
|