fix: Handle default values correctly during compaction for added fields (#45572)

Related to #45543

When a field with a default value is added to a collection, the default
value becomes null after compaction instead of retaining the expected
default value.

**Root Cause**
The `appendValueAt` function in `internal/storage/arrow_util.go`
incorrectly checked if the entire arrow.Array was nil before handling
default values. This meant that default values were only applied when
the array itself was nil, not when individual field values were null
(which is the correct condition).

**Changes**
1. **Early nil check**: Added a guard at the function entry to detect
nil arrow.Array and return an error immediately, as this is an
unexpected condition that should not occur during normal operation.

2. **Refactored default value handling**: Removed the per-type nil array
checks and moved default value logic to handle individual null values
within the array (when `IsNull(idx)` returns true).

3. **Applied to all types**: Updated the logic consistently across all
builder types:
   - BooleanBuilder
   - Int8Builder, Int16Builder, Int32Builder, Int64Builder
   - Float32Builder
   - StringBuilder
   - BinaryBuilder (added default value support for internal $meta json)
   - ListBuilder (removed unnecessary nil check)

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
This commit is contained in:
congqixia 2025-11-17 19:03:38 +08:00 committed by GitHub
parent 96d0e780ac
commit e9506f1d64
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -30,22 +30,18 @@ import (
)
func appendValueAt(builder array.Builder, a arrow.Array, idx int, defaultValue *schemapb.ValueField) (uint64, error) {
// a could never be nil here
switch b := builder.(type) {
case *array.BooleanBuilder:
if a == nil {
if defaultValue != nil {
b.Append(defaultValue.GetBoolData())
return 1, nil
} else {
b.AppendNull()
return 0, nil
}
}
ba, ok := a.(*array.Boolean)
if !ok {
return 0, fmt.Errorf("invalid value type %T, expect %T", a.DataType(), builder.Type())
}
if ba.IsNull(idx) {
if defaultValue != nil {
b.Append(defaultValue.GetBoolData())
return 1, nil
}
b.AppendNull()
return 0, nil
} else {
@ -53,20 +49,15 @@ func appendValueAt(builder array.Builder, a arrow.Array, idx int, defaultValue *
return 1, nil
}
case *array.Int8Builder:
if a == nil {
if defaultValue != nil {
b.Append(int8(defaultValue.GetIntData()))
return 1, nil
} else {
b.AppendNull()
return 0, nil
}
}
ia, ok := a.(*array.Int8)
if !ok {
return 0, fmt.Errorf("invalid value type %T, expect %T", a.DataType(), builder.Type())
}
if ia.IsNull(idx) {
if defaultValue != nil {
b.Append(int8(defaultValue.GetIntData()))
return 1, nil
}
b.AppendNull()
return 0, nil
} else {
@ -74,20 +65,15 @@ func appendValueAt(builder array.Builder, a arrow.Array, idx int, defaultValue *
return 1, nil
}
case *array.Int16Builder:
if a == nil {
if defaultValue != nil {
b.Append(int16(defaultValue.GetIntData()))
return 2, nil
} else {
b.AppendNull()
return 0, nil
}
}
ia, ok := a.(*array.Int16)
if !ok {
return 0, fmt.Errorf("invalid value type %T, expect %T", a.DataType(), builder.Type())
}
if ia.IsNull(idx) {
if defaultValue != nil {
b.Append(int16(defaultValue.GetIntData()))
return 2, nil
}
b.AppendNull()
return 0, nil
} else {
@ -95,20 +81,15 @@ func appendValueAt(builder array.Builder, a arrow.Array, idx int, defaultValue *
return 2, nil
}
case *array.Int32Builder:
if a == nil {
if defaultValue != nil {
b.Append(defaultValue.GetIntData())
return 4, nil
} else {
b.AppendNull()
return 0, nil
}
}
ia, ok := a.(*array.Int32)
if !ok {
return 0, fmt.Errorf("invalid value type %T, expect %T", a.DataType(), builder.Type())
}
if ia.IsNull(idx) {
if defaultValue != nil {
b.Append(defaultValue.GetIntData())
return 4, nil
}
b.AppendNull()
return 0, nil
} else {
@ -116,20 +97,15 @@ func appendValueAt(builder array.Builder, a arrow.Array, idx int, defaultValue *
return 4, nil
}
case *array.Int64Builder:
if a == nil {
if defaultValue != nil {
b.Append(defaultValue.GetLongData())
return 8, nil
} else {
b.AppendNull()
return 0, nil
}
}
ia, ok := a.(*array.Int64)
if !ok {
return 0, fmt.Errorf("invalid value type %T, expect %T", a.DataType(), builder.Type())
}
if ia.IsNull(idx) {
if defaultValue != nil {
b.Append(defaultValue.GetLongData())
return 8, nil
}
b.AppendNull()
return 0, nil
} else {
@ -137,20 +113,15 @@ func appendValueAt(builder array.Builder, a arrow.Array, idx int, defaultValue *
return 8, nil
}
case *array.Float32Builder:
if a == nil {
if defaultValue != nil {
b.Append(defaultValue.GetFloatData())
return 4, nil
} else {
b.AppendNull()
return 0, nil
}
}
fa, ok := a.(*array.Float32)
if !ok {
return 0, fmt.Errorf("invalid value type %T, expect %T", a.DataType(), builder.Type())
}
if fa.IsNull(idx) {
if defaultValue != nil {
b.Append(defaultValue.GetFloatData())
return 4, nil
}
b.AppendNull()
return 0, nil
} else {
@ -179,21 +150,16 @@ func appendValueAt(builder array.Builder, a arrow.Array, idx int, defaultValue *
return 8, nil
}
case *array.StringBuilder:
if a == nil {
if defaultValue != nil {
val := defaultValue.GetStringData()
b.Append(val)
return uint64(len(val)), nil
} else {
b.AppendNull()
return 0, nil
}
}
sa, ok := a.(*array.String)
if !ok {
return 0, fmt.Errorf("invalid value type %T, expect %T", a.DataType(), builder.Type())
}
if sa.IsNull(idx) {
if defaultValue != nil {
val := defaultValue.GetStringData()
b.Append(val)
return uint64(len(val)), nil
}
b.AppendNull()
return 0, nil
} else {
@ -202,16 +168,17 @@ func appendValueAt(builder array.Builder, a arrow.Array, idx int, defaultValue *
return uint64(len(val)), nil
}
case *array.BinaryBuilder:
// array type, not support defaultValue now
if a == nil {
b.AppendNull()
return 0, nil
}
ba, ok := a.(*array.Binary)
if !ok {
return 0, fmt.Errorf("invalid value type %T, expect %T", a.DataType(), builder.Type())
}
if ba.IsNull(idx) {
// could be internal $meta json
if defaultValue != nil {
val := defaultValue.GetBytesData()
b.Append(val)
return uint64(len(val)), nil
}
b.AppendNull()
return 0, nil
} else {
@ -234,10 +201,6 @@ func appendValueAt(builder array.Builder, a arrow.Array, idx int, defaultValue *
}
case *array.ListBuilder:
// Handle ListBuilder for ArrayOfVector type
if a == nil {
b.AppendNull()
return 0, nil
}
la, ok := a.(*array.List)
if !ok {
return 0, fmt.Errorf("invalid value type %T, expect %T", a.DataType(), builder.Type())