milvus/internal/util/indexparamcheck/stl_sort_checker.go
Spade A 6077178553
enhance: enable STL_SORT to support VARCHAR (#44401)
issue: https://github.com/milvus-io/milvus/issues/44399

This PR implements STL_SORT for the VARCHAR data type in both RAM and MMAP
mode.
The general idea is that we deduplicate field values and maintain a
posting list for each unique value.

The serialization format of the index is:
```
[unique_count][string_offsets][string_data][post_list_offsets][post_list_data][magic_code]
string_offsets: array of offsets into string_data section
string_data: str_len1, str1, str_len2, str2, ...
post_list_offsets: array of offsets into post_list_data section
post_list_data: post_list_len1, row_id1, row_id2, ..., post_list_len2, row_id1, row_id2, ...
```

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
2025-10-23 11:00:05 +08:00

31 lines
910 B
Go

package indexparamcheck
import (
"fmt"
"github.com/cockroachdb/errors"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// STLSORTChecker checks whether an STL_SORT index can be built on a field.
type STLSORTChecker struct {
// Embedded so that CheckTrain can delegate to its implementation.
scalarIndexChecker
}
// CheckTrain forwards dataType, elementType and params unchanged to the
// embedded scalarIndexChecker's CheckTrain and returns whatever it reports.
func (c *STLSORTChecker) CheckTrain(dataType schemapb.DataType, elementType schemapb.DataType, params map[string]string) error {
	// No STL_SORT-specific checks are performed here; the embedded checker's
	// verdict is passed through as-is.
	err := c.scalarIndexChecker.CheckTrain(dataType, elementType, params)
	return err
}
// CheckValidDataType reports whether the field's data type is accepted for an
// STL_SORT index. It returns nil when typeutil.IsArithmetic or
// typeutil.IsStringType holds for the field's data type, and an error naming
// the offending data type otherwise. The indexType argument is not consulted.
func (c *STLSORTChecker) CheckValidDataType(indexType IndexType, field *schemapb.FieldSchema) error {
	fieldType := field.GetDataType()

	// Accept first: either predicate being true is enough.
	if typeutil.IsArithmetic(fieldType) || typeutil.IsStringType(fieldType) {
		return nil
	}

	msg := fmt.Sprintf("STL_SORT are only supported on numeric or varchar field, got %s", fieldType)
	return errors.New(msg)
}
// newSTLSORTChecker returns a pointer to a freshly allocated, zero-valued
// STLSORTChecker.
func newSTLSORTChecker() *STLSORTChecker {
	checker := new(STLSORTChecker)
	return checker
}