mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
issue: https://github.com/milvus-io/milvus/issues/44399 This PR implements STL_SORT for VARCHAR data type for both RAM and MMAP mode. The general idea is that we deduplicate field values and maintains a posting list for each unique value. The serialization format of the index is: ``` [unique_count][string_offsets][string_data][post_list_offsets][post_list_data][magic_code] string_offsets: array of offsets into string_data section string_data: str_len1, str1, str_len2, str2, ... post_list_offsets: array of offsets into post_list_data section post_list_data: post_list_len1, row_id1, row_id2, ..., post_list_len2, row_id1, row_id2, ... ``` --------- Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
31 lines
910 B
Go
31 lines
910 B
Go
package indexparamcheck
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
|
|
)
|
|
|
|
// STLSORTChecker checks if a STL_SORT index can be built.
|
|
type STLSORTChecker struct {
|
|
scalarIndexChecker
|
|
}
|
|
|
|
func (c *STLSORTChecker) CheckTrain(dataType schemapb.DataType, elementType schemapb.DataType, params map[string]string) error {
|
|
return c.scalarIndexChecker.CheckTrain(dataType, elementType, params)
|
|
}
|
|
|
|
func (c *STLSORTChecker) CheckValidDataType(indexType IndexType, field *schemapb.FieldSchema) error {
|
|
if !typeutil.IsArithmetic(field.GetDataType()) && !typeutil.IsStringType(field.GetDataType()) {
|
|
return errors.New(fmt.Sprintf("STL_SORT are only supported on numeric or varchar field, got %s", field.GetDataType()))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func newSTLSORTChecker() *STLSORTChecker {
|
|
return &STLSORTChecker{}
|
|
}
|