milvus/client/column/generic_base.go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package column

import (
	"github.com/cockroachdb/errors"
	"github.com/samber/lo"

	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/client/v2/entity"
)

// GColumn is the typed accessor interface for a column whose elements are of type T.
type GColumn[T any] interface {
	// Value returns the element of type T addressed by idx.
	Value(idx int) T
	// AppendValue appends v to the column.
	AppendValue(v T)
}

// compile-time assertion that *genericColumnBase satisfies the Column interface.
var _ Column = (*genericColumnBase[any])(nil)

// genericColumnBase implements the `Column` interface.
// It provides the basic functionality shared by the scalar column types.
type genericColumnBase[T any] struct {
	// name, declared field type and the stored values of the column.
	name      string
	fieldType entity.FieldType
	values    []T

	// nullable related fields
	// note that nullable must be set to true explicitly
	nullable  bool
	validData []bool
	// A nullable column can be represented in two modes:
	// - compact mode, in which all valid data are compacted into one slice
	// - sparse mode, in which each valid value is located at its own index position
	//   while invalid ones are filled with the zero value.
	// For Milvus 2.5.x and before, insert requests shall be in compact mode while
	// search & query results are formed in sparse mode.
	// This flag indicates which form the current column is in; validation and
	// conversion logic is performed based on it.
	sparseMode bool
	// indexMapping stores the compact-sparse mapping: for each row index it holds
	// the position of that row's value inside values, or -1 when the row is null.
	indexMapping []int
}

// Name returns the column name stored in the `name` field.
func (c *genericColumnBase[T]) Name() string {
	return c.name
}

// Type returns the field type this column was declared with.
// Note that the Go element type and the field type are not necessarily a
// one-to-one mapping: for example, `[]byte` could back several field types.
func (c *genericColumnBase[T]) Type() entity.FieldType {
	return c.fieldType
}

// Len returns the number of rows in the column.
// When validData has been populated its length is the row count (it also
// accounts for null rows); otherwise the number of stored values is used.
func (c *genericColumnBase[T]) Len() int {
	if c.validData == nil {
		return len(c.values)
	}
	return len(c.validData)
}

// AppendValue appends a single untyped value to the column.
// A nil input is forwarded to AppendNull. Any other input must be assertable
// to T, otherwise an error describing the mismatch is returned and the column
// is left untouched.
func (c *genericColumnBase[T]) AppendValue(a any) error {
	if a == nil {
		return c.AppendNull()
	}

	typed, matched := a.(T)
	if !matched {
		return errors.Newf("unexpected append value type %T, field type %v", a, c.fieldType)
	}

	// position the new value will occupy inside values
	pos := len(c.values)
	c.values = append(c.values, typed)
	if !c.nullable {
		return nil
	}

	// nullable columns also track validity and the row -> value position mapping
	c.validData = append(c.validData, true)
	c.indexMapping = append(c.indexMapping, pos)
	return nil
}

// Slice returns the rows in [start, end) as a Column.
// It simply delegates to the unexported slice method and exposes the result
// through the Column interface.
func (c *genericColumnBase[T]) Slice(start, end int) Column {
	return c.slice(start, end)
}

// slice builds a new column base covering rows [start, end).
// start is clamped to the column length; an end of -1, or one beyond the
// column length, means "up to the last row". The returned column shares the
// backing arrays of values and validData with the receiver.
// WARNING: this method works only for sparse mode columns; indexMapping is
// not carried over to the result.
func (c *genericColumnBase[T]) slice(start, end int) *genericColumnBase[T] {
	total := c.Len()
	if start > total {
		start = total
	}
	if end == -1 || end > total {
		end = total
	}

	sub := new(genericColumnBase[T])
	sub.name = c.name
	sub.fieldType = c.fieldType
	sub.nullable = c.nullable
	sub.sparseMode = c.sparseMode
	sub.values = c.values[start:end]
	if sub.nullable {
		sub.validData = c.validData[start:end]
	}
	return sub
}

// FieldData converts the column into its schemapb.FieldData form.
// The values are converted by values2FieldData, then the field name and data
// type are filled in. ValidData is attached only for nullable columns.
func (c *genericColumnBase[T]) FieldData() *schemapb.FieldData {
	data := values2FieldData(c.values, c.fieldType, 0)
	data.FieldName, data.Type = c.name, schemapb.DataType(c.fieldType)
	if !c.nullable {
		return data
	}
	data.ValidData = c.validData
	return data
}

// rangeCheck reports an error when idx does not fall inside [0, Len()).
func (c *genericColumnBase[T]) rangeCheck(idx int) error {
	if n := c.Len(); idx < 0 || idx >= n {
		return errors.Newf("index %d out of range[0, %d)", idx, n)
	}
	return nil
}

// Get returns the value of row idx as an untyped value.
// An error is returned when idx is outside [0, Len()), or when the row has no
// backing value (a null row of a compact-mode nullable column maps to -1).
func (c *genericColumnBase[T]) Get(idx int) (any, error) {
	// validate the logical row index first: valueIndex indexes into
	// indexMapping and must not be handed an out-of-range row.
	if err := c.rangeCheck(idx); err != nil {
		return nil, err
	}
	// pos is the position inside values; it is negative for null rows.
	pos := c.valueIndex(idx)
	if err := c.rangeCheck(pos); err != nil {
		return nil, err
	}
	return c.values[pos], nil
}

// GetAsInt64 returns the value of row idx converted to int64 by value2Type.
// An error is returned when idx is outside [0, Len()), when the row has no
// backing value (null row of a compact-mode nullable column), or when the
// conversion itself fails.
func (c *genericColumnBase[T]) GetAsInt64(idx int) (int64, error) {
	// validate the logical row index before it is used to look up indexMapping.
	if err := c.rangeCheck(idx); err != nil {
		return 0, err
	}
	// pos is the position inside values; it is negative for null rows.
	pos := c.valueIndex(idx)
	if err := c.rangeCheck(pos); err != nil {
		return 0, err
	}
	return value2Type[T, int64](c.values[pos])
}

// GetAsString returns the value of row idx converted to string by value2Type.
// An error is returned when idx is outside [0, Len()), when the row has no
// backing value (null row of a compact-mode nullable column), or when the
// conversion itself fails.
func (c *genericColumnBase[T]) GetAsString(idx int) (string, error) {
	// validate the logical row index before it is used to look up indexMapping.
	if err := c.rangeCheck(idx); err != nil {
		return "", err
	}
	// pos is the position inside values; it is negative for null rows.
	pos := c.valueIndex(idx)
	if err := c.rangeCheck(pos); err != nil {
		return "", err
	}
	return value2Type[T, string](c.values[pos])
}

// GetAsDouble returns the value of row idx converted to float64 by value2Type.
// An error is returned when idx is outside [0, Len()), when the row has no
// backing value (null row of a compact-mode nullable column), or when the
// conversion itself fails.
func (c *genericColumnBase[T]) GetAsDouble(idx int) (float64, error) {
	// validate the logical row index before it is used to look up indexMapping.
	if err := c.rangeCheck(idx); err != nil {
		return 0, err
	}
	// pos is the position inside values; it is negative for null rows.
	pos := c.valueIndex(idx)
	if err := c.rangeCheck(pos); err != nil {
		return 0, err
	}
	return value2Type[T, float64](c.values[pos])
}

// GetAsBool returns the value of row idx converted to bool by value2Type.
// An error is returned when idx is outside [0, Len()), when the row has no
// backing value (null row of a compact-mode nullable column), or when the
// conversion itself fails.
func (c *genericColumnBase[T]) GetAsBool(idx int) (bool, error) {
	// validate the logical row index before it is used to look up indexMapping.
	if err := c.rangeCheck(idx); err != nil {
		return false, err
	}
	// pos is the position inside values; it is negative for null rows.
	pos := c.valueIndex(idx)
	if err := c.rangeCheck(pos); err != nil {
		return false, err
	}
	return value2Type[T, bool](c.values[pos])
}

// Value returns the typed value of row idx.
// The zero value of T and an error are returned when idx is outside
// [0, Len()), or when the row has no backing value (a null row of a
// compact-mode nullable column maps to -1).
func (c *genericColumnBase[T]) Value(idx int) (T, error) {
	var z T
	// validate the logical row index before it is used to look up indexMapping.
	if err := c.rangeCheck(idx); err != nil {
		return z, err
	}
	// pos is the position inside values; it is negative for null rows.
	pos := c.valueIndex(idx)
	if err := c.rangeCheck(pos); err != nil {
		return z, err
	}
	return c.values[pos], nil
}

// valueIndex translates a logical row index into a position inside values.
// For non-nullable columns and sparse-mode columns rows and values line up,
// so idx is returned unchanged. For compact-mode nullable columns the position
// is looked up in indexMapping, where -1 marks a row without a value.
// A row index that indexMapping does not cover (negative, too large, or the
// mapping has not been built) also yields -1 instead of panicking, so callers'
// range checks can reject it.
func (c *genericColumnBase[T]) valueIndex(idx int) int {
	if !c.nullable || c.sparseMode {
		return idx
	}
	if idx < 0 || idx >= len(c.indexMapping) {
		return -1
	}
	return c.indexMapping[idx]
}

// Data returns the internal values slice as-is (no copy is made).
func (c *genericColumnBase[T]) Data() []T {
	return c.values
}

// MustValue returns the typed value of row idx and panics with
// "index out of range" when idx is outside [0, Len()) or when the row has no
// backing value (e.g. a null row of a compact-mode nullable column).
func (c *genericColumnBase[T]) MustValue(idx int) T {
	// check the logical row index first; Len() itself is not a valid index.
	if idx < 0 || idx >= c.Len() {
		panic("index out of range")
	}
	// pos addresses values, so it must be validated against len(values)
	// rather than the row count.
	pos := c.valueIndex(idx)
	if pos < 0 || pos >= len(c.values) {
		panic("index out of range")
	}
	return c.values[pos]
}

// AppendNull appends a null row to the column.
// It returns an error when the column is not nullable.
// In sparse mode a zero-value placeholder is appended to values so that
// values and validData stay the same length; in compact mode no value is
// stored and the row is mapped to -1 in indexMapping.
func (c *genericColumnBase[T]) AppendNull() error {
	if !c.nullable {
		return errors.New("append null to not nullable column")
	}

	c.validData = append(c.validData, false)
	if c.sparseMode {
		// sparse mode keeps one slot per row, null rows hold the zero value
		var zero T
		c.values = append(c.values, zero)
		return nil
	}
	c.indexMapping = append(c.indexMapping, -1)
	return nil
}

// IsNull reports whether row idx is null.
// An out-of-range idx yields an error. Non-nullable columns never contain
// nulls; for nullable columns the answer is the negation of validData[idx].
func (c *genericColumnBase[T]) IsNull(idx int) (bool, error) {
	err := c.rangeCheck(idx)
	switch {
	case err != nil:
		return false, err
	case c.nullable:
		return !c.validData[idx], nil
	default:
		return false, nil
	}
}

// Nullable reports whether the column has been flagged as nullable.
func (c *genericColumnBase[T]) Nullable() bool {
	return c.nullable
}

// SetNullable updates the nullable flag and adjusts the validity bookkeeping
// according to the flag value.
// Enabling nullable on a column without validData marks every existing value
// as valid and builds the matching identity row -> value mapping; an already
// present validData is kept untouched.
// NOTE: setting nullable to false erases all the validData previously set,
// together with the index mapping derived from it.
func (c *genericColumnBase[T]) SetNullable(nullable bool) {
	c.nullable = nullable
	if !nullable {
		c.validData = nil
		c.indexMapping = nil
		return
	}

	// initialize validData only when it has not been provided yet
	if c.validData == nil {
		n := len(c.values)
		// set valid flag for all existing values
		c.validData = lo.RepeatBy(n, func(_ int) bool { return true })
		// every row is valid, so row i is stored at values[i]
		c.indexMapping = lo.RepeatBy(n, func(i int) int { return i })
	}
}

// ValidateNullable runs the sanity check for a nullable column.
// Non-nullable columns always pass. Otherwise the check matching the current
// representation (sparse or compact) is applied to make sure the stored values
// agree with what validData describes.
func (c *genericColumnBase[T]) ValidateNullable() error {
	switch {
	case !c.nullable:
		// nothing to verify for non-nullable columns
		return nil
	case c.sparseMode:
		return c.validateNullableSparse()
	default:
		return c.validateNullableCompact()
	}
}

// validateNullableCompact checks a compact-mode column: the number of true
// entries in validData must equal the number of stored values.
// As a side effect indexMapping is rebuilt from validData (valid rows get
// consecutive positions, null rows get -1), even when the check fails.
func (c *genericColumnBase[T]) validateNullableCompact() error {
	mapping := make([]int, len(c.validData))
	valid := 0
	for i := range c.validData {
		if !c.validData[i] {
			mapping[i] = -1
			continue
		}
		mapping[i] = valid
		valid++
	}
	c.indexMapping = mapping

	if stored := len(c.values); stored != valid {
		return errors.Newf("values number(%d) does not match valid count(%d)", stored, valid)
	}
	return nil
}

// validateNullableSparse checks a sparse-mode column: values and validData
// must contain exactly the same number of entries.
func (c *genericColumnBase[T]) validateNullableSparse() error {
	stored, flags := len(c.values), len(c.validData)
	if stored == flags {
		return nil
	}
	return errors.Newf("values number (%d) does not match valid data len(%d)", stored, flags)
}

// CompactNullableValues converts a sparse-mode nullable column into compact
// mode in place: valid values are moved to the front of values, values is
// truncated to the valid count, and indexMapping is rebuilt (-1 for null rows).
// It is a no-op for non-nullable columns and for columns already in compact
// mode. After the conversion the column is flagged as compact, so calling the
// method again does nothing and reads/validation use the compact layout.
func (c *genericColumnBase[T]) CompactNullableValues() {
	if !c.nullable || !c.sparseMode {
		return
	}

	c.indexMapping = make([]int, len(c.validData))
	var cnt int
	for idx, valid := range c.validData {
		if !valid {
			c.indexMapping[idx] = -1
			continue
		}
		// cnt <= idx always holds, so moving forward never overwrites
		// a value that has not been visited yet
		c.values[cnt] = c.values[idx]
		c.indexMapping[idx] = cnt
		cnt++
	}
	c.values = c.values[0:cnt]
	// the data is now laid out in compact form, record that in the mode flag
	c.sparseMode = false
}

// ValidCount returns the number of non-null rows.
// For non-nullable columns, or when no validData is present, every stored
// value counts; otherwise the true entries of validData are counted.
func (c *genericColumnBase[T]) ValidCount() int {
	if !c.nullable {
		return len(c.values)
	}
	if len(c.validData) == 0 {
		return len(c.values)
	}

	var n int
	for i := range c.validData {
		if c.validData[i] {
			n++
		}
	}
	return n
}

// withValidData installs validData on the column and marks it nullable.
// An empty (or nil) validData leaves the column unchanged.
func (c *genericColumnBase[T]) withValidData(validData []bool) {
	if len(validData) == 0 {
		return
	}
	c.validData = validData
	c.nullable = true
}

// base returns the receiver itself.
// NOTE(review): presumably this lets types embedding genericColumnBase expose
// the shared base (e.g. to apply ColumnOption) — confirm against callers.
func (c *genericColumnBase[T]) base() *genericColumnBase[T] {
	return c
}

// ColumnOption is a functional option that mutates a genericColumnBase[T].
type ColumnOption[T any] func(*genericColumnBase[T])

// WithSparseNullableMode returns a ColumnOption that sets the sparse mode for the column.
// A true flag marks the column as being in sparse mode, false as compact mode.
func WithSparseNullableMode[T any](flag bool) ColumnOption[T] {
	return func(c *genericColumnBase[T]) {
		// the option only records the flag; stored values are not rearranged here
		c.sparseMode = flag
	}
}