milvus/client/milvusclient/iterator_option.go
congqixia 6c8e11da4f
feat: [GoSDK] add QueryIterator support for Go client (#46633)
Related to #31293

Implement QueryIterator for the Go SDK to enable efficient iteration
over large query result sets using PK-based pagination.

Key changes:
- Add QueryIterator interface and implementation with PK-based
pagination
- Support Int64 and VarChar primary key types for pagination filtering
- Add QueryIteratorOption with batchSize, limit, filter, outputFields
config
- Fix ResultSet.Slice to handle Query results without IDs/Scores
- Add comprehensive unit tests and integration tests

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: the iterator requires the collection primary key (PK)
to be present in outputFields so PK-based pagination and accurate row
counting work. The constructor enforces this by appending the PK to
outputFields when absent, and all pagination (lastPK tracking, PK-range
filters) and ResultCount calculations depend on that guaranteed PK
column.

- New capability: adds a public QueryIterator API (Client.QueryIterator,
QueryIterator interface, QueryIteratorOption) that issues server-side
Query RPCs in configurable batches and implements PK-based pagination
supporting Int64 and VarChar PKs, with options for batchSize, limit,
filter, outputFields and an upfront first-batch validation to fail fast
on invalid params.

- Removed/simplified logic: ResultSet.Slice no longer assumes IDs and
Scores are always present — it branches on presence of IDs (use IDs
length when non-nil; otherwise derive row count from Fields[0]) and
guards Scores slicing. This eliminates redundant/unsafe assumptions and
centralizes correct row-count logic based on actual returned fields.

- No data loss or behavior regression: pagination composes the user
filter with a PK-range filter and always requests the PK field, so
lastPK is extracted from a real column and fetchNextBatch only advances
when rows are returned; EOF is returned only when the server returns no
rows or iterator limit is reached. ResultSet.Slice guards prevent panics
for queries that lack IDs/Scores; Query RPC → ResultSet.Fields remains
the authoritative path for row data, so rows are not dropped and
existing query behavior is preserved.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
2025-12-27 01:43:20 +08:00

246 lines
7.4 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package milvusclient
import (
"fmt"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus/client/v2/entity"
"github.com/milvus-io/milvus/client/v2/index"
)
// SearchIteratorOption is the interface for search iterator options.
type SearchIteratorOption interface {
	// ValidateParams runs the static validation of the option parameters.
	ValidateParams() error
	// SearchOption returns the search option to use while iterating a search.
	SearchOption() *searchOption
	// Limit returns the overall limit of entries to iterate.
	Limit() int64
}
// searchIteratorOption is the concrete search iterator option. It embeds
// *searchOption and adds the iterator-specific settings below.
type searchIteratorOption struct {
*searchOption
// batchSize must be greater than 0 (see ValidateParams); SearchOption
// applies it as the request topK and as the iterator batch size param.
batchSize int
// iteratorLimit is the value reported by Limit; WithIteratorLimit maps
// negative inputs to Unlimited.
iteratorLimit int64
}
// SearchOption returns the embedded search option after syncing it with the
// current batch size: topK is set to the batch size and the
// IteratorSearchBatchSizeKey search param is rewritten with the same value.
func (opt *searchIteratorOption) SearchOption() *searchOption {
	batch := opt.batchSize
	opt.annRequest.topK = batch

	batchParam := fmt.Sprintf("%d", batch)
	opt.WithSearchParam(IteratorSearchBatchSizeKey, batchParam)

	return opt.searchOption
}
// Limit returns the overall limit of entries to iterate.
func (opt *searchIteratorOption) Limit() int64 {
	overall := opt.iteratorLimit
	return overall
}
// ValidateParams runs the static parameter checks for the search iterator.
// It returns an error when the batch size is not strictly positive and nil
// otherwise.
func (opt *searchIteratorOption) ValidateParams() error {
	if opt.batchSize > 0 {
		return nil
	}
	return fmt.Errorf("batch size must be greater than 0")
}
// WithBatchSize stores the batch size used for each iteration and returns the
// same option so calls can be chained. The value is not checked here; see
// ValidateParams.
func (opt *searchIteratorOption) WithBatchSize(batchSize int) *searchIteratorOption {
	opt.batchSize = batchSize

	return opt
}
// WithPartitions replaces the partition names on the embedded search option
// with partitionNames and returns the same option for chaining.
func (opt *searchIteratorOption) WithPartitions(partitionNames ...string) *searchIteratorOption {
	opt.partitionNames = partitionNames

	return opt
}
// WithFilter forwards the filter expression to the underlying ANN request and
// returns the same option for chaining.
func (opt *searchIteratorOption) WithFilter(expr string) *searchIteratorOption {
	opt.annRequest.WithFilter(expr)

	return opt
}
// WithTemplateParam forwards a template parameter (key and value) to the
// underlying ANN request and returns the same option for chaining.
func (opt *searchIteratorOption) WithTemplateParam(key string, val any) *searchIteratorOption {
	opt.annRequest.WithTemplateParam(key, val)

	return opt
}
// WithOffset forwards the offset to the underlying ANN request and returns
// the same option for chaining.
func (opt *searchIteratorOption) WithOffset(offset int) *searchIteratorOption {
	opt.annRequest.WithOffset(offset)

	return opt
}
// WithOutputFields replaces the output field names on the embedded search
// option with fieldNames and returns the same option for chaining.
func (opt *searchIteratorOption) WithOutputFields(fieldNames ...string) *searchIteratorOption {
	opt.outputFields = fieldNames

	return opt
}
// WithConsistencyLevel sets an explicit consistency level on the embedded
// search option and clears the use-default flag so the explicit level is the
// one in effect. It returns the same option for chaining.
func (opt *searchIteratorOption) WithConsistencyLevel(consistencyLevel entity.ConsistencyLevel) *searchIteratorOption {
	opt.useDefaultConsistencyLevel = false
	opt.consistencyLevel = consistencyLevel

	return opt
}
// WithANNSField forwards the ANNS field name to the underlying ANN request
// and returns the same option for chaining.
func (opt *searchIteratorOption) WithANNSField(annsField string) *searchIteratorOption {
	opt.annRequest.WithANNSField(annsField)

	return opt
}
// WithGroupByField forwards the group-by field name to the underlying ANN
// request and returns the same option for chaining.
func (opt *searchIteratorOption) WithGroupByField(groupByField string) *searchIteratorOption {
	opt.annRequest.WithGroupByField(groupByField)

	return opt
}
// WithGroupSize forwards the group size to the underlying ANN request and
// returns the same option for chaining.
func (opt *searchIteratorOption) WithGroupSize(groupSize int) *searchIteratorOption {
	opt.annRequest.WithGroupSize(groupSize)

	return opt
}
// WithStrictGroupSize forwards the strict-group-size flag to the underlying
// ANN request and returns the same option for chaining.
func (opt *searchIteratorOption) WithStrictGroupSize(strictGroupSize bool) *searchIteratorOption {
	opt.annRequest.WithStrictGroupSize(strictGroupSize)

	return opt
}
// WithIgnoreGrowing forwards the ignore-growing flag to the underlying ANN
// request and returns the same option for chaining.
func (opt *searchIteratorOption) WithIgnoreGrowing(ignoreGrowing bool) *searchIteratorOption {
	opt.annRequest.WithIgnoreGrowing(ignoreGrowing)

	return opt
}
// WithAnnParam forwards the given index.AnnParam to the underlying ANN
// request and returns the same option for chaining.
func (opt *searchIteratorOption) WithAnnParam(ap index.AnnParam) *searchIteratorOption {
	opt.annRequest.WithAnnParam(ap)

	return opt
}
// WithSearchParam forwards a key/value search parameter to the underlying ANN
// request and returns the iterator option (not the embedded search option)
// so chained calls keep the iterator-specific setters available.
func (opt *searchIteratorOption) WithSearchParam(key, value string) *searchIteratorOption {
	opt.annRequest.WithSearchParam(key, value)

	return opt
}
// WithIteratorLimit sets the overall limit of entries to iterate and returns
// the same option for chaining. Any negative limit is stored as Unlimited;
// zero and positive values are stored unchanged.
func (opt *searchIteratorOption) WithIteratorLimit(limit int64) *searchIteratorOption {
	opt.iteratorLimit = limit
	if limit < 0 {
		opt.iteratorLimit = Unlimited
	}
	return opt
}
// NewSearchIteratorOption creates a search iterator option for the given
// collection and a single query vector.
//
// The embedded search option is built with NewSearchOption (called with 1000
// and the single vector) and has the IteratorKey and IteratorSearchV2Key
// search params set to "true". The batch size defaults to 1000 and the
// iterator limit to Unlimited.
func NewSearchIteratorOption(collectionName string, vector entity.Vector) *searchIteratorOption {
	const defaultBatchSize = 1000

	base := NewSearchOption(collectionName, defaultBatchSize, []entity.Vector{vector})
	iterEnabled := base.WithSearchParam(IteratorKey, "true")
	iterV2Enabled := iterEnabled.WithSearchParam(IteratorSearchV2Key, "true")

	return &searchIteratorOption{
		searchOption:  iterV2Enabled,
		batchSize:     defaultBatchSize,
		iteratorLimit: Unlimited,
	}
}
// QueryIteratorOption is the interface for query iterator options.
type QueryIteratorOption interface {
	// ValidateParams runs the static validation of the option parameters.
	ValidateParams() error
	// Request returns the query request to use while iterating a query.
	Request() (*milvuspb.QueryRequest, error)
	// BatchSize returns the batch size for each query iteration.
	BatchSize() int
	// Limit returns the overall limit of entries to iterate.
	Limit() int64
}
// queryIteratorOption is the concrete query iterator option. Request copies
// most of these fields into the milvuspb.QueryRequest it builds.
type queryIteratorOption struct {
// collectionName is sent as QueryRequest.CollectionName.
collectionName string
// partitionNames is sent as QueryRequest.PartitionNames.
partitionNames []string
// outputFields is sent as QueryRequest.OutputFields.
outputFields []string
// expr is the filter expression set by WithFilter, sent as QueryRequest.Expr.
expr string
// batchSize is reported by BatchSize and must be greater than 0 (see ValidateParams).
batchSize int
// iteratorLimit is reported by Limit; WithIteratorLimit maps negative inputs to Unlimited.
iteratorLimit int64
// consistencyLevel is converted with CommonConsistencyLevel for the request.
consistencyLevel entity.ConsistencyLevel
// useDefaultConsistencyLevel is sent as QueryRequest.UseDefaultConsistency;
// WithConsistencyLevel sets it to false.
useDefaultConsistencyLevel bool
}
// Request builds the milvuspb.QueryRequest for the configured collection,
// partitions, output fields, filter expression and consistency settings.
//
// The query params always carry IteratorKey="true" and
// "reduce_stop_for_best"="true". The returned error is always nil in this
// implementation.
func (opt *queryIteratorOption) Request() (*milvuspb.QueryRequest, error) {
	iteratorParams := map[string]string{
		IteratorKey:            "true",
		"reduce_stop_for_best": "true",
	}

	req := &milvuspb.QueryRequest{}
	req.CollectionName = opt.collectionName
	req.PartitionNames = opt.partitionNames
	req.OutputFields = opt.outputFields
	req.Expr = opt.expr
	req.ConsistencyLevel = opt.consistencyLevel.CommonConsistencyLevel()
	req.UseDefaultConsistency = opt.useDefaultConsistencyLevel
	req.QueryParams = entity.MapKvPairs(iteratorParams)

	return req, nil
}
// BatchSize returns the batch size for each query iteration.
func (opt *queryIteratorOption) BatchSize() int {
	size := opt.batchSize
	return size
}
// Limit returns the overall limit of entries to iterate.
func (opt *queryIteratorOption) Limit() int64 {
	overall := opt.iteratorLimit
	return overall
}
// ValidateParams runs the static parameter checks for the query iterator.
// It returns an error when the batch size is not strictly positive and nil
// otherwise.
func (opt *queryIteratorOption) ValidateParams() error {
	if opt.batchSize > 0 {
		return nil
	}
	return fmt.Errorf("batch size must be greater than 0")
}
// WithBatchSize stores the batch size used for each query iteration and
// returns the same option so calls can be chained. The value is not checked
// here; see ValidateParams.
func (opt *queryIteratorOption) WithBatchSize(batchSize int) *queryIteratorOption {
	opt.batchSize = batchSize

	return opt
}
// WithPartitions replaces the partition names sent with the query with
// partitionNames and returns the same option for chaining.
func (opt *queryIteratorOption) WithPartitions(partitionNames ...string) *queryIteratorOption {
	opt.partitionNames = partitionNames

	return opt
}
// WithFilter stores the filter expression sent with the query and returns the
// same option for chaining.
func (opt *queryIteratorOption) WithFilter(expr string) *queryIteratorOption {
	opt.expr = expr

	return opt
}
// WithOutputFields replaces the output field names sent with the query with
// fieldNames and returns the same option for chaining.
func (opt *queryIteratorOption) WithOutputFields(fieldNames ...string) *queryIteratorOption {
	opt.outputFields = fieldNames

	return opt
}
// WithConsistencyLevel sets an explicit consistency level for the query and
// clears the use-default flag so the explicit level is the one in effect.
// It returns the same option for chaining.
func (opt *queryIteratorOption) WithConsistencyLevel(consistencyLevel entity.ConsistencyLevel) *queryIteratorOption {
	opt.useDefaultConsistencyLevel = false
	opt.consistencyLevel = consistencyLevel

	return opt
}
// WithIteratorLimit sets the overall limit of entries to iterate and returns
// the same option for chaining. Any negative limit is stored as Unlimited;
// zero and positive values are stored unchanged.
func (opt *queryIteratorOption) WithIteratorLimit(limit int64) *queryIteratorOption {
	effective := limit
	if effective < 0 {
		effective = Unlimited
	}
	opt.iteratorLimit = effective

	return opt
}
// NewQueryIteratorOption creates a query iterator option for the given
// collection with these defaults: batch size 1000, iterator limit Unlimited,
// consistency level entity.ClBounded, and the use-default-consistency flag
// set to true.
func NewQueryIteratorOption(collectionName string) *queryIteratorOption {
	opt := new(queryIteratorOption)
	opt.collectionName = collectionName
	opt.batchSize = 1000
	opt.iteratorLimit = Unlimited
	opt.consistencyLevel = entity.ClBounded
	opt.useDefaultConsistencyLevel = true

	return opt
}