mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-28 14:35:27 +08:00
Related to #31293 Implement QueryIterator for the Go SDK to enable efficient iteration over large query result sets using PK-based pagination. Key changes: - Add QueryIterator interface and implementation with PK-based pagination - Support Int64 and VarChar primary key types for pagination filtering - Add QueryIteratorOption with batchSize, limit, filter, outputFields config - Fix ResultSet.Slice to handle Query results without IDs/Scores - Add comprehensive unit tests and integration tests <!-- This is an auto-generated comment: release notes by coderabbit.ai --> - Core invariant: the iterator requires the collection primary key (PK) to be present in outputFields so PK-based pagination and accurate row counting work. The constructor enforces this by appending the PK to outputFields when absent, and all pagination (lastPK tracking, PK-range filters) and ResultCount calculations depend on that guaranteed PK column. - New capability: adds a public QueryIterator API (Client.QueryIterator, QueryIterator interface, QueryIteratorOption) that issues server-side Query RPCs in configurable batches and implements PK-based pagination supporting Int64 and VarChar PKs, with options for batchSize, limit, filter, outputFields and an upfront first-batch validation to fail fast on invalid params. - Removed/simplified logic: ResultSet.Slice no longer assumes IDs and Scores are always present — it branches on presence of IDs (use IDs length when non-nil; otherwise derive row count from Fields[0]) and guards Scores slicing. This eliminates redundant/unsafe assumptions and centralizes correct row-count logic based on actual returned fields. - No data loss or behavior regression: pagination composes the user filter with a PK-range filter and always requests the PK field, so lastPK is extracted from a real column and fetchNextBatch only advances when rows are returned; EOF is returned only when the server returns no rows or iterator limit is reached. ResultSet.Slice guards prevent panics for queries that lack IDs/Scores; Query RPC → ResultSet.Fields remains the authoritative path for row data, so rows are not dropped and existing query behavior is preserved. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
246 lines
7.4 KiB
Go
246 lines
7.4 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package milvusclient
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
|
"github.com/milvus-io/milvus/client/v2/entity"
|
|
"github.com/milvus-io/milvus/client/v2/index"
|
|
)
|
|
|
|
type SearchIteratorOption interface {
|
|
// SearchOption returns the search option when iterate search
|
|
SearchOption() *searchOption
|
|
// Limit returns the overall limit of entries to iterate
|
|
Limit() int64
|
|
// ValidateParams performs the static params validation
|
|
ValidateParams() error
|
|
}
|
|
|
|
type searchIteratorOption struct {
|
|
*searchOption
|
|
batchSize int
|
|
iteratorLimit int64
|
|
}
|
|
|
|
func (opt *searchIteratorOption) SearchOption() *searchOption {
|
|
opt.annRequest.topK = opt.batchSize
|
|
opt.WithSearchParam(IteratorSearchBatchSizeKey, fmt.Sprintf("%d", opt.batchSize))
|
|
return opt.searchOption
|
|
}
|
|
|
|
func (opt *searchIteratorOption) Limit() int64 {
|
|
return opt.iteratorLimit
|
|
}
|
|
|
|
// ValidateParams performs the static params validation
|
|
func (opt *searchIteratorOption) ValidateParams() error {
|
|
if opt.batchSize <= 0 {
|
|
return fmt.Errorf("batch size must be greater than 0")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithBatchSize(batchSize int) *searchIteratorOption {
|
|
opt.batchSize = batchSize
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithPartitions(partitionNames ...string) *searchIteratorOption {
|
|
opt.partitionNames = partitionNames
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithFilter(expr string) *searchIteratorOption {
|
|
opt.annRequest.WithFilter(expr)
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithTemplateParam(key string, val any) *searchIteratorOption {
|
|
opt.annRequest.WithTemplateParam(key, val)
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithOffset(offset int) *searchIteratorOption {
|
|
opt.annRequest.WithOffset(offset)
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithOutputFields(fieldNames ...string) *searchIteratorOption {
|
|
opt.outputFields = fieldNames
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithConsistencyLevel(consistencyLevel entity.ConsistencyLevel) *searchIteratorOption {
|
|
opt.consistencyLevel = consistencyLevel
|
|
opt.useDefaultConsistencyLevel = false
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithANNSField(annsField string) *searchIteratorOption {
|
|
opt.annRequest.WithANNSField(annsField)
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithGroupByField(groupByField string) *searchIteratorOption {
|
|
opt.annRequest.WithGroupByField(groupByField)
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithGroupSize(groupSize int) *searchIteratorOption {
|
|
opt.annRequest.WithGroupSize(groupSize)
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithStrictGroupSize(strictGroupSize bool) *searchIteratorOption {
|
|
opt.annRequest.WithStrictGroupSize(strictGroupSize)
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithIgnoreGrowing(ignoreGrowing bool) *searchIteratorOption {
|
|
opt.annRequest.WithIgnoreGrowing(ignoreGrowing)
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithAnnParam(ap index.AnnParam) *searchIteratorOption {
|
|
opt.annRequest.WithAnnParam(ap)
|
|
return opt
|
|
}
|
|
|
|
func (opt *searchIteratorOption) WithSearchParam(key, value string) *searchIteratorOption {
|
|
opt.annRequest.WithSearchParam(key, value)
|
|
return opt
|
|
}
|
|
|
|
// WithIteratorLimit sets the limit of entries to iterate
|
|
// if limit < 0, then it will be set to Unlimited
|
|
func (opt *searchIteratorOption) WithIteratorLimit(limit int64) *searchIteratorOption {
|
|
if limit < 0 {
|
|
limit = Unlimited
|
|
}
|
|
opt.iteratorLimit = limit
|
|
return opt
|
|
}
|
|
|
|
func NewSearchIteratorOption(collectionName string, vector entity.Vector) *searchIteratorOption {
|
|
return &searchIteratorOption{
|
|
searchOption: NewSearchOption(collectionName, 1000, []entity.Vector{vector}).
|
|
WithSearchParam(IteratorKey, "true").
|
|
WithSearchParam(IteratorSearchV2Key, "true"),
|
|
batchSize: 1000,
|
|
iteratorLimit: Unlimited,
|
|
}
|
|
}
|
|
|
|
// QueryIteratorOption is the interface for query iterator options.
|
|
type QueryIteratorOption interface {
|
|
// Request returns the query request when iterate query
|
|
Request() (*milvuspb.QueryRequest, error)
|
|
// BatchSize returns the batch size for each query iteration
|
|
BatchSize() int
|
|
// Limit returns the overall limit of entries to iterate
|
|
Limit() int64
|
|
// ValidateParams performs the static params validation
|
|
ValidateParams() error
|
|
}
|
|
|
|
type queryIteratorOption struct {
|
|
collectionName string
|
|
partitionNames []string
|
|
outputFields []string
|
|
expr string
|
|
batchSize int
|
|
iteratorLimit int64
|
|
consistencyLevel entity.ConsistencyLevel
|
|
useDefaultConsistencyLevel bool
|
|
}
|
|
|
|
func (opt *queryIteratorOption) Request() (*milvuspb.QueryRequest, error) {
|
|
return &milvuspb.QueryRequest{
|
|
CollectionName: opt.collectionName,
|
|
PartitionNames: opt.partitionNames,
|
|
OutputFields: opt.outputFields,
|
|
Expr: opt.expr,
|
|
ConsistencyLevel: opt.consistencyLevel.CommonConsistencyLevel(),
|
|
UseDefaultConsistency: opt.useDefaultConsistencyLevel,
|
|
QueryParams: entity.MapKvPairs(map[string]string{IteratorKey: "true", "reduce_stop_for_best": "true"}),
|
|
}, nil
|
|
}
|
|
|
|
func (opt *queryIteratorOption) BatchSize() int {
|
|
return opt.batchSize
|
|
}
|
|
|
|
func (opt *queryIteratorOption) Limit() int64 {
|
|
return opt.iteratorLimit
|
|
}
|
|
|
|
func (opt *queryIteratorOption) ValidateParams() error {
|
|
if opt.batchSize <= 0 {
|
|
return fmt.Errorf("batch size must be greater than 0")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (opt *queryIteratorOption) WithBatchSize(batchSize int) *queryIteratorOption {
|
|
opt.batchSize = batchSize
|
|
return opt
|
|
}
|
|
|
|
func (opt *queryIteratorOption) WithPartitions(partitionNames ...string) *queryIteratorOption {
|
|
opt.partitionNames = partitionNames
|
|
return opt
|
|
}
|
|
|
|
func (opt *queryIteratorOption) WithFilter(expr string) *queryIteratorOption {
|
|
opt.expr = expr
|
|
return opt
|
|
}
|
|
|
|
func (opt *queryIteratorOption) WithOutputFields(fieldNames ...string) *queryIteratorOption {
|
|
opt.outputFields = fieldNames
|
|
return opt
|
|
}
|
|
|
|
func (opt *queryIteratorOption) WithConsistencyLevel(consistencyLevel entity.ConsistencyLevel) *queryIteratorOption {
|
|
opt.consistencyLevel = consistencyLevel
|
|
opt.useDefaultConsistencyLevel = false
|
|
return opt
|
|
}
|
|
|
|
// WithIteratorLimit sets the limit of entries to iterate
|
|
// if limit < 0, then it will be set to Unlimited
|
|
func (opt *queryIteratorOption) WithIteratorLimit(limit int64) *queryIteratorOption {
|
|
if limit < 0 {
|
|
limit = Unlimited
|
|
}
|
|
opt.iteratorLimit = limit
|
|
return opt
|
|
}
|
|
|
|
func NewQueryIteratorOption(collectionName string) *queryIteratorOption {
|
|
return &queryIteratorOption{
|
|
collectionName: collectionName,
|
|
batchSize: 1000,
|
|
iteratorLimit: Unlimited,
|
|
useDefaultConsistencyLevel: true,
|
|
consistencyLevel: entity.ClBounded,
|
|
}
|
|
}
|