mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
### Is there an existing issue for this? - [x] I have searched the existing issues --- Please see: https://github.com/milvus-io/milvus/issues/44593 for the background This PR makes https://github.com/milvus-io/milvus/pull/44638 redundant, which can be closed. The PR comments for the original implementation suggested an alternative and a better approach, this new PR has that implementation. --- This PR - Adds an optional `minimum_should_match` argument to `text_match(...)` and wires it through the parser, planner/visitor, index bindings, and client-level tests/examples so full-text queries can require a minimum number of tokens to match. Motivation - Provide a way to require an expression to match a minimum number of tokens in lexical search. What changed - Parser / grammar - Added grammar rule and token: `MINIMUM_SHOULD_MATCH` and `textMatchOption` in `internal/parser/planparserv2/Plan.g4`. - Regenerated parser outputs: `internal/parser/planparserv2/generated/*` (parser, lexer, visitor, etc.) to support the new rule. - Planner / visitor - `parser_visitor.go`: parse and validate the `minimum_should_match` integer; propagate as an extra value on the `TextMatch` expression so downstream components receive it. - Added `VisitTextMatchOption` visitor method handling. - Client (Golang) - Added a unit test to verify `text_match(..., minimum_should_match=...)` appears in the generated DSL and is accepted by client code: `client/milvusclient/read_test.go` (new test coverage). - Added an integration-style test for the feature to the go-client testcase suite: `tests/go_client/testcases/full_text_search_test.go` (exercise min=1, min=3, large min). - Added an example demonstrating `text_match` usage: `client/milvusclient/read_example_test.go` (example name conforms to godoc mapping). - Engine / index - Updated C++ index interface: `TextMatchIndex::MatchQuery` - Added/updated unit tests for the index behavior: `internal/core/src/index/TextMatchIndexTest.cpp`. - Tantivy binding - Added `match_query_with_minimum` implementation and unit tests to `internal/core/thirdparty/tantivy/tantivy-binding/src/index_reader_text.rs` that construct boolean queries with minimum required clauses. Behavioral / compatibility notes - This adds an optional argument to `text_match` only; default behavior (no `minimum_should_match`) is unchanged. - Internal API change: `TextMatchIndex::MatchQuery` signature changed (internal component). Callers in the repo were updated accordingly. - Parser changes required regenerating ANTLR outputs Tests and verification - New/updated tests: - Go client unit test: `client/milvusclient/read_test.go` (mocked Search request asserts DSL contains `minimum_should_match=2`). - Go e2e-style test: `tests/go_client/testcases/full_text_search_test.go` (exercises min=1, 3 and a large min). - C++ unit tests for index behavior: `internal/core/src/index/TextMatchIndexTest.cpp`. - Rust binding unit tests for `match_query_with_minimum`. - Local verification commands to run: - Go client tests: `cd client && go test ./milvusclient -run ^$` (client package) - Go testcases: `cd tests/go_client && go test ./testcases -run TestTextMatchMinimumShouldMatch` (requires a running Milvus instance) - C++ unit tests / build: run core build/test per repo instructions (the change touches core index code). - Rust binding tests: `cd internal/core/thirdparty/tantivy/tantivy-binding && cargo test` (if developing locally). --------- Signed-off-by: Amit Kumar <amit.kumar@reddit.com> Co-authored-by: Amit Kumar <amit.kumar@reddit.com>
494 lines
15 KiB
Go
494 lines
15 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// nolint
|
|
package milvusclient_test
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
|
|
"github.com/milvus-io/milvus/client/v2/column"
|
|
"github.com/milvus-io/milvus/client/v2/entity"
|
|
"github.com/milvus-io/milvus/client/v2/index"
|
|
"github.com/milvus-io/milvus/client/v2/milvusclient"
|
|
)
|
|
|
|
func ExampleClient_Search_basic() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
milvusAddr := "127.0.0.1:19530"
|
|
token := "root:Milvus"
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
APIKey: token,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
queryVector := []float32{0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592}
|
|
|
|
resultSets, err := cli.Search(ctx, milvusclient.NewSearchOption(
|
|
"quick_setup", // collectionName
|
|
3, // limit
|
|
[]entity.Vector{entity.FloatVector(queryVector)},
|
|
))
|
|
if err != nil {
|
|
log.Fatal("failed to perform basic ANN search collection: ", err.Error())
|
|
}
|
|
|
|
for _, resultSet := range resultSets {
|
|
log.Println("IDs: ", resultSet.IDs)
|
|
log.Println("Scores: ", resultSet.Scores)
|
|
}
|
|
}
|
|
|
|
func ExampleClient_Search_multivectors() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
milvusAddr := "127.0.0.1:19530"
|
|
token := "root:Milvus"
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
APIKey: token,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
queryVectors := []entity.Vector{
|
|
entity.FloatVector([]float32{0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592}),
|
|
entity.FloatVector([]float32{0.19886812562848388, 0.06023560599112088, 0.6976963061752597, 0.2614474506242501, 0.838729485096104}),
|
|
}
|
|
|
|
resultSets, err := cli.Search(ctx, milvusclient.NewSearchOption(
|
|
"quick_setup", // collectionName
|
|
3, // limit
|
|
queryVectors,
|
|
))
|
|
if err != nil {
|
|
log.Fatal("failed to perform basic ANN search collection: ", err.Error())
|
|
}
|
|
|
|
for _, resultSet := range resultSets {
|
|
log.Println("IDs: ", resultSet.IDs)
|
|
log.Println("Scores: ", resultSet.Scores)
|
|
}
|
|
}
|
|
|
|
func ExampleClient_Search_partition() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
milvusAddr := "127.0.0.1:19530"
|
|
token := "root:Milvus"
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
APIKey: token,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
queryVector := []float32{0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592}
|
|
|
|
resultSets, err := cli.Search(ctx, milvusclient.NewSearchOption(
|
|
"quick_setup", // collectionName
|
|
3, // limit
|
|
[]entity.Vector{entity.FloatVector(queryVector)},
|
|
).WithPartitions("partitionA"))
|
|
if err != nil {
|
|
log.Fatal("failed to perform basic ANN search collection: ", err.Error())
|
|
}
|
|
|
|
for _, resultSet := range resultSets {
|
|
log.Println("IDs: ", resultSet.IDs)
|
|
log.Println("Scores: ", resultSet.Scores)
|
|
}
|
|
}
|
|
|
|
func ExampleClient_Search_outputFields() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
milvusAddr := "127.0.0.1:19530"
|
|
token := "root:Milvus"
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
APIKey: token,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
queryVector := []float32{0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592}
|
|
|
|
resultSets, err := cli.Search(ctx, milvusclient.NewSearchOption(
|
|
"quick_setup", // collectionName
|
|
3, // limit
|
|
[]entity.Vector{entity.FloatVector(queryVector)},
|
|
).WithOutputFields("color"))
|
|
if err != nil {
|
|
log.Fatal("failed to perform basic ANN search collection: ", err.Error())
|
|
}
|
|
|
|
for _, resultSet := range resultSets {
|
|
log.Println("IDs: ", resultSet.IDs)
|
|
log.Println("Scores: ", resultSet.Scores)
|
|
log.Println("Colors: ", resultSet.GetColumn("color"))
|
|
}
|
|
}
|
|
|
|
func ExampleClient_Search_offsetLimit() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
milvusAddr := "127.0.0.1:19530"
|
|
token := "root:Milvus"
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
APIKey: token,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
queryVector := []float32{0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592}
|
|
|
|
resultSets, err := cli.Search(ctx, milvusclient.NewSearchOption(
|
|
"quick_setup", // collectionName
|
|
3, // limit
|
|
[]entity.Vector{entity.FloatVector(queryVector)},
|
|
).WithOffset(10))
|
|
if err != nil {
|
|
log.Fatal("failed to perform basic ANN search collection: ", err.Error())
|
|
}
|
|
|
|
for _, resultSet := range resultSets {
|
|
log.Println("IDs: ", resultSet.IDs)
|
|
log.Println("Scores: ", resultSet.Scores)
|
|
}
|
|
}
|
|
|
|
func ExampleClient_Search_jsonExpr() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
queryVector := []float32{0.3, -0.6, -0.1}
|
|
|
|
annParam := index.NewCustomAnnParam()
|
|
annParam.WithExtraParam("nprobe", 10)
|
|
resultSets, err := cli.Search(ctx, milvusclient.NewSearchOption(
|
|
"my_json_collection", // collectionName
|
|
5, // limit
|
|
[]entity.Vector{entity.FloatVector(queryVector)},
|
|
).WithOutputFields("metadata").WithAnnParam(annParam))
|
|
if err != nil {
|
|
log.Fatal("failed to perform basic ANN search collection: ", err.Error())
|
|
}
|
|
|
|
for _, resultSet := range resultSets {
|
|
log.Println("IDs: ", resultSet.IDs)
|
|
log.Println("Scores: ", resultSet.Scores)
|
|
}
|
|
}
|
|
|
|
func ExampleClient_Search_binaryVector() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
milvusAddr := "127.0.0.1:19530"
|
|
token := "root:Milvus"
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
APIKey: token,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
queryVector := []byte{0b10011011, 0b01010100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
|
|
|
|
annSearchParams := index.NewCustomAnnParam()
|
|
annSearchParams.WithExtraParam("nprobe", 10)
|
|
resultSets, err := cli.Search(ctx, milvusclient.NewSearchOption(
|
|
"my_binary_collection", // collectionName
|
|
5, // limit
|
|
[]entity.Vector{entity.BinaryVector(queryVector)},
|
|
).WithOutputFields("pk").WithAnnParam(annSearchParams))
|
|
if err != nil {
|
|
log.Fatal("failed to perform basic ANN search collection: ", err.Error())
|
|
}
|
|
|
|
for _, resultSet := range resultSets {
|
|
log.Println("IDs: ", resultSet.IDs)
|
|
log.Println("Scores: ", resultSet.Scores)
|
|
log.Println("Pks: ", resultSet.GetColumn("pk"))
|
|
}
|
|
}
|
|
|
|
func ExampleClient_Get() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
milvusAddr := "127.0.0.1:19530"
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
rs, err := cli.Get(ctx, milvusclient.NewQueryOption("quick_setup").
|
|
WithIDs(column.NewColumnInt64("id", []int64{1, 2, 3})))
|
|
if err != nil {
|
|
// handle error
|
|
}
|
|
|
|
fmt.Println(rs.GetColumn("id"))
|
|
}
|
|
|
|
func ExampleClient_Query() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
milvusAddr := "127.0.0.1:19530"
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
rs, err := cli.Query(ctx, milvusclient.NewQueryOption("quick_setup").
|
|
WithFilter("emb_type == 3").
|
|
WithOutputFields("id", "emb_type"))
|
|
if err != nil {
|
|
// handle error
|
|
}
|
|
|
|
fmt.Println(rs.GetColumn("id"))
|
|
}
|
|
|
|
func ExampleClient_Query_jsonExpr_notnull() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
rs, err := cli.Query(ctx, milvusclient.NewQueryOption("my_json_collection").
|
|
WithFilter("metadata is not null").
|
|
WithOutputFields("metadata", "pk"))
|
|
if err != nil {
|
|
// handle error
|
|
}
|
|
|
|
fmt.Println(rs.GetColumn("pk"))
|
|
fmt.Println(rs.GetColumn("metadata"))
|
|
}
|
|
|
|
func ExampleClient_Query_jsonExpr_leafChild() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
rs, err := cli.Query(ctx, milvusclient.NewQueryOption("my_json_collection").
|
|
WithFilter(`metadata["product_info"]["category"] == "electronics"`).
|
|
WithOutputFields("metadata", "pk"))
|
|
if err != nil {
|
|
// handle error
|
|
}
|
|
|
|
fmt.Println(rs.GetColumn("pk"))
|
|
fmt.Println(rs.GetColumn("metadata"))
|
|
}
|
|
|
|
func ExampleClient_HybridSearch() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
milvusAddr := "127.0.0.1:19530"
|
|
token := "root:Milvus"
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
APIKey: token,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
|
|
defer cli.Close(ctx)
|
|
|
|
queryVector := []float32{0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592}
|
|
sparseVector, _ := entity.NewSliceSparseEmbedding([]uint32{1, 21, 100}, []float32{0.1, 0.2, 0.3})
|
|
|
|
resultSets, err := cli.HybridSearch(ctx, milvusclient.NewHybridSearchOption(
|
|
"quick_setup",
|
|
3,
|
|
milvusclient.NewAnnRequest("dense_vector", 10, entity.FloatVector(queryVector)),
|
|
milvusclient.NewAnnRequest("sparse_vector", 10, sparseVector),
|
|
).WithReranker(milvusclient.NewRRFReranker()))
|
|
if err != nil {
|
|
log.Fatal("failed to perform basic ANN search collection: ", err.Error())
|
|
}
|
|
|
|
for _, resultSet := range resultSets {
|
|
log.Println("IDs: ", resultSet.IDs)
|
|
log.Println("Scores: ", resultSet.Scores)
|
|
}
|
|
}
|
|
|
|
func ExampleClient_Search_textMatch() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
collectionName := "text_min_match"
|
|
titleField := "title"
|
|
textField := "document_text"
|
|
titleSparse := "title_sparse_vector"
|
|
textSparse := "text_sparse_vector"
|
|
milvusAddr := "127.0.0.1:19530"
|
|
|
|
cli, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
|
|
Address: milvusAddr,
|
|
})
|
|
if err != nil {
|
|
log.Fatal("failed to connect to milvus server: ", err.Error())
|
|
}
|
|
defer cli.Close(ctx)
|
|
|
|
_ = cli.DropCollection(ctx, milvusclient.NewDropCollectionOption(collectionName))
|
|
|
|
schema := entity.NewSchema().
|
|
WithField(entity.NewField().WithName("id").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true)).
|
|
WithField(entity.NewField().WithName(titleField).WithDataType(entity.FieldTypeVarChar).WithMaxLength(512).WithEnableAnalyzer(true).WithEnableMatch(true)).
|
|
WithField(entity.NewField().WithName(textField).WithDataType(entity.FieldTypeVarChar).WithMaxLength(2048).WithEnableAnalyzer(true).WithEnableMatch(true)).
|
|
WithField(entity.NewField().WithName(titleSparse).WithDataType(entity.FieldTypeSparseVector)).
|
|
WithField(entity.NewField().WithName(textSparse).WithDataType(entity.FieldTypeSparseVector)).
|
|
WithFunction(entity.NewFunction().WithName("title_bm25_func").WithType(entity.FunctionTypeBM25).WithInputFields(titleField).WithOutputFields(titleSparse)).
|
|
WithFunction(entity.NewFunction().WithName("text_bm25_func").WithType(entity.FunctionTypeBM25).WithInputFields(textField).WithOutputFields(textSparse))
|
|
|
|
idxOpts := []milvusclient.CreateIndexOption{
|
|
milvusclient.NewCreateIndexOption(collectionName, titleField, index.NewInvertedIndex()),
|
|
milvusclient.NewCreateIndexOption(collectionName, textField, index.NewInvertedIndex()),
|
|
milvusclient.NewCreateIndexOption(collectionName, titleSparse, index.NewSparseInvertedIndex(entity.BM25, 0.2)),
|
|
milvusclient.NewCreateIndexOption(collectionName, textSparse, index.NewSparseInvertedIndex(entity.BM25, 0.2)),
|
|
}
|
|
|
|
err = cli.CreateCollection(ctx, milvusclient.NewCreateCollectionOption(collectionName, schema).WithIndexOptions(idxOpts...))
|
|
if err != nil {
|
|
log.Fatal("failed to create collection: ", err.Error())
|
|
}
|
|
|
|
_, err = cli.Insert(ctx, milvusclient.NewColumnBasedInsertOption(collectionName).
|
|
WithVarcharColumn(titleField, []string{
|
|
"History of AI",
|
|
"Alan Turing Biography",
|
|
"Machine Learning Overview",
|
|
}).
|
|
WithVarcharColumn(textField, []string{
|
|
"Artificial intelligence was founded in 1956 by computer scientists.",
|
|
"Alan Turing proposed early concepts of AI and machine learning.",
|
|
"Machine learning is a subset of artificial intelligence.",
|
|
}))
|
|
if err != nil {
|
|
log.Fatal("failed to insert data: ", err.Error())
|
|
}
|
|
|
|
task, err := cli.LoadCollection(ctx, milvusclient.NewLoadCollectionOption(collectionName))
|
|
if err != nil {
|
|
log.Fatal("failed to load collection: ", err.Error())
|
|
}
|
|
_ = task.Await(ctx)
|
|
|
|
q := "artificial intelligence"
|
|
expr := "text_match(" + titleField + ", \"" + q + "\", minimum_should_match=2) OR text_match(" + textField + ", \"" + q + "\", minimum_should_match=2)"
|
|
|
|
boost := entity.NewFunction().
|
|
WithName("title_boost").
|
|
WithType(entity.FunctionTypeRerank).
|
|
WithParam("reranker", "boost").
|
|
WithParam("filter", "text_match("+titleField+", \""+q+"\", minimum_should_match=2)").
|
|
WithParam("weight", "2.0")
|
|
|
|
vectors := []entity.Vector{entity.Text(q)}
|
|
rs, err := cli.Search(ctx, milvusclient.NewSearchOption(collectionName, 5, vectors).
|
|
WithANNSField(textSparse).
|
|
WithFilter(expr).
|
|
WithOutputFields("id", titleField, textField).
|
|
WithFunctionReranker(boost))
|
|
if err != nil {
|
|
log.Fatal("failed to search: ", err.Error())
|
|
}
|
|
|
|
for _, r := range rs {
|
|
_ = r.ResultCount
|
|
}
|
|
}
|