mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
https://github.com/milvus-io/milvus/issues/42589 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Semantic Highlighting Feature **Core Invariant**: Semantic highlighting operates on a per-field basis with independent text processing through an external Zilliz highlight provider. The implementation maintains field ID to field name mapping and correlates highlight results back to original field outputs. **What is Added**: This PR introduces semantic highlighting capability for search results alongside the existing lexical highlighting. The feature consists of: - New `SemanticHighlight` orchestrator that validates queries/input fields against collection schema, instantiates a Zilliz-based provider, and batches text processing across multiple queries - New `SemanticHighlighter` proxy wrapper implementing the `Highlighter` interface for search pipeline integration - New `semanticHighlightOperator` that processes search results by delegating per-field text processing to the provider and attaching correlated `HighlightResult` data to search outputs - New gRPC service definition (`HighlightService`) and `ZillizClient.Highlight()` method for external provider communication **No Data Loss or Regression**: The change is purely additive without modifying existing logic: - Lexical highlighting path remains unchanged (separate switch case in `createHighlightTask`) - New `HighlightResults` field is only populated when semantic highlighting is explicitly requested via `HighlightType_Semantic` enum value - Gracefully handles missing fields by returning explicit errors rather than silent failures - Pipeline operator integration follows existing patterns and only processes when semantic highlighter is instantiated **Why This Design**: Semantic highlighting is routed through the same pipeline operator pattern as lexical highlighting, ensuring consistent integration into search workflows. The per-field model allows flexible highlighting across different text columns and batch processing ensures efficient handling of multiple queries with configurable provider constraints. <!-- end of auto-generated comment: release notes by coderabbit.ai --> Signed-off-by: junjie.jiang <junjie.jiang@zilliz.com>
546 lines
16 KiB
Go
546 lines
16 KiB
Go
/*
|
|
* # Licensed to the LF AI & Data foundation under one
|
|
* # or more contributor license agreements. See the NOTICE file
|
|
* # distributed with this work for additional information
|
|
* # regarding copyright ownership. The ASF licenses this file
|
|
* # to you under the Apache License, Version 2.0 (the
|
|
* # "License"); you may not use this file except in compliance
|
|
* # with the License. You may obtain a copy of the License at
|
|
* #
|
|
* # http://www.apache.org/licenses/LICENSE-2.0
|
|
* #
|
|
* # Unless required by applicable law or agreed to in writing, software
|
|
* # distributed under the License is distributed on an "AS IS" BASIS,
|
|
* # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* # See the License for the specific language governing permissions and
|
|
* # limitations under the License.
|
|
*/
|
|
|
|
package highlight
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"testing"
|
|
|
|
"github.com/bytedance/mockey"
|
|
"github.com/cockroachdb/errors"
|
|
"github.com/stretchr/testify/suite"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/internal/util/function/models"
|
|
"github.com/milvus-io/milvus/internal/util/function/models/zilliz"
|
|
)
|
|
|
|
func TestSemanticHighlight(t *testing.T) {
|
|
suite.Run(t, new(SemanticHighlightSuite))
|
|
}
|
|
|
|
type SemanticHighlightSuite struct {
|
|
suite.Suite
|
|
schema *schemapb.CollectionSchema
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) SetupTest() {
|
|
s.schema = &schemapb.CollectionSchema{
|
|
Name: "test_collection",
|
|
Fields: []*schemapb.FieldSchema{
|
|
{FieldID: 100, Name: "id", DataType: schemapb.DataType_Int64},
|
|
{FieldID: 101, Name: "title", DataType: schemapb.DataType_VarChar},
|
|
{FieldID: 102, Name: "content", DataType: schemapb.DataType_Text},
|
|
{FieldID: 103, Name: "description", DataType: schemapb.DataType_VarChar},
|
|
{FieldID: 104, Name: "embedding", DataType: schemapb.DataType_FloatVector},
|
|
},
|
|
}
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestNewSemanticHighlight_Success() {
|
|
queries := []string{"machine learning", "artificial intelligence"}
|
|
inputFields := []string{"title", "content"}
|
|
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
mock1 := mockey.Mock(zilliz.NewZilliClient).To(func(_ string, _ string, _ string, _ map[string]string) (*zilliz.ZillizClient, error) {
|
|
return &zilliz.ZillizClient{}, nil
|
|
}).Build()
|
|
defer mock1.UnPatch()
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
|
|
s.NoError(err)
|
|
s.NotNil(highlight)
|
|
s.Equal([]int64{101, 102}, highlight.FieldIDs())
|
|
s.Equal(queries, highlight.queries)
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestNewSemanticHighlight_MissingQueries() {
|
|
inputFields := []string{"title"}
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
|
|
s.Error(err)
|
|
s.Nil(highlight)
|
|
s.Contains(err.Error(), "queries is required")
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestNewSemanticHighlight_MissingInputFields() {
|
|
queries := []string{"machine learning"}
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
|
|
s.Error(err)
|
|
s.Nil(highlight)
|
|
s.Contains(err.Error(), "input_field is required")
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestNewSemanticHighlight_InvalidQueriesJSON() {
|
|
inputFields := []string{"title"}
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: "invalid json"},
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
|
|
s.Error(err)
|
|
s.Nil(highlight)
|
|
s.Contains(err.Error(), "Parse queries failed")
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestNewSemanticHighlight_InvalidInputFieldsJSON() {
|
|
queries := []string{"machine learning"}
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: inputFieldKeyName, Value: "invalid json"},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
|
|
s.Error(err)
|
|
s.Nil(highlight)
|
|
s.Contains(err.Error(), "Parse input_field failed")
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestNewSemanticHighlight_FieldNotFound() {
|
|
queries := []string{"machine learning"}
|
|
inputFields := []string{"nonexistent_field"}
|
|
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
|
|
s.Error(err)
|
|
s.Nil(highlight)
|
|
s.Contains(err.Error(), "not found")
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestNewSemanticHighlight_InvalidFieldType() {
|
|
queries := []string{"machine learning"}
|
|
inputFields := []string{"embedding"} // FloatVector, not VarChar or Text
|
|
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
|
|
s.Error(err)
|
|
s.Nil(highlight)
|
|
s.Contains(err.Error(), "is not a VarChar or Text field")
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestProcessOneQuery_Success() {
|
|
queries := []string{"machine learning"}
|
|
inputFields := []string{"title"}
|
|
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
expectedHighlights := [][]string{
|
|
{"machine learning"},
|
|
{"machine"},
|
|
}
|
|
|
|
mock1 := mockey.Mock(zilliz.NewZilliClient).To(func(_ string, _ string, _ string, _ map[string]string) (*zilliz.ZillizClient, error) {
|
|
return &zilliz.ZillizClient{}, nil
|
|
}).Build()
|
|
defer mock1.UnPatch()
|
|
|
|
mock2 := mockey.Mock((*zilliz.ZillizClient).Highlight).To(func(_ *zilliz.ZillizClient, _ context.Context, _ string, _ []string, _ map[string]string) ([][]string, error) {
|
|
return expectedHighlights, nil
|
|
}).Build()
|
|
defer mock2.UnPatch()
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
s.NoError(err)
|
|
|
|
ctx := context.Background()
|
|
data := []string{"Machine learning is a subset of AI", "Machine learning is powerful"}
|
|
highlights, err := highlight.processOneQuery(ctx, "machine learning", data, nil)
|
|
|
|
s.NoError(err)
|
|
s.Equal(expectedHighlights, highlights)
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestProcessOneQuery_Error() {
|
|
queries := []string{"test query"}
|
|
inputFields := []string{"title"}
|
|
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
expectedError := errors.New("highlight service error")
|
|
|
|
mock1 := mockey.Mock(zilliz.NewZilliClient).To(func(_ string, _ string, _ string, _ map[string]string) (*zilliz.ZillizClient, error) {
|
|
return &zilliz.ZillizClient{}, nil
|
|
}).Build()
|
|
defer mock1.UnPatch()
|
|
|
|
mock2 := mockey.Mock((*zilliz.ZillizClient).Highlight).To(func(_ *zilliz.ZillizClient, _ context.Context, _ string, _ []string, _ map[string]string) ([][]string, error) {
|
|
return nil, expectedError
|
|
}).Build()
|
|
defer mock2.UnPatch()
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
s.NoError(err)
|
|
|
|
ctx := context.Background()
|
|
data := []string{"test document"}
|
|
highlights, err := highlight.processOneQuery(ctx, "test query", data, nil)
|
|
|
|
s.Error(err)
|
|
s.Nil(highlights)
|
|
s.Equal(expectedError, err)
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestProcess_Success() {
|
|
queries := []string{"machine learning", "deep learning"}
|
|
inputFields := []string{"title"}
|
|
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
expectedHighlights1 := [][]string{
|
|
{"machine learning", "deep learning"},
|
|
}
|
|
expectedHighlights2 := [][]string{
|
|
{"deep learning", "machine learning"},
|
|
}
|
|
|
|
callCount := 0
|
|
mock1 := mockey.Mock(zilliz.NewZilliClient).To(func(_ string, _ string, _ string, _ map[string]string) (*zilliz.ZillizClient, error) {
|
|
return &zilliz.ZillizClient{}, nil
|
|
}).Build()
|
|
defer mock1.UnPatch()
|
|
|
|
mock2 := mockey.Mock((*zilliz.ZillizClient).Highlight).To(func(_ *zilliz.ZillizClient, _ context.Context, query string, _ []string, _ map[string]string) ([][]string, error) {
|
|
callCount++
|
|
if query == "machine learning" {
|
|
return expectedHighlights1, nil
|
|
}
|
|
return expectedHighlights2, nil
|
|
}).Build()
|
|
defer mock2.UnPatch()
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
s.NoError(err)
|
|
|
|
ctx := context.Background()
|
|
data := []string{"Machine learning document", "Deep learning document"}
|
|
highlights, err := highlight.Process(ctx, []int64{1, 1}, data, nil)
|
|
|
|
s.NoError(err)
|
|
s.NotNil(highlights)
|
|
s.Equal(2, callCount, "Should call highlight twice for two queries")
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestProcess_NqMismatch() {
|
|
queries := []string{"machine learning"}
|
|
inputFields := []string{"title"}
|
|
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
mock1 := mockey.Mock(zilliz.NewZilliClient).To(func(_ string, _ string, _ string, _ map[string]string) (*zilliz.ZillizClient, error) {
|
|
return &zilliz.ZillizClient{}, nil
|
|
}).Build()
|
|
defer mock1.UnPatch()
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
s.NoError(err)
|
|
|
|
ctx := context.Background()
|
|
data := []string{"test document"}
|
|
highlights, err := highlight.Process(ctx, []int64{1, 1, 1}, data, nil) // nq=3 but queries has only 1
|
|
|
|
s.Error(err)
|
|
s.Nil(highlights)
|
|
s.Contains(err.Error(), "nq must equal to queries size")
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestProcess_ProviderError() {
|
|
queries := []string{"test query"}
|
|
inputFields := []string{"title"}
|
|
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
expectedError := errors.New("provider error")
|
|
|
|
mock1 := mockey.Mock(zilliz.NewZilliClient).To(func(_ string, _ string, _ string, _ map[string]string) (*zilliz.ZillizClient, error) {
|
|
return &zilliz.ZillizClient{}, nil
|
|
}).Build()
|
|
defer mock1.UnPatch()
|
|
|
|
mock2 := mockey.Mock((*zilliz.ZillizClient).Highlight).To(func(_ *zilliz.ZillizClient, _ context.Context, _ string, _ []string, _ map[string]string) ([][]string, error) {
|
|
return nil, expectedError
|
|
}).Build()
|
|
defer mock2.UnPatch()
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
s.NoError(err)
|
|
|
|
ctx := context.Background()
|
|
data := []string{"test document"}
|
|
highlights, err := highlight.Process(ctx, []int64{1}, data, nil)
|
|
|
|
s.Error(err)
|
|
s.Nil(highlights)
|
|
s.Equal(expectedError, err)
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestProcess_EmptyData() {
|
|
queries := []string{"test query", "test query 2", "test query 3"}
|
|
inputFields := []string{"title"}
|
|
|
|
queriesJSON, _ := json.Marshal(queries)
|
|
inputFieldsJSON, _ := json.Marshal(inputFields)
|
|
|
|
mock1 := mockey.Mock(zilliz.NewZilliClient).To(func(_ string, _ string, _ string, _ map[string]string) (*zilliz.ZillizClient, error) {
|
|
return &zilliz.ZillizClient{}, nil
|
|
}).Build()
|
|
defer mock1.UnPatch()
|
|
|
|
mock2 := mockey.Mock((*zilliz.ZillizClient).Highlight).To(func(_ *zilliz.ZillizClient, _ context.Context, _ string, texts []string, _ map[string]string) ([][]string, error) {
|
|
return [][]string{texts}, nil
|
|
}).Build()
|
|
defer mock2.UnPatch()
|
|
|
|
params := []*commonpb.KeyValuePair{
|
|
{Key: queryKeyName, Value: string(queriesJSON)},
|
|
{Key: inputFieldKeyName, Value: string(inputFieldsJSON)},
|
|
{Key: models.ModelDeploymentIDKey, Value: "test-deployment"},
|
|
}
|
|
|
|
conf := map[string]string{
|
|
"endpoint": "localhost:8080",
|
|
}
|
|
|
|
extraInfo := &models.ModelExtraInfo{
|
|
ClusterID: "test-cluster",
|
|
DBName: "test-db",
|
|
}
|
|
|
|
highlight, err := NewSemanticHighlight(s.schema, params, conf, extraInfo)
|
|
s.NoError(err)
|
|
|
|
ctx := context.Background()
|
|
data := []string{}
|
|
highlights, err := highlight.Process(ctx, []int64{0, 0, 0}, data, nil)
|
|
|
|
s.NoError(err)
|
|
s.NotNil(highlights)
|
|
|
|
data2 := []string{"test document"}
|
|
|
|
highlights2, err := highlight.Process(ctx, []int64{0, 1, 0}, data2, nil)
|
|
|
|
s.NoError(err)
|
|
s.Equal(1, len(highlights2))
|
|
s.Equal([][]string{{"test document"}}, highlights2)
|
|
}
|
|
|
|
func (s *SemanticHighlightSuite) TestBaseSemanticHighlightProvider_MaxBatch() {
|
|
provider := &baseSemanticHighlightProvider{batchSize: 128}
|
|
s.Equal(128, provider.maxBatch())
|
|
|
|
provider2 := &baseSemanticHighlightProvider{batchSize: 32}
|
|
s.Equal(32, provider2.maxBatch())
|
|
}
|