milvus/internal/util/function/cohere_embedding_provider_test.go
junjiejiangjjj bb7df40fc1
feat: Http interface supports rerank (#41486)
https://github.com/milvus-io/milvus/issues/35856

Signed-off-by: junjie.jiang <junjie.jiang@zilliz.com>
2025-04-28 23:02:50 +08:00

341 lines
11 KiB
Go

/*
* # Licensed to the LF AI & Data foundation under one
* # or more contributor license agreements. See the NOTICE file
* # distributed with this work for additional information
* # regarding copyright ownership. The ASF licenses this file
* # to you under the Apache License, Version 2.0 (the
* # "License"); you may not use this file except in compliance
* # with the License. You may obtain a copy of the License at
* #
* # http://www.apache.org/licenses/LICENSE-2.0
* #
* # Unless required by applicable law or agreed to in writing, software
* # distributed under the License is distributed on an "AS IS" BASIS,
* # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* # See the License for the specific language governing permissions and
* # limitations under the License.
*/
package function
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/util/credentials"
"github.com/milvus-io/milvus/internal/util/function/models/cohere"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)
func TestCohereTextEmbeddingProvider(t *testing.T) {
suite.Run(t, new(CohereTextEmbeddingProviderSuite))
}
// CohereTextEmbeddingProviderSuite groups the tests for the Cohere text
// embedding provider.
type CohereTextEmbeddingProviderSuite struct {
	suite.Suite

	// schema is the collection schema assigned in SetupTest; tests use
	// Fields[2] as the output vector field.
	schema *schemapb.CollectionSchema
	// providers lists the provider names the embedding tests iterate over.
	providers []string
}
// SetupTest initializes the param table, installs a credential getter that
// returns a single "mock.apikey" entry, and assigns the suite's collection
// schema (int64, varchar and a 4-dim float vector field) and provider list.
func (s *CohereTextEmbeddingProviderSuite) SetupTest() {
	paramtable.Init()
	// A fresh map is returned on every call so callers never share state.
	credentialStub := func() map[string]string {
		return map[string]string{
			"mock.apikey": "mock",
		}
	}
	paramtable.Get().CredentialCfg.Credential.GetFunc = credentialStub

	idField := &schemapb.FieldSchema{FieldID: 100, Name: "int64", DataType: schemapb.DataType_Int64}
	textField := &schemapb.FieldSchema{FieldID: 101, Name: "text", DataType: schemapb.DataType_VarChar}
	vectorField := &schemapb.FieldSchema{
		FieldID:  102,
		Name:     "vector",
		DataType: schemapb.DataType_FloatVector,
		TypeParams: []*commonpb.KeyValuePair{
			{Key: "dim", Value: "4"},
		},
	}

	s.schema = &schemapb.CollectionSchema{
		Name:   "test",
		Fields: []*schemapb.FieldSchema{idField, textField, vectorField},
	}
	s.providers = []string{cohereProvider}
}
// createCohereProvider builds a Cohere embedding provider for tests.
//
// url is passed to the provider as the embedding endpoint, schema is the
// output vector field, and providerName selects the implementation (only
// cohereProvider is supported). The function schema maps input field
// "text" (101) to output field "vector" (102) and uses TestModel with the
// "mock" credential.
//
// On failure the returned provider is a literal nil interface, never an
// interface wrapping a nil *CohereEmbeddingProvider, so callers can compare
// it against nil safely.
func createCohereProvider(url string, schema *schemapb.FieldSchema, providerName string) (textEmbeddingProvider, error) {
	if providerName != cohereProvider {
		return nil, errors.Newf("unknown provider %q", providerName)
	}

	functionSchema := &schemapb.FunctionSchema{
		Name:             "test",
		Type:             schemapb.FunctionType_TextEmbedding,
		InputFieldNames:  []string{"text"},
		OutputFieldNames: []string{"vector"},
		InputFieldIds:    []int64{101},
		OutputFieldIds:   []int64{102},
		Params: []*commonpb.KeyValuePair{
			{Key: modelNameParamKey, Value: TestModel},
			{Key: credentialParamKey, Value: "mock"},
		},
	}
	creds := credentials.NewCredentials(map[string]string{"mock.apikey": "mock"})

	provider, err := NewCohereEmbeddingProvider(schema, functionSchema, map[string]string{embeddingURLParamKey: url}, creds)
	if err != nil {
		// Returning the typed nil pointer directly would produce a non-nil interface.
		return nil, err
	}
	return provider, nil
}
// TestEmbedding checks the float32 path against the stub server created by
// CreateCohereEmbeddingServer: provider metadata, a single-sentence insert
// call, and a three-sentence search call.
func (s *CohereTextEmbeddingProviderSuite) TestEmbedding() {
	ts := CreateCohereEmbeddingServer[float32]()
	defer ts.Close()

	for _, providerName := range s.providers {
		provider, err := createCohereProvider(ts.URL, s.schema.Fields[2], providerName)
		// Stop here on failure: every check below dereferences provider.
		s.Require().NoError(err)

		s.True(provider.MaxBatch() > 0)
		s.Equal(int64(4), provider.FieldDim())

		cohereImpl, isCohere := provider.(*CohereEmbeddingProvider)
		s.Require().True(isCohere)
		s.Equal(float32Embd, cohereImpl.embdType)
		s.Equal("float", cohereImpl.outputType)

		{
			data := []string{"sentence"}
			raw, callErr := provider.CallEmbedding(data, InsertMode)
			// Check the error before asserting on the result's type.
			s.Require().NoError(callErr)
			ret, isFloat := raw.([][]float32)
			s.Require().True(isFloat)
			s.Require().Equal(1, len(ret))
			s.Equal(4, len(ret[0]))
			s.Equal([]float32{0.0, 1.0, 2.0, 3.0}, ret[0])
		}
		{
			data := []string{"sentence 1", "sentence 2", "sentence 3"}
			ret, callErr := provider.CallEmbedding(data, SearchMode)
			s.Require().NoError(callErr)
			s.Equal([][]float32{{0.0, 1.0, 2.0, 3.0}, {1.0, 2.0, 3.0, 4.0}, {2.0, 3.0, 4.0, 5.0}}, ret)
		}
	}
}
// TestEmbeddingInt8 checks the int8 path: with an Int8Vector output field the
// provider must report int8 embedding/output types and return [][]int8
// results from the stub server.
func (s *CohereTextEmbeddingProviderSuite) TestEmbeddingInt8() {
	ts := CreateCohereEmbeddingServer[int8]()
	defer ts.Close()

	int8VecField := &schemapb.FieldSchema{
		FieldID:  102,
		Name:     "vector",
		DataType: schemapb.DataType_Int8Vector,
		TypeParams: []*commonpb.KeyValuePair{
			{Key: "dim", Value: "4"},
		},
	}

	for _, providerName := range s.providers {
		provider, err := createCohereProvider(ts.URL, int8VecField, providerName)
		// Stop here on failure: every check below dereferences provider.
		s.Require().NoError(err)

		s.True(provider.MaxBatch() > 0)
		s.Equal(int64(4), provider.FieldDim())

		cohereImpl, isCohere := provider.(*CohereEmbeddingProvider)
		s.Require().True(isCohere)
		s.Equal(int8Embd, cohereImpl.embdType)
		s.Equal("int8", cohereImpl.outputType)

		{
			data := []string{"sentence"}
			raw, callErr := provider.CallEmbedding(data, InsertMode)
			s.Require().NoError(callErr)
			ret, isInt8 := raw.([][]int8)
			s.Require().True(isInt8)
			s.Require().Equal(1, len(ret))
			s.Equal(4, len(ret[0]))
			s.Equal([]int8{0, 1, 2, 3}, ret[0])
		}
		{
			data := []string{"sentence 1", "sentence 2", "sentence 3"}
			ret, callErr := provider.CallEmbedding(data, SearchMode)
			s.Require().NoError(callErr)
			s.Equal([][]int8{{0, 1, 2, 3}, {1, 2, 3, 4}, {2, 3, 4, 5}}, ret)
		}
	}
}
// TestEmbeddingDimNotMatch verifies that CallEmbedding returns an error when
// the service answers with vectors of inconsistent length (4 and 2 elements
// for a dim-4 field), for both float and int8 outputs.
func (s *CohereTextEmbeddingProviderSuite) TestEmbeddingDimNotMatch() {
	// newServer starts a stub endpoint that always replies with embeddings.
	newServer := func(embeddings cohere.Embeddings) *httptest.Server {
		return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			var res cohere.EmbeddingResponse
			res.Embeddings = embeddings
			body, err := json.Marshal(res)
			if err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
				return
			}
			w.WriteHeader(http.StatusOK)
			// Best effort: the handler has nowhere to report a write failure.
			_, _ = w.Write(body)
		}))
	}

	// Float embeddings: second vector is too short.
	floatServer := newServer(cohere.Embeddings{
		Float: [][]float32{{1.0, 1.0, 1.0, 1.0}, {1.0, 1.0}},
	})
	defer floatServer.Close()
	for _, providerName := range s.providers {
		provider, err := createCohereProvider(floatServer.URL, s.schema.Fields[2], providerName)
		s.Require().NoError(err)

		// embedding dim not match
		_, callErr := provider.CallEmbedding([]string{"sentence", "sentence"}, InsertMode)
		s.Error(callErr)
	}

	// Int8 embeddings: second vector is too short.
	int8Server := newServer(cohere.Embeddings{
		Int8: [][]int8{{1, 1, 1, 1}, {1, 1}},
	})
	defer int8Server.Close()
	int8Field := &schemapb.FieldSchema{
		FieldID:  102,
		Name:     "vector",
		DataType: schemapb.DataType_Int8Vector,
		TypeParams: []*commonpb.KeyValuePair{
			{Key: "dim", Value: "4"},
		},
	}
	provider, err := createCohereProvider(int8Server.URL, int8Field, cohereProvider)
	s.Require().NoError(err)

	// embedding dim not match
	_, callErr := provider.CallEmbedding([]string{"sentence", "sentence"}, InsertMode)
	s.Error(callErr)
}
// TestEmbeddingNumberNotMatch verifies that CallEmbedding returns an error
// when the service answers with fewer vectors than input texts (one vector
// for two sentences), for both float and int8 outputs.
func (s *CohereTextEmbeddingProviderSuite) TestEmbeddingNumberNotMatch() {
	// newServer starts a stub endpoint that always replies with embeddings.
	newServer := func(embeddings cohere.Embeddings) *httptest.Server {
		return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			var res cohere.EmbeddingResponse
			res.Embeddings = embeddings
			body, err := json.Marshal(res)
			if err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
				return
			}
			w.WriteHeader(http.StatusOK)
			// Best effort: the handler has nowhere to report a write failure.
			_, _ = w.Write(body)
		}))
	}

	// Float embeddings: one vector returned for two inputs.
	floatServer := newServer(cohere.Embeddings{
		Float: [][]float32{{1.0, 1.0, 1.0, 1.0}},
	})
	defer floatServer.Close()
	for _, providerName := range s.providers {
		provider, err := createCohereProvider(floatServer.URL, s.schema.Fields[2], providerName)
		s.Require().NoError(err)

		// embedding number not match
		_, callErr := provider.CallEmbedding([]string{"sentence", "sentence2"}, InsertMode)
		s.Error(callErr)
	}

	// Int8 embeddings: one vector returned for two inputs.
	int8Server := newServer(cohere.Embeddings{
		Int8: [][]int8{{1, 1, 1, 1}},
	})
	defer int8Server.Close()
	int8Field := &schemapb.FieldSchema{
		FieldID:  102,
		Name:     "vector",
		DataType: schemapb.DataType_Int8Vector,
		TypeParams: []*commonpb.KeyValuePair{
			{Key: "dim", Value: "4"},
		},
	}
	provider, err := createCohereProvider(int8Server.URL, int8Field, cohereProvider)
	s.Require().NoError(err)

	// embedding number not match
	_, callErr := provider.CallEmbedding([]string{"sentence", "sentence2"}, InsertMode)
	s.Error(callErr)
}
// TestNewCohereProvider covers the truncate parameter of
// NewCohereEmbeddingProvider: it is "END" when unset, takes an explicit
// "START", and an unrecognized value makes construction fail.
func (s *CohereTextEmbeddingProviderSuite) TestNewCohereProvider() {
	functionSchema := &schemapb.FunctionSchema{
		Name:             "test",
		Type:             schemapb.FunctionType_TextEmbedding,
		InputFieldNames:  []string{"text"},
		OutputFieldNames: []string{"vector"},
		InputFieldIds:    []int64{101},
		OutputFieldIds:   []int64{102},
		Params: []*commonpb.KeyValuePair{
			{Key: modelNameParamKey, Value: TestModel},
			{Key: credentialParamKey, Value: "mock"},
		},
	}
	// newProvider rebuilds the provider from the current functionSchema state.
	newProvider := func() (*CohereEmbeddingProvider, error) {
		creds := credentials.NewCredentials(map[string]string{"mock.apikey": "mock"})
		return NewCohereEmbeddingProvider(s.schema.Fields[2], functionSchema, map[string]string{}, creds)
	}

	// No truncate param supplied.
	provider, err := newProvider()
	// Require stops the test before provider is dereferenced on failure.
	s.Require().NoError(err)
	s.Equal("END", provider.truncate)

	// Explicit truncate param.
	functionSchema.Params = append(functionSchema.Params, &commonpb.KeyValuePair{Key: truncateParamKey, Value: "START"})
	provider, err = newProvider()
	s.Require().NoError(err)
	s.Equal("START", provider.truncate)

	// Invalid truncateParam
	functionSchema.Params[2].Value = "Unknow"
	_, err = newProvider()
	s.Error(err)
}
// TestGetInputType checks getInputType for two model names: "model-v2.0"
// yields an empty input type in both modes, while "model-v3.0" yields
// "search_document" for inserts and "search_query" for searches.
func (s *CohereTextEmbeddingProviderSuite) TestGetInputType() {
	functionSchema := &schemapb.FunctionSchema{
		Name:             "test",
		Type:             schemapb.FunctionType_TextEmbedding,
		InputFieldNames:  []string{"text"},
		OutputFieldNames: []string{"vector"},
		InputFieldIds:    []int64{101},
		OutputFieldIds:   []int64{102},
		Params: []*commonpb.KeyValuePair{
			{Key: modelNameParamKey, Value: "model-v2.0"},
			{Key: credentialParamKey, Value: "mock"},
		},
	}
	// newProvider rebuilds the provider from the current functionSchema state.
	newProvider := func() (*CohereEmbeddingProvider, error) {
		creds := credentials.NewCredentials(map[string]string{"mock.apikey": "mock"})
		return NewCohereEmbeddingProvider(s.schema.Fields[2], functionSchema, map[string]string{}, creds)
	}

	provider, err := newProvider()
	// Require stops the test before provider is dereferenced on failure.
	s.Require().NoError(err)
	s.Equal("", provider.getInputType(InsertMode))
	s.Equal("", provider.getInputType(SearchMode))

	functionSchema.Params[0].Value = "model-v3.0"
	provider, err = newProvider()
	s.Require().NoError(err)
	s.Equal("search_document", provider.getInputType(InsertMode))
	s.Equal("search_query", provider.getInputType(SearchMode))
}
// TestCreateCohereEmbeddingClient expects createCohereEmbeddingClient to
// return an error when both of its arguments are empty strings.
func (s *CohereTextEmbeddingProviderSuite) TestCreateCohereEmbeddingClient() {
	_, clientErr := createCohereEmbeddingClient("", "")
	s.Error(clientErr)
}
// TestRuntimeDimNotMatch declares the vector field with dim 5 and calls the
// stub server from CreateCohereEmbeddingServer; the provider is created
// successfully but CallEmbedding must return an error.
func (s *CohereTextEmbeddingProviderSuite) TestRuntimeDimNotMatch() {
	ts := CreateCohereEmbeddingServer[float32]()
	defer ts.Close()

	s.schema = &schemapb.CollectionSchema{
		Name: "test",
		Fields: []*schemapb.FieldSchema{
			{FieldID: 100, Name: "int64", DataType: schemapb.DataType_Int64},
			{FieldID: 101, Name: "text", DataType: schemapb.DataType_VarChar},
			{
				FieldID:  102,
				Name:     "vector",
				DataType: schemapb.DataType_FloatVector,
				TypeParams: []*commonpb.KeyValuePair{
					{Key: "dim", Value: "5"},
				},
			},
		},
	}

	provider, err := createCohereProvider(ts.URL, s.schema.Fields[2], cohereProvider)
	// Require stops the test before provider is used if construction failed.
	s.Require().NoError(err)

	_, callErr := provider.CallEmbedding([]string{"sentence"}, InsertMode)
	s.Error(callErr)
}