milvus/tests/python_client/text_embedding/test_text_embedding_function.py
zhuwenxing 6889b0470e
test: add text embedding function test (#40668)
/kind improvement

---------

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
2025-03-20 11:44:12 +08:00


import random
import uuid
from pymilvus import (
FieldSchema,
CollectionSchema,
DataType,
Function,
FunctionType,
AnnSearchRequest,
WeightedRanker,
)
from pymilvus.bulk_writer import BulkFileType, RemoteBulkWriter
from common.common_type import CheckTasks
from common import common_func as cf
from utils.util_log import test_log as log
from base.client_base import TestcaseBase
import numpy as np
import time
import pytest
import pandas as pd
from faker import Faker
import requests
import os
from numpy import dot
from numpy.linalg import norm
fake_zh = Faker("zh_CN")
fake_jp = Faker("ja_JP")
fake_en = Faker("en_US")
pd.set_option("expand_frame_repr", False)
prefix = "text_embedding_collection"
class TestCreateCollectionWithTextEmbedding(TestcaseBase):
"""
******************************************************************
The following cases are used to test create collection with text embedding function
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_create_collection_with_text_embedding(self, model_name):
"""
target: test create collection with text embedding function
method: create collection with text embedding function
expected: create collection successfully
"""
dim = 1024 # dimension for bge-m3 model
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
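        # A TEXTEMBEDDING function tells the server to populate `dense` from
        # `document` via the configured provider at insert and search time, so
        # callers never supply vectors themselves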
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
res, _ = collection_w.describe()
assert len(res["functions"]) == 1
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_create_collection_with_text_embedding_twice_with_same_schema(
self, model_name
):
"""
target: test create collection with text embedding twice with same schema
method: create collection with text embedding function, then create again
expected: create collection successfully and create again successfully
"""
dim = 1024 # dimension for bge-m3 model
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
c_name = cf.gen_unique_str(prefix)
self.init_collection_wrap(name=c_name, schema=schema)
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
res, _ = collection_w.describe()
assert len(res["functions"]) == 1
def test_create_collection_with_text_embedding_with_multi_models(self):
"""
target: test create collection with text embedding twice with same schema
method: create collection with text embedding function, then create again
expected: create collection successfully and create again successfully
"""
bge_dim = 1024
bce_dim = 768
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
]
model_names = ["BAAI/bge-m3", "netease-youdao/bce-embedding-base_v1"]
for model_name in model_names:
field_name = f"dense_{model_name.replace('/', '_').replace('-', '_').replace('.', '_')}"
dim = bge_dim if "bge" in model_name else bce_dim
field = FieldSchema(name=field_name, dtype=DataType.FLOAT_VECTOR, dim=dim)
fields.append(field)
schema = CollectionSchema(fields=fields, description="test collection")
for model_name in model_names:
field_name = f"dense_{model_name.replace('/', '_').replace('-', '_').replace('.', '_')}"
log.info(f"model_name: {model_name}, field_name: {field_name}")
text_embedding_function = Function(
name=f"siliconflow-{model_name}",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names=field_name,
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
res, _ = collection_w.describe()
log.info(f"collection describe: {res}")
assert len(res["functions"]) == 2
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
collection_w.insert(data)
assert collection_w.num_entities == nb
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
for model_name in model_names:
field_name = f"dense_{model_name.replace('/', '_').replace('-', '_').replace('.', '_')}"
collection_w.create_index(field_name=field_name, index_params=index_params)
collection_w.load()
for model_name in model_names:
field_name = f"dense_{model_name.replace('/', '_').replace('-', '_').replace('.', '_')}"
res, _ = collection_w.query(
expr="id >= 0",
output_fields=[field_name],
)
            for row in res:
                expected_dim = bge_dim if "bge" in model_name else bce_dim
                assert len(row[field_name]) == expected_dim
class TestCreateCollectionWithTextEmbeddingNegative(TestcaseBase):
"""
******************************************************************
The following cases are used to test create collection with text embedding negative
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["unsupported_model"])
def test_create_collection_with_text_embedding_unsupported_model(self, model_name):
"""
target: test create collection with text embedding with unsupported model
method: create collection with text embedding function using unsupported model
expected: create collection failed
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
self.init_collection_wrap(
name=cf.gen_unique_str(prefix),
schema=schema,
check_task=CheckTasks.err_res,
check_items={"err_code": 65535, "err_msg": "Unsupported model"},
)
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_create_collection_with_text_embedding_unmatched_dim(self, model_name):
"""
target: test create collection with text embedding with unsupported model
method: create collection with text embedding function using unsupported model
expected: create collection failed
"""
dim = 512
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
self.init_collection_wrap(
name=cf.gen_unique_str(prefix),
schema=schema,
check_task=CheckTasks.err_res,
check_items={
"err_code": 65535,
"err_msg": f"The required embedding dim is [{dim}], but the embedding obtained from the model is [1024]",
},
)
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_create_collection_with_text_embedding_invalid_api_key(self, model_name):
"""
target: test create collection with text embedding with invalid api key
method: create collection with text embedding function using invalid api key
expected: create collection failed
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
"api_key": "invalid_api_key",
},
)
schema.add_function(text_embedding_function)
self.init_collection_wrap(
name=cf.gen_unique_str(prefix),
schema=schema,
check_task=CheckTasks.err_res,
check_items={"err_code": 65535, "err_msg": "Invalid"},
)
class TestInsertWithTextEmbedding(TestcaseBase):
"""
******************************************************************
The following cases are used to test insert with text embedding
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_insert_with_text_embedding(self, model_name):
"""
target: test insert data with text embedding
method: insert data with text embedding function
expected: insert successfully
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
collection_w.create_index(field_name="dense", index_params=index_params)
collection_w.load()
res, _ = collection_w.query(
expr="id >= 0",
output_fields=["dense"],
)
        for row in res:
            # Defensive check: some client versions may return vector data as
            # raw bytes; in that case only verify the payload is non-empty
            if isinstance(row["dense"], bytes):
                assert len(row["dense"]) > 0, "Vector should not be empty"
            else:
                # For regular float vectors, check the exact dimension
                assert len(row["dense"]) == dim
class TestALLProviderWithTextEmbedding(TestcaseBase):
"""
******************************************************************
The following cases are used to test all provider with text embedding
******************************************************************
"""
@pytest.mark.parametrize(
"model_name",
["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"],
)
def test_insert_with_openai_text_embedding(self, model_name):
"""
target: test insert data with text embedding
method: insert data with text embedding function
expected: insert successfully
"""
dim_map = {
"text-embedding-ada-002": 1536,
"text-embedding-3-small": 1536,
"text-embedding-3-large": 1024,
}
dim = dim_map.get(model_name)
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
params = {
"provider": "openai",
"model_name": model_name,
"user": f"{uuid.uuid4().hex}",
}
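        # text-embedding-3-large natively outputs 3072-dim vectors (see the
        # perf test's dim map below); passing `dim` requests a reduced
        # 1024-dim embedding that matches the field schema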
if model_name == "text-embedding-3-large":
params["dim"] = dim
text_embedding_function = Function(
name="openai",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params=params,
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
collection_w.create_index(field_name="dense", index_params=index_params)
collection_w.load()
res, _ = collection_w.query(
expr="id >= 0",
output_fields=["dense"],
)
for row in res:
assert len(row["dense"]) == dim
@pytest.mark.parametrize(
"model_name", ["text-embedding-v1", "text-embedding-v2", "text-embedding-v3"]
)
def test_insert_with_dashscope_text_embedding(self, model_name):
"""
target: test insert data with text embedding
method: insert data with text embedding function
expected: insert successfully
"""
dim_map = {
"text-embedding-v1": 1536,
"text-embedding-v2": 1536,
"text-embedding-v3": 768,
}
dim = dim_map.get(model_name)
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="dashscope",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "dashscope",
"model_name": model_name,
"dim": dim,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
collection_w.create_index(field_name="dense", index_params=index_params)
collection_w.load()
res, _ = collection_w.query(
expr="id >= 0",
output_fields=["dense"],
)
for row in res:
assert len(row["dense"]) == dim
@pytest.mark.parametrize(
"model_name,dim",
[
("amazon.titan-embed-text-v2:0", 1024),
("amazon.titan-embed-text-v2:0", 512),
("amazon.titan-embed-text-v2:0", 256),
],
)
def test_insert_with_bedrock_text_embedding(self, model_name, dim):
"""
target: test insert data with text embedding
method: insert data with text embedding function
expected: insert successfully
"""
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
# Set up parameters for Bedrock embedding
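        # Titan embed v2 supports 256/512/1024 output dims (hence the
        # parametrization above); `normalize` requests unit-length vectors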
params = {
"provider": "bedrock",
"model_name": model_name,
"normalize": True,
"dim": dim,
}
text_embedding_function = Function(
name="bedrock_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params=params,
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
collection_w.create_index(field_name="dense", index_params=index_params)
collection_w.load()
res, _ = collection_w.query(
expr="id >= 0",
output_fields=["dense"],
)
for row in res:
assert len(row["dense"]) == dim
@pytest.mark.parametrize(
"model_name", ["text-embedding-005", "text-multilingual-embedding-002"]
)
def test_insert_with_vertexai_text_embedding(self, model_name):
"""
target: test insert data with text embedding
method: insert data with text embedding function
expected: insert successfully
"""
# Both models use 768 dimensions by default
dim = 768
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
# Set up parameters for Vertex AI embedding
params = {
"provider": "vertexai",
"model_name": model_name,
"projectid": "test-410709",
"location": "us-central1",
}
text_embedding_function = Function(
name="vertexai_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params=params,
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
collection_w.create_index(field_name="dense", index_params=index_params)
collection_w.load()
res, _ = collection_w.query(
expr="id >= 0",
output_fields=["dense"],
)
for row in res:
assert len(row["dense"]) == dim
@pytest.mark.parametrize(
"model_name",
[
"voyage-3-large", # 1024 (default), 256, 512, 2048
"voyage-3", # 1024
"voyage-3-lite", # 512
"voyage-code-3", # 1024 (default), 256, 512, 2048
"voyage-finance-2", # 1024
"voyage-law-2", # 1024
"voyage-code-2", # 1536
],
)
def test_insert_with_voyageai_text_embedding(self, model_name):
"""
target: test insert data with text embedding
method: insert data with text embedding function
expected: insert successfully
"""
dim_map = {
"voyage-3-large": 2048,
"voyage-3": 1024,
"voyage-3-lite": 512,
"voyage-code-3": 2048,
"voyage-finance-2": 1024,
"voyage-law-2": 1024,
"voyage-code-2": 1536,
}
dim = dim_map.get(model_name)
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="voyageai_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "voyageai",
"model_name": model_name,
"dim": dim,
"api_key": "pa-c18f3MzGnJUYpxCxp9pWuzp9l-wQZ_lXfS1ZKzV7IGL",
},
)
schema.add_function(text_embedding_function)
# insert data with retry mechanism to handle rate limit (429 errors)
max_retries = 5
retry_delay = 20 # seconds between retries
for retry_count in range(max_retries):
try:
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix),
schema=schema,
check_task=CheckTasks.check_nothing,
)
# prepare data
nb = 1
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
res, result = collection_w.insert(
data, check_task=CheckTasks.check_nothing
)
if result:
assert collection_w.num_entities == nb
break # Success, exit retry loop
else:
# Insert failed, raise exception to trigger retry
# res is already an Error object, so we can directly raise it
raise Exception(str(res))
except Exception as e:
error_msg = str(e)
if (
"429 Too Many Requests" in error_msg
or "'NoneType' object has no attribute" in error_msg
) and retry_count < max_retries - 1:
log.info(
f"Rate limit exceeded, retrying in {retry_delay} seconds... (Attempt {retry_count + 1}/{max_retries})"
)
time.sleep(retry_delay)
# Increase delay for next retry (exponential backoff)
retry_delay *= 1.5
else:
# If it's not a rate limit error or we've exhausted retries, re-raise
raise
# create index
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
collection_w.create_index(field_name="dense", index_params=index_params)
collection_w.load()
res, _ = collection_w.query(
expr="id >= 0",
output_fields=["dense"],
)
for row in res:
assert len(row["dense"]) == dim
@pytest.mark.parametrize(
"model_name",
[
"embed-english-v3.0", # 1024
"embed-multilingual-v3.0", # 1024
"embed-english-light-v3.0", # 384
"embed-multilingual-light-v3.0", # 384
"embed-english-v2.0", # 4096
"embed-english-light-v2.0", # 1024
"embed-multilingual-v2.0", # 768
],
)
def test_insert_with_cohere_text_embedding(self, model_name):
"""
target: test insert data with text embedding
method: insert data with text embedding function
expected: insert successfully
"""
dim_map = {
"embed-english-v3.0": 1024,
"embed-multilingual-v3.0": 1024,
"embed-english-light-v3.0": 384,
"embed-multilingual-light-v3.0": 384,
"embed-english-v2.0": 4096,
"embed-english-light-v2.0": 1024,
"embed-multilingual-v2.0": 768,
}
dim = dim_map.get(model_name)
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="cohere_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "cohere",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
collection_w.create_index(field_name="dense", index_params=index_params)
collection_w.load()
res, _ = collection_w.query(
expr="id >= 0",
output_fields=["dense"],
)
for row in res:
assert len(row["dense"]) == dim
@pytest.mark.parametrize(
"model_name",
[
"BAAI/bge-large-zh-v1.5", # 1024
"BAAI/bge-large-en-v1.5", # 1024
"netease-youdao/bce-embedding-base_v1", # 768
"BAAI/bge-m3", # 1024
"Pro/BAAI/bge-m3", # 1024
],
)
def test_insert_with_siliconflow_text_embedding(self, model_name):
"""
target: test insert data with text embedding
method: insert data with text embedding function
expected: insert successfully
"""
dim_map = {
"BAAI/bge-large-zh-v1.5": 1024,
"BAAI/bge-large-en-v1.5": 1024,
"netease-youdao/bce-embedding-base_v1": 768,
"BAAI/bge-m3": 1024,
"Pro/BAAI/bge-m3": 1024,
}
dim = dim_map.get(model_name)
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
collection_w.create_index(field_name="dense", index_params=index_params)
collection_w.load()
res, _ = collection_w.query(
expr="id >= 0",
output_fields=["dense"],
)
for row in res:
assert len(row["dense"]) == dim
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_insert_with_tei_text_embedding(self, model_name, tei_endpoint):
"""
target: test insert data with text embedding
method: insert data with text embedding function
expected: insert successfully
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="tei",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "tei",
"model_name": model_name,
"tei_url": tei_endpoint,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
collection_w.create_index(field_name="dense", index_params=index_params)
collection_w.load()
res, _ = collection_w.query(
expr="id >= 0",
output_fields=["dense"],
)
for row in res:
assert len(row["dense"]) == dim
@pytest.mark.parametrize(
"provider, model_name, dim",
[
("cohere", "embed-english-v3.0", 1024),
("cohere", "embed-multilingual-v3.0", 1024),
("cohere", "embed-english-light-v3.0", 384),
("cohere", "embed-multilingual-light-v3.0", 384),
("voyageai", "voyage-3-large", 1024),
("voyageai", "voyage-code-3", 1024),
],
)
def test_insert_with_int8_text_embedding(self, provider, model_name, dim):
"""
target: test insert data with text embedding
method: insert data with text embedding function
expected: insert successfully
"""
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.INT8_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
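        # Cohere and Voyage AI can return int8-quantized embeddings, which is
        # why the output field is declared as INT8_VECTOR here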
text_embedding_function = Function(
name=f"{provider}_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": provider,
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 48},
}
collection_w.create_index(field_name="dense", index_params=index_params)
collection_w.load()
res, _ = collection_w.query(
expr="id >= 0",
output_fields=["dense"],
)
for row in res:
assert len(row["dense"]) == dim
class TestSearchWithTextEmbedding(TestcaseBase):
"""
******************************************************************
The following cases are used to test search with text embedding
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_search_with_text_embedding(self, model_name):
"""
target: test search with text embedding
method: search with text embedding function
expected: search successfully
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 10
data = [{"id": i, "document": fake_en.text()} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "AUTOINDEX",
"metric_type": "COSINE",
"params": {},
}
collection_w.create_index("dense", index_params)
collection_w.load()
# search
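        # Raw query strings are accepted as search data: the embedding
        # function also runs at search time and converts each string into a
        # query vector server-side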
search_params = {"metric_type": "COSINE", "params": {}}
nq = 1
limit = 10
res, _ = collection_w.search(
data=[fake_en.text() for _ in range(nq)],
anns_field="dense",
param=search_params,
            limit=limit,
output_fields=["document"],
)
assert len(res) == nq
for hits in res:
assert len(hits) == limit
class TestInsertWithTextEmbeddingNegative(TestcaseBase):
"""
******************************************************************
The following cases are used to test insert with text embedding negative
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_insert_with_text_embedding_empty_document(self, model_name):
"""
target: test insert data with empty document
method: insert data with empty document
expected: insert failed
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data with empty document
empty_data = [{"id": 1, "document": ""}]
normal_data = [{"id": 2, "document": fake_en.text()}]
data = empty_data + normal_data
collection_w.insert(
data,
check_task=CheckTasks.err_res,
check_items={"err_code": 65535, "err_msg": "The parameter is invalid"},
)
assert collection_w.num_entities == 0
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_insert_with_text_embedding_long_document(self, model_name):
"""
target: test insert data with long document
method: insert data with long document
expected: insert failed
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
        # prepare data with an over-length document
        long_data = [{"id": 1, "document": fake_en.word() * 10000}]
        normal_data = [{"id": 2, "document": fake_en.text()}]
        data = long_data + normal_data
collection_w.insert(
data,
check_task=CheckTasks.err_res,
check_items={
"err_code": 65535,
"err_msg": "input must have less than 8192 tokens",
},
)
assert collection_w.num_entities == 0
class TestEmbeddingAccuracy(TestcaseBase):
"""
******************************************************************
The following cases are used to test embedding accuracy
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_embedding_accuracy(self, model_name):
"""
target: test embedding accuracy compared with provider API
method: 1. generate embedding using Milvus
2. generate embedding using provider API directly
3. compare the results
expected: embeddings should be identical within float precision
"""
# Connect to Milvus
self._connect()
# Test document
test_document = fake_en.text()
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
batch_size = 10
# Insert data
data = [{"id": i, "document": test_document} for i in range(batch_size)]
collection_w.insert(data)
# Create index and load collection
index_params = {
"index_type": "AUTOINDEX",
"metric_type": "COSINE",
"params": {},
}
collection_w.create_index("dense", index_params)
collection_w.load()
# Query the document and get the embedding from Milvus
res, _ = collection_w.query(expr="id >= 0", output_fields=["document", "dense"])
assert len(res) == batch_size
# Get API key from environment variable
api_key = os.getenv("SILICONFLOW_API_KEY")
        assert api_key, "SILICONFLOW_API_KEY environment variable not set"
# API endpoint for SiliconFlow
url = "https://api.siliconflow.cn/v1/embeddings"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
}
# Test all vectors instead of just the first one
similarities = []
for i, item in enumerate(res):
milvus_embedding = item["dense"]
retrieved_document = item["document"]
assert retrieved_document == test_document
# Call SiliconFlow API directly to get embedding
payload = {
"model": model_name,
"input": retrieved_document,
"encoding_format": "float",
}
response = requests.post(url, json=payload, headers=headers)
response.raise_for_status() # Raise exception for HTTP errors
# Extract embedding from response
api_embedding = response.json()["data"][0]["embedding"]
# Compare embeddings
assert (
api_embedding is not None
), f"Failed to get embedding from SiliconFlow API for item {i}"
assert len(milvus_embedding) == len(
api_embedding
), f"Embedding dimensions don't match for item {i}"
# Calculate cosine similarity
cosine_sim = dot(milvus_embedding, api_embedding) / (
norm(milvus_embedding) * norm(api_embedding)
)
similarities.append(cosine_sim)
# Log the similarity for debugging
log.info(
f"Item {i}: Cosine similarity between Milvus and SiliconFlow API embeddings: {cosine_sim}"
)
# Embeddings should be nearly identical (allowing for minor floating point differences)
assert (
cosine_sim > 0.999
), f"Embeddings are not similar enough for item {i}: {cosine_sim}"
# Log summary statistics
avg_similarity = sum(similarities) / len(similarities)
min_similarity = min(similarities)
max_similarity = max(similarities)
log.info(
f"Summary - Average similarity: {avg_similarity}, Min: {min_similarity}, Max: {max_similarity}"
)
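        # Cross-check: searching with raw text should return the same
        # documents as searching with the embedding of that text fetched
        # directly from the provider API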
query_text = fake_en.text()
text_search_res, _ = collection_w.search(
data=[query_text],
anns_field="dense",
param={},
output_fields=["document"],
limit=10,
)
query_embedding = requests.post(
url,
json={"model": model_name, "input": query_text, "encoding_format": "float"},
headers=headers,
).json()["data"][0]["embedding"]
vector_search_res, _ = collection_w.search(
data=[query_embedding],
anns_field="dense",
param={},
output_fields=["document"],
limit=10,
)
for i in range(len(text_search_res)):
for j in range(len(text_search_res[i])):
assert text_search_res[i][j].entity.get(
"document"
) == vector_search_res[i][j].entity.get("document")
class TestMultiLanguageSupport(TestcaseBase):
"""
******************************************************************
The following cases are used to test multi-language support
******************************************************************
"""
def test_multi_language_semantic(self):
"""
target: test semantic similarity of embeddings across different languages
method: 1. Test similar sentences in same language
2. Test same meaning sentences in different languages
expected: 1. Similar sentences in same language should have high similarity
2. Same meaning in different languages should have high similarity
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="text_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": "BAAI/bge-m3",
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# Same sentence in different languages (relevant texts)
relevant_texts = [
"我喜欢中国美食", # Chinese: I love Chinese food
"I love Chinese food", # English
"私は中華料理が大好きです", # Japanese: I love Chinese food
]
# Different sentence in different languages (irrelevant texts)
irrelevant_texts = [
"意大利面很好吃", # Chinese: Italian pasta is delicious
"Italian pasta is delicious", # English
"イタリアンパスタは美味しいです", # Japanese: Italian pasta is delicious
]
# Insert all texts
data = [
{"id": i, "document": text}
for i, text in enumerate(relevant_texts + irrelevant_texts)
]
collection_w.insert(data)
# Create index and load
index_params = {
"index_type": "AUTOINDEX",
"metric_type": "COSINE",
"params": {},
}
collection_w.create_index("dense", index_params)
collection_w.load()
# Search parameters
search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}}
for q_text in relevant_texts:
# Search with text
res, _ = collection_w.search(
data=[q_text],
anns_field="dense",
param=search_params,
limit=len(relevant_texts) + len(irrelevant_texts), # Get all results
output_fields=["document"],
)
# Verify results
assert len(res) == 1 # One search query
# Get all result texts with their scores
results = [(hit.entity.get("document"), hit.score) for hit in res[0]]
log.info(f"data {q_text}, Search results: {results}")
# Verify that all translations of the same sentence are ranked higher
relevant_scores = [
score for text, score in results if text in relevant_texts
]
irrelevant_scores = [
score for text, score in results if text in irrelevant_texts
]
# Check each relevant text score is higher than any irrelevant text score
min_relevant_score = min(relevant_scores)
max_irrelevant_score = max(irrelevant_scores) if irrelevant_scores else 0
# All translations should be found with high similarity
assert min_relevant_score > max_irrelevant_score, (
f"Some irrelevant texts ranked higher than relevant ones. \n"
f"Relevant texts (scores): {relevant_scores}\n"
f"Irrelevant texts (scores): {irrelevant_scores}"
)
class TestMultiProviderSearch(TestcaseBase):
"""
******************************************************************
The following cases are used to test multi-provider search
******************************************************************
"""
def test_multi_provider_search(self):
"""
target: test search with multiple embedding providers
method: 1. create collection with multiple embedding functions
2. insert data
3. search with different providers
expected: search results should be relevant for each provider
"""
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="openai_dense", dtype=DataType.FLOAT_VECTOR, dim=1536),
FieldSchema(name="bge_dense", dtype=DataType.FLOAT_VECTOR, dim=1024),
]
schema = CollectionSchema(fields=fields, description="test collection")
# Add OpenAI embedding function
openai_function = Function(
name="openai_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="openai_dense",
params={
"provider": "openai",
"model_name": "text-embedding-ada-002",
},
)
schema.add_function(openai_function)
# Add BGE embedding function
bge_function = Function(
name="bge_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="bge_dense",
params={
"provider": "siliconflow",
"model_name": "BAAI/bge-m3",
},
)
schema.add_function(bge_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# insert data
nb = 10
data = [{"id": i, "document": f"This is test document {i}"} for i in range(nb)]
collection_w.insert(data)
# create indexes and load
for field in ["openai_dense", "bge_dense"]:
index_params = {
"index_type": "AUTOINDEX",
"metric_type": "COSINE",
"params": {},
}
collection_w.create_index(field, index_params)
collection_w.load()
# search with both providers
search_params = {"metric_type": "COSINE", "params": {}}
for field in ["openai_dense", "bge_dense"]:
res, _ = collection_w.search(
data=["test document"],
anns_field=field,
param=search_params,
limit=10,
output_fields=["document"],
)
assert len(res) == 1
assert len(res[0]) == 10
class TestUpsertWithTextEmbedding(TestcaseBase):
"""
******************************************************************
The following cases are used to test upsert with text embedding
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_upsert_text_field(self, model_name):
"""
target: test upsert text field updates embedding
method: 1. insert data
2. upsert text field
3. verify embedding is updated
expected: embedding should be updated after text field is updated
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="text_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# create index and load
index_params = {
"index_type": "AUTOINDEX",
"metric_type": "COSINE",
"params": {},
}
collection_w.create_index("dense", index_params)
collection_w.load()
# insert initial data
old_text = "This is the original text"
data = [{"id": 1, "document": old_text}]
collection_w.insert(data)
# get original embedding
res, _ = collection_w.query(expr="id == 1", output_fields=["dense"])
old_embedding = res[0]["dense"]
# upsert with new text
new_text = "This is the updated text"
upsert_data = [{"id": 1, "document": new_text}]
collection_w.upsert(upsert_data)
# get new embedding
res, _ = collection_w.query(expr="id == 1", output_fields=["dense"])
new_embedding = res[0]["dense"]
# verify embeddings are different
assert not np.allclose(old_embedding, new_embedding)
        # calculate cosine similarity
sim = np.dot(old_embedding, new_embedding) / (
np.linalg.norm(old_embedding) * np.linalg.norm(new_embedding)
)
log.info(f"cosine similarity: {sim}")
assert sim < 0.99
class TestDeleteWithTextEmbedding(TestcaseBase):
"""
******************************************************************
The following cases are used to test delete with text embedding
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_delete_and_search(self, model_name):
"""
target: test deleted text cannot be searched
method: 1. insert data
2. delete some data
3. verify deleted data cannot be searched
expected: deleted data should not appear in search results
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="text_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# insert data
nb = 3
data = [{"id": i, "document": f"This is test document {i}"} for i in range(nb)]
collection_w.insert(data)
# create index and load
index_params = {
"index_type": "AUTOINDEX",
"metric_type": "COSINE",
"params": {},
}
collection_w.create_index("dense", index_params)
collection_w.load()
# delete document 1
collection_w.delete("id in [1]")
# search and verify document 1 is not in results
search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}}
res, _ = collection_w.search(
data=["test document 1"],
anns_field="dense",
param=search_params,
limit=3,
output_fields=["document", "id"],
)
assert len(res) == 1
for hit in res[0]:
assert hit.entity.get("id") != 1
class TestImportWithTextEmbedding(TestcaseBase):
"""
******************************************************************
The following cases are used to test import with text embedding
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
@pytest.mark.parametrize("file_format", ["json", "parquet", "numpy"])
def test_import_without_embedding(self, model_name, minio_host, file_format):
"""
target: test import data without embedding
method: 1. create collection
2. import data without embedding field
3. verify embeddings are generated
expected: embeddings should be generated after import
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="text_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
# prepare import data without embedding
nb = 1000
if file_format == "json":
file_type = BulkFileType.JSON
elif file_format == "numpy":
file_type = BulkFileType.NUMPY
else:
file_type = BulkFileType.PARQUET
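        # RemoteBulkWriter stages row files in the MinIO bucket; the bulk
        # insert below imports them and the server backfills `dense` through
        # the embedding function, which the final query verifies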
with RemoteBulkWriter(
schema=schema,
remote_path="bulk_data",
connect_param=RemoteBulkWriter.ConnectParam(
bucket_name="milvus-bucket",
endpoint=f"{minio_host}:9000",
access_key="minioadmin",
secret_key="minioadmin",
),
file_type=file_type,
) as remote_writer:
for i in range(nb):
row = {"id": i, "document": f"This is test document {i}"}
remote_writer.append_row(row)
remote_writer.commit()
files = remote_writer.batch_files
# import data
for f in files:
t0 = time.time()
task_id, _ = self.utility_wrap.do_bulk_insert(
collection_name=c_name, files=f
)
log.info(f"bulk insert task ids:{task_id}")
success, states = self.utility_wrap.wait_for_bulk_insert_tasks_completed(
task_ids=[task_id], timeout=300
)
tt = time.time() - t0
log.info(f"bulk insert state:{success} in {tt} with states:{states}")
assert success
num_entities = collection_w.num_entities
log.info(f" collection entities: {num_entities}")
assert num_entities == nb
# create index and load
index_params = {
"index_type": "AUTOINDEX",
"metric_type": "COSINE",
"params": {},
}
collection_w.create_index("dense", index_params)
collection_w.load()
# verify embeddings are generated
res, _ = collection_w.query(expr="id >= 0", output_fields=["dense"])
assert len(res) == nb
for r in res:
assert "dense" in r
assert len(r["dense"]) == dim
class TestHybridSearch(TestcaseBase):
"""
******************************************************************
The following cases are used to test hybrid search
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
def test_hybrid_search(self, model_name):
"""
target: test hybrid search with text embedding and BM25
method: 1. create collection with text embedding and BM25 functions
2. insert data
3. perform hybrid search
expected: search results should combine vector similarity and text relevance
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(
name="document",
dtype=DataType.VARCHAR,
max_length=65535,
enable_analyzer=True,
analyzer_params={"tokenizer": "standard"},
),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
FieldSchema(name="sparse", dtype=DataType.SPARSE_FLOAT_VECTOR),
]
schema = CollectionSchema(fields=fields, description="test collection")
# Add text embedding function
text_embedding_function = Function(
name="text_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
# Add BM25 function
bm25_function = Function(
name="bm25",
function_type=FunctionType.BM25,
input_field_names=["document"],
output_field_names="sparse",
params={},
)
schema.add_function(bm25_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# insert test data
data_size = 1000
data = [{"id": i, "document": fake_en.text()} for i in range(data_size)]
for batch in range(0, data_size, 100):
collection_w.insert(data[batch: batch + 100])
# create index and load
dense_index_params = {
"index_type": "AUTOINDEX",
"metric_type": "COSINE",
"params": {},
}
sparse_index_params = {
"index_type": "AUTOINDEX",
"metric_type": "BM25",
"params": {},
}
collection_w.create_index("dense", dense_index_params)
collection_w.create_index("sparse", sparse_index_params)
collection_w.load()
nq = 2
limit = 100
dense_text_search = AnnSearchRequest(
data=[fake_en.text().lower() for _ in range(nq)],
anns_field="dense",
param={},
limit=limit,
)
dense_vector_search = AnnSearchRequest(
data=[[random.random() for _ in range(dim)] for _ in range(nq)],
anns_field="dense",
param={},
limit=limit,
)
full_text_search = AnnSearchRequest(
data=[fake_en.text().lower() for _ in range(nq)],
anns_field="sparse",
param={},
limit=limit,
)
# hybrid search
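        # WeightedRanker takes one weight per AnnSearchRequest and combines
        # the normalized scores of the three requests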
res_list, _ = collection_w.hybrid_search(
reqs=[dense_text_search, dense_vector_search, full_text_search],
rerank=WeightedRanker(0.5, 0.5, 0.5),
limit=limit,
output_fields=["id", "document"],
)
assert len(res_list) == nq
# check the result correctness
for i in range(nq):
log.info(f"res length: {len(res_list[i])}")
assert len(res_list[i]) == limit
class TestMultiVectorSearch(TestcaseBase):
"""
******************************************************************
The following cases are used to test multi-vector search
******************************************************************
"""
def test_multi_vector_search(self):
"""
target: test search with multiple embedding vectors
method: 1. create collection with multiple embedding functions
2. insert data
3. perform weighted search across multiple vectors
expected: search results should reflect combined similarity scores
"""
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="openai_dense", dtype=DataType.FLOAT_VECTOR, dim=1536),
FieldSchema(name="bge_dense", dtype=DataType.FLOAT_VECTOR, dim=1024),
]
schema = CollectionSchema(fields=fields, description="test collection")
# Add OpenAI embedding function
openai_function = Function(
name="openai_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="openai_dense",
params={
"provider": "openai",
"model_name": "text-embedding-ada-002",
},
)
schema.add_function(openai_function)
# Add BGE embedding function
bge_function = Function(
name="bge_embedding",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="bge_dense",
params={
"provider": "siliconflow",
"model_name": "BAAI/bge-m3",
},
)
schema.add_function(bge_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# insert data
data_size = 3000
batch_size = 100
data = [{"id": i, "document": fake_en.text()} for i in range(data_size)]
for batch in range(0, data_size, batch_size):
collection_w.insert(data[batch: batch + batch_size])
# create indexes and load
for field in ["openai_dense", "bge_dense"]:
index_params = {
"index_type": "AUTOINDEX",
"metric_type": "COSINE",
"params": {},
}
collection_w.create_index(field, index_params)
collection_w.load()
# perform multi-vector search
search_params = {"metric_type": "COSINE", "params": {}}
nq = 10
limit = 100
query_text = [fake_en.text() for i in range(nq)]
# search with OpenAI embedding
openai_res, _ = collection_w.search(
data=query_text,
anns_field="openai_dense",
param=search_params,
limit=limit,
output_fields=["document"],
)
# search with BGE embedding
bge_res, _ = collection_w.search(
data=query_text,
anns_field="bge_dense",
param=search_params,
limit=limit,
output_fields=["document"],
)
# verify both searches return results
assert len(openai_res) == nq
assert len(bge_res) == nq
assert len(openai_res[0]) == limit
assert len(bge_res[0]) == limit
class TestSearchWithTextEmbeddingNegative(TestcaseBase):
"""
******************************************************************
The following cases are used to test search with text embedding negative
******************************************************************
"""
@pytest.mark.parametrize("model_name", ["BAAI/bge-m3"])
@pytest.mark.parametrize("query", ["", "hello world" * 8192])
def test_search_with_text_embedding_negative_query(self, model_name, query):
"""
target: test search with empty query or long query
method: search with empty query
expected: search failed
"""
dim = 1024
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields=fields, description="test collection")
text_embedding_function = Function(
name="siliconflow",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["document"],
output_field_names="dense",
params={
"provider": "siliconflow",
"model_name": model_name,
},
)
schema.add_function(text_embedding_function)
collection_w = self.init_collection_wrap(
name=cf.gen_unique_str(prefix), schema=schema
)
# prepare data
nb = 3
data = [{"id": i, "document": f"This is test document {i}"} for i in range(nb)]
# insert data
collection_w.insert(data)
assert collection_w.num_entities == nb
# create index
index_params = {
"index_type": "AUTOINDEX",
"metric_type": "COSINE",
"params": {},
}
collection_w.create_index("dense", index_params)
collection_w.load()
        # search with an empty or over-length query should fail
search_params = {"metric_type": "COSINE", "params": {}}
collection_w.search(
data=[query],
anns_field="dense",
param=search_params,
limit=3,
output_fields=["document"],
check_task=CheckTasks.err_res,
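            # the expected substring mirrors the error text returned by the
            # server, misspelling included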
check_items={"err_code": 65535, "err_msg": "Call service faild"},
)
class TestInsertPerformanceWithTextEmbeddingFunction(TestcaseBase):
"""
******************************************************************
The following cases are used to test insert performance with text embedding function
******************************************************************
"""
def test_insert_performance_with_text_embedding_function(self, tei_endpoint):
"""
target: test insert performance with text embedding function for all providers and models
method: 1. Test performance with different token counts
2. Test performance across all providers and models
3. Test with batch size = 1
expected: Performance metrics are collected and compared for all models
"""
self._connect()
        # time, pandas, and the schema classes are already imported at module
        # level; only Collection and utility are needed here
        from pymilvus import Collection, utility
# Define all providers and their models with dimensions
providers_models = {
"openai": [
{"name": "text-embedding-ada-002", "dim": 1536},
{"name": "text-embedding-3-small", "dim": 1536},
{"name": "text-embedding-3-large", "dim": 3072},
],
# "azure_openai": [
# {"name": "text-embedding-ada-002", "dim": 1536},
# {"name": "text-embedding-3-small", "dim": 1536},
# {"name": "text-embedding-3-large", "dim": 3072}
# ],
"dashscope": [
{"name": "text-embedding-v1", "dim": 1536},
{"name": "text-embedding-v2", "dim": 1536},
{"name": "text-embedding-v3", "dim": 1024},
],
# "bedrock": [
# {"name": "amazon.titan-embed-text-v2:0", "dim": 1024, "params": {"regin": "us-east-2"}}
# ],
# "vertexai": [
# {"name": "text-embedding-005", "dim": 768, "params": {"projectid": "zilliz-test-410709"}},
# {"name": "text-multilingual-embedding-002", "dim": 768, "params": {"projectid": "zilliz-test-410709"}}
# ],
# "voyageai": [
# {"name": "voyage-3-large", "dim": 1024},
# {"name": "voyage-3", "dim": 1024},
# {"name": "voyage-3-lite", "dim": 512},
# {"name": "voyage-code-3", "dim": 1024},
# {"name": "voyage-finance-2", "dim": 1024},
# {"name": "voyage-law-2", "dim": 1024},
# {"name": "voyage-code-2", "dim": 1536}
# ],
"cohere": [
{"name": "embed-english-v3.0", "dim": 1024},
{"name": "embed-multilingual-v3.0", "dim": 1024},
{"name": "embed-english-light-v3.0", "dim": 384},
{"name": "embed-multilingual-light-v3.0", "dim": 384},
{"name": "embed-english-v2.0", "dim": 4096},
{"name": "embed-english-light-v2.0", "dim": 1024},
{"name": "embed-multilingual-v2.0", "dim": 768},
],
"siliconflow": [
{"name": "BAAI/bge-large-zh-v1.5", "dim": 1024},
{"name": "BAAI/bge-large-en-v1.5", "dim": 1024},
{"name": "netease-youdao/bce-embedding-base_v1", "dim": 768},
{"name": "BAAI/bge-m3", "dim": 1024},
{"name": "Pro/BAAI/bge-m3", "dim": 1024},
],
"tei": [
{
"name": "tei",
"dim": 1024,
"params": {"provider": "TEI", "endpoint": tei_endpoint},
}
],
}
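        # The commented-out providers (azure_openai, bedrock, vertexai,
        # voyageai) are kept for reference; running them needs
        # provider-specific credentials or project settings.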
        # Generate text from simple fake words for approximate token control
        def generate_fake_text(token_count):
            # ~0.8 words per token is a rough heuristic for English tokenizers
            return " ".join(fake_en.word() for _ in range(int(token_count * 0.8)))
# Define token count variations
token_variations = [
{"name": "256_tokens", "text": generate_fake_text(256), "tokens": 256},
{"name": "512_tokens", "text": generate_fake_text(512), "tokens": 512},
{"name": "1024_tokens", "text": generate_fake_text(1024), "tokens": 1024},
{"name": "2048_tokens", "text": generate_fake_text(2048), "tokens": 2048},
{"name": "4096_tokens", "text": generate_fake_text(4096), "tokens": 4096},
{"name": "8192_tokens", "text": generate_fake_text(8192), "tokens": 8192},
]
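        # the larger variations are expected to exceed some models' input
        # limits, exercising the token-limit error path below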
# Prepare results dataframe
results = []
# Test each provider and model
for provider, models in providers_models.items():
for model in models:
model_name = model["name"]
dim = model["dim"]
# Create collection with appropriate dimension
schema = CollectionSchema(
[
FieldSchema("id", DataType.INT64, is_primary=True),
FieldSchema("text", DataType.VARCHAR, max_length=65535),
FieldSchema("embedding", DataType.FLOAT_VECTOR, dim=dim),
]
)
# Configure text embedding function
params = {"provider": provider, "model_name": model_name}
# Add additional parameters
if "params" in model:
params.update(model["params"])
log.info(f"params: {params}")
text_embedding_function = Function(
name=f"{provider}_{model_name.replace('/', '_')}_func",
function_type=FunctionType.TEXTEMBEDDING,
input_field_names=["text"],
output_field_names="embedding",
params=params,
)
schema.add_function(text_embedding_function)
                # Sanitize special characters in the model name for use in the collection name
model_name_safe = (
model_name.replace("/", "_")
.replace(".", "_")
.replace(":", "_")
.replace("-", "_")
)
# Create collection name
collection_name = f"test_text_embedding_perf_{provider}_{model_name_safe}_{int(time.time())}"
try:
collection = Collection(collection_name, schema)
# Test with different token counts
for token_var in token_variations:
test_text = token_var["text"]
token_count = token_var["tokens"]
token_name = token_var["name"]
# Measure latency with batch size = 1
try:
data = [
{
"id": 0,
"text": test_text,
}
]
start_time = time.time()
collection.insert(data)
latency = time.time() - start_time
# Add concurrent test for 256 tokens only to avoid excessive API calls
if token_name == "256_tokens":
import concurrent.futures
                            # Worker for the thread pool: insert one row and time it
                            def concurrent_insert(i, request_id):
                                try:
                                    data = [
                                        {
                                            "id": 0,
                                            "text": test_text,
                                        }
                                    ]
                                    start = time.time()
                                    collection.insert(data)
                                    end = time.time()
                                    return {"success": True, "latency": end - start}
                                except Exception as e:
                                    return {"success": False, "error": str(e)}
# Define concurrency levels to test
concurrency_levels = [1, 2, 5, 10, 20, 50]
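                            # ramp concurrency upward, stopping early once a
                            # rate limit is detected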
rate_limit_detected = False
best_qps = 0
best_concurrency = 1
previous_success_rate = 100
print(
f"\n{provider} - {model_name} - Concurrency scaling test:"
)
# Test each concurrency level
for concurrency in concurrency_levels:
if rate_limit_detected:
break
# Run concurrent test
concurrent_results = []
concurrent_start = time.time()
with concurrent.futures.ThreadPoolExecutor(
max_workers=concurrency
) as executor:
future_to_idx = {
executor.submit(
concurrent_insert, i % concurrency, i
): i
for i in range(concurrency)
}
for future in concurrent.futures.as_completed(
future_to_idx
):
idx = future_to_idx[future]
try:
result = future.result()
result["idx"] = idx
concurrent_results.append(result)
except Exception as e:
concurrent_results.append(
{
"idx": idx,
"success": False,
"error": str(e),
}
)
concurrent_end = time.time()
concurrent_total_time = (
concurrent_end - concurrent_start
)
# Calculate concurrent metrics
successful = [
r
for r in concurrent_results
if r.get("success", False)
]
success_rate = (
len(successful) / concurrency
if concurrency > 0
else 0
)
success_rate_pct = success_rate * 100
avg_latency = (
sum(r.get("latency", 0) for r in successful)
/ len(successful)
if successful
else 0
)
# Calculate QPS (Queries Per Second)
qps = (
len(successful) / concurrent_total_time
if concurrent_total_time > 0
else 0
)
# Check if this is the best QPS so far
if (
qps > best_qps and success_rate_pct >= 90
): # Only consider if success rate is good
best_qps = qps
best_concurrency = concurrency
# Check if we've hit a rate limit (success rate dropped significantly)
if (
previous_success_rate > 90
and success_rate_pct < 70
):
rate_limit_detected = True
print(
f" Rate limit detected at concurrency {concurrency} (QPS: {qps:.2f})"
)
previous_success_rate = success_rate_pct
# Collect error messages
error_messages = [
r.get("error", "")
for r in concurrent_results
if not r.get("success", False)
]
error_message = (
"; ".join(set(error_messages))
if error_messages
else ""
)
                                # Record results for this concurrency level
results.append(
{
"provider": provider,
"model": model_name,
"token_count": token_count,
"token_name": token_name,
"test_type": "concurrent",
"concurrent_count": concurrency,
"total_time": concurrent_total_time,
"avg_latency": avg_latency,
"qps": qps,
"success_rate": success_rate_pct,
"fail_rate": 100.0 - success_rate_pct,
"error_message": error_message,
"rate_limit_detected": rate_limit_detected,
"status": "success",
}
)
print(
f" Concurrency {concurrency}: QPS={qps:.2f}, Success={success_rate_pct:.1f}%, Avg Latency={avg_latency:.3f}s"
)
# Add a small delay between tests to avoid immediate rate limiting
time.sleep(1)
# Record best QPS results
if best_qps > 0:
print(
f" Best performance: {best_qps:.2f} QPS at concurrency {best_concurrency}"
)
results.append(
{
"provider": provider,
"model": model_name,
"token_count": token_count,
"token_name": token_name,
"test_type": "best_performance",
"best_qps": best_qps,
"best_concurrency": best_concurrency,
"status": "success",
}
)
                        # Record the single-insert baseline result
results.append(
{
"provider": provider,
"model": model_name,
"token_count": token_count,
"token_name": token_name,
"latency": latency,
"tokens_per_second": token_count / latency,
"test_type": "single", # Add test_type field
"status": "success",
}
)
print(
f"{provider} - {model_name} - {token_name} ({token_count} tokens): {latency:.3f}s"
)
except Exception as e:
print(
f"Error testing {provider} - {model_name} with {token_count} tokens: {str(e)}"
)
results.append(
{
"provider": provider,
"model": model_name,
"token_count": token_count,
"token_name": token_name,
"latency": None,
"tokens_per_second": None,
"test_type": "single", # Add test_type field
"status": f"error: {str(e)}",
}
)
except Exception as e:
print(f"Error setting up {provider} - {model_name}: {str(e)}")
results.append(
{
"provider": provider,
"model": model_name,
"token_count": "N/A",
"token_name": "N/A",
"latency": None,
"tokens_per_second": None,
"test_type": "setup", # Add test_type field
"status": f"setup error: {str(e)}",
}
)
                # Cleanup (the collection may not exist if setup failed)
                if utility.has_collection(collection_name):
                    utility.drop_collection(collection_name)
# Convert results to DataFrame for analysis
df = pd.DataFrame(results)
if not df.empty:
# Create a new DataFrame for generating more intuitive tabular data
performance_table = []
            # Process single-insert test results; add the test_type column
            # first in case any earlier rows predate it
            if "test_type" not in df.columns:
                df["test_type"] = "single"  # default to single-insert tests
single_tests = df[
(df["status"] == "success")
& (~df["test_type"].isin(["concurrent", "best_performance"]))
]
for _, row in single_tests.iterrows():
performance_table.append(
{
"Provider": row["provider"],
"Model Name": row["model"],
"Text Token": row["token_count"],
"Batch Size": 1,
"Concurrent": 1,
"Latency (avg)": row["latency"],
"Latency (min)": row["latency"],
"Latency (max)": row["latency"],
"QPS": 1 / row["latency"] if row["latency"] > 0 else 0,
"Success Rate": 100.0,
"Fail Rate": 0.0,
"Error Message": "",
"Rate Limit": "No",
"Token Limit": "No",
}
)
concurrent_tests = df[
(df["status"] == "success") & (df["test_type"] == "concurrent")
]
for _, row in concurrent_tests.iterrows():
performance_table.append(
{
"Provider": row["provider"],
"Model Name": row["model"],
"Text Token": row["token_count"],
"Batch Size": 1,
"Concurrent": row["concurrent_count"],
"Latency (avg)": row["avg_latency"],
"Latency (min)": row["avg_latency"],
"Latency (max)": row["avg_latency"],
"QPS": row["qps"],
"Success Rate": row["success_rate"],
"Fail Rate": 100.0 - row["success_rate"],
"Error Message": "",
"Rate Limit": "Yes"
if row.get("rate_limit_detected", False)
else "No",
"Token Limit": "No",
}
)
error_tests = df[df["status"].str.contains("error")]
for _, row in error_tests.iterrows():
error_msg = row["status"].replace("error: ", "")
token_limit = (
"Yes"
if "input must have less than 512 tokens" in error_msg
else "No"
)
performance_table.append(
{
"Provider": row["provider"],
"Model Name": row["model"],
"Text Token": row["token_count"],
"Batch Size": 1,
"Concurrent": 1,
"Latency (avg)": None,
"Latency (min)": None,
"Latency (max)": None,
"QPS": 0,
"Success Rate": 0.0,
"Fail Rate": 100.0,
"Error Message": error_msg,
"Rate Limit": "No",
"Token Limit": token_limit,
}
)
performance_df = pd.DataFrame(performance_table)
performance_df = performance_df.sort_values(
by=["Provider", "Model Name", "Text Token", "Concurrent"]
)
print("\nDetailed Performance Results:")
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 1000)
print(performance_df.to_string(index=False))
            from datetime import datetime
results_dir = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "results"
)
os.makedirs(results_dir, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = os.path.join(
results_dir, f"embedding_performance_{timestamp}.csv"
)
performance_df.to_csv(csv_filename, index=False)
if "status" in df.columns:
provider_summary = (
df[df["status"] == "success"]
.groupby("provider")["latency"]
.agg(["mean", "min", "max"])
)
summary_csv = os.path.join(
results_dir, f"provider_summary_{timestamp}.csv"
)
provider_summary.to_csv(summary_csv)
print(f"\nResults saved to: {csv_filename}")
print("\nPerformance Summary by Provider:")
provider_summary = (
df[df["status"] == "success"]
.groupby("provider")["latency"]
.agg(["mean", "min", "max"])
)
print(provider_summary)
print("\nPerformance Summary by Model:")
model_summary = (
df[df["status"] == "success"]
.groupby(["provider", "model"])["latency"]
.agg(["mean", "min", "max"])
)
print(model_summary)
else:
print("No successful tests completed")