diff --git a/tests/python_client/base/client_base.py b/tests/python_client/base/client_base.py
index e0f79ebc10..b1f5952cbc 100644
--- a/tests/python_client/base/client_base.py
+++ b/tests/python_client/base/client_base.py
@@ -138,6 +138,7 @@ class TestcaseBase(Base):
     Additional methods;
     Public methods that can be used for test cases.
     """
+    client = None
 
     def _connect(self, enable_milvus_client_api=False):
        """ Add a connection and create the connect """
@@ -152,6 +153,7 @@ class TestcaseBase(Base):
             self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING,uri=uri,token=cf.param_info.param_token)
             res, is_succ = self.connection_wrap.MilvusClient(uri=uri, token=cf.param_info.param_token)
+            self.client = MilvusClient(uri=uri, token=cf.param_info.param_token)
         else:
             if cf.param_info.param_user and cf.param_info.param_password:
                 res, is_succ = self.connection_wrap.connect(alias=DefaultConfig.DEFAULT_USING,
@@ -165,6 +167,8 @@ class TestcaseBase(Base):
                                                             host=cf.param_info.param_host,
                                                             port=cf.param_info.param_port)
+        uri = "http://" + cf.param_info.param_host + ":" + str(cf.param_info.param_port)
+        self.client = MilvusClient(uri=uri, token=cf.param_info.param_token)
         server_version = utility.get_server_version()
         log.info(f"server version: {server_version}")
         return res
@@ -183,7 +187,7 @@ class TestcaseBase(Base):
         res = client.run_analyzer(text, analyzer_params, with_detail=True, with_hash=True)
         tokens = [r['token'] for r in res.tokens]
         return tokens
-
+
     # def init_async_milvus_client(self):
     #     uri = cf.param_info.param_uri or f"http://{cf.param_info.param_host}:{cf.param_info.param_port}"
diff --git a/tests/python_client/milvus_client/test_milvus_client_analyzer.py b/tests/python_client/milvus_client/test_milvus_client_analyzer.py
index 3b0faa7805..60885f96e8 100644
--- a/tests/python_client/milvus_client/test_milvus_client_analyzer.py
+++ b/tests/python_client/milvus_client/test_milvus_client_analyzer.py
@@ -18,6 +18,12 @@ class TestMilvusClientAnalyzer(TestMilvusClientV2Base):
         },
         {
             "tokenizer": "jieba",
+            "filter": [
+                {
+                    "type": "stop",
+                    "stop_words": ["is", "the", "this", "a", "an", "and", "or", "是", "的", "这", "一个", "和", "或"],
+                }
+            ],
         },
         {
             "tokenizer": "icu"
diff --git a/tests/python_client/testcases/test_full_text_search.py b/tests/python_client/testcases/test_full_text_search.py
index f966d6b55d..31f3ee0858 100644
--- a/tests/python_client/testcases/test_full_text_search.py
+++ b/tests/python_client/testcases/test_full_text_search.py
@@ -1,5 +1,13 @@
+import json
+
 from pymilvus import (
-    FieldSchema, CollectionSchema, DataType, Function, FunctionType, AnnSearchRequest, WeightedRanker
+    FieldSchema,
+    CollectionSchema,
+    DataType,
+    Function,
+    FunctionType,
+    AnnSearchRequest,
+    WeightedRanker,
 )
 from common.common_type import CaseLabel, CheckTasks
 from common import common_func as cf
@@ -15,6 +23,8 @@ from faker import Faker
 Faker.seed(19530)
 fake_en = Faker("en_US")
 fake_zh = Faker("zh_CN")
+fake_jp = Faker("ja_JP")
+fake_de = Faker("de_DE")
 
 # patch faker to generate text with specific distribution
 cf.patch_faker_text(fake_en, cf.en_vocabularies_distribution)
@@ -77,7 +87,9 @@ class TestCreateCollectionWIthFullTextSearch(TestcaseBase):
             ),
             FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=dim),
             FieldSchema(name="text_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR),
-            FieldSchema(name="paragraph_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR),
+            FieldSchema(
+                name="paragraph_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR
+            ),
         ]
         schema = CollectionSchema(fields=fields, description="test collection")
description="test collection") text_fields = ["text", "paragraph"] @@ -98,7 +110,9 @@ class TestCreateCollectionWIthFullTextSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("tokenizer", ["standard"]) - def test_create_collection_for_full_text_search_twice_with_same_schema(self, tokenizer): + def test_create_collection_for_full_text_search_twice_with_same_schema( + self, tokenizer + ): """ target: test create collection with full text search twice with same schema method: create collection with full text search, use bm25 function, then create again @@ -141,7 +155,9 @@ class TestCreateCollectionWIthFullTextSearch(TestcaseBase): ), FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=dim), FieldSchema(name="text_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR), - FieldSchema(name="paragraph_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR), + FieldSchema( + name="paragraph_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR + ), ] schema = CollectionSchema(fields=fields, description="test collection") text_fields = ["text", "paragraph"] @@ -155,12 +171,8 @@ class TestCreateCollectionWIthFullTextSearch(TestcaseBase): ) schema.add_function(bm25_function) c_name = cf.gen_unique_str(prefix) - self.init_collection_wrap( - name=c_name, schema=schema - ) - collection_w = self.init_collection_wrap( - name=c_name, schema=schema - ) + self.init_collection_wrap(name=c_name, schema=schema) + collection_w = self.init_collection_wrap(name=c_name, schema=schema) res, _ = collection_w.describe() assert len(res["functions"]) == len(text_fields) @@ -176,7 +188,9 @@ class TestCreateCollectionWithFullTextSearchNegative(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("tokenizer", ["unsupported"]) @pytest.mark.skip(reason="check not implement may cause panic") - def test_create_collection_for_full_text_search_with_unsupported_tokenizer(self, tokenizer): + def test_create_collection_for_full_text_search_with_unsupported_tokenizer( + self, tokenizer + ): """ target: test create collection with full text search with unsupported tokenizer method: create collection with full text search, use bm25 function and unsupported tokenizer @@ -219,7 +233,9 @@ class TestCreateCollectionWithFullTextSearchNegative(TestcaseBase): ), FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=dim), FieldSchema(name="text_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR), - FieldSchema(name="paragraph_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR), + FieldSchema( + name="paragraph_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR + ), ] schema = CollectionSchema(fields=fields, description="test collection") text_fields = ["text", "paragraph"] @@ -237,12 +253,16 @@ class TestCreateCollectionWithFullTextSearchNegative(TestcaseBase): ) res, result = collection_w.describe() log.info(f"collection describe {res}") - assert not result, "create collection with unsupported tokenizer should be failed" + assert not result, ( + "create collection with unsupported tokenizer should be failed" + ) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("valid_output", [True, False]) @pytest.mark.parametrize("valid_input", [True, False]) - def test_create_collection_for_full_text_search_with_invalid_input_output(self, valid_output, valid_input): + def test_create_collection_for_full_text_search_with_invalid_input_output( + self, valid_output, valid_input + ): """ target: test create collection with full text search with invalid input/output in bm25 function method: create collection with full text 
@@ -285,7 +305,9 @@ class TestCreateCollectionWithFullTextSearchNegative(TestcaseBase):
             ),
             FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=dim),
             FieldSchema(name="text_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR),
-            FieldSchema(name="paragraph_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR),
+            FieldSchema(
+                name="paragraph_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR
+            ),
         ]
         schema = CollectionSchema(fields=fields, description="test collection")
         if valid_input:
@@ -298,7 +320,7 @@ class TestCreateCollectionWithFullTextSearchNegative(TestcaseBase):
             output_field_names = ["invalid_output"]
 
         bm25_function = Function(
-            name=f"text_bm25_emb",
+            name="text_bm25_emb",
             function_type=FunctionType.BM25,
             input_field_names=input_field_names,
             output_field_names=output_field_names,
@@ -307,9 +329,13 @@ class TestCreateCollectionWithFullTextSearchNegative(TestcaseBase):
         schema.add_function(bm25_function)
         if (not valid_output) or (not valid_input):
             self.init_collection_wrap(
-                name=cf.gen_unique_str(prefix), schema=schema,
+                name=cf.gen_unique_str(prefix),
+                schema=schema,
                 check_task=CheckTasks.err_res,
-                check_items={ct.err_code: 1, ct.err_msg: "field not found in collection"}
+                check_items={
+                    ct.err_code: 1,
+                    ct.err_msg: "field not found in collection",
+                },
             )
         else:
             collection_w = self.init_collection_wrap(
@@ -317,7 +343,9 @@ class TestCreateCollectionWithFullTextSearchNegative(TestcaseBase):
             )
             res, result = collection_w.describe()
             log.info(f"collection describe {res}")
-            assert result, "create collection with valid input/output should be successful"
+            assert result, (
+                "create collection with valid input/output should be successful"
+            )
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_create_collection_for_full_text_search_with_field_not_tokenized(self):
@@ -363,25 +391,30 @@ class TestCreateCollectionWithFullTextSearchNegative(TestcaseBase):
             ),
             FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=dim),
             FieldSchema(name="text_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR),
-            FieldSchema(name="paragraph_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR),
+            FieldSchema(
+                name="paragraph_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR
+            ),
         ]
         schema = CollectionSchema(fields=fields, description="test collection")
         bm25_function = Function(
-            name=f"text_bm25_emb",
+            name="text_bm25_emb",
             function_type=FunctionType.BM25,
             input_field_names=["text"],
             output_field_names=["text_sparse_emb"],
-            params={
-            },
+            params={},
         )
         schema.add_function(bm25_function)
         check_task = CheckTasks.err_res
-        check_items = {ct.err_code: 65535, ct.err_msg: "BM25 function input field must set enable_analyzer to true"}
+        check_items = {
+            ct.err_code: 65535,
+            ct.err_msg: "BM25 function input field must set enable_analyzer to true",
+        }
         self.init_collection_wrap(
-            name=cf.gen_unique_str(prefix), schema=schema,
+            name=cf.gen_unique_str(prefix),
+            schema=schema,
             check_task=check_task,
-            check_items=check_items
+            check_items=check_items,
         )
@@ -393,7 +426,6 @@ class TestInsertWithFullTextSearch(TestcaseBase):
     ******************************************************************
     """
-
     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("nullable", [False, True])
     @pytest.mark.parametrize("text_lang", ["en", "zh", "hybrid"])
@@ -469,8 +501,12 @@ class TestInsertWithFullTextSearch(TestcaseBase):
                 {
                     "id": i,
                     "word": fake.word().lower(),
-                    "sentence": fake.sentence().lower() if random.random() < 0.5 else None,
-                    "paragraph": fake.paragraph().lower() if random.random() < 0.5 else None,
+                    "sentence": fake.sentence().lower()
+                    if random.random() < 0.5
+                    else None,
+                    "paragraph": fake.paragraph().lower()
+                    if random.random() < 0.5
+                    else None,
                     "text": fake.text().lower(),  # function input should not be None
                     "emb": [random.random() for _ in range(dim)],
                 }
@@ -507,13 +543,17 @@ class TestInsertWithFullTextSearch(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
         collection_w.create_index(
             "text_sparse_emb",
@@ -524,16 +564,13 @@ class TestInsertWithFullTextSearch(TestcaseBase):
                     "drop_ratio_build": 0.3,
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
-                }
-            }
+                },
+            },
         )
         collection_w.create_index("text", {"index_type": "INVERTED"})
         collection_w.load()
         num_entities = collection_w.num_entities
-        res, _ = collection_w.query(
-            expr="",
-            output_fields=["count(*)"]
-        )
+        res, _ = collection_w.query(expr="", output_fields=["count(*)"])
         count = res[0]["count(*)"]
         assert len(data) == num_entities
         assert len(data) == count
@@ -543,7 +580,9 @@ class TestInsertWithFullTextSearch(TestcaseBase):
     @pytest.mark.parametrize("nullable", [False])
     @pytest.mark.parametrize("text_lang", ["en"])
     @pytest.mark.parametrize("tokenizer", ["standard"])
-    def test_insert_for_full_text_search_enable_dynamic_field(self, tokenizer, text_lang, nullable, enable_dynamic_field):
+    def test_insert_for_full_text_search_enable_dynamic_field(
+        self, tokenizer, text_lang, nullable, enable_dynamic_field
+    ):
         """
         target: test insert data with full text search and enable dynamic field
         method: 1. create collection with full text search and enable dynamic field
@@ -591,7 +630,11 @@ class TestInsertWithFullTextSearch(TestcaseBase):
             FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=dim),
             FieldSchema(name="text_sparse_emb", dtype=DataType.SPARSE_FLOAT_VECTOR),
         ]
-        schema = CollectionSchema(fields=fields, description="test collection", enable_dynamic_field=enable_dynamic_field)
+        schema = CollectionSchema(
+            fields=fields,
+            description="test collection",
+            enable_dynamic_field=enable_dynamic_field,
+        )
         bm25_function = Function(
             name="text_bm25_emb",
             function_type=FunctionType.BM25,
@@ -617,11 +660,15 @@ class TestInsertWithFullTextSearch(TestcaseBase):
                 {
                     "id": i,
                     "word": fake.word().lower(),
-                    "sentence": fake.sentence().lower() if random.random() < 0.5 else None,
-                    "paragraph": fake.paragraph().lower() if random.random() < 0.5 else None,
+                    "sentence": fake.sentence().lower()
+                    if random.random() < 0.5
+                    else None,
+                    "paragraph": fake.paragraph().lower()
+                    if random.random() < 0.5
+                    else None,
                     "text": fake.text().lower(),  # function input should not be None
                     "emb": [random.random() for _ in range(dim)],
-                    f"dynamic_field_{i}": f"dynamic_value_{i}"
+                    f"dynamic_field_{i}": f"dynamic_value_{i}",
                 }
                 for i in range(data_size)
             ]
@@ -634,7 +681,7 @@ class TestInsertWithFullTextSearch(TestcaseBase):
                     "paragraph": fake.paragraph().lower(),
                     "text": fake.text().lower(),
                     "emb": [random.random() for _ in range(dim)],
-                    f"dynamic_field_{i}": f"dynamic_value_{i}"
+                    f"dynamic_field_{i}": f"dynamic_value_{i}",
                 }
                 for i in range(data_size)
             ]
@@ -649,7 +696,7 @@ class TestInsertWithFullTextSearch(TestcaseBase):
                     "paragraph": fake.paragraph().lower(),
                     "text": fake.text().lower(),
                     "emb": [random.random() for _ in range(dim)],
-                    f"dynamic_field_{i}": f"dynamic_value_{i}"
+                    f"dynamic_field_{i}": f"dynamic_value_{i}",
                 }
                 hybrid_data.append(tmp)
             data = hybrid_data + data
@@ -658,13 +705,17 @@ class TestInsertWithFullTextSearch(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(data), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(data)
-                else data[i: len(data)]
+                else data[i : len(data)]
             )
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
         collection_w.create_index(
             "text_sparse_emb",
@@ -675,16 +726,13 @@ class TestInsertWithFullTextSearch(TestcaseBase):
                     "drop_ratio_build": 0.3,
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
-                }
-            }
+                },
+            },
         )
         collection_w.create_index("text", {"index_type": "INVERTED"})
         collection_w.load()
         num_entities = collection_w.num_entities
-        res, _ = collection_w.query(
-            expr="",
-            output_fields=["count(*)"]
-        )
+        res, _ = collection_w.query(expr="", output_fields=["count(*)"])
         count = res[0]["count(*)"]
         assert len(data) == num_entities
         assert len(data) == count
@@ -693,7 +741,9 @@ class TestInsertWithFullTextSearch(TestcaseBase):
     @pytest.mark.parametrize("nullable", [True])
     @pytest.mark.parametrize("text_lang", ["en"])
     @pytest.mark.parametrize("tokenizer", ["standard"])
-    def test_insert_for_full_text_search_with_dataframe(self, tokenizer, text_lang, nullable):
+    def test_insert_for_full_text_search_with_dataframe(
+        self, tokenizer, text_lang, nullable
+    ):
         """
         target: test insert data for full text search with dataframe
         method: 1. insert data with varchar in dataframe format
@@ -765,8 +815,12 @@ class TestInsertWithFullTextSearch(TestcaseBase):
                 {
                     "id": i,
                     "word": fake.word().lower(),
-                    "sentence": fake.sentence().lower() if random.random() < 0.5 else None,
-                    "paragraph": fake.paragraph().lower() if random.random() < 0.5 else None,
+                    "sentence": fake.sentence().lower()
+                    if random.random() < 0.5
+                    else None,
+                    "paragraph": fake.paragraph().lower()
+                    if random.random() < 0.5
+                    else None,
                     "text": fake.text().lower(),  # function input should not be None
                     "emb": [random.random() for _ in range(dim)],
                 }
@@ -802,10 +856,14 @@ class TestInsertWithFullTextSearch(TestcaseBase):
         log.info(f"dataframe\n{df}")
         batch_size = 5000
         for i in range(0, len(df), batch_size):
-            collection_w.insert(df[i: i + batch_size])
+            collection_w.insert(df[i : i + batch_size])
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
         collection_w.create_index(
             "text_sparse_emb",
@@ -816,16 +874,13 @@ class TestInsertWithFullTextSearch(TestcaseBase):
                     "drop_ratio_build": 0.3,
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
-                }
-            }
+                },
+            },
         )
         collection_w.create_index("text", {"index_type": "INVERTED"})
         collection_w.load()
         num_entities = collection_w.num_entities
-        res, _ = collection_w.query(
-            expr="",
-            output_fields=["count(*)"]
-        )
+        res, _ = collection_w.query(expr="", output_fields=["count(*)"])
         count = res[0]["count(*)"]
         assert len(data) == num_entities
         assert len(data) == count
@@ -900,7 +955,11 @@ class TestInsertWithFullTextSearch(TestcaseBase):
             language = "zh"
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
         collection_w.create_index(
             "text_sparse_emb",
@@ -911,8 +970,8 @@ class TestInsertWithFullTextSearch(TestcaseBase):
                     "drop_ratio_build": 0.3,
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
-                }
-            }
+                },
+            },
         )
         collection_w.create_index("text", {"index_type": "INVERTED"})
         collection_w.load()
@@ -929,31 +988,25 @@ class TestInsertWithFullTextSearch(TestcaseBase):
         ]
         df = pd.DataFrame(data)
         log.info(f"dataframe\n{df}")
-        log.info(f"analyze documents")
+        log.info("analyze documents")
         texts = df["text"].to_list()
         word_freq = cf.analyze_documents(texts, language=language)
         tokens = list(word_freq.keys())
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         num_entities = collection_w.num_entities
         # query with count(*)
-        res, _ = collection_w.query(
-            expr="",
-            output_fields=["count(*)"]
-        )
+        res, _ = collection_w.query(expr="", output_fields=["count(*)"])
         count = res[0]["count(*)"]
         assert len(data) == num_entities
         assert len(data) == count
         # query with expr
-        res, _ = collection_w.query(
-            expr="id >= 0",
-            output_fields=["text"]
-        )
+        res, _ = collection_w.query(expr="id >= 0", output_fields=["text"])
         assert len(res) == len(data)
 
         # search with text
@@ -965,7 +1018,8 @@ class TestInsertWithFullTextSearch(TestcaseBase):
             anns_field="text_sparse_emb",
             param={},
             limit=limit,
-            output_fields=["id", "text"])
+            output_fields=["id", "text"],
+        )
         assert len(res_list) == nq
         for i in range(nq):
             assert len(res_list[i]) == limit
@@ -975,9 +1029,12 @@ class TestInsertWithFullTextSearch(TestcaseBase):
             for j in range(len(res)):
                 r = res[j]
                 result_text = r.text
-                overlap, word_freq_a, word_freq_b = cf.check_token_overlap(search_text, result_text, language=language)
-                assert len(
-                    overlap) > 0, f"query text: {search_text}, \ntext: {result_text} \n overlap: {overlap} \n word freq a: {word_freq_a} \n word freq b: {word_freq_b}\n result: {r}"
+                overlap, word_freq_a, word_freq_b = cf.check_token_overlap(
+                    search_text, result_text, language=language
+                )
+                assert len(overlap) > 0, (
+                    f"query text: {search_text}, \ntext: {result_text} \n overlap: {overlap} \n word freq a: {word_freq_a} \n word freq b: {word_freq_b}\n result: {r}"
+                )
 
 
 # @pytest.mark.skip("skip")
@@ -991,7 +1048,9 @@ class TestInsertWithFullTextSearchNegative(TestcaseBase):
     @pytest.mark.tags(CaseLabel.L1)
     @pytest.mark.parametrize("nullable", [True])
     @pytest.mark.parametrize("tokenizer", ["standard"])
-    def test_insert_with_full_text_search_with_non_varchar_data(self, tokenizer, nullable):
+    def test_insert_with_full_text_search_with_non_varchar_data(
+        self, tokenizer, nullable
+    ):
         """
         target: test insert data with full text search with non varchar data
         method: 1. insert data with non varchar data
@@ -1061,7 +1120,9 @@ class TestInsertWithFullTextSearchNegative(TestcaseBase):
                 "word": fake.word().lower(),
                 "sentence": fake.sentence().lower(),
                 "paragraph": fake.paragraph().lower(),
-                "text": fake.text().lower() if random.random() < 0.5 else 1,  # mix some int data
+                "text": fake.text().lower()
+                if random.random() < 0.5
+                else 1,  # mix some int data
                 "emb": [random.random() for _ in range(dim)],
             }
             for i in range(data_size)
@@ -1071,13 +1132,17 @@ class TestInsertWithFullTextSearchNegative(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)],
+                else data[i : len(df)],
                 check_task=CheckTasks.err_res,
-                check_items={ct.err_code: 1, ct.err_msg: "inconsistent with defined schema"},
+                check_items={
+                    ct.err_code: 1,
+                    ct.err_msg: "inconsistent with defined schema",
+                },
             )
 
+
 # @pytest.mark.skip("skip")
 class TestUpsertWithFullTextSearch(TestcaseBase):
     """
@@ -1086,7 +1151,6 @@ class TestUpsertWithFullTextSearch(TestcaseBase):
     ******************************************************************
     """
-
     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("nullable", [False, True])
     @pytest.mark.parametrize("tokenizer", ["standard"])
@@ -1152,18 +1216,20 @@ class TestUpsertWithFullTextSearch(TestcaseBase):
             name=cf.gen_unique_str(prefix), schema=schema
         )
         fake = fake_en
-        language = "en"
         if tokenizer == "jieba":
             fake = fake_zh
-            language = "zh"
 
         if nullable:
             data = [
                 {
                     "id": i,
                     "word": fake.word().lower(),
-                    "sentence": fake.sentence().lower() if random.random() < 0.5 else None,
-                    "paragraph": fake.paragraph().lower() if random.random() < 0.5 else None,
+                    "sentence": fake.sentence().lower()
+                    if random.random() < 0.5
+                    else None,
+                    "paragraph": fake.paragraph().lower()
+                    if random.random() < 0.5
+                    else None,
                     "text": fake.text().lower(),  # function input should not be None
                     "emb": [random.random() for _ in range(dim)],
                 }
@@ -1186,13 +1252,17 @@ class TestUpsertWithFullTextSearch(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
"metric_type": "L2", "params": {"M": 16, "efConstruction": 500}}, + { + "index_type": "HNSW", + "metric_type": "L2", + "params": {"M": 16, "efConstruction": 500}, + }, ) collection_w.create_index( "text_sparse_emb", @@ -1203,16 +1273,13 @@ class TestUpsertWithFullTextSearch(TestcaseBase): "drop_ratio_build": 0.3, "bm25_k1": 1.5, "bm25_b": 0.75, - } - } + }, + }, ) collection_w.create_index("text", {"index_type": "INVERTED"}) collection_w.load() num_entities = collection_w.num_entities - res, _ = collection_w.query( - expr="", - output_fields=["count(*)"] - ) + res, _ = collection_w.query(expr="", output_fields=["count(*)"]) count = res[0]["count(*)"] assert len(data) == num_entities assert len(data) == count @@ -1229,17 +1296,14 @@ class TestUpsertWithFullTextSearch(TestcaseBase): } for i in range(data_size // 2) ] - upsert_data += data[data_size // 2:] + upsert_data += data[data_size // 2 :] for i in range(0, len(upsert_data), batch_size): collection_w.upsert( - upsert_data[i: i + batch_size] + upsert_data[i : i + batch_size] if i + batch_size < len(upsert_data) - else upsert_data[i: len(upsert_data)] + else upsert_data[i : len(upsert_data)] ) - res, _ = collection_w.query( - expr="id >= 0", - output_fields=["*"] - ) + res, _ = collection_w.query(expr="id >= 0", output_fields=["*"]) upsert_data_map = {} for d in upsert_data: upsert_data_map[d["id"]] = d @@ -1260,7 +1324,9 @@ class TestUpsertWithFullTextSearchNegative(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("nullable", [False]) @pytest.mark.parametrize("tokenizer", ["standard"]) - def test_upsert_for_full_text_search_with_no_varchar_data(self, tokenizer, nullable): + def test_upsert_for_full_text_search_with_no_varchar_data( + self, tokenizer, nullable + ): """ target: test upsert data for full text search with no varchar data method: 1. 
@@ -1321,10 +1387,8 @@ class TestUpsertWithFullTextSearchNegative(TestcaseBase):
             name=cf.gen_unique_str(prefix), schema=schema
         )
         fake = fake_en
-        language = "en"
         if tokenizer == "jieba":
             fake = fake_zh
-            language = "zh"
 
         data = [
             {
@@ -1342,13 +1406,17 @@ class TestUpsertWithFullTextSearchNegative(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
         collection_w.create_index(
             "text_sparse_emb",
@@ -1359,16 +1427,13 @@ class TestUpsertWithFullTextSearchNegative(TestcaseBase):
                     "drop_ratio_build": 0.3,
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
-                }
-            }
+                },
+            },
         )
         collection_w.create_index("text", {"index_type": "INVERTED"})
         collection_w.load()
         num_entities = collection_w.num_entities
-        res, _ = collection_w.query(
-            expr="",
-            output_fields=["count(*)"]
-        )
+        res, _ = collection_w.query(expr="", output_fields=["count(*)"])
         count = res[0]["count(*)"]
         assert len(data) == num_entities
         assert len(data) == count
@@ -1380,16 +1445,16 @@ class TestUpsertWithFullTextSearchNegative(TestcaseBase):
                 "word": fake.word().lower(),
                 "sentence": fake.sentence().lower(),
                 "paragraph": fake.paragraph().lower(),
-                "text": fake.text().lower() if random.random() < 0.5 else 1,  # mix some int data
+                "text": fake.text().lower()
+                if random.random() < 0.5
+                else 1,  # mix some int data
                 "emb": [random.random() for _ in range(dim)],
             }
             for i in range(data_size)
         ]
         check_items = {ct.err_code: 1, ct.err_msg: "inconsistent with defined schema"}
         check_task = CheckTasks.err_res
-        collection_w.upsert(upsert_data,
-                            check_task=check_task,
-                            check_items=check_items)
+        collection_w.upsert(upsert_data, check_task=check_task, check_items=check_items)
 
 
 class TestDeleteWithFullTextSearch(TestcaseBase):
@@ -1479,13 +1544,17 @@ class TestDeleteWithFullTextSearch(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
         collection_w.create_index(
             "text_sparse_emb",
@@ -1496,47 +1565,37 @@ class TestDeleteWithFullTextSearch(TestcaseBase):
                     "drop_ratio_build": 0.3,
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
-                }
-            }
+                },
+            },
         )
         collection_w.create_index("text", {"index_type": "INVERTED"})
         collection_w.load()
         num_entities = collection_w.num_entities
-        res, _ = collection_w.query(
-            expr="",
-            output_fields=["count(*)"]
-        )
+        res, _ = collection_w.query(expr="", output_fields=["count(*)"])
         count = res[0]["count(*)"]
         assert len(data) == num_entities
         assert len(data) == count
 
         # delete half of the data
         delete_ids = [i for i in range(data_size // 2)]
-        collection_w.delete(
-            expr=f"id in {delete_ids}"
-        )
-        res, _ = collection_w.query(
-            expr="",
-            output_fields=["count(*)"]
-        )
+        collection_w.delete(expr=f"id in {delete_ids}")
+        res, _ = collection_w.query(expr="", output_fields=["count(*)"])
         count = res[0]["count(*)"]
         assert count == data_size // 2
 
         # query with delete expr and get empty result
-        res, _ = collection_w.query(
-            expr=f"id in {delete_ids}",
-            output_fields=["*"]
-        )
+        res, _ = collection_w.query(expr=f"id in {delete_ids}", output_fields=["*"])
         assert len(res) == 0
 
         # search with text has been deleted, not in the result
-        search_data = df["text"].to_list()[:data_size // 2]
+        search_data = df["text"].to_list()[: data_size // 2]
         res_list, _ = collection_w.search(
             data=search_data,
             anns_field="text_sparse_emb",
             param={},
             limit=100,
-            output_fields=["id", "text"])
+            output_fields=["id", "text"],
+        )
         for i in range(len(res_list)):
             query_text = search_data[i]
             result_texts = [r.text for r in res_list[i]]
@@ -1547,6 +1606,7 @@ class TestDeleteWithFullTextSearchNegative(TestcaseBase):
     """
     todo: add some negative cases
     """
+
     pass
 
 
@@ -1564,7 +1624,7 @@ class TestCreateIndexWithFullTextSearch(TestcaseBase):
     @pytest.mark.parametrize("index_type", ["SPARSE_INVERTED_INDEX", "SPARSE_WAND"])
     @pytest.mark.parametrize("tokenizer", ["standard"])
     def test_create_index_for_full_text_search_default(
-            self, tokenizer, index_type, k, b
+        self, tokenizer, index_type, k, b
     ):
         """
         target: test create index for full text search
@@ -1631,8 +1691,12 @@ class TestCreateIndexWithFullTextSearch(TestcaseBase):
             {
                 "id": i,
                 "word": fake.word().lower() if random.random() >= empty_percent else "",
-                "sentence": fake.sentence().lower() if random.random() >= empty_percent else "",
-                "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
+                "sentence": fake.sentence().lower()
+                if random.random() >= empty_percent
+                else "",
+                "paragraph": fake.paragraph().lower()
+                if random.random() >= empty_percent
+                else "",
                 "text": fake.text().lower() if random.random() >= empty_percent else "",
                 "emb": [random.random() for _ in range(dim)],
             }
@@ -1643,13 +1707,17 @@ class TestCreateIndexWithFullTextSearch(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
         collection_w.create_index(
             "text_sparse_emb",
@@ -1659,8 +1727,8 @@ class TestCreateIndexWithFullTextSearch(TestcaseBase):
                 "params": {
                     "bm25_k1": k,
                     "bm25_b": b,
-                }
-            }
+                },
+            },
         )
         # describe index info to verify
         res = collection_w.indexes
@@ -1688,7 +1756,7 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
     @pytest.mark.parametrize("index_type", ["HNSW", "INVALID_INDEX_TYPE"])
     @pytest.mark.parametrize("tokenizer", ["standard"])
     def test_create_full_text_search_with_invalid_index_type(
-            self, tokenizer, index_type, k, b
+        self, tokenizer, index_type, k, b
     ):
         """
         target: test create index for full text search with invalid index type
@@ -1754,8 +1822,12 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
             {
                 "id": i,
                 "word": fake.word().lower() if random.random() >= empty_percent else "",
-                "sentence": fake.sentence().lower() if random.random() >= empty_percent else "",
-                "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
+                "sentence": fake.sentence().lower()
+                if random.random() >= empty_percent
+                else "",
+                "paragraph": fake.paragraph().lower()
+                if random.random() >= empty_percent
+                else "",
                 "text": fake.text().lower() if random.random() >= empty_percent else "",
                 "emb": [random.random() for _ in range(dim)],
             }
@@ -1766,13 +1838,17 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
         error = {"err_code": 1100, "err_msg": "invalid"}
         collection_w.create_index(
@@ -1783,10 +1859,10 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
                 "params": {
                     "bm25_k1": k,
                     "bm25_b": b,
-                }
+                },
             },
             check_task=CheckTasks.err_res,
-            check_items=error
+            check_items=error,
         )
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -1796,7 +1872,7 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
     @pytest.mark.parametrize("metric_type", ["COSINE", "L2", "IP"])
     @pytest.mark.parametrize("tokenizer", ["standard"])
     def test_create_full_text_search_index_with_invalid_metric_type(
-            self, tokenizer, index_type, metric_type, k, b
+        self, tokenizer, index_type, metric_type, k, b
     ):
         """
         target: test create index for full text search with invalid metric type
@@ -1862,8 +1938,12 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
             {
                 "id": i,
                 "word": fake.word().lower() if random.random() >= empty_percent else "",
-                "sentence": fake.sentence().lower() if random.random() >= empty_percent else "",
-                "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
+                "sentence": fake.sentence().lower()
+                if random.random() >= empty_percent
+                else "",
+                "paragraph": fake.paragraph().lower()
+                if random.random() >= empty_percent
+                else "",
                 "text": fake.text().lower() if random.random() >= empty_percent else "",
                 "emb": [random.random() for _ in range(dim)],
             }
@@ -1874,15 +1954,22 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
        )
-        error = {ct.err_code: 65535, ct.err_msg: "index metric type of BM25 function output field must be BM25"}
+        error = {
+            ct.err_code: 65535,
+            ct.err_msg: "index metric type of BM25 function output field must be BM25",
+        }
         collection_w.create_index(
             "text_sparse_emb",
             {
@@ -1891,10 +1978,10 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
                 "params": {
                     "bm25_k1": k,
                    "bm25_b": b,
-                }
+                },
             },
             check_task=CheckTasks.err_res,
-            check_items=error
+            check_items=error,
         )
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -1903,7 +1990,7 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
     @pytest.mark.parametrize("index_type", ["SPARSE_INVERTED_INDEX"])
     @pytest.mark.parametrize("tokenizer", ["standard"])
     def test_create_index_using_bm25_metric_type_for_non_bm25_output_field(
-            self, tokenizer, index_type, k, b
+        self, tokenizer, index_type, k, b
     ):
         """
         target: test create index using bm25 metric type for non bm25 output field (dense float vector or
@@ -1970,8 +2057,12 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
             {
                 "id": i,
                 "word": fake.word().lower() if random.random() >= empty_percent else "",
-                "sentence": fake.sentence().lower() if random.random() >= empty_percent else "",
-                "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
+                "sentence": fake.sentence().lower()
+                if random.random() >= empty_percent
+                else "",
+                "paragraph": fake.paragraph().lower()
+                if random.random() >= empty_percent
+                else "",
                 "text": fake.text().lower() if random.random() >= empty_percent else "",
                 "emb": [random.random() for _ in range(dim)],
             }
@@ -1982,16 +2073,23 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
-        error = {ct.err_code: 1100, ct.err_msg: "float vector index does not support metric type: BM25"}
+        error = {
+            ct.err_code: 1100,
+            ct.err_msg: "float vector index does not support metric type: BM25",
+        }
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "BM25", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "BM25",
+                "params": {"M": 16, "efConstruction": 500},
+            },
             check_task=CheckTasks.err_res,
-            check_items=error
+            check_items=error,
         )
 
     @pytest.mark.tags(CaseLabel.L0)
@@ -2000,7 +2098,7 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
     @pytest.mark.parametrize("index_type", ["SPARSE_INVERTED_INDEX"])
     @pytest.mark.parametrize("tokenizer", ["standard"])
     def test_create_full_text_search_with_invalid_bm25_params(
-            self, tokenizer, index_type, k, b
+        self, tokenizer, index_type, k, b
     ):
         """
         target: test create index for full text search with invalid bm25 params
@@ -2066,8 +2164,12 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
             {
                 "id": i,
                 "word": fake.word().lower() if random.random() >= empty_percent else "",
-                "sentence": fake.sentence().lower() if random.random() >= empty_percent else "",
-                "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
+                "sentence": fake.sentence().lower()
+                if random.random() >= empty_percent
+                else "",
+                "paragraph": fake.paragraph().lower()
+                if random.random() >= empty_percent
+                else "",
                 "text": fake.text().lower() if random.random() >= empty_percent else "",
                 "emb": [random.random() for _ in range(dim)],
             }
@@ -2078,17 +2180,24 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
 
         check_task = CheckTasks.err_res
-        error = {"err_code": 1100, "err_msg": "invalid"}  # todo, update error code and message
+        error = {
+            "err_code": 1100,
+            "err_msg": "invalid",
+        }  # todo, update error code and message
         collection_w.create_index(
             "text_sparse_emb",
             {
@@ -2097,10 +2206,10 @@ class TestCreateIndexWithFullTextSearchNegative(TestcaseBase):
                 "params": {
                     "bm25_k1": k,
                     "bm25_b": b,
-                }
+                },
             },
             check_task=check_task,
-            check_items=error
+            check_items=error,
         )
@@ -2122,7 +2231,15 @@ class TestSearchWithFullTextSearch(TestcaseBase):
     @pytest.mark.parametrize("tokenizer", ["standard"])
     @pytest.mark.parametrize("offset", [10, 0])
     def test_full_text_search_default(
-        self, offset, tokenizer, expr, enable_inverted_index, enable_partition_key, empty_percent, index_type, nq
+        self,
+        offset,
+        tokenizer,
+        expr,
+        enable_inverted_index,
+        enable_partition_key,
+        empty_percent,
+        index_type,
+        nq,
     ):
         """
         target: test full text search
@@ -2194,8 +2311,12 @@ class TestSearchWithFullTextSearch(TestcaseBase):
             {
                 "id": i,
                 "word": fake.word().lower() if random.random() >= empty_percent else "",
-                "sentence": fake.sentence().lower() if random.random() >= empty_percent else "",
-                "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
+                "sentence": fake.sentence().lower()
+                if random.random() >= empty_percent
+                else "",
+                "paragraph": fake.paragraph().lower()
+                if random.random() >= empty_percent
+                else "",
                 "text": fake.text().lower() if random.random() >= empty_percent else "",
                 "emb": [random.random() for _ in range(dim)],
             }
@@ -2208,19 +2329,23 @@ class TestSearchWithFullTextSearch(TestcaseBase):
         most_freq_word = word_freq.most_common(10)
         tokens = [item[0] for item in most_freq_word]
         if len(tokens) == 0:
-            log.info(f"empty tokens, add a dummy token")
+            log.info("empty tokens, add a dummy token")
             tokens = ["dummy"]
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         collection_w.flush()
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
        )
         collection_w.create_index(
             "text_sparse_emb",
@@ -2230,15 +2355,15 @@ class TestSearchWithFullTextSearch(TestcaseBase):
                 "params": {
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
-                }
-            }
+                },
+            },
         )
         if enable_inverted_index:
             collection_w.create_index("text", {"index_type": "INVERTED"})
         collection_w.load()
         limit = 100
         token = random.choice(tokens)
-        search_data = [fake.text().lower() + f" {token} "  for _ in range(nq)]
+        search_data = [fake.text().lower() + f" {token} " for _ in range(nq)]
         if expr == "text_match":
             filter = f"TEXT_MATCH(text, '{token}')"
             res, _ = collection_w.query(
@@ -2262,7 +2387,8 @@ class TestSearchWithFullTextSearch(TestcaseBase):
             param={},
             limit=limit + offset,
             offset=0,
-            output_fields=["id", "text"])
+            output_fields=["id", "text"],
+        )
         full_res_id_list = []
         for i in range(nq):
             res = full_res_list[i]
@@ -2278,7 +2404,8 @@ class TestSearchWithFullTextSearch(TestcaseBase):
             param={},
             limit=limit,
             offset=offset,
-            output_fields=["id", "text"])
+            output_fields=["id", "text"],
+        )
 
         # verify correctness
         for i in range(nq):
@@ -2301,10 +2428,13 @@ class TestSearchWithFullTextSearch(TestcaseBase):
                 if expr == "id_range":
                     assert _id < data_size // 2
                 # verify search result has overlap with search text
-                overlap, word_freq_a, word_freq_b = cf.check_token_overlap(search_text, result_text, language=language)
+                overlap, word_freq_a, word_freq_b = cf.check_token_overlap(
+                    search_text, result_text, language=language
+                )
                 log.info(f"overlap {overlap}")
-                assert len(
-                    overlap) > 0, f"query text: {search_text}, \ntext: {result_text} \n overlap: {overlap} \n word freq a: {word_freq_a} \n word freq b: {word_freq_b}\n result: {r}"
+                assert len(overlap) > 0, (
+                    f"query text: {search_text}, \ntext: {result_text} \n overlap: {overlap} \n word freq a: {word_freq_a} \n word freq b: {word_freq_b}\n result: {r}"
+                )
 
     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("nq", [2])
@@ -2317,8 +2447,17 @@ class TestSearchWithFullTextSearch(TestcaseBase):
     @pytest.mark.parametrize("tokenizer", ["jieba"])
     @pytest.mark.parametrize("inverted_index_algo", ct.inverted_index_algo)
     def test_full_text_search_with_jieba_tokenizer(
-        self, offset, tokenizer, expr, enable_inverted_index, enable_partition_key,
-        empty_percent, index_type, nq, inverted_index_algo):
+        self,
+        offset,
+        tokenizer,
+        expr,
+        enable_inverted_index,
+        enable_partition_key,
+        empty_percent,
+        index_type,
+        nq,
+        inverted_index_algo,
+    ):
         """
         target: test full text search
         method: 1. enable full text search with jieba tokenizer and insert data with varchar
@@ -2332,7 +2471,7 @@ class TestSearchWithFullTextSearch(TestcaseBase):
             lang_type = "english"
         analyzer_params = {
-            "type": lang_type, 
+            "type": lang_type,
         }
         dim = 128
         fields = [
@@ -2394,8 +2533,12 @@ class TestSearchWithFullTextSearch(TestcaseBase):
             {
                 "id": i,
                 "word": fake.word().lower() if random.random() >= empty_percent else "",
-                "sentence": fake.sentence().lower() if random.random() >= empty_percent else "",
-                "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
+                "sentence": fake.sentence().lower()
+                if random.random() >= empty_percent
+                else "",
+                "paragraph": fake.paragraph().lower()
+                if random.random() >= empty_percent
+                else "",
                 "text": fake.text().lower() if random.random() >= empty_percent else "",
                 "emb": [random.random() for _ in range(dim)],
             }
@@ -2410,19 +2553,23 @@ class TestSearchWithFullTextSearch(TestcaseBase):
             if len(item[0]) == 2:
                 tokens.append(item[0])
         if len(tokens) == 0:
-            log.info(f"empty tokens, add a dummy token")
+            log.info("empty tokens, add a dummy token")
             tokens = ["dummy"]
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
             )
         collection_w.flush()
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
         collection_w.create_index(
             "text_sparse_emb",
@@ -2432,9 +2579,9 @@ class TestSearchWithFullTextSearch(TestcaseBase):
                 "params": {
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
-                    "inverted_index_algo": inverted_index_algo
-                }
-            }
+                    "inverted_index_algo": inverted_index_algo,
+                },
+            },
         )
         if enable_inverted_index:
             collection_w.create_index("text", {"index_type": "INVERTED"})
@@ -2465,7 +2612,8 @@ class TestSearchWithFullTextSearch(TestcaseBase):
             param={},
             limit=limit + offset,
             offset=0,
-            output_fields=["id", "text"])
+            output_fields=["id", "text"],
+        )
         full_res_id_list = []
         for i in range(nq):
             res = full_res_list[i]
@@ -2481,7 +2629,8 @@ class TestSearchWithFullTextSearch(TestcaseBase):
             param={},
             limit=limit,
             offset=offset,
-            output_fields=["id", "text"])
+            output_fields=["id", "text"],
+        )
 
         # verify correctness
         for i in range(nq):
@@ -2504,11 +2653,13 @@ class TestSearchWithFullTextSearch(TestcaseBase):
                 if expr == "id_range":
                     assert _id < data_size // 2
                 # verify search result has overlap with search text
-                overlap, word_freq_a, word_freq_b = cf.check_token_overlap(search_text, result_text, language=language)
+                overlap, word_freq_a, word_freq_b = cf.check_token_overlap(
+                    search_text, result_text, language=language
+                )
log.info(f"overlap {overlap}") - assert len( - overlap) > 0, f"query text: {search_text}, \ntext: {result_text} \n overlap: {overlap} \n word freq a: {word_freq_a} \n word freq b: {word_freq_b}\n result: {r}" - + assert len(overlap) > 0, ( + f"query text: {search_text}, \ntext: {result_text} \n overlap: {overlap} \n word freq a: {word_freq_a} \n word freq b: {word_freq_b}\n result: {r}" + ) @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("nq", [2]) @@ -2520,7 +2671,15 @@ class TestSearchWithFullTextSearch(TestcaseBase): @pytest.mark.parametrize("tokenizer", ["standard"]) @pytest.mark.parametrize("offset", [0]) def test_full_text_search_for_growing_segment( - self, offset, tokenizer, expr, enable_inverted_index, enable_partition_key, empty_percent, index_type, nq + self, + offset, + tokenizer, + expr, + enable_inverted_index, + enable_partition_key, + empty_percent, + index_type, + nq, ): """ target: test full text search @@ -2592,8 +2751,12 @@ class TestSearchWithFullTextSearch(TestcaseBase): { "id": i, "word": fake.word().lower() if random.random() >= empty_percent else "", - "sentence": fake.sentence().lower() if random.random() >= empty_percent else "", - "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "", + "sentence": fake.sentence().lower() + if random.random() >= empty_percent + else "", + "paragraph": fake.paragraph().lower() + if random.random() >= empty_percent + else "", "text": fake.text().lower() if random.random() >= empty_percent else "", "emb": [random.random() for _ in range(dim)], } @@ -2606,11 +2769,15 @@ class TestSearchWithFullTextSearch(TestcaseBase): most_freq_word = word_freq.most_common(10) tokens = [item[0] for item in most_freq_word] if len(tokens) == 0: - log.info(f"empty tokens, add a dummy token") + log.info("empty tokens, add a dummy token") tokens = ["dummy"] collection_w.create_index( "emb", - {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}}, + { + "index_type": "HNSW", + "metric_type": "L2", + "params": {"M": 16, "efConstruction": 500}, + }, ) collection_w.create_index( "text_sparse_emb", @@ -2620,8 +2787,8 @@ class TestSearchWithFullTextSearch(TestcaseBase): "params": { "bm25_k1": 1.5, "bm25_b": 0.75, - } - } + }, + }, ) if enable_inverted_index: collection_w.create_index("text", {"index_type": "INVERTED"}) @@ -2629,12 +2796,14 @@ class TestSearchWithFullTextSearch(TestcaseBase): batch_size = 5000 for i in range(0, len(df), batch_size): collection_w.insert( - data[i: i + batch_size] + data[i : i + batch_size] if i + batch_size < len(df) - else data[i: len(df)] + else data[i : len(df)] ) limit = 100 - search_data = [fake.text().lower() + " " + random.choice(tokens) for _ in range(nq)] + search_data = [ + fake.text().lower() + " " + random.choice(tokens) for _ in range(nq) + ] if expr == "text_match": filter = f"TextMatch(text, '{tokens[0]}')" res, _ = collection_w.query( @@ -2658,7 +2827,8 @@ class TestSearchWithFullTextSearch(TestcaseBase): param={}, limit=limit + offset, offset=0, - output_fields=["id", "text"]) + output_fields=["id", "text"], + ) full_res_id_list = [] for i in range(nq): res = full_res_list[i] @@ -2674,7 +2844,8 @@ class TestSearchWithFullTextSearch(TestcaseBase): param={}, limit=limit, offset=offset, - output_fields=["id", "text"]) + output_fields=["id", "text"], + ) # verify correctness for i in range(nq): @@ -2697,10 +2868,13 @@ class TestSearchWithFullTextSearch(TestcaseBase): if expr == "id_range": assert _id < data_size // 2 # verify search result has 
-                overlap, word_freq_a, word_freq_b = cf.check_token_overlap(search_text, result_text, language=language)
+                overlap, word_freq_a, word_freq_b = cf.check_token_overlap(
+                    search_text, result_text, language=language
+                )
                 log.info(f"overlap {overlap}")
-                assert len(
-                    overlap) > 0, f"query text: {search_text}, \ntext: {result_text} \n overlap: {overlap} \n word freq a: {word_freq_a} \n word freq b: {word_freq_b}\n result: {r}"
+                assert len(overlap) > 0, (
+                    f"query text: {search_text}, \ntext: {result_text} \n overlap: {overlap} \n word freq a: {word_freq_a} \n word freq b: {word_freq_b}\n result: {r}"
+                )
 
     @pytest.mark.tags(CaseLabel.L1)
     @pytest.mark.parametrize("nq", [2])
@@ -2711,7 +2885,14 @@ class TestSearchWithFullTextSearch(TestcaseBase):
     @pytest.mark.parametrize("expr", [None])
     @pytest.mark.parametrize("tokenizer", ["standard"])
     def test_full_text_search_with_range_search(
-        self, tokenizer, expr, enable_inverted_index, enable_partition_key, empty_percent, index_type, nq
+        self,
+        tokenizer,
+        expr,
+        enable_inverted_index,
+        enable_partition_key,
+        empty_percent,
+        index_type,
+        nq,
     ):
         """
         target: test full text search
@@ -2783,8 +2964,12 @@ class TestSearchWithFullTextSearch(TestcaseBase):
             {
                 "id": i,
                 "word": fake.word().lower() if random.random() >= empty_percent else "",
-                "sentence": fake.sentence().lower() if random.random() >= empty_percent else "",
-                "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
+                "sentence": fake.sentence().lower()
+                if random.random() >= empty_percent
+                else "",
+                "paragraph": fake.paragraph().lower()
+                if random.random() >= empty_percent
+                else "",
                 "text": fake.text().lower() if random.random() >= empty_percent else "",
                 "emb": [random.random() for _ in range(dim)],
             }
@@ -2796,18 +2981,22 @@ class TestSearchWithFullTextSearch(TestcaseBase):
         word_freq = cf.analyze_documents(texts, language=language)
         tokens = list(word_freq.keys())
         if len(tokens) == 0:
-            log.info(f"empty tokens, add a dummy token")
+            log.info("empty tokens, add a dummy token")
             tokens = ["dummy"]
         batch_size = 5000
         for i in range(0, len(df), batch_size):
             collection_w.insert(
-                data[i: i + batch_size]
+                data[i : i + batch_size]
                 if i + batch_size < len(df)
-                else data[i: len(df)]
+                else data[i : len(df)]
            )
         collection_w.create_index(
             "emb",
-            {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}},
+            {
+                "index_type": "HNSW",
+                "metric_type": "L2",
+                "params": {"M": 16, "efConstruction": 500},
+            },
         )
         collection_w.create_index(
             "text_sparse_emb",
@@ -2817,8 +3006,8 @@ class TestSearchWithFullTextSearch(TestcaseBase):
                 "params": {
                     "bm25_k1": 1.5,
                     "bm25_b": 0.75,
-                }
-            }
+                },
+            },
         )
         if enable_inverted_index:
             collection_w.create_index("text", {"index_type": "INVERTED"})
         collection_w.load()
@@ -2830,10 +3019,10 @@ class TestSearchWithFullTextSearch(TestcaseBase):
         res_list, _ = collection_w.search(
             data=search_data,
             anns_field="text_sparse_emb",
-            param={
-            },
+            param={},
             limit=limit,  # get a wider range of search result
-            output_fields=["id", "text"])
+            output_fields=["id", "text"],
+        )
 
         distance_list = []
         for i in range(nq):
@@ -2850,13 +3039,10 @@ class TestSearchWithFullTextSearch(TestcaseBase):
         res_list, _ = collection_w.search(
             data=search_data,
             anns_field="text_sparse_emb",
-            param={
-                "params": {
-                    "radius": low, "range_filter": high
-                }
-            },
+            param={"params": {"radius": low, "range_filter": high}},
             limit=limit,
-            output_fields=["id", "text"])
+            output_fields=["id", "text"],
+        )
 
         # verify correctness
         for i in range(nq):
log.info(f"res: {len(res_list[i])}") @@ -2876,7 +3062,14 @@ class TestSearchWithFullTextSearch(TestcaseBase): @pytest.mark.parametrize("expr", [None]) @pytest.mark.parametrize("tokenizer", ["standard"]) def test_full_text_search_with_search_iterator( - self, tokenizer, expr, enable_inverted_index, enable_partition_key, empty_percent, index_type, nq + self, + tokenizer, + expr, + enable_inverted_index, + enable_partition_key, + empty_percent, + index_type, + nq, ): """ target: test full text search @@ -2948,8 +3141,12 @@ class TestSearchWithFullTextSearch(TestcaseBase): { "id": i, "word": fake.word().lower() if random.random() >= empty_percent else "", - "sentence": fake.sentence().lower() if random.random() >= empty_percent else "", - "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "", + "sentence": fake.sentence().lower() + if random.random() >= empty_percent + else "", + "paragraph": fake.paragraph().lower() + if random.random() >= empty_percent + else "", "text": fake.text().lower() if random.random() >= empty_percent else "", "emb": [random.random() for _ in range(dim)], } @@ -2961,18 +3158,22 @@ class TestSearchWithFullTextSearch(TestcaseBase): word_freq = cf.analyze_documents(texts, language=language) tokens = list(word_freq.keys()) if len(tokens) == 0: - log.info(f"empty tokens, add a dummy token") + log.info("empty tokens, add a dummy token") tokens = ["dummy"] batch_size = 5000 for i in range(0, len(df), batch_size): collection_w.insert( - data[i: i + batch_size] + data[i : i + batch_size] if i + batch_size < len(df) - else data[i: len(df)] + else data[i : len(df)] ) collection_w.create_index( "emb", - {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}}, + { + "index_type": "HNSW", + "metric_type": "L2", + "params": {"M": 16, "efConstruction": 500}, + }, ) collection_w.create_index( "text_sparse_emb", @@ -2982,13 +3183,15 @@ class TestSearchWithFullTextSearch(TestcaseBase): "params": { "bm25_k1": 1.5, "bm25_b": 0.75, - } - } + }, + }, ) if enable_inverted_index: collection_w.create_index("text", {"index_type": "INVERTED"}) collection_w.load() - search_data = [fake.text().lower() + " " + random.choice(tokens) for _ in range(nq)] + search_data = [ + fake.text().lower() + " " + random.choice(tokens) for _ in range(nq) + ] log.info(f"search data: {search_data}") # get distance with search data batch_size = 100 @@ -3001,7 +3204,7 @@ class TestSearchWithFullTextSearch(TestcaseBase): "metric_type": "BM25", }, output_fields=["id", "text"], - limit=limit + limit=limit, ) iter_result = [] while True: @@ -3014,6 +3217,7 @@ class TestSearchWithFullTextSearch(TestcaseBase): for r in iter_result[:-1]: assert r == batch_size + class TestSearchWithFullTextSearchNegative(TestcaseBase): """ ****************************************************************** @@ -3030,7 +3234,13 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase): @pytest.mark.parametrize("tokenizer", ["standard"]) @pytest.mark.xfail(reason="issue: https://github.com/milvus-io/milvus/issues/37022") def test_search_for_full_text_search_with_empty_string_search_data( - self, tokenizer, enable_inverted_index, enable_partition_key, empty_percent, index_type, invalid_search_data + self, + tokenizer, + enable_inverted_index, + enable_partition_key, + empty_percent, + index_type, + invalid_search_data, ): """ target: test full text search @@ -3092,17 +3302,18 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase): ) fake = fake_en if tokenizer == "jieba": - 
language = "zh" fake = fake_zh - else: - language = "en" data = [ { "id": i, "word": fake.word().lower() if random.random() >= empty_percent else "", - "sentence": fake.sentence().lower() if random.random() >= empty_percent else "", - "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "", + "sentence": fake.sentence().lower() + if random.random() >= empty_percent + else "", + "paragraph": fake.paragraph().lower() + if random.random() >= empty_percent + else "", "text": fake.text().lower() if random.random() >= empty_percent else "", "emb": [random.random() for _ in range(dim)], } @@ -3113,13 +3324,17 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase): batch_size = 5000 for i in range(0, len(df), batch_size): collection_w.insert( - data[i: i + batch_size] + data[i : i + batch_size] if i + batch_size < len(df) - else data[i: len(df)] + else data[i : len(df)] ) collection_w.create_index( "emb", - {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}}, + { + "index_type": "HNSW", + "metric_type": "L2", + "params": {"M": 16, "efConstruction": 500}, + }, ) collection_w.create_index( "text_sparse_emb", @@ -3129,8 +3344,8 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase): "params": { "bm25_k1": 1.5, "bm25_b": 0.75, - } - } + }, + }, ) if enable_inverted_index: collection_w.create_index("text", {"index_type": "INVERTED"}) @@ -3150,7 +3365,6 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase): for r in res: assert len(r) == 0 - @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("empty_percent", [0]) @pytest.mark.parametrize("enable_partition_key", [True]) @@ -3159,7 +3373,13 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase): @pytest.mark.parametrize("invalid_search_data", ["sparse_vector", "dense_vector"]) @pytest.mark.parametrize("tokenizer", ["standard"]) def test_search_for_full_text_search_with_invalid_search_data( - self, tokenizer, enable_inverted_index, enable_partition_key, empty_percent, index_type, invalid_search_data + self, + tokenizer, + enable_inverted_index, + enable_partition_key, + empty_percent, + index_type, + invalid_search_data, ): """ target: test full text search @@ -3230,32 +3450,39 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase): { "id": i, "word": fake.word().lower() if random.random() >= empty_percent else "", - "sentence": fake.sentence().lower() if random.random() >= empty_percent else "", - "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "", + "sentence": fake.sentence().lower() + if random.random() >= empty_percent + else "", + "paragraph": fake.paragraph().lower() + if random.random() >= empty_percent + else "", "text": fake.text().lower() if random.random() >= empty_percent else "", "emb": [random.random() for _ in range(dim)], } for i in range(data_size) ] df = pd.DataFrame(data) - corpus = df["text"].to_list() log.info(f"dataframe\n{df}") texts = df["text"].to_list() word_freq = cf.analyze_documents(texts, language=language) tokens = list(word_freq.keys()) if len(tokens) == 0: - log.info(f"empty tokens, add a dummy token") + log.info("empty tokens, add a dummy token") tokens = ["dummy"] batch_size = 5000 for i in range(0, len(df), batch_size): collection_w.insert( - data[i: i + batch_size] + data[i : i + batch_size] if i + batch_size < len(df) - else data[i: len(df)] + else data[i : len(df)] ) collection_w.create_index( "emb", - {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}}, + { 
+ "index_type": "HNSW", + "metric_type": "L2", + "params": {"M": 16, "efConstruction": 500}, + }, ) collection_w.create_index( "text_sparse_emb", @@ -3265,8 +3492,8 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase): "params": { "bm25_k1": 1.5, "bm25_b": 0.75, - } - } + }, + }, ) if enable_inverted_index: collection_w.create_index("text", {"index_type": "INVERTED"}) @@ -3274,12 +3501,18 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase): nq = 2 limit = 100 if invalid_search_data == "sparse_vector": - search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR) + search_data = cf.gen_vectors( + nb=nq, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR + ) else: - search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type=DataType.FLOAT_VECTOR) + search_data = cf.gen_vectors( + nb=nq, dim=1000, vector_data_type=DataType.FLOAT_VECTOR + ) log.info(f"search data: {search_data}") - error = {ct.err_code: 65535, - ct.err_msg: "please provide varchar/text for BM25 Function based search"} + error = { + ct.err_code: 65535, + ct.err_msg: "please provide varchar/text for BM25 Function based search", + } collection_w.search( data=search_data, anns_field="text_sparse_emb", @@ -3287,7 +3520,7 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase): limit=limit, output_fields=["id", "text"], check_task=CheckTasks.err_res, - check_items=error + check_items=error, ) @@ -3307,7 +3540,13 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase): @pytest.mark.parametrize("tokenizer", ["standard"]) @pytest.mark.parametrize("inverted_index_algo", ct.inverted_index_algo) def test_hybrid_search_with_full_text_search( - self, tokenizer, enable_inverted_index, enable_partition_key, empty_percent, index_type, inverted_index_algo + self, + tokenizer, + enable_inverted_index, + enable_partition_key, + empty_percent, + index_type, + inverted_index_algo, ): """ target: test full text search @@ -3374,11 +3613,17 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase): { "id": i, "word": fake.word().lower() if random.random() >= empty_percent else "", - "sentence": fake.sentence().lower() if random.random() >= empty_percent else "", - "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "", + "sentence": fake.sentence().lower() + if random.random() >= empty_percent + else "", + "paragraph": fake.paragraph().lower() + if random.random() >= empty_percent + else "", "text": fake.text().lower() if random.random() >= empty_percent else "", "dense_emb": [random.random() for _ in range(dim)], - "neural_sparse_emb": cf.gen_vectors(nb=1, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)[0], + "neural_sparse_emb": cf.gen_vectors( + nb=1, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR + )[0], } for i in range(data_size) ] @@ -3387,13 +3632,17 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase): batch_size = 5000 for i in range(0, len(df), batch_size): collection_w.insert( - data[i: i + batch_size] + data[i : i + batch_size] if i + batch_size < len(df) - else data[i: len(df)] + else data[i : len(df)] ) collection_w.create_index( "dense_emb", - {"index_type": "HNSW", "metric_type": "L2", "params": {"M": 16, "efConstruction": 500}}, + { + "index_type": "HNSW", + "metric_type": "L2", + "params": {"M": 16, "efConstruction": 500}, + }, ) collection_w.create_index( "neural_sparse_emb", @@ -3407,9 +3656,9 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase): "params": { "bm25_k1": 1.5, "bm25_b": 0.75, - "inverted_index_algo": 
inverted_index_algo - } - } + "inverted_index_algo": inverted_index_algo, + }, + }, ) if enable_inverted_index: collection_w.create_index("text", {"index_type": "INVERTED"}) @@ -3429,7 +3678,9 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase): limit=limit, ) sparse_search = AnnSearchRequest( - data=cf.gen_vectors(nb=nq, dim=dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR), + data=cf.gen_vectors( + nb=nq, dim=dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR + ), anns_field="neural_sparse_emb", param={}, limit=limit, @@ -3439,7 +3690,7 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase): reqs=[bm25_search, dense_search, sparse_search], rerank=WeightedRanker(0.5, 0.5, 0.5), limit=limit, - output_fields=["id", "text"] + output_fields=["id", "text"], ) assert len(res_list) == nq # check the result correctness @@ -3447,3 +3698,758 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase): log.info(f"res length: {len(res_list[i])}") assert len(res_list[i]) == limit + +class TestFullTextSearchMultiAnalyzer(TestcaseBase): + """ + Comprehensive tests for multi_analyzer_params (multi-analyzer BM25) functionality in Milvus. + Covers schema creation, data insertion, indexing, searching, alias/default/fallback, edge cases, and more. + """ + + @pytest.mark.tags(CaseLabel.L0) + def test_create_collection_with_multi_analyzer(self): + """ + target: test create collection with multi_analyzer_params + method: create collection with multi_analyzer_params + expected: create collection successfully + """ + # Define multi_analyzer_params + multi_analyzer_params = { + "by_field": "language", + "analyzers": { + "en": {"type": "english"}, + "zh": {"type": "chinese"}, + "default": {"tokenizer": "icu"}, + }, + "alias": {"chinese": "zh", "eng": "en"}, + } + # Define fields + fields = [ + FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="language", dtype=DataType.VARCHAR, max_length=16), + FieldSchema( + name="article_content", + dtype=DataType.VARCHAR, + max_length=1024, + enable_analyzer=True, + multi_analyzer_params=multi_analyzer_params, + ), + FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR), + ] + schema = CollectionSchema( + fields=fields, description="Multi-analyzer BM25 test collection" + ) + bm25_func = Function( + name="bm25", + function_type=FunctionType.BM25, + input_field_names=["article_content"], + output_field_names=["bm25_sparse_vector"], + ) + schema.add_function(bm25_func) + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name, schema=schema) + res, _ = collection_w.describe() + assert len(res["functions"]) == 1 + assert res["fields"][2]["name"] == "article_content" + assert "multi_analyzer_params" in res["fields"][2]["params"] + assert ( + json.loads(res["fields"][2]["params"]["multi_analyzer_params"]) + == multi_analyzer_params + ) + + @pytest.mark.tags(CaseLabel.L0) + def test_insert_and_search_with_multi_analyzer(self): + """ + target: test insert and search with multi_analyzer + method: create collection, insert multilingual data, create index, search with analyzers + expected: insert and search works, correct analyzer is used + """ + multi_analyzer_params = { + "by_field": "language", + "analyzers": { + "en": {"type": "english"}, + "zh": {"type": "chinese"}, + "default": {"tokenizer": "standard"}, + }, + "alias": {"chinese": "zh", "eng": "en"}, + } + fields = [ + FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="language", 
dtype=DataType.VARCHAR, max_length=16), + FieldSchema( + name="article_content", + dtype=DataType.VARCHAR, + max_length=1024, + enable_analyzer=True, + multi_analyzer_params=multi_analyzer_params, + ), + FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR), + ] + schema = CollectionSchema( + fields=fields, description="Multi-analyzer BM25 test collection" + ) + bm25_func = Function( + name="bm25", + function_type=FunctionType.BM25, + input_field_names=["article_content"], + output_field_names=["bm25_sparse_vector"], + ) + schema.add_function(bm25_func) + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name, schema=schema) + # Prepare multilingual data + language_samples = { + "en": ["The quick brown fox.", "Machine learning is fun."], + "zh": ["自然语言处理很重要。", "人工智能改变世界。"], + "fr": ["L'intelligence artificielle.", "Traitement du langage naturel."], + "unknown": ["Some random text for default analyzer."], + } + data = [] + idx = 0 + for lang, samples in language_samples.items(): + for s in samples: + data.append({"doc_id": idx, "language": lang, "article_content": s}) + idx += 1 + collection_w.insert(data) + fake_map = { + "en": fake_en, + "zh": fake_zh, + "de": fake_de, + "jp": fake_jp, + "unknown": fake_en, + } + add_data = [] + for doc_id in range(idx, 3000): + lang = random.choice(["en", "zh", "de", "jp", "unknown"]) + content = fake_map[lang].sentence() + add_data.append( + {"doc_id": doc_id, "language": lang, "article_content": content} + ) + collection_w.insert(add_data) + collection_w.create_index( + "bm25_sparse_vector", + {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "BM25"}, + ) + collection_w.load() + # Search with different analyzers + analyzer_tests = [ + {"language": "en", "query": "machine learning", "analyzer_name": "en"}, + {"language": "zh", "query": "自然语言处理", "analyzer_name": "zh"}, + { + "language": "fr", + "query": "intelligence artificielle", + "analyzer_name": "default", + }, + {"language": "unknown", "query": "random text", "analyzer_name": "default"}, + ] + for test in analyzer_tests: + search_params = { + "metric_type": "BM25", + "analyzer_name": test["analyzer_name"], + } + results, _ = collection_w.search( + data=[test["query"]], + anns_field="bm25_sparse_vector", + param=search_params, + output_fields=["doc_id", "language", "article_content"], + limit=5, + ) + assert len(results) == 1 + assert len(results[0]) > 0 + log.info( + f"Query '{test['query']}' with analyzer '{test['analyzer_name']}' returned {len(results[0])} results" + ) + + @pytest.mark.tags(CaseLabel.L0) + def test_multi_analyzer_fallback(self): + """ + target: test fallback to default analyzer + method: insert data with languages not in analyzers, search without analyzer_name + expected: fallback to default analyzer + """ + multi_analyzer_params = { + "by_field": "language", + "analyzers": { + "en": {"type": "english"}, + "zh": {"type": "chinese"}, + "default": {"tokenizer": "standard"}, + }, + } + fields = [ + FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="language", dtype=DataType.VARCHAR, max_length=16), + FieldSchema( + name="article_content", + dtype=DataType.VARCHAR, + max_length=1024, + enable_analyzer=True, + multi_analyzer_params=multi_analyzer_params, + ), + FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR), + ] + schema = CollectionSchema( + fields=fields, description="Multi-analyzer fallback test" + ) + bm25_func = Function( + name="bm25", + 
function_type=FunctionType.BM25, + input_field_names=["article_content"], + output_field_names=["bm25_sparse_vector"], + ) + schema.add_function(bm25_func) + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name, schema=schema) + data = [ + { + "doc_id": 1, + "language": "en", + "article_content": "English text for testing.", + }, + {"doc_id": 2, "language": "zh", "article_content": "中文测试文本。"}, + { + "doc_id": 3, + "language": "fr", + "article_content": "Texte français pour les tests.", + }, + { + "doc_id": 4, + "language": "de", + "article_content": "Deutscher Text zum Testen.", + }, + { + "doc_id": 5, + "language": "unknown", + "article_content": "Text in unknown language.", + }, + ] + collection_w.insert(data) + collection_w.create_index( + "bm25_sparse_vector", + {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "BM25"}, + ) + collection_w.load() + fallback_tests = [ + {"language": "fr", "query": "texte français"}, + {"language": "de", "query": "deutscher text"}, + {"language": "unknown", "query": "unknown language"}, + ] + for test in fallback_tests: + search_params = {"metric_type": "BM25"} + results, _ = collection_w.search( + data=[test["query"]], + anns_field="bm25_sparse_vector", + param=search_params, + output_fields=["doc_id", "language", "article_content"], + limit=5, + ) + assert len(results) == 1 + assert len(results[0]) > 0 + + @pytest.mark.tags(CaseLabel.L0) + def test_multi_analyzer_alias(self): + """ + target: test alias for multi analyzer + method: insert data with languages in alias + expected: analyzer should be resolved correctly + """ + stop_words = ["a", "an", "the", "of", "to", " "] + multi_analyzer_params = { + "by_field": "language", + "analyzers": { + "en": { + "tokenizer": "standard", + "filter": [ + { + "type": "stop", # Specifies 'stop' as the filter type + "stop_words": stop_words, # Customizes stop words for this filter type + } + ], + }, + "zh": { + "tokenizer": "jieba", + "filter": [ + { + "type": "stop", # Specifies 'stop' as the filter type + "stop_words": stop_words, # Customizes stop words for this filter type + } + ], + }, + "default": {"tokenizer": "icu"}, + }, + "alias": {"chinese": "zh", "eng": "en"}, + } + fields = [ + FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True), + FieldSchema(name="language", dtype=DataType.VARCHAR, max_length=16), + FieldSchema( + name="article_content", + dtype=DataType.VARCHAR, + max_length=8192, + enable_analyzer=True, + multi_analyzer_params=multi_analyzer_params, + ), + FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR), + ] + schema = CollectionSchema( + fields=fields, description="Multi-analyzer fallback test" + ) + bm25_func = Function( + name="bm25", + function_type=FunctionType.BM25, + input_field_names=["article_content"], + output_field_names=["bm25_sparse_vector"], + ) + schema.add_function(bm25_func) + c_name = cf.gen_unique_str(prefix) + collection_w = self.init_collection_wrap(name=c_name, schema=schema) + data = [ + { + "doc_id": 1, + "language": "en", + "article_content": "English text for testing", + }, + { + "doc_id": 2, + "language": "eng", + "article_content": "English text for testing" + + " ".join(stop_words * 5), + }, + {"doc_id": 3, "language": "zh", "article_content": "中文测试文本 "}, + { + "doc_id": 4, + "language": "chinese", + "article_content": "中文测试文本 " + " ".join(stop_words * 5), + }, + { + "doc_id": 5, + "language": "fr", + "article_content": "Texte français pour les tests.", + }, + { + "doc_id": 6, + "language": 
"de", + "article_content": "Deutscher Text zum Testen.", + }, + { + "doc_id": 7, + "language": "unknown", + "article_content": "Text in unknown language.", + }, + { + "doc_id": 8, + "language": "default", + "article_content": " ".join(stop_words * 5), + }, + ] + # " ." * 1000 will be removed in en and zh analyzer, but will be kept in icu analyzer + # if chinese and eng are not go to the alias as expected, then doc is 8 will be returned + collection_w.insert(data) + collection_w.create_index( + "bm25_sparse_vector", + {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "BM25"}, + ) + collection_w.load() + alias_tests = [ + { + "analyzer_name": "eng", + "query": "English text for testing." + " ".join(stop_words * 10), + }, + { + "analyzer_name": "chinese", + "query": "中文测试文本。" + " ".join(stop_words * 10), + }, + ] + + for test in alias_tests: + search_params = { + "metric_type": "BM25", + "analyzer_name": test["analyzer_name"], + } + results, _ = collection_w.search( + data=[test["query"]], + anns_field="bm25_sparse_vector", + param=search_params, + output_fields=["doc_id", "language", "article_content"], + limit=10, + ) + log.info(test) + log.info(results) + assert len(results) == 1 + assert len(results[0]) > 0 + if test["analyzer_name"] == "eng": + # return id is 1,2 + assert results[0][0]["doc_id"] in [1, 2] + assert results[0][1]["doc_id"] in [1, 2] + elif test["analyzer_name"] == "chinese": + # return id is 3,4 + assert results[0][0]["doc_id"] in [3, 4] + assert results[0][1]["doc_id"] in [3, 4] + + alias_tests = [ + {"analyzer_name": "icu", "query": " ".join(stop_words * 10)}, + {"analyzer_name": "default", "query": " ".join(stop_words * 10)}, + ] + for test in alias_tests: + search_params = { + "metric_type": "BM25", + "analyzer_name": test["analyzer_name"], + } + results, _ = collection_w.search( + data=[test["query"]], + anns_field="bm25_sparse_vector", + param=search_params, + output_fields=["doc_id", "language", "article_content"], + limit=10, + ) + log.info(test) + log.info(results) + assert len(results) == 1 + assert len(results[0]) > 0 + for r in results[0]: + assert r["doc_id"] not in [1, 2, 3, 4] + + @pytest.mark.tags(CaseLabel.L0) + def test_multi_analyzer_correctness(self): + """ + target: test multi_analyzer correctness + method: create collection, insert and search using utility + expected: utility workflow works as expected + """ + from utils.util_fts import FTSMultiAnalyzerChecker + + self._connect() + client = self.client + c_name = cf.gen_unique_str(prefix) + language_field = "language" + text_field = "article_content" + ft_checker = FTSMultiAnalyzerChecker( + collection_name=c_name, + language_field_name=language_field, + text_field_name=text_field, + client=client, + ) + ft_checker.init_collection() + language_list = ["en", "zh", "fr", "jp"] + data = ft_checker.generate_test_data(num_rows=100, lang_list=language_list) + original_data, tokenized_data = ft_checker.insert_data(data) + original_data = pd.DataFrame(original_data) + ft_checker.create_index() + sample_data = random.sample(tokenized_data, 10) + for item in sample_data: + doc_id = item["doc_id"] + tokenized_query = item[text_field] + original_query = original_data.loc[ + original_data["doc_id"] == doc_id, text_field + ].iloc[0] + language = item[language_field] + res, mock_res = ft_checker.search( + original_query, tokenized_query, language, limit=5 + ) + res_set = set([r["doc_id"] for r in res[0]]) + mock_res_set = set([r["doc_id"] for r in mock_res[0]]) + res_diff = res_set - mock_res_set + 
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_multi_analyzer_correctness(self):
+        """
+        target: test multi_analyzer correctness end to end
+        method: create collection, insert and search via the FTSMultiAnalyzerChecker utility
+        expected: search results match those of the pre-tokenized mock collection
+        """
+        from utils.util_fts import FTSMultiAnalyzerChecker
+
+        self._connect()
+        client = self.client
+        c_name = cf.gen_unique_str(prefix)
+        language_field = "language"
+        text_field = "article_content"
+        ft_checker = FTSMultiAnalyzerChecker(
+            collection_name=c_name,
+            language_field_name=language_field,
+            text_field_name=text_field,
+            client=client,
+        )
+        ft_checker.init_collection()
+        language_list = ["en", "zh", "fr", "jp"]
+        data = ft_checker.generate_test_data(num_rows=100, lang_list=language_list)
+        original_data, tokenized_data = ft_checker.insert_data(data)
+        original_data = pd.DataFrame(original_data)
+        ft_checker.create_index()
+        sample_data = random.sample(tokenized_data, 10)
+        for item in sample_data:
+            doc_id = item["doc_id"]
+            tokenized_query = item[text_field]
+            original_query = original_data.loc[
+                original_data["doc_id"] == doc_id, text_field
+            ].iloc[0]
+            language = item[language_field]
+            res, mock_res = ft_checker.search(
+                original_query, tokenized_query, language, limit=5
+            )
+            res_set = {r["doc_id"] for r in res[0]}
+            mock_res_set = {r["doc_id"] for r in mock_res[0]}
+            res_diff = res_set - mock_res_set
+            mock_res_diff = mock_res_set - res_set
+            if res_diff or mock_res_diff:
+                log.error(f"result diff: {res_diff}, {mock_res_diff}")
+                assert False, (
+                    f"result diff: {res_diff} in original results but not in mock, "
+                    f"{mock_res_diff} in mock but not in original"
+                )
+
+
+class TestFullTextSearchMultiAnalyzerInvalid(TestcaseBase):
+    """
+    Negative and edge cases for multi_analyzer_params: malformed configs are
+    expected to fail at collection creation, while the nullable and alias edge
+    cases document the expected fallback behavior.
+    """
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_missing_by_field(self):
+        """
+        target: test missing by_field in multi_analyzer_params
+        method: create collection without by_field
+        expected: collection creation should fail because by_field is missing
+        """
+        missing_by_field = {
+            "analyzers": {
+                "en": {"type": "english"},
+                "default": {"tokenizer": "standard"},
+            }
+        }
+        fields = [
+            FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True),
+            FieldSchema(name="language", dtype=DataType.VARCHAR, max_length=16),
+            FieldSchema(
+                name="article_content",
+                dtype=DataType.VARCHAR,
+                max_length=1024,
+                enable_analyzer=True,
+                multi_analyzer_params=missing_by_field,
+            ),
+            FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR),
+        ]
+        schema = CollectionSchema(
+            fields=fields, description="Invalid multi-analyzer test"
+        )
+        bm25_func = Function(
+            name="bm25",
+            function_type=FunctionType.BM25,
+            input_field_names=["article_content"],
+            output_field_names=["bm25_sparse_vector"],
+        )
+        schema.add_function(bm25_func)
+        collection_name = cf.gen_unique_str(prefix)
+        with pytest.raises(Exception):
+            self.init_collection_wrap(name=collection_name, schema=schema)
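+
+    # For contrast with the negative cases in this class, a minimal sketch of a
+    # well-formed multi_analyzer_params (assembled from the passing tests above;
+    # for reference only): "by_field" must name an existing scalar field and
+    # "analyzers" must contain a "default" entry, while "alias" is optional.
+    _VALID_PARAMS_EXAMPLE = {
+        "by_field": "language",
+        "analyzers": {
+            "en": {"type": "english"},
+            "default": {"tokenizer": "standard"},
+        },
+    }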
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_by_field_not_exist(self):
+        """
+        target: test by_field referencing a nonexistent field
+        method: create collection whose by_field names a field that does not exist
+        expected: collection creation should fail because the by_field does not exist
+        """
+        invalid_by_field = {
+            "by_field": "not_exist",
+            "analyzers": {
+                "en": {"type": "english"},
+                "default": {"tokenizer": "standard"},
+            },
+        }
+        fields = [
+            FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True),
+            FieldSchema(name="language", dtype=DataType.VARCHAR, max_length=16),
+            FieldSchema(
+                name="article_content",
+                dtype=DataType.VARCHAR,
+                max_length=1024,
+                enable_analyzer=True,
+                multi_analyzer_params=invalid_by_field,
+            ),
+            FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR),
+        ]
+        schema = CollectionSchema(
+            fields=fields, description="Invalid multi-analyzer test"
+        )
+        bm25_func = Function(
+            name="bm25",
+            function_type=FunctionType.BM25,
+            input_field_names=["article_content"],
+            output_field_names=["bm25_sparse_vector"],
+        )
+        schema.add_function(bm25_func)
+        c_name = cf.gen_unique_str(prefix)
+        with pytest.raises(Exception):
+            self.init_collection_wrap(name=c_name, schema=schema)
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_by_field_is_nullable(self):
+        """
+        target: test nullable by_field in multi_analyzer_params
+        method: create collection whose by_field is nullable
+        expected: collection creation should succeed because by_field may be nullable
+        """
+        multi_analyzer_params = {
+            "by_field": "language",
+            "analyzers": {
+                "en": {"type": "english"},
+                "default": {"tokenizer": "standard"},
+            },
+        }
+        fields = [
+            FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True),
+            FieldSchema(
+                name="language", dtype=DataType.VARCHAR, max_length=16, nullable=True
+            ),
+            FieldSchema(
+                name="article_content",
+                dtype=DataType.VARCHAR,
+                max_length=1024,
+                enable_analyzer=True,
+                multi_analyzer_params=multi_analyzer_params,
+            ),
+            FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR),
+        ]
+        schema = CollectionSchema(
+            fields=fields, description="Nullable by_field multi-analyzer test"
+        )
+        bm25_func = Function(
+            name="bm25",
+            function_type=FunctionType.BM25,
+            input_field_names=["article_content"],
+            output_field_names=["bm25_sparse_vector"],
+        )
+        schema.add_function(bm25_func)
+        c_name = cf.gen_unique_str(prefix)
+        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
+
+        data = [
+            {
+                "doc_id": 1,
+                "language": "en",
+                "article_content": "English text for testing.",
+            },
+            {"doc_id": 2, "language": "zh", "article_content": "中文测试文本。"},
+            {
+                "doc_id": 3,
+                "language": "fr",
+                "article_content": "Texte français pour les tests.",
+            },
+            {
+                "doc_id": 4,
+                "language": "de",
+                "article_content": "Deutscher Text zum Testen.",
+            },
+            {
+                "doc_id": 5,
+                "language": "unknown",
+                "article_content": "Text in unknown language.",
+            },
+            {"doc_id": 6, "language": None, "article_content": "nullable test"},
+            {"doc_id": 7, "language": None, "article_content": "nullable test"},
+        ]
+        collection_w.insert(data)
+        collection_w.create_index(
+            "bm25_sparse_vector",
+            {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "BM25"},
+        )
+        collection_w.load()
+        query_tests = [
+            {"analyzer_name": "", "query": "texte français"},
+            {"analyzer_name": "de", "query": "deutscher text"},
+            {"analyzer_name": "unknown", "query": "unknown language"},
+            {"analyzer_name": None, "query": "nullable language"},
+        ]
+        for test in query_tests:
+            search_params = {
+                "metric_type": "BM25",
+                "analyzer_name": test["analyzer_name"],
+            }
+            results, _ = collection_w.search(
+                data=[test["query"]],
+                anns_field="bm25_sparse_vector",
+                param=search_params,
+                output_fields=["doc_id", "language", "article_content"],
+                limit=5,
+            )
+            assert len(results) == 1
+            assert len(results[0]) > 0
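+
+    # Note: for the rows above whose by_field value is NULL (doc_id 6 and 7),
+    # the text is presumably routed through the "default" analyzer, which is
+    # why the searches still return hits even without a matching analyzer_name.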
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_text_field_is_nullable(self):
+        """
+        target: test nullable text field used as BM25 function input
+        method: create collection where the analyzer-enabled text field is nullable
+        expected: collection creation should fail because a function input field cannot be nullable
+        """
+        multi_analyzer_params = {
+            "by_field": "language",
+            "analyzers": {
+                "en": {"type": "english"},
+                "default": {"tokenizer": "standard"},
+            },
+        }
+        fields = [
+            FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True),
+            FieldSchema(name="language", dtype=DataType.VARCHAR, max_length=16),
+            FieldSchema(
+                name="article_content",
+                dtype=DataType.VARCHAR,
+                max_length=1024,
+                enable_analyzer=True,
+                multi_analyzer_params=multi_analyzer_params,
+                nullable=True,
+            ),
+            FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR),
+        ]
+        schema = CollectionSchema(
+            fields=fields, description="Invalid multi-analyzer test"
+        )
+        bm25_func = Function(
+            name="bm25",
+            function_type=FunctionType.BM25,
+            input_field_names=["article_content"],
+            output_field_names=["bm25_sparse_vector"],
+        )
+        schema.add_function(bm25_func)
+        c_name = cf.gen_unique_str(prefix)
+        error = {
+            ct.err_code: 65535,
+            ct.err_msg: "function input field cannot be nullable",
+        }
+        self.init_collection_wrap(
+            name=c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error
+        )
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_missing_default_analyzer(self):
+        """
+        target: test missing default analyzer in multi_analyzer_params
+        method: create collection without a default analyzer
+        expected: collection creation should fail because no default analyzer is defined
+        """
+        missing_default = {
+            "by_field": "language",
+            "analyzers": {"en": {"type": "english"}, "zh": {"type": "chinese"}},
+        }
+        fields = [
+            FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True),
+            FieldSchema(name="language", dtype=DataType.VARCHAR, max_length=16),
+            FieldSchema(
+                name="article_content",
+                dtype=DataType.VARCHAR,
+                max_length=1024,
+                enable_analyzer=True,
+                multi_analyzer_params=missing_default,
+            ),
+            FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR),
+        ]
+        schema = CollectionSchema(
+            fields=fields, description="Invalid multi-analyzer test"
+        )
+        bm25_func = Function(
+            name="bm25",
+            function_type=FunctionType.BM25,
+            input_field_names=["article_content"],
+            output_field_names=["bm25_sparse_vector"],
+        )
+        schema.add_function(bm25_func)
+        collection_name = cf.gen_unique_str(prefix)
+        with pytest.raises(Exception):
+            self.init_collection_wrap(name=collection_name, schema=schema)
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_alias_point_not_exist_analyzer(self):
+        """
+        target: test alias pointing to a nonexistent analyzer
+        method: create collection with an alias entry that points to an undefined analyzer
+        expected: collection creation should succeed because lookups fall back to the default analyzer
+        """
+        alias_to_missing = {
+            "by_field": "language",
+            "analyzers": {
+                "en": {"type": "english"},
+                "zh": {"type": "chinese"},
+                "default": {"type": "english"},
+            },
+            "alias": {"chinese": "zh", "eng": "en", "fr": "fr"},
+        }
+        fields = [
+            FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True),
+            FieldSchema(name="language", dtype=DataType.VARCHAR, max_length=16),
+            FieldSchema(
+                name="article_content",
+                dtype=DataType.VARCHAR,
+                max_length=1024,
+                enable_analyzer=True,
+                multi_analyzer_params=alias_to_missing,
+            ),
+            FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR),
+        ]
+        schema = CollectionSchema(
+            fields=fields, description="Invalid multi-analyzer test"
+        )
+        bm25_func = Function(
+            name="bm25",
+            function_type=FunctionType.BM25,
+            input_field_names=["article_content"],
+            output_field_names=["bm25_sparse_vector"],
+        )
+        schema.add_function(bm25_func)
+        c_name = cf.gen_unique_str(prefix)
+        self.init_collection_wrap(name=c_name, schema=schema)
diff --git a/tests/python_client/utils/util_fts.py b/tests/python_client/utils/util_fts.py
new file mode 100644
index 0000000000..c206bbe594
--- /dev/null
+++ b/tests/python_client/utils/util_fts.py
@@ -0,0 +1,356 @@
+import random
+import time
+import logging
+from typing import List, Dict, Optional, Tuple
+import pandas as pd
+from faker import Faker
+from pymilvus import (
+    FieldSchema,
+    CollectionSchema,
+    DataType,
+    Function,
+    FunctionType,
+    Collection,
+    connections,
+    MilvusClient,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class FTSMultiAnalyzerChecker:
+    """
+    Utility checker for multi-analyzer full-text search testing.
+    Covers schema construction, multilingual test data generation, index
+    creation, and BM25 result verification.
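+
+    The core idea: alongside the real collection, a "mock" twin is created
+    whose only analyzer is a plain whitespace tokenizer. Documents are inserted
+    into the mock pre-tokenized on the client side with the same analyzers the
+    real collection is configured with, so BM25 results from the two
+    collections should agree whenever server-side analyzer dispatch is correct.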
+ """ + + # Constant definitions + DEFAULT_TEXT_MAX_LENGTH = 8192 + DEFAULT_LANG_MAX_LENGTH = 16 + DEFAULT_DOC_ID_START = 100 + + # Faker multilingual instances as class attributes to avoid repeated creation + fake_en = Faker("en_US") + fake_zh = Faker("zh_CN") + fake_fr = Faker("fr_FR") + fake_jp = Faker("ja_JP") + + def __init__( + self, + collection_name: str, + language_field_name: str, + text_field_name: str, + multi_analyzer_params: Optional[Dict] = None, + client: Optional[MilvusClient] = None, + ): + self.collection_name = collection_name + self.mock_collection_name = collection_name + "_mock" + self.language_field_name = language_field_name + self.text_field_name = text_field_name + self.multi_analyzer_params = ( + multi_analyzer_params + if multi_analyzer_params is not None + else { + "by_field": self.language_field_name, + "analyzers": { + "en": {"type": "english"}, + "zh": {"type": "chinese"}, + "icu": { + "tokenizer": "icu", + "filter": [{"type": "stop", "stop_words": [" "]}], + }, + "default": {"tokenizer": "whitespace"}, + }, + "alias": {"chinese": "zh", "eng": "en", "fr": "icu", "jp": "icu"}, + } + ) + self.mock_multi_analyzer_params = { + "by_field": self.language_field_name, + "analyzers": {"default": {"tokenizer": "whitespace"}}, + } + self.client = client + self.collection = None + self.mock_collection = None + + def resolve_analyzer(self, lang: str) -> str: + """ + Return the analyzer name according to the language. + Args: + lang (str): Language identifier + Returns: + str: Analyzer name + """ + if lang in self.multi_analyzer_params["analyzers"]: + return lang + if lang in self.multi_analyzer_params.get("alias", {}): + return self.multi_analyzer_params["alias"][lang] + return "default" + + def build_schema(self, multi_analyzer_params: dict) -> CollectionSchema: + """ + Build a collection schema with multi-analyzer parameters. + Args: + multi_analyzer_params (dict): Analyzer parameters + Returns: + CollectionSchema: Constructed collection schema + """ + fields = [ + FieldSchema(name="doc_id", dtype=DataType.INT64, is_primary=True), + FieldSchema( + name=self.language_field_name, + dtype=DataType.VARCHAR, + max_length=self.DEFAULT_LANG_MAX_LENGTH, + ), + FieldSchema( + name=self.text_field_name, + dtype=DataType.VARCHAR, + max_length=self.DEFAULT_TEXT_MAX_LENGTH, + enable_analyzer=True, + multi_analyzer_params=multi_analyzer_params, + ), + FieldSchema(name="bm25_sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR), + ] + schema = CollectionSchema( + fields=fields, description="Multi-analyzer BM25 schema test" + ) + bm25_func = Function( + name="bm25", + function_type=FunctionType.BM25, + input_field_names=[self.text_field_name], + output_field_names=["bm25_sparse_vector"], + ) + schema.add_function(bm25_func) + return schema + + def init_collection(self) -> None: + """ + Initialize Milvus collections, delete if exists first. 
+ """ + try: + if self.client.has_collection(self.collection_name): + self.client.drop_collection(self.collection_name) + if self.client.has_collection(self.mock_collection_name): + self.client.drop_collection(self.mock_collection_name) + self.collection = Collection( + name=self.collection_name, + schema=self.build_schema(self.multi_analyzer_params), + ) + self.mock_collection = Collection( + name=self.mock_collection_name, + schema=self.build_schema(self.mock_multi_analyzer_params), + ) + except Exception as e: + logger.error(f"collection init failed: {e}") + raise + + def get_tokens_by_analyzer(self, text: str, analyzer_params: dict) -> List[str]: + """ + Tokenize text according to analyzer parameters. + Args: + text (str): Text to be tokenized + analyzer_params (dict): Analyzer parameters + Returns: + List[str]: List of tokenized text + """ + try: + res = self.client.run_analyzer(text, analyzer_params) + # Filter out tokens that are just whitespace + return [token for token in res.tokens if token.strip()] + except Exception as e: + logger.error(f"Tokenization failed: {e}") + return [] + + def generate_test_data( + self, num_rows: int = 3000, lang_list: Optional[List[str]] = None + ) -> List[Dict]: + """ + Generate test data according to the schema, row count and language list. + Each row will contain language, article content and other fields. + Args: + num_rows (int): Number of data rows to generate + lang_list (Optional[List[str]]): List of languages + Returns: + List[Dict]: Generated test data list + """ + if lang_list is None: + lang_list = ["en", "eng", "zh", "fr", "chinese", "jp", ""] + data = [] + for i in range(num_rows): + lang = random.choice(lang_list) + # Generate article content according to language + if lang in ("en", "eng"): + content = self.fake_en.sentence() + elif lang in ("zh", "chinese"): + content = self.fake_zh.sentence() + elif lang == "fr": + content = self.fake_fr.sentence() + elif lang == "jp": + content = self.fake_jp.sentence() + else: + content = "" + row = { + "doc_id": i + self.DEFAULT_DOC_ID_START, + self.language_field_name: lang, + self.text_field_name: content, + } + data.append(row) + return data + + def tokenize_data_by_multi_analyzer( + self, data_list: List[Dict], verbose: bool = False + ) -> List[Dict]: + """ + Tokenize data according to multi-analyzer parameters. + Args: + data_list (List[Dict]): Data list + verbose (bool): Whether to print detailed information + Returns: + List[Dict]: Tokenized data list + """ + data_list_tokenized = [] + for row in data_list: + lang = row.get(self.language_field_name, None) + content = row.get(self.text_field_name, "") + doc_analyzer = self.resolve_analyzer(lang) + doc_analyzer_params = self.multi_analyzer_params["analyzers"][doc_analyzer] + content_tokens = self.get_tokens_by_analyzer(content, doc_analyzer_params) + tokenized_content = " ".join(content_tokens) + data_list_tokenized.append( + { + "doc_id": row.get("doc_id"), + self.language_field_name: lang, + self.text_field_name: tokenized_content, + } + ) + if verbose: + original_data = pd.DataFrame(data_list) + tokenized_data = pd.DataFrame(data_list_tokenized) + logger.info(f"Original data:\n{original_data}") + logger.info(f"Tokenized data:\n{tokenized_data}") + return data_list_tokenized + + def insert_data( + self, data: List[Dict], verbose: bool = False + ) -> Tuple[List[Dict], List[Dict]]: + """ + Insert test data and return original and tokenized data. 
+ Args: + data (List[Dict]): Original data list + verbose (bool): Whether to print detailed information + Returns: + Tuple[List[Dict], List[Dict]]: (original data, tokenized data) + """ + try: + self.collection.insert(data) + self.collection.flush() + except Exception as e: + logger.error(f"Failed to insert original data: {e}") + raise + t0 = time.time() + tokenized_data = self.tokenize_data_by_multi_analyzer(data, verbose=verbose) + t1 = time.time() + logger.info(f"Tokenization time: {t1 - t0}") + try: + self.mock_collection.insert(tokenized_data) + self.mock_collection.flush() + except Exception as e: + logger.error(f"Failed to insert tokenized data: {e}") + raise + return data, tokenized_data + + def create_index(self) -> None: + """ + Create BM25 index for sparse vector field. + """ + for c in [self.collection, self.mock_collection]: + try: + c.create_index( + "bm25_sparse_vector", + {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "BM25"}, + ) + c.load() + except Exception as e: + logger.error(f"Failed to create index: {e}") + raise + + def search( + self, origin_query: str, tokenized_query: str, language: str, limit: int = 10 + ) -> Tuple[list, list]: + """ + Search interface, perform BM25 search on main and mock collections respectively. + Args: + origin_query (str): Original query text + tokenized_query (str): Tokenized query text + language (str): Query language + limit (int): Number of results to return + Returns: + Tuple[list, list]: (main collection results, mock collection results) + """ + analyzer_name = self.resolve_analyzer(language) + search_params = {"metric_type": "BM25", "analyzer_name": analyzer_name} + logger.info(f"search_params: {search_params}") + try: + res = self.collection.search( + data=[origin_query], + anns_field="bm25_sparse_vector", + param=search_params, + output_fields=["doc_id"], + limit=limit, + ) + mock_res = self.mock_collection.search( + data=[tokenized_query], + anns_field="bm25_sparse_vector", + param=search_params, + output_fields=["doc_id"], + limit=limit, + ) + return res, mock_res + except Exception as e: + logger.error(f"Search failed: {e}") + return [], [] + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + connections.connect("default", host="10.104.25.52", port="19530") + client = MilvusClient(uri="http://10.104.25.52:19530") + ft = FTSMultiAnalyzerChecker( + "test_collection", "language", "article_content", client=client + ) + ft.init_collection() + ft.create_index() + language_list = ["jp", "en", "fr", "zh"] + data = ft.generate_test_data(1000, language_list) + _, tokenized_data = ft.insert_data(data) + search_sample_data = random.sample(tokenized_data, 10) + for row in search_sample_data: + tokenized_query = row[ft.text_field_name] + # Find the same doc_id in the original data and get the original query + # Use pandas to find the item with matching doc_id + # Convert data to DataFrame if it's not already + if not isinstance(data, pd.DataFrame): + data_df = pd.DataFrame(data) + else: + data_df = data + # Filter by doc_id and get the text field value + origin_query = data_df.loc[ + data_df["doc_id"] == row["doc_id"], ft.text_field_name + ].iloc[0] + logger.info(f"Query: {tokenized_query}") + logger.info(f"Origin Query: {origin_query}") + language = row[ft.language_field_name] + logger.info(f"language: {language}") + res, mock_res = ft.search(origin_query, tokenized_query, language) + logger.info(f"Main collection search result: {res}") + logger.info(f"Mock collection search result: {mock_res}") + if res and 
mock_res:
+        res_set = {r["doc_id"] for r in res[0]}
+        mock_res_set = {r["doc_id"] for r in mock_res[0]}
+        res_diff = res_set - mock_res_set
+        mock_res_diff = mock_res_set - res_set
+        logger.info(f"Diff: {res_diff}, {mock_res_diff}")
+        if res_diff or mock_res_diff:
+            logger.error(
+                f"Search results inconsistent: {res_diff}, {mock_res_diff}"
+            )
+            assert False, (
+                f"search results inconsistent: {res_diff} only in main collection, "
+                f"{mock_res_diff} only in mock collection"
+            )