From cfd636ed5b787a2bf1b50c7930d951e99eebabf1 Mon Sep 17 00:00:00 2001
From: nico <109071306+NicoYuan1986@users.noreply.github.com>
Date: Thu, 26 Sep 2024 09:21:13 +0800
Subject: [PATCH] test: add different language tests and modify some cases
 (#36465)

fix: #36396

Signed-off-by: nico
---
 tests/python_client/base/client_base.py      |   6 +-
 tests/python_client/common/common_func.py    |  64 ++++++--
 tests/python_client/testcases/test_insert.py |   6 +-
 tests/python_client/testcases/test_query.py  |  15 +-
 tests/python_client/testcases/test_search.py | 153 ++++++++++++-------
 5 files changed, 161 insertions(+), 83 deletions(-)

diff --git a/tests/python_client/base/client_base.py b/tests/python_client/base/client_base.py
index cd93925af0..204a799708 100644
--- a/tests/python_client/base/client_base.py
+++ b/tests/python_client/base/client_base.py
@@ -242,7 +242,7 @@ class TestcaseBase(Base):
                               primary_field=ct.default_int64_field_name, is_flush=True, name=None,
                               enable_dynamic_field=False, with_json=True, random_primary_key=False,
                               multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
-                              nullable_fields={}, default_value_fields={}, **kwargs):
+                              nullable_fields={}, default_value_fields={}, language=None, **kwargs):
         """
         target: create specified collections
         method: 1. create collections (binary/non-binary, default/all data type, auto_id or not)
@@ -311,7 +311,7 @@ class TestcaseBase(Base):
                                          dim=dim, enable_dynamic_field=enable_dynamic_field, with_json=with_json,
                                          random_primary_key=random_primary_key, multiple_dim_array=multiple_dim_array,
                                          primary_field=primary_field, vector_data_type=vector_data_type,
-                                         nullable_fields=nullable_fields)
+                                         nullable_fields=nullable_fields, language=language)
         if is_flush:
             assert collection_w.is_empty is False
             assert collection_w.num_entities == nb
@@ -324,7 +324,7 @@ class TestcaseBase(Base):
                 for vector_name in vector_name_list:
                     collection_w.create_index(vector_name, ct.default_sparse_inverted_index)
             else:
-                if len(multiple_dim_array) == 0 or is_all_data_type == False:
+                if len(multiple_dim_array) == 0 or is_all_data_type is False:
                     vector_name_list.append(ct.default_float_vec_field_name)
                 for vector_name in vector_name_list:
                     # Unlike dense vectors, sparse vectors cannot create flat index.
diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py
index 103775730c..e9ddc7c4b1 100644
--- a/tests/python_client/common/common_func.py
+++ b/tests/python_client/common/common_func.py
@@ -322,6 +322,7 @@ def generate_array_dataset(size, array_length, hit_probabilities, target_values):
     return dataset
 
+
 def prepare_array_test_data(data_size, hit_rate=0.005, dim=128):
     size = data_size  # Number of arrays in the dataset
     array_length = 10  # Length of each array
@@ -421,7 +422,6 @@ def prepare_array_test_data(data_size, hit_rate=0.005, dim=128):
     return train_df, query_expr
 
 
-
 def gen_unique_str(str_value=None):
     prefix = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
     return "test_" + prefix if str_value is None else str_value + "_" + prefix
@@ -433,6 +433,26 @@ def gen_str_by_length(length=8, letters_only=False):
     return "".join(random.choice(string.ascii_letters + string.digits) for _ in range(length))
 
 
+def generate_random_sentence(language):
+    language_map = {
+        "English": "en_US",
+        "French": "fr_FR",
+        "Spanish": "es_ES",
+        "German": "de_DE",
+        "Italian": "it_IT",
+        "Portuguese": "pt_PT",
+        "Russian": "ru_RU",
+        "Chinese": "zh_CN",
+        "Japanese": "ja_JP",
+        "Korean": "ko_KR",
+        "Arabic": "ar_SA",
+        "Hindi": "hi_IN"
+    }
+    lang_code = language_map.get(language, "en_US")
+    faker = Faker(lang_code)
+    return faker.sentence()
+
+
 def gen_digits_by_length(length=8):
     return "".join(random.choice(string.digits) for _ in range(length))
 
@@ -957,7 +977,7 @@ def gen_binary_vectors(num, dim):
 def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                                random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
                                vector_data_type="FLOAT_VECTOR", auto_id=False,
-                               primary_field = ct.default_int64_field_name, nullable_fields={}):
+                               primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     if not random_primary_key:
         int_values = pd.Series(data=[i for i in range(start, start + nb)])
     else:
@@ -973,6 +993,8 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi
         float_values = pd.Series(data=float_data, dtype=object)
 
     string_data = [str(i) for i in range(start, start + nb)]
+    if language:
+        string_data = [generate_random_sentence(language) for _ in range(nb)]
     string_values = pd.Series(data=string_data, dtype="string")
     if ct.default_string_field_name in nullable_fields:
         null_number = int(nb*nullable_fields[ct.default_string_field_name])
@@ -1017,7 +1039,7 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi
 def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                           random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
                           vector_data_type="FLOAT_VECTOR", auto_id=False,
-                          primary_field=ct.default_int64_field_name, nullable_fields={}):
+                          primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     insert_list = []
     if not random_primary_key:
         int_values = pd.Series(data=[i for i in range(start, start + nb)])
@@ -1031,6 +1053,8 @@ def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
         float_data = float_data[:nb - null_number] + null_data
         float_values = pd.Series(data=float_data, dtype=object)
     string_data = [str(i) for i in range(start, start + nb)]
+    if language:
+        string_data = [generate_random_sentence(language) for _ in range(nb)]
     string_values = pd.Series(data=string_data, dtype="string")
     if ct.default_string_field_name in nullable_fields:
         null_number = int(nb * nullable_fields[ct.default_string_field_name])
@@ -1069,7 +1093,7 @@ def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
 
 def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, multiple_dim_array=[],
                           multiple_vector_field_name=[], vector_data_type="FLOAT_VECTOR", auto_id=False,
-                          primary_field = ct.default_int64_field_name, nullable_fields={}):
+                          primary_field = ct.default_int64_field_name, nullable_fields={}, language=None):
     array = []
     for i in range(start, start + nb):
         dict = {ct.default_int64_field_name: i,
@@ -1080,6 +1104,8 @@ def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
                 }
         if with_json is False:
             dict.pop(ct.default_json_field_name, None)
+        if language:
+            dict[ct.default_string_field_name] = generate_random_sentence(language)
         if auto_id is True:
             if primary_field == ct.default_int64_field_name:
                 dict.pop(ct.default_int64_field_name)
@@ -1281,7 +1307,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w
 def gen_general_list_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, auto_id=False,
                                    random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
                                    primary_field=ct.default_int64_field_name,
-                                   nullable_fields={}):
+                                   nullable_fields={}, language=None):
     if not random_primary_key:
         int64_values = pd.Series(data=[i for i in range(start, start + nb)])
     else:
@@ -1335,6 +1361,8 @@ def gen_general_list_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0
         double_values = pd.Series(data=double_data, dtype=object)
 
     string_data = [str(i) for i in range(start, start + nb)]
+    if language:
+        string_data = [generate_random_sentence(language) for _ in range(nb)]
     string_values = pd.Series(data=string_data, dtype="string")
     if ct.default_string_field_name in nullable_fields:
         null_number = int(nb * nullable_fields[ct.default_string_field_name])
@@ -1375,7 +1403,7 @@ def gen_general_list_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0
 
 def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                                         multiple_dim_array=[], multiple_vector_field_name=[], partition_id=0,
-                                        auto_id=False, primary_field=ct.default_int64_field_name):
+                                        auto_id=False, primary_field=ct.default_int64_field_name, language=None):
     array = []
     for i in range(start, start + nb):
         dict = {ct.default_int64_field_name: i,
@@ -1391,6 +1419,8 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st
                 }
         if with_json is False:
             dict.pop(ct.default_json_field_name, None)
+        if language:
+            dict[ct.default_string_field_name] = generate_random_sentence(language)
         if auto_id is True:
             if primary_field == ct.default_int64_field_name:
                 dict.pop(ct.default_int64_field_name, None)
@@ -1412,7 +1442,7 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st
 
 def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, auto_id=False,
-                                      primary_field=ct.default_int64_field_name, nullable_fields={}):
+                                      primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     int_data = [i for i in range(start, start + nb)]
     int_values = pd.Series(data=int_data)
     if ct.default_int64_field_name in nullable_fields:
@@ -1430,6 +1460,8 @@ def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, star
         float_values = pd.Series(data=float_data, dtype=object)
 
     string_data = [str(i) for i in range(start, start + nb)]
+    if language:
+        string_data = [generate_random_sentence(language) for _ in range(nb)]
     string_values = pd.Series(data=string_data, dtype="string")
     if ct.default_string_field_name in nullable_fields:
         null_number = int(nb * nullable_fields[ct.default_string_field_name])
@@ -2525,7 +2557,7 @@ def gen_partitions(collection_w, partition_num=1):
 def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_type=False, auto_id=False,
                 dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True,
                 random_primary_key=False, multiple_dim_array=[], primary_field=ct.default_int64_field_name,
-                vector_data_type="FLOAT_VECTOR", nullable_fields={}):
+                vector_data_type="FLOAT_VECTOR", nullable_fields={}, language=None):
     """
     target: insert non-binary/binary data
     method: insert non-binary/binary data into partitions if any
@@ -2553,7 +2585,7 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
                                                       multiple_vector_field_name=vector_name_list,
                                                       vector_data_type=vector_data_type,
                                                       auto_id=auto_id, primary_field=primary_field,
-                                                      nullable_fields=nullable_fields)
+                                                      nullable_fields=nullable_fields, language=language)
         elif vector_data_type in ct.append_vector_type:
             default_data = gen_default_list_data(nb // num, dim=dim, start=start, with_json=with_json,
                                                  random_primary_key=random_primary_key,
@@ -2561,7 +2593,7 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
                                                  multiple_vector_field_name=vector_name_list,
                                                  vector_data_type=vector_data_type,
                                                  auto_id=auto_id, primary_field=primary_field,
-                                                 nullable_fields=nullable_fields)
+                                                 nullable_fields=nullable_fields, language=language)
         else:
             default_data = gen_default_rows_data(nb // num, dim=dim, start=start,
                                                  with_json=with_json,
@@ -2569,7 +2601,7 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
                                                  multiple_vector_field_name=vector_name_list,
                                                  vector_data_type=vector_data_type,
                                                  auto_id=auto_id, primary_field=primary_field,
-                                                 nullable_fields=nullable_fields)
+                                                 nullable_fields=nullable_fields, language=language)
 
     else:
         if not enable_dynamic_field:
@@ -2579,14 +2611,14 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
                                                         multiple_dim_array=multiple_dim_array,
                                                         multiple_vector_field_name=vector_name_list,
                                                         auto_id=auto_id, primary_field=primary_field,
-                                                        nullable_fields=nullable_fields)
+                                                        nullable_fields=nullable_fields, language=language)
             elif vector_data_type == "FLOAT16_VECTOR" or "BFLOAT16_VECTOR":
                 default_data = gen_general_list_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
                                                               random_primary_key=random_primary_key,
                                                               multiple_dim_array=multiple_dim_array,
                                                               multiple_vector_field_name=vector_name_list,
                                                               auto_id=auto_id, primary_field=primary_field,
-                                                              nullable_fields=nullable_fields)
+                                                              nullable_fields=nullable_fields, language=language)
             else:
                 if os.path.exists(ct.rows_all_data_type_file_path + f'_{i}' + f'_dim{dim}.txt'):
                     with open(ct.rows_all_data_type_file_path + f'_{i}' + f'_dim{dim}.txt', 'rb') as f:
@@ -2597,12 +2629,14 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
                                                                      multiple_dim_array=multiple_dim_array,
                                                                      multiple_vector_field_name=vector_name_list,
                                                                      partition_id=i, auto_id=auto_id,
-                                                                     primary_field=primary_field)
+                                                                     primary_field=primary_field,
+                                                                     language=language)
     else:
         default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num, dim=dim, start=start,
                                                                           auto_id=auto_id,
                                                                           primary_field=primary_field,
-                                                                          nullable_fields=nullable_fields)
+                                                                          nullable_fields=nullable_fields,
+                                                                          language=language)
         binary_raw_vectors.extend(binary_raw_data)
     insert_res = collection_w.insert(default_data, par[i].name)[0]
     log.info(f"inserted {nb // num} data into collection {collection_w.name}")
diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py
index 24ca464f80..7830a65eac 100644
--- a/tests/python_client/testcases/test_insert.py
+++ b/tests/python_client/testcases/test_insert.py
@@ -387,11 +387,11 @@ class TestInsertParams(TestcaseBase):
         """
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
-        data = cf.gen_default_list_data(nb=100)
-        data[0][1] = 1.0
+        data = cf.gen_default_rows_data(nb=100)
+        data[0][ct.default_int64_field_name] = 1.0
         error = {ct.err_code: 999,
                  ct.err_msg: "The Input data type is inconsistent with defined schema, {%s} field should be a int64, "
-                            "but got a {} instead." % ct.default_int64_field_name}
+                             "but got a {} instead." % ct.default_int64_field_name}
         collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error)
diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py
index e40f5c4fdc..a5fbfa9ca5 100644
--- a/tests/python_client/testcases/test_query.py
+++ b/tests/python_client/testcases/test_query.py
@@ -2616,6 +2616,7 @@ class TestQueryOperation(TestcaseBase):
                                                  ct.default_search_params, limit, multi_exprs)
         assert(check_res == True)
 
+
 class TestQueryString(TestcaseBase):
     """
     ******************************************************************
@@ -2726,14 +2727,14 @@ class TestQueryString(TestcaseBase):
         time.sleep(1)
         collection_w.load()
         expression = 'varchar like "0%"'
-        result , _ = collection_w.query(expression, output_fields=['varchar'])
+        result, _ = collection_w.query(expression, output_fields=['varchar'])
         res_len = len(result)
         collection_w.release()
         collection_w.alter_index("bitmap_offset_cache", {'indexoffsetcache.enabled': True})
         collection_w.create_index("varchar", index_name="bitmap_offset_cache", index_params={"index_type": "BITMAP"})
         collection_w.load()
         expression = 'varchar like "0%"'
-        result , _ = collection_w.query(expression, output_fields=['varchar'])
+        result, _ = collection_w.query(expression, output_fields=['varchar'])
         res_len_new = len(result)
         assert res_len_new == res_len
         collection_w.release()
@@ -2741,7 +2742,7 @@ class TestQueryString(TestcaseBase):
         collection_w.create_index("varchar", index_name="bitmap_offset_cache", index_params={"index_type": "BITMAP"})
         collection_w.load()
         expression = 'varchar like "0%"'
-        result , _ = collection_w.query(expression, output_fields=['varchar'])
+        result, _ = collection_w.query(expression, output_fields=['varchar'])
         res_len_new = len(result)
         assert res_len_new == res_len
         collection_w.release()
@@ -2824,12 +2825,13 @@ class TestQueryString(TestcaseBase):
         collection_w, vectors = self.init_collection_general(prefix, insert_data=True,is_index=False,
                                                              primary_field=default_int_field_name)[0:2]
-        collection_w.create_index(ct.default_float_vec_field_name, default_index_params, index_name="query_expr_pre_index")
+        collection_w.create_index(ct.default_float_vec_field_name, default_index_params,
+                                  index_name="query_expr_pre_index")
         collection_w.create_index("varchar", index_name="bitmap_auto_index", index_params={"index_type": "BITMAP"})
         time.sleep(1)
         collection_w.load()
         expression = 'varchar like "%0%"'
-        result , _ = collection_w.query(expression, output_fields=['varchar'])
+        result, _ = collection_w.query(expression, output_fields=['varchar'])
         res_len = len(result)
         collection_w.release()
         collection_w.drop_index(index_name="varchar_bitmap_index")
@@ -2838,7 +2840,6 @@ class TestQueryString(TestcaseBase):
         res_len_1 = len(result)
         assert res_len_1 == res_len
 
-
     @pytest.mark.tags(CaseLabel.L1)
     def test_query_string_with_invalid_prefix_expr(self):
         """
@@ -3062,6 +3063,8 @@ class TestQueryString(TestcaseBase):
         res, _ = collection_w.query(expr, output_fields=output_fields)
         assert len(res) == 4
 
+
+
 class TestQueryArray(TestcaseBase):
 
     @pytest.mark.tags(CaseLabel.L1)
diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py
index 5f2470accf..0e57b40e76 100644
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -1495,13 +1495,10 @@ class TestCollectionSearch(TestcaseBase):
         """
         # 1. initialize collection with random primary key
         collection_w, _vectors, _, insert_ids, time_stamp = \
-            self.init_collection_general(
-                prefix, True, 10, random_primary_key=random_primary_key)[0:5]
+            self.init_collection_general(prefix, True, 10, random_primary_key=random_primary_key,
+                                         language="Russian")[0:5]
         # 2. search
-        log.info("test_search_random_primary_key: searching collection %s" %
-                 collection_w.name)
-        vectors = [[random.random() for _ in range(default_dim)]
-                   for _ in range(default_nq)]
+        log.info("test_search_random_primary_key: searching collection %s" % collection_w.name)
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
                             default_search_exp,
@@ -5272,6 +5269,8 @@ class TestSearchDSL(TestcaseBase):
                             check_items={"nq": nq,
                                          "ids": insert_ids,
                                          "limit": ct.default_top_k})
 
+
+
 class TestSearchArray(TestcaseBase):
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -5367,25 +5366,28 @@ class TestSearchString(TestcaseBase):
         # 1. initialize with data
         auto_id = True
         enable_dynamic_field = False
-        collection_w, _, _, insert_ids = \
-            self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim,
-                                         enable_dynamic_field=enable_dynamic_field)[0:4]
+        collection_w, insert_data, _, insert_ids = \
+            self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim, nb=1000,
+                                         enable_dynamic_field=enable_dynamic_field, language="Chinese")[0:4]
+        search_str = insert_data[0][default_string_field_name][1]
+        search_exp = f"{default_string_field_name} == '{search_str}'"
         # 2. search
-        log.info("test_search_string_field_not_primary: searching collection %s" %
-                 collection_w.name)
-        vectors = [[random.random() for _ in range(default_dim)]
-                   for _ in range(default_nq)]
+        log.info("test_search_string_field_not_primary: searching collection %s" % collection_w.name)
+        log.info("search expr: %s" % search_exp)
         output_fields = [default_string_field_name, default_float_field_name]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit,
-                            default_search_string_exp,
-                            output_fields=output_fields,
-                            _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": insert_ids,
-                                         "limit": default_limit,
-                                         "_async": _async})
+        res, _ = collection_w.search(vectors[:default_nq], default_search_field,
+                                     default_search_params, default_limit, search_exp,
+                                     output_fields=output_fields,
+                                     _async=_async,
+                                     check_task=CheckTasks.check_search_results,
+                                     check_items={"nq": default_nq,
+                                                  "ids": insert_ids,
+                                                  "limit": 1,
+                                                  "_async": _async})
+        if _async:
+            res.done()
+            res = res.result()
+        assert res[0][0].entity.varchar == search_str
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_search_string_field_is_primary_true(self, _async):
@@ -5399,25 +5401,29 @@ class TestSearchString(TestcaseBase):
         # 1. initialize with data
         dim = 64
         enable_dynamic_field = True
-        collection_w, _, _, insert_ids = \
+        collection_w, insert_data, _, insert_ids = \
             self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name,
-                                         enable_dynamic_field=enable_dynamic_field)[0:4]
+                                         enable_dynamic_field=enable_dynamic_field, language="English", nb=1000)[0:4]
+        search_str = insert_data[0][1][default_string_field_name]
+        search_exp = f"{default_string_field_name} == '{search_str}'"
         # 2. search
-        log.info("test_search_string_field_is_primary_true: searching collection %s" %
-                 collection_w.name)
-        vectors = [[random.random() for _ in range(dim)]
-                   for _ in range(default_nq)]
+        log.info("test_search_string_field_is_primary_true: searching collection %s" % collection_w.name)
+        log.info("search expr: %s" % search_exp)
+        vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
         output_fields = [default_string_field_name, default_float_field_name]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit,
-                            default_search_string_exp,
-                            output_fields=output_fields,
-                            _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": insert_ids,
-                                         "limit": default_limit,
-                                         "_async": _async})
+        res, _ = collection_w.search(vectors[:default_nq], default_search_field,
+                                     default_search_params, default_limit, search_exp,
+                                     output_fields=output_fields,
+                                     _async=_async,
+                                     check_task=CheckTasks.check_search_results,
+                                     check_items={"nq": default_nq,
+                                                  "ids": insert_ids,
+                                                  "limit": 1,
+                                                  "_async": _async})
+        if _async:
+            res.done()
+            res = res.result()
+        assert res[0][0].entity.varchar == search_str
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_search_string_field_is_primary_true_multi_vector_fields(self, _async):
@@ -5435,7 +5441,7 @@ class TestSearchString(TestcaseBase):
         collection_w, _, _, insert_ids = \
             self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name,
                                          enable_dynamic_field=enable_dynamic_field,
-                                         multiple_dim_array=multiple_dim_array)[0:4]
+                                         multiple_dim_array=multiple_dim_array, language="German")[0:4]
         # 2. search
         log.info("test_search_string_field_is_primary_true: searching collection %s" %
                  collection_w.name)
@@ -5926,6 +5932,48 @@ class TestSearchString(TestcaseBase):
                                          "limit": default_limit,
                                          "_async": _async})
 
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_search_string_different_language(self):
+        """
+        target: test search with string expr using different language
+        method: create collection and insert data
+                create index and collection load
+                collection search uses string expr in string field
+        expected: Search successfully
+        """
+        # 1. initialize with data
+        _async = random.choice([True, False])
+        auto_id = random.choice([True, False])
+        enable_dynamic_field = random.choice([True, False])
+        all_language = ["English", "French", "Spanish", "German", "Italian", "Portuguese", "Russian", "Chinese",
+                        "Japanese", "Arabic", "Hindi"]
+        language = random.choice(all_language)
+        log.info(f"_async: {_async}, auto_id: {auto_id}, enable_dynamic_field: {enable_dynamic_field}, "
+                 f"language: {language}")
+        collection_w, insert_data, _, insert_ids = \
+            self.init_collection_general(prefix, True, auto_id=auto_id, nb=100,
+                                         enable_dynamic_field=enable_dynamic_field, language=language)[0:4]
+        search_str = insert_data[0][default_string_field_name][1] if not enable_dynamic_field \
+            else insert_data[0][1][default_string_field_name]
+        search_exp = f"{default_string_field_name} == '{search_str}'"
+        # 2. search
+        log.info("test_search_string_different_language: searching collection %s" % collection_w.name)
+        log.info("search expr: %s" % search_exp)
+        output_fields = [default_string_field_name, default_float_field_name]
+        res, _ = collection_w.search(vectors[:default_nq], default_search_field,
+                                     default_search_params, default_limit, search_exp,
+                                     output_fields=output_fields,
+                                     _async=_async,
+                                     check_task=CheckTasks.check_search_results,
+                                     check_items={"nq": default_nq,
+                                                  "ids": insert_ids,
+                                                  "limit": 1,
+                                                  "_async": _async})
+        if _async:
+            res.done()
+            res = res.result()
+        assert res[0][0].entity.varchar == search_str
+
 
 class TestSearchPagination(TestcaseBase):
     """ Test case of search pagination """
@@ -6802,19 +6850,16 @@ class TestSearchDiskann(TestcaseBase):
         enable_dynamic_field = True
         collection_w, _, _, ids = \
             self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False,
-                                         enable_dynamic_field=enable_dynamic_field)[0:4]
+                                         enable_dynamic_field=enable_dynamic_field, language="French")[0:4]
         # 2. create index
         default_index = {"index_type": "DISKANN", "metric_type": "COSINE", "params": {}}
-        collection_w.create_index(
-            ct.default_float_vec_field_name, default_index, index_name=index_name1)
+        collection_w.create_index(ct.default_float_vec_field_name, default_index, index_name=index_name1)
         if not enable_dynamic_field:
             index_params_one = {}
-            collection_w.create_index(
-                "float", index_params_one, index_name="a")
+            collection_w.create_index("float", index_params_one, index_name="a")
             index_param_two = {}
-            collection_w.create_index(
-                "varchar", index_param_two, index_name="b")
+            collection_w.create_index("varchar", index_param_two, index_name="b")
         collection_w.load()
         tmp_expr = f'{ct.default_int64_field_name} in {[0]}'
@@ -6826,12 +6871,9 @@ class TestSearchDiskann(TestcaseBase):
         assert del_res.delete_count == half_nb
 
         collection_w.delete(tmp_expr)
-        default_search_params = {
-            "metric_type": "COSINE", "params": {"search_list": 30}}
-        vectors = [[random.random() for _ in range(dim)]
-                   for _ in range(default_nq)]
-        output_fields = [default_int64_field_name,
-                         default_float_field_name, default_string_field_name]
+        default_search_params = {"metric_type": "COSINE", "params": {"search_list": 30}}
+        vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
+        output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name]
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
                             default_search_exp,
@@ -6841,8 +6883,7 @@ class TestSearchDiskann(TestcaseBase):
                             check_items={"nq": default_nq,
                                          "ids": ids,
                                          "limit": default_limit,
-                                         "_async": _async}
-                            )
+                                         "_async": _async})
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_search_with_scalar_field(self, _async):
@@ -9754,7 +9795,7 @@ class TestCollectionSearchJSON(TestcaseBase):
         dim = 64
         collection_w, _, _, insert_ids, time_stamp = \
             self.init_collection_general(prefix, True, auto_id=True, dim=dim, is_flush=is_flush,
                                          enable_dynamic_field=enable_dynamic_field)[0:5]
+                                         enable_dynamic_field=enable_dynamic_field, language="Hindi")[0:5]
         vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
         # 2. search after insert
         collection_w.search(vectors[:nq], default_search_field,
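
For context, a minimal standalone sketch of the data flow this patch threads through the helpers. This is an illustration, not part of the patch: it assumes the faker package is installed, LANGUAGE_MAP is a local stand-in for a subset of the full language_map in generate_random_sentence above, and the literal field name "varchar" stands in for ct.default_string_field_name.

    from faker import Faker

    # Subset of the language -> Faker-locale map added in common_func.py.
    LANGUAGE_MAP = {"English": "en_US", "Russian": "ru_RU", "Chinese": "zh_CN"}

    def generate_random_sentence(language):
        # Unknown language names fall back to en_US, as in the helper above.
        return Faker(LANGUAGE_MAP.get(language, "en_US")).sentence()

    # Build the varchar column the way the language-enabled gen_* functions do,
    # then derive the exact-match expression the new search cases assert against.
    string_data = [generate_random_sentence("Chinese") for _ in range(100)]
    search_str = string_data[1]  # the tests also pick the entity at index 1
    search_exp = f"varchar == '{search_str}'"
    print(search_exp)

Note that the expression is built by plain f-string interpolation, so a generated sentence containing a single quote would break it; Faker's lorem-style sentences rarely contain one, which is why the new tests can assert an exact-match hit with limit 1.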