diff --git a/tests/python_client/check/func_check.py b/tests/python_client/check/func_check.py
index a8fc3b9414..f91e2381db 100644
--- a/tests/python_client/check/func_check.py
+++ b/tests/python_client/check/func_check.py
@@ -1,3 +1,4 @@
+import pandas.core.frame
 from pymilvus.client.types import CompactionPlans
 from pymilvus import Role
 
@@ -209,7 +210,6 @@ class ResponseChecker:
             collection = res
         elif isinstance(res, tuple):
             collection = res[0]
-            log.debug(collection.schema)
         else:
             raise Exception("The result to check isn't collection type object")
         if len(check_items) == 0:
@@ -394,6 +394,9 @@ class ResponseChecker:
         expected: check the search is ok
         """
         log.info("search_results_check: checking the searching results")
+        enable_milvus_client_api = check_items.get("enable_milvus_client_api", False)
+        pk_name = check_items.get("pk_name", ct.default_primary_field_name)
+
        if func_name != 'search' and func_name != 'hybrid_search':
            log.warning("The function name is {} rather than {} or {}".format(func_name, "search", "hybrid_search"))
        if len(check_items) == 0:
@@ -403,11 +406,12 @@ class ResponseChecker:
             search_res.done()
             search_res = search_res.result()
         if check_items.get("output_fields", None):
-            assert set(search_res[0][0].entity.fields) == set(check_items["output_fields"])
-            log.info('search_results_check: Output fields of query searched is correct')
-            if check_items.get("original_entities", None):
-                original_entities = check_items["original_entities"][0]
-                pc.output_field_value_check(search_res, original_entities)
+            assert set(search_res[0][0].entity.fields.keys()) == set(check_items["output_fields"])
+            original_entities = check_items.get("original_entities", None)
+            if original_entities is not None:
+                if not isinstance(original_entities, pandas.core.frame.DataFrame):
+                    original_entities = pandas.DataFrame(original_entities)
+                pc.output_field_value_check(search_res, original_entities, pk_name=pk_name)
         if len(search_res) != check_items["nq"]:
             log.error("search_results_check: Numbers of query searched (%d) "
                       "is not equal with expected (%d)"
@@ -415,16 +419,14 @@ class ResponseChecker:
             assert len(search_res) == check_items["nq"]
         else:
             log.info("search_results_check: Numbers of query searched is correct")
-        enable_milvus_client_api = check_items.get("enable_milvus_client_api", False)
         # log.debug(search_res)
         nq_i = 0
         for hits in search_res:
-            searched_original_vectors = []
             ids = []
             distances = []
             if enable_milvus_client_api:
                 for hit in hits:
-                    ids.append(hit['id'])
+                    ids.append(hit[pk_name])
                     distances.append(hit['distance'])
             else:
                 ids = list(hits.ids)
@@ -438,8 +440,7 @@ class ResponseChecker:
                 assert len(ids) == check_items["limit"]
             else:
                 if check_items.get("ids", None) is not None:
-                    ids_match = pc.list_contain_check(ids,
-                                                      list(check_items["ids"]))
+                    ids_match = pc.list_contain_check(ids, list(check_items["ids"]))
                     if not ids_match:
                         log.error("search_results_check: ids searched not match")
                     assert ids_match
@@ -452,12 +453,6 @@ class ResponseChecker:
                 if check_items.get("vector_nq") is None or check_items.get("original_vectors") is None:
                     log.debug("skip distance check for knowhere does not return the precise distances")
                 else:
-                    # for id in ids:
-                    #     searched_original_vectors.append(check_items["original_vectors"][id])
-                    # cf.compare_distance_vector_and_vector_list(check_items["vector_nq"][nq_i],
-                    #                                            searched_original_vectors,
-                    #                                            check_items["metric"], distances)
-                    # log.info("search_results_check: Checked the distances for one nq: OK")
                     pass
             else:
                 pass  # just check nq and topk, not specific ids need check
@@ -544,10 +539,10 @@ class ResponseChecker:
             raise Exception("No expect values found in the check task")
         exp_res = check_items.get("exp_res", None)
         with_vec = check_items.get("with_vec", False)
-        primary_field = check_items.get("primary_field", None)
+        pk_name = check_items.get("pk_name", ct.default_primary_field_name)
         if exp_res is not None:
             if isinstance(query_res, list):
-                assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=primary_field,
+                assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name,
                                               with_vec=with_vec)
                 return True
             else:
@@ -575,8 +570,7 @@ class ResponseChecker:
                 log.info("search iteration finished, close")
                 query_iterator.close()
                 break
-            pk_name = ct.default_int64_field_name if res[0].get(ct.default_int64_field_name, None) is not None \
-                else ct.default_string_field_name
+            pk_name = check_items.get("pk_name", ct.default_primary_field_name)
             for i in range(len(res)):
                 pk_list.append(res[i][pk_name])
             if check_items.get("limit", None):
diff --git a/tests/python_client/check/param_check.py b/tests/python_client/check/param_check.py
index 42e6a3dfb4..b4c975df4c 100644
--- a/tests/python_client/check/param_check.py
+++ b/tests/python_client/check/param_check.py
@@ -158,7 +158,7 @@ def entity_in(entity, entities, primary_field):
     :param primary_field: collection primary field
     :return: True or False
     """
-    primary_default = ct.default_int64_field_name
+    primary_default = ct.default_primary_field_name
     primary_field = primary_default if primary_field is None else primary_field
     primary_key = entity.get(primary_field, None)
     primary_keys = []
@@ -180,7 +180,7 @@ def remove_entity(entity, entities, primary_field):
     :param primary_field: collection primary field
     :return: entities of removed entity
     """
-    primary_default = ct.default_int64_field_name
+    primary_default = ct.default_primary_field_name
     primary_field = primary_default if primary_field is None else primary_field
     primary_key = entity.get(primary_field, None)
     primary_keys = []
@@ -226,16 +226,18 @@ def equal_entities_list(exp, actual, primary_field, with_vec=False):
     return True if len(exp) == 0 else False
 
 
-def output_field_value_check(search_res, original):
+def output_field_value_check(search_res, original, pk_name):
     """
     check if the value of output fields is correct, it only works on auto_id = False
     :param search_res: the search result of specific output fields
     :param original: the data in the collection
+    :param pk_name: the primary key field name, defaults to ct.default_primary_field_name
     :return: True or False
     """
+    pk_name = ct.default_primary_field_name if pk_name is None else pk_name
     limit = len(search_res[0])
     for i in range(limit):
-        entity = search_res[0][i]['entity']
+        entity = search_res[0][i].fields
         _id = search_res[0][i].id
         for field in entity.keys():
             if isinstance(entity[field], list):
@@ -246,7 +248,7 @@
                 # but sparse only supports list data type insertion for now
                 assert entity[field].keys() == original[-1][_id].keys()
             else:
-                num = original[original[ct.default_int64_field_name] == _id].index.to_list()[0]
+                num = original[original[pk_name] == _id].index.to_list()[0]
                 assert original[field][num] == entity[field]
     return True
diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py
index 8122af7e22..2332829311 100644
--- a/tests/python_client/common/common_func.py
+++ b/tests/python_client/common/common_func.py
@@ -699,6 +699,7 @@ def gen_float_vec_field(name=ct.default_float_vec_field_name, is_primary=False,
                                                                description=description, dim=dim,
                                                                is_primary=is_primary, **kwargs)
     else:
+        # no dim for sparse vector
         float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.SPARSE_FLOAT_VECTOR,
                                                                        description=description,
                                                                        is_primary=is_primary, **kwargs)
@@ -1119,39 +1120,6 @@ def gen_schema_multi_string_fields(string_fields):
     return schema
 
 
-def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR):
-    vectors = []
-    if vector_data_type == DataType.FLOAT_VECTOR:
-        vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
-    elif vector_data_type == DataType.FLOAT16_VECTOR:
-        vectors = gen_fp16_vectors(nb, dim)[1]
-    elif vector_data_type == DataType.BFLOAT16_VECTOR:
-        vectors = gen_bf16_vectors(nb, dim)[1]
-    elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
-        vectors = gen_sparse_vectors(nb, dim)
-    elif vector_data_type == ct.text_sparse_vector:
-        vectors = gen_text_vectors(nb)
-    elif vector_data_type == DataType.BINARY_VECTOR:
-        vectors = gen_binary_vectors(nb, dim)[1]
-    else:
-        log.error(f"Invalid vector data type: {vector_data_type}")
-        raise Exception(f"Invalid vector data type: {vector_data_type}")
-    if dim > 1:
-        if vector_data_type == DataType.FLOAT_VECTOR:
-            vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
-            vectors = vectors.tolist()
-    return vectors
-
-
-def gen_text_vectors(nb, language="en"):
-
-    fake = Faker("en_US")
-    if language == "zh":
-        fake = Faker("zh_CN")
-    vectors = [" milvus " + fake.text() for _ in range(nb)]
-    return vectors
-
-
 def gen_string(nb):
     string_values = [str(random.random()) for _ in range(nb)]
     return string_values
@@ -3318,28 +3286,38 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok", empty_percentage=0):
     return vectors
 
 
-def gen_vectors_based_on_vector_type(num, dim, vector_data_type=DataType.FLOAT_VECTOR):
-    """
-    generate float16 vector data
-    raw_vectors : the vectors
-    fp16_vectors: the bytes used for insert
-    return: raw_vectors and fp16_vectors
-    """
+def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR):
+    vectors = []
     if vector_data_type == DataType.FLOAT_VECTOR:
-        vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
+        vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
     elif vector_data_type == DataType.FLOAT16_VECTOR:
-        vectors = gen_fp16_vectors(num, dim)[1]
+        vectors = gen_fp16_vectors(nb, dim)[1]
     elif vector_data_type == DataType.BFLOAT16_VECTOR:
-        vectors = gen_bf16_vectors(num, dim)[1]
+        vectors = gen_bf16_vectors(nb, dim)[1]
     elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
-        vectors = gen_sparse_vectors(num, dim)
+        vectors = gen_sparse_vectors(nb, dim)
     elif vector_data_type == ct.text_sparse_vector:
-        vectors = gen_text_vectors(num)
+        vectors = gen_text_vectors(nb)  # for Full Text Search
+    elif vector_data_type == DataType.BINARY_VECTOR:
+        vectors = gen_binary_vectors(nb, dim)[1]
     else:
-        raise Exception("vector_data_type is invalid")
+        log.error(f"Invalid vector data type: {vector_data_type}")
+        raise Exception(f"Invalid vector data type: {vector_data_type}")
+    if dim > 1:
+        if vector_data_type == DataType.FLOAT_VECTOR:
+            vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
+            vectors = vectors.tolist()
     return vectors
 
 
+def gen_text_vectors(nb, language="en"):
+    fake = Faker("en_US")
+    if language == "zh":
+        fake = Faker("zh_CN")
+    vectors = [" milvus " + fake.text() for _ in range(nb)]
+    return vectors
+
+
 def field_types() -> dict:
     return dict(sorted(dict(DataType.__members__).items(), key=lambda item: item[0], reverse=True))
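Aside: a minimal usage sketch of the consolidated gen_vectors helper above. The import paths are assumed for illustration (in tests/python_client the helpers are conventionally imported as cf and ct); everything else mirrors the dispatch shown in the hunk:

    from pymilvus import DataType
    from common import common_func as cf   # assumed import path
    from common import common_type as ct   # assumed import path

    # dense float vectors are L2-normalized by the helper when dim > 1
    dense = cf.gen_vectors(nb=10, dim=128, vector_data_type=DataType.FLOAT_VECTOR)
    # sparse vectors take no meaningful dim; each one is a dict of {index: weight}
    sparse = cf.gen_vectors(nb=10, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
    # text "vectors" are plain strings, consumed by the Full Text Search (BM25) tests
    texts = cf.gen_vectors(nb=10, dim=1, vector_data_type=ct.text_sparse_vector)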
diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py
index 5321ce1861..024d2aef60 100644
--- a/tests/python_client/common/common_type.py
+++ b/tests/python_client/common/common_type.py
@@ -19,6 +19,7 @@ default_batch_size = 1000
 min_limit = 1
 max_limit = 16384
 max_top_k = 16384
+max_nq = 16384
 max_partition_num = 1024
 max_role_num = 10
 default_partition_num = 16  # default num_partitions for partition key feature
@@ -27,6 +28,7 @@ default_server_segment_row_limit = 1024 * 512
 default_alias = "default"
 default_user = "root"
 default_password = "Milvus"
+default_primary_field_name = 'pk'
 default_bool_field_name = "bool"
 default_int8_field_name = "int8"
 default_int16_field_name = "int16"
diff --git a/tests/python_client/milvus_client/test_milvus_client_alias.py b/tests/python_client/milvus_client/test_milvus_client_alias.py
index 2ca9ca93fa..ff5ae7cb33 100644
--- a/tests/python_client/milvus_client/test_milvus_client_alias.py
+++ b/tests/python_client/milvus_client/test_milvus_client_alias.py
@@ -411,13 +411,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # 4. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.release_collection(client, collection_name)
         self.drop_collection(client, collection_name, check_task=CheckTasks.err_res,
                              check_items={ct.err_code: 65535,
diff --git a/tests/python_client/milvus_client/test_milvus_client_collection.py b/tests/python_client/milvus_client/test_milvus_client_collection.py
index fea6a5cada..fed66c7e08 100644
--- a/tests/python_client/milvus_client/test_milvus_client_collection.py
+++ b/tests/python_client/milvus_client/test_milvus_client_collection.py
@@ -349,7 +349,8 @@ class TestMilvusClientCollectionValid(TestMilvusClientV2Base):
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.skip(reason="issue 25110")
@@ -382,13 +383,14 @@ class TestMilvusClientCollectionValid(TestMilvusClientV2Base):
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         # 4. query
         self.query(client, collection_name, filter="id in [0, 1]",
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -419,6 +421,7 @@
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         self.drop_collection(client, collection_name)
 
@@ -452,6 +455,7 @@
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         self.drop_collection(client, collection_name)
 
@@ -487,13 +491,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": limit})
+                                 "limit": limit,
+                                 "pk_name": default_primary_key_field_name})
         # 5. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows[delete_num:],
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -528,13 +533,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": limit})
+                                 "limit": limit,
+                                 "pk_name": default_primary_key_field_name})
         # 5. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows[delete_num:],
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
diff --git a/tests/python_client/milvus_client/test_milvus_client_database.py b/tests/python_client/milvus_client/test_milvus_client_database.py
index 5b1541a70c..64ee8e593f 100644
--- a/tests/python_client/milvus_client/test_milvus_client_database.py
+++ b/tests/python_client/milvus_client/test_milvus_client_database.py
@@ -408,13 +408,14 @@
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         # 5. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         # 6. drop action
         self.drop_collection(client, collection_name)
         self.drop_database(client, db_name)
@@ -463,13 +464,14 @@
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         # 5. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         # 6. drop action
         self.drop_collection(client, collection_name)
         self.drop_database(client, db_name)
diff --git a/tests/python_client/milvus_client/test_milvus_client_delete.py b/tests/python_client/milvus_client/test_milvus_client_delete.py
index bd570c4bb0..acceaf437c 100644
--- a/tests/python_client/milvus_client/test_milvus_client_delete.py
+++ b/tests/python_client/milvus_client/test_milvus_client_delete.py
@@ -144,7 +144,7 @@ class TestMilvusClientDeleteValid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -167,6 +167,7 @@
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
+                                 "pk_name": default_primary_key_field_name,
                                  "ids": insert_ids,
                                  "limit": limit})
         # 5. query
@@ -174,7 +175,7 @@
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows[delete_num:],
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -185,7 +186,7 @@ class TestMilvusClientDeleteValid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -209,13 +210,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": limit})
         # 5. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows[delete_num:],
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -226,7 +228,7 @@ class TestMilvusClientDeleteValid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -252,13 +254,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": limit})
         # 6. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows[delete_num:],
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -282,7 +285,7 @@ class TestMilvusClientDeleteValid(TestMilvusClientV2Base):
         expected: Delete and search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         json_field_name = "my_json"
         schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
@@ -341,11 +344,12 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": limit})
         # 5. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows[delete_num:],
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
diff --git a/tests/python_client/milvus_client/test_milvus_client_hybrid_search.py b/tests/python_client/milvus_client/test_milvus_client_hybrid_search.py
index ede5b0db84..6463c1f970 100644
--- a/tests/python_client/milvus_client/test_milvus_client_hybrid_search.py
+++ b/tests/python_client/milvus_client/test_milvus_client_hybrid_search.py
@@ -365,7 +365,8 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -396,6 +397,7 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         self.drop_collection(client, collection_name)
 
@@ -472,6 +474,7 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         sub_search1 = AnnSearchRequest(vectors_to_search, default_vector_field_name, {"level": 1}, 20,
                                        expr=f"{json_field_name}['a']['b']>=10")
@@ -484,5 +487,6 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         self.drop_collection(client, collection_name)
diff --git a/tests/python_client/milvus_client/test_milvus_client_index.py b/tests/python_client/milvus_client/test_milvus_client_index.py
index efb88641a8..8b345da406 100644
--- a/tests/python_client/milvus_client/test_milvus_client_index.py
+++ b/tests/python_client/milvus_client/test_milvus_client_index.py
@@ -273,13 +273,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # 7. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -317,13 +318,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # 4. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -395,13 +397,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # 9. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -449,13 +452,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # 7. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -511,13 +515,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # 7. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         # 8. insert more distinct value to the scalar field to make the autoindex change
         rng = np.random.default_rng(seed=19530)
         rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@@ -534,7 +539,8 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -575,13 +581,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # 7. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -623,13 +630,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # 8. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
diff --git a/tests/python_client/milvus_client/test_milvus_client_insert.py b/tests/python_client/milvus_client/test_milvus_client_insert.py
index b00f71a758..47b63e9053 100644
--- a/tests/python_client/milvus_client/test_milvus_client_insert.py
+++ b/tests/python_client/milvus_client/test_milvus_client_insert.py
@@ -370,13 +370,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # 4. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.release_collection(client, collection_name)
         self.drop_collection(client, collection_name)
 
@@ -417,7 +418,8 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -443,6 +445,7 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": [],
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": 0})
         self.drop_collection(client, collection_name)
 
@@ -479,7 +482,8 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # partition_number = self.get_partition_stats(client, collection_name, "_default")[0]
         # assert partition_number == default_nb
         # partition_number = self.get_partition_stats(client, collection_name, partition_name)[0]
@@ -876,13 +880,14 @@ class TestMilvusClientUpsertValid(TestMilvusClientV2Base):
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # 4. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.release_collection(client, collection_name)
         self.drop_collection(client, collection_name)
 
@@ -909,6 +914,7 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": [],
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": 0})
         self.drop_collection(client, collection_name)
 
@@ -948,7 +954,8 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         # partition_number = self.get_partition_stats(client, collection_name, "_default")[0]
         # assert partition_number == default_nb
         # partition_number = self.get_partition_stats(client, collection_name, partition_name)[0]
@@ -996,7 +1003,8 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
-                                 "limit": default_limit})
+                                 "limit": default_limit,
+                                 "pk_name": default_primary_key_field_name})
         if self.has_partition(client, collection_name, partition_name)[0]:
             self.release_partitions(client, collection_name, partition_name)
             self.drop_partition(client, collection_name, partition_name)
diff --git a/tests/python_client/milvus_client/test_milvus_client_partition.py b/tests/python_client/milvus_client/test_milvus_client_partition.py
index 1ca683ef84..fd467ccac6 100644
--- a/tests/python_client/milvus_client/test_milvus_client_partition.py
+++ b/tests/python_client/milvus_client/test_milvus_client_partition.py
@@ -212,6 +212,7 @@ class TestMilvusClientPartitionValid(TestMilvusClientV2Base):
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         # 4. query
         res = self.query(client, collection_name, filter=default_search_exp,
@@ -219,7 +220,7 @@
                          check_task=CheckTasks.check_query_results,
                          check_items={exp_res: rows,
                                       "with_vec": True,
-                                      "primary_field": default_primary_key_field_name})[0]
+                                      "pk_name": default_primary_key_field_name})[0]
         assert set(res[0].keys()) == {"ids", "vector"}
 
         partition_number = self.get_partition_stats(client, collection_name, "_default")[0]
diff --git a/tests/python_client/milvus_client/test_milvus_client_query.py b/tests/python_client/milvus_client/test_milvus_client_query.py
index 613308f371..adcdb70076 100644
--- a/tests/python_client/milvus_client/test_milvus_client_query.py
+++ b/tests/python_client/milvus_client/test_milvus_client_query.py
@@ -57,7 +57,7 @@ class TestMilvusClientQueryInvalid(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         collections = self.list_collections(client)[0]
@@ -103,7 +103,7 @@ class TestMilvusClientQueryValid(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -116,13 +116,13 @@
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         # 4. query using filter
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -133,7 +133,7 @@ class TestMilvusClientQueryValid(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -146,7 +146,7 @@
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         # 4. query using filter
         res = self.query(client, collection_name, filter=default_search_exp,
                          output_fields=[default_primary_key_field_name, default_float_field_name,
@@ -154,7 +154,7 @@
                          check_task=CheckTasks.check_query_results,
                          check_items={exp_res: rows,
                                       "with_vec": True,
-                                      "primary_field": default_primary_key_field_name})[0]
+                                      "pk_name": default_primary_key_field_name})[0]
         assert set(res[0].keys()) == {default_primary_key_field_name, default_vector_field_name,
                                       default_float_field_name, default_string_field_name}
         self.drop_collection(client, collection_name)
@@ -167,7 +167,7 @@ class TestMilvusClientQueryValid(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -180,14 +180,14 @@
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         # 4. query using filter
         res = self.query(client, collection_name, filter=default_search_exp, output_fields=["*"],
                          check_task=CheckTasks.check_query_results,
                          check_items={exp_res: rows,
                                       "with_vec": True,
-                                      "primary_field": default_primary_key_field_name})[0]
+                                      "pk_name": default_primary_key_field_name})[0]
         assert set(res[0].keys()) == {default_primary_key_field_name, default_vector_field_name,
                                       default_float_field_name, default_string_field_name}
         self.drop_collection(client, collection_name)
@@ -200,7 +200,7 @@ class TestMilvusClientQueryValid(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -215,14 +215,14 @@
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows[:limit],
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name[:limit]})
+                                "pk_name": default_primary_key_field_name})
         # 4. query using filter
         self.query(client, collection_name, filter=default_search_exp, limit=limit,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows[:limit],
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name[:limit]})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -234,7 +234,7 @@ class TestMilvusClientQueryValid(TestMilvusClientV2Base):
         expected: query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -269,7 +269,7 @@ class TestMilvusClientQueryValid(TestMilvusClientV2Base):
         expected: query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -312,7 +312,7 @@ class TestMilvusClientGetInvalid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -336,7 +336,7 @@ class TestMilvusClientGetInvalid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -362,7 +362,7 @@ class TestMilvusClientGetInvalid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -403,7 +403,7 @@ class TestMilvusClientGetValid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -428,7 +428,7 @@ class TestMilvusClientGetValid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -458,7 +458,7 @@ class TestMilvusClientGetValid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, id_type="string", max_length=ct.default_length)
         # 2. insert
@@ -485,7 +485,7 @@ class TestMilvusClientGetValid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, id_type="string", max_length=ct.default_length)
         # 2. insert
@@ -551,7 +551,7 @@ class TestMilvusClientQueryJsonPathIndex(TestMilvusClientV2Base):
         with that without json path index
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         json_field_name = "json_field"
         schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
diff --git a/tests/python_client/milvus_client/test_milvus_client_search.py b/tests/python_client/milvus_client/test_milvus_client_search.py
index ef9e4f6c4f..aa0aedf913 100644
--- a/tests/python_client/milvus_client/test_milvus_client_search.py
+++ b/tests/python_client/milvus_client/test_milvus_client_search.py
@@ -61,7 +61,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -83,7 +83,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -104,7 +104,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -125,7 +125,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -146,7 +146,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -167,7 +167,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -189,7 +189,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -211,7 +211,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -232,7 +232,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -254,7 +254,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -276,7 +276,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -299,7 +299,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         error = {ct.err_code: 1, ct.err_msg: f"Param id_type must be int or string"}
         self.create_collection(client, collection_name, default_dim, id_type="invalid",
@@ -313,7 +313,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         error = {ct.err_code: 65535, ct.err_msg: f"type param(max_length) should be specified for the "
                                                  f"field({default_primary_key_field_name}) of collection {collection_name}"}
@@ -329,7 +329,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         2. Report errors for creating collection with same name and different params
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. create collection with same params
@@ -349,7 +349,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         error = {ct.err_code: 1100,
                  ct.err_msg: "float vector index does not support metric type: invalid: "
@@ -366,7 +366,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: Raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim)
         # 2. search
@@ -389,7 +389,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 5
         # 1. create collection
         schema = self.create_schema(client, enable_dynamic_field=False)[0]
@@ -408,7 +408,6 @@
         # 3. search
         vectors_to_search = rng.random((1, dim))
         null_expr = default_vector_field_name + " " + null_expr_op
-        log.info(null_expr)
         error = {ct.err_code: 65535,
                  ct.err_msg: f"unsupported data type: VECTOR_FLOAT"}
         self.search(client, collection_name, vectors_to_search,
@@ -424,7 +423,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 5
         # 1. create collection
         schema = self.create_schema(client, enable_dynamic_field=False)[0]
@@ -444,7 +443,6 @@
         vectors_to_search = rng.random((1, dim))
         not_exist_field_name = "not_exist_field"
         null_expr = not_exist_field_name + " " + null_expr_op
-        log.info(null_expr)
         error = {ct.err_code: 1100,
                  ct.err_msg: f"failed to create query plan: cannot parse expression: "
                              f"{null_expr}, error: field {not_exist_field_name} not exist: invalid parameter"}
@@ -462,7 +460,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 5
         # 1. create collection
         nullable_field_name = "nullable_field"
@@ -486,7 +484,6 @@
         null_expr = nullable_field_name + "['b']" + " " + null_expr_op
         self.insert(client, collection_name, rows)
         # 3. search
-        log.info(null_expr)
         self.search(client, collection_name, [vectors[0]],
                     filter=null_expr)
 
@@ -500,7 +497,7 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
         expected: raise exception
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 5
         # 1. create collection
         nullable_field_name = "nullable_field"
@@ -524,7 +521,6 @@
         self.insert(client, collection_name, rows)
         # 3. search
         null_expr = nullable_field_name + "[0]" + " " + null_expr_op
-        log.info(null_expr)
         error = {ct.err_code: 65535,
                  ct.err_msg: f"unsupported data type: ARRAY"}
         self.search(client, collection_name, [vectors[0]],
@@ -557,7 +553,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         self.using_database(client, "default")
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
@@ -583,13 +579,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         # 4. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.release_collection(client, collection_name)
         self.drop_collection(client, collection_name)
 
@@ -603,7 +600,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: create collection with default schema, index, and load successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 128
         # 1. create collection
         schema = self.create_schema(client, enable_dynamic_field=False)[0]
@@ -634,7 +631,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
         collections = self.list_collections(client)[0]
@@ -662,13 +659,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         # 4. query
         self.query(client, new_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.release_collection(client, new_name)
         self.drop_collection(client, new_name)
 
@@ -680,7 +678,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         collections = self.list_collections(client)[0]
@@ -703,6 +701,7 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -714,7 +713,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, id_type="string", max_length=ct.default_length)
         self.describe_collection(client, collection_name,
@@ -735,13 +734,14 @@
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         # 4. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -752,7 +752,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: search successfully with limit(topK)
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, metric_type=metric_type,
                                auto_id=auto_id, consistency_level="Strong")
@@ -772,6 +772,7 @@
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         self.drop_collection(client, collection_name)
 
@@ -784,7 +785,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: search successfully with limit(topK)
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, metric_type=metric_type,
                                auto_id=auto_id, consistency_level="Strong")
@@ -805,6 +806,7 @@
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": default_limit})
         self.drop_collection(client, collection_name)
 
@@ -816,7 +818,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -840,13 +842,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": limit})
         # 5. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows[delete_num:],
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -857,7 +860,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: search/query successfully without deleted data
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
         # 2. insert
@@ -881,13 +884,14 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": limit})
         # 5. query
         self.query(client, collection_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows[delete_num:],
                                 "with_vec": True,
-                                "primary_field": default_primary_key_field_name})
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -898,7 +902,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: search successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         schema = self.create_schema(client, enable_dynamic_field=False)[0]
         dim = 32
@@ -951,7 +955,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: raise error
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
         schema = self.create_schema(client, enable_dynamic_field=False)[0]
         dim = 5
@@ -1023,7 +1027,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 5
         # 1. create collection
         nullable_field_name = "nullable_field"
@@ -1049,7 +1053,6 @@
         vectors_to_search = rng.random((1, dim))
         insert_ids = [str(i) for i in range(default_nb)]
         null_expr = nullable_field_name + " " + null_expr_op
-        log.info(null_expr)
         if nullable:
             if "not" in null_expr or "NOT" in null_expr:
                 insert_ids = []
@@ -1070,6 +1073,7 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": limit})
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -1082,7 +1086,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 5
         # 1. create collection
         nullable_field_name = "nullable_field"
@@ -1110,7 +1114,6 @@
         vectors_to_search = rng.random((1, dim))
         insert_ids = [str(i) for i in range(default_nb)]
         null_expr = nullable_field_name + " " + null_expr_op
-        log.info(null_expr)
         if nullable:
             if "not" in null_expr or "NOT" in null_expr:
                 insert_ids = []
@@ -1131,6 +1134,7 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": limit})
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -1143,7 +1147,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 5
         # 1. create collection
         nullable_field_name = "nullable_field"
@@ -1171,7 +1175,6 @@
         vectors_to_search = rng.random((1, dim))
         insert_ids = [str(i) for i in range(default_nb)]
         null_expr = nullable_field_name + " " + null_expr_op
-        log.info(null_expr)
         if nullable:
             if "not" in null_expr or "NOT" in null_expr:
                 insert_ids = []
@@ -1192,6 +1195,7 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": limit})
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -1204,7 +1208,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 5
         # 1. create collection
         nullable_field_name = "nullable_field"
@@ -1232,7 +1236,6 @@
         vectors_to_search = rng.random((1, dim))
         insert_ids = [str(i) for i in range(default_nb)]
         null_expr = nullable_field_name + " " + null_expr_op
-        log.info(null_expr)
         if nullable:
             if "not" in null_expr or "NOT" in null_expr:
                 insert_ids = []
@@ -1253,6 +1256,7 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": limit})
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -1265,7 +1269,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 5
         # 1. create collection
         nullable_field_name = "nullable_field"
@@ -1291,7 +1295,6 @@
         vectors_to_search = rng.random((1, dim))
         insert_ids = [str(i) for i in range(default_nb)]
         null_expr = nullable_field_name + " " + null_expr_op
-        log.info(null_expr)
         if nullable:
             if "not" in null_expr or "NOT" in null_expr:
                 insert_ids = []
@@ -1312,6 +1315,7 @@
                     check_items={"enable_milvus_client_api": True,
                                  "nq": len(vectors_to_search),
                                  "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
                                  "limit": limit})
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -1324,7 +1328,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_unique_str(prefix)
+        collection_name = cf.gen_collection_name_by_testcase_name()
         dim = 5
         # 1.
create collection nullable_field_name = "nullable_field" @@ -1350,7 +1354,6 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): vectors_to_search = rng.random((1, dim)) insert_ids = [str(i) for i in range(default_nb)] null_expr = nullable_field_name + " " + null_expr_op - log.info(null_expr) if nullable: if "not" in null_expr or "NOT" in null_expr: insert_ids = [] @@ -1371,6 +1374,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": limit}) @pytest.mark.tags(CaseLabel.L2) @@ -1383,7 +1387,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): expected: search/query successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() dim = 5 # 1. create collection nullable_field_name = "nullable_field" @@ -1409,7 +1413,6 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): vectors_to_search = rng.random((1, dim)) insert_ids = [str(i) for i in range(default_nb)] null_expr = nullable_field_name + " " + null_expr_op - log.info(null_expr) if nullable: if "not" in null_expr or "NOT" in null_expr: insert_ids = [] @@ -1430,6 +1433,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": limit}) @pytest.mark.tags(CaseLabel.L2) @@ -1442,7 +1446,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): expected: search/query successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() dim = 5 # 1. create collection nullable_field_name = "nullable_field" @@ -1468,7 +1472,6 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): vectors_to_search = rng.random((1, dim)) insert_ids = [str(i) for i in range(default_nb)] null_expr = nullable_field_name + " " + null_expr_op - log.info(null_expr) if nullable: if "not" in null_expr or "NOT" in null_expr: insert_ids = [] @@ -1489,6 +1492,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": limit}) @pytest.mark.tags(CaseLabel.L1) @@ -1501,7 +1505,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): expected: search/query successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() dim = 5 # 1. 
create collection nullable_field_name = "nullable_field" @@ -1534,7 +1538,6 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): vectors_to_search = rng.random((1, dim)) insert_ids = [str(i) for i in range(default_nb)] null_expr = nullable_field_name + " " + null_expr_op - log.info(null_expr) if nullable: if "not" in null_expr or "NOT" in null_expr: insert_ids = [] @@ -1556,6 +1559,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": limit}) @pytest.mark.tags(CaseLabel.L1) @@ -1568,7 +1572,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): expected: search/query successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() dim = 5 # 1. create collection nullable_field_name = "nullable_field" @@ -1609,7 +1613,6 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): vectors_to_search = rng.random((1, dim)) insert_ids = [str(i) for i in range(default_nb)] null_expr = nullable_field_name + " " + null_expr_op - log.info(null_expr) if nullable: if "not" in null_expr or "NOT" in null_expr: insert_ids = [] @@ -1631,6 +1634,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": limit}) @pytest.mark.tags(CaseLabel.L1) @@ -1643,7 +1647,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): expected: search/query successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() dim = 5 # 1. create collection nullable_field_name = "nullable_field" @@ -1670,7 +1674,6 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): vectors_to_search = rng.random((1, dim)) insert_ids = [str(i) for i in range(default_nb)] null_expr = nullable_field_name + " " + null_expr_op - log.info(null_expr) if nullable: if "not" in null_expr or "NOT" in null_expr: insert_ids = [] @@ -1692,6 +1695,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": limit}) @@ -1732,7 +1736,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): expected: Search successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection json_field_name = "my_json" schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] @@ -1812,6 +1816,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) # 6. 
search with filter on json without output_fields expr = f"{json_field_name}['a']['b'] == {default_nb / 2}" @@ -1823,6 +1828,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": 1})[0] expr = f"{json_field_name} == {default_nb + 5}" insert_ids = [default_nb+5] @@ -1833,6 +1839,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": 1}) expr = f"{json_field_name}['a'][0] == 1" insert_ids = [i for i in range(default_nb + 20, default_nb + 30)] @@ -1843,6 +1850,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) expr = f"{json_field_name}['a'][0]['b'] == 1" insert_ids = [i for i in range(default_nb + 30, default_nb + 40)] @@ -1853,6 +1861,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) expr = f"{json_field_name}['a'] == 1" insert_ids = [i for i in range(default_nb + 50, default_nb + 60)] @@ -1863,6 +1872,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) @pytest.mark.tags(CaseLabel.L2) @@ -1875,7 +1885,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): expected: successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection json_field_name = "my_json" schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] @@ -1913,6 +1923,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": 1}) @pytest.mark.tags(CaseLabel.L2) @@ -1925,7 +1936,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): expected: successfully with original inverted index """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection schema = self.create_schema(client, enable_dynamic_field=False)[0] schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) @@ -1969,6 +1980,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) @pytest.mark.tags(CaseLabel.L1) @@ -1984,7 +1996,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): if enable_dynamic_field: pytest.skip('need to fix the field name when enabling dynamic field') client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. 
create collection json_field_name = "my_json" schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] @@ -2034,6 +2046,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) @pytest.mark.tags(CaseLabel.L2) @@ -2048,7 +2061,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): expected: Search successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection json_field_name = "my_json" schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] @@ -2102,6 +2115,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) expr = f"{json_field_name}1['a']['b'] >= 0" vectors_to_search = [vectors[0]] @@ -2114,6 +2128,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) @pytest.mark.tags(CaseLabel.L1) @@ -2137,7 +2152,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): expected: Search successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection json_field_name = "my_json" schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] @@ -2217,6 +2232,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": 1}) expr = f"{json_field_name} == {default_nb + 5}" insert_ids = [default_nb + 5] @@ -2227,6 +2243,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": 1}) expr = f"{json_field_name}['a'][0] == 1" insert_ids = [i for i in range(default_nb + 20, default_nb + 30)] @@ -2237,6 +2254,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) expr = f"{json_field_name}['a'][0]['b'] == 1" insert_ids = [i for i in range(default_nb + 30, default_nb + 40)] @@ -2247,6 +2265,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) @pytest.mark.tags(CaseLabel.L2) @@ -2271,7 +2290,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): expected: Search successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. 
create collection json_field_name = "my_json" schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] @@ -2353,6 +2372,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": 1}) expr = f"{json_field_name} == {default_nb + 5}" insert_ids = [default_nb + 5] @@ -2363,6 +2383,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": 1}) expr = f"{json_field_name}['a'][0] == 1" insert_ids = [i for i in range(default_nb + 20, default_nb + 30)] @@ -2373,6 +2394,7 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) expr = f"{json_field_name}['a'][0]['b'] == 1" insert_ids = [i for i in range(default_nb + 30, default_nb + 40)] @@ -2383,4 +2405,5 @@ class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, + "pk_name": default_primary_key_field_name, "limit": default_limit}) \ No newline at end of file diff --git a/tests/python_client/milvus_client/test_milvus_client_search_iterator.py b/tests/python_client/milvus_client/test_milvus_client_search_iterator.py index 58b2cb94c8..781df8bb1c 100644 --- a/tests/python_client/milvus_client/test_milvus_client_search_iterator.py +++ b/tests/python_client/milvus_client/test_milvus_client_search_iterator.py @@ -706,7 +706,9 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): res = self.search(client, collection_name, vectors_to_search, search_params=search_params, limit=200, check_task=CheckTasks.check_search_results, - check_items={"nq": 1, "limit": limit, "enable_milvus_client_api": True})[0] + check_items={"nq": 1, "limit": limit, + "enable_milvus_client_api": True, + "pk_name": default_primary_key_field_name})[0] for limit in [batch_size - 3, batch_size, batch_size * 2, -1]: if metric_type != "L2": radius = res[0][limit // 2].get('distance', 0) - 0.1 # pick a radius to make sure there exists results @@ -967,7 +969,8 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): res = self.search(client, collection_name, vectors_to_search, search_params=search_params, limit=limit, check_task=CheckTasks.check_search_results, - check_items={"nq": 1, "limit": limit, "enable_milvus_client_api": True})[0] + check_items={"nq": 1, "limit": limit, "pk_name": default_primary_key_field_name, + "enable_milvus_client_api": True})[0] for limit in [batch_size - 3, batch_size, batch_size * 2, -1]: if metric_type != "L2": radius = res[0][limit // 2].get('distance', 0) - 0.1 # pick a radius to make sure there exists results diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_alias_v2.py b/tests/python_client/milvus_client_v2/test_milvus_client_alias_v2.py index 71c7d49d8d..c0b5847daa 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_alias_v2.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_alias_v2.py @@ -106,6 +106,7 @@ class TestMilvusClientV2AliasOperation(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, 
check_items={"enable_milvus_client_api": True, "nq": len(search_vectors), + "pk_name": default_primary_key_field_name, "limit": default_limit}) # 6. create collection2 with index and load @@ -135,6 +136,7 @@ class TestMilvusClientV2AliasOperation(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(search_vectors), + "pk_name": default_primary_key_field_name, "limit": default_limit}) # 11. verify operations on collection1 still work diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py b/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py index 674523aa7c..90c724fb24 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py @@ -173,6 +173,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), + "pk_name": "id", "limit": default_limit } ) @@ -194,7 +195,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": bool_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -210,7 +211,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": int8_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -226,7 +227,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": int16_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -242,7 +243,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": int32_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -258,7 +259,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": int64_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -274,7 +275,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": float_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -290,7 +291,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": double_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -306,7 +307,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": varchar_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -322,7 +323,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": varchar_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -338,7 +339,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": json_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -354,7 +355,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": array_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -370,7 +371,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": multi_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -386,7 +387,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": mix_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -403,7 +404,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": int8_not_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) 
@@ -419,7 +420,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": int16_not_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -435,7 +436,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": float_not_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -451,7 +452,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": double_not_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -467,7 +468,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": varchar_not_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -483,7 +484,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": json_not_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -499,7 +500,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": array_not_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -515,7 +516,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": multi_not_null_expected, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -534,7 +535,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": complex_mix_expected1, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -552,7 +553,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": complex_mix_expected2, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) @@ -570,7 +571,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_items={ "exp_res": complex_mix_expected3, "with_vec": True, - "primary_field": "id" + "pk_name": "id" } ) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_hybrid_search_v2.py b/tests/python_client/milvus_client_v2/test_milvus_client_hybrid_search_v2.py index 7ad326ee82..a545bc1241 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_hybrid_search_v2.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_hybrid_search_v2.py @@ -169,7 +169,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): metrics = [] search_res_dict_array = [] search_res_dict_array_nq = [] - vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) + vectors = cf.gen_vectors(nq, dim, vector_data_type) # get hybrid search req list for i in range(len(vector_name_list)): @@ -197,6 +197,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit})[0] ids = search_res[0].ids distance_array = search_res[0].distances @@ -216,7 +217,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 8. 
compare results through the re-calculated distances for k in range(len(score_answer_nq)): for i in range(len(score_answer_nq[k][:default_limit])): @@ -258,7 +260,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] @pytest.mark.tags(CaseLabel.L1) def test_hybrid_search_normal_expr(self): @@ -292,7 +295,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): # 5. hybrid search collection_w.hybrid_search(req_list, WeightedRanker(*weights), default_limit, check_task=CheckTasks.check_search_results, - check_items={"nq": nq, "ids": insert_ids, "limit": default_limit}) + check_items={"nq": nq, "ids": insert_ids, "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.skip(reason="issue 32288") @@ -410,14 +414,16 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] search_res = collection_w.search(vectors[:nq], search_field, default_search_params, default_limit, default_search_exp, check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 4. the effect of hybrid search to one field should equal to search log.info("The distance list is:\n") for i in range(nq): @@ -462,7 +468,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -519,7 +526,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -560,12 +568,14 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] hybrid_search_1 = collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9), default_limit, check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] for i in range(nq): assert hybrid_search_0[i].ids == hybrid_search_1[i].ids assert hybrid_search_0[i].distances == hybrid_search_1[i].distances @@ -614,7 +624,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] for k in range(nq): id_list_nq[k].extend(search_res[k].ids) # 5. 
prepare hybrid search params @@ -672,7 +683,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -716,7 +728,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": min_dim})[0] + "limit": min_dim, + "pk_name": ct.default_int64_field_name})[0] id_list.extend(search_res[0].ids) # 4. hybrid search hybrid_search = collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9), default_limit)[0] @@ -760,7 +773,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -802,7 +816,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -841,7 +856,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -880,7 +896,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -922,7 +939,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -961,7 +979,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip("issue: #29840") @@ -1000,7 +1019,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) # 5. hybrid search with two-dim list in WeightedRanker weights = [[random.random() for _ in range(1)] for _ in range(len(req_list))] # 4. 
hybrid search @@ -1008,7 +1028,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_hybrid_search_over_maximum_reqs_num(self): @@ -1089,7 +1110,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -1130,7 +1152,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] ids = search_res[0].ids for j in range(len(ids)): search_res_dict[ids[j]] = 1 / (j + 60 + 1) @@ -1142,7 +1165,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 6. compare results through the re-calculated distances for i in range(len(score_answer[:default_limit])): assert score_answer[i] - hybrid_search_0[0].distances[i] < hybrid_search_epsilon @@ -1151,7 +1175,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] assert hybrid_search_0[0].ids == hybrid_search_1[0].ids assert hybrid_search_0[0].distances == hybrid_search_1[0].distances @@ -1198,7 +1223,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] ids = search_res[0].ids for j in range(len(ids)): search_res_dict[ids[j]] = 1 / (j + k + 1) @@ -1211,7 +1237,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 6. compare results through the re-calculated distances for i in range(len(score_answer[:default_limit])): assert score_answer[i] - hybrid_res[0].distances[i] < hybrid_search_epsilon @@ -1257,7 +1284,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 5. 
hybrid search with offset parameter req_list = [] for i in range(len(vector_name_list)): @@ -1274,7 +1302,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit - offset})[0] + "limit": default_limit - offset, + "pk_name": ct.default_int64_field_name})[0] assert hybrid_res_inside[0].distances[offset:] == hybrid_res[0].distances @@ -1336,7 +1365,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] ids = search_res[0].ids for j in range(len(ids)): search_res_dict[ids[j]] = 1 / (j + k + 1) @@ -1348,7 +1378,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 6. compare results through the re-calculated distances for i in range(len(score_answer[:default_limit])): delta = math.fabs(score_answer[i] - hybrid_res[0].distances[i]) @@ -1396,7 +1427,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": limit})[0] + "limit": limit, + "pk_name": ct.default_int64_field_name})[0] ids = search_res[0].ids distance_array = search_res[0].distances for j in range(len(ids)): @@ -1410,7 +1442,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": limit})[0] + "limit": limit, + "pk_name": ct.default_int64_field_name})[0] # 6. compare results through the re-calculated distances for i in range(len(score_answer[:limit])): delta = math.fabs(score_answer[i] - hybrid_res[0].distances[i]) @@ -1515,7 +1548,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): metrics = [] search_res_dict_array = [] search_res_dict_array_nq = [] - vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) + vectors = cf.gen_vectors(nq, dim, vector_data_type) # get hybrid search req list for i in range(len(vector_name_list)): @@ -1543,7 +1576,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] ids = search_res[0].ids distance_array = search_res[0].distances for j in range(len(ids)): @@ -1563,7 +1597,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 8. 
compare results through the re-calculated distances for k in range(len(score_answer_nq)): for i in range(len(score_answer_nq[k][:default_limit])): @@ -1596,7 +1631,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): metrics = [] search_res_dict_array = [] search_res_dict_array_nq = [] - vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) + vectors = cf.gen_vectors(nq, dim, vector_data_type) # get hybrid search req list for i in range(len(vector_name_list)): @@ -1624,7 +1659,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] ids = search_res[0].ids distance_array = search_res[0].distances for j in range(len(ids)): @@ -1646,7 +1682,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 8. compare results through the re-calculated distances for k in range(len(score_answer_nq)): for i in range(len(score_answer_nq[k][:default_limit])): @@ -1679,7 +1716,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): metrics = [] search_res_dict_array = [] search_res_dict_array_nq = [] - vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) + vectors = cf.gen_vectors(nq, dim, vector_data_type) # get hybrid search req list for i in range(len(vector_name_list)): @@ -1707,7 +1744,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] ids = search_res[0].ids distance_array = search_res[0].distances for j in range(len(ids)): @@ -1726,7 +1764,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 8. 
compare results through the re-calculated distances for k in range(len(score_answer_nq)): for i in range(len(score_answer_nq[k][:default_limit])): @@ -1786,6 +1825,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_items={"nq": 1, "ids": insert_ids, "limit": default_limit, + "pk_name": ct.default_int64_field_name, "_async": _async})[0] if _async: search_res.done() @@ -1809,7 +1849,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_items={"nq": nq, "ids": insert_ids, "limit": default_limit, - "_async": _async})[0] + "_async": _async, + "pk_name": ct.default_int64_field_name})[0] if _async: hybrid_res.done() hybrid_res = hybrid_res.result() @@ -1880,7 +1921,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): metrics = [] search_res_dict_array = [] search_res_dict_array_nq = [] - vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, vector_data_type) + vectors = cf.gen_vectors(nq, default_dim, vector_data_type) # get hybrid search req list for i in range(len(vector_name_list)): @@ -1908,7 +1949,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] ids = search_res[0].ids distance_array = search_res[0].distances for j in range(len(ids)): @@ -1926,7 +1968,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 8. compare results through the re-calculated distances for k in range(len(score_answer_nq)): for i in range(len(score_answer_nq[k][:default_limit])): @@ -2050,7 +2093,8 @@ class TestCollectionHybridSearchValid(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "ids": insert_ids, - "limit": default_limit})[0] + "limit": default_limit, + "pk_name": ct.default_int64_field_name})[0] # 6. compare results through the re-calculated distances for i in range(len(score_answer[:default_limit])): delta = math.fabs(score_answer[i] - hybrid_res[0].distances[i]) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py b/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py index 937ee926d3..24a4a8aada 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py @@ -254,6 +254,7 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit})[0] log.info("test_range_search_normal: checking the distance of top 1") for hits in search_res: @@ -308,6 +309,7 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit}) # 4. range search with IP range_search_params = {"metric_type": "IP", @@ -376,6 +378,7 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit}) # 4. 
range search with IP range_search_params = {"metric_type": "IP", @@ -417,6 +420,7 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit, "_async": _async})[0] if _async: @@ -477,7 +481,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={ "nq": 1, "limit": limit, - "ids": list(distances_index_max) + "ids": list(distances_index_max), + "pk_name": ct.default_int64_field_name, }) @pytest.mark.tags(CaseLabel.L2) @@ -533,6 +538,7 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": limit, "_async": _async}) # 3. delete partitions @@ -559,7 +565,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": nq, "ids": insert_ids[:entity_num], "limit": limit - deleted_entity_num, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_range_search_collection_after_release_load(self, _async): @@ -599,7 +606,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": default_limit, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_range_search_load_flush_load(self, _async): @@ -637,7 +645,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": default_limit, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_range_search_new_data(self, nq): @@ -668,7 +677,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": nb_old}) + "limit": nb_old, + "pk_name": ct.default_int64_field_name}) # 3. 
insert new data nb_new = 300 _, _, _, insert_ids_new, time_stamp = cf.insert_data(collection_w, nb_new, dim=dim, @@ -685,7 +695,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": nb_old + nb_new}) + "limit": nb_old + nb_new, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_range_search_different_data_distribution_with_index(self, _async): @@ -723,7 +734,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "limit": default_limit, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip("not fixed yet") @@ -763,7 +775,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "limit": default_limit, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("index", range_search_supported_indexes) @@ -802,7 +815,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("index", range_search_supported_indexes) @@ -841,7 +855,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_range_search_index_one_partition(self, _async): @@ -881,7 +896,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids[par[0].num_entities:], "limit": limit_check, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"]) @@ -917,7 +933,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": nq, "ids": insert_ids, "limit": 2, - "_async": _async})[0] + "_async": _async, + "pk_name": ct.default_int64_field_name})[0] if _async: res.done() res = res.result() @@ -952,7 +969,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": [], - "limit": 0}) + "limit": 0, + "pk_name": ct.default_int64_field_name}) # 5. 
range search search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10, "radius": 10, "range_filter": 2}} @@ -961,7 +979,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": [], - "limit": 0}) + "limit": 0, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"]) @@ -996,7 +1015,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": nq, "ids": insert_ids, "limit": 2, - "_async": _async})[0] + "_async": _async, + "pk_name": ct.default_int64_field_name})[0] if _async: res.done() res = res.result() @@ -1031,7 +1051,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": [], - "limit": 0}) + "limit": 0, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip("tanimoto obsolete") @@ -1086,7 +1107,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": 1, "ids": insert_ids, "limit": limit, - "_async": _async})[0] + "_async": _async, + "pk_name": ct.default_int64_field_name})[0] if _async: res.done() res = res.result() @@ -1122,7 +1144,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": [], - "limit": 0}) + "limit": 0, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_range_search_binary_without_flush(self, metrics): @@ -1155,7 +1178,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L1) def test_range_search_with_expression(self, enable_dynamic_field): @@ -1202,7 +1226,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": min(nb, len(filter_ids)), - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) if _async: search_res.done() search_res = search_res.result() @@ -1221,7 +1246,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": min(nb, len(filter_ids)), - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) if _async: search_res.done() search_res = search_res.result() @@ -1255,7 +1281,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": default_limit, - "_async": _async})[0] + "_async": _async, + "pk_name": ct.default_int64_field_name})[0] if _async: res.done() res = res.result() @@ -1290,7 +1317,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": nq, "ids": insert_ids, "limit": default_limit, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) # 2. 
search with multi-processes log.info("test_range_search_concurrent_multi_threads: searching with %s processes" % threads_num) @@ -1408,7 +1436,7 @@ class TestCollectionRangeSearch(TestcaseBase): "ids": insert_ids, "limit": nb_old, "_async": _async, - }) + "pk_name": ct.default_int64_field_name}) kwargs = {} consistency_level = kwargs.get( @@ -1454,7 +1482,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": nq, "ids": insert_ids, "limit": nb_old, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) nb_new = 400 _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new, @@ -1472,7 +1501,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": nq, "ids": insert_ids, "limit": nb_old + nb_new, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_range_search_with_consistency_eventually(self, nq, _async): @@ -1501,7 +1531,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": nq, "ids": insert_ids, "limit": nb_old, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) nb_new = 400 _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new, auto_id=auto_id, dim=dim, @@ -1542,7 +1573,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": nq, "ids": insert_ids, "limit": nb_old, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) kwargs = {} consistency_level = kwargs.get( @@ -1562,7 +1594,8 @@ class TestCollectionRangeSearch(TestcaseBase): check_items={"nq": nq, "ids": insert_ids, "limit": nb_old + nb_new, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_range_search_sparse(self): diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_diskann.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_diskann.py index 0b1e6a3d44..77fa43be2d 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_diskann.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_diskann.py @@ -144,6 +144,7 @@ class TestSearchDiskann(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": default_limit, + "pk_name": ct.default_int64_field_name, "_async": _async} ) @@ -177,7 +178,8 @@ class TestSearchDiskann(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": limit, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_search_invalid_params_with_diskann_B(self): @@ -245,7 +247,8 @@ class TestSearchDiskann(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, - "limit": default_limit} + "limit": default_limit, + "pk_name": ct.default_int64_field_name} ) @pytest.mark.tags(CaseLabel.L2) @@ -294,7 +297,8 @@ class TestSearchDiskann(TestcaseBase): check_items={"nq": default_nq, "ids": ids, "limit": default_limit, - "_async": _async} + "_async": _async, + "pk_name": ct.default_int64_field_name} ) @pytest.mark.tags(CaseLabel.L2) @@ -345,7 +349,8 @@ class TestSearchDiskann(TestcaseBase): check_items={"nq": default_nq, "ids": ids, "limit": default_limit, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L1) def test_search_with_scalar_field(self, _async): @@ -390,7 +395,8 @@ class TestSearchDiskann(TestcaseBase): 
check_items={"nq": default_nq, "ids": ids, "limit": limit, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("limit", [10, 100, 1000]) @@ -431,7 +437,8 @@ class TestSearchDiskann(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": limit, - "_async": _async} + "_async": _async, + "pk_name": ct.default_int64_field_name} ) @pytest.mark.tags(CaseLabel.L2) @@ -472,4 +479,5 @@ class TestSearchDiskann(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": default_limit, - "_async": _async}) + "_async": _async, + "pk_name": ct.default_int64_field_name}) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_invalid.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_invalid.py index ab7051fbec..4f7d26d47e 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_invalid.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_invalid.py @@ -737,7 +737,7 @@ class TestCollectionSearchInvalid(TestcaseBase): % collection_w.name) # err_msg = "collection" + collection_w.name + "was not loaded into memory" err_msg = "collection not loaded" - vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim, vector_data_type) + vectors = cf.gen_vectors(default_nq, default_dim, vector_data_type) collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, default_search_exp, timeout=1, check_task=CheckTasks.err_res, diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_json.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_json.py index af633f9eda..2a0cdc3548 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_json.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_json.py @@ -178,7 +178,8 @@ class TestCollectionSearchJSON(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_search_json_nullable_load_before_insert(self, nq, is_flush, enable_dynamic_field): @@ -204,7 +205,8 @@ class TestCollectionSearchJSON(TestcaseBase): default_search_params, default_limit, check_task=CheckTasks.check_search_results, check_items={"nq": nq, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.skip(reason="issue 37113") @@ -233,7 +235,8 @@ class TestCollectionSearchJSON(TestcaseBase): default_search_params, default_limit, check_task=CheckTasks.check_search_results, check_items={"nq": nq, - "limit": default_limit}) + "limit": default_limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L1) def test_search_expression_json_contains(self, enable_dynamic_field): @@ -270,7 +273,8 @@ class TestCollectionSearchJSON(TestcaseBase): default_search_params, default_limit, expression, check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, - "limit": 3}) + "limit": 3, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_search_expression_json_contains_list(self, auto_id): @@ -308,7 +312,8 @@ class TestCollectionSearchJSON(TestcaseBase): default_search_params, limit, expression, check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, - "limit": limit}) 
+ "limit": limit, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L2) def test_search_expression_json_contains_combined_with_normal(self, enable_dynamic_field): @@ -347,7 +352,8 @@ class TestCollectionSearchJSON(TestcaseBase): default_search_params, limit, expression, check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, - "limit": limit // 2}) + "limit": limit // 2, + "pk_name": ct.default_int64_field_name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expr_prefix", ["array_contains", "ARRAY_CONTAINS"]) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_none_default.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_none_default.py index a93a3f5d89..9925c7b808 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_none_default.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_none_default.py @@ -165,7 +165,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): vector_data_type=vector_data_type, nullable_fields={ct.default_float_field_name: null_data_percent})[0:5] # 2. generate search data - vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) + vectors = cf.gen_vectors(nq, dim, vector_data_type) # 3. search after insert collection_w.search(vectors[:nq], default_search_field, default_search_params, default_limit, @@ -176,6 +176,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit, "output_fields": [default_int64_field_name, default_float_field_name]}) @@ -233,6 +234,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": limit, + "pk_name": ct.default_int64_field_name, "_async": _async, "output_fields": [ct.default_string_field_name, ct.default_float_field_name]}) @@ -251,7 +253,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): vector_data_type=vector_data_type, default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0:5] # 2. generate search data - vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) + vectors = cf.gen_vectors(nq, dim, vector_data_type) # 3. search after insert collection_w.search(vectors[:nq], default_search_field, default_search_params, default_limit, @@ -262,6 +264,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit, "output_fields": [default_int64_field_name, default_float_field_name]}) @@ -345,6 +348,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": limit, "_async": _async, "output_fields": output_fields}) @@ -365,7 +369,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): nullable_fields={ct.default_float_field_name: 1}, default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0:5] # 2. generate search data - vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) + vectors = cf.gen_vectors(nq, dim, vector_data_type) # 3. 
search after insert collection_w.search(vectors[:nq], default_search_field, default_search_params, default_limit, @@ -376,6 +380,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit, "output_fields": [default_int64_field_name, default_float_field_name]}) @@ -410,6 +415,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit, "_async": _async, "output_fields": [ct.default_float_field_name, @@ -458,6 +464,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": limit, + "pk_name": ct.default_int64_field_name, "_async": _async, "output_fields": [ct.default_string_field_name, ct.default_float_field_name]}) @@ -503,7 +510,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): loaded_fields.append(default_float_field_name) collection_w.load(load_fields=loaded_fields) # 3. generate search data - vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim) + vectors = cf.gen_vectors(default_nq, default_dim) # 4. search after partial load field with None data output_fields = [default_int64_field_name, default_float_field_name] collection_w.search(vectors[:default_nq], default_search_field, @@ -513,6 +520,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit, "output_fields": output_fields}) @@ -536,7 +544,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): nullable_fields={ct.default_float_field_name: 0.5})[0:5] collection_name = collection_w.name # 2. generate search data - vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim) + vectors = cf.gen_vectors(default_nq, default_dim) # 3. search with expr "nullableFid == 0" search_exp = f"{ct.default_float_field_name} == 0" output_fields = [default_int64_field_name, default_float_field_name] @@ -548,6 +556,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": 1, + "pk_name": ct.default_int64_field_name, "output_fields": output_fields}) # 4. 
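The repeated gen_vectors_based_on_vector_type -> gen_vectors substitutions in this file suggest the two helpers were folded behind one name. A rough, floats-only stand-in for the contract the call sites rely on; the real helper lives in the suite's common functions and covers more vector types, so treat this as an assumption for illustration:

    import random

    def gen_vectors(nq, dim, vector_data_type="FLOAT_VECTOR"):
        # Stand-in only: nq random vectors of width dim.
        if vector_data_type != "FLOAT_VECTOR":
            raise NotImplementedError("sketch covers the float path only")
        return [[random.random() for _ in range(dim)] for _ in range(nq)]

    assert len(gen_vectors(2, 8)) == 2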
drop collection collection_w.drop() diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_pagination.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_pagination.py index 2486e341cb..31eb1d7ec3 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_pagination.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_pagination.py @@ -83,7 +83,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): client = self._client() # Create collection - collection_schema = self.create_schema(client, enable_dynamic_field=self.enable_dynamic_field)[0] + collection_schema = self.create_schema(client)[0] collection_schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) collection_schema.add_field(self.float_vector_field_name, DataType.FLOAT_VECTOR, dim=128) collection_schema.add_field(self.bfloat16_vector_field_name, DataType.BFLOAT16_VECTOR, dim=200) @@ -92,7 +92,8 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): collection_schema.add_field(default_float_field_name, DataType.FLOAT) collection_schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=256) collection_schema.add_field(default_int64_field_name, DataType.INT64) - self.create_collection(client, self.collection_name, schema=collection_schema, force_teardown=False) + self.create_collection(client, self.collection_name, schema=collection_schema, + enable_dynamic_field=self.enable_dynamic_field, force_teardown=False) for partition_name in self.partition_names: self.create_partition(client, self.collection_name, partition_name=partition_name) @@ -167,7 +168,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): metric_type="JACCARD", index_type="BIN_IVF_FLAT", params={"nlist": 128}) - self.create_index(client, self.collection_name, index_params=index_params) + self.create_index(client, self.collection_name, index_params=index_params, timeout=300) # Load collection self.load_collection(client, self.collection_name) @@ -210,9 +211,8 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": default_nq, "limit": limit, - "metric": "COSINE", - "vector_nq": vectors_to_search[:default_nq], - "original_vectors": [self.datas[i][self.float_vector_field_name] for i in range(len(self.datas))] + "pk_name": default_primary_key_field_name, + "metric": "COSINE" } ) all_pages_results.append(search_res_with_offset) @@ -268,7 +268,8 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": default_nq, - "limit": limit + "limit": limit, + "pk_name": default_primary_key_field_name } ) all_pages_results.append(search_res_with_offset) @@ -325,7 +326,8 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": default_nq, - "limit": limit + "limit": limit, + "pk_name": default_primary_key_field_name } ) all_pages_results.append(search_res_with_offset) @@ -381,7 +383,8 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": default_nq, - "limit": limit + "limit": limit, + "pk_name": default_primary_key_field_name } ) all_pages_results.append(search_res_with_offset) @@ -435,7 +438,8 @@ class 
TestMilvusClientSearchPagination(TestMilvusClientV2Base): search_params=search_param, limit=limit, check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": default_nq, - "limit": limit}) + "limit": limit, + "pk_name": default_primary_key_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("offset", [0, 100]) @@ -482,7 +486,8 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": default_nq, - "limit": limit} + "limit": limit, + "pk_name": default_primary_key_field_name} ) # 4. search with offset+limit @@ -523,7 +528,8 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": default_nq, - "limit": limit} + "limit": limit, + "pk_name": default_primary_key_field_name} ) # 7. search with offset+limit @@ -561,8 +567,8 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): collection_name = self.collection_name vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) # search with pagination in partition_1 - limit = 50 - pages = 10 + limit = 20 + pages = 5 for page in range(pages): offset = page * limit search_params = {"offset": offset} @@ -576,7 +582,9 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): limit=limit, check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, - "nq": default_nq, "limit": limit}) + "nq": default_nq, + "limit": limit, + "pk_name": default_primary_key_field_name}) # assert every id in search_res_with_offset %3 ==1 for hits in search_res_with_offset: @@ -597,7 +605,9 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): limit=limit, check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, - "nq": default_nq, "limit": limit}) + "nq": default_nq, + "limit": limit, + "pk_name": default_primary_key_field_name}) # assert every id in search_res_with_offset %3 ==1 or ==2 for hits in search_res_with_offset: @@ -623,7 +633,9 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): search_params=search_params, limit=default_limit, check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, - "nq": default_nq, "limit": default_limit}) + "nq": default_nq, + "limit": default_limit, + "pk_name": default_primary_key_field_name}) # search with offset = 0 offset = 0 search_params = {"offset": offset} @@ -632,7 +644,9 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): search_params=search_params, limit=default_limit, check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, - "nq": default_nq, "limit": default_limit}) + "nq": default_nq, + "limit": default_limit, + "pk_name": default_primary_key_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("offset", [0, 20, 100, 200]) @@ -655,7 +669,9 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): limit=limit, check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, - "nq": default_nq, "limit": limit}) + "nq": default_nq, + "limit": limit, + "pk_name": default_primary_key_field_name}) # 2. 
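The pagination cases above all encode the same invariant: page p of size limit is the slice [p*limit : (p+1)*limit] of one fully ranked result. The bookkeeping in isolation, with a plain list standing in for ranked hits:

    ranked = list(range(100))    # stand-in for a fully ranked hit list
    limit, pages = 20, 5         # mirrors the reduced values in this hunk
    paged = []
    for page in range(pages):
        offset = page * limit    # same arithmetic the tests use
        paged.extend(ranked[offset:offset + limit])
    assert paged == ranked[:limit * pages]   # pages tile the ranking exactly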
search with offset in search search_params = {} @@ -666,7 +682,9 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): limit=limit, check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, - "nq": default_nq, "limit": limit}) + "nq": default_nq, + "limit": limit, + "pk_name": default_primary_key_field_name}) # 3. compare results assert res1 == res2 @@ -769,7 +787,7 @@ class TestSearchPaginationIndependent(TestMilvusClientV2Base): "nq": default_nq, "limit": limit, "metric": metric_type, - } + "pk_name": default_primary_key_field_name} ) all_pages_results.append(search_res_with_offset) @@ -805,6 +823,7 @@ class TestSearchPaginationIndependent(TestMilvusClientV2Base): ****************************************************************** """ @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.GPU) @pytest.mark.parametrize('vector_dtype', ct.all_dense_vector_types) @pytest.mark.parametrize('index', ct.all_index_types[:7]) @pytest.mark.parametrize('metric_type', ct.dense_metrics) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_string.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_string.py index 16fe83e19a..953f8dd194 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_string.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_string.py @@ -141,6 +141,7 @@ class TestSearchString(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": default_int64_field_name, "limit": 1, "_async": _async}) if _async: @@ -177,6 +178,7 @@ class TestSearchString(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": default_int64_field_name, "limit": 1, "_async": _async}) if _async: @@ -216,6 +218,7 @@ class TestSearchString(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": ct.default_string_field_name, "limit": default_limit, "_async": _async}) @@ -259,6 +262,7 @@ class TestSearchString(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": default_limit, + "pk_name": ct.default_string_field_name, "_async": _async}) @pytest.mark.tags(CaseLabel.L2) @@ -291,6 +295,7 @@ class TestSearchString(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": default_int64_field_name, "limit": default_limit, "_async": _async}) @@ -363,6 +368,7 @@ class TestSearchString(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": default_int64_field_name, "limit": min(nb, len(filter_ids)), "_async": _async}) if _async: @@ -404,6 +410,7 @@ class TestSearchString(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": 2, + "pk_name": ct.default_string_field_name, "_async": _async}) @pytest.mark.tags(CaseLabel.L2) @@ -436,6 +443,7 @@ class TestSearchString(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": 2, + "pk_name": default_int64_field_name, "_async": _async}) @pytest.mark.tags(CaseLabel.L2) @@ -472,6 +480,7 @@ class TestSearchString(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": default_int64_field_name, "limit": default_limit, "_async": _async}) @@ -512,6 +521,7 @@ class TestSearchString(TestcaseBase): check_items={"nq": 
default_nq, "ids": insert_ids, "limit": 1, + "pk_name": default_int64_field_name, "_async": _async} ) @@ -552,6 +562,7 @@ class TestSearchString(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": 1, + "pk_name": default_int64_field_name, "_async": _async} ) @@ -597,6 +608,7 @@ class TestSearchString(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": default_limit, + "pk_name": ct.default_string_field_name, "_async": _async}) @pytest.mark.tags(CaseLabel.L2) @@ -683,6 +695,7 @@ class TestSearchString(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": default_int64_field_name, "limit": default_limit, "_async": _async}) @@ -722,6 +735,7 @@ class TestSearchString(TestcaseBase): check_items={"nq": default_nq, "ids": insert_ids, "limit": 1, + "pk_name": default_int64_field_name, "_async": _async}) if _async: res.done() diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py index 8ab4bdb39e..268efae512 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py @@ -108,462 +108,6 @@ class TestSearchBase(TestcaseBase): def _async(self, request): yield request.param - @pytest.mark.tags(CaseLabel.L2) - def test_search_flat_top_k(self, get_nq): - """ - target: test basic search function, all the search params is correct, change top-k value - method: search with the given vectors, check the result - expected: the length of the result is top_k - """ - top_k = 16385 # max top k is 16384 - nq = get_nq - collection_w, data, _, insert_ids = self.init_collection_general(prefix, insert_data=True, nb=nq)[0:4] - collection_w.load() - if top_k <= max_top_k: - res, _ = collection_w.search(vectors[:nq], default_search_field, default_search_params, top_k) - assert len(res[0]) <= top_k - else: - collection_w.search(vectors[:nq], default_search_field, default_search_params, top_k, - check_task=CheckTasks.err_res, - check_items={"err_code": 65535, - "err_msg": f"topk [{top_k}] is invalid, it should be in range" - f" [1, 16384], but got {top_k}"}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_index_empty_partition(self, index): - """ - target: test basic search function, all the search params are correct, test all index params, and build - method: add vectors into collection, search with the given vectors, check the result - expected: the length of the result is top_k, search collection with partition tag return empty - """ - top_k = ct.default_top_k - nq = ct.default_nq - dim = ct.default_dim - # 1. initialize with data - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nq, - partition_num=1, - dim=dim, is_index=False)[0:5] - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create partition - partition_name = "search_partition_empty" - collection_w.create_partition(partition_name=partition_name, description="search partition empty") - par = collection_w.partitions - # collection_w.load() - # 3. create different index - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - - # 4. 
search - res, _ = collection_w.search(vectors[:nq], default_search_field, - default_search_params, top_k, - default_search_exp) - - assert len(res[0]) <= top_k - - collection_w.search(vectors[:nq], default_search_field, - default_search_params, top_k, - default_search_exp, [partition_name], - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": [], - "limit": 0}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_index_partitions(self, index, get_top_k): - """ - target: test basic search function, all the search params are correct, test all index params, and build - method: search collection with the given vectors and tags, check the result - expected: the length of the result is top_k - """ - top_k = get_top_k - nq = ct.default_nq - dim = ct.default_dim - # 1. initialize with data in 2 partitions - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, - partition_num=1, - dim=dim, is_index=False)[0:5] - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create different index - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} - collection_w.create_index("float_vector", default_index) - - # 3. load and search - collection_w.load() - par = collection_w.partitions - collection_w.search(vectors[:nq], default_search_field, - ct.default_search_params, top_k, - default_search_exp, [par[0].name, par[1].name], - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "limit": top_k, - "ids": insert_ids}) - - @pytest.mark.tags(CaseLabel.L2) - def test_search_ip_flat(self, get_top_k): - """ - target: test basic search function, all the search params are correct, change top-k value - method: search with the given vectors, check the result - expected: the length of the result is top_k - """ - top_k = get_top_k - nq = ct.default_nq - dim = ct.default_dim - # 1. initialize with data - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nq, - dim=dim, is_index=False)[0:5] - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create ip index - default_index = {"index_type": "IVF_FLAT", - "params": {"nlist": 128}, "metric_type": "IP"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - search_params = {"metric_type": "IP", "params": {"nprobe": 10}} - res, _ = collection_w.search(vectors[:nq], default_search_field, - search_params, top_k, - default_search_exp) - assert len(res[0]) <= top_k - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_ip_after_index(self, index): - """ - target: test basic search function, all the search params are correct, test all index params, and build - method: search with the given vectors, check the result - expected: the length of the result is top_k - """ - top_k = ct.default_top_k - nq = ct.default_nq - dim = ct.default_dim - - # 1. initialize with data - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nq, - dim=dim, is_index=False)[0:5] - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. 
create ip index - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "IP"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - search_params = {"metric_type": "IP", "params": {"nprobe": 10}} - res, _ = collection_w.search(vectors[:nq], default_search_field, - search_params, top_k, - default_search_exp) - assert len(res[0]) <= top_k - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("dim", [2, 128, 768]) - @pytest.mark.parametrize("nb", [1, 2, 10, 100]) - def test_search_ip_brute_force(self, nb, dim): - """ - target: https://github.com/milvus-io/milvus/issues/17378. Ensure the logic of IP distances won't be changed. - method: search with the given vectors, check the result - expected: The inner product of vector themselves should be positive. - """ - top_k = 1 - - # 1. initialize with data - collection_w, insert_entities, _, insert_ids, _ = \ - self.init_collection_general(prefix, True, nb, is_binary=False, - is_index=False, dim=dim)[0:5] - flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "IP"} - collection_w.create_index(ct.default_float_vec_field_name, flat_index) - insert_vectors = insert_entities[0][default_search_field].tolist() - - # 2. load collection. - collection_w.load() - - # 3. search and then check if the distances are expected. - res, _ = collection_w.search(insert_vectors[:nb], default_search_field, - ct.default_search_ip_params, top_k, - default_search_exp) - for i, v in enumerate(insert_vectors): - assert len(res[i]) == 1 - ref = ip(v, v) - got = res[i][0].distance - assert abs(got - ref) <= epsilon - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_ip_index_empty_partition(self, index): - """ - target: test basic search function, all the search params are correct, test all index params, and build - method: add vectors into collection, search with the given vectors, check the result - expected: the length of the result is top_k, search collection with partition tag return empty - """ - top_k = ct.default_top_k - nq = ct.default_nq - dim = ct.default_dim - # 1. initialize with data - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nq, - partition_num=1, - dim=dim, is_index=False)[0:5] - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create partition - partition_name = "search_partition_empty" - collection_w.create_partition(partition_name=partition_name, description="search partition empty") - par = collection_w.partitions - # 3. create different index - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "IP"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - - # 4. 
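test_search_ip_brute_force above pins the sign convention behind issue 17378: the inner product of any nonzero vector with itself is its squared norm, hence positive, and a FLAT/IP search must reproduce it within epsilon. The identity, checked numerically (numpy here purely for illustration):

    import numpy as np

    v = np.random.default_rng(2).standard_normal(128)
    assert float(v @ v) > 0.0                                   # <v, v> = ||v||^2 > 0
    assert abs(float(v @ v) - np.linalg.norm(v) ** 2) < 1e-6    # same identity, via the norm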
search - search_params = {"metric_type": "IP", "params": {"nprobe": 10}} - res, _ = collection_w.search(vectors[:nq], default_search_field, - search_params, top_k, - default_search_exp) - - assert len(res[0]) <= top_k - - collection_w.search(vectors[:nq], default_search_field, - search_params, top_k, - default_search_exp, [partition_name], - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": [], - "limit": 0}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_ip_index_partitions(self, index): - """ - target: test basic search function, all the search params are correct, test all index params, and build - method: search collection with the given vectors and tags, check the result - expected: the length of the result is top_k - """ - top_k = ct.default_top_k - nq = ct.default_nq - dim = ct.default_dim - # 1. initialize with data - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nq, - partition_num=1, - dim=dim, is_index=False)[0:5] - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create partition - par_name = collection_w.partitions[0].name - # 3. create different index - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "IP"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - - # 4. search - search_params = {"metric_type": "IP", "params": {"nprobe": 10}} - res, _ = collection_w.search(vectors[:nq], default_search_field, - search_params, top_k, - default_search_exp, [par_name]) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_cosine_all_indexes(self, index): - """ - target: test basic search function, all the search params are correct, test all index params, and build - method: search collection with the given vectors and tags, check the result - expected: the length of the result is top_k - """ - # 1. initialize with data - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, - is_index=False)[0:5] - # 2. create index - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - - # 3. search - search_params = {"metric_type": "COSINE"} - res, _ = collection_w.search(vectors[:default_nq], default_search_field, - search_params, default_limit, default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit}) - - # 4. check cosine distance - for i in range(default_nq): - for distance in res[i].distances: - assert 1 >= distance >= -1 - - @pytest.mark.tags(CaseLabel.L2) - def test_search_cosine_results_same_as_l2(self): - """ - target: test search results of l2 and cosine keep the same - method: 1. search L2 - 2. search cosine - 3. compare the results - expected: raise no exception - """ - nb = ct.default_nb - # 1. prepare original data and normalized data - original_vec = [[random.random() for _ in range(ct.default_dim)] for _ in range(nb)] - normalize_vec = preprocessing.normalize(original_vec, axis=1, norm='l2') - normalize_vec = normalize_vec.tolist() - data = cf.gen_default_dataframe_data() - - # 2. 
create L2 collection and insert normalized data - collection_w1 = self.init_collection_general(prefix, is_index=False)[0] - data[ct.default_float_vec_field_name] = normalize_vec - collection_w1.insert(data) - - # 2. create index L2 - default_index = {"index_type": "IVF_SQ8", "params": {"nlist": 64}, "metric_type": "L2"} - collection_w1.create_index("float_vector", default_index) - collection_w1.load() - - # 3. search L2 - search_params = {"params": {"nprobe": 10}, "metric_type": "L2"} - res_l2, _ = collection_w1.search(vectors[:default_nq], default_search_field, - search_params, default_limit, default_search_exp) - - # 4. create cosine collection and insert original data - collection_w2 = self.init_collection_general(prefix, is_index=False)[0] - data[ct.default_float_vec_field_name] = original_vec - collection_w2.insert(data) - - # 5. create index cosine - default_index = {"index_type": "IVF_SQ8", "params": {"nlist": 64}, "metric_type": "COSINE"} - collection_w2.create_index("float_vector", default_index) - collection_w2.load() - - # 6. search cosine - search_params = {"params": {"nprobe": 10}, "metric_type": "COSINE"} - res_cosine, _ = collection_w2.search(vectors[:default_nq], default_search_field, - search_params, default_limit, default_search_exp) - - # 7. check the search results - for i in range(default_nq): - assert res_l2[i].ids == res_cosine[i].ids - - @pytest.mark.tags(CaseLabel.L2) - def test_search_cosine_results_same_as_ip(self): - """ - target: test search results of ip and cosine keep the same - method: 1. search IP - 2. search cosine - 3. compare the results - expected: raise no exception - """ - # 1. create collection and insert data - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] - - # 2. search IP - default_index = {"index_type": "IVF_SQ8", "params": {"nlist": 64}, "metric_type": "IP"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - search_params = {"params": {"nprobe": 10}, "metric_type": "IP"} - res_ip, _ = collection_w.search(vectors[:default_nq], default_search_field, - search_params, default_limit, default_search_exp) - - # 3. search cosine - collection_w.release() - collection_w.drop_index() - default_index = {"index_type": "IVF_SQ8", "params": {"nlist": 64}, "metric_type": "COSINE"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - search_params = {"params": {"nprobe": 10}, "metric_type": "COSINE"} - res_cosine, _ = collection_w.search(vectors[:default_nq], default_search_field, - search_params, default_limit, default_search_exp) - - # 4. 
check the search results - for i in range(default_nq): - assert res_ip[i].ids == res_cosine[i].ids - log.info(res_cosine[i].distances) - log.info(res_ip[i].distances) - - @pytest.mark.tags(CaseLabel.L2) - def test_search_without_connect(self): - """ - target: test search vectors without connection - method: use disconnected instance, call search method and check if search successfully - expected: raise exception - """ - self._connect() - - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, - ct.default_nq)[0:5] - vectors = [[random.random() for _ in range(ct.default_dim)] - for _ in range(nq)] - - collection_w.load() - self.connection_wrap.remove_connection(ct.default_alias) - res_list, _ = self.connection_wrap.list_connections() - assert ct.default_alias not in res_list - - res, _ = collection_w.search(vectors[:nq], default_search_field, - ct.default_search_params, ct.default_top_k, - default_search_exp, - check_task=CheckTasks.err_res, - check_items={"err_code": 1, - "err_msg": "should create connection first"}) - - @pytest.mark.tags(CaseLabel.L2) - # @pytest.mark.timeout(300) - def test_search_concurrent_multithreads_single_connection(self, _async): - """ - target: test concurrent search with multi processes - method: search with 10 processes, each process uses dependent connection - expected: status ok and the returned vectors should be query_records - """ - threads_num = 10 - threads = [] - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general( - prefix, True, ct.default_nb)[0:5] - - def search(collection_w): - vectors = [[random.random() for _ in range(ct.default_dim)] - for _ in range(nq)] - collection_w.search(vectors[:nq], default_search_field, - default_search_params, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - # 2. 
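The two removed equivalence tests (..._results_same_as_l2 and ..._results_same_as_ip) rest on one identity: for unit-norm vectors, ||a - b||^2 = 2 - 2*cos(a, b) and <a, b> = cos(a, b), so L2, IP and COSINE induce the same ranking on normalized data; that is why the first test normalizes with sklearn before inserting. A numerical check of the identity (numpy for brevity):

    import numpy as np

    rng = np.random.default_rng(0)
    a, b = rng.standard_normal(8), rng.standard_normal(8)
    a, b = a / np.linalg.norm(a), b / np.linalg.norm(b)   # unit-normalize
    cos = float(a @ b)                                    # equals <a, b> for unit vectors
    l2_sq = float(np.sum((a - b) ** 2))
    assert abs(l2_sq - (2 - 2 * cos)) < 1e-9              # ||a - b||^2 = 2 - 2*cos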
search with multi-processes - log.info("test_search_concurrent_multithreads_single_connection: searching with %s processes" % threads_num) - for i in range(threads_num): - t = threading.Thread(target=search, args=(collection_w,)) - threads.append(t) - t.start() - time.sleep(0.2) - for t in threads: - t.join() - - @pytest.mark.tags(CaseLabel.L2) - def test_search_multi_collections(self): - """ - target: test search multi collections of L2 - method: add vectors into 10 collections, and search - expected: search status ok, the length of result - """ - num = 10 - top_k = 10 - nq = 20 - - for i in range(num): - collection = gen_unique_str(uid + str(i)) - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general( - collection, True, ct.default_nb)[0:5] - assert len(insert_ids) == default_nb - vectors = [[random.random() for _ in range(ct.default_dim)] - for _ in range(nq)] - collection_w.search(vectors[:nq], default_search_field, - default_search_params, top_k, - default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": top_k}) - @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("index", ct.all_index_types[:6]) def test_each_index_with_mmap_enabled_search(self, index): @@ -694,505 +238,6 @@ class TestCollectionSearch(TestcaseBase): ****************************************************************** """ - @pytest.mark.tags(CaseLabel.L0) - def test_search_normal(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type): - """ - target: test search normal case - method: create connection, collection, insert and search - expected: 1. search successfully with limit(topK) - """ - # 1. initialize with data - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, - enable_dynamic_field=enable_dynamic_field, - vector_data_type=vector_data_type)[0:5] - # 2. generate search data - vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) - # 3. search after insert - collection_w.search(vectors[:nq], default_search_field, - default_search_params, default_limit, - default_search_exp, - guarantee_timestamp=0, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit}) - - @pytest.mark.tags(CaseLabel.L0) - def test_search_normal_without_specify_metric_type(self): - """ - target: test search without specify metric type - method: create connection, collection, insert and search - expected: 1. search successfully with limit(topK) - """ - nq = 2 - dim = 32 - auto_id = True - # 1. initialize with data - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general( - prefix, True, auto_id=auto_id, dim=dim, is_flush=True)[0:5] - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - search_params = {"params": {"nprobe": 10}} - # 2. search after insert - collection_w.search(vectors[:nq], default_search_field, - search_params, default_limit, - default_search_exp, - guarantee_timestamp=0, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit}) - - @pytest.mark.tags(CaseLabel.L1) - def test_search_normal_without_specify_anns_field(self): - """ - target: test search normal case - method: create connection, collection, insert and search - expected: 1. search successfully with limit(topK) - """ - nq = 2 - dim = 32 - auto_id = True - # 1. 
initialize with data - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general( - prefix, True, auto_id=auto_id, dim=dim, is_flush=True)[0:5] - # 2. search after insert - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - collection_w.search(vectors[:nq], "", - default_search_params, default_limit, - default_search_exp, - guarantee_timestamp=0, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit}) - - @pytest.mark.tags(CaseLabel.L0) - def test_search_with_hit_vectors(self, nq): - """ - target: test search with vectors in collections - method: create connections,collection insert and search vectors in collections - expected: search successfully with limit(topK) and can be hit at top 1 (min distance is 0) - """ - dim = 64 - auto_id = False - enable_dynamic_field = True - collection_w, _vectors, _, insert_ids = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:4] - # get vectors that inserted into collection - vectors = [] - if enable_dynamic_field: - for vector in _vectors[0]: - vector = vector[ct.default_float_vec_field_name] - vectors.append(vector) - else: - vectors = np.array(_vectors[0]).tolist() - vectors = [vectors[i][-1] for i in range(nq)] - log.info("test_search_with_hit_vectors: searching collection %s" % - collection_w.name) - search_res, _ = collection_w.search(vectors[:nq], default_search_field, - default_search_params, default_limit, - default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit}) - log.info("test_search_with_hit_vectors: checking the distance of top 1") - for hits in search_res: - # verify that top 1 hit is itself,so min distance is 0 - assert 1.0 - hits.distances[0] <= epsilon - - @pytest.mark.tags(CaseLabel.L2) - def test_search_multi_vector_fields(self, nq, is_flush, vector_data_type): - """ - target: test search normal case - method: create connection, collection, insert and search - expected: 1. search successfully with limit(topK) - """ - # 1. initialize with data - dim = 64 - auto_id = True - enable_dynamic_field = False - multiple_dim_array = [dim, dim] - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, - enable_dynamic_field=enable_dynamic_field, - multiple_dim_array=multiple_dim_array, - vector_data_type=vector_data_type, - nullable_fields={ct.default_string_field_name: 1}, - default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0:5] - # 2. generate search data - vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) - vector_name_list = cf.extract_vector_field_name_list(collection_w) - vector_name_list.append(default_search_field) - # 3. 
search after insert - for search_field in vector_name_list: - collection_w.search(vectors[:nq], search_field, - default_search_params, default_limit, - default_search_exp, - output_fields=[ct.default_float_field_name, ct.default_string_field_name], - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit, - "output_fields": [ct.default_float_field_name, - ct.default_string_field_name]}) - - @pytest.mark.tags(CaseLabel.L1) - def test_search_random_primary_key(self, random_primary_key): - """ - target: test search for collection with random primary keys - method: create connection, collection, insert and search - expected: Search without errors and data consistency - """ - # 1. initialize collection with random primary key - collection_w, _vectors, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, 10, random_primary_key=random_primary_key, - language="Russian")[0:5] - # 2. search - log.info("test_search_random_primary_key: searching collection %s" % collection_w.name) - collection_w.search(vectors[:default_nq], default_search_field, - default_search_params, default_limit, - default_search_exp, - output_fields=[default_int64_field_name, - default_float_field_name, - default_json_field_name], - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": 10, - "original_entities": _vectors, - "output_fields": [default_int64_field_name, - default_float_field_name, - default_json_field_name]}) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("dup_times", [1, 2, 3]) - def test_search_with_dup_primary_key(self, _async, dup_times): - """ - target: test search with duplicate primary key - method: 1.insert same data twice - 2.search - expected: search results are de-duplicated - """ - # initialize with data - nb = ct.default_nb - nq = ct.default_nq - dim = 128 - auto_id = True - collection_w, insert_data, _, insert_ids = self.init_collection_general(prefix, True, nb, - auto_id=auto_id, - dim=dim)[0:4] - # insert dup data multi times - for i in range(dup_times): - insert_res, _ = collection_w.insert(insert_data[0]) - insert_ids.extend(insert_res.primary_keys) - # search - vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - search_res, _ = collection_w.search(vectors[:nq], default_search_field, - default_search_params, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - if _async: - search_res.done() - search_res = search_res.result() - # assert that search results are de-duplicated - for hits in search_res: - ids = hits.ids - assert sorted(list(set(ids))) == sorted(ids) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("search_params", [{}, {"metric_type": "COSINE"}]) - def test_search_with_default_search_params(self, _async, search_params): - """ - target: test search with default search params - method: search with default search params - expected: search successfully - """ - # initialize with data - collection_w, insert_data, _, insert_ids = self.init_collection_general(prefix, True)[0:4] - # search - collection_w.search(vectors[:nq], default_search_field, - search_params, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - 
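test_search_with_hit_vectors above searches with vectors that were themselves inserted, so under COSINE the top-1 hit must be the query itself at similarity close to 1.0, which is exactly the 1.0 - hits.distances[0] <= epsilon assertion. A self-contained brute-force analogue:

    import numpy as np

    def cosine(u, v):
        return float(u @ v / (np.linalg.norm(u) * np.linalg.norm(v)))

    corpus = np.random.default_rng(1).standard_normal((50, 16))
    query = corpus[7]                                # search with an inserted vector
    scores = [cosine(query, row) for row in corpus]
    top1 = int(np.argmax(scores))                    # brute-force "search"
    assert top1 == 7 and abs(scores[top1] - 1.0) < 1e-9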
@pytest.mark.tags(CaseLabel.L1) - def test_accurate_search_with_multi_segments(self): - """ - target: search collection with multi segments accurately - method: insert and flush twice - expect: result pk should be [19,9,18] - """ - # 1. create a collection, insert data and flush - nb = 10 - dim = 64 - collection_w = self.init_collection_general( - prefix, True, nb, dim=dim, is_index=False)[0] - - # 2. insert data and flush again for two segments - data = cf.gen_default_dataframe_data(nb=nb, dim=dim, start=nb) - collection_w.insert(data) - collection_w.flush() - - # 3. create index and load - collection_w.create_index( - ct.default_float_vec_field_name, index_params=ct.default_flat_index) - collection_w.load() - - # 4. get inserted original data - inserted_vectors = collection_w.query(expr="int64 >= 0", output_fields=[ - ct.default_float_vec_field_name]) - original_vectors = [] - for single in inserted_vectors[0]: - single_vector = single[ct.default_float_vec_field_name] - original_vectors.append(single_vector) - - # 5. Calculate the searched ids - limit = 2 * nb - vectors = [[random.random() for _ in range(dim)] for _ in range(1)] - distances = [] - for original_vector in original_vectors: - distance = cf.cosine(vectors, original_vector) - distances.append(distance) - distances_max = heapq.nlargest(limit, distances) - distances_index_max = map(distances.index, distances_max) - - # 6. search - collection_w.search(vectors, default_search_field, - default_search_params, limit, - check_task=CheckTasks.check_search_results, - check_items={ - "nq": 1, - "limit": limit, - "ids": list(distances_index_max) - }) - - @pytest.mark.tags(CaseLabel.L1) - def test_search_with_empty_vectors(self, _async): - """ - target: test search with empty query vector - method: search using empty query vector - expected: search successfully with 0 results - """ - # 1. initialize without data - dim = 64 - auto_id = False - enable_dynamic_field = False - collection_w = self.init_collection_general(prefix, True, - auto_id=auto_id, dim=dim, - enable_dynamic_field=enable_dynamic_field)[0] - # 2. search collection without data - log.info("test_search_with_empty_vectors: Searching collection %s " - "using empty vector" % collection_w.name) - collection_w.search([], default_search_field, default_search_params, - default_limit, default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": 0, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - def test_search_with_ndarray(self, _async): - """ - target: test search with ndarray - method: search using ndarray data - expected: search successfully - """ - # 1. initialize without data - dim = 64 - auto_id = True - enable_dynamic_field = False - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - auto_id=auto_id, - dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:4] - # 2. 
search collection without data - log.info("test_search_with_ndarray: Searching collection %s " - "using ndarray" % collection_w.name) - vectors = np.random.randn(default_nq, dim) - collection_w.search(vectors, default_search_field, default_search_params, - default_limit, default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("search_params", [{}, {"params": {}}, {"params": {"nprobe": 10}}]) - def test_search_normal_default_params(self, search_params, _async): - """ - target: test search normal case - method: create connection, collection, insert and search - expected: search successfully with limit(topK) - """ - # 1. initialize with data - dim = 64 - auto_id = False - enable_dynamic_field = False - collection_w, _, _, insert_ids = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:4] - # 2. rename collection - new_collection_name = cf.gen_unique_str(prefix + "new") - self.utility_wrap.rename_collection( - collection_w.name, new_collection_name) - collection_w = self.init_collection_general(auto_id=auto_id, dim=dim, name=new_collection_name, - enable_dynamic_field=enable_dynamic_field)[0] - # 3. search - log.info("test_search_normal_default_params: searching collection %s" % - collection_w.name) - vectors = [[random.random() for _ in range(dim)] - for _ in range(default_nq)] - collection_w.search(vectors[:default_nq], default_search_field, - search_params, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason="partition load and release constraints") - def test_search_before_after_delete(self, nq, _async): - """ - target: test search function before and after deletion - method: 1. search the collection - 2. delete a partition - 3. search the collection - expected: the deleted entities should not be searched - """ - # 1. initialize with data - dim = 64 - auto_id = False - nb = 1000 - limit = 1000 - partition_num = 1 - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, - partition_num, - auto_id=auto_id, dim=dim)[0:4] - # 2. search all the partitions before partition deletion - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - log.info( - "test_search_before_after_delete: searching before deleting partitions") - collection_w.search(vectors[:nq], default_search_field, - default_search_params, limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": limit, - "_async": _async}) - # 3. delete partitions - log.info("test_search_before_after_delete: deleting a partition") - par = collection_w.partitions - deleted_entity_num = par[partition_num].num_entities - print(deleted_entity_num) - entity_num = nb - deleted_entity_num - collection_w.drop_partition(par[partition_num].name) - log.info("test_search_before_after_delete: deleted a partition") - collection_w.create_index( - ct.default_float_vec_field_name, index_params=ct.default_flat_index) - collection_w.load() - # 4. 
search non-deleted part after delete partitions - log.info( - "test_search_before_after_delete: searching after deleting partitions") - collection_w.search(vectors[:nq], default_search_field, - default_search_params, limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids[:entity_num], - "limit": limit - deleted_entity_num, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L1) - def test_search_collection_after_release_load(self, nq, _async): - """ - target: search the pre-released collection after load - method: 1. create collection - 2. release collection - 3. load collection - 4. search the pre-released collection - expected: search successfully - """ - # 1. initialize without data - nb = 2000 - dim = 64 - auto_id = True - enable_dynamic_field = True - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, nb, 1, auto_id=auto_id, dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:5] - # 2. release collection - log.info("test_search_collection_after_release_load: releasing collection %s" % - collection_w.name) - collection_w.release() - log.info("test_search_collection_after_release_load: released collection %s" % - collection_w.name) - # 3. Search the pre-released collection after load - log.info("test_search_collection_after_release_load: loading collection %s" % - collection_w.name) - collection_w.load() - log.info("test_search_collection_after_release_load: searching after load") - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - collection_w.search(vectors[:nq], default_search_field, default_search_params, - default_limit, default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L1) - def test_search_load_flush_load(self, nq, _async): - """ - target: test search when load before flush - method: 1. insert data and load - 2. flush, and load - 3. search the collection - expected: search success with limit(topK) - """ - # 1. initialize with data - nb = 1000 - dim = 64 - auto_id = False - enable_dynamic_field = False - collection_w = self.init_collection_general(prefix, auto_id=auto_id, dim=dim, - enable_dynamic_field=enable_dynamic_field)[0] - # 2. insert data - insert_ids = cf.insert_data(collection_w, nb, auto_id=auto_id, dim=dim, - enable_dynamic_field=enable_dynamic_field)[3] - # 3. load data - collection_w.create_index( - ct.default_float_vec_field_name, index_params=ct.default_flat_index) - collection_w.load() - # 4. flush and load - collection_w.num_entities - collection_w.load() - # 5. search - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - collection_w.search(vectors[:nq], default_search_field, - default_search_params, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - @pytest.mark.skip("enable this later using session/strong consistency") @pytest.mark.tags(CaseLabel.L1) def test_search_new_data(self, nq, _async): @@ -1242,118 +287,6 @@ class TestCollectionSearch(TestcaseBase): "limit": nb_old + nb_new, "_async": _async}) - @pytest.mark.tags(CaseLabel.L1) - def test_search_different_data_distribution_with_index(self, auto_id, _async): - """ - target: test search different data distribution with index - method: 1. 
connect milvus - 2. create a collection - 3. insert data - 4. create an index - 5. Load and search - expected: Search successfully - """ - # 1. connect, create collection and insert data - dim = 64 - self._connect() - collection_w = self.init_collection_general( - prefix, False, dim=dim, is_index=False)[0] - dataframe = cf.gen_default_dataframe_data(dim=dim, start=-1500) - collection_w.insert(dataframe) - - # 2. create index - index_param = {"index_type": "IVF_FLAT", - "metric_type": "COSINE", "params": {"nlist": 100}} - collection_w.create_index("float_vector", index_param) - - # 3. load and search - collection_w.load() - vectors = [[random.random() for _ in range(dim)] - for _ in range(default_nq)] - collection_w.search(vectors[:default_nq], default_search_field, - default_search_params, default_limit, - _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - def test_search_max_dim(self, _async): - """ - target: test search with max configuration - method: create connection, collection, insert and search with max dim - expected: search successfully with limit(topK) - """ - # 1. initialize with data - auto_id = True - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 100, - auto_id=auto_id, - dim=max_dim)[0:4] - # 2. search - nq = 2 - log.info("test_search_max_dim: searching collection %s" % - collection_w.name) - vectors = [[random.random() for _ in range(max_dim)] - for _ in range(nq)] - collection_w.search(vectors[:nq], default_search_field, - default_search_params, nq, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": nq, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L1) - def test_search_min_dim(self, _async): - """ - target: test search with min configuration - method: create connection, collection, insert and search with dim=1 - expected: search successfully - """ - # 1. initialize with data - auto_id = True - enable_dynamic_field = False - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 100, - auto_id=auto_id, dim=min_dim, - enable_dynamic_field=enable_dynamic_field)[0:4] - # 2. 
search - nq = 2 - log.info("test_search_min_dim: searching collection %s" % - collection_w.name) - vectors = [[random.random() for _ in range(min_dim)] - for _ in range(nq)] - collection_w.search(vectors[:nq], default_search_field, - default_search_params, nq, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": nq, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("nq", [1, 20, 100, 8000, 16384]) - def test_search_different_nq(self, nq): - """ - target: test search with different nq - method: create collection, insert, load and search with different nq ∈ [1, 16384] - expected: search successfully with different nq - """ - collection_w, _, _, insert_ids = self.init_collection_general( - prefix, True, nb=20000)[0:4] - log.info("test_search_max_nq: searching collection %s" % - collection_w.name) - vectors = [[random.random() for _ in range(default_dim)] - for _ in range(nq)] - collection_w.search(vectors[:nq], default_search_field, - default_search_params, default_limit, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit}) - @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("shards_num", [-256, 0, ct.max_shards_num // 2, ct.max_shards_num]) def test_search_with_non_default_shard_nums(self, shards_num, _async): @@ -1392,40 +325,7 @@ class TestCollectionSearch(TestcaseBase): "limit": default_limit, "_async": _async}) - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("M", [4, 64]) - @pytest.mark.parametrize("efConstruction", [8, 512]) - def test_search_HNSW_index_with_max_ef(self, M, efConstruction, _async): - """ - target: test search HNSW index with max ef - method: connect milvus, create collection , insert, create index, load and search - expected: search successfully - """ - dim = M * 4 - auto_id = True - enable_dynamic_field = False - self._connect() - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] - HNSW_index_params = {"M": M, "efConstruction": efConstruction} - HNSW_index = {"index_type": "HNSW", - "params": HNSW_index_params, "metric_type": "L2"} - collection_w.create_index("float_vector", HNSW_index) - collection_w.load() - search_param = {"metric_type": "L2", "params": {"ef": 32768}} - vectors = [[random.random() for _ in range(dim)] - for _ in range(default_nq)] - collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("M", [4, 64]) @pytest.mark.parametrize("efConstruction", [8, 512]) def test_search_HNSW_index_with_redundant_param(self, M, efConstruction, _async): @@ -1461,7 +361,7 @@ class TestCollectionSearch(TestcaseBase): "limit": default_limit, "_async": _async}) - @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("M", [4, 64]) @pytest.mark.parametrize("efConstruction", [8, 512]) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -1497,886 +397,6 @@ class TestCollectionSearch(TestcaseBase): "limit": limit, "_async": _async}) - @pytest.mark.tags(CaseLabel.L1) - 
@pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_after_different_index_with_params(self, index, _async, scalar_index): - """ - target: test search after different index - method: test search after different index and corresponding search params - expected: search successfully with limit(topK) - """ - # 1. initialize with data - dim = 64 - auto_id = False - enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, 5000, partition_num=1, is_all_data_type=True, - auto_id=auto_id, dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:5] - # 2. create index on vector field and load - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} - vector_name_list = cf.extract_vector_field_name_list(collection_w) - vector_name_list.append(ct.default_float_vec_field_name) - for vector_name in vector_name_list: - collection_w.create_index(vector_name, default_index) - # 3. create index on scalar field - scalar_index_params = {"index_type": scalar_index, "params": {}} - collection_w.create_index(ct.default_int64_field_name, scalar_index_params) - collection_w.load() - # 4. search - search_params = cf.gen_search_param(index, "COSINE") - vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - for search_param in search_params: - log.info("Searching with search params: {}".format(search_param)) - limit = default_limit - if index == "HNSW": - limit = search_param["params"]["ef"] - if limit > max_limit: - limit = default_nb - if index == "DISKANN": - limit = search_param["params"]["search_list"] - collection_w.search(vectors[:default_nq], default_search_field, - search_param, limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.skip(reason="waiting for the address of bf16 data generation slow problem") - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_after_different_index_with_params_all_vector_type_multiple_vectors(self, index, - _async, - scalar_index): - """ - target: test search after different index - method: test search after different index and corresponding search params - expected: search successfully with limit(topK) - """ - auto_id = False - enable_dynamic_field = False - if index == "DISKANN": - pytest.skip("https://github.com/milvus-io/milvus/issues/30793") - # 1. initialize with data - collection_w, _, _, insert_ids, time_stamp =\ - self.init_collection_general(prefix, True, 5000, partition_num=1, is_all_data_type=True, - auto_id=auto_id, dim=default_dim, is_index=False, - enable_dynamic_field=enable_dynamic_field, - multiple_dim_array=[default_dim, default_dim])[0:5] - # 2. create index on vector field and load - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} - vector_name_list = cf.extract_vector_field_name_list(collection_w) - for vector_name in vector_name_list: - collection_w.create_index(vector_name, default_index) - # 3. create index on scalar field - scalar_index_params = {"index_type": scalar_index, "params": {}} - collection_w.create_index(ct.default_int64_field_name, scalar_index_params) - collection_w.load() - # 4. 
search - search_params = cf.gen_search_param(index, "COSINE") - vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] - for search_param in search_params: - log.info("Searching with search params: {}".format(search_param)) - limit = default_limit - if index == "HNSW": - limit = search_param["params"]["ef"] - if limit > max_limit: - limit = default_nb - if index == "DISKANN": - limit = search_param["params"]["search_list"] - collection_w.search(vectors[:default_nq], vector_name_list[0], - search_param, limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.parametrize("index", ct.all_index_types[9:11]) - def test_search_after_different_index_with_params_gpu(self, index, _async): - """ - target: test search after different index - method: test search after different index and corresponding search params - expected: search successfully with limit(topK) - """ - # 1. initialize with data - dim = 64 - auto_id = False - enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False, vector_data_type=DataType.SPARSE_FLOAT_VECTOR, - enable_dynamic_field=enable_dynamic_field)[0:5] - # 2. create index and load - params = cf.get_index_params_params(index) - if params.get("m"): - if (dim % params["m"]) != 0: - params["m"] = dim // 4 - if params.get("PQM"): - if (dim % params["PQM"]) != 0: - params["PQM"] = dim // 4 - default_index = {"index_type": index, "params": params, "metric_type": "L2"} - collection_w.create_index("sparse_vector", default_index) - collection_w.load() - # 3. search - search_params = cf.gen_search_param(index) - vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - for search_param in search_params: - log.info("Searching with search params: {}".format(search_param)) - collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("search_params", cf.gen_autoindex_search_params()) - @pytest.mark.skip("issue #24533 #24555") - def test_search_default_search_params_fit_for_autoindex(self, search_params, _async): - """ - target: test search using autoindex - method: test search using autoindex and its corresponding search params - expected: search successfully - """ - # 1. initialize with data - auto_id = True - collection_w = self.init_collection_general( - prefix, True, auto_id=auto_id, is_index=False)[0] - # 2. create index and load - collection_w.create_index("float_vector", {}) - collection_w.load() - # 3. 
search - log.info("Searching with search params: {}".format(search_params)) - collection_w.search(vectors[:default_nq], default_search_field, - search_params, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.skip("issue #27252") - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_after_different_index_with_min_dim(self, index, _async): - """ - target: test search after different index with min dim - method: test search after different index and corresponding search params with dim = 1 - expected: search successfully with limit(topK) - """ - # 1. initialize with data - auto_id = False - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - auto_id=auto_id, - dim=min_dim, is_index=False)[0:5] - # 2. create index and load - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "L2"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - # 3. search - search_params = cf.gen_search_param(index) - vectors = [[random.random() for _ in range(min_dim)] for _ in range(default_nq)] - for search_param in search_params: - log.info("Searching with search params: {}".format(search_param)) - collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.parametrize("index", ct.all_index_types[9:11]) - def test_search_after_different_index_with_min_dim_gpu(self, index, _async): - """ - target: test search after different index with min dim - method: test search after different index and corresponding search params with dim = 1 - expected: search successfully with limit(topK) - """ - # 1. initialize with data - auto_id = False - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - auto_id=auto_id, - vector_data_type=DataType.SPARSE_FLOAT_VECTOR, - dim=min_dim, is_index=False)[0:5] - # 2. create index and load - params = cf.get_index_params_params(index) - if params.get("m"): - params["m"] = min_dim - if params.get("PQM"): - params["PQM"] = min_dim - default_index = {"index_type": index, "params": params, "metric_type": "L2"} - collection_w.create_index("sparse_vector", default_index) - collection_w.load() - # 3. 
search - search_params = cf.gen_search_param(index) - vectors = [[random.random() for _ in range(min_dim)] for _ in range(default_nq)] - for search_param in search_params: - log.info("Searching with search params: {}".format(search_param)) - collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_after_index_different_metric_type(self, index, _async, metric_type): - """ - target: test search with different metric type - method: test search with different metric type - expected: searched successfully - """ - # 1. initialize with data - dim = 64 - auto_id = True - enable_dynamic_field = True - collection_w, _vectors, _, insert_ids, time_stamp =\ - self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] - # 2. get vectors that inserted into collection - original_vectors = [] - if enable_dynamic_field: - for vector in _vectors[0]: - vector = vector[ct.default_float_vec_field_name] - original_vectors.append(vector) - else: - for _vector in _vectors: - vectors_tmp = np.array(_vector).tolist() - vectors_single = [vectors_tmp[i][-1] for i in range(2500)] - original_vectors.append(vectors_single) - log.info(len(original_vectors)) - # 3. create different index - params = cf.get_index_params_params(index) - if params.get("m"): - if (dim % params["m"]) != 0: - params["m"] = dim // 4 - if params.get("PQM"): - if (dim % params["PQM"]) != 0: - params["PQM"] = dim // 4 - log.info("test_search_after_index_different_metric_type: Creating index-%s" % index) - default_index = {"index_type": index, "params": params, "metric_type": metric_type} - collection_w.create_index("float_vector", default_index) - log.info("test_search_after_index_different_metric_type: Created index-%s" % index) - collection_w.load() - # 4. search - search_params = cf.gen_search_param(index, metric_type) - vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - for search_param in search_params: - log.info("Searching with search params: {}".format(search_param)) - limit = default_limit - if index == "HNSW": - limit = search_param["params"]["ef"] - if limit > max_limit: - limit = default_nb - if index == "DISKANN": - limit = search_param["params"]["search_list"] - collection_w.search(vectors[:default_nq], default_search_field, - search_param, limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": limit, - "_async": _async, - "metric": metric_type, - "vector_nq": vectors[:default_nq], - "original_vectors": original_vectors}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason="issue 24957") - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_after_release_recreate_index(self, index, _async, metric_type): - """ - target: test search after new metric with different metric type - method: test search after new metric with different metric type - expected: searched successfully - """ - # 1. 
initialize with data - dim = 64 - auto_id = True - enable_dynamic_field = False - collection_w, _vectors, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] - # 2. get vectors that inserted into collection - original_vectors = [] - if enable_dynamic_field: - for vector in _vectors[0]: - vector = vector[ct.default_float_vec_field_name] - original_vectors.append(vector) - else: - for _vector in _vectors: - vectors_tmp = np.array(_vector).tolist() - vectors_single = [vectors_tmp[i][-1] for i in range(2500)] - original_vectors.append(vectors_single) - # 3. create different index - params = cf.get_index_params_params(index) - if params.get("m"): - if (dim % params["m"]) != 0: - params["m"] = dim // 4 - if params.get("PQM"): - if (dim % params["PQM"]) != 0: - params["PQM"] = dim // 4 - log.info("test_search_after_release_recreate_index: Creating index-%s" % index) - default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} - collection_w.create_index("float_vector", default_index) - log.info("test_search_after_release_recreate_index: Created index-%s" % index) - collection_w.load() - # 4. search - search_params = cf.gen_search_param(index, "COSINE") - vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - for search_param in search_params: - log.info("Searching with search params: {}".format(search_param)) - collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, _async=_async) - # 5. re-create index - collection_w.release() - collection_w.drop_index() - default_index = {"index_type": index, "params": params, "metric_type": metric_type} - collection_w.create_index("float_vector", default_index) - collection_w.load() - for search_param in search_params: - log.info("Searching with search params: {}".format(search_param)) - collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async, - "metric": metric_type, - "vector_nq": vectors[:default_nq], - "original_vectors": original_vectors}) - - @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.parametrize("index", ct.all_index_types[9:11]) - def test_search_after_index_different_metric_type_gpu(self, index, _async): - """ - target: test search with different metric type - method: test search with different metric type - expected: searched successfully - """ - # 1. initialize with data - dim = 64 - auto_id = True - enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id, - vector_data_type=DataType.SPARSE_FLOAT_VECTOR, - dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] - # 2. 
create different index - params = cf.get_index_params_params(index) - if params.get("m"): - if (dim % params["m"]) != 0: - params["m"] = dim // 4 - if params.get("PQM"): - if (dim % params["PQM"]) != 0: - params["PQM"] = dim // 4 - log.info("test_search_after_index_different_metric_type: Creating index-%s" % index) - default_index = {"index_type": index, "params": params, "metric_type": "IP"} - collection_w.create_index("sparse_vector", default_index) - log.info("test_search_after_index_different_metric_type: Created index-%s" % index) - collection_w.load() - # 3. search - search_params = cf.gen_search_param(index, "IP") - vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - for search_param in search_params: - log.info("Searching with search params: {}".format(search_param)) - collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - def test_search_collection_multiple_times(self, nq, _async): - """ - target: test search for multiple times - method: search for multiple times - expected: searched successfully - """ - # 1. initialize with data - nb = 1000 - dim = 64 - auto_id = False - enable_dynamic_field = False - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, - auto_id=auto_id, dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:4] - # 2. search for multiple times - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - for i in range(search_num): - log.info( - "test_search_collection_multiple_times: searching round %d" % (i + 1)) - collection_w.search(vectors[:nq], default_search_field, - default_search_params, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - def test_search_sync_async_multiple_times(self, nq): - """ - target: test async search after sync search case - method: create connection, collection, insert, - sync search and async search - expected: search successfully with limit(topK) - """ - # 1. initialize with data - nb = 1000 - dim = 64 - auto_id = True - enable_dynamic_field = False - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, nb, auto_id=auto_id, dim=dim, - enable_dynamic_field=enable_dynamic_field)[0:5] - # 2. 
search - log.info("test_search_sync_async_multiple_times: searching collection %s" % - collection_w.name) - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - for i in range(search_num): - log.info( - "test_search_sync_async_multiple_times: searching round %d" % (i + 1)) - for _async in [False, True]: - collection_w.search(vectors[:nq], default_search_field, - default_search_params, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason="issue #12680") - # TODO: add one more for binary vectors - # @pytest.mark.parametrize("vec_fields", [[cf.gen_float_vec_field(name="test_vector1")], - # [cf.gen_binary_vec_field(name="test_vector1")], - # [cf.gen_binary_vec_field(), cf.gen_binary_vec_field("test_vector1")]]) - def test_search_multiple_vectors_with_one_indexed(self): - """ - target: test indexing on one vector fields when there are multi float vec fields - method: 1. create collection with multiple float vector fields - 2. insert data and build index on one of float vector fields - 3. load collection and search - expected: load and search successfully - """ - vec_fields = [cf.gen_float_vec_field(name="test_vector1")] - schema = cf.gen_schema_multi_vector_fields(vec_fields) - collection_w = self.init_collection_wrap( - name=cf.gen_unique_str(prefix), schema=schema) - df = cf.gen_dataframe_multi_vec_fields(vec_fields=vec_fields) - collection_w.insert(df) - assert collection_w.num_entities == ct.default_nb - _index = {"index_type": "IVF_FLAT", "params": { - "nlist": 128}, "metric_type": "L2"} - res, ch = collection_w.create_index( - field_name="test_vector1", index_params=_index) - assert ch is True - collection_w.load() - vectors = [[random.random() for _ in range(default_dim)] - for _ in range(2)] - search_params = {"metric_type": "L2", "params": {"nprobe": 16}} - res_1, _ = collection_w.search(data=vectors, anns_field="test_vector1", - param=search_params, limit=1) - - @pytest.mark.tags(CaseLabel.L1) - def test_search_index_one_partition(self, _async): - """ - target: test search from partition - method: search from one partition - expected: searched successfully - """ - # 1. initialize with data - nb = 1200 - auto_id = False - enable_dynamic_field = True - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, nb, partition_num=1, auto_id=auto_id, - is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] - - # 2. create index - default_index = {"index_type": "IVF_FLAT", - "params": {"nlist": 128}, "metric_type": "L2"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - # 3. 
search in one partition - log.info( - "test_search_index_one_partition: searching (1000 entities) through one partition") - limit = 1000 - par = collection_w.partitions - if limit > par[1].num_entities: - limit_check = par[1].num_entities - else: - limit_check = limit - search_params = {"metric_type": "L2", "params": {"nprobe": 128}} - collection_w.search(vectors[:default_nq], default_search_field, - search_params, limit, default_search_exp, - [par[1].name], _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids[par[0].num_entities:], - "limit": limit_check, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - def test_search_index_partitions(self, nq, _async): - """ - target: test search from partitions - method: search from partitions - expected: searched successfully - """ - # 1. initialize with data - dim = 64 - nb = 1000 - auto_id = False - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, - partition_num=1, auto_id=auto_id, - dim=dim, is_index=False)[0:4] - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create index - default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - # 3. search through partitions - log.info("test_search_index_partitions: searching (1000 entities) through partitions") - par = collection_w.partitions - log.info("test_search_index_partitions: partitions: %s" % par) - search_params = {"metric_type": "L2", "params": {"nprobe": 64}} - collection_w.search(vectors[:nq], default_search_field, - search_params, ct.default_limit, default_search_exp, - [par[0].name, par[1].name], _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": ct.default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("partition_names", [["(.*)"], ["search(.*)"]]) - def test_search_index_partitions_fuzzy(self, partition_names): - """ - target: test search from partitions - method: search from partitions with fuzzy - partition name - expected: searched successfully - """ - # 1. initialize with data - nb = 2000 - dim = 64 - auto_id = False - enable_dynamic_field = False - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, partition_num=1, - auto_id=auto_id, dim=dim, is_index=False, - enable_dynamic_field=enable_dynamic_field)[0:4] - nq = 2 - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create index - nlist = 128 - default_index = {"index_type": "IVF_FLAT", "params": {"nlist": nlist}, "metric_type": "COSINE"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - # 3. search through partitions - limit = 100 - search_params = {"metric_type": "COSINE", "params": {"nprobe": nlist}} - collection_w.search(vectors[:nq], default_search_field, - search_params, limit=limit, expr=default_search_exp, - partition_names=partition_names, - check_task=CheckTasks.err_res, - check_items={ct.err_code: 65535, - ct.err_msg: f"partition name {partition_names[0]} not found"}) - - @pytest.mark.tags(CaseLabel.L2) - def test_search_index_partition_empty(self, nq, _async): - """ - target: test search the empty partition - method: search from the empty partition - expected: searched successfully with 0 results - """ - # 1. 
initialize with data - dim = 64 - auto_id = True - collection_w = self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False)[0] - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create empty partition - partition_name = "search_partition_empty" - collection_w.create_partition( - partition_name=partition_name, description="search partition empty") - par = collection_w.partitions - log.info("test_search_index_partition_empty: partitions: %s" % par) - # 3. create index - default_index = {"index_type": "IVF_FLAT", "params": { - "nlist": 128}, "metric_type": "COSINE"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - # 4. search the empty partition - log.info("test_search_index_partition_empty: searching %s " - "entities through empty partition" % default_limit) - collection_w.search(vectors[:nq], default_search_field, - default_search_params, default_limit, - default_search_exp, [partition_name], - _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": [], - "limit": 0, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"]) - def test_search_binary_jaccard_flat_index(self, nq, _async, index, is_flush): - """ - target: search binary_collection, and check the result: distance - method: compare the return distance value with value computed with JACCARD - expected: the return distance equals to the computed value - """ - # 1. initialize with binary data - dim = 64 - auto_id = False - collection_w, _, binary_raw_vector, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, 2, is_binary=True, auto_id=auto_id, - dim=dim, is_index=False, is_flush=is_flush)[0:5] - # 2. create index on sclalar and vector field - default_index = {"index_type": "INVERTED", "params": {}} - collection_w.create_index(ct.default_float_field_name, default_index) - default_index = {"index_type": index, "params": { - "nlist": 128}, "metric_type": "JACCARD"} - collection_w.create_index("binary_vector", default_index) - collection_w.load() - # 3. compute the distance - query_raw_vector, binary_vectors = cf.gen_binary_vectors(3000, dim) - distance_0 = cf.jaccard(query_raw_vector[0], binary_raw_vector[0]) - distance_1 = cf.jaccard(query_raw_vector[0], binary_raw_vector[1]) - # 4. search and compare the distance - search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}} - res = collection_w.search(binary_vectors[:nq], "binary_vector", - search_params, default_limit, "int64 >= 0", - _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": 2, - "_async": _async})[0] - if _async: - res.done() - res = res.result() - assert abs(res[0].distances[0] - - min(distance_0, distance_1)) <= epsilon - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"]) - def test_search_binary_hamming_flat_index(self, nq, _async, index, is_flush): - """ - target: search binary_collection, and check the result: distance - method: compare the return distance value with value computed with HAMMING - expected: the return distance equals to the computed value - """ - # 1. initialize with binary data - dim = 64 - auto_id = False - collection_w, _, binary_raw_vector, insert_ids = \ - self.init_collection_general(prefix, True, 2, is_binary=True, auto_id=auto_id, - dim=dim, is_index=False, is_flush=is_flush)[0:4] - # 2. 
create index - default_index = {"index_type": index, "params": { - "nlist": 128}, "metric_type": "HAMMING"} - collection_w.create_index("binary_vector", default_index) - # 3. compute the distance - collection_w.load() - query_raw_vector, binary_vectors = cf.gen_binary_vectors(3000, dim) - distance_0 = cf.hamming(query_raw_vector[0], binary_raw_vector[0]) - distance_1 = cf.hamming(query_raw_vector[0], binary_raw_vector[1]) - # 4. search and compare the distance - search_params = {"metric_type": "HAMMING", "params": {"nprobe": 10}} - res = collection_w.search(binary_vectors[:nq], "binary_vector", - search_params, default_limit, "int64 >= 0", - _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": 2, - "_async": _async})[0] - if _async: - res.done() - res = res.result() - assert abs(res[0].distances[0] - - min(distance_0, distance_1)) <= epsilon - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("tanimoto obsolete") - @pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"]) - def test_search_binary_tanimoto_flat_index(self, nq, _async, index, is_flush): - """ - target: search binary_collection, and check the result: distance - method: compare the return distance value with value computed with TANIMOTO - expected: the return distance equals to the computed value - """ - # 1. initialize with binary data - dim = 64 - auto_id = False - collection_w, _, binary_raw_vector, insert_ids = \ - self.init_collection_general(prefix, True, 2, is_binary=True, auto_id=auto_id, - dim=dim, is_index=False, is_flush=is_flush)[0:4] - log.info("auto_id= %s, _async= %s" % (auto_id, _async)) - # 2. create index - default_index = {"index_type": index, "params": { - "nlist": 128}, "metric_type": "TANIMOTO"} - collection_w.create_index("binary_vector", default_index) - collection_w.load() - # 3. compute the distance - query_raw_vector, binary_vectors = cf.gen_binary_vectors(3000, dim) - distance_0 = cf.tanimoto(query_raw_vector[0], binary_raw_vector[0]) - distance_1 = cf.tanimoto(query_raw_vector[0], binary_raw_vector[1]) - # 4. search and compare the distance - search_params = {"metric_type": "TANIMOTO", "params": {"nprobe": 10}} - res = collection_w.search(binary_vectors[:nq], "binary_vector", - search_params, default_limit, "int64 >= 0", - _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": 2, - "_async": _async})[0] - if _async: - res.done() - res = res.result() - assert abs(res[0].distances[0] - - min(distance_0, distance_1)) <= epsilon - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ["BIN_FLAT"]) - def test_search_binary_substructure_flat_index(self, _async, index, is_flush): - """ - target: search binary_collection, and check the result: distance - method: compare the return distance value with value computed with SUBSTRUCTURE. - (1) The returned limit(topK) are impacted by dimension (dim) of data - (2) Searched topK is smaller than set limit when dim is large - (3) It does not support "BIN_IVF_FLAT" index - (4) Only two values for distance: 0 and 1, 0 means hits, 1 means not - expected: the return distance equals to the computed value - """ - # 1. initialize with binary data - nq = 1 - dim = 8 - auto_id = True - collection_w, _, binary_raw_vector, insert_ids, time_stamp \ - = self.init_collection_general(prefix, True, default_nb, is_binary=True, auto_id=auto_id, - dim=dim, is_index=False, is_flush=is_flush)[0:5] - # 2. 
create index - default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "SUBSTRUCTURE"} - collection_w.create_index("binary_vector", default_index) - collection_w.load() - # 3. generate search vectors - _, binary_vectors = cf.gen_binary_vectors(nq, dim) - # 4. search and compare the distance - search_params = {"metric_type": "SUBSTRUCTURE", "params": {"nprobe": 10}} - res = collection_w.search(binary_vectors[:nq], "binary_vector", - search_params, default_limit, "int64 >= 0", - _async=_async)[0] - if _async: - res.done() - res = res.result() - assert res[0].distances[0] == 0.0 - assert len(res) <= default_limit - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ["BIN_FLAT"]) - def test_search_binary_superstructure_flat_index(self, _async, index, is_flush): - """ - target: search binary_collection, and check the result: distance - method: compare the return distance value with value computed with SUPERSTRUCTURE - (1) The returned limit(topK) are impacted by dimension (dim) of data - (2) Searched topK is smaller than set limit when dim is large - (3) It does not support "BIN_IVF_FLAT" index - (4) Only two values for distance: 0 and 1, 0 means hits, 1 means not - expected: the return distance equals to the computed value - """ - # 1. initialize with binary data - nq = 1 - dim = 8 - auto_id = True - collection_w, _, binary_raw_vector, insert_ids, time_stamp \ - = self.init_collection_general(prefix, True, default_nb, is_binary=True, auto_id=auto_id, - dim=dim, is_index=False, is_flush=is_flush)[0:5] - # 2. create index - default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "SUPERSTRUCTURE"} - collection_w.create_index("binary_vector", default_index) - collection_w.load() - # 3. generate search vectors - _, binary_vectors = cf.gen_binary_vectors(nq, dim) - # 4. search and compare the distance - search_params = {"metric_type": "SUPERSTRUCTURE", "params": {"nprobe": 10}} - res = collection_w.search(binary_vectors[:nq], "binary_vector", - search_params, default_limit, "int64 >= 0", - _async=_async)[0] - if _async: - res.done() - res = res.result() - assert len(res[0]) <= default_limit - assert res[0].distances[0] == 0.0 - - @pytest.mark.tags(CaseLabel.L2) - def test_search_binary_without_flush(self, metrics): - """ - target: test search without flush for binary data (no index) - method: create connection, collection, insert, load and search - expected: search successfully with limit(topK) - """ - # 1. initialize a collection without data - auto_id = True - collection_w = self.init_collection_general( - prefix, is_binary=True, auto_id=auto_id, is_index=False)[0] - # 2. insert data - insert_ids = cf.insert_data( - collection_w, default_nb, is_binary=True, auto_id=auto_id)[3] - # 3. load data - index_params = {"index_type": "BIN_FLAT", "params": { - "nlist": 128}, "metric_type": metrics} - collection_w.create_index("binary_vector", index_params) - collection_w.load() - # 4. 
search - log.info("test_search_binary_without_flush: searching collection %s" % - collection_w.name) - binary_vectors = cf.gen_binary_vectors(default_nq, default_dim)[1] - search_params = {"metric_type": metrics, "params": {"nprobe": 10}} - collection_w.search(binary_vectors[:default_nq], "binary_vector", - search_params, default_limit, - default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit}) - @pytest.mark.tags(CaseLabel.L1) def test_search_with_expression(self, null_data_percent): """ @@ -2890,8 +910,8 @@ class TestCollectionSearch(TestcaseBase): filter_ids_set = set(filter_ids) for hits in search_res: ids = hits.ids - log.info(ids) - log.info(filter_ids_set) + # log.info(ids) + # log.info(filter_ids_set) assert set(ids).issubset(filter_ids_set) @pytest.mark.tags(CaseLabel.L2) @@ -2917,8 +937,6 @@ class TestCollectionSearch(TestcaseBase): auto_id=auto_id, dim=dim, multiple_dim_array=[dim, dim], nullable_fields=nullable_fields)[0:4] # 2. search - log.info("test_search_expression_all_data_type: Searching collection %s" % - collection_w.name) search_exp = "int64 >= 0 && int32 >= 0 && int16 >= 0 " \ "&& int8 >= 0 && float >= 0 && double >= 0" limit = default_limit @@ -2973,7 +991,6 @@ class TestCollectionSearch(TestcaseBase): collection_w.load() # 3. search using expression which field value is out of bound - log.info("test_search_expression_different_data_type: Searching collection %s" % collection_w.name) expression = f"{field} >= {offset}" collection_w.search(vectors, default_search_field, default_search_params, default_limit, expression, output_fields=[field], @@ -3062,7 +1079,7 @@ class TestCollectionSearch(TestcaseBase): _id = random.randint(0, default_nb) string_value[_id] = string_value[_id].replace("\"", "\\\"") expression = f"{default_string_field_name} == \"{string_value[_id]}\"" - log.info("test_search_with_expression: searching with expression: %s" % expression) + log.debug("test_search_with_expression: searching with expression: %s" % expression) search_res, _ = collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, expression, check_task=CheckTasks.check_search_results, @@ -3086,7 +1103,6 @@ class TestCollectionSearch(TestcaseBase): auto_id=auto_id, dim=dim)[0:4] # 2. search - log.info("test_search_with_output_fields_empty: Searching collection %s" % collection_w.name) vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] collection_w.search(vectors[:nq], default_search_field, default_search_params, default_limit, @@ -3113,8 +1129,6 @@ class TestCollectionSearch(TestcaseBase): auto_id=auto_id, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search - log.info("test_search_with_output_field: Searching collection %s" % collection_w.name) - collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, default_search_exp, _async=_async, @@ -3139,7 +1153,6 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, insert_ids = \ self.init_collection_general(prefix, True, auto_id=auto_id, enable_dynamic_field=enable_dynamic_field)[0:4] # 2. search - log.info("test_search_with_output_field: Searching collection %s" % collection_w.name) collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, default_search_exp, _async=_async, @@ -3165,7 +1178,6 @@ class TestCollectionSearch(TestcaseBase): auto_id=auto_id, dim=dim)[0:4] # 2. 
search - log.info("test_search_with_output_fields: Searching collection %s" % collection_w.name) vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] output_fields = [default_int64_field_name, default_float_field_name] collection_w.search(vectors[:nq], default_search_field, @@ -3237,7 +1249,6 @@ class TestCollectionSearch(TestcaseBase): # 3. search with output field vector search_params = cf.gen_search_param(index, metrics) for search_param in search_params: - log.info(search_param) if index == "HNSW": limit = search_param["params"]["ef"] if limit > max_limit: @@ -3250,7 +1261,7 @@ class TestCollectionSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "limit": limit, - "original_entities": _vectors, + "original_entities": _vectors[0], "output_fields": [field_name]}) @pytest.mark.tags(CaseLabel.L1) @@ -3342,7 +1353,7 @@ class TestCollectionSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "limit": default_limit, - "original_entities": _vectors, + "original_entities": _vectors[0], "output_fields": [field_name]}) @pytest.mark.tags(CaseLabel.L2) @@ -3379,7 +1390,8 @@ class TestCollectionSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "limit": default_limit, - "original_entities": original_entities, + "pk_name": default_int64_field_name, + "original_entities": original_entities[0], "output_fields": output_fields}) if enable_dynamic_field: collection_w.search(vectors[:1], default_search_field, @@ -3388,7 +1400,8 @@ class TestCollectionSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "limit": default_limit, - "original_entities": original_entities, + "pk_name": default_int64_field_name, + "original_entities": original_entities[0], "output_fields": output_fields}) @pytest.mark.tags(CaseLabel.L2) @@ -3441,7 +1454,7 @@ class TestCollectionSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": 1, "limit": default_limit, - "original_entities": [data], + "original_entities": data, "output_fields": [field_name]}) @pytest.mark.tags(CaseLabel.L2) @@ -3458,7 +1471,6 @@ class TestCollectionSearch(TestcaseBase): collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id)[0:4] # 2. search - log.info("test_search_with_output_field_wildcard: Searching collection %s" % collection_w.name) output_fields = cf.get_wildcard_output_field_names(collection_w, wildcard_output_fields) collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, @@ -3467,6 +1479,7 @@ class TestCollectionSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit, "_async": _async, "output_fields": output_fields}) @@ -3521,6 +1534,7 @@ class TestCollectionSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": nq, "ids": insert_ids, + "pk_name": ct.default_int64_field_name, "limit": default_limit, "_async": _async}) @@ -4315,7 +2329,7 @@ class TestCollectionSearch(TestcaseBase): self.init_collection_general(prefix, True, is_flush=is_flush)[0:5] collection_name = collection_w.name # 2. generate search data - vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim) + vectors = cf.gen_vectors(default_nq, default_dim) # 3. 
search with expr "nullableFid == 0" search_exp = f"{ct.default_float_field_name} == 0" output_fields = [default_int64_field_name, default_float_field_name] diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py new file mode 100644 index 0000000000..303db9b879 --- /dev/null +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py @@ -0,0 +1,1222 @@ +import logging +import numpy as np +from common.constants import * +from utils.util_pymilvus import * +from common.common_type import CaseLabel, CheckTasks +from common import common_type as ct +from common import common_func as cf +from utils.util_log import test_log as log +from base.client_v2_base import TestMilvusClientV2Base +from concurrent.futures import ThreadPoolExecutor, as_completed +import random +import pytest +import pandas as pd +from faker import Faker + +Faker.seed(19530) +fake_en = Faker("en_US") +fake_zh = Faker("zh_CN") + +# patch faker to generate text with specific distribution +cf.patch_faker_text(fake_en, cf.en_vocabularies_distribution) +cf.patch_faker_text(fake_zh, cf.zh_vocabularies_distribution) + +pd.set_option("expand_frame_repr", False) + +prefix = "search_collection" +default_nb = ct.default_nb +default_nq = ct.default_nq +default_dim = ct.default_dim +default_limit = ct.default_limit +default_search_exp = "int64 >= 0" +default_search_string_exp = "varchar >= \"0\"" +default_search_mix_exp = "int64 >= 0 && varchar >= \"0\"" +default_json_search_exp = "json_field[\"number\"] >= 0" +perfix_expr = 'varchar like "0%"' + +default_vector_field_name = "vector" + + +@pytest.mark.xdist_group("TestMilvusClientSearchBasicV2") +@pytest.mark.tags(CaseLabel.GPU) +class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base): + """Test search functionality with new client API""" + + def setup_class(self): + super().setup_class(self) + self.collection_name = "TestMilvusClientSearchV2" + cf.gen_unique_str("_") + self.partition_names = ["partition_1", "partition_2"] + self.pk_field_name = ct.default_primary_field_name + self.float_vector_field_name = "float_vector" + self.bfloat16_vector_field_name = "bfloat16_vector" + self.sparse_vector_field_name = "sparse_vector" + self.binary_vector_field_name = "binary_vector" + self.float_vector_dim = 128 + self.bf16_vector_dim = 200 + self.binary_vector_dim = 256 + self.float_vector_metric = "COSINE" + self.bf16_vector_metric = "L2" + self.sparse_vector_metric = "IP" + self.binary_vector_metric = "JACCARD" + self.float_vector_index = "IVF_FLAT" + self.bf16_vector_index = "DISKANN" + self.sparse_vector_index = "SPARSE_INVERTED_INDEX" + self.binary_vector_index = "BIN_IVF_FLAT" + self.primary_keys = [] + self.enable_dynamic_field = True + self.dyna_filed_name1 = "dyna_filed_name1" + self.dyna_filed_name2 = "dyna_filed_name2" + self.datas = [] + + @pytest.fixture(scope="class", autouse=True) + def prepare_collection(self, request): + """ + Initialize collection before test class runs + """ + # Get client connection + client = self._client() + + # Create collection + collection_schema = self.create_schema(client, enable_dynamic_field=self.enable_dynamic_field)[0] + collection_schema.add_field(self.pk_field_name, DataType.INT64, is_primary=True, auto_id=False) + collection_schema.add_field(self.float_vector_field_name, DataType.FLOAT_VECTOR, dim=self.float_vector_dim) + collection_schema.add_field(self.bfloat16_vector_field_name, DataType.BFLOAT16_VECTOR, 
dim=self.bf16_vector_dim)
+        collection_schema.add_field(self.sparse_vector_field_name, DataType.SPARSE_FLOAT_VECTOR)
+        collection_schema.add_field(self.binary_vector_field_name, DataType.BINARY_VECTOR, dim=self.binary_vector_dim)
+        collection_schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
+        collection_schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=256, nullable=True)
+        self.create_collection(client, self.collection_name, schema=collection_schema, force_teardown=False)
+        for partition_name in self.partition_names:
+            self.create_partition(client, self.collection_name, partition_name=partition_name)
+
+        # Define number of insert iterations
+        insert_times = 10
+
+        # Generate vectors for each type and store in self
+        float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim,
+                                       vector_data_type=DataType.FLOAT_VECTOR)
+        bfloat16_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.bf16_vector_dim,
+                                          vector_data_type=DataType.BFLOAT16_VECTOR)
+        sparse_vectors = cf.gen_sparse_vectors(default_nb * insert_times, empty_percentage=2)
+        _, binary_vectors = cf.gen_binary_vectors(default_nb * insert_times, dim=self.binary_vector_dim)
+
+        # Insert data multiple times with non-duplicated primary keys
+        for j in range(insert_times):
+            # Group rows by partition based on primary key mod 3
+            default_rows = []
+            partition1_rows = []
+            partition2_rows = []
+
+            for i in range(default_nb):
+                pk = i + j * default_nb
+                row = {
+                    self.pk_field_name: pk,
+                    self.float_vector_field_name: list(float_vectors[pk]),
+                    self.bfloat16_vector_field_name: bfloat16_vectors[pk],
+                    self.sparse_vector_field_name: sparse_vectors[pk],
+                    self.binary_vector_field_name: binary_vectors[pk],
+                    ct.default_float_field_name: pk * 1.0 if pk % 5 == 0 else None,
+                    ct.default_string_field_name: str(pk) if pk % 5 == 0 else None,
+                    self.dyna_filed_name1: f"dyna_value_{pk}",
+                    self.dyna_filed_name2: pk * 1.0
+                }
+                self.datas.append(row)
+
+                # Distribute to partitions based on pk mod 3
+                if pk % 3 == 0:
+                    default_rows.append(row)
+                elif pk % 3 == 1:
+                    partition1_rows.append(row)
+                else:
+                    partition2_rows.append(row)
+
+            # Insert into respective partitions
+            if default_rows:
+                self.insert(client, self.collection_name, data=default_rows)
+            if partition1_rows:
+                self.insert(client, self.collection_name, data=partition1_rows, partition_name=self.partition_names[0])
+            if partition2_rows:
+                self.insert(client, self.collection_name, data=partition2_rows, partition_name=self.partition_names[1])
+
+            # Track the primary keys inserted in this round (the rows themselves are kept in self.datas above)
+            self.primary_keys.extend([i + j * default_nb for i in range(default_nb)])
+        self.flush(client, self.collection_name)
+
+        # Create index
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=self.float_vector_field_name,
+                               metric_type=self.float_vector_metric,
+                               index_type=self.float_vector_index,
+                               params={"nlist": 128})
+        index_params.add_index(field_name=self.bfloat16_vector_field_name,
+                               metric_type=self.bf16_vector_metric,
+                               index_type=self.bf16_vector_index,
+                               params={})
+        index_params.add_index(field_name=self.sparse_vector_field_name,
+                               metric_type=self.sparse_vector_metric,
+                               index_type=self.sparse_vector_index,
+                               params={})
+        index_params.add_index(field_name=self.binary_vector_field_name,
+                               metric_type=self.binary_vector_metric,
+                               index_type=self.binary_vector_index,
+                               params={"nlist": 128})
+        self.create_index(client, self.collection_name, index_params=index_params)
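+
+        # NOTE (editorial reference): the four indexes above pair each vector
+        # field with its own index type and metric:
+        #   float_vector    -> IVF_FLAT + COSINE (nlist=128)
+        #   bfloat16_vector -> DISKANN + L2
+        #   sparse_vector   -> SPARSE_INVERTED_INDEX + IP
+        #   binary_vector   -> BIN_IVF_FLAT + JACCARD (nlist=128)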
+
+        # Load collection
+        self.load_collection(client, self.collection_name)
+
+        def teardown():
+            self.drop_collection(self._client(), self.collection_name)
+
+        request.addfinalizer(teardown)
+
+    @pytest.mark.tags(CaseLabel.L0)
+    @pytest.mark.parametrize("search_params", [{"metric_type": "COSINE", "params": {"nprobe": 100}},
+                                               {"metric_type": "COSINE", "nprobe": 100},
+                                               {"metric_type": "COSINE"},
+                                               {"params": {"nprobe": 100}},
+                                               {"nprobe": 100},
+                                               {}])
+    def test_search_float_vectors(self, search_params):
+        """
+        target: test search float vectors
+        method: 1. connect and create a collection
+                2. search float vectors
+                3. verify search results
+        expected: search successfully and results are correct
+        """
+        client = self._client()
+        collection_name = self.collection_name
+
+        # Search with inserted vectors
+        vectors_to_search = [self.datas[i][self.float_vector_field_name] for i in range(default_nq)]
+
+        search_res, _ = self.search(
+            client,
+            collection_name,
+            vectors_to_search,
+            anns_field=self.float_vector_field_name,
+            search_params=search_params,
+            limit=default_limit,
+            check_task=CheckTasks.check_search_results,
+            check_items={"enable_milvus_client_api": True,
+                         "nq": default_nq,
+                         "limit": default_limit,
+                         "pk_name": self.pk_field_name,
+                         "metric": self.float_vector_metric
+                         }
+        )
+        # verify the top-1 hit is the query vector itself, i.e. its COSINE similarity is ~1.0
+        for i in range(default_nq):
+            assert 1.0 - search_res[i].distances[0] <= epsilon
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_search_bfloat16_vectors(self):
+        """
+        target: test search bfloat16 vectors
+        method: 1. connect and create a collection
+                2. search bfloat16 vectors
+                3. verify search results
+        expected: search successfully and results are correct
+        """
+        client = self._client()
+        collection_name = self.collection_name
+
+        # Search with bfloat16 vectors
+        vectors_to_search = cf.gen_vectors(default_nq, self.bf16_vector_dim,
+                                           vector_data_type=DataType.BFLOAT16_VECTOR)
+        search_params = {"metric_type": self.bf16_vector_metric, "params": {}}
+
+        search_res, _ = self.search(
+            client,
+            collection_name,
+            vectors_to_search[:default_nq],
+            anns_field=self.bfloat16_vector_field_name,
+            search_params=search_params,
+            limit=default_limit,
+            check_task=CheckTasks.check_search_results,
+            check_items={"enable_milvus_client_api": True,
+                         "nq": default_nq,
+                         "limit": default_limit,
+                         "pk_name": self.pk_field_name,
+                         "metric": self.bf16_vector_metric
+                         }
+        )
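+
+    # Reference sketch (editorial, assuming numpy): the self-hit assertion in
+    # test_search_float_vectors above holds because the COSINE metric reports
+    # similarity, which is ~1.0 when the query vector is its own nearest hit:
+    #
+    #   import numpy as np
+    #
+    #   def cosine_similarity(a, b):
+    #       # similarity = <a, b> / (|a| * |b|); 1.0 for identical vectors
+    #       return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
+    #
+    #   v = np.random.rand(128)
+    #   assert abs(1.0 - cosine_similarity(v, v)) < 1e-6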
+ + @pytest.mark.tags(CaseLabel.L0) + def test_search_bfloat16_vectors(self): + """ + target: test search bfloat16 vectors + method: 1. connect and create a collection + 2. search bfloat16 vectors + 3. verify search results + expected: search successfully and results are correct + """ + client = self._client() + collection_name = self.collection_name + + # Search with bfloat16 vectors + vectors_to_search = cf.gen_vectors(default_nq, self.bf16_vector_dim, + vector_data_type=DataType.BFLOAT16_VECTOR) + search_params = {"metric_type": self.bf16_vector_metric, "params": {}} + + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.bfloat16_vector_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": default_limit, + "pk_name": self.pk_field_name, + "metric": self.bf16_vector_metric + } + ) + + @pytest.mark.tags(CaseLabel.L0) + def test_search_sparse_vectors(self): + """ + target: test search sparse vectors + method: 1. connect and create a collection + 2. search sparse vectors + 3. verify search results + expected: search successfully and results are correct + """ + client = self._client() + collection_name = self.collection_name + + # Search with sparse vectors + vectors_to_search = cf.gen_sparse_vectors(default_nq, empty_percentage=2) + search_params = {"metric_type": self.sparse_vector_metric, "params": {}} + + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.sparse_vector_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": default_limit, + "pk_name": self.pk_field_name, + "metric": self.sparse_vector_metric + } + ) + + # search again without specifying anns_field + error = {"err_code": 999, "err_msg": "multiple anns_fields exist, please specify a anns_field in search_params"} + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.err_res, + check_items=error + ) + + @pytest.mark.tags(CaseLabel.L0) + def test_search_with_empty_vectors(self): + """ + target: test search with empty vectors + method: 1. connect and create a collection + 2. search with empty vectors + expected: search fails with an error + """ + client = self._client() + collection_name = self.collection_name + + # search with empty vectors + error = {"err_code": 999, "err_msg": "Unexpected error, message="} + search_res, _ = self.search( + client, + collection_name, + data=[], + anns_field=self.sparse_vector_field_name, + search_params={}, + limit=default_limit, + check_task=CheckTasks.err_res, + check_items=error + ) + + @pytest.mark.tags(CaseLabel.L0) + def test_search_binary_vectors(self): + """ + target: test search binary vectors + method: 1. connect and create a collection + 2. search binary vectors + 3. verify search results + expected: search successfully and results are correct + """ + client = self._client() + collection_name = self.collection_name + + # Search with binary vectors + _, vectors_to_search = cf.gen_binary_vectors(default_nq, dim=self.binary_vector_dim) + search_params = {"metric_type": self.binary_vector_metric, "params": {"nprobe": 100}} + + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.binary_vector_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": default_limit, + "pk_name": self.pk_field_name, + "metric": self.binary_vector_metric + } + )
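+ # Note (an assumption about the helper, not verified here): Milvus binary vectors are + # bit-packed 8 dimensions per byte, so each vector from cf.gen_binary_vectors should be a + # bytes object of length binary_vector_dim // 8 and the dim must be a multiple of 8; + # bitwise metrics such as HAMMING or JACCARD then operate on the unpacked bits.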
+ + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("limit, nq", zip([1, 1000, ct.max_limit], [ct.max_nq, 10, 1])) + def test_search_with_different_nq_limits(self, limit, nq): + """ + target: test search with different nq and limit values + method: 1. connect and create a collection + 2. search with different nq and limit values + 3. verify search results + expected: search successfully with different nq and limit values + """ + client = self._client() + collection_name = self.collection_name + + # Generate vectors to search + vectors_to_search = cf.gen_vectors(nq, self.float_vector_dim) + search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}} + + # search with limit + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": nq, + "limit": limit, + "pk_name": self.pk_field_name, + "metric": self.float_vector_metric + } + ) + + @pytest.mark.tags(CaseLabel.L2) + def test_search_with_output_fields(self): + """ + target: test search with output fields + method: 1. connect and create a collection + 2. search with output fields + expected: search successfully with output fields + """ + client = self._client() + collection_name = self.collection_name + + # Generate vectors to search + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) + search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}} + + # search with output fields + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=default_limit, + output_fields=[ct.default_string_field_name, self.dyna_filed_name1, self.dyna_filed_name2], + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": default_limit, + "metric": self.float_vector_metric, + "output_fields": [ct.default_string_field_name, self.dyna_filed_name1, self.dyna_filed_name2], + "original_entities": self.datas, + "pk_name": self.pk_field_name + } + ) + + @pytest.mark.tags(CaseLabel.L2) + def test_search_with_output_fields_all(self): + """ + target: test search with all output fields + method: 1. connect and create a collection + 2. search with all output fields + expected: search successfully with all output fields + """ + client = self._client() + collection_name = self.collection_name + collection_info = self.describe_collection(client, collection_name)[0] + fields = collection_info.get('fields', None) + field_names = [field.get('name') for field in fields] + + # Generate vectors to search + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) + search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}} + + # search with all output fields; build the expected list with `+` (list.extend() returns None) + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=default_limit, + output_fields=["*"], + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": default_limit, + "metric": self.float_vector_metric, + "output_fields": field_names + [self.dyna_filed_name1, self.dyna_filed_name2], + "original_entities": self.datas, + "pk_name": self.pk_field_name + } + )
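+ # Note on the expected output fields above: output_fields=["*"] returns every schema + # field, while values written outside the schema (self.dyna_filed_name1/2) come back via + # the dynamic field, so the expected list concatenates both; the original check passed + # field_names.extend(...), which evaluates to None rather than the combined list.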
+ + @pytest.mark.tags(CaseLabel.L2) + def test_search_with_more_than_max_limit(self): + """ + target: test search with more than max limit + method: 1. connect and create a collection + 2. search with more than max limit + expected: search fails with a topk out-of-range error + """ + client = self._client() + collection_name = self.collection_name + + # Generate vectors to search + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) + search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}} + + error = {"err_code": 999, "err_msg": f"topk [{ct.max_limit + 1}] is invalid, it should be in range " + f"[1, {ct.max_limit}], but got {ct.max_limit + 1}"} + # search with more than max limit + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=ct.max_limit + 1, + check_task=CheckTasks.err_res, + check_items=error + ) + + @pytest.mark.tags(CaseLabel.L2) + def test_search_with_more_than_max_nq(self): + """ + target: test search with more than max nq + method: 1. connect and create a collection + 2. search with more than max nq + expected: search fails with an nq out-of-range error + """ + client = self._client() + collection_name = self.collection_name + vectors_to_search = cf.gen_vectors(ct.max_nq + 1, dim=128, vector_data_type=DataType.SPARSE_FLOAT_VECTOR) + search_params = {"metric_type": self.sparse_vector_metric} + + error = {"err_code": 999, + "err_msg": f"nq [{ct.max_nq + 1}] is invalid, nq (number of search vector per search request) should be in range " + f"[1, {ct.max_nq}], but got {ct.max_nq + 1}"} + # search with more than max nq + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:ct.max_nq + 1], + anns_field=self.sparse_vector_field_name, + search_params=search_params, + limit=ct.max_nq + 1, + check_task=CheckTasks.err_res, + check_items=error + ) + + @pytest.mark.tags(CaseLabel.L2) + def test_search_with_concurrent_threads(self): + """ + target: test search with concurrent threads + method: 1. connect and create a collection + 2. search with concurrent threads + expected: search successfully with concurrent threads + """ + client = self._client() + collection_name = self.collection_name + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) + search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}} + + # search with concurrent threads using thread pool + num_threads = 10 + with ThreadPoolExecutor(max_workers=num_threads) as executor: + futures = [] + for i in range(num_threads): + future = executor.submit( + self.search, + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": default_limit, + "metric": self.float_vector_metric, + "pk_name": self.pk_field_name + } + ) + futures.append(future) + + # Wait for all searches to complete + search_results = [] + for future in as_completed(futures): + search_res, _ = future.result() + search_results.append(search_res)
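+ # Note (assumption): a single MilvusClient instance is shared across the worker threads, + # relying on the underlying gRPC channel tolerating concurrent searches; as_completed() + # plus future.result() re-raise any assertion raised inside a worker, so a failed + # per-thread check still fails the test.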
+ + @pytest.mark.tags(CaseLabel.L2) + def test_search_with_ndarray(self): + """ + target: test search with ndarray + method: 1. connect and create a collection + 2. search with ndarray + expected: search successfully with ndarray + """ + client = self._client() + collection_name = self.collection_name + vectors_to_search = np.random.randn(default_nq, self.float_vector_dim) + search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}} + + # search with ndarray + search_res, _ = self.search( + client, + collection_name, + vectors_to_search, + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": default_limit, + "metric": self.float_vector_metric, + "pk_name": self.pk_field_name} + ) + + @pytest.mark.tags(CaseLabel.L2) + def test_search_with_client_closed(self): + """ + target: test search with client closed + method: 1. connect and create a collection + 2. close the client and search + expected: search fails because the connection is closed + """ + client = self._client() + collection_name = self.collection_name + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) + search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}} + + # close client + client.close() + + # search with client closed + error = {"err_code": 999, "err_msg": "should create connection first"} + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.err_res, + check_items=error + ) + + @pytest.mark.tags(CaseLabel.L2) + def test_search_with_mismatched_metric_type(self): + """ + target: test search with mismatched metric type + method: 1. connect and create a collection + 2. search with mismatched metric type + expected: search fails with a metric type mismatch error + """ + client = self._client() + collection_name = self.collection_name + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) + search_params = {"metric_type": self.sparse_vector_metric, "params": {"nprobe": 100}} + + # search with mismatched metric type + error = {"err_code": 999, "err_msg": "metric type not match: invalid parameter[expected=COSINE][actual=IP]"} + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.err_res, + check_items=error + ) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("partition_name", ["*", "non_exist_partition", "par*"]) + def test_search_with_invalid_partition_name(self, partition_name): + """ + target: test search with invalid partition name + method: 1. connect and create a collection + 2.
search with invalid partition name + expected: search fails with a partition not found error + """ + client = self._client() + collection_name = self.collection_name + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) + search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}} + + # search with invalid partition name + error = {"err_code": 999, "err_msg": f"partition name {partition_name} not found"} + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + partition_names=[partition_name], + check_task=CheckTasks.err_res, + check_items=error + ) + + +class TestSearchV2Independent(TestMilvusClientV2Base): + """Test search functionality with independent collections""" + + @pytest.mark.tags(CaseLabel.L2) + def test_search_dense_vectors_indices_metrics_growing(self): + """ + target: test search with different dense vector types, indices and metrics + method: create connection, collection, insert data and search + expected: searched successfully + """ + # basic search on dense vectors, + # indices and metrics are covered in test_search_pagination_dense_vectors_indices_metrics_growing + pass + + @pytest.mark.tags(CaseLabel.L2) + def test_search_on_empty_partition(self): + """ + target: test search on empty partition + method: create collection, create an empty partition and search in it + expected: search succeeds and returns 0 results + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # fast create collection + self.create_collection(client, collection_name, dimension=ct.default_dim) + + # create partition + partition_name = "empty_partition" + self.create_partition(client, collection_name, partition_name=partition_name) + + # search + vectors_to_search = cf.gen_vectors(default_nq, ct.default_dim) + search_params = {} + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + search_params=search_params, + limit=default_limit, + partition_names=[partition_name], + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": 0, + "pk_name": 'id', + "ids": []}) + + @pytest.mark.tags(CaseLabel.L2) + def test_search_cosine_results_same_as_l2_and_ip(self): + """ + target: test search cosine results same as l2 and ip + method: create connection, collection, insert data and search + expected: the three metrics return the same result ids + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # create collection with customized schema + schema = self.create_schema(client)[0] + schema.add_field(ct.default_primary_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(ct.default_float_vec_field_name, DataType.FLOAT_VECTOR, dim=ct.default_dim) + schema.add_field(ct.default_float_field_name, DataType.FLOAT) + schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=256) + self.create_collection(client, collection_name, schema=schema) + + # insert data + data = [] + for i in range(default_nb): + data.append({ + ct.default_primary_field_name: i, + ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0], + ct.default_float_field_name: i * 1.0, + ct.default_string_field_name: str(i) + }) + self.insert(client, collection_name, data)
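+ # Sketch of why the three metrics below should agree (assuming cf.gen_vectors returns + # (near) unit-norm vectors): for unit vectors a and b, + # ||a - b||^2 = 2 - 2 * <a, b> and cosine(a, b) = <a, b>, + # so L2 (ascending), IP (descending) and COSINE (descending) induce the same top-k + # ordering, which the final assert checks across the three index builds.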
+ + # create index with metric cosine + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=ct.default_float_vec_field_name, + metric_type='COSINE', + index_type='AUTOINDEX') + self.create_index(client, collection_name, index_params=index_params) + + # load collection + self.load_collection(client, collection_name) + + # search on the default metric cosine + vectors_to_search = cf.gen_vectors(default_nq, ct.default_dim) + search_params = {} + search_res_cosine, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=ct.default_float_vec_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": default_limit, + "metric": 'COSINE', + "pk_name": ct.default_primary_field_name} + ) + + # release and drop index, and rebuild index with metric l2 + self.release_collection(client, collection_name) + self.drop_index(client, collection_name, ct.default_float_vec_field_name) + + # rebuild index with metric l2 + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=ct.default_float_vec_field_name, + metric_type='L2', + index_type='AUTOINDEX') + self.create_index(client, collection_name, index_params=index_params) + # load collection + self.load_collection(client, collection_name) + + # search on the metric l2 + search_params = {} + search_res_l2, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=ct.default_float_vec_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": default_limit, + "metric": 'L2', + "pk_name": ct.default_primary_field_name} + ) + + # release and drop index, and rebuild index with metric ip + self.release_collection(client, collection_name) + self.drop_index(client, collection_name, ct.default_float_vec_field_name) + + # rebuild index with metric ip + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=ct.default_float_vec_field_name, + metric_type='IP', + index_type='AUTOINDEX') + self.create_index(client, collection_name, index_params=index_params) + + # load collection + self.load_collection(client, collection_name) + + # search on the metric ip + search_params = {} + search_res_ip, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=ct.default_float_vec_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "limit": default_limit, + "metric": 'IP', + "pk_name": ct.default_primary_field_name} + ) + + # check that the 3 metrics (cosine, l2 and ip) return the same result ids + for i in range(default_nq): + assert search_res_cosine[i].ids == search_res_l2[i].ids == search_res_ip[i].ids + + @pytest.mark.tags(CaseLabel.L2) + def test_search_with_duplicate_primary_key(self): + """ + target: test search with duplicate primary key + method: create connection, collection, insert data and search + expected: search succeeds and the result ids are de-duplicated + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # fast create collection + dim = 60 + self.create_collection(client, collection_name, dimension=dim) + + # insert data with duplicate primary key + data = [] + for i in range(default_nb): + data.append({ + "id": i if i % 2 == 0 else i + 1, +
"vector": cf.gen_vectors(1, dim)[0], + }) + self.insert(client, collection_name, data) + client.flush(collection_name) + + # search + vectors_to_search = cf.gen_vectors(default_nq, dim) + search_params = {} + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field="vector", + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "pk_name": "id", + "limit": default_limit} + ) + + # verify the search results are de-duplicated + for i in range(default_nq): + assert len(search_res[i].ids) == len(set(search_res[i].ids)) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("flush", [True, False]) + def test_search_after_release_load(self, flush): + """ + target: test search after release and load + method: create connection, collection, insert data and search + expected: searched successfully + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # fast create collection + self.create_collection(client, collection_name, dimension=ct.default_dim) + + # insert data + data = [] + for i in range(default_nb): + data.append({ + "id": i, + "vector": cf.gen_vectors(1, ct.default_dim)[0], + }) + self.insert(client, collection_name, data) + if flush: + self.flush(client, collection_name) + self.wait_for_index_ready(client, collection_name, index_name='vector') + + # release collection + self.release_collection(client, collection_name) + # search after release + error = {"err_code": 999, "err_msg": "collection not loaded"} + vectors_to_search = cf.gen_vectors(default_nq, ct.default_dim) + search_params = {} + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field="vector", + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.err_res, + check_items=error + ) + + # load collection + self.load_collection(client, collection_name) + + # search + search_params = {} + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field="vector", + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, + "pk_name": "id", + "limit": default_limit}) + + @pytest.mark.tags(CaseLabel.L0) + def test_search_after_partition_release(self): + """ + target: test search after partition release + method: 1. create connection, collection, insert data and search + 2. release a partition + 3. search again + 4. load the released partition and search again + 5. release the partition again and load the collection + 6. 
search again + expected: searched and results are correct + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # fast create collection + self.create_collection(client, collection_name, dimension=ct.default_dim) + + # create 2 more partitions + partition_names = ["partition_1", "partition_2"] + for partition_name in partition_names: + self.create_partition(client, collection_name, partition_name=partition_name) + + # insert data into all the 3 partitions + insert_times = 2 + + # Generate float vectors for all the inserts + float_vectors = cf.gen_vectors(ct.default_nb * insert_times, dim=ct.default_dim, + vector_data_type=DataType.FLOAT_VECTOR) + + # Insert data multiple times with non-duplicated primary keys + for j in range(insert_times): + # Group rows by partition based on primary key mod 3 + default_rows = [] + partition1_rows = [] + partition2_rows = [] + + for i in range(ct.default_nb): + pk = i + j * ct.default_nb + row = { + 'id': pk, + 'vector': float_vectors[pk] + } + + # Distribute to partitions based on pk mod 3 + if pk % 3 == 0: + default_rows.append(row) + elif pk % 3 == 1: + partition1_rows.append(row) + else: + partition2_rows.append(row) + + # Insert into respective partitions + if default_rows: + self.insert(client, collection_name, data=default_rows) + if partition1_rows: + self.insert(client, collection_name, data=partition1_rows, partition_name=partition_names[0]) + if partition2_rows: + self.insert(client, collection_name, data=partition2_rows, partition_name=partition_names[1]) + + self.flush(client, collection_name) + self.wait_for_index_ready(client, collection_name, index_name='vector') + + # search in the collection + vectors_to_search = cf.gen_vectors(1, ct.default_dim) + limit = 1000 + search_params = {} + search_res1, _ = self.search( + client, + collection_name, + vectors_to_search[:1], + anns_field="vector", + search_params=search_params, + limit=limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": 1, + "pk_name": "id", + "limit": limit}) + + # find one result that is not in the default partition + to_be_released_partition = None + the_other_partition = None + pk_remainder = 0 + for i in range(limit): + top_id = search_res1[0].ids[i] + if top_id % 3 == 0: + pass + elif top_id % 3 == 1: + to_be_released_partition = partition_names[0] + the_other_partition = partition_names[1] + pk_remainder = 1 + break + else: + to_be_released_partition = partition_names[1] + the_other_partition = partition_names[0] + pk_remainder = 2 + break + + # release the partition + if to_be_released_partition is not None: + self.release_partitions(client, collection_name, [to_be_released_partition]) + else: + assert False, "expected to find at least one result that is not in the default partition" + + # search again + search_res2, _ = self.search( + client, + collection_name, + vectors_to_search[:1], + anns_field="vector", + search_params=search_params, + limit=limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": 1, + "pk_name": "id", + "limit": limit}) + # verify no results are from the released partition + for i in range(limit): + not_exist = (search_res2[0].ids[i] not in search_res1[0].ids) or (search_res2[0].ids[i] % 3 != pk_remainder) + assert not_exist + + # search in the non-released partitions + search_res3, _ = self.search( + client, + collection_name, + vectors_to_search[:1], + anns_field="vector", +
partition_names=[ct.default_partition_name, the_other_partition], + search_params=search_params, + limit=limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": 1, + "pk_name": "id", + "limit": limit}) + # verify the results are same as the 2nd search results + assert search_res3[0].ids == search_res2[0].ids + + # load the released partition and search again + self.load_partitions(client, collection_name, [to_be_released_partition]) + search_res4, _ = self.search( + client, + collection_name, + vectors_to_search[:1], + anns_field="vector", + search_params=search_params, + limit=limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": 1, + "pk_name": "id", + "limit": limit}) + # verify the results are same as the first search results + # assert search_res4[0].ids == search_res1[0].ids + + # release the partition again and load the collection + self.release_partitions(client, collection_name, [to_be_released_partition]) + self.load_collection(client, collection_name) + + # search again + search_res5, _ = self.search( + client, + collection_name, + vectors_to_search[:1], + anns_field="vector", + search_params=search_params, + limit=limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": 1, + "pk_name": "id", + "limit": limit}) + # verify the results are same as the first search results + assert search_res5[0].ids == search_res4[0].ids + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("dim", [ct.max_dim, ct.min_dim]) + def test_search_max_and_min_dim(self, dim): + """ + target: test search with max and min dimension collection + method: create connection, collection, insert and search with max and min dimension + expected: search successfully with limit(topK) + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # fast create collection + self.create_collection(client, collection_name, dimension=dim) + + # insert data + data = [] + nb = 200 + for i in range(nb): + data.append({ + "id": i, + "vector": cf.gen_vectors(1, dim)[0] + }) + self.insert(client, collection_name, data) + + # search + vectors_to_search = cf.gen_vectors(ct.default_nq, dim) + search_params = {} + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:ct.default_nq], + anns_field="vector", + search_params=search_params, + limit=ct.default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": ct.default_nq, + "pk_name": "id", + "limit": ct.default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + def test_search_after_recreate_index(self): + """ + target: test search after recreate index + method: create connection, collection, insert and search after recreate index + expected: search successfully with limit(topK) + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # fast create collection + self.create_collection(client, collection_name, dimension=ct.default_dim) + + # insert data + data = [] + for i in range(ct.default_nb): + data.append({ + "id": i, + "vector": cf.gen_vectors(1, ct.default_dim)[0] + }) + self.insert(client, collection_name, data) + + self.flush(client, collection_name) + self.wait_for_index_ready(client, collection_name, index_name='vector') + + # search + vectors_to_search = cf.gen_vectors(ct.default_nq, ct.default_dim) + search_params = {} + search_res, _ = self.search( 
+ client, + collection_name, + vectors_to_search[:ct.default_nq], + anns_field="vector", + search_params=search_params, + limit=ct.default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": ct.default_nq, + "pk_name": "id", + "limit": ct.default_limit}) + + # recreate index + self.release_collection(client, collection_name) + self.drop_index(client, collection_name, index_name='vector') + + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name='vector', index_type='HNSW', + params={"M": 8, "efConstruction": 128}, metric_type='L2') + self.create_index(client, collection_name, index_params=index_params) + + self.wait_for_index_ready(client, collection_name, index_name='vector') + self.load_collection(client, collection_name) + + # search + search_res, _ = self.search( + client, + collection_name, + vectors_to_search[:ct.default_nq], + anns_field="vector", + search_params=search_params, + limit=ct.default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": ct.default_nq, + "pk_name": "id", + "limit": ct.default_limit}) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_sparse_search.py b/tests/python_client/milvus_client_v2/test_milvus_client_sparse_search.py index c0c78e36bd..7e4a25ff0e 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_sparse_search.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_sparse_search.py @@ -114,7 +114,6 @@ class TestSparseSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "limit": default_limit, - "original_entities": [data], "output_fields": [ct.default_sparse_vec_field_name]}) expr = "int64 < 100 " collection_w.search(data[-1][0:default_nq], ct.default_sparse_vec_field_name, @@ -123,7 +122,6 @@ class TestSparseSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "limit": default_limit, - "original_entities": [data], "output_fields": [ct.default_sparse_vec_field_name]}) @pytest.mark.tags(CaseLabel.L2) @@ -193,7 +191,6 @@ class TestSparseSearch(TestcaseBase): check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, "limit": default_limit, - "original_entities": [all_data], "output_fields": [ct.default_sparse_vec_field_name]}) expr_id_list = [0, 1, 10, 100] term_expr = f'{ct.default_int64_field_name} in {expr_id_list}' diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt index dbab09376c..28959d21ff 100644 --- a/tests/python_client/requirements.txt +++ b/tests/python_client/requirements.txt @@ -31,7 +31,6 @@ pytest-random-order pymilvus==2.6.0rc119 pymilvus[bulk_writer]==2.6.0rc119 - # for customize config test python-benedict==0.24.3 timeout-decorator==0.5.0 diff --git a/tests/python_client/testcases/async_milvus_client/test_collection_async.py b/tests/python_client/testcases/async_milvus_client/test_collection_async.py index 0a21fdac25..1ccc063964 100644 --- a/tests/python_client/testcases/async_milvus_client/test_collection_async.py +++ b/tests/python_client/testcases/async_milvus_client/test_collection_async.py @@ -175,6 +175,7 @@ class TestAsyncMilvusClientCollectionValid(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), + "pk_name": default_primary_key_field_name, "limit": default_limit}) tasks.append(search_task) # 5. 
query @@ -182,7 +183,7 @@ class TestAsyncMilvusClientCollectionValid(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={"exp_res": rows, "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) tasks.append(query_task) res = await asyncio.gather(*tasks) @@ -204,13 +205,14 @@ class TestAsyncMilvusClientCollectionValid(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) # 11. query await async_client.query(collection_name, filter=default_search_exp, check_task=CheckTasks.check_query_results, check_items={"exp_res": rows, "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) # 12. drop action if self.has_partition(client, collection_name, partition_name)[0]: diff --git a/tests/python_client/testcases/async_milvus_client/test_e2e_async.py b/tests/python_client/testcases/async_milvus_client/test_e2e_async.py index 5c1ee28873..0e682c748f 100644 --- a/tests/python_client/testcases/async_milvus_client/test_e2e_async.py +++ b/tests/python_client/testcases/async_milvus_client/test_e2e_async.py @@ -62,7 +62,8 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": ct.default_nq, - "limit": ct.default_limit}) + "limit": ct.default_limit, + "pk_name": default_pk_name}) tasks.append(default_search_task) # search with filter & search_params @@ -73,7 +74,8 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": ct.default_nq, - "limit": ct.default_limit}) + "limit": ct.default_limit, + "pk_name": default_pk_name}) tasks.append(filter_params_search_task) # search output fields @@ -82,7 +84,8 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": ct.default_nq, - "limit": ct.default_limit}) + "limit": ct.default_limit, + "pk_name": default_pk_name}) tasks.append(output_search_task) # query with filter and default output "*" @@ -92,7 +95,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): output_fields=[default_pk_name], check_task=CheckTasks.check_query_results, check_items={"exp_res": exp_query_res, - "primary_field": default_pk_name}) + "pk_name": default_pk_name}) tasks.append(filter_query_task) # query with ids and output all fields ids_query_task = self.async_milvus_client_wrap.query(c_name, @@ -101,7 +104,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={"exp_res": rows[:ct.default_limit], "with_vec": True, - "primary_field": default_pk_name}) + "pk_name": default_pk_name}) tasks.append(ids_query_task) # get with ids get_task = self.async_milvus_client_wrap.get(c_name, @@ -109,7 +112,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): output_fields=[default_pk_name, default_vector_name], check_task=CheckTasks.check_query_results, check_items={"exp_res": rows[:2], "with_vec": True, - "primary_field": default_pk_name}) + "pk_name": default_pk_name}) tasks.append(get_task) await asyncio.gather(*tasks) @@ -158,7 +161,8 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): 
check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": ct.default_nq, - "limit": ct.default_limit}) + "limit": ct.default_limit, + "pk_name": default_pk_name}) tasks.append(default_search_task) # search with filter & search_params @@ -170,7 +174,8 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": ct.default_nq, - "limit": ct.default_limit}) + "limit": ct.default_limit, + "pk_name": default_pk_name}) tasks.append(filter_params_search_task) # search output fields @@ -180,7 +185,8 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": ct.default_nq, - "limit": ct.default_limit}) + "limit": ct.default_limit, + "pk_name": default_pk_name}) tasks.append(output_search_task) # query with filter and default output "*" @@ -191,7 +197,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): partition_names=[p_name], check_task=CheckTasks.check_query_results, check_items={"exp_res": exp_query_res, - "primary_field": default_pk_name}) + "pk_name": default_pk_name}) tasks.append(filter_query_task) # query with ids and output all fields ids_query_task = self.async_milvus_client_wrap.query(c_name, @@ -201,7 +207,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={"exp_res": rows[:ct.default_limit], "with_vec": True, - "primary_field": default_pk_name}) + "pk_name": default_pk_name}) tasks.append(ids_query_task) # get with ids get_task = self.async_milvus_client_wrap.get(c_name, @@ -209,7 +215,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): output_fields=[default_pk_name, default_vector_name], check_task=CheckTasks.check_query_results, check_items={"exp_res": rows[:2], "with_vec": True, - "primary_field": default_pk_name}) + "pk_name": default_pk_name}) tasks.append(get_task) await asyncio.gather(*tasks) @@ -283,7 +289,8 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": ct.default_nq, - "limit": ct.default_limit}) + "limit": ct.default_limit, + "pk_name": default_pk_name}) tasks.append(default_search_task) # hybrid_search @@ -309,7 +316,8 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_items={ "enable_milvus_client_api": True, "nq": ct.default_nq, - "limit": 5}) + "limit": 5, + "pk_name": default_pk_name}) tasks.append(filter_params_search_task) # get with ids @@ -416,7 +424,8 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": ct.default_nq, - "limit": ct.default_limit}) + "limit": ct.default_limit, + "pk_name": default_pk_name}) tasks.append(default_search_task) # query with filter and default output "*" @@ -426,7 +435,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): output_fields=[default_pk_name], check_task=CheckTasks.check_query_results, check_items={"exp_res": exp_query_res, - "primary_field": default_pk_name}) + "pk_name": default_pk_name}) tasks.append(filter_query_task) # get with ids @@ -435,7 +444,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): output_fields=[default_pk_name, default_vector_name], check_task=CheckTasks.check_query_results, check_items={"exp_res": rows[:2], "with_vec": True, - "primary_field": default_pk_name}) + "pk_name": 
default_pk_name}) tasks.append(get_task) await asyncio.gather(*tasks) @@ -495,7 +504,8 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": ct.default_nq, - "limit": ct.default_limit}) + "limit": ct.default_limit, + "pk_name": default_pk_name}) tasks.append(default_search_task) # query with filter and default output "*" @@ -505,6 +515,6 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base): output_fields=[default_pk_name], check_task=CheckTasks.check_query_results, check_items={"exp_res": exp_query_res, - "primary_field": default_pk_name}) + "pk_name": default_pk_name}) tasks.append(filter_query_task) await asyncio.gather(*tasks) diff --git a/tests/python_client/testcases/async_milvus_client/test_index_async.py b/tests/python_client/testcases/async_milvus_client/test_index_async.py index f54ecd96e6..55430a9af5 100644 --- a/tests/python_client/testcases/async_milvus_client/test_index_async.py +++ b/tests/python_client/testcases/async_milvus_client/test_index_async.py @@ -323,7 +323,8 @@ class TestAsyncMilvusClientIndexValid(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) tasks.append(search_task) # 6. query query_task = self.async_milvus_client_wrap. \ @@ -331,7 +332,7 @@ class TestAsyncMilvusClientIndexValid(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={"exp_res": rows, "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) tasks.append(query_task) res = await asyncio.gather(*tasks) diff --git a/tests/python_client/testcases/async_milvus_client/test_partition_async.py b/tests/python_client/testcases/async_milvus_client/test_partition_async.py index e2eae89c35..aa3fd6678c 100644 --- a/tests/python_client/testcases/async_milvus_client/test_partition_async.py +++ b/tests/python_client/testcases/async_milvus_client/test_partition_async.py @@ -669,7 +669,8 @@ class TestAsyncMilvusClientPartitionValid(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) tasks.append(search_task) # 5. 
query query_task = async_client.query(collection_name, filter=default_search_exp, @@ -677,7 +678,7 @@ class TestAsyncMilvusClientPartitionValid(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={"exp_res": rows, "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) tasks.append(query_task) res = await asyncio.gather(*tasks) @@ -744,7 +745,8 @@ class TestAsyncMilvusClientPartitionValid(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) tasks.append(search_task) # search multi partition search_task_multi = async_client.search(collection_name, vectors_to_search, @@ -752,7 +754,8 @@ class TestAsyncMilvusClientPartitionValid(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) tasks.append(search_task_multi) # query single partition query_task = async_client.query(collection_name, filter=default_search_exp, @@ -760,7 +763,7 @@ class TestAsyncMilvusClientPartitionValid(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={"exp_res": rows_1, "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) tasks.append(query_task) # query multi partition query_task_multi = async_client.query(collection_name, filter=default_search_exp, @@ -768,7 +771,7 @@ class TestAsyncMilvusClientPartitionValid(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={"exp_res": rows_1 + rows_2, "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) tasks.append(query_task_multi) res = await asyncio.gather(*tasks) # 5. release partitions, search and query @@ -789,13 +792,14 @@ class TestAsyncMilvusClientPartitionValid(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) await async_client.query(collection_name, filter=default_search_exp, partition_names=[partition_name_2], check_task=CheckTasks.check_query_results, check_items={"exp_res": rows_2, "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) # 6. 
load partitions, search and query + tasks_after_load = [] @@ -804,13 +808,14 @@ check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) tasks_after_load.append(search_task) query_task = async_client.query(collection_name, filter=default_search_exp, check_task=CheckTasks.check_query_results, check_items={"exp_res": rows_default + rows_1 + rows_2, "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) tasks_after_load.append(query_task) res = await asyncio.gather(*tasks_after_load) diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py index f5e243a20c..4f22cc5b03 100644 --- a/tests/python_client/testcases/test_collection.py +++ b/tests/python_client/testcases/test_collection.py @@ -1294,7 +1294,7 @@ class TestCollectionDataframe(TestcaseBase): df = pd.DataFrame(data=mix_data, columns=list("ABC")) error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema, " - "{C} field should be a float_vector, but got a {} instead."} + "{C} field should be a FLOAT_VECTOR, but got a {} instead."} self.collection_wrap.construct_from_dataframe(c_name, df, primary_field='A', check_task=CheckTasks.err_res, check_items=error) @@ -4725,7 +4725,7 @@ class TestCollectionDefaultValueValid(TestcaseBase): ****************************************************************** """ @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason="issue 36457") + # @pytest.mark.skip(reason="issue 36457") def test_create_collection_default_value_twice(self): """ target: test create collection with set default value twice @@ -4740,7 +4740,7 @@ class TestCollectionDefaultValueValid(TestcaseBase): int_fields.append(cf.gen_float_field(default_value=numpy.float32(10.0))) int_fields.append(cf.gen_float_vec_field()) schema = cf.gen_collection_schema(fields=int_fields) - self.collection_wrap.init_collection(c_name, schema=schema) + c = self.collection_wrap.init_collection(c_name, schema=schema)[0] self.collection_wrap.init_collection(c_name, schema=schema) @pytest.mark.tags(CaseLabel.L1) diff --git a/tests/python_client/testcases/test_compaction.py b/tests/python_client/testcases/test_compaction.py index 69289478ef..72a7ceda08 100644 --- a/tests/python_client/testcases/test_compaction.py +++ b/tests/python_client/testcases/test_compaction.py @@ -1051,8 +1051,9 @@ class TestCompactionOperation(TestcaseBase): collection_w.query(expr, check_task=CheckTasks.check_query_empty) expr_1 = f'{ct.default_int64_field_name} in {[1]}' - collection_w.query(expr_1, check_task=CheckTasks.check_query_results, check_items={ - 'exp_res': [{'int64': 1}]}) + collection_w.query(expr_1, check_task=CheckTasks.check_query_results, + check_items={'exp_res': [{'int64': 1}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_compact_cross_shards(self): diff --git a/tests/python_client/testcases/test_database.py b/tests/python_client/testcases/test_database.py index b88e262764..6c6e6771c5 100644 --- a/tests/python_client/testcases/test_database.py +++ b/tests/python_client/testcases/test_database.py @@ -892,6 +892,7 @@ class TestDatabaseOtherApi(TestcaseBase): partition_names=[partition_name], check_task=CheckTasks.check_query_iterator,
check_items={"count": 1000, + "pk_name": self.database_wrap.primary_field.name, "batch_size": ct.default_limit * 10}) def prepare_data_for_db_search(self): diff --git a/tests/python_client/testcases/test_delete.py b/tests/python_client/testcases/test_delete.py index 70c6d2dbb9..456657ca93 100644 --- a/tests/python_client/testcases/test_delete.py +++ b/tests/python_client/testcases/test_delete.py @@ -214,7 +214,8 @@ class TestDeleteParams(TestcaseBase): expr = f'{ct.default_int64_field_name} in {[tmp_nb]}' collection_w.delete(expr=expr) collection_w.query(tmp_expr, check_task=CheckTasks.check_query_results, - check_items={exp_res: query_res_tmp_expr}) + check_items={'exp_res': query_res_tmp_expr, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_delete_part_not_existed_values(self): @@ -284,7 +285,9 @@ class TestDeleteParams(TestcaseBase): collection_w.query(tmp_expr, check_task=CheckTasks.check_query_empty, partition_names=[partition_w.name]) res = df.iloc[1:2, :1].to_dict('records') collection_w.query(f'{ct.default_int64_field_name} in [1]', - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={'exp_res': res, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_delete_default_partition(self): @@ -414,10 +417,8 @@ class TestDeleteOperation(TestcaseBase): search_res, _ = collection_w.search([df[ct.default_float_vec_field_name][0]], ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) - log.debug(search_res[0].ids) # assert search results not contains deleted ids inter = set(insert_res.primary_keys[:ct.default_nb // 2]).intersection(set(search_res[0].ids)) - log.debug(inter) assert len(inter) == 0 @pytest.mark.tags(CaseLabel.L1) @@ -461,7 +462,9 @@ class TestDeleteOperation(TestcaseBase): res = df_same.iloc[-2:, [0, 1, -1]].to_dict('records') collection_w.query(expr=f'{ct.default_int64_field_name} >= {tmp_nb-1}', output_fields=[ct.default_float_vec_field_name, ct.default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={'exp_res': res, 'with_vec': True}) + check_task=CheckTasks.check_query_results, + check_items={'exp_res': res, 'with_vec': True, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_delete_query_delta_logs(self): @@ -497,7 +500,9 @@ class TestDeleteOperation(TestcaseBase): res = df_same.iloc[:, [0, 1, -1]].to_dict('records') collection_w.query(expr=f'{ct.default_int64_field_name} < {L0_binlog_num_compaction+2}', output_fields=[ct.default_float_vec_field_name, ct.default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={'exp_res': res, 'with_vec': True}) + check_task=CheckTasks.check_query_results, + check_items={'exp_res': res, 'with_vec': True, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_delete_search(self): @@ -521,7 +526,6 @@ class TestDeleteOperation(TestcaseBase): ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) # assert search result is not equal to entity - log.debug(f"Second search result ids: {search_res_2[0].ids}") inter = set(ids[:ct.default_nb // 2] ).intersection(set(search_res_2[0].ids)) # Using bounded staleness, we could still search the "deleted" entities, @@ -555,7 +559,6 @@ class TestDeleteOperation(TestcaseBase): ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) # assert search result is not equal 
to entity - log.debug(f"Second search result ids: {search_res_2[0].ids}") inter = set(ids[:ct.default_nb // 2] ).intersection(set(search_res_2[0].ids)) # Using bounded staleness, we could still search the "deleted" entities, @@ -651,8 +654,9 @@ class TestDeleteOperation(TestcaseBase): # delete entities from another partition expr = f'{ct.default_int64_field_name} in {[0]}' collection_w.delete(expr, partition_name=ct.default_partition_name) - collection_w.query(expr, check_task=CheckTasks.check_query_results, check_items={ - exp_res: query_res_tmp_expr}) + collection_w.query(expr, check_task=CheckTasks.check_query_results, + check_items={'exp_res': query_res_tmp_expr, + "pk_name": collection_w.primary_field.name}) # delete entities from own partition collection_w.delete(expr, partition_name=partition_w.name) @@ -685,7 +689,9 @@ class TestDeleteOperation(TestcaseBase): # query on partition_w with id 0 and get an result collection_w.query(tmp_expr, partition_names=[partition_w.name], - check_task=CheckTasks.check_query_results, check_items={exp_res: query_res_tmp_expr}) + check_task=CheckTasks.check_query_results, + check_items={'exp_res': query_res_tmp_expr, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L0) def test_delete_auto_id_collection(self): @@ -759,7 +765,9 @@ class TestDeleteOperation(TestcaseBase): res = df_same.iloc[:, [0, 1, -1]].to_dict('records') collection_w.query(expr=tmp_expr, output_fields=[ct.default_float_vec_field_name, ct.default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={'exp_res': res, 'with_vec': True}) + check_task=CheckTasks.check_query_results, + check_items={'exp_res': res, 'with_vec': True, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_delete_growing_data_channel_delete(self): @@ -923,14 +931,15 @@ class TestDeleteOperation(TestcaseBase): df_new = cf.gen_default_dataframe_data(4, start=tmp_nb) df_new[ct.default_int64_field_name] = [0, 1, 3, 5] collection_w.insert(df_new) - log.debug(f'to_flush:{to_flush}') if to_flush: log.debug(collection_w.num_entities) # query entity res = df_new.iloc[:, [0, 1, -1]].to_dict('records') collection_w.query(del_expr, output_fields=[ct.default_float_vec_field_name, ct.default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={'exp_res': res, 'with_vec': True}) + check_task=CheckTasks.check_query_results, + check_items={'exp_res': res, 'with_vec': True, + "pk_name": collection_w.primary_field.name}) search_res, _ = collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]], anns_field=ct.default_float_vec_field_name, param=default_search_params, limit=1) @@ -963,7 +972,9 @@ class TestDeleteOperation(TestcaseBase): res = df.iloc[:1, :1].to_dict('records') collection_w.search(data=[df[ct.default_float_vec_field_name][0]], anns_field=ct.default_float_vec_field_name, param=default_search_params, limit=1) - collection_w.query(tmp_expr, check_task=CheckTasks.check_query_results, check_items={'exp_res': res}) + collection_w.query(tmp_expr, check_task=CheckTasks.check_query_results, + check_items={'exp_res': res, + "pk_name": collection_w.primary_field.name}) # delete collection_w.delete(tmp_expr) @@ -979,7 +990,9 @@ class TestDeleteOperation(TestcaseBase): # re-query res = df_new.iloc[[0], [0, 1, -1]].to_dict('records') collection_w.query(tmp_expr, output_fields=[ct.default_float_vec_field_name, ct.default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={'exp_res': res, 
'with_vec': True}) + check_task=CheckTasks.check_query_results, + check_items={'exp_res': res, 'with_vec': True, + "pk_name": collection_w.primary_field.name}) search_res, _ = collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]], anns_field=ct.default_float_vec_field_name, param=default_search_params, limit=1) @@ -1060,7 +1073,9 @@ class TestDeleteOperation(TestcaseBase): log.debug(collection_w.num_entities) collection_w.query(tmp_expr, output_fields=[ct.default_float_vec_field_name], check_task=CheckTasks.check_query_results, - check_items={'exp_res': df_new.iloc[[0], [0, 4]].to_dict('records'), 'with_vec': True}) + check_items={'exp_res': df_new.iloc[[0], [0, 4]].to_dict('records'), + 'with_vec': True, + "pk_name": collection_w.primary_field.name}) collection_w.delete(tmp_expr) if to_flush_delete: @@ -1324,11 +1339,9 @@ class TestDeleteString(TestcaseBase): search_res, _ = collection_w.search([df[ct.default_float_vec_field_name][0]], ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) - log.debug(search_res[0].ids) # assert search results not contains deleted ids inter = set(insert_res.primary_keys[:ct.default_nb // 2]).intersection(set(search_res[0].ids)) - log.debug(inter) - assert len(inter) == 0 + assert len(inter) == 0, "assert no deleted ids in search results" @pytest.mark.tags(CaseLabel.L1) def test_delete_query_ids_both_L0_segment_and_WAL_with_string(self): @@ -1374,7 +1387,8 @@ class TestDeleteString(TestcaseBase): collection_w.query(expr=default_string_expr, output_fields=[ct.default_float_vec_field_name], check_task=CheckTasks.check_query_results, - check_items={'exp_res': res, 'with_vec': True, "primary_field": ct.default_string_field_name}) + check_items={'exp_res': res, 'with_vec': True, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_delete_search_with_string(self): @@ -1400,7 +1414,6 @@ class TestDeleteString(TestcaseBase): ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) # assert search result is not equal to entity - log.debug(f"Second search result ids: {search_res_2[0].ids}") inter = set(ids[:ct.default_nb // 2] ).intersection(set(search_res_2[0].ids)) # Using bounded staleness, we could still search the "deleted" entities, @@ -1483,7 +1496,9 @@ class TestDeleteString(TestcaseBase): # query on partition_w with id 0 and get an result collection_w.query(default_string_expr, partition_names=[partition_w.name], - check_task=CheckTasks.check_query_results, check_items={exp_res: query_tmp_expr_str}) + check_task=CheckTasks.check_query_results, + check_items={'exp_res': query_tmp_expr_str, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_delete_sealed_segment_without_flush_with_string(self): @@ -1519,7 +1534,8 @@ class TestDeleteString(TestcaseBase): collection_w.query(expr=default_string_expr, output_fields=[ct.default_float_vec_field_name], check_task=CheckTasks.check_query_results, - check_items={'exp_res': res, 'with_vec': True, "primary_field": ct.default_string_field_name}) + check_items={'exp_res': res, 'with_vec': True, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_delete_growing_data_channel_delete_with_string(self): @@ -1713,7 +1729,7 @@ class TestDeleteString(TestcaseBase): collection_w.query(default_string_expr, output_fields=[ct.default_float_vec_field_name], check_task=CheckTasks.check_query_results, check_items={'exp_res': df_new.iloc[[0], [2, 4]].to_dict('records'), - 
'primary_field': ct.default_string_field_name, 'with_vec': True}) + 'pk_name': collection_w.primary_field.name, 'with_vec': True}) collection_w.delete(default_string_expr) if to_flush_delete: @@ -1871,7 +1887,9 @@ class TestDeleteString(TestcaseBase): res = df.iloc[:1, 2:3].to_dict('records') collection_w.search(data=[df[ct.default_float_vec_field_name][0]], anns_field=ct.default_float_vec_field_name, param=default_search_params, limit=1) - collection_w.query(default_string_expr, check_task=CheckTasks.check_query_results, check_items={'exp_res': res}) + collection_w.query(default_string_expr, check_task=CheckTasks.check_query_results, + check_items={'exp_res': res, + "pk_name": collection_w.primary_field.name}) # delete collection_w.delete(default_string_expr) @@ -1886,11 +1904,10 @@ class TestDeleteString(TestcaseBase): # re-query res = df_new.iloc[[0], [2, 4]].to_dict('records') - log.info(res) collection_w.query(default_string_expr, output_fields=[ct.default_float_vec_field_name], check_task=CheckTasks.check_query_results, check_items={'exp_res': res, - 'primary_field': ct.default_string_field_name, + 'pk_name': collection_w.primary_field.name, 'with_vec': True}) collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]], anns_field=ct.default_float_vec_field_name, @@ -2495,7 +2512,6 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) # assert search result is not equal to entity - log.debug(f"Second search result ids: {search_res_2[0].ids}") inter = set(ids[:ct.default_nb // 2] ).intersection(set(search_res_2[0].ids)) # Using bounded staleness, we could still search the "deleted" entities, diff --git a/tests/python_client/testcases/test_high_level_api.py b/tests/python_client/testcases/test_high_level_api.py index ffb742fe80..6c72e3bde7 100644 --- a/tests/python_client/testcases/test_high_level_api.py +++ b/tests/python_client/testcases/test_high_level_api.py @@ -162,13 +162,14 @@ class TestHighLevelApi(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) # 4. query self.query(client, collection_name, filter=default_search_exp, check_task=CheckTasks.check_query_results, check_items={exp_res: rows, "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L1) @@ -203,7 +204,8 @@ class TestHighLevelApi(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip(reason="issue 25110") @@ -235,13 +237,14 @@ class TestHighLevelApi(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) # 4. 
query self.query(client, collection_name, filter=default_search_exp, check_task=CheckTasks.check_query_results, check_items={exp_res: rows, "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L2) @@ -274,7 +277,8 @@ class TestHighLevelApi(TestMilvusClientV2Base): check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), - "limit": default_limit}) + "limit": default_limit, + "pk_name": default_primary_key_field_name}) self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L1) @@ -312,11 +316,12 @@ class TestHighLevelApi(TestMilvusClientV2Base): check_items={"enable_milvus_client_api": True, "nq": len(vectors_to_search), "ids": insert_ids, - "limit": limit}) + "limit": limit, + "pk_name": default_primary_key_field_name}) # 6. query self.query(client, collection_name, filter=default_search_exp, check_task=CheckTasks.check_query_results, check_items={exp_res: rows[delete_num:], "with_vec": True, - "primary_field": default_primary_key_field_name}) + "pk_name": default_primary_key_field_name}) self.drop_collection(client, collection_name) diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py index 4fa958095a..520189977f 100644 --- a/tests/python_client/testcases/test_insert.py +++ b/tests/python_client/testcases/test_insert.py @@ -2173,9 +2173,10 @@ class TestUpsertValid(TestcaseBase): collection_w.load() for i in range(5): collection_w.upsert(data=data) - collection_w.query(expr=f'{ct.default_int64_field_name} >= 0', output_fields=[ct.default_count_output] - , check_task=CheckTasks.check_query_results, - check_items={"exp_res": [{"count(*)": ct.default_nb}]}) + collection_w.query(expr=f'{ct.default_int64_field_name} >= 0', + output_fields=[ct.default_count_output], + check_task=CheckTasks.check_query_results, + check_items={"exp_res": [{"count(*)": ct.default_nb}]}) class TestUpsertInvalid(TestcaseBase): diff --git a/tests/python_client/testcases/test_mix_scenes.py b/tests/python_client/testcases/test_mix_scenes.py index 9aa9cd64d5..11516551af 100644 --- a/tests/python_client/testcases/test_mix_scenes.py +++ b/tests/python_client/testcases/test_mix_scenes.py @@ -898,7 +898,8 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase): expr_count = len([i for i in self.insert_data.get(expr_field, []) if len(i) == length]) # query count(*) - self.collection_wrap.query(expr=expr, output_fields=['count(*)'], check_task=CheckTasks.check_query_results, + self.collection_wrap.query(expr=expr, output_fields=['count(*)'], + check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": expr_count}]}) @@ -1223,7 +1224,8 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): expr_count = len([i for i in self.insert_data.get(expr_field, []) if len(i) == length]) # query count(*) - self.collection_wrap.query(expr=expr, output_fields=['count(*)'], check_task=CheckTasks.check_query_results, + self.collection_wrap.query(expr=expr, output_fields=['count(*)'], + check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": expr_count}]}) @pytest.mark.tags(CaseLabel.L1) @@ -1239,7 +1241,8 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): 1. 
query response equal to insert nb """ # query count(*) - self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results, + self.collection_wrap.query(expr='', output_fields=['count(*)'], + check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": self.nb}]}) @pytest.mark.tags(CaseLabel.L1) @@ -1637,7 +1640,8 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): expr_count = len([i for i in self.insert_data.get(expr_field, []) if len(i) == length]) # query count(*) - self.collection_wrap.query(expr=expr, output_fields=['count(*)'], check_task=CheckTasks.check_query_results, + self.collection_wrap.query(expr=expr, output_fields=['count(*)'], + check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": expr_count}]}) @pytest.mark.tags(CaseLabel.L1) @@ -1653,7 +1657,8 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): 1. query response equal to insert nb """ # query count(*) - self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results, + self.collection_wrap.query(expr='', output_fields=['count(*)'], + check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": self.nb}]}) @pytest.mark.tags(CaseLabel.L1) @@ -1908,7 +1913,8 @@ class TestBitmapIndexMmap(TestCaseClassBase): 1. query response equal to insert nb """ # query count(*) - self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results, + self.collection_wrap.query(expr='', output_fields=['count(*)'], + check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": self.nb}]}) @pytest.mark.tags(CaseLabel.L1) @@ -2132,8 +2138,10 @@ class TestMixScenes(TestcaseBase): # query before upsert expected_res = [{k: v[10] for k, v in insert_data.items() if k != DataType.FLOAT_VECTOR.name}] - self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results, - check_items={"exp_res": expected_res, "primary_field": primary_field}) + self.collection_wrap.query(expr=expr, output_fields=scalar_fields, + check_task=CheckTasks.check_query_results, + check_items={"exp_res": expected_res, + "pk_name": primary_field}) # upsert int64_pk = 10 upsert_data = cf.gen_field_values(self.collection_wrap.schema, nb=1, @@ -2141,14 +2149,18 @@ class TestMixScenes(TestcaseBase): self.collection_wrap.upsert(data=list(upsert_data.values())) # re-query expected_upsert_res = [{k: v[0] for k, v in upsert_data.items() if k != DataType.FLOAT_VECTOR.name}] - self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results, - check_items={"exp_res": expected_upsert_res, "primary_field": primary_field}) + self.collection_wrap.query(expr=expr, output_fields=scalar_fields, + check_task=CheckTasks.check_query_results, + check_items={"exp_res": expected_upsert_res, + "pk_name": primary_field}) # delete int64_pk = 10 self.collection_wrap.delete(expr=expr) # re-query - self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results, - check_items={"exp_res": []}) + self.collection_wrap.query(expr=expr, output_fields=scalar_fields, + check_task=CheckTasks.check_query_results, + check_items={"exp_res": [], + "pk_name": primary_field}) @pytest.mark.tags(CaseLabel.L2) def test_bitmap_offset_cache_and_mmap(self, request): @@ -2207,8 +2219,10 @@ class TestMixScenes(TestcaseBase): self.collection_wrap.load() # query before upsert - 
self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results, - check_items={"exp_res": []}) + self.collection_wrap.query(expr=expr, output_fields=scalar_fields, + check_task=CheckTasks.check_query_results, + check_items={"exp_res": [], + "pk_name": primary_field}) # upsert int64_pk = 33333 upsert_data = cf.gen_field_values(self.collection_wrap.schema, nb=1, @@ -2216,14 +2230,18 @@ class TestMixScenes(TestcaseBase): self.collection_wrap.upsert(data=list(upsert_data.values())) # re-query expected_upsert_res = [{k: v[0] for k, v in upsert_data.items() if k != DataType.FLOAT_VECTOR.name}] - self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results, - check_items={"exp_res": expected_upsert_res, "primary_field": primary_field}) + self.collection_wrap.query(expr=expr, output_fields=scalar_fields, + check_task=CheckTasks.check_query_results, + check_items={"exp_res": expected_upsert_res, + "pk_name": primary_field}) # delete int64_pk = 33333 self.collection_wrap.delete(expr=expr) # re-query - self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results, - check_items={"exp_res": []}) + self.collection_wrap.query(expr=expr, output_fields=scalar_fields, + check_task=CheckTasks.check_query_results, + check_items={"exp_res": [], + "pk_name": primary_field}) # search expr_left, expr_right = Expr.GT(Expr.SUB('INT64', 37).subset, 13).value, Expr.LIKE('VARCHAR', '%a').value diff --git a/tests/python_client/testcases/test_partition.py b/tests/python_client/testcases/test_partition.py index ef56ab9d27..4f64d01ae9 100644 --- a/tests/python_client/testcases/test_partition.py +++ b/tests/python_client/testcases/test_partition.py @@ -397,16 +397,20 @@ class TestPartitionParams(TestcaseBase): collection_w.create_index(ct.default_float_vec_field_name, ct.default_index) partition_w.load(replica_number=1) - collection_w.query(expr=f"{ct.default_int64_field_name} in [0]", check_task=CheckTasks.check_query_results, - check_items={'exp_res': [{'int64': 0}]}) + collection_w.query(expr=f"{ct.default_int64_field_name} in [0]", + check_task=CheckTasks.check_query_results, + check_items={'exp_res': [{'int64': 0}], + "pk_name": collection_w.primary_field.name}) error = {ct.err_code: 1100, ct.err_msg: "can't change the replica number for loaded partitions: " "invalid parameter[expected=1][actual=2]"} partition_w.load(replica_number=2, check_task=CheckTasks.err_res, check_items=error) partition_w.release() partition_w.load(replica_number=2) - collection_w.query(expr=f"{ct.default_int64_field_name} in [0]", check_task=CheckTasks.check_query_results, - check_items={'exp_res': [{'int64': 0}]}) + collection_w.query(expr=f"{ct.default_int64_field_name} in [0]", + check_task=CheckTasks.check_query_results, + check_items={'exp_res': [{'int64': 0}], + "pk_name": collection_w.primary_field.name}) two_replicas, _ = collection_w.get_replicas() assert len(two_replicas.groups) == 2 diff --git a/tests/python_client/testcases/test_partition_key_isolation.py b/tests/python_client/testcases/test_partition_key_isolation.py index ef1cb09eb6..6431412056 100644 --- a/tests/python_client/testcases/test_partition_key_isolation.py +++ b/tests/python_client/testcases/test_partition_key_isolation.py @@ -24,7 +24,8 @@ class TestPartitionKeyIsolation(TestcaseBase): def test_par_key_isolation_with_valid_expr(self): # create self._connect() - collection_name = cf.gen_unique_str(prefix) + collection_name = 
cf.gen_collection_name_by_testcase_name() + dim = 128 partition_key = "scalar_6" enable_isolation = "true" if collection_name in list_collections(): @@ -42,7 +43,7 @@ class TestPartitionKeyIsolation(TestcaseBase): is_partition_key=bool(partition_key == "scalar_12")), FieldSchema(name="scalar_5_linear", dtype=DataType.VARCHAR, max_length=1000, is_partition_key=bool(partition_key == "scalar_5_linear")), - FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=768) + FieldSchema(name="emb", dtype=DataType.FLOAT_VECTOR, dim=dim) ] schema = CollectionSchema(fields=fields, description="test collection", enable_dynamic_field=True, num_partitions=1) @@ -72,12 +73,14 @@ class TestPartitionKeyIsolation(TestcaseBase): "scalar_9": [str(i % 9) for i in range(start_idx, end_idx)], "scalar_12": [str(i % 12) for i in range(start_idx, end_idx)], "scalar_5_linear": [str(i % 5) for i in range(start_idx, end_idx)], - "emb": [[random.random() for _ in range(768)] for _ in range(batch_size)] + "emb": [[random.random() for _ in range(dim)] for _ in range(batch_size)] } df = pd.DataFrame(data) all_data.append(df) log.info(f"generate test data {batch_size} cost time {time.time() - t0}") collection.insert(df) + num = collection.num_entities + log.info(f"collection {collection_name} num_entities after insert: {num}") all_df = pd.concat(all_data) collection.compact() collection.wait_for_compaction_completed() @@ -98,8 +101,6 @@ class TestPartitionKeyIsolation(TestcaseBase): t0 = time.time() collection.load() log.info(f"load collection cost time {time.time() - t0}") - num = collection.num_entities - log.info(f"collection {collection_name} loaded, num_entities: {num}") valid_expressions = [ "scalar_6 == '1' and scalar_12 == '1'", @@ -111,17 +112,15 @@ class TestPartitionKeyIsolation(TestcaseBase): ] for expr in valid_expressions: res = collection.search( - data=[[random.random() for _ in range(768)]], + data=[[random.random() for _ in range(dim)]], anns_field="emb", expr=expr, - param={"metric_type": "L2", "params": {"nprobe": 16}}, + param={"metric_type": "L2", "params": {}}, limit=10000, output_fields=["scalar_3", "scalar_6", "scalar_12"], consistency_level="Strong" ) - log.info(f"search res {res}") true_res = all_df.query(expr) - log.info(f"true res {true_res}") assert len(res[0]) == len(true_res) def test_par_key_isolation_with_unsupported_expr(self): diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py index 2eb669190a..b4e33a8653 100644 --- a/tests/python_client/testcases/test_query.py +++ b/tests/python_client/testcases/test_query.py @@ -119,7 +119,9 @@ class TestQueryParams(TestcaseBase): res = vectors[0].iloc[0:pos, :1].to_dict('records') term_expr = f'{ct.default_int64_field_name} in {int_values[:pos]}' - collection_w.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + collection_w.query(term_expr, + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_no_collection(self): @@ -181,12 +183,14 @@ class TestQueryParams(TestcaseBase): term_expr_1 = f'{ct.default_int64_field_name} in {ids[:pos]}' for i in range(5): res[i][ct.default_int64_field_name] = ids[i] - self.collection_wrap.query(term_expr_1, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + self.collection_wrap.query(term_expr_1, + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": 
self.collection_wrap.primary_field.name}) # query with part primary keys term_expr_2 = f'{ct.default_int64_field_name} in {[ids[0], 0]}' self.collection_wrap.query(term_expr_2, check_task=CheckTasks.check_query_results, - check_items={exp_res: res[:1]}) + check_items={exp_res: res[:1], "pk_name": self.collection_wrap.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("dup_times", [1, 2, 3]) @@ -276,7 +280,9 @@ class TestQueryParams(TestcaseBase): """ collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2] res = vectors[0].iloc[:2, :1].to_dict('records') - collection_w.query(default_term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + collection_w.query(default_term_expr, + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_query_expr_not_existed_field(self): @@ -331,7 +337,8 @@ class TestQueryParams(TestcaseBase): log.info(res) self.collection_wrap.query(term_expr, output_fields=["*"], check_task=CheckTasks.check_query_results, - check_items={exp_res: res, "with_vec": True}) + check_items={exp_res: res, "with_vec": True, + "pk_name": self.collection_wrap.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_query_expr_by_bool_field(self): @@ -422,7 +429,8 @@ class TestQueryParams(TestcaseBase): self.collection_wrap.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index) self.collection_wrap.load() self.collection_wrap.query(term_expr, output_fields=["float", "int64", "int8", "varchar"], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": self.collection_wrap.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_with_expression(self, enable_dynamic_field): @@ -503,7 +511,8 @@ class TestQueryParams(TestcaseBase): term_expr = f'{field} not in {values[pos:]}' res = df.iloc[:pos, :3].to_dict('records') self.collection_wrap.query(term_expr, output_fields=["float", "int64", "varchar"], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": self.collection_wrap.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("pos", [0, ct.default_nb]) @@ -523,7 +532,9 @@ class TestQueryParams(TestcaseBase): int64_values = df[ct.default_int64_field_name].tolist() term_expr = f'{ct.default_int64_field_name} not in {int64_values[pos:]}' res = df.iloc[:pos, :1].to_dict('records') - self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + self.collection_wrap.query(term_expr, + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": self.collection_wrap.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_random_values(self): @@ -545,7 +556,9 @@ class TestQueryParams(TestcaseBase): random_values = [0, 2, 4, 3] term_expr = f'{ct.default_int64_field_name} in {random_values}' res = df.iloc[random_values, :1].to_dict('records') - self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + self.collection_wrap.query(term_expr, + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": self.collection_wrap.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def 
test_query_expr_not_in_random(self): @@ -568,7 +581,9 @@ class TestQueryParams(TestcaseBase): random.shuffle(random_values) term_expr = f'{ct.default_int64_field_name} not in {random_values}' res = df.iloc[:10, :1].to_dict('records') - self.collection_wrap.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + self.collection_wrap.query(term_expr, + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": self.collection_wrap.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_query_expr_non_array_term(self): @@ -1202,7 +1217,9 @@ class TestQueryParams(TestcaseBase): res.append({ct.default_int64_field_name: ids}) # 2. query with limit - collection_w.query("", limit=limit, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + collection_w.query("", limit=limit, + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_query_expr_empty_pk_string(self): @@ -1221,12 +1238,14 @@ class TestQueryParams(TestcaseBase): # 2. query with limit collection_w.query("", limit=ct.default_limit, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) # 2. query with limit + offset res = res[5:] collection_w.query("", limit=5, offset=5, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("offset", [100, 1000]) @@ -1246,7 +1265,8 @@ class TestQueryParams(TestcaseBase): # 2. query with limit and offset collection_w.query("", limit=limit, offset=offset, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("offset", [100, 1000]) @@ -1275,7 +1295,8 @@ class TestQueryParams(TestcaseBase): res.append({ct.default_int64_field_name: ids, ct.default_string_field_name: str(ids)}) collection_w.query("", limit=limit, output_fields=[ct.default_string_field_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) # 4. 
query with pagination exp_ids, res = sorted(unordered_ids)[:limit + offset][offset:], [] @@ -1283,7 +1304,8 @@ class TestQueryParams(TestcaseBase): res.append({ct.default_int64_field_name: ids, ct.default_string_field_name: str(ids)}) collection_w.query("", limit=limit, offset=offset, output_fields=[ct.default_string_field_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L0) def test_query_expr_with_limit_offset_out_of_range(self): @@ -1400,7 +1422,8 @@ class TestQueryParams(TestcaseBase): collection_w.load() actual_res, _ = collection_w.query(default_term_expr, output_fields=all_fields, check_task=CheckTasks.check_query_results, - check_items={exp_res: res, "with_vec": True}) + check_items={exp_res: res, "with_vec": True, + "pk_name": collection_w.primary_field.name}) assert set(actual_res[0].keys()) == set(all_fields) @pytest.mark.tags(CaseLabel.L2) @@ -1422,7 +1445,8 @@ class TestQueryParams(TestcaseBase): for output_fields in fields: collection_w.query(default_term_expr, output_fields=output_fields, check_task=CheckTasks.check_query_results, - check_items={exp_res: res, "with_vec": True}) + check_items={exp_res: res, "with_vec": True, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("wildcard_output_fields", [["*"], ["*", default_float_field_name], @@ -1471,7 +1495,7 @@ class TestQueryParams(TestcaseBase): collection_w.load() collection_w.query(default_term_expr, output_fields=output_fields, check_task=CheckTasks.check_query_results, - check_items={exp_res: res, "with_vec": True}) + check_items={exp_res: res, "with_vec": True, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.skip(reason="https://github.com/milvus-io/milvus/issues/12680") @@ -1500,12 +1524,12 @@ class TestQueryParams(TestcaseBase): collection_w.load() collection_w.query(default_term_expr, output_fields=output_fields, check_task=CheckTasks.check_query_results, - check_items={exp_res: res, "with_vec": True}) + check_items={exp_res: res, "with_vec": True, "pk_name": collection_w.primary_field.name}) # query with wildcard % collection_w.query(default_term_expr, output_fields=["*"], check_task=CheckTasks.check_query_results, - check_items={exp_res: res, "with_vec": True}) + check_items={exp_res: res, "with_vec": True, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_query_output_binary_vec_field(self): @@ -1578,7 +1602,7 @@ class TestQueryParams(TestcaseBase): res3 = df.iloc[:2].to_dict('records') collection_w.query(default_term_expr, output_fields=["*"], check_task=CheckTasks.check_query_results, - check_items={exp_res: res3, "with_vec": True}) + check_items={exp_res: res3, "with_vec": True, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.skip(reason="issue 24637") @@ -1598,7 +1622,7 @@ class TestQueryParams(TestcaseBase): collection_w.load() collection_w.query(default_term_expr, output_fields=["*", ct.default_float_vec_field_name], check_task=CheckTasks.check_query_results, - check_items={exp_res: res, "with_vec": True}) + check_items={exp_res: res, "with_vec": True, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("output_fields", [["*%"], ["**"], ["*", "@"]]) @@ -1633,7 +1657,8 @@ class TestQueryParams(TestcaseBase): 
partition_w.load() res = df.iloc[:2, :1].to_dict('records') collection_w.query(default_term_expr, partition_names=[partition_w.name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_query_partition_without_loading(self): @@ -1661,7 +1686,8 @@ class TestQueryParams(TestcaseBase): collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2] res = vectors[0].iloc[:2, :1].to_dict('records') collection_w.query(default_term_expr, partition_names=[ct.default_partition_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_empty_partition_names(self): @@ -1677,8 +1703,9 @@ class TestQueryParams(TestcaseBase): # query from empty partition_names term_expr = f'{ct.default_int64_field_name} in [0, {half}, {ct.default_nb}-1]' res = [{'int64': 0}, {'int64': half}, {'int64': ct.default_nb - 1}] - collection_w.query(term_expr, partition_names=[], check_task=CheckTasks.check_query_results, - check_items={exp_res: res}) + collection_w.query(term_expr, partition_names=[], + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_query_empty_partition(self): @@ -1797,7 +1824,7 @@ class TestQueryParams(TestcaseBase): query_params = {"offset": offset, "limit": 10} query_res = collection_w.query(term_expr, params=query_params, check_task=CheckTasks.check_query_results, - check_items={exp_res: res})[0] + check_items={exp_res: res, "pk_name": collection_w.primary_field.name})[0] key_res = [item[key] for item in query_res for key in item] assert key_res == int_values[offset: pos + offset] @@ -1819,7 +1846,7 @@ class TestQueryParams(TestcaseBase): query_params = {"offset": offset, "limit": 10} query_res = collection_w.query(term_expr, params=query_params, check_task=CheckTasks.check_query_results, - check_items={exp_res: res})[0] + check_items={exp_res: res, "pk_name": collection_w.primary_field.name})[0] key_res = [item[key] for item in query_res for key in item] assert key_res == int_values[offset: pos + offset] @@ -1875,7 +1902,8 @@ class TestQueryParams(TestcaseBase): res = df.iloc[:2, :1].to_dict('records') query_params = {"offset": offset, "limit": 10} collection_w.query(default_term_expr, params=query_params, partition_names=[partition_w.name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_query_pagination_with_insert_data(self, offset): @@ -1893,7 +1921,8 @@ class TestQueryParams(TestcaseBase): res = df.iloc[:2, :1].to_dict('records') query_params = {"offset": offset, "limit": 10} collection_w.query(default_term_expr, params=query_params, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_pagination_without_limit(self, offset): @@ -1911,10 +1940,10 @@ class TestQueryParams(TestcaseBase): query_params = {"offset": offset} query_res = 
collection_w.query(term_expr, params=query_params, check_task=CheckTasks.check_query_results, - check_items={exp_res: res})[0] + check_items={exp_res: res, "pk_name": collection_w.primary_field.name})[0] res = collection_w.query(term_expr, check_task=CheckTasks.check_query_results, - check_items={exp_res: res})[0] + check_items={exp_res: res, "pk_name": collection_w.primary_field.name})[0] assert query_res == res @pytest.mark.tags(CaseLabel.L2) @@ -2059,12 +2088,14 @@ class TestQueryParams(TestcaseBase): collection_w.load() # 2. query with limit collection_w.query("", limit=ct.default_limit, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) # 3. query with limit + offset res = res[5:] collection_w.query("", limit=5, offset=5, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_enable_mmap_query_with_expression(self, enable_dynamic_field): @@ -2180,7 +2211,8 @@ class TestQueryParams(TestcaseBase): expression = 'varchar like "0%"' output_fields = [default_int_field_name, default_float_field_name, default_string_field_name] collection_w.query(expression, output_fields=output_fields, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) class TestQueryOperation(TestcaseBase): @@ -2249,7 +2281,8 @@ class TestQueryOperation(TestcaseBase): # query the first row of data check_vec = vectors[0].iloc[:, [0]][0:1].to_dict('records') collection_w.query(term_expr, - check_task=CheckTasks.check_query_results, check_items={exp_res: check_vec}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: check_vec, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("term_expr", [f'{ct.default_int64_field_name} in [0]']) @@ -2267,7 +2300,8 @@ class TestQueryOperation(TestcaseBase): # query the first row of data check_vec = vectors[0].iloc[:, [0]][0:1].to_dict('records') collection_w.query(term_expr, - check_task=CheckTasks.check_query_results, check_items={exp_res: check_vec}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: check_vec, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_query_expr_all_term_array(self): @@ -2288,7 +2322,8 @@ class TestQueryOperation(TestcaseBase): # query all array value collection_w.query(term_expr, - check_task=CheckTasks.check_query_results, check_items={exp_res: check_vec}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: check_vec, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_half_term_array(self): @@ -2339,7 +2374,7 @@ class TestQueryOperation(TestcaseBase): term_expr = f'{ct.default_int64_field_name} in {[0, 0, 0]}' res = df.iloc[:, :2].to_dict('records') collection_w.query(term_expr, output_fields=["*"], check_items=CheckTasks.check_query_results, - check_task={exp_res: res}) + check_task={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("with_growing", [True]) @@ -2397,7 +2432,8 @@ class TestQueryOperation(TestcaseBase): term_expr = f'{ct.default_int64_field_name} in 
{int_values}' check_vec = vectors[0].iloc[:, [0]][0:len(int_values)].to_dict('records') collection_w.query(term_expr, - check_task=CheckTasks.check_query_results, check_items={exp_res: check_vec}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: check_vec, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_after_search(self): @@ -2426,7 +2462,8 @@ class TestQueryOperation(TestcaseBase): term_expr = f'{ct.default_int64_field_name} in [0, 1]' check_vec = vectors[0].iloc[:, [0]][0:2].to_dict('records') collection_w.query(term_expr, - check_task=CheckTasks.check_query_results, check_items={exp_res: check_vec}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: check_vec, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_output_vec_field_after_index(self): @@ -2446,7 +2483,7 @@ class TestQueryOperation(TestcaseBase): collection_w.load() collection_w.query(default_term_expr, output_fields=fields, check_task=CheckTasks.check_query_results, - check_items={exp_res: res, "with_vec": True}) + check_items={exp_res: res, "with_vec": True, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_output_binary_vec_field_after_index(self): @@ -2525,7 +2562,8 @@ class TestQueryOperation(TestcaseBase): term_expr = f'{ct.default_int64_field_name} in [{half}]' # half entity in _default partition rather than partition_w collection_w.query(term_expr, partition_names=[partition_w.name], - check_task=CheckTasks.check_query_results, check_items={exp_res: []}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: [], "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_multi_partitions_multi_results(self): @@ -2584,7 +2622,8 @@ class TestQueryOperation(TestcaseBase): res = df.iloc[1:2, :1].to_dict('records') time.sleep(1) collection_w.query(f'{ct.default_int64_field_name} in [1]', - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip("not support default_value now") @@ -2691,7 +2730,8 @@ class TestQueryString(TestcaseBase): res = vectors[0].iloc[:2, :3].to_dict('records') output_fields = [default_float_field_name, default_string_field_name] collection_w.query(default_string_term_expr, output_fields=output_fields, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expression", cf.gen_normal_string_expressions([default_string_field_name])) @@ -2719,7 +2759,8 @@ class TestQueryString(TestcaseBase): res = vectors[0].iloc[:, 1:3].to_dict('records') output_fields = [default_float_field_name, default_string_field_name] collection_w.query(default_mix_expr, output_fields=output_fields, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expression", cf.gen_invalid_string_expressions()) @@ -2763,7 +2804,8 @@ class TestQueryString(TestcaseBase): expression = 'varchar like "0%"' output_fields = [default_int_field_name, default_float_field_name, 
default_string_field_name] collection_w.query(expression, output_fields=output_fields, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_string_expr_with_suffix(self): @@ -2779,7 +2821,8 @@ class TestQueryString(TestcaseBase): res = filtered_data.iloc[:, :3].to_dict('records') output_fields = [default_int_field_name, default_float_field_name, default_string_field_name] collection_w.query(expression, output_fields=output_fields, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_string_expr_with_inner_match(self): @@ -2795,7 +2838,8 @@ class TestQueryString(TestcaseBase): res = filtered_data.iloc[:, :3].to_dict('records') output_fields = [default_int_field_name, default_float_field_name, default_string_field_name] collection_w.query(expression, output_fields=output_fields, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_bitmap_alter_offset_cache_param(self): @@ -2955,7 +2999,8 @@ class TestQueryString(TestcaseBase): expression = 'float > int64' output_fields = [default_int_field_name, default_float_field_name, default_string_field_name] collection_w.query(expression, output_fields=output_fields, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_compare_invalid_fields(self): @@ -3018,7 +3063,7 @@ class TestQueryString(TestcaseBase): collection_w.query(expression, output_fields=output_fields, check_task=CheckTasks.check_query_results, check_items={exp_res: df_dict_list, - "primary_field": default_int_field_name, + "pk_name": collection_w.primary_field.name, "with_vec": True}) @pytest.mark.tags(CaseLabel.L2) @@ -3098,7 +3143,7 @@ class TestQueryString(TestcaseBase): check_vec = vectors[0].iloc[:, [0]][0:len(int_values)].to_dict('records') collection_w.query(term_expr, check_task=CheckTasks.check_query_results, - check_items={exp_res: check_vec}) + check_items={exp_res: check_vec, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_query_with_create_diskann_with_string_pk(self): @@ -3116,7 +3161,8 @@ class TestQueryString(TestcaseBase): res = vectors[0].iloc[:, 1:3].to_dict('records') output_fields = [default_float_field_name, default_string_field_name] collection_w.query(default_mix_expr, output_fields=output_fields, - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_with_scalar_field(self): @@ -3306,7 +3352,8 @@ class TestQueryCount(TestcaseBase): collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb}]}) + check_items={exp_res: [{count: ct.default_nb}], + "pk_name": collection_w.primary_field.name}) 
@pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("invalid_output_field", ["count", "count(int64)", "count(**)"]) @@ -3367,15 +3414,13 @@ class TestQueryCount(TestcaseBase): # query count collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: tmp_nb}]} - ) + check_items={exp_res: [{count: tmp_nb}], "pk_name": collection_w.primary_field.name}) # delete and verify count collection_w.delete(default_term_expr) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: 0}]} - ) + check_items={exp_res: [{count: 0}], "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_count_multi_partitions(self): @@ -3395,7 +3440,7 @@ class TestQueryCount(TestcaseBase): for p_name in [p1.name, ct.default_partition_name]: collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], partition_names=[p_name], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: half}]}) + check_items={exp_res: [{count: half}], "pk_name": collection_w.primary_field.name}) # delete entities from _default delete_expr = f"{ct.default_int64_field_name} in {[i for i in range(half, ct.default_nb)]} " @@ -3403,11 +3448,11 @@ collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], partition_names=[ct.default_partition_name], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: 0}]}) + check_items={exp_res: [{count: 0}], "pk_name": collection_w.primary_field.name}) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], partition_names=[p1.name, ct.default_partition_name], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: half}]}) + check_items={exp_res: [{count: half}], "pk_name": collection_w.primary_field.name}) # drop p1 partition p1.release() @@ -3420,7 +3465,7 @@ class TestQueryCount(TestcaseBase): collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], partition_names=[ct.default_partition_name], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: 0}]}) + check_items={exp_res: [{count: 0}], "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_count_partition_duplicate(self): @@ -3447,7 +3492,8 @@ class TestQueryCount(TestcaseBase): # count collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb * 2}]} + check_items={exp_res: [{count: ct.default_nb * 2}], + "pk_name": collection_w.primary_field.name} ) # delete some duplicate ids @@ -3455,7 +3501,8 @@ collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], partition_names=[p1], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb - delete_res.delete_count}]} + check_items={exp_res: [{count: ct.default_nb - delete_res.delete_count}], + "pk_name": collection_w.primary_field.name} ) @pytest.mark.tags(CaseLabel.L1) @@ -3473,15 +3520,14 @@ class TestQueryCount(TestcaseBase): collection_w = self.init_collection_general(insert_data=True, nb=tmp_nb)[0] collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: 
tmp_nb}]} - ) + check_items={exp_res: [{count: tmp_nb}], "pk_name": collection_w.primary_field.name}) # new insert and growing count df = cf.gen_default_dataframe_data(nb=tmp_nb, start=tmp_nb) collection_w.insert(df) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: tmp_nb * 2}]}) + check_items={exp_res: [{count: tmp_nb * 2}], "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_count_during_handoff(self): @@ -3506,8 +3552,8 @@ class TestQueryCount(TestcaseBase): kwargs={ "output_fields": [ct.default_count_output], "check_task": CheckTasks.check_query_results, - "check_items": {exp_res: [{count: ct.default_nb}]} - }) + "check_items": {exp_res: [{count: ct.default_nb}], + "pk_name": collection_w.primary_field.name}}) t_flush.start() t_count.start() @@ -3534,7 +3580,8 @@ class TestQueryCount(TestcaseBase): collection_w.delete(f"{ct.default_int64_field_name} in {[i for i in range(ct.default_nb)]}") collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: tmp_nb}]} + check_items={exp_res: [{count: tmp_nb}], + "pk_name": collection_w.primary_field.name} ) # re-insert deleted ids [0, default_nb) with different vectors @@ -3542,7 +3589,8 @@ class TestQueryCount(TestcaseBase): collection_w.insert(df_same) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb + tmp_nb}]} + check_items={exp_res: [{count: ct.default_nb + tmp_nb}], + "pk_name": collection_w.primary_field.name} ) @pytest.mark.tags(CaseLabel.L1) @@ -3575,7 +3623,8 @@ class TestQueryCount(TestcaseBase): # count after compact collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: tmp_nb * segment_num}]}) + check_items={exp_res: [{count: tmp_nb * segment_num}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_count_compact_delete(self): @@ -3605,7 +3654,8 @@ class TestQueryCount(TestcaseBase): collection_w.load() collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb // 2}]} + check_items={exp_res: [{count: ct.default_nb // 2}], + "pk_name": collection_w.primary_field.name} ) @pytest.mark.tags(CaseLabel.L2) @@ -3634,7 +3684,8 @@ class TestQueryCount(TestcaseBase): kwargs={ "output_fields": [ct.default_count_output], "check_task": CheckTasks.check_query_results, - "check_items": {exp_res: [{count: tmp_nb * 10}]} + "check_items": {exp_res: [{count: tmp_nb * 10}], + "pk_name": collection_w.primary_field.name} }) t_compact.start() @@ -3655,11 +3706,13 @@ class TestQueryCount(TestcaseBase): # count with expr collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb}]}) + check_items={exp_res: [{count: ct.default_nb}], + "pk_name": collection_w.primary_field.name}) collection_w.query(expr=default_term_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: 2}]}) + check_items={exp_res: [{count: 2}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) 
def test_query_count_expr_json(self): @@ -3687,7 +3740,8 @@ class TestQueryCount(TestcaseBase): expression = f'{ct.default_json_field_name}["number"] < 100' collection_w.query(expression, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: 50}]}) + check_items={exp_res: [{count: 50}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_json_expr_on_search_n_query(self): @@ -3753,7 +3807,9 @@ class TestQueryCount(TestcaseBase): for expr in query_exprs: log.debug(f"query_expr: {expr}") collection_w.query(expr=expr, output_fields=[count], - check_task=CheckTasks.check_query_results, check_items={exp_res: [{count: 10}]}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: [{count: 10}], + "pk_name": collection_w.primary_field.name}) collection_w.search(data=search_data, anns_field=ct.default_float_vec_field_name, param=search_param, limit=10, expr=expr, check_task=CheckTasks.check_search_results, @@ -3764,7 +3820,9 @@ class TestQueryCount(TestcaseBase): f'{json_embedded_object}["{json_embedded_object}"] in []']: log.debug(f"query_expr: {expr}") collection_w.query(expr=expr, output_fields=[count], - check_task=CheckTasks.check_query_results, check_items={exp_res: [{count: 0}]}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: [{count: 0}], + "pk_name": collection_w.primary_field.name}) collection_w.search(data=search_data, anns_field=ct.default_float_vec_field_name, param=search_param, limit=10, expr=expr, check_task=CheckTasks.check_search_results, @@ -3783,8 +3841,8 @@ class TestQueryCount(TestcaseBase): # only params offset is not considered pagination collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], offset=10, check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb}]} - ) + check_items={exp_res: [{count: ct.default_nb}], + "pk_name": collection_w.primary_field.name}) # count with limit collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], limit=10, check_task=CheckTasks.err_res, @@ -3819,7 +3877,8 @@ class TestQueryCount(TestcaseBase): collection_w_alias.insert(cf.gen_default_dataframe_data(start=ct.default_nb), partition_name=p_name) collection_w_alias.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb * 2}]}) + check_items={exp_res: [{count: ct.default_nb * 2}], + "pk_name": collection_w.primary_field.name}) # release collection and alias drop partition collection_w_alias.drop_partition(p_name, check_task=CheckTasks.err_res, @@ -3834,13 +3893,15 @@ class TestQueryCount(TestcaseBase): assert res is False collection_w_alias.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb}]}) + check_items={exp_res: [{count: ct.default_nb}], + "pk_name": collection_w.primary_field.name}) # alias delete and count collection_w_alias.delete(f"{ct.default_int64_field_name} in {[i for i in range(ct.default_nb)]}") collection_w_alias.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: 0}]}) + check_items={exp_res: [{count: 0}], + "pk_name": collection_w.primary_field.name}) collection_w_alias.drop(check_task=CheckTasks.err_res, check_items={ct.err_code: 1, @@ -3886,21 +3947,24 @@ class 
TestQueryCount(TestcaseBase): collection_w.upsert(df_zero) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb}]}) + check_items={exp_res: [{count: ct.default_nb}], + "pk_name": collection_w.primary_field.name}) # upsert new id and count df_new = cf.gen_default_dataframe_data(nb=1, start=ct.default_nb) collection_w.upsert(df_new) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb + 1}]}) + check_items={exp_res: [{count: ct.default_nb + 1}], + "pk_name": collection_w.primary_field.name}) # upsert existed id and count df_existed = cf.gen_default_dataframe_data(nb=1, start=10) collection_w.upsert(df_existed) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb + 1}]}) + check_items={exp_res: [{count: ct.default_nb + 1}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_count_upsert_duplicate(self): @@ -3927,21 +3991,24 @@ class TestQueryCount(TestcaseBase): collection_w.upsert(df_existed) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: tmp_nb}]} + check_items={exp_res: [{count: tmp_nb}], + "pk_name": collection_w.primary_field.name} ) # delete id and count delete_res, _ = collection_w.delete(default_term_expr) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: tmp_nb - delete_res.delete_count}]}) + check_items={exp_res: [{count: tmp_nb - delete_res.delete_count}], + "pk_name": collection_w.primary_field.name}) # upsert deleted id and count df_deleted = cf.gen_default_dataframe_data(nb=delete_res.delete_count, start=0) collection_w.upsert(df_deleted) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: tmp_nb}]}) + check_items={exp_res: [{count: tmp_nb}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_count_rename_collection(self): @@ -3959,7 +4026,8 @@ class TestQueryCount(TestcaseBase): self.collection_wrap.init_collection(new_name) self.collection_wrap.query(expr=default_expr, output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb}]}) + check_items={exp_res: [{count: ct.default_nb}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_count_disable_growing_segments(self): @@ -3978,7 +4046,8 @@ class TestQueryCount(TestcaseBase): collection_w.insert(cf.gen_default_dataframe_data(nb=100)) collection_w.query(expr=default_expr, output_fields=[ct.default_count_output], ignore_growing=True, check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: 0}]}) + check_items={exp_res: [{count: 0}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_count_expressions(self): @@ -4006,14 +4075,16 @@ class TestQueryCount(TestcaseBase): # count with expr collection_w.query(expr=expr, output_fields=[count], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: res}]}) + check_items={exp_res: 
[{count: res}], + "pk_name": collection_w.primary_field.name}) # count again with expr template expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") expr_params = cf.get_expr_params_from_template(expressions[1]) collection_w.query(expr=expr, expr_params=expr_params, output_fields=[count], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: res}]}) + check_items={exp_res: [{count: res}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("bool_type", [True, False, "true", "false"]) @@ -4048,7 +4119,8 @@ class TestQueryCount(TestcaseBase): expression = f"{ct.default_bool_field_name} == {bool_type}" collection_w.query(expr=expression, output_fields=[count], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: res}]}) + check_items={exp_res: [{count: res}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_count_expression_auto_field(self): @@ -4074,12 +4146,16 @@ class TestQueryCount(TestcaseBase): # count with expr collection_w.query(expr=expr, output_fields=[count], - check_task=CheckTasks.check_query_results, check_items={exp_res: [{count: res}]}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: [{count: res}], + "pk_name": collection_w.primary_field.name}) # count with expr and expr_params expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") expr_params = cf.get_expr_params_from_template(expressions[1]) collection_w.query(expr=expr, expr_params=expr_params, output_fields=[count], - check_task=CheckTasks.check_query_results, check_items={exp_res: [{count: res}]}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: [{count: res}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L2) def test_count_expression_all_datatype(self): @@ -4095,7 +4171,8 @@ class TestQueryCount(TestcaseBase): expr = "int64 >= 0 && int32 >= 1999 && int16 >= 0 && int8 <= 0 && float <= 1999.0 && double >= 0" collection_w.query(expr=expr, output_fields=[count], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: 1}]}) + check_items={exp_res: [{count: 1}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_count_expression_comparative(self): @@ -4124,7 +4201,8 @@ class TestQueryCount(TestcaseBase): expression = "int64_1 >= int64_2" collection_w.query(expr=expression, output_fields=[count], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: res}]}) + check_items={exp_res: [{count: res}], + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("index", ct.all_index_types[9:11]) @@ -4146,16 +4224,19 @@ class TestQueryCount(TestcaseBase): collection_w.load() collection_w.query(expr=default_expr, output_fields=[count], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: ct.default_nb}]}) + check_items={exp_res: [{count: ct.default_nb}], + "pk_name": collection_w.primary_field.name}) expr = "int64 > 50 && int64 < 100 && float < 75" collection_w.query(expr=expr, output_fields=[count], check_task=CheckTasks.check_query_results, - check_items={exp_res: [{count: 24}]}) + check_items={exp_res: [{count: 24}], + "pk_name": collection_w.primary_field.name}) batch_size = 100 collection_w.query_iterator(batch_size=batch_size, expr=default_expr, check_task=CheckTasks.check_query_iterator, check_items={"count": 
ct.default_nb, - "batch_size": batch_size}) + "batch_size": batch_size, + "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.repeat(3) @@ -4175,7 +4256,8 @@ class TestQueryCount(TestcaseBase): collection_w = self.init_collection_general(prefix, True, 200, partition_num=1, is_index=True)[0] collection_w.query(expr='', output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={"exp_res": [{ct.default_count_output: 200}]}) + check_items={"exp_res": [{ct.default_count_output: 200}], + "pk_name": collection_w.primary_field.name}) collection_w.release() partition_w1, partition_w2 = collection_w.partitions # load @@ -4190,10 +4272,12 @@ class TestQueryCount(TestcaseBase): # search on collection, partition1, partition2 collection_w.query(expr='', output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={"exp_res": [{ct.default_count_output: 50}]}) + check_items={"exp_res": [{ct.default_count_output: 50}], + "pk_name": collection_w.primary_field.name}) partition_w1.query(expr='', output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={"exp_res": [{ct.default_count_output: 50}]}) + check_items={"exp_res": [{ct.default_count_output: 50}], + "pk_name": collection_w.primary_field.name}) vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(ct.default_nq)] collection_w.search(vectors[:1], ct.default_float_vec_field_name, ct.default_search_params, 200, partition_names=[partition_w2.name], @@ -4249,7 +4333,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): term_expr = f'{ct.default_int64_field_name} in {int_values[:pos]}' collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L0) def test_query_by_expr_none_with_none_data(self, enable_dynamic_field, null_data_percent): @@ -4276,7 +4361,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): term_expr = f'' collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, default_float_field_name], - limit=pos, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + limit=pos, check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L0) def test_query_by_nullable_field_with_none_data(self): @@ -4298,7 +4384,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): term_expr = f'{default_float_field_name} < {pos}' collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L0) def test_query_after_none_data_all_field_datatype(self, varchar_scalar_index, numeric_scalar_index, @@ -4351,7 +4438,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): term_expr = f'0 <= {ct.default_int64_field_name} < {pos}' collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, ct.default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, 
"pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L0) def test_query_default_value_with_insert(self, enable_dynamic_field): @@ -4379,7 +4467,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): term_expr = f'{ct.default_int64_field_name} in {int_values[:pos]}' # 2. query collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_default_value_without_insert(self, enable_dynamic_field): @@ -4396,7 +4485,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): term_expr = f'{ct.default_int64_field_name} > 0' # 2. query collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: []}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: [], "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L0) def test_query_after_default_data_all_field_datatype(self, varchar_scalar_index, numeric_scalar_index): @@ -4449,7 +4539,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): term_expr = f'0 <= {ct.default_int64_field_name} < {pos}' # 5. query collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, ct.default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.skip(reason="issue #36003") @@ -4477,7 +4568,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): term_expr = f'{ct.default_float_field_name} in [10.0]' collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, default_float_field_name], - limit=pos, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + limit=pos, check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.tags(CaseLabel.GPU) @@ -4522,7 +4614,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): term_expr = f'{ct.default_int64_field_name} in {int64_values[:pos]}' # 5. 
query collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, ct.default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_iterator_with_none_data(self, null_data_percent): @@ -4544,6 +4637,7 @@ class TestQueryNoneAndDefaultData(TestcaseBase): collection_w.query_iterator(batch_size, expr=expr, check_task=CheckTasks.check_query_iterator, check_items={"count": ct.default_nb, + "pk_name": collection_w.primary_field.name, "batch_size": batch_size}) @pytest.mark.tags(CaseLabel.L1) @@ -4573,7 +4667,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): term_expr = f'{ct.default_int64_field_name} in {int_values[:pos]}' collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, default_float_field_name], - check_task=CheckTasks.check_query_results, check_items={exp_res: res}) + check_task=CheckTasks.check_query_results, + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.skip(reason="issue #36538") @@ -4594,7 +4689,8 @@ class TestQueryNoneAndDefaultData(TestcaseBase): default_value_fields={ct.default_string_field_name: "data"})[0] collection_w.query(expr='', output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={"exp_res": [{ct.default_count_output: 200}]}) + check_items={"exp_res": [{ct.default_count_output: 200}], + "pk_name": collection_w.primary_field.name}) collection_w.release() partition_w1, partition_w2 = collection_w.partitions # load @@ -4609,10 +4705,12 @@ class TestQueryNoneAndDefaultData(TestcaseBase): # search on collection, partition1, partition2 collection_w.query(expr='', output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={"exp_res": [{ct.default_count_output: 50}]}) + check_items={"exp_res": [{ct.default_count_output: 50}], + "pk_name": collection_w.primary_field.name}) partition_w1.query(expr='', output_fields=[ct.default_count_output], check_task=CheckTasks.check_query_results, - check_items={"exp_res": [{ct.default_count_output: 50}]}) + check_items={"exp_res": [{ct.default_count_output: 50}], + "pk_name": collection_w.primary_field.name}) vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(ct.default_nq)] collection_w.search(vectors[:1], ct.default_float_vec_field_name, ct.default_search_params, 200, partition_names=[partition_w2.name], @@ -7044,8 +7142,7 @@ class TestQueryFunction(TestcaseBase): mixed_call_expr, output_fields=output_fields, check_task=CheckTasks.check_query_results, - check_items={exp_res: res}, - ) + check_items={exp_res: res, "pk_name": collection_w.primary_field.name}) @pytest.mark.tags(CaseLabel.L1) def test_query_invalid(self): diff --git a/tests/python_client/testcases/test_query_iterator.py b/tests/python_client/testcases/test_query_iterator.py index 562771e0c0..5658eeb423 100644 --- a/tests/python_client/testcases/test_query_iterator.py +++ b/tests/python_client/testcases/test_query_iterator.py @@ -45,6 +45,7 @@ class TestQueryIterator(TestcaseBase): collection_w.query_iterator(batch_size, expr=expr, check_task=CheckTasks.check_query_iterator, check_items={"count": nb, + "pk_name": collection_w.primary_field.name, "batch_size": batch_size}) # 3. 
query iterator with checkpoint file iterator_cp_file = f"/tmp/it_{collection_w.name}_cp" @@ -103,6 +104,7 @@ class TestQueryIterator(TestcaseBase): # 2. query iterator collection_w.query_iterator(check_task=CheckTasks.check_query_iterator, check_items={"count": ct.default_nb, + "pk_name": collection_w.primary_field.name, "batch_size": ct.default_batch_size}) @pytest.mark.tags(CaseLabel.L2) @@ -124,6 +126,7 @@ class TestQueryIterator(TestcaseBase): collection_w.query_iterator(batch_size, expr=expr, offset=offset, check_task=CheckTasks.check_query_iterator, check_items={"count": ct.default_nb - offset, + "pk_name": collection_w.primary_field.name, "batch_size": batch_size}) @pytest.mark.tags(CaseLabel.L2) @@ -145,6 +148,7 @@ class TestQueryIterator(TestcaseBase): output_fields=[ct.default_float_vec_field_name], check_task=CheckTasks.check_query_iterator, check_items={"count": ct.default_nb, + "pk_name": collection_w.primary_field.name, "batch_size": batch_size}) @pytest.mark.tags(CaseLabel.L2) @@ -166,6 +170,7 @@ class TestQueryIterator(TestcaseBase): collection_w.query_iterator(batch_size=batch_size, expr=expr, offset=offset, check_task=CheckTasks.check_query_iterator, check_items={"count": ct.default_nb - offset, + "pk_name": collection_w.primary_field.name, "batch_size": batch_size}) @pytest.mark.tags(CaseLabel.L2) @@ -185,6 +190,7 @@ class TestQueryIterator(TestcaseBase): collection_w.query_iterator(limit=limit, expr="", offset=offset, check_task=CheckTasks.check_query_iterator, check_items={"count": max(Count, 0), + "pk_name": collection_w.primary_field.name, "batch_size": ct.default_batch_size}) @pytest.mark.tags(CaseLabel.L2) @@ -235,6 +241,7 @@ class TestQueryIterator(TestcaseBase): check_task=CheckTasks.check_query_iterator, check_items={"batch_size": batch_size, "count": ct.default_nb, + "pk_name": collection_w.primary_field.name, "exp_ids": insert_ids}) file_exist = os.path.isfile(iterator_cp_file) assert file_exist is True, "The checkpoint exists if not iterator.close()" @@ -258,13 +265,17 @@ class TestQueryIterator(TestcaseBase): exp_ids = sorted(insert_ids) collection_w.query_iterator(batch_size, output_fields=[ct.default_string_field_name], check_task=CheckTasks.check_query_iterator, - check_items={"batch_size": batch_size, "count": ct.default_nb, "exp_ids": exp_ids}) + check_items={"batch_size": batch_size, + "pk_name": collection_w.primary_field.name, + "count": ct.default_nb, "exp_ids": exp_ids}) # 3. query with pagination exp_ids = sorted(insert_ids)[offset:] collection_w.query_iterator(batch_size, offset=offset, output_fields=[ct.default_string_field_name], check_task=CheckTasks.check_query_iterator, - check_items={"batch_size": batch_size, "count": ct.default_nb - offset, "exp_ids": exp_ids}) + check_items={"batch_size": batch_size, + "pk_name": collection_w.primary_field.name, + "count": ct.default_nb - offset, "exp_ids": exp_ids}) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("primary_field", [ct.default_string_field_name, ct.default_int64_field_name]) @@ -294,6 +305,7 @@ class TestQueryIterator(TestcaseBase): # 2. 
query iterator collection_w.query_iterator(check_task=CheckTasks.check_query_iterator, check_items={"count": nb, + "pk_name": collection_w.primary_field.name, "batch_size": ct.default_batch_size}) @pytest.mark.tags(CaseLabel.L2) diff --git a/tests/python_client/testcases/test_utility.py b/tests/python_client/testcases/test_utility.py index 534b30d6cc..08a08f99ea 100644 --- a/tests/python_client/testcases/test_utility.py +++ b/tests/python_client/testcases/test_utility.py @@ -1804,7 +1804,8 @@ class TestUtilityAdvanced(TestcaseBase): term_expr = f'{ct.default_int64_field_name} in {insert_res.primary_keys[:10]}' res = df.iloc[:10, :1].to_dict('records') collection_w.query(term_expr, check_task=CheckTasks.check_query_results, - check_items={'exp_res': res}) + check_items={'exp_res': res, + "pk_name": collection_w.primary_field.name}) search_res_before, _ = collection_w.search(df[ct.default_float_vec_field_name][:1].to_list(), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit) @@ -1822,7 +1823,8 @@ class TestUtilityAdvanced(TestcaseBase): # query and search from handoff segments collection_w.query(term_expr, check_task=CheckTasks.check_query_results, - check_items={'exp_res': res}) + check_items={'exp_res': res, + "pk_name": collection_w.primary_field.name}) search_res_after, _ = collection_w.search(df[ct.default_float_vec_field_name][:1].to_list(), ct.default_float_vec_field_name, ct.default_search_params, ct.default_limit)
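
Note on the recurring pattern above: every `check_query_results` / `check_query_iterator` call now passes the collection's primary-key field name through `check_items["pk_name"]` (typically `collection_w.primary_field.name`), so the response checker can validate primary keys by their declared name instead of assuming a hard-coded `int64`/`id` field. A minimal sketch of the consuming side follows, for illustration only; the helper name `sketch_check_pk_values` and the flat list-of-dicts result shape are assumptions, not the actual `func_check.py` API.

    # Illustrative sketch -- not the real ResponseChecker implementation.
    # It shows why "pk_name" is threaded through check_items: the same check
    # then works for int64 and varchar primary keys alike.
    def sketch_check_pk_values(query_res, check_items):
        pk_name = check_items.get("pk_name", "int64")  # assumed fallback
        exp_res = check_items.get("exp_res", [])
        # Compare primary keys by the caller-declared field name.
        actual = {row[pk_name] for row in query_res if pk_name in row}
        expected = {row[pk_name] for row in exp_res if pk_name in row}
        assert actual == expected, f"pk mismatch on {pk_name!r}: {actual ^ expected}"

With the name supplied by the test itself, cases parametrized on `primary_field` (e.g. the string-pk tests in test_query_iterator.py) are checked against the right column without further changes to the checker.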