diff --git a/tests/python_client/base/client_base.py b/tests/python_client/base/client_base.py
index e3d2407f88..e0f79ebc10 100644
--- a/tests/python_client/base/client_base.py
+++ b/tests/python_client/base/client_base.py
@@ -275,7 +275,7 @@ class TestcaseBase(Base):
                                 auto_id=False, dim=ct.default_dim, is_index=True, primary_field=ct.default_int64_field_name,
                                 is_flush=True, name=None, enable_dynamic_field=False, with_json=True, random_primary_key=False,
-                                multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
+                                multiple_dim_array=[], is_partition_key=None, vector_data_type=DataType.FLOAT_VECTOR,
                                 nullable_fields={}, default_value_fields={}, language=None, **kwargs):
        """
        target: create specified collections
@@ -317,7 +317,7 @@ class TestcaseBase(Base):
                                                                     primary_field=primary_field,
                                                                     nullable_fields=nullable_fields,
                                                                     default_value_fields=default_value_fields)
-            if vector_data_type == ct.sparse_vector:
+            if vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
                default_schema = cf.gen_default_sparse_schema(auto_id=auto_id, primary_field=primary_field,
                                                              enable_dynamic_field=enable_dynamic_field,
                                                              with_json=with_json,
@@ -354,7 +354,7 @@ class TestcaseBase(Base):
            # This condition will be removed after auto index feature
            if is_binary:
                collection_w.create_index(ct.default_binary_vec_field_name, ct.default_bin_flat_index)
-            elif vector_data_type == ct.sparse_vector:
+            elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
                for vector_name in vector_name_list:
                    collection_w.create_index(vector_name, ct.default_sparse_inverted_index)
            else:
@@ -362,7 +362,7 @@ class TestcaseBase(Base):
                    vector_name_list.append(ct.default_float_vec_field_name)
                for vector_name in vector_name_list:
                    # Unlike dense vectors, sparse vectors cannot create flat index.
-                    if ct.sparse_vector in vector_name:
+                    if DataType.SPARSE_FLOAT_VECTOR.name in vector_name:
                        collection_w.create_index(vector_name, ct.default_sparse_inverted_index)
                    else:
                        collection_w.create_index(vector_name, ct.default_flat_index)
diff --git a/tests/python_client/base/client_v2_base.py b/tests/python_client/base/client_v2_base.py
index 13855ab024..269af18923 100644
--- a/tests/python_client/base/client_v2_base.py
+++ b/tests/python_client/base/client_v2_base.py
@@ -1,4 +1,5 @@
 import sys
+import time
 from typing import Optional
 
 from pymilvus import MilvusClient
@@ -544,7 +545,17 @@ class TestMilvusClientV2Base(Base):
                               index_name=index_name, **kwargs).run()
        return res, check_result
-
+
+    def wait_for_index_ready(self, client, collection_name, index_name, timeout=None, **kwargs):
+        """
+        Poll describe_index until the index has no pending rows or the timeout expires.
+        Returns True if the index is ready within the timeout, False otherwise.
+        """
+        timeout = TIMEOUT if timeout is None else timeout
+        start_time = time.time()
+        while start_time + timeout > time.time():
+            index_info, _ = self.describe_index(client, collection_name, index_name, **kwargs)
+            # treat a missing pending_index_rows field as "not ready yet"
+            if index_info.get("pending_index_rows", 1) == 0:
+                return True
+            time.sleep(2)
+        return False
+
     @trace()
     def list_indexes(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs):
        timeout = TIMEOUT if timeout is None else timeout
diff --git a/tests/python_client/check/func_check.py b/tests/python_client/check/func_check.py
index d9d6abb6bf..a8fc3b9414 100644
--- a/tests/python_client/check/func_check.py
+++ b/tests/python_client/check/func_check.py
@@ -445,19 +445,20 @@ class ResponseChecker:
                assert ids_match
            elif check_items.get("metric", None) is not None:
                # verify the distances are already sorted
-                if check_items.get("metric").lower() in ["ip", "bm25"]:
-                    assert distances == sorted(distances, reverse=False)
-                else:
+                if 
check_items.get("metric").upper() in ["IP", "COSINE", "BM25"]: assert distances == sorted(distances, reverse=True) - if check_items.get("vector_nq") is None or check_items.get("original_vectors") is None: - log.debug("vector for searched (nq) and inserted vectors are needed for distance check") else: - for id in ids: - searched_original_vectors.append(check_items["original_vectors"][id]) - cf.compare_distance_vector_and_vector_list(check_items["vector_nq"][nq_i], - searched_original_vectors, - check_items["metric"], distances) - log.info("search_results_check: Checked the distances for one nq: OK") + assert distances == sorted(distances, reverse=False) + if check_items.get("vector_nq") is None or check_items.get("original_vectors") is None: + log.debug("skip distance check for knowhere does not return the precise distances") + else: + # for id in ids: + # searched_original_vectors.append(check_items["original_vectors"][id]) + # cf.compare_distance_vector_and_vector_list(check_items["vector_nq"][nq_i], + # searched_original_vectors, + # check_items["metric"], distances) + # log.info("search_results_check: Checked the distances for one nq: OK") + pass else: pass # just check nq and topk, not specific ids need check nq_i += 1 diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index ad40e035e8..8122af7e22 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -692,22 +692,17 @@ def gen_double_field(name=ct.default_double_field_name, is_primary=False, descri def gen_float_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim, - description=ct.default_desc, vector_data_type="FLOAT_VECTOR", **kwargs): - if vector_data_type == "SPARSE_FLOAT_VECTOR": - dtype = DataType.SPARSE_FLOAT_VECTOR - float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=dtype, - description=description, - is_primary=is_primary, **kwargs) - return float_vec_field - if vector_data_type == "FLOAT_VECTOR": - dtype = DataType.FLOAT_VECTOR - elif vector_data_type == "FLOAT16_VECTOR": - dtype = DataType.FLOAT16_VECTOR - elif vector_data_type == "BFLOAT16_VECTOR": - dtype = DataType.BFLOAT16_VECTOR - float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=dtype, + description=ct.default_desc, vector_data_type=DataType.FLOAT_VECTOR, **kwargs): + + if vector_data_type != DataType.SPARSE_FLOAT_VECTOR: + float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=vector_data_type, description=description, dim=dim, is_primary=is_primary, **kwargs) + else: + float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.SPARSE_FLOAT_VECTOR, + description=description, + is_primary=is_primary, **kwargs) + return float_vec_field @@ -744,7 +739,7 @@ def gen_sparse_vec_field(name=ct.default_sparse_vec_field_name, is_primary=False def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name, auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True, - multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR", + multiple_dim_array=[], is_partition_key=None, vector_data_type=DataType.FLOAT_VECTOR, nullable_fields={}, default_value_fields={}, **kwargs): # gen primary key field if default_value_fields.get(ct.default_int64_field_name) is None: @@ -824,7 +819,7 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_fiel 
gen_array_field(name="array_bool", element_type=DataType.BOOL), gen_float_vec_field(dim=dim), gen_float_vec_field(name="image_emb", dim=dim), - gen_float_vec_field(name="text_sparse_emb", vector_data_type="SPARSE_FLOAT_VECTOR"), + gen_float_vec_field(name="text_sparse_emb", vector_data_type=DataType.SPARSE_FLOAT_VECTOR), gen_float_vec_field(name="voice_emb", dim=dim), ] @@ -998,25 +993,25 @@ def gen_collection_schema_all_datatype(description=ct.default_desc, primary_fiel else: multiple_dim_array.insert(0, dim) for i in range(len(multiple_dim_array)): - if ct.append_vector_type[i%3] != ct.sparse_vector: + if ct.append_vector_type[i%3] != DataType.SPARSE_FLOAT_VECTOR: if default_value_fields.get(ct.append_vector_type[i%3]) is None: - vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3]}", + vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3].name}", dim=multiple_dim_array[i], vector_data_type=ct.append_vector_type[i%3]) else: - vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3]}", + vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3].name}", dim=multiple_dim_array[i], vector_data_type=ct.append_vector_type[i%3], - default_value=default_value_fields.get(ct.append_vector_type[i%3])) + default_value=default_value_fields.get(ct.append_vector_type[i%3].name)) fields.append(vector_field) else: # The field of a sparse vector cannot be dimensioned if default_value_fields.get(ct.default_sparse_vec_field_name) is None: - sparse_vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.sparse_vector}", - vector_data_type=ct.sparse_vector) + sparse_vector_field = gen_sparse_vec_field(name=f"multiple_vector_{DataType.SPARSE_FLOAT_VECTOR.name}", + vector_data_type=DataType.SPARSE_FLOAT_VECTOR) else: - sparse_vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.sparse_vector}", - vector_data_type=ct.sparse_vector, + sparse_vector_field = gen_sparse_vec_field(name=f"multiple_vector_{DataType.SPARSE_FLOAT_VECTOR.name}", + vector_data_type=DataType.SPARSE_FLOAT_VECTOR, default_value=default_value_fields.get(ct.default_sparse_vec_field_name)) fields.append(sparse_vector_field) @@ -1124,23 +1119,25 @@ def gen_schema_multi_string_fields(string_fields): return schema -def gen_vectors(nb, dim, vector_data_type="FLOAT_VECTOR"): +def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR): vectors = [] - if vector_data_type == "FLOAT_VECTOR": + if vector_data_type == DataType.FLOAT_VECTOR: vectors = [[random.random() for _ in range(dim)] for _ in range(nb)] - elif vector_data_type == "FLOAT16_VECTOR": + elif vector_data_type == DataType.FLOAT16_VECTOR: vectors = gen_fp16_vectors(nb, dim)[1] - elif vector_data_type == "BFLOAT16_VECTOR": + elif vector_data_type == DataType.BFLOAT16_VECTOR: vectors = gen_bf16_vectors(nb, dim)[1] - elif vector_data_type == "SPARSE_FLOAT_VECTOR": + elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR: vectors = gen_sparse_vectors(nb, dim) - elif vector_data_type == "TEXT_SPARSE_VECTOR": + elif vector_data_type == ct.text_sparse_vector: vectors = gen_text_vectors(nb) + elif vector_data_type == DataType.BINARY_VECTOR: + vectors = gen_binary_vectors(nb, dim)[1] else: log.error(f"Invalid vector data type: {vector_data_type}") raise Exception(f"Invalid vector data type: {vector_data_type}") if dim > 1: - if vector_data_type == "FLOAT_VECTOR": + if vector_data_type == DataType.FLOAT_VECTOR: vectors = preprocessing.normalize(vectors, axis=1, 
norm='l2') vectors = vectors.tolist() return vectors @@ -1173,7 +1170,7 @@ def gen_binary_vectors(num, dim): def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[], - vector_data_type="FLOAT_VECTOR", auto_id=False, + vector_data_type=DataType.FLOAT_VECTOR, auto_id=False, primary_field=ct.default_int64_field_name, nullable_fields={}, language=None): if not random_primary_key: int_values = pd.Series(data=[i for i in range(start, start + nb)]) @@ -1235,7 +1232,7 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[], - vector_data_type="FLOAT_VECTOR", auto_id=False, + vector_data_type=DataType.FLOAT_VECTOR, auto_id=False, primary_field=ct.default_int64_field_name, nullable_fields={}, language=None): insert_list = [] if not random_primary_key: @@ -1289,7 +1286,7 @@ def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, multiple_dim_array=[], - multiple_vector_field_name=[], vector_data_type="FLOAT_VECTOR", auto_id=False, + multiple_vector_field_name=[], vector_data_type=DataType.FLOAT_VECTOR, auto_id=False, primary_field = ct.default_int64_field_name, nullable_fields={}, language=None): array = [] for i in range(start, start + nb): @@ -1703,7 +1700,7 @@ def gen_default_list_sparse_data(nb=ct.default_nb, dim=ct.default_dim, start=0, string_values = [str(i) for i in range(start, start + nb)] json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]} for i in range(start, start + nb)] - sparse_vec_values = gen_vectors(nb, dim, vector_data_type="SPARSE_FLOAT_VECTOR") + sparse_vec_values = gen_vectors(nb, dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR) if with_json: data = [int_values, float_values, string_values, json_values, sparse_vec_values] else: @@ -2812,7 +2809,7 @@ def compare_distance_vector_and_vector_list(x, y, metric, distance): assert False for i in range(len(y)): if metric == "L2": - distance_i = l2(x, y[i]) + distance_i = (l2(x, y[i]))**2 elif metric == "IP": distance_i = ip(x, y[i]) elif metric == "COSINE": @@ -2820,7 +2817,7 @@ def compare_distance_vector_and_vector_list(x, y, metric, distance): else: raise Exception("metric type is invalid") if abs(distance_i - distance[i]) > ct.epsilon: - log.error(f"The distance between {x} and {y[i]} is not equal with {distance[i]}") + log.error(f"The distance between {x} and {y[i]} does not equal {distance[i]}, expected: {distance_i}") assert abs(distance_i - distance[i]) < ct.epsilon return True @@ -2927,7 +2924,7 @@ def gen_partitions(collection_w, partition_num=1): def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_type=False, auto_id=False, dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True, random_primary_key=False, multiple_dim_array=[], primary_field=ct.default_int64_field_name, - vector_data_type="FLOAT_VECTOR", nullable_fields={}, language=None): + vector_data_type=DataType.FLOAT_VECTOR, nullable_fields={}, language=None): """ target: insert non-binary/binary data method: insert non-binary/binary data into partitions if any @@ -2948,7 +2945,7 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, 
is_all_data_typ
    if not is_binary:
        if not is_all_data_type:
            if not enable_dynamic_field:
-                if vector_data_type == "FLOAT_VECTOR":
+                if vector_data_type == DataType.FLOAT_VECTOR:
                    default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json,
                                                              random_primary_key=random_primary_key,
                                                              multiple_dim_array=multiple_dim_array,
@@ -2975,14 +2972,14 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
 
        else:
            if not enable_dynamic_field:
-                if vector_data_type == "FLOAT_VECTOR":
+                if vector_data_type == DataType.FLOAT_VECTOR:
                    default_data = gen_general_list_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
                                                                  random_primary_key=random_primary_key,
                                                                  multiple_dim_array=multiple_dim_array,
                                                                  multiple_vector_field_name=vector_name_list,
                                                                  auto_id=auto_id, primary_field=primary_field,
                                                                  nullable_fields=nullable_fields, language=language)
-                elif vector_data_type == "FLOAT16_VECTOR" or "BFLOAT16_VECTOR":
+                elif vector_data_type == DataType.FLOAT16_VECTOR or vector_data_type == DataType.BFLOAT16_VECTOR:
                    default_data = gen_general_list_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
                                                                  random_primary_key=random_primary_key,
                                                                  multiple_dim_array=multiple_dim_array,
@@ -3173,6 +3170,20 @@
    return vector_name_list
 
 
+def get_field_dtype_by_field_name(collection_w, field_name):
+    """
+    get the data type of a field by its name
+    collection_w : the collection whose schema is inspected
+    return: the field's data type, or None if no field matches
+    """
+    schema_dict = collection_w.schema.to_dict()
+    fields = schema_dict.get('fields')
+    for field in fields:
+        if field['name'] == field_name:
+            return field['type']
+    return None
+
+
 def get_activate_func_from_metric_type(metric_type):
    activate_function = lambda x: x
    if metric_type == "COSINE":
@@ -3307,20 +3318,20 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok", empty_percentage=0):
    return vectors
 
 
-def gen_vectors_based_on_vector_type(num, dim, vector_data_type=ct.float_type):
+def gen_vectors_based_on_vector_type(num, dim, vector_data_type=DataType.FLOAT_VECTOR):
    """
    generate float16 vector data
    raw_vectors : the vectors
    fp16_vectors: the bytes used for insert
    return: raw_vectors and fp16_vectors
    """
-    if vector_data_type == ct.float_type:
+    if vector_data_type == DataType.FLOAT_VECTOR:
        vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
-    elif vector_data_type == ct.float16_type:
+    elif vector_data_type == DataType.FLOAT16_VECTOR:
        vectors = gen_fp16_vectors(num, dim)[1]
-    elif vector_data_type == ct.bfloat16_type:
+    elif vector_data_type == DataType.BFLOAT16_VECTOR:
        vectors = gen_bf16_vectors(num, dim)[1]
-    elif vector_data_type == ct.sparse_vector:
+    elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
        vectors = gen_sparse_vectors(num, dim)
    elif vector_data_type == ct.text_sparse_vector:
        vectors = gen_text_vectors(num)
diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py
index 52ca7af6a5..5321ce1861 100644
--- a/tests/python_client/common/common_type.py
+++ b/tests/python_client/common/common_type.py
@@ -1,4 +1,5 @@
 import numpy as np
+from pymilvus import DataType
 """ Initialized parameters """
 port = 19530
@@ -44,14 +45,10 @@ default_float16_vec_field_name = "float16_vector"
 default_bfloat16_vec_field_name = "bfloat16_vector"
 another_float_vec_field_name = "float_vector1"
 default_binary_vec_field_name = "binary_vector"
-float_type = "FLOAT_VECTOR"
-float16_type = 
"FLOAT16_VECTOR" -bfloat16_type = "BFLOAT16_VECTOR" -sparse_vector = "SPARSE_FLOAT_VECTOR" text_sparse_vector = "TEXT_SPARSE_VECTOR" -append_vector_type = [float16_type, bfloat16_type, sparse_vector] -all_dense_vector_types = [float_type, float16_type, bfloat16_type] -all_vector_data_types = [float_type, float16_type, bfloat16_type, sparse_vector] +append_vector_type = [DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR, DataType.SPARSE_FLOAT_VECTOR] +all_dense_vector_types = [DataType.FLOAT_VECTOR, DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR] +all_float_vector_dtypes = [DataType.FLOAT_VECTOR, DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR, DataType.SPARSE_FLOAT_VECTOR] default_sparse_vec_field_name = "sparse_vector" default_partition_name = "_default" default_resource_group_name = '__default_resource_group' @@ -246,13 +243,14 @@ default_all_search_params_params = [{}, {"nprobe": 32}, {"nprobe": 32}, {"nprobe {}, {}] Handler_type = ["GRPC", "HTTP"] -binary_support = ["BIN_FLAT", "BIN_IVF_FLAT"] -sparse_support = ["SPARSE_INVERTED_INDEX", "SPARSE_WAND"] -gpu_support = ["GPU_IVF_FLAT", "GPU_IVF_PQ"] +binary_supported_index_types = ["BIN_FLAT", "BIN_IVF_FLAT"] +sparse_supported_index_types = ["SPARSE_INVERTED_INDEX", "SPARSE_WAND"] +gpu_supported_index_types = ["GPU_IVF_FLAT", "GPU_IVF_PQ"] default_L0_metric = "COSINE" -float_metrics = ["L2", "IP", "COSINE"] +dense_metrics = ["L2", "IP", "COSINE"] binary_metrics = ["JACCARD", "HAMMING", "SUBSTRUCTURE", "SUPERSTRUCTURE"] structure_metrics = ["SUBSTRUCTURE", "SUPERSTRUCTURE"] +sparse_metrics = ["IP", "BM25"] all_scalar_data_types = ['int8', 'int16', 'int32', 'int64', 'float', 'double', 'bool', 'varchar'] diff --git a/tests/python_client/milvus_client/test_milvus_client_search_iterator.py b/tests/python_client/milvus_client/test_milvus_client_search_iterator.py index e8ad2f0550..58b2cb94c8 100644 --- a/tests/python_client/milvus_client/test_milvus_client_search_iterator.py +++ b/tests/python_client/milvus_client/test_milvus_client_search_iterator.py @@ -665,7 +665,7 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): """ @pytest.mark.tags(CaseLabel.L0) - @pytest.mark.parametrize("metric_type", ct.float_metrics) + @pytest.mark.parametrize("metric_type", ct.dense_metrics) def test_milvus_client_search_iterator_default(self, metric_type): """ target: test search iterator (high level api) normal case @@ -892,7 +892,7 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): pass @pytest.mark.tags(CaseLabel.L0) - @pytest.mark.parametrize("metric_type", ct.float_metrics) + @pytest.mark.parametrize("metric_type", ct.dense_metrics) @pytest.mark.parametrize("enable_dynamic_field", [True, False]) def test_milvus_client_search_iterator_after_json_path_index(self, metric_type, enable_dynamic_field, supported_json_cast_type, diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_hybrid_search_v2.py b/tests/python_client/milvus_client_v2/test_milvus_client_hybrid_search_v2.py index e2e4f8d2e8..7ad326ee82 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_hybrid_search_v2.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_hybrid_search_v2.py @@ -124,7 +124,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): def random_primary_key(self, request): yield request.param - @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + @pytest.fixture(scope="function", params=ct.all_dense_vector_types) def vector_data_type(self, request): yield 
request.param @@ -242,7 +242,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): # 3. prepare search params req_list = [] weights = [1] - vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR") + vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR) # 4. get hybrid search req list for i in range(len(vector_name_list)): search_param = { @@ -276,7 +276,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): # 3. prepare search params req_list = [] weights = [1] - vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR") + vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR) # 4. get hybrid search req list for i in range(len(vector_name_list)): search_param = { @@ -311,7 +311,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): # 3. prepare search params req_list = [] weights = [1] - vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR") + vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR) # 4. get hybrid search req list for i in range(len(vector_name_list)): search_param = { @@ -344,7 +344,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): vector_name_list = cf.extract_vector_field_name_list(collection_w) vector_name_list.append(ct.default_float_vec_field_name) # 3. prepare search params - vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR") + vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR) # get hybrid search req list search_param = { @@ -1757,7 +1757,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): metrics = [] search_res_dict_array = [] search_res_dict_array_nq = [] - vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR") + vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR) # get hybrid search req list for i in range(len(vector_name_list)): @@ -2014,7 +2014,7 @@ class TestCollectionHybridSearchValid(TestcaseBase): # 1. init collection collection_w, insert_vectors, _, insert_ids = \ self.init_collection_general(prefix, True, nb=nb, multiple_dim_array=[dim, dim * 2], - with_json=False, vector_data_type="SPARSE_FLOAT_VECTOR")[0:4] + with_json=False, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)[0:4] # 2. extract vector field name vector_name_list = cf.extract_vector_field_name_list(collection_w) # 3. prepare search params diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py b/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py index dd3971e8e0..937ee926d3 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py @@ -92,7 +92,7 @@ class TestCollectionRangeSearch(TestcaseBase): pytest.skip(f"skip index type {request.param}") yield request.param - @pytest.fixture(scope="function", params=ct.float_metrics) + @pytest.fixture(scope="function", params=ct.dense_metrics) def metric(self, request): tags = request.config.getoption("--tags") if CaseLabel.L2 not in tags: @@ -1574,7 +1574,7 @@ class TestCollectionRangeSearch(TestcaseBase): # 1. 
initialize with data collection_w = self.init_collection_general(prefix, True, nb=5000, with_json=True, - vector_data_type=ct.sparse_vector)[0] + vector_data_type=DataType.SPARSE_FLOAT_VECTOR)[0] range_filter = random.uniform(0.5, 1) radius = random.uniform(0, 0.5) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_dsl.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_dsl.py deleted file mode 100644 index ac09a1dc0f..0000000000 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_dsl.py +++ /dev/null @@ -1,102 +0,0 @@ -import numpy as np -from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY -from pymilvus import AnnSearchRequest, RRFRanker, WeightedRanker -from pymilvus import ( - FieldSchema, CollectionSchema, DataType, - Collection -) -from common.constants import * -from utils.util_pymilvus import * -from common.common_type import CaseLabel, CheckTasks -from common import common_type as ct -from common import common_func as cf -from utils.util_log import test_log as log -from base.client_base import TestcaseBase -import heapq -from time import sleep -from decimal import Decimal, getcontext -import decimal -import multiprocessing -import numbers -import random -import math -import numpy -import threading -import pytest -import pandas as pd -from faker import Faker - -Faker.seed(19530) -fake_en = Faker("en_US") -fake_zh = Faker("zh_CN") - -# patch faker to generate text with specific distribution -cf.patch_faker_text(fake_en, cf.en_vocabularies_distribution) -cf.patch_faker_text(fake_zh, cf.zh_vocabularies_distribution) - -pd.set_option("expand_frame_repr", False) - -prefix = "search_collection" -search_num = 10 -max_dim = ct.max_dim -min_dim = ct.min_dim -epsilon = ct.epsilon -hybrid_search_epsilon = 0.01 -gracefulTime = ct.gracefulTime -default_nb = ct.default_nb -default_nb_medium = ct.default_nb_medium -default_nq = ct.default_nq -default_dim = ct.default_dim -default_limit = ct.default_limit -max_limit = ct.max_limit -default_search_exp = "int64 >= 0" -default_search_string_exp = "varchar >= \"0\"" -default_search_mix_exp = "int64 >= 0 && varchar >= \"0\"" -default_invaild_string_exp = "varchar >= 0" -default_json_search_exp = "json_field[\"number\"] >= 0" -perfix_expr = 'varchar like "0%"' -default_search_field = ct.default_float_vec_field_name -default_search_params = ct.default_search_params -default_int64_field_name = ct.default_int64_field_name -default_float_field_name = ct.default_float_field_name -default_bool_field_name = ct.default_bool_field_name -default_string_field_name = ct.default_string_field_name -default_json_field_name = ct.default_json_field_name -default_index_params = ct.default_index -vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] -range_search_supported_indexes = ct.all_index_types[:7] -uid = "test_search" -nq = 1 -epsilon = 0.001 -field_name = default_float_vec_field_name -binary_field_name = default_binary_vec_field_name -search_param = {"nprobe": 1} -entity = gen_entities(1, is_normal=True) -entities = gen_entities(default_nb, is_normal=True) -raw_vectors, binary_entities = gen_binary_entities(default_nb) -default_query, _ = gen_search_vectors_params(field_name, entities, default_top_k, nq) -index_name1 = cf.gen_unique_str("float") -index_name2 = cf.gen_unique_str("varhar") -half_nb = ct.default_nb // 2 -max_hybrid_search_req_num = ct.max_hybrid_search_req_num - - -class TestSearchDSL(TestcaseBase): 
- @pytest.mark.tags(CaseLabel.L0) - def test_search_vector_only(self): - """ - target: test search normal scenario - method: search vector only - expected: search status ok, the length of result - """ - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, ct.default_nb)[0:5] - vectors = [[random.random() for _ in range(ct.default_dim)] - for _ in range(nq)] - collection_w.search(vectors[:nq], default_search_field, - default_search_params, ct.default_top_k, - default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": ct.default_top_k}) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_invalid.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_invalid.py index ecd8d2e67e..ab7051fbec 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_invalid.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_invalid.py @@ -110,7 +110,7 @@ class TestCollectionSearchInvalid(TestcaseBase): def enable_dynamic_field(self, request): yield request.param - @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + @pytest.fixture(scope="function", params=ct.all_dense_vector_types) def vector_data_type(self, request): yield request.param diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_iterator_v2.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_iterator_v2.py index ce7e9b6364..c27931e155 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_iterator_v2.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_iterator_v2.py @@ -85,8 +85,8 @@ class TestSearchIterator(TestcaseBase): """ Test case of search iterator """ @pytest.mark.tags(CaseLabel.L0) - @pytest.mark.parametrize("metric_type", ct.float_metrics) - @pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + @pytest.mark.parametrize("metric_type", ct.dense_metrics) + @pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types) def test_range_search_iterator_default(self, metric_type, vector_data_type): """ target: test iterator range search @@ -151,7 +151,7 @@ class TestSearchIterator(TestcaseBase): check_items={"batch_size": batch_size}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("metrics", ct.float_metrics) + @pytest.mark.parametrize("metrics", ct.dense_metrics) def test_search_iterator_with_expression(self, metrics): """ target: test search iterator normal diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_none_default.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_none_default.py index dd943814b3..a93a3f5d89 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_none_default.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_none_default.py @@ -124,7 +124,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): def random_primary_key(self, request): yield request.param - @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + @pytest.fixture(scope="function", params=ct.all_dense_vector_types) def vector_data_type(self, request): yield request.param @@ -280,7 +280,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): default_value_fields={ ct.default_float_field_name: np.float32(10.0)})[0] # 2. 
generate search data - vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim, "FLOAT_VECTOR") + vectors = cf.gen_vectors(default_nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR) # 3. search after insert collection_w.search(vectors[:default_nq], default_search_field, default_search_params, default_limit, @@ -479,7 +479,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase): collection_w.load() # 2. search iterator search_params = {"metric_type": "L2"} - vectors = cf.gen_vectors_based_on_vector_type(1, dim, "FLOAT_VECTOR") + vectors = cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR) collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size, check_task=CheckTasks.check_search_iterator, check_items={"batch_size": batch_size}) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_pagination.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_pagination.py index 332db87e06..2486e341cb 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_pagination.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_pagination.py @@ -62,6 +62,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): def setup_class(self): super().setup_class(self) self.collection_name = "TestMilvusClientSearchPagination" + cf.gen_unique_str("_") + self.partition_names = ["partition_1", "partition_2"] self.float_vector_field_name = "float_vector" self.bfloat16_vector_field_name = "bfloat16_vector" self.sparse_vector_field_name = "sparse_vector" @@ -92,32 +93,60 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): collection_schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=256) collection_schema.add_field(default_int64_field_name, DataType.INT64) self.create_collection(client, self.collection_name, schema=collection_schema, force_teardown=False) + for partition_name in self.partition_names: + self.create_partition(client, self.collection_name, partition_name=partition_name) # Define number of insert iterations insert_times = 10 # Generate vectors for each type and store in self - float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim, vector_data_type='FLOAT_VECTOR') - bfloat16_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.bf16_vector_dim, vector_data_type='BFLOAT16_VECTOR') + float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim, + vector_data_type=DataType.FLOAT_VECTOR) + bfloat16_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.bf16_vector_dim, + vector_data_type=DataType.BFLOAT16_VECTOR) sparse_vectors = cf.gen_sparse_vectors(default_nb * insert_times, empty_percentage=2) _, binary_vectors = cf.gen_binary_vectors(default_nb * insert_times, dim=self.binary_vector_dim) # Insert data multiple times with non-duplicated primary keys for j in range(insert_times): - rows = [{ - default_primary_key_field_name: i + j * default_nb, - self.float_vector_field_name: list(float_vectors[i + j * default_nb]), - self.bfloat16_vector_field_name: bfloat16_vectors[i + j * default_nb], - self.sparse_vector_field_name: sparse_vectors[i + j * default_nb], - self.binary_vector_field_name: binary_vectors[i + j * default_nb], - default_float_field_name: (i + j * default_nb) * 1.0, - default_string_field_name: str(i + j * default_nb), - default_int64_field_name: i + j * default_nb - } - for i in range(default_nb)] - self.datas.extend(rows) + # Group rows by partition based 
on primary key mod 3 + default_rows = [] + partition1_rows = [] + partition2_rows = [] + + for i in range(default_nb): + pk = i + j * default_nb + row = { + default_primary_key_field_name: pk, + self.float_vector_field_name: list(float_vectors[pk]), + self.bfloat16_vector_field_name: bfloat16_vectors[pk], + self.sparse_vector_field_name: sparse_vectors[pk], + self.binary_vector_field_name: binary_vectors[pk], + default_float_field_name: pk * 1.0, + default_string_field_name: str(pk), + default_int64_field_name: pk + } + self.datas.append(row) + + # Distribute to partitions based on pk mod 3 + if pk % 3 == 0: + default_rows.append(row) + elif pk % 3 == 1: + partition1_rows.append(row) + else: + partition2_rows.append(row) + + # Insert into respective partitions + if default_rows: + self.insert(client, self.collection_name, data=default_rows) + if partition1_rows: + self.insert(client, self.collection_name, data=partition1_rows, partition_name=self.partition_names[0]) + if partition2_rows: + self.insert(client, self.collection_name, data=partition2_rows, partition_name=self.partition_names[1]) + + # Track all inserted data and primary keys self.primary_keys.extend([i + j * default_nb for i in range(default_nb)]) - self.insert(client, self.collection_name, data=rows) + self.flush(client, self.collection_name) # Create index @@ -165,7 +194,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): # 2. Search with pagination for 10 pages limit = 100 pages = 10 - vectors_to_search = cf.gen_vectors(default_nq, default_dim) + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) all_pages_results = [] for page in range(pages): offset = page * limit @@ -224,7 +253,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): # 2. 
Search with pagination for 10 pages limit = 100 pages = 10 - vectors_to_search = cf.gen_vectors(default_nq, self.bf16_vector_dim, vector_data_type='BFLOAT16_VECTOR') + vectors_to_search = cf.gen_vectors(default_nq, self.bf16_vector_dim, vector_data_type=DataType.BFLOAT16_VECTOR) all_pages_results = [] for page in range(pages): offset = page * limit @@ -374,11 +403,13 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): for i in range(default_nq): page_ids = [page_res[i][j].get('id') for j in range(limit)] ids_in_full = [search_res_full[i][p * limit:p * limit + limit][j].get('id') for j in range(limit)] - # Calculate percentage of matching items - matching_items = sum(1 for x, y in zip(page_ids, ids_in_full) if x == y) - match_percentage = (matching_items / len(page_ids)) * 100 - assert match_percentage >= 80, f"Only {match_percentage}% items matched, expected >= 80%" - + + # Calculate intersection between paginated results and baseline full results + common_ids = set(page_ids) & set(ids_in_full) + # Calculate overlap ratio using full results as baseline + overlap_ratio = len(common_ids) / len(ids_in_full) * 100 + assert overlap_ratio >= 80, f"Only {overlap_ratio}% overlap with baseline results, expected >= 80%" + @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("limit", [100, 3000, 10000]) def test_search_with_pagination_topk(self, limit): @@ -399,8 +430,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): topK=16384 offset = topK - limit search_param = {"nprobe": 10, "offset": offset} - vectors_to_search = [[random.random() for _ in range(default_dim)] - for _ in range(default_nq)] + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) client.search(collection_name, vectors_to_search[:default_nq], anns_field=self.float_vector_field_name, search_params=search_param, limit=limit, check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, @@ -438,7 +468,8 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): limit = 0 elif len(filter_ids) - offset < default_limit: limit = len(filter_ids) - offset - search_params = {"metric_type": "COSINE", "params": {"nprobe": 128}, "offset": offset} + # 3. search with a high nprobe for better accuracy + search_params = {"metric_type": "COSINE", "params": {"nprobe": 128}, "offset": offset} vectors_to_search = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] search_res_with_offset, _ = self.search( client, @@ -454,7 +485,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): "limit": limit} ) - # 3. search with offset+limit + # 4. search with offset+limit search_params_full = {"metric_type": "COSINE", "params": {"nprobe": 128}} search_res_full, _ = self.search( client, @@ -466,7 +497,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): filter=expr ) - # 4. Compare results + # 5. Compare results filter_ids_set = set(filter_ids) for hits in search_res_with_offset: ids = [hit.get('id') for hit in hits] @@ -477,7 +508,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): ids_in_full = [search_res_full[0][offset:offset + limit][j].get('id') for j in range(limit)] assert page_ids == ids_in_full - # 5. search again with expression template + # 6. 
search again with expression template expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or") expr_params = cf.get_expr_params_from_template(expressions[1]) search_res_with_offset, _ = self.search( @@ -495,7 +526,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): "limit": limit} ) - # 6. search with offset+limit + # 7. search with offset+limit search_res_full, _ = self.search( client, collection_name, @@ -507,7 +538,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): filter_params=expr_params ) - # Compare results + # 8. Compare results filter_ids_set = set(filter_ids) for hits in search_res_with_offset: ids = [hit.get('id') for hit in hits] @@ -517,349 +548,467 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base): page_ids = [search_res_with_offset[0][j].get('id') for j in range(limit)] ids_in_full = [search_res_full[0][offset:offset + limit][j].get('id') for j in range(limit)] assert page_ids == ids_in_full - - -class TestSearchPagination(TestcaseBase): - """ Test case of search pagination """ - - @pytest.fixture(scope="function", params=[0, 10, 100]) - def offset(self, request): - yield request.param - - @pytest.fixture(scope="function", params=[False, True]) - def auto_id(self, request): - yield request.param - - @pytest.fixture(scope="function", params=[False, True]) - def _async(self, request): - yield request.param - - @pytest.fixture(scope="function", params=[True, False]) - def enable_dynamic_field(self, request): - yield request.param - - @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) - def vector_data_type(self, request): - yield request.param - - """ - ****************************************************************** - # The following are valid base cases - ****************************************************************** - """ - - @pytest.mark.tags(CaseLabel.L2) - def test_search_pagination_with_index_partition(self, offset, _async): + + @pytest.mark.tags(CaseLabel.L1) + def test_search_pagination_in_partitions(self): """ - target: test search pagination with index and partition - method: create connection, collection, insert data, create index and search - expected: searched successfully + target: test search pagination in partitions + method: 1. create collection and insert data + 2. search with pagination in partitions + 3. compare with the search results whose corresponding ids should be the same """ - # 1. initialize with data - auto_id = False - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, - partition_num=1, - auto_id=auto_id, - is_index=False)[0:4] - vectors = [[random.random() for _ in range(default_dim)] - for _ in range(default_nq)] - # 2. create index - default_index = {"index_type": "IVF_FLAT", - "params": {"nlist": 128}, "metric_type": "L2"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - # 3. search through partitions - par = collection_w.partitions - limit = 100 - search_params = {"metric_type": "L2", - "params": {"nprobe": 10}, "offset": offset} - search_res = collection_w.search(vectors[:default_nq], default_search_field, - search_params, limit, default_search_exp, - [par[0].name, par[1].name], _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": limit, - "_async": _async})[0] - # 3. 
search through partitions with offset+limit
-        search_params = {"metric_type": "L2"}
-        res = collection_w.search(vectors[:default_nq], default_search_field, search_params,
-                                  limit + offset, default_search_exp,
-                                  [par[0].name, par[1].name], _async=_async)[0]
-        if _async:
-            search_res.done()
-            search_res = search_res.result()
-            res.done()
-            res = res.result()
-        res_distance = res[0].distances[offset:]
-        # assert cf.sort_search_distance(search_res[0].distances) == cf.sort_search_distance(res_distance)
-        assert set(search_res[0].ids) == set(res[0].ids[offset:])
+        client = self._client()
+        collection_name = self.collection_name
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
+        # search with pagination in partition_1
+        limit = 50
+        pages = 10
+        for page in range(pages):
+            offset = page * limit
+            search_params = {"offset": offset}
+            search_res_with_offset, _ = self.search(
+                client,
+                collection_name,
+                vectors_to_search[:default_nq],
+                partition_names=[self.partition_names[0]],
+                anns_field=self.float_vector_field_name,
+                search_params=search_params,
+                limit=limit,
+                check_task=CheckTasks.check_search_results,
+                check_items={"enable_milvus_client_api": True,
+                             "nq": default_nq, "limit": limit})
+
+            # verify every returned id satisfies id % 3 == 1, i.e. it was inserted into partition_1
+            for hits in search_res_with_offset:
+                for hit in hits:
+                    assert hit.get('id') % 3 == 1
 
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_search_pagination_with_inserted_data(self, offset, _async):
+        # search with pagination in partition_1 and partition_2
+        for page in range(pages):
+            offset = page * limit
+            search_params = {"offset": offset}
+            search_res_with_offset, _ = self.search(
+                client,
+                collection_name,
+                vectors_to_search[:default_nq],
+                partition_names=self.partition_names,
+                anns_field=self.float_vector_field_name,
+                search_params=search_params,
+                limit=limit,
+                check_task=CheckTasks.check_search_results,
+                check_items={"enable_milvus_client_api": True,
+                             "nq": default_nq, "limit": limit})
+
+            # verify every returned id satisfies id % 3 == 1 or id % 3 == 2, i.e. it came from the two partitions
+            for hits in search_res_with_offset:
+                for hit in hits:
+                    assert hit.get('id') % 3 == 1 or hit.get('id') % 3 == 2
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_search_pagination_with_different_offset(self):
        """
-        target: test search pagination with inserted data
-        method: create connection, collection, insert data and search
-                check the results by searching with limit+offset
-        expected: searched successfully
+        target: test search pagination with different offset
+        method: 1. create collection and insert data
+                2. search with different offset, including offset > limit and offset = 0
+                3. verify the search results are returned with the expected limit
        """
-        # 1. create collection
-        collection_w = self.init_collection_general(
-            prefix, False, dim=default_dim)[0]
-        # 2. insert data
-        data = cf.gen_default_dataframe_data(dim=default_dim)
-        collection_w.insert(data)
-        collection_w.load()
-        # 3. search
+        client = self._client()
+        collection_name = self.collection_name
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
+        # search with offset > limit
+        offset = default_limit + 10
        search_params = {"offset": offset}
-        search_res = collection_w.search(vectors[:default_nq], default_search_field,
-                                         search_params, default_limit,
-                                         default_search_exp, _async=_async,
-                                         check_task=CheckTasks.check_search_results,
-                                         check_items={"nq": default_nq,
-                                                      "limit": default_limit,
-                                                      "_async": _async})[0]
-        # 4. 
search through partitions with offset+limit - search_params = {} - res = collection_w.search(vectors[:default_nq], default_search_field, search_params, - default_limit + offset, default_search_exp, _async=_async)[0] - if _async: - search_res.done() - search_res = search_res.result() - res.done() - res = res.result() - res_distance = res[0].distances[offset:] - assert sorted(search_res[0].distances) == sorted(res_distance) - assert set(search_res[0].ids) == set(res[0].ids[offset:]) + self.search(client, collection_name, vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, "limit": default_limit}) + # search with offset = 0 + offset = 0 + search_params = {"offset": offset} + self.search(client, collection_name, vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, "limit": default_limit}) @pytest.mark.tags(CaseLabel.L2) - def test_search_pagination_empty(self, offset, _async): - """ - target: test search pagination empty - method: connect, create collection, insert data and search - expected: search successfully - """ - # 1. initialize without data - auto_id = False - collection_w = self.init_collection_general( - prefix, True, auto_id=auto_id, dim=default_dim)[0] - # 2. search collection without data - search_param = {"metric_type": "COSINE", - "params": {"nprobe": 10}, "offset": offset} - search_res = collection_w.search([], default_search_field, search_param, - default_limit, default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": 0, - "_async": _async})[0] - if _async: - search_res.done() - search_res = search_res.result() - assert len(search_res) == 0 - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("offset", [3000, 5000]) - def test_search_pagination_with_offset_over_num_entities(self, offset): - """ - target: test search pagination with offset over num_entities - method: create connection, collection, insert 3000 entities and search with offset over 3000 - expected: return an empty list - """ - # 1. initialize - collection_w = self.init_collection_general( - prefix, True, dim=default_dim)[0] - # 2. search - search_param = {"metric_type": "COSINE", - "params": {"nprobe": 10}, "offset": offset} - vectors = [[random.random() for _ in range(default_dim)] - for _ in range(default_nq)] - res = collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "limit": 0})[0] - assert res[0].ids == [] - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - def test_search_pagination_after_different_index(self, index, offset, _async): - """ - target: test search pagination after different index - method: test search pagination after different index and corresponding search params - expected: search successfully - """ - # 1. initialize with data - dim = 128 - auto_id = True - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 1000, - partition_num=1, - auto_id=auto_id, - dim=dim, is_index=False)[0:5] - # 2. 
create index and load - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "L2"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - # 3. search - search_params = cf.gen_search_param(index) - vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - for search_param in search_params: - res = collection_w.search(vectors[:default_nq], default_search_field, search_param, - default_limit + offset, default_search_exp, _async=_async)[0] - search_param["offset"] = offset - log.info("Searching with search params: {}".format(search_param)) - search_res = collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async})[0] - if _async: - search_res.done() - search_res = search_res.result() - res.done() - res = res.result() - res_distance = res[0].distances[offset:] - # assert sorted(search_res[0].distances, key=numpy.float32) == sorted(res_distance, key=numpy.float32) - assert set(search_res[0].ids) == set(res[0].ids[offset:]) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("offset", [100, default_nb // 2]) + @pytest.mark.parametrize("offset", [0, 20, 100, 200]) def test_search_offset_different_position(self, offset): """ - target: test search pagination with offset in different position - method: create connection, collection, insert entities and search with offset + target: test search offset param in different position + method: create connection, collection, insert data, search with offset in different position expected: search successfully """ - # 1. initialize - collection_w = self.init_collection_general(prefix, True)[0] - # 2. search with offset in params - search_params = {"metric_type": "COSINE", - "params": {"nprobe": 10}, "offset": offset} - res1 = collection_w.search(vectors[:default_nq], default_search_field, - search_params, default_limit)[0] + client = self._client() + collection_name = self.collection_name + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) - # 3. search with offset outside params - res2 = collection_w.search(vectors[:default_nq], default_search_field, default_search_params, - default_limit, offset=offset)[0] - assert res1[0].ids == res2[0].ids + # 1. search with offset in search_params + limit = 100 + search_params = {"offset": offset} + res1, _ = self.search(client, collection_name, vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, "limit": limit}) + + # 2. search with offset in search + search_params = {} + res2, _ = self.search(client, collection_name, vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + offset=offset, + limit=limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": default_nq, "limit": limit}) + # 3. compare results + assert res1 == res2 @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("offset", [1, 5, 20]) - def test_search_sparse_with_pagination(self, offset): + def test_search_pagination_empty_list(self): """ - target: test search sparse with pagination - method: 1. 
connect and create a collection - 2. search pagination with offset - 3. search with offset+limit - 4. compare with the search results whose corresponding ids should be the same - expected: search successfully and ids is correct + target: test search pagination with empty list of vectors + method: create connection, collection, insert data, search with offset + expected: search successfully """ - # 1. create a collection - auto_id = False - collection_w, _, _, insert_ids = \ - self.init_collection_general( - prefix, True, auto_id=auto_id, vector_data_type=ct.sparse_vector)[0:4] - # 2. search with offset+limit - search_param = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}, "offset": offset} - search_vectors = cf.gen_default_list_sparse_data()[-1][-2:] - search_res = collection_w.search(search_vectors, ct.default_sparse_vec_field_name, - search_param, default_limit)[0] - # 3. search - _search_param = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}} - res = collection_w.search(search_vectors[:default_nq], ct.default_sparse_vec_field_name, _search_param, - default_limit + offset)[0] - assert len(search_res[0].ids) == len(res[0].ids[offset:]) - assert sorted(search_res[0].distances, key=np.float32) == sorted( - res[0].distances[offset:], key=np.float32) - - -class TestSearchPaginationInvalid(TestMilvusClientV2Base): - """ Test case of search pagination """ - """ - ****************************************************************** - # The following are invalid cases - ****************************************************************** - """ - + client = self._client() + collection_name = self.collection_name + vectors_to_search = [] + offset = 10 + limit = 100 + search_params = {"offset": offset} + error ={"err_code": 1, "err_msg": "list index out of range"} + self.search(client, collection_name, vectors_to_search, + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=limit, + check_task=CheckTasks.err_res, + check_items=error) + @pytest.mark.tags(CaseLabel.L1) - def test_search_pagination_with_invalid_offset_type(self): + @pytest.mark.parametrize("offset", [" ", 1.0, [1, 2], {1}, "12 s"]) + def test_search_pagination_with_invalid_offset_type(self, offset): """ target: test search pagination with invalid offset type method: create connection, collection, insert and search with invalid offset type expected: raise exception """ client = self._client() + collection_name = self.collection_name + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) - # 1. 
Create collection with schema - collection_name = cf.gen_collection_name_by_testcase_name() - self.create_collection(client, collection_name, default_dim) - - # Insert data - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - self.insert(client, collection_name, rows) - self.flush(client, collection_name) - - # Search with invalid offset types - vectors_to_search = cf.gen_vectors(default_nq, default_dim) - invalid_offsets = [" ", [1, 2], {1}, "12 s"] - - for offset in invalid_offsets: - log.debug(f"assert search error if offset={offset}") - search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}, "offset": offset} - self.search( - client, - collection_name, - vectors_to_search[:default_nq], - anns_field=default_vector_field_name, - search_params=search_params, - limit=default_limit, - check_task=CheckTasks.err_res, - check_items={ - "err_code": 1, - "err_msg": "wrong type for offset, expect int" - } - ) + search_params = {"offset": offset} + error = {"err_code": 1, "err_msg": "wrong type for offset, expect int"} + self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.err_res, + check_items=error) + @pytest.mark.tags(CaseLabel.L1) - def test_search_pagination_with_invalid_offset_value(self): + @pytest.mark.parametrize("offset", [-1, 16385]) + def test_search_pagination_with_invalid_offset_value(self, offset): """ target: test search pagination with invalid offset value method: create connection, collection, insert and search with invalid offset value expected: raise exception """ client = self._client() + collection_name = self.collection_name + vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim) + search_params = {"offset": offset} + error = {"err_code": 1, "err_msg": f"offset [{offset}] is invalid, it should be in range [1, 16384]"} + self.search( + client, + collection_name, + vectors_to_search[:default_nq], + anns_field=self.float_vector_field_name, + search_params=search_params, + limit=default_limit, + check_task=CheckTasks.err_res, + check_items=error + ) - # 1. Create collection with schema - collection_name = cf.gen_collection_name_by_testcase_name() - self.create_collection(client, collection_name, default_dim) - # Insert data - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - self.insert(client, collection_name, rows) - self.flush(client, collection_name) +class TestSearchPaginationIndependent(TestMilvusClientV2Base): + """ Test case of search pagination with independent collection """ - # Search with invalid offset values - vectors_to_search = cf.gen_vectors(default_nq, default_dim) - invalid_offsets = [-1, 16385] - - for offset in invalid_offsets: - log.debug(f"assert search error if offset={offset}") - search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}, "offset": offset} - self.search( + def do_search_pagination_and_assert(self, client, collection_name, + limit=10, pages=10, + dim=default_dim, + vector_dtype=DataType.FLOAT_VECTOR, + index=ct.L0_index_types[0], + metric_type=ct.default_L0_metric, + expected_overlap_ratio=80): + # 2. 
+        # 2. Search with pagination, page by page
+        vectors_to_search = cf.gen_vectors(default_nq, dim, vector_data_type=vector_dtype)
+        all_pages_results = []
+        for page in range(pages):
+            offset = page * limit
+            search_params = {"offset": offset}
+            search_res_with_offset, _ = self.search(
                 client,
                 collection_name,
                 vectors_to_search[:default_nq],
                 anns_field=default_vector_field_name,
                 search_params=search_params,
-                limit=default_limit,
-                check_task=CheckTasks.err_res,
-                check_items={
-                    "err_code": 1,
-                    "err_msg": f"offset [{offset}] is invalid, it should be in range [1, 16384]"
-                }
-            )
\ No newline at end of file
+                limit=limit,
+                check_task=CheckTasks.check_search_results,
+                check_items={"enable_milvus_client_api": True,
+                             "nq": default_nq,
+                             "limit": limit,
+                             "metric": metric_type,
+                             }
+            )
+            all_pages_results.append(search_res_with_offset)
+
+        # 3. Search without pagination
+        search_params_full = {}
+        search_res_full, _ = self.search(
+            client,
+            collection_name,
+            vectors_to_search[:default_nq],
+            anns_field=default_vector_field_name,
+            search_params=search_params_full,
+            limit=limit * pages
+        )
+
+        # 4. Compare results - verify each page of the paginated results overlaps the same window of the full search
+        for p in range(pages):
+            page_res = all_pages_results[p]
+            for i in range(default_nq):
+                page_ids = [page_res[i][j].get('id') for j in range(limit)]
+                ids_in_full = [search_res_full[i][p * limit:p * limit + limit][j].get('id') for j in range(limit)]
+                # Calculate intersection between paginated results and baseline full results
+                common_ids = set(page_ids) & set(ids_in_full)
+                # Calculate overlap ratio using full results as baseline
+                overlap_ratio = len(common_ids) / len(ids_in_full) * 100
+                log.debug(
+                    f"pagination search {vector_dtype.name} {index} {metric_type} results overlap {overlap_ratio}")
+                assert overlap_ratio >= expected_overlap_ratio, \
+                    f"Only {overlap_ratio}% overlap with baseline results, expected >= {expected_overlap_ratio}%"
+
+    """
+    ******************************************************************
+    # The following are valid cases
+    ******************************************************************
+    """
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize('vector_dtype', ct.all_dense_vector_types)
+    @pytest.mark.parametrize('index', ct.all_index_types[:7])
+    @pytest.mark.parametrize('metric_type', ct.dense_metrics)
+    def test_search_pagination_dense_vectors_indices_metrics_growing(self, vector_dtype, index, metric_type):
+        """
+        target: test search pagination with growing data
+        method: create connection, collection, insert data and search,
+                check the results by searching with limit+offset
+        expected: searched successfully
+        """
+        client = self._client()
+
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        schema, _ = self.create_schema(client)
+        schema.add_field(default_primary_key_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, datatype=vector_dtype, dim=default_dim)
+        schema.add_field(default_float_field_name, datatype=DataType.FLOAT)
+        schema.add_field(default_string_field_name, datatype=DataType.VARCHAR, max_length=100)
+        self.create_collection(client, collection_name, schema=schema)
+
+        # Insert data in 3 batches with unique primary keys using a loop
+        insert_times = 3
+        random_vectors = cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)
+        if vector_dtype == DataType.FLOAT_VECTOR:
+            random_vectors = list(random_vectors)
+        for j in range(insert_times):
+            start_pk = j * default_nb
+            rows = [{
+                default_primary_key_field_name: i + start_pk,
+                default_vector_field_name: random_vectors[i + start_pk],
+                default_float_field_name: (i + start_pk) * 1.0,
+                default_string_field_name: str(i + start_pk)
+            } for i in range(default_nb)]
+            self.insert(client, collection_name, rows)
+        self.flush(client, collection_name)
+
+        # build index
+        index_params, _ = self.prepare_index_params(client)
+        index_params.add_index(default_vector_field_name, index_type=index,
+                               metric_type=metric_type,
+                               params=cf.get_index_params_params(index_type=index))
+        self.create_index(client, collection_name, index_params=index_params)
+
+        # load the collection with index
+        assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
+        self.load_collection(client, collection_name)
+
+        # search and assert
+        limit = 50
+        pages = 5
+        expected_overlap_ratio = 20
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
+
+        # insert additional data without flush
+        random_vectors = cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)
+        if vector_dtype == DataType.FLOAT_VECTOR:
+            random_vectors = list(random_vectors)
+        start_pk = default_nb * insert_times
+        rows = [{
+            default_primary_key_field_name: i + start_pk,
+            default_vector_field_name: random_vectors[i],
+            default_float_field_name: (i + start_pk) * 1.0,
+            default_string_field_name: str(i + start_pk)
+        } for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+
+        # search and assert
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize('index', ct.binary_supported_index_types)
+    @pytest.mark.parametrize('metric_type', ct.binary_metrics[:2])
+    def test_search_pagination_binary_index_growing(self, index, metric_type):
+        """
+        target: test search pagination with binary index
+        method: create connection, collection, insert data, create index and search
+        expected: searched successfully
+        """
+        vector_dtype = DataType.BINARY_VECTOR
+        client = self._client()
+
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        schema, _ = self.create_schema(client)
+        schema.add_field(default_primary_key_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, datatype=vector_dtype, dim=default_dim)
+        self.create_collection(client, collection_name, schema=schema)
+
+        # Insert data in 3 batches with unique primary keys using a loop
+        insert_times = 3
+        random_vectors = cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)
+        for j in range(insert_times):
+            start_pk = j * default_nb
+            rows = [{
+                default_primary_key_field_name: i + start_pk,
+                default_vector_field_name: random_vectors[i + start_pk]
+            } for i in range(default_nb)]
+            self.insert(client, collection_name, rows)
+        self.flush(client, collection_name)
+
+        # build index
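+        # Note: vectors for all batches are generated up front, so each row's pk
+        # (i + start_pk) stays aligned with its vector random_vectors[i + start_pk].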
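+        # Note: build the index, then block until it is fully built before loading;
+        # wait_for_index_ready returns False on timeout, which fails the assert below.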
+        index_params, _ = self.prepare_index_params(client)
+        index_params.add_index(default_vector_field_name, index_type=index,
+                               metric_type=metric_type,
+                               params=cf.get_index_params_params(index_type=index))
+        self.create_index(client, collection_name, index_params=index_params)
+
+        # load the collection with index
+        assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
+        self.load_collection(client, collection_name)
+
+        # search and assert
+        limit = 50
+        pages = 5
+        expected_overlap_ratio = 20
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
+
+        # insert additional data without flush
+        random_vectors = cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)
+        start_pk = default_nb * insert_times
+        rows = [{
+            default_primary_key_field_name: i + start_pk,
+            default_vector_field_name: random_vectors[i]
+        } for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+
+        # search and assert
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize('index', ct.sparse_supported_index_types)
+    @pytest.mark.parametrize('metric_type', ["IP"])
+    def test_search_pagination_sparse_index_growing(self, index, metric_type):
+        """
+        target: test search pagination with sparse index
+        method: create connection, collection, insert data, create index and search
+        expected: searched successfully
+        """
+        vector_dtype = DataType.SPARSE_FLOAT_VECTOR
+        client = self._client()
+
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        schema, _ = self.create_schema(client)
+        schema.add_field(default_primary_key_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, datatype=vector_dtype)
+        self.create_collection(client, collection_name, schema=schema)
+
+        # Insert data in 3 batches with unique primary keys using a loop
+        insert_times = 3
+        random_vectors = cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)
+        for j in range(insert_times):
+            start_pk = j * default_nb
+            rows = [{
+                default_primary_key_field_name: i + start_pk,
+                default_vector_field_name: random_vectors[i + start_pk]
+            } for i in range(default_nb)]
+            self.insert(client, collection_name, rows)
+        self.flush(client, collection_name)
+
+        # build index
+        index_params, _ = self.prepare_index_params(client)
+        index_params.add_index(default_vector_field_name, index_type=index,
+                               metric_type=metric_type,
+                               params=cf.get_index_params_params(index_type=index))
+        self.create_index(client, collection_name, index_params=index_params)
+
+        # load the collection with index
+        assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
+        self.load_collection(client, collection_name)
+
+        # search and assert
+        limit = 50
+        pages = 5
+        expected_overlap_ratio = 20
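+        # Note: a low overlap threshold (20%) is used here because results on
+        # growing, freshly indexed data are less stable across repeated requests.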
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
+
+        # insert additional data without flush
+        random_vectors = cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)
+        start_pk = default_nb * insert_times
+        rows = [{
+            default_primary_key_field_name: i + start_pk,
+            default_vector_field_name: random_vectors[i]
+        } for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+
+        # search and assert
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py
index 8ceff97b3b..8ab4bdb39e 100644
--- a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py
+++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py
@@ -676,7 +676,7 @@ class TestCollectionSearch(TestcaseBase):
     def random_primary_key(self, request):
         yield request.param

-    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
     def vector_data_type(self, request):
         yield request.param

@@ -1613,7 +1613,7 @@ class TestCollectionSearch(TestcaseBase):
         enable_dynamic_field = False
         collection_w, _, _, insert_ids, time_stamp = \
             self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id,
-                                         dim=dim, is_index=False,
+                                         dim=dim, is_index=False, vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
                                          enable_dynamic_field=enable_dynamic_field)[0:5]
         # 2. create index and load
         params = cf.get_index_params_params(index)
@@ -1624,7 +1624,7 @@ class TestCollectionSearch(TestcaseBase):
         if (dim % params["PQM"]) != 0:
             params["PQM"] = dim // 4
         default_index = {"index_type": index, "params": params, "metric_type": "L2"}
-        collection_w.create_index("float_vector", default_index)
+        collection_w.create_index("sparse_vector", default_index)
         collection_w.load()
         # 3. search
         search_params = cf.gen_search_param(index)
@@ -1714,6 +1714,7 @@ class TestCollectionSearch(TestcaseBase):
         collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000,
                                                                                   partition_num=1,
                                                                                   auto_id=auto_id,
+                                                                                  vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
                                                                                   dim=min_dim, is_index=False)[0:5]
         # 2. create index and load
         params = cf.get_index_params_params(index)
@@ -1722,7 +1723,7 @@ class TestCollectionSearch(TestcaseBase):
         if params.get("PQM"):
             params["PQM"] = min_dim
         default_index = {"index_type": index, "params": params, "metric_type": "L2"}
-        collection_w.create_index("float_vector", default_index)
+        collection_w.create_index("sparse_vector", default_index)
         collection_w.load()
         # 3. search
         search_params = cf.gen_search_param(index)
@@ -1885,6 +1886,7 @@ class TestCollectionSearch(TestcaseBase):
         enable_dynamic_field = False
         collection_w, _, _, insert_ids, time_stamp = \
             self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id,
+                                         vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
                                          dim=dim, is_index=False,
                                          enable_dynamic_field=enable_dynamic_field)[0:5]
         # 2. create different index
         params = cf.get_index_params_params(index)
@@ -1896,7 +1898,7 @@ class TestCollectionSearch(TestcaseBase):
             params["PQM"] = dim // 4
         log.info("test_search_after_index_different_metric_type: Creating index-%s" % index)
         default_index = {"index_type": index, "params": params, "metric_type": "IP"}
-        collection_w.create_index("float_vector", default_index)
+        collection_w.create_index("sparse_vector", default_index)
         log.info("test_search_after_index_different_metric_type: Created index-%s" % index)
         collection_w.load()
         # 3. search
@@ -2924,10 +2926,10 @@ class TestCollectionSearch(TestcaseBase):
         limit = 0
         insert_ids = []
         vector_name_list = cf.extract_vector_field_name_list(collection_w)
-        for search_field in vector_name_list:
-            vector_data_type = search_field.lstrip("multiple_vector_")
-            vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type)
-            res = collection_w.search(vectors[:nq], search_field,
+        for vector_field_name in vector_name_list:
+            vector_data_type = cf.get_field_dtype_by_field_name(collection_w, vector_field_name)
+            vectors = cf.gen_vectors(nq, dim, vector_data_type)
+            res = collection_w.search(vectors[:nq], vector_field_name,
                                       default_search_params, default_limit,
                                       search_exp, _async=_async,
                                       output_fields=[default_int64_field_name,
@@ -3213,7 +3215,7 @@ class TestCollectionSearch(TestcaseBase):

     @pytest.mark.tags(CaseLabel.L1)
     @pytest.mark.parametrize("index", ct.all_index_types[:7])
-    @pytest.mark.parametrize("metrics", ct.float_metrics)
+    @pytest.mark.parametrize("metrics", ct.dense_metrics)
     @pytest.mark.parametrize("limit", [20, 1200])
     def test_search_output_field_vector_after_different_index_metrics(self, index, metrics, limit):
         """
diff --git a/tests/python_client/testcases/async_milvus_client/test_e2e_async.py b/tests/python_client/testcases/async_milvus_client/test_e2e_async.py
index ecbbd52e9d..5c1ee28873 100644
--- a/tests/python_client/testcases/async_milvus_client/test_e2e_async.py
+++ b/tests/python_client/testcases/async_milvus_client/test_e2e_async.py
@@ -288,7 +288,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base):

         # hybrid_search
         search_param = {
-            "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type="FLOAT_VECTOR"),
+            "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR),
             "anns_field": ct.default_float_vec_field_name,
             "param": {"metric_type": "COSINE", "params": {"ef": "96"}},
             "limit": ct.default_limit,
@@ -296,7 +296,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base):
         req = AnnSearchRequest(**search_param)

         search_param2 = {
-            "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type="FLOAT_VECTOR"),
+            "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR),
             "anns_field": default_vector_name,
             "param": {"metric_type": "L2", "params": {"nprobe": "32"}},
             "limit": ct.default_limit
diff --git a/tests/python_client/testcases/test_bulk_insert.py b/tests/python_client/testcases/test_bulk_insert.py
index 481f723c6c..a61d08b358 100644
--- a/tests/python_client/testcases/test_bulk_insert.py
+++ b/tests/python_client/testcases/test_bulk_insert.py
@@ -857,16 +857,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
         # log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}")
         for f in [df.float_vec_field, df.bf16_vec_field, df.fp16_vec_field]:
-            vector_data_type = "FLOAT_VECTOR"
+            vector_data_type = DataType.FLOAT_VECTOR
             if f == df.float_vec_field:
                 dim = float_vec_field_dim
-                vector_data_type = "FLOAT_VECTOR"
"FLOAT_VECTOR" + vector_data_type = DataType.FLOAT_VECTOR elif f == df.bf16_vec_field: dim = bf16_vec_field_dim - vector_data_type = "BFLOAT16_VECTOR" + vector_data_type = DataType.BFLOAT16_VECTOR else: dim = fp16_vec_field_dim - vector_data_type = "FLOAT16_VECTOR" + vector_data_type = DataType.FLOAT16_VECTOR search_data = cf.gen_vectors(1, dim, vector_data_type=vector_data_type) search_params = ct.default_search_params @@ -1043,16 +1043,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert): # log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}") for f in [df.float_vec_field, df.bf16_vec_field, df.fp16_vec_field]: - vector_data_type = "FLOAT_VECTOR" + vector_data_type = DataType.FLOAT_VECTOR if f == df.float_vec_field: dim = float_vec_field_dim - vector_data_type = "FLOAT_VECTOR" + vector_data_type = DataType.FLOAT_VECTOR elif f == df.bf16_vec_field: dim = bf16_vec_field_dim - vector_data_type = "BFLOAT16_VECTOR" + vector_data_type = DataType.BFLOAT16_VECTOR else: dim = fp16_vec_field_dim - vector_data_type = "FLOAT16_VECTOR" + vector_data_type = DataType.FLOAT16_VECTOR search_data = cf.gen_vectors(1, dim, vector_data_type=vector_data_type) search_params = ct.default_search_params @@ -1217,16 +1217,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert): # log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}") for f in [df.float_vec_field, df.bf16_vec_field, df.fp16_vec_field]: - vector_data_type = "FLOAT_VECTOR" + vector_data_type = DataType.FLOAT_VECTOR if f == df.float_vec_field: dim = float_vec_field_dim - vector_data_type = "FLOAT_VECTOR" + vector_data_type = DataType.FLOAT_VECTOR elif f == df.bf16_vec_field: dim = bf16_vec_field_dim - vector_data_type = "BFLOAT16_VECTOR" + vector_data_type = DataType.BFLOAT16_VECTOR else: dim = fp16_vec_field_dim - vector_data_type = "FLOAT16_VECTOR" + vector_data_type = DataType.FLOAT16_VECTOR search_data = cf.gen_vectors(1, dim, vector_data_type=vector_data_type) search_params = ct.default_search_params @@ -1616,8 +1616,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert): df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None, df.array_bool_field: [True, False] if not (nullable and random.random() < 0.5) else None, df.float_vec_field: cf.gen_vectors(1, dim)[0], - df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0], - df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0], + df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)[0], + df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.BFLOAT16_VECTOR)[0], df.sparse_vec_field: cf.gen_sparse_vectors(1, dim, sparse_format=sparse_format)[0] } if auto_id: @@ -1922,8 +1922,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert): df.string_field: "string", df.json_field: json_value[i%len(json_value)], df.float_vec_field: cf.gen_vectors(1, dim)[0], - df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0], - df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0], + df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT16_VECTOR)[0], + df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.BFLOAT16_VECTOR)[0], } if auto_id: row.pop(df.pk_field) @@ -2064,8 +2064,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert): df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None, df.array_bool_field: [True, False] if 
             df.float_vec_field: cf.gen_vectors(1, dim)[0],
-            df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0],
-            df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0],
+            df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT16_VECTOR)[0],
+            df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.BFLOAT16_VECTOR)[0],
             df.sparse_vec_field: cf.gen_sparse_vectors(1, dim, sparse_format=sparse_format)[0]
         }
         if auto_id:
@@ -2536,7 +2536,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):

         # verify search
         self.collection_wrap.search(
-            data=cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR.name),
+            data=cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR),
             anns_field=df.float_vec_field,
             param=DefaultVectorSearchParams.IVF_SQ8(),
             limit=ct.default_limit, check_task=CheckTasks.check_search_results,
diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py
index c5d09118fd..f5e243a20c 100644
--- a/tests/python_client/testcases/test_collection.py
+++ b/tests/python_client/testcases/test_collection.py
@@ -50,7 +50,7 @@ vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_
 default_search_field = ct.default_float_vec_field_name
 default_search_params = ct.default_search_params
 max_vector_field_num = ct.max_vector_field_num
-SPARSE_FLOAT_VECTOR_data_type = "SPARSE_FLOAT_VECTOR"
+SPARSE_FLOAT_VECTOR_data_type = DataType.SPARSE_FLOAT_VECTOR


 class TestCollectionParams(TestcaseBase):
@@ -1061,7 +1061,7 @@ class TestCollectionParams(TestcaseBase):
         # 2. create collection with multiple vectors
         c_name = cf.gen_unique_str(prefix)
         fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
-                  cf.gen_float_vec_field(vector_data_type=ct.sparse_vector), cf.gen_float_vec_field(name="vec_sparse", vector_data_type=ct.sparse_vector)]
+                  cf.gen_float_vec_field(vector_data_type=DataType.FLOAT_VECTOR), cf.gen_float_vec_field(name="vec_sparse", vector_data_type=DataType.SPARSE_FLOAT_VECTOR)]
         schema = cf.gen_collection_schema(fields=fields)
         self.collection_wrap.init_collection(c_name, schema=schema,
                                              check_task=CheckTasks.check_collection_property,
@@ -3260,7 +3260,7 @@ class TestLoadPartition(TestcaseBase):
     )
     def get_binary_index(self, request):
         log.info(request.param)
-        if request.param["index_type"] in ct.binary_support:
+        if request.param["index_type"] in ct.binary_supported_index_types:
             return request.param
         else:
             pytest.skip("Skip index Temporary")
@@ -4560,7 +4560,7 @@ class TestCollectionNullInvalid(TestcaseBase):
     ******************************************************************
     """
     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+    @pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes[:1])
     def test_create_collection_set_nullable_on_pk_field(self, vector_type):
         """
         target: test create collection with set nullable=True on pk field
@@ -4578,7 +4578,7 @@ class TestCollectionNullInvalid(TestcaseBase):
         self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)

     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+    @pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes)
     def test_create_collection_set_nullable_on_vector_field(self, vector_type):
         """
         target: test create collection with set nullable=True on vector field
@@ -4623,7 +4623,7 @@ class TestCollectionDefaultValueInvalid(TestcaseBase):
     ******************************************************************
     """
     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+    @pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes[:1])
     def test_create_collection_default_value_on_pk_field(self, vector_type):
         """
         target: test create collection with set default value on pk field
@@ -4641,7 +4641,7 @@ class TestCollectionDefaultValueInvalid(TestcaseBase):
         self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)

     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+    @pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes)
     def test_create_collection_default_value_on_vector_field(self, vector_type):
         """
         target: test create collection with set default value on vector field
diff --git a/tests/python_client/testcases/test_field_partial_load.py b/tests/python_client/testcases/test_field_partial_load.py
index 38978f1cf7..33f568f637 100644
--- a/tests/python_client/testcases/test_field_partial_load.py
+++ b/tests/python_client/testcases/test_field_partial_load.py
@@ -147,13 +147,13 @@ class TestFieldPartialLoad(TestcaseBase):
         pk_field = cf.gen_int64_field(name='pk', is_primary=True)
         load_string_field = cf.gen_string_field(name="string_load")
         vector_field = cf.gen_float_vec_field(name="vec_float32", dim=dim)
-        sparse_vector_field = cf.gen_float_vec_field(name="sparse", vector_data_type="SPARSE_FLOAT_VECTOR")
+        sparse_vector_field = cf.gen_float_vec_field(name="sparse", vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
         schema = cf.gen_collection_schema(fields=[pk_field, load_string_field, vector_field, sparse_vector_field],
                                           auto_id=True)
         collection_w = self.init_collection_wrap(name=name, schema=schema)
         string_values = [str(i) for i in range(nb)]
         float_vec_values = cf.gen_vectors(nb, dim)
-        sparse_vec_values = cf.gen_vectors(nb, dim, vector_data_type="SPARSE_FLOAT_VECTOR")
+        sparse_vec_values = cf.gen_vectors(nb, dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
         collection_w.insert([string_values, float_vec_values, sparse_vec_values])

         # build index on one of vector fields
diff --git a/tests/python_client/testcases/test_full_text_search.py b/tests/python_client/testcases/test_full_text_search.py
index 427c5f655c..5c39dd2fca 100644
--- a/tests/python_client/testcases/test_full_text_search.py
+++ b/tests/python_client/testcases/test_full_text_search.py
@@ -3273,9 +3273,9 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase):
         nq = 2
         limit = 100
         if invalid_search_data == "sparse_vector":
-            search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type="SPARSE_FLOAT_VECTOR")
+            search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
         else:
-            search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type="FLOAT_VECTOR")
+            search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type=DataType.FLOAT_VECTOR)
         log.info(f"search data: {search_data}")
         error = {ct.err_code: 65535,
                  ct.err_msg: "please provide varchar/text for BM25 Function based search"}
@@ -3377,7 +3377,7 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase):
                 "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
                 "text": fake.text().lower() if random.random() >= empty_percent else "",
                 "dense_emb": [random.random() for _ in range(dim)],
-                "neural_sparse_emb": cf.gen_vectors(nb=1, dim=1000, vector_data_type="SPARSE_FLOAT_VECTOR")[0],
"neural_sparse_emb": cf.gen_vectors(nb=1, dim=1000, vector_data_type="SPARSE_FLOAT_VECTOR")[0], + "neural_sparse_emb": cf.gen_vectors(nb=1, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)[0], } for i in range(data_size) ] @@ -3428,7 +3428,7 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase): limit=limit, ) sparse_search = AnnSearchRequest( - data=cf.gen_vectors(nb=nq, dim=dim, vector_data_type="SPARSE_FLOAT_VECTOR"), + data=cf.gen_vectors(nb=nq, dim=dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR), anns_field="neural_sparse_emb", param={}, limit=limit, diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index 81ceabd0b9..1ed670dbef 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -1124,7 +1124,7 @@ class TestIndexInvalid(TestcaseBase): def scalar_index(self, request): yield request.param - @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + @pytest.fixture(scope="function", params=ct.all_dense_vector_types) def vector_data_type(self, request): yield request.param @@ -2171,7 +2171,7 @@ class TestInvertedIndexValid(TestcaseBase): def scalar_index(self, request): yield request.param - @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + @pytest.fixture(scope="function", params=ct.all_dense_vector_types) def vector_data_type(self, request): yield request.param diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py index 2138a1c7f8..4fa958095a 100644 --- a/tests/python_client/testcases/test_insert.py +++ b/tests/python_client/testcases/test_insert.py @@ -1429,7 +1429,7 @@ class TestInsertInvalid(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("index ", ct.all_index_types[9:11]) - @pytest.mark.parametrize("invalid_vector_type ", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + @pytest.mark.parametrize("invalid_vector_type ", ct.all_dense_vector_types) def test_invalid_sparse_vector_data(self, index, invalid_vector_type): """ target: insert illegal data type diff --git a/tests/python_client/testcases/test_mix_scenes.py b/tests/python_client/testcases/test_mix_scenes.py index 081eb13d9a..9aa9cd64d5 100644 --- a/tests/python_client/testcases/test_mix_scenes.py +++ b/tests/python_client/testcases/test_mix_scenes.py @@ -591,10 +591,10 @@ class TestHybridIndexDQLExpr(TestCaseClassBase): expected: 1. 
         """
-        search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR.name, 3, 1
+        search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR, 3, 1

         self.collection_wrap.search(
-            cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field), vector_field, search_params, limit,
+            cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field), vector_field.name, search_params, limit,
             output_fields=['*'], check_task=CheckTasks.check_search_results,
             check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field), "limit": limit,
                          "output_fields": self.all_fields})
@@ -1247,8 +1247,8 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
     @pytest.mark.parametrize("group_by_field", ['INT8', 'INT16', 'INT32', 'INT64', 'BOOL', 'VARCHAR'])
     @pytest.mark.parametrize(
         "dim, search_params, vector_field",
-        [(3, {"metric_type": MetricType.L2, "ef": 32}, DataType.FLOAT16_VECTOR.name),
-         (1000, {"metric_type": MetricType.IP, "drop_ratio_search": 0.2}, DataType.SPARSE_FLOAT_VECTOR.name)])
+        [(3, {"metric_type": MetricType.L2, "ef": 32}, DataType.FLOAT16_VECTOR),
+         (1000, {"metric_type": MetricType.IP, "drop_ratio_search": 0.2}, DataType.SPARSE_FLOAT_VECTOR)])
     def test_bitmap_index_search_group_by(self, limit, group_by_field, dim, search_params, vector_field):
         """
         target:
@@ -1259,7 +1259,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
         expected:
             1. search group by with BITMAP index
         """
-        res, _ = self.collection_wrap.search(cf.gen_vectors(nb=1, dim=dim, vector_data_type=vector_field), vector_field,
+        res, _ = self.collection_wrap.search(cf.gen_vectors(nb=1, dim=dim, vector_data_type=vector_field), vector_field.name,
                                              search_params, limit, group_by_field=group_by_field,
                                              output_fields=[group_by_field])
         output_values = [i.fields for r in res for i in r]
@@ -1285,9 +1285,9 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
             1. search iterator with BITMAP index
         """
         ef = 32 if batch_size <= 32 else batch_size  # ef must be larger than or equal to batch size
-        search_params, vector_field = {"metric_type": "L2", "ef": ef}, DataType.FLOAT16_VECTOR.name
+        search_params, vector_field = {"metric_type": "L2", "ef": ef}, DataType.FLOAT16_VECTOR
         self.collection_wrap.search_iterator(
-            cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field, search_params, batch_size,
+            cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field.name, search_params, batch_size,
             expr='INT16 > 15', check_task=CheckTasks.check_search_iterator, check_items={"batch_size": batch_size})

     @pytest.mark.tags(CaseLabel.L1)
@@ -1301,10 +1301,10 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
         expected:
            1. search output fields with BITMAP index
         """
-        search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR.name, 3, 1
+        search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR, 3, 1

         self.collection_wrap.search(
             cf.gen_vectors(nb=nq, dim=ct.default_dim, vector_data_type=vector_field),
-            vector_field, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
+            vector_field.name, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
             check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field), "limit": limit,
                          "output_fields": self.all_fields})
@@ -1922,11 +1922,11 @@ class TestBitmapIndexMmap(TestCaseClassBase):
         expected:
             1. search output fields with BITMAP index
         """
-        search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR.name, 3, 1
+        search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR, 3, 1

         self.collection_wrap.search(
             cf.gen_vectors(nb=nq, dim=ct.default_dim, vector_data_type=vector_field),
-            vector_field, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
+            vector_field.name, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
             check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field), "limit": limit,
                          "output_fields": self.all_fields})
@@ -2345,7 +2345,7 @@ class TestGroupSearch(TestCaseClassBase):
         string_values = pd.Series(data=[str(i) for i in range(nb)], dtype="string")
         data = [string_values]
         for i in range(len(self.vector_fields)):
-            data.append(cf.gen_vectors(dim=self.dims[i], nb=nb, vector_data_type=self.vector_fields[i]))
+            data.append(cf.gen_vectors(dim=self.dims[i], nb=nb, vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[i])))
         data.append(pd.Series(data=[np.int8(i) for i in range(nb)], dtype="int8"))
         data.append(pd.Series(data=[np.int64(i) for i in range(nb)], dtype="int64"))
         data.append(pd.Series(data=[np.bool_(i) for i in range(nb)], dtype="bool"))
@@ -2384,7 +2384,7 @@ class TestGroupSearch(TestCaseClassBase):
         limit = 50
         group_size = 5
         for j in range(len(self.vector_fields)):
-            search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=self.vector_fields[j])
+            search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[j]))
             search_params = {"params": cf.get_search_params_params(self.index_types[j])}
             # when strict_group_size=true, it shall return results with entities = limit * group_size
             res1 = self.collection_wrap.search(data=search_vectors, anns_field=self.vector_fields[j],
@@ -2424,7 +2424,7 @@ class TestGroupSearch(TestCaseClassBase):
         req_list = []
         for j in range(len(self.vector_fields)):
             search_params = {
-                "data": cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=self.vector_fields[j]),
+                "data": cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[j])),
                 "anns_field": self.vector_fields[j],
                 "param": {"params": cf.get_search_params_params(self.index_types[j])},
                 "limit": limit,
@@ -2473,7 +2473,7 @@ class TestGroupSearch(TestCaseClassBase):
         req_list = []
         for i in range(len(self.vector_fields)):
             search_param = {
-                "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=self.vector_fields[i]),
+                "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[i])),
                 "anns_field": self.vector_fields[i],
                 "param": {},
                 "limit": ct.default_limit,
@@ -2497,7 +2497,7 @@ class TestGroupSearch(TestCaseClassBase):
         req_list = []
         for i in range(1, len(self.vector_fields)):
             search_param = {
-                "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=self.vector_fields[i]),
+                "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[i])),
                 "anns_field": self.vector_fields[i],
                 "param": {},
                 "limit": ct.default_limit,
@@ -2519,7 +2519,7 @@ class TestGroupSearch(TestCaseClassBase):
         nq = 2
         limit = 15
         for j in range(len(self.vector_fields)):
-            search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=self.vector_fields[j])
+            search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[j]))
             search_params = {"params": cf.get_search_params_params(self.index_types[j])}
             res1 = self.collection_wrap.search(data=search_vectors, anns_field=self.vector_fields[j],
                                                param=search_params, limit=limit,
@@ -2561,7 +2561,7 @@ class TestGroupSearch(TestCaseClassBase):
         default_search_exp = f"{self.primary_field} >= 0"
         grpby_field = self.inverted_string_field
         default_search_field = self.vector_fields[1]
-        search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1])
+        search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[1]))
         all_pages_ids = []
         all_pages_grpby_field_values = []
         for r in range(page_rounds):
@@ -2603,7 +2603,7 @@ class TestGroupSearch(TestCaseClassBase):
         default_search_exp = f"{self.primary_field} >= 0"
         grpby_field = self.inverted_string_field
         default_search_field = self.vector_fields[1]
-        search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1])
+        search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[1]))
         all_pages_ids = []
         all_pages_grpby_field_values = []
         res_count = limit * group_size
@@ -2655,7 +2655,7 @@ class TestGroupSearch(TestCaseClassBase):
         """
         group_by_field = self.inverted_string_field
         default_search_field = self.vector_fields[1]
-        search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1])
+        search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[1]))
         search_params = {}
         limit = 10
         max_group_size = 10
diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py
index 30059c5c27..9c72a64d99 100644
--- a/tests/python_client/testcases/test_query.py
+++ b/tests/python_client/testcases/test_query.py
@@ -2351,7 +2351,7 @@ class TestQueryOperation(TestcaseBase):
         expected: return the latest entity; verify the result is same as dedup entities
         """
         collection_w = self.init_collection_general(prefix, dim=16, is_flush=False, insert_data=False, is_index=False,
-                                                    vector_data_type=ct.float_type, with_json=False)[0]
+                                                    vector_data_type=DataType.FLOAT_VECTOR, with_json=False)[0]
         nb = 50
         rounds = 10
         for i in range(rounds):
@@ -2465,7 +2465,7 @@ class TestQueryOperation(TestcaseBase):
         assert res[0].keys() == set(fields)

     @pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + @pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types) def test_query_output_all_vector_type(self, vector_data_type): """ target: test query output different vector type diff --git a/tests/python_client/testcases/test_query_iterator.py b/tests/python_client/testcases/test_query_iterator.py index a74c55e8f1..562771e0c0 100644 --- a/tests/python_client/testcases/test_query_iterator.py +++ b/tests/python_client/testcases/test_query_iterator.py @@ -127,7 +127,7 @@ class TestQueryIterator(TestcaseBase): "batch_size": batch_size}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + @pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types) def test_query_iterator_output_different_vector_type(self, vector_data_type): """ target: test query iterator with output fields diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py deleted file mode 100644 index e69de29bb2..0000000000