test: [E2e Refactor] use vector datatype instead of hard-coded datatype names (#41497)

Related issue: #40698
1. Use vector data types (DataType enums) instead of hard-coded datatype names.
2. Update the search pagination tests.
3. Stop checking exact distances when verifying search results, because Knowhere customizes the distances for different metrics and indexes; only assert that the distances are sorted correctly (see the illustrative sketch below).
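A minimal sketch of the two checks described above, assuming hypothetical helper names (`is_sparse` and `assert_distances_sorted` are illustrative, not actual utilities in the test suite); the real logic lives in the updated common/check modules shown in the diff:

```python
# Hypothetical sketch only; helper names are illustrative, not the actual test utilities.
from pymilvus import DataType


# 1. Compare against DataType enum members instead of hard-coded strings
#    such as "FLOAT_VECTOR" or "SPARSE_FLOAT_VECTOR".
def is_sparse(vector_data_type: DataType) -> bool:
    return vector_data_type == DataType.SPARSE_FLOAT_VECTOR


# 3. Verify only the ordering of the returned distances, not their exact values,
#    since Knowhere may customize distances per metric and index type.
def assert_distances_sorted(distances: list, metric_type: str) -> None:
    if metric_type.upper() in ("IP", "COSINE", "BM25"):
        # Similarity metrics: larger is better, so results arrive in descending order.
        assert distances == sorted(distances, reverse=True)
    else:
        # Distance metrics such as L2: smaller is better, ascending order.
        assert distances == sorted(distances, reverse=False)
```

This mirrors the change in the response checker below, where the exact-distance comparison is commented out and only the sort order is asserted.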

---------

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
yanliang567 2025-04-25 10:46:38 +08:00 committed by GitHub
parent 6084930854
commit 70b311735b
25 changed files with 670 additions and 600 deletions

View File

@@ -275,7 +275,7 @@ class TestcaseBase(Base):
                                 auto_id=False, dim=ct.default_dim, is_index=True,
                                 primary_field=ct.default_int64_field_name, is_flush=True, name=None,
                                 enable_dynamic_field=False, with_json=True, random_primary_key=False,
-                                multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
+                                multiple_dim_array=[], is_partition_key=None, vector_data_type=DataType.FLOAT_VECTOR,
                                 nullable_fields={}, default_value_fields={}, language=None, **kwargs):
         """
         target: create specified collections
@@ -317,7 +317,7 @@ class TestcaseBase(Base):
                                                               primary_field=primary_field,
                                                               nullable_fields=nullable_fields,
                                                               default_value_fields=default_value_fields)
-            if vector_data_type == ct.sparse_vector:
+            if vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
                 default_schema = cf.gen_default_sparse_schema(auto_id=auto_id, primary_field=primary_field,
                                                               enable_dynamic_field=enable_dynamic_field,
                                                               with_json=with_json,
@@ -354,7 +354,7 @@ class TestcaseBase(Base):
             # This condition will be removed after auto index feature
             if is_binary:
                 collection_w.create_index(ct.default_binary_vec_field_name, ct.default_bin_flat_index)
-            elif vector_data_type == ct.sparse_vector:
+            elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
                 for vector_name in vector_name_list:
                     collection_w.create_index(vector_name, ct.default_sparse_inverted_index)
             else:
@@ -362,7 +362,7 @@ class TestcaseBase(Base):
                     vector_name_list.append(ct.default_float_vec_field_name)
                 for vector_name in vector_name_list:
                     # Unlike dense vectors, sparse vectors cannot create flat index.
-                    if ct.sparse_vector in vector_name:
+                    if DataType.SPARSE_FLOAT_VECTOR.name in vector_name:
                         collection_w.create_index(vector_name, ct.default_sparse_inverted_index)
                     else:
                         collection_w.create_index(vector_name, ct.default_flat_index)

View File

@@ -1,4 +1,5 @@
 import sys
+import time
 from typing import Optional
 from pymilvus import MilvusClient
@@ -545,6 +546,16 @@ class TestMilvusClientV2Base(Base):
                            **kwargs).run()
         return res, check_result

+    def wait_for_index_ready(self, client, collection_name, index_name, timeout=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        start_time = time.time()
+        while start_time + timeout > time.time():
+            index_info, _ = self.describe_index(client, collection_name, index_name, **kwargs)
+            if index_info.get("pending_index_rows", 1) == 0:
+                return True
+            time.sleep(2)
+        return False
+
     @trace()
     def list_indexes(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs):
         timeout = TIMEOUT if timeout is None else timeout

View File

@@ -445,19 +445,20 @@ class ResponseChecker:
                 assert ids_match
             elif check_items.get("metric", None) is not None:
                 # verify the distances are already sorted
-                if check_items.get("metric").lower() in ["ip", "bm25"]:
-                    assert distances == sorted(distances, reverse=False)
-                else:
+                if check_items.get("metric").upper() in ["IP", "COSINE", "BM25"]:
                     assert distances == sorted(distances, reverse=True)
+                else:
+                    assert distances == sorted(distances, reverse=False)
                 if check_items.get("vector_nq") is None or check_items.get("original_vectors") is None:
-                    log.debug("vector for searched (nq) and inserted vectors are needed for distance check")
+                    log.debug("skip distance check for knowhere does not return the precise distances")
                 else:
-                    for id in ids:
-                        searched_original_vectors.append(check_items["original_vectors"][id])
-                    cf.compare_distance_vector_and_vector_list(check_items["vector_nq"][nq_i],
-                                                               searched_original_vectors,
-                                                               check_items["metric"], distances)
-                    log.info("search_results_check: Checked the distances for one nq: OK")
+                    # for id in ids:
+                    #     searched_original_vectors.append(check_items["original_vectors"][id])
+                    # cf.compare_distance_vector_and_vector_list(check_items["vector_nq"][nq_i],
+                    #                                            searched_original_vectors,
+                    #                                            check_items["metric"], distances)
+                    # log.info("search_results_check: Checked the distances for one nq: OK")
+                    pass
             else:
                 pass  # just check nq and topk, not specific ids need check
             nq_i += 1

View File

@@ -692,22 +692,17 @@ def gen_double_field(name=ct.default_double_field_name, is_primary=False, descri
 def gen_float_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim,
-                        description=ct.default_desc, vector_data_type="FLOAT_VECTOR", **kwargs):
-    if vector_data_type == "SPARSE_FLOAT_VECTOR":
-        dtype = DataType.SPARSE_FLOAT_VECTOR
-        float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=dtype,
-                                                                       description=description,
-                                                                       is_primary=is_primary, **kwargs)
-        return float_vec_field
-    if vector_data_type == "FLOAT_VECTOR":
-        dtype = DataType.FLOAT_VECTOR
-    elif vector_data_type == "FLOAT16_VECTOR":
-        dtype = DataType.FLOAT16_VECTOR
-    elif vector_data_type == "BFLOAT16_VECTOR":
-        dtype = DataType.BFLOAT16_VECTOR
-    float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=dtype,
-                                                                   description=description, dim=dim,
-                                                                   is_primary=is_primary, **kwargs)
+                        description=ct.default_desc, vector_data_type=DataType.FLOAT_VECTOR, **kwargs):
+    if vector_data_type != DataType.SPARSE_FLOAT_VECTOR:
+        float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=vector_data_type,
+                                                                       description=description, dim=dim,
+                                                                       is_primary=is_primary, **kwargs)
+    else:
+        float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.SPARSE_FLOAT_VECTOR,
+                                                                       description=description,
+                                                                       is_primary=is_primary, **kwargs)
     return float_vec_field
@@ -744,7 +739,7 @@ def gen_sparse_vec_field(name=ct.default_sparse_vec_field_name, is_primary=False
 def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
                                   auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True,
-                                  multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
+                                  multiple_dim_array=[], is_partition_key=None, vector_data_type=DataType.FLOAT_VECTOR,
                                   nullable_fields={}, default_value_fields={}, **kwargs):
     # gen primary key field
     if default_value_fields.get(ct.default_int64_field_name) is None:
@@ -824,7 +819,7 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_fiel
              gen_array_field(name="array_bool", element_type=DataType.BOOL),
              gen_float_vec_field(dim=dim),
              gen_float_vec_field(name="image_emb", dim=dim),
-             gen_float_vec_field(name="text_sparse_emb", vector_data_type="SPARSE_FLOAT_VECTOR"),
+             gen_float_vec_field(name="text_sparse_emb", vector_data_type=DataType.SPARSE_FLOAT_VECTOR),
              gen_float_vec_field(name="voice_emb", dim=dim),
              ]
@@ -998,25 +993,25 @@ def gen_collection_schema_all_datatype(description=ct.default_desc, primary_fiel
     else:
         multiple_dim_array.insert(0, dim)
         for i in range(len(multiple_dim_array)):
-            if ct.append_vector_type[i%3] != ct.sparse_vector:
+            if ct.append_vector_type[i%3] != DataType.SPARSE_FLOAT_VECTOR:
                 if default_value_fields.get(ct.append_vector_type[i%3]) is None:
-                    vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3]}",
+                    vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3].name}",
                                                        dim=multiple_dim_array[i],
                                                        vector_data_type=ct.append_vector_type[i%3])
                 else:
-                    vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3]}",
+                    vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3].name}",
                                                        dim=multiple_dim_array[i],
                                                        vector_data_type=ct.append_vector_type[i%3],
-                                                       default_value=default_value_fields.get(ct.append_vector_type[i%3]))
+                                                       default_value=default_value_fields.get(ct.append_vector_type[i%3].name))
                 fields.append(vector_field)
             else:
                 # The field of a sparse vector cannot be dimensioned
                 if default_value_fields.get(ct.default_sparse_vec_field_name) is None:
-                    sparse_vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.sparse_vector}",
-                                                              vector_data_type=ct.sparse_vector)
+                    sparse_vector_field = gen_sparse_vec_field(name=f"multiple_vector_{DataType.SPARSE_FLOAT_VECTOR.name}",
+                                                               vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
                 else:
-                    sparse_vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.sparse_vector}",
-                                                              vector_data_type=ct.sparse_vector,
-                                                              default_value=default_value_fields.get(ct.default_sparse_vec_field_name))
+                    sparse_vector_field = gen_sparse_vec_field(name=f"multiple_vector_{DataType.SPARSE_FLOAT_VECTOR.name}",
+                                                               vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
+                                                               default_value=default_value_fields.get(ct.default_sparse_vec_field_name))
                 fields.append(sparse_vector_field)
@@ -1124,23 +1119,25 @@ def gen_schema_multi_string_fields(string_fields):
     return schema

-def gen_vectors(nb, dim, vector_data_type="FLOAT_VECTOR"):
+def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR):
     vectors = []
-    if vector_data_type == "FLOAT_VECTOR":
+    if vector_data_type == DataType.FLOAT_VECTOR:
         vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
-    elif vector_data_type == "FLOAT16_VECTOR":
+    elif vector_data_type == DataType.FLOAT16_VECTOR:
         vectors = gen_fp16_vectors(nb, dim)[1]
-    elif vector_data_type == "BFLOAT16_VECTOR":
+    elif vector_data_type == DataType.BFLOAT16_VECTOR:
         vectors = gen_bf16_vectors(nb, dim)[1]
-    elif vector_data_type == "SPARSE_FLOAT_VECTOR":
+    elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
         vectors = gen_sparse_vectors(nb, dim)
-    elif vector_data_type == "TEXT_SPARSE_VECTOR":
+    elif vector_data_type == ct.text_sparse_vector:
         vectors = gen_text_vectors(nb)
+    elif vector_data_type == DataType.BINARY_VECTOR:
+        vectors = gen_binary_vectors(nb, dim)[1]
     else:
         log.error(f"Invalid vector data type: {vector_data_type}")
         raise Exception(f"Invalid vector data type: {vector_data_type}")
     if dim > 1:
-        if vector_data_type == "FLOAT_VECTOR":
+        if vector_data_type == DataType.FLOAT_VECTOR:
             vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
             vectors = vectors.tolist()
     return vectors
@@ -1173,7 +1170,7 @@ def gen_binary_vectors(num, dim):
 def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                                random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
-                               vector_data_type="FLOAT_VECTOR", auto_id=False,
+                               vector_data_type=DataType.FLOAT_VECTOR, auto_id=False,
                                primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     if not random_primary_key:
         int_values = pd.Series(data=[i for i in range(start, start + nb)])
@@ -1235,7 +1232,7 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi
 def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                           random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
-                          vector_data_type="FLOAT_VECTOR", auto_id=False,
+                          vector_data_type=DataType.FLOAT_VECTOR, auto_id=False,
                           primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     insert_list = []
     if not random_primary_key:
@@ -1289,7 +1286,7 @@ def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
 def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, multiple_dim_array=[],
-                          multiple_vector_field_name=[], vector_data_type="FLOAT_VECTOR", auto_id=False,
+                          multiple_vector_field_name=[], vector_data_type=DataType.FLOAT_VECTOR, auto_id=False,
                           primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     array = []
     for i in range(start, start + nb):
@@ -1703,7 +1700,7 @@ def gen_default_list_sparse_data(nb=ct.default_nb, dim=ct.default_dim, start=0,
     string_values = [str(i) for i in range(start, start + nb)]
     json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]}
                    for i in range(start, start + nb)]
-    sparse_vec_values = gen_vectors(nb, dim, vector_data_type="SPARSE_FLOAT_VECTOR")
+    sparse_vec_values = gen_vectors(nb, dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
     if with_json:
         data = [int_values, float_values, string_values, json_values, sparse_vec_values]
     else:
@@ -2812,7 +2809,7 @@ def compare_distance_vector_and_vector_list(x, y, metric, distance):
         assert False
     for i in range(len(y)):
         if metric == "L2":
-            distance_i = l2(x, y[i])
+            distance_i = (l2(x, y[i]))**2
         elif metric == "IP":
             distance_i = ip(x, y[i])
         elif metric == "COSINE":
@@ -2820,7 +2817,7 @@ def compare_distance_vector_and_vector_list(x, y, metric, distance):
         else:
             raise Exception("metric type is invalid")
         if abs(distance_i - distance[i]) > ct.epsilon:
-            log.error(f"The distance between {x} and {y[i]} is not equal with {distance[i]}")
+            log.error(f"The distance between {x} and {y[i]} does not equal {distance[i]}, expected: {distance_i}")
         assert abs(distance_i - distance[i]) < ct.epsilon
     return True
@@ -2927,7 +2924,7 @@ def gen_partitions(collection_w, partition_num=1):
 def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_type=False,
                 auto_id=False, dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True,
                 random_primary_key=False, multiple_dim_array=[], primary_field=ct.default_int64_field_name,
-                vector_data_type="FLOAT_VECTOR", nullable_fields={}, language=None):
+                vector_data_type=DataType.FLOAT_VECTOR, nullable_fields={}, language=None):
     """
     target: insert non-binary/binary data
     method: insert non-binary/binary data into partitions if any
@@ -2948,7 +2945,7 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
     if not is_binary:
         if not is_all_data_type:
             if not enable_dynamic_field:
-                if vector_data_type == "FLOAT_VECTOR":
+                if vector_data_type == DataType.FLOAT_VECTOR:
                     default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json,
                                                               random_primary_key=random_primary_key,
                                                               multiple_dim_array=multiple_dim_array,
@@ -2975,14 +2972,14 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
         else:
             if not enable_dynamic_field:
-                if vector_data_type == "FLOAT_VECTOR":
+                if vector_data_type == DataType.FLOAT_VECTOR:
                     default_data = gen_general_list_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
                                                                   random_primary_key=random_primary_key,
                                                                   multiple_dim_array=multiple_dim_array,
                                                                   multiple_vector_field_name=vector_name_list,
                                                                   auto_id=auto_id, primary_field=primary_field,
                                                                   nullable_fields=nullable_fields, language=language)
-                elif vector_data_type == "FLOAT16_VECTOR" or "BFLOAT16_VECTOR":
+                elif vector_data_type == DataType.FLOAT16_VECTOR or vector_data_type == DataType.BFLOAT16_VECTOR:
                     default_data = gen_general_list_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
                                                                   random_primary_key=random_primary_key,
                                                                   multiple_dim_array=multiple_dim_array,
@@ -3173,6 +3170,20 @@ def extract_vector_field_name_list(collection_w):
     return vector_name_list

+def get_field_dtype_by_field_name(collection_w, field_name):
+    """
+    get the vector field data type by field name
+    collection_w : the collection object to be extracted
+    return: the field data type of the field name
+    """
+    schema_dict = collection_w.schema.to_dict()
+    fields = schema_dict.get('fields')
+    for field in fields:
+        if field['name'] == field_name:
+            return field['type']
+    return None
+
 def get_activate_func_from_metric_type(metric_type):
     activate_function = lambda x: x
     if metric_type == "COSINE":
@@ -3307,20 +3318,20 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok", empty_percentage=0):
     return vectors

-def gen_vectors_based_on_vector_type(num, dim, vector_data_type=ct.float_type):
+def gen_vectors_based_on_vector_type(num, dim, vector_data_type=DataType.FLOAT_VECTOR):
     """
     generate float16 vector data
     raw_vectors : the vectors
     fp16_vectors: the bytes used for insert
     return: raw_vectors and fp16_vectors
     """
-    if vector_data_type == ct.float_type:
+    if vector_data_type == DataType.FLOAT_VECTOR:
         vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
-    elif vector_data_type == ct.float16_type:
+    elif vector_data_type == DataType.FLOAT16_VECTOR:
         vectors = gen_fp16_vectors(num, dim)[1]
-    elif vector_data_type == ct.bfloat16_type:
+    elif vector_data_type == DataType.BFLOAT16_VECTOR:
         vectors = gen_bf16_vectors(num, dim)[1]
-    elif vector_data_type == ct.sparse_vector:
+    elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
         vectors = gen_sparse_vectors(num, dim)
     elif vector_data_type == ct.text_sparse_vector:
         vectors = gen_text_vectors(num)

View File

@@ -1,4 +1,5 @@
 import numpy as np
+from pymilvus import DataType

 """ Initialized parameters """
 port = 19530
@@ -44,14 +45,10 @@ default_float16_vec_field_name = "float16_vector"
 default_bfloat16_vec_field_name = "bfloat16_vector"
 another_float_vec_field_name = "float_vector1"
 default_binary_vec_field_name = "binary_vector"
-float_type = "FLOAT_VECTOR"
-float16_type = "FLOAT16_VECTOR"
-bfloat16_type = "BFLOAT16_VECTOR"
-sparse_vector = "SPARSE_FLOAT_VECTOR"
 text_sparse_vector = "TEXT_SPARSE_VECTOR"
-append_vector_type = [float16_type, bfloat16_type, sparse_vector]
-all_dense_vector_types = [float_type, float16_type, bfloat16_type]
-all_vector_data_types = [float_type, float16_type, bfloat16_type, sparse_vector]
+append_vector_type = [DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR, DataType.SPARSE_FLOAT_VECTOR]
+all_dense_vector_types = [DataType.FLOAT_VECTOR, DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR]
+all_float_vector_dtypes = [DataType.FLOAT_VECTOR, DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR, DataType.SPARSE_FLOAT_VECTOR]
 default_sparse_vec_field_name = "sparse_vector"
 default_partition_name = "_default"
 default_resource_group_name = '__default_resource_group'
@@ -246,13 +243,14 @@ default_all_search_params_params = [{}, {"nprobe": 32}, {"nprobe": 32}, {"nprobe
                                     {}, {}]
 Handler_type = ["GRPC", "HTTP"]
-binary_support = ["BIN_FLAT", "BIN_IVF_FLAT"]
-sparse_support = ["SPARSE_INVERTED_INDEX", "SPARSE_WAND"]
-gpu_support = ["GPU_IVF_FLAT", "GPU_IVF_PQ"]
+binary_supported_index_types = ["BIN_FLAT", "BIN_IVF_FLAT"]
+sparse_supported_index_types = ["SPARSE_INVERTED_INDEX", "SPARSE_WAND"]
+gpu_supported_index_types = ["GPU_IVF_FLAT", "GPU_IVF_PQ"]
 default_L0_metric = "COSINE"
-float_metrics = ["L2", "IP", "COSINE"]
+dense_metrics = ["L2", "IP", "COSINE"]
 binary_metrics = ["JACCARD", "HAMMING", "SUBSTRUCTURE", "SUPERSTRUCTURE"]
 structure_metrics = ["SUBSTRUCTURE", "SUPERSTRUCTURE"]
+sparse_metrics = ["IP", "BM25"]
 all_scalar_data_types = ['int8', 'int16', 'int32', 'int64', 'float', 'double', 'bool', 'varchar']

View File

@@ -665,7 +665,7 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
     """

     @pytest.mark.tags(CaseLabel.L0)
-    @pytest.mark.parametrize("metric_type", ct.float_metrics)
+    @pytest.mark.parametrize("metric_type", ct.dense_metrics)
     def test_milvus_client_search_iterator_default(self, metric_type):
         """
         target: test search iterator (high level api) normal case
@@ -892,7 +892,7 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
         pass

     @pytest.mark.tags(CaseLabel.L0)
-    @pytest.mark.parametrize("metric_type", ct.float_metrics)
+    @pytest.mark.parametrize("metric_type", ct.dense_metrics)
     @pytest.mark.parametrize("enable_dynamic_field", [True, False])
     def test_milvus_client_search_iterator_after_json_path_index(self, metric_type, enable_dynamic_field,
                                                                  supported_json_cast_type,

View File

@@ -124,7 +124,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
     def random_primary_key(self, request):
         yield request.param

-    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
     def vector_data_type(self, request):
         yield request.param
@@ -242,7 +242,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         # 3. prepare search params
         req_list = []
         weights = [1]
-        vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
         # 4. get hybrid search req list
         for i in range(len(vector_name_list)):
             search_param = {
@@ -276,7 +276,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         # 3. prepare search params
         req_list = []
         weights = [1]
-        vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
         # 4. get hybrid search req list
         for i in range(len(vector_name_list)):
             search_param = {
@@ -311,7 +311,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         # 3. prepare search params
         req_list = []
         weights = [1]
-        vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
         # 4. get hybrid search req list
         for i in range(len(vector_name_list)):
             search_param = {
@@ -344,7 +344,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         vector_name_list = cf.extract_vector_field_name_list(collection_w)
         vector_name_list.append(ct.default_float_vec_field_name)
         # 3. prepare search params
-        vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
         # get hybrid search req list
         search_param = {
@@ -1757,7 +1757,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         metrics = []
         search_res_dict_array = []
         search_res_dict_array_nq = []
-        vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
         # get hybrid search req list
         for i in range(len(vector_name_list)):
@@ -2014,7 +2014,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         # 1. init collection
         collection_w, insert_vectors, _, insert_ids = \
             self.init_collection_general(prefix, True, nb=nb, multiple_dim_array=[dim, dim * 2],
-                                         with_json=False, vector_data_type="SPARSE_FLOAT_VECTOR")[0:4]
+                                         with_json=False, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)[0:4]
         # 2. extract vector field name
         vector_name_list = cf.extract_vector_field_name_list(collection_w)
         # 3. prepare search params

View File

@@ -92,7 +92,7 @@ class TestCollectionRangeSearch(TestcaseBase):
             pytest.skip(f"skip index type {request.param}")
         yield request.param

-    @pytest.fixture(scope="function", params=ct.float_metrics)
+    @pytest.fixture(scope="function", params=ct.dense_metrics)
     def metric(self, request):
         tags = request.config.getoption("--tags")
         if CaseLabel.L2 not in tags:
@@ -1574,7 +1574,7 @@ class TestCollectionRangeSearch(TestcaseBase):
         # 1. initialize with data
         collection_w = self.init_collection_general(prefix, True, nb=5000,
                                                     with_json=True,
-                                                    vector_data_type=ct.sparse_vector)[0]
+                                                    vector_data_type=DataType.SPARSE_FLOAT_VECTOR)[0]
         range_filter = random.uniform(0.5, 1)
         radius = random.uniform(0, 0.5)

View File

@ -1,102 +0,0 @@
import numpy as np
from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY
from pymilvus import AnnSearchRequest, RRFRanker, WeightedRanker
from pymilvus import (
FieldSchema, CollectionSchema, DataType,
Collection
)
from common.constants import *
from utils.util_pymilvus import *
from common.common_type import CaseLabel, CheckTasks
from common import common_type as ct
from common import common_func as cf
from utils.util_log import test_log as log
from base.client_base import TestcaseBase
import heapq
from time import sleep
from decimal import Decimal, getcontext
import decimal
import multiprocessing
import numbers
import random
import math
import numpy
import threading
import pytest
import pandas as pd
from faker import Faker
Faker.seed(19530)
fake_en = Faker("en_US")
fake_zh = Faker("zh_CN")
# patch faker to generate text with specific distribution
cf.patch_faker_text(fake_en, cf.en_vocabularies_distribution)
cf.patch_faker_text(fake_zh, cf.zh_vocabularies_distribution)
pd.set_option("expand_frame_repr", False)
prefix = "search_collection"
search_num = 10
max_dim = ct.max_dim
min_dim = ct.min_dim
epsilon = ct.epsilon
hybrid_search_epsilon = 0.01
gracefulTime = ct.gracefulTime
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
max_limit = ct.max_limit
default_search_exp = "int64 >= 0"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_int64_field_name = ct.default_int64_field_name
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_json_field_name = ct.default_json_field_name
default_index_params = ct.default_index
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
range_search_supported_indexes = ct.all_index_types[:7]
uid = "test_search"
nq = 1
epsilon = 0.001
field_name = default_float_vec_field_name
binary_field_name = default_binary_vec_field_name
search_param = {"nprobe": 1}
entity = gen_entities(1, is_normal=True)
entities = gen_entities(default_nb, is_normal=True)
raw_vectors, binary_entities = gen_binary_entities(default_nb)
default_query, _ = gen_search_vectors_params(field_name, entities, default_top_k, nq)
index_name1 = cf.gen_unique_str("float")
index_name2 = cf.gen_unique_str("varhar")
half_nb = ct.default_nb // 2
max_hybrid_search_req_num = ct.max_hybrid_search_req_num
class TestSearchDSL(TestcaseBase):
@pytest.mark.tags(CaseLabel.L0)
def test_search_vector_only(self):
"""
target: test search normal scenario
method: search vector only
expected: search status ok, the length of result
"""
collection_w, _, _, insert_ids, time_stamp = \
self.init_collection_general(prefix, True, ct.default_nb)[0:5]
vectors = [[random.random() for _ in range(ct.default_dim)]
for _ in range(nq)]
collection_w.search(vectors[:nq], default_search_field,
default_search_params, ct.default_top_k,
default_search_exp,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": ct.default_top_k})

View File

@@ -110,7 +110,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
     def enable_dynamic_field(self, request):
         yield request.param

-    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
     def vector_data_type(self, request):
         yield request.param

View File

@@ -85,8 +85,8 @@ class TestSearchIterator(TestcaseBase):
     """ Test case of search iterator """

     @pytest.mark.tags(CaseLabel.L0)
-    @pytest.mark.parametrize("metric_type", ct.float_metrics)
-    @pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    @pytest.mark.parametrize("metric_type", ct.dense_metrics)
+    @pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types)
     def test_range_search_iterator_default(self, metric_type, vector_data_type):
         """
         target: test iterator range search
@@ -151,7 +151,7 @@ class TestSearchIterator(TestcaseBase):
                                      check_items={"batch_size": batch_size})

     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("metrics", ct.float_metrics)
+    @pytest.mark.parametrize("metrics", ct.dense_metrics)
     def test_search_iterator_with_expression(self, metrics):
         """
         target: test search iterator normal

View File

@@ -124,7 +124,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
     def random_primary_key(self, request):
         yield request.param

-    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
     def vector_data_type(self, request):
         yield request.param
@@ -280,7 +280,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
                                                     default_value_fields={
                                                         ct.default_float_field_name: np.float32(10.0)})[0]
         # 2. generate search data
-        vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(default_nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
         # 3. search after insert
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
@@ -479,7 +479,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
         collection_w.load()
         # 2. search iterator
         search_params = {"metric_type": "L2"}
-        vectors = cf.gen_vectors_based_on_vector_type(1, dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)
         collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size,
                                      check_task=CheckTasks.check_search_iterator,
                                      check_items={"batch_size": batch_size})

View File

@@ -62,6 +62,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
     def setup_class(self):
         super().setup_class(self)
         self.collection_name = "TestMilvusClientSearchPagination" + cf.gen_unique_str("_")
+        self.partition_names = ["partition_1", "partition_2"]
         self.float_vector_field_name = "float_vector"
         self.bfloat16_vector_field_name = "bfloat16_vector"
         self.sparse_vector_field_name = "sparse_vector"
@@ -92,32 +93,60 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
         collection_schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=256)
         collection_schema.add_field(default_int64_field_name, DataType.INT64)
         self.create_collection(client, self.collection_name, schema=collection_schema, force_teardown=False)
+        for partition_name in self.partition_names:
+            self.create_partition(client, self.collection_name, partition_name=partition_name)
         # Define number of insert iterations
         insert_times = 10
         # Generate vectors for each type and store in self
-        float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim, vector_data_type='FLOAT_VECTOR')
-        bfloat16_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.bf16_vector_dim, vector_data_type='BFLOAT16_VECTOR')
+        float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim,
+                                       vector_data_type=DataType.FLOAT_VECTOR)
+        bfloat16_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.bf16_vector_dim,
+                                          vector_data_type=DataType.BFLOAT16_VECTOR)
         sparse_vectors = cf.gen_sparse_vectors(default_nb * insert_times, empty_percentage=2)
         _, binary_vectors = cf.gen_binary_vectors(default_nb * insert_times, dim=self.binary_vector_dim)
         # Insert data multiple times with non-duplicated primary keys
         for j in range(insert_times):
-            rows = [{
-                default_primary_key_field_name: i + j * default_nb,
-                self.float_vector_field_name: list(float_vectors[i + j * default_nb]),
-                self.bfloat16_vector_field_name: bfloat16_vectors[i + j * default_nb],
-                self.sparse_vector_field_name: sparse_vectors[i + j * default_nb],
-                self.binary_vector_field_name: binary_vectors[i + j * default_nb],
-                default_float_field_name: (i + j * default_nb) * 1.0,
-                default_string_field_name: str(i + j * default_nb),
-                default_int64_field_name: i + j * default_nb
-            }
-                for i in range(default_nb)]
-            self.datas.extend(rows)
+            # Group rows by partition based on primary key mod 3
+            default_rows = []
+            partition1_rows = []
+            partition2_rows = []
+            for i in range(default_nb):
+                pk = i + j * default_nb
+                row = {
+                    default_primary_key_field_name: pk,
+                    self.float_vector_field_name: list(float_vectors[pk]),
+                    self.bfloat16_vector_field_name: bfloat16_vectors[pk],
+                    self.sparse_vector_field_name: sparse_vectors[pk],
+                    self.binary_vector_field_name: binary_vectors[pk],
+                    default_float_field_name: pk * 1.0,
+                    default_string_field_name: str(pk),
+                    default_int64_field_name: pk
+                }
+                self.datas.append(row)
+                # Distribute to partitions based on pk mod 3
+                if pk % 3 == 0:
+                    default_rows.append(row)
+                elif pk % 3 == 1:
+                    partition1_rows.append(row)
+                else:
+                    partition2_rows.append(row)
+            # Insert into respective partitions
+            if default_rows:
+                self.insert(client, self.collection_name, data=default_rows)
+            if partition1_rows:
+                self.insert(client, self.collection_name, data=partition1_rows, partition_name=self.partition_names[0])
+            if partition2_rows:
+                self.insert(client, self.collection_name, data=partition2_rows, partition_name=self.partition_names[1])
+            # Track all inserted data and primary keys
             self.primary_keys.extend([i + j * default_nb for i in range(default_nb)])
-            self.insert(client, self.collection_name, data=rows)
         self.flush(client, self.collection_name)

         # Create index
@@ -165,7 +194,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
         # 2. Search with pagination for 10 pages
         limit = 100
         pages = 10
-        vectors_to_search = cf.gen_vectors(default_nq, default_dim)
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
         all_pages_results = []
         for page in range(pages):
             offset = page * limit
@@ -224,7 +253,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
         # 2. Search with pagination for 10 pages
         limit = 100
         pages = 10
-        vectors_to_search = cf.gen_vectors(default_nq, self.bf16_vector_dim, vector_data_type='BFLOAT16_VECTOR')
+        vectors_to_search = cf.gen_vectors(default_nq, self.bf16_vector_dim, vector_data_type=DataType.BFLOAT16_VECTOR)
         all_pages_results = []
         for page in range(pages):
             offset = page * limit
@@ -374,10 +403,12 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
             for i in range(default_nq):
                 page_ids = [page_res[i][j].get('id') for j in range(limit)]
                 ids_in_full = [search_res_full[i][p * limit:p * limit + limit][j].get('id') for j in range(limit)]
-                # Calculate percentage of matching items
-                matching_items = sum(1 for x, y in zip(page_ids, ids_in_full) if x == y)
-                match_percentage = (matching_items / len(page_ids)) * 100
-                assert match_percentage >= 80, f"Only {match_percentage}% items matched, expected >= 80%"
+                # Calculate intersection between paginated results and baseline full results
+                common_ids = set(page_ids) & set(ids_in_full)
+                # Calculate overlap ratio using full results as baseline
+                overlap_ratio = len(common_ids) / len(ids_in_full) * 100
+                assert overlap_ratio >= 80, f"Only {overlap_ratio}% overlap with baseline results, expected >= 80%"

     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("limit", [100, 3000, 10000])
@@ -399,8 +430,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
         topK=16384
         offset = topK - limit
         search_param = {"nprobe": 10, "offset": offset}
-        vectors_to_search = [[random.random() for _ in range(default_dim)]
-                             for _ in range(default_nq)]
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
         client.search(collection_name, vectors_to_search[:default_nq], anns_field=self.float_vector_field_name,
                       search_params=search_param, limit=limit, check_task=CheckTasks.check_search_results,
                       check_items={"enable_milvus_client_api": True,
@@ -438,6 +468,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
             limit = 0
         elif len(filter_ids) - offset < default_limit:
             limit = len(filter_ids) - offset
+        # 3. search with a high nprobe for better accuracy
         search_params = {"metric_type": "COSINE", "params": {"nprobe": 128}, "offset": offset}
         vectors_to_search = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
         search_res_with_offset, _ = self.search(
@@ -454,7 +485,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
                          "limit": limit}
         )

-        # 3. search with offset+limit
+        # 4. search with offset+limit
         search_params_full = {"metric_type": "COSINE", "params": {"nprobe": 128}}
         search_res_full, _ = self.search(
             client,
@@ -466,7 +497,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
             filter=expr
         )

-        # 4. Compare results
+        # 5. Compare results
         filter_ids_set = set(filter_ids)
         for hits in search_res_with_offset:
             ids = [hit.get('id') for hit in hits]
@@ -477,7 +508,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
             ids_in_full = [search_res_full[0][offset:offset + limit][j].get('id') for j in range(limit)]
             assert page_ids == ids_in_full

-        # 5. search again with expression template
+        # 6. search again with expression template
         expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or")
         expr_params = cf.get_expr_params_from_template(expressions[1])
         search_res_with_offset, _ = self.search(
@@ -495,7 +526,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
                          "limit": limit}
         )

-        # 6. search with offset+limit
+        # 7. search with offset+limit
         search_res_full, _ = self.search(
             client,
             collection_name,
@@ -507,7 +538,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
             filter_params=expr_params
         )

-        # Compare results
+        # 8. Compare results
         filter_ids_set = set(filter_ids)
         for hits in search_res_with_offset:
             ids = [hit.get('id') for hit in hits]
@ -518,348 +549,466 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
ids_in_full = [search_res_full[0][offset:offset + limit][j].get('id') for j in range(limit)] ids_in_full = [search_res_full[0][offset:offset + limit][j].get('id') for j in range(limit)]
assert page_ids == ids_in_full assert page_ids == ids_in_full
@pytest.mark.tags(CaseLabel.L1)
class TestSearchPagination(TestcaseBase): def test_search_pagination_in_partitions(self):
""" Test case of search pagination """
@pytest.fixture(scope="function", params=[0, 10, 100])
def offset(self, request):
yield request.param
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=[False, True])
def _async(self, request):
yield request.param
@pytest.fixture(scope="function", params=[True, False])
def enable_dynamic_field(self, request):
yield request.param
@pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
def vector_data_type(self, request):
yield request.param
""" """
****************************************************************** target: test search pagination in partitions
# The following are valid base cases method: 1. create collection and insert data
****************************************************************** 2. search with pagination in partitions
3. compare with the search results whose corresponding ids should be the same
""" """
client = self._client()
@pytest.mark.tags(CaseLabel.L2) collection_name = self.collection_name
def test_search_pagination_with_index_partition(self, offset, _async): vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
""" # search with pagination in partition_1
target: test search pagination with index and partition limit = 50
method: create connection, collection, insert data, create index and search pages = 10
expected: searched successfully for page in range(pages):
""" offset = page * limit
# 1. initialize with data
auto_id = False
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
partition_num=1,
auto_id=auto_id,
is_index=False)[0:4]
vectors = [[random.random() for _ in range(default_dim)]
for _ in range(default_nq)]
# 2. create index
default_index = {"index_type": "IVF_FLAT",
"params": {"nlist": 128}, "metric_type": "L2"}
collection_w.create_index("float_vector", default_index)
collection_w.load()
# 3. search through partitions
par = collection_w.partitions
limit = 100
search_params = {"metric_type": "L2",
"params": {"nprobe": 10}, "offset": offset}
search_res = collection_w.search(vectors[:default_nq], default_search_field,
search_params, limit, default_search_exp,
[par[0].name, par[1].name], _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": limit,
"_async": _async})[0]
# 3. search through partitions with offset+limit
search_params = {"metric_type": "L2"}
res = collection_w.search(vectors[:default_nq], default_search_field, search_params,
limit + offset, default_search_exp,
[par[0].name, par[1].name], _async=_async)[0]
if _async:
search_res.done()
search_res = search_res.result()
res.done()
res = res.result()
res_distance = res[0].distances[offset:]
# assert cf.sort_search_distance(search_res[0].distances) == cf.sort_search_distance(res_distance)
assert set(search_res[0].ids) == set(res[0].ids[offset:])
@pytest.mark.tags(CaseLabel.L2)
def test_search_pagination_with_inserted_data(self, offset, _async):
"""
target: test search pagination with inserted data
method: create connection, collection, insert data and search
check the results by searching with limit+offset
expected: searched successfully
"""
# 1. create collection
collection_w = self.init_collection_general(
prefix, False, dim=default_dim)[0]
# 2. insert data
data = cf.gen_default_dataframe_data(dim=default_dim)
collection_w.insert(data)
collection_w.load()
# 3. search
search_params = {"offset": offset} search_params = {"offset": offset}
search_res = collection_w.search(vectors[:default_nq], default_search_field, search_res_with_offset, _ = self.search(
search_params, default_limit, client,
default_search_exp, _async=_async, collection_name,
vectors_to_search[:default_nq],
partition_names=[self.partition_names[0]],
anns_field=self.float_vector_field_name,
search_params=search_params,
limit=limit,
check_task=CheckTasks.check_search_results, check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq, check_items={"enable_milvus_client_api": True,
"limit": default_limit, "nq": default_nq, "limit": limit})
"_async": _async})[0]
# 4. search through partitions with offset+limit
search_params = {}
res = collection_w.search(vectors[:default_nq], default_search_field, search_params,
default_limit + offset, default_search_exp, _async=_async)[0]
if _async:
search_res.done()
search_res = search_res.result()
res.done()
res = res.result()
res_distance = res[0].distances[offset:]
assert sorted(search_res[0].distances) == sorted(res_distance)
assert set(search_res[0].ids) == set(res[0].ids[offset:])
@pytest.mark.tags(CaseLabel.L2)
def test_search_pagination_empty(self, offset, _async):
"""
target: test search pagination empty
method: connect, create collection, insert data and search
expected: search successfully
"""
# 1. initialize without data
auto_id = False
collection_w = self.init_collection_general(
prefix, True, auto_id=auto_id, dim=default_dim)[0]
# 2. search collection without data
search_param = {"metric_type": "COSINE",
"params": {"nprobe": 10}, "offset": offset}
search_res = collection_w.search([], default_search_field, search_param,
default_limit, default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": 0,
"_async": _async})[0]
if _async:
search_res.done()
search_res = search_res.result()
assert len(search_res) == 0
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("offset", [3000, 5000])
def test_search_pagination_with_offset_over_num_entities(self, offset):
"""
target: test search pagination with offset over num_entities
method: create connection, collection, insert 3000 entities and search with offset over 3000
expected: return an empty list
"""
# 1. initialize
collection_w = self.init_collection_general(
prefix, True, dim=default_dim)[0]
# 2. search
search_param = {"metric_type": "COSINE",
"params": {"nprobe": 10}, "offset": offset}
vectors = [[random.random() for _ in range(default_dim)]
for _ in range(default_nq)]
res = collection_w.search(vectors[:default_nq], default_search_field,
search_param, default_limit,
default_search_exp,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": 0})[0]
assert res[0].ids == []
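A small sketch of why an offset past the number of inserted entities yields an empty page; the helper below is illustrative only and simply mirrors the arithmetic the assertion depends on:

def expected_page_size(num_entities, offset, limit):
    # a page can only contain whatever candidates remain after skipping `offset`
    return max(0, min(limit, num_entities - offset))

assert expected_page_size(num_entities=3000, offset=5000, limit=10) == 0
assert expected_page_size(num_entities=3000, offset=2995, limit=10) == 5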
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index", ct.all_index_types[:7])
def test_search_pagination_after_different_index(self, index, offset, _async):
"""
target: test search pagination after different index
method: test search pagination after different index and corresponding search params
expected: search successfully
"""
# 1. initialize with data
dim = 128
auto_id = True
collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 1000,
partition_num=1,
auto_id=auto_id,
dim=dim, is_index=False)[0:5]
# 2. create index and load
params = cf.get_index_params_params(index)
default_index = {"index_type": index, "params": params, "metric_type": "L2"}
collection_w.create_index("float_vector", default_index)
collection_w.load()
# 3. search
search_params = cf.gen_search_param(index)
vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
for search_param in search_params:
res = collection_w.search(vectors[:default_nq], default_search_field, search_param,
default_limit + offset, default_search_exp, _async=_async)[0]
search_param["offset"] = offset
log.info("Searching with search params: {}".format(search_param))
search_res = collection_w.search(vectors[:default_nq], default_search_field,
search_param, default_limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": default_limit,
"_async": _async})[0]
if _async:
search_res.done()
search_res = search_res.result()
res.done()
res = res.result()
res_distance = res[0].distances[offset:]
# assert sorted(search_res[0].distances, key=numpy.float32) == sorted(res_distance, key=numpy.float32)
assert set(search_res[0].ids) == set(res[0].ids[offset:])
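The distance comparison above is deliberately left commented out; only the ordering of the returned distances is still meaningful across the different indexes and metrics. A hedged sketch of such an order-only check, assuming the usual convention that L2 distances ascend while IP and COSINE scores descend:

def assert_distances_sorted(distances, metric_type):
    if metric_type in ("IP", "COSINE"):
        pairs_ok = all(a >= b for a, b in zip(distances, distances[1:]))
    else:  # L2 and other smaller-is-better metrics
        pairs_ok = all(a <= b for a, b in zip(distances, distances[1:]))
    assert pairs_ok, f"distances not sorted for {metric_type}: {distances}"

assert_distances_sorted([0.1, 0.2, 0.5], "L2")
assert_distances_sorted([0.93, 0.71, 0.71], "COSINE")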
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("offset", [100, default_nb // 2])
def test_search_offset_different_position(self, offset):
"""
target: test search pagination with offset in different position
method: create connection, collection, insert entities and search with offset
expected: search successfully
"""
# 1. initialize
collection_w = self.init_collection_general(prefix, True)[0]
# 2. search with offset in params
search_params = {"metric_type": "COSINE",
"params": {"nprobe": 10}, "offset": offset}
res1 = collection_w.search(vectors[:default_nq], default_search_field,
search_params, default_limit)[0]
# 3. search with offset outside params
res2 = collection_w.search(vectors[:default_nq], default_search_field, default_search_params,
default_limit, offset=offset)[0]
assert res1[0].ids == res2[0].ids
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("offset", [1, 5, 20])
def test_search_sparse_with_pagination(self, offset):
"""
target: test search sparse with pagination
method: 1. connect and create a collection
2. search pagination with offset
3. search with offset+limit
4. compare with the search results whose corresponding ids should be the same
expected: search successfully and ids is correct
"""
# 1. create a collection
auto_id = False
collection_w, _, _, insert_ids = \
self.init_collection_general(
prefix, True, auto_id=auto_id, vector_data_type=ct.sparse_vector)[0:4]
# 2. search with offset+limit
search_param = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}, "offset": offset}
search_vectors = cf.gen_default_list_sparse_data()[-1][-2:]
search_res = collection_w.search(search_vectors, ct.default_sparse_vec_field_name,
search_param, default_limit)[0]
# 3. search
_search_param = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}}
res = collection_w.search(search_vectors[:default_nq], ct.default_sparse_vec_field_name, _search_param,
default_limit + offset)[0]
assert len(search_res[0].ids) == len(res[0].ids[offset:])
assert sorted(search_res[0].distances, key=np.float32) == sorted(
res[0].distances[offset:], key=np.float32)
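For reference, sparse query vectors in these tests are plain mappings from dimension index to weight. A standalone sketch of that shape (the generator below is illustrative; the suite itself uses cf.gen_default_list_sparse_data / cf.gen_vectors):

import random

def gen_sparse_query(dim=1000, nnz=20):
    # sparse float vector: {dimension_index: weight}, only non-zero entries stored
    picked = random.sample(range(dim), nnz)
    return {i: round(random.random(), 4) for i in picked}

query = gen_sparse_query()
assert len(query) == 20 and all(0 <= i < 1000 for i in query)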
class TestSearchPaginationInvalid(TestMilvusClientV2Base):
""" Test case of search pagination """
"""
******************************************************************
# The following are invalid cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_search_pagination_with_different_offset(self):
"""
target: test search pagination with different offset
method: 1. create collection and insert data
2. search with different offset, including offset > limit, offset = 0
3. compare with the search results whose corresponding ids should be the same
"""
client = self._client()
collection_name = self.collection_name
vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
# search with offset > limit
offset = default_limit + 10
search_params = {"offset": offset}
self.search(client, collection_name, vectors_to_search[:default_nq],
anns_field=self.float_vector_field_name,
search_params=search_params, limit=default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": default_nq, "limit": default_limit})
# search with offset = 0
offset = 0
search_params = {"offset": offset}
self.search(client, collection_name, vectors_to_search[:default_nq],
anns_field=self.float_vector_field_name,
search_params=search_params, limit=default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": default_nq, "limit": default_limit})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("offset", [0, 20, 100, 200])
def test_search_offset_different_position(self, offset):
"""
target: test search offset param in different position
method: create connection, collection, insert data, search with offset in different position
expected: search successfully
"""
client = self._client()
collection_name = self.collection_name
vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
# 1. search with offset in search_params
limit = 100
search_params = {"offset": offset}
res1, _ = self.search(client, collection_name, vectors_to_search[:default_nq],
anns_field=self.float_vector_field_name,
search_params=search_params,
limit=limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": default_nq, "limit": limit})
# 2. search with offset in search
search_params = {}
res2, _ = self.search(client, collection_name, vectors_to_search[:default_nq],
anns_field=self.float_vector_field_name,
search_params=search_params,
offset=offset,
limit=limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": default_nq, "limit": limit})
# 3. compare results
assert res1 == res2
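Both call shapes above are expected to paginate identically: whether the offset travels inside search_params or as a separate argument, a single effective offset applies. An illustrative normalization, under the assumption (as in this test) that only one of the two is set:

def effective_offset(search_params, offset_arg=None):
    # prefer the value embedded in search_params, otherwise the explicit argument
    return search_params.get("offset", offset_arg if offset_arg is not None else 0)

assert effective_offset({"offset": 100}) == effective_offset({}, offset_arg=100) == 100
assert effective_offset({}) == 0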
@pytest.mark.tags(CaseLabel.L2)
def test_search_pagination_empty_list(self):
"""
target: test search pagination with empty list of vectors
method: create connection, collection, insert data, search with offset
expected: search successfully
"""
client = self._client()
collection_name = self.collection_name
vectors_to_search = []
offset = 10
limit = 100
search_params = {"offset": offset}
error ={"err_code": 1, "err_msg": "list index out of range"}
self.search(client, collection_name, vectors_to_search,
anns_field=self.float_vector_field_name,
search_params=search_params,
limit=limit,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("offset", [" ", 1.0, [1, 2], {1}, "12 s"])
def test_search_pagination_with_invalid_offset_type(self, offset):
"""
target: test search pagination with invalid offset type
method: create connection, collection, insert and search with invalid offset type
expected: raise exception
"""
client = self._client()
collection_name = self.collection_name
vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
search_params = {"offset": offset}
error = {"err_code": 1, "err_msg": "wrong type for offset, expect int"}
self.search(
client,
collection_name,
vectors_to_search[:default_nq],
anns_field=self.float_vector_field_name,
search_params=search_params,
limit=default_limit,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("offset", [-1, 16385])
def test_search_pagination_with_invalid_offset_value(self, offset):
"""
target: test search pagination with invalid offset value
method: create connection, collection, insert and search with invalid offset value
expected: raise exception
"""
client = self._client()
collection_name = self.collection_name
vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
search_params = {"offset": offset}
error = {"err_code": 1, "err_msg": f"offset [{offset}] is invalid, it should be in range [1, 16384]"}
self.search(
client,
collection_name,
vectors_to_search[:default_nq],
anns_field=self.float_vector_field_name,
search_params=search_params,
limit=default_limit,
check_task=CheckTasks.err_res,
check_items=error
)
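The two negative cases above pin down what offsets are accepted: the value must be an int, the server's error message quotes the range [1, 16384], and offset=0 is also accepted as "no pagination" (see test_search_pagination_with_different_offset earlier). A client-side mirror of that rule, for illustration only:

MAX_OFFSET = 16384  # upper bound quoted by the server error message

def is_acceptable_offset(offset):
    return isinstance(offset, int) and not isinstance(offset, bool) and 0 <= offset <= MAX_OFFSET

assert not is_acceptable_offset(-1)
assert not is_acceptable_offset(16385)
assert not is_acceptable_offset("12 s")
assert is_acceptable_offset(0) and is_acceptable_offset(16384)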
class TestSearchPaginationIndependent(TestMilvusClientV2Base):
""" Test case of search pagination with independent collection """
def do_search_pagination_and_assert(self, client, collection_name,
limit=10, pages=10,
dim=default_dim,
vector_dtype=DataType.FLOAT_VECTOR,
index=ct.L0_index_types[0],
metric_type=ct.default_L0_metric,
expected_overlap_ratio=80):
# 2. Search with pagination over the requested pages
vectors_to_search = cf.gen_vectors(default_nq, dim, vector_data_type=vector_dtype)
all_pages_results = []
for page in range(pages):
offset = page * limit
search_params = {"offset": offset}
search_res_with_offset, _ = self.search(
client,
collection_name,
vectors_to_search[:default_nq],
anns_field=default_vector_field_name,
search_params=search_params,
limit=limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": default_nq,
"limit": limit,
"metric": metric_type,
}
)
all_pages_results.append(search_res_with_offset)
# 3. Search without pagination
search_params_full = {}
search_res_full, _ = self.search(
client,
collection_name,
vectors_to_search[:default_nq],
anns_field=default_vector_field_name,
search_params=search_params_full,
limit=limit * pages
)
# 4. Compare results - verify pagination results equal the results in full search with offsets
for p in range(pages):
page_res = all_pages_results[p]
for i in range(default_nq):
page_ids = [page_res[i][j].get('id') for j in range(limit)]
ids_in_full = [search_res_full[i][p * limit:p * limit + limit][j].get('id') for j in range(limit)]
# Calculate intersection between paginated results and baseline full results
common_ids = set(page_ids) & set(ids_in_full)
# Calculate overlap ratio using full results as baseline
overlap_ratio = len(common_ids) / len(ids_in_full) * 100
log.debug(
f"pagination search {vector_dtype.name} {index} {metric_type} results overlap {overlap_ratio}")
assert overlap_ratio >= expected_overlap_ratio, \
f"Only {overlap_ratio}% overlap with baseline results, expected >= {expected_overlap_ratio}%"
"""
******************************************************************
# The following are invalid cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize('vector_dtype', ct.all_dense_vector_types)
@pytest.mark.parametrize('index', ct.all_index_types[:7])
@pytest.mark.parametrize('metric_type', ct.dense_metrics)
def test_search_pagination_dense_vectors_indices_metrics_growing(self, vector_dtype, index, metric_type):
"""
target: test search pagination with growing data
method: create connection, collection, insert data and search
check the results by searching with limit+offset
expected: searched successfully
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema, _ = self.create_schema(client)
schema.add_field(default_primary_key_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, datatype=vector_dtype, dim=default_dim)
schema.add_field(default_float_field_name, datatype=DataType.FLOAT)
schema.add_field(default_string_field_name, datatype=DataType.VARCHAR, max_length=100)
self.create_collection(client, collection_name, schema=schema)
# Insert data in 3 batches with unique primary keys using a loop
insert_times = 3
random_vectors = list(cf.gen_vectors(default_nb*insert_times, default_dim, vector_data_type=vector_dtype)) \
if vector_dtype == DataType.FLOAT_VECTOR \
else cf.gen_vectors(default_nb*insert_times, default_dim, vector_data_type=vector_dtype)
for j in range(insert_times):
start_pk = j * default_nb
rows = [{
default_primary_key_field_name: i + start_pk,
default_vector_field_name: random_vectors[i + start_pk],
default_float_field_name: (i + start_pk) * 1.0,
default_string_field_name: str(i + start_pk)
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# build index
index_params, _ = self.prepare_index_params(client)
index_params.add_index(default_vector_field_name, index_type=index,
metric_type=metric_type,
params=cf.get_index_params_params(index_type=index))
self.create_index(client, collection_name, index_params=index_params)
# load the collection with index
assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
self.load_collection(client, collection_name)
# search and assert
limit = 50
pages = 5
expected_overlap_ratio = 20
self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
vector_dtype=vector_dtype, index=index, metric_type=metric_type,
expected_overlap_ratio=expected_overlap_ratio)
# insert additional data without flush
random_vectors = list(cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)) \
if vector_dtype == DataType.FLOAT_VECTOR \
else cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)
start_pk = default_nb * insert_times
rows = [{
default_primary_key_field_name: i + start_pk,
default_vector_field_name: random_vectors[i],
default_float_field_name: (i + start_pk) * 1.0,
default_string_field_name: str(i + start_pk)
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# search and assert
self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
vector_dtype=vector_dtype, index=index, metric_type=metric_type,
expected_overlap_ratio=expected_overlap_ratio)
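The batched inserts above keep primary keys unique by continuing the numbering batch after batch, which is what lets the final un-flushed (growing) insert reuse the same layout. A tiny sketch of that bookkeeping, with made-up sizes:

def batch_pk_range(batch_index, batch_size):
    start = batch_index * batch_size
    return range(start, start + batch_size)

assert list(batch_pk_range(0, 3)) == [0, 1, 2]
assert list(batch_pk_range(2, 3)) == [6, 7, 8]  # third batch continues where the second stopped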
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize('index', ct.binary_supported_index_types)
@pytest.mark.parametrize('metric_type', ct.binary_metrics[:2])
def test_search_pagination_binary_index_growing(self, index, metric_type):
"""
target: test search pagination with binary index
method: create connection, collection, insert data, create index and search
expected: searched successfully
"""
vector_dtype = DataType.BINARY_VECTOR
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema, _ = self.create_schema(client)
schema.add_field(default_primary_key_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, datatype=vector_dtype, dim=default_dim)
self.create_collection(client, collection_name, schema=schema)
# Insert data in 3 batches with unique primary keys using a loop
insert_times = 3
random_vectors = list(cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)) \
if vector_dtype == DataType.FLOAT_VECTOR \
else cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)
for j in range(insert_times):
start_pk = j * default_nb
rows = [{
default_primary_key_field_name: i + start_pk,
default_vector_field_name: random_vectors[i + start_pk]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# build index
index_params, _ = self.prepare_index_params(client)
index_params.add_index(default_vector_field_name, index_type=index,
metric_type=metric_type,
params=cf.get_index_params_params(index_type=index))
self.create_index(client, collection_name, index_params=index_params)
# load the collection with index
assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
self.load_collection(client, collection_name)
# search and assert
limit = 50
pages = 5
expected_overlap_ratio = 20
self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
vector_dtype=vector_dtype, index=index, metric_type=metric_type,
expected_overlap_ratio=expected_overlap_ratio)
# insert additional data without flush
random_vectors = list(cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)) \
if vector_dtype == DataType.FLOAT_VECTOR \
else cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)
start_pk = default_nb * insert_times
rows = [{
default_primary_key_field_name: i + start_pk,
default_vector_field_name: random_vectors[i]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# search and assert
self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
vector_dtype=vector_dtype, index=index, metric_type=metric_type,
expected_overlap_ratio=expected_overlap_ratio)
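For BINARY_VECTOR fields a row is a byte string of dim / 8 bytes, one bit per dimension. A hedged sketch of such a row using numpy purely for illustration; the suite itself obtains its binary vectors from cf.gen_vectors:

import numpy as np

def gen_binary_vector(dim=128):
    bits = np.random.randint(0, 2, size=dim, dtype=np.uint8)  # one bit per dimension
    return np.packbits(bits).tobytes()                        # packed into dim // 8 bytes

row_vector = gen_binary_vector(128)
assert len(row_vector) == 128 // 8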
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize('index', ct.sparse_supported_index_types)
@pytest.mark.parametrize('metric_type', ["IP"])
def test_search_pagination_sparse_index_growing(self, index, metric_type):
"""
target: test search pagination with sparse index
method: create connection, collection, insert data, create index and search
expected: searched successfully
"""
vector_dtype = DataType.SPARSE_FLOAT_VECTOR
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema, _ = self.create_schema(client)
schema.add_field(default_primary_key_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, datatype=vector_dtype)
self.create_collection(client, collection_name, schema=schema)
# Insert data in 3 batches with unique primary keys using a loop
insert_times = 3
random_vectors = list(cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)) \
if vector_dtype == DataType.FLOAT_VECTOR \
else cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)
for j in range(insert_times):
start_pk = j * default_nb
rows = [{
default_primary_key_field_name: i + start_pk,
default_vector_field_name: random_vectors[i + start_pk]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# build index
index_params, _ = self.prepare_index_params(client)
index_params.add_index(default_vector_field_name, index_type=index,
metric_type=metric_type,
params=cf.get_index_params_params(index_type=index))
self.create_index(client, collection_name, index_params=index_params)
# load the collection with index
assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
self.load_collection(client, collection_name)
# search and assert
limit = 50
pages = 5
expected_overlap_ratio = 20
self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
vector_dtype=vector_dtype, index=index, metric_type=metric_type,
expected_overlap_ratio=expected_overlap_ratio)
# insert additional data without flush
random_vectors = list(cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)) \
if vector_dtype == DataType.FLOAT_VECTOR \
else cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)
start_pk = default_nb * insert_times
rows = [{
default_primary_key_field_name: i + start_pk,
default_vector_field_name: random_vectors[i]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# search and assert
self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
vector_dtype=vector_dtype, index=index, metric_type=metric_type,
expected_overlap_ratio=expected_overlap_ratio)
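Sparse vectors are searched with the IP metric here; for two sparse maps the inner product only involves dimensions present in both. A minimal illustration of that computation (values are made up):

def sparse_ip(a, b):
    shared = a.keys() & b.keys()
    return sum(a[k] * b[k] for k in shared)

assert sparse_ip({1: 0.5, 7: 2.0}, {7: 1.5, 9: 3.0}) == 3.0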
@@ -676,7 +676,7 @@ class TestCollectionSearch(TestcaseBase):
def random_primary_key(self, request):
yield request.param
- @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+ @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
def vector_data_type(self, request):
yield request.param
@@ -1613,7 +1613,7 @@ class TestCollectionSearch(TestcaseBase):
enable_dynamic_field = False
collection_w, _, _, insert_ids, time_stamp = \
self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id,
- dim=dim, is_index=False,
+ dim=dim, is_index=False, vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
enable_dynamic_field=enable_dynamic_field)[0:5]
# 2. create index and load
params = cf.get_index_params_params(index)
@@ -1624,7 +1624,7 @@ class TestCollectionSearch(TestcaseBase):
if (dim % params["PQM"]) != 0:
params["PQM"] = dim // 4
default_index = {"index_type": index, "params": params, "metric_type": "L2"}
- collection_w.create_index("float_vector", default_index)
+ collection_w.create_index("sparse_vector", default_index)
collection_w.load()
# 3. search
search_params = cf.gen_search_param(index)
@@ -1714,6 +1714,7 @@ class TestCollectionSearch(TestcaseBase):
collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000,
partition_num=1,
auto_id=auto_id,
+ vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
dim=min_dim, is_index=False)[0:5]
# 2. create index and load
params = cf.get_index_params_params(index)
@@ -1722,7 +1723,7 @@ class TestCollectionSearch(TestcaseBase):
if params.get("PQM"):
params["PQM"] = min_dim
default_index = {"index_type": index, "params": params, "metric_type": "L2"}
- collection_w.create_index("float_vector", default_index)
+ collection_w.create_index("sparse_vector", default_index)
collection_w.load()
# 3. search
search_params = cf.gen_search_param(index)
@@ -1885,6 +1886,7 @@ class TestCollectionSearch(TestcaseBase):
enable_dynamic_field = False
collection_w, _, _, insert_ids, time_stamp = \
self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id,
+ vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5]
# 2. create different index
params = cf.get_index_params_params(index)
@@ -1896,7 +1898,7 @@ class TestCollectionSearch(TestcaseBase):
params["PQM"] = dim // 4
log.info("test_search_after_index_different_metric_type: Creating index-%s" % index)
default_index = {"index_type": index, "params": params, "metric_type": "IP"}
- collection_w.create_index("float_vector", default_index)
+ collection_w.create_index("sparse_vector", default_index)
log.info("test_search_after_index_different_metric_type: Created index-%s" % index)
collection_w.load()
# 3. search
@@ -2924,10 +2926,10 @@ class TestCollectionSearch(TestcaseBase):
limit = 0
insert_ids = []
vector_name_list = cf.extract_vector_field_name_list(collection_w)
- for search_field in vector_name_list:
-     vector_data_type = search_field.lstrip("multiple_vector_")
-     vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type)
-     res = collection_w.search(vectors[:nq], search_field,
+ for vector_field_name in vector_name_list:
+     vector_data_type = cf.get_field_dtype_by_field_name(collection_w, vector_field_name)
+     vectors = cf.gen_vectors(nq, dim, vector_data_type)
+     res = collection_w.search(vectors[:nq], vector_field_name,
default_search_params, default_limit,
search_exp, _async=_async,
output_fields=[default_int64_field_name,
@@ -3213,7 +3215,7 @@ class TestCollectionSearch(TestcaseBase):
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("index", ct.all_index_types[:7])
- @pytest.mark.parametrize("metrics", ct.float_metrics)
+ @pytest.mark.parametrize("metrics", ct.dense_metrics)
@pytest.mark.parametrize("limit", [20, 1200])
def test_search_output_field_vector_after_different_index_metrics(self, index, metrics, limit):
"""
@@ -288,7 +288,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base):
# hybrid_search
search_param = {
- "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type="FLOAT_VECTOR"),
+ "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR),
"anns_field": ct.default_float_vec_field_name,
"param": {"metric_type": "COSINE", "params": {"ef": "96"}},
"limit": ct.default_limit,
@@ -296,7 +296,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base):
req = AnnSearchRequest(**search_param)
search_param2 = {
- "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type="FLOAT_VECTOR"),
+ "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR),
"anns_field": default_vector_name,
"param": {"metric_type": "L2", "params": {"nprobe": "32"}},
"limit": ct.default_limit
@@ -857,16 +857,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
# log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}")
for f in [df.float_vec_field, df.bf16_vec_field, df.fp16_vec_field]:
- vector_data_type = "FLOAT_VECTOR"
+ vector_data_type = DataType.FLOAT_VECTOR
if f == df.float_vec_field:
dim = float_vec_field_dim
- vector_data_type = "FLOAT_VECTOR"
+ vector_data_type = DataType.FLOAT_VECTOR
elif f == df.bf16_vec_field:
dim = bf16_vec_field_dim
- vector_data_type = "BFLOAT16_VECTOR"
+ vector_data_type = DataType.BFLOAT16_VECTOR
else:
dim = fp16_vec_field_dim
- vector_data_type = "FLOAT16_VECTOR"
+ vector_data_type = DataType.FLOAT16_VECTOR
search_data = cf.gen_vectors(1, dim, vector_data_type=vector_data_type)
search_params = ct.default_search_params
@@ -1043,16 +1043,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
# log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}")
for f in [df.float_vec_field, df.bf16_vec_field, df.fp16_vec_field]:
- vector_data_type = "FLOAT_VECTOR"
+ vector_data_type = DataType.FLOAT_VECTOR
if f == df.float_vec_field:
dim = float_vec_field_dim
- vector_data_type = "FLOAT_VECTOR"
+ vector_data_type = DataType.FLOAT_VECTOR
elif f == df.bf16_vec_field:
dim = bf16_vec_field_dim
- vector_data_type = "BFLOAT16_VECTOR"
+ vector_data_type = DataType.BFLOAT16_VECTOR
else:
dim = fp16_vec_field_dim
- vector_data_type = "FLOAT16_VECTOR"
+ vector_data_type = DataType.FLOAT16_VECTOR
search_data = cf.gen_vectors(1, dim, vector_data_type=vector_data_type)
search_params = ct.default_search_params
@@ -1217,16 +1217,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
# log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}")
for f in [df.float_vec_field, df.bf16_vec_field, df.fp16_vec_field]:
- vector_data_type = "FLOAT_VECTOR"
+ vector_data_type = DataType.FLOAT_VECTOR
if f == df.float_vec_field:
dim = float_vec_field_dim
- vector_data_type = "FLOAT_VECTOR"
+ vector_data_type = DataType.FLOAT_VECTOR
elif f == df.bf16_vec_field:
dim = bf16_vec_field_dim
- vector_data_type = "BFLOAT16_VECTOR"
+ vector_data_type = DataType.BFLOAT16_VECTOR
else:
dim = fp16_vec_field_dim
- vector_data_type = "FLOAT16_VECTOR"
+ vector_data_type = DataType.FLOAT16_VECTOR
search_data = cf.gen_vectors(1, dim, vector_data_type=vector_data_type)
search_params = ct.default_search_params
@@ -1616,8 +1616,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None,
df.array_bool_field: [True, False] if not (nullable and random.random() < 0.5) else None,
df.float_vec_field: cf.gen_vectors(1, dim)[0],
- df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0],
- df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0],
+ df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)[0],
+ df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.BFLOAT16_VECTOR)[0],
df.sparse_vec_field: cf.gen_sparse_vectors(1, dim, sparse_format=sparse_format)[0]
}
if auto_id:
@@ -1922,8 +1922,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
df.string_field: "string",
df.json_field: json_value[i%len(json_value)],
df.float_vec_field: cf.gen_vectors(1, dim)[0],
- df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0],
- df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0],
+ df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT16_VECTOR)[0],
+ df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.BFLOAT16_VECTOR)[0],
}
if auto_id:
row.pop(df.pk_field)
@@ -2064,8 +2064,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None,
df.array_bool_field: [True, False] if not (nullable and random.random() < 0.5) else None,
df.float_vec_field: cf.gen_vectors(1, dim)[0],
- df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0],
- df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0],
+ df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT16_VECTOR)[0],
+ df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.BFLOAT16_VECTOR)[0],
df.sparse_vec_field: cf.gen_sparse_vectors(1, dim, sparse_format=sparse_format)[0]
}
if auto_id:
@@ -2536,7 +2536,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
# verify search
self.collection_wrap.search(
- data=cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR.name),
+ data=cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR),
anns_field=df.float_vec_field, param=DefaultVectorSearchParams.IVF_SQ8(),
limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
@@ -50,7 +50,7 @@ vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
max_vector_field_num = ct.max_vector_field_num
- SPARSE_FLOAT_VECTOR_data_type = "SPARSE_FLOAT_VECTOR"
+ SPARSE_FLOAT_VECTOR_data_type = DataType.SPARSE_FLOAT_VECTOR
class TestCollectionParams(TestcaseBase):
@@ -1061,7 +1061,7 @@ class TestCollectionParams(TestcaseBase):
# 2. create collection with multiple vectors
c_name = cf.gen_unique_str(prefix)
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
- cf.gen_float_vec_field(vector_data_type=ct.sparse_vector), cf.gen_float_vec_field(name="vec_sparse", vector_data_type=ct.sparse_vector)]
+ cf.gen_float_vec_field(vector_data_type=DataType.FLOAT_VECTOR), cf.gen_float_vec_field(name="vec_sparse", vector_data_type=DataType.SPARSE_FLOAT_VECTOR)]
schema = cf.gen_collection_schema(fields=fields)
self.collection_wrap.init_collection(c_name, schema=schema,
check_task=CheckTasks.check_collection_property,
@@ -3260,7 +3260,7 @@ class TestLoadPartition(TestcaseBase):
)
def get_binary_index(self, request):
log.info(request.param)
- if request.param["index_type"] in ct.binary_support:
+ if request.param["index_type"] in ct.binary_supported_index_types:
return request.param
else:
pytest.skip("Skip index Temporary")
@@ -4560,7 +4560,7 @@ class TestCollectionNullInvalid(TestcaseBase):
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
- @pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+ @pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes[:1])
def test_create_collection_set_nullable_on_pk_field(self, vector_type):
"""
target: test create collection with set nullable=True on pk field
@@ -4578,7 +4578,7 @@ class TestCollectionNullInvalid(TestcaseBase):
self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
- @pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+ @pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes)
def test_create_collection_set_nullable_on_vector_field(self, vector_type):
"""
target: test create collection with set nullable=True on vector field
@@ -4623,7 +4623,7 @@ class TestCollectionDefaultValueInvalid(TestcaseBase):
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
- @pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+ @pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes[:1])
def test_create_collection_default_value_on_pk_field(self, vector_type):
"""
target: test create collection with set default value on pk field
@@ -4641,7 +4641,7 @@ class TestCollectionDefaultValueInvalid(TestcaseBase):
self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
- @pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+ @pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes)
def test_create_collection_default_value_on_vector_field(self, vector_type):
"""
target: test create collection with set default value on vector field
@@ -147,13 +147,13 @@ class TestFieldPartialLoad(TestcaseBase):
pk_field = cf.gen_int64_field(name='pk', is_primary=True)
load_string_field = cf.gen_string_field(name="string_load")
vector_field = cf.gen_float_vec_field(name="vec_float32", dim=dim)
- sparse_vector_field = cf.gen_float_vec_field(name="sparse", vector_data_type="SPARSE_FLOAT_VECTOR")
+ sparse_vector_field = cf.gen_float_vec_field(name="sparse", vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
schema = cf.gen_collection_schema(fields=[pk_field, load_string_field, vector_field, sparse_vector_field],
auto_id=True)
collection_w = self.init_collection_wrap(name=name, schema=schema)
string_values = [str(i) for i in range(nb)]
float_vec_values = cf.gen_vectors(nb, dim)
- sparse_vec_values = cf.gen_vectors(nb, dim, vector_data_type="SPARSE_FLOAT_VECTOR")
+ sparse_vec_values = cf.gen_vectors(nb, dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
collection_w.insert([string_values, float_vec_values, sparse_vec_values])
# build index on one of vector fields
@@ -3273,9 +3273,9 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase):
nq = 2
limit = 100
if invalid_search_data == "sparse_vector":
- search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type="SPARSE_FLOAT_VECTOR")
+ search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
else:
- search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type="FLOAT_VECTOR")
+ search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type=DataType.FLOAT_VECTOR)
log.info(f"search data: {search_data}")
error = {ct.err_code: 65535,
ct.err_msg: "please provide varchar/text for BM25 Function based search"}
@@ -3377,7 +3377,7 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase):
"paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
"text": fake.text().lower() if random.random() >= empty_percent else "",
"dense_emb": [random.random() for _ in range(dim)],
- "neural_sparse_emb": cf.gen_vectors(nb=1, dim=1000, vector_data_type="SPARSE_FLOAT_VECTOR")[0],
+ "neural_sparse_emb": cf.gen_vectors(nb=1, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)[0],
}
for i in range(data_size)
]
@@ -3428,7 +3428,7 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase):
limit=limit,
)
sparse_search = AnnSearchRequest(
- data=cf.gen_vectors(nb=nq, dim=dim, vector_data_type="SPARSE_FLOAT_VECTOR"),
+ data=cf.gen_vectors(nb=nq, dim=dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR),
anns_field="neural_sparse_emb",
param={},
limit=limit,
@@ -1124,7 +1124,7 @@ class TestIndexInvalid(TestcaseBase):
def scalar_index(self, request):
yield request.param
- @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+ @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
def vector_data_type(self, request):
yield request.param
@@ -2171,7 +2171,7 @@ class TestInvertedIndexValid(TestcaseBase):
def scalar_index(self, request):
yield request.param
- @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+ @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
def vector_data_type(self, request):
yield request.param
@@ -1429,7 +1429,7 @@ class TestInsertInvalid(TestcaseBase):
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index ", ct.all_index_types[9:11])
- @pytest.mark.parametrize("invalid_vector_type ", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+ @pytest.mark.parametrize("invalid_vector_type ", ct.all_dense_vector_types)
def test_invalid_sparse_vector_data(self, index, invalid_vector_type):
"""
target: insert illegal data type
@@ -591,10 +591,10 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
expected:
1. search output fields with Hybrid index
"""
- search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR.name, 3, 1
+ search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR, 3, 1
self.collection_wrap.search(
- cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field), vector_field, search_params, limit,
+ cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field), vector_field.name, search_params, limit,
output_fields=['*'], check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field),
"limit": limit, "output_fields": self.all_fields})
@@ -1247,8 +1247,8 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
@pytest.mark.parametrize("group_by_field", ['INT8', 'INT16', 'INT32', 'INT64', 'BOOL', 'VARCHAR'])
@pytest.mark.parametrize(
"dim, search_params, vector_field",
- [(3, {"metric_type": MetricType.L2, "ef": 32}, DataType.FLOAT16_VECTOR.name),
-  (1000, {"metric_type": MetricType.IP, "drop_ratio_search": 0.2}, DataType.SPARSE_FLOAT_VECTOR.name)])
+ [(3, {"metric_type": MetricType.L2, "ef": 32}, DataType.FLOAT16_VECTOR),
+  (1000, {"metric_type": MetricType.IP, "drop_ratio_search": 0.2}, DataType.SPARSE_FLOAT_VECTOR)])
def test_bitmap_index_search_group_by(self, limit, group_by_field, dim, search_params, vector_field):
"""
target:
@@ -1259,7 +1259,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
expected:
1. search group by with BITMAP index
"""
- res, _ = self.collection_wrap.search(cf.gen_vectors(nb=1, dim=dim, vector_data_type=vector_field), vector_field,
+ res, _ = self.collection_wrap.search(cf.gen_vectors(nb=1, dim=dim, vector_data_type=vector_field), vector_field.name,
search_params, limit, group_by_field=group_by_field,
output_fields=[group_by_field])
output_values = [i.fields for r in res for i in r]
@@ -1285,9 +1285,9 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
1. search iterator with BITMAP index
"""
ef = 32 if batch_size <= 32 else batch_size  # ef must be larger than or equal to batch size
- search_params, vector_field = {"metric_type": "L2", "ef": ef}, DataType.FLOAT16_VECTOR.name
+ search_params, vector_field = {"metric_type": "L2", "ef": ef}, DataType.FLOAT16_VECTOR
self.collection_wrap.search_iterator(
- cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field, search_params, batch_size,
+ cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field.name, search_params, batch_size,
expr='INT16 > 15', check_task=CheckTasks.check_search_iterator, check_items={"batch_size": batch_size})
@pytest.mark.tags(CaseLabel.L1)
@@ -1301,10 +1301,10 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
expected:
1. search output fields with BITMAP index
"""
- search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR.name, 3, 1
+ search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR, 3, 1
self.collection_wrap.search(
- cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field), vector_field, search_params, limit,
+ cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field), vector_field.name, search_params, limit,
output_fields=['*'], check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field),
"limit": limit, "output_fields": self.all_fields})
@@ -1667,11 +1667,11 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase):
expected:
1. search output fields with BITMAP index
"""
- search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR.name, 3, 1
+ search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR, 3, 1
self.collection_wrap.search(
cf.gen_vectors(nb=nq, dim=ct.default_dim, vector_data_type=vector_field),
- vector_field, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
+ vector_field.name, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field),
"limit": limit, "output_fields": self.all_fields})
@@ -1922,11 +1922,11 @@ class TestBitmapIndexMmap(TestCaseClassBase):
expected:
1. search output fields with BITMAP index
"""
- search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR.name, 3, 1
+ search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR, 3, 1
self.collection_wrap.search(
cf.gen_vectors(nb=nq, dim=ct.default_dim, vector_data_type=vector_field),
- vector_field, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
+ vector_field.name, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field),
"limit": limit, "output_fields": self.all_fields})
@@ -2345,7 +2345,7 @@ class TestGroupSearch(TestCaseClassBase):
string_values = pd.Series(data=[str(i) for i in range(nb)], dtype="string")
data = [string_values]
for i in range(len(self.vector_fields)):
- data.append(cf.gen_vectors(dim=self.dims[i], nb=nb, vector_data_type=self.vector_fields[i]))
+ data.append(cf.gen_vectors(dim=self.dims[i], nb=nb, vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[i])))
data.append(pd.Series(data=[np.int8(i) for i in range(nb)], dtype="int8"))
data.append(pd.Series(data=[np.int64(i) for i in range(nb)], dtype="int64"))
data.append(pd.Series(data=[np.bool_(i) for i in range(nb)], dtype="bool"))
@@ -2384,7 +2384,7 @@ class TestGroupSearch(TestCaseClassBase):
limit = 50
group_size = 5
for j in range(len(self.vector_fields)):
- search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=self.vector_fields[j])
+ search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[j]))
search_params = {"params": cf.get_search_params_params(self.index_types[j])}
# when strict_group_size=true, it shall return results with entities = limit * group_size
res1 = self.collection_wrap.search(data=search_vectors, anns_field=self.vector_fields[j],
@@ -2424,7 +2424,7 @@ class TestGroupSearch(TestCaseClassBase):
req_list = []
for j in range(len(self.vector_fields)):
search_params = {
- "data": cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=self.vector_fields[j]),
+ "data": cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[j])),
"anns_field": self.vector_fields[j],
"param": {"params": cf.get_search_params_params(self.index_types[j])},
"limit": limit,
@@ -2473,7 +2473,7 @@ class TestGroupSearch(TestCaseClassBase):
req_list = []
for i in range(len(self.vector_fields)):
search_param = {
- "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=self.vector_fields[i]),
+ "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[i])),
"anns_field": self.vector_fields[i],
"param": {},
"limit": ct.default_limit,
@@ -2497,7 +2497,7 @@ class TestGroupSearch(TestCaseClassBase):
req_list = []
for i in range(1, len(self.vector_fields)):
search_param = {
- "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=self.vector_fields[i]),
+ "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[i])),
"anns_field": self.vector_fields[i],
"param": {},
"limit": ct.default_limit,
@@ -2519,7 +2519,7 @@ class TestGroupSearch(TestCaseClassBase):
nq = 2
limit = 15
for j in range(len(self.vector_fields)):
- search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=self.vector_fields[j])
+ search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[j]))
search_params = {"params": cf.get_search_params_params(self.index_types[j])}
res1 = self.collection_wrap.search(data=search_vectors, anns_field=self.vector_fields[j],
param=search_params, limit=limit,
@@ -2561,7 +2561,7 @@ class TestGroupSearch(TestCaseClassBase):
default_search_exp = f"{self.primary_field} >= 0"
grpby_field = self.inverted_string_field
default_search_field = self.vector_fields[1]
- search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1])
+ search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[1]))
all_pages_ids = []
all_pages_grpby_field_values = []
for r in range(page_rounds):
@@ -2603,7 +2603,7 @@ class TestGroupSearch(TestCaseClassBase):
default_search_exp = f"{self.primary_field} >= 0"
grpby_field = self.inverted_string_field grpby_field = self.inverted_string_field
default_search_field = self.vector_fields[1] default_search_field = self.vector_fields[1]
search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1]) search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[1]))
all_pages_ids = [] all_pages_ids = []
all_pages_grpby_field_values = [] all_pages_grpby_field_values = []
res_count = limit * group_size res_count = limit * group_size
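The two pagination hunks above rest on one piece of arithmetic: with grouping search, each page returns limit groups of group_size hits, so a single page contributes limit * group_size entities, and the test accumulates ids and group-by values page by page over page_rounds iterations (the assertions themselves fall outside the lines shown here). With illustrative numbers, not taken from this diff:

    # Expected result counts for grouping-search pagination (illustrative values only)
    limit, group_size, page_rounds = 10, 2, 3
    res_count = limit * group_size              # 20 entities expected on each page
    total_collected = page_rounds * res_count   # 60 entities gathered across all pages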
@ -2655,7 +2655,7 @@ class TestGroupSearch(TestCaseClassBase):
""" """
group_by_field = self.inverted_string_field group_by_field = self.inverted_string_field
default_search_field = self.vector_fields[1] default_search_field = self.vector_fields[1]
search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1]) search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[1]))
search_params = {} search_params = {}
limit = 10 limit = 10
max_group_size = 10 max_group_size = 10
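This last TestGroupSearch hunk exercises grouping search with empty search params and a max_group_size cap. The diff cuts off before the grouping keywords, so the snippet below is only an illustration of how the variables defined above typically combine in a pymilvus grouping search; it is not a quote of the elided part of the call, and the grouping kwargs depend on the pymilvus release the suite targets (collection_wrap again stands in for self.collection_wrap):

    # Illustrative grouping-search call shape; the real continuation of the call is not shown in this diff
    res = collection_wrap.search(data=search_vectors,
                                 anns_field=default_search_field,
                                 param=search_params,
                                 limit=limit,
                                 group_by_field=group_by_field,
                                 group_size=max_group_size,
                                 output_fields=[group_by_field])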

View File

@ -2351,7 +2351,7 @@ class TestQueryOperation(TestcaseBase):
expected: return the latest entity; verify the result is same as dedup entities expected: return the latest entity; verify the result is same as dedup entities
""" """
collection_w = self.init_collection_general(prefix, dim=16, is_flush=False, insert_data=False, is_index=False, collection_w = self.init_collection_general(prefix, dim=16, is_flush=False, insert_data=False, is_index=False,
vector_data_type=ct.float_type, with_json=False)[0] vector_data_type=DataType.FLOAT_VECTOR, with_json=False)[0]
nb = 50 nb = 50
rounds = 10 rounds = 10
for i in range(rounds): for i in range(rounds):
@ -2465,7 +2465,7 @@ class TestQueryOperation(TestcaseBase):
assert res[0].keys() == set(fields) assert res[0].keys() == set(fields)
@pytest.mark.tags(CaseLabel.L1) @pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) @pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types)
def test_query_output_all_vector_type(self, vector_data_type): def test_query_output_all_vector_type(self, vector_data_type):
""" """
target: test query output different vector type target: test query output different vector type

View File

@ -127,7 +127,7 @@ class TestQueryIterator(TestcaseBase):
"batch_size": batch_size}) "batch_size": batch_size})
@pytest.mark.tags(CaseLabel.L2) @pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) @pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types)
def test_query_iterator_output_different_vector_type(self, vector_data_type): def test_query_iterator_output_different_vector_type(self, vector_data_type):
""" """
target: test query iterator with output fields target: test query iterator with output fields
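Both parametrize hunks in this last stretch replace the hard-coded list ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"] with ct.all_dense_vector_types. Assuming that constant simply mirrors the strings it replaces as DataType members (its definition lives in the suite's common constants module and is not part of this diff), it would read:

    from pymilvus import DataType

    # Assumed definition, mirroring the hard-coded strings these parametrize lists used before
    all_dense_vector_types = [
        DataType.FLOAT_VECTOR,
        DataType.FLOAT16_VECTOR,
        DataType.BFLOAT16_VECTOR,
    ]

Parametrizing over the constant keeps the query and query-iterator tests in sync if another dense vector type is added later.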