test: Refactor diskann and hnsw index, and update gen data functions (#43452)

related issue #40698
1. add diskann and hnsw index test
2. update gen_row_data and gen_column_data functions
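
For reference, a rough sketch of how the updated helpers are meant to be called from a test (names such as `client`, `collection_name` and the `cf` alias for common_func are placeholders):

    schema_info = client.describe_collection(collection_name)   # dict-style schema info (client v2)
    rows = cf.gen_row_data_by_schema(nb=3000, schema=schema_info, start=0)
    client.insert(collection_name, rows)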

---------

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
yanliang567 2025-07-23 22:04:54 +08:00 committed by GitHub
parent e9ab73e93d
commit abb3aeacdf
17 changed files with 1525 additions and 1313 deletions


@ -360,6 +360,18 @@ class TestMilvusClientV2Base(Base):
collection_name=collection_name, **kwargs).run()
return res, check_result
@trace()
def refresh_load(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.refresh_load, collection_name], **kwargs)
check_result = ResponseChecker(res, func_name, check_task,
check_items, check,
collection_name=collection_name, **kwargs).run()
return res, check_result
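# Illustrative usage (a sketch, not part of this commit): a test built on
# TestMilvusClientV2Base can refresh the load state of a loaded collection with
#     res, check_result = self.refresh_load(client, collection_name)
# and may pass check_task/check_items just like the other wrappers above.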
@trace()
def release_collection(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout


@ -247,17 +247,12 @@ class ResponseChecker:
raise Exception("No expect values found in the check task")
if check_items.get("collection_name", None) is not None:
assert res["collection_name"] == check_items.get("collection_name")
if check_items.get("auto_id", False):
assert res["auto_id"] == check_items.get("auto_id")
if check_items.get("num_shards", 1):
assert res["num_shards"] == check_items.get("num_shards", 1)
if check_items.get("consistency_level", 2):
assert res["consistency_level"] == check_items.get("consistency_level", 2)
if check_items.get("enable_dynamic_field", True):
assert res["enable_dynamic_field"] == check_items.get("enable_dynamic_field", True)
if check_items.get("num_partitions", 1):
assert res["num_partitions"] == check_items.get("num_partitions", 1)
if check_items.get("id_name", "id"):
assert res["auto_id"] == check_items.get("auto_id", False)
assert res["num_shards"] == check_items.get("num_shards", 1)
assert res["consistency_level"] == check_items.get("consistency_level", 0)
assert res["enable_dynamic_field"] == check_items.get("enable_dynamic_field", True)
assert res["num_partitions"] == check_items.get("num_partitions", 1)
if check_items.get("id_name", None):
assert res["fields"][0]["name"] == check_items.get("id_name", "id")
if check_items.get("vector_name", "vector"):
vector_name_list = []
@ -474,9 +469,9 @@ class ResponseChecker:
elif check_items.get("metric", None) is not None:
# verify the distances are already sorted
if check_items.get("metric").upper() in ["IP", "COSINE", "BM25"]:
assert distances == sorted(distances, reverse=True)
assert pc.compare_lists_with_epsilon_ignore_dict_order(distances, sorted(distances, reverse=True))
else:
assert distances == sorted(distances, reverse=False)
assert pc.compare_lists_with_epsilon_ignore_dict_order(distances, sorted(distances, reverse=False))
if check_items.get("vector_nq") is None or check_items.get("original_vectors") is None:
log.debug("skip distance check for knowhere does not return the precise distances")
else:
@ -484,9 +479,9 @@ class ResponseChecker:
else:
pass # just check nq and topk, not specific ids need check
nq_i += 1
log.info("search_results_check: limit (topK) and "
"ids searched for %d queries are correct" % len(search_res))
return True
@staticmethod
@ -600,7 +595,7 @@ class ResponseChecker:
if isinstance(query_res, list):
# assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name, with_vec=with_vec)
# return True
assert pc.compare_lists_ignore_order(a=query_res, b=exp_res)
assert pc.compare_lists_with_epsilon_ignore_dict_order(a=query_res, b=exp_res)
return True
else:
log.error(f"Query result {query_res} is not list")


@ -69,7 +69,7 @@ def deep_approx_compare(x, y, epsilon=epsilon):
return x == y
def compare_lists_ignore_order(a, b, epsilon=epsilon):
def compare_lists_with_epsilon_ignore_dict_order(a, b, epsilon=epsilon):
"""
Compares two lists of dictionaries for equality (order-insensitive) with floating-point tolerance.

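# Illustrative usage (a sketch, not part of this commit): order-insensitive comparison of
# query results against expected rows with floating-point tolerance:
#     assert compare_lists_with_epsilon_ignore_dict_order(query_res, exp_res)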

@ -1654,20 +1654,6 @@ def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, star
return df, binary_raw_values
#
# def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
# int_values = [i for i in range(start, start + nb)]
# float_values = [np.float32(i) for i in range(start, start + nb)]
# string_values = [str(i) for i in range(start, start + nb)]
# json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]}
# for i in range(start, start + nb)]
# float_vec_values = gen_vectors(nb, dim)
# if with_json is False:
# data = [int_values, float_values, string_values, float_vec_values]
# else:
# data = [int_values, float_values, string_values, json_values, float_vec_values]
# return data
def gen_default_list_sparse_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=False):
int_values = [i for i in range(start, start + nb)]
@ -1728,56 +1714,122 @@ def prepare_bulk_insert_data(schema=None,
return files
def get_column_data_by_schema(nb=ct.default_nb, schema=None, skip_vectors=False, start=None):
def gen_column_data_by_schema(nb=ct.default_nb, schema=None, skip_vectors=False, start=0):
return get_column_data_by_schema(nb=nb, schema=schema, skip_vectors=skip_vectors, start=start)
def get_column_data_by_schema(nb=ct.default_nb, schema=None, skip_vectors=False, start=0, random_pk=False):
"""
Generates column data based on the given schema.
Args:
nb (int): Number of rows to generate. Defaults to ct.default_nb.
schema (Schema): Collection schema. If None, uses default schema.
skip_vectors (bool): Whether to skip vector fields. Defaults to False.
start (int): Starting value for primary key fields (default: 0)
random_pk (bool, optional): Whether to generate random primary key values (default: False)
Returns:
list: List of column data arrays matching the schema fields (excluding auto_id fields).
"""
if schema is None:
schema = gen_default_collection_schema()
fields = schema.fields
fields_not_auto_id = []
fields_to_gen = []
for field in fields:
if not field.auto_id:
fields_not_auto_id.append(field)
if not field.auto_id and not field.is_function_output:
fields_to_gen.append(field)
data = []
for field in fields_not_auto_id:
if field.dtype == DataType.FLOAT_VECTOR and skip_vectors is True:
for field in fields_to_gen:
if field.dtype in ct.all_vector_types and skip_vectors is True:
tmp = []
else:
tmp = gen_data_by_collection_field(field, nb=nb, start=start)
tmp = gen_data_by_collection_field(field, nb=nb, start=start, random_pk=random_pk)
data.append(tmp)
return data
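# Illustrative usage (a sketch, not part of this commit): column-based data for the ORM
# insert path, assuming `collection_w` is a collection wrapper with an attached schema:
#     columns = gen_column_data_by_schema(nb=1000, schema=collection_w.schema)
#     collection_w.insert(columns)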
def gen_row_data_by_schema(nb=ct.default_nb, schema=None, start=None):
def gen_row_data_by_schema(nb=ct.default_nb, schema=None, start=0, random_pk=False):
"""
Generates row data based on the given schema.
Args:
nb (int): Number of rows to generate. Defaults to ct.default_nb.
schema (Schema): Collection schema or collection info. If None, uses default schema.
start (int): Starting value for primary key fields. Defaults to 0.
random_pk (bool, optional): Whether to generate random primary key values (default: False)
Returns:
list[dict]: List of dictionaries where each dictionary represents a row,
with field names as keys and generated data as values.
Notes:
- Skips auto_id fields and function output fields.
- For primary key fields, generates sequential values starting from 'start'.
- For non-primary fields, generates random data based on field type.
"""
if schema is None:
schema = gen_default_collection_schema()
# ignore auto id field and the fields in function output
func_output_fields = []
if hasattr(schema, "functions"):
functions = schema.functions
if isinstance(schema, dict):
# a dict of collection schema info is usually from client.describe_collection()
fields = schema.get('fields', [])
functions = schema.get('functions', [])
for func in functions:
output_field_names = func.output_field_names
output_field_names = func.get('output_field_names', [])
func_output_fields.extend(output_field_names)
func_output_fields = list(set(func_output_fields))
fields = schema.fields
fields_needs_data = []
for field in fields:
if field.auto_id:
continue
if field.name in func_output_fields:
continue
fields_needs_data.append(field)
data = []
for i in range(nb):
tmp = {}
for field in fields_needs_data:
tmp[field.name] = gen_data_by_collection_field(field)
if start is not None and field.dtype == DataType.INT64:
tmp[field.name] = start
start += 1
if field.nullable is True:
# 10% percent of data is null
if random.random() < 0.1:
tmp[field.name] = None
data.append(tmp)
func_output_fields = list(set(func_output_fields))
fields_needs_data = []
for field in fields:
if field.get('auto_id', False):
continue
if field.get('name', None) in func_output_fields:
continue
fields_needs_data.append(field)
data = []
for i in range(nb):
tmp = {}
for field in fields_needs_data:
tmp[field.get('name', None)] = gen_data_by_collection_field(field, random_pk=random_pk)
if field.get('is_primary', False) is True and field.get('type', None) == DataType.INT64:
tmp[field.get('name', None)] = start
start += 1
if field.get('is_primary', False) is True and field.get('type', None) == DataType.VARCHAR:
tmp[field.get('name', None)] = str(start)
start += 1
data.append(tmp)
else:
# a schema object, usually an ORM CollectionSchema object
fields = schema.fields
if hasattr(schema, "functions"):
functions = schema.functions
for func in functions:
output_field_names = func.output_field_names
func_output_fields.extend(output_field_names)
func_output_fields = list(set(func_output_fields))
fields_needs_data = []
for field in fields:
if field.auto_id:
continue
if field.name in func_output_fields:
continue
fields_needs_data.append(field)
data = []
for i in range(nb):
tmp = {}
for field in fields_needs_data:
tmp[field.name] = gen_data_by_collection_field(field, random_pk=random_pk)
if field.is_primary is True and field.dtype == DataType.INT64:
tmp[field.name] = start
start += 1
if field.is_primary is True and field.dtype == DataType.VARCHAR:
tmp[field.name] = str(start)
start += 1
data.append(tmp)
return data
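# Illustrative usage (a sketch, not part of this commit): row-based data works with either a
# dict from client.describe_collection() or an ORM CollectionSchema, e.g.
#     info = client.describe_collection(collection_name)
#     rows = gen_row_data_by_schema(nb=200, schema=info, start=default_nb)
#     client.insert(collection_name, rows)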
@ -1957,6 +2009,7 @@ def get_dense_anns_field_name_list(schema=None):
anns_fields.append(item)
return anns_fields
def gen_varchar_data(length: int, nb: int, text_mode=False):
if text_mode:
return [fake.text() for _ in range(nb)]
@ -1964,164 +2017,222 @@ def gen_varchar_data(length: int, nb: int, text_mode=False):
return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(nb)]
def gen_data_by_collection_field(field, nb=None, start=None):
# if nb is None, return one data, else return a list of data
nullable = field.nullable
if nullable is True:
if random.random() < 0.1:
return None
data_type = field.dtype
enable_analyzer = field.params.get("enable_analyzer", False)
def gen_data_by_collection_field(field, nb=None, start=0, random_pk=False):
"""
Generates test data for a given collection field based on its data type and properties.
Args:
field (dict or Field): Field information, either as a dictionary (v2 client) or Field object (ORM client)
nb (int, optional): Number of values to generate. If None, returns a single value, which is typically used for row-data generation
start (int, optional): Starting value for primary key fields (default: 0)
random_pk (bool, optional): Whether to generate random primary key values (default: False)
Returns:
A single value if nb is None, otherwise a list of generated values
Notes:
- Handles various data types including primitive types, vectors, arrays and JSON
- For nullable fields, generates None values approximately 20% of the time
- Special handling for primary key fields (sequential values)
- For varchar fields, generates strings of length up to min(20, max_length - 1)
- For vector fields, generates random vectors of specified dimension
- For array fields, generates arrays filled with random values of element type
"""
if isinstance(field, dict):
# for v2 client, it accepts a dict of field info
nullable = field.get('nullable', False)
data_type = field.get('type', None)
enable_analyzer = field.get('params').get("enable_analyzer", False)
is_primary = field.get('is_primary', False)
else:
# for ORM client, it accepts a field object
nullable = field.nullable
data_type = field.dtype
enable_analyzer = field.params.get("enable_analyzer", False)
is_primary = field.is_primary
# generate data according to the data type
if data_type == DataType.BOOL:
if nb is None:
return random.choice([True, False])
return [random.choice([True, False]) for _ in range(nb)]
if data_type == DataType.INT8:
return random.choice([True, False]) if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [random.choice([True, False]) for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else random.choice([True, False]) for i in range(nb)]
elif data_type == DataType.INT8:
if nb is None:
return random.randint(-128, 127)
return [random.randint(-128, 127) for _ in range(nb)]
if data_type == DataType.INT16:
return random.randint(-128, 127) if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [random.randint(-128, 127) for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-128, 127) for i in range(nb)]
elif data_type == DataType.INT16:
if nb is None:
return random.randint(-32768, 32767)
return [random.randint(-32768, 32767) for _ in range(nb)]
if data_type == DataType.INT32:
return random.randint(-32768, 32767) if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [random.randint(-32768, 32767) for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-32768, 32767) for i in range(nb)]
elif data_type == DataType.INT32:
if nb is None:
return random.randint(-2147483648, 2147483647)
return [random.randint(-2147483648, 2147483647) for _ in range(nb)]
if data_type == DataType.INT64:
return random.randint(-2147483648, 2147483647) if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [random.randint(-2147483648, 2147483647) for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-2147483648, 2147483647) for i in range(nb)]
elif data_type == DataType.INT64:
if nb is None:
return random.randint(-9223372036854775808, 9223372036854775807)
if start is not None:
return [i for i in range(start, start+nb)]
return [random.randint(-9223372036854775808, 9223372036854775807) for _ in range(nb)]
if data_type == DataType.FLOAT:
return random.randint(-9223372036854775808, 9223372036854775807) if random.random() < 0.8 or nullable is False else None
if nullable is False:
if is_primary is True and random_pk is False:
return [i for i in range(start, start+nb)]
else:
return [random.randint(-9223372036854775808, 9223372036854775807) for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-9223372036854775808, 9223372036854775807) for i in range(nb)]
elif data_type == DataType.FLOAT:
if nb is None:
return np.float32(random.random())
return [np.float32(random.random()) for _ in range(nb)]
if data_type == DataType.DOUBLE:
return np.float32(random.random()) if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [np.float32(random.random()) for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else np.float32(random.random()) for i in range(nb)]
elif data_type == DataType.DOUBLE:
if nb is None:
return np.float64(random.random())
return [np.float64(random.random()) for _ in range(nb)]
if data_type == DataType.VARCHAR:
max_length = field.params['max_length']
return np.float64(random.random()) if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [np.float64(random.random()) for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else np.float64(random.random()) for i in range(nb)]
elif data_type == DataType.VARCHAR:
if isinstance(field, dict):
max_length = field.get('params')['max_length']
else:
max_length = field.params['max_length']
max_length = min(20, max_length-1)
length = random.randint(0, max_length)
if nb is None:
return gen_varchar_data(length=length, nb=1, text_mode=enable_analyzer)[0]
return gen_varchar_data(length=length, nb=nb, text_mode=enable_analyzer)
if data_type == DataType.JSON:
return gen_varchar_data(length=length, nb=1, text_mode=enable_analyzer)[0] if random.random() < 0.8 or nullable is False else None
if nullable is False:
if is_primary is True and random_pk is False:
return [str(i) for i in range(start, start+nb)]
else:
return gen_varchar_data(length=length, nb=nb, text_mode=enable_analyzer)
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else gen_varchar_data(length=length, nb=1, text_mode=enable_analyzer)[0] for i in range(nb)]
elif data_type == DataType.JSON:
if nb is None:
return {"name": fake.name(), "address": fake.address(), "count": random.randint(0, 100)}
data = [{"name": str(i), "address": i, "count": random.randint(0, 100)} for i in range(nb)]
return data
if data_type == DataType.FLOAT_VECTOR:
dim = field.params['dim']
return {"name": fake.name(), "address": fake.address(), "count": random.randint(0, 100)} if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [{"name": str(i), "address": i, "count": random.randint(0, 100)} for i in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else {"name": str(i), "address": i, "count": random.randint(0, 100)} for i in range(nb)]
elif data_type in ct.all_vector_types:
if isinstance(field, dict):
dim = ct.default_dim if data_type == DataType.SPARSE_FLOAT_VECTOR else field.get('params')['dim']
else:
dim = ct.default_dim if data_type == DataType.SPARSE_FLOAT_VECTOR else field.params['dim']
if nb is None:
return [random.random() for i in range(dim)]
return [[random.random() for i in range(dim)] for _ in range(nb)]
if data_type == DataType.BFLOAT16_VECTOR:
dim = field.params['dim']
if nb is None:
return RNG.uniform(size=dim).astype(bfloat16)
return [RNG.uniform(size=dim).astype(bfloat16) for _ in range(int(nb))]
# if nb is None:
# raw_vector = [random.random() for _ in range(dim)]
# bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
# return bytes(bf16_vector)
# bf16_vectors = []
# for i in range(nb):
# raw_vector = [random.random() for _ in range(dim)]
# bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
# bf16_vectors.append(bytes(bf16_vector))
# return bf16_vectors
if data_type == DataType.FLOAT16_VECTOR:
dim = field.params['dim']
if nb is None:
return np.array([random.random() for _ in range(int(dim))], dtype=np.float16)
return [np.array([random.random() for _ in range(int(dim))], dtype=np.float16) for _ in range(int(nb))]
if data_type == DataType.INT8_VECTOR:
dim = field.params['dim']
if nb is None:
raw_vector = [random.randint(-128, 127) for _ in range(dim)]
int8_vector = np.array(raw_vector, dtype=np.int8)
return int8_vector
raw_vectors = [[random.randint(-128, 127) for _ in range(dim)] for _ in range(nb)]
int8_vectors = [np.array(raw_vector, dtype=np.int8) for raw_vector in raw_vectors]
return int8_vectors
if data_type == DataType.BINARY_VECTOR:
dim = field.params['dim']
if nb is None:
raw_vector = [random.randint(0, 1) for _ in range(dim)]
binary_byte = bytes(np.packbits(raw_vector, axis=-1).tolist())
return binary_byte
return [bytes(np.packbits([random.randint(0, 1) for _ in range(dim)], axis=-1).tolist()) for _ in range(nb)]
if data_type == DataType.SPARSE_FLOAT_VECTOR:
if nb is None:
return gen_sparse_vectors(nb=1)[0]
return gen_sparse_vectors(nb=nb)
if data_type == DataType.ARRAY:
max_capacity = field.params['max_capacity']
return gen_vectors(1, dim, vector_data_type=data_type)[0]
if nullable is False:
return gen_vectors(nb, dim, vector_data_type=data_type)
else:
raise MilvusException(message=f"gen data failed, vector field does not support nullable")
elif data_type == DataType.ARRAY:
if isinstance(field, dict):
max_capacity = field.get('params')['max_capacity']
else:
max_capacity = field.params['max_capacity']
element_type = field.element_type
if element_type == DataType.INT8:
if nb is None:
return [random.randint(-128, 127) for _ in range(max_capacity)]
return [[random.randint(-128, 127) for _ in range(max_capacity)] for _ in range(nb)]
return [random.randint(-128, 127) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [[random.randint(-128, 127) for _ in range(max_capacity)] for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-128, 127) for i in range(nb)]
if element_type == DataType.INT16:
if nb is None:
return [random.randint(-32768, 32767) for _ in range(max_capacity)]
return [[random.randint(-32768, 32767) for _ in range(max_capacity)] for _ in range(nb)]
return [random.randint(-32768, 32767) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [[random.randint(-32768, 32767) for _ in range(max_capacity)] for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-32768, 32767) for i in range(nb)]
if element_type == DataType.INT32:
if nb is None:
return [random.randint(-2147483648, 2147483647) for _ in range(max_capacity)]
return [[random.randint(-2147483648, 2147483647) for _ in range(max_capacity)] for _ in range(nb)]
return [random.randint(-2147483648, 2147483647) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [[random.randint(-2147483648, 2147483647) for _ in range(max_capacity)] for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-2147483648, 2147483647) for i in range(nb)]
if element_type == DataType.INT64:
if nb is None:
return [random.randint(-9223372036854775808, 9223372036854775807) for _ in range(max_capacity)]
return [[random.randint(-9223372036854775808, 9223372036854775807) for _ in range(max_capacity)] for _ in range(nb)]
return [random.randint(-9223372036854775808, 9223372036854775807) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [[random.randint(-9223372036854775808, 9223372036854775807) for _ in range(max_capacity)] for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-9223372036854775808, 9223372036854775807) for i in range(nb)]
if element_type == DataType.BOOL:
if nb is None:
return [random.choice([True, False]) for _ in range(max_capacity)]
return [[random.choice([True, False]) for _ in range(max_capacity)] for _ in range(nb)]
return [random.choice([True, False]) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [[random.choice([True, False]) for _ in range(max_capacity)] for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else random.choice([True, False]) for i in range(nb)]
if element_type == DataType.FLOAT:
if nb is None:
return [np.float32(random.random()) for _ in range(max_capacity)]
return [[np.float32(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
return [np.float32(random.random()) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [[np.float32(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else np.float32(random.random()) for i in range(nb)]
if element_type == DataType.DOUBLE:
if nb is None:
return [np.float64(random.random()) for _ in range(max_capacity)]
return [[np.float64(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
return [np.float64(random.random()) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [[np.float64(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else np.float64(random.random()) for i in range(nb)]
if element_type == DataType.VARCHAR:
max_length = field.params['max_length']
if isinstance(field, dict):
max_length = field.get('params')['max_length']
else:
max_length = field.params['max_length']
max_length = min(20, max_length - 1)
length = random.randint(0, max_length)
if nb is None:
return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)]
return [["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] for _ in range(nb)]
return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
if nullable is False:
return [["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] for _ in range(nb)]
else:
# gen 20% none data for nullable field
return [None if i % 2 == 0 and random.random() < 0.4 else "".join([chr(random.randint(97, 122)) for _ in range(length)]) for i in range(nb)]
else:
raise MilvusException(message=f"gen data failed, data type {data_type} not implemented")
return None
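# Illustrative usage (a sketch, not part of this commit): a single value vs. a batch for one
# field, where `field` is an ORM FieldSchema or a v2 field-info dict; primary-key fields get
# sequential values starting from `start` unless random_pk=True:
#     one_value = gen_data_by_collection_field(field)        # nb=None -> single value
#     batch = gen_data_by_collection_field(field, nb=100)    # list of 100 values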
def gen_data_by_collection_schema(schema, nb, r=0):
"""
gen random data by collection schema, regardless of primary key or auto_id
vector type only support for DataType.FLOAT_VECTOR
"""
data = []
start_uid = r * nb
fields = schema.fields
for field in fields:
data.append(gen_data_by_collection_field(field, nb, start_uid))
return data
def gen_varchar_values(nb: int, length: int = 0):
return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(nb)]
def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}):
def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}, random_pk=False):
"""
generate default value according to the collection fields,
which can replace the value of the specified field
@ -2132,11 +2243,11 @@ def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict =
if default_value is not None:
data.append(default_value)
elif field.auto_id is False:
data.append(gen_data_by_collection_field(field, nb, start_id))
data.append(gen_data_by_collection_field(field, nb, start_id, random_pk=random_pk))
return data
def gen_field_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}) -> dict:
def gen_field_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}, random_pk=False) -> dict:
"""
generate default value according to the collection fields,
which can replace the value of the specified field
@ -2150,7 +2261,7 @@ def gen_field_values(schema: CollectionSchema, nb, start_id=0, default_values: d
if default_value is not None:
data[field.name] = default_value
elif field.auto_id is False:
data[field.name] = gen_data_by_collection_field(field, nb, start_id * nb)
data[field.name] = gen_data_by_collection_field(field, nb, start_id * nb, random_pk=random_pk)
return data
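# Illustrative usage (a sketch, not part of this commit): overriding one field while the rest
# is generated from the schema; the field name "float" is only an example:
#     data = gen_field_values(schema, nb=100, default_values={"float": [1.0] * 100})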
@ -3406,11 +3517,30 @@ def install_milvus_operator_specific_config(namespace, milvus_mode, release_name
def get_wildcard_output_field_names(collection_w, output_fields):
all_fields = [field.name for field in collection_w.schema.fields]
"""
Processes output fields with wildcard ('*') expansion for collection queries.
Args:
collection_w (Union[dict, CollectionWrapper]): Collection information,
either as a dict (v2 client) or ORM wrapper.
output_fields (List[str]): List of requested output fields, may contain '*' wildcard.
Returns:
List[str]: Expanded list of output fields with wildcard replaced by all available field names.
"""
if not isinstance(collection_w, dict):
# in orm, it accepts a collection wrapper
field_names = [field.name for field in collection_w.schema.fields]
else:
# in client v2, it accepts a dict of collection info
fields = collection_w.get('fields', None)
field_names = [field.get('name') for field in fields]
output_fields = output_fields.copy()
if "*" in output_fields:
output_fields.remove("*")
output_fields.extend(all_fields)
output_fields.extend(field_names)
return output_fields
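# Illustrative usage (a sketch, not part of this commit): expanding the '*' wildcard using a
# dict of collection info from the v2 client:
#     info = client.describe_collection(collection_name)
#     fields = get_wildcard_output_field_names(info, ["*"])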
@ -3748,3 +3878,34 @@ def gen_collection_name_by_testcase_name(module_index=1):
if calling from the testcase, module_index=1
"""
return inspect.stack()[module_index][3] + gen_unique_str("_")
def parse_fmod(x: int, y: int) -> int:
"""
Computes the floating-point remainder of x/y with the same sign as x.
This function mimics the behavior of the C fmod() function for integer inputs,
where the result has the same sign as the dividend (x).
Args:
x (int): The dividend
y (int): The divisor
Returns:
int: The remainder of x/y with the same sign as x
Raises:
ValueError: If y is 0 (division by zero)
Examples:
parse_fmod(5, 3) -> 2
parse_fmod(-5, 3) -> -2
parse_fmod(5, -3) -> 2
parse_fmod(-5, -3) -> -2
"""
if y == 0:
raise ValueError('[parse_fmod] Math domain error, `y` cannot be `0`')
v = abs(x) % abs(y)
return v if x >= 0 else -v
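# A quick contrast with Python's built-in modulo, which follows the divisor's sign:
#     -5 % 3              # == 1  (Python %)
#     parse_fmod(-5, 3)    # == -2 (sign of the dividend, like C fmod)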


@ -288,6 +288,7 @@ class TestMilvusClientCollectionValid(TestMilvusClientV2Base):
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": dim,
"auto_id": auto_id,
"consistency_level": 0})
index = self.list_indexes(client, collection_name)[0]
assert index == ['vector']


@ -1782,7 +1782,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
"consistency_level": 2})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@ -1892,7 +1892,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
"consistency_level": 2})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@ -1991,7 +1991,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
"consistency_level": 2})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@ -2086,7 +2086,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
"consistency_level": 2})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@ -2181,7 +2181,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
"consistency_level": 2})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@ -2276,7 +2276,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
"consistency_level": 2})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@ -2393,42 +2393,74 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
expected: search/query successfully
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
old_name = cf.gen_collection_name_by_testcase_name()
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
self.create_collection(client, old_name, default_dim, consistency_level="Strong")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
old_name = collection_name
new_name = collection_name + "new"
assert old_name in collections
c_info = self.describe_collection(client, old_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": old_name,
"dim": default_dim,
"consistency_level": 0})[0]
rows = cf.gen_row_data_by_schema(nb=default_nb, schema=c_info)
self.insert(client, old_name, rows)
self.flush(client, old_name)
self.wait_for_index_ready(client, collection_name=old_name, index_name='vector')
vectors_to_search = cf.gen_vectors(ct.default_nq, default_dim)
insert_ids = [item.get('id') for item in rows]
old_search_res = self.search(client, old_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"ids": insert_ids,
"pk_name": "id",
"limit": default_limit})[0]
old_query_res = self.query(client, old_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True})[0]
new_name = old_name + "new"
self.rename_collection(client, old_name, new_name)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.describe_collection(client, new_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": new_name,
"dim": default_dim})
# search again after rename collection
new_search_res = self.search(client, new_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"ids": insert_ids,
"pk_name": "id",
"limit": default_limit})[0]
new_query_res = self.query(client, new_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True})[0]
assert old_search_res[0].ids == new_search_res[0].ids
assert old_query_res == new_query_res
rows = cf.gen_row_data_by_schema(nb=200, schema=c_info, start=default_nb)
error = {ct.err_code: 0, ct.err_msg: f"collection not found"}
self.insert(client, old_name, rows,
check_task=CheckTasks.err_res,
check_items=error)
self.insert(client, new_name, rows)
self.flush(client, new_name)
# assert self.num_entities(client, collection_name)[0] == default_nb
# 3. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
new_ids = [item.get('id') for item in rows]
insert_ids.extend(new_ids)
self.search(client, new_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"nq": ct.default_nq,
"ids": insert_ids,
"pk_name": default_primary_key_field_name,
"pk_name": "id",
"limit": default_limit})
# 4. query
self.query(client, new_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"pk_name": default_primary_key_field_name})
self.release_collection(client, new_name)
self.drop_collection(client, new_name)


@ -618,6 +618,7 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"consistency_level": 2,
"dim": default_dim})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
@ -688,7 +689,7 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i,
default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),


@ -87,169 +87,10 @@ class TestSearchDiskann(TestcaseBase):
******************************************************************
"""
@pytest.fixture(scope="function", params=[32, 128])
def dim(self, request):
yield request.param
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=[False, True])
def _async(self, request):
yield request.param
@pytest.fixture(scope="function", params=[True, False])
def enable_dynamic_field(self, request):
yield request.param
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_diskann_index(self, _async):
"""
target: test delete after creating index
method: 1.create collection , insert data, primary_field is int field
2.create diskann index , then load
3.search
expected: search successfully
"""
# 1. initialize with data
dim = 100
auto_id = False
enable_dynamic_field = True
nb = 2000
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id,
nb=nb, dim=dim, is_index=False,
enable_dynamic_field=enable_dynamic_field)[0:4]
# 2. create index
default_index = {"index_type": "DISKANN",
"metric_type": "L2", "params": {}}
collection_w.create_index(
ct.default_float_vec_field_name, default_index)
collection_w.load()
default_search_params = {
"metric_type": "L2", "params": {"search_list": 30}}
vectors = [[random.random() for _ in range(dim)]
for _ in range(default_nq)]
output_fields = [default_int64_field_name,
default_float_field_name, default_string_field_name]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp,
output_fields=output_fields,
_async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": default_limit,
"pk_name": ct.default_int64_field_name,
"_async": _async}
)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("search_list", [20, 200])
def test_search_with_limit_20(self, _async, search_list):
"""
target: test delete after creating index
method: 1.create collection , insert data, primary_field is int field
2.create diskann index , then load
3.search
expected: search successfully
"""
limit = 20
# 1. initialize with data
enable_dynamic_field = True
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, is_index=False,
enable_dynamic_field=enable_dynamic_field)[0:4]
# 2. create index
default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
collection_w.create_index(ct.default_float_vec_field_name, default_index)
collection_w.load()
search_params = {"metric_type": "L2", "params": {"search_list": search_list}}
output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name]
collection_w.search(vectors[:default_nq], default_search_field,
search_params, limit, default_search_exp,
output_fields=output_fields, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": limit,
"_async": _async,
"pk_name": ct.default_int64_field_name})
@pytest.mark.tags(CaseLabel.L2)
def test_search_invalid_params_with_diskann_B(self):
"""
target: test delete after creating index
method: 1.create collection , insert data, primary_field is int field
2.create diskann index
3.search with invalid params, [k, 200] when k <= 20
expected: search report an error
"""
# 1. initialize with data
dim = 100
limit = 20
auto_id = True
collection_w, _, _, insert_ids = \
self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False)[0:4]
# 2. create index
default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
collection_w.create_index(ct.default_float_vec_field_name, default_index)
collection_w.load()
default_search_params = {"metric_type": "L2", "params": {"search_list": limit-1}}
vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, limit,
default_search_exp,
output_fields=output_fields,
check_task=CheckTasks.err_res,
check_items={"err_code": 999,
"err_msg": f"should be larger than k({limit})"})
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_diskann_with_string_pk(self):
"""
target: test delete after creating index
method: 1.create collection , insert data, primary_field is string field
2.create diskann index
3.search with invalid metric type
expected: search successfully
"""
# 1. initialize with data
dim = 128
enable_dynamic_field = True
collection_w, _, _, insert_ids = \
self.init_collection_general(prefix, True, auto_id=False, dim=dim, is_index=False,
primary_field=ct.default_string_field_name,
enable_dynamic_field=enable_dynamic_field)[0:4]
# 2. create index
default_index = {"index_type": "DISKANN",
"metric_type": "L2", "params": {}}
collection_w.create_index(
ct.default_float_vec_field_name, default_index)
collection_w.load()
search_list = 20
default_search_params = {"metric_type": "L2",
"params": {"search_list": search_list}}
vectors = [[random.random() for _ in range(dim)]
for _ in range(default_nq)]
output_fields = [default_int64_field_name,
default_float_field_name, default_string_field_name]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp,
output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": default_limit,
"pk_name": ct.default_int64_field_name}
)
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_delete_data(self, _async):
"""
@ -300,57 +141,6 @@ class TestSearchDiskann(TestcaseBase):
"pk_name": ct.default_int64_field_name}
)
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_diskann_and_more_index(self, _async):
"""
target: test delete after creating index
method: 1.create collection , insert data
2.create more index ,then load
3.delete half data, search
expected: assert index and deleted id not in search result
"""
# 1. initialize with data
dim = 64
auto_id = False
enable_dynamic_field = True
collection_w, _, _, ids = \
self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False,
enable_dynamic_field=enable_dynamic_field, language="French")[0:4]
# 2. create index
default_index = {"index_type": "DISKANN",
"metric_type": "COSINE", "params": {}}
collection_w.create_index(ct.default_float_vec_field_name, default_index, index_name=index_name1)
if not enable_dynamic_field:
index_params_one = {}
collection_w.create_index("float", index_params_one, index_name="a")
index_param_two = {}
collection_w.create_index("varchar", index_param_two, index_name="b")
collection_w.load()
tmp_expr = f'{ct.default_int64_field_name} in {[0]}'
expr = f'{ct.default_int64_field_name} in {ids[:half_nb]}'
# delete half of data
del_res = collection_w.delete(expr)[0]
assert del_res.delete_count == half_nb
collection_w.delete(tmp_expr)
default_search_params = {"metric_type": "COSINE", "params": {"search_list": 30}}
vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp,
output_fields=output_fields,
_async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": ids,
"limit": default_limit,
"_async": _async,
"pk_name": ct.default_int64_field_name})
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_scalar_field(self, _async):
"""
@ -396,87 +186,3 @@ class TestSearchDiskann(TestcaseBase):
"limit": limit,
"_async": _async,
"pk_name": ct.default_int64_field_name})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("limit", [10, 100, 1000])
def test_search_diskann_search_list_equal_to_limit(self, limit, _async):
"""
target: test search diskann index when search_list equal to limit
method: 1.create collection , insert data, primary_field is int field
2.create diskann index , then load
3.search
expected: search successfully
"""
# 1. initialize with data
dim = 77
auto_id = False
enable_dynamic_field = False
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id,
dim=dim, is_index=False,
enable_dynamic_field=enable_dynamic_field)[0:4]
# 2. create index
default_index = {"index_type": "DISKANN",
"metric_type": "L2", "params": {}}
collection_w.create_index(
ct.default_float_vec_field_name, default_index)
collection_w.load()
search_params = {"metric_type": "L2", "params": {"search_list": limit}}
vectors = [[random.random() for _ in range(dim)]
for _ in range(default_nq)]
output_fields = [default_int64_field_name,
default_float_field_name, default_string_field_name]
collection_w.search(vectors[:default_nq], default_search_field,
search_params, limit,
default_search_exp,
output_fields=output_fields,
_async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": limit,
"_async": _async,
"pk_name": ct.default_int64_field_name}
)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip(reason="issue #23672")
def test_search_diskann_search_list_up_to_min(self, _async):
"""
target: test search diskann index when search_list up to min
method: 1.create collection , insert data, primary_field is int field
2.create diskann index , then load
3.search
expected: search successfully
"""
# 1. initialize with data
dim = 100
auto_id = True
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id,
dim=dim, is_index=False)[0:4]
# 2. create index
default_index = {"index_type": "DISKANN",
"metric_type": "L2", "params": {}}
collection_w.create_index(
ct.default_float_vec_field_name, default_index)
collection_w.load()
search_params = {"metric_type": "L2",
"params": {"k": 200, "search_list": 201}}
search_vectors = [[random.random() for _ in range(dim)]
for _ in range(default_nq)]
output_fields = [default_int64_field_name,
default_float_field_name, default_string_field_name]
collection_w.search(search_vectors[:default_nq], default_search_field,
search_params, default_limit,
default_search_exp,
output_fields=output_fields,
_async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": default_limit,
"_async": _async,
"pk_name": ct.default_int64_field_name})


@ -141,43 +141,6 @@ class TestCollectionSearch(TestcaseBase):
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("M", [4, 64])
@pytest.mark.parametrize("efConstruction", [8, 512])
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_search_HNSW_index_with_min_ef(self, M, efConstruction, limit, _async):
"""
target: test search HNSW index with min ef
method: connect milvus, create collection , insert, create index, load and search
expected: search successfully
"""
dim = M * 4
ef = limit
auto_id = True
enable_dynamic_field = True
self._connect()
collection_w, _, _, insert_ids, time_stamp = \
self.init_collection_general(prefix, True, 5000, partition_num=1,
auto_id=auto_id, dim=dim, is_index=False,
enable_dynamic_field=enable_dynamic_field)[0:5]
HNSW_index_params = {"M": M, "efConstruction": efConstruction}
HNSW_index = {"index_type": "HNSW",
"params": HNSW_index_params, "metric_type": "L2"}
collection_w.create_index("float_vector", HNSW_index)
collection_w.load()
search_param = {"metric_type": "L2", "params": {"ef": ef}}
vectors = [[random.random() for _ in range(dim)]
for _ in range(default_nq)]
collection_w.search(vectors[:default_nq], default_search_field,
search_param, limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": limit,
"_async": _async})
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_expression(self, null_data_percent):
"""
@ -871,497 +834,6 @@ class TestCollectionSearch(TestcaseBase):
"limit": 1})
assert search_res[0].ids == [_id]
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_output_fields_empty(self, nq, _async):
"""
target: test search with output fields
method: search with empty output_field
expected: search success
"""
# 1. initialize with data
nb = 1500
dim = 32
auto_id = True
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb,
auto_id=auto_id,
dim=dim)[0:4]
# 2. search
vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
collection_w.search(vectors[:nq], default_search_field,
default_search_params, default_limit,
default_search_exp, _async=_async,
output_fields=[],
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": default_limit,
"_async": _async,
"output_fields": []})
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_output_field(self, _async):
"""
target: test search with output fields
method: search with one output_field
expected: search success
"""
# 1. initialize with data
auto_id = False
enable_dynamic_field = False
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
auto_id=auto_id,
enable_dynamic_field=enable_dynamic_field)[0:4]
# 2. search
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp, _async=_async,
output_fields=[default_int64_field_name],
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": default_limit,
"_async": _async,
"output_fields": [default_int64_field_name]})
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_output_vector_field(self, _async):
"""
target: test search with output fields
method: search with one output_field
expected: search success
"""
# 1. initialize with data
auto_id = True
enable_dynamic_field = False
collection_w, _, _, insert_ids = \
self.init_collection_general(prefix, True, auto_id=auto_id, enable_dynamic_field=enable_dynamic_field)[0:4]
# 2. search
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp, _async=_async,
output_fields=[field_name],
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq, "ids": insert_ids,
"limit": default_limit, "_async": _async,
"output_fields": [field_name]})[0]
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_output_fields(self, _async):
"""
target: test search with output fields
method: search with multiple output_field
expected: search success
"""
# 1. initialize with data
nb = 2000
dim = 64
auto_id = False
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb,
is_all_data_type=True,
auto_id=auto_id,
dim=dim)[0:4]
# 2. search
vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
output_fields = [default_int64_field_name, default_float_field_name]
collection_w.search(vectors[:nq], default_search_field,
default_search_params, default_limit,
default_search_exp, _async=_async,
output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": default_limit,
"_async": _async,
"output_fields": output_fields})
@pytest.mark.tags(CaseLabel.L2)
def test_search_output_array_field(self, enable_dynamic_field):
"""
target: test search output array field
method: create connection, collection, insert and search
expected: search successfully
"""
# 1. create a collection
auto_id = True
schema = cf.gen_array_collection_schema(auto_id=auto_id)
collection_w = self.init_collection_wrap(schema=schema)
# 2. insert data
if enable_dynamic_field:
data = cf.gen_row_data_by_schema(schema=schema)
else:
data = cf.gen_array_dataframe_data(auto_id=auto_id)
collection_w.insert(data)
# 3. create index and load
collection_w.create_index(default_search_field)
collection_w.load()
# 4. search output array field, check
output_fields = [ct.default_int64_field_name, ct.default_int32_array_field_name,
ct.default_float_array_field_name]
collection_w.search(vectors[:default_nq], default_search_field, {}, default_limit,
output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit,
"output_fields": output_fields})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("index", ct.all_index_types[:8])
@pytest.mark.parametrize("metrics", ct.dense_metrics)
@pytest.mark.parametrize("limit", [200])
def test_search_output_field_vector_after_different_index_metrics(self, index, metrics, limit):
"""
target: test search with output vector field after different index
method: 1. create a collection and insert data
2. create index and load
3. search with output field vector
4. check the result vectors should be equal to the inserted
expected: search success
"""
collection_w, _vectors = self.init_collection_general(prefix, True, is_index=False)[:2]
# 2. create index and load
params = cf.get_index_params_params(index)
default_index = {"index_type": index, "params": params, "metric_type": metrics}
collection_w.create_index(field_name, default_index)
collection_w.load()
# 3. search with output field vector
search_params = cf.gen_search_param(index, metrics)
for search_param in search_params:
if index == "HNSW":
limit = search_param["params"]["ef"]
if limit > max_limit:
limit = default_nb
if index == "DISKANN":
limit = search_param["params"]["search_list"]
collection_w.search(vectors[:1], default_search_field,
search_param, limit, default_search_exp,
output_fields=[field_name],
check_task=CheckTasks.check_search_results,
check_items={"nq": 1,
"limit": limit,
"original_entities": _vectors[0],
"output_fields": [field_name]})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("metrics", ct.binary_metrics[:2])
@pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"])
def test_search_output_field_vector_after_binary_index(self, metrics, index):
"""
target: test search with output vector field after binary index
method: 1. create a collection and insert data
2. create index and load
3. search with output field vector
4. check the result vectors should be equal to the inserted
expected: search success
"""
# 1. create a collection and insert data
collection_w = self.init_collection_general(prefix, is_binary=True, is_index=False)[0]
data = cf.gen_default_binary_dataframe_data()[0]
collection_w.insert(data)
# 2. create index and load
params = {"M": 48, "efConstruction": 500} if index == "HNSW" else {"nlist": 128}
default_index = {"index_type": index, "metric_type": metrics, "params": params}
collection_w.create_index(binary_field_name, default_index)
collection_w.load()
# 3. search with output field vector
search_params = cf.gen_search_param(index, metrics)
binary_vectors = cf.gen_binary_vectors(1, default_dim)[1]
for search_param in search_params:
res = collection_w.search(binary_vectors, binary_field_name,
search_param, 2, default_search_exp,
output_fields=[binary_field_name])[0]
# 4. check the result vectors should be equal to the inserted
assert res[0][0].entity.binary_vector == data[binary_field_name][res[0][0].id]
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("metrics", ct.structure_metrics)
@pytest.mark.parametrize("index", ["BIN_FLAT"])
def test_search_output_field_vector_after_structure_metrics(self, metrics, index):
"""
target: test search with output vector field using structure metrics
method: 1. create a collection and insert data
2. create index and load
3. search with output field vector
4. check the result vectors should be equal to the inserted
expected: search success
"""
dim = 8
# 1. create a collection and insert data
collection_w = self.init_collection_general(prefix, dim=dim, is_binary=True, is_index=False)[0]
data = cf.gen_default_binary_dataframe_data(dim=dim)[0]
collection_w.insert(data)
# 2. create index and load
default_index = {"index_type": index, "metric_type": metrics, "params": {"nlist": 128}}
collection_w.create_index(binary_field_name, default_index)
collection_w.load()
# 3. search with output field vector
search_params = {"metric_type": metrics, "params": {"nprobe": 10}}
binary_vectors = cf.gen_binary_vectors(ct.default_nq, dim)[1]
res = collection_w.search(binary_vectors, binary_field_name,
search_params, 2, default_search_exp,
output_fields=[binary_field_name])[0]
# 4. check the result vectors should be equal to the inserted
assert res[0][0].entity.binary_vector == data[binary_field_name][res[0][0].id]
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("dim", [32, 77, 768])
def test_search_output_field_vector_with_different_dim(self, dim):
"""
target: test search with output vector field with different dim
method: 1. create a collection and insert data
2. create index and load
3. search with output field vector
4. check the result vectors should be equal to the inserted
expected: search success
"""
# 1. create a collection and insert data
collection_w, _vectors = self.init_collection_general(prefix, True, dim=dim)[:2]
# 2. search with output field vector
vectors = cf.gen_vectors(default_nq, dim=dim)
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit, default_search_exp,
output_fields=[field_name],
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit,
"original_entities": _vectors[0],
"output_fields": [field_name]})
@pytest.mark.tags(CaseLabel.L2)
def test_search_output_vector_field_and_scalar_field(self, enable_dynamic_field):
"""
target: test search with output vector field and scalar field
method: 1. initialize a collection
2. search with output field vector
3. check no field missing
expected: search success
"""
# 1. initialize a collection
collection_w, _vectors = self.init_collection_general(prefix, True,
enable_dynamic_field=enable_dynamic_field)[:2]
# search with output field vector
output_fields = [default_float_field_name, default_string_field_name,
default_json_field_name, default_search_field]
original_entities = []
if enable_dynamic_field:
entities = []
for vector in _vectors[0]:
entities.append({default_int64_field_name: vector[default_int64_field_name],
default_float_field_name: vector[default_float_field_name],
default_string_field_name: vector[default_string_field_name],
default_json_field_name: vector[default_json_field_name],
default_search_field: vector[default_search_field]})
original_entities.append(pd.DataFrame(entities))
else:
original_entities = _vectors
collection_w.search(vectors[:1], default_search_field,
default_search_params, default_limit, default_search_exp,
output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": 1,
"limit": default_limit,
"pk_name": default_int64_field_name,
"original_entities": original_entities[0],
"output_fields": output_fields})
if enable_dynamic_field:
collection_w.search(vectors[:1], default_search_field,
default_search_params, default_limit, default_search_exp,
output_fields=["$meta", default_search_field],
check_task=CheckTasks.check_search_results,
check_items={"nq": 1,
"limit": default_limit,
"pk_name": default_int64_field_name,
"original_entities": original_entities[0],
"output_fields": output_fields})
@pytest.mark.tags(CaseLabel.L2)
def test_search_output_vector_field_and_pk_field(self, enable_dynamic_field):
"""
target: test search with output vector field and pk field
method: 1. initialize a collection
2. search with output field vector
3. check no field missing
expected: search success
"""
# 1. initialize a collection
collection_w = self.init_collection_general(prefix, True,
enable_dynamic_field=enable_dynamic_field)[0]
# 2. search with output field vector
output_fields = [default_int64_field_name, default_string_field_name, default_search_field]
collection_w.search(vectors[:1], default_search_field,
default_search_params, default_limit, default_search_exp,
output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": 1,
"limit": default_limit,
"output_fields": output_fields})
@pytest.mark.tags(CaseLabel.L2)
def test_search_output_field_vector_with_partition(self):
"""
target: test search with output vector field in a partition
method: 1. create a collection and insert data
2. create index and load
3. search with output field vector
4. check the result vectors should be equal to the inserted
expected: search success
"""
# 1. create a collection and insert data
collection_w = self.init_collection_general(prefix, is_index=False)[0]
partition_w = self.init_partition_wrap(collection_w)
data = cf.gen_default_dataframe_data()
partition_w.insert(data)
# 2. create index and load
collection_w.create_index(field_name, default_index_params)
collection_w.load()
# 3. search with output field vector
partition_w.search(vectors[:1], default_search_field,
default_search_params, default_limit, default_search_exp,
output_fields=[field_name],
check_task=CheckTasks.check_search_results,
check_items={"nq": 1,
"limit": default_limit,
"original_entities": data,
"output_fields": [field_name]})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("wildcard_output_fields", [["*"], ["*", default_int64_field_name],
["*", default_search_field]])
def test_search_with_output_field_wildcard(self, wildcard_output_fields, _async):
"""
target: test search with output fields using wildcard
method: search with one output_field (wildcard)
expected: search success
"""
# 1. initialize with data
auto_id = True
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
auto_id=auto_id)[0:4]
# 2. search
output_fields = cf.get_wildcard_output_field_names(collection_w, wildcard_output_fields)
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp, _async=_async,
output_fields=wildcard_output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"pk_name": ct.default_int64_field_name,
"limit": default_limit,
"_async": _async,
"output_fields": output_fields})
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_invalid_output_fields(self):
"""
target: test search with invalid output fields
method: search with invalid output_field values
expected: raise exception
"""
# 1. initialize with data
invalid_output_fields = [["%"], [""], ["-"]]
auto_id = False
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id)[0:4]
# 2. search
for field in invalid_output_fields:
error1 = {ct.err_code: 999, ct.err_msg: "field %s not exist" % field[0]}
error2 = {ct.err_code: 999, ct.err_msg: "`output_fields` value %s is illegal" % field}
error = error2 if field == [""] else error1
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp,
output_fields=field,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_search_multi_collections(self, nq, _async):
"""
target: test search multi collections of L2
method: add vectors into 10 collections, and search
expected: search status ok, the length of result
"""
nb = 1000
dim = 64
auto_id = True
self._connect()
collection_num = 10
for i in range(collection_num):
# 1. initialize with data
log.info("test_search_multi_collections: search round %d" % (i + 1))
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb,
auto_id=auto_id,
dim=dim)[0:4]
# 2. search
vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
log.info("test_search_multi_collections: searching %s entities (nq = %s) from collection %s" %
(default_limit, nq, collection_w.name))
collection_w.search(vectors[:nq], default_search_field,
default_search_params, default_limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"pk_name": ct.default_int64_field_name,
"limit": default_limit,
"_async": _async})
@pytest.mark.tags(CaseLabel.L2)
def test_search_concurrent_multi_threads(self, nq, _async, null_data_percent):
"""
target: test concurrent search with multi-processes
method: search with 10 processes, each process uses dependent connection
expected: status ok and the returned vectors should be query_records
"""
# 1. initialize with data
nb = 3000
dim = 64
auto_id = False
enable_dynamic_field = False
threads_num = 10
threads = []
collection_w, _, _, insert_ids = \
self.init_collection_general(prefix, True, nb, auto_id=auto_id, dim=dim,
enable_dynamic_field=enable_dynamic_field,
nullable_fields={ct.default_string_field_name: null_data_percent})[0:4]
def search(collection_w):
vectors = [[random.random() for _ in range(dim)]
for _ in range(nq)]
collection_w.search(vectors[:nq], default_search_field,
default_search_params, default_limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": default_limit,
"_async": _async})
# 2. search with multi-processes
log.info("test_search_concurrent_multi_threads: searching with %s processes" % threads_num)
for i in range(threads_num):
t = threading.Thread(target=search, args=(collection_w,))
threads.append(t)
t.start()
time.sleep(0.2)
for t in threads:
t.join()
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip(reason="issue 37113")
def test_search_concurrent_two_collections_nullable(self, nq, _async):
@ -1565,192 +1037,6 @@ class TestCollectionSearch(TestcaseBase):
"limit": default_limit,
})
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_consistency_bounded(self, nq, _async):
"""
target: test search with different consistency level
method: 1. create a collection
2. insert data
3. search with consistency_level is "bounded"
expected: searched successfully
"""
limit = 1000
nb_old = 500
dim = 64
auto_id = True
enable_dynamic_field = False
collection_w, _, _, insert_ids = \
self.init_collection_general(prefix, True, nb_old, auto_id=auto_id,
dim=dim, enable_dynamic_field=enable_dynamic_field)[0:4]
# 2. search for original data after load
vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
collection_w.search(vectors[:nq], default_search_field,
default_search_params, limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": nb_old,
"_async": _async,
})
kwargs = {}
consistency_level = kwargs.get(
"consistency_level", CONSISTENCY_BOUNDED)
kwargs.update({"consistency_level": consistency_level})
nb_new = 400
_, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
auto_id=auto_id, dim=dim,
insert_offset=nb_old,
enable_dynamic_field=enable_dynamic_field)
insert_ids.extend(insert_ids_new)
collection_w.search(vectors[:nq], default_search_field,
default_search_params, limit,
default_search_exp, _async=_async,
**kwargs,
)
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_consistency_strong(self, nq, _async):
"""
target: test search with different consistency level
method: 1. create a collection
2. insert data
3. search with consistency_level is "Strong"
expected: searched successfully
"""
limit = 1000
nb_old = 500
dim = 64
auto_id = False
enable_dynamic_field = False
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
auto_id=auto_id, dim=dim,
enable_dynamic_field=enable_dynamic_field)[0:4]
# 2. search for original data after load
vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
collection_w.search(vectors[:nq], default_search_field,
default_search_params, limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": nb_old,
"_async": _async})
nb_new = 400
_, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
auto_id=auto_id, dim=dim,
insert_offset=nb_old,
enable_dynamic_field=enable_dynamic_field)
insert_ids.extend(insert_ids_new)
kwargs = {}
consistency_level = kwargs.get("consistency_level", CONSISTENCY_STRONG)
kwargs.update({"consistency_level": consistency_level})
collection_w.search(vectors[:nq], default_search_field,
default_search_params, limit,
default_search_exp, _async=_async,
**kwargs,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": nb_old + nb_new,
"_async": _async})
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_consistency_eventually(self, nq, _async):
"""
target: test search with different consistency level
method: 1. create a collection
2. insert data
3. search with consistency_level is "eventually"
expected: searched successfully
"""
limit = 1000
nb_old = 500
dim = 64
auto_id = True
enable_dynamic_field = True
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
auto_id=auto_id, dim=dim,
enable_dynamic_field=enable_dynamic_field)[0:4]
# 2. search for original data after load
vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
collection_w.search(vectors[:nq], default_search_field,
default_search_params, limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": nb_old,
"_async": _async})
nb_new = 400
_, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
auto_id=auto_id, dim=dim,
insert_offset=nb_old,
enable_dynamic_field=enable_dynamic_field)
insert_ids.extend(insert_ids_new)
kwargs = {}
consistency_level = kwargs.get(
"consistency_level", CONSISTENCY_EVENTUALLY)
kwargs.update({"consistency_level": consistency_level})
collection_w.search(vectors[:nq], default_search_field,
default_search_params, limit,
default_search_exp, _async=_async,
**kwargs)
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_consistency_session(self, nq, _async):
"""
target: test search with different consistency level
method: 1. create a collection
2. insert data
3. search with consistency_level is "session"
expected: searched successfully
"""
limit = 1000
nb_old = 500
dim = 64
auto_id = False
enable_dynamic_field = True
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
auto_id=auto_id, dim=dim,
enable_dynamic_field=enable_dynamic_field)[0:4]
# 2. search for original data after load
vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
collection_w.search(vectors[:nq], default_search_field,
default_search_params, limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": nb_old,
"_async": _async})
kwargs = {}
consistency_level = kwargs.get(
"consistency_level", CONSISTENCY_SESSION)
kwargs.update({"consistency_level": consistency_level})
nb_new = 400
_, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
auto_id=auto_id, dim=dim,
insert_offset=nb_old,
enable_dynamic_field=enable_dynamic_field)
insert_ids.extend(insert_ids_new)
collection_w.search(vectors[:nq], default_search_field,
default_search_params, limit,
default_search_exp, _async=_async,
**kwargs,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": nb_old + nb_new,
"_async": _async})
@pytest.mark.tags(CaseLabel.L1)
def test_search_ignore_growing(self, nq, _async):
"""
@ -2161,3 +1447,4 @@ class TestCollectionSearch(TestcaseBase):
"invalid parameter"})

View File

@ -1,4 +1,6 @@
import logging
import time
import numpy as np
from common.constants import *
from utils.util_pymilvus import *
@ -47,13 +49,13 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
self.collection_name = "TestMilvusClientSearchV2" + cf.gen_unique_str("_")
self.partition_names = ["partition_1", "partition_2"]
self.pk_field_name = ct.default_primary_field_name
self.float_vector_field_name = "float_vector"
self.float_vector_field_name = ct.default_float_vec_field_name
self.bfloat16_vector_field_name = "bfloat16_vector"
self.sparse_vector_field_name = "sparse_vector"
self.binary_vector_field_name = "binary_vector"
self.float_vector_dim = 128
self.bf16_vector_dim = 200
self.binary_vector_dim = 256
self.float_vector_dim = 36
self.bf16_vector_dim = 35
self.binary_vector_dim = 32
self.float_vector_metric = "COSINE"
self.bf16_vector_metric = "L2"
self.sparse_vector_metric = "IP"
@ -346,7 +348,8 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("limit, nq", zip([1, 1000, ct.max_limit], [ct.max_nq, 10, 1]))
# @pytest.mark.parametrize("limit, nq", zip([1, 1000, ct.max_limit], [ct.max_nq, 10, 1]))
@pytest.mark.parametrize("limit, nq", zip([ct.max_limit], [1]))
def test_search_with_different_nq_limits(self, limit, nq):
"""
target: test search with different nq and limit values
@ -360,7 +363,7 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
# Generate vectors to search
vectors_to_search = cf.gen_vectors(nq, self.float_vector_dim)
search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}}
search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 128}}
# search with limit
search_res, _ = self.search(
@ -454,6 +457,73 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
}
)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("wildcard_output_fields", [["*"], ["*", ct.default_primary_field_name],
["*", ct.default_float_vec_field_name]])
def test_search_partition_with_output_fields(self, wildcard_output_fields):
"""
target: test partition search with output fields
method: 1. connect to milvus
2. partition search on an existing collection with output fields
expected: search successfully with output fields
"""
client = self._client()
collection_name = self.collection_name
collection_info = self.describe_collection(client, collection_name)[0]
fields = collection_info.get('fields', None)
field_names = [field.get('name') for field in fields]
partition_name = self.partition_names[0]
# Generate vectors to search
vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}}
# search with output fields
output_fields = cf.get_wildcard_output_field_names(collection_info, wildcard_output_fields)
search_res, _ = self.search(
client,
collection_name,
vectors_to_search[:default_nq],
partition_names=[partition_name],
anns_field=self.float_vector_field_name,
search_params=search_params,
limit=default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": default_nq,
"limit": default_limit,
"output_fields": field_names.extend([self.dyna_filed_name1, self.dyna_filed_name2])})
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_invalid_output_fields(self):
"""
target: test search with invalid output fields
method: search with invalid output_field values
expected: raise exception
"""
client = self._client()
collection_name = self.collection_name
collection_info = self.describe_collection(client, collection_name)[0]
fields = collection_info.get('fields', None)
field_names = [field.get('name') for field in fields]
partition_name = self.partition_names[0]
# Generate vectors to search
vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
search_params = {}
invalid_output_fields = [["%"], [""], ["-"], ["non_exist_field"]]
for field in invalid_output_fields:
error1 = {ct.err_code: 999, ct.err_msg: "field %s not exist" % field[0]}
error2 = {ct.err_code: 999, ct.err_msg: "`output_fields` value %s is illegal" % field}
error = error2 if field == [""] else error1
self.search(client, collection_name, vectors_to_search[:default_nq],
anns_field=self.float_vector_field_name,
search_params=search_params,
limit=default_limit,
output_fields=field,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_more_than_max_limit(self):
"""
@ -727,19 +797,12 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
schema = self.create_schema(client)[0]
schema.add_field(ct.default_primary_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(ct.default_float_vec_field_name, DataType.FLOAT_VECTOR, dim=ct.default_dim)
schema.add_field(ct.default_float_field_name, DataType.FLOAT)
schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=256)
schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=256, nullable=True)
self.create_collection(client, collection_name, schema=schema)
# insert data
data = []
for i in range(default_nb):
data.append({
ct.default_primary_field_name: i,
ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
ct.default_float_field_name: i * 1.0,
ct.default_string_field_name: str(i)
})
data = cf.gen_row_data_by_schema(schema=schema, nb=default_nb)
self.insert(client, collection_name, data)
# create index with metric cosine
@ -1006,7 +1069,7 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
# search in the collection
vectors_to_search = cf.gen_vectors(1, ct.default_dim)
limit = 1000
limit = 100
search_params = {}
search_res1, _ = self.search(
client,
@ -1101,6 +1164,7 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
# release the partition again and load the collection
self.release_partitions(client, collection_name, [to_be_released_partition])
self.load_collection(client, collection_name)
self.refresh_load(client, collection_name) # workaround for #43386, remove this line after it was fixed
# search again
search_res5, _ = self.search(
@ -1271,7 +1335,8 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
search_params=search_params, limit=ct.default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit})
# disable mmap
self.release_collection(client, collection_name)
@ -1283,7 +1348,8 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
search_params=search_params, limit=ct.default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit})
@pytest.mark.tags(CaseLabel.L2)
@ -1345,7 +1411,8 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
search_params=search_params, limit=ct.default_limit,
output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit})
# disable mmap
self.release_collection(client, collection_name)
@ -1357,7 +1424,8 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
search_params=search_params, limit=ct.default_limit,
output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit})
@pytest.mark.tags(CaseLabel.L2)
@ -1366,17 +1434,21 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
"""
Test search functionality with non-default shard numbers.
This test verifies that:
1. Collections are created with default shard numbers when num_shards <= 0
2. Collections are created with specified shard numbers when num_shards > 0
3. Search operations work correctly with different shard configurations
This test verifies that search operations work correctly when collections are created with:
- Negative shard numbers (should use default)
- Zero shards (should use default)
- Half of max shards
- Max shards
The test follows these steps:
1. Creates a collection with specified shard numbers
The test performs the following steps:
1. Creates a collection with specified shard number
2. Inserts test data
3. Builds an index
4. Performs a search operation
5. Validates the results
3. Builds index
4. Loads collection
5. Executes search and verifies results
@param num_shards: Number of shards to test (parameterized)
@tags: L2
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
@ -1412,36 +1484,193 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
self.search(client, collection_name, vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit})
@pytest.mark.tags(CaseLabel.L2)
def test_search_HNSW_index_with_redundant_param(self):
@pytest.mark.parametrize('vector_dtype', ct.all_dense_vector_types)
@pytest.mark.parametrize('index', ct.all_index_types[:8])
def test_search_output_field_vector_with_dense_vector_and_index(self, vector_dtype, index):
"""
Test search functionality with HNSW index and redundant parameters.
Test search with output vector field after different index types.
This test verifies that:
1. HNSW index can be created with redundant parameters
2. Search operations work correctly with redundant parameters
3. Redundant parameters are ignored
Steps:
1. Create a collection with specified schema and insert test data
2. Build index (with error handling for unsupported index types)
3. Load collection and perform search operations with:
- All output fields ("*")
- Explicitly specified all fields
- Subset of fields
4. Verify search results match expected output fields
The test performs following steps:
1. Creates a collection with float vectors
2. Inserts test data
3. Creates HNSW index with redundant parameters
4. Performs a search operation
5. Validates the results
Parameters:
- vector_dtype: Type of vector data (all supported dense vector types)
- index: Index type (first 8 supported index types)
Expected:
- Successful search operations with correct output fields returned
- Proper error when attempting unsupported index combinations
"""
dim = 16
index = "HNSW"
metrics = 'COSINE'
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
dim = 32
schema = self.create_schema(client)[0]
schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
schema.add_field('vector', vector_dtype, dim=dim)
schema.add_field('float_array', DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=200)
schema.add_field('json_field', DataType.JSON, max_length=200)
schema.add_field('string_field', DataType.VARCHAR, max_length=200)
self.create_collection(client, collection_name, schema=schema)
# insert
# Insert data in 3 batches with unique primary keys using a loop
insert_times = 3
random_vectors = list(cf.gen_vectors(ct.default_nb*insert_times, dim, vector_data_type=vector_dtype)) \
if vector_dtype == DataType.FLOAT_VECTOR \
else cf.gen_vectors(ct.default_nb*insert_times, dim, vector_data_type=vector_dtype)
for j in range(insert_times):
start_pk = j * ct.default_nb
rows = [{
"id": i + start_pk,
"vector": random_vectors[i + start_pk],
"float_array": [random.random() for _ in range(10)],
"json_field": {"name": "abook", "words": i},
"string_field": "Hello, Milvus!"
} for i in range(ct.default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# build index
index_params, _ = self.prepare_index_params(client)
index_params.add_index(field_name='vector', index_type=index,
metric_type=metrics,
params=cf.get_index_params_params(index_type=index))
if vector_dtype == DataType.INT8_VECTOR and index != 'HNSW':
# INT8_Vector only supports HNSW index for now
error = {"err_code": 999, "err_msg": f"data type Int8Vector can't build with this index {index}"}
self.create_index(client, collection_name, index_params=index_params,
check_task=CheckTasks.err_res, check_items=error)
else:
self.create_index(client, collection_name, index_params=index_params)
# load the collection with index
assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
self.load_collection(client, collection_name)
# search with output field vector
search_params = {}
vectors = random_vectors[:ct.default_nq]
# search output all fields
self.search(client, collection_name, vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit,
"output_fields": ["id", "vector", "float_array", "json_field", "string_field"]})
# search output specify all fields
self.search(client, collection_name, vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
output_fields=["id", "vector", "float_array", "json_field", "string_field"],
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit,
"output_fields": ["id", "vector", "float_array", "json_field", "string_field"]})
# search output specify some fields
self.search(client, collection_name, vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
output_fields=["id", "vector", "json_field"],
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit,
"output_fields": ["id", "vector", "json_field"]})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize('index', ct.binary_supported_index_types)
def test_search_with_output_fields_vector_with_binary_vector_and_index(self, index):
"""
Test search functionality with output fields for binary vector type and specified index.
This test case verifies that:
1. A collection with binary vector field can be created and data inserted
2. Index can be built on the binary vector field
3. Search operation with output fields (including vector field) works correctly
4. Results contain expected output fields (id and vector)
Parameters:
index: The index type to test with (parametrized via pytest.mark.parametrize)
The test performs following steps:
- Creates collection with binary vector field
- Inserts test data in batches
- Builds specified index type
- Performs search with output fields
- Validates search results contain expected fields
"""
vector_dtype = DataType.BINARY_VECTOR
client = self._client()
dim = 32
collection_name = cf.gen_collection_name_by_testcase_name()
schema, _ = self.create_schema(client)
schema.add_field("id", datatype=DataType.INT64, is_primary=True, auto_id=False)
schema.add_field("vector", datatype=vector_dtype, dim=dim)
self.create_collection(client, collection_name, schema=schema)
# Insert data in 3 batches with unique primary keys using a loop
insert_times = 3
# vector_dtype is fixed to BINARY_VECTOR above, so no FLOAT_VECTOR list() conversion is needed
random_vectors = cf.gen_vectors(ct.default_nb * insert_times, dim, vector_data_type=vector_dtype)
for j in range(insert_times):
start_pk = j * ct.default_nb
rows = [{
"id": i + start_pk,
"vector": random_vectors[i + start_pk]
} for i in range(ct.default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# build index
index_params, _ = self.prepare_index_params(client)
index_params.add_index(field_name='vector', index_type=index,
metric_type='JACCARD',
params=cf.get_index_params_params(index_type=index))
self.create_index(client, collection_name, index_params=index_params)
# load the collection with index
assert self.wait_for_index_ready(client, collection_name, 'vector', timeout=120)
self.load_collection(client, collection_name)
# search with output field vector
search_params = {}
vectors = random_vectors[:ct.default_nq]
self.search(client, collection_name, vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit,
"output_fields": ["id", "vector"]})
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_output_fields_empty(self):
"""
target: test search with output fields
method: search with empty output_field
expected: search success
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
dim = 32
# create collection with fast mode
self.create_collection(client, collection_name, dimension=dim)
# insert data
data = []
for i in range(ct.default_nb):
data.append({
@ -1450,24 +1679,20 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
})
self.insert(client, collection_name, data)
self.flush(client, collection_name)
# create index
index_params = self.prepare_index_params(client)[0]
params = cf.get_index_params_params(index)
params["nlist"] = 100 # nlist is redundant parameter
index_params.add_index(field_name='vector', index_type=index,
metric_type='COSINE', params=params)
self.create_index(client, collection_name, index_params=index_params)
self.wait_for_index_ready(client, collection_name, index_name='vector')
index_info = self.describe_index(client, collection_name, index_name='vector')
assert index_info[0]["nlist"] == '100'
# load
self.load_collection(client, collection_name)
# search
vectors = cf.gen_vectors(ct.default_nq, dim)
# search with empty output fields
search_params = {}
vectors = cf.gen_vectors(ct.default_nq, dim)
self.search(client, collection_name, vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
output_fields=[],
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit})
self.search(client, collection_name, vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
output_fields=None,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": ct.default_nq,
"limit": ct.default_limit})

View File

@ -0,0 +1,95 @@
from pymilvus import DataType
from common import common_type as ct
success = "success"
class DISKANN:
supported_vector_types = [
DataType.FLOAT_VECTOR,
DataType.FLOAT16_VECTOR,
DataType.BFLOAT16_VECTOR
]
supported_metrics = ['L2', 'IP', 'COSINE']
build_params = [
# search_list_size
# Type: Integer Range: [1, int_max]
# Default value: 100
{"description": "Minimum Boundary Test", "params": {"search_list_size": 1}, "expected": success},
{"description": "Large Value Test", "params": {"search_list_size": 10000}, "expected": success},
{"description": "Out of Range Test - Negative", "params": {"search_list_size": -1}, "expected": success},
{"description": "String Type Test", "params": {"search_list_size": "100"}, "expected": success},
{"description": "Float Type Test", "params": {"search_list_size": 100.0}, "expected": success},
{"description": "Boolean Type Test", "params": {"search_list_size": True}, "expected": success},
{"description": "None Type Test", "params": {"search_list_size": None}, "expected": success},
# search_cache_budget_gb_ratio
# Type: Float Range: [0.0, 0.3)
# Default value: 0.10
# TODO: run the minimum boundary test after issue #43176 is fixed
# {"description": "Minimum Boundary Test", "params": {"search_cache_budget_gb_ratio": 0.0}, "expected": success},
{"description": "Maximum Boundary Test", "params": {"search_cache_budget_gb_ratio": 0.3}, "expected": success},
{"description": "Default value Test", "params": {"search_cache_budget_gb_ratio": 0.1}, "expected": success},
{"description": "Out of Range Test - Negative", "params": {"search_cache_budget_gb_ratio": -0.1}, "expected": success},
{"description": "Out of Range Test - Too Large", "params": {"search_cache_budget_gb_ratio": 0.31}, "expected": success},
{"description": "String Type Test", "params": {"search_cache_budget_gb_ratio": "0.2"}, "expected": success},
{"description": "Boolean Type Test", "params": {"search_cache_budget_gb_ratio": True}, "expected": success},
{"description": "None Type Test", "params": {"search_cache_budget_gb_ratio": None}, "expected": success},
# pq_code_budget_gb_ratio
# Type: Float Range: (0.0, 0.25]
# Default value: 0.125
{"description": "Minimum Boundary Test", "params": {"pq_code_budget_gb_ratio": 0.0001}, "expected": success},
{"description": "Maximum Boundary Test", "params": {"pq_code_budget_gb_ratio": 0.25}, "expected": success},
{"description": "Default value Test", "params": {"pq_code_budget_gb_ratio": 0.125}, "expected": success},
{"description": "Out of Range Test - Negative", "params": {"pq_code_budget_gb_ratio": -0.1}, "expected": success},
{"description": "Out of Range Test - Too Large", "params": {"pq_code_budget_gb_ratio": 0.26}, "expected": success},
{"description": "String Type Test", "params": {"pq_code_budget_gb_ratio": "0.1"}, "expected": success},
{"description": "Boolean Type Test", "params": {"pq_code_budget_gb_ratio": True}, "expected": success},
{"description": "None Type Test", "params": {"pq_code_budget_gb_ratio": None}, "expected": success},
# max_degree
# Type: Integer Range: [1, 512]
# Default value: 56
{"description": "Minimum Boundary Test", "params": {"max_degree": 1}, "expected": success},
{"description": "Maximum Boundary Test", "params": {"max_degree": 512}, "expected": success},
{"description": "Default value Test", "params": {"max_degree": 56}, "expected": success},
{"description": "Large Value Test", "params": {"max_degree": 128}, "expected": success},
{"description": "Out of Range Test - Negative", "params": {"max_degree": -1}, "expected": success},
{"description": "String Type Test", "params": {"max_degree": "32"}, "expected": success},
{"description": "Float Type Test", "params": {"max_degree": 32.0}, "expected": success},
{"description": "Boolean Type Test", "params": {"max_degree": True}, "expected": success},
{"description": "None Type Test", "params": {"max_degree": None}, "expected": success},
# combination params test
{"description": "Optimal Performance Combination Test", "params": {"search_list_size": 100, "beamwidth": 10, "search_cache_budget_gb_ratio": 0.5, "pq_code_budget_gb_ratio": 0.5}, "expected": success},
{"description": "empty dict params", "params": {}, "expected": success},
{"description": "not_defined_param in the dict params", "params": {"search_list_size": 100, "not_defined_param": "nothing"}, "expected": success},
]
search_params = [
# beam_width_ratio
# Type: Float Range: [1, max(128 / CPU number, 16)]
# Default value: 4.0
{"description": "Minimum Boundary Test", "params": {"beam_width_ratio": 1.0}, "expected": success},
{"description": "Maximum Boundary Test", "params": {"beam_width_ratio": 16.0}, "expected": success},
{"description": "Default value Test", "params": {"beam_width_ratio": 4.0}, "expected": success},
{"description": "Out of Range Test - Negative", "params": {"beam_width_ratio": -0.1}, "expected": success},
{"description": "Out of Range Test - Too Large", "params": {"beam_width_ratio": 17.0}, "expected": success},
{"description": "String Type Test", "params": {"beam_width_ratio": "2.0"}, "expected": success},
{"description": "Boolean Type Test", "params": {"beam_width_ratio": True}, "expected": success},
{"description": "None Type Test", "params": {"beam_width_ratio": None}, "expected": success},
# search_list_size
# Type: Integer Range: [1, int_max]
# Default value: 100
{"description": "Minimum Boundary Test", "params": {"search_list_size": 1}, "expected": {"err_code": 999, "err_msg": "search_list_size(1) should be larger than k(10)"}},
{"description": "Large Value Test", "params": {"search_list_size": 1000}, "expected": success},
{"description": "Default value Test", "params": {"search_list_size": 100}, "expected": success},
{"description": "Out of Range Test - Negative", "params": {"search_list_size": -1}, "expected": {"err_code": 999, "err_msg": "param 'search_list_size' (-1) should be in range [1, 2147483647]"}},
{"description": "String Type Test", "params": {"search_list_size": "100"}, "expected": success},
{"description": "Float Type Test", "params": {"search_list_size": 100.0}, "expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'search_list_size' (100.0) should be integer"}},
{"description": "Boolean Type Test", "params": {"search_list_size": True}, "expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'search_list_size' (true) should be integer"}},
{"description": "None Type Test", "params": {"search_list_size": None}, "expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'search_list_size' (null) should be integer"}},
# mix params
{"description": "mix params", "params": {"search_list_size": 100, "beam_width_ratio": 0.5}, "expected": success},
{"description": "mix params", "params": {}, "expected": success},
]

View File

@ -0,0 +1,175 @@
from pymilvus import DataType
from common import common_type as ct
success = "success"
class HNSW:
supported_vector_types = [
DataType.FLOAT_VECTOR,
DataType.FLOAT16_VECTOR,
DataType.BFLOAT16_VECTOR,
DataType.INT8_VECTOR
]
supported_metrics = ['L2', 'IP', 'COSINE']
build_params = [
# M params test
{
"description": "Minimum Boundary Test",
"params": {"M": 2},
"expected": success
},
{
"description": "Maximum Boundary Test",
"params": {"M": 2048},
"expected": success
},
{
"description": "Out of Range Test - Negative",
"params": {"M": -1},
"expected": {"err_code": 999, "err_msg": "param 'M' (-1) should be in range [2, 2048]"}
},
{
"description": "Out of Range Test - Too Large",
"params": {"M": 2049},
"expected": {"err_code": 999, "err_msg": "param 'M' (2049) should be in range [2, 2048]"}
},
{
"description": "String Type Test will ignore the wrong type",
"params": {"M": "16"},
"expected": success
},
{
"description": "Float Type Test",
"params": {"M": 16.0},
"expected": {"err_code": 999, "err_msg": "wrong data type in json"}
},
{
"description": "Boolean Type Test",
"params": {"M": True},
"expected": {"err_code": 999, "err_msg": "invalid integer value, key: 'M', value: 'True': invalid parameter"}
},
{
"description": "None Type Test, use default value",
"params": {"M": None},
"expected": success
},
{
"description": "List Type Test",
"params": {"M": [16]},
"expected": {"err_code": 999, "err_msg": "invalid integer value, key: 'M', value: '[16]': invalid parameter"}
},
# efConstruction params test
{
"description": "Minimum Boundary Test",
"params": {"efConstruction": 1},
"expected": success
},
{
"description": "Large Value Test",
"params": {"efConstruction": 10000},
"expected": success
},
{
"description": "Out of Range Test - Negative",
"params": {"efConstruction": -1},
"expected": {"err_code": 999, "err_msg": "param 'efConstruction' (-1) should be in range [1, 2147483647]"}
},
{
"description": "String Type Test will ignore the wrong type",
"params": {"efConstruction": "100"},
"expected": success
},
{
"description": "Float Type Test",
"params": {"efConstruction": 100.0},
"expected": {"err_code": 999, "err_msg": "wrong data type in json"}
},
{
"description": "Boolean Type Test",
"params": {"efConstruction": True},
"expected": {"err_code": 999, "err_msg": "invalid integer value, key: 'efConstruction', value: 'True': invalid parameter"}
},
{
"description": "None Type Test, use default value",
"params": {"efConstruction": None},
"expected": success
},
{
"description": "List Type Test",
"params": {"efConstruction": [100]},
"expected": {"err_code": 999, "err_msg": "invalid integer value, key: 'efConstruction', value: '[100]': invalid parameter"}
},
# combination params test
{
"description": "Optimal Performance Combination Test",
"params": {"M": 16, "efConstruction": 200},
"expected": success
},
{
"description": "empty dict params",
"params": {},
"expected": success
},
{
"description": "not_defined_param in the dict params",
"params": {"M": 16, "efConstruction": 200, "not_defined_param": "nothing"},
"expected": success
},
]
search_params = [
# ef params test
{
"description": "Minimum Boundary Test",
"params": {"ef": 1},
"expected": {"err_code": 999, "err_msg": "ef(1) should be larger than k(10)"} # assume default limit=10
},
{
"description": "Large Value Test",
"params": {"ef": 10000},
"expected": success
},
{
"description": "Out of Range Test - Negative",
"params": {"ef": -1},
"expected": {"err_code": 999, "err_msg": "param 'ef' (-1) should be in range [1, 2147483647]"}
},
{
"description": "String Type Test, not check data type",
"params": {"ef": "32"},
"expected": success
},
{
"description": "Float Type Test",
"params": {"ef": 32.0},
"expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'ef' (32.0) should be integer"}
},
{
"description": "Boolean Type Test",
"params": {"ef": True},
"expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'ef' (true) should be integer"}
},
{
"description": "None Type Test",
"params": {"ef": None},
"expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'ef' (null) should be integer"}
},
{
"description": "List Type Test",
"params": {"ef": [32]},
"expected": {"err_code": 999, "err_msg": "param 'ef' ([32]) should be integer"}
},
# combination params test
{
"description": "Optimal Performance Combination Test",
"params": {"ef": 64},
"expected": success
},
{
"description": "empty dict params",
"params": {},
"expected": success
},
]

View File

@ -0,0 +1,229 @@
import logging
from utils.util_pymilvus import *
from common.common_type import CaseLabel, CheckTasks
from common import common_type as ct
from common import common_func as cf
from base.client_v2_base import TestMilvusClientV2Base
import pytest
from idx_diskann import DISKANN
index_type = "DISKANN"
success = "success"
pk_field_name = 'id'
vector_field_name = 'vector'
dim = ct.default_dim
default_nb = 2000
default_build_params = {"search_list_size": 100, "beamwidth": 10, "pq_code_budget_gb": 1.0, "num_threads": 8, "max_degree": 64, "indexing_list_size": 100, "build_dram_budget_gb": 2.0, "search_dram_budget_gb": 1.0}
default_search_params = {"search_list_size": 100, "beamwidth": 10, "search_dram_budget_gb": 1.0}
class TestDiskannBuildParams(TestMilvusClientV2Base):
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("params", DISKANN.build_params)
def test_diskann_build_params(self, params):
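"""
Test the build params of DISKANN index
"""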
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema, _ = self.create_schema(client)
schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(vector_field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
self.create_collection(client, collection_name, schema=schema)
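# Insert data in 2 batches with unique primary keys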
insert_times = 2
random_vectors = list(cf.gen_vectors(default_nb * insert_times, dim, vector_data_type=DataType.FLOAT_VECTOR))
for j in range(insert_times):
start_pk = j * default_nb
rows = [{
pk_field_name: i + start_pk,
vector_field_name: random_vectors[i + start_pk]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
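# create index with the parametrized build params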
build_params = params.get("params", None)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=vector_field_name,
metric_type=cf.get_default_metric_for_vector_type(vector_type=DataType.FLOAT_VECTOR),
index_type=index_type,
params=build_params)
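# build index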
if params.get("expected", None) != success:
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res,
check_items=params.get("expected"))
else:
self.create_index(client, collection_name, index_params)
self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
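# load collection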
self.load_collection(client, collection_name)
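# search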
nq = 2
search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=DataType.FLOAT_VECTOR)
self.search(client, collection_name, search_vectors,
search_params=default_search_params,
limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": nq,
"limit": ct.default_limit,
"pk_name": pk_field_name})
idx_info = client.describe_index(collection_name, vector_field_name)
if build_params is not None:
for key, value in build_params.items():
if value is not None:
assert key in idx_info.keys()
assert str(value) == idx_info[key]
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("vector_data_type", ct.all_vector_types)
def test_diskann_on_all_vector_types(self, vector_data_type):
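"""
Test building DISKANN index on all the vector types
"""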
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema, _ = self.create_schema(client)
schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
if vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
schema.add_field(vector_field_name, datatype=vector_data_type)
else:
schema.add_field(vector_field_name, datatype=vector_data_type, dim=dim)
self.create_collection(client, collection_name, schema=schema)
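# Insert data in 2 batches with unique primary keys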
insert_times = 2
random_vectors = list(cf.gen_vectors(default_nb*insert_times, default_dim, vector_data_type=vector_data_type)) \
if vector_data_type == DataType.FLOAT_VECTOR \
else cf.gen_vectors(default_nb*insert_times, default_dim, vector_data_type=vector_data_type)
for j in range(insert_times):
start_pk = j * default_nb
rows = [{
pk_field_name: i + start_pk,
vector_field_name: random_vectors[i + start_pk]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
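# create index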
index_params = self.prepare_index_params(client)[0]
metric_type = cf.get_default_metric_for_vector_type(vector_data_type)
index_params.add_index(field_name=vector_field_name,
metric_type=metric_type,
index_type=index_type,
**default_build_params)
if vector_data_type not in DISKANN.supported_vector_types:
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res,
check_items={"err_code": 999,
"err_msg": f"can't build with this index DISKANN: invalid parameter"})
else:
self.create_index(client, collection_name, index_params)
self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
self.load_collection(client, collection_name)
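# search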
nq = 2
search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=vector_data_type)
self.search(client, collection_name, search_vectors,
search_params=default_search_params,
limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": nq,
"limit": ct.default_limit,
"pk_name": pk_field_name})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("metric", DISKANN.supported_metrics)
def test_diskann_on_all_metrics(self, metric):
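"""
Test DISKANN index on all the supported metrics
"""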
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema, _ = self.create_schema(client)
schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(vector_field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
self.create_collection(client, collection_name, schema=schema)
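# Insert data in 2 batches with unique primary keys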
insert_times = 2
random_vectors = list(cf.gen_vectors(default_nb*insert_times, default_dim, vector_data_type=DataType.FLOAT_VECTOR))
for j in range(insert_times):
start_pk = j * default_nb
rows = [{
pk_field_name: i + start_pk,
vector_field_name: random_vectors[i + start_pk]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
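# create index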
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=vector_field_name,
metric_type=metric,
index_type=index_type,
**default_build_params)
self.create_index(client, collection_name, index_params)
self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
self.load_collection(client, collection_name)
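# search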
nq = 2
search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=DataType.FLOAT_VECTOR)
self.search(client, collection_name, search_vectors,
search_params=default_search_params,
limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": nq,
"limit": ct.default_limit,
"pk_name": pk_field_name})
@pytest.mark.xdist_group("TestDiskannSearchParams")
class TestDiskannSearchParams(TestMilvusClientV2Base):
def setup_class(self):
super().setup_class(self)
self.collection_name = "TestDiskannSearchParams" + cf.gen_unique_str("_")
self.float_vector_field_name = vector_field_name
self.float_vector_dim = dim
self.primary_keys = []
self.enable_dynamic_field = False
self.datas = []
@pytest.fixture(scope="class", autouse=True)
def prepare_collection(self, request):
client = self._client()
collection_schema = self.create_schema(client)[0]
collection_schema.add_field(pk_field_name, DataType.INT64, is_primary=True, auto_id=False)
collection_schema.add_field(self.float_vector_field_name, DataType.FLOAT_VECTOR, dim=128)
self.create_collection(client, self.collection_name, schema=collection_schema,
enable_dynamic_field=self.enable_dynamic_field, force_teardown=False)
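# Insert data in 2 batches with unique primary keys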
insert_times = 2
float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim,
vector_data_type=DataType.FLOAT_VECTOR)
for j in range(insert_times):
rows = []
for i in range(default_nb):
pk = i + j * default_nb
row = {
pk_field_name: pk,
self.float_vector_field_name: list(float_vectors[pk])
}
self.datas.append(row)
rows.append(row)
self.insert(client, self.collection_name, data=rows)
self.primary_keys.extend([i + j * default_nb for i in range(default_nb)])
self.flush(client, self.collection_name)
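# build DISKANN index and load the collection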
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=self.float_vector_field_name,
metric_type="COSINE",
index_type=index_type,
params=default_build_params)
self.create_index(client, self.collection_name, index_params=index_params)
self.wait_for_index_ready(client, self.collection_name, index_name=self.float_vector_field_name)
self.load_collection(client, self.collection_name)
def teardown():
self.drop_collection(self._client(), self.collection_name)
request.addfinalizer(teardown)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("params", DISKANN.search_params)
def test_diskann_search_params(self, params):
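"""
Test the search params of DISKANN index
"""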
client = self._client()
collection_name = self.collection_name
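# search with the parametrized search params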
nq = 2
search_vectors = cf.gen_vectors(nq, dim=self.float_vector_dim, vector_data_type=DataType.FLOAT_VECTOR)
search_params = params.get("params", None)
if params.get("expected", None) != success:
self.search(client, collection_name, search_vectors,
search_params=search_params,
limit=ct.default_limit,
check_task=CheckTasks.err_res,
check_items=params.get("expected"))
else:
self.search(client, collection_name, search_vectors,
search_params=search_params,
limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": nq,
"limit": ct.default_limit,
"pk_name": pk_field_name})

View File

@ -0,0 +1,273 @@
import logging
from utils.util_pymilvus import *
from common.common_type import CaseLabel, CheckTasks
from common import common_type as ct
from common import common_func as cf
from base.client_v2_base import TestMilvusClientV2Base
import pytest
from idx_hnsw import HNSW
index_type = "HNSW"
success = "success"
pk_field_name = 'id'
vector_field_name = 'vector'
dim = ct.default_dim
default_nb = 2000
default_build_params = {"M": 16, "efConstruction": 200}
default_search_params = {"ef": 64}
class TestHnswBuildParams(TestMilvusClientV2Base):
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("params", HNSW.build_params)
def test_hnsw_build_params(self, params):
"""
Test the build params of HNSW index
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema, _ = self.create_schema(client)
schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(vector_field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
self.create_collection(client, collection_name, schema=schema)
# Insert data in 2 batches with unique primary keys
insert_times = 2
random_vectors = list(cf.gen_vectors(default_nb * insert_times, dim, vector_data_type=DataType.FLOAT_VECTOR))
for j in range(insert_times):
start_pk = j * default_nb
rows = [{
pk_field_name: i + start_pk,
vector_field_name: random_vectors[i + start_pk]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# create index
build_params = params.get("params", None)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=vector_field_name,
metric_type=cf.get_default_metric_for_vector_type(vector_type=DataType.FLOAT_VECTOR),
index_type=index_type,
params=build_params)
# build index
if params.get("expected", None) != success:
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res,
check_items=params.get("expected"))
else:
self.create_index(client, collection_name, index_params)
self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
# load collection
self.load_collection(client, collection_name)
# search
nq = 2
search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=DataType.FLOAT_VECTOR)
self.search(client, collection_name, search_vectors,
search_params=default_search_params,
limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": nq,
"limit": ct.default_limit,
"pk_name": pk_field_name})
# verify the index params are persisted
idx_info = client.describe_index(collection_name, vector_field_name)
if build_params is not None:
for key, value in build_params.items():
if value is not None:
assert key in idx_info.keys()
assert str(value) in idx_info.values()
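# For reference (hedged, not captured from a live run): describe_index is expected to
# return a flat dict whose values are strings, e.g. roughly
# {'index_type': 'HNSW', 'metric_type': 'COSINE', 'M': '16', 'efConstruction': '200',
#  'field_name': 'vector', ...},
# which is why the assertions above compare str(value) against idx_info.values().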
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("vector_data_type", ct.all_vector_types)
def test_hnsw_on_all_vector_types(self, vector_data_type):
"""
Test HNSW index on all the vector types and metrics
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema, _ = self.create_schema(client)
schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
if vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
schema.add_field(vector_field_name, datatype=vector_data_type)
else:
schema.add_field(vector_field_name, datatype=vector_data_type, dim=dim)
self.create_collection(client, collection_name, schema=schema)
# Insert data in 2 batches with unique primary keys
insert_times = 2
random_vectors = list(cf.gen_vectors(default_nb*insert_times, dim, vector_data_type=vector_data_type)) \
if vector_data_type == DataType.FLOAT_VECTOR \
else cf.gen_vectors(default_nb*insert_times, dim, vector_data_type=vector_data_type)
for j in range(insert_times):
start_pk = j * default_nb
rows = [{
pk_field_name: i + start_pk,
vector_field_name: random_vectors[i + start_pk]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# create index
index_params = self.prepare_index_params(client)[0]
metric_type = cf.get_default_metric_for_vector_type(vector_data_type)
index_params.add_index(field_name=vector_field_name,
metric_type=metric_type,
index_type=index_type,
M=16,
efConstruction=200)
if vector_data_type not in HNSW.supported_vector_types:
self.create_index(client, collection_name, index_params,
check_task=CheckTasks.err_res,
check_items={"err_code": 999,
"err_msg": f"can't build with this index HNSW: invalid parameter"})
else:
self.create_index(client, collection_name, index_params)
self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
# load collection
self.load_collection(client, collection_name)
# search
nq = 2
search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=vector_data_type)
self.search(client, collection_name, search_vectors,
search_params=default_search_params,
limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": nq,
"limit": ct.default_limit,
"pk_name": pk_field_name})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("metric", HNSW.supported_metrics)
def test_hnsw_on_all_metrics(self, metric):
"""
Test HNSW index build and search on all supported metrics
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema, _ = self.create_schema(client)
schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(vector_field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
self.create_collection(client, collection_name, schema=schema)
# insert data
insert_times = 2
random_vectors = list(cf.gen_vectors(default_nb*insert_times, dim, vector_data_type=DataType.FLOAT_VECTOR))
for j in range(insert_times):
start_pk = j * default_nb
rows = [{
pk_field_name: i + start_pk,
vector_field_name: random_vectors[i + start_pk]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# create index
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=vector_field_name,
metric_type=metric,
index_type=index_type,
M=16,
efConstruction=200)
self.create_index(client, collection_name, index_params)
self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
# load collection
self.load_collection(client, collection_name)
# search
nq = 2
search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=DataType.FLOAT_VECTOR)
self.search(client, collection_name, search_vectors,
search_params=default_search_params,
limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": nq,
"limit": ct.default_limit,
"pk_name": pk_field_name})
@pytest.mark.xdist_group("TestHnswSearchParams")
class TestHnswSearchParams(TestMilvusClientV2Base):
"""Test search with pagination functionality for HNSW index"""
def setup_class(self):
super().setup_class(self)
self.collection_name = "TestHnswSearchParams" + cf.gen_unique_str("_")
self.float_vector_field_name = vector_field_name
self.float_vector_dim = dim
self.primary_keys = []
self.enable_dynamic_field = False
self.datas = []
@pytest.fixture(scope="class", autouse=True)
def prepare_collection(self, request):
"""
Initialize collection before test class runs
"""
client = self._client()
collection_schema = self.create_schema(client)[0]
collection_schema.add_field(pk_field_name, DataType.INT64, is_primary=True, auto_id=False)
collection_schema.add_field(self.float_vector_field_name, DataType.FLOAT_VECTOR, dim=128)
self.create_collection(client, self.collection_name, schema=collection_schema,
enable_dynamic_field=self.enable_dynamic_field, force_teardown=False)
insert_times = 2
float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim,
vector_data_type=DataType.FLOAT_VECTOR)
for j in range(insert_times):
rows = []
for i in range(default_nb):
pk = i + j * default_nb
row = {
pk_field_name: pk,
self.float_vector_field_name: list(float_vectors[pk])
}
self.datas.append(row)
rows.append(row)
self.insert(client, self.collection_name, data=rows)
self.primary_keys.extend([i + j * default_nb for i in range(default_nb)])
self.flush(client, self.collection_name)
# Create HNSW index
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=self.float_vector_field_name,
metric_type="COSINE",
index_type=index_type,
params=default_build_params)
self.create_index(client, self.collection_name, index_params=index_params)
self.wait_for_index_ready(client, self.collection_name, index_name=self.float_vector_field_name)
self.load_collection(client, self.collection_name)
def teardown():
self.drop_collection(self._client(), self.collection_name)
request.addfinalizer(teardown)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("params", HNSW.search_params)
def test_hnsw_search_params(self, params):
"""
Test the search params of HNSW index
"""
client = self._client()
collection_name = self.collection_name
nq = 2
search_vectors = cf.gen_vectors(nq, dim=self.float_vector_dim, vector_data_type=DataType.FLOAT_VECTOR)
search_params = params.get("params", None)
if params.get("expected", None) != success:
self.search(client, collection_name, search_vectors,
search_params=search_params,
limit=ct.default_limit,
check_task=CheckTasks.err_res,
check_items=params.get("expected"))
else:
self.search(client, collection_name, search_vectors,
search_params=search_params,
limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": nq,
"limit": ct.default_limit,
"pk_name": pk_field_name})

View File

@ -527,7 +527,7 @@ class TestInsertOperation(TestcaseBase):
expected: error raised
"""
collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
nb = 1
nb = 10
data = []
fields = collection_w.schema.fields
for field in fields:
@ -747,12 +747,9 @@ class TestInsertOperation(TestcaseBase):
c_name = cf.gen_unique_str(prefix)
schema = cf.gen_default_collection_schema(primary_field=pk_field, auto_id=True)
collection_w = self.init_collection_wrap(name=c_name, schema=schema)
data = []
nb = 100
for field in collection_w.schema.fields:
field_data = cf.gen_data_by_collection_field(field, nb=nb)
if field.name != pk_field:
data.append(field_data)
data = cf.gen_column_data_by_schema(nb=nb, schema=collection_w.schema)
collection_w.insert(data=data)
assert collection_w.num_entities == nb
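# Hedged sketch of the intent behind gen_column_data_by_schema as used above: it is
# assumed to return column-oriented data (one list per non-auto-id field, in schema
# order), replacing the old per-field gen_data_by_collection_field loop. Roughly:
# def gen_column_data_by_schema_sketch(schema, nb):
#     return [cf.gen_data_by_collection_field(f, nb=nb)
#             for f in schema.fields if not f.auto_id]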
@ -1246,7 +1243,7 @@ class TestInsertInvalid(TestcaseBase):
primary_field=primary_field, is_index=False,
is_all_data_type=True, with_json=True)[0]
nb = 100
data = cf.gen_data_by_collection_schema(collection_w.schema, nb=nb)
data = cf.gen_column_data_by_schema(schema=collection_w.schema, nb=nb)
for dirty_i in [0, nb // 2, nb - 1]: # check the dirty data at first, middle and last
log.debug(f"dirty_i: {dirty_i}")
for i in range(len(data)):
@ -2194,7 +2191,7 @@ class TestUpsertInvalid(TestcaseBase):
primary_field=primary_field, is_index=False,
is_all_data_type=True, with_json=True)[0]
nb = 100
data = cf.gen_data_by_collection_schema(collection_w.schema, nb=nb)
data = cf.gen_column_data_by_schema(schema=collection_w.schema, nb=nb)
for dirty_i in [0, nb // 2, nb - 1]: # check the dirty data at first, middle and last
log.debug(f"dirty_i: {dirty_i}")
for i in range(len(data)):

View File

@ -128,7 +128,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
@ -359,7 +359,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
@ -696,7 +696,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
@ -1022,7 +1022,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
@ -1438,7 +1438,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase):
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
@ -1796,7 +1796,7 @@ class TestBitmapIndexMmap(TestCaseClassBase):
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
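# Hedged sketch of what a cf.parse_fmod helper (swapped in for math.fmod above) might
# look like, assuming the goal is C-style remainder semantics (sign follows the
# dividend) without the float conversion math.fmod performs, which loses precision for
# large int64 values. The real implementation in common_func may differ:
# def parse_fmod_sketch(x, y):
#     r = abs(x) % abs(y)
#     return r if x >= 0 else -r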
@ -2519,7 +2519,6 @@ class TestGroupSearch(TestCaseClassBase):
output_fields=[DataType.VARCHAR.name],
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq, "limit": ct.default_limit})[0]
print(res)
for i in range(ct.default_nq):
group_values = []
for l in range(ct.default_limit):
@ -2542,6 +2541,31 @@ class TestGroupSearch(TestCaseClassBase):
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq, "limit": ct.default_limit})
@pytest.mark.tags(CaseLabel.L2)
def test_hybrid_search_group_by_empty_results(self):
"""
verify hybrid search with group_by works when the grouping returns empty results
"""
# 3. prepare search params
req_list = []
for i in range(len(self.vector_fields)):
search_param = {
"data": cf.gen_vectors(ct.default_nq, dim=self.dims[i],
vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap,
self.vector_fields[i])),
"anns_field": self.vector_fields[i],
"param": {},
"limit": ct.default_limit,
"expr": f"{self.primary_field} < 0"} # make sure return empty results
req = AnnSearchRequest(**search_param)
req_list.append(req)
# 4. hybrid search group by empty results
self.collection_wrap.hybrid_search(req_list, WeightedRanker(0.1, 0.9, 0.2, 0.3), ct.default_limit,
group_by_field=DataType.VARCHAR.name,
output_fields=[DataType.VARCHAR.name],
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq, "limit": 0})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("support_field", [DataType.INT8.name, DataType.INT64.name,
DataType.BOOL.name, DataType.VARCHAR.name])

View File

@ -1387,8 +1387,8 @@ class TestUtilityAdvanced(TestcaseBase):
)
for _ in range(segment_num):
# insert random pks, ***start=None will generate random data***
data = cf.gen_values(self.collection_wrap.schema, nb=nb, start_id=None)
# insert random pks
data = cf.gen_values(self.collection_wrap.schema, nb=nb, random_pk=True)
self.collection_wrap.insert(data)
self.collection_wrap.flush()
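# Hedged note: random_pk=True above is assumed to make gen_values draw primary keys at
# random (e.g. something like random.sample(range(0, 1 << 31), nb)) rather than the
# sequential start..start+nb keys, so repeated inserts spread across segments; the exact
# behavior lives in cf.gen_values and may differ.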
@ -1443,15 +1443,14 @@ class TestUtilityAdvanced(TestcaseBase):
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(ct.default_float_vec_field_name))
self.collection_wrap.load()
# insert random pks, ***start=None will generate random data***
data = cf.gen_values(self.collection_wrap.schema, nb=nb, start_id=None)
# insert random pks
data = cf.gen_values(self.collection_wrap.schema, nb=nb, random_pk=True)
self.collection_wrap.insert(data)
# get_query_segment_info and verify results
res_sealed, _ = self.utility_wrap.get_query_segment_info(collection_name)
assert len(res_sealed) == 0
@pytest.mark.tags(CaseLabel.L1)
def test_get_sealed_query_segment_info_after_create_index(self):
"""