Mirror of https://gitee.com/milvus-io/milvus.git (synced 2025-12-07 17:48:29 +08:00)

test: Refactor diskann and hnsw index, and update gen data functions (#43452)

related issue #40698
1. add diskann and hnsw index tests
2. update gen_row_data and gen_column_data functions

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>

This commit is contained in:
parent e9ab73e93d
commit abb3aeacdf
@@ -360,6 +360,18 @@ class TestMilvusClientV2Base(Base):
                                        collection_name=collection_name, **kwargs).run()
         return res, check_result

+    @trace()
+    def refresh_load(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        kwargs.update({"timeout": timeout})
+
+        func_name = sys._getframe().f_code.co_name
+        res, check = api_request([client.refresh_load, collection_name], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task,
+                                       check_items, check,
+                                       collection_name=collection_name, **kwargs).run()
+        return res, check_result
+
     @trace()
     def release_collection(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs):
         timeout = TIMEOUT if timeout is None else timeout
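The new `refresh_load` wrapper follows the framework's standard pattern: forward the raw client call through `api_request`, then validate the result with `ResponseChecker`. A minimal sketch of how a test would call it, assuming a test class deriving from `TestMilvusClientV2Base`, a sibling `load_collection` wrapper, and a running Milvus; `collection_name` is a placeholder:

    # inside a test method of a TestMilvusClientV2Base subclass
    client = self._client()
    self.load_collection(client, collection_name)
    # re-trigger loading of the collection and unpack (api result, check result)
    res, check_passed = self.refresh_load(client, collection_name, timeout=60)
    assert check_passed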
@@ -247,17 +247,12 @@ class ResponseChecker:
             raise Exception("No expect values found in the check task")
         if check_items.get("collection_name", None) is not None:
             assert res["collection_name"] == check_items.get("collection_name")
-        if check_items.get("auto_id", False):
-            assert res["auto_id"] == check_items.get("auto_id")
-        if check_items.get("num_shards", 1):
-            assert res["num_shards"] == check_items.get("num_shards", 1)
-        if check_items.get("consistency_level", 2):
-            assert res["consistency_level"] == check_items.get("consistency_level", 2)
-        if check_items.get("enable_dynamic_field", True):
-            assert res["enable_dynamic_field"] == check_items.get("enable_dynamic_field", True)
-        if check_items.get("num_partitions", 1):
-            assert res["num_partitions"] == check_items.get("num_partitions", 1)
-        if check_items.get("id_name", "id"):
+        assert res["auto_id"] == check_items.get("auto_id", False)
+        assert res["num_shards"] == check_items.get("num_shards", 1)
+        assert res["consistency_level"] == check_items.get("consistency_level", 0)
+        assert res["enable_dynamic_field"] == check_items.get("enable_dynamic_field", True)
+        assert res["num_partitions"] == check_items.get("num_partitions", 1)
+        if check_items.get("id_name", None):
             assert res["fields"][0]["name"] == check_items.get("id_name", "id")
         if check_items.get("vector_name", "vector"):
             vector_name_list = []
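With this refactor the describe-collection checker asserts `auto_id`, `num_shards`, `consistency_level`, `enable_dynamic_field`, and `num_partitions` unconditionally, falling back to the defaults above, so callers only pass the expectations that differ from the defaults. A sketch of a typical call site, mirroring the test updates later in this commit (`client` and `collection_name` are placeholders):

    self.describe_collection(client, collection_name,
                             check_task=CheckTasks.check_describe_collection_property,
                             check_items={"collection_name": collection_name,
                                          "dim": default_dim,
                                          "consistency_level": 2})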
@@ -474,9 +469,9 @@ class ResponseChecker:
         elif check_items.get("metric", None) is not None:
             # verify the distances are already sorted
             if check_items.get("metric").upper() in ["IP", "COSINE", "BM25"]:
-                assert distances == sorted(distances, reverse=True)
+                assert pc.compare_lists_with_epsilon_ignore_dict_order(distances, sorted(distances, reverse=True))
             else:
-                assert distances == sorted(distances, reverse=False)
+                assert pc.compare_lists_with_epsilon_ignore_dict_order(distances, sorted(distances, reverse=False))
         if check_items.get("vector_nq") is None or check_items.get("original_vectors") is None:
             log.debug("skip distance check for knowhere does not return the precise distances")
         else:
@@ -484,9 +479,9 @@ class ResponseChecker:
             else:
                 pass  # just check nq and topk, no specific ids need to be checked
             nq_i += 1

         log.info("search_results_check: limit (topK) and "
                  "ids searched for %d queries are correct" % len(search_res))

         return True

     @staticmethod
@@ -600,7 +595,7 @@ class ResponseChecker:
         if isinstance(query_res, list):
             # assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name, with_vec=with_vec)
             # return True
-            assert pc.compare_lists_ignore_order(a=query_res, b=exp_res)
+            assert pc.compare_lists_with_epsilon_ignore_dict_order(a=query_res, b=exp_res)
             return True
         else:
             log.error(f"Query result {query_res} is not list")
@@ -69,7 +69,7 @@ def deep_approx_compare(x, y, epsilon=epsilon):
     return x == y


-def compare_lists_ignore_order(a, b, epsilon=epsilon):
+def compare_lists_with_epsilon_ignore_dict_order(a, b, epsilon=epsilon):
     """
     Compares two lists of dictionaries for equality (order-insensitive) with floating-point tolerance.
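The body of the renamed helper is not shown in this hunk. A minimal, self-contained sketch of the comparison it describes, under stated assumptions: the repo's real implementation delegates to `deep_approx_compare` above and keeps its own `epsilon` constant; the names and tolerance below are illustrative only.

    import math

    EPSILON = 1e-6  # assumed tolerance; the module keeps its own `epsilon` default


    def _approx_equal(x, y, eps=EPSILON):
        # floats compare within eps; dicts and sequences recurse; anything else uses ==
        if isinstance(x, float) and isinstance(y, float):
            return math.isclose(x, y, abs_tol=eps)
        if isinstance(x, dict) and isinstance(y, dict):
            return x.keys() == y.keys() and all(_approx_equal(x[k], y[k], eps) for k in x)
        if isinstance(x, (list, tuple)) and isinstance(y, (list, tuple)):
            return len(x) == len(y) and all(_approx_equal(p, q, eps) for p, q in zip(x, y))
        return x == y


    def compare_lists_with_epsilon_sketch(a, b, eps=EPSILON):
        # order-insensitive: each element of `a` must match a distinct, unused element of `b`
        if len(a) != len(b):
            return False
        unused = list(b)
        for item in a:
            for idx, cand in enumerate(unused):
                if _approx_equal(item, cand, eps):
                    del unused[idx]
                    break
            else:
                return False
        return True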
@@ -1654,20 +1654,6 @@ def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, star

     return df, binary_raw_values

-#
-# def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
-#     int_values = [i for i in range(start, start + nb)]
-#     float_values = [np.float32(i) for i in range(start, start + nb)]
-#     string_values = [str(i) for i in range(start, start + nb)]
-#     json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]}
-#                    for i in range(start, start + nb)]
-#     float_vec_values = gen_vectors(nb, dim)
-#     if with_json is False:
-#         data = [int_values, float_values, string_values, float_vec_values]
-#     else:
-#         data = [int_values, float_values, string_values, json_values, float_vec_values]
-#     return data
-

 def gen_default_list_sparse_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=False):
     int_values = [i for i in range(start, start + nb)]
@@ -1728,56 +1714,122 @@ def prepare_bulk_insert_data(schema=None,
     return files


-def get_column_data_by_schema(nb=ct.default_nb, schema=None, skip_vectors=False, start=None):
+def gen_column_data_by_schema(nb=ct.default_nb, schema=None, skip_vectors=False, start=0):
+    return get_column_data_by_schema(nb=nb, schema=schema, skip_vectors=skip_vectors, start=start)
+
+
+def get_column_data_by_schema(nb=ct.default_nb, schema=None, skip_vectors=False, start=0, random_pk=False):
+    """
+    Generates column data based on the given schema.
+
+    Args:
+        nb (int): Number of rows to generate. Defaults to ct.default_nb.
+        schema (Schema): Collection schema. If None, uses the default schema.
+        skip_vectors (bool): Whether to skip vector fields. Defaults to False.
+        start (int): Starting value for primary key fields (default: 0)
+        random_pk (bool, optional): Whether to generate random primary key values (default: False)
+
+    Returns:
+        list: List of column data arrays matching the schema fields (excluding auto_id fields).
+    """
     if schema is None:
         schema = gen_default_collection_schema()
     fields = schema.fields
-    fields_not_auto_id = []
+    fields_to_gen = []
     for field in fields:
-        if not field.auto_id:
-            fields_not_auto_id.append(field)
+        if not field.auto_id and not field.is_function_output:
+            fields_to_gen.append(field)
     data = []
-    for field in fields_not_auto_id:
-        if field.dtype == DataType.FLOAT_VECTOR and skip_vectors is True:
+    for field in fields_to_gen:
+        if field.dtype in ct.all_vector_types and skip_vectors is True:
            tmp = []
         else:
-            tmp = gen_data_by_collection_field(field, nb=nb, start=start)
+            tmp = gen_data_by_collection_field(field, nb=nb, start=start, random_pk=random_pk)
         data.append(tmp)
     return data
-def gen_row_data_by_schema(nb=ct.default_nb, schema=None, start=None):
+def gen_row_data_by_schema(nb=ct.default_nb, schema=None, start=0, random_pk=False):
+    """
+    Generates row data based on the given schema.
+
+    Args:
+        nb (int): Number of rows to generate. Defaults to ct.default_nb.
+        schema (Schema): Collection schema or collection info. If None, uses the default schema.
+        start (int): Starting value for primary key fields. Defaults to 0.
+        random_pk (bool, optional): Whether to generate random primary key values (default: False)
+
+    Returns:
+        list[dict]: List of dictionaries where each dictionary represents a row,
+                    with field names as keys and generated data as values.
+
+    Notes:
+        - Skips auto_id fields and function output fields.
+        - For primary key fields, generates sequential values starting from 'start'.
+        - For non-primary fields, generates random data based on field type.
+    """
     if schema is None:
         schema = gen_default_collection_schema()

     # ignore the auto_id field and the fields in function output
     func_output_fields = []
-    if hasattr(schema, "functions"):
-        functions = schema.functions
+    if isinstance(schema, dict):
+        # a dict of collection schema info is usually from client.describe_collection()
+        fields = schema.get('fields', [])
+        functions = schema.get('functions', [])
         for func in functions:
-            output_field_names = func.output_field_names
+            output_field_names = func.get('output_field_names', [])
             func_output_fields.extend(output_field_names)
-    func_output_fields = list(set(func_output_fields))
-    fields = schema.fields
-    fields_needs_data = []
-    for field in fields:
-        if field.auto_id:
-            continue
-        if field.name in func_output_fields:
-            continue
-        fields_needs_data.append(field)
-    data = []
-    for i in range(nb):
-        tmp = {}
-        for field in fields_needs_data:
-            tmp[field.name] = gen_data_by_collection_field(field)
-            if start is not None and field.dtype == DataType.INT64:
-                tmp[field.name] = start
-                start += 1
-            if field.nullable is True:
-                # 10% percent of data is null
-                if random.random() < 0.1:
-                    tmp[field.name] = None
-        data.append(tmp)
+        func_output_fields = list(set(func_output_fields))
+
+        fields_needs_data = []
+        for field in fields:
+            if field.get('auto_id', False):
+                continue
+            if field.get('name', None) in func_output_fields:
+                continue
+            fields_needs_data.append(field)
+        data = []
+        for i in range(nb):
+            tmp = {}
+            for field in fields_needs_data:
+                tmp[field.get('name', None)] = gen_data_by_collection_field(field, random_pk=random_pk)
+                if field.get('is_primary', False) is True and field.get('type', None) == DataType.INT64:
+                    tmp[field.get('name', None)] = start
+                    start += 1
+                if field.get('is_primary', False) is True and field.get('type', None) == DataType.VARCHAR:
+                    tmp[field.get('name', None)] = str(start)
+                    start += 1
+            data.append(tmp)
+    else:
+        # a schema object usually comes from an ORM schema object
+        fields = schema.fields
+        if hasattr(schema, "functions"):
+            functions = schema.functions
+            for func in functions:
+                output_field_names = func.output_field_names
+                func_output_fields.extend(output_field_names)
+        func_output_fields = list(set(func_output_fields))
+
+        fields_needs_data = []
+        for field in fields:
+            if field.auto_id:
+                continue
+            if field.name in func_output_fields:
+                continue
+            fields_needs_data.append(field)
+        data = []
+        for i in range(nb):
+            tmp = {}
+            for field in fields_needs_data:
+                tmp[field.name] = gen_data_by_collection_field(field, random_pk=random_pk)
+                if field.is_primary is True and field.dtype == DataType.INT64:
+                    tmp[field.name] = start
+                    start += 1
+                if field.is_primary is True and field.dtype == DataType.VARCHAR:
+                    tmp[field.name] = str(start)
+                    start += 1
+            data.append(tmp)
     return data
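Since `gen_row_data_by_schema` now accepts either an ORM `CollectionSchema` or the plain dict returned by `MilvusClient.describe_collection()`, v2-client tests can generate rows straight from a described collection. A hedged sketch, with a placeholder endpoint and collection name, assuming the repo's `common_func` is imported as `cf`:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")   # placeholder endpoint
    info = client.describe_collection("my_collection")    # dict with 'fields'/'functions'
    rows = cf.gen_row_data_by_schema(nb=100, schema=info, start=0)
    client.insert("my_collection", rows)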
@@ -1957,6 +2009,7 @@ def get_dense_anns_field_name_list(schema=None):
             anns_fields.append(item)
     return anns_fields


 def gen_varchar_data(length: int, nb: int, text_mode=False):
     if text_mode:
         return [fake.text() for _ in range(nb)]
@@ -1964,164 +2017,222 @@ def gen_varchar_data(length: int, nb: int, text_mode=False):
     return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(nb)]


-def gen_data_by_collection_field(field, nb=None, start=None):
-    # if nb is None, return one data, else return a list of data
-    nullable = field.nullable
-    if nullable is True:
-        if random.random() < 0.1:
-            return None
-    data_type = field.dtype
-    enable_analyzer = field.params.get("enable_analyzer", False)
+def gen_data_by_collection_field(field, nb=None, start=0, random_pk=False):
+    """
+    Generates test data for a given collection field based on its data type and properties.
+
+    Args:
+        field (dict or Field): Field information, either as a dictionary (v2 client) or Field object (ORM client)
+        nb (int, optional): Number of values to generate. If None, returns a single value, which is usually used by row data generation
+        start (int, optional): Starting value for primary key fields (default: 0)
+        random_pk (bool, optional): Whether to generate random primary key values (default: False)
+    Returns:
+        Single value if nb is None, otherwise returns a list of generated values
+
+    Notes:
+        - Handles various data types including primitive types, vectors, arrays and JSON
+        - For nullable fields, generates None values approximately 20% of the time
+        - Special handling for primary key fields (sequential values)
+        - For varchar fields, uses min(20, max_length) to generate data
+        - For vector fields, generates random vectors of the specified dimension
+        - For array fields, generates arrays filled with random values of the element type
+    """
+    if isinstance(field, dict):
+        # for v2 client, it accepts a dict of field info
+        nullable = field.get('nullable', False)
+        data_type = field.get('type', None)
+        enable_analyzer = field.get('params').get("enable_analyzer", False)
+        is_primary = field.get('is_primary', False)
+    else:
+        # for ORM client, it accepts a field object
+        nullable = field.nullable
+        data_type = field.dtype
+        enable_analyzer = field.params.get("enable_analyzer", False)
+        is_primary = field.is_primary
+
+    # generate data according to the data type
     if data_type == DataType.BOOL:
         if nb is None:
-            return random.choice([True, False])
-        return [random.choice([True, False]) for _ in range(nb)]
-    if data_type == DataType.INT8:
+            return random.choice([True, False]) if random.random() < 0.8 or nullable is False else None
+        if nullable is False:
+            return [random.choice([True, False]) for _ in range(nb)]
+        else:
+            # gen 20% none data for nullable field
+            return [None if i % 2 == 0 and random.random() < 0.4 else random.choice([True, False]) for i in range(nb)]
+    elif data_type == DataType.INT8:
         if nb is None:
-            return random.randint(-128, 127)
-        return [random.randint(-128, 127) for _ in range(nb)]
-    if data_type == DataType.INT16:
+            return random.randint(-128, 127) if random.random() < 0.8 or nullable is False else None
+        if nullable is False:
+            return [random.randint(-128, 127) for _ in range(nb)]
+        else:
+            # gen 20% none data for nullable field
+            return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-128, 127) for i in range(nb)]
+    elif data_type == DataType.INT16:
         if nb is None:
-            return random.randint(-32768, 32767)
-        return [random.randint(-32768, 32767) for _ in range(nb)]
-    if data_type == DataType.INT32:
+            return random.randint(-32768, 32767) if random.random() < 0.8 or nullable is False else None
+        if nullable is False:
+            return [random.randint(-32768, 32767) for _ in range(nb)]
+        else:
+            # gen 20% none data for nullable field
+            return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-32768, 32767) for i in range(nb)]
+    elif data_type == DataType.INT32:
         if nb is None:
-            return random.randint(-2147483648, 2147483647)
-        return [random.randint(-2147483648, 2147483647) for _ in range(nb)]
-    if data_type == DataType.INT64:
+            return random.randint(-2147483648, 2147483647) if random.random() < 0.8 or nullable is False else None
+        if nullable is False:
+            return [random.randint(-2147483648, 2147483647) for _ in range(nb)]
+        else:
+            # gen 20% none data for nullable field
+            return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-2147483648, 2147483647) for i in range(nb)]
+    elif data_type == DataType.INT64:
         if nb is None:
-            return random.randint(-9223372036854775808, 9223372036854775807)
-        if start is not None:
-            return [i for i in range(start, start+nb)]
-        return [random.randint(-9223372036854775808, 9223372036854775807) for _ in range(nb)]
-    if data_type == DataType.FLOAT:
+            return random.randint(-9223372036854775808, 9223372036854775807) if random.random() < 0.8 or nullable is False else None
+        if nullable is False:
+            if is_primary is True and random_pk is False:
+                return [i for i in range(start, start+nb)]
+            else:
+                return [random.randint(-9223372036854775808, 9223372036854775807) for _ in range(nb)]
+        else:
+            # gen 20% none data for nullable field
+            return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-9223372036854775808, 9223372036854775807) for i in range(nb)]
+    elif data_type == DataType.FLOAT:
         if nb is None:
-            return np.float32(random.random())
-        return [np.float32(random.random()) for _ in range(nb)]
-    if data_type == DataType.DOUBLE:
+            return np.float32(random.random()) if random.random() < 0.8 or nullable is False else None
+        if nullable is False:
+            return [np.float32(random.random()) for _ in range(nb)]
+        else:
+            # gen 20% none data for nullable field
+            return [None if i % 2 == 0 and random.random() < 0.4 else np.float32(random.random()) for i in range(nb)]
+    elif data_type == DataType.DOUBLE:
         if nb is None:
-            return np.float64(random.random())
-        return [np.float64(random.random()) for _ in range(nb)]
-    if data_type == DataType.VARCHAR:
-        max_length = field.params['max_length']
+            return np.float64(random.random()) if random.random() < 0.8 or nullable is False else None
+        if nullable is False:
+            return [np.float64(random.random()) for _ in range(nb)]
+        else:
+            # gen 20% none data for nullable field
+            return [None if i % 2 == 0 and random.random() < 0.4 else np.float64(random.random()) for i in range(nb)]
+    elif data_type == DataType.VARCHAR:
+        if isinstance(field, dict):
+            max_length = field.get('params')['max_length']
+        else:
+            max_length = field.params['max_length']
         max_length = min(20, max_length-1)
         length = random.randint(0, max_length)
         if nb is None:
-            return gen_varchar_data(length=length, nb=1, text_mode=enable_analyzer)[0]
-        return gen_varchar_data(length=length, nb=nb, text_mode=enable_analyzer)
-    if data_type == DataType.JSON:
+            return gen_varchar_data(length=length, nb=1, text_mode=enable_analyzer)[0] if random.random() < 0.8 or nullable is False else None
+        if nullable is False:
+            if is_primary is True and random_pk is False:
+                return [str(i) for i in range(start, start+nb)]
+            else:
+                return gen_varchar_data(length=length, nb=nb, text_mode=enable_analyzer)
+        else:
+            # gen 20% none data for nullable field
+            return [None if i % 2 == 0 and random.random() < 0.4 else gen_varchar_data(length=length, nb=1, text_mode=enable_analyzer)[0] for i in range(nb)]
+    elif data_type == DataType.JSON:
         if nb is None:
-            return {"name": fake.name(), "address": fake.address(), "count": random.randint(0, 100)}
-        data = [{"name": str(i), "address": i, "count": random.randint(0, 100)} for i in range(nb)]
-        return data
-    if data_type == DataType.FLOAT_VECTOR:
-        dim = field.params['dim']
-        if nb is None:
-            return [random.random() for i in range(dim)]
-        return [[random.random() for i in range(dim)] for _ in range(nb)]
-    if data_type == DataType.BFLOAT16_VECTOR:
-        dim = field.params['dim']
-        if nb is None:
-            return RNG.uniform(size=dim).astype(bfloat16)
-        return [RNG.uniform(size=dim).astype(bfloat16) for _ in range(int(nb))]
-        # if nb is None:
-        #     raw_vector = [random.random() for _ in range(dim)]
-        #     bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
-        #     return bytes(bf16_vector)
-        # bf16_vectors = []
-        # for i in range(nb):
-        #     raw_vector = [random.random() for _ in range(dim)]
-        #     bf16_vector = np.array(raw_vector, dtype=bfloat16).view(np.uint8).tolist()
-        #     bf16_vectors.append(bytes(bf16_vector))
-        # return bf16_vectors
-    if data_type == DataType.FLOAT16_VECTOR:
-        dim = field.params['dim']
-        if nb is None:
-            return np.array([random.random() for _ in range(int(dim))], dtype=np.float16)
-        return [np.array([random.random() for _ in range(int(dim))], dtype=np.float16) for _ in range(int(nb))]
-    if data_type == DataType.INT8_VECTOR:
-        dim = field.params['dim']
-        if nb is None:
-            raw_vector = [random.randint(-128, 127) for _ in range(dim)]
-            int8_vector = np.array(raw_vector, dtype=np.int8)
-            return int8_vector
-        raw_vectors = [[random.randint(-128, 127) for _ in range(dim)] for _ in range(nb)]
-        int8_vectors = [np.array(raw_vector, dtype=np.int8) for raw_vector in raw_vectors]
-        return int8_vectors
-
-    if data_type == DataType.BINARY_VECTOR:
-        dim = field.params['dim']
-        if nb is None:
-            raw_vector = [random.randint(0, 1) for _ in range(dim)]
-            binary_byte = bytes(np.packbits(raw_vector, axis=-1).tolist())
-            return binary_byte
-        return [bytes(np.packbits([random.randint(0, 1) for _ in range(dim)], axis=-1).tolist()) for _ in range(nb)]
-    if data_type == DataType.SPARSE_FLOAT_VECTOR:
-        if nb is None:
-            return gen_sparse_vectors(nb=1)[0]
-        return gen_sparse_vectors(nb=nb)
-    if data_type == DataType.ARRAY:
-        max_capacity = field.params['max_capacity']
+            return {"name": fake.name(), "address": fake.address(), "count": random.randint(0, 100)} if random.random() < 0.8 or nullable is False else None
+        if nullable is False:
+            return [{"name": str(i), "address": i, "count": random.randint(0, 100)} for i in range(nb)]
+        else:
+            # gen 20% none data for nullable field
+            return [None if i % 2 == 0 and random.random() < 0.4 else {"name": str(i), "address": i, "count": random.randint(0, 100)} for i in range(nb)]
+    elif data_type in ct.all_vector_types:
+        if isinstance(field, dict):
+            dim = ct.default_dim if data_type == DataType.SPARSE_FLOAT_VECTOR else field.get('params')['dim']
+        else:
+            dim = ct.default_dim if data_type == DataType.SPARSE_FLOAT_VECTOR else field.params['dim']
+        if nb is None:
+            return gen_vectors(1, dim, vector_data_type=data_type)[0]
+        if nullable is False:
+            return gen_vectors(nb, dim, vector_data_type=data_type)
+        else:
+            raise MilvusException(message=f"gen data failed, vector field does not support nullable")
+    elif data_type == DataType.ARRAY:
+        if isinstance(field, dict):
+            max_capacity = field.get('params')['max_capacity']
+        else:
+            max_capacity = field.params['max_capacity']
         element_type = field.element_type
         if element_type == DataType.INT8:
             if nb is None:
-                return [random.randint(-128, 127) for _ in range(max_capacity)]
-            return [[random.randint(-128, 127) for _ in range(max_capacity)] for _ in range(nb)]
+                return [random.randint(-128, 127) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
+            if nullable is False:
+                return [[random.randint(-128, 127) for _ in range(max_capacity)] for _ in range(nb)]
+            else:
+                # gen 20% none data for nullable field
+                return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-128, 127) for i in range(nb)]
         if element_type == DataType.INT16:
             if nb is None:
-                return [random.randint(-32768, 32767) for _ in range(max_capacity)]
-            return [[random.randint(-32768, 32767) for _ in range(max_capacity)] for _ in range(nb)]
+                return [random.randint(-32768, 32767) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
+            if nullable is False:
+                return [[random.randint(-32768, 32767) for _ in range(max_capacity)] for _ in range(nb)]
+            else:
+                # gen 20% none data for nullable field
+                return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-32768, 32767) for i in range(nb)]
         if element_type == DataType.INT32:
             if nb is None:
-                return [random.randint(-2147483648, 2147483647) for _ in range(max_capacity)]
-            return [[random.randint(-2147483648, 2147483647) for _ in range(max_capacity)] for _ in range(nb)]
+                return [random.randint(-2147483648, 2147483647) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
+            if nullable is False:
+                return [[random.randint(-2147483648, 2147483647) for _ in range(max_capacity)] for _ in range(nb)]
+            else:
+                # gen 20% none data for nullable field
+                return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-2147483648, 2147483647) for i in range(nb)]
         if element_type == DataType.INT64:
             if nb is None:
-                return [random.randint(-9223372036854775808, 9223372036854775807) for _ in range(max_capacity)]
-            return [[random.randint(-9223372036854775808, 9223372036854775807) for _ in range(max_capacity)] for _ in range(nb)]
-
+                return [random.randint(-9223372036854775808, 9223372036854775807) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
+            if nullable is False:
+                return [[random.randint(-9223372036854775808, 9223372036854775807) for _ in range(max_capacity)] for _ in range(nb)]
+            else:
+                # gen 20% none data for nullable field
+                return [None if i % 2 == 0 and random.random() < 0.4 else random.randint(-9223372036854775808, 9223372036854775807) for i in range(nb)]
         if element_type == DataType.BOOL:
             if nb is None:
-                return [random.choice([True, False]) for _ in range(max_capacity)]
-            return [[random.choice([True, False]) for _ in range(max_capacity)] for _ in range(nb)]
-
+                return [random.choice([True, False]) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
+            if nullable is False:
+                return [[random.choice([True, False]) for _ in range(max_capacity)] for _ in range(nb)]
+            else:
+                # gen 20% none data for nullable field
+                return [None if i % 2 == 0 and random.random() < 0.4 else random.choice([True, False]) for i in range(nb)]
         if element_type == DataType.FLOAT:
             if nb is None:
-                return [np.float32(random.random()) for _ in range(max_capacity)]
-            return [[np.float32(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
+                return [np.float32(random.random()) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
+            if nullable is False:
+                return [[np.float32(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
+            else:
+                # gen 20% none data for nullable field
+                return [None if i % 2 == 0 and random.random() < 0.4 else np.float32(random.random()) for i in range(nb)]
        if element_type == DataType.DOUBLE:
             if nb is None:
-                return [np.float64(random.random()) for _ in range(max_capacity)]
-            return [[np.float64(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
-
+                return [np.float64(random.random()) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
+            if nullable is False:
+                return [[np.float64(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
+            else:
+                # gen 20% none data for nullable field
+                return [None if i % 2 == 0 and random.random() < 0.4 else np.float64(random.random()) for i in range(nb)]
         if element_type == DataType.VARCHAR:
-            max_length = field.params['max_length']
+            if isinstance(field, dict):
+                max_length = field.get('params')['max_length']
+            else:
+                max_length = field.params['max_length']
             max_length = min(20, max_length - 1)
             length = random.randint(0, max_length)
             if nb is None:
-                return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)]
-            return [["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] for _ in range(nb)]
+                return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] if random.random() < 0.8 or nullable is False else None
+            if nullable is False:
+                return [["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] for _ in range(nb)]
+            else:
+                # gen 20% none data for nullable field
+                return [None if i % 2 == 0 and random.random() < 0.4 else "".join([chr(random.randint(97, 122)) for _ in range(length)]) for i in range(nb)]
     else:
         raise MilvusException(message=f"gen data failed, data type {data_type} not implemented")
     return None
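A small illustrative call showing the two shapes the refactored generator accepts. The dict below mimics what `client.describe_collection()` reports for a field; the field values are illustrative, not taken from a live server:

    from pymilvus import DataType

    varchar_field = {"name": "title", "type": DataType.VARCHAR,
                     "params": {"max_length": 64},
                     "is_primary": False, "nullable": False}
    one = gen_data_by_collection_field(varchar_field)           # a single random string
    batch = gen_data_by_collection_field(varchar_field, nb=10)  # a list of 10 strings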
 def gen_data_by_collection_schema(schema, nb, r=0):
     """
     gen random data by collection schema, regardless of primary key or auto_id;
     for vectors, only DataType.FLOAT_VECTOR is supported
     """
     data = []
     start_uid = r * nb
     fields = schema.fields
     for field in fields:
         data.append(gen_data_by_collection_field(field, nb, start_uid))
     return data


 def gen_varchar_values(nb: int, length: int = 0):
     return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(nb)]


-def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}):
+def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}, random_pk=False):
     """
     generate default values according to the collection fields,
     which can replace the value of the specified field
@@ -2132,11 +2243,11 @@ def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict =
         if default_value is not None:
             data.append(default_value)
         elif field.auto_id is False:
-            data.append(gen_data_by_collection_field(field, nb, start_id))
+            data.append(gen_data_by_collection_field(field, nb, start_id, random_pk=random_pk))
     return data


-def gen_field_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}) -> dict:
+def gen_field_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}, random_pk=False) -> dict:
     """
     generate default values according to the collection fields,
     which can replace the value of the specified field
@@ -2150,7 +2261,7 @@ def gen_field_values(schema: CollectionSchema, nb, start_id=0, default_values: d
         if default_value is not None:
             data[field.name] = default_value
         elif field.auto_id is False:
-            data[field.name] = gen_data_by_collection_field(field, nb, start_id * nb)
+            data[field.name] = gen_data_by_collection_field(field, nb, start_id * nb, random_pk=random_pk)
     return data
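Both helpers accept a `default_values` mapping keyed by field name, which pins those columns while the rest is generated from the schema. A small illustrative call (the field name `category` is hypothetical; `schema` is an ORM `CollectionSchema`):

    # keep 'category' constant across the batch, generate everything else
    data = gen_field_values(schema, nb=100,
                            default_values={"category": ["news"] * 100})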
@@ -3406,11 +3517,30 @@ def install_milvus_operator_specific_config(namespace, milvus_mode, release_name


 def get_wildcard_output_field_names(collection_w, output_fields):
-    all_fields = [field.name for field in collection_w.schema.fields]
+    """
+    Processes output fields with wildcard ('*') expansion for collection queries.
+
+    Args:
+        collection_w (Union[dict, CollectionWrapper]): Collection information,
+            either as a dict (v2 client) or ORM wrapper.
+        output_fields (List[str]): List of requested output fields, may contain the '*' wildcard.
+
+    Returns:
+        List[str]: Expanded list of output fields with the wildcard replaced by all available field names.
+    """
+    if not isinstance(collection_w, dict):
+        # in orm, it accepts a collection wrapper
+        field_names = [field.name for field in collection_w.schema.fields]
+    else:
+        # in client v2, it accepts a dict of collection info
+        fields = collection_w.get('fields', None)
+        field_names = [field.get('name') for field in fields]
+
     output_fields = output_fields.copy()
     if "*" in output_fields:
         output_fields.remove("*")
-        output_fields.extend(all_fields)
+        output_fields.extend(field_names)

     return output_fields
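A hedged usage sketch of the wildcard expansion with the v2 client; the collection name and `query_vec` are placeholders, and `client` is assumed to be a connected `MilvusClient`:

    info = client.describe_collection("my_collection")   # dict form accepted above
    fields = get_wildcard_output_field_names(info, ["*"])
    res = client.search("my_collection", data=[query_vec],
                        limit=10, output_fields=fields)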
@@ -3748,3 +3878,34 @@ def gen_collection_name_by_testcase_name(module_index=1):
     if calling from the testcase, module_index=1
     """
     return inspect.stack()[module_index][3] + gen_unique_str("_")
+
+
+def parse_fmod(x: int, y: int) -> int:
+    """
+    Computes the remainder of x/y with the same sign as x.
+
+    This function mimics the behavior of the C fmod() function for integer inputs,
+    where the result has the same sign as the dividend (x).
+
+    Args:
+        x (int): The dividend
+        y (int): The divisor
+
+    Returns:
+        int: The remainder of x/y with the same sign as x
+
+    Raises:
+        ValueError: If y is 0 (division by zero)
+
+    Examples:
+        parse_fmod(5, 3) -> 2
+        parse_fmod(-5, 3) -> -2
+        parse_fmod(5, -3) -> 2
+        parse_fmod(-5, -3) -> -2
+    """
+    if y == 0:
+        raise ValueError(f'[parse_fmod] Math domain error, `y` can not be `0`')
+
+    v = abs(x) % abs(y)
+
+    return v if x >= 0 else -v
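The sign convention matters because Python's `%` follows the divisor, not the dividend. A quick check against the standard library, runnable as-is once `parse_fmod` is defined:

    import math

    assert parse_fmod(-5, 3) == int(math.fmod(-5, 3))  # both yield -2
    assert (-5) % 3 == 1                               # Python's % follows the divisor's sign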
@@ -288,6 +288,7 @@ class TestMilvusClientCollectionValid(TestMilvusClientV2Base):
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": collection_name,
                                               "dim": dim,
+                                              "auto_id": auto_id,
                                               "consistency_level": 0})
         index = self.list_indexes(client, collection_name)[0]
         assert index == ['vector']
@@ -1782,7 +1782,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": collection_name,
                                               "dim": default_dim,
-                                              "consistency_level": 0})
+                                              "consistency_level": 2})
         # 2. insert
         rng = np.random.default_rng(seed=19530)
         rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@@ -1892,7 +1892,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": collection_name,
                                               "dim": default_dim,
-                                              "consistency_level": 0})
+                                              "consistency_level": 2})
         # 2. insert
         rng = np.random.default_rng(seed=19530)
         rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@@ -1991,7 +1991,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": collection_name,
                                               "dim": default_dim,
-                                              "consistency_level": 0})
+                                              "consistency_level": 2})
         # 2. insert
         rng = np.random.default_rng(seed=19530)
         rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@@ -2086,7 +2086,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": collection_name,
                                               "dim": default_dim,
-                                              "consistency_level": 0})
+                                              "consistency_level": 2})
         # 2. insert
         rng = np.random.default_rng(seed=19530)
         rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@@ -2181,7 +2181,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": collection_name,
                                               "dim": default_dim,
-                                              "consistency_level": 0})
+                                              "consistency_level": 2})
         # 2. insert
         rng = np.random.default_rng(seed=19530)
         rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@@ -2276,7 +2276,7 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": collection_name,
                                               "dim": default_dim,
-                                              "consistency_level": 0})
+                                              "consistency_level": 2})
         # 2. insert
         rng = np.random.default_rng(seed=19530)
         rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
@@ -2393,42 +2393,74 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-        collection_name = cf.gen_collection_name_by_testcase_name()
+        old_name = cf.gen_collection_name_by_testcase_name()
         # 1. create collection
-        self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
+        self.create_collection(client, old_name, default_dim, consistency_level="Strong")
         collections = self.list_collections(client)[0]
-        assert collection_name in collections
-        self.describe_collection(client, collection_name,
-                                 check_task=CheckTasks.check_describe_collection_property,
-                                 check_items={"collection_name": collection_name,
-                                              "dim": default_dim,
-                                              "consistency_level": 0})
-        old_name = collection_name
-        new_name = collection_name + "new"
+        assert old_name in collections
+        c_info = self.describe_collection(client, old_name,
+                                          check_task=CheckTasks.check_describe_collection_property,
+                                          check_items={"collection_name": old_name,
+                                                       "dim": default_dim,
+                                                       "consistency_level": 0})[0]
+
+        rows = cf.gen_row_data_by_schema(nb=default_nb, schema=c_info)
+        self.insert(client, old_name, rows)
+        self.flush(client, old_name)
+        self.wait_for_index_ready(client, collection_name=old_name, index_name='vector')
+
+        vectors_to_search = cf.gen_vectors(ct.default_nq, default_dim)
+        insert_ids = [item.get('id') for item in rows]
+        old_search_res = self.search(client, old_name, vectors_to_search,
+                                     check_task=CheckTasks.check_search_results,
+                                     check_items={"enable_milvus_client_api": True,
+                                                  "nq": ct.default_nq,
+                                                  "ids": insert_ids,
+                                                  "pk_name": "id",
+                                                  "limit": default_limit})[0]
+        old_query_res = self.query(client, old_name, filter=default_search_exp,
+                                   check_task=CheckTasks.check_query_results,
+                                   check_items={exp_res: rows,
+                                                "with_vec": True})[0]
+
+        new_name = old_name + "new"
         self.rename_collection(client, old_name, new_name)
-        # 2. insert
-        rng = np.random.default_rng(seed=19530)
-        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
-                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
         self.describe_collection(client, new_name,
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": new_name,
                                               "dim": default_dim})
+
+        # search again after rename collection
+        new_search_res = self.search(client, new_name, vectors_to_search,
+                                     check_task=CheckTasks.check_search_results,
+                                     check_items={"enable_milvus_client_api": True,
+                                                  "nq": ct.default_nq,
+                                                  "ids": insert_ids,
+                                                  "pk_name": "id",
+                                                  "limit": default_limit})[0]
+        new_query_res = self.query(client, new_name, filter=default_search_exp,
+                                   check_task=CheckTasks.check_query_results,
+                                   check_items={exp_res: rows,
+                                                "with_vec": True})[0]
+        assert old_search_res[0].ids == new_search_res[0].ids
+        assert old_query_res == new_query_res
+
+        rows = cf.gen_row_data_by_schema(nb=200, schema=c_info, start=default_nb)
+        error = {ct.err_code: 0, ct.err_msg: f"collection not found"}
+        self.insert(client, old_name, rows,
+                    check_task=CheckTasks.err_res,
+                    check_items=error)
         self.insert(client, new_name, rows)
         self.flush(client, new_name)
-        # assert self.num_entities(client, collection_name)[0] == default_nb
         # 3. search
-        vectors_to_search = rng.random((1, default_dim))
-        insert_ids = [i for i in range(default_nb)]
+        new_ids = [item.get('id') for item in rows]
+        insert_ids.extend(new_ids)
         self.search(client, new_name, vectors_to_search,
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
-                                 "nq": len(vectors_to_search),
+                                 "nq": ct.default_nq,
                                  "ids": insert_ids,
-                                 "pk_name": default_primary_key_field_name,
+                                 "pk_name": "id",
                                  "limit": default_limit})
         # 4. query
         self.query(client, new_name, filter=default_search_exp,
                    check_task=CheckTasks.check_query_results,
                    check_items={exp_res: rows,
                                 "with_vec": True,
                                 "pk_name": default_primary_key_field_name})

         self.release_collection(client, new_name)
         self.drop_collection(client, new_name)
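At the client level, the behavior this test exercises reduces to: after a rename, the new name serves both reads and writes while the old name is gone. A hedged sketch with the plain v2 client (placeholder names; `rows` as generated above):

    client.rename_collection("old_name", "old_namenew")
    client.describe_collection("old_namenew")   # resolves under the new name
    client.insert("old_namenew", rows)          # writes go to the renamed collection
    client.insert("old_name", rows)             # raises MilvusException: collection not found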
@@ -618,6 +618,7 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
         self.describe_collection(client, collection_name,
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": collection_name,
+                                              "consistency_level": 2,
                                               "dim": default_dim})
         # 2. insert
         rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
@@ -688,7 +689,7 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
                                  check_task=CheckTasks.check_describe_collection_property,
                                  check_items={"collection_name": collection_name,
                                               "dim": default_dim,
-                                              "consistency_level": 0})
+                                              "consistency_level": 2})
         # 2. insert
         rows = [{default_primary_key_field_name: i,
                  default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
@@ -87,169 +87,10 @@ class TestSearchDiskann(TestcaseBase):
     ******************************************************************
     """

-    @pytest.fixture(scope="function", params=[32, 128])
-    def dim(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=[False, True])
-    def auto_id(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=[False, True])
-    def _async(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=[True, False])
-    def enable_dynamic_field(self, request):
-        yield request.param
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_search_with_diskann_index(self, _async):
-        """
-        target: test search after creating index
-        method: 1.create collection, insert data, primary_field is int field
-                2.create diskann index, then load
-                3.search
-        expected: search successfully
-        """
-        # 1. initialize with data
-        dim = 100
-        auto_id = False
-        enable_dynamic_field = True
-        nb = 2000
-        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id,
-                                                                      nb=nb, dim=dim, is_index=False,
-                                                                      enable_dynamic_field=enable_dynamic_field)[0:4]
-
-        # 2. create index
-        default_index = {"index_type": "DISKANN",
-                         "metric_type": "L2", "params": {}}
-        collection_w.create_index(
-            ct.default_float_vec_field_name, default_index)
-        collection_w.load()
-
-        default_search_params = {
-            "metric_type": "L2", "params": {"search_list": 30}}
-        vectors = [[random.random() for _ in range(dim)]
-                   for _ in range(default_nq)]
-        output_fields = [default_int64_field_name,
-                         default_float_field_name, default_string_field_name]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit,
-                            default_search_exp,
-                            output_fields=output_fields,
-                            _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": insert_ids,
-                                         "limit": default_limit,
-                                         "pk_name": ct.default_int64_field_name,
-                                         "_async": _async}
-                            )
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("search_list", [20, 200])
-    def test_search_with_limit_20(self, _async, search_list):
-        """
-        target: test search after creating index
-        method: 1.create collection, insert data, primary_field is int field
-                2.create diskann index, then load
-                3.search
-        expected: search successfully
-        """
-        limit = 20
-        # 1. initialize with data
-        enable_dynamic_field = True
-        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, is_index=False,
-                                                                      enable_dynamic_field=enable_dynamic_field)[0:4]
-
-        # 2. create index
-        default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
-        collection_w.create_index(ct.default_float_vec_field_name, default_index)
-        collection_w.load()
-
-        search_params = {"metric_type": "L2", "params": {"search_list": search_list}}
-        output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            search_params, limit, default_search_exp,
-                            output_fields=output_fields, _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": insert_ids,
-                                         "limit": limit,
-                                         "_async": _async,
-                                         "pk_name": ct.default_int64_field_name})
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_search_invalid_params_with_diskann_B(self):
-        """
-        target: test search after creating index
-        method: 1.create collection, insert data, primary_field is int field
-                2.create diskann index
-                3.search with invalid params, [k, 200] when k <= 20
-        expected: search reports an error
-        """
-        # 1. initialize with data
-        dim = 100
-        limit = 20
-        auto_id = True
-        collection_w, _, _, insert_ids = \
-            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False)[0:4]
-        # 2. create index
-        default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
-        collection_w.create_index(ct.default_float_vec_field_name, default_index)
-        collection_w.load()
-        default_search_params = {"metric_type": "L2", "params": {"search_list": limit-1}}
-        vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
-        output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, limit,
-                            default_search_exp,
-                            output_fields=output_fields,
-                            check_task=CheckTasks.err_res,
-                            check_items={"err_code": 999,
-                                         "err_msg": f"should be larger than k({limit})"})
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_search_with_diskann_with_string_pk(self):
-        """
-        target: test search after creating index
-        method: 1.create collection, insert data, primary_field is string field
-                2.create diskann index
-                3.search
-        expected: search successfully
-        """
-        # 1. initialize with data
-        dim = 128
-        enable_dynamic_field = True
-        collection_w, _, _, insert_ids = \
-            self.init_collection_general(prefix, True, auto_id=False, dim=dim, is_index=False,
-                                         primary_field=ct.default_string_field_name,
-                                         enable_dynamic_field=enable_dynamic_field)[0:4]
-        # 2. create index
-        default_index = {"index_type": "DISKANN",
-                         "metric_type": "L2", "params": {}}
-        collection_w.create_index(
-            ct.default_float_vec_field_name, default_index)
-        collection_w.load()
-        search_list = 20
-        default_search_params = {"metric_type": "L2",
-                                 "params": {"search_list": search_list}}
-        vectors = [[random.random() for _ in range(dim)]
-                   for _ in range(default_nq)]
-        output_fields = [default_int64_field_name,
-                         default_float_field_name, default_string_field_name]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit,
-                            default_search_exp,
-                            output_fields=output_fields,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": insert_ids,
-                                         "limit": default_limit,
-                                         "pk_name": ct.default_int64_field_name}
-                            )
-
     @pytest.mark.tags(CaseLabel.L2)
     def test_search_with_delete_data(self, _async):
         """
@@ -300,57 +141,6 @@ class TestSearchDiskann(TestcaseBase):
                                          "pk_name": ct.default_int64_field_name}
                             )

-    @pytest.mark.tags(CaseLabel.L2)
-    def test_search_with_diskann_and_more_index(self, _async):
-        """
-        target: test search after creating index
-        method: 1.create collection, insert data
-                2.create more indexes, then load
-                3.delete half of the data, search
-        expected: assert index exists and deleted ids not in search result
-        """
-        # 1. initialize with data
-        dim = 64
-        auto_id = False
-        enable_dynamic_field = True
-        collection_w, _, _, ids = \
-            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=False,
-                                         enable_dynamic_field=enable_dynamic_field, language="French")[0:4]
-        # 2. create index
-        default_index = {"index_type": "DISKANN",
-                         "metric_type": "COSINE", "params": {}}
-        collection_w.create_index(ct.default_float_vec_field_name, default_index, index_name=index_name1)
-        if not enable_dynamic_field:
-            index_params_one = {}
-            collection_w.create_index("float", index_params_one, index_name="a")
-            index_param_two = {}
-            collection_w.create_index("varchar", index_param_two, index_name="b")
-
-        collection_w.load()
-        tmp_expr = f'{ct.default_int64_field_name} in {[0]}'
-
-        expr = f'{ct.default_int64_field_name} in {ids[:half_nb]}'
-
-        # delete half of the data
-        del_res = collection_w.delete(expr)[0]
-        assert del_res.delete_count == half_nb
-
-        collection_w.delete(tmp_expr)
-        default_search_params = {"metric_type": "COSINE", "params": {"search_list": 30}}
-        vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
-        output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit,
-                            default_search_exp,
-                            output_fields=output_fields,
-                            _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": ids,
-                                         "limit": default_limit,
-                                         "_async": _async,
-                                         "pk_name": ct.default_int64_field_name})
-
     @pytest.mark.tags(CaseLabel.L1)
     def test_search_with_scalar_field(self, _async):
         """
@@ -396,87 +186,3 @@ class TestSearchDiskann(TestcaseBase):
                                          "limit": limit,
                                          "_async": _async,
                                          "pk_name": ct.default_int64_field_name})
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("limit", [10, 100, 1000])
-    def test_search_diskann_search_list_equal_to_limit(self, limit, _async):
-        """
-        target: test search diskann index when search_list equal to limit
-        method: 1.create collection, insert data, primary_field is int field
-                2.create diskann index, then load
-                3.search
-        expected: search successfully
-        """
-        # 1. initialize with data
-        dim = 77
-        auto_id = False
-        enable_dynamic_field = False
-        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id,
-                                                                      dim=dim, is_index=False,
-                                                                      enable_dynamic_field=enable_dynamic_field)[0:4]
-
-        # 2. create index
-        default_index = {"index_type": "DISKANN",
-                         "metric_type": "L2", "params": {}}
-        collection_w.create_index(
-            ct.default_float_vec_field_name, default_index)
-        collection_w.load()
-
-        search_params = {"metric_type": "L2", "params": {"search_list": limit}}
-        vectors = [[random.random() for _ in range(dim)]
-                   for _ in range(default_nq)]
-        output_fields = [default_int64_field_name,
-                         default_float_field_name, default_string_field_name]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            search_params, limit,
-                            default_search_exp,
-                            output_fields=output_fields,
-                            _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": insert_ids,
-                                         "limit": limit,
-                                         "_async": _async,
-                                         "pk_name": ct.default_int64_field_name}
-                            )
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip(reason="issue #23672")
-    def test_search_diskann_search_list_up_to_min(self, _async):
-        """
-        target: test search diskann index when search_list up to min
-        method: 1.create collection, insert data, primary_field is int field
-                2.create diskann index, then load
-                3.search
-        expected: search successfully
-        """
-        # 1. initialize with data
-        dim = 100
-        auto_id = True
-        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id,
-                                                                      dim=dim, is_index=False)[0:4]
-
-        # 2. create index
-        default_index = {"index_type": "DISKANN",
-                         "metric_type": "L2", "params": {}}
-        collection_w.create_index(
-            ct.default_float_vec_field_name, default_index)
-        collection_w.load()
-
-        search_params = {"metric_type": "L2",
-                         "params": {"k": 200, "search_list": 201}}
-        search_vectors = [[random.random() for _ in range(dim)]
-                          for _ in range(default_nq)]
-        output_fields = [default_int64_field_name,
-                         default_float_field_name, default_string_field_name]
-        collection_w.search(search_vectors[:default_nq], default_search_field,
-                            search_params, default_limit,
-                            default_search_exp,
-                            output_fields=output_fields,
-                            _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": insert_ids,
-                                         "limit": default_limit,
-                                         "_async": _async,
-                                         "pk_name": ct.default_int64_field_name})
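The removed ORM cases above encoded one DISKANN-specific rule worth keeping in mind: at query time, `search_list` must be at least as large as the requested `limit` (k), otherwise the server rejects the search with the error checked above (`should be larger than k(...)`). A minimal ORM-style sketch of a valid setup, reusing the names from the deleted tests (assumes an initialized `collection_w` and the repo's `common_type` imported as `ct`):

    default_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
    collection_w.create_index(ct.default_float_vec_field_name, default_index)
    collection_w.load()
    # keep search_list >= limit; 30 covers the default limit of 10
    search_params = {"metric_type": "L2", "params": {"search_list": 30}}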
@ -141,43 +141,6 @@ class TestCollectionSearch(TestcaseBase):
    # The following are valid base cases
    ******************************************************************
    """

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("M", [4, 64])
    @pytest.mark.parametrize("efConstruction", [8, 512])
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_search_HNSW_index_with_min_ef(self, M, efConstruction, limit, _async):
        """
        target: test search HNSW index with min ef
        method: connect milvus, create collection, insert, create index, load and search
        expected: search successfully
        """
        dim = M * 4
        ef = limit
        auto_id = True
        enable_dynamic_field = True
        self._connect()
        collection_w, _, _, insert_ids, time_stamp = \
            self.init_collection_general(prefix, True, 5000, partition_num=1,
                                         auto_id=auto_id, dim=dim, is_index=False,
                                         enable_dynamic_field=enable_dynamic_field)[0:5]
        HNSW_index_params = {"M": M, "efConstruction": efConstruction}
        HNSW_index = {"index_type": "HNSW",
                      "params": HNSW_index_params, "metric_type": "L2"}
        collection_w.create_index("float_vector", HNSW_index)
        collection_w.load()
        search_param = {"metric_type": "L2", "params": {"ef": ef}}
        vectors = [[random.random() for _ in range(dim)]
                   for _ in range(default_nq)]
        collection_w.search(vectors[:default_nq], default_search_field,
                            search_param, limit,
                            default_search_exp, _async=_async,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "ids": insert_ids,
                                         "limit": limit,
                                         "_async": _async})

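# The constraint pinned down by the test above, shown on its own: HNSW search requires
# ef >= limit (k), so ef == limit is the smallest legal setting. A hedged sketch; the
# endpoint, collection name, and dim are illustrative assumptions.
from pymilvus import MilvusClient
import random

client = MilvusClient(uri="http://localhost:19530")  # assumed endpoint
limit = 10
client.search("hnsw_demo", data=[[random.random() for _ in range(16)]],
              limit=limit,
              search_params={"metric_type": "L2", "params": {"ef": limit}})  # ef == limit
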
    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_expression(self, null_data_percent):
        """
@ -871,497 +834,6 @@ class TestCollectionSearch(TestcaseBase):
                                                     "limit": 1})
            assert search_res[0].ids == [_id]

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_with_output_fields_empty(self, nq, _async):
        """
        target: test search with output fields
        method: search with empty output_field
        expected: search success
        """
        # 1. initialize with data
        nb = 1500
        dim = 32
        auto_id = True
        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb,
                                                                      auto_id=auto_id,
                                                                      dim=dim)[0:4]
        # 2. search
        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, default_limit,
                            default_search_exp, _async=_async,
                            output_fields=[],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": default_limit,
                                         "_async": _async,
                                         "output_fields": []})

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_output_field(self, _async):
        """
        target: test search with output fields
        method: search with one output_field
        expected: search success
        """
        # 1. initialize with data
        auto_id = False
        enable_dynamic_field = False
        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
                                                                      auto_id=auto_id,
                                                                      enable_dynamic_field=enable_dynamic_field)[0:4]
        # 2. search
        collection_w.search(vectors[:default_nq], default_search_field,
                            default_search_params, default_limit,
                            default_search_exp, _async=_async,
                            output_fields=[default_int64_field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "ids": insert_ids,
                                         "limit": default_limit,
                                         "_async": _async,
                                         "output_fields": [default_int64_field_name]})

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_output_vector_field(self, _async):
        """
        target: test search with output fields
        method: search with one output_field
        expected: search success
        """
        # 1. initialize with data
        auto_id = True
        enable_dynamic_field = False
        collection_w, _, _, insert_ids = \
            self.init_collection_general(prefix, True, auto_id=auto_id, enable_dynamic_field=enable_dynamic_field)[0:4]
        # 2. search
        collection_w.search(vectors[:default_nq], default_search_field,
                            default_search_params, default_limit,
                            default_search_exp, _async=_async,
                            output_fields=[field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq, "ids": insert_ids,
                                         "limit": default_limit, "_async": _async,
                                         "output_fields": [field_name]})[0]

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_with_output_fields(self, _async):
        """
        target: test search with output fields
        method: search with multiple output_field
        expected: search success
        """
        # 1. initialize with data
        nb = 2000
        dim = 64
        auto_id = False
        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb,
                                                                      is_all_data_type=True,
                                                                      auto_id=auto_id,
                                                                      dim=dim)[0:4]
        # 2. search
        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
        output_fields = [default_int64_field_name, default_float_field_name]
        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, default_limit,
                            default_search_exp, _async=_async,
                            output_fields=output_fields,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": default_limit,
                                         "_async": _async,
                                         "output_fields": output_fields})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_output_array_field(self, enable_dynamic_field):
        """
        target: test search output array field
        method: create connection, collection, insert and search
        expected: search successfully
        """
        # 1. create a collection
        auto_id = True
        schema = cf.gen_array_collection_schema(auto_id=auto_id)
        collection_w = self.init_collection_wrap(schema=schema)

        # 2. insert data
        if enable_dynamic_field:
            data = cf.gen_row_data_by_schema(schema=schema)
        else:
            data = cf.gen_array_dataframe_data(auto_id=auto_id)

        collection_w.insert(data)

        # 3. create index and load
        collection_w.create_index(default_search_field)
        collection_w.load()

        # 4. search output array field, check
        output_fields = [ct.default_int64_field_name, ct.default_int32_array_field_name,
                         ct.default_float_array_field_name]
        collection_w.search(vectors[:default_nq], default_search_field, {}, default_limit,
                            output_fields=output_fields,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": default_limit,
                                         "output_fields": output_fields})

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("index", ct.all_index_types[:8])
    @pytest.mark.parametrize("metrics", ct.dense_metrics)
    @pytest.mark.parametrize("limit", [200])
    def test_search_output_field_vector_after_different_index_metrics(self, index, metrics, limit):
        """
        target: test search with output vector field after different index
        method: 1. create a collection and insert data
                2. create index and load
                3. search with output field vector
                4. check the result vectors should be equal to the inserted
        expected: search success
        """
        collection_w, _vectors = self.init_collection_general(prefix, True, is_index=False)[:2]

        # 2. create index and load
        params = cf.get_index_params_params(index)
        default_index = {"index_type": index, "params": params, "metric_type": metrics}
        collection_w.create_index(field_name, default_index)
        collection_w.load()

        # 3. search with output field vector
        search_params = cf.gen_search_param(index, metrics)
        for search_param in search_params:
            if index == "HNSW":
                limit = search_param["params"]["ef"]
                if limit > max_limit:
                    limit = default_nb
            if index == "DISKANN":
                limit = search_param["params"]["search_list"]
            collection_w.search(vectors[:1], default_search_field,
                                search_param, limit, default_search_exp,
                                output_fields=[field_name],
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": 1,
                                             "limit": limit,
                                             "original_entities": _vectors[0],
                                             "output_fields": [field_name]})

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("metrics", ct.binary_metrics[:2])
    @pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"])
    def test_search_output_field_vector_after_binary_index(self, metrics, index):
        """
        target: test search with output vector field after binary index
        method: 1. create a collection and insert data
                2. create index and load
                3. search with output field vector
                4. check the result vectors should be equal to the inserted
        expected: search success
        """
        # 1. create a collection and insert data
        collection_w = self.init_collection_general(prefix, is_binary=True, is_index=False)[0]
        data = cf.gen_default_binary_dataframe_data()[0]
        collection_w.insert(data)

        # 2. create index and load
        params = {"M": 48, "efConstruction": 500} if index == "HNSW" else {"nlist": 128}
        default_index = {"index_type": index, "metric_type": metrics, "params": params}
        collection_w.create_index(binary_field_name, default_index)
        collection_w.load()

        # 3. search with output field vector
        search_params = cf.gen_search_param(index, metrics)
        binary_vectors = cf.gen_binary_vectors(1, default_dim)[1]
        for search_param in search_params:
            res = collection_w.search(binary_vectors, binary_field_name,
                                      search_param, 2, default_search_exp,
                                      output_fields=[binary_field_name])[0]

            # 4. check the result vectors should be equal to the inserted
            assert res[0][0].entity.binary_vector == data[binary_field_name][res[0][0].id]

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("metrics", ct.structure_metrics)
    @pytest.mark.parametrize("index", ["BIN_FLAT"])
    def test_search_output_field_vector_after_structure_metrics(self, metrics, index):
        """
        target: test search with output vector field after binary index
        method: 1. create a collection and insert data
                2. create index and load
                3. search with output field vector
                4. check the result vectors should be equal to the inserted
        expected: search success
        """
        dim = 8
        # 1. create a collection and insert data
        collection_w = self.init_collection_general(prefix, dim=dim, is_binary=True, is_index=False)[0]
        data = cf.gen_default_binary_dataframe_data(dim=dim)[0]
        collection_w.insert(data)

        # 2. create index and load
        default_index = {"index_type": index, "metric_type": metrics, "params": {"nlist": 128}}
        collection_w.create_index(binary_field_name, default_index)
        collection_w.load()

        # 3. search with output field vector
        search_params = {"metric_type": metrics, "params": {"nprobe": 10}}
        binary_vectors = cf.gen_binary_vectors(ct.default_nq, dim)[1]
        res = collection_w.search(binary_vectors, binary_field_name,
                                  search_params, 2, default_search_exp,
                                  output_fields=[binary_field_name])[0]

        # 4. check the result vectors should be equal to the inserted
        assert res[0][0].entity.binary_vector == data[binary_field_name][res[0][0].id]

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("dim", [32, 77, 768])
    def test_search_output_field_vector_with_different_dim(self, dim):
        """
        target: test search with output vector field after binary index
        method: 1. create a collection and insert data
                2. create index and load
                3. search with output field vector
                4. check the result vectors should be equal to the inserted
        expected: search success
        """
        # 1. create a collection and insert data
        collection_w, _vectors = self.init_collection_general(prefix, True, dim=dim)[:2]

        # 2. search with output field vector
        vectors = cf.gen_vectors(default_nq, dim=dim)
        collection_w.search(vectors[:default_nq], default_search_field,
                            default_search_params, default_limit, default_search_exp,
                            output_fields=[field_name],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": default_limit,
                                         "original_entities": _vectors[0],
                                         "output_fields": [field_name]})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_output_vector_field_and_scalar_field(self, enable_dynamic_field):
        """
        target: test search with output vector field and scalar field
        method: 1. initialize a collection
                2. search with output field vector
                3. check no field missing
        expected: search success
        """
        # 1. initialize a collection
        collection_w, _vectors = self.init_collection_general(prefix, True,
                                                              enable_dynamic_field=enable_dynamic_field)[:2]

        # search with output field vector
        output_fields = [default_float_field_name, default_string_field_name,
                         default_json_field_name, default_search_field]
        original_entities = []
        if enable_dynamic_field:
            entities = []
            for vector in _vectors[0]:
                entities.append({default_int64_field_name: vector[default_int64_field_name],
                                 default_float_field_name: vector[default_float_field_name],
                                 default_string_field_name: vector[default_string_field_name],
                                 default_json_field_name: vector[default_json_field_name],
                                 default_search_field: vector[default_search_field]})
            original_entities.append(pd.DataFrame(entities))
        else:
            original_entities = _vectors
        collection_w.search(vectors[:1], default_search_field,
                            default_search_params, default_limit, default_search_exp,
                            output_fields=output_fields,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": 1,
                                         "limit": default_limit,
                                         "pk_name": default_int64_field_name,
                                         "original_entities": original_entities[0],
                                         "output_fields": output_fields})
        if enable_dynamic_field:
            collection_w.search(vectors[:1], default_search_field,
                                default_search_params, default_limit, default_search_exp,
                                output_fields=["$meta", default_search_field],
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": 1,
                                             "limit": default_limit,
                                             "pk_name": default_int64_field_name,
                                             "original_entities": original_entities[0],
                                             "output_fields": output_fields})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_output_vector_field_and_pk_field(self, enable_dynamic_field):
        """
        target: test search with output vector field and pk field
        method: 1. initialize a collection
                2. search with output field vector
                3. check no field missing
        expected: search success
        """
        # 1. initialize a collection
        collection_w = self.init_collection_general(prefix, True,
                                                    enable_dynamic_field=enable_dynamic_field)[0]

        # 2. search with output field vector
        output_fields = [default_int64_field_name, default_string_field_name, default_search_field]
        collection_w.search(vectors[:1], default_search_field,
                            default_search_params, default_limit, default_search_exp,
                            output_fields=output_fields,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": 1,
                                         "limit": default_limit,
                                         "output_fields": output_fields})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_output_field_vector_with_partition(self):
        """
        target: test search with output vector field
        method: 1. create a collection and insert data
                2. create index and load
                3. search with output field vector
                4. check the result vectors should be equal to the inserted
        expected: search success
        """
        # 1. create a collection and insert data
        collection_w = self.init_collection_general(prefix, is_index=False)[0]
        partition_w = self.init_partition_wrap(collection_w)
        data = cf.gen_default_dataframe_data()
        partition_w.insert(data)

        # 2. create index and load
        collection_w.create_index(field_name, default_index_params)
        collection_w.load()

        # 3. search with output field vector
        partition_w.search(vectors[:1], default_search_field,
                           default_search_params, default_limit, default_search_exp,
                           output_fields=[field_name],
                           check_task=CheckTasks.check_search_results,
                           check_items={"nq": 1,
                                        "limit": default_limit,
                                        "original_entities": data,
                                        "output_fields": [field_name]})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("wildcard_output_fields", [["*"], ["*", default_int64_field_name],
                                                        ["*", default_search_field]])
    def test_search_with_output_field_wildcard(self, wildcard_output_fields, _async):
        """
        target: test search with output fields using wildcard
        method: search with one output_field (wildcard)
        expected: search success
        """
        # 1. initialize with data
        auto_id = True
        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
                                                                      auto_id=auto_id)[0:4]
        # 2. search
        output_fields = cf.get_wildcard_output_field_names(collection_w, wildcard_output_fields)
        collection_w.search(vectors[:default_nq], default_search_field,
                            default_search_params, default_limit,
                            default_search_exp, _async=_async,
                            output_fields=wildcard_output_fields,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "ids": insert_ids,
                                         "pk_name": ct.default_int64_field_name,
                                         "limit": default_limit,
                                         "_async": _async,
                                         "output_fields": output_fields})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_with_invalid_output_fields(self):
        """
        target: test search with invalid output fields
        method: search with invalid output_field values
        expected: raise exception and report the expected error
        """
        # 1. initialize with data
        invalid_output_fields = [["%"], [""], ["-"]]
        auto_id = False
        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id)[0:4]
        # 2. search
        for field in invalid_output_fields:
            error1 = {ct.err_code: 999, ct.err_msg: "field %s not exist" % field[0]}
            error2 = {ct.err_code: 999, ct.err_msg: "`output_fields` value %s is illegal" % field}
            error = error2 if field == [""] else error1
            collection_w.search(vectors[:default_nq], default_search_field,
                                default_search_params, default_limit,
                                default_search_exp,
                                output_fields=field,
                                check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_multi_collections(self, nq, _async):
        """
        target: test search multi collections of L2
        method: add vectors into 10 collections, and search
        expected: search status ok, the length of each result equals the limit
        """
        nb = 1000
        dim = 64
        auto_id = True
        self._connect()
        collection_num = 10
        for i in range(collection_num):
            # 1. initialize with data
            log.info("test_search_multi_collections: search round %d" % (i + 1))
            collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb,
                                                                          auto_id=auto_id,
                                                                          dim=dim)[0:4]
            # 2. search
            vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
            log.info("test_search_multi_collections: searching %s entities (nq = %s) from collection %s" %
                     (default_limit, nq, collection_w.name))
            collection_w.search(vectors[:nq], default_search_field,
                                default_search_params, default_limit,
                                default_search_exp, _async=_async,
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": nq,
                                             "ids": insert_ids,
                                             "pk_name": ct.default_int64_field_name,
                                             "limit": default_limit,
                                             "_async": _async})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_concurrent_multi_threads(self, nq, _async, null_data_percent):
        """
        target: test concurrent search with multiple threads
        method: search with 10 threads, each thread using its own request
        expected: status ok and the returned vectors should be query_records
        """
        # 1. initialize with data
        nb = 3000
        dim = 64
        auto_id = False
        enable_dynamic_field = False
        threads_num = 10
        threads = []
        collection_w, _, _, insert_ids = \
            self.init_collection_general(prefix, True, nb, auto_id=auto_id, dim=dim,
                                         enable_dynamic_field=enable_dynamic_field,
                                         nullable_fields={ct.default_string_field_name: null_data_percent})[0:4]

        def search(collection_w):
            vectors = [[random.random() for _ in range(dim)]
                       for _ in range(nq)]
            collection_w.search(vectors[:nq], default_search_field,
                                default_search_params, default_limit,
                                default_search_exp, _async=_async,
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": nq,
                                             "ids": insert_ids,
                                             "limit": default_limit,
                                             "_async": _async})

        # 2. search with multiple threads
        log.info("test_search_concurrent_multi_threads: searching with %s threads" % threads_num)
        for i in range(threads_num):
            t = threading.Thread(target=search, args=(collection_w,))
            threads.append(t)
            t.start()
            time.sleep(0.2)
        for t in threads:
            t.join()

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.skip(reason="issue 37113")
    def test_search_concurrent_two_collections_nullable(self, nq, _async):
@ -1565,192 +1037,6 @@ class TestCollectionSearch(TestcaseBase):
                                         "limit": default_limit,
                                         })

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_consistency_bounded(self, nq, _async):
        """
        target: test search with different consistency level
        method: 1. create a collection
                2. insert data
                3. search with consistency_level "Bounded"
        expected: searched successfully
        """
        limit = 1000
        nb_old = 500
        dim = 64
        auto_id = True
        enable_dynamic_field = False
        collection_w, _, _, insert_ids = \
            self.init_collection_general(prefix, True, nb_old, auto_id=auto_id,
                                         dim=dim, enable_dynamic_field=enable_dynamic_field)[0:4]
        # 2. search for original data after load
        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, limit,
                            default_search_exp, _async=_async,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": nb_old,
                                         "_async": _async,
                                         })

        kwargs = {}
        consistency_level = kwargs.get(
            "consistency_level", CONSISTENCY_BOUNDED)
        kwargs.update({"consistency_level": consistency_level})

        nb_new = 400
        _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
                                                    auto_id=auto_id, dim=dim,
                                                    insert_offset=nb_old,
                                                    enable_dynamic_field=enable_dynamic_field)
        insert_ids.extend(insert_ids_new)

        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, limit,
                            default_search_exp, _async=_async,
                            **kwargs,
                            )

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_consistency_strong(self, nq, _async):
        """
        target: test search with different consistency level
        method: 1. create a collection
                2. insert data
                3. search with consistency_level "Strong"
        expected: searched successfully
        """
        limit = 1000
        nb_old = 500
        dim = 64
        auto_id = False
        enable_dynamic_field = False
        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
                                                                      auto_id=auto_id, dim=dim,
                                                                      enable_dynamic_field=enable_dynamic_field)[0:4]
        # 2. search for original data after load
        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, limit,
                            default_search_exp, _async=_async,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": nb_old,
                                         "_async": _async})

        nb_new = 400
        _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
                                                    auto_id=auto_id, dim=dim,
                                                    insert_offset=nb_old,
                                                    enable_dynamic_field=enable_dynamic_field)
        insert_ids.extend(insert_ids_new)
        kwargs = {}
        consistency_level = kwargs.get("consistency_level", CONSISTENCY_STRONG)
        kwargs.update({"consistency_level": consistency_level})

        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, limit,
                            default_search_exp, _async=_async,
                            **kwargs,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": nb_old + nb_new,
                                         "_async": _async})

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_consistency_eventually(self, nq, _async):
        """
        target: test search with different consistency level
        method: 1. create a collection
                2. insert data
                3. search with consistency_level "Eventually"
        expected: searched successfully
        """
        limit = 1000
        nb_old = 500
        dim = 64
        auto_id = True
        enable_dynamic_field = True
        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
                                                                      auto_id=auto_id, dim=dim,
                                                                      enable_dynamic_field=enable_dynamic_field)[0:4]
        # 2. search for original data after load
        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, limit,
                            default_search_exp, _async=_async,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": nb_old,
                                         "_async": _async})
        nb_new = 400
        _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
                                                    auto_id=auto_id, dim=dim,
                                                    insert_offset=nb_old,
                                                    enable_dynamic_field=enable_dynamic_field)
        insert_ids.extend(insert_ids_new)
        kwargs = {}
        consistency_level = kwargs.get(
            "consistency_level", CONSISTENCY_EVENTUALLY)
        kwargs.update({"consistency_level": consistency_level})
        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, limit,
                            default_search_exp, _async=_async,
                            **kwargs)

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_consistency_session(self, nq, _async):
        """
        target: test search with different consistency level
        method: 1. create a collection
                2. insert data
                3. search with consistency_level "Session"
        expected: searched successfully
        """
        limit = 1000
        nb_old = 500
        dim = 64
        auto_id = False
        enable_dynamic_field = True
        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
                                                                      auto_id=auto_id, dim=dim,
                                                                      enable_dynamic_field=enable_dynamic_field)[0:4]
        # 2. search for original data after load
        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, limit,
                            default_search_exp, _async=_async,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": nb_old,
                                         "_async": _async})

        kwargs = {}
        consistency_level = kwargs.get(
            "consistency_level", CONSISTENCY_SESSION)
        kwargs.update({"consistency_level": consistency_level})

        nb_new = 400
        _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
                                                    auto_id=auto_id, dim=dim,
                                                    insert_offset=nb_old,
                                                    enable_dynamic_field=enable_dynamic_field)
        insert_ids.extend(insert_ids_new)
        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, limit,
                            default_search_exp, _async=_async,
                            **kwargs,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": nb_old + nb_new,
                                         "_async": _async})

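# The four consistency tests above share one pattern; this is a hedged sketch of just
# that knob, assuming an already-built, loaded collection named "consistency_demo" with
# a 64-dim "float_vector" field. Levels below Strong may miss rows inserted after load,
# which is why only the Strong test asserts nb_old + nb_new hits.
from pymilvus import Collection, connections
import random

connections.connect(uri="http://localhost:19530")  # assumed endpoint
coll = Collection("consistency_demo")              # assumed existing collection
query = [[random.random() for _ in range(64)]]
for level in ("Strong", "Session", "Bounded", "Eventually"):
    coll.search(query, anns_field="float_vector", param={"metric_type": "L2"},
                limit=10, consistency_level=level)
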
    @pytest.mark.tags(CaseLabel.L1)
    def test_search_ignore_growing(self, nq, _async):
        """
@ -2161,3 +1447,4 @@ class TestCollectionSearch(TestcaseBase):
                                         "invalid parameter"})


@ -1,4 +1,6 @@
import logging
import time

import numpy as np
from common.constants import *
from utils.util_pymilvus import *
@ -47,13 +49,13 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
        self.collection_name = "TestMilvusClientSearchV2" + cf.gen_unique_str("_")
        self.partition_names = ["partition_1", "partition_2"]
        self.pk_field_name = ct.default_primary_field_name
        self.float_vector_field_name = "float_vector"
        self.float_vector_field_name = ct.default_float_vec_field_name
        self.bfloat16_vector_field_name = "bfloat16_vector"
        self.sparse_vector_field_name = "sparse_vector"
        self.binary_vector_field_name = "binary_vector"
        self.float_vector_dim = 128
        self.bf16_vector_dim = 200
        self.binary_vector_dim = 256
        self.float_vector_dim = 36
        self.bf16_vector_dim = 35
        self.binary_vector_dim = 32
        self.float_vector_metric = "COSINE"
        self.bf16_vector_metric = "L2"
        self.sparse_vector_metric = "IP"
@ -346,7 +348,8 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
        )

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("limit, nq", zip([1, 1000, ct.max_limit], [ct.max_nq, 10, 1]))
    # @pytest.mark.parametrize("limit, nq", zip([1, 1000, ct.max_limit], [ct.max_nq, 10, 1]))
    @pytest.mark.parametrize("limit, nq", zip([ct.max_limit], [1]))
    def test_search_with_different_nq_limits(self, limit, nq):
        """
        target: test search with different nq and limit values
@ -360,7 +363,7 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):

        # Generate vectors to search
        vectors_to_search = cf.gen_vectors(nq, self.float_vector_dim)
        search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}}
        search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 128}}

        # search with limit
        search_res, _ = self.search(
@ -453,6 +456,73 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
                "pk_name": self.pk_field_name
            }
        )

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("wildcard_output_fields", [["*"], ["*", ct.default_primary_field_name],
                                                        ["*", ct.default_float_vec_field_name]])
    def test_search_partition_with_output_fields(self, wildcard_output_fields):
        """
        target: test partition search with output fields
        method: 1. connect to milvus
                2. partition search on an existing collection with output fields
        expected: search successfully with output fields
        """
        client = self._client()
        collection_name = self.collection_name
        collection_info = self.describe_collection(client, collection_name)[0]
        fields = collection_info.get('fields', None)
        field_names = [field.get('name') for field in fields]
        partition_name = self.partition_names[0]

        # Generate vectors to search
        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
        search_params = {"metric_type": self.float_vector_metric, "params": {"nprobe": 100}}

        # search with output fields
        output_fields = cf.get_wildcard_output_field_names(collection_info, wildcard_output_fields)
        search_res, _ = self.search(
            client,
            collection_name,
            vectors_to_search[:default_nq],
            partition_names=[partition_name],
            anns_field=self.float_vector_field_name,
            search_params=search_params,
            limit=default_limit,
            output_fields=["*"],
            check_task=CheckTasks.check_search_results,
            check_items={"enable_milvus_client_api": True,
                         "nq": default_nq,
                         "limit": default_limit,
                         "output_fields": field_names + [self.dyna_filed_name1, self.dyna_filed_name2]})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_with_invalid_output_fields(self):
        """
        target: test search with invalid output fields
        method: search with invalid output_field values
        expected: raise exception and report the expected error
        """
        client = self._client()
        collection_name = self.collection_name
        collection_info = self.describe_collection(client, collection_name)[0]
        fields = collection_info.get('fields', None)
        field_names = [field.get('name') for field in fields]
        partition_name = self.partition_names[0]

        # Generate vectors to search
        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
        search_params = {}
        invalid_output_fields = [["%"], [""], ["-"], ["non_exist_field"]]
        for field in invalid_output_fields:
            error1 = {ct.err_code: 999, ct.err_msg: "field %s not exist" % field[0]}
            error2 = {ct.err_code: 999, ct.err_msg: "`output_fields` value %s is illegal" % field}
            error = error2 if field == [""] else error1
            self.search(client, collection_name, vectors_to_search[:default_nq],
                        anns_field=self.float_vector_field_name,
                        search_params=search_params,
                        limit=default_limit,
                        output_fields=field,
                        check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_with_more_than_max_limit(self):
@ -727,19 +797,12 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
        schema = self.create_schema(client)[0]
        schema.add_field(ct.default_primary_field_name, DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(ct.default_float_vec_field_name, DataType.FLOAT_VECTOR, dim=ct.default_dim)
        schema.add_field(ct.default_float_field_name, DataType.FLOAT)
        schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=256)
        schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
        schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=256, nullable=True)
        self.create_collection(client, collection_name, schema=schema)

        # insert data
        data = []
        for i in range(default_nb):
            data.append({
                ct.default_primary_field_name: i,
                ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
                ct.default_float_field_name: i * 1.0,
                ct.default_string_field_name: str(i)
            })
        data = cf.gen_row_data_by_schema(schema=schema, nb=default_nb)
        self.insert(client, collection_name, data)

        # create index with metric cosine
@ -1006,7 +1069,7 @@ class TestSearchV2Independent(TestMilvusClientV2Base):

        # search in the collection
        vectors_to_search = cf.gen_vectors(1, ct.default_dim)
        limit = 1000
        limit = 100
        search_params = {}
        search_res1, _ = self.search(
            client,
@ -1101,6 +1164,7 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
        # release the partition again and load the collection
        self.release_partitions(client, collection_name, [to_be_released_partition])
        self.load_collection(client, collection_name)
        self.refresh_load(client, collection_name)  # workaround for #43386; remove this line after it is fixed

        # search again
        search_res5, _ = self.search(
@ -1271,7 +1335,8 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
                    search_params=search_params, limit=ct.default_limit,
                    output_fields=["*"],
                    check_task=CheckTasks.check_search_results,
                    check_items={"nq": ct.default_nq,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit})
        # disable mmap
        self.release_collection(client, collection_name)
@ -1283,7 +1348,8 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
                    search_params=search_params, limit=ct.default_limit,
                    output_fields=["*"],
                    check_task=CheckTasks.check_search_results,
                    check_items={"nq": ct.default_nq,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit})

    @pytest.mark.tags(CaseLabel.L2)
@ -1345,7 +1411,8 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
                    search_params=search_params, limit=ct.default_limit,
                    output_fields=output_fields,
                    check_task=CheckTasks.check_search_results,
                    check_items={"nq": ct.default_nq,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit})
        # disable mmap
        self.release_collection(client, collection_name)
@ -1357,26 +1424,31 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
                    search_params=search_params, limit=ct.default_limit,
                    output_fields=output_fields,
                    check_task=CheckTasks.check_search_results,
                    check_items={"nq": ct.default_nq,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("num_shards", [-256, 0, ct.max_shards_num // 2, ct.max_shards_num])
    def test_search_with_non_default_shard_nums(self, num_shards):
        """
        Test search functionality with non-default shard numbers.

        This test verifies that:
        1. Collections are created with default shard numbers when num_shards <= 0
        2. Collections are created with specified shard numbers when num_shards > 0
        3. Search operations work correctly with different shard configurations
        This test verifies that search operations work correctly when collections are created with:
        - Negative shard numbers (should use default)
        - Zero shards (should use default)
        - Half of max shards
        - Max shards

        The test follows these steps:
        1. Creates a collection with specified shard numbers
        The test performs the following steps:
        1. Creates a collection with the specified shard number
        2. Inserts test data
        3. Builds an index
        4. Performs a search operation
        5. Validates the results
        3. Builds index
        4. Loads collection
        5. Executes search and verifies results

        @param num_shards: Number of shards to test (parameterized)
        @tags: L2
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
@ -1412,36 +1484,193 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
        self.search(client, collection_name, vectors, anns_field="vector",
                    search_params=search_params, limit=ct.default_limit,
                    check_task=CheckTasks.check_search_results,
                    check_items={"nq": ct.default_nq,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_HNSW_index_with_redundant_param(self):
    @pytest.mark.parametrize('vector_dtype', ct.all_dense_vector_types)
    @pytest.mark.parametrize('index', ct.all_index_types[:8])
    def test_search_output_field_vector_with_dense_vector_and_index(self, vector_dtype, index):
        """
        Test search functionality with HNSW index and redundant parameters.
        Test search with output vector field after different index types.

        This test verifies that:
        1. HNSW index can be created with redundant parameters
        2. Search operations work correctly with redundant parameters
        3. Redundant parameters are ignored
        Steps:
        1. Create a collection with the specified schema and insert test data
        2. Build index (with error handling for unsupported index types)
        3. Load collection and perform search operations with:
           - All output fields ("*")
           - Explicitly specified all fields
           - Subset of fields
        4. Verify search results match expected output fields

        The test performs the following steps:
        1. Creates a collection with float vectors
        2. Inserts test data
        3. Creates HNSW index with redundant parameters
        4. Performs a search operation
        5. Validates the results
        Parameters:
        - vector_dtype: Type of vector data (all supported dense vector types)
        - index: Index type (first 8 supported index types)

        Expected:
        - Successful search operations with correct output fields returned
        - Proper error when attempting unsupported index combinations
        """
        dim = 16
        index = "HNSW"

        metrics = 'COSINE'
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        dim = 32
        schema = self.create_schema(client)[0]
        schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
        schema.add_field('vector', vector_dtype, dim=dim)
        schema.add_field('float_array', DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=200)
        schema.add_field('json_field', DataType.JSON, max_length=200)
        schema.add_field('string_field', DataType.VARCHAR, max_length=200)
        self.create_collection(client, collection_name, schema=schema)

        # insert
        # Insert data in 3 batches with unique primary keys using a loop
        insert_times = 3
        random_vectors = list(cf.gen_vectors(ct.default_nb*insert_times, dim, vector_data_type=vector_dtype)) \
            if vector_dtype == DataType.FLOAT_VECTOR \
            else cf.gen_vectors(ct.default_nb*insert_times, dim, vector_data_type=vector_dtype)
        for j in range(insert_times):
            start_pk = j * ct.default_nb
            rows = [{
                "id": i + start_pk,
                "vector": random_vectors[i + start_pk],
                "float_array": [random.random() for _ in range(10)],
                "json_field": {"name": "abook", "words": i},
                "string_field": "Hello, Milvus!"
            } for i in range(ct.default_nb)]
            self.insert(client, collection_name, rows)
        self.flush(client, collection_name)

        # build index
        index_params, _ = self.prepare_index_params(client)
        index_params.add_index(field_name='vector', index_type=index,
                               metric_type=metrics,
                               params=cf.get_index_params_params(index_type=index))
        if vector_dtype == DataType.INT8_VECTOR and index != 'HNSW':
            # INT8_VECTOR only supports the HNSW index for now
            error = {"err_code": 999, "err_msg": f"data type Int8Vector can't build with this index {index}"}
            self.create_index(client, collection_name, index_params=index_params,
                              check_task=CheckTasks.err_res, check_items=error)
        else:
            self.create_index(client, collection_name, index_params=index_params)

        # load the collection with index
        assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
        self.load_collection(client, collection_name)

        # search with output field vector
        search_params = {}
        vectors = random_vectors[:ct.default_nq]
        # search with all output fields via wildcard
        self.search(client, collection_name, vectors, anns_field="vector",
                    search_params=search_params, limit=ct.default_limit,
                    output_fields=["*"],
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit,
                                 "output_fields": ["id", "vector", "float_array", "json_field", "string_field"]})
        # search with all output fields specified explicitly
        self.search(client, collection_name, vectors, anns_field="vector",
                    search_params=search_params, limit=ct.default_limit,
                    output_fields=["id", "vector", "float_array", "json_field", "string_field"],
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit,
                                 "output_fields": ["id", "vector", "float_array", "json_field", "string_field"]})
        # search with a subset of output fields
        self.search(client, collection_name, vectors, anns_field="vector",
                    search_params=search_params, limit=ct.default_limit,
                    output_fields=["id", "vector", "json_field"],
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit,
                                 "output_fields": ["id", "vector", "json_field"]})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize('index', ct.binary_supported_index_types)
    def test_search_with_output_fields_vector_with_binary_vector_and_index(self, index):
        """
        Test search functionality with output fields for the binary vector type and a specified index.

        This test case verifies that:
        1. A collection with a binary vector field can be created and data inserted
        2. An index can be built on the binary vector field
        3. Search with output fields (including the vector field) works correctly
        4. Results contain the expected output fields (id and vector)

        Parameters:
            index: The index type to test with (parametrized via pytest.mark.parametrize)

        The test performs the following steps:
        - Creates a collection with a binary vector field
        - Inserts test data in batches
        - Builds the specified index type
        - Performs a search with output fields
        - Validates that the search results contain the expected fields
        """
        vector_dtype = DataType.BINARY_VECTOR
        client = self._client()
        dim = 32
        collection_name = cf.gen_collection_name_by_testcase_name()
        schema, _ = self.create_schema(client)
        schema.add_field("id", datatype=DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field("vector", datatype=vector_dtype, dim=dim)
        self.create_collection(client, collection_name, schema=schema)

        # Insert data in 3 batches with unique primary keys using a loop
        insert_times = 3
        random_vectors = list(cf.gen_vectors(ct.default_nb * insert_times, dim, vector_data_type=vector_dtype)) \
            if vector_dtype == DataType.FLOAT_VECTOR \
            else cf.gen_vectors(ct.default_nb * insert_times, dim, vector_data_type=vector_dtype)
        for j in range(insert_times):
            start_pk = j * ct.default_nb
            rows = [{
                "id": i + start_pk,
                "vector": random_vectors[i + start_pk]
            } for i in range(ct.default_nb)]
            self.insert(client, collection_name, rows)
        self.flush(client, collection_name)

        # build index
        index_params, _ = self.prepare_index_params(client)
        index_params.add_index(field_name='vector', index_type=index,
                               metric_type='JACCARD',
                               params=cf.get_index_params_params(index_type=index))
        self.create_index(client, collection_name, index_params=index_params)

        # load the collection with index
        assert self.wait_for_index_ready(client, collection_name, 'vector', timeout=120)
        self.load_collection(client, collection_name)

        # search with output field vector
        search_params = {}
        vectors = random_vectors[:ct.default_nq]
        self.search(client, collection_name, vectors, anns_field="vector",
                    search_params=search_params, limit=ct.default_limit,
                    output_fields=["*"],
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit,
                                 "output_fields": ["id", "vector"]})

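# For reference, a plain-Python sketch of the JACCARD metric used above on packed
# binary vectors: distance = 1 - |a AND b| / |a OR b| over the set bits. A 32-dim
# binary vector occupies 4 bytes, matching the generated data in this test.
def jaccard_distance(a: bytes, b: bytes) -> float:
    inter = sum(bin(x & y).count("1") for x, y in zip(a, b))
    union = sum(bin(x | y).count("1") for x, y in zip(a, b))
    return 1.0 - inter / union if union else 0.0

assert jaccard_distance(b"\xf0\x00\x00\x01", b"\xf0\x00\x00\x01") == 0.0  # identical -> 0
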
    @pytest.mark.tags(CaseLabel.L2)
    def test_search_with_output_fields_empty(self):
        """
        target: test search with output fields
        method: search with empty output_field
        expected: search success
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        dim = 32
        # create collection with fast mode
        self.create_collection(client, collection_name, dimension=dim)
        # insert data
        data = []
        for i in range(ct.default_nb):
            data.append({
@ -1450,24 +1679,20 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
            })
        self.insert(client, collection_name, data)
        self.flush(client, collection_name)
        # create index
        index_params = self.prepare_index_params(client)[0]
        params = cf.get_index_params_params(index)
        params["nlist"] = 100  # nlist is a redundant parameter here
        index_params.add_index(field_name='vector', index_type=index,
                               metric_type='COSINE', params=params)
        self.create_index(client, collection_name, index_params=index_params)
        self.wait_for_index_ready(client, collection_name, index_name='vector')
        index_info = self.describe_index(client, collection_name, index_name='vector')
        assert index_info[0]["nlist"] == '100'
        # load
        self.load_collection(client, collection_name)
        # search
        vectors = cf.gen_vectors(ct.default_nq, dim)
        # search with empty output fields
        search_params = {}
        vectors = cf.gen_vectors(ct.default_nq, dim)
        self.search(client, collection_name, vectors, anns_field="vector",
                    search_params=search_params, limit=ct.default_limit,
                    output_fields=[],
                    check_task=CheckTasks.check_search_results,
                    check_items={"nq": ct.default_nq,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit})
        self.search(client, collection_name, vectors, anns_field="vector",
                    search_params=search_params, limit=ct.default_limit,
                    output_fields=None,
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": ct.default_nq,
                                 "limit": ct.default_limit})

95
tests/python_client/testcases/indexes/idx_diskann.py
Normal file
@ -0,0 +1,95 @@
from pymilvus import DataType
from common import common_type as ct

success = "success"


class DISKANN:
    supported_vector_types = [
        DataType.FLOAT_VECTOR,
        DataType.FLOAT16_VECTOR,
        DataType.BFLOAT16_VECTOR
    ]

    supported_metrics = ['L2', 'IP', 'COSINE']

    build_params = [
        # search_list_size
        # Type: Integer  Range: [1, int_max]
        # Default value: 100
        {"description": "Minimum Boundary Test", "params": {"search_list_size": 1}, "expected": success},
        {"description": "Large Value Test", "params": {"search_list_size": 10000}, "expected": success},
        {"description": "Out of Range Test - Negative", "params": {"search_list_size": -1}, "expected": success},
        {"description": "String Type Test", "params": {"search_list_size": "100"}, "expected": success},
        {"description": "Float Type Test", "params": {"search_list_size": 100.0}, "expected": success},
        {"description": "Boolean Type Test", "params": {"search_list_size": True}, "expected": success},
        {"description": "None Type Test", "params": {"search_list_size": None}, "expected": success},
        # search_cache_budget_gb_ratio
        # Type: Float  Range: [0.0, 0.3)
        # Default value: 0.10
        # TODO: run the minimum boundary test after issue #43176 is fixed
        # {"description": "Minimum Boundary Test", "params": {"search_cache_budget_gb_ratio": 0.0}, "expected": success},
        {"description": "Maximum Boundary Test", "params": {"search_cache_budget_gb_ratio": 0.3}, "expected": success},
        {"description": "Default value Test", "params": {"search_cache_budget_gb_ratio": 0.1}, "expected": success},
        {"description": "Out of Range Test - Negative", "params": {"search_cache_budget_gb_ratio": -0.1}, "expected": success},
        {"description": "Out of Range Test - Too Large", "params": {"search_cache_budget_gb_ratio": 0.31}, "expected": success},
        {"description": "String Type Test", "params": {"search_cache_budget_gb_ratio": "0.2"}, "expected": success},
        {"description": "Boolean Type Test", "params": {"search_cache_budget_gb_ratio": True}, "expected": success},
        {"description": "None Type Test", "params": {"search_cache_budget_gb_ratio": None}, "expected": success},
        # pq_code_budget_gb_ratio
        # Type: Float  Range: (0.0, 0.25]
        # Default value: 0.125
        {"description": "Minimum Boundary Test", "params": {"pq_code_budget_gb_ratio": 0.0001}, "expected": success},
        {"description": "Maximum Boundary Test", "params": {"pq_code_budget_gb_ratio": 0.25}, "expected": success},
        {"description": "Default value Test", "params": {"pq_code_budget_gb_ratio": 0.125}, "expected": success},
        {"description": "Out of Range Test - Negative", "params": {"pq_code_budget_gb_ratio": -0.1}, "expected": success},
        {"description": "Out of Range Test - Too Large", "params": {"pq_code_budget_gb_ratio": 0.26}, "expected": success},
        {"description": "String Type Test", "params": {"pq_code_budget_gb_ratio": "0.1"}, "expected": success},
        {"description": "Boolean Type Test", "params": {"pq_code_budget_gb_ratio": True}, "expected": success},
        {"description": "None Type Test", "params": {"pq_code_budget_gb_ratio": None}, "expected": success},
        # max_degree
        # Type: Integer  Range: [1, 512]
        # Default value: 56
        {"description": "Minimum Boundary Test", "params": {"max_degree": 1}, "expected": success},
        {"description": "Maximum Boundary Test", "params": {"max_degree": 512}, "expected": success},
        {"description": "Default value Test", "params": {"max_degree": 56}, "expected": success},
        {"description": "Large Value Test", "params": {"max_degree": 128}, "expected": success},
        {"description": "Out of Range Test - Negative", "params": {"max_degree": -1}, "expected": success},
        {"description": "String Type Test", "params": {"max_degree": "32"}, "expected": success},
        {"description": "Float Type Test", "params": {"max_degree": 32.0}, "expected": success},
        {"description": "Boolean Type Test", "params": {"max_degree": True}, "expected": success},
        {"description": "None Type Test", "params": {"max_degree": None}, "expected": success},
        # combined params
        {"description": "Optimal Performance Combination Test", "params": {"search_list_size": 100, "beamwidth": 10, "search_cache_budget_gb_ratio": 0.5, "pq_code_budget_gb_ratio": 0.5}, "expected": success},
        {"description": "empty dict params", "params": {}, "expected": success},
        {"description": "not_defined_param in the dict params", "params": {"search_list_size": 100, "not_defined_param": "nothing"}, "expected": success},

    ]

    search_params = [
        # beam_width_ratio
        # Type: Float Range: [1, max(128 / CPU number, 16)]
        # Default value: 4.0
        {"description": "Minimum Boundary Test", "params": {"beam_width_ratio": 1.0}, "expected": success},
        {"description": "Maximum Boundary Test", "params": {"beam_width_ratio": 16.0}, "expected": success},
        {"description": "Default value Test", "params": {"beam_width_ratio": 4.0}, "expected": success},
        {"description": "Out of Range Test - Negative", "params": {"beam_width_ratio": -0.1}, "expected": success},
        {"description": "Out of Range Test - Too Large", "params": {"beam_width_ratio": 17.0}, "expected": success},
        {"description": "String Type Test", "params": {"beam_width_ratio": "2.0"}, "expected": success},
        {"description": "Boolean Type Test", "params": {"beam_width_ratio": True}, "expected": success},
        {"description": "None Type Test", "params": {"beam_width_ratio": None}, "expected": success},
        # search_list_size
        # Type: Integer Range: [1, int_max]
        # Default value: 100
        {"description": "Minimum Boundary Test", "params": {"search_list_size": 1}, "expected": {"err_code": 999, "err_msg": "search_list_size(1) should be larger than k(10)"}},
        {"description": "Large Value Test", "params": {"search_list_size": 1000}, "expected": success},
        {"description": "Default value Test", "params": {"search_list_size": 100}, "expected": success},
        {"description": "Out of Range Test - Negative", "params": {"search_list_size": -1}, "expected": {"err_code": 999, "err_msg": "param 'search_list_size' (-1) should be in range [1, 2147483647]"}},
        {"description": "String Type Test", "params": {"search_list_size": "100"}, "expected": success},
        {"description": "Float Type Test", "params": {"search_list_size": 100.0}, "expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'search_list_size' (100.0) should be integer"}},
        {"description": "Boolean Type Test", "params": {"search_list_size": True}, "expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'search_list_size' (true) should be integer"}},
        {"description": "None Type Test", "params": {"search_list_size": None}, "expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'search_list_size' (null) should be integer"}},
        # mix params
        {"description": "mix params", "params": {"search_list_size": 100, "beam_width_ratio": 0.5}, "expected": success},
        {"description": "mix params", "params": {}, "expected": success},
    ]
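For orientation, here is a minimal sketch of how one entry from each table above would be applied through the MilvusClient API. The client URI, collection name, and dimension are hypothetical, and the real tests go through the TestMilvusClientV2Base wrappers rather than raw client calls:

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")  # assumed local deployment

# build: one "params" dict from DISKANN.build_params
index_params = client.prepare_index_params()
index_params.add_index(field_name="vector", index_type="DISKANN", metric_type="L2",
                       params={"max_degree": 56, "search_cache_budget_gb_ratio": 0.1})
client.create_index("demo_collection", index_params)
client.load_collection("demo_collection")

# search: one "params" dict from DISKANN.search_params; note that
# search_list_size must be larger than limit, per the expected errors above
res = client.search(collection_name="demo_collection",
                    data=[[0.1] * 128],  # a single 128-dim query vector (dim assumed)
                    limit=10,
                    search_params={"params": {"search_list_size": 100, "beam_width_ratio": 4.0}})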
tests/python_client/testcases/indexes/idx_hnsw.py (new file, 175 lines)
@ -0,0 +1,175 @@
from pymilvus import DataType
from common import common_type as ct

success = "success"

class HNSW:
    supported_vector_types = [
        DataType.FLOAT_VECTOR,
        DataType.FLOAT16_VECTOR,
        DataType.BFLOAT16_VECTOR,
        DataType.INT8_VECTOR
    ]

    supported_metrics = ['L2', 'IP', 'COSINE']

    build_params = [
        # M params test
        {
            "description": "Minimum Boundary Test",
            "params": {"M": 2},
            "expected": success
        },
        {
            "description": "Maximum Boundary Test",
            "params": {"M": 2048},
            "expected": success
        },
        {
            "description": "Out of Range Test - Negative",
            "params": {"M": -1},
            "expected": {"err_code": 999, "err_msg": "param 'M' (-1) should be in range [2, 2048]"}
        },
        {
            "description": "Out of Range Test - Too Large",
            "params": {"M": 2049},
            "expected": {"err_code": 999, "err_msg": "param 'M' (2049) should be in range [2, 2048]"}
        },
        {
            "description": "String Type Test will ignore the wrong type",
            "params": {"M": "16"},
            "expected": success
        },
        {
            "description": "Float Type Test",
            "params": {"M": 16.0},
            "expected": {"err_code": 999, "err_msg": "wrong data type in json"}
        },
        {
            "description": "Boolean Type Test",
            "params": {"M": True},
            "expected": {"err_code": 999, "err_msg": "invalid integer value, key: 'M', value: 'True': invalid parameter"}
        },
        {
            "description": "None Type Test, use default value",
            "params": {"M": None},
            "expected": success
        },
        {
            "description": "List Type Test",
            "params": {"M": [16]},
            "expected": {"err_code": 999, "err_msg": "invalid integer value, key: 'M', value: '[16]': invalid parameter"}
        },
        # efConstruction params test
        {
            "description": "Minimum Boundary Test",
            "params": {"efConstruction": 1},
            "expected": success
        },
        {
            "description": "Large Value Test",
            "params": {"efConstruction": 10000},
            "expected": success
        },
        {
            "description": "Out of Range Test - Negative",
            "params": {"efConstruction": -1},
            "expected": {"err_code": 999, "err_msg": "param 'efConstruction' (-1) should be in range [1, 2147483647]"}
        },
        {
            "description": "String Type Test will ignore the wrong type",
            "params": {"efConstruction": "100"},
            "expected": success
        },
        {
            "description": "Float Type Test",
            "params": {"efConstruction": 100.0},
            "expected": {"err_code": 999, "err_msg": "wrong data type in json"}
        },
        {
            "description": "Boolean Type Test",
            "params": {"efConstruction": True},
            "expected": {"err_code": 999, "err_msg": "invalid integer value, key: 'efConstruction', value: 'True': invalid parameter"}
        },
        {
            "description": "None Type Test, use default value",
            "params": {"efConstruction": None},
            "expected": success
        },
        {
            "description": "List Type Test",
            "params": {"efConstruction": [100]},
            "expected": {"err_code": 999, "err_msg": "invalid integer value, key: 'efConstruction', value: '[100]': invalid parameter"}
        },
        # combination params test
        {
            "description": "Optimal Performance Combination Test",
            "params": {"M": 16, "efConstruction": 200},
            "expected": success
        },
        {
            "description": "empty dict params",
            "params": {},
            "expected": success
        },
        {
            "description": "not_defined_param in the dict params",
            "params": {"M": 16, "efConstruction": 200, "not_defined_param": "nothing"},
            "expected": success
        },
    ]

    search_params = [
        # ef params test
        {
            "description": "Minimum Boundary Test",
            "params": {"ef": 1},
            "expected": {"err_code": 999, "err_msg": "ef(1) should be larger than k(10)"}  # assume default limit=10
        },
        {
            "description": "Large Value Test",
            "params": {"ef": 10000},
            "expected": success
        },
        {
            "description": "Out of Range Test - Negative",
            "params": {"ef": -1},
            "expected": {"err_code": 999, "err_msg": "param 'ef' (-1) should be in range [1, 2147483647]"}
        },
        {
            "description": "String Type Test, not check data type",
            "params": {"ef": "32"},
            "expected": success
        },
        {
            "description": "Float Type Test",
            "params": {"ef": 32.0},
            "expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'ef' (32.0) should be integer"}
        },
        {
            "description": "Boolean Type Test",
            "params": {"ef": True},
            "expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'ef' (true) should be integer"}
        },
        {
            "description": "None Type Test",
            "params": {"ef": None},
            "expected": {"err_code": 999, "err_msg": "Type conflict in json: param 'ef' (null) should be integer"}
        },
        {
            "description": "List Type Test",
            "params": {"ef": [32]},
            "expected": {"err_code": 999, "err_msg": "param 'ef' ([32]) should be integer"}
        },
        # combination params test
        {
            "description": "Optimal Performance Combination Test",
            "params": {"ef": 64},
            "expected": success
        },
        {
            "description": "empty dict params",
            "params": {},
            "expected": success
        },
    ]
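Analogously for HNSW, a minimal sketch of applying one build-params and one search-params case, reusing the same hypothetical client and names as the DISKANN sketch above:

index_params = client.prepare_index_params()
index_params.add_index(field_name="vector", index_type="HNSW", metric_type="COSINE",
                       params={"M": 16, "efConstruction": 200})  # the "Optimal Performance Combination" case
client.create_index("demo_collection", index_params)
client.load_collection("demo_collection")

# ef must be at least as large as the limit, which is why the {"ef": 1}
# case above expects "ef(1) should be larger than k(10)"
res = client.search(collection_name="demo_collection",
                    data=[[0.1] * 128],
                    limit=10,
                    search_params={"params": {"ef": 64}})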
tests/python_client/testcases/indexes/test_diskann.py (new file, 229 lines)
@ -0,0 +1,229 @@
import logging
from utils.util_pymilvus import *
from common.common_type import CaseLabel, CheckTasks
from common import common_type as ct
from common import common_func as cf
from base.client_v2_base import TestMilvusClientV2Base
import pytest
from idx_diskann import DISKANN

index_type = "DISKANN"
success = "success"
pk_field_name = 'id'
vector_field_name = 'vector'
dim = ct.default_dim
default_nb = 2000
default_build_params = {"search_list_size": 100, "beamwidth": 10, "pq_code_budget_gb": 1.0, "num_threads": 8, "max_degree": 64, "indexing_list_size": 100, "build_dram_budget_gb": 2.0, "search_dram_budget_gb": 1.0}
default_search_params = {"search_list_size": 100, "beamwidth": 10, "search_dram_budget_gb": 1.0}


class TestDiskannBuildParams(TestMilvusClientV2Base):
    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("params", DISKANN.build_params)
    def test_diskann_build_params(self, params):
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        schema, _ = self.create_schema(client)
        schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(vector_field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
        self.create_collection(client, collection_name, schema=schema)
        insert_times = 2
        random_vectors = list(cf.gen_vectors(default_nb * insert_times, dim, vector_data_type=DataType.FLOAT_VECTOR))
        for j in range(insert_times):
            start_pk = j * default_nb
            rows = [{
                pk_field_name: i + start_pk,
                vector_field_name: random_vectors[i + start_pk]
            } for i in range(default_nb)]
            self.insert(client, collection_name, rows)
        self.flush(client, collection_name)
        build_params = params.get("params", None)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=vector_field_name,
                               metric_type=cf.get_default_metric_for_vector_type(vector_type=DataType.FLOAT_VECTOR),
                               index_type=index_type,
                               params=build_params)
        if params.get("expected", None) != success:
            self.create_index(client, collection_name, index_params,
                              check_task=CheckTasks.err_res,
                              check_items=params.get("expected"))
        else:
            self.create_index(client, collection_name, index_params)
            self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
            self.load_collection(client, collection_name)
            nq = 2
            search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=DataType.FLOAT_VECTOR)
            self.search(client, collection_name, search_vectors,
                        search_params=default_search_params,
                        limit=ct.default_limit,
                        check_task=CheckTasks.check_search_results,
                        check_items={"enable_milvus_client_api": True,
                                     "nq": nq,
                                     "limit": ct.default_limit,
                                     "pk_name": pk_field_name})
            idx_info = client.describe_index(collection_name, vector_field_name)
            if build_params is not None:
                for key, value in build_params.items():
                    if value is not None:
                        assert key in idx_info.keys()
                        assert str(value) == idx_info[key]

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("vector_data_type", ct.all_vector_types)
    def test_diskann_on_all_vector_types(self, vector_data_type):
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        schema, _ = self.create_schema(client)
        schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
        if vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
            schema.add_field(vector_field_name, datatype=vector_data_type)
        else:
            schema.add_field(vector_field_name, datatype=vector_data_type, dim=dim)
        self.create_collection(client, collection_name, schema=schema)
        insert_times = 2
        random_vectors = list(cf.gen_vectors(default_nb*insert_times, default_dim, vector_data_type=vector_data_type)) \
            if vector_data_type == DataType.FLOAT_VECTOR \
            else cf.gen_vectors(default_nb*insert_times, default_dim, vector_data_type=vector_data_type)
        for j in range(insert_times):
            start_pk = j * default_nb
            rows = [{
                pk_field_name: i + start_pk,
                vector_field_name: random_vectors[i + start_pk]
            } for i in range(default_nb)]
            self.insert(client, collection_name, rows)
        self.flush(client, collection_name)
        index_params = self.prepare_index_params(client)[0]
        metric_type = cf.get_default_metric_for_vector_type(vector_data_type)
        index_params.add_index(field_name=vector_field_name,
                               metric_type=metric_type,
                               index_type=index_type,
                               **default_build_params)
        if vector_data_type not in DISKANN.supported_vector_types:
            self.create_index(client, collection_name, index_params,
                              check_task=CheckTasks.err_res,
                              check_items={"err_code": 999,
                                           "err_msg": f"can't build with this index DISKANN: invalid parameter"})
        else:
            self.create_index(client, collection_name, index_params)
            self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
            self.load_collection(client, collection_name)
            nq = 2
            search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=vector_data_type)
            self.search(client, collection_name, search_vectors,
                        search_params=default_search_params,
                        limit=ct.default_limit,
                        check_task=CheckTasks.check_search_results,
                        check_items={"enable_milvus_client_api": True,
                                     "nq": nq,
                                     "limit": ct.default_limit,
                                     "pk_name": pk_field_name})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("metric", DISKANN.supported_metrics)
    def test_diskann_on_all_metrics(self, metric):
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        schema, _ = self.create_schema(client)
        schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(vector_field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
        self.create_collection(client, collection_name, schema=schema)
        insert_times = 2
        random_vectors = list(cf.gen_vectors(default_nb*insert_times, default_dim, vector_data_type=DataType.FLOAT_VECTOR))
        for j in range(insert_times):
            start_pk = j * default_nb
            rows = [{
                pk_field_name: i + start_pk,
                vector_field_name: random_vectors[i + start_pk]
            } for i in range(default_nb)]
            self.insert(client, collection_name, rows)
        self.flush(client, collection_name)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=vector_field_name,
                               metric_type=metric,
                               index_type=index_type,
                               **default_build_params)
        self.create_index(client, collection_name, index_params)
        self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
        self.load_collection(client, collection_name)
        nq = 2
        search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=DataType.FLOAT_VECTOR)
        self.search(client, collection_name, search_vectors,
                    search_params=default_search_params,
                    limit=ct.default_limit,
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": nq,
                                 "limit": ct.default_limit,
                                 "pk_name": pk_field_name})


@pytest.mark.xdist_group("TestDiskannSearchParams")
class TestDiskannSearchParams(TestMilvusClientV2Base):
    def setup_class(self):
        super().setup_class(self)
        self.collection_name = "TestDiskannSearchParams" + cf.gen_unique_str("_")
        self.float_vector_field_name = vector_field_name
        self.float_vector_dim = dim
        self.primary_keys = []
        self.enable_dynamic_field = False
        self.datas = []

    @pytest.fixture(scope="class", autouse=True)
    def prepare_collection(self, request):
        client = self._client()
        collection_schema = self.create_schema(client)[0]
        collection_schema.add_field(pk_field_name, DataType.INT64, is_primary=True, auto_id=False)
        collection_schema.add_field(self.float_vector_field_name, DataType.FLOAT_VECTOR, dim=128)
        self.create_collection(client, self.collection_name, schema=collection_schema,
                               enable_dynamic_field=self.enable_dynamic_field, force_teardown=False)
        insert_times = 2
        float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim,
                                       vector_data_type=DataType.FLOAT_VECTOR)
        for j in range(insert_times):
            rows = []
            for i in range(default_nb):
                pk = i + j * default_nb
                row = {
                    pk_field_name: pk,
                    self.float_vector_field_name: list(float_vectors[pk])
                }
                self.datas.append(row)
                rows.append(row)
            self.insert(client, self.collection_name, data=rows)
            self.primary_keys.extend([i + j * default_nb for i in range(default_nb)])
        self.flush(client, self.collection_name)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=self.float_vector_field_name,
                               metric_type="COSINE",
                               index_type=index_type,
                               params=default_build_params)
        self.create_index(client, self.collection_name, index_params=index_params)
        self.wait_for_index_ready(client, self.collection_name, index_name=self.float_vector_field_name)
        self.load_collection(client, self.collection_name)

        def teardown():
            self.drop_collection(self._client(), self.collection_name)
        request.addfinalizer(teardown)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("params", DISKANN.search_params)
    def test_diskann_search_params(self, params):
        client = self._client()
        collection_name = self.collection_name
        nq = 2
        search_vectors = cf.gen_vectors(nq, dim=self.float_vector_dim, vector_data_type=DataType.FLOAT_VECTOR)
        search_params = params.get("params", None)
        if params.get("expected", None) != success:
            self.search(client, collection_name, search_vectors,
                        search_params=search_params,
                        limit=ct.default_limit,
                        check_task=CheckTasks.err_res,
                        check_items=params.get("expected"))
        else:
            self.search(client, collection_name, search_vectors,
                        search_params=search_params,
                        limit=ct.default_limit,
                        check_task=CheckTasks.check_search_results,
                        check_items={"enable_milvus_client_api": True,
                                     "nq": nq,
                                     "limit": ct.default_limit,
                                     "pk_name": pk_field_name})
tests/python_client/testcases/indexes/test_hnsw.py (new file, 273 lines)
@ -0,0 +1,273 @@
import logging
from utils.util_pymilvus import *
from common.common_type import CaseLabel, CheckTasks
from common import common_type as ct
from common import common_func as cf
from base.client_v2_base import TestMilvusClientV2Base
import pytest
from idx_hnsw import HNSW

index_type = "HNSW"
success = "success"
pk_field_name = 'id'
vector_field_name = 'vector'
dim = ct.default_dim
default_nb = 2000
default_build_params = {"M": 16, "efConstruction": 200}
default_search_params = {"ef": 64}


class TestHnswBuildParams(TestMilvusClientV2Base):
    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("params", HNSW.build_params)
    def test_hnsw_build_params(self, params):
        """
        Test the build params of HNSW index
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        schema, _ = self.create_schema(client)
        schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(vector_field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
        self.create_collection(client, collection_name, schema=schema)

        # Insert data in 2 batches with unique primary keys
        insert_times = 2
        random_vectors = list(cf.gen_vectors(default_nb * insert_times, dim, vector_data_type=DataType.FLOAT_VECTOR))
        for j in range(insert_times):
            start_pk = j * default_nb
            rows = [{
                pk_field_name: i + start_pk,
                vector_field_name: random_vectors[i + start_pk]
            } for i in range(default_nb)]
            self.insert(client, collection_name, rows)
        self.flush(client, collection_name)

        # create index
        build_params = params.get("params", None)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=vector_field_name,
                               metric_type=cf.get_default_metric_for_vector_type(vector_type=DataType.FLOAT_VECTOR),
                               index_type=index_type,
                               params=build_params)
        # build index
        if params.get("expected", None) != success:
            self.create_index(client, collection_name, index_params,
                              check_task=CheckTasks.err_res,
                              check_items=params.get("expected"))
        else:
            self.create_index(client, collection_name, index_params)
            self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)

            # load collection
            self.load_collection(client, collection_name)

            # search
            nq = 2
            search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=DataType.FLOAT_VECTOR)
            self.search(client, collection_name, search_vectors,
                        search_params=default_search_params,
                        limit=ct.default_limit,
                        check_task=CheckTasks.check_search_results,
                        check_items={"enable_milvus_client_api": True,
                                     "nq": nq,
                                     "limit": ct.default_limit,
                                     "pk_name": pk_field_name})

            # verify the index params are persisted
            idx_info = client.describe_index(collection_name, vector_field_name)
            if build_params is not None:
                for key, value in build_params.items():
                    if value is not None:
                        assert key in idx_info.keys()
                        assert str(value) in idx_info.values()

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("vector_data_type", ct.all_vector_types)
    def test_hnsw_on_all_vector_types(self, vector_data_type):
        """
        Test HNSW index on all the vector types
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        schema, _ = self.create_schema(client)
        schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
        if vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
            schema.add_field(vector_field_name, datatype=vector_data_type)
        else:
            schema.add_field(vector_field_name, datatype=vector_data_type, dim=dim)
        self.create_collection(client, collection_name, schema=schema)

        # Insert data in 2 batches with unique primary keys
        insert_times = 2
        random_vectors = list(cf.gen_vectors(default_nb*insert_times, dim, vector_data_type=vector_data_type)) \
            if vector_data_type == DataType.FLOAT_VECTOR \
            else cf.gen_vectors(default_nb*insert_times, dim, vector_data_type=vector_data_type)
        for j in range(insert_times):
            start_pk = j * default_nb
            rows = [{
                pk_field_name: i + start_pk,
                vector_field_name: random_vectors[i + start_pk]
            } for i in range(default_nb)]
            self.insert(client, collection_name, rows)
        self.flush(client, collection_name)

        # create index
        index_params = self.prepare_index_params(client)[0]
        metric_type = cf.get_default_metric_for_vector_type(vector_data_type)
        index_params.add_index(field_name=vector_field_name,
                               metric_type=metric_type,
                               index_type=index_type,
                               M=16,
                               efConstruction=200)
        if vector_data_type not in HNSW.supported_vector_types:
            self.create_index(client, collection_name, index_params,
                              check_task=CheckTasks.err_res,
                              check_items={"err_code": 999,
                                           "err_msg": f"can't build with this index HNSW: invalid parameter"})
        else:
            self.create_index(client, collection_name, index_params)
            self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
            # load collection
            self.load_collection(client, collection_name)
            # search
            nq = 2
            search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=vector_data_type)
            self.search(client, collection_name, search_vectors,
                        search_params=default_search_params,
                        limit=ct.default_limit,
                        check_task=CheckTasks.check_search_results,
                        check_items={"enable_milvus_client_api": True,
                                     "nq": nq,
                                     "limit": ct.default_limit,
                                     "pk_name": pk_field_name})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("metric", HNSW.supported_metrics)
    def test_hnsw_on_all_metrics(self, metric):
        """
        Test HNSW index on all supported metrics
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        schema, _ = self.create_schema(client)
        schema.add_field(pk_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(vector_field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
        self.create_collection(client, collection_name, schema=schema)

        # insert data
        insert_times = 2
        random_vectors = list(cf.gen_vectors(default_nb*insert_times, dim, vector_data_type=DataType.FLOAT_VECTOR))
        for j in range(insert_times):
            start_pk = j * default_nb
            rows = [{
                pk_field_name: i + start_pk,
                vector_field_name: random_vectors[i + start_pk]
            } for i in range(default_nb)]
            self.insert(client, collection_name, rows)
        self.flush(client, collection_name)

        # create index
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=vector_field_name,
                               metric_type=metric,
                               index_type=index_type,
                               M=16,
                               efConstruction=200)
        self.create_index(client, collection_name, index_params)
        self.wait_for_index_ready(client, collection_name, index_name=vector_field_name)
        # load collection
        self.load_collection(client, collection_name)
        # search
        nq = 2
        search_vectors = cf.gen_vectors(nq, dim=dim, vector_data_type=DataType.FLOAT_VECTOR)
        self.search(client, collection_name, search_vectors,
                    search_params=default_search_params,
                    limit=ct.default_limit,
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
                                 "nq": nq,
                                 "limit": ct.default_limit,
                                 "pk_name": pk_field_name})


@pytest.mark.xdist_group("TestHnswSearchParams")
class TestHnswSearchParams(TestMilvusClientV2Base):
    """Test the search params of HNSW index"""

    def setup_class(self):
        super().setup_class(self)
        self.collection_name = "TestHnswSearchParams" + cf.gen_unique_str("_")
        self.float_vector_field_name = vector_field_name
        self.float_vector_dim = dim
        self.primary_keys = []
        self.enable_dynamic_field = False
        self.datas = []

    @pytest.fixture(scope="class", autouse=True)
    def prepare_collection(self, request):
        """
        Initialize collection before test class runs
        """
        client = self._client()
        collection_schema = self.create_schema(client)[0]
        collection_schema.add_field(pk_field_name, DataType.INT64, is_primary=True, auto_id=False)
        collection_schema.add_field(self.float_vector_field_name, DataType.FLOAT_VECTOR, dim=128)
        self.create_collection(client, self.collection_name, schema=collection_schema,
                               enable_dynamic_field=self.enable_dynamic_field, force_teardown=False)
        insert_times = 2
        float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim,
                                       vector_data_type=DataType.FLOAT_VECTOR)
        for j in range(insert_times):
            rows = []
            for i in range(default_nb):
                pk = i + j * default_nb
                row = {
                    pk_field_name: pk,
                    self.float_vector_field_name: list(float_vectors[pk])
                }
                self.datas.append(row)
                rows.append(row)
            self.insert(client, self.collection_name, data=rows)
            self.primary_keys.extend([i + j * default_nb for i in range(default_nb)])
        self.flush(client, self.collection_name)
        # Create HNSW index
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=self.float_vector_field_name,
                               metric_type="COSINE",
                               index_type=index_type,
                               params=default_build_params)
        self.create_index(client, self.collection_name, index_params=index_params)
        self.wait_for_index_ready(client, self.collection_name, index_name=self.float_vector_field_name)
        self.load_collection(client, self.collection_name)

        def teardown():
            self.drop_collection(self._client(), self.collection_name)
        request.addfinalizer(teardown)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("params", HNSW.search_params)
    def test_hnsw_search_params(self, params):
        """
        Test the search params of HNSW index
        """
        client = self._client()
        collection_name = self.collection_name
        nq = 2
        search_vectors = cf.gen_vectors(nq, dim=self.float_vector_dim, vector_data_type=DataType.FLOAT_VECTOR)
        search_params = params.get("params", None)
        if params.get("expected", None) != success:
            self.search(client, collection_name, search_vectors,
                        search_params=search_params,
                        limit=ct.default_limit,
                        check_task=CheckTasks.err_res,
                        check_items=params.get("expected"))
        else:
            self.search(client, collection_name, search_vectors,
                        search_params=search_params,
                        limit=ct.default_limit,
                        check_task=CheckTasks.check_search_results,
                        check_items={"enable_milvus_client_api": True,
                                     "nq": nq,
                                     "limit": ct.default_limit,
                                     "pk_name": pk_field_name})
@ -527,7 +527,7 @@ class TestInsertOperation(TestcaseBase):
        expected: error raised
        """
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        nb = 1
        nb = 10
        data = []
        fields = collection_w.schema.fields
        for field in fields:
@ -747,12 +747,9 @@ class TestInsertOperation(TestcaseBase):
        c_name = cf.gen_unique_str(prefix)
        schema = cf.gen_default_collection_schema(primary_field=pk_field, auto_id=True)
        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
        data = []
        nb = 100
        for field in collection_w.schema.fields:
            field_data = cf.gen_data_by_collection_field(field, nb=nb)
            if field.name != pk_field:
                data.append(field_data)
        data = cf.gen_column_data_by_schema(nb=nb, schema=collection_w.schema)

        collection_w.insert(data=data)
        assert collection_w.num_entities == nb
@ -1246,7 +1243,7 @@ class TestInsertInvalid(TestcaseBase):
                                     primary_field=primary_field, is_index=False,
                                     is_all_data_type=True, with_json=True)[0]
        nb = 100
        data = cf.gen_data_by_collection_schema(collection_w.schema, nb=nb)
        data = cf.gen_column_data_by_schema(schema=collection_w.schema, nb=nb)
        for dirty_i in [0, nb // 2, nb - 1]:  # check the dirty data at first, middle and last
            log.debug(f"dirty_i: {dirty_i}")
            for i in range(len(data)):
@ -2194,7 +2191,7 @@ class TestUpsertInvalid(TestcaseBase):
                                     primary_field=primary_field, is_index=False,
                                     is_all_data_type=True, with_json=True)[0]
        nb = 100
        data = cf.gen_data_by_collection_schema(collection_w.schema, nb=nb)
        data = cf.gen_column_data_by_schema(schema=collection_w.schema, nb=nb)
        for dirty_i in [0, nb // 2, nb - 1]:  # check the dirty data at first, middle and last
            log.debug(f"dirty_i: {dirty_i}")
            for i in range(len(data)):
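The hunks above replace hand-rolled per-field loops (and the older gen_data_by_collection_schema call) with the new cf.gen_column_data_by_schema helper. A rough sketch of the behaviour implied by the deleted code follows; the real implementation in common/common_func.py may differ in detail:

def gen_column_data_by_schema(schema=None, nb=100):
    # Sketch only: produce one column of nb generated values per schema
    # field, skipping auto-id primary keys exactly as the old loop did.
    data = []
    for field in schema.fields:
        if getattr(field, "auto_id", False):
            continue  # server-generated pks are not supplied by the client
        data.append(gen_data_by_collection_field(field, nb=nb))
    return data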
@ -128,7 +128,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
        """
        # the total number of inserted data that matches the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if
                          eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
                          eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
@ -359,7 +359,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
        """
        # the total number of inserted data that matches the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if
                          eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
                          eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
@ -696,7 +696,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
        """
        # the total number of inserted data that matches the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if
                          eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
                          eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
@ -1022,7 +1022,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
        """
        # the total number of inserted data that matches the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if
                          eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
                          eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
@ -1438,7 +1438,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase):
        """
        # the total number of inserted data that matches the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if
                          eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
                          eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
@ -1796,7 +1796,7 @@ class TestBitmapIndexMmap(TestCaseClassBase):
        """
        # the total number of inserted data that matches the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if
                          eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
                          eval('cf.parse_fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
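The hunks above swap eval('math.fmod' + ...) for eval('cf.parse_fmod' + ...) when counting rows that match a %-expression. A plausible sketch of such a wrapper, assuming it exists mainly to normalise operand types before delegating to math.fmod (the real helper in common/common_func.py may do more):

import math

def parse_fmod(x, y):
    # Hypothetical sketch: coerce both operands to float so expressions
    # generated from int, float, or numpy-backed columns evaluate uniformly.
    return math.fmod(float(x), float(y))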
@ -2519,7 +2519,6 @@ class TestGroupSearch(TestCaseClassBase):
                                           output_fields=[DataType.VARCHAR.name],
                                           check_task=CheckTasks.check_search_results,
                                           check_items={"nq": ct.default_nq, "limit": ct.default_limit})[0]
        print(res)
        for i in range(ct.default_nq):
            group_values = []
            for l in range(ct.default_limit):
@ -2542,6 +2541,31 @@ class TestGroupSearch(TestCaseClassBase):
                                           check_task=CheckTasks.check_search_results,
                                           check_items={"nq": ct.default_nq, "limit": ct.default_limit})

    @pytest.mark.tags(CaseLabel.L2)
    def test_hybrid_search_group_by_empty_results(self):
        """
        verify hybrid search group-by works when grouping returns empty results
        """
        # 3. prepare search params
        req_list = []
        for i in range(len(self.vector_fields)):
            search_param = {
                "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i],
                                       vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap,
                                                                                         self.vector_fields[i])),
                "anns_field": self.vector_fields[i],
                "param": {},
                "limit": ct.default_limit,
                "expr": f"{self.primary_field} < 0"}  # make sure empty results are returned
            req = AnnSearchRequest(**search_param)
            req_list.append(req)
        # 4. hybrid search group by empty results
        self.collection_wrap.hybrid_search(req_list, WeightedRanker(0.1, 0.9, 0.2, 0.3), ct.default_limit,
                                           group_by_field=DataType.VARCHAR.name,
                                           output_fields=[DataType.VARCHAR.name],
                                           check_task=CheckTasks.check_search_results,
                                           check_items={"nq": ct.default_nq, "limit": 0})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("support_field", [DataType.INT8.name, DataType.INT64.name,
                                               DataType.BOOL.name, DataType.VARCHAR.name])
@ -1387,8 +1387,8 @@ class TestUtilityAdvanced(TestcaseBase):
        )

        for _ in range(segment_num):
            # insert random pks, ***start=None will generate random data***
            data = cf.gen_values(self.collection_wrap.schema, nb=nb, start_id=None)
            # insert random pks
            data = cf.gen_values(self.collection_wrap.schema, nb=nb, random_pk=True)
            self.collection_wrap.insert(data)
            self.collection_wrap.flush()

@ -1443,15 +1443,14 @@ class TestUtilityAdvanced(TestcaseBase):
        self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(ct.default_float_vec_field_name))
        self.collection_wrap.load()

        # insert random pks, ***start=None will generate random data***
        data = cf.gen_values(self.collection_wrap.schema, nb=nb, start_id=None)
        # insert random pks
        data = cf.gen_values(self.collection_wrap.schema, nb=nb, random_pk=True)
        self.collection_wrap.insert(data)

        # get_query_segment_info and verify results
        res_sealed, _ = self.utility_wrap.get_query_segment_info(collection_name)
        assert len(res_sealed) == 0

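The start_id=None idiom for producing random primary keys is replaced here by an explicit random_pk flag on cf.gen_values. A hedged sketch of what that flag plausibly does for the pk column (the actual gen_values implementation may differ):

import random

def gen_pk_values(nb, start_id=0, random_pk=False):
    # Hypothetical sketch: sequential pks by default; with random_pk=True,
    # draw nb distinct ids at random so repeated inserts do not land in one
    # contiguous pk range.
    if random_pk:
        return random.sample(range(nb * 100), nb)
    return list(range(start_id, start_id + nb))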
    @pytest.mark.tags(CaseLabel.L1)
    def test_get_sealed_query_segment_info_after_create_index(self):
        """