mirror of https://gitee.com/milvus-io/milvus.git, synced 2025-12-06 17:18:35 +08:00

test: [E2E Refactor] use vector datatype instead of hard-coded datatype names (#41497)

related issue: #40698
1. use vector data types instead of hard-coded datatype names
2. update search pagination tests
3. remove distance checking from search-results checking, because Knowhere customizes the distances for different metrics and indexes; now only assert that the distances are sorted correctly

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>

This commit is contained in:
parent 6084930854
commit 70b311735b
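Note: the heart of the refactor is mechanical — test helpers used to take hard-coded datatype strings such as "FLOAT_VECTOR" and translate them to pymilvus enums internally; they now take pymilvus DataType members directly. A minimal sketch of the call-site change (gen_float_vec_field is a helper touched by this PR; argument values are illustrative):

```python
from pymilvus import DataType
from common import common_func as cf  # Milvus test helpers touched by this PR

# before (pre-PR): a string literal each helper had to map onto the enum internally
field = cf.gen_float_vec_field(name="emb", dim=128, vector_data_type="FLOAT_VECTOR")

# after: the DataType member is passed straight through as the field dtype
field = cf.gen_float_vec_field(name="emb", dim=128, vector_data_type=DataType.FLOAT_VECTOR)

# where a string is still needed (e.g. in generated field names), .name supplies it
assert DataType.SPARSE_FLOAT_VECTOR.name == "SPARSE_FLOAT_VECTOR"
```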
@@ -275,7 +275,7 @@ class TestcaseBase(Base):
                                  auto_id=False, dim=ct.default_dim, is_index=True,
                                  primary_field=ct.default_int64_field_name, is_flush=True, name=None,
                                  enable_dynamic_field=False, with_json=True, random_primary_key=False,
-                                 multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
+                                 multiple_dim_array=[], is_partition_key=None, vector_data_type=DataType.FLOAT_VECTOR,
                                  nullable_fields={}, default_value_fields={}, language=None, **kwargs):
         """
         target: create specified collections
@@ -317,7 +317,7 @@ class TestcaseBase(Base):
                                                          primary_field=primary_field,
                                                          nullable_fields=nullable_fields,
                                                          default_value_fields=default_value_fields)
-        if vector_data_type == ct.sparse_vector:
+        if vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
             default_schema = cf.gen_default_sparse_schema(auto_id=auto_id, primary_field=primary_field,
                                                           enable_dynamic_field=enable_dynamic_field,
                                                           with_json=with_json,
@@ -354,7 +354,7 @@ class TestcaseBase(Base):
             # This condition will be removed after auto index feature
             if is_binary:
                 collection_w.create_index(ct.default_binary_vec_field_name, ct.default_bin_flat_index)
-            elif vector_data_type == ct.sparse_vector:
+            elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
                 for vector_name in vector_name_list:
                     collection_w.create_index(vector_name, ct.default_sparse_inverted_index)
             else:
@@ -362,7 +362,7 @@ class TestcaseBase(Base):
                 vector_name_list.append(ct.default_float_vec_field_name)
                 for vector_name in vector_name_list:
                     # Unlike dense vectors, sparse vectors cannot create flat index.
-                    if ct.sparse_vector in vector_name:
+                    if DataType.SPARSE_FLOAT_VECTOR.name in vector_name:
                         collection_w.create_index(vector_name, ct.default_sparse_inverted_index)
                     else:
                         collection_w.create_index(vector_name, ct.default_flat_index)
@@ -1,4 +1,5 @@
 import sys
+import time
 from typing import Optional
 from pymilvus import MilvusClient
@@ -545,6 +546,16 @@ class TestMilvusClientV2Base(Base):
                            **kwargs).run()
         return res, check_result

+    def wait_for_index_ready(self, client, collection_name, index_name, timeout=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        start_time = time.time()
+        while start_time + timeout > time.time():
+            index_info, _ = self.describe_index(client, collection_name, index_name, **kwargs)
+            if index_info.get("pending_index_rows", 1) == 0:
+                return True
+            time.sleep(2)
+        return False
+
     @trace()
     def list_indexes(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs):
         timeout = TIMEOUT if timeout is None else timeout
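Note: the new wait_for_index_ready helper polls describe_index until the server reports pending_index_rows == 0 or the timeout elapses, sleeping 2 s between polls. A hypothetical call site inside a TestMilvusClientV2Base subclass (collection and index names are illustrative):

```python
client = self._client()
# returns True once pending_index_rows reaches 0, False on timeout
ready = self.wait_for_index_ready(client, "my_collection", "vector_idx", timeout=120)
assert ready, "index build still pending after 120s"
```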
@@ -445,19 +445,20 @@ class ResponseChecker:
                 assert ids_match
             elif check_items.get("metric", None) is not None:
                 # verify the distances are already sorted
-                if check_items.get("metric").lower() in ["ip", "bm25"]:
-                    assert distances == sorted(distances, reverse=False)
-                else:
+                if check_items.get("metric").upper() in ["IP", "COSINE", "BM25"]:
                     assert distances == sorted(distances, reverse=True)
-                if check_items.get("vector_nq") is None or check_items.get("original_vectors") is None:
-                    log.debug("vector for searched (nq) and inserted vectors are needed for distance check")
                 else:
-                    for id in ids:
-                        searched_original_vectors.append(check_items["original_vectors"][id])
-                    cf.compare_distance_vector_and_vector_list(check_items["vector_nq"][nq_i],
-                                                               searched_original_vectors,
-                                                               check_items["metric"], distances)
-                    log.info("search_results_check: Checked the distances for one nq: OK")
+                    assert distances == sorted(distances, reverse=False)
+                if check_items.get("vector_nq") is None or check_items.get("original_vectors") is None:
+                    log.debug("skip distance check for knowhere does not return the precise distances")
+                else:
+                    # for id in ids:
+                    #     searched_original_vectors.append(check_items["original_vectors"][id])
+                    # cf.compare_distance_vector_and_vector_list(check_items["vector_nq"][nq_i],
+                    #                                            searched_original_vectors,
+                    #                                            check_items["metric"], distances)
+                    # log.info("search_results_check: Checked the distances for one nq: OK")
+                    pass
             else:
                 pass  # just check nq and topk, not specific ids need check
             nq_i += 1
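Note: the corrected assertion encodes the ordering contract of search results — for similarity-style metrics (IP, COSINE, BM25) a larger score is better, so results come back in descending order; for distance-style metrics such as L2 they come back ascending. A small self-contained sketch of that contract:

```python
def assert_sorted_by_metric(distances, metric):
    # similarity metrics: best (largest) first; distance metrics: best (smallest) first
    if metric.upper() in ["IP", "COSINE", "BM25"]:
        assert distances == sorted(distances, reverse=True)
    else:
        assert distances == sorted(distances, reverse=False)

assert_sorted_by_metric([0.95, 0.71, 0.20], "COSINE")  # descending: ok
assert_sorted_by_metric([0.10, 0.42, 0.88], "L2")      # ascending: ok
```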
@@ -692,22 +692,17 @@ def gen_double_field(name=ct.default_double_field_name, is_primary=False, descri


 def gen_float_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim,
-                        description=ct.default_desc, vector_data_type="FLOAT_VECTOR", **kwargs):
-    if vector_data_type == "SPARSE_FLOAT_VECTOR":
-        dtype = DataType.SPARSE_FLOAT_VECTOR
-        float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=dtype,
-                                                                       description=description,
-                                                                       is_primary=is_primary, **kwargs)
-        return float_vec_field
-    if vector_data_type == "FLOAT_VECTOR":
-        dtype = DataType.FLOAT_VECTOR
-    elif vector_data_type == "FLOAT16_VECTOR":
-        dtype = DataType.FLOAT16_VECTOR
-    elif vector_data_type == "BFLOAT16_VECTOR":
-        dtype = DataType.BFLOAT16_VECTOR
-    float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=dtype,
-                                                                   description=description, dim=dim,
-                                                                   is_primary=is_primary, **kwargs)
+                        description=ct.default_desc, vector_data_type=DataType.FLOAT_VECTOR, **kwargs):
+    if vector_data_type != DataType.SPARSE_FLOAT_VECTOR:
+        float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=vector_data_type,
+                                                                       description=description, dim=dim,
+                                                                       is_primary=is_primary, **kwargs)
+    else:
+        float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.SPARSE_FLOAT_VECTOR,
+                                                                       description=description,
+                                                                       is_primary=is_primary, **kwargs)

     return float_vec_field

@@ -744,7 +739,7 @@ def gen_sparse_vec_field(name=ct.default_sparse_vec_field_name, is_primary=False


 def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
                                   auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True,
-                                  multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
+                                  multiple_dim_array=[], is_partition_key=None, vector_data_type=DataType.FLOAT_VECTOR,
                                   nullable_fields={}, default_value_fields={}, **kwargs):
     # gen primary key field
     if default_value_fields.get(ct.default_int64_field_name) is None:
@@ -824,7 +819,7 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_fiel
               gen_array_field(name="array_bool", element_type=DataType.BOOL),
               gen_float_vec_field(dim=dim),
               gen_float_vec_field(name="image_emb", dim=dim),
-              gen_float_vec_field(name="text_sparse_emb", vector_data_type="SPARSE_FLOAT_VECTOR"),
+              gen_float_vec_field(name="text_sparse_emb", vector_data_type=DataType.SPARSE_FLOAT_VECTOR),
               gen_float_vec_field(name="voice_emb", dim=dim),
               ]
@@ -998,25 +993,25 @@ def gen_collection_schema_all_datatype(description=ct.default_desc, primary_fiel
     else:
         multiple_dim_array.insert(0, dim)
         for i in range(len(multiple_dim_array)):
-            if ct.append_vector_type[i%3] != ct.sparse_vector:
+            if ct.append_vector_type[i%3] != DataType.SPARSE_FLOAT_VECTOR:
                 if default_value_fields.get(ct.append_vector_type[i%3]) is None:
-                    vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3]}",
+                    vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3].name}",
                                                        dim=multiple_dim_array[i],
                                                        vector_data_type=ct.append_vector_type[i%3])
                 else:
-                    vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3]}",
+                    vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.append_vector_type[i%3].name}",
                                                        dim=multiple_dim_array[i],
                                                        vector_data_type=ct.append_vector_type[i%3],
-                                                       default_value=default_value_fields.get(ct.append_vector_type[i%3]))
+                                                       default_value=default_value_fields.get(ct.append_vector_type[i%3].name))
                 fields.append(vector_field)
             else:
                 # The field of a sparse vector cannot be dimensioned
                 if default_value_fields.get(ct.default_sparse_vec_field_name) is None:
-                    sparse_vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.sparse_vector}",
-                                                              vector_data_type=ct.sparse_vector)
+                    sparse_vector_field = gen_sparse_vec_field(name=f"multiple_vector_{DataType.SPARSE_FLOAT_VECTOR.name}",
+                                                               vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
                 else:
-                    sparse_vector_field = gen_float_vec_field(name=f"multiple_vector_{ct.sparse_vector}",
-                                                              vector_data_type=ct.sparse_vector,
+                    sparse_vector_field = gen_sparse_vec_field(name=f"multiple_vector_{DataType.SPARSE_FLOAT_VECTOR.name}",
+                                                               vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
                                                                default_value=default_value_fields.get(ct.default_sparse_vec_field_name))
                 fields.append(sparse_vector_field)
@@ -1124,23 +1119,25 @@ def gen_schema_multi_string_fields(string_fields):
     return schema


-def gen_vectors(nb, dim, vector_data_type="FLOAT_VECTOR"):
+def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR):
     vectors = []
-    if vector_data_type == "FLOAT_VECTOR":
+    if vector_data_type == DataType.FLOAT_VECTOR:
         vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
-    elif vector_data_type == "FLOAT16_VECTOR":
+    elif vector_data_type == DataType.FLOAT16_VECTOR:
         vectors = gen_fp16_vectors(nb, dim)[1]
-    elif vector_data_type == "BFLOAT16_VECTOR":
+    elif vector_data_type == DataType.BFLOAT16_VECTOR:
         vectors = gen_bf16_vectors(nb, dim)[1]
-    elif vector_data_type == "SPARSE_FLOAT_VECTOR":
+    elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
         vectors = gen_sparse_vectors(nb, dim)
-    elif vector_data_type == "TEXT_SPARSE_VECTOR":
+    elif vector_data_type == ct.text_sparse_vector:
         vectors = gen_text_vectors(nb)
+    elif vector_data_type == DataType.BINARY_VECTOR:
+        vectors = gen_binary_vectors(nb, dim)[1]
     else:
         log.error(f"Invalid vector data type: {vector_data_type}")
         raise Exception(f"Invalid vector data type: {vector_data_type}")
     if dim > 1:
-        if vector_data_type == "FLOAT_VECTOR":
+        if vector_data_type == DataType.FLOAT_VECTOR:
             vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
             vectors = vectors.tolist()
     return vectors
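Note: two details of gen_vectors worth calling out — dense FLOAT_VECTOR output is L2-normalized when dim > 1 (so IP and COSINE rankings coincide on generated data), and BINARY_VECTOR is now accepted as well. An illustrative call (nb/dim values are arbitrary):

```python
from pymilvus import DataType

dense = gen_vectors(nb=5, dim=128, vector_data_type=DataType.FLOAT_VECTOR)
# each row is unit-length after the preprocessing.normalize(...) branch above
assert abs(sum(v * v for v in dense[0]) - 1.0) < 1e-6

binary = gen_vectors(nb=5, dim=128, vector_data_type=DataType.BINARY_VECTOR)  # newly supported
```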
@@ -1173,7 +1170,7 @@ def gen_binary_vectors(num, dim):


 def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                                random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
-                               vector_data_type="FLOAT_VECTOR", auto_id=False,
+                               vector_data_type=DataType.FLOAT_VECTOR, auto_id=False,
                                primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     if not random_primary_key:
         int_values = pd.Series(data=[i for i in range(start, start + nb)])
@@ -1235,7 +1232,7 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi


 def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                           random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
-                          vector_data_type="FLOAT_VECTOR", auto_id=False,
+                          vector_data_type=DataType.FLOAT_VECTOR, auto_id=False,
                           primary_field=ct.default_int64_field_name, nullable_fields={}, language=None):
     insert_list = []
     if not random_primary_key:
@@ -1289,7 +1286,7 @@ def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js


 def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, multiple_dim_array=[],
-                          multiple_vector_field_name=[], vector_data_type="FLOAT_VECTOR", auto_id=False,
+                          multiple_vector_field_name=[], vector_data_type=DataType.FLOAT_VECTOR, auto_id=False,
                           primary_field = ct.default_int64_field_name, nullable_fields={}, language=None):
     array = []
     for i in range(start, start + nb):
@@ -1703,7 +1700,7 @@ def gen_default_list_sparse_data(nb=ct.default_nb, dim=ct.default_dim, start=0,
     string_values = [str(i) for i in range(start, start + nb)]
     json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]}
                    for i in range(start, start + nb)]
-    sparse_vec_values = gen_vectors(nb, dim, vector_data_type="SPARSE_FLOAT_VECTOR")
+    sparse_vec_values = gen_vectors(nb, dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
     if with_json:
         data = [int_values, float_values, string_values, json_values, sparse_vec_values]
     else:
@@ -2812,7 +2809,7 @@ def compare_distance_vector_and_vector_list(x, y, metric, distance):
         assert False
     for i in range(len(y)):
         if metric == "L2":
-            distance_i = l2(x, y[i])
+            distance_i = (l2(x, y[i]))**2
         elif metric == "IP":
             distance_i = ip(x, y[i])
         elif metric == "COSINE":
@@ -2820,7 +2817,7 @@ def compare_distance_vector_and_vector_list(x, y, metric, distance):
         else:
             raise Exception("metric type is invalid")
         if abs(distance_i - distance[i]) > ct.epsilon:
-            log.error(f"The distance between {x} and {y[i]} is not equal with {distance[i]}")
+            log.error(f"The distance between {x} and {y[i]} does not equal {distance[i]}, expected: {distance_i}")
         assert abs(distance_i - distance[i]) < ct.epsilon

     return True
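Note: the (l2(x, y[i]))**2 change reflects that Milvus reports squared Euclidean distance for the L2 metric, so the expected value must be squared before comparing. A worked example:

```python
# Milvus's L2 "distance" is the squared Euclidean distance
x, y = [0.0, 0.0], [3.0, 4.0]
euclidean = sum((a - b) ** 2 for a, b in zip(x, y)) ** 0.5  # 5.0
expected_milvus_l2 = euclidean ** 2                         # 25.0
assert expected_milvus_l2 == 25.0
```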
@@ -2927,7 +2924,7 @@ def gen_partitions(collection_w, partition_num=1):
 def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_type=False,
                 auto_id=False, dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True,
                 random_primary_key=False, multiple_dim_array=[], primary_field=ct.default_int64_field_name,
-                vector_data_type="FLOAT_VECTOR", nullable_fields={}, language=None):
+                vector_data_type=DataType.FLOAT_VECTOR, nullable_fields={}, language=None):
     """
     target: insert non-binary/binary data
     method: insert non-binary/binary data into partitions if any
@@ -2948,7 +2945,7 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
     if not is_binary:
         if not is_all_data_type:
             if not enable_dynamic_field:
-                if vector_data_type == "FLOAT_VECTOR":
+                if vector_data_type == DataType.FLOAT_VECTOR:
                     default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json,
                                                               random_primary_key=random_primary_key,
                                                               multiple_dim_array=multiple_dim_array,
@@ -2975,14 +2972,14 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ

         else:
             if not enable_dynamic_field:
-                if vector_data_type == "FLOAT_VECTOR":
+                if vector_data_type == DataType.FLOAT_VECTOR:
                     default_data = gen_general_list_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
                                                                   random_primary_key=random_primary_key,
                                                                   multiple_dim_array=multiple_dim_array,
                                                                   multiple_vector_field_name=vector_name_list,
                                                                   auto_id=auto_id, primary_field=primary_field,
                                                                   nullable_fields=nullable_fields, language=language)
-                elif vector_data_type == "FLOAT16_VECTOR" or "BFLOAT16_VECTOR":
+                elif vector_data_type == DataType.FLOAT16_VECTOR or vector_data_type == DataType.BFLOAT16_VECTOR:
                     default_data = gen_general_list_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
                                                                   random_primary_key=random_primary_key,
                                                                   multiple_dim_array=multiple_dim_array,
@@ -3173,6 +3170,20 @@ def extract_vector_field_name_list(collection_w):
     return vector_name_list


+def get_field_dtype_by_field_name(collection_w, field_name):
+    """
+    get the vector field data type by field name
+    collection_w : the collection object to be extracted
+    return: the field data type of the field name
+    """
+    schema_dict = collection_w.schema.to_dict()
+    fields = schema_dict.get('fields')
+    for field in fields:
+        if field['name'] == field_name:
+            return field['type']
+    return None
+
+
 def get_activate_func_from_metric_type(metric_type):
     activate_function = lambda x: x
     if metric_type == "COSINE":
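Note: the new get_field_dtype_by_field_name helper looks a field up in the collection schema and returns its data type, which lets tests pick index parameters per field instead of hard-coding them. A hypothetical use (the field name and the choice of index constants are illustrative):

```python
from pymilvus import DataType
from common import common_func as cf
from common import common_type as ct

dtype = cf.get_field_dtype_by_field_name(collection_w, "text_sparse_emb")
if dtype == DataType.SPARSE_FLOAT_VECTOR:
    collection_w.create_index("text_sparse_emb", ct.default_sparse_inverted_index)
else:
    collection_w.create_index("text_sparse_emb", ct.default_flat_index)
```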
@@ -3307,20 +3318,20 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok", empty_percentage=0):
     return vectors


-def gen_vectors_based_on_vector_type(num, dim, vector_data_type=ct.float_type):
+def gen_vectors_based_on_vector_type(num, dim, vector_data_type=DataType.FLOAT_VECTOR):
     """
     generate float16 vector data
     raw_vectors : the vectors
     fp16_vectors: the bytes used for insert
     return: raw_vectors and fp16_vectors
     """
-    if vector_data_type == ct.float_type:
+    if vector_data_type == DataType.FLOAT_VECTOR:
         vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
-    elif vector_data_type == ct.float16_type:
+    elif vector_data_type == DataType.FLOAT16_VECTOR:
         vectors = gen_fp16_vectors(num, dim)[1]
-    elif vector_data_type == ct.bfloat16_type:
+    elif vector_data_type == DataType.BFLOAT16_VECTOR:
         vectors = gen_bf16_vectors(num, dim)[1]
-    elif vector_data_type == ct.sparse_vector:
+    elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
         vectors = gen_sparse_vectors(num, dim)
     elif vector_data_type == ct.text_sparse_vector:
         vectors = gen_text_vectors(num)
@@ -1,4 +1,5 @@
 import numpy as np
+from pymilvus import DataType

 """ Initialized parameters """
 port = 19530
@@ -44,14 +45,10 @@ default_float16_vec_field_name = "float16_vector"
 default_bfloat16_vec_field_name = "bfloat16_vector"
 another_float_vec_field_name = "float_vector1"
 default_binary_vec_field_name = "binary_vector"
-float_type = "FLOAT_VECTOR"
-float16_type = "FLOAT16_VECTOR"
-bfloat16_type = "BFLOAT16_VECTOR"
-sparse_vector = "SPARSE_FLOAT_VECTOR"
 text_sparse_vector = "TEXT_SPARSE_VECTOR"
-append_vector_type = [float16_type, bfloat16_type, sparse_vector]
-all_dense_vector_types = [float_type, float16_type, bfloat16_type]
-all_vector_data_types = [float_type, float16_type, bfloat16_type, sparse_vector]
+append_vector_type = [DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR, DataType.SPARSE_FLOAT_VECTOR]
+all_dense_vector_types = [DataType.FLOAT_VECTOR, DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR]
+all_float_vector_dtypes = [DataType.FLOAT_VECTOR, DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR, DataType.SPARSE_FLOAT_VECTOR]
 default_sparse_vec_field_name = "sparse_vector"
 default_partition_name = "_default"
 default_resource_group_name = '__default_resource_group'
@@ -246,13 +243,14 @@ default_all_search_params_params = [{}, {"nprobe": 32}, {"nprobe": 32}, {"nprobe
                                     {}, {}]

 Handler_type = ["GRPC", "HTTP"]
-binary_support = ["BIN_FLAT", "BIN_IVF_FLAT"]
-sparse_support = ["SPARSE_INVERTED_INDEX", "SPARSE_WAND"]
-gpu_support = ["GPU_IVF_FLAT", "GPU_IVF_PQ"]
+binary_supported_index_types = ["BIN_FLAT", "BIN_IVF_FLAT"]
+sparse_supported_index_types = ["SPARSE_INVERTED_INDEX", "SPARSE_WAND"]
+gpu_supported_index_types = ["GPU_IVF_FLAT", "GPU_IVF_PQ"]
 default_L0_metric = "COSINE"
-float_metrics = ["L2", "IP", "COSINE"]
+dense_metrics = ["L2", "IP", "COSINE"]
 binary_metrics = ["JACCARD", "HAMMING", "SUBSTRUCTURE", "SUPERSTRUCTURE"]
 structure_metrics = ["SUBSTRUCTURE", "SUPERSTRUCTURE"]
+sparse_metrics = ["IP", "BM25"]
 all_scalar_data_types = ['int8', 'int16', 'int32', 'int64', 'float', 'double', 'bool', 'varchar']
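Note: with the string constants (float_type, float16_type, bfloat16_type, sparse_vector) deleted from common_type, fixtures and parametrizations iterate over the DataType lists directly, as the test hunks below do. A minimal sketch of the resulting fixture style:

```python
import pytest
from common import common_type as ct

@pytest.fixture(scope="function", params=ct.all_dense_vector_types)
def vector_data_type(request):
    # request.param is now a DataType member (e.g. DataType.BFLOAT16_VECTOR),
    # not the string "BFLOAT16_VECTOR"
    yield request.param
```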
@@ -665,7 +665,7 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
         """

     @pytest.mark.tags(CaseLabel.L0)
-    @pytest.mark.parametrize("metric_type", ct.float_metrics)
+    @pytest.mark.parametrize("metric_type", ct.dense_metrics)
     def test_milvus_client_search_iterator_default(self, metric_type):
         """
         target: test search iterator (high level api) normal case
@@ -892,7 +892,7 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
         pass

     @pytest.mark.tags(CaseLabel.L0)
-    @pytest.mark.parametrize("metric_type", ct.float_metrics)
+    @pytest.mark.parametrize("metric_type", ct.dense_metrics)
     @pytest.mark.parametrize("enable_dynamic_field", [True, False])
     def test_milvus_client_search_iterator_after_json_path_index(self, metric_type, enable_dynamic_field,
                                                                  supported_json_cast_type,
@@ -124,7 +124,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
     def random_primary_key(self, request):
         yield request.param

-    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
     def vector_data_type(self, request):
         yield request.param

@@ -242,7 +242,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         # 3. prepare search params
         req_list = []
         weights = [1]
-        vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
         # 4. get hybrid search req list
         for i in range(len(vector_name_list)):
             search_param = {
@@ -276,7 +276,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         # 3. prepare search params
         req_list = []
         weights = [1]
-        vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
         # 4. get hybrid search req list
         for i in range(len(vector_name_list)):
             search_param = {
@@ -311,7 +311,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         # 3. prepare search params
         req_list = []
         weights = [1]
-        vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
         # 4. get hybrid search req list
         for i in range(len(vector_name_list)):
             search_param = {
@@ -344,7 +344,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         vector_name_list = cf.extract_vector_field_name_list(collection_w)
         vector_name_list.append(ct.default_float_vec_field_name)
         # 3. prepare search params
-        vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)

         # get hybrid search req list
         search_param = {
@@ -1757,7 +1757,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         metrics = []
         search_res_dict_array = []
         search_res_dict_array_nq = []
-        vectors = cf.gen_vectors_based_on_vector_type(nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)

         # get hybrid search req list
         for i in range(len(vector_name_list)):
@@ -2014,7 +2014,7 @@ class TestCollectionHybridSearchValid(TestcaseBase):
         # 1. init collection
         collection_w, insert_vectors, _, insert_ids = \
             self.init_collection_general(prefix, True, nb=nb, multiple_dim_array=[dim, dim * 2],
-                                         with_json=False, vector_data_type="SPARSE_FLOAT_VECTOR")[0:4]
+                                         with_json=False, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)[0:4]
         # 2. extract vector field name
         vector_name_list = cf.extract_vector_field_name_list(collection_w)
         # 3. prepare search params
@@ -92,7 +92,7 @@ class TestCollectionRangeSearch(TestcaseBase):
             pytest.skip(f"skip index type {request.param}")
         yield request.param

-    @pytest.fixture(scope="function", params=ct.float_metrics)
+    @pytest.fixture(scope="function", params=ct.dense_metrics)
     def metric(self, request):
         tags = request.config.getoption("--tags")
         if CaseLabel.L2 not in tags:
@@ -1574,7 +1574,7 @@ class TestCollectionRangeSearch(TestcaseBase):
         # 1. initialize with data
         collection_w = self.init_collection_general(prefix, True, nb=5000,
                                                     with_json=True,
-                                                    vector_data_type=ct.sparse_vector)[0]
+                                                    vector_data_type=DataType.SPARSE_FLOAT_VECTOR)[0]
         range_filter = random.uniform(0.5, 1)
         radius = random.uniform(0, 0.5)

@@ -1,102 +0,0 @@
-import numpy as np
-from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY
-from pymilvus import AnnSearchRequest, RRFRanker, WeightedRanker
-from pymilvus import (
-    FieldSchema, CollectionSchema, DataType,
-    Collection
-)
-from common.constants import *
-from utils.util_pymilvus import *
-from common.common_type import CaseLabel, CheckTasks
-from common import common_type as ct
-from common import common_func as cf
-from utils.util_log import test_log as log
-from base.client_base import TestcaseBase
-import heapq
-from time import sleep
-from decimal import Decimal, getcontext
-import decimal
-import multiprocessing
-import numbers
-import random
-import math
-import numpy
-import threading
-import pytest
-import pandas as pd
-from faker import Faker
-
-Faker.seed(19530)
-fake_en = Faker("en_US")
-fake_zh = Faker("zh_CN")
-
-# patch faker to generate text with specific distribution
-cf.patch_faker_text(fake_en, cf.en_vocabularies_distribution)
-cf.patch_faker_text(fake_zh, cf.zh_vocabularies_distribution)
-
-pd.set_option("expand_frame_repr", False)
-
-prefix = "search_collection"
-search_num = 10
-max_dim = ct.max_dim
-min_dim = ct.min_dim
-epsilon = ct.epsilon
-hybrid_search_epsilon = 0.01
-gracefulTime = ct.gracefulTime
-default_nb = ct.default_nb
-default_nb_medium = ct.default_nb_medium
-default_nq = ct.default_nq
-default_dim = ct.default_dim
-default_limit = ct.default_limit
-max_limit = ct.max_limit
-default_search_exp = "int64 >= 0"
-default_search_string_exp = "varchar >= \"0\""
-default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
-default_invaild_string_exp = "varchar >= 0"
-default_json_search_exp = "json_field[\"number\"] >= 0"
-perfix_expr = 'varchar like "0%"'
-default_search_field = ct.default_float_vec_field_name
-default_search_params = ct.default_search_params
-default_int64_field_name = ct.default_int64_field_name
-default_float_field_name = ct.default_float_field_name
-default_bool_field_name = ct.default_bool_field_name
-default_string_field_name = ct.default_string_field_name
-default_json_field_name = ct.default_json_field_name
-default_index_params = ct.default_index
-vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
-range_search_supported_indexes = ct.all_index_types[:7]
-uid = "test_search"
-nq = 1
-epsilon = 0.001
-field_name = default_float_vec_field_name
-binary_field_name = default_binary_vec_field_name
-search_param = {"nprobe": 1}
-entity = gen_entities(1, is_normal=True)
-entities = gen_entities(default_nb, is_normal=True)
-raw_vectors, binary_entities = gen_binary_entities(default_nb)
-default_query, _ = gen_search_vectors_params(field_name, entities, default_top_k, nq)
-index_name1 = cf.gen_unique_str("float")
-index_name2 = cf.gen_unique_str("varhar")
-half_nb = ct.default_nb // 2
-max_hybrid_search_req_num = ct.max_hybrid_search_req_num
-
-
-class TestSearchDSL(TestcaseBase):
-    @pytest.mark.tags(CaseLabel.L0)
-    def test_search_vector_only(self):
-        """
-        target: test search normal scenario
-        method: search vector only
-        expected: search status ok, the length of result
-        """
-        collection_w, _, _, insert_ids, time_stamp = \
-            self.init_collection_general(prefix, True, ct.default_nb)[0:5]
-        vectors = [[random.random() for _ in range(ct.default_dim)]
-                   for _ in range(nq)]
-        collection_w.search(vectors[:nq], default_search_field,
-                            default_search_params, ct.default_top_k,
-                            default_search_exp,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": nq,
-                                         "ids": insert_ids,
-                                         "limit": ct.default_top_k})
@@ -110,7 +110,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
     def enable_dynamic_field(self, request):
         yield request.param

-    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
     def vector_data_type(self, request):
         yield request.param

@@ -85,8 +85,8 @@ class TestSearchIterator(TestcaseBase):
     """ Test case of search iterator """

     @pytest.mark.tags(CaseLabel.L0)
-    @pytest.mark.parametrize("metric_type", ct.float_metrics)
-    @pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    @pytest.mark.parametrize("metric_type", ct.dense_metrics)
+    @pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types)
     def test_range_search_iterator_default(self, metric_type, vector_data_type):
         """
         target: test iterator range search
@@ -151,7 +151,7 @@ class TestSearchIterator(TestcaseBase):
                                      check_items={"batch_size": batch_size})

     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("metrics", ct.float_metrics)
+    @pytest.mark.parametrize("metrics", ct.dense_metrics)
     def test_search_iterator_with_expression(self, metrics):
         """
         target: test search iterator normal
@@ -124,7 +124,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
     def random_primary_key(self, request):
         yield request.param

-    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
     def vector_data_type(self, request):
         yield request.param

@@ -280,7 +280,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
                                          default_value_fields={
                                              ct.default_float_field_name: np.float32(10.0)})[0]
         # 2. generate search data
-        vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(default_nq, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
         # 3. search after insert
         collection_w.search(vectors[:default_nq], default_search_field,
                             default_search_params, default_limit,
@@ -479,7 +479,7 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
         collection_w.load()
         # 2. search iterator
         search_params = {"metric_type": "L2"}
-        vectors = cf.gen_vectors_based_on_vector_type(1, dim, "FLOAT_VECTOR")
+        vectors = cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)
         collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size,
                                      check_task=CheckTasks.check_search_iterator,
                                      check_items={"batch_size": batch_size})
@@ -62,6 +62,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
     def setup_class(self):
         super().setup_class(self)
         self.collection_name = "TestMilvusClientSearchPagination" + cf.gen_unique_str("_")
+        self.partition_names = ["partition_1", "partition_2"]
         self.float_vector_field_name = "float_vector"
         self.bfloat16_vector_field_name = "bfloat16_vector"
         self.sparse_vector_field_name = "sparse_vector"
@@ -92,32 +93,60 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
         collection_schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=256)
         collection_schema.add_field(default_int64_field_name, DataType.INT64)
         self.create_collection(client, self.collection_name, schema=collection_schema, force_teardown=False)
+        for partition_name in self.partition_names:
+            self.create_partition(client, self.collection_name, partition_name=partition_name)
+
         # Define number of insert iterations
         insert_times = 10

         # Generate vectors for each type and store in self
-        float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim, vector_data_type='FLOAT_VECTOR')
-        bfloat16_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.bf16_vector_dim, vector_data_type='BFLOAT16_VECTOR')
+        float_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.float_vector_dim,
+                                       vector_data_type=DataType.FLOAT_VECTOR)
+        bfloat16_vectors = cf.gen_vectors(default_nb * insert_times, dim=self.bf16_vector_dim,
+                                          vector_data_type=DataType.BFLOAT16_VECTOR)
         sparse_vectors = cf.gen_sparse_vectors(default_nb * insert_times, empty_percentage=2)
         _, binary_vectors = cf.gen_binary_vectors(default_nb * insert_times, dim=self.binary_vector_dim)

         # Insert data multiple times with non-duplicated primary keys
         for j in range(insert_times):
-            rows = [{
-                default_primary_key_field_name: i + j * default_nb,
-                self.float_vector_field_name: list(float_vectors[i + j * default_nb]),
-                self.bfloat16_vector_field_name: bfloat16_vectors[i + j * default_nb],
-                self.sparse_vector_field_name: sparse_vectors[i + j * default_nb],
-                self.binary_vector_field_name: binary_vectors[i + j * default_nb],
-                default_float_field_name: (i + j * default_nb) * 1.0,
-                default_string_field_name: str(i + j * default_nb),
-                default_int64_field_name: i + j * default_nb
-            }
-                for i in range(default_nb)]
-            self.datas.extend(rows)
+            # Group rows by partition based on primary key mod 3
+            default_rows = []
+            partition1_rows = []
+            partition2_rows = []
+
+            for i in range(default_nb):
+                pk = i + j * default_nb
+                row = {
+                    default_primary_key_field_name: pk,
+                    self.float_vector_field_name: list(float_vectors[pk]),
+                    self.bfloat16_vector_field_name: bfloat16_vectors[pk],
+                    self.sparse_vector_field_name: sparse_vectors[pk],
+                    self.binary_vector_field_name: binary_vectors[pk],
+                    default_float_field_name: pk * 1.0,
+                    default_string_field_name: str(pk),
+                    default_int64_field_name: pk
+                }
+                self.datas.append(row)
+
+                # Distribute to partitions based on pk mod 3
+                if pk % 3 == 0:
+                    default_rows.append(row)
+                elif pk % 3 == 1:
+                    partition1_rows.append(row)
+                else:
+                    partition2_rows.append(row)
+
+            # Insert into respective partitions
+            if default_rows:
+                self.insert(client, self.collection_name, data=default_rows)
+            if partition1_rows:
+                self.insert(client, self.collection_name, data=partition1_rows, partition_name=self.partition_names[0])
+            if partition2_rows:
+                self.insert(client, self.collection_name, data=partition2_rows, partition_name=self.partition_names[1])
+
+            # Track all inserted data and primary keys
             self.primary_keys.extend([i + j * default_nb for i in range(default_nb)])
-            self.insert(client, self.collection_name, data=rows)
         self.flush(client, self.collection_name)

         # Create index
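Note: the reworked setup routes each row to the default partition or one of two named partitions by pk % 3, so later pagination tests can search within a single partition. A hedged sketch of the same routing against the raw MilvusClient API (the URI, collection name, and row contents are illustrative):

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")
client.create_partition("pagination_coll", partition_name="partition_1")

rows = [{"id": pk, "float_vector": [0.1] * 128} for pk in range(9)]
p1_rows = [r for r in rows if r["id"] % 3 == 1]  # pk % 3 == 1 -> partition_1
client.insert("pagination_coll", data=p1_rows, partition_name="partition_1")
```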
@@ -165,7 +194,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
         # 2. Search with pagination for 10 pages
         limit = 100
         pages = 10
-        vectors_to_search = cf.gen_vectors(default_nq, default_dim)
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
         all_pages_results = []
         for page in range(pages):
             offset = page * limit
@@ -224,7 +253,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
         # 2. Search with pagination for 10 pages
         limit = 100
         pages = 10
-        vectors_to_search = cf.gen_vectors(default_nq, self.bf16_vector_dim, vector_data_type='BFLOAT16_VECTOR')
+        vectors_to_search = cf.gen_vectors(default_nq, self.bf16_vector_dim, vector_data_type=DataType.BFLOAT16_VECTOR)
         all_pages_results = []
         for page in range(pages):
             offset = page * limit
@@ -374,10 +403,12 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
         for i in range(default_nq):
             page_ids = [page_res[i][j].get('id') for j in range(limit)]
             ids_in_full = [search_res_full[i][p * limit:p * limit + limit][j].get('id') for j in range(limit)]
-            # Calculate percentage of matching items
-            matching_items = sum(1 for x, y in zip(page_ids, ids_in_full) if x == y)
-            match_percentage = (matching_items / len(page_ids)) * 100
-            assert match_percentage >= 80, f"Only {match_percentage}% items matched, expected >= 80%"
+
+            # Calculate intersection between paginated results and baseline full results
+            common_ids = set(page_ids) & set(ids_in_full)
+            # Calculate overlap ratio using full results as baseline
+            overlap_ratio = len(common_ids) / len(ids_in_full) * 100
+            assert overlap_ratio >= 80, f"Only {overlap_ratio}% overlap with baseline results, expected >= 80%"

     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("limit", [100, 3000, 10000])
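Note: the pagination check above was loosened from positional equality to set overlap — since different index/metric combinations may order near-ties differently, the test now requires at least 80% intersection with the corresponding slice of the full result list rather than identical ordering. In miniature:

```python
page_ids = [1, 2, 3, 4, 5]     # one page of paginated results
ids_in_full = [1, 2, 4, 5, 6]  # same slice of the full (offset-free) search
overlap_ratio = len(set(page_ids) & set(ids_in_full)) / len(ids_in_full) * 100
assert overlap_ratio >= 80     # 4/5 = 80% passes; reordered ties no longer fail
```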
@@ -399,8 +430,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
         topK=16384
         offset = topK - limit
         search_param = {"nprobe": 10, "offset": offset}
-        vectors_to_search = [[random.random() for _ in range(default_dim)]
-                             for _ in range(default_nq)]
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
         client.search(collection_name, vectors_to_search[:default_nq], anns_field=self.float_vector_field_name,
                       search_params=search_param, limit=limit, check_task=CheckTasks.check_search_results,
                       check_items={"enable_milvus_client_api": True,
@ -438,6 +468,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
|
|||||||
limit = 0
|
limit = 0
|
||||||
elif len(filter_ids) - offset < default_limit:
|
elif len(filter_ids) - offset < default_limit:
|
||||||
limit = len(filter_ids) - offset
|
limit = len(filter_ids) - offset
|
||||||
|
# 3. search with a high nprobe for better accuracy
|
||||||
search_params = {"metric_type": "COSINE", "params": {"nprobe": 128}, "offset": offset}
|
search_params = {"metric_type": "COSINE", "params": {"nprobe": 128}, "offset": offset}
|
||||||
vectors_to_search = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
|
vectors_to_search = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
|
||||||
search_res_with_offset, _ = self.search(
|
search_res_with_offset, _ = self.search(
|
||||||
@ -454,7 +485,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
|
|||||||
"limit": limit}
|
"limit": limit}
|
||||||
)
|
)
|
||||||
|
|
||||||
# 3. search with offset+limit
|
# 4. search with offset+limit
|
||||||
search_params_full = {"metric_type": "COSINE", "params": {"nprobe": 128}}
|
search_params_full = {"metric_type": "COSINE", "params": {"nprobe": 128}}
|
||||||
search_res_full, _ = self.search(
|
search_res_full, _ = self.search(
|
||||||
client,
|
client,
|
||||||
@ -466,7 +497,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
|
|||||||
filter=expr
|
filter=expr
|
||||||
)
|
)
|
||||||
|
|
||||||
# 4. Compare results
|
# 5. Compare results
|
||||||
filter_ids_set = set(filter_ids)
|
filter_ids_set = set(filter_ids)
|
||||||
for hits in search_res_with_offset:
|
for hits in search_res_with_offset:
|
||||||
ids = [hit.get('id') for hit in hits]
|
ids = [hit.get('id') for hit in hits]
|
||||||
@ -477,7 +508,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
|
|||||||
ids_in_full = [search_res_full[0][offset:offset + limit][j].get('id') for j in range(limit)]
|
ids_in_full = [search_res_full[0][offset:offset + limit][j].get('id') for j in range(limit)]
|
||||||
assert page_ids == ids_in_full
|
assert page_ids == ids_in_full
|
||||||
|
|
||||||
# 5. search again with expression template
|
# 6. search again with expression template
|
||||||
expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or")
|
expr = cf.get_expr_from_template(expressions[1]).replace("&&", "and").replace("||", "or")
|
||||||
expr_params = cf.get_expr_params_from_template(expressions[1])
|
expr_params = cf.get_expr_params_from_template(expressions[1])
|
||||||
search_res_with_offset, _ = self.search(
|
search_res_with_offset, _ = self.search(
|
||||||
@ -495,7 +526,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
|
|||||||
"limit": limit}
|
"limit": limit}
|
||||||
)
|
)
|
||||||
|
|
||||||
# 6. search with offset+limit
|
# 7. search with offset+limit
|
||||||
search_res_full, _ = self.search(
|
search_res_full, _ = self.search(
|
||||||
client,
|
client,
|
||||||
collection_name,
|
collection_name,
|
||||||
@ -507,7 +538,7 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
|
|||||||
filter_params=expr_params
|
filter_params=expr_params
|
||||||
)
|
)
|
||||||
|
|
||||||
# Compare results
|
# 8. Compare results
|
||||||
filter_ids_set = set(filter_ids)
|
filter_ids_set = set(filter_ids)
|
||||||
for hits in search_res_with_offset:
|
for hits in search_res_with_offset:
|
||||||
ids = [hit.get('id') for hit in hits]
|
ids = [hit.get('id') for hit in hits]
|
||||||
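The hunks above all assert one pagination contract: the page returned with offset=o and limit=l must equal the [o:o+l] slice of a single search with limit=o+l. A minimal sketch of that invariant, assuming a connected pymilvus MilvusClient `client` and a loaded collection (the names here are illustrative stand-ins, not objects from this diff):

# Sketch of the pagination invariant asserted throughout these tests.
def assert_page_matches_full_search(client, collection_name, query_vec, offset, limit):
    paged = client.search(collection_name, [query_vec], limit=limit,
                          search_params={"offset": offset})
    full = client.search(collection_name, [query_vec], limit=offset + limit)
    # ids on the page must equal the corresponding slice of the full result
    assert [hit["id"] for hit in paged[0]] == \
           [hit["id"] for hit in full[0][offset:offset + limit]]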
@@ -518,348 +549,466 @@ class TestMilvusClientSearchPagination(TestMilvusClientV2Base):
            ids_in_full = [search_res_full[0][offset:offset + limit][j].get('id') for j in range(limit)]
            assert page_ids == ids_in_full

-class TestSearchPagination(TestcaseBase):
-    """ Test case of search pagination """
-
-    @pytest.fixture(scope="function", params=[0, 10, 100])
-    def offset(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=[False, True])
-    def auto_id(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=[False, True])
-    def _async(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=[True, False])
-    def enable_dynamic_field(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
-    def vector_data_type(self, request):
-        yield request.param
-
-    """
-    ******************************************************************
-    # The following are valid base cases
-    ******************************************************************
-    """
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_search_pagination_with_index_partition(self, offset, _async):
-        """
-        target: test search pagination with index and partition
-        method: create connection, collection, insert data, create index and search
-        expected: searched successfully
-        """
-        # 1. initialize with data
-        auto_id = False
-        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
-                                                                      partition_num=1,
-                                                                      auto_id=auto_id,
-                                                                      is_index=False)[0:4]
-        vectors = [[random.random() for _ in range(default_dim)]
-                   for _ in range(default_nq)]
-        # 2. create index
-        default_index = {"index_type": "IVF_FLAT",
-                         "params": {"nlist": 128}, "metric_type": "L2"}
-        collection_w.create_index("float_vector", default_index)
-        collection_w.load()
-        # 3. search through partitions
-        par = collection_w.partitions
-        limit = 100
-        search_params = {"metric_type": "L2",
-                         "params": {"nprobe": 10}, "offset": offset}
-        search_res = collection_w.search(vectors[:default_nq], default_search_field,
-                                         search_params, limit, default_search_exp,
-                                         [par[0].name, par[1].name], _async=_async,
-                                         check_task=CheckTasks.check_search_results,
-                                         check_items={"nq": default_nq,
-                                                      "ids": insert_ids,
-                                                      "limit": limit,
-                                                      "_async": _async})[0]
-        # 3. search through partitions with offset+limit
-        search_params = {"metric_type": "L2"}
-        res = collection_w.search(vectors[:default_nq], default_search_field, search_params,
-                                  limit + offset, default_search_exp,
-                                  [par[0].name, par[1].name], _async=_async)[0]
-        if _async:
-            search_res.done()
-            search_res = search_res.result()
-            res.done()
-            res = res.result()
-        res_distance = res[0].distances[offset:]
-        # assert cf.sort_search_distance(search_res[0].distances) == cf.sort_search_distance(res_distance)
-        assert set(search_res[0].ids) == set(res[0].ids[offset:])
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_search_pagination_with_inserted_data(self, offset, _async):
-        """
-        target: test search pagination with inserted data
-        method: create connection, collection, insert data and search
-                check the results by searching with limit+offset
-        expected: searched successfully
-        """
-        # 1. create collection
-        collection_w = self.init_collection_general(
-            prefix, False, dim=default_dim)[0]
-        # 2. insert data
-        data = cf.gen_default_dataframe_data(dim=default_dim)
-        collection_w.insert(data)
-        collection_w.load()
-        # 3. search
-        search_params = {"offset": offset}
-        search_res = collection_w.search(vectors[:default_nq], default_search_field,
-                                         search_params, default_limit,
-                                         default_search_exp, _async=_async,
-                                         check_task=CheckTasks.check_search_results,
-                                         check_items={"nq": default_nq,
-                                                      "limit": default_limit,
-                                                      "_async": _async})[0]
-        # 4. search through partitions with offset+limit
-        search_params = {}
-        res = collection_w.search(vectors[:default_nq], default_search_field, search_params,
-                                  default_limit + offset, default_search_exp, _async=_async)[0]
-        if _async:
-            search_res.done()
-            search_res = search_res.result()
-            res.done()
-            res = res.result()
-        res_distance = res[0].distances[offset:]
-        assert sorted(search_res[0].distances) == sorted(res_distance)
-        assert set(search_res[0].ids) == set(res[0].ids[offset:])
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_search_pagination_empty(self, offset, _async):
-        """
-        target: test search pagination empty
-        method: connect, create collection, insert data and search
-        expected: search successfully
-        """
-        # 1. initialize without data
-        auto_id = False
-        collection_w = self.init_collection_general(
-            prefix, True, auto_id=auto_id, dim=default_dim)[0]
-        # 2. search collection without data
-        search_param = {"metric_type": "COSINE",
-                        "params": {"nprobe": 10}, "offset": offset}
-        search_res = collection_w.search([], default_search_field, search_param,
-                                         default_limit, default_search_exp, _async=_async,
-                                         check_task=CheckTasks.check_search_results,
-                                         check_items={"nq": 0,
-                                                      "_async": _async})[0]
-        if _async:
-            search_res.done()
-            search_res = search_res.result()
-        assert len(search_res) == 0
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("offset", [3000, 5000])
-    def test_search_pagination_with_offset_over_num_entities(self, offset):
-        """
-        target: test search pagination with offset over num_entities
-        method: create connection, collection, insert 3000 entities and search with offset over 3000
-        expected: return an empty list
-        """
-        # 1. initialize
-        collection_w = self.init_collection_general(
-            prefix, True, dim=default_dim)[0]
-        # 2. search
-        search_param = {"metric_type": "COSINE",
-                        "params": {"nprobe": 10}, "offset": offset}
-        vectors = [[random.random() for _ in range(default_dim)]
-                   for _ in range(default_nq)]
-        res = collection_w.search(vectors[:default_nq], default_search_field,
-                                  search_param, default_limit,
-                                  default_search_exp,
-                                  check_task=CheckTasks.check_search_results,
-                                  check_items={"nq": default_nq,
-                                               "limit": 0})[0]
-        assert res[0].ids == []
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("index", ct.all_index_types[:7])
-    def test_search_pagination_after_different_index(self, index, offset, _async):
-        """
-        target: test search pagination after different index
-        method: test search pagination after different index and corresponding search params
-        expected: search successfully
-        """
-        # 1. initialize with data
-        dim = 128
-        auto_id = True
-        collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 1000,
-                                                                                  partition_num=1,
-                                                                                  auto_id=auto_id,
-                                                                                  dim=dim, is_index=False)[0:5]
-        # 2. create index and load
-        params = cf.get_index_params_params(index)
-        default_index = {"index_type": index, "params": params, "metric_type": "L2"}
-        collection_w.create_index("float_vector", default_index)
-        collection_w.load()
-        # 3. search
-        search_params = cf.gen_search_param(index)
-        vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
-        for search_param in search_params:
-            res = collection_w.search(vectors[:default_nq], default_search_field, search_param,
-                                      default_limit + offset, default_search_exp, _async=_async)[0]
-            search_param["offset"] = offset
-            log.info("Searching with search params: {}".format(search_param))
-            search_res = collection_w.search(vectors[:default_nq], default_search_field,
-                                             search_param, default_limit,
-                                             default_search_exp, _async=_async,
-                                             check_task=CheckTasks.check_search_results,
-                                             check_items={"nq": default_nq,
-                                                          "ids": insert_ids,
-                                                          "limit": default_limit,
-                                                          "_async": _async})[0]
-            if _async:
-                search_res.done()
-                search_res = search_res.result()
-                res.done()
-                res = res.result()
-            res_distance = res[0].distances[offset:]
-            # assert sorted(search_res[0].distances, key=numpy.float32) == sorted(res_distance, key=numpy.float32)
-            assert set(search_res[0].ids) == set(res[0].ids[offset:])
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("offset", [100, default_nb // 2])
-    def test_search_offset_different_position(self, offset):
-        """
-        target: test search pagination with offset in different position
-        method: create connection, collection, insert entities and search with offset
-        expected: search successfully
-        """
-        # 1. initialize
-        collection_w = self.init_collection_general(prefix, True)[0]
-        # 2. search with offset in params
-        search_params = {"metric_type": "COSINE",
-                         "params": {"nprobe": 10}, "offset": offset}
-        res1 = collection_w.search(vectors[:default_nq], default_search_field,
-                                   search_params, default_limit)[0]
-
-        # 3. search with offset outside params
-        res2 = collection_w.search(vectors[:default_nq], default_search_field, default_search_params,
-                                   default_limit, offset=offset)[0]
-        assert res1[0].ids == res2[0].ids
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("offset", [1, 5, 20])
-    def test_search_sparse_with_pagination(self, offset):
-        """
-        target: test search sparse with pagination
-        method: 1. connect and create a collection
-                2. search pagination with offset
-                3. search with offset+limit
-                4. compare with the search results whose corresponding ids should be the same
-        expected: search successfully and ids is correct
-        """
-        # 1. create a collection
-        auto_id = False
-        collection_w, _, _, insert_ids = \
-            self.init_collection_general(
-                prefix, True, auto_id=auto_id, vector_data_type=ct.sparse_vector)[0:4]
-        # 2. search with offset+limit
-        search_param = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}, "offset": offset}
-        search_vectors = cf.gen_default_list_sparse_data()[-1][-2:]
-        search_res = collection_w.search(search_vectors, ct.default_sparse_vec_field_name,
-                                         search_param, default_limit)[0]
-        # 3. search
-        _search_param = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}}
-        res = collection_w.search(search_vectors[:default_nq], ct.default_sparse_vec_field_name, _search_param,
-                                  default_limit + offset)[0]
-        assert len(search_res[0].ids) == len(res[0].ids[offset:])
-        assert sorted(search_res[0].distances, key=np.float32) == sorted(
-            res[0].distances[offset:], key=np.float32)
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_search_pagination_in_partitions(self):
+        """
+        target: test search pagination in partitions
+        method: 1. create collection and insert data
+                2. search with pagination in partitions
+                3. compare with the search results whose corresponding ids should be the same
+        """
+        client = self._client()
+        collection_name = self.collection_name
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
+        # search with pagination in partition_1
+        limit = 50
+        pages = 10
+        for page in range(pages):
+            offset = page * limit
+            search_params = {"offset": offset}
+            search_res_with_offset, _ = self.search(
+                client,
+                collection_name,
+                vectors_to_search[:default_nq],
+                partition_names=[self.partition_names[0]],
+                anns_field=self.float_vector_field_name,
+                search_params=search_params,
+                limit=limit,
+                check_task=CheckTasks.check_search_results,
+                check_items={"enable_milvus_client_api": True,
+                             "nq": default_nq, "limit": limit})
+            # assert every id in search_res_with_offset %3 ==1
+            for hits in search_res_with_offset:
+                for hit in hits:
+                    assert hit.get('id') % 3 == 1
+
+        # search with pagination in partition_1 and partition_2
+        for page in range(pages):
+            offset = page * limit
+            search_params = {"offset": offset}
+            search_res_with_offset, _ = self.search(
+                client,
+                collection_name,
+                vectors_to_search[:default_nq],
+                partition_names=self.partition_names,
+                anns_field=self.float_vector_field_name,
+                search_params=search_params,
+                limit=limit,
+                check_task=CheckTasks.check_search_results,
+                check_items={"enable_milvus_client_api": True,
+                             "nq": default_nq, "limit": limit})
+            # assert every id in search_res_with_offset %3 ==1 or ==2
+            for hits in search_res_with_offset:
+                for hit in hits:
+                    assert hit.get('id') % 3 == 1 or hit.get('id') % 3 == 2
-class TestSearchPaginationInvalid(TestMilvusClientV2Base):
-    """ Test case of search pagination """
-    """
-    ******************************************************************
-    # The following are invalid cases
-    ******************************************************************
-    """
-
    @pytest.mark.tags(CaseLabel.L1)
-    def test_search_pagination_with_invalid_offset_type(self):
+    def test_search_pagination_with_different_offset(self):
+        """
+        target: test search pagination with different offset
+        method: 1. create collection and insert data
+                2. search with different offset, including offset > limit, offset = 0
+                3. compare with the search results whose corresponding ids should be the same
+        """
+        client = self._client()
+        collection_name = self.collection_name
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
+        # search with offset > limit
+        offset = default_limit + 10
+        search_params = {"offset": offset}
+        self.search(client, collection_name, vectors_to_search[:default_nq],
+                    anns_field=self.float_vector_field_name,
+                    search_params=search_params, limit=default_limit,
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": default_nq, "limit": default_limit})
+        # search with offset = 0
+        offset = 0
+        search_params = {"offset": offset}
+        self.search(client, collection_name, vectors_to_search[:default_nq],
+                    anns_field=self.float_vector_field_name,
+                    search_params=search_params, limit=default_limit,
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": default_nq, "limit": default_limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("offset", [0, 20, 100, 200])
+    def test_search_offset_different_position(self, offset):
+        """
+        target: test search offset param in different position
+        method: create connection, collection, insert data, search with offset in different position
+        expected: search successfully
+        """
+        client = self._client()
+        collection_name = self.collection_name
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
+
+        # 1. search with offset in search_params
+        limit = 100
+        search_params = {"offset": offset}
+        res1, _ = self.search(client, collection_name, vectors_to_search[:default_nq],
+                              anns_field=self.float_vector_field_name,
+                              search_params=search_params,
+                              limit=limit,
+                              check_task=CheckTasks.check_search_results,
+                              check_items={"enable_milvus_client_api": True,
+                                           "nq": default_nq, "limit": limit})
+
+        # 2. search with offset in search
+        search_params = {}
+        res2, _ = self.search(client, collection_name, vectors_to_search[:default_nq],
+                              anns_field=self.float_vector_field_name,
+                              search_params=search_params,
+                              offset=offset,
+                              limit=limit,
+                              check_task=CheckTasks.check_search_results,
+                              check_items={"enable_milvus_client_api": True,
+                                           "nq": default_nq, "limit": limit})
+        # 3. compare results
+        assert res1 == res2
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_search_pagination_empty_list(self):
+        """
+        target: test search pagination with empty list of vectors
+        method: create connection, collection, insert data, search with an empty list of vectors and offset
+        expected: raise exception
+        """
+        client = self._client()
+        collection_name = self.collection_name
+        vectors_to_search = []
+        offset = 10
+        limit = 100
+        search_params = {"offset": offset}
+        error = {"err_code": 1, "err_msg": "list index out of range"}
+        self.search(client, collection_name, vectors_to_search,
+                    anns_field=self.float_vector_field_name,
+                    search_params=search_params,
+                    limit=limit,
+                    check_task=CheckTasks.err_res,
+                    check_items=error)
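test_search_offset_different_position above treats the two spellings of offset as interchangeable: inside search_params or as a keyword that the test wrapper forwards to client.search. A minimal sketch of the equivalence it asserts, assuming a MilvusClient `client`, a loaded collection named "demo", and a query vector (all hypothetical stand-ins):

# Both ways of passing offset must return the same page of ids.
res_a = client.search("demo", [query_vec], limit=100, search_params={"offset": 20})
res_b = client.search("demo", [query_vec], limit=100, offset=20)
assert [h["id"] for h in res_a[0]] == [h["id"] for h in res_b[0]]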
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("offset", [" ", 1.0, [1, 2], {1}, "12 s"])
+    def test_search_pagination_with_invalid_offset_type(self, offset):
        """
        target: test search pagination with invalid offset type
        method: create connection, collection, insert and search with invalid offset type
        expected: raise exception
        """
        client = self._client()
-
-        # 1. Create collection with schema
-        collection_name = cf.gen_collection_name_by_testcase_name()
-        self.create_collection(client, collection_name, default_dim)
-
-        # Insert data
-        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
-                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
-        self.insert(client, collection_name, rows)
-        self.flush(client, collection_name)
-
-        # Search with invalid offset types
-        vectors_to_search = cf.gen_vectors(default_nq, default_dim)
-        invalid_offsets = [" ", [1, 2], {1}, "12 s"]
-
-        for offset in invalid_offsets:
-            log.debug(f"assert search error if offset={offset}")
-            search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}, "offset": offset}
+        collection_name = self.collection_name
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
+
+        search_params = {"offset": offset}
+        error = {"err_code": 1, "err_msg": "wrong type for offset, expect int"}
        self.search(
            client,
            collection_name,
            vectors_to_search[:default_nq],
-            anns_field=default_vector_field_name,
+            anns_field=self.float_vector_field_name,
            search_params=search_params,
            limit=default_limit,
            check_task=CheckTasks.err_res,
-            check_items={
-                "err_code": 1,
-                "err_msg": "wrong type for offset, expect int"
-            }
-        )
+            check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
-    def test_search_pagination_with_invalid_offset_value(self):
+    @pytest.mark.parametrize("offset", [-1, 16385])
+    def test_search_pagination_with_invalid_offset_value(self, offset):
        """
        target: test search pagination with invalid offset value
        method: create connection, collection, insert and search with invalid offset value
        expected: raise exception
        """
        client = self._client()
-
-        # 1. Create collection with schema
-        collection_name = cf.gen_collection_name_by_testcase_name()
-        self.create_collection(client, collection_name, default_dim)
-
-        # Insert data
-        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
-                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
-        self.insert(client, collection_name, rows)
-        self.flush(client, collection_name)
-
-        # Search with invalid offset values
-        vectors_to_search = cf.gen_vectors(default_nq, default_dim)
-        invalid_offsets = [-1, 16385]
-
-        for offset in invalid_offsets:
-            log.debug(f"assert search error if offset={offset}")
-            search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}, "offset": offset}
+        collection_name = self.collection_name
+        vectors_to_search = cf.gen_vectors(default_nq, self.float_vector_dim)
+        search_params = {"offset": offset}
+        error = {"err_code": 1, "err_msg": f"offset [{offset}] is invalid, it should be in range [1, 16384]"}
        self.search(
+            client,
+            collection_name,
+            vectors_to_search[:default_nq],
+            anns_field=self.float_vector_field_name,
+            search_params=search_params,
+            limit=default_limit,
+            check_task=CheckTasks.err_res,
+            check_items=error
+        )
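The invalid-value test encodes the server-side bound from the error message above (offset must stay within [1, 16384]), and the topK=16384 case earlier exercises the companion constraint that offset + limit may not exceed that ceiling. A client-side pre-check could look like this sketch; the constant and helper name are mine, the bounds come from the tests above:

MAX_TOPK = 16384  # from the error message: offset should be in range [1, 16384]

def validate_pagination(offset, limit):
    if not isinstance(offset, int):
        raise TypeError("wrong type for offset, expect int")
    if offset < 0 or offset > MAX_TOPK:
        raise ValueError(f"offset [{offset}] is invalid, it should be in range [1, 16384]")
    if offset + limit > MAX_TOPK:
        raise ValueError("offset + limit must not exceed 16384")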
+class TestSearchPaginationIndependent(TestMilvusClientV2Base):
+    """ Test case of search pagination with independent collection """
+
+    def do_search_pagination_and_assert(self, client, collection_name,
+                                        limit=10, pages=10,
+                                        dim=default_dim,
+                                        vector_dtype=DataType.FLOAT_VECTOR,
+                                        index=ct.L0_index_types[0],
+                                        metric_type=ct.default_L0_metric,
+                                        expected_overlap_ratio=80):
+        # 1. search with pagination page by page
+        vectors_to_search = cf.gen_vectors(default_nq, dim, vector_data_type=vector_dtype)
+        all_pages_results = []
+        for page in range(pages):
+            offset = page * limit
+            search_params = {"offset": offset}
+            search_res_with_offset, _ = self.search(
                client,
                collection_name,
                vectors_to_search[:default_nq],
                anns_field=default_vector_field_name,
                search_params=search_params,
-                limit=default_limit,
-                check_task=CheckTasks.err_res,
-                check_items={
-                    "err_code": 1,
-                    "err_msg": f"offset [{offset}] is invalid, it should be in range [1, 16384]"
+                limit=limit,
+                check_task=CheckTasks.check_search_results,
+                check_items={"enable_milvus_client_api": True,
+                             "nq": default_nq,
+                             "limit": limit,
+                             "metric": metric_type,
                }
            )
+            all_pages_results.append(search_res_with_offset)
+
+        # 2. search without pagination
+        search_params_full = {}
+        search_res_full, _ = self.search(
+            client,
+            collection_name,
+            vectors_to_search[:default_nq],
+            anns_field=default_vector_field_name,
+            search_params=search_params_full,
+            limit=limit * pages
+        )
+
+        # 3. compare results - verify pagination results equal the results in full search with offsets
+        for p in range(pages):
+            page_res = all_pages_results[p]
+            for i in range(default_nq):
+                page_ids = [page_res[i][j].get('id') for j in range(limit)]
+                ids_in_full = [search_res_full[i][p * limit:p * limit + limit][j].get('id') for j in range(limit)]
+                # Calculate intersection between paginated results and baseline full results
+                common_ids = set(page_ids) & set(ids_in_full)
+                # Calculate overlap ratio using full results as baseline
+                overlap_ratio = len(common_ids) / len(ids_in_full) * 100
+                log.debug(
+                    f"pagination search {vector_dtype.name} {index} {metric_type} results overlap {overlap_ratio}")
+                assert overlap_ratio >= expected_overlap_ratio, \
+                    f"Only {overlap_ratio}% overlap with baseline results, expected >= {expected_overlap_ratio}%"
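do_search_pagination_and_assert deliberately checks an overlap ratio instead of exact page equality: the tests below run it against growing collections, where the ANN index may rank near-ties differently between the paged and the full search. The metric itself is just set intersection over the baseline slice, as this self-contained sketch shows:

# Overlap between a paginated page and the matching slice of a full search,
# expressed as a percentage of the baseline slice.
def overlap_ratio(page_ids, baseline_ids):
    common = set(page_ids) & set(baseline_ids)
    return len(common) / len(baseline_ids) * 100

assert overlap_ratio([1, 2, 3, 4], [2, 3, 4, 5]) == 75.0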
+    """
+    ******************************************************************
+    # The following are valid cases
+    ******************************************************************
+    """
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize('vector_dtype', ct.all_dense_vector_types)
+    @pytest.mark.parametrize('index', ct.all_index_types[:7])
+    @pytest.mark.parametrize('metric_type', ct.dense_metrics)
+    def test_search_pagination_dense_vectors_indices_metrics_growing(self, vector_dtype, index, metric_type):
+        """
+        target: test search pagination with growing data
+        method: create connection, collection, insert data and search
+                check the results by searching with limit+offset
+        expected: searched successfully
+        """
+        client = self._client()
+
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        schema, _ = self.create_schema(client)
+        schema.add_field(default_primary_key_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, datatype=vector_dtype, dim=default_dim)
+        schema.add_field(default_float_field_name, datatype=DataType.FLOAT)
+        schema.add_field(default_string_field_name, datatype=DataType.VARCHAR, max_length=100)
+        self.create_collection(client, collection_name, schema=schema)
+
+        # Insert data in 3 batches with unique primary keys using a loop
+        insert_times = 3
+        random_vectors = list(cf.gen_vectors(default_nb*insert_times, default_dim, vector_data_type=vector_dtype)) \
+            if vector_dtype == DataType.FLOAT_VECTOR \
+            else cf.gen_vectors(default_nb*insert_times, default_dim, vector_data_type=vector_dtype)
+        for j in range(insert_times):
+            start_pk = j * default_nb
+            rows = [{
+                default_primary_key_field_name: i + start_pk,
+                default_vector_field_name: random_vectors[i + start_pk],
+                default_float_field_name: (i + start_pk) * 1.0,
+                default_string_field_name: str(i + start_pk)
+            } for i in range(default_nb)]
+            self.insert(client, collection_name, rows)
+        self.flush(client, collection_name)
+
+        # build index
+        index_params, _ = self.prepare_index_params(client)
+        index_params.add_index(default_vector_field_name, index_type=index,
+                               metric_type=metric_type,
+                               params=cf.get_index_params_params(index_type=index))
+        self.create_index(client, collection_name, index_params=index_params)
+
+        # load the collection with index
+        assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
+        self.load_collection(client, collection_name)
+
+        # search and assert
+        limit = 50
+        pages = 5
+        expected_overlap_ratio = 20
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
+
+        # insert additional data without flush
+        random_vectors = list(cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)) \
+            if vector_dtype == DataType.FLOAT_VECTOR \
+            else cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)
+        start_pk = default_nb * insert_times
+        rows = [{
+            default_primary_key_field_name: i + start_pk,
+            default_vector_field_name: random_vectors[i],
+            default_float_field_name: (i + start_pk) * 1.0,
+            default_string_field_name: str(i + start_pk)
+        } for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+
+        # search and assert
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize('index', ct.binary_supported_index_types)
+    @pytest.mark.parametrize('metric_type', ct.binary_metrics[:2])
+    def test_search_pagination_binary_index_growing(self, index, metric_type):
+        """
+        target: test search pagination with binary index
+        method: create connection, collection, insert data, create index and search
+        expected: searched successfully
+        """
+        vector_dtype = DataType.BINARY_VECTOR
+        client = self._client()
+
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        schema, _ = self.create_schema(client)
+        schema.add_field(default_primary_key_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, datatype=vector_dtype, dim=default_dim)
+        self.create_collection(client, collection_name, schema=schema)
+
+        # Insert data in 3 batches with unique primary keys using a loop
+        insert_times = 3
+        random_vectors = list(cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)) \
+            if vector_dtype == DataType.FLOAT_VECTOR \
+            else cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)
+        for j in range(insert_times):
+            start_pk = j * default_nb
+            rows = [{
+                default_primary_key_field_name: i + start_pk,
+                default_vector_field_name: random_vectors[i + start_pk]
+            } for i in range(default_nb)]
+            self.insert(client, collection_name, rows)
+        self.flush(client, collection_name)
+
+        # build index
+        index_params, _ = self.prepare_index_params(client)
+        index_params.add_index(default_vector_field_name, index_type=index,
+                               metric_type=metric_type,
+                               params=cf.get_index_params_params(index_type=index))
+        self.create_index(client, collection_name, index_params=index_params)
+
+        # load the collection with index
+        assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
+        self.load_collection(client, collection_name)
+
+        # search and assert
+        limit = 50
+        pages = 5
+        expected_overlap_ratio = 20
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
+
+        # insert additional data without flush
+        random_vectors = list(cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)) \
+            if vector_dtype == DataType.FLOAT_VECTOR \
+            else cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)
+        start_pk = default_nb * insert_times
+        rows = [{
+            default_primary_key_field_name: i + start_pk,
+            default_vector_field_name: random_vectors[i]
+        } for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+
+        # search and assert
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize('index', ct.sparse_supported_index_types)
+    @pytest.mark.parametrize('metric_type', ["IP"])
+    def test_search_pagination_sparse_index_growing(self, index, metric_type):
+        """
+        target: test search pagination with sparse index
+        method: create connection, collection, insert data, create index and search
+        expected: searched successfully
+        """
+        vector_dtype = DataType.SPARSE_FLOAT_VECTOR
+        client = self._client()
+
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        schema, _ = self.create_schema(client)
+        schema.add_field(default_primary_key_field_name, datatype=DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, datatype=vector_dtype)
+        self.create_collection(client, collection_name, schema=schema)
+
+        # Insert data in 3 batches with unique primary keys using a loop
+        insert_times = 3
+        random_vectors = list(cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)) \
+            if vector_dtype == DataType.FLOAT_VECTOR \
+            else cf.gen_vectors(default_nb * insert_times, default_dim, vector_data_type=vector_dtype)
+        for j in range(insert_times):
+            start_pk = j * default_nb
+            rows = [{
+                default_primary_key_field_name: i + start_pk,
+                default_vector_field_name: random_vectors[i + start_pk]
+            } for i in range(default_nb)]
+            self.insert(client, collection_name, rows)
+        self.flush(client, collection_name)
+
+        # build index
+        index_params, _ = self.prepare_index_params(client)
+        index_params.add_index(default_vector_field_name, index_type=index,
+                               metric_type=metric_type,
+                               params=cf.get_index_params_params(index_type=index))
+        self.create_index(client, collection_name, index_params=index_params)
+
+        # load the collection with index
+        assert self.wait_for_index_ready(client, collection_name, default_vector_field_name, timeout=120)
+        self.load_collection(client, collection_name)
+
+        # search and assert
+        limit = 50
+        pages = 5
+        expected_overlap_ratio = 20
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
+
+        # insert additional data without flush
+        random_vectors = list(cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)) \
+            if vector_dtype == DataType.FLOAT_VECTOR \
+            else cf.gen_vectors(default_nb, default_dim, vector_data_type=vector_dtype)
+        start_pk = default_nb * insert_times
+        rows = [{
+            default_primary_key_field_name: i + start_pk,
+            default_vector_field_name: random_vectors[i]
+        } for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+
+        # search and assert
+        self.do_search_pagination_and_assert(client, collection_name, limit=limit, pages=pages, dim=default_dim,
+                                             vector_dtype=vector_dtype, index=index, metric_type=metric_type,
+                                             expected_overlap_ratio=expected_overlap_ratio)
@@ -676,7 +676,7 @@ class TestCollectionSearch(TestcaseBase):
    def random_primary_key(self, request):
        yield request.param

-    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    @pytest.fixture(scope="function", params=ct.all_dense_vector_types)
    def vector_data_type(self, request):
        yield request.param

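Replacing the hard-coded strings with pymilvus DataType members, as this fixture now does via ct.all_dense_vector_types, keeps the string spelling available where one is still needed, while typos fail fast instead of silently mismatching. A minimal sketch, assuming only that pymilvus is installed:

from pymilvus import DataType

dtype = DataType.FLOAT16_VECTOR
assert dtype.name == "FLOAT16_VECTOR"      # the old string form is still recoverable
assert dtype != DataType.BFLOAT16_VECTOR   # enum members compare safely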
@@ -1613,7 +1613,7 @@ class TestCollectionSearch(TestcaseBase):
        enable_dynamic_field = False
        collection_w, _, _, insert_ids, time_stamp = \
            self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id,
-                                         dim=dim, is_index=False,
+                                         dim=dim, is_index=False, vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
                                         enable_dynamic_field=enable_dynamic_field)[0:5]
        # 2. create index and load
        params = cf.get_index_params_params(index)
@@ -1624,7 +1624,7 @@ class TestCollectionSearch(TestcaseBase):
        if (dim % params["PQM"]) != 0:
            params["PQM"] = dim // 4
        default_index = {"index_type": index, "params": params, "metric_type": "L2"}
-        collection_w.create_index("float_vector", default_index)
+        collection_w.create_index("sparse_vector", default_index)
        collection_w.load()
        # 3. search
        search_params = cf.gen_search_param(index)
@@ -1714,6 +1714,7 @@ class TestCollectionSearch(TestcaseBase):
        collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000,
                                                                                  partition_num=1,
                                                                                  auto_id=auto_id,
+                                                                                  vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
                                                                                  dim=min_dim, is_index=False)[0:5]
        # 2. create index and load
        params = cf.get_index_params_params(index)
@@ -1722,7 +1723,7 @@ class TestCollectionSearch(TestcaseBase):
        if params.get("PQM"):
            params["PQM"] = min_dim
        default_index = {"index_type": index, "params": params, "metric_type": "L2"}
-        collection_w.create_index("float_vector", default_index)
+        collection_w.create_index("sparse_vector", default_index)
        collection_w.load()
        # 3. search
        search_params = cf.gen_search_param(index)
@@ -1885,6 +1886,7 @@ class TestCollectionSearch(TestcaseBase):
        enable_dynamic_field = False
        collection_w, _, _, insert_ids, time_stamp = \
            self.init_collection_general(prefix, True, 5000, partition_num=1, auto_id=auto_id,
+                                         vector_data_type=DataType.SPARSE_FLOAT_VECTOR,
                                         dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5]
        # 2. create different index
        params = cf.get_index_params_params(index)
@@ -1896,7 +1898,7 @@ class TestCollectionSearch(TestcaseBase):
            params["PQM"] = dim // 4
        log.info("test_search_after_index_different_metric_type: Creating index-%s" % index)
        default_index = {"index_type": index, "params": params, "metric_type": "IP"}
-        collection_w.create_index("float_vector", default_index)
+        collection_w.create_index("sparse_vector", default_index)
        log.info("test_search_after_index_different_metric_type: Created index-%s" % index)
        collection_w.load()
        # 3. search
@@ -2924,10 +2926,10 @@ class TestCollectionSearch(TestcaseBase):
        limit = 0
        insert_ids = []
        vector_name_list = cf.extract_vector_field_name_list(collection_w)
-        for search_field in vector_name_list:
-            vector_data_type = search_field.lstrip("multiple_vector_")
-            vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type)
-            res = collection_w.search(vectors[:nq], search_field,
+        for vector_field_name in vector_name_list:
+            vector_data_type = cf.get_field_dtype_by_field_name(collection_w, vector_field_name)
+            vectors = cf.gen_vectors(nq, dim, vector_data_type)
+            res = collection_w.search(vectors[:nq], vector_field_name,
                                      default_search_params, default_limit,
                                      search_exp, _async=_async,
                                      output_fields=[default_int64_field_name,
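The old code derived the dtype from the field name with str.lstrip, which strips a character set rather than a literal prefix; looking the dtype up from the schema, as the new cf.get_field_dtype_by_field_name call does, avoids that trap. A minimal sketch of the pitfall, using an illustrative string:

# str.lstrip takes a set of characters, not a prefix: every leading character
# that appears anywhere in "multiple_vector_" is removed, which can eat into
# the part that was meant to survive.
assert "multiple_vector_mock".lstrip("multiple_vector_") == "k"
# A prefix-safe alternative (Python 3.9+):
assert "multiple_vector_mock".removeprefix("multiple_vector_") == "mock"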
@@ -3213,7 +3215,7 @@ class TestCollectionSearch(TestcaseBase):

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("index", ct.all_index_types[:7])
-    @pytest.mark.parametrize("metrics", ct.float_metrics)
+    @pytest.mark.parametrize("metrics", ct.dense_metrics)
    @pytest.mark.parametrize("limit", [20, 1200])
    def test_search_output_field_vector_after_different_index_metrics(self, index, metrics, limit):
        """
@@ -288,7 +288,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base):

        # hybrid_search
        search_param = {
-            "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type="FLOAT_VECTOR"),
+            "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR),
            "anns_field": ct.default_float_vec_field_name,
            "param": {"metric_type": "COSINE", "params": {"ef": "96"}},
            "limit": ct.default_limit,
@@ -296,7 +296,7 @@ class TestAsyncMilvusClient(TestMilvusClientV2Base):
        req = AnnSearchRequest(**search_param)

        search_param2 = {
-            "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type="FLOAT_VECTOR"),
+            "data": cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR),
            "anns_field": default_vector_name,
            "param": {"metric_type": "L2", "params": {"nprobe": "32"}},
            "limit": ct.default_limit
@@ -857,16 +857,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
        # log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}")

        for f in [df.float_vec_field, df.bf16_vec_field, df.fp16_vec_field]:
-            vector_data_type = "FLOAT_VECTOR"
+            vector_data_type = DataType.FLOAT_VECTOR
            if f == df.float_vec_field:
                dim = float_vec_field_dim
-                vector_data_type = "FLOAT_VECTOR"
+                vector_data_type = DataType.FLOAT_VECTOR
            elif f == df.bf16_vec_field:
                dim = bf16_vec_field_dim
-                vector_data_type = "BFLOAT16_VECTOR"
+                vector_data_type = DataType.BFLOAT16_VECTOR
            else:
                dim = fp16_vec_field_dim
-                vector_data_type = "FLOAT16_VECTOR"
+                vector_data_type = DataType.FLOAT16_VECTOR

            search_data = cf.gen_vectors(1, dim, vector_data_type=vector_data_type)
            search_params = ct.default_search_params
@@ -1043,16 +1043,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
        # log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}")

        for f in [df.float_vec_field, df.bf16_vec_field, df.fp16_vec_field]:
-            vector_data_type = "FLOAT_VECTOR"
+            vector_data_type = DataType.FLOAT_VECTOR
            if f == df.float_vec_field:
                dim = float_vec_field_dim
-                vector_data_type = "FLOAT_VECTOR"
+                vector_data_type = DataType.FLOAT_VECTOR
            elif f == df.bf16_vec_field:
                dim = bf16_vec_field_dim
-                vector_data_type = "BFLOAT16_VECTOR"
+                vector_data_type = DataType.BFLOAT16_VECTOR
            else:
                dim = fp16_vec_field_dim
-                vector_data_type = "FLOAT16_VECTOR"
+                vector_data_type = DataType.FLOAT16_VECTOR

            search_data = cf.gen_vectors(1, dim, vector_data_type=vector_data_type)
            search_params = ct.default_search_params
@@ -1217,16 +1217,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
        # log.info(f"query seg info: {self.utility_wrap.get_query_segment_info(c_name)[0]}")

        for f in [df.float_vec_field, df.bf16_vec_field, df.fp16_vec_field]:
-            vector_data_type = "FLOAT_VECTOR"
+            vector_data_type = DataType.FLOAT_VECTOR
            if f == df.float_vec_field:
                dim = float_vec_field_dim
-                vector_data_type = "FLOAT_VECTOR"
+                vector_data_type = DataType.FLOAT_VECTOR
            elif f == df.bf16_vec_field:
                dim = bf16_vec_field_dim
-                vector_data_type = "BFLOAT16_VECTOR"
+                vector_data_type = DataType.BFLOAT16_VECTOR
            else:
                dim = fp16_vec_field_dim
-                vector_data_type = "FLOAT16_VECTOR"
+                vector_data_type = DataType.FLOAT16_VECTOR

            search_data = cf.gen_vectors(1, dim, vector_data_type=vector_data_type)
            search_params = ct.default_search_params
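The if/elif chain above is repeated verbatim in three hunks; mapping each bulk-insert vector field to its dim and dtype could equally be table-driven. A hedged sketch of that refactor — the df.* field names and *_dim variables are the ones used in the tests above, but the table structure itself is mine, not the suite's code:

field_specs = {
    df.float_vec_field: (float_vec_field_dim, DataType.FLOAT_VECTOR),
    df.bf16_vec_field: (bf16_vec_field_dim, DataType.BFLOAT16_VECTOR),
    df.fp16_vec_field: (fp16_vec_field_dim, DataType.FLOAT16_VECTOR),
}
for f, (dim, vector_data_type) in field_specs.items():
    search_data = cf.gen_vectors(1, dim, vector_data_type=vector_data_type)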
@@ -1616,8 +1616,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
                df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None,
                df.array_bool_field: [True, False] if not (nullable and random.random() < 0.5) else None,
                df.float_vec_field: cf.gen_vectors(1, dim)[0],
-                df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0],
-                df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0],
+                df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT16_VECTOR)[0],
+                df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.BFLOAT16_VECTOR)[0],
                df.sparse_vec_field: cf.gen_sparse_vectors(1, dim, sparse_format=sparse_format)[0]
            }
            if auto_id:
@@ -1922,8 +1922,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
                df.string_field: "string",
                df.json_field: json_value[i%len(json_value)],
                df.float_vec_field: cf.gen_vectors(1, dim)[0],
-                df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0],
-                df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0],
+                df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT16_VECTOR)[0],
+                df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.BFLOAT16_VECTOR)[0],
            }
            if auto_id:
                row.pop(df.pk_field)
@@ -2064,8 +2064,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
                df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None,
                df.array_bool_field: [True, False] if not (nullable and random.random() < 0.5) else None,
                df.float_vec_field: cf.gen_vectors(1, dim)[0],
-                df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type="FLOAT16_VECTOR")[0],
-                df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type="BFLOAT16_VECTOR")[0],
+                df.fp16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT16_VECTOR)[0],
+                df.bf16_vec_field: cf.gen_vectors(1, dim, vector_data_type=DataType.BFLOAT16_VECTOR)[0],
                df.sparse_vec_field: cf.gen_sparse_vectors(1, dim, sparse_format=sparse_format)[0]
            }
            if auto_id:
@@ -2536,7 +2536,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):

        # verify search
        self.collection_wrap.search(
-            data=cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR.name),
+            data=cf.gen_vectors(ct.default_nq, ct.default_dim, vector_data_type=DataType.FLOAT_VECTOR),
            anns_field=df.float_vec_field, param=DefaultVectorSearchParams.IVF_SQ8(),
            limit=ct.default_limit,
            check_task=CheckTasks.check_search_results,
@ -50,7 +50,7 @@ vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_
|
|||||||
default_search_field = ct.default_float_vec_field_name
|
default_search_field = ct.default_float_vec_field_name
|
||||||
default_search_params = ct.default_search_params
|
default_search_params = ct.default_search_params
|
||||||
max_vector_field_num = ct.max_vector_field_num
|
max_vector_field_num = ct.max_vector_field_num
|
||||||
SPARSE_FLOAT_VECTOR_data_type = "SPARSE_FLOAT_VECTOR"
|
SPARSE_FLOAT_VECTOR_data_type = DataType.SPARSE_FLOAT_VECTOR
|
||||||
|
|
||||||
|
|
||||||
class TestCollectionParams(TestcaseBase):
|
class TestCollectionParams(TestcaseBase):
|
||||||
@ -1061,7 +1061,7 @@ class TestCollectionParams(TestcaseBase):
 # 2. create collection with multiple vectors
 c_name = cf.gen_unique_str(prefix)
 fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
-          cf.gen_float_vec_field(vector_data_type=ct.sparse_vector), cf.gen_float_vec_field(name="vec_sparse", vector_data_type=ct.sparse_vector)]
+          cf.gen_float_vec_field(vector_data_type=DataType.FLOAT_VECTOR), cf.gen_float_vec_field(name="vec_sparse", vector_data_type=DataType.SPARSE_FLOAT_VECTOR)]
 schema = cf.gen_collection_schema(fields=fields)
 self.collection_wrap.init_collection(c_name, schema=schema,
                                      check_task=CheckTasks.check_collection_property,
@ -3260,7 +3260,7 @@ class TestLoadPartition(TestcaseBase):
 )
 def get_binary_index(self, request):
     log.info(request.param)
-    if request.param["index_type"] in ct.binary_support:
+    if request.param["index_type"] in ct.binary_supported_index_types:
         return request.param
     else:
         pytest.skip("Skip index Temporary")
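The rename above points at the usual parametrize-and-skip fixture pattern. A self-contained sketch under illustrative parameters (`BIN_FLAT`/`BIN_IVF_FLAT` are Milvus binary index types; the set is a stand-in for `ct.binary_supported_index_types`):

```python
import pytest

BINARY_SUPPORTED_INDEX_TYPES = {"BIN_FLAT", "BIN_IVF_FLAT"}  # stand-in set


@pytest.fixture(scope="function",
                params=[{"index_type": "BIN_FLAT"}, {"index_type": "HNSW"}])
def get_binary_index(request):
    # Skip any parametrized index config that binary vectors do not support.
    if request.param["index_type"] not in BINARY_SUPPORTED_INDEX_TYPES:
        pytest.skip("index type not supported for binary vectors")
    return request.param
```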
@ -4560,7 +4560,7 @@ class TestCollectionNullInvalid(TestcaseBase):
 ******************************************************************
 """
 @pytest.mark.tags(CaseLabel.L1)
-@pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+@pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes[:1])
 def test_create_collection_set_nullable_on_pk_field(self, vector_type):
     """
     target: test create collection with set nullable=True on pk field
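`ct.all_float_vector_dtypes` here, and `ct.all_dense_vector_types` in later hunks, plausibly collect `DataType` members; a sketch of what such constants could look like (the exact contents and order in the repo's `common_type` may differ):

```python
from pymilvus import DataType

all_dense_vector_types = [
    DataType.FLOAT_VECTOR,
    DataType.FLOAT16_VECTOR,
    DataType.BFLOAT16_VECTOR,
]
# Sparse float vectors hold float values too, just not densely.
all_float_vector_dtypes = all_dense_vector_types + [DataType.SPARSE_FLOAT_VECTOR]
```

With that ordering, `ct.all_float_vector_dtypes[:1]` in the pk-field test pins the parametrization to plain `FLOAT_VECTOR`.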
@ -4578,7 +4578,7 @@ class TestCollectionNullInvalid(TestcaseBase):
 self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)

 @pytest.mark.tags(CaseLabel.L1)
-@pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+@pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes)
 def test_create_collection_set_nullable_on_vector_field(self, vector_type):
     """
     target: test create collection with set nullable=True on vector field
@ -4623,7 +4623,7 @@ class TestCollectionDefaultValueInvalid(TestcaseBase):
 ******************************************************************
 """
 @pytest.mark.tags(CaseLabel.L1)
-@pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+@pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes[:1])
 def test_create_collection_default_value_on_pk_field(self, vector_type):
     """
     target: test create collection with set default value on pk field

@ -4641,7 +4641,7 @@ class TestCollectionDefaultValueInvalid(TestcaseBase):
 self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)

 @pytest.mark.tags(CaseLabel.L1)
-@pytest.mark.parametrize("vector_type", ct.all_vector_data_types)
+@pytest.mark.parametrize("vector_type", ct.all_float_vector_dtypes)
 def test_create_collection_default_value_on_vector_field(self, vector_type):
     """
     target: test create collection with set default value on vector field
@ -147,13 +147,13 @@ class TestFieldPartialLoad(TestcaseBase):
 pk_field = cf.gen_int64_field(name='pk', is_primary=True)
 load_string_field = cf.gen_string_field(name="string_load")
 vector_field = cf.gen_float_vec_field(name="vec_float32", dim=dim)
-sparse_vector_field = cf.gen_float_vec_field(name="sparse", vector_data_type="SPARSE_FLOAT_VECTOR")
+sparse_vector_field = cf.gen_float_vec_field(name="sparse", vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
 schema = cf.gen_collection_schema(fields=[pk_field, load_string_field, vector_field, sparse_vector_field],
                                   auto_id=True)
 collection_w = self.init_collection_wrap(name=name, schema=schema)
 string_values = [str(i) for i in range(nb)]
 float_vec_values = cf.gen_vectors(nb, dim)
-sparse_vec_values = cf.gen_vectors(nb, dim, vector_data_type="SPARSE_FLOAT_VECTOR")
+sparse_vec_values = cf.gen_vectors(nb, dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
 collection_w.insert([string_values, float_vec_values, sparse_vec_values])

 # build index on one of vector fields
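For the sparse field above, pymilvus accepts each sparse vector as a `{dimension_index: value}` dict. A minimal generator sketch (hypothetical, much simpler than the sparse path in the repo's `cf.gen_vectors`):

```python
import random


def gen_sparse_vectors(nb: int, max_dim: int = 1000, nnz: int = 10):
    """Sparse float vectors as {dimension_index: value} dicts."""
    return [
        {idx: random.random() for idx in random.sample(range(max_dim), nnz)}
        for _ in range(nb)
    ]
```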
@ -3273,9 +3273,9 @@ class TestSearchWithFullTextSearchNegative(TestcaseBase):
 nq = 2
 limit = 100
 if invalid_search_data == "sparse_vector":
-    search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type="SPARSE_FLOAT_VECTOR")
+    search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)
 else:
-    search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type="FLOAT_VECTOR")
+    search_data = cf.gen_vectors(nb=nq, dim=1000, vector_data_type=DataType.FLOAT_VECTOR)
 log.info(f"search data: {search_data}")
 error = {ct.err_code: 65535,
          ct.err_msg: "please provide varchar/text for BM25 Function based search"}
@ -3377,7 +3377,7 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase):
     "paragraph": fake.paragraph().lower() if random.random() >= empty_percent else "",
     "text": fake.text().lower() if random.random() >= empty_percent else "",
     "dense_emb": [random.random() for _ in range(dim)],
-    "neural_sparse_emb": cf.gen_vectors(nb=1, dim=1000, vector_data_type="SPARSE_FLOAT_VECTOR")[0],
+    "neural_sparse_emb": cf.gen_vectors(nb=1, dim=1000, vector_data_type=DataType.SPARSE_FLOAT_VECTOR)[0],
 }
 for i in range(data_size)
 ]
@ -3428,7 +3428,7 @@ class TestHybridSearchWithFullTextSearch(TestcaseBase):
     limit=limit,
 )
 sparse_search = AnnSearchRequest(
-    data=cf.gen_vectors(nb=nq, dim=dim, vector_data_type="SPARSE_FLOAT_VECTOR"),
+    data=cf.gen_vectors(nb=nq, dim=dim, vector_data_type=DataType.SPARSE_FLOAT_VECTOR),
     anns_field="neural_sparse_emb",
     param={},
     limit=limit,
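For context, the `AnnSearchRequest` above is one leg of a hybrid search. A self-contained sketch of building the dense and sparse request pair (field names follow the test; the `hybrid_search` call is commented out because it needs a live collection):

```python
import random

from pymilvus import AnnSearchRequest, RRFRanker

nq, dim, limit = 2, 128, 100
dense_queries = [[random.random() for _ in range(dim)] for _ in range(nq)]
sparse_queries = [{i: random.random() for i in random.sample(range(1000), 20)}
                  for _ in range(nq)]

dense_req = AnnSearchRequest(data=dense_queries, anns_field="dense_emb",
                             param={}, limit=limit)
sparse_req = AnnSearchRequest(data=sparse_queries, anns_field="neural_sparse_emb",
                              param={}, limit=limit)
# collection.hybrid_search([dense_req, sparse_req], RRFRanker(), limit=limit)
```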
@ -1124,7 +1124,7 @@ class TestIndexInvalid(TestcaseBase):
 def scalar_index(self, request):
     yield request.param

-@pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+@pytest.fixture(scope="function", params=ct.all_dense_vector_types)
 def vector_data_type(self, request):
     yield request.param

@ -2171,7 +2171,7 @@ class TestInvertedIndexValid(TestcaseBase):
 def scalar_index(self, request):
     yield request.param

-@pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+@pytest.fixture(scope="function", params=ct.all_dense_vector_types)
 def vector_data_type(self, request):
     yield request.param
@ -1429,7 +1429,7 @@ class TestInsertInvalid(TestcaseBase):

 @pytest.mark.tags(CaseLabel.L2)
 @pytest.mark.parametrize("index ", ct.all_index_types[9:11])
-@pytest.mark.parametrize("invalid_vector_type ", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+@pytest.mark.parametrize("invalid_vector_type ", ct.all_dense_vector_types)
 def test_invalid_sparse_vector_data(self, index, invalid_vector_type):
     """
     target: insert illegal data type
@ -591,10 +591,10 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
 expected:
     1. search output fields with Hybrid index
 """
-search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR.name, 3, 1
+search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR, 3, 1

 self.collection_wrap.search(
-    cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field), vector_field, search_params, limit,
+    cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field), vector_field.name, search_params, limit,
     output_fields=['*'], check_task=CheckTasks.check_search_results,
     check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field),
                  "limit": limit, "output_fields": self.all_fields})
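The pattern in this and the following DQL-suite hunks: the `DataType` member now drives data generation directly, and its `.name` supplies the `anns_field` string, since these class-based suites name each vector field after its datatype:

```python
from pymilvus import DataType

vector_field = DataType.FLOAT16_VECTOR
anns_field = vector_field.name  # the field name in these suites
assert anns_field == "FLOAT16_VECTOR"
```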
@ -1247,8 +1247,8 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
 @pytest.mark.parametrize("group_by_field", ['INT8', 'INT16', 'INT32', 'INT64', 'BOOL', 'VARCHAR'])
 @pytest.mark.parametrize(
     "dim, search_params, vector_field",
-    [(3, {"metric_type": MetricType.L2, "ef": 32}, DataType.FLOAT16_VECTOR.name),
-     (1000, {"metric_type": MetricType.IP, "drop_ratio_search": 0.2}, DataType.SPARSE_FLOAT_VECTOR.name)])
+    [(3, {"metric_type": MetricType.L2, "ef": 32}, DataType.FLOAT16_VECTOR),
+     (1000, {"metric_type": MetricType.IP, "drop_ratio_search": 0.2}, DataType.SPARSE_FLOAT_VECTOR)])
 def test_bitmap_index_search_group_by(self, limit, group_by_field, dim, search_params, vector_field):
     """
     target:

@ -1259,7 +1259,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
 expected:
     1. search group by with BITMAP index
 """
-res, _ = self.collection_wrap.search(cf.gen_vectors(nb=1, dim=dim, vector_data_type=vector_field), vector_field,
+res, _ = self.collection_wrap.search(cf.gen_vectors(nb=1, dim=dim, vector_data_type=vector_field), vector_field.name,
                                      search_params, limit, group_by_field=group_by_field,
                                      output_fields=[group_by_field])
 output_values = [i.fields for r in res for i in r]
@ -1285,9 +1285,9 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
     1. search iterator with BITMAP index
 """
 ef = 32 if batch_size <= 32 else batch_size # ef must be larger than or equal to batch size
-search_params, vector_field = {"metric_type": "L2", "ef": ef}, DataType.FLOAT16_VECTOR.name
+search_params, vector_field = {"metric_type": "L2", "ef": ef}, DataType.FLOAT16_VECTOR
 self.collection_wrap.search_iterator(
-    cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field, search_params, batch_size,
+    cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field.name, search_params, batch_size,
     expr='INT16 > 15', check_task=CheckTasks.check_search_iterator, check_items={"batch_size": batch_size})

 @pytest.mark.tags(CaseLabel.L1)
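The `ef` clamp above exists because, as the test's own comment notes, `ef` must be at least the iterator batch size. A short sketch of the constraint (the iterator call is commented out since it needs a live collection):

```python
batch_size = 100
ef = max(32, batch_size)  # ef must be >= batch size
search_params = {"metric_type": "L2", "ef": ef}
# iterator = collection.search_iterator(data=query_vectors,
#                                       anns_field="FLOAT16_VECTOR",
#                                       param=search_params,
#                                       batch_size=batch_size)
```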
@ -1301,10 +1301,10 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
 expected:
     1. search output fields with BITMAP index
 """
-search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR.name, 3, 1
+search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR, 3, 1

 self.collection_wrap.search(
-    cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field), vector_field, search_params, limit,
+    cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field), vector_field.name, search_params, limit,
     output_fields=['*'], check_task=CheckTasks.check_search_results,
     check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field),
                  "limit": limit, "output_fields": self.all_fields})
@ -1667,11 +1667,11 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase):
 expected:
     1. search output fields with BITMAP index
 """
-search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR.name, 3, 1
+search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR, 3, 1

 self.collection_wrap.search(
     cf.gen_vectors(nb=nq, dim=ct.default_dim, vector_data_type=vector_field),
-    vector_field, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
+    vector_field.name, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
     check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field),
                  "limit": limit, "output_fields": self.all_fields})
@ -1922,11 +1922,11 @@ class TestBitmapIndexMmap(TestCaseClassBase):
 expected:
     1. search output fields with BITMAP index
 """
-search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR.name, 3, 1
+search_params, vector_field, limit, nq = {"metric_type": "L2", "ef": 32}, DataType.FLOAT_VECTOR, 3, 1

 self.collection_wrap.search(
     cf.gen_vectors(nb=nq, dim=ct.default_dim, vector_data_type=vector_field),
-    vector_field, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
+    vector_field.name, search_params, limit, output_fields=['*'], check_task=CheckTasks.check_search_results,
     check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field),
                  "limit": limit, "output_fields": self.all_fields})
@ -2345,7 +2345,7 @@ class TestGroupSearch(TestCaseClassBase):
 string_values = pd.Series(data=[str(i) for i in range(nb)], dtype="string")
 data = [string_values]
 for i in range(len(self.vector_fields)):
-    data.append(cf.gen_vectors(dim=self.dims[i], nb=nb, vector_data_type=self.vector_fields[i]))
+    data.append(cf.gen_vectors(dim=self.dims[i], nb=nb, vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[i])))
 data.append(pd.Series(data=[np.int8(i) for i in range(nb)], dtype="int8"))
 data.append(pd.Series(data=[np.int64(i) for i in range(nb)], dtype="int64"))
 data.append(pd.Series(data=[np.bool_(i) for i in range(nb)], dtype="bool"))
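`cf.get_field_dtype_by_field_name` is new with this refactor. A plausible shape for it, resolving the field's `DataType` from the wrapped collection's schema (a sketch; the repo's implementation may differ):

```python
from pymilvus import DataType


def get_field_dtype_by_field_name(collection_wrap, field_name: str) -> DataType:
    """Return the DataType of the named field from the collection schema."""
    for field in collection_wrap.schema.fields:
        if field.name == field_name:
            return field.dtype
    raise ValueError(f"field '{field_name}' not found in collection schema")
```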
@ -2384,7 +2384,7 @@ class TestGroupSearch(TestCaseClassBase):
 limit = 50
 group_size = 5
 for j in range(len(self.vector_fields)):
-    search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=self.vector_fields[j])
+    search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[j]))
     search_params = {"params": cf.get_search_params_params(self.index_types[j])}
     # when strict_group_size=true, it shall return results with entities = limit * group_size
     res1 = self.collection_wrap.search(data=search_vectors, anns_field=self.vector_fields[j],

@ -2424,7 +2424,7 @@ class TestGroupSearch(TestCaseClassBase):
 req_list = []
 for j in range(len(self.vector_fields)):
     search_params = {
-        "data": cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=self.vector_fields[j]),
+        "data": cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[j])),
         "anns_field": self.vector_fields[j],
         "param": {"params": cf.get_search_params_params(self.index_types[j])},
         "limit": limit,

@ -2473,7 +2473,7 @@ class TestGroupSearch(TestCaseClassBase):
 req_list = []
 for i in range(len(self.vector_fields)):
     search_param = {
-        "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=self.vector_fields[i]),
+        "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[i])),
         "anns_field": self.vector_fields[i],
         "param": {},
         "limit": ct.default_limit,

@ -2497,7 +2497,7 @@ class TestGroupSearch(TestCaseClassBase):
 req_list = []
 for i in range(1, len(self.vector_fields)):
     search_param = {
-        "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=self.vector_fields[i]),
+        "data": cf.gen_vectors(ct.default_nq, dim=self.dims[i], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[i])),
         "anns_field": self.vector_fields[i],
         "param": {},
         "limit": ct.default_limit,

@ -2519,7 +2519,7 @@ class TestGroupSearch(TestCaseClassBase):
 nq = 2
 limit = 15
 for j in range(len(self.vector_fields)):
-    search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=self.vector_fields[j])
+    search_vectors = cf.gen_vectors(nq, dim=self.dims[j], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[j]))
     search_params = {"params": cf.get_search_params_params(self.index_types[j])}
     res1 = self.collection_wrap.search(data=search_vectors, anns_field=self.vector_fields[j],
                                        param=search_params, limit=limit,

@ -2561,7 +2561,7 @@ class TestGroupSearch(TestCaseClassBase):
 default_search_exp = f"{self.primary_field} >= 0"
 grpby_field = self.inverted_string_field
 default_search_field = self.vector_fields[1]
-search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1])
+search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[1]))
 all_pages_ids = []
 all_pages_grpby_field_values = []
 for r in range(page_rounds):

@ -2603,7 +2603,7 @@ class TestGroupSearch(TestCaseClassBase):
 default_search_exp = f"{self.primary_field} >= 0"
 grpby_field = self.inverted_string_field
 default_search_field = self.vector_fields[1]
-search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1])
+search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[1]))
 all_pages_ids = []
 all_pages_grpby_field_values = []
 res_count = limit * group_size

@ -2655,7 +2655,7 @@ class TestGroupSearch(TestCaseClassBase):
 """
 group_by_field = self.inverted_string_field
 default_search_field = self.vector_fields[1]
-search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=self.vector_fields[1])
+search_vectors = cf.gen_vectors(1, dim=self.dims[1], vector_data_type=cf.get_field_dtype_by_field_name(self.collection_wrap, self.vector_fields[1]))
 search_params = {}
 limit = 10
 max_group_size = 10
@ -2351,7 +2351,7 @@ class TestQueryOperation(TestcaseBase):
 expected: return the latest entity; verify the result is same as dedup entities
 """
 collection_w = self.init_collection_general(prefix, dim=16, is_flush=False, insert_data=False, is_index=False,
-                                            vector_data_type=ct.float_type, with_json=False)[0]
+                                            vector_data_type=DataType.FLOAT_VECTOR, with_json=False)[0]
 nb = 50
 rounds = 10
 for i in range(rounds):
@ -2465,7 +2465,7 @@ class TestQueryOperation(TestcaseBase):
 assert res[0].keys() == set(fields)

 @pytest.mark.tags(CaseLabel.L1)
-@pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+@pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types)
 def test_query_output_all_vector_type(self, vector_data_type):
     """
     target: test query output different vector type
@ -127,7 +127,7 @@ class TestQueryIterator(TestcaseBase):
 "batch_size": batch_size})

 @pytest.mark.tags(CaseLabel.L2)
-@pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+@pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types)
 def test_query_iterator_output_different_vector_type(self, vector_data_type):
     """
     target: test query iterator with output fields