test: add iterator check and update SDK version and cases [2.5] (#40132)

pr: #39798 #39960

Signed-off-by: nico <cheng.yuan@zilliz.com>
This commit is contained in:
nico 2025-02-24 19:13:56 +08:00 committed by GitHub
parent 29579a8ec9
commit 0c6518f344
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 84 additions and 95 deletions

View File

@ -154,7 +154,7 @@ class TestMilvusClientV2Base(Base):
return res, check_result
@trace()
def search_iterator(self, client, collection_name, data, batch_size, limit=-1, filter=None, output_fields=None,
def search_iterator(self, client, collection_name, data, batch_size=1000, limit=-1, filter=None, output_fields=None,
search_params=None, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})

View File

@ -193,7 +193,7 @@ class ApiCollectionWrapper:
return res, check_result
@trace()
def search_iterator(self, data, anns_field, param, batch_size, limit=-1, expr=None,
def search_iterator(self, data, anns_field, param, batch_size=1000, limit=-1, expr=None,
partition_names=None, output_fields=None, timeout=None, round_decimal=-1,
check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout

View File

@ -409,7 +409,7 @@ class ResponseChecker:
log.info("search_results_check: Checked the distances for one nq: OK")
else:
pass # just check nq and topk, not specific ids need check
vector_id += 1
vector_id += 1
log.info("search_results_check: limit (topK) and "
"ids searched for %d queries are correct" % len(search_res))
@ -432,7 +432,7 @@ class ResponseChecker:
while True:
try:
res = search_iterator.next()
if len(res) == 0:
if res is None or len(res) == 0:
log.info("search iteration finished, close")
search_iterator.close()
break
@ -454,6 +454,10 @@ class ResponseChecker:
except Exception as e:
assert check_items["err_msg"] in str(e)
return False
if check_items.get("limit"):
if "range_filter" not in check_items and "radius" not in check_items:
assert len(pk_list) / check_items["limit"] >= 0.9
assert len(pk_list) == len(set(pk_list))
log.info("check: total %d results" % len(pk_list))

View File

@ -138,7 +138,7 @@ class TestMilvusClientCollectionInvalid(TestMilvusClientV2Base):
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
error = {ct.err_code: 65535, ct.err_msg: f"type param(max_length) should be specified for varChar "
f"field of collection {collection_name}"}
f"field(id) of collection {collection_name}"}
self.create_collection(client, collection_name, default_dim, id_type="string", auto_id=True,
check_task=CheckTasks.err_res, check_items=error)

View File

@ -66,21 +66,6 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base):
self.create_collection(client, collection_name, default_dim, id_type="invalid",
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_collection_string_auto_id(self):
"""
target: test high level api: client.create_collection
method: create collection with auto id on string primary key
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
error = {ct.err_code: 65535, ct.err_msg: f"type param(max_length) should be specified for varChar "
f"field of collection {collection_name}"}
self.create_collection(client, collection_name, default_dim, id_type="string", auto_id=True,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_create_same_collection_different_params(self):
"""

View File

@ -7,15 +7,12 @@ from pymilvus import DataType
from base.client_v2_base import TestMilvusClientV2Base
prefix = "milvus_client_api_search_iterator"
epsilon = ct.epsilon
user_pre = "user"
role_pre = "role"
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_batch_size = ct.default_batch_size
default_metric_type = "COSINE"
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
@ -177,14 +174,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.flush(client, collection_name)
# 3. search iterator
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
self.search_iterator(client, collection_name, vectors_to_search, batch_size, search_params=search_params,
check_task=CheckTasks.check_search_iterator,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
check_task=CheckTasks.check_search_iterator, check_items=check_items)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@ -210,7 +207,7 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
schema.add_field("array_field", DataType.ARRAY, element_type=DataType.INT64, max_capacity=12,
max_length=64, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
index_params.add_index(default_vector_field_name, metric_type=default_metric_type)
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. insert
rng = np.random.default_rng(seed=19530)
@ -221,15 +218,15 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.flush(client, collection_name)
# 3. search iterator
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
self.search_iterator(client, collection_name, vectors_to_search, batch_size, filter="nullable_field>=10",
search_params=search_params,
check_task=CheckTasks.check_search_iterator,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
search_params=search_params, check_task=CheckTasks.check_search_iterator,
check_items=check_items)
if self.has_collection(client, collection_name)[0]:
self.drop_collection(client, collection_name)
@ -264,14 +261,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
# assert self.num_entities(client, collection_name)[0] == default_nb
# 3. search_iterator
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
self.search_iterator(client, new_name, vectors_to_search, batch_size, search_params=search_params,
check_task=CheckTasks.check_search_iterator,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
check_task=CheckTasks.check_search_iterator, check_items=check_items)
self.release_collection(client, new_name)
self.drop_collection(client, new_name)
@ -301,14 +298,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.insert(client, collection_name, rows)
# 3. search iterator
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
self.search_iterator(client, collection_name, vectors_to_search, batch_size, search_params=search_params,
check_task=CheckTasks.check_search_iterator,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
check_task=CheckTasks.check_search_iterator, check_items=check_items)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_search_iterator_string(self, search_params):
@ -331,12 +328,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.flush(client, collection_name)
# 3. search_iterator
vectors_to_search = rng.random((1, default_dim))
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
self.search_iterator(client, collection_name, vectors_to_search, batch_size, search_params=search_params,
check_task=CheckTasks.check_search_iterator,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"limit": default_limit})
check_task=CheckTasks.check_search_iterator, check_items=check_items)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@ -362,14 +361,18 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.insert(client, collection_name, rows)
# 3. search_iterator
vectors_to_search = rng.random((1, default_dim))
limit = default_limit if default_limit < default_batch_size else default_batch_size
check_items = {"batch_size": default_batch_size, "limit": limit, "metric_type": metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
self.search_iterator(client, collection_name, vectors_to_search, batch_size=default_batch_size,
limit=default_limit, search_params=search_params,
output_fields=[default_primary_key_field_name],
check_task=CheckTasks.check_search_iterator,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"limit": default_limit})
check_items=check_items)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@ -395,15 +398,19 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.insert(client, collection_name, rows)
# 3. search_iterator
vectors_to_search = rng.random((1, default_dim))
limit = default_limit if default_limit < default_batch_size else default_batch_size
check_items = {"batch_size": default_batch_size, "limit": limit, "metric_type": metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
search_params.update({"metric_type": metric_type})
self.search_iterator(client, collection_name, vectors_to_search, batch_size=default_batch_size,
limit=default_limit, search_params=search_params,
output_fields=[default_primary_key_field_name],
check_task=CheckTasks.check_search_iterator,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"limit": default_limit})
check_items=check_items)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@ -433,14 +440,16 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
if insert_id in insert_ids:
insert_ids.remove(insert_id)
limit = default_nb - delete_num
check_items = {"batch_size": default_batch_size, "limit": limit, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
self.search_iterator(client, collection_name, vectors_to_search, batch_size=default_batch_size,
search_params=search_params, limit=default_nb,
check_task=CheckTasks.check_search_iterator,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": limit})
check_items=check_items)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@ -470,18 +479,21 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
if insert_id in insert_ids:
insert_ids.remove(insert_id)
limit = default_nb - delete_num
check_items = {"batch_size": default_batch_size, "limit": limit, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
self.search_iterator(client, collection_name, vectors_to_search, batch_size=default_batch_size,
search_params=search_params, limit=default_nb,
check_task=CheckTasks.check_search_iterator,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": limit})
check_items=check_items)
# 5. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows[delete_num:],
"with_vec": True,
"primary_field": default_primary_key_field_name})
self.drop_collection(client, collection_name)
self.drop_collection(client, collection_name)

View File

@ -28,8 +28,8 @@ pytest-parallel
pytest-random-order
# pymilvus
pymilvus==2.5.5rc8
pymilvus[bulk_writer]==2.5.5rc8
pymilvus==2.5.5rc13
pymilvus[bulk_writer]==2.5.5rc13
# for customize config test
python-benedict==0.24.3

View File

@ -720,7 +720,7 @@ class TestCollectionParams(TestcaseBase):
c_name = cf.gen_unique_str(prefix)
float_vec_field = cf.gen_float_vec_field(dim=dim)
schema = cf.gen_collection_schema(fields=[cf.gen_int64_field(is_primary=True), float_vec_field])
error = {ct.err_code: 65535, ct.err_msg: "invalid dimension: {}.".format(dim)}
error = {ct.err_code: 65535, ct.err_msg: "invalid dimension: {}".format(dim)}
self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@ -4096,8 +4096,8 @@ class TestCollectionARRAY(TestcaseBase):
array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: "type param(max_length) should be specified for "
"varChar field of collection"})
ct.err_msg: "type param(max_length) should be specified for varChar "
"field(int_array) of collection"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("https://github.com/milvus-io/pymilvus/issues/2041")

View File

@ -65,21 +65,6 @@ class TestHighLevelApi(TestMilvusClientV2Base):
self.create_collection(client, collection_name, default_dim, consistency_level="Strong",
id_type="invalid", check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_high_level_collection_string_auto_id(self):
"""
target: test high level api: client.create_collection
method: create collection with auto id on string primary key
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
error = {ct.err_code: 65535, ct.err_msg: f"type param(max_length) should be specified for varChar "
f"field of collection {collection_name}"}
self.create_collection(client, collection_name, default_dim, id_type="string", auto_id=True,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_high_level_create_same_collection_different_params(self):
"""

View File

@ -2198,7 +2198,8 @@ class TestScaNNIndex(TestcaseBase):
"""
collection_w = self.init_collection_general(prefix, is_index=False)[0]
index_params = {"index_type": "SCANN", "metric_type": "L2", "params": {"nlist": nlist}}
error = {ct.err_code: 999, ct.err_msg: f"Out of range in json: param 'nlist' ({nlist}) should be in range [1, 65536]"}
error = {ct.err_code: 999,
ct.err_msg: f"Out of range in json: param 'nlist' ({nlist}) should be in range [1, 65536]"}
collection_w.create_index(default_field_name, index_params,
check_task=CheckTasks.err_res, check_items=error)
@ -2213,7 +2214,8 @@ class TestScaNNIndex(TestcaseBase):
collection_w = self.init_collection_general(prefix, is_index=False, dim=dim)[0]
index_params = {"index_type": "SCANN", "metric_type": "L2", "params": {"nlist": 1024}}
error = {ct.err_code: 1100,
ct.err_msg: f"The dimension of a vector (dim) should be a multiple of 2. Dimension:{dim}"}
ct.err_msg: f"The dimension of a vector (dim) should be a multiple of sub_dim. "
f"Dimension:{dim}, sub_dim:2"}
collection_w.create_index(default_field_name, index_params,
check_task=CheckTasks.err_res, check_items=error)

View File

@ -1146,7 +1146,8 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
expected:
1. search iterator with BITMAP index
"""
search_params, vector_field = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR.name
ef = 32 if batch_size <= 32 else batch_size # ef must be larger than or equal to batch size
search_params, vector_field = {"metric_type": "L2", "ef": ef}, DataType.FLOAT16_VECTOR.name
self.collection_wrap.search_iterator(
cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field, search_params, batch_size,
expr='INT16 > 15', check_task=CheckTasks.check_search_iterator, check_items={"batch_size": batch_size})

View File

@ -1770,7 +1770,7 @@ class TestQueryParams(TestcaseBase):
collection_w.insert(data)
# 3. query with param ignore_growing invalid
error = {ct.err_code: 999, ct.err_msg: "parse search growing failed"}
error = {ct.err_code: 999, ct.err_msg: "parse ignore growing field failed"}
collection_w.query('int64 >= 0', ignore_growing=ignore_growing,
check_task=CheckTasks.err_res, check_items=error)

View File

@ -1043,7 +1043,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
default_search_exp,
check_task=CheckTasks.err_res,
check_items={"err_code": 999,
"err_msg": "parse search growing failed"})
"err_msg": "parse ignore growing field failed"})
@pytest.mark.tags(CaseLabel.L2)
def test_search_param_invalid_guarantee_timestamp(self, get_invalid_guarantee_timestamp):
@ -4894,7 +4894,7 @@ class TestCollectionSearch(TestcaseBase):
binary_schema = cf.gen_default_binary_collection_schema(dim=dim)
self.init_collection_wrap(c_name, schema=binary_schema,
check_task=CheckTasks.err_res,
check_items={"err_code": 999, "err_msg": f"invalid dimension: {dim}."})
check_items={"err_code": 999, "err_msg": f"invalid dimension: {dim}"})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="issue #37547")