test: update ttl test comments and update for expressions tests (#42611)

related issue: #42604

1. Move the all-expressions test case to L3 for now, as it takes too many
hours to run. Its performance will be improved in a follow-up PR.

---------

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
This commit is contained in:
yanliang567 2025-06-11 16:52:38 +08:00 committed by GitHub
parent 9af6c16ea0
commit fb7f19dfa1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 133 additions and 79 deletions

View File

@ -2519,6 +2519,7 @@ def gen_json_field_expressions_all_single_operator():
return expressions
def gen_field_expressions_all_single_operator_each_field(field = ct.default_int64_field_name):
"""
Gen a list of filter in expression-format(as a string)
@ -2678,6 +2679,7 @@ def gen_field_expressions_all_single_operator_each_field(field = ct.default_int6
return expressions
def concatenate_uneven_arrays(arr1, arr2):
"""
concatenate the element in two arrays with different length

View File

@ -75,15 +75,14 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
@pytest.mark.parametrize("is_flush", [True])
@pytest.mark.parametrize("is_release", [True])
@pytest.mark.parametrize("single_data_num", [50])
@pytest.mark.parametrize("expr_field", [ct.default_int64_field_name])
@pytest.mark.parametrize("expr_field", [ct.default_int64_field_name,
# ct.default_string_field_name, # TODO: uncomment after #42604 is fixed
ct.default_float_array_field_name])
def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array(self,
enable_dynamic_field,
supported_bool_scalar_index,
supported_numeric_float_double_index,
supported_numeric_scalar_index,
supported_varchar_scalar_index,
# supported_varchar_scalar_index,
supported_json_path_index,
supported_array_scalar_index,
supported_array_double_float_scalar_index,
is_flush,
is_release,
@ -169,7 +168,7 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
} for j in range(i * nb_single, (i + 1) * nb_single)]
assert len(rows) == nb_single
log.info(rows)
# log.info(rows)
self.insert(client, collection_name=collection_name, data=rows)
log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
# 3. flush if specified
@ -181,13 +180,13 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter '{express_list[i]}' before scalar index is:")
res = \
self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=["count(*)"])[0]
log.info(f"query with filter '{express_list[i]}' before scalar index")
res = self.query(client, collection_name=collection_name,
filter=express_list[i], output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=[f"{expr_field}"])[0]
# log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
res = self.query(client, collection_name=collection_name,
filter=express_list[i], output_fields=[f"{expr_field}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[f"{expr_field}"])
@ -203,24 +202,24 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
# 6. prepare index params with json path index
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=ct.default_bool_field_name, index_type=supported_bool_scalar_index)
index_params.add_index(field_name=ct.default_int8_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int16_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int32_field_name, index_type=supported_numeric_scalar_index)
# index_params.add_index(field_name=ct.default_bool_field_name, index_type=supported_bool_scalar_index)
# index_params.add_index(field_name=ct.default_int8_field_name, index_type=supported_numeric_scalar_index)
# index_params.add_index(field_name=ct.default_int16_field_name, index_type=supported_numeric_scalar_index)
# index_params.add_index(field_name=ct.default_int32_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int64_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_float_field_name, index_type=supported_numeric_float_double_index)
index_params.add_index(field_name=ct.default_double_field_name, index_type=supported_numeric_float_double_index)
index_params.add_index(field_name=ct.default_string_field_name, index_type=supported_varchar_scalar_index)
index_params.add_index(field_name=ct.default_int8_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int16_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int32_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int64_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_bool_array_field_name, index_type=supported_array_scalar_index)
# index_params.add_index(field_name=ct.default_float_field_name, index_type=supported_numeric_float_double_index)
# index_params.add_index(field_name=ct.default_double_field_name, index_type=supported_numeric_float_double_index)
# index_params.add_index(field_name=ct.default_string_field_name, index_type=supported_varchar_scalar_index)
# index_params.add_index(field_name=ct.default_int8_array_field_name, index_type=supported_array_scalar_index)
# index_params.add_index(field_name=ct.default_int16_array_field_name, index_type=supported_array_scalar_index)
# index_params.add_index(field_name=ct.default_int32_array_field_name, index_type=supported_array_scalar_index)
# index_params.add_index(field_name=ct.default_int64_array_field_name, index_type=supported_array_scalar_index)
# index_params.add_index(field_name=ct.default_bool_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_float_array_field_name,
index_type=supported_array_double_float_scalar_index)
index_params.add_index(field_name=ct.default_double_array_field_name,
index_type=supported_array_double_float_scalar_index)
index_params.add_index(field_name=ct.default_string_array_field_name, index_type=supported_array_scalar_index)
# index_params.add_index(field_name=ct.default_double_array_field_name,
# index_type=supported_array_double_float_scalar_index)
# index_params.add_index(field_name=ct.default_string_array_field_name, index_type=supported_array_scalar_index)
json_index_name = "json_index_name"
json_path_list = [f"{ct.default_json_field_name}",
f"{ct.default_json_field_name}[0]",
@ -251,57 +250,62 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
# 10. sleep for 60s to make sure the new index loads successfully without release and reload operations
time.sleep(60)
# 11. query after there is index under all expressions which should get the same result
# with that without index
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter '{express_list[i]}' after index is:")
log.info(f"query with filter '{express_list[i]}' after index")
count = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=["count(*)"])[0]
log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
# log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=[f"{expr_field}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[f"{expr_field}"])
if len(json_list) != len(compare_dict[f'{i}']["json_list"]):
log.debug(
f"the field {expr_field} value after index {supported_array_scalar_index} under expression '{express_list[i]}' is:")
log.debug(json_list)
log.debug(
f"the field {expr_field} value before index to be compared under expression '{express_list[i]}' is:")
log.debug(compare_dict[f'{i}']["json_list"])
# if len(json_list) != len(compare_dict[f'{i}']["json_list"]):
# log.debug(
# f"the field {expr_field} value after indexed under expression '{express_list[i]}' is:")
# log.debug(json_list)
# log.debug(
# f"the field {expr_field} value before index to be compared under expression '{express_list[i]}' is:")
# log.debug(compare_dict[f'{i}']["json_list"])
assert json_list == compare_dict[f'{i}']["json_list"]
if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
log.debug(
f"primary key field {default_primary_key_field_name} after index {supported_array_scalar_index} under expression '{express_list[i]}' is:")
log.debug(id_list)
log.debug(
f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is:")
log.debug(compare_dict[f'{i}']["id_list"])
# if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
# log.debug(
# f"primary key field {default_primary_key_field_name} after indexed under expression '{express_list[i]}' is:")
# log.debug(id_list)
# log.debug(
# f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is:")
# log.debug(compare_dict[f'{i}']["id_list"])
assert id_list == compare_dict[f'{i}']["id_list"]
log.info(f"PASS with expression {express_list[i]}")
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("enable_dynamic_field", [False])
@pytest.mark.parametrize("is_flush", [True, False])
@pytest.mark.parametrize("is_release", [True, False])
@pytest.mark.parametrize("is_release", [True])
@pytest.mark.parametrize("single_data_num", [50])
@pytest.mark.parametrize("expr_field", [ct.default_int8_field_name, ct.default_int16_field_name,
ct.default_int32_field_name, ct.default_int64_field_name,
ct.default_float_field_name, ct.default_double_field_name,
ct.default_string_field_name, ct.default_bool_field_name,
ct.default_int8_array_field_name, ct.default_int16_array_field_name,
ct.default_int32_array_field_name,ct.default_int64_array_field_name,
ct.default_int32_array_field_name, ct.default_int64_array_field_name,
ct.default_bool_array_field_name, ct.default_float_array_field_name,
ct.default_double_array_field_name, ct.default_string_array_field_name])
def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_all(self, enable_dynamic_field, supported_bool_scalar_index,
def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_all(self, enable_dynamic_field,
supported_bool_scalar_index,
supported_numeric_float_double_index,
supported_numeric_scalar_index, supported_varchar_scalar_index,
supported_json_path_index, supported_array_scalar_index,
supported_numeric_scalar_index,
supported_varchar_scalar_index,
supported_json_path_index,
supported_array_scalar_index,
supported_array_double_float_scalar_index,
is_flush, is_release, single_data_num, expr_field):
is_flush,
is_release,
single_data_num,
expr_field):
"""
target: test query using expression fields with all supported field type after all supported scalar index
with all supported basic expressions
@ -382,7 +386,7 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
} for j in range(i * nb_single, (i + 1) * nb_single)]
assert len(rows) == nb_single
log.info(rows)
# log.info(rows)
self.insert(client, collection_name=collection_name, data=rows)
log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
# 3. flush if specified
@ -394,10 +398,10 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter '{express_list[i]}' before scalar index is:")
log.info(f"query with filter '{express_list[i]}' before scalar index")
res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
# log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=[f"{expr_field}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
@ -464,10 +468,10 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter '{express_list[i]}' after index is:")
log.info(f"query with filter '{express_list[i]}' after index")
count = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=["count(*)"])[0]
log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
# log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=[f"{expr_field}"])[0]
for single in res:

View File

@ -3544,6 +3544,23 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base):
# Function-scoped parametrized fixture: yields each metric type in `params`
# ("COSINE", then "L2") to the tests that request `metric_type`.
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
# Function-scoped parametrized fixture over the rerank field data types listed
# in `params` (INT8, INT16, INT32, FLOAT, DOUBLE).
# It reads the `--tags` command-line option (default ['L0', 'L1', 'L2']); when
# CaseLabel.L2 is not among the tags, only INT8 and FLOAT are exercised and the
# remaining data types are skipped via pytest.skip.
@pytest.fixture(scope="function", params=[DataType.INT8, DataType.INT16, DataType.INT32,
DataType.FLOAT, DataType.DOUBLE])
def rerank_fields(self, request):
tags = request.config.getoption("--tags", default=['L0', 'L1', 'L2'], skip=True)
if CaseLabel.L2 not in tags:
if request.param not in [DataType.INT8, DataType.FLOAT]:
pytest.skip(f"skip rerank field type {request.param}")
yield request.param
# Function-scoped parametrized fixture over the scalar index types listed in
# `params` ("STL_SORT", "INVERTED", "AUTOINDEX", and the empty string).
# It reads the `--tags` command-line option (default ['L0', 'L1', 'L2']); when
# CaseLabel.L2 is not among the tags, only "INVERTED" and "" are exercised and
# the remaining index types are skipped via pytest.skip.
@pytest.fixture(scope="function", params=["STL_SORT", "INVERTED", "AUTOINDEX", ""])
def scalar_index(self, request):
tags = request.config.getoption("--tags", default=['L0', 'L1', 'L2'], skip=True)
if CaseLabel.L2 not in tags:
if request.param not in ["INVERTED", ""]:
pytest.skip(f"skip scalar index type {request.param}")
yield request.param
"""
******************************************************************
@ -3807,8 +3824,6 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base):
)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("rerank_fields", [DataType.INT8, DataType.INT16, DataType.INT32,
DataType.FLOAT, DataType.DOUBLE])
def test_milvus_client_search_with_reranker_all_supported_datatype_field(self, rerank_fields):
"""
target: test search with reranker with partition key field
@ -3887,13 +3902,26 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base):
)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="pymilvus issue 42011")
@pytest.mark.parametrize("rerank_fields", [DataType.INT8, DataType.INT16, DataType.INT32,
DataType.FLOAT, DataType.DOUBLE])
@pytest.mark.parametrize("index", ["STL_SORT", "INVERTED", "AUTOINDEX", ""])
@pytest.mark.parametrize("mmap", [True, False])
def test_milvus_client_search_with_reranker_scalar_index(self, rerank_fields, index, mmap):
def test_milvus_client_search_with_reranker_scalar_index(self, rerank_fields, scalar_index, mmap):
"""
Test search functionality with reranker using scalar index in Milvus client.
This test verifies the search operation works correctly when using a reranker with different scalar index types.
It covers various scenarios including:
- Different data types for rerank fields (INT8, INT16, INT32, FLOAT, DOUBLE)
- Different index types (STL_SORT, INVERTED, AUTOINDEX, "")
- Memory-mapped and non-memory-mapped configurations
The test performs the following steps:
1. Creates a collection with specified schema and index parameters
2. Inserts test data with appropriate data types
3. Builds indexes on both vector and scalar fields
4. Executes search operations with reranking function
5. Validates search results with different filter conditions
6. Cleans up by releasing collection and dropping indexes
Note: This is an L1 (basic functionality) test case.
target: test search with reranker with scalar index
method: create connection, collection, insert and search
expected: search successfully
@ -3908,7 +3936,7 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base):
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(ct.default_reranker_field_name, rerank_fields, mmap_enabled=mmap)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
index_params.add_index(default_vector_field_name, index_type='HNSW', metric_type="COSINE")
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. insert
rng = np.random.default_rng(seed=19530)
@ -3930,15 +3958,18 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base):
ct.default_reranker_field_name: value}
rows.append(single_row)
self.insert(client, collection_name, rows)
# flush
self.flush(client, collection_name)
# 2. prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=ct.default_reranker_field_name, index_type=index, params={})
index_params.add_index(field_name=ct.default_reranker_field_name, index_type=scalar_index, params={})
# 3. create index
self.create_index(client, collection_name, index_params)
# 3. compact
# 4. compact
self.compact(client, collection_name)
# 4. flush
self.flush(client, collection_name)
self.wait_for_index_ready(client, collection_name, index_name=ct.default_reranker_field_name)
self.wait_for_index_ready(client, collection_name, index_name=default_vector_field_name)
# 5. search
my_rerank_fn = Function(
name="my_reranker",
@ -3980,15 +4011,17 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base):
self.drop_index(client, collection_name, ct.default_reranker_field_name)
self.drop_index(client, collection_name, default_vector_field_name)
# 6. create index
params = {"metric_type": "L2"}
if index != "STL_SORT":
params = {"metric_type": "COSINE"}
if scalar_index != "STL_SORT":
params['mmap.enabled'] = mmap
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=ct.default_reranker_field_name, index_type=index, params=params)
index_params.add_index(field_name=default_vector_field_name, index_type="IVF_FLAT", params=params)
index_params.add_index(field_name=ct.default_reranker_field_name, index_type=scalar_index, params=params)
index_params.add_index(field_name=default_vector_field_name, index_type='HNSW', params=params)
self.create_index(client, collection_name, index_params)
self.wait_for_index_ready(client, collection_name, index_name=ct.default_reranker_field_name)
self.wait_for_index_ready(client, collection_name, index_name=default_vector_field_name)
self.load_collection(client, collection_name)
vectors_to_search = rng.random((1, dim))
# vectors_to_search = rng.random((1, dim))
self.search(client, collection_name, vectors_to_search, ranker=my_rerank_fn,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,

View File

@ -86,7 +86,7 @@ class TestCollectionRangeSearch(TestcaseBase):
@pytest.fixture(scope="function", params=ct.all_index_types[:8])
def index_type(self, request):
tags = request.config.getoption("--tags")
tags = request.config.getoption("--tags", default=['L0', 'L1', 'L2'], skip=True)
if CaseLabel.L2 not in tags:
if request.param not in ct.L0_index_types:
pytest.skip(f"skip index type {request.param}")
@ -94,10 +94,10 @@ class TestCollectionRangeSearch(TestcaseBase):
@pytest.fixture(scope="function", params=ct.dense_metrics)
def metric(self, request):
tags = request.config.getoption("--tags")
tags = request.config.getoption("--tags", default=['L0', 'L1', 'L2'], skip=True)
if CaseLabel.L2 not in tags:
if request.param != ct.default_L0_metric:
pytest.skip(f"skip index type {request.param}")
pytest.skip(f"skip metric type {request.param}")
yield request.param
@pytest.fixture(scope="function", params=[default_nb, default_nb_medium])

View File

@ -17,9 +17,24 @@ class TestMilvusClientTTL(TestMilvusClientV2Base):
@pytest.mark.parametrize("on_insert", [True, False])
def test_milvus_client_ttl_default(self, flush_enable, on_insert):
"""
target: verify that data is invisible after ttl
method: create collection with ttl, insert data, wait for ttl, search data
expected: data is invisible
Test case for verifying TTL (Time To Live) functionality in Milvus client.
This test verifies that:
1. Data becomes invisible after the specified TTL period
2. Different operations (search, query, hybrid search) correctly handle expired data
3. TTL can be altered and the changes take effect
4. Newly inserted data is not affected by previous TTL settings
The test performs the following steps:
1. Create a collection with TTL enabled
2. Insert test data
3. Wait for TTL to expire and verify data becomes invisible
4. Insert new data and verify the newly inserted data is visible
5. Alter TTL and verify the changes
Parameters:
- flush_enable: Whether to flush collection during testing
- on_insert: Whether to use insert or upsert operation
"""
client = self._client()
dim = 65