From fb7f19dfa1bbf898fc5c7f226bcadf9139310261 Mon Sep 17 00:00:00 2001 From: yanliang567 <82361606+yanliang567@users.noreply.github.com> Date: Wed, 11 Jun 2025 16:52:38 +0800 Subject: [PATCH] test: update ttl test comments and update for expressions tests (#42611) related issue: #42604 1. Move the all-field-types expression test to L3 for now, as it takes too many hours to complete. Its performance will be improved in the next PR. --------- Signed-off-by: yanliang567 --- tests/python_client/common/common_func.py | 2 + .../test_milvus_client_data_integrity.py | 116 +++++++++--------- .../test_milvus_client_search.py | 67 +++++++--- .../test_milvus_client_range_search.py | 6 +- .../test_milvus_client_ttl.py | 21 +++- 5 files changed, 133 insertions(+), 79 deletions(-) diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index c10c42e895..9b4ae23189 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -2519,6 +2519,7 @@ def gen_json_field_expressions_all_single_operator(): return expressions + def gen_field_expressions_all_single_operator_each_field(field = ct.default_int64_field_name): """ Gen a list of filter in expression-format(as a string) @@ -2678,6 +2679,7 @@ def gen_field_expressions_all_single_operator_each_field(field = ct.default_int6 return expressions + def concatenate_uneven_arrays(arr1, arr2): """ concatenate the element in two arrays with different length diff --git a/tests/python_client/milvus_client/test_milvus_client_data_integrity.py b/tests/python_client/milvus_client/test_milvus_client_data_integrity.py index fabcaac708..ede1038167 100644 --- a/tests/python_client/milvus_client/test_milvus_client_data_integrity.py +++ b/tests/python_client/milvus_client/test_milvus_client_data_integrity.py @@ -75,15 +75,14 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base): @pytest.mark.parametrize("is_flush", [True]) @pytest.mark.parametrize("is_release", 
[True]) @pytest.mark.parametrize("single_data_num", [50]) - @pytest.mark.parametrize("expr_field", [ct.default_int64_field_name]) + @pytest.mark.parametrize("expr_field", [ct.default_int64_field_name, + # ct.default_string_field_name, # TODO: uncommented after #42604 fixed + ct.default_float_array_field_name]) def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array(self, enable_dynamic_field, - supported_bool_scalar_index, - supported_numeric_float_double_index, supported_numeric_scalar_index, - supported_varchar_scalar_index, + # supported_varchar_scalar_index, supported_json_path_index, - supported_array_scalar_index, supported_array_double_float_scalar_index, is_flush, is_release, @@ -169,7 +168,7 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base): ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None } for j in range(i * nb_single, (i + 1) * nb_single)] assert len(rows) == nb_single - log.info(rows) + # log.info(rows) self.insert(client, collection_name=collection_name, data=rows) log.info(f"inserted {nb_single} {inserted_data_distribution[i]}") # 3. 
flush if specified @@ -181,13 +180,13 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base): for i in range(len(express_list)): json_list = [] id_list = [] - log.info(f"query with filter '{express_list[i]}' before scalar index is:") - res = \ - self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=["count(*)"])[0] + log.info(f"query with filter '{express_list[i]}' before scalar index") + res = self.query(client, collection_name=collection_name, + filter=express_list[i], output_fields=["count(*)"])[0] count = res[0]['count(*)'] - log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}") - res = self.query(client, collection_name=collection_name, filter=express_list[i], - output_fields=[f"{expr_field}"])[0] + # log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}") + res = self.query(client, collection_name=collection_name, + filter=express_list[i], output_fields=[f"{expr_field}"])[0] for single in res: id_list.append(single[f"{default_primary_key_field_name}"]) json_list.append(single[f"{expr_field}"]) @@ -203,24 +202,24 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base): # 6. 
prepare index params with json path index index_params = self.prepare_index_params(client)[0] index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE") - index_params.add_index(field_name=ct.default_bool_field_name, index_type=supported_bool_scalar_index) - index_params.add_index(field_name=ct.default_int8_field_name, index_type=supported_numeric_scalar_index) - index_params.add_index(field_name=ct.default_int16_field_name, index_type=supported_numeric_scalar_index) - index_params.add_index(field_name=ct.default_int32_field_name, index_type=supported_numeric_scalar_index) + # index_params.add_index(field_name=ct.default_bool_field_name, index_type=supported_bool_scalar_index) + # index_params.add_index(field_name=ct.default_int8_field_name, index_type=supported_numeric_scalar_index) + # index_params.add_index(field_name=ct.default_int16_field_name, index_type=supported_numeric_scalar_index) + # index_params.add_index(field_name=ct.default_int32_field_name, index_type=supported_numeric_scalar_index) index_params.add_index(field_name=ct.default_int64_field_name, index_type=supported_numeric_scalar_index) - index_params.add_index(field_name=ct.default_float_field_name, index_type=supported_numeric_float_double_index) - index_params.add_index(field_name=ct.default_double_field_name, index_type=supported_numeric_float_double_index) - index_params.add_index(field_name=ct.default_string_field_name, index_type=supported_varchar_scalar_index) - index_params.add_index(field_name=ct.default_int8_array_field_name, index_type=supported_array_scalar_index) - index_params.add_index(field_name=ct.default_int16_array_field_name, index_type=supported_array_scalar_index) - index_params.add_index(field_name=ct.default_int32_array_field_name, index_type=supported_array_scalar_index) - index_params.add_index(field_name=ct.default_int64_array_field_name, index_type=supported_array_scalar_index) - 
index_params.add_index(field_name=ct.default_bool_array_field_name, index_type=supported_array_scalar_index) + # index_params.add_index(field_name=ct.default_float_field_name, index_type=supported_numeric_float_double_index) + # index_params.add_index(field_name=ct.default_double_field_name, index_type=supported_numeric_float_double_index) + # index_params.add_index(field_name=ct.default_string_field_name, index_type=supported_varchar_scalar_index) + # index_params.add_index(field_name=ct.default_int8_array_field_name, index_type=supported_array_scalar_index) + # index_params.add_index(field_name=ct.default_int16_array_field_name, index_type=supported_array_scalar_index) + # index_params.add_index(field_name=ct.default_int32_array_field_name, index_type=supported_array_scalar_index) + # index_params.add_index(field_name=ct.default_int64_array_field_name, index_type=supported_array_scalar_index) + # index_params.add_index(field_name=ct.default_bool_array_field_name, index_type=supported_array_scalar_index) index_params.add_index(field_name=ct.default_float_array_field_name, index_type=supported_array_double_float_scalar_index) - index_params.add_index(field_name=ct.default_double_array_field_name, - index_type=supported_array_double_float_scalar_index) - index_params.add_index(field_name=ct.default_string_array_field_name, index_type=supported_array_scalar_index) + # index_params.add_index(field_name=ct.default_double_array_field_name, + # index_type=supported_array_double_float_scalar_index) + # index_params.add_index(field_name=ct.default_string_array_field_name, index_type=supported_array_scalar_index) json_index_name = "json_index_name" json_path_list = [f"{ct.default_json_field_name}", f"{ct.default_json_field_name}[0]", @@ -251,57 +250,62 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base): # 10. sleep for 60s to make sure the new index load successfully without release and reload operations time.sleep(60) # 11. 
query after there is index under all expressions which should get the same result - # with that without index for i in range(len(express_list)): json_list = [] id_list = [] - log.info(f"query with filter '{express_list[i]}' after index is:") + log.info(f"query with filter '{express_list[i]}' after index") count = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=["count(*)"])[0] - log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}") + # log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}") res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=[f"{expr_field}"])[0] for single in res: id_list.append(single[f"{default_primary_key_field_name}"]) json_list.append(single[f"{expr_field}"]) - if len(json_list) != len(compare_dict[f'{i}']["json_list"]): - log.debug( - f"the field {expr_field} value after index {supported_array_scalar_index} under expression '{express_list[i]}' is:") - log.debug(json_list) - log.debug( - f"the field {expr_field} value before index to be compared under expression '{express_list[i]}' is:") - log.debug(compare_dict[f'{i}']["json_list"]) + # if len(json_list) != len(compare_dict[f'{i}']["json_list"]): + # log.debug( + # f"the field {expr_field} value after indexed under expression '{express_list[i]}' is:") + # log.debug(json_list) + # log.debug( + # f"the field {expr_field} value before index to be compared under expression '{express_list[i]}' is:") + # log.debug(compare_dict[f'{i}']["json_list"]) assert json_list == compare_dict[f'{i}']["json_list"] - if len(id_list) != len(compare_dict[f'{i}']["id_list"]): - log.debug( - f"primary key field {default_primary_key_field_name} after index {supported_array_scalar_index} under expression '{express_list[i]}' is:") - log.debug(id_list) - log.debug( - f"primary key field {default_primary_key_field_name} before index to be compared under 
expression '{express_list[i]}' is:") - log.debug(compare_dict[f'{i}']["id_list"]) + # if len(id_list) != len(compare_dict[f'{i}']["id_list"]): + # log.debug( + # f"primary key field {default_primary_key_field_name} after indexed under expression '{express_list[i]}' is:") + # log.debug(id_list) + # log.debug( + # f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is:") + # log.debug(compare_dict[f'{i}']["id_list"]) assert id_list == compare_dict[f'{i}']["id_list"] log.info(f"PASS with expression {express_list[i]}") self.drop_collection(client, collection_name) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("enable_dynamic_field", [False]) @pytest.mark.parametrize("is_flush", [True, False]) - @pytest.mark.parametrize("is_release", [True, False]) + @pytest.mark.parametrize("is_release", [True]) @pytest.mark.parametrize("single_data_num", [50]) @pytest.mark.parametrize("expr_field", [ct.default_int8_field_name, ct.default_int16_field_name, ct.default_int32_field_name, ct.default_int64_field_name, ct.default_float_field_name, ct.default_double_field_name, ct.default_string_field_name, ct.default_bool_field_name, ct.default_int8_array_field_name, ct.default_int16_array_field_name, - ct.default_int32_array_field_name,ct.default_int64_array_field_name, + ct.default_int32_array_field_name, ct.default_int64_array_field_name, ct.default_bool_array_field_name, ct.default_float_array_field_name, ct.default_double_array_field_name, ct.default_string_array_field_name]) - def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_all(self, enable_dynamic_field, supported_bool_scalar_index, + def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_all(self, enable_dynamic_field, + supported_bool_scalar_index, supported_numeric_float_double_index, - supported_numeric_scalar_index, supported_varchar_scalar_index, 
- supported_json_path_index, supported_array_scalar_index, + supported_numeric_scalar_index, + supported_varchar_scalar_index, + supported_json_path_index, + supported_array_scalar_index, supported_array_double_float_scalar_index, - is_flush, is_release, single_data_num, expr_field): + is_flush, + is_release, + single_data_num, + expr_field): """ target: test query using expression fields with all supported field type after all supported scalar index with all supported basic expressions @@ -382,7 +386,7 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base): ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None } for j in range(i * nb_single, (i + 1) * nb_single)] assert len(rows) == nb_single - log.info(rows) + # log.info(rows) self.insert(client, collection_name=collection_name, data=rows) log.info(f"inserted {nb_single} {inserted_data_distribution[i]}") # 3. flush if specified @@ -394,10 +398,10 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base): for i in range(len(express_list)): json_list = [] id_list = [] - log.info(f"query with filter '{express_list[i]}' before scalar index is:") + log.info(f"query with filter '{express_list[i]}' before scalar index") res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=["count(*)"])[0] count = res[0]['count(*)'] - log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}") + # log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}") res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=[f"{expr_field}"])[0] for single in res: id_list.append(single[f"{default_primary_key_field_name}"]) @@ -464,10 +468,10 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base): for i in range(len(express_list)): json_list = [] id_list = [] - log.info(f"query with filter '{express_list[i]}' after index is:") + 
log.info(f"query with filter '{express_list[i]}' after index") count = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=["count(*)"])[0] - log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}") + # log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}") res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=[f"{expr_field}"])[0] for single in res: diff --git a/tests/python_client/milvus_client/test_milvus_client_search.py b/tests/python_client/milvus_client/test_milvus_client_search.py index 4f2f99c650..56e160438c 100644 --- a/tests/python_client/milvus_client/test_milvus_client_search.py +++ b/tests/python_client/milvus_client/test_milvus_client_search.py @@ -3544,6 +3544,23 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base): @pytest.fixture(scope="function", params=["COSINE", "L2"]) def metric_type(self, request): yield request.param + + @pytest.fixture(scope="function", params=[DataType.INT8, DataType.INT16, DataType.INT32, + DataType.FLOAT, DataType.DOUBLE]) + def rerank_fields(self, request): + tags = request.config.getoption("--tags", default=['L0', 'L1', 'L2'], skip=True) + if CaseLabel.L2 not in tags: + if request.param not in [DataType.INT8, DataType.FLOAT]: + pytest.skip(f"skip rerank field type {request.param}") + yield request.param + + @pytest.fixture(scope="function", params=["STL_SORT", "INVERTED", "AUTOINDEX", ""]) + def scalar_index(self, request): + tags = request.config.getoption("--tags", default=['L0', 'L1', 'L2'], skip=True) + if CaseLabel.L2 not in tags: + if request.param not in ["INVERTED", ""]: + pytest.skip(f"skip scalar index type {request.param}") + yield request.param """ ****************************************************************** @@ -3807,8 +3824,6 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base): ) @pytest.mark.tags(CaseLabel.L2) - 
@pytest.mark.parametrize("rerank_fields", [DataType.INT8, DataType.INT16, DataType.INT32, - DataType.FLOAT, DataType.DOUBLE]) def test_milvus_client_search_with_reranker_all_supported_datatype_field(self, rerank_fields): """ target: test search with reranker with partition key field @@ -3887,13 +3902,26 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base): ) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason="pymilvus issue 42011") - @pytest.mark.parametrize("rerank_fields", [DataType.INT8, DataType.INT16, DataType.INT32, - DataType.FLOAT, DataType.DOUBLE]) - @pytest.mark.parametrize("index", ["STL_SORT", "INVERTED", "AUTOINDEX", ""]) @pytest.mark.parametrize("mmap", [True, False]) - def test_milvus_client_search_with_reranker_scalar_index(self, rerank_fields, index, mmap): + def test_milvus_client_search_with_reranker_scalar_index(self, rerank_fields, scalar_index, mmap): """ + Test search functionality with reranker using scalar index in Milvus client. + + This test verifies the search operation works correctly when using a reranker with different scalar index types. + It covers various scenarios including: + - Different data types for rerank fields (INT8, INT16, INT32, FLOAT, DOUBLE) + - Different index types (STL_SORT, INVERTED, AUTOINDEX, "") + - Memory-mapped and non-memory-mapped configurations + + The test performs the following steps: + 1. Creates a collection with specified schema and index parameters + 2. Inserts test data with appropriate data types + 3. Builds indexes on both vector and scalar fields + 4. Executes search operations with reranking function + 5. Validates search results with different filter conditions + 6. Cleans up by releasing collection and dropping indexes + + Note: This is an L1 (basic functionality) test case. 
target: test search with reranker with scalar index method: create connection, collection, insert and search expected: search successfully @@ -3908,7 +3936,7 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base): schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) schema.add_field(ct.default_reranker_field_name, rerank_fields, mmap_enabled=mmap) index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_vector_field_name, metric_type="COSINE") + index_params.add_index(default_vector_field_name, index_type='HNSW', metric_type="COSINE") self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) # 2. insert rng = np.random.default_rng(seed=19530) @@ -3930,15 +3958,18 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base): ct.default_reranker_field_name: value} rows.append(single_row) self.insert(client, collection_name, rows) + # flush + self.flush(client, collection_name) # 2. prepare index params index_params = self.prepare_index_params(client)[0] - index_params.add_index(field_name=ct.default_reranker_field_name, index_type=index, params={}) + index_params.add_index(field_name=ct.default_reranker_field_name, index_type=scalar_index, params={}) # 3. create index self.create_index(client, collection_name, index_params) - # 3. compact + # 4. compact self.compact(client, collection_name) - # 4. flush - self.flush(client, collection_name) + self.wait_for_index_ready(client, collection_name, index_name=ct.default_reranker_field_name) + self.wait_for_index_ready(client, collection_name, index_name=default_vector_field_name) + # 5. search my_rerank_fn = Function( name="my_reranker", @@ -3980,15 +4011,17 @@ class TestMilvusClientSearchRerankValid(TestMilvusClientV2Base): self.drop_index(client, collection_name, ct.default_reranker_field_name) self.drop_index(client, collection_name, default_vector_field_name) # 6. 
create index - params = {"metric_type": "L2"} - if index != "STL_SORT": + params = {"metric_type": "COSINE"} + if scalar_index != "STL_SORT": params['mmap.enabled'] = mmap index_params = self.prepare_index_params(client)[0] - index_params.add_index(field_name=ct.default_reranker_field_name, index_type=index, params=params) - index_params.add_index(field_name=default_vector_field_name, index_type="IVF_FLAT", params=params) + index_params.add_index(field_name=ct.default_reranker_field_name, index_type=scalar_index, params=params) + index_params.add_index(field_name=default_vector_field_name, index_type='HNSW', params=params) self.create_index(client, collection_name, index_params) + self.wait_for_index_ready(client, collection_name, index_name=ct.default_reranker_field_name) + self.wait_for_index_ready(client, collection_name, index_name=default_vector_field_name) self.load_collection(client, collection_name) - vectors_to_search = rng.random((1, dim)) + # vectors_to_search = rng.random((1, dim)) self.search(client, collection_name, vectors_to_search, ranker=my_rerank_fn, check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py b/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py index 8b565379a2..30160e3b16 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py @@ -86,7 +86,7 @@ class TestCollectionRangeSearch(TestcaseBase): @pytest.fixture(scope="function", params=ct.all_index_types[:8]) def index_type(self, request): - tags = request.config.getoption("--tags") + tags = request.config.getoption("--tags", default=['L0', 'L1', 'L2'], skip=True) if CaseLabel.L2 not in tags: if request.param not in ct.L0_index_types: pytest.skip(f"skip index type {request.param}") @@ -94,10 +94,10 @@ class 
TestCollectionRangeSearch(TestcaseBase): @pytest.fixture(scope="function", params=ct.dense_metrics) def metric(self, request): - tags = request.config.getoption("--tags") + tags = request.config.getoption("--tags", default=['L0', 'L1', 'L2'], skip=True) if CaseLabel.L2 not in tags: if request.param != ct.default_L0_metric: - pytest.skip(f"skip index type {request.param}") + pytest.skip(f"skip metric type {request.param}") yield request.param @pytest.fixture(scope="function", params=[default_nb, default_nb_medium]) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py b/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py index 1bc995ab14..b9ad8f53ee 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py @@ -17,9 +17,24 @@ class TestMilvusClientTTL(TestMilvusClientV2Base): @pytest.mark.parametrize("on_insert", [True, False]) def test_milvus_client_ttl_default(self, flush_enable, on_insert): """ - target: verify that data is invisible after ttl - method: create collection with ttl, insert data, wait for ttl, search data - expected: data is invisible + Test case for verifying TTL (Time To Live) functionality in Milvus client. + + This test verifies that: + 1. Data becomes invisible after the specified TTL period + 2. Different operations (search, query, hybrid search) correctly handle expired data + 3. TTL can be altered and the changes take effect + 4. Newly inserted data is not affected by previous TTL settings + + The test performs the following steps: + 1. Create a collection with TTL enabled + 2. Insert test data + 3. Wait for TTL to expire and verify data becomes invisible + 4. Insert new data and verify new inserted data are visible + 5. Alter TTL and verify the changes + + Parameters: + - flush_enable: Whether to flush collection during testing + - on_insert: Whether to use insert or upsert operation """ client = self._client() dim = 65