From e8011908ace25db3c56e3bc3915febfe20d53784 Mon Sep 17 00:00:00 2001
From: yanliang567 <82361606+yanliang567@users.noreply.github.com>
Date: Wed, 2 Jul 2025 11:02:43 +0800
Subject: [PATCH] test: Add tests for partition key filter issue and ttl
 eventually search (#43052)

related issue: #42918
1. add tests for TTL with eventually-consistent search
2. add tests for partition key filters
3. improve the query result checks for output fields
4. verify some fixes for the RaBitQ index and update the tests accordingly
5. generate random float vectors in (-1, 1) instead of (0, 1)

---------

Signed-off-by: yanliang567
---
 tests/python_client/check/func_check.py      |  55 ++--
 tests/python_client/check/param_check.py     | 105 ++++++++
 tests/python_client/common/common_func.py    |  29 +-
 .../test_milvus_client_data_integrity.py     |   4 +-
 .../test_milvus_client_search_iterator.py    |   2 +-
 .../test_milvus_client_e2e.py                | 173 +++++++-----
 .../test_milvus_client_range_search.py       |  49 ----
 .../test_milvus_client_search_v2.py          | 240 +----------------
 .../test_milvus_client_search_v2_new.py      | 253 +++++++++++++++++-
 .../test_milvus_client_ttl.py                | 104 ++++---
 tests/python_client/requirements.txt         |   4 +-
 .../testcases/indexes/idx_ivf_rabitq.py      |  12 +-
 .../testcases/indexes/test_ivf_rabitq.py     |   2 +-
 .../testcases/test_partition_key.py          |  17 ++
 14 files changed, 606 insertions(+), 443 deletions(-)

diff --git a/tests/python_client/check/func_check.py b/tests/python_client/check/func_check.py
index a0be09fa4e..404ab8fed9 100644
--- a/tests/python_client/check/func_check.py
+++ b/tests/python_client/check/func_check.py
@@ -566,31 +566,46 @@ class ResponseChecker:
             raise Exception("No expect values found in the check task")
         exp_res = check_items.get("exp_res", None)
         with_vec = check_items.get("with_vec", False)
-        pk_name = check_items.get("pk_name", ct.default_primary_field_name)
-        vector_type = check_items.get("vector_type", "FLOAT_VECTOR")
-        if vector_type == DataType.FLOAT16_VECTOR:
-            for single_exp_res in exp_res:
-                single_exp_res['vector'] = single_exp_res['vector'] .tolist()
-            for single_query_result in query_res:
-                single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=np.float16).tolist()
-        if vector_type == DataType.BFLOAT16_VECTOR:
-            for single_exp_res in exp_res:
-                single_exp_res['vector'] = single_exp_res['vector'] .tolist()
-            for single_query_result in query_res:
-                single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=bfloat16).tolist()
-        if vector_type == DataType.INT8_VECTOR:
-            for single_exp_res in exp_res:
-                single_exp_res['vector'] = single_exp_res['vector'] .tolist()
-            for single_query_result in query_res:
-                single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=np.int8).tolist()
+        exp_limit = check_items.get("exp_limit", None)
+        count = check_items.get("count(*)", None)
+        if count is not None:
+            assert count == query_res[0].get("count(*)", None)
+            return True
+        if exp_limit is None and exp_res is None:
+            raise Exception("No expected values to check in the check task")
+        if exp_limit is not None:
+            assert len(query_res) == exp_limit
+        # pk_name = check_items.get("pk_name", ct.default_primary_field_name)
+        # if with_vec:
         if exp_res is not None:
+            if with_vec is True:
+                vector_type = check_items.get('vector_type', 'FLOAT_VECTOR')
+                vector_field = check_items.get('vector_field', 'vector')
+                if vector_type == DataType.FLOAT16_VECTOR:
+                    # for single_exp_res in exp_res:
+                    #     single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
+                    for single_query_result in query_res:
+                        single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=np.float16).tolist()
+                if vector_type == DataType.BFLOAT16_VECTOR:
+                    # for single_exp_res in exp_res:
+                    #     single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
+                    for single_query_result in query_res:
+                        single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=bfloat16).tolist()
+                if vector_type == DataType.INT8_VECTOR:
+                    # for single_exp_res in exp_res:
+                    #     if single_exp_res[vector_field].__class__ is not list:
+                    #         single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
+                    for single_query_result in query_res:
+                        single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=np.int8).tolist()
             if isinstance(query_res, list):
-                assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name,
-                                              with_vec=with_vec)
+                # assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name, with_vec=with_vec)
+                # return True
+                assert pc.compare_lists_ignore_order(a=query_res, b=exp_res)
                 return True
             else:
                 log.error(f"Query result {query_res} is not list")
                 return False
+        log.warning(f'Expected query result is {exp_res}')
 
 
     @staticmethod
@@ -810,4 +825,4 @@ class ResponseChecker:
         if check_items.get("index_name", None) is not None:
             assert res["index_name"] == check_items.get("index_name")
 
-        return True
\ No newline at end of file
+        return True
diff --git a/tests/python_client/check/param_check.py b/tests/python_client/check/param_check.py
index b4c975df4c..28cf436e67 100644
--- a/tests/python_client/check/param_check.py
+++ b/tests/python_client/check/param_check.py
@@ -5,6 +5,111 @@ from common import common_type as ct
 sys.path.append("..")
 from utils.util_log import test_log as log
+import numpy as np
+from collections.abc import Iterable
+
+epsilon = ct.epsilon
+
+def deep_approx_compare(x, y, epsilon=epsilon):
+    """
+    Recursively compares two objects for approximate equality, handling floating-point precision.
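+
+    Example (an illustrative doctest-style sketch; assumes the default
+    tolerance of 1e-6 noted below):
+        >>> deep_approx_compare({"v": [1.0, 2.0]}, {"v": [1.0000001, 2.0]})
+        True
+        >>> deep_approx_compare({"v": 1.0}, {"v": 1.1})
+        False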
+
+    Args:
+        x: First object to compare
+        y: Second object to compare
+        epsilon: Tolerance for floating-point comparisons (default: 1e-6)
+
+    Returns:
+        bool: True if objects are approximately equal, False otherwise
+
+    Handles:
+        - Numeric types (int, float, numpy scalars)
+        - Sequences (list, tuple, numpy arrays)
+        - Dictionaries
+        - Other iterables (except strings)
+        - Numpy arrays (shape and value comparison)
+        - Falls back to strict equality for other types
+    """
+    # Handle basic numeric types (including numpy scalars)
+    if isinstance(x, (int, float, np.integer, np.floating)) and isinstance(y, (int, float, np.integer, np.floating)):
+        return abs(float(x) - float(y)) < epsilon
+
+    # Handle lists/tuples/arrays
+    if isinstance(x, (list, tuple, np.ndarray)) and isinstance(y, (list, tuple, np.ndarray)):
+        if len(x) != len(y):
+            return False
+        for a, b in zip(x, y):
+            if not deep_approx_compare(a, b, epsilon):
+                return False
+        return True
+
+    # Handle dictionaries
+    if isinstance(x, dict) and isinstance(y, dict):
+        if set(x.keys()) != set(y.keys()):
+            return False
+        for key in x:
+            if not deep_approx_compare(x[key], y[key], epsilon):
+                return False
+        return True
+
+    # Handle other iterables (e.g., Protobuf containers)
+    if isinstance(x, Iterable) and isinstance(y, Iterable) and not isinstance(x, str):
+        try:
+            return deep_approx_compare(list(x), list(y), epsilon)
+        except:
+            pass
+
+    # Handle numpy arrays (note: a pair of ndarrays is already matched by the
+    # sequence branch above, so this only serves as a shape-aware fallback)
+    if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
+        if x.shape != y.shape:
+            return False
+        return np.allclose(x, y, atol=epsilon)
+
+    # Fall back to strict equality for other types
+    return x == y
+
+
+def compare_lists_ignore_order(a, b, epsilon=epsilon):
+    """
+    Compares two lists of dictionaries for equality (order-insensitive) with floating-point tolerance.
+
+    Args:
+        a (list): First list of dictionaries to compare
+        b (list): Second list of dictionaries to compare
+        epsilon (float, optional): Tolerance for floating-point comparisons. Defaults to 1e-6.
+
+    Returns:
+        bool: True if lists contain equivalent dictionaries (order doesn't matter), False otherwise
+
+    Note:
+        Uses deep_approx_compare() for dictionary comparison with floating-point tolerance.
+        Maintains O(n²) complexity due to nested comparisons.
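+
+    Example (an illustrative doctest-style sketch of the order-insensitive,
+    tolerance-aware matching; assumes the default epsilon):
+        >>> a = [{"id": 1, "v": 0.5}, {"id": 2, "v": 0.25}]
+        >>> b = [{"id": 2, "v": 0.25000001}, {"id": 1, "v": 0.5}]
+        >>> compare_lists_ignore_order(a, b)
+        True
+        >>> compare_lists_ignore_order(a, b[:1])
+        False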
+ """ + if len(a) != len(b): + return False + + # Create a set of available indices for b + available_indices = set(range(len(b))) + + for item_a in a: + matched = False + # Create a list of indices to remove (avoid modifying the set during iteration) + to_remove = [] + + for idx in available_indices: + if deep_approx_compare(item_a, b[idx], epsilon): + to_remove.append(idx) + matched = True + break + + if not matched: + return False + + # Remove matched indices + available_indices -= set(to_remove) + + return True + def ip_check(ip): if ip == "localhost": diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index 239783e717..8bd27b5823 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -1127,33 +1127,6 @@ def gen_schema_multi_string_fields(string_fields): primary_field=primary_field, auto_id=False) return schema - -def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR): - vectors = [] - if vector_data_type == DataType.FLOAT_VECTOR: - vectors = [[random.random() for _ in range(dim)] for _ in range(nb)] - elif vector_data_type == DataType.FLOAT16_VECTOR: - vectors = gen_fp16_vectors(nb, dim)[1] - elif vector_data_type == DataType.BFLOAT16_VECTOR: - vectors = gen_bf16_vectors(nb, dim)[1] - elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR: - vectors = gen_sparse_vectors(nb, dim) - elif vector_data_type == ct.text_sparse_vector: - vectors = gen_text_vectors(nb) # for Full Text Search - elif vector_data_type == DataType.INT8_VECTOR: - vectors = gen_int8_vectors(nb, dim)[1] - elif vector_data_type == DataType.BINARY_VECTOR: - vectors = gen_binary_vectors(nb, dim)[1] - else: - log.error(f"Invalid vector data type: {vector_data_type}") - raise Exception(f"Invalid vector data type: {vector_data_type}") - if dim > 1: - if vector_data_type == DataType.FLOAT_VECTOR: - vectors = preprocessing.normalize(vectors, axis=1, norm='l2') - vectors = vectors.tolist() - return vectors - - def gen_string(nb): string_values = [str(random.random()) for _ in range(nb)] return string_values @@ -3613,7 +3586,7 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok", empty_percentage=0): def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR): vectors = [] if vector_data_type == DataType.FLOAT_VECTOR: - vectors = [[random.random() for _ in range(dim)] for _ in range(nb)] + vectors = [[random.uniform(-1, 1) for _ in range(dim)] for _ in range(nb)] elif vector_data_type == DataType.FLOAT16_VECTOR: vectors = gen_fp16_vectors(nb, dim)[1] elif vector_data_type == DataType.BFLOAT16_VECTOR: diff --git a/tests/python_client/milvus_client/test_milvus_client_data_integrity.py b/tests/python_client/milvus_client/test_milvus_client_data_integrity.py index ede1038167..1a6ad951db 100644 --- a/tests/python_client/milvus_client/test_milvus_client_data_integrity.py +++ b/tests/python_client/milvus_client/test_milvus_client_data_integrity.py @@ -76,12 +76,12 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base): @pytest.mark.parametrize("is_release", [True]) @pytest.mark.parametrize("single_data_num", [50]) @pytest.mark.parametrize("expr_field", [ct.default_int64_field_name, - # ct.default_string_field_name, # TODO: uncommented after #42604 fixed + ct.default_string_field_name, ct.default_float_array_field_name]) def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array(self, enable_dynamic_field, supported_numeric_scalar_index, - # supported_varchar_scalar_index, + 
supported_varchar_scalar_index, supported_json_path_index, supported_array_double_float_scalar_index, is_flush, diff --git a/tests/python_client/milvus_client/test_milvus_client_search_iterator.py b/tests/python_client/milvus_client/test_milvus_client_search_iterator.py index 850e519b8e..1044729672 100644 --- a/tests/python_client/milvus_client/test_milvus_client_search_iterator.py +++ b/tests/python_client/milvus_client/test_milvus_client_search_iterator.py @@ -54,7 +54,7 @@ def external_filter_with_outputs(hits): results = [] for hit in hits: # equals filter nothing if there are output_fields - if hit.distance < 1.0 and len(hit.fields) > 0: + if hit.distance <= 4.0 and len(hit.fields) > 0: results.append(hit) return results diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py b/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py index 25a48ff689..8ed005fd95 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py @@ -1,3 +1,6 @@ +import random + +import pandas import pytest import numpy as np import time @@ -10,7 +13,6 @@ from base.client_v2_base import TestMilvusClientV2Base from pymilvus import DataType, FieldSchema, CollectionSchema # Test parameters -default_dim = ct.default_dim default_nb = ct.default_nb default_nq = ct.default_nq default_limit = ct.default_limit @@ -28,7 +30,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("flush_enable", [True, False]) @pytest.mark.parametrize("scalar_index_enable", [True, False]) - def test_milvus_client_e2e_default(self, flush_enable, scalar_index_enable): + @pytest.mark.parametrize("vector_type", [DataType.FLOAT_VECTOR]) + def test_milvus_client_e2e_default(self, flush_enable, scalar_index_enable, vector_type): """ target: test high level api: client.create_collection, insert, search, query method: create connection, collection, insert and search with: @@ -37,13 +40,14 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): expected: search/query successfully """ client = self._client() - + dim = 8 + # 1. 
Create collection with custom schema collection_name = cf.gen_collection_name_by_testcase_name() schema = self.create_schema(client, enable_dynamic_field=False)[0] # Primary key and vector field schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False) - schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field("vector", vector_type, dim=dim) # Boolean type schema.add_field("bool_field", DataType.BOOL, nullable=True) # Integer types @@ -59,7 +63,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): # JSON type schema.add_field("json_field", DataType.JSON, nullable=True) # Array type - schema.add_field("array_field", DataType.ARRAY, element_type=DataType.INT64, max_capacity=12, nullable=True) + schema.add_field("array_field", DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=12, nullable=True) # Create collection self.create_collection(client, collection_name, schema=schema) @@ -68,14 +72,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): num_inserts = 5 # insert data for 5 times total_rows = [] for batch in range(num_inserts): - vectors = cf.gen_vectors(default_nb, default_dim) + vectors = list(cf.gen_vectors(default_nb, dim, vector_data_type=vector_type)) \ + if vector_type == DataType.FLOAT_VECTOR \ + else cf.gen_vectors(default_nb, dim, vector_data_type=vector_type) rows = [] start_id = batch * default_nb # ensure id is not duplicated for i in range(default_nb): row = { "id": start_id + i, # ensure id is not duplicated - "embeddings": list(vectors[i]) + "vector": vectors[i] } # Add nullable fields with null values for every 5th record @@ -99,11 +105,11 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): "int16_field": i % 32768, "int32_field": i, "int64_field": i, - "float_field": float(i), - "double_field": float(i) * 1.0, + "float_field": random.random(), + "double_field": random.random(), "varchar_field": f"varchar_{start_id + i}", "json_field": {"id": start_id + i, "value": f"json_{start_id + i}"}, - "array_field": [i, i + 1, i + 2] + "array_field": [random.random() for _ in range(5)] }) rows.append(row) total_rows.append(row) @@ -124,7 +130,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): # Create index parameters index_params = self.prepare_index_params(client)[0] - index_params.add_index("embeddings", metric_type="COSINE") + index_params.add_index("vector", metric_type="COSINE") # Add autoindex for scalar fields if enabled if scalar_index_enable: @@ -160,13 +166,13 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): # 4. Search t0 = time.time() - vectors_to_search = cf.gen_vectors(1, default_dim) + vectors_to_search = cf.gen_vectors(1, dim, vector_data_type=vector_type) search_params = {"metric_type": "COSINE", "params": {"nprobe": 100}} search_res, _ = self.search( client, collection_name, vectors_to_search, - anns_field="embeddings", + anns_field="vector", search_params=search_params, limit=default_limit, output_fields=['*'], @@ -182,26 +188,34 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): # 5. 
Query with filters on each scalar field t0 = time.time() - # Query on boolean field + output_fields = ['id', 'int8_field', 'json_field'] bool_filter = "bool_field == true" - bool_expected = [r for r in total_rows if r["bool_field"] is not None and r["bool_field"]] + bool_expected = [ + { + 'id': r['id'], + 'int8_field': r['int8_field'], + 'json_field': r['json_field'] + } + for r in total_rows if r["bool_field"] is not None and r["bool_field"]] query_res, _ = self.query( client, collection_name, filter=bool_filter, - output_fields=['*'], + output_fields=output_fields, check_task=CheckTasks.check_query_results, check_items={ "exp_res": bool_expected, - "with_vec": True, + "with_vec": False, + "vector_type": vector_type, "pk_name": "id" } ) # Query on int8 field - int8_filter = "int8_field < 50" - int8_expected = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 50] + with_vec = True + int8_filter = "int8_field is null || int8_field < 10" + int8_expected = [r for r in total_rows if r["int8_field"] is None or r["int8_field"] < 10] query_res, _ = self.query( client, collection_name, @@ -210,14 +224,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": int8_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Query on int16 field - int16_filter = "int16_field < 1000" - int16_expected = [r for r in total_rows if r["int16_field"] is not None and r["int16_field"] < 1000] + int16_filter = "100 <= int16_field < 200" + int16_expected = [r for r in total_rows if r["int16_field"] is not None and 100 <= r["int16_field"] < 200] query_res, _ = self.query( client, collection_name, @@ -226,14 +241,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": int16_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Query on int32 field - int32_filter = "int32_field in [1,2,3,4,5]" - int32_expected = [r for r in total_rows if r["int32_field"] is not None and r["int32_field"] in [1,2,3,4,5]] + int32_filter = "int32_field in [1,2,5,6]" + int32_expected = [r for r in total_rows if r["int32_field"] is not None and r["int32_field"] in [1,2,5,6]] query_res, _ = self.query( client, collection_name, @@ -242,14 +258,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": int32_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Query on int64 field - int64_filter = "int64_field >= 10" - int64_expected = [r for r in total_rows if r["int64_field"] is not None and r["int64_field"] >= 10] + int64_filter = "int64_field >= 4678 and int64_field < 5050" + int64_expected = [r for r in total_rows if r["int64_field"] is not None and r["int64_field"] >= 4678 and r["int64_field"] < 5050] query_res, _ = self.query( client, collection_name, @@ -258,14 +275,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": int64_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Query on float field - float_filter = "float_field > 5.0" - float_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 5.0] + float_filter = "float_field > 0.5 and float_field <= 0.7" + float_expected = [r for r in total_rows if 
r["float_field"] is not None and r["float_field"] > 0.5 and r["float_field"] <= 0.7] query_res, _ = self.query( client, collection_name, @@ -274,14 +292,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": float_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Query on double field - double_filter = "3.0 <=double_field <= 7.0" - double_expected = [r for r in total_rows if r["double_field"] is not None and 3.0 <= r["double_field"] <= 7.0] + double_filter = "0.5 <=double_field <= 0.7" + double_expected = [r for r in total_rows if r["double_field"] is not None and 0.5 <= r["double_field"] <= 0.7] query_res, _ = self.query( client, collection_name, @@ -290,7 +309,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": double_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) @@ -306,7 +326,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": varchar_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) @@ -322,7 +343,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": varchar_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) @@ -338,7 +360,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": json_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) @@ -354,7 +377,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": array_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) @@ -370,7 +394,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": multi_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) @@ -386,15 +411,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": mix_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Query on is not null conditions for each scalar field # Int8 field is not null - int8_not_null_filter = "int8_field is not null" - int8_not_null_expected = [r for r in total_rows if r["int8_field"] is not None] + int8_not_null_filter = "int8_field is not null and int8_field > 100" + int8_not_null_expected = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] > 100] query_res, _ = self.query( client, collection_name, @@ -403,14 +429,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": int8_not_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Int16 field is not null - int16_not_null_filter = "int16_field is not null" - int16_not_null_expected = [r for r in total_rows if r["int16_field"] is not None] + int16_not_null_filter = "int16_field is not null and int16_field < 100" + int16_not_null_expected = [r for r in total_rows if 
r["int16_field"] is not None and r["int16_field"] < 100] query_res, _ = self.query( client, collection_name, @@ -419,14 +446,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": int16_not_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Float field is not null - float_not_null_filter = "float_field is not null" - float_not_null_expected = [r for r in total_rows if r["float_field"] is not None] + float_not_null_filter = "float_field is not null and float_field > 0.5 and float_field <= 0.7" + float_not_null_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 0.5 and r["float_field"] <= 0.7] query_res, _ = self.query( client, collection_name, @@ -435,14 +463,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": float_not_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Double field is not null - double_not_null_filter = "double_field is not null" - double_not_null_expected = [r for r in total_rows if r["double_field"] is not None] + double_not_null_filter = "double_field is not null and double_field <= 0.2" + double_not_null_expected = [r for r in total_rows if r["double_field"] is not None and r["double_field"] <= 0.2] query_res, _ = self.query( client, collection_name, @@ -451,7 +480,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": double_not_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) @@ -467,14 +497,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": varchar_not_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # JSON field is not null - json_not_null_filter = "json_field is not null" - json_not_null_expected = [r for r in total_rows if r["json_field"] is not None] + json_not_null_filter = "json_field is not null and json_field['id'] < 100" + json_not_null_expected = [r for r in total_rows if r["json_field"] is not None and r["json_field"]["id"] < 100] query_res, _ = self.query( client, collection_name, @@ -483,14 +514,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": json_not_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Array field is not null - array_not_null_filter = "array_field is not null" - array_not_null_expected = [r for r in total_rows if r["array_field"] is not None] + array_not_null_filter = "array_field is not null and array_field[0] < 100" + array_not_null_expected = [r for r in total_rows if r["array_field"] is not None and r["array_field"][0] < 100] query_res, _ = self.query( client, collection_name, @@ -499,7 +531,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": array_not_null_expected, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) @@ -516,16 +549,17 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": multi_not_null_expected, - "with_vec": True, + "with_vec": with_vec, + 
"vector_type": vector_type, "pk_name": "id" } ) # Complex mixed conditions with is null, is not null, and comparison operators # Test case 1: int field is null AND float field > value AND varchar field is not null - complex_mix_filter1 = "int32_field is null and float_field > 10.0 and varchar_field is not null" + complex_mix_filter1 = "int32_field is null and float_field > 0.7 and varchar_field is not null" complex_mix_expected1 = [r for r in total_rows if r["int32_field"] is None and - r["float_field"] is not None and r["float_field"] > 10.0 and + r["float_field"] is not None and r["float_field"] > 0.7 and r["varchar_field"] is not None] query_res, _ = self.query( client, @@ -535,7 +569,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": complex_mix_expected1, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) @@ -553,15 +588,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": complex_mix_expected2, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) # Test case 3: Multiple fields with mixed null/not null conditions and range comparisons - complex_mix_filter3 = ("int8_field is not null and int8_field < 50 and double_field is null and " + complex_mix_filter3 = ("int8_field is not null and int8_field < 15 and double_field is null and " "varchar_field is not null and varchar_field like \"varchar_2%\"") - complex_mix_expected3 = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 50 and + complex_mix_expected3 = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 15 and r["double_field"] is None and r["varchar_field"] is not None and r["varchar_field"].startswith("varchar_2")] query_res, _ = self.query( @@ -572,7 +608,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base): check_task=CheckTasks.check_query_results, check_items={ "exp_res": complex_mix_expected3, - "with_vec": True, + "with_vec": with_vec, + "vector_type": vector_type, "pk_name": "id" } ) diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py b/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py index 30160e3b16..f7dd4a1c3b 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py @@ -1548,55 +1548,6 @@ class TestCollectionRangeSearch(TestcaseBase): **kwargs ) - @pytest.mark.tags(CaseLabel.L1) - def test_range_search_with_consistency_session(self, nq, dim, auto_id, _async): - """ - target: test range search with different consistency level - method: 1. create a collection - 2. insert data - 3. range search with consistency_level is "session" - expected: searched successfully - """ - limit = 1000 - nb_old = 500 - collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old, - auto_id=auto_id, - dim=dim)[0:4] - # 2. 
search for original data after load - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - range_search_params = {"metric_type": "COSINE", "params": {"nprobe": 10, "radius": 0, - "range_filter": 1000}} - collection_w.search(vectors[:nq], default_search_field, - range_search_params, limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": nb_old, - "_async": _async, - "pk_name": ct.default_int64_field_name}) - - kwargs = {} - consistency_level = kwargs.get( - "consistency_level", CONSISTENCY_SESSION) - kwargs.update({"consistency_level": consistency_level}) - - nb_new = 400 - _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new, - auto_id=auto_id, dim=dim, - insert_offset=nb_old) - insert_ids.extend(insert_ids_new) - collection_w.search(vectors[:nq], default_search_field, - range_search_params, limit, - default_search_exp, _async=_async, - **kwargs, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": nb_old + nb_new, - "_async": _async, - "pk_name": ct.default_int64_field_name}) - @pytest.mark.tags(CaseLabel.L2) def test_range_search_sparse(self): """ diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py index 2969d5dfbb..5d54cd209f 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py @@ -80,101 +80,6 @@ half_nb = ct.default_nb // 2 max_hybrid_search_req_num = ct.max_hybrid_search_req_num -class TestSearchBase(TestcaseBase): - @pytest.fixture( - scope="function", - params=[1, 10] - ) - def get_top_k(self, request): - yield request.param - - @pytest.fixture( - scope="function", - params=[1, 10, 1100] - ) - def get_nq(self, request): - yield request.param - - @pytest.fixture(scope="function", params=[32, 128]) - def dim(self, request): - yield request.param - - @pytest.fixture(scope="function", params=[False, True]) - def auto_id(self, request): - yield request.param - - @pytest.fixture(scope="function", params=[False, True]) - def _async(self, request): - yield request.param - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ct.all_index_types[:6]) - def test_each_index_with_mmap_enabled_search(self, index): - """ - target: test each index with mmap enabled search - method: test each index with mmap enabled search - expected: search success - """ - self._connect() - nb = 2000 - dim = 32 - collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False)[0] - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": "L2"} - collection_w.create_index(field_name, default_index, index_name="mmap_index") - # mmap index - collection_w.alter_index("mmap_index", {'mmap.enabled': True}) - # search - collection_w.load() - search_params = cf.gen_search_param(index)[0] - vector = [[random.random() for _ in range(dim)] for _ in range(default_nq)] - collection_w.search(vector, default_search_field, search_params, ct.default_limit, - output_fields=["*"], - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "limit": ct.default_limit}) - # enable mmap - collection_w.release() - collection_w.alter_index("mmap_index", {'mmap.enabled': False}) - collection_w.load() - collection_w.search(vector, default_search_field, 
search_params, ct.default_limit, - output_fields=["*"], - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "limit": ct.default_limit}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index", ct.all_index_types[8:10]) - def test_enable_mmap_search_for_binary_indexes(self, index): - """ - target: enable mmap for binary indexes - method: enable mmap for binary indexes - expected: search success - """ - self._connect() - dim = 64 - nb = 2000 - collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False, is_binary=True)[0] - params = cf.get_index_params_params(index) - default_index = {"index_type": index, - "params": params, "metric_type": "JACCARD"} - collection_w.create_index(ct.default_binary_vec_field_name, default_index, index_name="binary_idx_name") - collection_w.alter_index("binary_idx_name", {'mmap.enabled': True}) - collection_w.set_properties({'mmap.enabled': True}) - collection_w.load() - pro = collection_w.describe()[0].get("properties") - assert pro["mmap.enabled"] == 'True' - assert collection_w.index()[0].params["mmap.enabled"] == 'True' - # search - binary_vectors = cf.gen_binary_vectors(default_nq, dim)[1] - search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}} - output_fields = ["*"] - collection_w.search(binary_vectors, ct.default_binary_vec_field_name, search_params, - default_limit, default_search_string_exp, output_fields=output_fields, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "limit": default_limit}) - class TestCollectionSearch(TestcaseBase): """ Test case of search interface """ @@ -237,129 +142,6 @@ class TestCollectionSearch(TestcaseBase): ****************************************************************** """ - @pytest.mark.skip("enable this later using session/strong consistency") - @pytest.mark.tags(CaseLabel.L1) - def test_search_new_data(self, nq, _async): - """ - target: test search new inserted data without load - method: 1. search the collection - 2. insert new data - 3. search the collection without load again - 4. Use guarantee_timestamp to guarantee data consistency - expected: new data should be searched - """ - # 1. initialize with data - dim = 128 - auto_id = False - limit = 1000 - nb_old = 500 - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb_old, - auto_id=auto_id, - dim=dim)[0:5] - # 2. search for original data after load - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - log.info("test_search_new_data: searching for original data after load") - collection_w.search(vectors[:nq], default_search_field, - default_search_params, limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": nb_old, - "_async": _async}) - # 3. insert new data - nb_new = 300 - _, _, _, insert_ids_new, time_stamp = cf.insert_data(collection_w, nb_new, - auto_id=auto_id, dim=dim, - insert_offset=nb_old) - insert_ids.extend(insert_ids_new) - # 4. search for new data without load - # Using bounded staleness, maybe we could not search the "inserted" entities, - # since the search requests arrived query nodes earlier than query nodes consume the insert requests. 
- collection_w.search(vectors[:nq], default_search_field, - default_search_params, limit, - default_search_exp, _async=_async, - guarantee_timestamp=time_stamp, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": nb_old + nb_new, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("shards_num", [-256, 0, ct.max_shards_num // 2, ct.max_shards_num]) - def test_search_with_non_default_shard_nums(self, shards_num, _async): - """ - target: test search with non_default shards_num - method: connect milvus, create collection with several shard numbers , insert, load and search - expected: search successfully with the non_default shards_num - """ - auto_id = False - self._connect() - # 1. create collection - name = cf.gen_unique_str(prefix) - collection_w = self.init_collection_wrap( - name=name, shards_num=shards_num) - # 2. rename collection - new_collection_name = cf.gen_unique_str(prefix + "new") - self.utility_wrap.rename_collection( - collection_w.name, new_collection_name) - collection_w = self.init_collection_wrap( - name=new_collection_name, shards_num=shards_num) - # 3. insert - dataframe = cf.gen_default_dataframe_data() - collection_w.insert(dataframe) - # 4. create index and load - collection_w.create_index( - ct.default_float_vec_field_name, index_params=ct.default_flat_index) - collection_w.load() - # 5. search - vectors = [[random.random() for _ in range(default_dim)] - for _ in range(default_nq)] - collection_w.search(vectors[:default_nq], default_search_field, - default_search_params, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "limit": default_limit, - "_async": _async}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("M", [4, 64]) - @pytest.mark.parametrize("efConstruction", [8, 512]) - def test_search_HNSW_index_with_redundant_param(self, M, efConstruction, _async): - """ - target: test search HNSW index with redundant param - method: connect milvus, create collection , insert, create index, load and search - expected: search successfully - """ - dim = M * 4 - auto_id = False - enable_dynamic_field = False - self._connect() - collection_w, _, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, partition_num=1, auto_id=auto_id, - dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] - # nlist is of no use - HNSW_index_params = { - "M": M, "efConstruction": efConstruction, "nlist": 100} - HNSW_index = {"index_type": "HNSW", - "params": HNSW_index_params, "metric_type": "L2"} - collection_w.create_index("float_vector", HNSW_index) - collection_w.load() - search_param = {"metric_type": "L2", "params": { - "ef": 32768, "nprobe": 10}} # nprobe is of no use - vectors = [[random.random() for _ in range(dim)] - for _ in range(default_nq)] - collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) - @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("M", [4, 64]) @pytest.mark.parametrize("efConstruction", [8, 512]) @@ -788,8 +570,7 @@ class TestCollectionSearch(TestcaseBase): self.init_collection_general(prefix, True, nb=nb, dim=dim, enable_dynamic_field=True)[0:4] # filter result with expression in collection - search_vectors = 
[[random.random() for _ in range(dim)] - for _ in range(default_nq)] + search_vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] _vectors = _vectors[0] for expressions in cf.gen_json_field_expressions_and_templates(): expr = expressions[0].replace("&&", "and").replace("||", "or") @@ -858,12 +639,16 @@ class TestCollectionSearch(TestcaseBase): ids = hits.ids assert set(ids).issubset(filter_ids_set) # 7. create json index - default_json_path_index = {"index_type": "INVERTED", "params": {"json_cast_type": "double", - "json_path": f"{ct.default_json_field_name}['number']"}} - collection_w.create_index(ct.default_json_field_name, default_json_path_index, index_name = f"{ct.default_json_field_name}_0") - default_json_path_index = {"index_type": "INVERTED", "params": {"json_cast_type": "double", - "json_path": f"{ct.default_json_field_name}['float']"}} - collection_w.create_index(ct.default_json_field_name, default_json_path_index, index_name = f"{ct.default_json_field_name}_1") + default_json_path_index = {"index_type": "INVERTED", + "params": {"json_cast_type": "double", + "json_path": f"{ct.default_json_field_name}['number']"}} + collection_w.create_index(ct.default_json_field_name, default_json_path_index, + index_name=f"{ct.default_json_field_name}_0") + default_json_path_index = {"index_type": "AUTOINDEX", + "params": {"json_cast_type": "double", + "json_path": f"{ct.default_json_field_name}['float']"}} + collection_w.create_index(ct.default_json_field_name, default_json_path_index, + index_name=f"{ct.default_json_field_name}_1") # 8. release and load to make sure the new index is loaded collection_w.release() collection_w.load() @@ -994,8 +779,7 @@ class TestCollectionSearch(TestcaseBase): collection_w.search(vectors, default_search_field, default_search_params, default_limit, expression, output_fields=[field], check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "limit": 0})[0] + check_items={"nq": default_nq, "limit": 0}) # 4. search normal using all the scalar type as output fields collection_w.search(vectors, default_search_field, default_search_params, default_limit, output_fields=[field], diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py index 303db9b879..e769b747bb 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py @@ -161,6 +161,8 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base): index_type=self.binary_vector_index, params={"nlist": 128}) self.create_index(client, self.collection_name, index_params=index_params) + self.wait_for_index_ready(client, self.collection_name, index_name=self.float_vector_field_name) + self.wait_for_index_ready(client, self.collection_name, index_name=self.bfloat16_vector_field_name) # Load collection self.load_collection(client, self.collection_name) @@ -378,7 +380,8 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base): ) @pytest.mark.tags(CaseLabel.L2) - def test_search_with_output_fields(self): + @pytest.mark.parametrize("consistency_level", ["Strong", "Session", "Bounded", "Eventually"]) + def test_search_with_output_fields_and_consistency_level(self, consistency_level): """ target: test search with output fields method: 1. 
connect and create a collection @@ -400,6 +403,7 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base): anns_field=self.float_vector_field_name, search_params=search_params, limit=default_limit, + consistency_level=consistency_level, output_fields=[ct.default_string_field_name, self.dyna_filed_name1, self.dyna_filed_name2], check_task=CheckTasks.check_search_results, check_items={"enable_milvus_client_api": True, @@ -1220,3 +1224,250 @@ class TestSearchV2Independent(TestMilvusClientV2Base): "nq": ct.default_nq, "pk_name": "id", "limit": ct.default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("index", ct.all_index_types[:6]) + def test_each_index_with_mmap_enabled_search(self, index): + """ + target: test each index with mmap enabled search + method: test each index with mmap enabled search + expected: search success + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # fast create collection + dim = 32 + schema = self.create_schema(client)[0] + schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False) + schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim) + self.create_collection(client, collection_name, schema=schema) + + # insert data + data = [] + for i in range(ct.default_nb): + data.append({ + "id": i, + "vector": cf.gen_vectors(1, dim)[0] + }) + self.insert(client, collection_name, data) + self.flush(client, collection_name) + # create index + index_params = self.prepare_index_params(client)[0] + params = cf.get_index_params_params(index) + index_params.add_index(field_name='vector', index_type=index, params=params, metric_type='L2') + self.create_index(client, collection_name, index_params=index_params) + self.wait_for_index_ready(client, collection_name, index_name='vector') + + # alter mmap index + self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": True}) + index_info = self.describe_index(client, collection_name, index_name='vector') + assert index_info[0]["mmap.enabled"] == 'True' + # search + self.load_collection(client, collection_name) + search_params = {} + vector = cf.gen_vectors(ct.default_nq, dim) + self.search(client, collection_name, vector, anns_field="vector", + search_params=search_params, limit=ct.default_limit, + output_fields=["*"], + check_task=CheckTasks.check_search_results, + check_items={"nq": ct.default_nq, + "limit": ct.default_limit}) + # disable mmap + self.release_collection(client, collection_name) + self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": False}) + index_info = self.describe_index(client, collection_name, index_name='vector') + assert index_info[0]["mmap.enabled"] == 'False' + self.load_collection(client, collection_name) + self.search(client, collection_name, vector, anns_field="vector", + search_params=search_params, limit=ct.default_limit, + output_fields=["*"], + check_task=CheckTasks.check_search_results, + check_items={"nq": ct.default_nq, + "limit": ct.default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("index", ct.all_index_types[8:10]) + def test_enable_mmap_search_for_binary_indexes(self, index): + """ + Test enabling mmap for binary indexes in Milvus. + + This test verifies that: + 1. Binary vector indexes can be successfully created with mmap enabled + 2. Search operations work correctly with mmap enabled + 3. 
Mmap can be properly disabled and search still works + + The test performs following steps: + - Creates a collection with binary vectors + - Inserts test data + - Creates index with mmap enabled + - Verifies mmap status + - Performs search with mmap enabled + - Disables mmap and verifies search still works + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # fast create collection + dim = 64 + schema = self.create_schema(client)[0] + schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False) + schema.add_field('vector', DataType.BINARY_VECTOR, dim=dim) + self.create_collection(client, collection_name, schema=schema) + + # insert data + data = [] + for i in range(ct.default_nb): + data.append({ + "id": i, + "vector": cf.gen_binary_vectors(1, dim)[1][0] + }) + self.insert(client, collection_name, data) + self.flush(client, collection_name) + # create index + index_params = self.prepare_index_params(client)[0] + params = cf.get_index_params_params(index) + index_params.add_index(field_name='vector', index_type=index, params=params, metric_type='JACCARD') + self.create_index(client, collection_name, index_params=index_params) + self.wait_for_index_ready(client, collection_name, index_name='vector') + # alter mmap index + self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": True}) + index_info = self.describe_index(client, collection_name, index_name='vector') + assert index_info[0]["mmap.enabled"] == 'True' + # load collection + self.load_collection(client, collection_name) + # search + binary_vectors = cf.gen_binary_vectors(ct.default_nq, dim)[1] + params = cf.get_search_params_params(index) + search_params = {"metric_type": "JACCARD", "params": params} + output_fields = ["*"] + self.search(client, collection_name, binary_vectors, anns_field="vector", + search_params=search_params, limit=ct.default_limit, + output_fields=output_fields, + check_task=CheckTasks.check_search_results, + check_items={"nq": ct.default_nq, + "limit": ct.default_limit}) + # disable mmap + self.release_collection(client, collection_name) + self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": False}) + index_info = self.describe_index(client, collection_name, index_name='vector') + assert index_info[0]["mmap.enabled"] == 'False' + self.load_collection(client, collection_name) + self.search(client, collection_name, binary_vectors, anns_field="vector", + search_params=search_params, limit=ct.default_limit, + output_fields=output_fields, + check_task=CheckTasks.check_search_results, + check_items={"nq": ct.default_nq, + "limit": ct.default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("num_shards", [-256, 0, ct.max_shards_num // 2, ct.max_shards_num]) + def test_search_with_non_default_shard_nums(self, num_shards): + """ + Test search functionality with non-default shard numbers. + + This test verifies that: + 1. Collections are created with default shard numbers when num_shards <= 0 + 2. Collections are created with specified shard numbers when num_shards > 0 + 3. Search operations work correctly with different shard configurations + + The test follows these steps: + 1. Creates a collection with specified shard numbers + 2. Inserts test data + 3. Builds an index + 4. Performs a search operation + 5. 
Validates the results + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # create collection + dim = 32 + schema = self.create_schema(client)[0] + schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False) + schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim) + # create collection + self.create_collection(client, collection_name, schema=schema, num_shards=num_shards) + collection_info = self.describe_collection(client, collection_name)[0] + expected_num_shards = ct.default_shards_num if num_shards <= 0 else num_shards + assert collection_info["num_shards"] == expected_num_shards + # insert + data = [] + for i in range(ct.default_nb): + data.append({ + "id": i, + "vector": cf.gen_vectors(1, dim)[0] + }) + self.insert(client, collection_name, data) + # create index + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name='vector', index_type='HNSW', metric_type='COSINE') + self.create_index(client, collection_name, index_params=index_params) + self.wait_for_index_ready(client, collection_name, index_name='vector') + # load + self.load_collection(client, collection_name) + # search + vectors = cf.gen_vectors(ct.default_nq, dim) + search_params = {} + self.search(client, collection_name, vectors, anns_field="vector", + search_params=search_params, limit=ct.default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": ct.default_nq, + "limit": ct.default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + def test_search_HNSW_index_with_redundant_param(self): + """ + Test search functionality with HNSW index and redundant parameters. + + This test verifies that: + 1. HNSW index can be created with redundant parameters + 2. Search operations work correctly with redundant parameters + 3. Redundant parameters are ignored + + The test performs following steps: + 1. Creates a collection with float vectors + 2. Inserts test data + 3. Creates HNSW index with redundant parameters + 4. Performs a search operation + 5. 
Validates the results + """ + dim = 16 + index = "HNSW" + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + schema = self.create_schema(client)[0] + schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False) + schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim) + self.create_collection(client, collection_name, schema=schema) + + # insert + data = [] + for i in range(ct.default_nb): + data.append({ + "id": i, + "vector": cf.gen_vectors(1, dim)[0] + }) + self.insert(client, collection_name, data) + self.flush(client, collection_name) + # create index + index_params = self.prepare_index_params(client)[0] + params = cf.get_index_params_params(index) + params["nlist"] = 100 # nlist is redundant parameter + index_params.add_index(field_name='vector', index_type=index, + metric_type='COSINE', params=params) + self.create_index(client, collection_name, index_params=index_params) + self.wait_for_index_ready(client, collection_name, index_name='vector') + index_info = self.describe_index(client, collection_name, index_name='vector') + assert index_info[0]["nlist"] == '100' + # load + self.load_collection(client, collection_name) + # search + vectors = cf.gen_vectors(ct.default_nq, dim) + search_params = {} + self.search(client, collection_name, vectors, anns_field="vector", + search_params=search_params, limit=ct.default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": ct.default_nq, + "limit": ct.default_limit}) + \ No newline at end of file diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py b/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py index b9ad8f53ee..d5de9d166f 100644 --- a/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py +++ b/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py @@ -7,6 +7,7 @@ from utils.util_log import test_log as log from utils.util_pymilvus import * from base.client_v2_base import TestMilvusClientV2Base from pymilvus import DataType, AnnSearchRequest, WeightedRanker +from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY class TestMilvusClientTTL(TestMilvusClientV2Base): @@ -38,7 +39,7 @@ class TestMilvusClientTTL(TestMilvusClientV2Base): """ client = self._client() dim = 65 - ttl = 10 + ttl = 11 nb = 1000 collection_name = cf.gen_collection_name_by_testcase_name() schema = self.create_schema(client, enable_dynamic_field=False)[0] @@ -98,18 +99,21 @@ class TestMilvusClientTTL(TestMilvusClientV2Base): while time.time() - start_time < timeout: if search_ttl_effective is False: res1 = self.search(client, collection_name, search_vectors, anns_field='embeddings', - search_params={}, limit=10, consistency_level='Strong')[0] + search_params={}, limit=10, consistency_level=CONSISTENCY_STRONG)[0] if query_ttl_effective is False: res2 = self.query(client, collection_name, filter='', - output_fields=["count(*)"], consistency_level='Strong')[0] + output_fields=["count(*)"], consistency_level=CONSISTENCY_STRONG)[0] if hybrid_search_ttl_effective is False: res3 = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker, - limit=10, consistency_level='Strong')[0] + limit=10, consistency_level=CONSISTENCY_STRONG)[0] if len(res1[0]) == 0 and search_ttl_effective is False: log.info(f"search ttl effects in {round(time.time() - start_time, 4)}s") search_ttl_effective = True if res2[0].get('count(*)', None) == 0 and query_ttl_effective is False: log.info(f"query 
ttl effects in {round(time.time() - start_time, 4)}s") + res2x = self.query(client, collection_name, filter='visible==False', + output_fields=["count(*)"], consistency_level=CONSISTENCY_STRONG)[0] + log.debug(f"res2x: {res2x[0].get('count(*)', None)}") query_ttl_effective = True if len(res3[0]) == 0 and hybrid_search_ttl_effective is False: log.info(f"hybrid search ttl effects in {round(time.time() - start_time, 4)}s") @@ -152,44 +156,64 @@ class TestMilvusClientTTL(TestMilvusClientV2Base): log.info(f"flush completed in {time.time() - t1}s") # search data again after insert more data - res = self.search(client, collection_name, search_vectors, - search_params={}, anns_field='embeddings', - limit=10, consistency_level='Strong')[0] - assert len(res[0]) > 0 - # query count(*) - res = self.query(client, collection_name, filter='visible==False', - output_fields=["count(*)"], consistency_level='Strong')[0] - assert res[0].get('count(*)', None) == 0 + consistency_levels = [CONSISTENCY_EVENTUALLY, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_STRONG] + for consistency_level in consistency_levels: + log.debug(f"start to search/query with {consistency_level}") + # try 3 times + for i in range(3): + res = self.search(client, collection_name, search_vectors, + search_params={}, anns_field='embeddings', + limit=10, consistency_level=consistency_level)[0] + if len(res[0]) > 0: + break + else: + time.sleep(1) + assert len(res[0]) > 0 - # hybrid search - res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker, - limit=10, consistency_level='Strong')[0] - assert len(res[0]) > 0 + if consistency_level != CONSISTENCY_STRONG: + pass + else: + # query count(*) + res = self.query(client, collection_name, filter='', + output_fields=["count(*)"], consistency_level=consistency_level)[0] + assert res[0].get('count(*)', None) == nb * insert_times + res = self.query(client, collection_name, filter='visible==False', + output_fields=["count(*)"], consistency_level=consistency_level)[0] + assert res[0].get('count(*)', None) == 0 + # query count(visible) + res = self.query(client, collection_name, filter='visible==True', + output_fields=["count(*)"], consistency_level=consistency_level)[0] + assert res[0].get('count(*)', None) == nb * insert_times - # query count(visible) - res = self.query(client, collection_name, filter='visible==True', - output_fields=["count(*)"], consistency_level='Strong')[0] - assert res[0].get('count(*)', None) > 0 + # hybrid search + res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker, + limit=10, consistency_level=consistency_level)[0] + assert len(res[0]) > 0 - # alter ttl to 1000s - self.alter_collection_properties(client, collection_name, properties={"collection.ttl.seconds": 1000}) - # search data after alter ttl - res = self.search(client, collection_name, search_vectors, - search_params={}, anns_field='embeddings', - filter='visible==False', limit=10, consistency_level='Strong')[0] - assert len(res[0]) > 0 + # alter ttl to 2000s + self.alter_collection_properties(client, collection_name, properties={"collection.ttl.seconds": 2000}) + for consistency_level in consistency_levels: + log.debug(f"start to search/query after alter ttl with {consistency_level}") + # search data after alter ttl + res = self.search(client, collection_name, search_vectors, + search_params={}, anns_field='embeddings', + filter='visible==False', limit=10, consistency_level=consistency_level)[0] + assert len(res[0]) > 0 - # hybrid search data after 
diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt
index 01905383ae..dc0f952fc4 100644
--- a/tests/python_client/requirements.txt
+++ b/tests/python_client/requirements.txt
@@ -28,8 +28,8 @@ pytest-parallel
 pytest-random-order

 # pymilvus
-pymilvus==2.6.0rc151
-pymilvus[bulk_writer]==2.6.0rc151
+pymilvus==2.6.0rc155
+pymilvus[bulk_writer]==2.6.0rc155

 # for protobuf
 protobuf==5.27.2
diff --git a/tests/python_client/testcases/indexes/idx_ivf_rabitq.py b/tests/python_client/testcases/indexes/idx_ivf_rabitq.py
index 4ad90f940d..382221626b 100644
--- a/tests/python_client/testcases/indexes/idx_ivf_rabitq.py
+++ b/tests/python_client/testcases/indexes/idx_ivf_rabitq.py
@@ -68,12 +68,12 @@ class IVF_RABITQ:
         # refine params test
         {
             "description": "Enable Refine Test",
-            "params": {"refine": 'true'},  # to be fixed: #41760
+            "params": {"refine": 'true'},
             "expected": success
         },
         {
             "description": "Disable Refine Test",
-            "params": {"refine": 'false'},  # to be fixed: #41760
+            "params": {"refine": 'false'},
             "expected": success
         },
@@ -194,7 +194,13 @@ class IVF_RABITQ:
         {
             "description": "Exceed nlist Test",
             "params": {"nprobe": 129},  # Assuming nlist=128
-            "expected": success  # to be fixed: #41765
+            "expected": success
+        },
+        {
+            "description": "Exceed nprobe Test",
+            "params": {"nprobe": 65537},
+            "expected": {"err_code": 999,
+                         "err_msg": "should be in range [1, 65536]"}
         },
         {
             "description": "Negative Value Test",
diff --git a/tests/python_client/testcases/indexes/test_ivf_rabitq.py b/tests/python_client/testcases/indexes/test_ivf_rabitq.py
index 04116e52d6..87bda46aa4 100644
--- a/tests/python_client/testcases/indexes/test_ivf_rabitq.py
+++ b/tests/python_client/testcases/indexes/test_ivf_rabitq.py
@@ -82,7 +82,7 @@ class TestIvfRabitqBuildParams(TestMilvusClientV2Base):
         for key, value in build_params.items():
             if value is not None:
                 assert key in idx_info.keys()
-                # assert value in idx_info.values()  # TODO: uncommented after #41783 fixed
+                assert str(value) in idx_info.values()  # enabled now that #41783 is fixed

     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("vector_data_type", ct.all_vector_types)
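For context on the new "Exceed nprobe Test" case: the upper bound is enforced at search time, so an out-of-range nprobe is rejected by the server rather than silently clamped. A hedged sketch of how this surfaces through the client; the uri, collection name, and dimension are placeholders:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")  # assumed local deployment
    try:
        client.search("demo_rabitq", data=[[0.1] * 128], limit=10,
                      search_params={"params": {"nprobe": 65537}})  # above the allowed max
    except Exception as e:
        print(e)  # expected to contain: should be in range [1, 65536]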
diff --git a/tests/python_client/testcases/test_partition_key.py b/tests/python_client/testcases/test_partition_key.py
index 41b561fb1e..55fc50d3f5 100644
--- a/tests/python_client/testcases/test_partition_key.py
+++ b/tests/python_client/testcases/test_partition_key.py
@@ -72,6 +72,23 @@ class TestPartitionKeyParams(TestcaseBase):
         for i in range(nq):
             assert res1[i].ids == res2[i].ids == res3[i].ids

+        # query with 'or' exprs to verify that partition key optimization is not applied to OR binary expressions
+        query_res1 = collection_w.query(
+            expr=f'{string_field.name} == "{string_prefix}5" || {int64_field.name} in [2,4,6]',
+            output_fields=['count(*)'])[0]
+        query_res2 = collection_w.query(
+            expr=f'{string_field.name} in ["{string_prefix}2","{string_prefix}4", "{string_prefix}6"] || {int64_field.name}==5',
+            output_fields=['count(*)'])[0]
+        query_res3 = collection_w.query(
+            expr=f'{int64_field.name}==5 or {string_field.name} in ["{string_prefix}2","{string_prefix}4", "{string_prefix}6"]',
+            output_fields=['count(*)'])[0]
+        query_res4 = collection_w.query(
+            expr=f'{int64_field.name} in [2,4,6] || {string_field.name} == "{string_prefix}5"',
+            output_fields=['count(*)'])[0]
+        # assert the counts are consistent across the equivalent expressions
+        assert query_res1[0].get('count(*)') == query_res2[0].get('count(*)') \
+            == query_res3[0].get('count(*)') == query_res4[0].get('count(*)') == 40
+
     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name, ct.default_string_field_name])
     @pytest.mark.parametrize("index_on_par_key_field", [True, False])
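The new queries above pin down the behavior from issue #42918: when a filter contains an OR branch that does not constrain the partition key, partition pruning must be skipped so that all partitions are scanned, and logically equivalent expressions return identical counts. A minimal sketch of the same check outside the test harness; the uri, collection name, and field names are placeholders:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")  # assumed local deployment
    exprs = [  # logically equivalent filters, only the operand order differs
        'varchar == "prefix5" || int64 in [2,4,6]',
        'int64 in [2,4,6] || varchar == "prefix5"',
    ]
    counts = [client.query("demo_pk", filter=e, output_fields=["count(*)"])[0]["count(*)"]
              for e in exprs]
    assert len(set(counts)) == 1  # counts must match regardless of operand order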