diff --git a/tests/python_client/check/func_check.py b/tests/python_client/check/func_check.py
index a0be09fa4e..404ab8fed9 100644
--- a/tests/python_client/check/func_check.py
+++ b/tests/python_client/check/func_check.py
@@ -566,31 +566,46 @@ class ResponseChecker:
             raise Exception("No expect values found in the check task")
         exp_res = check_items.get("exp_res", None)
         with_vec = check_items.get("with_vec", False)
-        pk_name = check_items.get("pk_name", ct.default_primary_field_name)
-        vector_type = check_items.get("vector_type", "FLOAT_VECTOR")
-        if vector_type == DataType.FLOAT16_VECTOR:
-            for single_exp_res in exp_res:
-                single_exp_res['vector'] = single_exp_res['vector'] .tolist()
-            for single_query_result in query_res:
-                single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=np.float16).tolist()
-        if vector_type == DataType.BFLOAT16_VECTOR:
-            for single_exp_res in exp_res:
-                single_exp_res['vector'] = single_exp_res['vector'] .tolist()
-            for single_query_result in query_res:
-                single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=bfloat16).tolist()
-        if vector_type == DataType.INT8_VECTOR:
-            for single_exp_res in exp_res:
-                single_exp_res['vector'] = single_exp_res['vector'] .tolist()
-            for single_query_result in query_res:
-                single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=np.int8).tolist()
+        exp_limit = check_items.get("exp_limit", None)
+        count = check_items.get("count(*)", None)
+        if count is not None:
+            assert count == query_res[0].get("count(*)", None)
+            return True
+        if exp_limit is None and exp_res is None:
+            raise Exception("No expected values would be checked in the check task")
+        if exp_limit is not None:
+            assert len(query_res) == exp_limit
+        # pk_name = check_items.get("pk_name", ct.default_primary_field_name)
+        # if with_vec:
         if exp_res is not None:
+            if with_vec is True:
+                vector_type = check_items.get('vector_type', 'FLOAT_VECTOR')
+                vector_field = check_items.get('vector_field', 'vector')
+                if vector_type == DataType.FLOAT16_VECTOR:
+                    # for single_exp_res in exp_res:
+                    #     single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
+                    for single_query_result in query_res:
+                        single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=np.float16).tolist()
+                if vector_type == DataType.BFLOAT16_VECTOR:
+                    # for single_exp_res in exp_res:
+                    #     single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
+                    for single_query_result in query_res:
+                        single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=bfloat16).tolist()
+                if vector_type == DataType.INT8_VECTOR:
+                    # for single_exp_res in exp_res:
+                    #     if single_exp_res[vector_field].__class__ is not list:
+                    #         single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
+                    for single_query_result in query_res:
+                        single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=np.int8).tolist()
             if isinstance(query_res, list):
-                assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name,
-                                              with_vec=with_vec)
+                # assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name, with_vec=with_vec)
+                # return True
+                assert pc.compare_lists_ignore_order(a=query_res, b=exp_res)
                 return True
             else:
                 log.error(f"Query result {query_res} is not list")
                 return False
+        log.warning(f'Expected query result is {exp_res}')
 
     @staticmethod
@@ -810,4 +825,4 @@ class ResponseChecker:
         if check_items.get("index_name", None) is not None:
             assert res["index_name"] == check_items.get("index_name")
-        return True
\ No newline at end of file
+        return True
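# Illustrative sketch (not part of the patch): the three mutually exclusive ways
# a test can now drive check_query_results via check_items. The values are
# hypothetical; the dicts mirror what the tests in this PR pass in.
count_check = {"count(*)": 3000}                         # compare count(*) only, then return
limit_check = {"exp_limit": 100}                         # compare len(query_res) only
row_check = {"exp_res": [{"id": 1, "float_field": 0.5}], # order-insensitive row comparison;
             "with_vec": True,                           # decode fp16/bf16/int8 vectors first
             "vector_type": "FLOAT16_VECTOR",            # normally a DataType enum member
             "vector_field": "vector"}

# minimal stand-in for the new dispatch order in check_query_results
# (when both exp_limit and exp_res are present, the real code checks both)
def which_check(check_items):
    if check_items.get("count(*)") is not None:
        return "count"
    if check_items.get("exp_limit") is None and check_items.get("exp_res") is None:
        raise Exception("No expected values would be checked in the check task")
    return "limit" if check_items.get("exp_res") is None else "rows"

assert which_check(count_check) == "count"
assert which_check(limit_check) == "limit"
assert which_check(row_check) == "rows"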
diff --git a/tests/python_client/check/param_check.py b/tests/python_client/check/param_check.py
index b4c975df4c..28cf436e67 100644
--- a/tests/python_client/check/param_check.py
+++ b/tests/python_client/check/param_check.py
@@ -5,6 +5,111 @@ from common import common_type as ct
 sys.path.append("..")
 from utils.util_log import test_log as log
 
+import numpy as np
+from collections.abc import Iterable
+
+epsilon = ct.epsilon
+
+
+def deep_approx_compare(x, y, epsilon=epsilon):
+    """
+    Recursively compares two objects for approximate equality, handling floating-point precision.
+
+    Args:
+        x: First object to compare
+        y: Second object to compare
+        epsilon: Tolerance for floating-point comparisons (defaults to ct.epsilon)
+
+    Returns:
+        bool: True if objects are approximately equal, False otherwise
+
+    Handles:
+        - Numeric types (int, float, numpy scalars)
+        - Numpy arrays (shape and value comparison)
+        - Sequences (list, tuple)
+        - Dictionaries
+        - Other iterables (except strings)
+        - Falls back to strict equality for other types
+    """
+    # Handle basic numeric types (including numpy scalars)
+    if isinstance(x, (int, float, np.integer, np.floating)) and isinstance(y, (int, float, np.integer, np.floating)):
+        return abs(float(x) - float(y)) < epsilon
+
+    # Handle numpy arrays before generic sequences so np.allclose is actually reached
+    if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
+        if x.shape != y.shape:
+            return False
+        return np.allclose(x, y, atol=epsilon)
+
+    # Handle lists/tuples (and mixed list/array pairs)
+    if isinstance(x, (list, tuple, np.ndarray)) and isinstance(y, (list, tuple, np.ndarray)):
+        if len(x) != len(y):
+            return False
+        for a, b in zip(x, y):
+            if not deep_approx_compare(a, b, epsilon):
+                return False
+        return True
+
+    # Handle dictionaries
+    if isinstance(x, dict) and isinstance(y, dict):
+        if set(x.keys()) != set(y.keys()):
+            return False
+        for key in x:
+            if not deep_approx_compare(x[key], y[key], epsilon):
+                return False
+        return True
+
+    # Handle other iterables (e.g., Protobuf containers)
+    if isinstance(x, Iterable) and isinstance(y, Iterable) and not isinstance(x, str):
+        try:
+            return deep_approx_compare(list(x), list(y), epsilon)
+        except Exception:
+            pass
+
+    # Fall back to strict equality for other types
+    return x == y
+
+
+def compare_lists_ignore_order(a, b, epsilon=epsilon):
+    """
+    Compares two lists of dictionaries for equality (order-insensitive) with floating-point tolerance.
+
+    Args:
+        a (list): First list of dictionaries to compare
+        b (list): Second list of dictionaries to compare
+        epsilon (float, optional): Tolerance for floating-point comparisons. Defaults to ct.epsilon.
+
+    Returns:
+        bool: True if lists contain equivalent dictionaries (order doesn't matter), False otherwise
+
+    Note:
+        Uses deep_approx_compare() for dictionary comparison with floating-point tolerance.
+        Maintains O(n²) complexity due to nested comparisons.
+    """
+    if len(a) != len(b):
+        return False
+
+    # Track which elements of b are still unmatched
+    available_indices = set(range(len(b)))
+
+    for item_a in a:
+        matched = False
+        # Collect the matched index here (avoid modifying the set during iteration)
+        to_remove = []
+
+        for idx in available_indices:
+            if deep_approx_compare(item_a, b[idx], epsilon):
+                to_remove.append(idx)
+                matched = True
+                break
+
+        if not matched:
+            return False
+
+        # Remove the matched index so it cannot be reused
+        available_indices -= set(to_remove)
+
+    return True
+
 
 def ip_check(ip):
     if ip == "localhost":
diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py
index 239783e717..8bd27b5823 100644
--- a/tests/python_client/common/common_func.py
+++ b/tests/python_client/common/common_func.py
@@ -1127,33 +1127,6 @@ def gen_schema_multi_string_fields(string_fields):
                                           primary_field=primary_field, auto_id=False)
     return schema
 
-
-def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR):
-    vectors = []
-    if vector_data_type == DataType.FLOAT_VECTOR:
-        vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
-    elif vector_data_type == DataType.FLOAT16_VECTOR:
-        vectors = gen_fp16_vectors(nb, dim)[1]
-    elif vector_data_type == DataType.BFLOAT16_VECTOR:
-        vectors = gen_bf16_vectors(nb, dim)[1]
-    elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
-        vectors = gen_sparse_vectors(nb, dim)
-    elif vector_data_type == ct.text_sparse_vector:
-        vectors = gen_text_vectors(nb)  # for Full Text Search
-    elif vector_data_type == DataType.INT8_VECTOR:
-        vectors = gen_int8_vectors(nb, dim)[1]
-    elif vector_data_type == DataType.BINARY_VECTOR:
-        vectors = gen_binary_vectors(nb, dim)[1]
-    else:
-        log.error(f"Invalid vector data type: {vector_data_type}")
-        raise Exception(f"Invalid vector data type: {vector_data_type}")
-    if dim > 1:
-        if vector_data_type == DataType.FLOAT_VECTOR:
-            vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
-            vectors = vectors.tolist()
-    return vectors
-
-
 def gen_string(nb):
     string_values = [str(random.random()) for _ in range(nb)]
     return string_values
@@ -3613,7 +3586,7 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok", empty_percentage=0):
 def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR):
     vectors = []
     if vector_data_type == DataType.FLOAT_VECTOR:
-        vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
+        vectors = [[random.uniform(-1, 1) for _ in range(dim)] for _ in range(nb)]
     elif vector_data_type == DataType.FLOAT16_VECTOR:
        vectors = gen_fp16_vectors(nb, dim)[1]
     elif vector_data_type == DataType.BFLOAT16_VECTOR:
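# Illustrative sketch (not part of the patch): the surviving gen_vectors
# L2-normalizes FLOAT_VECTOR output, so switching the components from
# random.random() (non-negative) to random.uniform(-1, 1) lets the generated
# directions cover all orthants instead of only the positive one.
import numpy as np
from pymilvus import DataType
from common import common_func as cf   # repo-local module

vecs = cf.gen_vectors(nb=4, dim=8, vector_data_type=DataType.FLOAT_VECTOR)
norms = np.linalg.norm(np.array(vecs), axis=1)
assert np.allclose(norms, 1.0)   # rows come back unit-length (dim > 1)
# with uniform(-1, 1), negative components are now possible (and almost
# surely present among 32 sampled components)
assert any(v < 0 for row in vecs for v in row)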
diff --git a/tests/python_client/milvus_client/test_milvus_client_data_integrity.py b/tests/python_client/milvus_client/test_milvus_client_data_integrity.py
index ede1038167..1a6ad951db 100644
--- a/tests/python_client/milvus_client/test_milvus_client_data_integrity.py
+++ b/tests/python_client/milvus_client/test_milvus_client_data_integrity.py
@@ -76,12 +76,12 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
     @pytest.mark.parametrize("is_release", [True])
     @pytest.mark.parametrize("single_data_num", [50])
     @pytest.mark.parametrize("expr_field", [ct.default_int64_field_name,
-                                            # ct.default_string_field_name,  # TODO: uncommented after #42604 fixed
+                                            ct.default_string_field_name,
                                             ct.default_float_array_field_name])
     def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array(self, enable_dynamic_field,
                                                                                             supported_numeric_scalar_index,
-                                                                                            # supported_varchar_scalar_index,
+                                                                                            supported_varchar_scalar_index,
                                                                                             supported_json_path_index,
                                                                                             supported_array_double_float_scalar_index,
                                                                                             is_flush,
diff --git a/tests/python_client/milvus_client/test_milvus_client_search_iterator.py b/tests/python_client/milvus_client/test_milvus_client_search_iterator.py
index 850e519b8e..1044729672 100644
--- a/tests/python_client/milvus_client/test_milvus_client_search_iterator.py
+++ b/tests/python_client/milvus_client/test_milvus_client_search_iterator.py
@@ -54,7 +54,7 @@ def external_filter_with_outputs(hits):
     results = []
     for hit in hits:
         # equals filter nothing if there are output_fields
-        if hit.distance < 1.0 and len(hit.fields) > 0:
+        if hit.distance <= 4.0 and len(hit.fields) > 0:
             results.append(hit)
     return results
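# Illustrative sketch (not part of the patch): how such an external filter is
# typically plugged into the client-side search iterator. The kwarg name
# external_filter_func and the handles below are assumptions, not taken from
# this diff.
iterator = client.search_iterator(
    collection_name=collection_name,      # hypothetical collection handle
    data=[query_vector],                  # hypothetical query vector
    batch_size=100,
    limit=1000,
    output_fields=["id"],
    external_filter_func=external_filter_with_outputs,
)
# hits with distance > 4.0 are dropped client-side before being yielded, so a
# too-tight threshold (the old 1.0) can starve the iterator of results
while True:
    page = iterator.next()
    if not page:
        iterator.close()
        break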
diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py b/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py
index 25a48ff689..8ed005fd95 100644
--- a/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py
+++ b/tests/python_client/milvus_client_v2/test_milvus_client_e2e.py
@@ -1,3 +1,6 @@
+import random
+
+import pandas
 import pytest
 import numpy as np
 import time
@@ -10,7 +13,6 @@ from base.client_v2_base import TestMilvusClientV2Base
 from pymilvus import DataType, FieldSchema, CollectionSchema
 
 # Test parameters
-default_dim = ct.default_dim
 default_nb = ct.default_nb
 default_nq = ct.default_nq
 default_limit = ct.default_limit
@@ -28,7 +30,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("flush_enable", [True, False])
     @pytest.mark.parametrize("scalar_index_enable", [True, False])
-    def test_milvus_client_e2e_default(self, flush_enable, scalar_index_enable):
+    @pytest.mark.parametrize("vector_type", [DataType.FLOAT_VECTOR])
+    def test_milvus_client_e2e_default(self, flush_enable, scalar_index_enable, vector_type):
         """
         target: test high level api: client.create_collection, insert, search, query
         method: create connection, collection, insert and search with:
@@ -37,13 +40,14 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
         expected: search/query successfully
         """
         client = self._client()
-
+        dim = 8
+
         # 1. Create collection with custom schema
         collection_name = cf.gen_collection_name_by_testcase_name()
         schema = self.create_schema(client, enable_dynamic_field=False)[0]
         # Primary key and vector field
         schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False)
-        schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field("vector", vector_type, dim=dim)
         # Boolean type
         schema.add_field("bool_field", DataType.BOOL, nullable=True)
         # Integer types
@@ -59,7 +63,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
         # JSON type
         schema.add_field("json_field", DataType.JSON, nullable=True)
         # Array type
-        schema.add_field("array_field", DataType.ARRAY, element_type=DataType.INT64, max_capacity=12, nullable=True)
+        schema.add_field("array_field", DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=12, nullable=True)
 
         # Create collection
         self.create_collection(client, collection_name, schema=schema)
@@ -68,14 +72,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
         num_inserts = 5  # insert data for 5 times
         total_rows = []
         for batch in range(num_inserts):
-            vectors = cf.gen_vectors(default_nb, default_dim)
+            vectors = list(cf.gen_vectors(default_nb, dim, vector_data_type=vector_type)) \
+                if vector_type == DataType.FLOAT_VECTOR \
+                else cf.gen_vectors(default_nb, dim, vector_data_type=vector_type)
             rows = []
             start_id = batch * default_nb  # ensure id is not duplicated
             for i in range(default_nb):
                 row = {
                     "id": start_id + i,  # ensure id is not duplicated
-                    "embeddings": list(vectors[i])
+                    "vector": vectors[i]
                 }
                 # Add nullable fields with null values for every 5th record
@@ -99,11 +105,11 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
                     "int16_field": i % 32768,
                     "int32_field": i,
                     "int64_field": i,
-                    "float_field": float(i),
-                    "double_field": float(i) * 1.0,
+                    "float_field": random.random(),
+                    "double_field": random.random(),
                     "varchar_field": f"varchar_{start_id + i}",
                     "json_field": {"id": start_id + i, "value": f"json_{start_id + i}"},
-                    "array_field": [i, i + 1, i + 2]
+                    "array_field": [random.random() for _ in range(5)]
                 })
                 rows.append(row)
                 total_rows.append(row)
@@ -124,7 +130,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
 
         # Create index parameters
         index_params = self.prepare_index_params(client)[0]
-        index_params.add_index("embeddings", metric_type="COSINE")
+        index_params.add_index("vector", metric_type="COSINE")
 
         # Add autoindex for scalar fields if enabled
         if scalar_index_enable:
@@ -160,13 +166,13 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
 
         # 4. Search
         t0 = time.time()
-        vectors_to_search = cf.gen_vectors(1, default_dim)
+        vectors_to_search = cf.gen_vectors(1, dim, vector_data_type=vector_type)
         search_params = {"metric_type": "COSINE", "params": {"nprobe": 100}}
         search_res, _ = self.search(
             client,
             collection_name,
             vectors_to_search,
-            anns_field="embeddings",
+            anns_field="vector",
             search_params=search_params,
             limit=default_limit,
             output_fields=['*'],
@@ -182,26 +188,34 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
 
         # 5. Query with filters on each scalar field
         t0 = time.time()
-        # Query on boolean field
+        output_fields = ['id', 'int8_field', 'json_field']
         bool_filter = "bool_field == true"
-        bool_expected = [r for r in total_rows if r["bool_field"] is not None and r["bool_field"]]
+        bool_expected = [
+            {
+                'id': r['id'],
+                'int8_field': r['int8_field'],
+                'json_field': r['json_field']
+            }
+            for r in total_rows if r["bool_field"] is not None and r["bool_field"]]
         query_res, _ = self.query(
             client,
             collection_name,
             filter=bool_filter,
-            output_fields=['*'],
+            output_fields=output_fields,
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": bool_expected,
-                "with_vec": True,
+                "with_vec": False,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Query on int8 field
-        int8_filter = "int8_field < 50"
-        int8_expected = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 50]
+        with_vec = True
+        int8_filter = "int8_field is null || int8_field < 10"
+        int8_expected = [r for r in total_rows if r["int8_field"] is None or r["int8_field"] < 10]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -210,14 +224,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": int8_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Query on int16 field
-        int16_filter = "int16_field < 1000"
-        int16_expected = [r for r in total_rows if r["int16_field"] is not None and r["int16_field"] < 1000]
+        int16_filter = "100 <= int16_field < 200"
+        int16_expected = [r for r in total_rows if r["int16_field"] is not None and 100 <= r["int16_field"] < 200]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -226,14 +241,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": int16_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Query on int32 field
-        int32_filter = "int32_field in [1,2,3,4,5]"
-        int32_expected = [r for r in total_rows if r["int32_field"] is not None and r["int32_field"] in [1,2,3,4,5]]
+        int32_filter = "int32_field in [1,2,5,6]"
+        int32_expected = [r for r in total_rows if r["int32_field"] is not None and r["int32_field"] in [1,2,5,6]]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -242,14 +258,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": int32_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Query on int64 field
-        int64_filter = "int64_field >= 10"
-        int64_expected = [r for r in total_rows if r["int64_field"] is not None and r["int64_field"] >= 10]
+        int64_filter = "int64_field >= 4678 and int64_field < 5050"
+        int64_expected = [r for r in total_rows if r["int64_field"] is not None and r["int64_field"] >= 4678 and r["int64_field"] < 5050]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -258,14 +275,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": int64_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Query on float field
-        float_filter = "float_field > 5.0"
-        float_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 5.0]
+        float_filter = "float_field > 0.5 and float_field <= 0.7"
+        float_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 0.5 and r["float_field"] <= 0.7]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -274,14 +292,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": float_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Query on double field
-        double_filter = "3.0 <=double_field <= 7.0"
-        double_expected = [r for r in total_rows if r["double_field"] is not None and 3.0 <= r["double_field"] <= 7.0]
+        double_filter = "0.5 <=double_field <= 0.7"
+        double_expected = [r for r in total_rows if r["double_field"] is not None and 0.5 <= r["double_field"] <= 0.7]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -290,7 +309,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": double_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
@@ -306,7 +326,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": varchar_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
@@ -322,7 +343,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": varchar_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
@@ -338,7 +360,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": json_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
@@ -354,7 +377,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": array_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
@@ -370,7 +394,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": multi_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
            }
        )
@@ -386,15 +411,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": mix_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Query on is not null conditions for each scalar field
         # Int8 field is not null
-        int8_not_null_filter = "int8_field is not null"
-        int8_not_null_expected = [r for r in total_rows if r["int8_field"] is not None]
+        int8_not_null_filter = "int8_field is not null and int8_field > 100"
+        int8_not_null_expected = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] > 100]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -403,14 +429,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": int8_not_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Int16 field is not null
-        int16_not_null_filter = "int16_field is not null"
-        int16_not_null_expected = [r for r in total_rows if r["int16_field"] is not None]
+        int16_not_null_filter = "int16_field is not null and int16_field < 100"
+        int16_not_null_expected = [r for r in total_rows if r["int16_field"] is not None and r["int16_field"] < 100]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -419,14 +446,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": int16_not_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Float field is not null
-        float_not_null_filter = "float_field is not null"
-        float_not_null_expected = [r for r in total_rows if r["float_field"] is not None]
+        float_not_null_filter = "float_field is not null and float_field > 0.5 and float_field <= 0.7"
+        float_not_null_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 0.5 and r["float_field"] <= 0.7]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -435,14 +463,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": float_not_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Double field is not null
-        double_not_null_filter = "double_field is not null"
-        double_not_null_expected = [r for r in total_rows if r["double_field"] is not None]
+        double_not_null_filter = "double_field is not null and double_field <= 0.2"
+        double_not_null_expected = [r for r in total_rows if r["double_field"] is not None and r["double_field"] <= 0.2]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -451,7 +480,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": double_not_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
@@ -467,14 +497,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": varchar_not_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # JSON field is not null
-        json_not_null_filter = "json_field is not null"
-        json_not_null_expected = [r for r in total_rows if r["json_field"] is not None]
+        json_not_null_filter = "json_field is not null and json_field['id'] < 100"
+        json_not_null_expected = [r for r in total_rows if r["json_field"] is not None and r["json_field"]["id"] < 100]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -483,14 +514,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": json_not_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Array field is not null
-        array_not_null_filter = "array_field is not null"
-        array_not_null_expected = [r for r in total_rows if r["array_field"] is not None]
+        array_not_null_filter = "array_field is not null and array_field[0] < 100"
+        array_not_null_expected = [r for r in total_rows if r["array_field"] is not None and r["array_field"][0] < 100]
         query_res, _ = self.query(
             client,
             collection_name,
@@ -499,7 +531,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": array_not_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
@@ -516,16 +549,17 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": multi_not_null_expected,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Complex mixed conditions with is null, is not null, and comparison operators
         # Test case 1: int field is null AND float field > value AND varchar field is not null
-        complex_mix_filter1 = "int32_field is null and float_field > 10.0 and varchar_field is not null"
+        complex_mix_filter1 = "int32_field is null and float_field > 0.7 and varchar_field is not null"
         complex_mix_expected1 = [r for r in total_rows if r["int32_field"] is None and
-                                 r["float_field"] is not None and r["float_field"] > 10.0 and
+                                 r["float_field"] is not None and r["float_field"] > 0.7 and
                                  r["varchar_field"] is not None]
         query_res, _ = self.query(
             client,
@@ -535,7 +569,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": complex_mix_expected1,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
        )
@@ -553,15 +588,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": complex_mix_expected2,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
 
         # Test case 3: Multiple fields with mixed null/not null conditions and range comparisons
-        complex_mix_filter3 = ("int8_field is not null and int8_field < 50 and double_field is null and "
+        complex_mix_filter3 = ("int8_field is not null and int8_field < 15 and double_field is null and "
                                "varchar_field is not null and varchar_field like \"varchar_2%\"")
-        complex_mix_expected3 = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 50 and
+        complex_mix_expected3 = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 15 and
                                  r["double_field"] is None and r["varchar_field"] is not None and
                                  r["varchar_field"].startswith("varchar_2")]
         query_res, _ = self.query(
             client,
@@ -572,7 +608,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
             check_task=CheckTasks.check_query_results,
             check_items={
                 "exp_res": complex_mix_expected3,
-                "with_vec": True,
+                "with_vec": with_vec,
+                "vector_type": vector_type,
                 "pk_name": "id"
             }
         )
diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py b/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py
index 30160e3b16..f7dd4a1c3b 100644
--- a/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py
+++ b/tests/python_client/milvus_client_v2/test_milvus_client_range_search.py
@@ -1548,55 +1548,6 @@ class TestCollectionRangeSearch(TestcaseBase):
             **kwargs
         )
 
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_range_search_with_consistency_session(self, nq, dim, auto_id, _async):
-        """
-        target: test range search with different consistency level
-        method: 1. create a collection
-                2. insert data
-                3. range search with consistency_level is "session"
-        expected: searched successfully
-        """
-        limit = 1000
-        nb_old = 500
-        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
-                                                                      auto_id=auto_id,
-                                                                      dim=dim)[0:4]
-        # 2. search for original data after load
-        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
-        range_search_params = {"metric_type": "COSINE", "params": {"nprobe": 10, "radius": 0,
-                                                                   "range_filter": 1000}}
-        collection_w.search(vectors[:nq], default_search_field,
-                            range_search_params, limit,
-                            default_search_exp, _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": nq,
-                                         "ids": insert_ids,
-                                         "limit": nb_old,
-                                         "_async": _async,
-                                         "pk_name": ct.default_int64_field_name})
-
-        kwargs = {}
-        consistency_level = kwargs.get(
-            "consistency_level", CONSISTENCY_SESSION)
-        kwargs.update({"consistency_level": consistency_level})
-
-        nb_new = 400
-        _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
-                                                    auto_id=auto_id, dim=dim,
-                                                    insert_offset=nb_old)
-        insert_ids.extend(insert_ids_new)
-        collection_w.search(vectors[:nq], default_search_field,
-                            range_search_params, limit,
-                            default_search_exp, _async=_async,
-                            **kwargs,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": nq,
-                                         "ids": insert_ids,
-                                         "limit": nb_old + nb_new,
-                                         "_async": _async,
-                                         "pk_name": ct.default_int64_field_name})
-
     @pytest.mark.tags(CaseLabel.L2)
     def test_range_search_sparse(self):
         """
diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py
index 2969d5dfbb..5d54cd209f 100644
--- a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py
+++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2.py
@@ -80,101 +80,6 @@ half_nb = ct.default_nb // 2
 max_hybrid_search_req_num = ct.max_hybrid_search_req_num
 
 
-class TestSearchBase(TestcaseBase):
-    @pytest.fixture(
-        scope="function",
-        params=[1, 10]
-    )
-    def get_top_k(self, request):
-        yield request.param
-
-    @pytest.fixture(
-        scope="function",
-        params=[1, 10, 1100]
-    )
-    def get_nq(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=[32, 128])
-    def dim(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=[False, True])
-    def auto_id(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=[False, True])
-    def _async(self, request):
-        yield request.param
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("index", ct.all_index_types[:6])
-    def test_each_index_with_mmap_enabled_search(self, index):
-        """
-        target: test each index with mmap enabled search
-        method: test each index with mmap enabled search
-        expected: search success
-        """
-        self._connect()
-        nb = 2000
-        dim = 32
-        collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False)[0]
-        params = cf.get_index_params_params(index)
-        default_index = {"index_type": index, "params": params, "metric_type": "L2"}
-        collection_w.create_index(field_name, default_index, index_name="mmap_index")
-        # mmap index
-        collection_w.alter_index("mmap_index", {'mmap.enabled': True})
-        # search
-        collection_w.load()
-        search_params = cf.gen_search_param(index)[0]
-        vector = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
-        collection_w.search(vector, default_search_field, search_params, ct.default_limit,
-                            output_fields=["*"],
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "limit": ct.default_limit})
-        # enable mmap
-        collection_w.release()
-        collection_w.alter_index("mmap_index", {'mmap.enabled': False})
-        collection_w.load()
-        collection_w.search(vector, default_search_field, search_params, ct.default_limit,
-                            output_fields=["*"],
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "limit": ct.default_limit})
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("index", ct.all_index_types[8:10])
-    def test_enable_mmap_search_for_binary_indexes(self, index):
-        """
-        target: enable mmap for binary indexes
-        method: enable mmap for binary indexes
-        expected: search success
-        """
-        self._connect()
-        dim = 64
-        nb = 2000
-        collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False, is_binary=True)[0]
-        params = cf.get_index_params_params(index)
-        default_index = {"index_type": index,
-                         "params": params, "metric_type": "JACCARD"}
-        collection_w.create_index(ct.default_binary_vec_field_name, default_index, index_name="binary_idx_name")
-        collection_w.alter_index("binary_idx_name", {'mmap.enabled': True})
-        collection_w.set_properties({'mmap.enabled': True})
-        collection_w.load()
-        pro = collection_w.describe()[0].get("properties")
-        assert pro["mmap.enabled"] == 'True'
-        assert collection_w.index()[0].params["mmap.enabled"] == 'True'
-        # search
-        binary_vectors = cf.gen_binary_vectors(default_nq, dim)[1]
-        search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
-        output_fields = ["*"]
-        collection_w.search(binary_vectors, ct.default_binary_vec_field_name, search_params,
-                            default_limit, default_search_string_exp, output_fields=output_fields,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "limit": default_limit})
-
 class TestCollectionSearch(TestcaseBase):
     """ Test case of search interface """
 
@@ -237,129 +142,6 @@ class TestCollectionSearch(TestcaseBase):
     ******************************************************************
     """
 
-    @pytest.mark.skip("enable this later using session/strong consistency")
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_search_new_data(self, nq, _async):
-        """
-        target: test search new inserted data without load
-        method: 1. search the collection
-                2. insert new data
-                3. search the collection without load again
-                4. Use guarantee_timestamp to guarantee data consistency
-        expected: new data should be searched
-        """
-        # 1. initialize with data
-        dim = 128
-        auto_id = False
-        limit = 1000
-        nb_old = 500
-        collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb_old,
-                                                                                  auto_id=auto_id,
-                                                                                  dim=dim)[0:5]
-        # 2. search for original data after load
-        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
-        log.info("test_search_new_data: searching for original data after load")
-        collection_w.search(vectors[:nq], default_search_field,
-                            default_search_params, limit,
-                            default_search_exp, _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": nq,
-                                         "ids": insert_ids,
-                                         "limit": nb_old,
-                                         "_async": _async})
-        # 3. insert new data
-        nb_new = 300
-        _, _, _, insert_ids_new, time_stamp = cf.insert_data(collection_w, nb_new,
-                                                             auto_id=auto_id, dim=dim,
-                                                             insert_offset=nb_old)
-        insert_ids.extend(insert_ids_new)
-        # 4. search for new data without load
-        # Using bounded staleness, maybe we could not search the "inserted" entities,
-        # since the search requests arrived query nodes earlier than query nodes consume the insert requests.
-        collection_w.search(vectors[:nq], default_search_field,
-                            default_search_params, limit,
-                            default_search_exp, _async=_async,
-                            guarantee_timestamp=time_stamp,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": nq,
-                                         "ids": insert_ids,
-                                         "limit": nb_old + nb_new,
-                                         "_async": _async})
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("shards_num", [-256, 0, ct.max_shards_num // 2, ct.max_shards_num])
-    def test_search_with_non_default_shard_nums(self, shards_num, _async):
-        """
-        target: test search with non_default shards_num
-        method: connect milvus, create collection with several shard numbers , insert, load and search
-        expected: search successfully with the non_default shards_num
-        """
-        auto_id = False
-        self._connect()
-        # 1. create collection
-        name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(
-            name=name, shards_num=shards_num)
-        # 2. rename collection
-        new_collection_name = cf.gen_unique_str(prefix + "new")
-        self.utility_wrap.rename_collection(
-            collection_w.name, new_collection_name)
-        collection_w = self.init_collection_wrap(
-            name=new_collection_name, shards_num=shards_num)
-        # 3. insert
-        dataframe = cf.gen_default_dataframe_data()
-        collection_w.insert(dataframe)
-        # 4. create index and load
-        collection_w.create_index(
-            ct.default_float_vec_field_name, index_params=ct.default_flat_index)
-        collection_w.load()
-        # 5. search
-        vectors = [[random.random() for _ in range(default_dim)]
-                   for _ in range(default_nq)]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            default_search_params, default_limit,
-                            default_search_exp, _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "limit": default_limit,
-                                         "_async": _async})
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("M", [4, 64])
-    @pytest.mark.parametrize("efConstruction", [8, 512])
-    def test_search_HNSW_index_with_redundant_param(self, M, efConstruction, _async):
-        """
-        target: test search HNSW index with redundant param
-        method: connect milvus, create collection , insert, create index, load and search
-        expected: search successfully
-        """
-        dim = M * 4
-        auto_id = False
-        enable_dynamic_field = False
-        self._connect()
-        collection_w, _, _, insert_ids, time_stamp = \
-            self.init_collection_general(prefix, True, partition_num=1, auto_id=auto_id,
-                                         dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5]
-        # nlist is of no use
-        HNSW_index_params = {
-            "M": M, "efConstruction": efConstruction, "nlist": 100}
-        HNSW_index = {"index_type": "HNSW",
-                      "params": HNSW_index_params, "metric_type": "L2"}
-        collection_w.create_index("float_vector", HNSW_index)
-        collection_w.load()
-        search_param = {"metric_type": "L2", "params": {
-            "ef": 32768, "nprobe": 10}}  # nprobe is of no use
-        vectors = [[random.random() for _ in range(dim)]
-                   for _ in range(default_nq)]
-        collection_w.search(vectors[:default_nq], default_search_field,
-                            search_param, default_limit,
-                            default_search_exp, _async=_async,
-                            check_task=CheckTasks.check_search_results,
-                            check_items={"nq": default_nq,
-                                         "ids": insert_ids,
-                                         "limit": default_limit,
-                                         "_async": _async})
-
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("M", [4, 64])
     @pytest.mark.parametrize("efConstruction", [8, 512])
@@ -788,8 +570,7 @@ class TestCollectionSearch(TestcaseBase):
             self.init_collection_general(prefix, True, nb=nb, dim=dim, enable_dynamic_field=True)[0:4]
         # filter result with expression in collection
-        search_vectors = [[random.random() for _ in range(dim)]
-                          for _ in range(default_nq)]
+        search_vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
         _vectors = _vectors[0]
         for expressions in cf.gen_json_field_expressions_and_templates():
             expr = expressions[0].replace("&&", "and").replace("||", "or")
@@ -858,12 +639,16 @@ class TestCollectionSearch(TestcaseBase):
                 ids = hits.ids
                 assert set(ids).issubset(filter_ids_set)
         # 7. create json index
-        default_json_path_index = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
-                                                                        "json_path": f"{ct.default_json_field_name}['number']"}}
-        collection_w.create_index(ct.default_json_field_name, default_json_path_index, index_name = f"{ct.default_json_field_name}_0")
-        default_json_path_index = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
-                                                                        "json_path": f"{ct.default_json_field_name}['float']"}}
-        collection_w.create_index(ct.default_json_field_name, default_json_path_index, index_name = f"{ct.default_json_field_name}_1")
+        default_json_path_index = {"index_type": "INVERTED",
+                                   "params": {"json_cast_type": "double",
+                                              "json_path": f"{ct.default_json_field_name}['number']"}}
+        collection_w.create_index(ct.default_json_field_name, default_json_path_index,
+                                  index_name=f"{ct.default_json_field_name}_0")
+        default_json_path_index = {"index_type": "AUTOINDEX",
+                                   "params": {"json_cast_type": "double",
+                                              "json_path": f"{ct.default_json_field_name}['float']"}}
+        collection_w.create_index(ct.default_json_field_name, default_json_path_index,
+                                  index_name=f"{ct.default_json_field_name}_1")
         # 8. release and load to make sure the new index is loaded
         collection_w.release()
         collection_w.load()
@@ -994,8 +779,7 @@ class TestCollectionSearch(TestcaseBase):
             collection_w.search(vectors, default_search_field, default_search_params,
                                 default_limit, expression, output_fields=[field],
                                 check_task=CheckTasks.check_search_results,
-                                check_items={"nq": default_nq,
-                                             "limit": 0})[0]
+                                check_items={"nq": default_nq, "limit": 0})
             # 4. search normal using all the scalar type as output fields
             collection_w.search(vectors, default_search_field, default_search_params,
                                 default_limit, output_fields=[field],
diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py
index 303db9b879..e769b747bb 100644
--- a/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py
+++ b/tests/python_client/milvus_client_v2/test_milvus_client_search_v2_new.py
@@ -161,6 +161,8 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
                                index_type=self.binary_vector_index, params={"nlist": 128})
         self.create_index(client, self.collection_name, index_params=index_params)
+        self.wait_for_index_ready(client, self.collection_name, index_name=self.float_vector_field_name)
+        self.wait_for_index_ready(client, self.collection_name, index_name=self.bfloat16_vector_field_name)
 
         # Load collection
         self.load_collection(client, self.collection_name)
@@ -378,7 +380,8 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
         )
 
     @pytest.mark.tags(CaseLabel.L2)
-    def test_search_with_output_fields(self):
+    @pytest.mark.parametrize("consistency_level", ["Strong", "Session", "Bounded", "Eventually"])
+    def test_search_with_output_fields_and_consistency_level(self, consistency_level):
         """
         target: test search with output fields
         method: 1. connect and create a collection
@@ -400,6 +403,7 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
                     anns_field=self.float_vector_field_name,
                     search_params=search_params,
                     limit=default_limit,
+                    consistency_level=consistency_level,
                    output_fields=[ct.default_string_field_name, self.dyna_filed_name1, self.dyna_filed_name2],
                     check_task=CheckTasks.check_search_results,
                     check_items={"enable_milvus_client_api": True,
@@ -1220,3 +1224,250 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
                                      "nq": ct.default_nq,
                                      "pk_name": "id",
                                      "limit": ct.default_limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("index", ct.all_index_types[:6])
+    def test_each_index_with_mmap_enabled_search(self, index):
+        """
+        target: test each index with mmap enabled search
+        method: test each index with mmap enabled search
+        expected: search success
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+
+        # create collection with an explicit schema
+        dim = 32
+        schema = self.create_schema(client)[0]
+        schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
+        self.create_collection(client, collection_name, schema=schema)
+
+        # insert data
+        data = []
+        for i in range(ct.default_nb):
+            data.append({
+                "id": i,
+                "vector": cf.gen_vectors(1, dim)[0]
+            })
+        self.insert(client, collection_name, data)
+        self.flush(client, collection_name)
+        # create index
+        index_params = self.prepare_index_params(client)[0]
+        params = cf.get_index_params_params(index)
+        index_params.add_index(field_name='vector', index_type=index, params=params, metric_type='L2')
+        self.create_index(client, collection_name, index_params=index_params)
+        self.wait_for_index_ready(client, collection_name, index_name='vector')
+
+        # alter mmap index
+        self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": True})
+        index_info = self.describe_index(client, collection_name, index_name='vector')
+        assert index_info[0]["mmap.enabled"] == 'True'
+        # search
+        self.load_collection(client, collection_name)
+        search_params = {}
+        vector = cf.gen_vectors(ct.default_nq, dim)
+        self.search(client, collection_name, vector, anns_field="vector",
+                    search_params=search_params, limit=ct.default_limit,
+                    output_fields=["*"],
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"nq": ct.default_nq,
+                                 "limit": ct.default_limit})
+        # disable mmap
+        self.release_collection(client, collection_name)
+        self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": False})
+        index_info = self.describe_index(client, collection_name, index_name='vector')
+        assert index_info[0]["mmap.enabled"] == 'False'
+        self.load_collection(client, collection_name)
+        self.search(client, collection_name, vector, anns_field="vector",
+                    search_params=search_params, limit=ct.default_limit,
+                    output_fields=["*"],
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"nq": ct.default_nq,
+                                 "limit": ct.default_limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("index", ct.all_index_types[8:10])
+    def test_enable_mmap_search_for_binary_indexes(self, index):
+        """
+        Test enabling mmap for binary indexes in Milvus.
+
+        This test verifies that:
+        1. Binary vector indexes can be successfully created with mmap enabled
+        2. Search operations work correctly with mmap enabled
+        3. Mmap can be properly disabled and search still works
+
+        The test performs the following steps:
+        - Creates a collection with binary vectors
+        - Inserts test data
+        - Creates index with mmap enabled
+        - Verifies mmap status
+        - Performs search with mmap enabled
+        - Disables mmap and verifies search still works
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+
+        # create collection with an explicit schema
+        dim = 64
+        schema = self.create_schema(client)[0]
+        schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field('vector', DataType.BINARY_VECTOR, dim=dim)
+        self.create_collection(client, collection_name, schema=schema)
+
+        # insert data
+        data = []
+        for i in range(ct.default_nb):
+            data.append({
+                "id": i,
+                "vector": cf.gen_binary_vectors(1, dim)[1][0]
+            })
+        self.insert(client, collection_name, data)
+        self.flush(client, collection_name)
+        # create index
+        index_params = self.prepare_index_params(client)[0]
+        params = cf.get_index_params_params(index)
+        index_params.add_index(field_name='vector', index_type=index, params=params, metric_type='JACCARD')
+        self.create_index(client, collection_name, index_params=index_params)
+        self.wait_for_index_ready(client, collection_name, index_name='vector')
+        # alter mmap index
+        self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": True})
+        index_info = self.describe_index(client, collection_name, index_name='vector')
+        assert index_info[0]["mmap.enabled"] == 'True'
+        # load collection
+        self.load_collection(client, collection_name)
+        # search
+        binary_vectors = cf.gen_binary_vectors(ct.default_nq, dim)[1]
+        params = cf.get_search_params_params(index)
+        search_params = {"metric_type": "JACCARD", "params": params}
+        output_fields = ["*"]
+        self.search(client, collection_name, binary_vectors, anns_field="vector",
+                    search_params=search_params, limit=ct.default_limit,
+                    output_fields=output_fields,
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"nq": ct.default_nq,
+                                 "limit": ct.default_limit})
+        # disable mmap
+        self.release_collection(client, collection_name)
+        self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": False})
+        index_info = self.describe_index(client, collection_name, index_name='vector')
+        assert index_info[0]["mmap.enabled"] == 'False'
+        self.load_collection(client, collection_name)
+        self.search(client, collection_name, binary_vectors, anns_field="vector",
+                    search_params=search_params, limit=ct.default_limit,
+                    output_fields=output_fields,
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"nq": ct.default_nq,
+                                 "limit": ct.default_limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("num_shards", [-256, 0, ct.max_shards_num // 2, ct.max_shards_num])
+    def test_search_with_non_default_shard_nums(self, num_shards):
+        """
+        Test search functionality with non-default shard numbers.
+
+        This test verifies that:
+        1. Collections are created with default shard numbers when num_shards <= 0
+        2. Collections are created with specified shard numbers when num_shards > 0
+        3. Search operations work correctly with different shard configurations
+
+        The test follows these steps:
+        1. Creates a collection with specified shard numbers
+        2. Inserts test data
+        3. Builds an index
+        4. Performs a search operation
+        5. Validates the results
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+
+        # build schema
+        dim = 32
+        schema = self.create_schema(client)[0]
+        schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
+        # create collection
+        self.create_collection(client, collection_name, schema=schema, num_shards=num_shards)
+        collection_info = self.describe_collection(client, collection_name)[0]
+        expected_num_shards = ct.default_shards_num if num_shards <= 0 else num_shards
+        assert collection_info["num_shards"] == expected_num_shards
+        # insert
+        data = []
+        for i in range(ct.default_nb):
+            data.append({
+                "id": i,
+                "vector": cf.gen_vectors(1, dim)[0]
+            })
+        self.insert(client, collection_name, data)
+        # create index
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name='vector', index_type='HNSW', metric_type='COSINE')
+        self.create_index(client, collection_name, index_params=index_params)
+        self.wait_for_index_ready(client, collection_name, index_name='vector')
+        # load
+        self.load_collection(client, collection_name)
+        # search
+        vectors = cf.gen_vectors(ct.default_nq, dim)
+        search_params = {}
+        self.search(client, collection_name, vectors, anns_field="vector",
+                    search_params=search_params, limit=ct.default_limit,
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"nq": ct.default_nq,
+                                 "limit": ct.default_limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_search_HNSW_index_with_redundant_param(self):
+        """
+        Test search functionality with HNSW index and redundant parameters.
+
+        This test verifies that:
+        1. HNSW index can be created with redundant parameters
+        2. Search operations work correctly with redundant parameters
+        3. Redundant parameters are ignored
+
+        The test performs the following steps:
+        1. Creates a collection with float vectors
+        2. Inserts test data
+        3. Creates HNSW index with redundant parameters
+        4. Performs a search operation
+        5. Validates the results
+        """
+        dim = 16
+        index = "HNSW"
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        schema = self.create_schema(client)[0]
+        schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
+        self.create_collection(client, collection_name, schema=schema)
+
+        # insert
+        data = []
+        for i in range(ct.default_nb):
+            data.append({
+                "id": i,
+                "vector": cf.gen_vectors(1, dim)[0]
+            })
+        self.insert(client, collection_name, data)
+        self.flush(client, collection_name)
+        # create index
+        index_params = self.prepare_index_params(client)[0]
+        params = cf.get_index_params_params(index)
+        params["nlist"] = 100  # nlist is a redundant parameter for HNSW
+        index_params.add_index(field_name='vector', index_type=index,
+                               metric_type='COSINE', params=params)
+        self.create_index(client, collection_name, index_params=index_params)
+        self.wait_for_index_ready(client, collection_name, index_name='vector')
+        index_info = self.describe_index(client, collection_name, index_name='vector')
+        assert index_info[0]["nlist"] == '100'
+        # load
+        self.load_collection(client, collection_name)
+        # search
+        vectors = cf.gen_vectors(ct.default_nq, dim)
+        search_params = {}
+        self.search(client, collection_name, vectors, anns_field="vector",
+                    search_params=search_params, limit=ct.default_limit,
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"nq": ct.default_nq,
+                                 "limit": ct.default_limit})
+
\ No newline at end of file
diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py b/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py
index b9ad8f53ee..d5de9d166f 100644
--- a/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py
+++ b/tests/python_client/milvus_client_v2/test_milvus_client_ttl.py
@@ -7,6 +7,7 @@ from utils.util_log import test_log as log
 from utils.util_pymilvus import *
 from base.client_v2_base import TestMilvusClientV2Base
 from pymilvus import DataType, AnnSearchRequest, WeightedRanker
+from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY
 
 
 class TestMilvusClientTTL(TestMilvusClientV2Base):
@@ -38,7 +39,7 @@ class TestMilvusClientTTL(TestMilvusClientV2Base):
         """
         client = self._client()
         dim = 65
-        ttl = 10
+        ttl = 11
         nb = 1000
         collection_name = cf.gen_collection_name_by_testcase_name()
         schema = self.create_schema(client, enable_dynamic_field=False)[0]
@@ -98,18 +99,21 @@ class TestMilvusClientTTL(TestMilvusClientV2Base):
         while time.time() - start_time < timeout:
             if search_ttl_effective is False:
                 res1 = self.search(client, collection_name, search_vectors, anns_field='embeddings',
-                                   search_params={}, limit=10, consistency_level='Strong')[0]
+                                   search_params={}, limit=10, consistency_level=CONSISTENCY_STRONG)[0]
             if query_ttl_effective is False:
                 res2 = self.query(client, collection_name, filter='',
-                                  output_fields=["count(*)"], consistency_level='Strong')[0]
+                                  output_fields=["count(*)"], consistency_level=CONSISTENCY_STRONG)[0]
             if hybrid_search_ttl_effective is False:
                 res3 = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
-                                          limit=10, consistency_level='Strong')[0]
+                                          limit=10, consistency_level=CONSISTENCY_STRONG)[0]
             if len(res1[0]) == 0 and search_ttl_effective is False:
                 log.info(f"search ttl effects in {round(time.time() - start_time, 4)}s")
                 search_ttl_effective = True
             if res2[0].get('count(*)', None) == 0 and query_ttl_effective is False:
                 log.info(f"query ttl effects in {round(time.time() - start_time, 4)}s")
+                res2x = self.query(client, collection_name, filter='visible==False',
+                                   output_fields=["count(*)"], consistency_level=CONSISTENCY_STRONG)[0]
+                log.debug(f"res2x: {res2x[0].get('count(*)', None)}")
                 query_ttl_effective = True
             if len(res3[0]) == 0 and hybrid_search_ttl_effective is False:
                 log.info(f"hybrid search ttl effects in {round(time.time() - start_time, 4)}s")
@@ -152,44 +156,64 @@ class TestMilvusClientTTL(TestMilvusClientV2Base):
         log.info(f"flush completed in {time.time() - t1}s")
 
         # search data again after insert more data
-        res = self.search(client, collection_name, search_vectors,
-                          search_params={}, anns_field='embeddings',
-                          limit=10, consistency_level='Strong')[0]
-        assert len(res[0]) > 0
-        # query count(*)
-        res = self.query(client, collection_name, filter='visible==False',
-                         output_fields=["count(*)"], consistency_level='Strong')[0]
-        assert res[0].get('count(*)', None) == 0
+        consistency_levels = [CONSISTENCY_EVENTUALLY, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_STRONG]
+        for consistency_level in consistency_levels:
+            log.debug(f"start to search/query with {consistency_level}")
+            # try 3 times
+            for i in range(3):
+                res = self.search(client, collection_name, search_vectors,
+                                  search_params={}, anns_field='embeddings',
+                                  limit=10, consistency_level=consistency_level)[0]
+                if len(res[0]) > 0:
+                    break
+                else:
+                    time.sleep(1)
+            assert len(res[0]) > 0
 
-        # hybrid search
-        res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
-                                 limit=10, consistency_level='Strong')[0]
-        assert len(res[0]) > 0
+            if consistency_level != CONSISTENCY_STRONG:
+                pass
+            else:
+                # query count(*)
+                res = self.query(client, collection_name, filter='',
+                                 output_fields=["count(*)"], consistency_level=consistency_level)[0]
+                assert res[0].get('count(*)', None) == nb * insert_times
+                res = self.query(client, collection_name, filter='visible==False',
+                                 output_fields=["count(*)"], consistency_level=consistency_level)[0]
+                assert res[0].get('count(*)', None) == 0
+                # query count(visible)
+                res = self.query(client, collection_name, filter='visible==True',
+                                 output_fields=["count(*)"], consistency_level=consistency_level)[0]
+                assert res[0].get('count(*)', None) == nb * insert_times
 
-        # query count(visible)
-        res = self.query(client, collection_name, filter='visible==True',
-                         output_fields=["count(*)"], consistency_level='Strong')[0]
-        assert res[0].get('count(*)', None) > 0
+            # hybrid search
+            res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
+                                     limit=10, consistency_level=consistency_level)[0]
+            assert len(res[0]) > 0
 
-        # alter ttl to 1000s
-        self.alter_collection_properties(client, collection_name, properties={"collection.ttl.seconds": 1000})
-        # search data after alter ttl
-        res = self.search(client, collection_name, search_vectors,
-                          search_params={}, anns_field='embeddings',
-                          filter='visible==False', limit=10, consistency_level='Strong')[0]
-        assert len(res[0]) > 0
+        # alter ttl to 2000s
+        self.alter_collection_properties(client, collection_name, properties={"collection.ttl.seconds": 2000})
+        for consistency_level in consistency_levels:
+            log.debug(f"start to search/query after alter ttl with {consistency_level}")
+            # search data after alter ttl
+            res = self.search(client, collection_name, search_vectors,
+                              search_params={}, anns_field='embeddings',
+                              filter='visible==False', limit=10, consistency_level=consistency_level)[0]
+            assert len(res[0]) > 0
 
-        # hybrid search data after alter ttl
-        sub_search1 = AnnSearchRequest(search_vectors, "embeddings", {"level": 1}, 20, expr='visible==False')
-        sub_search2 = AnnSearchRequest(search_vectors, "embeddings_2", {"level": 1}, 20, expr='visible==False')
-        res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
-                                 limit=10, consistency_level='Strong')[0]
-        assert len(res[0]) > 0
+            # hybrid search data after alter ttl
+            sub_search1 = AnnSearchRequest(search_vectors, "embeddings", {"level": 1}, 20, expr='visible==False')
+            sub_search2 = AnnSearchRequest(search_vectors, "embeddings_2", {"level": 1}, 20, expr='visible==False')
+            res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
+                                     limit=10, consistency_level=consistency_level)[0]
+            assert len(res[0]) > 0
 
-        # query count(*)
-        res = self.query(client, collection_name, filter='visible==False',
-                         output_fields=["count(*)"], consistency_level='Strong')[0]
-        assert res[0].get('count(*)', None) == insert_times * nb
-        res = self.query(client, collection_name, filter='',
-                         output_fields=["count(*)"], consistency_level='Strong')[0]
-        assert res[0].get('count(*)', None) == insert_times * nb * 2
+            # query count(*)
+            res = self.query(client, collection_name, filter='visible==False',
+                             output_fields=["count(*)"], consistency_level=consistency_level)[0]
+            assert res[0].get('count(*)', 0) == insert_times * nb
+            res = self.query(client, collection_name, filter='',
+                             output_fields=["count(*)"], consistency_level=consistency_level)[0]
+            if consistency_level != CONSISTENCY_STRONG:
+                assert res[0].get('count(*)', 0) >= insert_times * nb
+            else:
+                assert res[0].get('count(*)', 0) == insert_times * nb * 2
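# Illustrative sketch (not part of the patch): why only CONSISTENCY_STRONG gets
# exact count assertions above. Weaker levels may read at an older timestamp,
# so rows from the newest insert batch can be legitimately missing.
def count_assertion_holds(level, observed, old_rows, new_rows):
    # old_rows/new_rows are hypothetical counts of previously-flushed and
    # freshly-inserted entities
    if level == "Strong":
        return observed == old_rows + new_rows
    return old_rows <= observed <= old_rows + new_rows

assert count_assertion_holds("Strong", 10000, 5000, 5000)
assert count_assertion_holds("Bounded", 7500, 5000, 5000)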
diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt
index 01905383ae..dc0f952fc4 100644
--- a/tests/python_client/requirements.txt
+++ b/tests/python_client/requirements.txt
@@ -28,8 +28,8 @@ pytest-parallel
 pytest-random-order

 # pymilvus
-pymilvus==2.6.0rc151
-pymilvus[bulk_writer]==2.6.0rc151
+pymilvus==2.6.0rc155
+pymilvus[bulk_writer]==2.6.0rc155

 # for protobuf
 protobuf==5.27.2
diff --git a/tests/python_client/testcases/indexes/idx_ivf_rabitq.py b/tests/python_client/testcases/indexes/idx_ivf_rabitq.py
index 4ad90f940d..382221626b 100644
--- a/tests/python_client/testcases/indexes/idx_ivf_rabitq.py
+++ b/tests/python_client/testcases/indexes/idx_ivf_rabitq.py
@@ -68,12 +68,12 @@ class IVF_RABITQ:
         # refine params test
         {
             "description": "Enable Refine Test",
-            "params": {"refine": 'true'},    # to be fixed: #41760
+            "params": {"refine": 'true'},
             "expected": success
         },
         {
             "description": "Disable Refine Test",
-            "params": {"refine": 'false'},    # to be fixed: #41760
+            "params": {"refine": 'false'},
             "expected": success
         },
@@ -194,7 +194,13 @@ class IVF_RABITQ:
         {
             "description": "Exceed nlist Test",
             "params": {"nprobe": 129},    # Assuming nlist=128
-            "expected": success    # to be fixed: #41765
+            "expected": success
+        },
+        {
+            "description": "Exceed nprobe Test",
+            "params": {"nprobe": 65537},
+            "expected": {"err_code": 999,
+                         "err_msg": "should be in range [1, 65536]"}
         },
         {
             "description": "Negative Value Test",
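The new "Exceed nprobe Test" entry follows the file's table-driven convention: each case carries params plus an expected outcome that is either the success marker or an err_code/err_msg dict. A minimal, self-contained sketch of how such a table can drive a parametrized test; run_search_with_params and the local success marker are hypothetical stand-ins, not the real harness:

import pytest

def run_search_with_params(params):
    # hypothetical stand-in for the real search call made through the
    # TestMilvusClientV2Base helpers; validates nprobe the way the server does
    nprobe = params.get("nprobe", 8)
    if not 1 <= nprobe <= 65536:
        raise ValueError(f"nprobe {nprobe} should be in range [1, 65536]")

success = "success"  # marker analogous to the one used in idx_ivf_rabitq.py

cases = [
    {"description": "Exceed nprobe Test",
     "params": {"nprobe": 65537},
     "expected": {"err_code": 999, "err_msg": "should be in range [1, 65536]"}},
    {"description": "Valid nprobe Test",
     "params": {"nprobe": 128},
     "expected": success},
]

@pytest.mark.parametrize("case", cases, ids=lambda c: c["description"])
def test_nprobe_range(case):
    expected = case["expected"]
    if isinstance(expected, dict) and "err_msg" in expected:
        # error cases: the call must fail with the advertised message
        with pytest.raises(ValueError) as exc:
            run_search_with_params(case["params"])
        assert expected["err_msg"] in str(exc.value)
    else:
        # success cases: the call must simply not raise
        run_search_with_params(case["params"])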
diff --git a/tests/python_client/testcases/indexes/test_ivf_rabitq.py b/tests/python_client/testcases/indexes/test_ivf_rabitq.py
index 04116e52d6..87bda46aa4 100644
--- a/tests/python_client/testcases/indexes/test_ivf_rabitq.py
+++ b/tests/python_client/testcases/indexes/test_ivf_rabitq.py
@@ -82,7 +82,7 @@ class TestIvfRabitqBuildParams(TestMilvusClientV2Base):
         for key, value in build_params.items():
             if value is not None:
                 assert key in idx_info.keys()
-                # assert value in idx_info.values()    # TODO: uncommented after #41783 fixed
+                assert str(value) in idx_info.values()    # re-enabled now that #41783 is fixed

     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("vector_data_type", ct.all_vector_types)
diff --git a/tests/python_client/testcases/test_partition_key.py b/tests/python_client/testcases/test_partition_key.py
index 41b561fb1e..55fc50d3f5 100644
--- a/tests/python_client/testcases/test_partition_key.py
+++ b/tests/python_client/testcases/test_partition_key.py
@@ -72,6 +72,23 @@ class TestPartitionKeyParams(TestcaseBase):
         for i in range(nq):
             assert res1[i].ids == res2[i].ids == res3[i].ids

+        # query with 'or' to verify partition-key pruning is not applied to OR binary expressions
+        query_res1 = collection_w.query(
+            expr=f'{string_field.name} == "{string_prefix}5" || {int64_field.name} in [2,4,6]',
+            output_fields=['count(*)'])[0]
+        query_res2 = collection_w.query(
+            expr=f'{string_field.name} in ["{string_prefix}2","{string_prefix}4", "{string_prefix}6"] || {int64_field.name}==5',
+            output_fields=['count(*)'])[0]
+        query_res3 = collection_w.query(
+            expr=f'{int64_field.name}==5 or {string_field.name} in ["{string_prefix}2","{string_prefix}4", "{string_prefix}6"]',
+            output_fields=['count(*)'])[0]
+        query_res4 = collection_w.query(
+            expr=f'{int64_field.name} in [2,4,6] || {string_field.name} == "{string_prefix}5"',
+            output_fields=['count(*)'])[0]
+        # assert all four formulations return the same, complete count
+        assert query_res1[0].get('count(*)') == query_res2[0].get('count(*)') \
+            == query_res3[0].get('count(*)') == query_res4[0].get('count(*)') == 40
+
     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name, ct.default_string_field_name])
     @pytest.mark.parametrize("index_on_par_key_field", [True, False])
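The four OR queries above all target the same 40 rows because a disjunct that does not constrain the partition key must disable partition pruning rather than silently drop partitions. A toy model of that routing rule, purely illustrative and not Milvus internals:

def prunable(disjuncts, key_field):
    """A query may be routed to a subset of partitions only if every OR
    branch constrains the partition-key field; otherwise it must fan out
    to all partitions (no pruning)."""
    return all(key_field in fields for fields in disjuncts)

# 'varchar == "p5" || int64 in [2, 4, 6]': the int64 branch never mentions
# the key, so pruning must be skipped and all partitions scanned
assert prunable([{"varchar"}, {"int64"}], key_field="varchar") is False
# 'varchar == "p5" || varchar in [...]': every branch constrains the key
assert prunable([{"varchar"}, {"varchar"}], key_field="varchar") is True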