milvus/tests/python_client/check/param_check.py
yanliang567 e8011908ac
test: Add tests for partition key filter issue and ttl eventually search (#43052)
related issue: #42918
1. add tests for ttl eventually search
2. add tests for partition key filter 
3. improve check query results for output fields 
4. verify some fix for rabitq index and update the test accordingly
5. update gen random float vector in (-1, 1) instead of (0,1)

---------

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
2025-07-02 11:02:43 +08:00

359 lines
12 KiB
Python

import sys
import operator
from common import common_type as ct
sys.path.append("..")
from utils.util_log import test_log as log
import numpy as np
from collections.abc import Iterable
epsilon = ct.epsilon
def deep_approx_compare(x, y, epsilon=epsilon):
    """
    Recursively compares two objects for approximate equality, handling floating-point precision.
    Args:
        x: First object to compare
        y: Second object to compare
        epsilon: Absolute tolerance for numeric comparisons (defaults to the module-level epsilon)
    Returns:
        True if the objects are approximately equal; otherwise False (or, for types
        that reach the final fallback, whatever ``x == y`` evaluates to)
    Handles:
        - Numeric types (int, float, numpy scalars): abs(x - y) < epsilon
        - Sequences (list, tuple, numpy arrays): equal length and element-wise match
        - Dictionaries: identical key sets and value-wise match
        - Other iterables (except strings): compared after conversion to lists
        - Falls back to strict equality for other types
    """
    numeric_types = (int, float, np.integer, np.floating)
    sequence_types = (list, tuple, np.ndarray)

    # Handle basic numeric types (including numpy scalars)
    if isinstance(x, numeric_types) and isinstance(y, numeric_types):
        return abs(float(x) - float(y)) < epsilon

    # Handle lists/tuples/arrays; numpy arrays are walked element by element,
    # so multi-dimensional arrays recurse row by row.
    if isinstance(x, sequence_types) and isinstance(y, sequence_types):
        if len(x) != len(y):
            return False
        return all(deep_approx_compare(a, b, epsilon) for a, b in zip(x, y))

    # Handle dictionaries
    if isinstance(x, dict) and isinstance(y, dict):
        if set(x.keys()) != set(y.keys()):
            return False
        return all(deep_approx_compare(x[key], y[key], epsilon) for key in x)

    # Handle other iterables (e.g., Protobuf containers). Strings are excluded on
    # BOTH sides: otherwise a list of characters would compare equal to a string.
    if (isinstance(x, Iterable) and isinstance(y, Iterable)
            and not isinstance(x, str) and not isinstance(y, str)):
        try:
            return deep_approx_compare(list(x), list(y), epsilon)
        except Exception:
            # Best effort only: if the containers cannot be materialised or
            # compared, fall through to strict equality below. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate.
            pass

    # Fall back to strict equality for other types
    return x == y
def compare_lists_ignore_order(a, b, epsilon=epsilon):
    """
    Order-insensitive comparison of two lists using deep_approx_compare().
    Args:
        a (list): First list to compare
        b (list): Second list to compare
        epsilon (float, optional): Tolerance forwarded to deep_approx_compare().
    Returns:
        bool: True if every item of ``a`` can be paired with a distinct,
        approximately-equal item of ``b``; False otherwise
    Note:
        Matching is greedy (the first still-unmatched candidate in ``b`` wins)
        and costs O(n²) comparisons in the worst case.
    """
    if len(a) != len(b):
        return False

    # Positions in b that have not been paired with an item of a yet
    unmatched = set(range(len(b)))
    for candidate in a:
        hit = next(
            (pos for pos in unmatched
             if deep_approx_compare(candidate, b[pos], epsilon)),
            None,
        )
        if hit is None:
            return False
        # Safe to mutate here: the generator above is no longer being consumed
        unmatched.discard(hit)
    return True
def ip_check(ip):
    """
    Check that an address argument is usable as a host string.
    :param ip: value to validate
    :return: True if ``ip`` equals "localhost" or is any str; False (after
             logging an error) for every non-string value
    """
    if ip == "localhost":
        return True
    if not isinstance(ip, str):
        # Wrap in a 1-tuple: a bare tuple operand would be unpacked by "%" and
        # raise TypeError instead of producing the log message.
        log.error("[IP_CHECK] IP(%s) is not a string." % (ip,))
        return False
    return True
def number_check(num):
    """
    Check that the string form of ``num`` consists only of digit characters.
    :param num: value to validate; it is converted with str() first, so
                negative numbers and floats are rejected ("-" and "." are not digits)
    :return: True if str(num).isdigit(), otherwise False after logging an error
    """
    if str(num).isdigit():
        return True
    # Wrap in a 1-tuple so a tuple argument is formatted as a single value
    # instead of being unpacked by "%" (which would raise TypeError).
    log.error("[NUMBER_CHECK] Number(%s) is not a numbers." % (num,))
    return False
def exist_check(param, _list):
    """
    Membership test with error logging.
    :param param: value to look for
    :param _list: container searched with the ``in`` operator
    :return: True if ``param`` is in ``_list``; False (after logging) otherwise
    """
    found = param in _list
    if not found:
        log.error("[EXIST_CHECK] Param(%s) is not in (%s)." % (param, _list))
    return found
def dict_equal_check(dict1, dict2):
    """
    Compare two dictionaries for strict equality.
    :param dict1: first dictionary
    :param dict2: second dictionary
    :return: result of ``dict1 == dict2``; False (after logging) when either
             argument is not a dict
    """
    both_are_dicts = isinstance(dict1, dict) and isinstance(dict2, dict)
    if both_are_dicts:
        return dict1 == dict2
    log.error("[DICT_EQUAL_CHECK] Type of dict(%s) or dict(%s) is not a dict." % (str(dict1), str(dict2)))
    return False
def list_de_duplication(_list):
    """
    Remove duplicate elements from a list while keeping first-occurrence order.
    :param _list: list to de-duplicate; any other type is logged and returned unchanged
    :return: a new list with duplicates removed (the input list is not modified)
    """
    if not isinstance(_list, list):
        log.error("[LIST_DE_DUPLICATION] Type of list(%s) is not a list." % str(_list))
        return _list

    try:
        # dict keys keep insertion order, so this de-duplicates and preserves
        # the original order in a single O(n) pass.
        result = list(dict.fromkeys(_list))
    except TypeError:
        # Unhashable elements (e.g. dicts or nested lists) cannot be dict keys;
        # fall back to an equality-based scan.
        result = []
        for item in _list:
            if item not in result:
                result.append(item)

    log.debug("[LIST_DE_DUPLICATION] %s after removing the duplicate elements, the list becomes %s" % (
        str(_list), str(result)))
    return result
def list_equal_check(param1, param2):
    """
    Check whether two lists hold the same elements regardless of order.
    :param param1: first list
    :param param2: second list
    :return: True when both lists have the same length and, after
             de-duplication via list_de_duplication(), contain the same
             elements; False (after logging an error) otherwise
    """
    check_result = False
    # Compare against param2: the previous self-comparison was always true,
    # which made the length guard a no-op.
    if len(param1) == len(param2):
        _list1 = list_de_duplication(param1)
        _list2 = list_de_duplication(param2)
        # Equal de-duplicated lengths plus one-way containment implies the
        # two de-duplicated lists hold the same elements.
        check_result = len(_list1) == len(_list2) and all(i in _list2 for i in _list1)

    if not check_result:
        log.error("[LIST_EQUAL_CHECK] List(%s) and list(%s) are not equal." % (str(param1), str(param2)))
    return check_result
def list_contain_check(sublist, superlist):
    """
    Check that ``superlist`` contains every element of ``sublist``, counting
    multiplicity (an element appearing twice in ``sublist`` must appear at
    least twice in ``superlist``).
    :param sublist: list of elements expected to be present
    :param superlist: list searched for those elements; it is NOT modified
    :return: True if all elements are contained; False (after logging) otherwise
    :raises Exception: if either argument is not a list
    """
    # 1-tuple operands keep "%" from unpacking a tuple argument
    if not isinstance(sublist, list):
        raise Exception("%s isn't list type" % (sublist,))
    if not isinstance(superlist, list):
        raise Exception("%s isn't list type" % (superlist,))

    # Consume matches from a private copy so the caller's list stays intact
    # and the error message below shows the full superlist.
    remaining = list(superlist)
    for item in sublist:
        if item not in remaining:
            log.error("list_contain_check: List(%s) does not contain list(%s)"
                      % (str(superlist), str(sublist)))
            return False
        remaining.remove(item)
    return True
def get_connect_object_name(_list):
    """
    Replace the object in each (alias, object) pair with the object's class name.
    :param _list: list of 2-tuples; a non-list argument is logged and returned as-is
    :return: new list where every pair whose second item is not None becomes
             (first item, type name of second item); pairs with None are kept
             unchanged. If any element is not a 2-tuple the original ``_list``
             is returned after logging an error.
    """
    if not isinstance(_list, list):
        log.error("[GET_CONNECT_OBJECT_NAME] Type of list(%s) is not a list." % str(_list))
        return _list

    renamed = []
    for pair in _list:
        if not isinstance(pair, tuple):
            log.error("[GET_CONNECT_OBJECT_NAME] The element:%s of the list is not tuple, please check manually."
                      % str(pair))
            return _list
        if len(pair) != 2:
            log.error("[GET_CONNECT_OBJECT_NAME] The length of the tuple:%s is not equal to 2, please check manually."
                      % str(pair))
            return _list

        alias, obj = pair
        renamed.append(pair if obj is None else (alias, type(obj).__name__))

    log.debug("[GET_CONNECT_OBJECT_NAME] list:%s is reset to list:%s" % (str(_list), str(renamed)))
    return renamed
def equal_entity(exp, actual):
    """
    Assert that two entities (field name -> value dicts) match.
    List values (e.g. a float vector) must have equal length and are spot-checked
    at every 4th position with an absolute tolerance of ct.epsilon; all other
    values must be strictly equal.
    {"int64": 0, "float": 0.0, "float_vec": [0.09111554112502457, ..., 0.08652634258062468]}
    :param exp: expected entity
    :param actual: actual entity
    :return: True when every check passes; a mismatch raises AssertionError
    """
    assert actual.keys() == exp.keys()
    for field in exp:
        expected = exp[field]
        got = actual[field]
        if not isinstance(expected, list):
            assert got == expected
            continue
        assert len(got) == len(expected)
        # Only every 4th element is compared
        for pos in range(0, len(expected), 4):
            assert abs(got[pos] - expected[pos]) < ct.epsilon
    return True
def entity_in(entity, entities, primary_field):
    """
    Look ``entity`` up in ``entities`` by primary key and compare it with the match.
    :param entity: dict
            {"int": 0, "vec": [0.999999, 0.111111]}
    :param entities: list of dict
            [{"int": 0, "vec": [0.999999, 0.111111]}, {"int": 1, "vec": [0.888888, 0.222222]}]
    :param primary_field: collection primary field; None selects
            ct.default_primary_field_name
    :return: False if no entity carries the same primary key; otherwise the
            result of equal_entity() on the first entity with that key (which
            raises AssertionError on a field mismatch)
    """
    if primary_field is None:
        primary_field = ct.default_primary_field_name

    wanted_key = entity.get(primary_field, None)
    known_keys = [row[primary_field] for row in entities]
    if wanted_key not in known_keys:
        return False
    return equal_entity(entities[known_keys.index(wanted_key)], entity)
def remove_entity(entity, entities, primary_field):
    """
    Remove, in place, the first entry of ``entities`` whose primary key equals
    that of ``entity``.
    :param entity: dict
            {"int": 0, "vec": [0.999999, 0.111111]}
    :param entities: list of dict
            [{"int": 0, "vec": [0.999999, 0.111111]}, {"int": 1, "vec": [0.888888, 0.222222]}]
    :param primary_field: collection primary field; None selects
            ct.default_primary_field_name
    :return: the same ``entities`` list after removal; list.index raises
            ValueError when no entry has the primary key
    """
    if primary_field is None:
        primary_field = ct.default_primary_field_name

    wanted_key = entity.get(primary_field, None)
    known_keys = [row[primary_field] for row in entities]
    entities.pop(known_keys.index(wanted_key))
    return entities
def equal_entities_list(exp, actual, primary_field, with_vec=False):
    """
    Compare two entity lists without regard to order.
    :param exp: expected entities list, list of dict (a copy is consumed; the
            caller's list is left untouched)
    :param actual: actual entities list, list of dict
    :param primary_field: primary field name, used only when with_vec is True
    :param with_vec: when True, entities are matched by primary key through
            entity_in()/remove_entity(); when False, by plain dict equality
    :return: True if every expected entity was matched and removed, else False
    example:
    exp = [{"int": 0, "vec": [0.999999, 0.111111]}, {"int": 1, "vec": [0.888888, 0.222222]}]
    actual = [{"int": 1, "vec": [0.888888, 0.222222]}, {"int": 0, "vec": [0.999999, 0.111111]}]
    exp = actual
    """
    pending = exp.copy()
    if len(pending) != len(actual):
        return False

    for row in actual:
        if with_vec:
            # vec field returned in query res: match on primary key
            present = entity_in(row, pending, primary_field)
        else:
            present = row in pending
        if not present:
            continue
        try:
            if with_vec:
                remove_entity(row, pending, primary_field)
            else:
                pending.remove(row)
        except Exception as ex:
            log.error(ex)

    return len(pending) == 0
def output_field_value_check(search_res, original, pk_name):
    """
    check if the value of output fields is correct, it only works on auto_id = False
    :param search_res: the search result of specific output fields; only
            search_res[0] is inspected and each hit must expose ``.fields`` and ``.id``
    :param original: the data in the collection (presumably a pandas DataFrame
            for the scalar branch, since it is filtered by ``original[pk_name] == id``
            and ``.index.to_list()`` -- confirm against callers)
    :param pk_name: primary key field name; None selects ct.default_primary_field_name
    :return: True when all checks pass; a mismatch raises AssertionError
    """
    if pk_name is None:
        pk_name = ct.default_primary_field_name

    hit_count = len(search_res[0])
    for position in range(hit_count):
        hit = search_res[0][position]
        returned = hit.fields
        hit_id = hit.id
        for field in returned.keys():
            value = returned[field]
            if isinstance(value, list):
                # List values are spot-checked at every 4th position; note the
                # original data is indexed directly by the hit id here.
                for offset in range(0, len(value), 4):
                    assert abs(original[field][hit_id][offset] - value[offset]) < ct.epsilon
            elif isinstance(value, dict) and field != ct.default_json_field_name:
                # sparse checking, sparse vector must be the last, this is a bit hacky,
                # but sparse only supports list data type insertion for now
                assert value.keys() == original[-1][hit_id].keys()
            else:
                # Locate the row whose primary key equals the hit id
                row = original[original[pk_name] == hit_id].index.to_list()[0]
                assert original[field][row] == value
    return True