test: Add tests for partition key filter issue and ttl eventually search (#43052)

related issue: #42918
1. add tests for TTL with eventually-consistent search
2. add tests for the partition key filter issue
3. improve query-result checking for output fields
4. verify the fixes for the RaBitQ index and update the tests accordingly
5. generate random float vectors in (-1, 1) instead of (0, 1)

---------

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
yanliang567 2025-07-02 11:02:43 +08:00 committed by GitHub
parent 09c6df62d8
commit e8011908ac
14 changed files with 606 additions and 443 deletions


@ -566,31 +566,46 @@ class ResponseChecker:
raise Exception("No expect values found in the check task")
exp_res = check_items.get("exp_res", None)
with_vec = check_items.get("with_vec", False)
pk_name = check_items.get("pk_name", ct.default_primary_field_name)
vector_type = check_items.get("vector_type", "FLOAT_VECTOR")
if vector_type == DataType.FLOAT16_VECTOR:
for single_exp_res in exp_res:
single_exp_res['vector'] = single_exp_res['vector'] .tolist()
for single_query_result in query_res:
single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=np.float16).tolist()
if vector_type == DataType.BFLOAT16_VECTOR:
for single_exp_res in exp_res:
single_exp_res['vector'] = single_exp_res['vector'] .tolist()
for single_query_result in query_res:
single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=bfloat16).tolist()
if vector_type == DataType.INT8_VECTOR:
for single_exp_res in exp_res:
single_exp_res['vector'] = single_exp_res['vector'] .tolist()
for single_query_result in query_res:
single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=np.int8).tolist()
exp_limit = check_items.get("exp_limit", None)
count = check_items.get("count(*)", None)
if count is not None:
assert count == query_res[0].get("count(*)", None)
return True
if exp_limit is None and exp_res is None:
raise Exception(f"No expected values would be checked in the check task")
if exp_limit is not None:
assert len(query_res) == exp_limit
# pk_name = check_items.get("pk_name", ct.default_primary_field_name)
# if with_vec:
if exp_res is not None:
if with_vec is True:
vector_type = check_items.get('vector_type', 'FLOAT_VECTOR')
vector_field = check_items.get('vector_field', 'vector')
if vector_type == DataType.FLOAT16_VECTOR:
# for single_exp_res in exp_res:
# single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
for single_query_result in query_res:
single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=np.float16).tolist()
if vector_type == DataType.BFLOAT16_VECTOR:
# for single_exp_res in exp_res:
# single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
for single_query_result in query_res:
single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=bfloat16).tolist()
if vector_type == DataType.INT8_VECTOR:
# for single_exp_res in exp_res:
# if single_exp_res[vector_field].__class__ is not list:
# single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
for single_query_result in query_res:
single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=np.int8).tolist()
if isinstance(query_res, list):
assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name,
with_vec=with_vec)
# assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name, with_vec=with_vec)
# return True
assert pc.compare_lists_ignore_order(a=query_res, b=exp_res)
return True
else:
log.error(f"Query result {query_res} is not list")
return False
log.warning(f'Expected query result is {exp_res}')
@staticmethod
@ -810,4 +825,4 @@ class ResponseChecker:
if check_items.get("index_name", None) is not None:
assert res["index_name"] == check_items.get("index_name")
return True
return True
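For context, a minimal standalone sketch (illustrative, not part of the diff) of the decoding step used in the checker above: FLOAT16/BFLOAT16/INT8 vectors come back from the server as raw bytes, so the checker rebuilds them with np.frombuffer before comparing against the expected lists. Only numpy is assumed here; the bfloat16 branch would additionally need a bfloat16 dtype such as the one provided by ml_dtypes.

import numpy as np

# Hypothetical example: a float16 vector is returned as bytes and must be
# decoded to a plain list before it can be compared with the expected value.
expected = [0.1, 0.2, 0.3]
raw_bytes = np.array(expected, dtype=np.float16).tobytes()    # simulate the raw server payload
decoded = np.frombuffer(raw_bytes, dtype=np.float16).tolist()
assert all(abs(a - b) < 1e-3 for a, b in zip(decoded, expected))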


@ -5,6 +5,111 @@ from common import common_type as ct
sys.path.append("..")
from utils.util_log import test_log as log
import numpy as np
from collections.abc import Iterable
epsilon = ct.epsilon
def deep_approx_compare(x, y, epsilon=epsilon):
"""
Recursively compares two objects for approximate equality, handling floating-point precision.
Args:
x: First object to compare
y: Second object to compare
epsilon: Tolerance for floating-point comparisons (default: 1e-6)
Returns:
bool: True if objects are approximately equal, False otherwise
Handles:
- Numeric types (int, float, numpy scalars)
- Sequences (list, tuple, numpy arrays)
- Dictionaries
- Other iterables (except strings)
- Numpy arrays (shape and value comparison)
- Falls back to strict equality for other types
"""
# Handle basic numeric types (including numpy scalars)
if isinstance(x, (int, float, np.integer, np.floating)) and isinstance(y, (int, float, np.integer, np.floating)):
return abs(float(x) - float(y)) < epsilon
# Handle lists/tuples/arrays
if isinstance(x, (list, tuple, np.ndarray)) and isinstance(y, (list, tuple, np.ndarray)):
if len(x) != len(y):
return False
for a, b in zip(x, y):
if not deep_approx_compare(a, b, epsilon):
return False
return True
# Handle dictionaries
if isinstance(x, dict) and isinstance(y, dict):
if set(x.keys()) != set(y.keys()):
return False
for key in x:
if not deep_approx_compare(x[key], y[key], epsilon):
return False
return True
# Handle other iterables (e.g., Protobuf containers)
if isinstance(x, Iterable) and isinstance(y, Iterable) and not isinstance(x, str):
try:
return deep_approx_compare(list(x), list(y), epsilon)
except:
pass
# Handle numpy arrays
if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
if x.shape != y.shape:
return False
return np.allclose(x, y, atol=epsilon)
# Fall back to strict equality for other types
return x == y
def compare_lists_ignore_order(a, b, epsilon=epsilon):
"""
Compares two lists of dictionaries for equality (order-insensitive) with floating-point tolerance.
Args:
a (list): First list of dictionaries to compare
b (list): Second list of dictionaries to compare
epsilon (float, optional): Tolerance for floating-point comparisons. Defaults to 1e-6.
Returns:
bool: True if lists contain equivalent dictionaries (order doesn't matter), False otherwise
Note:
Uses deep_approx_compare() for dictionary comparison with floating-point tolerance.
Maintains O(n^2) complexity due to nested comparisons.
"""
if len(a) != len(b):
return False
# Create a set of available indices for b
available_indices = set(range(len(b)))
for item_a in a:
matched = False
# Create a list of indices to remove (avoid modifying the set during iteration)
to_remove = []
for idx in available_indices:
if deep_approx_compare(item_a, b[idx], epsilon):
to_remove.append(idx)
matched = True
break
if not matched:
return False
# Remove matched indices
available_indices -= set(to_remove)
return True
def ip_check(ip):
if ip == "localhost":


@ -1127,33 +1127,6 @@ def gen_schema_multi_string_fields(string_fields):
primary_field=primary_field, auto_id=False)
return schema
def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR):
vectors = []
if vector_data_type == DataType.FLOAT_VECTOR:
vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
elif vector_data_type == DataType.FLOAT16_VECTOR:
vectors = gen_fp16_vectors(nb, dim)[1]
elif vector_data_type == DataType.BFLOAT16_VECTOR:
vectors = gen_bf16_vectors(nb, dim)[1]
elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
vectors = gen_sparse_vectors(nb, dim)
elif vector_data_type == ct.text_sparse_vector:
vectors = gen_text_vectors(nb) # for Full Text Search
elif vector_data_type == DataType.INT8_VECTOR:
vectors = gen_int8_vectors(nb, dim)[1]
elif vector_data_type == DataType.BINARY_VECTOR:
vectors = gen_binary_vectors(nb, dim)[1]
else:
log.error(f"Invalid vector data type: {vector_data_type}")
raise Exception(f"Invalid vector data type: {vector_data_type}")
if dim > 1:
if vector_data_type == DataType.FLOAT_VECTOR:
vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
vectors = vectors.tolist()
return vectors
def gen_string(nb):
string_values = [str(random.random()) for _ in range(nb)]
return string_values
@ -3613,7 +3586,7 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok", empty_percentage=0):
def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR):
vectors = []
if vector_data_type == DataType.FLOAT_VECTOR:
vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
vectors = [[random.uniform(-1, 1) for _ in range(dim)] for _ in range(nb)]
elif vector_data_type == DataType.FLOAT16_VECTOR:
vectors = gen_fp16_vectors(nb, dim)[1]
elif vector_data_type == DataType.BFLOAT16_VECTOR:
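A minimal sketch of the changed value range (illustrative, not part of the diff): drawing components from (-1, 1) instead of (0, 1) avoids all-positive vectors, whose pairwise cosine similarity is always non-negative.

import random

dim, nb = 8, 3
vectors = [[random.uniform(-1, 1) for _ in range(dim)] for _ in range(nb)]
assert all(-1.0 < v < 1.0 for row in vectors for v in row)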


@ -76,12 +76,12 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
@pytest.mark.parametrize("is_release", [True])
@pytest.mark.parametrize("single_data_num", [50])
@pytest.mark.parametrize("expr_field", [ct.default_int64_field_name,
# ct.default_string_field_name, # TODO: uncommented after #42604 fixed
ct.default_string_field_name,
ct.default_float_array_field_name])
def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array(self,
enable_dynamic_field,
supported_numeric_scalar_index,
# supported_varchar_scalar_index,
supported_varchar_scalar_index,
supported_json_path_index,
supported_array_double_float_scalar_index,
is_flush,


@ -54,7 +54,7 @@ def external_filter_with_outputs(hits):
results = []
for hit in hits:
# equals filter nothing if there are output_fields
if hit.distance < 1.0 and len(hit.fields) > 0:
if hit.distance <= 4.0 and len(hit.fields) > 0:
results.append(hit)
return results


@ -1,3 +1,6 @@
import random
import pandas
import pytest
import numpy as np
import time
@ -10,7 +13,6 @@ from base.client_v2_base import TestMilvusClientV2Base
from pymilvus import DataType, FieldSchema, CollectionSchema
# Test parameters
default_dim = ct.default_dim
default_nb = ct.default_nb
default_nq = ct.default_nq
default_limit = ct.default_limit
@ -28,7 +30,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("flush_enable", [True, False])
@pytest.mark.parametrize("scalar_index_enable", [True, False])
def test_milvus_client_e2e_default(self, flush_enable, scalar_index_enable):
@pytest.mark.parametrize("vector_type", [DataType.FLOAT_VECTOR])
def test_milvus_client_e2e_default(self, flush_enable, scalar_index_enable, vector_type):
"""
target: test high level api: client.create_collection, insert, search, query
method: create connection, collection, insert and search with:
@ -37,13 +40,14 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
expected: search/query successfully
"""
client = self._client()
dim = 8
# 1. Create collection with custom schema
collection_name = cf.gen_collection_name_by_testcase_name()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
# Primary key and vector field
schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False)
schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field("vector", vector_type, dim=dim)
# Boolean type
schema.add_field("bool_field", DataType.BOOL, nullable=True)
# Integer types
@ -59,7 +63,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
# JSON type
schema.add_field("json_field", DataType.JSON, nullable=True)
# Array type
schema.add_field("array_field", DataType.ARRAY, element_type=DataType.INT64, max_capacity=12, nullable=True)
schema.add_field("array_field", DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=12, nullable=True)
# Create collection
self.create_collection(client, collection_name, schema=schema)
@ -68,14 +72,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
num_inserts = 5 # insert data for 5 times
total_rows = []
for batch in range(num_inserts):
vectors = cf.gen_vectors(default_nb, default_dim)
vectors = list(cf.gen_vectors(default_nb, dim, vector_data_type=vector_type)) \
if vector_type == DataType.FLOAT_VECTOR \
else cf.gen_vectors(default_nb, dim, vector_data_type=vector_type)
rows = []
start_id = batch * default_nb # ensure id is not duplicated
for i in range(default_nb):
row = {
"id": start_id + i, # ensure id is not duplicated
"embeddings": list(vectors[i])
"vector": vectors[i]
}
# Add nullable fields with null values for every 5th record
@ -99,11 +105,11 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
"int16_field": i % 32768,
"int32_field": i,
"int64_field": i,
"float_field": float(i),
"double_field": float(i) * 1.0,
"float_field": random.random(),
"double_field": random.random(),
"varchar_field": f"varchar_{start_id + i}",
"json_field": {"id": start_id + i, "value": f"json_{start_id + i}"},
"array_field": [i, i + 1, i + 2]
"array_field": [random.random() for _ in range(5)]
})
rows.append(row)
total_rows.append(row)
@ -124,7 +130,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
# Create index parameters
index_params = self.prepare_index_params(client)[0]
index_params.add_index("embeddings", metric_type="COSINE")
index_params.add_index("vector", metric_type="COSINE")
# Add autoindex for scalar fields if enabled
if scalar_index_enable:
@ -160,13 +166,13 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
# 4. Search
t0 = time.time()
vectors_to_search = cf.gen_vectors(1, default_dim)
vectors_to_search = cf.gen_vectors(1, dim, vector_data_type=vector_type)
search_params = {"metric_type": "COSINE", "params": {"nprobe": 100}}
search_res, _ = self.search(
client,
collection_name,
vectors_to_search,
anns_field="embeddings",
anns_field="vector",
search_params=search_params,
limit=default_limit,
output_fields=['*'],
@ -182,26 +188,34 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
# 5. Query with filters on each scalar field
t0 = time.time()
# Query on boolean field
output_fields = ['id', 'int8_field', 'json_field']
bool_filter = "bool_field == true"
bool_expected = [r for r in total_rows if r["bool_field"] is not None and r["bool_field"]]
bool_expected = [
{
'id': r['id'],
'int8_field': r['int8_field'],
'json_field': r['json_field']
}
for r in total_rows if r["bool_field"] is not None and r["bool_field"]]
query_res, _ = self.query(
client,
collection_name,
filter=bool_filter,
output_fields=['*'],
output_fields=output_fields,
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": bool_expected,
"with_vec": True,
"with_vec": False,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Query on int8 field
int8_filter = "int8_field < 50"
int8_expected = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 50]
with_vec = True
int8_filter = "int8_field is null || int8_field < 10"
int8_expected = [r for r in total_rows if r["int8_field"] is None or r["int8_field"] < 10]
query_res, _ = self.query(
client,
collection_name,
@ -210,14 +224,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": int8_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Query on int16 field
int16_filter = "int16_field < 1000"
int16_expected = [r for r in total_rows if r["int16_field"] is not None and r["int16_field"] < 1000]
int16_filter = "100 <= int16_field < 200"
int16_expected = [r for r in total_rows if r["int16_field"] is not None and 100 <= r["int16_field"] < 200]
query_res, _ = self.query(
client,
collection_name,
@ -226,14 +241,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": int16_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Query on int32 field
int32_filter = "int32_field in [1,2,3,4,5]"
int32_expected = [r for r in total_rows if r["int32_field"] is not None and r["int32_field"] in [1,2,3,4,5]]
int32_filter = "int32_field in [1,2,5,6]"
int32_expected = [r for r in total_rows if r["int32_field"] is not None and r["int32_field"] in [1,2,5,6]]
query_res, _ = self.query(
client,
collection_name,
@ -242,14 +258,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": int32_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Query on int64 field
int64_filter = "int64_field >= 10"
int64_expected = [r for r in total_rows if r["int64_field"] is not None and r["int64_field"] >= 10]
int64_filter = "int64_field >= 4678 and int64_field < 5050"
int64_expected = [r for r in total_rows if r["int64_field"] is not None and r["int64_field"] >= 4678 and r["int64_field"] < 5050]
query_res, _ = self.query(
client,
collection_name,
@ -258,14 +275,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": int64_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Query on float field
float_filter = "float_field > 5.0"
float_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 5.0]
float_filter = "float_field > 0.5 and float_field <= 0.7"
float_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 0.5 and r["float_field"] <= 0.7]
query_res, _ = self.query(
client,
collection_name,
@ -274,14 +292,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": float_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Query on double field
double_filter = "3.0 <=double_field <= 7.0"
double_expected = [r for r in total_rows if r["double_field"] is not None and 3.0 <= r["double_field"] <= 7.0]
double_filter = "0.5 <=double_field <= 0.7"
double_expected = [r for r in total_rows if r["double_field"] is not None and 0.5 <= r["double_field"] <= 0.7]
query_res, _ = self.query(
client,
collection_name,
@ -290,7 +309,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": double_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
@ -306,7 +326,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": varchar_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
@ -322,7 +343,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": varchar_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
@ -338,7 +360,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": json_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
@ -354,7 +377,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": array_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
@ -370,7 +394,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": multi_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
@ -386,15 +411,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": mix_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Query on is not null conditions for each scalar field
# Int8 field is not null
int8_not_null_filter = "int8_field is not null"
int8_not_null_expected = [r for r in total_rows if r["int8_field"] is not None]
int8_not_null_filter = "int8_field is not null and int8_field > 100"
int8_not_null_expected = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] > 100]
query_res, _ = self.query(
client,
collection_name,
@ -403,14 +429,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": int8_not_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Int16 field is not null
int16_not_null_filter = "int16_field is not null"
int16_not_null_expected = [r for r in total_rows if r["int16_field"] is not None]
int16_not_null_filter = "int16_field is not null and int16_field < 100"
int16_not_null_expected = [r for r in total_rows if r["int16_field"] is not None and r["int16_field"] < 100]
query_res, _ = self.query(
client,
collection_name,
@ -419,14 +446,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": int16_not_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Float field is not null
float_not_null_filter = "float_field is not null"
float_not_null_expected = [r for r in total_rows if r["float_field"] is not None]
float_not_null_filter = "float_field is not null and float_field > 0.5 and float_field <= 0.7"
float_not_null_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 0.5 and r["float_field"] <= 0.7]
query_res, _ = self.query(
client,
collection_name,
@ -435,14 +463,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": float_not_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Double field is not null
double_not_null_filter = "double_field is not null"
double_not_null_expected = [r for r in total_rows if r["double_field"] is not None]
double_not_null_filter = "double_field is not null and double_field <= 0.2"
double_not_null_expected = [r for r in total_rows if r["double_field"] is not None and r["double_field"] <= 0.2]
query_res, _ = self.query(
client,
collection_name,
@ -451,7 +480,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": double_not_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
@ -467,14 +497,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": varchar_not_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# JSON field is not null
json_not_null_filter = "json_field is not null"
json_not_null_expected = [r for r in total_rows if r["json_field"] is not None]
json_not_null_filter = "json_field is not null and json_field['id'] < 100"
json_not_null_expected = [r for r in total_rows if r["json_field"] is not None and r["json_field"]["id"] < 100]
query_res, _ = self.query(
client,
collection_name,
@ -483,14 +514,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": json_not_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Array field is not null
array_not_null_filter = "array_field is not null"
array_not_null_expected = [r for r in total_rows if r["array_field"] is not None]
array_not_null_filter = "array_field is not null and array_field[0] < 100"
array_not_null_expected = [r for r in total_rows if r["array_field"] is not None and r["array_field"][0] < 100]
query_res, _ = self.query(
client,
collection_name,
@ -499,7 +531,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": array_not_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
@ -516,16 +549,17 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": multi_not_null_expected,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Complex mixed conditions with is null, is not null, and comparison operators
# Test case 1: int field is null AND float field > value AND varchar field is not null
complex_mix_filter1 = "int32_field is null and float_field > 10.0 and varchar_field is not null"
complex_mix_filter1 = "int32_field is null and float_field > 0.7 and varchar_field is not null"
complex_mix_expected1 = [r for r in total_rows if r["int32_field"] is None and
r["float_field"] is not None and r["float_field"] > 10.0 and
r["float_field"] is not None and r["float_field"] > 0.7 and
r["varchar_field"] is not None]
query_res, _ = self.query(
client,
@ -535,7 +569,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": complex_mix_expected1,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
@ -553,15 +588,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": complex_mix_expected2,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
# Test case 3: Multiple fields with mixed null/not null conditions and range comparisons
complex_mix_filter3 = ("int8_field is not null and int8_field < 50 and double_field is null and "
complex_mix_filter3 = ("int8_field is not null and int8_field < 15 and double_field is null and "
"varchar_field is not null and varchar_field like \"varchar_2%\"")
complex_mix_expected3 = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 50 and
complex_mix_expected3 = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 15 and
r["double_field"] is None and
r["varchar_field"] is not None and r["varchar_field"].startswith("varchar_2")]
query_res, _ = self.query(
@ -572,7 +608,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
check_task=CheckTasks.check_query_results,
check_items={
"exp_res": complex_mix_expected3,
"with_vec": True,
"with_vec": with_vec,
"vector_type": vector_type,
"pk_name": "id"
}
)
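An illustrative sketch (not part of the diff) of how the expected result sets in this test are built client-side: filters such as "int8_field is null || int8_field < 10" must also match rows whose field is None, which is why the Python-side expectations check "is None or ...". The data layout below is hypothetical but mirrors the "null for every 5th record" pattern used above.

rows = [{"id": i, "int8_field": (None if i % 5 == 0 else i)} for i in range(20)]
hits = [r for r in rows if r["int8_field"] is None or r["int8_field"] < 10]
assert {r["id"] for r in hits} == {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15}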


@ -1548,55 +1548,6 @@ class TestCollectionRangeSearch(TestcaseBase):
**kwargs
)
@pytest.mark.tags(CaseLabel.L1)
def test_range_search_with_consistency_session(self, nq, dim, auto_id, _async):
"""
target: test range search with different consistency level
method: 1. create a collection
2. insert data
3. range search with consistency_level is "session"
expected: searched successfully
"""
limit = 1000
nb_old = 500
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
auto_id=auto_id,
dim=dim)[0:4]
# 2. search for original data after load
vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
range_search_params = {"metric_type": "COSINE", "params": {"nprobe": 10, "radius": 0,
"range_filter": 1000}}
collection_w.search(vectors[:nq], default_search_field,
range_search_params, limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": nb_old,
"_async": _async,
"pk_name": ct.default_int64_field_name})
kwargs = {}
consistency_level = kwargs.get(
"consistency_level", CONSISTENCY_SESSION)
kwargs.update({"consistency_level": consistency_level})
nb_new = 400
_, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
auto_id=auto_id, dim=dim,
insert_offset=nb_old)
insert_ids.extend(insert_ids_new)
collection_w.search(vectors[:nq], default_search_field,
range_search_params, limit,
default_search_exp, _async=_async,
**kwargs,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": nb_old + nb_new,
"_async": _async,
"pk_name": ct.default_int64_field_name})
@pytest.mark.tags(CaseLabel.L2)
def test_range_search_sparse(self):
"""


@ -80,101 +80,6 @@ half_nb = ct.default_nb // 2
max_hybrid_search_req_num = ct.max_hybrid_search_req_num
class TestSearchBase(TestcaseBase):
@pytest.fixture(
scope="function",
params=[1, 10]
)
def get_top_k(self, request):
yield request.param
@pytest.fixture(
scope="function",
params=[1, 10, 1100]
)
def get_nq(self, request):
yield request.param
@pytest.fixture(scope="function", params=[32, 128])
def dim(self, request):
yield request.param
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=[False, True])
def _async(self, request):
yield request.param
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index", ct.all_index_types[:6])
def test_each_index_with_mmap_enabled_search(self, index):
"""
target: test each index with mmap enabled search
method: test each index with mmap enabled search
expected: search success
"""
self._connect()
nb = 2000
dim = 32
collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False)[0]
params = cf.get_index_params_params(index)
default_index = {"index_type": index, "params": params, "metric_type": "L2"}
collection_w.create_index(field_name, default_index, index_name="mmap_index")
# mmap index
collection_w.alter_index("mmap_index", {'mmap.enabled': True})
# search
collection_w.load()
search_params = cf.gen_search_param(index)[0]
vector = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
collection_w.search(vector, default_search_field, search_params, ct.default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": ct.default_limit})
# enable mmap
collection_w.release()
collection_w.alter_index("mmap_index", {'mmap.enabled': False})
collection_w.load()
collection_w.search(vector, default_search_field, search_params, ct.default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": ct.default_limit})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index", ct.all_index_types[8:10])
def test_enable_mmap_search_for_binary_indexes(self, index):
"""
target: enable mmap for binary indexes
method: enable mmap for binary indexes
expected: search success
"""
self._connect()
dim = 64
nb = 2000
collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False, is_binary=True)[0]
params = cf.get_index_params_params(index)
default_index = {"index_type": index,
"params": params, "metric_type": "JACCARD"}
collection_w.create_index(ct.default_binary_vec_field_name, default_index, index_name="binary_idx_name")
collection_w.alter_index("binary_idx_name", {'mmap.enabled': True})
collection_w.set_properties({'mmap.enabled': True})
collection_w.load()
pro = collection_w.describe()[0].get("properties")
assert pro["mmap.enabled"] == 'True'
assert collection_w.index()[0].params["mmap.enabled"] == 'True'
# search
binary_vectors = cf.gen_binary_vectors(default_nq, dim)[1]
search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
output_fields = ["*"]
collection_w.search(binary_vectors, ct.default_binary_vec_field_name, search_params,
default_limit, default_search_string_exp, output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit})
class TestCollectionSearch(TestcaseBase):
""" Test case of search interface """
@ -237,129 +142,6 @@ class TestCollectionSearch(TestcaseBase):
******************************************************************
"""
@pytest.mark.skip("enable this later using session/strong consistency")
@pytest.mark.tags(CaseLabel.L1)
def test_search_new_data(self, nq, _async):
"""
target: test search new inserted data without load
method: 1. search the collection
2. insert new data
3. search the collection without load again
4. Use guarantee_timestamp to guarantee data consistency
expected: new data should be searched
"""
# 1. initialize with data
dim = 128
auto_id = False
limit = 1000
nb_old = 500
collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb_old,
auto_id=auto_id,
dim=dim)[0:5]
# 2. search for original data after load
vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
log.info("test_search_new_data: searching for original data after load")
collection_w.search(vectors[:nq], default_search_field,
default_search_params, limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": nb_old,
"_async": _async})
# 3. insert new data
nb_new = 300
_, _, _, insert_ids_new, time_stamp = cf.insert_data(collection_w, nb_new,
auto_id=auto_id, dim=dim,
insert_offset=nb_old)
insert_ids.extend(insert_ids_new)
# 4. search for new data without load
# Using bounded staleness, maybe we could not search the "inserted" entities,
# since the search requests arrived query nodes earlier than query nodes consume the insert requests.
collection_w.search(vectors[:nq], default_search_field,
default_search_params, limit,
default_search_exp, _async=_async,
guarantee_timestamp=time_stamp,
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": nb_old + nb_new,
"_async": _async})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("shards_num", [-256, 0, ct.max_shards_num // 2, ct.max_shards_num])
def test_search_with_non_default_shard_nums(self, shards_num, _async):
"""
target: test search with non_default shards_num
method: connect milvus, create collection with several shard numbers , insert, load and search
expected: search successfully with the non_default shards_num
"""
auto_id = False
self._connect()
# 1. create collection
name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(
name=name, shards_num=shards_num)
# 2. rename collection
new_collection_name = cf.gen_unique_str(prefix + "new")
self.utility_wrap.rename_collection(
collection_w.name, new_collection_name)
collection_w = self.init_collection_wrap(
name=new_collection_name, shards_num=shards_num)
# 3. insert
dataframe = cf.gen_default_dataframe_data()
collection_w.insert(dataframe)
# 4. create index and load
collection_w.create_index(
ct.default_float_vec_field_name, index_params=ct.default_flat_index)
collection_w.load()
# 5. search
vectors = [[random.random() for _ in range(default_dim)]
for _ in range(default_nq)]
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, default_limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit,
"_async": _async})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("M", [4, 64])
@pytest.mark.parametrize("efConstruction", [8, 512])
def test_search_HNSW_index_with_redundant_param(self, M, efConstruction, _async):
"""
target: test search HNSW index with redundant param
method: connect milvus, create collection , insert, create index, load and search
expected: search successfully
"""
dim = M * 4
auto_id = False
enable_dynamic_field = False
self._connect()
collection_w, _, _, insert_ids, time_stamp = \
self.init_collection_general(prefix, True, partition_num=1, auto_id=auto_id,
dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5]
# nlist is of no use
HNSW_index_params = {
"M": M, "efConstruction": efConstruction, "nlist": 100}
HNSW_index = {"index_type": "HNSW",
"params": HNSW_index_params, "metric_type": "L2"}
collection_w.create_index("float_vector", HNSW_index)
collection_w.load()
search_param = {"metric_type": "L2", "params": {
"ef": 32768, "nprobe": 10}} # nprobe is of no use
vectors = [[random.random() for _ in range(dim)]
for _ in range(default_nq)]
collection_w.search(vectors[:default_nq], default_search_field,
search_param, default_limit,
default_search_exp, _async=_async,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"ids": insert_ids,
"limit": default_limit,
"_async": _async})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("M", [4, 64])
@pytest.mark.parametrize("efConstruction", [8, 512])
@ -788,8 +570,7 @@ class TestCollectionSearch(TestcaseBase):
self.init_collection_general(prefix, True, nb=nb, dim=dim, enable_dynamic_field=True)[0:4]
# filter result with expression in collection
search_vectors = [[random.random() for _ in range(dim)]
for _ in range(default_nq)]
search_vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
_vectors = _vectors[0]
for expressions in cf.gen_json_field_expressions_and_templates():
expr = expressions[0].replace("&&", "and").replace("||", "or")
@ -858,12 +639,16 @@ class TestCollectionSearch(TestcaseBase):
ids = hits.ids
assert set(ids).issubset(filter_ids_set)
# 7. create json index
default_json_path_index = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
"json_path": f"{ct.default_json_field_name}['number']"}}
collection_w.create_index(ct.default_json_field_name, default_json_path_index, index_name = f"{ct.default_json_field_name}_0")
default_json_path_index = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
"json_path": f"{ct.default_json_field_name}['float']"}}
collection_w.create_index(ct.default_json_field_name, default_json_path_index, index_name = f"{ct.default_json_field_name}_1")
default_json_path_index = {"index_type": "INVERTED",
"params": {"json_cast_type": "double",
"json_path": f"{ct.default_json_field_name}['number']"}}
collection_w.create_index(ct.default_json_field_name, default_json_path_index,
index_name=f"{ct.default_json_field_name}_0")
default_json_path_index = {"index_type": "AUTOINDEX",
"params": {"json_cast_type": "double",
"json_path": f"{ct.default_json_field_name}['float']"}}
collection_w.create_index(ct.default_json_field_name, default_json_path_index,
index_name=f"{ct.default_json_field_name}_1")
# 8. release and load to make sure the new index is loaded
collection_w.release()
collection_w.load()
@ -994,8 +779,7 @@ class TestCollectionSearch(TestcaseBase):
collection_w.search(vectors, default_search_field, default_search_params,
default_limit, expression, output_fields=[field],
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": 0})[0]
check_items={"nq": default_nq, "limit": 0})
# 4. search normal using all the scalar type as output fields
collection_w.search(vectors, default_search_field, default_search_params,
default_limit, output_fields=[field],


@ -161,6 +161,8 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
index_type=self.binary_vector_index,
params={"nlist": 128})
self.create_index(client, self.collection_name, index_params=index_params)
self.wait_for_index_ready(client, self.collection_name, index_name=self.float_vector_field_name)
self.wait_for_index_ready(client, self.collection_name, index_name=self.bfloat16_vector_field_name)
# Load collection
self.load_collection(client, self.collection_name)
@ -378,7 +380,8 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
)
@pytest.mark.tags(CaseLabel.L2)
def test_search_with_output_fields(self):
@pytest.mark.parametrize("consistency_level", ["Strong", "Session", "Bounded", "Eventually"])
def test_search_with_output_fields_and_consistency_level(self, consistency_level):
"""
target: test search with output fields
method: 1. connect and create a collection
@ -400,6 +403,7 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
anns_field=self.float_vector_field_name,
search_params=search_params,
limit=default_limit,
consistency_level=consistency_level,
output_fields=[ct.default_string_field_name, self.dyna_filed_name1, self.dyna_filed_name2],
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
@ -1220,3 +1224,250 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
"nq": ct.default_nq,
"pk_name": "id",
"limit": ct.default_limit})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index", ct.all_index_types[:6])
def test_each_index_with_mmap_enabled_search(self, index):
"""
target: test each index with mmap enabled search
method: test each index with mmap enabled search
expected: search success
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# fast create collection
dim = 32
schema = self.create_schema(client)[0]
schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
self.create_collection(client, collection_name, schema=schema)
# insert data
data = []
for i in range(ct.default_nb):
data.append({
"id": i,
"vector": cf.gen_vectors(1, dim)[0]
})
self.insert(client, collection_name, data)
self.flush(client, collection_name)
# create index
index_params = self.prepare_index_params(client)[0]
params = cf.get_index_params_params(index)
index_params.add_index(field_name='vector', index_type=index, params=params, metric_type='L2')
self.create_index(client, collection_name, index_params=index_params)
self.wait_for_index_ready(client, collection_name, index_name='vector')
# alter mmap index
self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": True})
index_info = self.describe_index(client, collection_name, index_name='vector')
assert index_info[0]["mmap.enabled"] == 'True'
# search
self.load_collection(client, collection_name)
search_params = {}
vector = cf.gen_vectors(ct.default_nq, dim)
self.search(client, collection_name, vector, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
"limit": ct.default_limit})
# disable mmap
self.release_collection(client, collection_name)
self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": False})
index_info = self.describe_index(client, collection_name, index_name='vector')
assert index_info[0]["mmap.enabled"] == 'False'
self.load_collection(client, collection_name)
self.search(client, collection_name, vector, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
output_fields=["*"],
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
"limit": ct.default_limit})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index", ct.all_index_types[8:10])
def test_enable_mmap_search_for_binary_indexes(self, index):
"""
Test enabling mmap for binary indexes in Milvus.
This test verifies that:
1. Binary vector indexes can be successfully created with mmap enabled
2. Search operations work correctly with mmap enabled
3. Mmap can be properly disabled and search still works
The test performs the following steps:
- Creates a collection with binary vectors
- Inserts test data
- Creates index with mmap enabled
- Verifies mmap status
- Performs search with mmap enabled
- Disables mmap and verifies search still works
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# fast create collection
dim = 64
schema = self.create_schema(client)[0]
schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
schema.add_field('vector', DataType.BINARY_VECTOR, dim=dim)
self.create_collection(client, collection_name, schema=schema)
# insert data
data = []
for i in range(ct.default_nb):
data.append({
"id": i,
"vector": cf.gen_binary_vectors(1, dim)[1][0]
})
self.insert(client, collection_name, data)
self.flush(client, collection_name)
# create index
index_params = self.prepare_index_params(client)[0]
params = cf.get_index_params_params(index)
index_params.add_index(field_name='vector', index_type=index, params=params, metric_type='JACCARD')
self.create_index(client, collection_name, index_params=index_params)
self.wait_for_index_ready(client, collection_name, index_name='vector')
# alter mmap index
self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": True})
index_info = self.describe_index(client, collection_name, index_name='vector')
assert index_info[0]["mmap.enabled"] == 'True'
# load collection
self.load_collection(client, collection_name)
# search
binary_vectors = cf.gen_binary_vectors(ct.default_nq, dim)[1]
params = cf.get_search_params_params(index)
search_params = {"metric_type": "JACCARD", "params": params}
output_fields = ["*"]
self.search(client, collection_name, binary_vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
"limit": ct.default_limit})
# disable mmap
self.release_collection(client, collection_name)
self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": False})
index_info = self.describe_index(client, collection_name, index_name='vector')
assert index_info[0]["mmap.enabled"] == 'False'
self.load_collection(client, collection_name)
self.search(client, collection_name, binary_vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
output_fields=output_fields,
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
"limit": ct.default_limit})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("num_shards", [-256, 0, ct.max_shards_num // 2, ct.max_shards_num])
def test_search_with_non_default_shard_nums(self, num_shards):
"""
Test search functionality with non-default shard numbers.
This test verifies that:
1. Collections are created with default shard numbers when num_shards <= 0
2. Collections are created with specified shard numbers when num_shards > 0
3. Search operations work correctly with different shard configurations
The test follows these steps:
1. Creates a collection with specified shard numbers
2. Inserts test data
3. Builds an index
4. Performs a search operation
5. Validates the results
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
# create collection
dim = 32
schema = self.create_schema(client)[0]
schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
# create collection
self.create_collection(client, collection_name, schema=schema, num_shards=num_shards)
collection_info = self.describe_collection(client, collection_name)[0]
expected_num_shards = ct.default_shards_num if num_shards <= 0 else num_shards
assert collection_info["num_shards"] == expected_num_shards
# insert
data = []
for i in range(ct.default_nb):
data.append({
"id": i,
"vector": cf.gen_vectors(1, dim)[0]
})
self.insert(client, collection_name, data)
# create index
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name='vector', index_type='HNSW', metric_type='COSINE')
self.create_index(client, collection_name, index_params=index_params)
self.wait_for_index_ready(client, collection_name, index_name='vector')
# load
self.load_collection(client, collection_name)
# search
vectors = cf.gen_vectors(ct.default_nq, dim)
search_params = {}
self.search(client, collection_name, vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
"limit": ct.default_limit})
@pytest.mark.tags(CaseLabel.L2)
def test_search_HNSW_index_with_redundant_param(self):
"""
Test search functionality with HNSW index and redundant parameters.
This test verifies that:
1. HNSW index can be created with redundant parameters
2. Search operations work correctly with redundant parameters
3. Redundant parameters are ignored
The test performs the following steps:
1. Creates a collection with float vectors
2. Inserts test data
3. Creates HNSW index with redundant parameters
4. Performs a search operation
5. Validates the results
"""
dim = 16
index = "HNSW"
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
schema = self.create_schema(client)[0]
schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
self.create_collection(client, collection_name, schema=schema)
# insert
data = []
for i in range(ct.default_nb):
data.append({
"id": i,
"vector": cf.gen_vectors(1, dim)[0]
})
self.insert(client, collection_name, data)
self.flush(client, collection_name)
# create index
index_params = self.prepare_index_params(client)[0]
params = cf.get_index_params_params(index)
params["nlist"] = 100 # nlist is redundant parameter
index_params.add_index(field_name='vector', index_type=index,
metric_type='COSINE', params=params)
self.create_index(client, collection_name, index_params=index_params)
self.wait_for_index_ready(client, collection_name, index_name='vector')
index_info = self.describe_index(client, collection_name, index_name='vector')
assert index_info[0]["nlist"] == '100'
# load
self.load_collection(client, collection_name)
# search
vectors = cf.gen_vectors(ct.default_nq, dim)
search_params = {}
self.search(client, collection_name, vectors, anns_field="vector",
search_params=search_params, limit=ct.default_limit,
check_task=CheckTasks.check_search_results,
check_items={"nq": ct.default_nq,
"limit": ct.default_limit})


@ -7,6 +7,7 @@ from utils.util_log import test_log as log
from utils.util_pymilvus import *
from base.client_v2_base import TestMilvusClientV2Base
from pymilvus import DataType, AnnSearchRequest, WeightedRanker
from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY
class TestMilvusClientTTL(TestMilvusClientV2Base):
@ -38,7 +39,7 @@ class TestMilvusClientTTL(TestMilvusClientV2Base):
"""
client = self._client()
dim = 65
ttl = 10
ttl = 11
nb = 1000
collection_name = cf.gen_collection_name_by_testcase_name()
schema = self.create_schema(client, enable_dynamic_field=False)[0]
@ -98,18 +99,21 @@ class TestMilvusClientTTL(TestMilvusClientV2Base):
while time.time() - start_time < timeout:
if search_ttl_effective is False:
res1 = self.search(client, collection_name, search_vectors, anns_field='embeddings',
search_params={}, limit=10, consistency_level='Strong')[0]
search_params={}, limit=10, consistency_level=CONSISTENCY_STRONG)[0]
if query_ttl_effective is False:
res2 = self.query(client, collection_name, filter='',
output_fields=["count(*)"], consistency_level='Strong')[0]
output_fields=["count(*)"], consistency_level=CONSISTENCY_STRONG)[0]
if hybrid_search_ttl_effective is False:
res3 = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
limit=10, consistency_level='Strong')[0]
limit=10, consistency_level=CONSISTENCY_STRONG)[0]
if len(res1[0]) == 0 and search_ttl_effective is False:
log.info(f"search ttl effects in {round(time.time() - start_time, 4)}s")
search_ttl_effective = True
if res2[0].get('count(*)', None) == 0 and query_ttl_effective is False:
log.info(f"query ttl effects in {round(time.time() - start_time, 4)}s")
res2x = self.query(client, collection_name, filter='visible==False',
output_fields=["count(*)"], consistency_level=CONSISTENCY_STRONG)[0]
log.debug(f"res2x: {res2x[0].get('count(*)', None)}")
query_ttl_effective = True
if len(res3[0]) == 0 and hybrid_search_ttl_effective is False:
log.info(f"hybrid search ttl effects in {round(time.time() - start_time, 4)}s")
@ -152,44 +156,64 @@ class TestMilvusClientTTL(TestMilvusClientV2Base):
log.info(f"flush completed in {time.time() - t1}s")
# search data again after insert more data
res = self.search(client, collection_name, search_vectors,
search_params={}, anns_field='embeddings',
limit=10, consistency_level='Strong')[0]
assert len(res[0]) > 0
# query count(*)
res = self.query(client, collection_name, filter='visible==False',
output_fields=["count(*)"], consistency_level='Strong')[0]
assert res[0].get('count(*)', None) == 0
consistency_levels = [CONSISTENCY_EVENTUALLY, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_STRONG]
for consistency_level in consistency_levels:
log.debug(f"start to search/query with {consistency_level}")
# try 3 times
for i in range(3):
res = self.search(client, collection_name, search_vectors,
search_params={}, anns_field='embeddings',
limit=10, consistency_level=consistency_level)[0]
if len(res[0]) > 0:
break
else:
time.sleep(1)
assert len(res[0]) > 0
# hybrid search
res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
limit=10, consistency_level='Strong')[0]
assert len(res[0]) > 0
if consistency_level != CONSISTENCY_STRONG:
pass
else:
# query count(*)
res = self.query(client, collection_name, filter='',
output_fields=["count(*)"], consistency_level=consistency_level)[0]
assert res[0].get('count(*)', None) == nb * insert_times
res = self.query(client, collection_name, filter='visible==False',
output_fields=["count(*)"], consistency_level=consistency_level)[0]
assert res[0].get('count(*)', None) == 0
# query count(visible)
res = self.query(client, collection_name, filter='visible==True',
output_fields=["count(*)"], consistency_level=consistency_level)[0]
assert res[0].get('count(*)', None) == nb * insert_times
# query count(visible)
res = self.query(client, collection_name, filter='visible==True',
output_fields=["count(*)"], consistency_level='Strong')[0]
assert res[0].get('count(*)', None) > 0
# hybrid search
res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
limit=10, consistency_level=consistency_level)[0]
assert len(res[0]) > 0
# alter ttl to 1000s
self.alter_collection_properties(client, collection_name, properties={"collection.ttl.seconds": 1000})
# search data after alter ttl
res = self.search(client, collection_name, search_vectors,
search_params={}, anns_field='embeddings',
filter='visible==False', limit=10, consistency_level='Strong')[0]
assert len(res[0]) > 0
# alter ttl to 2000s
self.alter_collection_properties(client, collection_name, properties={"collection.ttl.seconds": 2000})
for consistency_level in consistency_levels:
log.debug(f"start to search/query after alter ttl with {consistency_level}")
# search data after alter ttl
res = self.search(client, collection_name, search_vectors,
search_params={}, anns_field='embeddings',
filter='visible==False', limit=10, consistency_level=consistency_level)[0]
assert len(res[0]) > 0
# hybrid search data after alter ttl
sub_search1 = AnnSearchRequest(search_vectors, "embeddings", {"level": 1}, 20, expr='visible==False')
sub_search2 = AnnSearchRequest(search_vectors, "embeddings_2", {"level": 1}, 20, expr='visible==False')
res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
limit=10, consistency_level='Strong')[0]
assert len(res[0]) > 0
# hybrid search data after alter ttl
sub_search1 = AnnSearchRequest(search_vectors, "embeddings", {"level": 1}, 20, expr='visible==False')
sub_search2 = AnnSearchRequest(search_vectors, "embeddings_2", {"level": 1}, 20, expr='visible==False')
res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
limit=10, consistency_level=consistency_level)[0]
assert len(res[0]) > 0
# query count(*)
res = self.query(client, collection_name, filter='visible==False',
output_fields=["count(*)"], consistency_level='Strong')[0]
assert res[0].get('count(*)', None) == insert_times * nb
res = self.query(client, collection_name, filter='',
output_fields=["count(*)"], consistency_level='Strong')[0]
assert res[0].get('count(*)', None) == insert_times * nb * 2
# query count(*)
res = self.query(client, collection_name, filter='visible==False',
output_fields=["count(*)"], consistency_level=consistency_level)[0]
assert res[0].get('count(*)', 0) == insert_times * nb
res = self.query(client, collection_name, filter='',
output_fields=["count(*)"], consistency_level=consistency_level)[0]
if consistency_level != CONSISTENCY_STRONG:
assert res[0].get('count(*)', 0) >= insert_times * nb
else:
assert res[0].get('count(*)', 0) == insert_times * nb * 2
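The three-attempt loop above exists because, with Session/Bounded/Eventually consistency, a read issued right after insert and flush may not yet see the new data. A minimal sketch of that retry pattern (illustrative; search_with_retry and do_search are hypothetical names, with do_search being any zero-argument callable wrapping client.search):

import time

def search_with_retry(do_search, attempts=3, wait_s=1):
    # Retry a few times before giving up: non-Strong reads may lag behind writes.
    res = do_search()
    for _ in range(attempts - 1):
        if len(res[0]) > 0:
            break
        time.sleep(wait_s)
        res = do_search()
    return res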


@ -28,8 +28,8 @@ pytest-parallel
pytest-random-order
# pymilvus
pymilvus==2.6.0rc151
pymilvus[bulk_writer]==2.6.0rc151
pymilvus==2.6.0rc155
pymilvus[bulk_writer]==2.6.0rc155
# for protobuf
protobuf==5.27.2


@ -68,12 +68,12 @@ class IVF_RABITQ:
# refine params test
{
"description": "Enable Refine Test",
"params": {"refine": 'true'}, # to be fixed: #41760
"params": {"refine": 'true'},
"expected": success
},
{
"description": "Disable Refine Test",
"params": {"refine": 'false'}, # to be fixed: #41760
"params": {"refine": 'false'},
"expected": success
},
@ -194,7 +194,13 @@ class IVF_RABITQ:
{
"description": "Exceed nlist Test",
"params": {"nprobe": 129}, # Assuming nlist=128
"expected": success # to be fixed: #41765
"expected": success
},
{
"description": "Exceed nprobe Test",
"params": {"nprobe": 65537},
"expected": {"err_code": 999,
"err_msg": "should be in range [1, 65536]"}
},
{
"description": "Negative Value Test",


@ -82,7 +82,7 @@ class TestIvfRabitqBuildParams(TestMilvusClientV2Base):
for key, value in build_params.items():
if value is not None:
assert key in idx_info.keys()
# assert value in idx_info.values() # TODO: uncommented after #41783 fixed
assert str(value) in idx_info.values() # TODO: uncommented after #41783 fixed
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("vector_data_type", ct.all_vector_types)


@ -72,6 +72,23 @@ class TestPartitionKeyParams(TestcaseBase):
for i in range(nq):
assert res1[i].ids == res2[i].ids == res3[i].ids
# search with 'or' to verify no partition key optimization local with or binary expr
query_res1 = collection_w.query(
expr=f'{string_field.name} == "{string_prefix}5" || {int64_field.name} in [2,4,6]',
output_fields=['count(*)'])[0]
query_res2 = collection_w.query(
expr=f'{string_field.name} in ["{string_prefix}2","{string_prefix}4", "{string_prefix}6"] || {int64_field.name}==5',
output_fields=['count(*)'])[0]
query_res3 = collection_w.query(
expr=f'{int64_field.name}==5 or {string_field.name} in ["{string_prefix}2","{string_prefix}4", "{string_prefix}6"]',
output_fields=['count(*)'])[0]
query_res4 = collection_w.query(
expr=f'{int64_field.name} in [2,4,6] || {string_field.name} == "{string_prefix}5"',
output_fields=['count(*)'])[0]
# assert the results are consistent across the equivalent expressions
assert query_res1[0].get('count(*)') == query_res2[0].get('count(*)') \
== query_res3[0].get('count(*)') == query_res4[0].get('count(*)') == 40
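An illustrative model (not part of the diff) of why the four count(*) queries above must agree: an OR expression cannot be served by pruning to a single partition-key value, and the union of the two predicates is the same whichever operand comes first. The numbers here are hypothetical (100 rows, 10 distinct key values); the test asserts the analogous count of 40 for its own data.

rows = [{"int64": i % 10, "str": f"prefix_{i % 10}"} for i in range(100)]
lhs = [r for r in rows if r["str"] == "prefix_5" or r["int64"] in [2, 4, 6]]
rhs = [r for r in rows if r["int64"] in [2, 4, 6] or r["str"] == "prefix_5"]
assert len(lhs) == len(rhs) == 40   # same union regardless of operand order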
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name, ct.default_string_field_name])
@pytest.mark.parametrize("index_on_par_key_field", [True, False])