test: Add tests for partition key filter issue and ttl eventually search (#43052)
related issue: #42918

1. add tests for ttl eventually search
2. add tests for partition key filter
3. improve check query results for output fields
4. verify some fix for rabitq index and update the test accordingly
5. update gen random float vector in (-1, 1) instead of (0, 1)

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
parent 09c6df62d8
commit e8011908ac
@@ -566,31 +566,46 @@ class ResponseChecker:
            raise Exception("No expect values found in the check task")
        exp_res = check_items.get("exp_res", None)
        with_vec = check_items.get("with_vec", False)
        pk_name = check_items.get("pk_name", ct.default_primary_field_name)
        vector_type = check_items.get("vector_type", "FLOAT_VECTOR")
        if vector_type == DataType.FLOAT16_VECTOR:
            for single_exp_res in exp_res:
                single_exp_res['vector'] = single_exp_res['vector'] .tolist()
            for single_query_result in query_res:
                single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=np.float16).tolist()
        if vector_type == DataType.BFLOAT16_VECTOR:
            for single_exp_res in exp_res:
                single_exp_res['vector'] = single_exp_res['vector'] .tolist()
            for single_query_result in query_res:
                single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=bfloat16).tolist()
        if vector_type == DataType.INT8_VECTOR:
            for single_exp_res in exp_res:
                single_exp_res['vector'] = single_exp_res['vector'] .tolist()
            for single_query_result in query_res:
                single_query_result['vector'] = np.frombuffer(single_query_result['vector'][0], dtype=np.int8).tolist()
        exp_limit = check_items.get("exp_limit", None)
        count = check_items.get("count(*)", None)
        if count is not None:
            assert count == query_res[0].get("count(*)", None)
            return True
        if exp_limit is None and exp_res is None:
            raise Exception(f"No expected values would be checked in the check task")
        if exp_limit is not None:
            assert len(query_res) == exp_limit
        # pk_name = check_items.get("pk_name", ct.default_primary_field_name)
        # if with_vec:
        if exp_res is not None:
            if with_vec is True:
                vector_type = check_items.get('vector_type', 'FLOAT_VECTOR')
                vector_field = check_items.get('vector_field', 'vector')
                if vector_type == DataType.FLOAT16_VECTOR:
                    # for single_exp_res in exp_res:
                    #     single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
                    for single_query_result in query_res:
                        single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=np.float16).tolist()
                if vector_type == DataType.BFLOAT16_VECTOR:
                    # for single_exp_res in exp_res:
                    #     single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
                    for single_query_result in query_res:
                        single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=bfloat16).tolist()
                if vector_type == DataType.INT8_VECTOR:
                    # for single_exp_res in exp_res:
                    #     if single_exp_res[vector_field].__class__ is not list:
                    #         single_exp_res[vector_field] = single_exp_res[vector_field].tolist()
                    for single_query_result in query_res:
                        single_query_result[vector_field] = np.frombuffer(single_query_result[vector_field][0], dtype=np.int8).tolist()
            if isinstance(query_res, list):
                assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name,
                                              with_vec=with_vec)
                # assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=pk_name, with_vec=with_vec)
                # return True
                assert pc.compare_lists_ignore_order(a=query_res, b=exp_res)
                return True
            else:
                log.error(f"Query result {query_res} is not list")
                return False

        log.warning(f'Expected query result is {exp_res}')

    @staticmethod
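Note: Milvus returns FLOAT16, BFLOAT16, and INT8 vectors as packed bytes, which is why the checker decodes each query result with np.frombuffer before comparing. A minimal sketch of that byte-level round trip; ml_dtypes stands in here for whatever actually provides bfloat16 in the test suite (the decode pattern is the point, not the import):

import numpy as np
from ml_dtypes import bfloat16  # assumption: any numpy-compatible bfloat16 dtype

# encode: what the server effectively hands back for a FLOAT16_VECTOR field
fp16 = np.array([0.25, -0.5, 1.0], dtype=np.float16)
raw = fp16.tobytes()
# decode: identical to the checker's np.frombuffer(..., dtype=np.float16)
assert np.frombuffer(raw, dtype=np.float16).tolist() == [0.25, -0.5, 1.0]

bf16 = np.array([0.25, -0.5, 1.0], dtype=bfloat16)
decoded = np.frombuffer(bf16.tobytes(), dtype=bfloat16)
assert [float(v) for v in decoded] == [0.25, -0.5, 1.0]  # values chosen to be exactly representable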
@@ -810,4 +825,4 @@ class ResponseChecker:
        if check_items.get("index_name", None) is not None:
            assert res["index_name"] == check_items.get("index_name")

            return True
        return True

@@ -5,6 +5,111 @@ from common import common_type as ct
sys.path.append("..")
from utils.util_log import test_log as log

import numpy as np
from collections.abc import Iterable

epsilon = ct.epsilon

def deep_approx_compare(x, y, epsilon=epsilon):
    """
    Recursively compares two objects for approximate equality, handling floating-point precision.

    Args:
        x: First object to compare
        y: Second object to compare
        epsilon: Tolerance for floating-point comparisons (default: 1e-6)

    Returns:
        bool: True if objects are approximately equal, False otherwise

    Handles:
        - Numeric types (int, float, numpy scalars)
        - Sequences (list, tuple, numpy arrays)
        - Dictionaries
        - Other iterables (except strings)
        - Numpy arrays (shape and value comparison)
        - Falls back to strict equality for other types
    """
    # Handle basic numeric types (including numpy scalars)
    if isinstance(x, (int, float, np.integer, np.floating)) and isinstance(y, (int, float, np.integer, np.floating)):
        return abs(float(x) - float(y)) < epsilon

    # Handle lists/tuples/arrays
    if isinstance(x, (list, tuple, np.ndarray)) and isinstance(y, (list, tuple, np.ndarray)):
        if len(x) != len(y):
            return False
        for a, b in zip(x, y):
            if not deep_approx_compare(a, b, epsilon):
                return False
        return True

    # Handle dictionaries
    if isinstance(x, dict) and isinstance(y, dict):
        if set(x.keys()) != set(y.keys()):
            return False
        for key in x:
            if not deep_approx_compare(x[key], y[key], epsilon):
                return False
        return True

    # Handle other iterables (e.g., Protobuf containers)
    if isinstance(x, Iterable) and isinstance(y, Iterable) and not isinstance(x, str):
        try:
            return deep_approx_compare(list(x), list(y), epsilon)
        except:
            pass

    # Handle numpy arrays
    if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
        if x.shape != y.shape:
            return False
        return np.allclose(x, y, atol=epsilon)

    # Fall back to strict equality for other types
    return x == y


def compare_lists_ignore_order(a, b, epsilon=epsilon):
    """
    Compares two lists of dictionaries for equality (order-insensitive) with floating-point tolerance.

    Args:
        a (list): First list of dictionaries to compare
        b (list): Second list of dictionaries to compare
        epsilon (float, optional): Tolerance for floating-point comparisons. Defaults to 1e-6.

    Returns:
        bool: True if lists contain equivalent dictionaries (order doesn't matter), False otherwise

    Note:
        Uses deep_approx_compare() for dictionary comparison with floating-point tolerance.
        Maintains O(n²) complexity due to nested comparisons.
    """
    if len(a) != len(b):
        return False

    # Create a set of available indices for b
    available_indices = set(range(len(b)))

    for item_a in a:
        matched = False
        # Create a list of indices to remove (avoid modifying the set during iteration)
        to_remove = []

        for idx in available_indices:
            if deep_approx_compare(item_a, b[idx], epsilon):
                to_remove.append(idx)
                matched = True
                break

        if not matched:
            return False

        # Remove matched indices
        available_indices -= set(to_remove)

    return True


def ip_check(ip):
    if ip == "localhost":

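Note: these two helpers are what check_query_results now delegates to. deep_approx_compare tolerates float drift anywhere inside a nested structure, and compare_lists_ignore_order matches rows regardless of result order. A small usage sketch, assuming ct.epsilon keeps the documented default of 1e-6:

exp = [{'id': 1, 'vec': [0.1, 0.2]}, {'id': 2, 'vec': [0.3, 0.4]}]
act = [{'id': 2, 'vec': [0.3, 0.4 + 1e-9]}, {'id': 1, 'vec': [0.1, 0.2]}]
assert compare_lists_ignore_order(exp, act)           # order and sub-epsilon drift are ignored
assert not compare_lists_ignore_order(exp, act[:1])   # length mismatch fails immediately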
@@ -1127,33 +1127,6 @@ def gen_schema_multi_string_fields(string_fields):
                                             primary_field=primary_field, auto_id=False)
    return schema


def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR):
    vectors = []
    if vector_data_type == DataType.FLOAT_VECTOR:
        vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
    elif vector_data_type == DataType.FLOAT16_VECTOR:
        vectors = gen_fp16_vectors(nb, dim)[1]
    elif vector_data_type == DataType.BFLOAT16_VECTOR:
        vectors = gen_bf16_vectors(nb, dim)[1]
    elif vector_data_type == DataType.SPARSE_FLOAT_VECTOR:
        vectors = gen_sparse_vectors(nb, dim)
    elif vector_data_type == ct.text_sparse_vector:
        vectors = gen_text_vectors(nb)  # for Full Text Search
    elif vector_data_type == DataType.INT8_VECTOR:
        vectors = gen_int8_vectors(nb, dim)[1]
    elif vector_data_type == DataType.BINARY_VECTOR:
        vectors = gen_binary_vectors(nb, dim)[1]
    else:
        log.error(f"Invalid vector data type: {vector_data_type}")
        raise Exception(f"Invalid vector data type: {vector_data_type}")
    if dim > 1:
        if vector_data_type == DataType.FLOAT_VECTOR:
            vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
            vectors = vectors.tolist()
    return vectors


def gen_string(nb):
    string_values = [str(random.random()) for _ in range(nb)]
    return string_values
@@ -3613,7 +3586,7 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok", empty_percentage=0):
def gen_vectors(nb, dim, vector_data_type=DataType.FLOAT_VECTOR):
    vectors = []
    if vector_data_type == DataType.FLOAT_VECTOR:
        vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
        vectors = [[random.uniform(-1, 1) for _ in range(dim)] for _ in range(nb)]
    elif vector_data_type == DataType.FLOAT16_VECTOR:
        vectors = gen_fp16_vectors(nb, dim)[1]
    elif vector_data_type == DataType.BFLOAT16_VECTOR:

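Note: the functional change above is only the sampling range. random.random() draws from [0, 1), so every coordinate was non-negative; random.uniform(-1, 1) covers both signs, which exercises COSINE/IP metrics and quantizing indexes such as RaBitQ more realistically. Purely illustrative:

import random
random.seed(0)  # deterministic for the sketch
old_style = [random.random() for _ in range(4)]        # all in [0, 1)
new_style = [random.uniform(-1, 1) for _ in range(4)]  # spans (-1, 1)
assert all(0.0 <= v < 1.0 for v in old_style)
assert all(-1.0 <= v <= 1.0 for v in new_style)

FLOAT_VECTOR output is still L2-normalized afterwards when dim > 1, so only the direction distribution changes, not the norm.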
@@ -76,12 +76,12 @@ class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
    @pytest.mark.parametrize("is_release", [True])
    @pytest.mark.parametrize("single_data_num", [50])
    @pytest.mark.parametrize("expr_field", [ct.default_int64_field_name,
                                            # ct.default_string_field_name,  # TODO: uncommented after #42604 fixed
                                            ct.default_string_field_name,
                                            ct.default_float_array_field_name])
    def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array(self,
                                                                                            enable_dynamic_field,
                                                                                            supported_numeric_scalar_index,
                                                                                            # supported_varchar_scalar_index,
                                                                                            supported_varchar_scalar_index,
                                                                                            supported_json_path_index,
                                                                                            supported_array_double_float_scalar_index,
                                                                                            is_flush,

@@ -54,7 +54,7 @@ def external_filter_with_outputs(hits):
    results = []
    for hit in hits:
        # equals filter nothing if there are output_fields
        if hit.distance < 1.0 and len(hit.fields) > 0:
        if hit.distance <= 4.0 and len(hit.fields) > 0:
            results.append(hit)
    return results


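Note: external_filter_with_outputs post-filters search hits; with randomly signed vectors the distances grew, so the cutoff is loosened from < 1.0 to <= 4.0 to keep the filter from discarding every hit. A usage sketch with a stand-in Hit type (real hits come from pymilvus; this dataclass is only for illustration):

from dataclasses import dataclass

@dataclass
class FakeHit:  # hypothetical stand-in for a pymilvus search Hit
    distance: float
    fields: dict

hits = [FakeHit(0.5, {"id": 1}), FakeHit(3.9, {"id": 2}), FakeHit(4.2, {"id": 3})]
kept = external_filter_with_outputs(hits)
assert [h.fields["id"] for h in kept] == [1, 2]  # 4.2 exceeds the 4.0 cutoff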
@@ -1,3 +1,6 @@
import random

import pandas
import pytest
import numpy as np
import time
@@ -10,7 +13,6 @@ from base.client_v2_base import TestMilvusClientV2Base
from pymilvus import DataType, FieldSchema, CollectionSchema

# Test parameters
default_dim = ct.default_dim
default_nb = ct.default_nb
default_nq = ct.default_nq
default_limit = ct.default_limit
@@ -28,7 +30,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("flush_enable", [True, False])
    @pytest.mark.parametrize("scalar_index_enable", [True, False])
    def test_milvus_client_e2e_default(self, flush_enable, scalar_index_enable):
    @pytest.mark.parametrize("vector_type", [DataType.FLOAT_VECTOR])
    def test_milvus_client_e2e_default(self, flush_enable, scalar_index_enable, vector_type):
        """
        target: test high level api: client.create_collection, insert, search, query
        method: create connection, collection, insert and search with:
@@ -37,13 +40,14 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
        expected: search/query successfully
        """
        client = self._client()

        dim = 8

        # 1. Create collection with custom schema
        collection_name = cf.gen_collection_name_by_testcase_name()
        schema = self.create_schema(client, enable_dynamic_field=False)[0]
        # Primary key and vector field
        schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=default_dim)
        schema.add_field("vector", vector_type, dim=dim)
        # Boolean type
        schema.add_field("bool_field", DataType.BOOL, nullable=True)
        # Integer types
@@ -59,7 +63,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
        # JSON type
        schema.add_field("json_field", DataType.JSON, nullable=True)
        # Array type
        schema.add_field("array_field", DataType.ARRAY, element_type=DataType.INT64, max_capacity=12, nullable=True)
        schema.add_field("array_field", DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=12, nullable=True)

        # Create collection
        self.create_collection(client, collection_name, schema=schema)
@@ -68,14 +72,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
        num_inserts = 5  # insert data for 5 times
        total_rows = []
        for batch in range(num_inserts):
            vectors = cf.gen_vectors(default_nb, default_dim)
            vectors = list(cf.gen_vectors(default_nb, dim, vector_data_type=vector_type)) \
                if vector_type == DataType.FLOAT_VECTOR \
                else cf.gen_vectors(default_nb, dim, vector_data_type=vector_type)
            rows = []
            start_id = batch * default_nb  # ensure id is not duplicated

            for i in range(default_nb):
                row = {
                    "id": start_id + i,  # ensure id is not duplicated
                    "embeddings": list(vectors[i])
                    "vector": vectors[i]
                }

                # Add nullable fields with null values for every 5th record
@@ -99,11 +105,11 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
                    "int16_field": i % 32768,
                    "int32_field": i,
                    "int64_field": i,
                    "float_field": float(i),
                    "double_field": float(i) * 1.0,
                    "float_field": random.random(),
                    "double_field": random.random(),
                    "varchar_field": f"varchar_{start_id + i}",
                    "json_field": {"id": start_id + i, "value": f"json_{start_id + i}"},
                    "array_field": [i, i + 1, i + 2]
                    "array_field": [random.random() for _ in range(5)]
                })
                rows.append(row)
                total_rows.append(row)
@@ -124,7 +130,7 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):

        # Create index parameters
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index("embeddings", metric_type="COSINE")
        index_params.add_index("vector", metric_type="COSINE")

        # Add autoindex for scalar fields if enabled
        if scalar_index_enable:
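Note: with scalar_index_enable the test adds AUTOINDEX entries for the scalar fields next to the vector index. A hedged sketch of what such a setup looks like with the pymilvus MilvusClient API (field names follow the schema above; the exact scalar field list in the test may differ):

index_params = client.prepare_index_params()
index_params.add_index("vector", metric_type="COSINE")
for scalar_field in ["int64_field", "varchar_field"]:  # illustrative subset
    index_params.add_index(scalar_field, index_type="AUTOINDEX")
client.create_index(collection_name, index_params=index_params)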
@@ -160,13 +166,13 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):

        # 4. Search
        t0 = time.time()
        vectors_to_search = cf.gen_vectors(1, default_dim)
        vectors_to_search = cf.gen_vectors(1, dim, vector_data_type=vector_type)
        search_params = {"metric_type": "COSINE", "params": {"nprobe": 100}}
        search_res, _ = self.search(
            client,
            collection_name,
            vectors_to_search,
            anns_field="embeddings",
            anns_field="vector",
            search_params=search_params,
            limit=default_limit,
            output_fields=['*'],
@@ -182,26 +188,34 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):

        # 5. Query with filters on each scalar field
        t0 = time.time()

        # Query on boolean field
        output_fields = ['id', 'int8_field', 'json_field']
        bool_filter = "bool_field == true"
        bool_expected = [r for r in total_rows if r["bool_field"] is not None and r["bool_field"]]
        bool_expected = [
            {
                'id': r['id'],
                'int8_field': r['int8_field'],
                'json_field': r['json_field']
            }
            for r in total_rows if r["bool_field"] is not None and r["bool_field"]]
        query_res, _ = self.query(
            client,
            collection_name,
            filter=bool_filter,
            output_fields=['*'],
            output_fields=output_fields,
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": bool_expected,
                "with_vec": True,
                "with_vec": False,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Query on int8 field
        int8_filter = "int8_field < 50"
        int8_expected = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 50]
        with_vec = True
        int8_filter = "int8_field is null || int8_field < 10"
        int8_expected = [r for r in total_rows if r["int8_field"] is None or r["int8_field"] < 10]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -210,14 +224,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": int8_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Query on int16 field
        int16_filter = "int16_field < 1000"
        int16_expected = [r for r in total_rows if r["int16_field"] is not None and r["int16_field"] < 1000]
        int16_filter = "100 <= int16_field < 200"
        int16_expected = [r for r in total_rows if r["int16_field"] is not None and 100 <= r["int16_field"] < 200]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -226,14 +241,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": int16_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Query on int32 field
        int32_filter = "int32_field in [1,2,3,4,5]"
        int32_expected = [r for r in total_rows if r["int32_field"] is not None and r["int32_field"] in [1,2,3,4,5]]
        int32_filter = "int32_field in [1,2,5,6]"
        int32_expected = [r for r in total_rows if r["int32_field"] is not None and r["int32_field"] in [1,2,5,6]]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -242,14 +258,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": int32_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Query on int64 field
        int64_filter = "int64_field >= 10"
        int64_expected = [r for r in total_rows if r["int64_field"] is not None and r["int64_field"] >= 10]
        int64_filter = "int64_field >= 4678 and int64_field < 5050"
        int64_expected = [r for r in total_rows if r["int64_field"] is not None and r["int64_field"] >= 4678 and r["int64_field"] < 5050]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -258,14 +275,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": int64_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Query on float field
        float_filter = "float_field > 5.0"
        float_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 5.0]
        float_filter = "float_field > 0.5 and float_field <= 0.7"
        float_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 0.5 and r["float_field"] <= 0.7]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -274,14 +292,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": float_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Query on double field
        double_filter = "3.0 <=double_field <= 7.0"
        double_expected = [r for r in total_rows if r["double_field"] is not None and 3.0 <= r["double_field"] <= 7.0]
        double_filter = "0.5 <=double_field <= 0.7"
        double_expected = [r for r in total_rows if r["double_field"] is not None and 0.5 <= r["double_field"] <= 0.7]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -290,7 +309,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": double_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )
@@ -306,7 +326,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": varchar_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )
@@ -322,7 +343,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": varchar_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )
@@ -338,7 +360,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": json_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )
@@ -354,7 +377,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": array_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )
@@ -370,7 +394,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": multi_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )
@@ -386,15 +411,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": mix_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Query on is not null conditions for each scalar field
        # Int8 field is not null
        int8_not_null_filter = "int8_field is not null"
        int8_not_null_expected = [r for r in total_rows if r["int8_field"] is not None]
        int8_not_null_filter = "int8_field is not null and int8_field > 100"
        int8_not_null_expected = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] > 100]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -403,14 +429,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": int8_not_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Int16 field is not null
        int16_not_null_filter = "int16_field is not null"
        int16_not_null_expected = [r for r in total_rows if r["int16_field"] is not None]
        int16_not_null_filter = "int16_field is not null and int16_field < 100"
        int16_not_null_expected = [r for r in total_rows if r["int16_field"] is not None and r["int16_field"] < 100]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -419,14 +446,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": int16_not_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Float field is not null
        float_not_null_filter = "float_field is not null"
        float_not_null_expected = [r for r in total_rows if r["float_field"] is not None]
        float_not_null_filter = "float_field is not null and float_field > 0.5 and float_field <= 0.7"
        float_not_null_expected = [r for r in total_rows if r["float_field"] is not None and r["float_field"] > 0.5 and r["float_field"] <= 0.7]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -435,14 +463,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": float_not_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Double field is not null
        double_not_null_filter = "double_field is not null"
        double_not_null_expected = [r for r in total_rows if r["double_field"] is not None]
        double_not_null_filter = "double_field is not null and double_field <= 0.2"
        double_not_null_expected = [r for r in total_rows if r["double_field"] is not None and r["double_field"] <= 0.2]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -451,7 +480,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": double_not_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )
@@ -467,14 +497,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": varchar_not_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # JSON field is not null
        json_not_null_filter = "json_field is not null"
        json_not_null_expected = [r for r in total_rows if r["json_field"] is not None]
        json_not_null_filter = "json_field is not null and json_field['id'] < 100"
        json_not_null_expected = [r for r in total_rows if r["json_field"] is not None and r["json_field"]["id"] < 100]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -483,14 +514,15 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": json_not_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Array field is not null
        array_not_null_filter = "array_field is not null"
        array_not_null_expected = [r for r in total_rows if r["array_field"] is not None]
        array_not_null_filter = "array_field is not null and array_field[0] < 100"
        array_not_null_expected = [r for r in total_rows if r["array_field"] is not None and r["array_field"][0] < 100]
        query_res, _ = self.query(
            client,
            collection_name,
@@ -499,7 +531,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": array_not_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )
@@ -516,16 +549,17 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": multi_not_null_expected,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Complex mixed conditions with is null, is not null, and comparison operators
        # Test case 1: int field is null AND float field > value AND varchar field is not null
        complex_mix_filter1 = "int32_field is null and float_field > 10.0 and varchar_field is not null"
        complex_mix_filter1 = "int32_field is null and float_field > 0.7 and varchar_field is not null"
        complex_mix_expected1 = [r for r in total_rows if r["int32_field"] is None and
                                 r["float_field"] is not None and r["float_field"] > 10.0 and
                                 r["float_field"] is not None and r["float_field"] > 0.7 and
                                 r["varchar_field"] is not None]
        query_res, _ = self.query(
            client,
@@ -535,7 +569,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": complex_mix_expected1,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )
@@ -553,15 +588,16 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": complex_mix_expected2,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

        # Test case 3: Multiple fields with mixed null/not null conditions and range comparisons
        complex_mix_filter3 = ("int8_field is not null and int8_field < 50 and double_field is null and "
        complex_mix_filter3 = ("int8_field is not null and int8_field < 15 and double_field is null and "
                               "varchar_field is not null and varchar_field like \"varchar_2%\"")
        complex_mix_expected3 = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 50 and
        complex_mix_expected3 = [r for r in total_rows if r["int8_field"] is not None and r["int8_field"] < 15 and
                                 r["double_field"] is None and
                                 r["varchar_field"] is not None and r["varchar_field"].startswith("varchar_2")]
        query_res, _ = self.query(
@@ -572,7 +608,8 @@ class TestMilvusClientE2E(TestMilvusClientV2Base):
            check_task=CheckTasks.check_query_results,
            check_items={
                "exp_res": complex_mix_expected3,
                "with_vec": True,
                "with_vec": with_vec,
                "vector_type": vector_type,
                "pk_name": "id"
            }
        )

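Note: every query block above follows one pattern: a Milvus filter string using null-aware operators, mirrored by a Python comprehension over total_rows that computes the expected rows client-side. A compact sketch of that mirroring with hypothetical rows shaped like the inserted data:

rows = [{"id": 0, "int32_field": None, "float_field": 0.9, "varchar_field": "a"},
        {"id": 1, "int32_field": 5, "float_field": 0.9, "varchar_field": "b"}]
filter_str = "int32_field is null and float_field > 0.7 and varchar_field is not null"
expected = [r for r in rows
            if r["int32_field"] is None
            and r["float_field"] is not None and r["float_field"] > 0.7
            and r["varchar_field"] is not None]
assert [r["id"] for r in expected] == [0]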
@@ -1548,55 +1548,6 @@ class TestCollectionRangeSearch(TestcaseBase):
                            **kwargs
                            )

    @pytest.mark.tags(CaseLabel.L1)
    def test_range_search_with_consistency_session(self, nq, dim, auto_id, _async):
        """
        target: test range search with different consistency level
        method: 1. create a collection
                2. insert data
                3. range search with consistency_level is "session"
        expected: searched successfully
        """
        limit = 1000
        nb_old = 500
        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb_old,
                                                                      auto_id=auto_id,
                                                                      dim=dim)[0:4]
        # 2. search for original data after load
        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
        range_search_params = {"metric_type": "COSINE", "params": {"nprobe": 10, "radius": 0,
                                                                   "range_filter": 1000}}
        collection_w.search(vectors[:nq], default_search_field,
                            range_search_params, limit,
                            default_search_exp, _async=_async,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": nb_old,
                                         "_async": _async,
                                         "pk_name": ct.default_int64_field_name})

        kwargs = {}
        consistency_level = kwargs.get(
            "consistency_level", CONSISTENCY_SESSION)
        kwargs.update({"consistency_level": consistency_level})

        nb_new = 400
        _, _, _, insert_ids_new, _ = cf.insert_data(collection_w, nb_new,
                                                    auto_id=auto_id, dim=dim,
                                                    insert_offset=nb_old)
        insert_ids.extend(insert_ids_new)
        collection_w.search(vectors[:nq], default_search_field,
                            range_search_params, limit,
                            default_search_exp, _async=_async,
                            **kwargs,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": nb_old + nb_new,
                                         "_async": _async,
                                         "pk_name": ct.default_int64_field_name})

    @pytest.mark.tags(CaseLabel.L2)
    def test_range_search_sparse(self):
        """

@@ -80,101 +80,6 @@ half_nb = ct.default_nb // 2
max_hybrid_search_req_num = ct.max_hybrid_search_req_num


class TestSearchBase(TestcaseBase):
    @pytest.fixture(
        scope="function",
        params=[1, 10]
    )
    def get_top_k(self, request):
        yield request.param

    @pytest.fixture(
        scope="function",
        params=[1, 10, 1100]
    )
    def get_nq(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=[32, 128])
    def dim(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=[False, True])
    def auto_id(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=[False, True])
    def _async(self, request):
        yield request.param

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("index", ct.all_index_types[:6])
    def test_each_index_with_mmap_enabled_search(self, index):
        """
        target: test each index with mmap enabled search
        method: test each index with mmap enabled search
        expected: search success
        """
        self._connect()
        nb = 2000
        dim = 32
        collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False)[0]
        params = cf.get_index_params_params(index)
        default_index = {"index_type": index, "params": params, "metric_type": "L2"}
        collection_w.create_index(field_name, default_index, index_name="mmap_index")
        # mmap index
        collection_w.alter_index("mmap_index", {'mmap.enabled': True})
        # search
        collection_w.load()
        search_params = cf.gen_search_param(index)[0]
        vector = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
        collection_w.search(vector, default_search_field, search_params, ct.default_limit,
                            output_fields=["*"],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": ct.default_limit})
        # enable mmap
        collection_w.release()
        collection_w.alter_index("mmap_index", {'mmap.enabled': False})
        collection_w.load()
        collection_w.search(vector, default_search_field, search_params, ct.default_limit,
                            output_fields=["*"],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": ct.default_limit})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("index", ct.all_index_types[8:10])
    def test_enable_mmap_search_for_binary_indexes(self, index):
        """
        target: enable mmap for binary indexes
        method: enable mmap for binary indexes
        expected: search success
        """
        self._connect()
        dim = 64
        nb = 2000
        collection_w = self.init_collection_general(prefix, True, nb, dim=dim, is_index=False, is_binary=True)[0]
        params = cf.get_index_params_params(index)
        default_index = {"index_type": index,
                         "params": params, "metric_type": "JACCARD"}
        collection_w.create_index(ct.default_binary_vec_field_name, default_index, index_name="binary_idx_name")
        collection_w.alter_index("binary_idx_name", {'mmap.enabled': True})
        collection_w.set_properties({'mmap.enabled': True})
        collection_w.load()
        pro = collection_w.describe()[0].get("properties")
        assert pro["mmap.enabled"] == 'True'
        assert collection_w.index()[0].params["mmap.enabled"] == 'True'
        # search
        binary_vectors = cf.gen_binary_vectors(default_nq, dim)[1]
        search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
        output_fields = ["*"]
        collection_w.search(binary_vectors, ct.default_binary_vec_field_name, search_params,
                            default_limit, default_search_string_exp, output_fields=output_fields,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": default_limit})


class TestCollectionSearch(TestcaseBase):
    """ Test case of search interface """
@@ -237,129 +142,6 @@ class TestCollectionSearch(TestcaseBase):
    ******************************************************************
    """

    @pytest.mark.skip("enable this later using session/strong consistency")
    @pytest.mark.tags(CaseLabel.L1)
    def test_search_new_data(self, nq, _async):
        """
        target: test search new inserted data without load
        method: 1. search the collection
                2. insert new data
                3. search the collection without load again
                4. Use guarantee_timestamp to guarantee data consistency
        expected: new data should be searched
        """
        # 1. initialize with data
        dim = 128
        auto_id = False
        limit = 1000
        nb_old = 500
        collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb_old,
                                                                                  auto_id=auto_id,
                                                                                  dim=dim)[0:5]
        # 2. search for original data after load
        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
        log.info("test_search_new_data: searching for original data after load")
        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, limit,
                            default_search_exp, _async=_async,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": nb_old,
                                         "_async": _async})
        # 3. insert new data
        nb_new = 300
        _, _, _, insert_ids_new, time_stamp = cf.insert_data(collection_w, nb_new,
                                                             auto_id=auto_id, dim=dim,
                                                             insert_offset=nb_old)
        insert_ids.extend(insert_ids_new)
        # 4. search for new data without load
        # Using bounded staleness, maybe we could not search the "inserted" entities,
        # since the search requests arrived query nodes earlier than query nodes consume the insert requests.
        collection_w.search(vectors[:nq], default_search_field,
                            default_search_params, limit,
                            default_search_exp, _async=_async,
                            guarantee_timestamp=time_stamp,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": nq,
                                         "ids": insert_ids,
                                         "limit": nb_old + nb_new,
                                         "_async": _async})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("shards_num", [-256, 0, ct.max_shards_num // 2, ct.max_shards_num])
    def test_search_with_non_default_shard_nums(self, shards_num, _async):
        """
        target: test search with non_default shards_num
        method: connect milvus, create collection with several shard numbers , insert, load and search
        expected: search successfully with the non_default shards_num
        """
        auto_id = False
        self._connect()
        # 1. create collection
        name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(
            name=name, shards_num=shards_num)
        # 2. rename collection
        new_collection_name = cf.gen_unique_str(prefix + "new")
        self.utility_wrap.rename_collection(
            collection_w.name, new_collection_name)
        collection_w = self.init_collection_wrap(
            name=new_collection_name, shards_num=shards_num)
        # 3. insert
        dataframe = cf.gen_default_dataframe_data()
        collection_w.insert(dataframe)
        # 4. create index and load
        collection_w.create_index(
            ct.default_float_vec_field_name, index_params=ct.default_flat_index)
        collection_w.load()
        # 5. search
        vectors = [[random.random() for _ in range(default_dim)]
                   for _ in range(default_nq)]
        collection_w.search(vectors[:default_nq], default_search_field,
                            default_search_params, default_limit,
                            default_search_exp, _async=_async,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "limit": default_limit,
                                         "_async": _async})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("M", [4, 64])
    @pytest.mark.parametrize("efConstruction", [8, 512])
    def test_search_HNSW_index_with_redundant_param(self, M, efConstruction, _async):
        """
        target: test search HNSW index with redundant param
        method: connect milvus, create collection , insert, create index, load and search
        expected: search successfully
        """
        dim = M * 4
        auto_id = False
        enable_dynamic_field = False
        self._connect()
        collection_w, _, _, insert_ids, time_stamp = \
            self.init_collection_general(prefix, True, partition_num=1, auto_id=auto_id,
                                         dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5]
        # nlist is of no use
        HNSW_index_params = {
            "M": M, "efConstruction": efConstruction, "nlist": 100}
        HNSW_index = {"index_type": "HNSW",
                      "params": HNSW_index_params, "metric_type": "L2"}
        collection_w.create_index("float_vector", HNSW_index)
        collection_w.load()
        search_param = {"metric_type": "L2", "params": {
            "ef": 32768, "nprobe": 10}}  # nprobe is of no use
        vectors = [[random.random() for _ in range(dim)]
                   for _ in range(default_nq)]
        collection_w.search(vectors[:default_nq], default_search_field,
                            search_param, default_limit,
                            default_search_exp, _async=_async,
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": default_nq,
                                         "ids": insert_ids,
                                         "limit": default_limit,
                                         "_async": _async})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("M", [4, 64])
    @pytest.mark.parametrize("efConstruction", [8, 512])
@@ -788,8 +570,7 @@ class TestCollectionSearch(TestcaseBase):
            self.init_collection_general(prefix, True, nb=nb, dim=dim, enable_dynamic_field=True)[0:4]

        # filter result with expression in collection
        search_vectors = [[random.random() for _ in range(dim)]
                          for _ in range(default_nq)]
        search_vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)]
        _vectors = _vectors[0]
        for expressions in cf.gen_json_field_expressions_and_templates():
            expr = expressions[0].replace("&&", "and").replace("||", "or")
@@ -858,12 +639,16 @@ class TestCollectionSearch(TestcaseBase):
            ids = hits.ids
            assert set(ids).issubset(filter_ids_set)
        # 7. create json index
        default_json_path_index = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
                                                                        "json_path": f"{ct.default_json_field_name}['number']"}}
        collection_w.create_index(ct.default_json_field_name, default_json_path_index, index_name = f"{ct.default_json_field_name}_0")
        default_json_path_index = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
                                                                        "json_path": f"{ct.default_json_field_name}['float']"}}
        collection_w.create_index(ct.default_json_field_name, default_json_path_index, index_name = f"{ct.default_json_field_name}_1")
        default_json_path_index = {"index_type": "INVERTED",
                                   "params": {"json_cast_type": "double",
                                              "json_path": f"{ct.default_json_field_name}['number']"}}
        collection_w.create_index(ct.default_json_field_name, default_json_path_index,
                                  index_name=f"{ct.default_json_field_name}_0")
        default_json_path_index = {"index_type": "AUTOINDEX",
                                   "params": {"json_cast_type": "double",
                                              "json_path": f"{ct.default_json_field_name}['float']"}}
        collection_w.create_index(ct.default_json_field_name, default_json_path_index,
                                  index_name=f"{ct.default_json_field_name}_1")
        # 8. release and load to make sure the new index is loaded
        collection_w.release()
        collection_w.load()
@@ -994,8 +779,7 @@ class TestCollectionSearch(TestcaseBase):
            collection_w.search(vectors, default_search_field, default_search_params,
                                default_limit, expression, output_fields=[field],
                                check_task=CheckTasks.check_search_results,
                                check_items={"nq": default_nq,
                                             "limit": 0})[0]
                                check_items={"nq": default_nq, "limit": 0})
            # 4. search normal using all the scalar type as output fields
            collection_w.search(vectors, default_search_field, default_search_params,
                                default_limit, output_fields=[field],

@@ -161,6 +161,8 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
                               index_type=self.binary_vector_index,
                               params={"nlist": 128})
        self.create_index(client, self.collection_name, index_params=index_params)
        self.wait_for_index_ready(client, self.collection_name, index_name=self.float_vector_field_name)
        self.wait_for_index_ready(client, self.collection_name, index_name=self.bfloat16_vector_field_name)

        # Load collection
        self.load_collection(client, self.collection_name)
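Note: waiting for the index build to finish before loading avoids flaky searches against half-built indexes. wait_for_index_ready is a base-class helper here; a generic sketch of the polling it implies, with a hypothetical predicate:

import time

def wait_until(predicate, timeout=120.0, interval=2.0):
    # Poll until the index reports ready, or give up.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if predicate():
            return
        time.sleep(interval)
    raise TimeoutError("index not ready within timeout")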
@@ -378,7 +380,8 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
        )

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_with_output_fields(self):
    @pytest.mark.parametrize("consistency_level", ["Strong", "Session", "Bounded", "Eventually"])
    def test_search_with_output_fields_and_consistency_level(self, consistency_level):
        """
        target: test search with output fields
        method: 1. connect and create a collection
@@ -400,6 +403,7 @@ class TestMilvusClientSearchBasicV2(TestMilvusClientV2Base):
                    anns_field=self.float_vector_field_name,
                    search_params=search_params,
                    limit=default_limit,
                    consistency_level=consistency_level,
                    output_fields=[ct.default_string_field_name, self.dyna_filed_name1, self.dyna_filed_name2],
                    check_task=CheckTasks.check_search_results,
                    check_items={"enable_milvus_client_api": True,
@ -1220,3 +1224,250 @@ class TestSearchV2Independent(TestMilvusClientV2Base):
|
||||
"nq": ct.default_nq,
|
||||
"pk_name": "id",
|
||||
"limit": ct.default_limit})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("index", ct.all_index_types[:6])
|
||||
def test_each_index_with_mmap_enabled_search(self, index):
|
||||
"""
|
||||
target: test each index with mmap enabled search
|
||||
method: test each index with mmap enabled search
|
||||
expected: search success
|
||||
"""
|
||||
client = self._client()
|
||||
collection_name = cf.gen_collection_name_by_testcase_name()
|
||||
|
||||
# fast create collection
|
||||
dim = 32
|
||||
schema = self.create_schema(client)[0]
|
||||
schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
|
||||
schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
|
||||
self.create_collection(client, collection_name, schema=schema)
|
||||
|
||||
# insert data
|
||||
data = []
|
||||
for i in range(ct.default_nb):
|
||||
data.append({
|
||||
"id": i,
|
||||
"vector": cf.gen_vectors(1, dim)[0]
|
||||
})
|
||||
self.insert(client, collection_name, data)
|
||||
self.flush(client, collection_name)
|
||||
# create index
|
||||
index_params = self.prepare_index_params(client)[0]
|
||||
params = cf.get_index_params_params(index)
|
||||
index_params.add_index(field_name='vector', index_type=index, params=params, metric_type='L2')
|
||||
self.create_index(client, collection_name, index_params=index_params)
|
||||
self.wait_for_index_ready(client, collection_name, index_name='vector')
|
||||
|
||||
# alter mmap index
|
||||
self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": True})
|
||||
index_info = self.describe_index(client, collection_name, index_name='vector')
|
||||
assert index_info[0]["mmap.enabled"] == 'True'
|
||||
# search
|
||||
self.load_collection(client, collection_name)
|
||||
search_params = {}
|
||||
vector = cf.gen_vectors(ct.default_nq, dim)
|
||||
self.search(client, collection_name, vector, anns_field="vector",
|
||||
search_params=search_params, limit=ct.default_limit,
|
||||
output_fields=["*"],
|
||||
check_task=CheckTasks.check_search_results,
|
||||
check_items={"nq": ct.default_nq,
|
||||
"limit": ct.default_limit})
|
||||
# disable mmap
|
||||
self.release_collection(client, collection_name)
|
||||
self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": False})
|
||||
index_info = self.describe_index(client, collection_name, index_name='vector')
|
||||
assert index_info[0]["mmap.enabled"] == 'False'
|
||||
self.load_collection(client, collection_name)
|
||||
self.search(client, collection_name, vector, anns_field="vector",
|
||||
search_params=search_params, limit=ct.default_limit,
|
||||
output_fields=["*"],
|
||||
check_task=CheckTasks.check_search_results,
|
||||
check_items={"nq": ct.default_nq,
|
||||
"limit": ct.default_limit})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("index", ct.all_index_types[8:10])
|
||||
def test_enable_mmap_search_for_binary_indexes(self, index):
|
||||
"""
|
||||
Test enabling mmap for binary indexes in Milvus.
|
||||
|
||||
This test verifies that:
|
||||
1. Binary vector indexes can be successfully created with mmap enabled
|
||||
2. Search operations work correctly with mmap enabled
|
||||
3. Mmap can be properly disabled and search still works
|
||||
|
||||
The test performs following steps:
|
||||
- Creates a collection with binary vectors
|
||||
- Inserts test data
|
||||
- Creates index with mmap enabled
|
||||
- Verifies mmap status
|
||||
- Performs search with mmap enabled
|
||||
- Disables mmap and verifies search still works
|
||||
"""
|
||||
client = self._client()
|
||||
collection_name = cf.gen_collection_name_by_testcase_name()
|
||||
|
||||
# fast create collection
|
||||
dim = 64
|
||||
schema = self.create_schema(client)[0]
|
||||
schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
|
||||
schema.add_field('vector', DataType.BINARY_VECTOR, dim=dim)
|
||||
self.create_collection(client, collection_name, schema=schema)
|
||||
|
||||
# insert data
|
||||
data = []
|
||||
for i in range(ct.default_nb):
|
||||
data.append({
|
||||
"id": i,
|
||||
"vector": cf.gen_binary_vectors(1, dim)[1][0]
|
||||
})
|
||||
self.insert(client, collection_name, data)
|
||||
self.flush(client, collection_name)
|
||||
# create index
|
||||
index_params = self.prepare_index_params(client)[0]
|
||||
params = cf.get_index_params_params(index)
|
||||
index_params.add_index(field_name='vector', index_type=index, params=params, metric_type='JACCARD')
|
||||
self.create_index(client, collection_name, index_params=index_params)
|
||||
self.wait_for_index_ready(client, collection_name, index_name='vector')
|
||||
# alter mmap index
|
||||
self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": True})
|
||||
index_info = self.describe_index(client, collection_name, index_name='vector')
|
||||
assert index_info[0]["mmap.enabled"] == 'True'
|
||||
# load collection
|
||||
self.load_collection(client, collection_name)
|
||||
# search
|
||||
binary_vectors = cf.gen_binary_vectors(ct.default_nq, dim)[1]
|
||||
params = cf.get_search_params_params(index)
|
||||
search_params = {"metric_type": "JACCARD", "params": params}
|
||||
output_fields = ["*"]
|
||||
self.search(client, collection_name, binary_vectors, anns_field="vector",
|
||||
search_params=search_params, limit=ct.default_limit,
|
||||
output_fields=output_fields,
|
||||
check_task=CheckTasks.check_search_results,
|
||||
check_items={"nq": ct.default_nq,
|
||||
"limit": ct.default_limit})
|
||||
# disable mmap
|
||||
self.release_collection(client, collection_name)
|
||||
self.alter_index_properties(client, collection_name, index_name='vector', properties={"mmap.enabled": False})
|
||||
index_info = self.describe_index(client, collection_name, index_name='vector')
|
||||
assert index_info[0]["mmap.enabled"] == 'False'
|
||||
self.load_collection(client, collection_name)
|
||||
self.search(client, collection_name, binary_vectors, anns_field="vector",
|
||||
search_params=search_params, limit=ct.default_limit,
|
||||
output_fields=output_fields,
|
||||
check_task=CheckTasks.check_search_results,
|
||||
check_items={"nq": ct.default_nq,
|
||||
"limit": ct.default_limit})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("num_shards", [-256, 0, ct.max_shards_num // 2, ct.max_shards_num])
|
||||
def test_search_with_non_default_shard_nums(self, num_shards):
|
||||
"""
|
||||
Test search functionality with non-default shard numbers.
|
||||
|
||||
This test verifies that:
|
||||
1. Collections are created with default shard numbers when num_shards <= 0
|
||||
2. Collections are created with specified shard numbers when num_shards > 0
|
||||
3. Search operations work correctly with different shard configurations
|
||||
|
||||
The test follows these steps:
|
||||
1. Creates a collection with specified shard numbers
|
||||
2. Inserts test data
|
||||
3. Builds an index
|
||||
4. Performs a search operation
|
||||
5. Validates the results
|
||||
"""
|
||||
client = self._client()
|
||||
collection_name = cf.gen_collection_name_by_testcase_name()
|
||||
|
||||
# create collection
|
||||
dim = 32
|
||||
schema = self.create_schema(client)[0]
|
||||
schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
|
||||
schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
|
||||
# create collection
|
||||
self.create_collection(client, collection_name, schema=schema, num_shards=num_shards)
|
||||
collection_info = self.describe_collection(client, collection_name)[0]
|
||||
expected_num_shards = ct.default_shards_num if num_shards <= 0 else num_shards
|
||||
assert collection_info["num_shards"] == expected_num_shards
|
||||
# insert
|
||||
data = []
|
||||
for i in range(ct.default_nb):
|
||||
data.append({
|
||||
"id": i,
|
||||
"vector": cf.gen_vectors(1, dim)[0]
|
||||
})
|
||||
self.insert(client, collection_name, data)
|
||||
# create index
|
||||
index_params = self.prepare_index_params(client)[0]
|
||||
index_params.add_index(field_name='vector', index_type='HNSW', metric_type='COSINE')
|
||||
self.create_index(client, collection_name, index_params=index_params)
|
||||
self.wait_for_index_ready(client, collection_name, index_name='vector')
|
||||
# load
|
||||
self.load_collection(client, collection_name)
|
||||
# search
|
||||
vectors = cf.gen_vectors(ct.default_nq, dim)
|
||||
search_params = {}
|
||||
self.search(client, collection_name, vectors, anns_field="vector",
|
||||
search_params=search_params, limit=ct.default_limit,
|
||||
check_task=CheckTasks.check_search_results,
|
||||
check_items={"nq": ct.default_nq,
|
||||
"limit": ct.default_limit})

    @pytest.mark.tags(CaseLabel.L2)
    def test_search_HNSW_index_with_redundant_param(self):
        """
        Test search functionality with an HNSW index and redundant parameters.

        This test verifies that:
        1. An HNSW index can be created with redundant parameters
        2. Search operations work correctly with redundant parameters
        3. Redundant parameters are stored in the index info but otherwise ignored

        The test performs the following steps:
        1. Creates a collection with float vectors
        2. Inserts test data
        3. Creates an HNSW index with redundant parameters
        4. Performs a search operation
        5. Validates the results
        """
        dim = 16
        index = "HNSW"
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        schema = self.create_schema(client)[0]
        schema.add_field('id', DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field('vector', DataType.FLOAT_VECTOR, dim=dim)
        self.create_collection(client, collection_name, schema=schema)

        # insert
        data = []
        for i in range(ct.default_nb):
            data.append({
                "id": i,
                "vector": cf.gen_vectors(1, dim)[0]
            })
        self.insert(client, collection_name, data)
        self.flush(client, collection_name)
        # create index
        index_params = self.prepare_index_params(client)[0]
        params = cf.get_index_params_params(index)
        params["nlist"] = 100  # nlist is a redundant parameter for HNSW
        index_params.add_index(field_name='vector', index_type=index,
                               metric_type='COSINE', params=params)
        self.create_index(client, collection_name, index_params=index_params)
        self.wait_for_index_ready(client, collection_name, index_name='vector')
        index_info = self.describe_index(client, collection_name, index_name='vector')
        assert index_info[0]["nlist"] == '100'
        # load
        self.load_collection(client, collection_name)
        # search
        vectors = cf.gen_vectors(ct.default_nq, dim)
        search_params = {}
        self.search(client, collection_name, vectors, anns_field="vector",
                    search_params=search_params, limit=ct.default_limit,
                    check_task=CheckTasks.check_search_results,
                    check_items={"nq": ct.default_nq,
                                 "limit": ct.default_limit})

@ -7,6 +7,7 @@ from utils.util_log import test_log as log
from utils.util_pymilvus import *
from base.client_v2_base import TestMilvusClientV2Base
from pymilvus import DataType, AnnSearchRequest, WeightedRanker
from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY
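This consistency-level refactor should be behavior-preserving: as far as I can tell, the CONSISTENCY_* names in pymilvus.orm.types are plain string constants, so consistency_level=CONSISTENCY_STRONG and consistency_level='Strong' request the same guarantee. A quick sanity check (a sketch, not part of the suite):

from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_EVENTUALLY

assert CONSISTENCY_STRONG == "Strong"          # the constants are just the accepted strings
assert CONSISTENCY_EVENTUALLY == "Eventually"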


class TestMilvusClientTTL(TestMilvusClientV2Base):

@ -38,7 +39,7 @@ class TestMilvusClientTTL(TestMilvusClientV2Base):
        """
        client = self._client()
        dim = 65
        ttl = 10
        ttl = 11
        nb = 1000
        collection_name = cf.gen_collection_name_by_testcase_name()
        schema = self.create_schema(client, enable_dynamic_field=False)[0]

@ -98,18 +99,21 @@ class TestMilvusClientTTL(TestMilvusClientV2Base):
        while time.time() - start_time < timeout:
            if search_ttl_effective is False:
                res1 = self.search(client, collection_name, search_vectors, anns_field='embeddings',
                                   search_params={}, limit=10, consistency_level='Strong')[0]
                                   search_params={}, limit=10, consistency_level=CONSISTENCY_STRONG)[0]
            if query_ttl_effective is False:
                res2 = self.query(client, collection_name, filter='',
                                  output_fields=["count(*)"], consistency_level='Strong')[0]
                                  output_fields=["count(*)"], consistency_level=CONSISTENCY_STRONG)[0]
            if hybrid_search_ttl_effective is False:
                res3 = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
                                          limit=10, consistency_level='Strong')[0]
                                          limit=10, consistency_level=CONSISTENCY_STRONG)[0]
            if len(res1[0]) == 0 and search_ttl_effective is False:
                log.info(f"search ttl effects in {round(time.time() - start_time, 4)}s")
                search_ttl_effective = True
            if res2[0].get('count(*)', None) == 0 and query_ttl_effective is False:
                log.info(f"query ttl effects in {round(time.time() - start_time, 4)}s")
                res2x = self.query(client, collection_name, filter='visible==False',
                                   output_fields=["count(*)"], consistency_level=CONSISTENCY_STRONG)[0]
                log.debug(f"res2x: {res2x[0].get('count(*)', None)}")
                query_ttl_effective = True
            if len(res3[0]) == 0 and hybrid_search_ttl_effective is False:
                log.info(f"hybrid search ttl effects in {round(time.time() - start_time, 4)}s")

@ -152,44 +156,64 @@ class TestMilvusClientTTL(TestMilvusClientV2Base):
        log.info(f"flush completed in {time.time() - t1}s")

        # search data again after insert more data
        res = self.search(client, collection_name, search_vectors,
                          search_params={}, anns_field='embeddings',
                          limit=10, consistency_level='Strong')[0]
        assert len(res[0]) > 0
        # query count(*)
        res = self.query(client, collection_name, filter='visible==False',
                         output_fields=["count(*)"], consistency_level='Strong')[0]
        assert res[0].get('count(*)', None) == 0
        consistency_levels = [CONSISTENCY_EVENTUALLY, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_STRONG]
        for consistency_level in consistency_levels:
            log.debug(f"start to search/query with {consistency_level}")
            # try 3 times
            for i in range(3):
                res = self.search(client, collection_name, search_vectors,
                                  search_params={}, anns_field='embeddings',
                                  limit=10, consistency_level=consistency_level)[0]
                if len(res[0]) > 0:
                    break
                else:
                    time.sleep(1)
            assert len(res[0]) > 0

        # hybrid search
        res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
                                 limit=10, consistency_level='Strong')[0]
        assert len(res[0]) > 0
            if consistency_level != CONSISTENCY_STRONG:
                pass
            else:
                # query count(*)
                res = self.query(client, collection_name, filter='',
                                 output_fields=["count(*)"], consistency_level=consistency_level)[0]
                assert res[0].get('count(*)', None) == nb * insert_times
                res = self.query(client, collection_name, filter='visible==False',
                                 output_fields=["count(*)"], consistency_level=consistency_level)[0]
                assert res[0].get('count(*)', None) == 0
                # query count(visible)
                res = self.query(client, collection_name, filter='visible==True',
                                 output_fields=["count(*)"], consistency_level=consistency_level)[0]
                assert res[0].get('count(*)', None) == nb * insert_times

        # query count(visible)
        res = self.query(client, collection_name, filter='visible==True',
                         output_fields=["count(*)"], consistency_level='Strong')[0]
        assert res[0].get('count(*)', None) > 0
            # hybrid search
            res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
                                     limit=10, consistency_level=consistency_level)[0]
            assert len(res[0]) > 0

        # alter ttl to 1000s
        self.alter_collection_properties(client, collection_name, properties={"collection.ttl.seconds": 1000})
        # search data after alter ttl
        res = self.search(client, collection_name, search_vectors,
                          search_params={}, anns_field='embeddings',
                          filter='visible==False', limit=10, consistency_level='Strong')[0]
        assert len(res[0]) > 0
        # alter ttl to 2000s
        self.alter_collection_properties(client, collection_name, properties={"collection.ttl.seconds": 2000})
        for consistency_level in consistency_levels:
            log.debug(f"start to search/query after alter ttl with {consistency_level}")
            # search data after alter ttl
            res = self.search(client, collection_name, search_vectors,
                              search_params={}, anns_field='embeddings',
                              filter='visible==False', limit=10, consistency_level=consistency_level)[0]
            assert len(res[0]) > 0

        # hybrid search data after alter ttl
        sub_search1 = AnnSearchRequest(search_vectors, "embeddings", {"level": 1}, 20, expr='visible==False')
        sub_search2 = AnnSearchRequest(search_vectors, "embeddings_2", {"level": 1}, 20, expr='visible==False')
        res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
                                 limit=10, consistency_level='Strong')[0]
        assert len(res[0]) > 0
            # hybrid search data after alter ttl
            sub_search1 = AnnSearchRequest(search_vectors, "embeddings", {"level": 1}, 20, expr='visible==False')
            sub_search2 = AnnSearchRequest(search_vectors, "embeddings_2", {"level": 1}, 20, expr='visible==False')
            res = self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker,
                                     limit=10, consistency_level=consistency_level)[0]
            assert len(res[0]) > 0

        # query count(*)
        res = self.query(client, collection_name, filter='visible==False',
                         output_fields=["count(*)"], consistency_level='Strong')[0]
        assert res[0].get('count(*)', None) == insert_times * nb
        res = self.query(client, collection_name, filter='',
                         output_fields=["count(*)"], consistency_level='Strong')[0]
        assert res[0].get('count(*)', None) == insert_times * nb * 2
            # query count(*)
            res = self.query(client, collection_name, filter='visible==False',
                             output_fields=["count(*)"], consistency_level=consistency_level)[0]
            assert res[0].get('count(*)', 0) == insert_times * nb
            res = self.query(client, collection_name, filter='',
                             output_fields=["count(*)"], consistency_level=consistency_level)[0]
            if consistency_level != CONSISTENCY_STRONG:
                assert res[0].get('count(*)', 0) >= insert_times * nb
            else:
                assert res[0].get('count(*)', 0) == insert_times * nb * 2
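For context on what collection.ttl.seconds does in the hunk above: rows older than the TTL stop matching search/query results and are physically reclaimed later by compaction, which is why the test first polls until the short TTL takes effect and then raises it so the surviving rows stay visible. A minimal sketch of managing the property with a bare MilvusClient (the URI and collection name are assumptions):

from pymilvus import MilvusClient

client = MilvusClient("http://localhost:19530")   # assumed local deployment
client.create_collection("ttl_demo", dimension=8)
# expire rows roughly 60s after insertion
client.alter_collection_properties("ttl_demo", properties={"collection.ttl.seconds": 60})
# raising the TTL makes expired-but-not-yet-compacted rows visible again,
# which is what the test asserts after altering the TTL to 2000s
client.alter_collection_properties("ttl_demo", properties={"collection.ttl.seconds": 2000})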

@ -28,8 +28,8 @@ pytest-parallel
pytest-random-order

# pymilvus
pymilvus==2.6.0rc151
pymilvus[bulk_writer]==2.6.0rc151
pymilvus==2.6.0rc155
pymilvus[bulk_writer]==2.6.0rc155

# for protobuf
protobuf==5.27.2
@ -68,12 +68,12 @@ class IVF_RABITQ:
        # refine params test
        {
            "description": "Enable Refine Test",
            "params": {"refine": 'true'},  # to be fixed: #41760
            "params": {"refine": 'true'},
            "expected": success
        },
        {
            "description": "Disable Refine Test",
            "params": {"refine": 'false'},  # to be fixed: #41760
            "params": {"refine": 'false'},
            "expected": success
        },

@ -194,7 +194,13 @@ class IVF_RABITQ:
        {
            "description": "Exceed nlist Test",
            "params": {"nprobe": 129},  # Assuming nlist=128
            "expected": success  # to be fixed: #41765
            "expected": success
        },
        {
            "description": "Exceed nprobe Test",
            "params": {"nprobe": 65537},
            "expected": {"err_code": 999,
                         "err_msg": "should be in range [1, 65536]"}
        },
        {
            "description": "Negative Value Test",

@ -82,7 +82,7 @@ class TestIvfRabitqBuildParams(TestMilvusClientV2Base):
        for key, value in build_params.items():
            if value is not None:
                assert key in idx_info.keys()
                # assert value in idx_info.values()  # TODO: uncomment after #41783 is fixed
                assert str(value) in idx_info.values()  # TODO: revert to the exact-value assert after #41783 is fixed

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("vector_data_type", ct.all_vector_types)

@ -72,6 +72,23 @@ class TestPartitionKeyParams(TestcaseBase):
        for i in range(nq):
            assert res1[i].ids == res2[i].ids == res3[i].ids

        # query with 'or' expressions to verify that partition key optimization is not applied to OR binary exprs
        query_res1 = collection_w.query(
            expr=f'{string_field.name} == "{string_prefix}5" || {int64_field.name} in [2,4,6]',
            output_fields=['count(*)'])[0]
        query_res2 = collection_w.query(
            expr=f'{string_field.name} in ["{string_prefix}2","{string_prefix}4", "{string_prefix}6"] || {int64_field.name}==5',
            output_fields=['count(*)'])[0]
        query_res3 = collection_w.query(
            expr=f'{int64_field.name}==5 or {string_field.name} in ["{string_prefix}2","{string_prefix}4", "{string_prefix}6"]',
            output_fields=['count(*)'])[0]
        query_res4 = collection_w.query(
            expr=f'{int64_field.name} in [2,4,6] || {string_field.name} == "{string_prefix}5"',
            output_fields=['count(*)'])[0]
        # assert the counts stay consistent regardless of operand order
        assert query_res1[0].get('count(*)') == query_res2[0].get('count(*)') \
            == query_res3[0].get('count(*)') == query_res4[0].get('count(*)') == 40
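All four symmetric OR queries must count the same 40 rows because partition-key pruning has to be bypassed when the partition-key field sits under an OR: routing the query to a single partition would silently drop matches of the other operand stored in other partitions. A sketch of the kind of schema this applies to (field names are illustrative, using the ORM API this test class is built on):

from pymilvus import CollectionSchema, FieldSchema, DataType

fields = [
    FieldSchema("pk", DataType.INT64, is_primary=True),
    FieldSchema("vector", DataType.FLOAT_VECTOR, dim=8),
    # rows are hashed across partitions by this field; equality / IN filters
    # on it can be pruned to one partition, but not when OR'ed with another field
    FieldSchema("tenant", DataType.INT64, is_partition_key=True),
]
schema = CollectionSchema(fields)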

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name, ct.default_string_field_name])
    @pytest.mark.parametrize("index_on_par_key_field", [True, False])