test: add more general expr cases (#42035)

Signed-off-by: binbin lv <binbin.lv@zilliz.com>
binbin 2025-05-28 22:26:28 +08:00 committed by GitHub
parent 54d365dcb0
commit ceb8434cb4
5 changed files with 1435 additions and 7 deletions

@ -2490,7 +2490,8 @@ def gen_json_field_expressions_all_single_operator():
"json_field['a'] < 2", "json_field['a'] < 2.0", "json_field['a'] > 0", "json_field['a'] > 0.0",
"json_field['a'] <= '1'", "json_field['a'] >= '1'", "json_field['a'] < '2'", "json_field['a'] > '0'",
"json_field['a'] == 1", "json_field['a'] == 1.0", "json_field['a'] == True",
"json_field['a'] == 9707199254740993.0", "json_field['a'] == 9707199254740992", "json_field['a'] == '1'",
"json_field['a'] == 9707199254740993.0", "json_field['a'] == 9707199254740992",
"json_field['a'] == '1'",
"json_field['a'] != '1'", "json_field['a'] like '1%'", "json_field['a'] like '%1'",
"json_field['a'] like '%1%'", "json_field['a'] LIKE '1%'", "json_field['a'] LIKE '%1'",
"json_field['a'] LIKE '%1%'", "EXISTS json_field['a']", "exists json_field['a']",
@ -2498,7 +2499,8 @@ def gen_json_field_expressions_all_single_operator():
"json_field['a'] - 1 <= 0", "json_field['a'] + 1.0 >= 2", "json_field['a'] - 1.0 <= 0",
"json_field['a'] * 2 == 2", "json_field['a'] * 1.0 == 1.0", "json_field / 1 == 1",
"json_field['a'] / 1.0 == 1", "json_field['a'] % 10 == 1", "json_field['a'] == 1**2",
"json_field['a'][0] == 1 && json_field['a'][1] == 2", "json_field['a'][0] == 1 and json_field['a'][1] == 2",
"json_field['a'][0] == 1 && json_field['a'][1] == 2",
"json_field['a'][0] == 1 and json_field['a'][1] == 2",
"json_field['a'][0]['b'] >=1 && json_field['a'][2] == 3",
"json_field['a'][0]['b'] >=1 and json_field['a'][2] == 3",
"json_field['a'] == 1 || json_field['a'] == '1'", "json_field['a'] == 1 or json_field['a'] == '1'",
@ -2517,6 +2519,248 @@ def gen_json_field_expressions_all_single_operator():
return expressions
def gen_field_expressions_all_single_operator_each_field(field=ct.default_int64_field_name):
"""
Generate a list of filter expressions (each as a string) covering all single operators for the given field
"""
if field in [ct.default_int8_field_name, ct.default_int16_field_name, ct.default_int32_field_name,
ct.default_int64_field_name]:
expressions = [f"{field} <= 1", f"{field} >= 1",
f"{field} < 2", f"{field} > 0",
f"{field} == 1", f"{field} != 1",
f"{field} == 9707199254740992", f"{field} != 9707199254740992",
f"{field} + 1 >= 2", f"{field} - 1 <= 0",
f"{field} * 2 == 2", f"{field} / 1 == 1",
f"{field} % 10 == 1", f"{field} == 1 || {field} == 2",
f"{field} == 1 or {field} == 2",
f"{field} in [1]", f"{field} not in [1]",
f"{field} is null", f"{field} IS NULL",
f"{field} is not null", f"{field} IS NOT NULL"
]
elif field in [ct.default_bool_field_name]:
expressions = [f"{field} == True", f"{field} == False",
f"{field} != True", f"{field} != False",
f"{field} <= True", f"{field} >= True",
f"{field} <= False", f"{field} >= False",
f"{field} < True", f"{field} > True",
f"{field} < False", f"{field} > False",
f"{field} == True && {field} == False",
f"{field} == True and {field} == False ",
f"{field} == True || {field} == False",
f"{field} == True or {field} == False",
f"{field} in [True]", f"{field} in [False]", f"{field} in [True, False]",
f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"]
elif field in [ct.default_float_field_name, ct.default_double_field_name]:
expressions = [f"{field} <= 1", f"{field} >= 1",
f"{field} < 2", f"{field} > 0",
f"{field} == 1", f"{field} != 1",
f"{field} == 9707199254740992", f"{field} != 9707199254740992",
f"{field} <= 1.0", f"{field} >= 1.0",
f"{field} < 2.0", f"{field} > 0.0",
f"{field} == 1.0", f"{field} != 1.0",
f"{field} == 9707199254740992.0", f"{field} != 9707199254740992.0",
f"{field} - 1 <= 0", f"{field} + 1.0 >= 2",
f"{field} - 1.0 <= 0", f"{field} * 2 == 2",
f"{field} * 1.0 == 1.0", f"{field} / 1 == 1",
f"{field} / 1.0 == 1.0", f"{field} == 1**2",
f"{field} == 1 && {field} == 2",
f"{field} == 1 and {field} == 2.0",
f"{field} >=1 && {field} == 3.0",
f"{field} >=1 and {field} == 3",
f"{field} == 1 || {field} == 2.0",
f"{field} == 1 or {field} == 2.0",
f"{field} >= 1 || {field} <=2.0",
f"{field} >= 1.0 or {field} <= 2.0",
f"{field} in [1]", f"{field} in [1, 2]",
f"{field} in [1.0]", f"{field} in [1.0, 2.0]",
f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
]
elif field in [ct.default_string_field_name]:
expressions = [f"{field} <= '1'", f"{field} >= '1'", f"{field} < '2'", f"{field} > '0'",
f"{field} == '1'", f"{field} != '1'", f"{field} like '1%'", f"{field} like '%1'",
f"{field} like '%1%'", f"{field} LIKE '1%'", f"{field} LIKE '%1'",
f"{field} LIKE '%1%'",
f"{field} == '1' && {field} == '2'",
f"{field} == '1' and {field} == '2'",
f"{field} == '1' || {field} == '2'",
f"{field} == '1' or {field} == '2'",
f"{field} >= '1' || {field} <= '2'",
f"{field} >= '1' or {field} <= '2'",
f"{field} in ['1']", f"{field} in ['1', '2']",
f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
]
elif field in [ct.default_int8_array_field_name, ct.default_int16_array_field_name,
ct.default_int32_array_field_name, ct.default_int64_array_field_name]:
expressions = [f"{field}[0] <= 1", f"{field}[0] >= 1",
f"{field}[0] < 2", f"{field}[0] > 0",
f"{field}[1] == 1", f"{field}[1] != 1",
f"{field}[0] == 9707199254740992", f"{field}[0] != 9707199254740992",
f"{field}[0] + 1 >= 2", f"{field}[0] - 1 <= 0",
f"{field}[0] + 1.0 >= 2", f"{field}[0] - 1.0 <= 0",
f"{field}[0] * 2 == 2", f"{field}[1] * 1.0 == 1.0",
f"{field}[1] / 1 == 1", f"{field}[0] / 1.0 == 1", f"{field}[1] % 10 == 1",
f"{field}[0] == 1 && {field}[1] == 2", f"{field}[0] == 1 and {field}[1] == 2",
f"{field}[0] >=1 && {field}[2] <= 3", f"{field}[0] >=1 and {field}[1] == 2",
f"{field}[0] >=1 || {field}[1] <=2", f"{field}[0] >=1 or {field}[1] <=2",
f"{field}[0] in [1]", f"json_contains({field}, 1)", f"JSON_CONTAINS({field}, 1)",
f"json_contains_all({field}, [1, 2])", f"JSON_CONTAINS_ALL({field}, [1, 2])",
f"json_contains_any({field}, [1, 2])", f"JSON_CONTAINS_ANY({field}, [1, 2])",
f"array_contains({field}, 2)", f"ARRAY_CONTAINS({field}, 2)",
f"array_contains_all({field}, [1, 2])", f"ARRAY_CONTAINS_ALL({field}, [1, 2])",
f"array_contains_any({field}, [1, 2])", f"ARRAY_CONTAINS_ANY({field}, [1, 2])",
f"array_length({field}) < 10", f"ARRAY_LENGTH({field}) < 10",
f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
]
elif field in [ct.default_float_array_field_name, ct.default_double_array_field_name]:
expressions = [f"{field}[0] <= 1", f"{field}[0] >= 1",
f"{field}[0] < 2", f"{field}[0] > 0",
f"{field}[1] == 1", f"{field}[1] != 1",
f"{field}[0] == 9707199254740992", f"{field}[0] != 9707199254740992",
f"{field}[0] <= 1.0", f"{field}[0] >= 1.0",
f"{field}[0] < 2.0", f"{field}[0] > 0.0",
f"{field}[1] == 1.0", f"{field}[1] != 1.0",
f"{field}[0] == 9707199254740992.0",
f"{field}[0] - 1 <= 0", f"{field}[0] + 1.0 >= 2",
f"{field}[0] - 1.0 <= 0", f"{field}[0] * 2 == 2",
f"{field}[0] * 1.0 == 1.0", f"{field}[0] / 1 == 1",
f"{field}[0] / 1.0 == 1.0", f"{field}[0] == 1**2",
f"{field}[0] == 1 && {field}[1] == 2",
f"{field}[0] == 1 and {field}[1] == 2.0",
f"{field}[0] >=1 && {field}[2] == 3.0",
f"{field}[0] >=1 and {field}[2] == 3",
f"{field}[0] == 1 || {field}[1] == 2.0",
f"{field}[0] == 1 or {field}[1] == 2.0",
f"{field}[0] >= 1 || {field}[1] <=2.0",
f"{field}[0] >= 1.0 or {field}[1] <= 2.0",
f"{field}[0] in [1]", f"{field}[0] in [1.0]", f"json_contains({field}, 1.0)",
f"JSON_CONTAINS({field}, 1.0)", f"json_contains({field}, 1.0)", f"JSON_CONTAINS({field}, 1.0)",
f"json_contains_all({field}, [2.0, 4.0])", f"JSON_CONTAINS_ALL({field}, [2.0, 4.0])",
f"json_contains_any({field}, [2.0, 4.0])", f"JSON_CONTAINS_ANY({field}, [2.0, 4.0])",
f"array_contains({field}, 2.0)", f"ARRAY_CONTAINS({field}, 2.0)",
f"array_contains({field}, 2.0)", f"ARRAY_CONTAINS({field}, 2.0)",
f"array_contains_all({field}, [1.0, 2.0])", f"ARRAY_CONTAINS_ALL({field}, [1.0, 2.0])",
f"array_contains_any({field}, [1.0, 2.0])", f"ARRAY_CONTAINS_ANY({field}, [1.0, 2.0])",
f"array_length({field}) < 10", f"ARRAY_LENGTH({field}) < 10",
f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
]
elif field in [ct.default_bool_array_field_name]:
expressions = [f"{field}[0] == True", f"{field}[0] == False",
f"{field}[0] != True", f"{field}[0] != False",
f"{field}[0] <= True", f"{field}[0] >= True",
f"{field}[1] <= False", f"{field}[1] >= False",
f"{field}[0] < True", f"{field}[1] > True",
f"{field}[0] < False", f"{field}[0] > False",
f"{field}[0] == True && {field}[1] == False",
f"{field}[0] == True and {field}[1] == False ",
f"{field}[0] == True || {field}[1] == False",
f"{field}[0] == True or {field}[1] == False",
f"{field}[0] in [True]", f"{field}[1] in [False]", f"{field}[0] in [True, False]",
f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
]
elif field in [ct.default_string_array_field_name]:
expressions = [f"{field}[0] <= '1'", f"{field}[0] >= '1'",
f"{field}[0] < '2'", f"{field}[0] > '0'",
f"{field}[1] == '1'", f"{field}[1] != '1'",
f"{field}[1] like '1%'", f"{field}[1] like '%1'",
f"{field}[1] like '%1%'", f"{field}[1] LIKE '1%'",
f"{field}[1] LIKE '%1'", f"{field}[1] LIKE '%1%'",
f"{field}[1] == '1' && {field}[2] == '2'",
f"{field}[1] == '1' and {field}[2] == '2'",
f"{field}[0] == '1' || {field}[2] == '2'",
f"{field}[0] == '1' or {field}[2] == '2'",
f"{field}[1] >= '1' || {field}[2] <= '2'",
f"{field}[1] >= '1' or {field}[2] <= '2'",
f"{field}[0] in ['0']", f"{field}[1] in ['1', '2']",
f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
]
else:
raise Exception("Invalid field name")
return expressions
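# Illustrative usage (a sketch, not part of the test data; assumes the default field
# names from common_type, e.g. ct.default_int64_field_name == "int64"):
#   exprs = gen_field_expressions_all_single_operator_each_field(ct.default_int64_field_name)
#   # exprs[0] == "int64 <= 1", exprs[-1] == "int64 IS NOT NULL"; each string can be
#   # passed directly as the `filter` argument of client.query()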
def concatenate_uneven_arrays(arr1, arr2):
"""
concatenate the elements of two arrays that may have different lengths, joining each pair with a randomly chosen logical operator
"""
max_len = max(len(arr1), len(arr2))
result = []
op_list = ["and", "or", "&&", "||"]
for i in range(max_len):
a = arr1[i] if i < len(arr1) else ""
b = arr2[i] if i < len(arr2) else ""
if a == "" or b == "":
result.append(a + b)
else:
random_op = random.choice(op_list)
result.append(a + " " + random_op + " " + b)
return result
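# Example (illustrative; the joining operator is picked at random from op_list,
# so "&&" below is just one possible outcome):
#   concatenate_uneven_arrays(["a == 1", "b == 2"], ["c == 3"])
#   -> ["a == 1 && c == 3", "b == 2"]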
def gen_multiple_field_expressions(field_name_list=[], random_field_number=0, expr_number=1):
"""
Generate expressions that each combine multiple fields
parameters:
field_name_list: the field names to filter on. If specified, each name must come from the
following field name list (repeated names are allowed):
all_fields = [ct.default_int8_field_name, ct.default_int16_field_name,
ct.default_int32_field_name, ct.default_int64_field_name,
ct.default_float_field_name, ct.default_double_field_name,
ct.default_string_field_name, ct.default_bool_field_name,
ct.default_int8_array_field_name, ct.default_int16_array_field_name,
ct.default_int32_array_field_name, ct.default_int64_array_field_name,
ct.default_bool_array_field_name, ct.default_float_array_field_name,
ct.default_double_array_field_name, ct.default_string_array_field_name]
random_field_number: the number of fields to filter on, selected at random from the above
field name list (all_fields) if this parameter is specified.
If random_field_number <= len(all_fields), the fields are sampled without
repetition; if random_field_number > len(all_fields), the remaining
(random_field_number - len(all_fields)) fields are repeated picks.
expr_number: the number of expressions for each field
return:
expressions_fields: all the generated multi-field expressions
field_name_list_cp: the field name list actually used to build the expressions
"""
if not isinstance(field_name_list, list):
raise Exception("parameter field_name_list should be a list of all the fields to be filtered")
if random_field_number < 0:
raise Exception(f"random_field_number should be greater than or equal with 0]")
if not isinstance(expr_number, int):
raise Exception("parameter parameter should be an interger")
log.info(f"field_name_list: {field_name_list}")
log.info(f"random_field_number: {random_field_number}")
if len(field_name_list) != 0 and random_field_number != 0:
raise Exception("Not support both field_name_list and random_field_number are specified")
field_name_list_cp = field_name_list.copy()
all_fields = [ct.default_int8_field_name, ct.default_int16_field_name,
ct.default_int32_field_name, ct.default_int64_field_name,
ct.default_float_field_name, ct.default_double_field_name,
ct.default_string_field_name, ct.default_bool_field_name,
ct.default_int8_array_field_name, ct.default_int16_array_field_name,
ct.default_int32_array_field_name, ct.default_int64_array_field_name,
ct.default_bool_array_field_name, ct.default_float_array_field_name,
ct.default_double_array_field_name, ct.default_string_array_field_name]
if len(field_name_list) == 0 and random_field_number != 0:
if random_field_number <= len(all_fields):
random_array = random.sample(range(len(all_fields)), random_field_number)
else:
random_array = random.sample(range(len(all_fields)), len(all_fields))
for _ in range(random_field_number - len(all_fields)):
random_array.append(random.randint(0, len(all_fields)-1))
for i in random_array:
field_name_list_cp.append(all_fields[i])
if len(field_name_list) == 0 and random_field_number == 0:
field_name_list_cp = all_fields
expressions_fields = gen_field_expressions_all_single_operator_each_field(field_name_list_cp[0])
if len(field_name_list_cp) > 1:
for field in field_name_list_cp[1:]:
expressions = gen_field_expressions_all_single_operator_each_field(field)
expressions_fields = concatenate_uneven_arrays(expressions_fields, expressions)
return expressions_fields, field_name_list_cp
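# Illustrative usage (a sketch; the operators joining the per-field expressions
# are chosen at random, so the exact strings vary between runs):
#   exprs, fields = gen_multiple_field_expressions(
#       field_name_list=[ct.default_int64_field_name, ct.default_string_field_name])
#   # exprs[0] might be "int64 <= 1 or varchar <= '1'"
#   exprs, fields = gen_multiple_field_expressions(random_field_number=3)
#   # combines expressions from 3 fields sampled at random from all_fields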
def gen_array_field_expressions_and_templates():
"""

@ -39,8 +39,13 @@ default_double_field_name = "double"
default_string_field_name = "varchar"
default_json_field_name = "json_field"
default_array_field_name = "int_array"
default_int8_array_field_name = "int8_array"
default_int16_array_field_name = "int16_array"
default_int32_array_field_name = "int32_array"
default_int64_array_field_name = "int64_array"
default_bool_array_field_name = "bool_array"
default_float_array_field_name = "float_array"
default_double_array_field_name = "double_array"
default_string_array_field_name = "string_array"
default_float_vec_field_name = "float_vector"
default_float16_vec_field_name = "float16_vector"
@ -114,6 +119,8 @@ max_database_num = 64
max_collections_per_db = 65536
max_collection_num = 65536
max_hybrid_search_req_num = 1024
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
IMAGE_REPOSITORY_MILVUS = "harbor.milvus.io/dockerhub/milvusdb/milvus"
@ -239,7 +246,7 @@ get_wrong_format_dict = [
get_all_kind_data_distribution = [
1, np.float64(1.0), np.double(1.0), 9707199254740993.0, 9707199254740992,
'1', '123', '321', '213', True, False, [1, 2], [1.0, 2], None, {}, {"a": 1},
'1', '123', '321', '213', True, False, None, [1, 2], [1.0, 2], {}, {"a": 1},
{'a': 1.0}, {'a': 9707199254740993.0}, {'a': 9707199254740992}, {'a': '1'}, {'a': '123'},
{'a': '321'}, {'a': '213'}, {'a': True}, {'a': [1, 2, 3]}, {'a': [1.0, 2, '1']}, {'a': [1.0, 2]},
{'a': None}, {'a': {'b': 1}}, {'a': {'b': 1.0}}, {'a': [{'b': 1}, 2.0, np.double(3.0), '4', True, [1, 3.0], None]}
@ -321,7 +328,14 @@ privilege_group_privilege_dict = {"Query": False, "Search": False, "GetLoadState
"AlterDatabase": False, "FlushAll": False, "ListPrivilegeGroups": False,
"CreatePrivilegeGroup": False, "DropPrivilegeGroup": False,
"OperatePrivilegeGroup": False}
all_expr_fields = [default_int8_field_name, default_int16_field_name,
default_int32_field_name, default_int64_field_name,
default_float_field_name, default_double_field_name,
default_string_field_name, default_bool_field_name,
default_int8_array_field_name, default_int16_array_field_name,
default_int32_array_field_name, default_int64_array_field_name,
default_bool_array_field_name, default_float_array_field_name,
default_double_array_field_name, default_string_array_field_name]
class CheckTasks:
""" The name of the method used to check the result """

@ -0,0 +1,906 @@
import pytest
from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
import numpy as np
prefix = "milvus_client_api_query"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
""" Test case of data integrity interface """
@pytest.fixture(scope="function", params=["INVERTED", "BITMAP"])
def supported_bool_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["STL_SORT", "INVERTED"])
def supported_numeric_float_double_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["STL_SORT", "INVERTED", "BITMAP"])
def supported_numeric_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["TRIE", "INVERTED", "BITMAP"])
def supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_json_path_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["INVERTED", "BITMAP"])
def supported_array_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_array_double_float_scalar_index(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [False])
@pytest.mark.parametrize("is_flush", [True])
@pytest.mark.parametrize("is_release", [True])
@pytest.mark.parametrize("single_data_num", [50])
@pytest.mark.parametrize("expr_field", [ct.default_int64_field_name])
def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array(self,
enable_dynamic_field,
supported_bool_scalar_index,
supported_numeric_float_double_index,
supported_numeric_scalar_index,
supported_varchar_scalar_index,
supported_json_path_index,
supported_array_scalar_index,
supported_array_double_float_scalar_index,
is_flush,
is_release,
single_data_num,
expr_field):
"""
target: test query using expression fields with all supported field type after all supported scalar index
with all supported basic expressions
method: Query using expression on all supported fields after all scalar indexes with all supported basic expressions
step: 1. create collection
2. insert with different data distribution
3. flush if specified
4. query when there is no index applying on each field under all supported expressions
5. release if specified
6. prepare index params with all supported scalar index on all scalar fields
7. create index
8. create same index twice
9. reload collection if released before to make sure the new index load successfully
10. sleep for 60s to make sure the new index load successfully without release and reload operations
11. query after there is index applying on each supported field under all supported expressions
which should get the same result with that without index
expected: query successfully after there is index applying on each supported field under all expressions which
should get the same result with that without index
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
default_dim = 5
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
if not enable_dynamic_field:
schema.add_field(ct.default_bool_field_name, DataType.BOOL, nullable=True)
schema.add_field(ct.default_int8_field_name, DataType.INT8, nullable=True)
schema.add_field(ct.default_int16_field_name, DataType.INT16, nullable=True)
schema.add_field(ct.default_int32_field_name, DataType.INT32, nullable=True)
schema.add_field(ct.default_int64_field_name, DataType.INT64, nullable=True)
schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
schema.add_field(ct.default_double_field_name, DataType.DOUBLE, nullable=True)
schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, nullable=True)
schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=True)
schema.add_field(ct.default_int8_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT8,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int16_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT16,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int32_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT32,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int64_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT64,
max_capacity=5, nullable=True)
schema.add_field(ct.default_bool_array_field_name, datatype=DataType.ARRAY, element_type=DataType.BOOL,
max_capacity=5, nullable=True)
schema.add_field(ct.default_float_array_field_name, datatype=DataType.ARRAY, element_type=DataType.FLOAT,
max_capacity=5, nullable=True)
schema.add_field(ct.default_double_array_field_name, datatype=DataType.ARRAY, element_type=DataType.DOUBLE,
max_capacity=5, nullable=True)
schema.add_field(ct.default_string_array_field_name, datatype=DataType.ARRAY, element_type=DataType.VARCHAR,
max_capacity=5, max_length=100, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert with different data distribution
vectors = cf.gen_vectors(default_nb + 60, default_dim)
inserted_data_distribution = ct.get_all_kind_data_distribution
nb_single = single_data_num
for i in range(len(inserted_data_distribution)):
rows = [{default_primary_key_field_name: j, default_vector_field_name: vectors[j],
ct.default_bool_field_name: bool(j) if (i % 2 == 0) else None,
ct.default_int8_field_name: np.int8(j) if (i % 2 == 0) else None,
ct.default_int16_field_name: np.int16(j) if (i % 2 == 0) else None,
ct.default_int32_field_name: np.int32(j) if (i % 2 == 0) else None,
ct.default_int64_field_name: j if (i % 2 == 0) else None,
ct.default_float_field_name: j * 1.0 if (i % 2 == 0) else None,
ct.default_double_field_name: j * 1.0 if (i % 2 == 0) else None,
ct.default_string_field_name: f'{j}' if (i % 2 == 0) else None,
ct.default_json_field_name: inserted_data_distribution[i],
ct.default_int8_array_field_name: [np.int8(j), np.int8(j)] if (i % 2 == 0) else None,
ct.default_int16_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_int32_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_int64_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_bool_array_field_name: [bool(j), bool(j + 1)] if (i % 2 == 0) else None,
ct.default_float_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
ct.default_double_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
} for j in range(i * nb_single, (i + 1) * nb_single)]
assert len(rows) == nb_single
log.info(rows)
self.insert(client, collection_name=collection_name, data=rows)
log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
# 3. flush if specified
if is_flush:
self.flush(client, collection_name)
# 4. query when there is no index under all expressions
express_list = cf.gen_field_expressions_all_single_operator_each_field(expr_field)
compare_dict = {}
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter '{express_list[i]}' before scalar index is:")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
                 output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=[f"{expr_field}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[f"{expr_field}"])
assert count == len(id_list)
assert count == len(json_list)
compare_dict.setdefault(f'{i}', {})
compare_dict[f'{i}']["id_list"] = id_list
compare_dict[f'{i}']["json_list"] = json_list
# 5. release if specified
if is_release:
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, default_vector_field_name)
# 6. prepare index params with all supported scalar indexes and json path indexes
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=ct.default_bool_field_name, index_type=supported_bool_scalar_index)
index_params.add_index(field_name=ct.default_int8_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int16_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int32_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int64_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_float_field_name, index_type=supported_numeric_float_double_index)
index_params.add_index(field_name=ct.default_double_field_name, index_type=supported_numeric_float_double_index)
index_params.add_index(field_name=ct.default_string_field_name, index_type=supported_varchar_scalar_index)
index_params.add_index(field_name=ct.default_int8_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int16_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int32_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int64_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_bool_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_float_array_field_name,
index_type=supported_array_double_float_scalar_index)
index_params.add_index(field_name=ct.default_double_array_field_name,
index_type=supported_array_double_float_scalar_index)
index_params.add_index(field_name=ct.default_string_array_field_name, index_type=supported_array_scalar_index)
json_index_name = "json_index_name"
json_path_list = [f"{ct.default_json_field_name}",
f"{ct.default_json_field_name}[0]",
f"{ct.default_json_field_name}[1]",
f"{ct.default_json_field_name}[6]",
f"{ct.default_json_field_name}[10000]",
f"{ct.default_json_field_name}['a']",
f"{ct.default_json_field_name}['a']['b']",
f"{ct.default_json_field_name}['a'][0]",
f"{ct.default_json_field_name}['a'][6]",
f"{ct.default_json_field_name}['a'][0]['b']",
f"{ct.default_json_field_name}['a']['b']['c']",
f"{ct.default_json_field_name}['a']['b'][0]['d']",
f"{ct.default_json_field_name}['a']['c'][0]['d']"]
for i in range(len(json_path_list)):
index_params.add_index(field_name=ct.default_json_field_name, index_name=json_index_name + f'{i}',
index_type=supported_json_path_index,
params={"json_cast_type": "DOUBLE",
"json_path": json_path_list[i]})
# 7. create index
self.create_index(client, collection_name, index_params)
# 8. create the same index twice
self.create_index(client, collection_name, index_params)
# 9. reload the collection if released before to make sure the new index loads successfully
if is_release:
self.load_collection(client, collection_name)
else:
# 10. sleep for 60s to make sure the new index loads successfully without release and reload operations
time.sleep(60)
# 11. query after the indexes are built under all expressions, which should return the same result
# as without the index
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter '{express_list[i]}' after index is:")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
                 output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=[f"{expr_field}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[f"{expr_field}"])
if len(json_list) != len(compare_dict[f'{i}']["json_list"]):
log.debug(
f"the field {expr_field} value after index {supported_array_scalar_index} under expression '{express_list[i]}' is:")
log.debug(json_list)
log.debug(
f"the field {expr_field} value before index to be compared under expression '{express_list[i]}' is:")
log.debug(compare_dict[f'{i}']["json_list"])
assert json_list == compare_dict[f'{i}']["json_list"]
if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
log.debug(
f"primary key field {default_primary_key_field_name} after index {supported_array_scalar_index} under expression '{express_list[i]}' is:")
log.debug(id_list)
log.debug(
f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is:")
log.debug(compare_dict[f'{i}']["id_list"])
assert id_list == compare_dict[f'{i}']["id_list"]
log.info(f"PASS with expression {express_list[i]}")
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_dynamic_field", [False])
@pytest.mark.parametrize("is_flush", [True, False])
@pytest.mark.parametrize("is_release", [True, False])
@pytest.mark.parametrize("single_data_num", [50])
@pytest.mark.parametrize("expr_field", [ct.default_int8_field_name, ct.default_int16_field_name,
ct.default_int32_field_name, ct.default_int64_field_name,
ct.default_float_field_name, ct.default_double_field_name,
ct.default_string_field_name, ct.default_bool_field_name,
ct.default_int8_array_field_name, ct.default_int16_array_field_name,
ct.default_int32_array_field_name, ct.default_int64_array_field_name,
ct.default_bool_array_field_name, ct.default_float_array_field_name,
ct.default_double_array_field_name, ct.default_string_array_field_name])
def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_all(self, enable_dynamic_field, supported_bool_scalar_index,
supported_numeric_float_double_index,
supported_numeric_scalar_index, supported_varchar_scalar_index,
supported_json_path_index, supported_array_scalar_index,
supported_array_double_float_scalar_index,
is_flush, is_release, single_data_num, expr_field):
"""
target: test query using expression fields with all supported field type after all supported scalar index
with all supported basic expressions
method: Query using expression on all supported fields after all scalar indexes with all supported basic expressions
step: 1. create collection
2. insert with different data distribution
3. flush if specified
4. query when there is no index applying on each field under all supported expressions
5. release if specified
6. prepare index params with all supported scalar index on all scalar fields
7. create index
8. create same index twice
9. reload collection if released before to make sure the new index load successfully
10. sleep for 60s to make sure the new index load successfully without release and reload operations
11. query after there is index applying on each supported field under all supported expressions
which should get the same result with that without index
expected: query successfully after there is index applying on each supported field under all expressions which
should get the same result with that without index
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
default_dim = 5
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
if not enable_dynamic_field:
schema.add_field(ct.default_bool_field_name, DataType.BOOL, nullable=True)
schema.add_field(ct.default_int8_field_name, DataType.INT8, nullable=True)
schema.add_field(ct.default_int16_field_name, DataType.INT16, nullable=True)
schema.add_field(ct.default_int32_field_name, DataType.INT32, nullable=True)
schema.add_field(ct.default_int64_field_name, DataType.INT64, nullable=True)
schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
schema.add_field(ct.default_double_field_name, DataType.DOUBLE, nullable=True)
schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, nullable=True)
schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=True)
schema.add_field(ct.default_int8_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT8,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int16_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT16,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int32_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT32,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int64_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT64,
max_capacity=5, nullable=True)
schema.add_field(ct.default_bool_array_field_name, datatype=DataType.ARRAY, element_type=DataType.BOOL,
max_capacity=5, nullable=True)
schema.add_field(ct.default_float_array_field_name, datatype=DataType.ARRAY, element_type=DataType.FLOAT,
max_capacity=5, nullable=True)
schema.add_field(ct.default_double_array_field_name, datatype=DataType.ARRAY, element_type=DataType.DOUBLE,
max_capacity=5, nullable=True)
schema.add_field(ct.default_string_array_field_name, datatype=DataType.ARRAY, element_type=DataType.VARCHAR,
max_capacity=5, max_length=100, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert with different data distribution
vectors = cf.gen_vectors(default_nb+60, default_dim)
inserted_data_distribution = ct.get_all_kind_data_distribution
nb_single = single_data_num
for i in range(len(inserted_data_distribution)):
rows = [{default_primary_key_field_name: j, default_vector_field_name: vectors[j],
ct.default_bool_field_name: bool(j) if (i % 2 == 0) else None,
ct.default_int8_field_name: np.int8(j) if (i % 2 == 0) else None,
ct.default_int16_field_name: np.int16(j) if (i % 2 == 0) else None,
ct.default_int32_field_name: np.int32(j) if (i % 2 == 0) else None,
ct.default_int64_field_name: j if (i % 2 == 0) else None,
ct.default_float_field_name: j*1.0 if (i % 2 == 0) else None,
ct.default_double_field_name: j*1.0 if (i % 2 == 0) else None,
ct.default_string_field_name: f'{j}' if (i % 2 == 0) else None,
ct.default_json_field_name: inserted_data_distribution[i],
ct.default_int8_array_field_name: [np.int8(j), np.int8(j)] if (i % 2 == 0) else None,
ct.default_int16_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_int32_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_int64_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_bool_array_field_name: [bool(j), bool(j + 1)] if (i % 2 == 0) else None,
ct.default_float_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
ct.default_double_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
} for j in range(i * nb_single, (i + 1) * nb_single)]
assert len(rows) == nb_single
log.info(rows)
self.insert(client, collection_name=collection_name, data=rows)
log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
# 3. flush if specified
if is_flush:
self.flush(client, collection_name)
# 4. query when there is no index under all expressions
express_list = cf.gen_field_expressions_all_single_operator_each_field(expr_field)
compare_dict = {}
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter '{express_list[i]}' before scalar index is:")
res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=[f"{expr_field}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[f"{expr_field}"])
assert count == len(id_list)
assert count == len(json_list)
compare_dict.setdefault(f'{i}', {})
compare_dict[f'{i}']["id_list"] = id_list
compare_dict[f'{i}']["json_list"] = json_list
# 5. release if specified
if is_release:
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, default_vector_field_name)
# 6. prepare index params with all supported scalar indexes and json path indexes
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=ct.default_bool_field_name, index_type=supported_bool_scalar_index)
index_params.add_index(field_name=ct.default_int8_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int16_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int32_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int64_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_float_field_name, index_type=supported_numeric_float_double_index)
index_params.add_index(field_name=ct.default_double_field_name, index_type=supported_numeric_float_double_index)
index_params.add_index(field_name=ct.default_string_field_name, index_type=supported_varchar_scalar_index)
index_params.add_index(field_name=ct.default_int8_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int16_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int32_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int64_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_bool_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_float_array_field_name, index_type=supported_array_double_float_scalar_index)
index_params.add_index(field_name=ct.default_double_array_field_name, index_type=supported_array_double_float_scalar_index)
index_params.add_index(field_name=ct.default_string_array_field_name, index_type=supported_array_scalar_index)
json_index_name = "json_index_name"
json_path_list = [f"{ct.default_json_field_name}",
f"{ct.default_json_field_name}[0]",
f"{ct.default_json_field_name}[1]",
f"{ct.default_json_field_name}[6]",
f"{ct.default_json_field_name}[10000]",
f"{ct.default_json_field_name}['a']",
f"{ct.default_json_field_name}['a']['b']",
f"{ct.default_json_field_name}['a'][0]",
f"{ct.default_json_field_name}['a'][6]",
f"{ct.default_json_field_name}['a'][0]['b']",
f"{ct.default_json_field_name}['a']['b']['c']",
f"{ct.default_json_field_name}['a']['b'][0]['d']",
f"{ct.default_json_field_name}['a']['c'][0]['d']"]
for i in range(len(json_path_list)):
index_params.add_index(field_name=ct.default_json_field_name, index_name=json_index_name + f'{i}',
index_type=supported_json_path_index,
params={"json_cast_type": "DOUBLE",
"json_path": json_path_list[i]})
# 7. create index
self.create_index(client, collection_name, index_params)
# 8. create the same index twice
self.create_index(client, collection_name, index_params)
# 9. reload the collection if released before to make sure the new index loads successfully
if is_release:
self.load_collection(client, collection_name)
else:
# 10. sleep for 60s to make sure the new index loads successfully without release and reload operations
time.sleep(60)
# 11. query after the indexes are built under all expressions, which should return the same result
# as without the index
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter '{express_list[i]}' after index is:")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
                 output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=[f"{expr_field}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[f"{expr_field}"])
if len(json_list) != len(compare_dict[f'{i}']["json_list"]):
log.debug(f"the field {expr_field} value after index {supported_array_scalar_index} under expression '{express_list[i]}' is:")
log.debug(json_list)
log.debug(f"the field {expr_field} value before index to be compared under expression '{express_list[i]}' is:")
log.debug(compare_dict[f'{i}']["json_list"])
assert json_list == compare_dict[f'{i}']["json_list"]
if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
log.debug(f"primary key field {default_primary_key_field_name} after index {supported_array_scalar_index} under expression '{express_list[i]}' is:")
log.debug(id_list)
log.debug(f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is:")
log.debug(compare_dict[f'{i}']["id_list"])
assert id_list == compare_dict[f'{i}']["id_list"]
log.info(f"PASS with expression {express_list[i]}")
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("enable_dynamic_field", [False])
@pytest.mark.parametrize("is_flush", [True, False])
@pytest.mark.parametrize("is_release", [True, False])
@pytest.mark.parametrize("single_data_num", [50])
@pytest.mark.parametrize("expr_field", [ct.default_int8_field_name, ct.default_int16_field_name,
ct.default_int32_field_name, ct.default_int64_field_name,
ct.default_float_field_name, ct.default_double_field_name,
ct.default_string_field_name, ct.default_bool_field_name,
ct.default_int8_array_field_name, ct.default_int16_array_field_name,
ct.default_int32_array_field_name, ct.default_int64_array_field_name,
ct.default_bool_array_field_name, ct.default_float_array_field_name,
ct.default_double_array_field_name, ct.default_string_array_field_name])
def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_auto_index(self, enable_dynamic_field,
supported_json_path_index,
is_flush, is_release,
single_data_num, expr_field):
"""
target: test query using expression fields with all supported field type after all supported scalar index
with all supported basic expressions
method: Query using expression on all supported fields after all scalar indexes with all supported basic expressions
step: 1. create collection
2. insert with different data distribution
3. flush if specified
4. query when there is no index applying on each field under all supported expressions
5. release if specified
6. prepare index params with all supported scalar index on all scalar fields
7. create index
8. create same index twice
9. reload collection if released before to make sure the new index load successfully
10. sleep for 60s to make sure the new index load successfully without release and reload operations
11. query after there is index applying on each supported field under all supported expressions
which should get the same result with that without index
expected: query successfully after there is index applying on each supported field under all expressions which
should get the same result with that without index
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
default_dim = 5
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
if not enable_dynamic_field:
schema.add_field(ct.default_bool_field_name, DataType.BOOL, nullable=True)
schema.add_field(ct.default_int8_field_name, DataType.INT8, nullable=True)
schema.add_field(ct.default_int16_field_name, DataType.INT16, nullable=True)
schema.add_field(ct.default_int32_field_name, DataType.INT32, nullable=True)
schema.add_field(ct.default_int64_field_name, DataType.INT64, nullable=True)
schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
schema.add_field(ct.default_double_field_name, DataType.DOUBLE, nullable=True)
schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, nullable=True)
schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=True)
schema.add_field(ct.default_int8_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT8,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int16_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT16,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int32_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT32,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int64_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT64,
max_capacity=5, nullable=True)
schema.add_field(ct.default_bool_array_field_name, datatype=DataType.ARRAY, element_type=DataType.BOOL,
max_capacity=5, nullable=True)
schema.add_field(ct.default_float_array_field_name, datatype=DataType.ARRAY, element_type=DataType.FLOAT,
max_capacity=5, nullable=True)
schema.add_field(ct.default_double_array_field_name, datatype=DataType.ARRAY, element_type=DataType.DOUBLE,
max_capacity=5, nullable=True)
schema.add_field(ct.default_string_array_field_name, datatype=DataType.ARRAY, element_type=DataType.VARCHAR,
max_capacity=5, max_length=100, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert with different data distribution
vectors = cf.gen_vectors(default_nb+60, default_dim)
inserted_data_distribution = ct.get_all_kind_data_distribution
nb_single = single_data_num
for i in range(len(inserted_data_distribution)):
rows = [{default_primary_key_field_name: j, default_vector_field_name: vectors[j],
ct.default_bool_field_name: bool(j) if (i % 2 == 0) else None,
ct.default_int8_field_name: np.int8(j) if (i % 2 == 0) else None,
ct.default_int16_field_name: np.int16(j) if (i % 2 == 0) else None,
ct.default_int32_field_name: np.int32(j) if (i % 2 == 0) else None,
ct.default_int64_field_name: j if (i % 2 == 0) else None,
ct.default_float_field_name: j*1.0 if (i % 2 == 0) else None,
ct.default_double_field_name: j*1.0 if (i % 2 == 0) else None,
ct.default_string_field_name: f'{j}' if (i % 2 == 0) else None,
ct.default_json_field_name: inserted_data_distribution[i],
ct.default_int8_array_field_name: [np.int8(j), np.int8(j)] if (i % 2 == 0) else None,
ct.default_int16_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_int32_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_int64_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_bool_array_field_name: [bool(j), bool(j + 1)] if (i % 2 == 0) else None,
ct.default_float_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
ct.default_double_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
} for j in range(i * nb_single, (i + 1) * nb_single)]
assert len(rows) == nb_single
log.info(rows)
self.insert(client, collection_name=collection_name, data=rows)
log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
# 3. flush if specified
if is_flush:
self.flush(client, collection_name)
# 4. query when there is no index under all expressions
express_list = cf.gen_field_expressions_all_single_operator_each_field(expr_field)
compare_dict = {}
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter '{express_list[i]}' before scalar index is:")
res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=[f"{expr_field}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[f"{expr_field}"])
assert count == len(id_list)
assert count == len(json_list)
compare_dict.setdefault(f'{i}', {})
compare_dict[f'{i}']["id_list"] = id_list
compare_dict[f'{i}']["json_list"] = json_list
# 5. release if specified
if is_release:
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, default_vector_field_name)
# 6. prepare index params with AUTOINDEX on all scalar fields and json path indexes
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=ct.default_bool_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_int8_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_int16_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_int32_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_int64_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_float_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_double_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_string_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_int8_array_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_int16_array_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_int32_array_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_int64_array_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_bool_array_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_float_array_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_double_array_field_name, index_type="AUTOINDEX")
index_params.add_index(field_name=ct.default_string_array_field_name, index_type="AUTOINDEX")
json_index_name = "json_index_name"
json_path_list = [f"{ct.default_json_field_name}",
f"{ct.default_json_field_name}[0]",
f"{ct.default_json_field_name}[1]",
f"{ct.default_json_field_name}[6]",
f"{ct.default_json_field_name}[10000]",
f"{ct.default_json_field_name}['a']",
f"{ct.default_json_field_name}['a']['b']",
f"{ct.default_json_field_name}['a'][0]",
f"{ct.default_json_field_name}['a'][6]",
f"{ct.default_json_field_name}['a'][0]['b']",
f"{ct.default_json_field_name}['a']['b']['c']",
f"{ct.default_json_field_name}['a']['b'][0]['d']",
f"{ct.default_json_field_name}['a']['c'][0]['d']"]
for i in range(len(json_path_list)):
index_params.add_index(field_name=ct.default_json_field_name, index_name=json_index_name + f'{i}',
index_type=supported_json_path_index,
params={"json_cast_type": "DOUBLE",
"json_path": json_path_list[i]})
# 7. create index
self.create_index(client, collection_name, index_params)
# 8. create the same index twice
self.create_index(client, collection_name, index_params)
# 9. reload the collection if released before to make sure the new index loads successfully
if is_release:
self.load_collection(client, collection_name)
else:
# 10. sleep for 60s to make sure the new index loads successfully without release and reload operations
time.sleep(60)
# 11. query after the indexes are built under all expressions, which should return the same result
# as without the index
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter '{express_list[i]}' after index is:")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
                 output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=[f"{expr_field}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[f"{expr_field}"])
if len(json_list) != len(compare_dict[f'{i}']["json_list"]):
log.debug(f"the field {expr_field} value after index 'AUTOINDEX' under expression '{express_list[i]}' is:")
log.debug(json_list)
log.debug(f"the field {expr_field} value before index to be compared under expression '{express_list[i]}' is:")
log.debug(compare_dict[f'{i}']["json_list"])
assert json_list == compare_dict[f'{i}']["json_list"]
if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
log.debug(f"primary key field {default_primary_key_field_name} after index 'AUTOINDEX' under expression '{express_list[i]}' is:")
log.debug(id_list)
log.debug(f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is:")
log.debug(compare_dict[f'{i}']["id_list"])
assert id_list == compare_dict[f'{i}']["id_list"]
log.info(f"PASS with expression {express_list[i]}")
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("enable_dynamic_field", [False])
@pytest.mark.parametrize("is_flush", [True, False])
@pytest.mark.parametrize("is_release", [True, False])
@pytest.mark.parametrize("single_data_num", [50])
@pytest.mark.parametrize("random_filter_field_number", [2, 6, 16])
def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_multiple_fields(self,
enable_dynamic_field,
supported_bool_scalar_index,
supported_numeric_float_double_index,
supported_numeric_scalar_index,
supported_varchar_scalar_index,
supported_json_path_index,
supported_array_scalar_index,
supported_array_double_float_scalar_index,
is_flush,
is_release,
single_data_num,
random_filter_field_number):
"""
target: test query using expression fields with all supported field type after all supported scalar index
with all supported basic expressions
method: Query using expression on all supported fields after all scalar indexes with all supported basic expressions
step: 1. create collection
2. insert with different data distribution
3. flush if specified
4. query when there is no index applying on each field under all supported expressions
5. release if specified
6. prepare index params with all supported scalar index on all scalar fields
7. create index
8. create same index twice
9. reload collection if released before to make sure the new index load successfully
10. sleep for 60s to make sure the new index load successfully without release and reload operations
11. query after there is index applying on each supported field under all supported expressions
which should get the same result with that without index
expected: query successfully after there is index applying on each supported field under all expressions which
should get the same result with that without index
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
default_dim = 5
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
if not enable_dynamic_field:
schema.add_field(ct.default_bool_field_name, DataType.BOOL, nullable=True)
schema.add_field(ct.default_int8_field_name, DataType.INT8, nullable=True)
schema.add_field(ct.default_int16_field_name, DataType.INT16, nullable=True)
schema.add_field(ct.default_int32_field_name, DataType.INT32, nullable=True)
schema.add_field(ct.default_int64_field_name, DataType.INT64, nullable=True)
schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
schema.add_field(ct.default_double_field_name, DataType.DOUBLE, nullable=True)
schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, nullable=True)
schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=True)
schema.add_field(ct.default_int8_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT8,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int16_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT16,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int32_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT32,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int64_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT64,
max_capacity=5, nullable=True)
schema.add_field(ct.default_bool_array_field_name, datatype=DataType.ARRAY, element_type=DataType.BOOL,
max_capacity=5, nullable=True)
schema.add_field(ct.default_float_array_field_name, datatype=DataType.ARRAY, element_type=DataType.FLOAT,
max_capacity=5, nullable=True)
schema.add_field(ct.default_double_array_field_name, datatype=DataType.ARRAY, element_type=DataType.DOUBLE,
max_capacity=5, nullable=True)
schema.add_field(ct.default_string_array_field_name, datatype=DataType.ARRAY, element_type=DataType.VARCHAR,
max_capacity=5, max_length=100, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert with different data distribution
vectors = cf.gen_vectors(default_nb + 60, default_dim)
inserted_data_distribution = ct.get_all_kind_data_distribution
nb_single = single_data_num
for i in range(len(inserted_data_distribution)):
rows = [{default_primary_key_field_name: j, default_vector_field_name: vectors[j],
ct.default_bool_field_name: bool(j) if (i % 2 == 0) else None,
ct.default_int8_field_name: np.int8(j) if (i % 2 == 0) else None,
ct.default_int16_field_name: np.int16(j) if (i % 2 == 0) else None,
ct.default_int32_field_name: np.int32(j) if (i % 2 == 0) else None,
ct.default_int64_field_name: j if (i % 2 == 0) else None,
ct.default_float_field_name: j * 1.0 if (i % 2 == 0) else None,
ct.default_double_field_name: j * 1.0 if (i % 2 == 0) else None,
ct.default_string_field_name: f'{j}' if (i % 2 == 0) else None,
ct.default_json_field_name: inserted_data_distribution[i],
ct.default_int8_array_field_name: [np.int8(j), np.int8(j)] if (i % 2 == 0) else None,
ct.default_int16_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_int32_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_int64_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_bool_array_field_name: [bool(j), bool(j + 1)] if (i % 2 == 0) else None,
ct.default_float_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
ct.default_double_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
} for j in range(i * nb_single, (i + 1) * nb_single)]
assert len(rows) == nb_single
self.insert(client, collection_name=collection_name, data=rows)
log.debug(f"inserted {nb_single} {inserted_data_distribution[i]}")
# 3. flush if specified
if is_flush:
self.flush(client, collection_name)
# 4. query when there is no index under all expressions
express_list, field_lists = cf.gen_multiple_field_expressions(random_field_number=random_filter_field_number)
compare_dict = {}
for i in range(len(express_list)):
id_list = []
log.info(f"query with filter '{express_list[i]}' before scalar index is:")
res = \
self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=field_lists)[0]
one_dict = {}
# init the compared dict
for field_name in field_lists:
one_dict.setdefault(f'{field_name}', [])
compare_dict.setdefault(f'{i}', one_dict)
# extract and store the id and output_fields value used for compare after index
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
for field_name in field_lists:
compare_dict[f'{i}'][f'{field_name}'].append(single[f"{field_name}"])
assert count == len(id_list)
for field_name in field_lists:
assert count == len(compare_dict[f'{i}'][f'{field_name}'])
compare_dict[f'{i}']['id_list'] = id_list
# 5. release if specified
if is_release:
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, default_vector_field_name)
# 6. prepare index params with all supported scalar indexes on all scalar fields
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=ct.default_bool_field_name, index_type=supported_bool_scalar_index)
index_params.add_index(field_name=ct.default_int8_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int16_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int32_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_int64_field_name, index_type=supported_numeric_scalar_index)
index_params.add_index(field_name=ct.default_float_field_name, index_type=supported_numeric_float_double_index)
index_params.add_index(field_name=ct.default_double_field_name, index_type=supported_numeric_float_double_index)
index_params.add_index(field_name=ct.default_string_field_name, index_type=supported_varchar_scalar_index)
index_params.add_index(field_name=ct.default_int8_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int16_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int32_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_int64_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_bool_array_field_name, index_type=supported_array_scalar_index)
index_params.add_index(field_name=ct.default_float_array_field_name,
index_type=supported_array_double_float_scalar_index)
index_params.add_index(field_name=ct.default_double_array_field_name,
index_type=supported_array_double_float_scalar_index)
index_params.add_index(field_name=ct.default_string_array_field_name, index_type=supported_array_scalar_index)
json_index_name = "json_index_name"
json_path_list = [f"{ct.default_json_field_name}",
f"{ct.default_json_field_name}[0]",
f"{ct.default_json_field_name}[1]",
f"{ct.default_json_field_name}[6]",
f"{ct.default_json_field_name}[10000]",
f"{ct.default_json_field_name}['a']",
f"{ct.default_json_field_name}['a']['b']",
f"{ct.default_json_field_name}['a'][0]",
f"{ct.default_json_field_name}['a'][6]",
f"{ct.default_json_field_name}['a'][0]['b']",
f"{ct.default_json_field_name}['a']['b']['c']",
f"{ct.default_json_field_name}['a']['b'][0]['d']",
f"{ct.default_json_field_name}['a']['c'][0]['d']"]
for i in range(len(json_path_list)):
index_params.add_index(field_name=ct.default_json_field_name, index_name=json_index_name + f'{i}',
index_type=supported_json_path_index,
params={"json_cast_type": "DOUBLE",
"json_path": json_path_list[i]})
# 7. create index
self.create_index(client, collection_name, index_params)
# 8. create the same index twice (currently skipped)
# self.create_index(client, collection_name, index_params)
# 9. reload collection if released before to make sure the new index loads successfully
if is_release:
self.load_collection(client, collection_name)
else:
# 10. sleep for 60s to make sure the new index loads successfully without release and reload operations
time.sleep(60)
# 11. query with the indexes in place under all expressions, which should return
# the same result as without index
for i in range(len(express_list)):
id_list = []
log.info(f"query with filter '{express_list[i]}' after index is:")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=field_lists)[0]
# compare each filtered field before and after index
for field_name in field_lists:
json_list = []
for single in res:
json_list.append(single[f"{field_name}"])
if len(json_list) != len(compare_dict[f'{i}'][f'{field_name}']):
log.debug(f"the field {field_name} value after index under expression '{express_list[i]}' is: {json_list}")
log.debug(f"the field {field_name} value before index to be compared under expression '{express_list[i]}' is: {compare_dict[f'{i}'][f'{field_name}']}")
assert json_list == compare_dict[f'{i}'][f'{field_name}']
# compare id before and after index
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
if len(id_list) != len(compare_dict[f'{i}']['id_list']):
log.debug(f"primary key field {default_primary_key_field_name} after index under expression '{express_list[i]}' is: {id_list}")
log.debug(f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is: {compare_dict[f'{i}']['id_list']}")
assert id_list == compare_dict[f'{i}']['id_list']
log.info(f"PASS with expression {express_list[i]}")
self.drop_collection(client, collection_name)
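Aside: the count checks above rely on query() returning a list of row dicts, so the count(*) aggregate has to be read out of the first row before it can be compared with the row-level result. A minimal sketch against a bare MilvusClient, with an assumed uri, collection name, and filter:

# Sketch: verify count(*) agrees with the number of returned rows for a filter.
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")  # assumed local deployment
collection_name = "example_collection"               # illustrative name
expr = "int64 >= 0"                                  # illustrative filter

count_res = client.query(collection_name=collection_name, filter=expr,
                         output_fields=["count(*)"])
count = count_res[0]["count(*)"]  # the aggregate comes back as a single row

rows = client.query(collection_name=collection_name, filter=expr,
                    output_fields=["id"])
assert count == len(rows)  # must hold both before and after index creation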

View File

@@ -0,0 +1,246 @@
import pytest
from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
import numpy as np
prefix = "milvus_client_api_query"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invalid_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
prefix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
@pytest.mark.xdist_group("TestStaticFieldNoIndexAllExpr")
class TestStaticFieldNoIndexAllExpr(TestMilvusClientV2Base):
"""
Verify DQL requests when scalar fields are not indexed
"""
def setup_class(self):
super().setup_class(self)
# init params
self.collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
self.enable_dynamic_field = False
self.ground_truth = {}
@pytest.fixture(scope="class", autouse=True)
def prepare_data(self, request):
"""
Initialize collection before test class runs
"""
# Get client connection
client = self._client()
# Create collection
# create schema
schema = self.create_schema(client, enable_dynamic_field=self.enable_dynamic_field)[0]
schema.add_field(ct.default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(ct.default_vector_field_name, DataType.FLOAT_VECTOR, dim=ct.default_dim)
schema.add_field(ct.default_bool_field_name, DataType.BOOL, nullable=True)
schema.add_field(ct.default_int8_field_name, DataType.INT8, nullable=True)
schema.add_field(ct.default_int16_field_name, DataType.INT16, nullable=True)
schema.add_field(ct.default_int32_field_name, DataType.INT32, nullable=True)
schema.add_field(ct.default_int64_field_name, DataType.INT64, nullable=True)
schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
schema.add_field(ct.default_double_field_name, DataType.DOUBLE, nullable=True)
schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, nullable=True)
schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=True)
schema.add_field(ct.default_int8_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT8,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int16_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT16,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int32_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT32,
max_capacity=5, nullable=True)
schema.add_field(ct.default_int64_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT64,
max_capacity=5, nullable=True)
schema.add_field(ct.default_bool_array_field_name, datatype=DataType.ARRAY, element_type=DataType.BOOL,
max_capacity=5, nullable=True)
schema.add_field(ct.default_float_array_field_name, datatype=DataType.ARRAY, element_type=DataType.FLOAT,
max_capacity=5, nullable=True)
schema.add_field(ct.default_double_array_field_name, datatype=DataType.ARRAY, element_type=DataType.DOUBLE,
max_capacity=5, nullable=True)
schema.add_field(ct.default_string_array_field_name, datatype=DataType.ARRAY, element_type=DataType.VARCHAR,
max_capacity=5, max_length=100, nullable=True)
# prepare index params
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
# create collection with the above schema and index params
self.create_collection(client, self.collection_name, schema=schema,
index_params=index_params, force_teardown=False)
# Generate vectors and all scalar data
vectors = cf.gen_vectors(default_nb + 60, default_dim)
inserted_data_distribution = ct.get_all_kind_data_distribution
nb_single = 50
rows_list = []
for i in range(len(inserted_data_distribution)):
rows = [{ct.default_primary_key_field_name: j, ct.default_vector_field_name: vectors[j],
ct.default_bool_field_name: bool(j) if (i % 2 == 0) else None,
ct.default_int8_field_name: np.int8(j) if (i % 2 == 0) else None,
ct.default_int16_field_name: np.int16(j) if (i % 2 == 0) else None,
ct.default_int32_field_name: np.int32(j) if (i % 2 == 0) else None,
ct.default_int64_field_name: j if (i % 2 == 0) else None,
ct.default_float_field_name: j * 1.0 if (i % 2 == 0) else None,
ct.default_double_field_name: j * 1.0 if (i % 2 == 0) else None,
ct.default_string_field_name: f'{j}' if (i % 2 == 0) else None,
ct.default_json_field_name: inserted_data_distribution[i],
ct.default_int8_array_field_name: [np.int8(j), np.int8(j)] if (i % 2 == 0) else None,
ct.default_int16_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_int32_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_int64_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
ct.default_bool_array_field_name: [bool(j), bool(j + 1)] if (i % 2 == 0) else None,
ct.default_float_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
ct.default_double_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
} for j in range(i * nb_single, (i + 1) * nb_single)]
assert len(rows) == nb_single
# insert
self.insert(client, collection_name=self.collection_name, data=rows)
log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
rows_list.extend(rows)
assert len(rows_list) == nb_single * len(inserted_data_distribution)
# calculate the ground truth for all fields with their supported expressions
expr_fields = ct.all_expr_fields
compare_dict = {}
# bind each field name to an initial value so the eval() below can resolve it
for field in expr_fields:
globals()[field] = rows_list[0][field]
for field in expr_fields:
express_list = cf.gen_field_expressions_all_single_operator_each_field(field)
for i in range(len(express_list)):
expression = express_list[i].replace("&&", "and").replace("||", "or")
compare_dict.setdefault(field, {})
# use a fresh dict per expression, otherwise all expressions would share the same lists
compare_dict[field].setdefault(f'{i}', {f'{field}': [], "id_list": []})
for j in range(nb_single*len(inserted_data_distribution)):
globals()[field] = rows_list[j][field]
log.info("binbin_debug1")
log.info(field)
if (int8 is None) or (int16 is None) or (int32 is None) or (int64 is None)\
or (float is None) or (double is None) or (varchar is None) or (bool_field is None)\
or (int8_array is None) or (int16_array is None) or (int32_array is None) or (int64_array is None)\
or (bool_array is None) or (float_array is None) or (double_array is None) or (string_array is None):
if "is null" or "IS NULL" in expression:
compare_dict[field][f'{i}'][field].append(rows_list[j][field])
compare_dict[field][f'{i}']["id_list"].append(
rows_list[j][ct.default_primary_key_field_name])
continue
else:
if ("is not null" in expression) or ("IS NOT NULL" in expression):
compare_dict[field][f'{i}'][field].append(rows_list[j][field])
compare_dict[field][f'{i}']["id_list"].append(
rows_list[j][ct.default_primary_key_field_name])
continue
if ("is null" in expression) or ("IS NULL" in expression):
continue
log.info("binbin_debug")
log.info(expression)
if not expression or eval(expression):
compare_dict[field][f'{i}'][field].append(rows_list[j][field])
compare_dict[field][f'{i}']["id_list"].append(rows_list[j][ct.default_primary_key_field_name])
log.info("binbin_debug_2")
# log.info(compare_dict)
self.ground_truth = compare_dict
# flush collection, segment sealed
self.flush(client, self.collection_name)
# load collection
self.load_collection(client, self.collection_name)
def teardown():
self.drop_collection(self._client(), self.collection_name)
request.addfinalizer(teardown)
def check_query_res(self, res, expr_field: str) -> list:
""" Ensure that primary key field values are unique """
real_data = {x[0]: x[1] for x in zip(self.insert_data.get(self.primary_field),
self.insert_data.get(expr_field))}
if len(real_data) != len(self.insert_data.get(self.primary_field)):
log.warning("[TestNoIndexDQLExpr] The primary key values are not unique, " +
"only check whether the res value is within the inserted data")
return [(r.get(self.primary_field), r.get(expr_field)) for r in res if
r.get(expr_field) not in self.insert_data.get(expr_field)]
return [(r[self.primary_field], r[expr_field], real_data[r[self.primary_field]]) for r in res if
r[expr_field] != real_data[r[self.primary_field]]]
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("expr_field", ct.all_expr_fields)
def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_all(self, expr_field):
"""
target: test query on each supported field type with all data distributions under all
supported basic expressions when no scalar index is created
method: query the parametrized field with every supported expression and compare the
returned field values and primary keys against the precomputed ground truth
expected: query successfully and return the same result as the ground truth
"""
client = self._client()
express_list = cf.gen_field_expressions_all_single_operator_each_field(expr_field)
compare_dict = self.ground_truth[expr_field]
for i in range(len(express_list)):
expression = express_list[i]
json_list = []
id_list = []
log.info(f"query with filter '{expression}' without scalar index is:")
res = self.query(client, collection_name=self.collection_name, filter=expression,
output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter '{expression}' without scalar index is: {count}")
assert count == len(compare_dict[f'{i}']["id_list"])
res = self.query(client, collection_name=self.collection_name, filter=expression,
output_fields=[expr_field])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[expr_field])
if len(json_list) != len(compare_dict[f'{i}'][expr_field]):
log.debug(f"the field {expr_field} value without scalar index under expression '{expression}' is:")
log.debug(json_list)
log.debug(f"the field {expr_field} value without scalar index to be compared under expression '{expression}' is:")
log.debug(compare_dict[f'{i}'][expr_field])
assert json_list == compare_dict[f'{i}'][expr_field]
if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
log.debug(f"primary key field {default_primary_key_field_name} without scalar index under expression '{expression}' is:")
log.debug(id_list)
log.debug(f"primary key field {default_primary_key_field_name} without scalar index to be compared under expression '{expression}' is:")
log.debug(compare_dict[f'{i}']["id_list"])
assert id_list == compare_dict[f'{i}']["id_list"]
log.info(f"PASS with expression {expression}")

View File

@@ -2750,8 +2750,13 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("nullable", [True, False])
@pytest.mark.parametrize("is_flush", [True, False])
@pytest.mark.parametrize("is_release", [True, False])
@pytest.mark.parametrize("is_scalar_index", [True, False])
@pytest.mark.parametrize("scalar_index_type", ["AUTOINDEX", "INVERTED", "BITMAP"])
@pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
def test_milvus_client_search_null_expr_array(self, nullable, null_expr_op):
def test_milvus_client_search_null_expr_array(self, nullable, null_expr_op, is_flush, is_release,
is_scalar_index, scalar_index_type):
"""
target: test search with null expression on array fields
method: create connection, collection, insert and search
@@ -2759,7 +2764,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
"""
client = self._client()
collection_name = cf.gen_collection_name_by_testcase_name()
dim = 5
dim = 128
# 1. create collection
nullable_field_name = "nullable_field"
schema = self.create_schema(client, enable_dynamic_field=False)[0]
@@ -2771,6 +2776,8 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
max_length=64, nullable=nullable)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
if is_scalar_index:
index_params.add_index(nullable_field_name, index_type=scalar_index_type)
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. insert
rng = np.random.default_rng(seed=19530)
@@ -2781,6 +2788,18 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
default_string_field_name: str(i), "nullable_field": [1, 2]} for i in range(default_nb)]
self.insert(client, collection_name, rows)
if is_flush:
self.flush(client, collection_name)
if is_release:
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, default_vector_field_name)
self.drop_index(client, collection_name, nullable_field_name)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
if is_scalar_index:
index_params.add_index(nullable_field_name, index_type=scalar_index_type)
self.create_index(client, collection_name, index_params)
self.load_collection(client, collection_name)
# 3. search
vectors_to_search = rng.random((1, dim))
insert_ids = [str(i) for i in range(default_nb)]
@@ -2789,7 +2808,6 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
if "not" in null_expr or "NOT" in null_expr:
insert_ids = []
limit = 0
else:
limit = default_limit
else: