test: add more general expr cases (#42035)
Signed-off-by: binbin lv <binbin.lv@zilliz.com>
This commit is contained in:
parent 54d365dcb0
commit ceb8434cb4
@@ -2490,7 +2490,8 @@ def gen_json_field_expressions_all_single_operator():
        "json_field['a'] < 2", "json_field['a'] < 2.0", "json_field['a'] > 0", "json_field['a'] > 0.0",
        "json_field['a'] <= '1'", "json_field['a'] >= '1'", "json_field['a'] < '2'", "json_field['a'] > '0'",
        "json_field['a'] == 1", "json_field['a'] == 1.0", "json_field['a'] == True",
        "json_field['a'] == 9707199254740993.0", "json_field['a'] == 9707199254740992", "json_field['a'] == '1'",
        "json_field['a'] == 9707199254740993.0", "json_field['a'] == 9707199254740992",
        "json_field['a'] == '1'",
        "json_field['a'] != '1'", "json_field['a'] like '1%'", "json_field['a'] like '%1'",
        "json_field['a'] like '%1%'", "json_field['a'] LIKE '1%'", "json_field['a'] LIKE '%1'",
        "json_field['a'] LIKE '%1%'", "EXISTS json_field['a']", "exists json_field['a']",
@@ -2498,7 +2499,8 @@ def gen_json_field_expressions_all_single_operator():
        "json_field['a'] - 1 <= 0", "json_field['a'] + 1.0 >= 2", "json_field['a'] - 1.0 <= 0",
        "json_field['a'] * 2 == 2", "json_field['a'] * 1.0 == 1.0", "json_field['a'] / 1 == 1",
        "json_field['a'] / 1.0 == 1", "json_field['a'] % 10 == 1", "json_field['a'] == 1**2",
        "json_field['a'][0] == 1 && json_field['a'][1] == 2", "json_field['a'][0] == 1 and json_field['a'][1] == 2",
        "json_field['a'][0] == 1 && json_field['a'][1] == 2",
        "json_field['a'][0] == 1 and json_field['a'][1] == 2",
        "json_field['a'][0]['b'] >=1 && json_field['a'][2] == 3",
        "json_field['a'][0]['b'] >=1 and json_field['a'][2] == 3",
        "json_field['a'] == 1 || json_field['a'] == '1'", "json_field['a'] == 1 or json_field['a'] == '1'",
@@ -2517,6 +2519,248 @@ def gen_json_field_expressions_all_single_operator():

    return expressions

def gen_field_expressions_all_single_operator_each_field(field=ct.default_int64_field_name):
    """
    Gen a list of filters in expression format (each filter is a string) for the given field
    """
    if field in [ct.default_int8_field_name, ct.default_int16_field_name, ct.default_int32_field_name,
                 ct.default_int64_field_name]:
        expressions = [f"{field} <= 1", f"{field} >= 1",
                       f"{field} < 2", f"{field} > 0",
                       f"{field} == 1", f"{field} != 1",
                       f"{field} == 9707199254740992", f"{field} != 9707199254740992",
                       f"{field} + 1 >= 2", f"{field} - 1 <= 0",
                       f"{field} * 2 == 2", f"{field} / 1 == 1",
                       f"{field} % 10 == 1", f"{field} == 1 || {field} == 2",
                       f"{field} == 1 or {field} == 2",
                       f"{field} in [1]", f"{field} not in [1]",
                       f"{field} is null", f"{field} IS NULL",
                       f"{field} is not null", f"{field} IS NOT NULL"
                       ]
    elif field in [ct.default_bool_field_name]:
        expressions = [f"{field} == True", f"{field} == False",
                       f"{field} != True", f"{field} != False",
                       f"{field} <= True", f"{field} >= True",
                       f"{field} <= False", f"{field} >= False",
                       f"{field} < True", f"{field} > True",
                       f"{field} < False", f"{field} > False",
                       f"{field} == True && {field} == False",
                       f"{field} == True and {field} == False",
                       f"{field} == True || {field} == False",
                       f"{field} == True or {field} == False",
                       f"{field} in [True]", f"{field} in [False]", f"{field} in [True, False]",
                       f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"]
    elif field in [ct.default_float_field_name, ct.default_double_field_name]:
        expressions = [f"{field} <= 1", f"{field} >= 1",
                       f"{field} < 2", f"{field} > 0",
                       f"{field} == 1", f"{field} != 1",
                       f"{field} == 9707199254740992", f"{field} != 9707199254740992",
                       f"{field} <= 1.0", f"{field} >= 1.0",
                       f"{field} < 2.0", f"{field} > 0.0",
                       f"{field} == 1.0", f"{field} != 1.0",
                       f"{field} == 9707199254740992.0", f"{field} != 9707199254740992.0",
                       f"{field} - 1 <= 0", f"{field} + 1.0 >= 2",
                       f"{field} - 1.0 <= 0", f"{field} * 2 == 2",
                       f"{field} * 1.0 == 1.0", f"{field} / 1 == 1",
                       f"{field} / 1.0 == 1.0", f"{field} == 1**2",
                       f"{field} == 1 && {field} == 2",
                       f"{field} == 1 and {field} == 2.0",
                       f"{field} >=1 && {field} == 3.0",
                       f"{field} >=1 and {field} == 3",
                       f"{field} == 1 || {field} == 2.0",
                       f"{field} == 1 or {field} == 2.0",
                       f"{field} >= 1 || {field} <=2.0",
                       f"{field} >= 1.0 or {field} <= 2.0",
                       f"{field} in [1]", f"{field} in [1, 2]",
                       f"{field} in [1.0]", f"{field} in [1.0, 2.0]",
                       f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
                       ]
    elif field in [ct.default_string_field_name]:
        expressions = [f"{field} <= '1'", f"{field} >= '1'", f"{field} < '2'", f"{field} > '0'",
                       f"{field} == '1'", f"{field} != '1'", f"{field} like '1%'", f"{field} like '%1'",
                       f"{field} like '%1%'", f"{field} LIKE '1%'", f"{field} LIKE '%1'",
                       f"{field} LIKE '%1%'",
                       f"{field} == '1' && {field} == '2'",
                       f"{field} == '1' and {field} == '2'",
                       f"{field} == '1' || {field} == '2'",
                       f"{field} == '1' or {field} == '2'",
                       f"{field} >= '1' || {field} <= '2'",
                       f"{field} >= '1' or {field} <= '2'",
                       f"{field} in ['1']", f"{field} in ['1', '2']",
                       f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
                       ]
    elif field in [ct.default_int8_array_field_name, ct.default_int16_array_field_name,
                   ct.default_int32_array_field_name, ct.default_int64_array_field_name]:
        expressions = [f"{field}[0] <= 1", f"{field}[0] >= 1",
                       f"{field}[0] < 2", f"{field}[0] > 0",
                       f"{field}[1] == 1", f"{field}[1] != 1",
                       f"{field}[0] == 9707199254740992", f"{field}[0] != 9707199254740992",
                       f"{field}[0] + 1 >= 2", f"{field}[0] - 1 <= 0",
                       f"{field}[0] + 1.0 >= 2", f"{field}[0] - 1.0 <= 0",
                       f"{field}[0] * 2 == 2", f"{field}[1] * 1.0 == 1.0",
                       f"{field}[1] / 1 == 1", f"{field}[0] / 1.0 == 1", f"{field}[1] % 10 == 1",
                       f"{field}[0] == 1 && {field}[1] == 2", f"{field}[0] == 1 and {field}[1] == 2",
                       f"{field}[0] >=1 && {field}[2] <= 3", f"{field}[0] >=1 and {field}[1] == 2",
                       f"{field}[0] >=1 || {field}[1] <=2", f"{field}[0] >=1 or {field}[1] <=2",
                       f"{field}[0] in [1]", f"json_contains({field}, 1)", f"JSON_CONTAINS({field}, 1)",
                       f"json_contains_all({field}, [1, 2])", f"JSON_CONTAINS_ALL({field}, [1, 2])",
                       f"json_contains_any({field}, [1, 2])", f"JSON_CONTAINS_ANY({field}, [1, 2])",
                       f"array_contains({field}, 2)", f"ARRAY_CONTAINS({field}, 2)",
                       f"array_contains_all({field}, [1, 2])", f"ARRAY_CONTAINS_ALL({field}, [1, 2])",
                       f"array_contains_any({field}, [1, 2])", f"ARRAY_CONTAINS_ANY({field}, [1, 2])",
                       f"array_length({field}) < 10", f"ARRAY_LENGTH({field}) < 10",
                       f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
                       ]
    elif field in [ct.default_float_array_field_name, ct.default_double_array_field_name]:
        expressions = [f"{field}[0] <= 1", f"{field}[0] >= 1",
                       f"{field}[0] < 2", f"{field}[0] > 0",
                       f"{field}[1] == 1", f"{field}[1] != 1",
                       f"{field}[0] == 9707199254740992", f"{field}[0] != 9707199254740992",
                       f"{field}[0] <= 1.0", f"{field}[0] >= 1.0",
                       f"{field}[0] < 2.0", f"{field}[0] > 0.0",
                       f"{field}[1] == 1.0", f"{field}[1] != 1.0",
                       f"{field}[0] == 9707199254740992.0",
                       f"{field}[0] - 1 <= 0", f"{field}[0] + 1.0 >= 2",
                       f"{field}[0] - 1.0 <= 0", f"{field}[0] * 2 == 2",
                       f"{field}[0] * 1.0 == 1.0", f"{field}[0] / 1 == 1",
                       f"{field}[0] / 1.0 == 1.0", f"{field}[0] == 1**2",
                       f"{field}[0] == 1 && {field}[1] == 2",
                       f"{field}[0] == 1 and {field}[1] == 2.0",
                       f"{field}[0] >=1 && {field}[2] == 3.0",
                       f"{field}[0] >=1 and {field}[2] == 3",
                       f"{field}[0] == 1 || {field}[1] == 2.0",
                       f"{field}[0] == 1 or {field}[1] == 2.0",
                       f"{field}[0] >= 1 || {field}[1] <=2.0",
                       f"{field}[0] >= 1.0 or {field}[1] <= 2.0",
                       f"{field}[0] in [1]", f"{field}[0] in [1.0]", f"json_contains({field}, 1.0)",
                       f"JSON_CONTAINS({field}, 1.0)", f"json_contains({field}, 1.0)", f"JSON_CONTAINS({field}, 1.0)",
                       f"json_contains_all({field}, [2.0, 4.0])", f"JSON_CONTAINS_ALL({field}, [2.0, 4.0])",
                       f"json_contains_any({field}, [2.0, 4.0])", f"JSON_CONTAINS_ANY({field}, [2.0, 4.0])",
                       f"array_contains({field}, 2.0)", f"ARRAY_CONTAINS({field}, 2.0)",
                       f"array_contains({field}, 2.0)", f"ARRAY_CONTAINS({field}, 2.0)",
                       f"array_contains_all({field}, [1.0, 2.0])", f"ARRAY_CONTAINS_ALL({field}, [1.0, 2.0])",
                       f"array_contains_any({field}, [1.0, 2.0])", f"ARRAY_CONTAINS_ANY({field}, [1.0, 2.0])",
                       f"array_length({field}) < 10", f"ARRAY_LENGTH({field}) < 10",
                       f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
                       ]
    elif field in [ct.default_bool_array_field_name]:
        expressions = [f"{field}[0] == True", f"{field}[0] == False",
                       f"{field}[0] != True", f"{field}[0] != False",
                       f"{field}[0] <= True", f"{field}[0] >= True",
                       f"{field}[1] <= False", f"{field}[1] >= False",
                       f"{field}[0] < True", f"{field}[1] > True",
                       f"{field}[0] < False", f"{field}[0] > False",
                       f"{field}[0] == True && {field}[1] == False",
                       f"{field}[0] == True and {field}[1] == False",
                       f"{field}[0] == True || {field}[1] == False",
                       f"{field}[0] == True or {field}[1] == False",
                       f"{field}[0] in [True]", f"{field}[1] in [False]", f"{field}[0] in [True, False]",
                       f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
                       ]
    elif field in [ct.default_string_array_field_name]:
        expressions = [f"{field}[0] <= '1'", f"{field}[0] >= '1'",
                       f"{field}[0] < '2'", f"{field}[0] > '0'",
                       f"{field}[1] == '1'", f"{field}[1] != '1'",
                       f"{field}[1] like '1%'", f"{field}[1] like '%1'",
                       f"{field}[1] like '%1%'", f"{field}[1] LIKE '1%'",
                       f"{field}[1] LIKE '%1'", f"{field}[1] LIKE '%1%'",
                       f"{field}[1] == '1' && {field}[2] == '2'",
                       f"{field}[1] == '1' and {field}[2] == '2'",
                       f"{field}[0] == '1' || {field}[2] == '2'",
                       f"{field}[0] == '1' or {field}[2] == '2'",
                       f"{field}[1] >= '1' || {field}[2] <= '2'",
                       f"{field}[1] >= '1' or {field}[2] <= '2'",
                       f"{field}[0] in ['0']", f"{field}[1] in ['1', '2']",
                       f"{field} is null", f"{field} IS NULL", f"{field} is not null", f"{field} IS NOT NULL"
                       ]
    else:
        raise Exception("Invalid field name")

    return expressions
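

# For reference, a minimal usage sketch (illustrative demo helper only; the tests
# call the generator via cf.gen_field_expressions_all_single_operator_each_field):
def _demo_single_field_exprs():
    # Each returned entry is one self-contained boolean filter string for the
    # chosen field, e.g. "int64 <= 1", "int64 is null", "int64 in [1]".
    exprs = gen_field_expressions_all_single_operator_each_field(ct.default_int64_field_name)
    for expr in exprs:
        log.info(expr)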


def concatenate_uneven_arrays(arr1, arr2):
    """
    Concatenate the elements of two arrays of (possibly) different lengths
    """
    max_len = max(len(arr1), len(arr2))
    result = []
    op_list = ["and", "or", "&&", "||"]
    for i in range(max_len):
        a = arr1[i] if i < len(arr1) else ""
        b = arr2[i] if i < len(arr2) else ""
        if a == "" or b == "":
            result.append(a + b)
        else:
            random_op = op_list[random.randint(0, len(op_list) - 1)]
            result.append(a + " " + random_op + " " + b)

    return result
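

# For reference, an illustrative demo helper (hypothetical inputs): shows the
# pairwise joining with a random logical operator and the unchanged passthrough
# of the longer list's tail.
def _demo_concatenate_uneven_arrays():
    left = ["int64 == 1", "int64 > 0", "bool is null"]
    right = ["float >= 1.0"]
    combined = concatenate_uneven_arrays(left, right)
    # e.g. ["int64 == 1 && float >= 1.0", "int64 > 0", "bool is null"]
    return combined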


def gen_multiple_field_expressions(field_name_list=[], random_field_number=0, expr_number=1):
    """
    Gen an expression including multiple fields
    parameters:
        field_name_list: the fields to be filtered. If this parameter is specified, the names should be
            in the following field name list (both repeated and non-repeated field names are supported):
            all_fields = [ct.default_int8_field_name, ct.default_int16_field_name,
                          ct.default_int32_field_name, ct.default_int64_field_name,
                          ct.default_float_field_name, ct.default_double_field_name,
                          ct.default_string_field_name, ct.default_bool_field_name,
                          ct.default_int8_array_field_name, ct.default_int16_array_field_name,
                          ct.default_int32_array_field_name, ct.default_int64_array_field_name,
                          ct.default_bool_array_field_name, ct.default_float_array_field_name,
                          ct.default_double_array_field_name, ct.default_string_array_field_name]
        random_field_number: the number of fields to be filtered. If this parameter is specified, the
            fields are randomly selected from the above field name list (all_fields).
            If random_field_number <= len(all_fields), the fields are selected without repetition.
            If random_field_number > len(all_fields), (random_field_number - len(all_fields)) of the
            selected fields are repeated.
        expr_number: the number of expressions for each field
    return:
        expressions_fields: all the expressions for multiple fields
        field_name_list: the field name list used for the filtered expressions
    """
    if not isinstance(field_name_list, list):
        raise Exception("parameter field_name_list should be a list of all the fields to be filtered")
    if random_field_number < 0:
        raise Exception("random_field_number should be greater than or equal to 0")
    if not isinstance(expr_number, int):
        raise Exception("parameter expr_number should be an integer")
    log.info(field_name_list)
    log.info(random_field_number)
    if len(field_name_list) != 0 and random_field_number != 0:
        raise Exception("field_name_list and random_field_number cannot both be specified")

    field_name_list_cp = field_name_list.copy()

    all_fields = [ct.default_int8_field_name, ct.default_int16_field_name,
                  ct.default_int32_field_name, ct.default_int64_field_name,
                  ct.default_float_field_name, ct.default_double_field_name,
                  ct.default_string_field_name, ct.default_bool_field_name,
                  ct.default_int8_array_field_name, ct.default_int16_array_field_name,
                  ct.default_int32_array_field_name, ct.default_int64_array_field_name,
                  ct.default_bool_array_field_name, ct.default_float_array_field_name,
                  ct.default_double_array_field_name, ct.default_string_array_field_name]

    if len(field_name_list) == 0 and random_field_number != 0:
        if random_field_number <= len(all_fields):
            random_array = random.sample(range(len(all_fields)), random_field_number)
        else:
            random_array = random.sample(range(len(all_fields)), len(all_fields))
            for _ in range(random_field_number - len(all_fields)):
                random_array.append(random.randint(0, len(all_fields) - 1))
        for i in random_array:
            field_name_list_cp.append(all_fields[i])
    if len(field_name_list) == 0 and random_field_number == 0:
        field_name_list_cp = all_fields
    expressions_fields = gen_field_expressions_all_single_operator_each_field(field_name_list_cp[0])
    if len(field_name_list_cp) > 1:
        for field in field_name_list_cp[1:]:
            expressions = gen_field_expressions_all_single_operator_each_field(field)
            expressions_fields = concatenate_uneven_arrays(expressions_fields, expressions)

    return expressions_fields, field_name_list_cp
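

# For reference, an illustrative demo helper (hypothetical field choice): builds
# multi-field filters from two explicit fields; each returned expression joins
# one single-field filter per field with a random logical operator.
def _demo_multiple_field_exprs():
    exprs, used_fields = gen_multiple_field_expressions(
        field_name_list=[ct.default_int64_field_name, ct.default_string_field_name])
    # exprs[0] might look like "int64 <= 1 or varchar <= '1'"
    return exprs, used_fields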


def gen_array_field_expressions_and_templates():
    """

@@ -39,8 +39,13 @@ default_double_field_name = "double"
default_string_field_name = "varchar"
default_json_field_name = "json_field"
default_array_field_name = "int_array"
default_int8_array_field_name = "int8_array"
default_int16_array_field_name = "int16_array"
default_int32_array_field_name = "int32_array"
default_int64_array_field_name = "int64_array"
default_bool_array_field_name = "bool_array"
default_float_array_field_name = "float_array"
default_double_array_field_name = "double_array"
default_string_array_field_name = "string_array"
default_float_vec_field_name = "float_vector"
default_float16_vec_field_name = "float16_vector"
@@ -114,6 +119,8 @@ max_database_num = 64
max_collections_per_db = 65536
max_collection_num = 65536
max_hybrid_search_req_num = 1024
default_primary_key_field_name = "id"
default_vector_field_name = "vector"


IMAGE_REPOSITORY_MILVUS = "harbor.milvus.io/dockerhub/milvusdb/milvus"
@@ -239,7 +246,7 @@ get_wrong_format_dict = [

get_all_kind_data_distribution = [
    1, np.float64(1.0), np.double(1.0), 9707199254740993.0, 9707199254740992,
    '1', '123', '321', '213', True, False, [1, 2], [1.0, 2], None, {}, {"a": 1},
    '1', '123', '321', '213', True, False, None, [1, 2], [1.0, 2], {}, {"a": 1},
    {'a': 1.0}, {'a': 9707199254740993.0}, {'a': 9707199254740992}, {'a': '1'}, {'a': '123'},
    {'a': '321'}, {'a': '213'}, {'a': True}, {'a': [1, 2, 3]}, {'a': [1.0, 2, '1']}, {'a': [1.0, 2]},
    {'a': None}, {'a': {'b': 1}}, {'a': {'b': 1.0}}, {'a': [{'b': 1}, 2.0, np.double(3.0), '4', True, [1, 3.0], None]}
@@ -321,7 +328,14 @@ privilege_group_privilege_dict = {"Query": False, "Search": False, "GetLoadState
                                  "AlterDatabase": False, "FlushAll": False, "ListPrivilegeGroups": False,
                                  "CreatePrivilegeGroup": False, "DropPrivilegeGroup": False,
                                  "OperatePrivilegeGroup": False}

all_expr_fields = [default_int8_field_name, default_int16_field_name,
                   default_int32_field_name, default_int64_field_name,
                   default_float_field_name, default_double_field_name,
                   default_string_field_name, default_bool_field_name,
                   default_int8_array_field_name, default_int16_array_field_name,
                   default_int32_array_field_name, default_int64_array_field_name,
                   default_bool_array_field_name, default_float_array_field_name,
                   default_double_array_field_name, default_string_array_field_name]
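# Note: this list mirrors the per-field branches in
# common_func.gen_field_expressions_all_single_operator_each_field, so tests can
# parametrize "expr_field" over every supported expression field, e.g. (sketch):
#     @pytest.mark.parametrize("expr_field", ct.all_expr_fields)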

class CheckTasks:
    """ The name of the method used to check the result """

@@ -0,0 +1,906 @@
import pytest

from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
import numpy as np

prefix = "milvus_client_api_query"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name


class TestMilvusClientDataIntegrity(TestMilvusClientV2Base):
    """ Test case of data integrity interface """

    @pytest.fixture(scope="function", params=["INVERTED", "BITMAP"])
    def supported_bool_scalar_index(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=["STL_SORT", "INVERTED"])
    def supported_numeric_float_double_index(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=["STL_SORT", "INVERTED", "BITMAP"])
    def supported_numeric_scalar_index(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=["TRIE", "INVERTED", "BITMAP"])
    def supported_varchar_scalar_index(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=["INVERTED"])
    def supported_json_path_index(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=["INVERTED", "BITMAP"])
    def supported_array_scalar_index(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=["INVERTED"])
    def supported_array_double_float_scalar_index(self, request):
        yield request.param
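
    # For reference: each `params=[...]` fixture above makes pytest collect a
    # requesting test once per listed value, so a test requesting several of
    # these fixtures runs over the cartesian product of index types.
    # A minimal standalone sketch:
    #
    #     @pytest.fixture(scope="function", params=["INVERTED", "BITMAP"])
    #     def index_type(request):
    #         yield request.param
    #
    #     def test_fanout(index_type):
    #         assert index_type in ("INVERTED", "BITMAP")  # collected twice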

    """
    ******************************************************************
    #  The following are valid base cases
    ******************************************************************
    """

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("enable_dynamic_field", [False])
    @pytest.mark.parametrize("is_flush", [True])
    @pytest.mark.parametrize("is_release", [True])
    @pytest.mark.parametrize("single_data_num", [50])
    @pytest.mark.parametrize("expr_field", [ct.default_int64_field_name])
    def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array(self,
                                                                                            enable_dynamic_field,
                                                                                            supported_bool_scalar_index,
                                                                                            supported_numeric_float_double_index,
                                                                                            supported_numeric_scalar_index,
                                                                                            supported_varchar_scalar_index,
                                                                                            supported_json_path_index,
                                                                                            supported_array_scalar_index,
                                                                                            supported_array_double_float_scalar_index,
                                                                                            is_flush,
                                                                                            is_release,
                                                                                            single_data_num,
                                                                                            expr_field):
        """
        target: test query on all supported field types, after building all supported scalar indexes,
                under all supported basic expressions
        method: query on all supported fields, after all scalar indexes, with all supported basic expressions
        step: 1. create collection
              2. insert with different data distributions
              3. flush if specified
              4. query on each field without index, under all supported expressions
              5. release if specified
              6. prepare index params with all supported scalar indexes on all scalar fields
              7. create index
              8. create the same index twice
              9. reload the collection if released before, to make sure the new index is loaded
              10. otherwise sleep 60s to make sure the new index is loaded without release and reload
              11. query again with the index in place, under all supported expressions,
                  which should return the same result as without index
        expected: query succeeds with the index on each supported field under all expressions,
                  returning the same result as without index
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        default_dim = 5
        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
        if not enable_dynamic_field:
            schema.add_field(ct.default_bool_field_name, DataType.BOOL, nullable=True)
            schema.add_field(ct.default_int8_field_name, DataType.INT8, nullable=True)
            schema.add_field(ct.default_int16_field_name, DataType.INT16, nullable=True)
            schema.add_field(ct.default_int32_field_name, DataType.INT32, nullable=True)
            schema.add_field(ct.default_int64_field_name, DataType.INT64, nullable=True)
            schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
            schema.add_field(ct.default_double_field_name, DataType.DOUBLE, nullable=True)
            schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, nullable=True)
            schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=True)
            schema.add_field(ct.default_int8_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT8,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_int16_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT16,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_int32_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT32,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_int64_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT64,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_bool_array_field_name, datatype=DataType.ARRAY, element_type=DataType.BOOL,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_float_array_field_name, datatype=DataType.ARRAY, element_type=DataType.FLOAT,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_double_array_field_name, datatype=DataType.ARRAY, element_type=DataType.DOUBLE,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_string_array_field_name, datatype=DataType.ARRAY, element_type=DataType.VARCHAR,
                             max_capacity=5, max_length=100, nullable=True)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        self.create_collection(client, collection_name, schema=schema, index_params=index_params)
        # 2. insert with different data distribution
        vectors = cf.gen_vectors(default_nb + 60, default_dim)
        inserted_data_distribution = ct.get_all_kind_data_distribution
        nb_single = single_data_num
        for i in range(len(inserted_data_distribution)):
            rows = [{default_primary_key_field_name: j, default_vector_field_name: vectors[j],
                     ct.default_bool_field_name: bool(j) if (i % 2 == 0) else None,
                     ct.default_int8_field_name: np.int8(j) if (i % 2 == 0) else None,
                     ct.default_int16_field_name: np.int16(j) if (i % 2 == 0) else None,
                     ct.default_int32_field_name: np.int32(j) if (i % 2 == 0) else None,
                     ct.default_int64_field_name: j if (i % 2 == 0) else None,
                     ct.default_float_field_name: j * 1.0 if (i % 2 == 0) else None,
                     ct.default_double_field_name: j * 1.0 if (i % 2 == 0) else None,
                     ct.default_string_field_name: f'{j}' if (i % 2 == 0) else None,
                     ct.default_json_field_name: inserted_data_distribution[i],
                     ct.default_int8_array_field_name: [np.int8(j), np.int8(j)] if (i % 2 == 0) else None,
                     ct.default_int16_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_int32_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_int64_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_bool_array_field_name: [bool(j), bool(j + 1)] if (i % 2 == 0) else None,
                     ct.default_float_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
                     ct.default_double_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
                     ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
                     } for j in range(i * nb_single, (i + 1) * nb_single)]
            assert len(rows) == nb_single
            log.info(rows)
            self.insert(client, collection_name=collection_name, data=rows)
            log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
        # 3. flush if specified
        if is_flush:
            self.flush(client, collection_name)
        # 4. query when there is no index under all expressions
        express_list = cf.gen_field_expressions_all_single_operator_each_field(expr_field)
        compare_dict = {}
        for i in range(len(express_list)):
            json_list = []
            id_list = []
            log.info(f"query with filter '{express_list[i]}' before scalar index is:")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=["count(*)"])[0]
            count = res[0]['count(*)']
            log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=[f"{expr_field}"])[0]
            for single in res:
                id_list.append(single[f"{default_primary_key_field_name}"])
                json_list.append(single[f"{expr_field}"])
            assert count == len(id_list)
            assert count == len(json_list)
            compare_dict.setdefault(f'{i}', {})
            compare_dict[f'{i}']["id_list"] = id_list
            compare_dict[f'{i}']["json_list"] = json_list
        # 5. release if specified
        if is_release:
            self.release_collection(client, collection_name)
            self.drop_index(client, collection_name, default_vector_field_name)
        # 6. prepare index params with all supported scalar indexes and json path indexes
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=ct.default_bool_field_name, index_type=supported_bool_scalar_index)
        index_params.add_index(field_name=ct.default_int8_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_int16_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_int32_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_int64_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_float_field_name, index_type=supported_numeric_float_double_index)
        index_params.add_index(field_name=ct.default_double_field_name, index_type=supported_numeric_float_double_index)
        index_params.add_index(field_name=ct.default_string_field_name, index_type=supported_varchar_scalar_index)
        index_params.add_index(field_name=ct.default_int8_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_int16_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_int32_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_int64_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_bool_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_float_array_field_name,
                               index_type=supported_array_double_float_scalar_index)
        index_params.add_index(field_name=ct.default_double_array_field_name,
                               index_type=supported_array_double_float_scalar_index)
        index_params.add_index(field_name=ct.default_string_array_field_name, index_type=supported_array_scalar_index)
        json_index_name = "json_index_name"
        json_path_list = [f"{ct.default_json_field_name}",
                          f"{ct.default_json_field_name}[0]",
                          f"{ct.default_json_field_name}[1]",
                          f"{ct.default_json_field_name}[6]",
                          f"{ct.default_json_field_name}[10000]",
                          f"{ct.default_json_field_name}['a']",
                          f"{ct.default_json_field_name}['a']['b']",
                          f"{ct.default_json_field_name}['a'][0]",
                          f"{ct.default_json_field_name}['a'][6]",
                          f"{ct.default_json_field_name}['a'][0]['b']",
                          f"{ct.default_json_field_name}['a']['b']['c']",
                          f"{ct.default_json_field_name}['a']['b'][0]['d']",
                          f"{ct.default_json_field_name}['a']['c'][0]['d']"]
        for i in range(len(json_path_list)):
            index_params.add_index(field_name=ct.default_json_field_name, index_name=json_index_name + f'{i}',
                                   index_type=supported_json_path_index,
                                   params={"json_cast_type": "DOUBLE",
                                           "json_path": json_path_list[i]})
        # 7. create index
        self.create_index(client, collection_name, index_params)
        # 8. create the same index twice
        self.create_index(client, collection_name, index_params)
        # 9. reload collection if released before to make sure the new index loads successfully
        if is_release:
            self.load_collection(client, collection_name)
        else:
            # 10. sleep for 60s to make sure the new index loads successfully without release and reload
            time.sleep(60)
        # 11. query after the index is built under all expressions, which should return
        # the same result as without index
        for i in range(len(express_list)):
            json_list = []
            id_list = []
            log.info(f"query with filter '{express_list[i]}' after index is:")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=["count(*)"])[0]
            count = res[0]['count(*)']
            log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=[f"{expr_field}"])[0]
            for single in res:
                id_list.append(single[f"{default_primary_key_field_name}"])
                json_list.append(single[f"{expr_field}"])
            if len(json_list) != len(compare_dict[f'{i}']["json_list"]):
                log.debug(f"the field {expr_field} value after index {supported_array_scalar_index} under expression '{express_list[i]}' is:")
                log.debug(json_list)
                log.debug(f"the field {expr_field} value before index to be compared under expression '{express_list[i]}' is:")
                log.debug(compare_dict[f'{i}']["json_list"])
            assert json_list == compare_dict[f'{i}']["json_list"]
            if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
                log.debug(f"primary key field {default_primary_key_field_name} after index {supported_array_scalar_index} under expression '{express_list[i]}' is:")
                log.debug(id_list)
                log.debug(f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is:")
                log.debug(compare_dict[f'{i}']["id_list"])
            assert id_list == compare_dict[f'{i}']["id_list"]
            log.info(f"PASS with expression {express_list[i]}")
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("enable_dynamic_field", [False])
    @pytest.mark.parametrize("is_flush", [True, False])
    @pytest.mark.parametrize("is_release", [True, False])
    @pytest.mark.parametrize("single_data_num", [50])
    @pytest.mark.parametrize("expr_field", [ct.default_int8_field_name, ct.default_int16_field_name,
                                            ct.default_int32_field_name, ct.default_int64_field_name,
                                            ct.default_float_field_name, ct.default_double_field_name,
                                            ct.default_string_field_name, ct.default_bool_field_name,
                                            ct.default_int8_array_field_name, ct.default_int16_array_field_name,
                                            ct.default_int32_array_field_name, ct.default_int64_array_field_name,
                                            ct.default_bool_array_field_name, ct.default_float_array_field_name,
                                            ct.default_double_array_field_name, ct.default_string_array_field_name])
    def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_all(
            self, enable_dynamic_field, supported_bool_scalar_index, supported_numeric_float_double_index,
            supported_numeric_scalar_index, supported_varchar_scalar_index, supported_json_path_index,
            supported_array_scalar_index, supported_array_double_float_scalar_index,
            is_flush, is_release, single_data_num, expr_field):
        """
        target: test query on all supported field types, after building all supported scalar indexes,
                under all supported basic expressions
        method: query on all supported fields, after all scalar indexes, with all supported basic expressions
        step: 1. create collection
              2. insert with different data distributions
              3. flush if specified
              4. query on each field without index, under all supported expressions
              5. release if specified
              6. prepare index params with all supported scalar indexes on all scalar fields
              7. create index
              8. create the same index twice
              9. reload the collection if released before, to make sure the new index is loaded
              10. otherwise sleep 60s to make sure the new index is loaded without release and reload
              11. query again with the index in place, under all supported expressions,
                  which should return the same result as without index
        expected: query succeeds with the index on each supported field under all expressions,
                  returning the same result as without index
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        default_dim = 5
        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
        if not enable_dynamic_field:
            schema.add_field(ct.default_bool_field_name, DataType.BOOL, nullable=True)
            schema.add_field(ct.default_int8_field_name, DataType.INT8, nullable=True)
            schema.add_field(ct.default_int16_field_name, DataType.INT16, nullable=True)
            schema.add_field(ct.default_int32_field_name, DataType.INT32, nullable=True)
            schema.add_field(ct.default_int64_field_name, DataType.INT64, nullable=True)
            schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
            schema.add_field(ct.default_double_field_name, DataType.DOUBLE, nullable=True)
            schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, nullable=True)
            schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=True)
            schema.add_field(ct.default_int8_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT8,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_int16_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT16,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_int32_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT32,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_int64_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT64,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_bool_array_field_name, datatype=DataType.ARRAY, element_type=DataType.BOOL,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_float_array_field_name, datatype=DataType.ARRAY, element_type=DataType.FLOAT,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_double_array_field_name, datatype=DataType.ARRAY, element_type=DataType.DOUBLE,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_string_array_field_name, datatype=DataType.ARRAY, element_type=DataType.VARCHAR,
                             max_capacity=5, max_length=100, nullable=True)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        self.create_collection(client, collection_name, schema=schema, index_params=index_params)
        # 2. insert with different data distribution
        vectors = cf.gen_vectors(default_nb + 60, default_dim)
        inserted_data_distribution = ct.get_all_kind_data_distribution
        nb_single = single_data_num
        for i in range(len(inserted_data_distribution)):
            rows = [{default_primary_key_field_name: j, default_vector_field_name: vectors[j],
                     ct.default_bool_field_name: bool(j) if (i % 2 == 0) else None,
                     ct.default_int8_field_name: np.int8(j) if (i % 2 == 0) else None,
                     ct.default_int16_field_name: np.int16(j) if (i % 2 == 0) else None,
                     ct.default_int32_field_name: np.int32(j) if (i % 2 == 0) else None,
                     ct.default_int64_field_name: j if (i % 2 == 0) else None,
                     ct.default_float_field_name: j * 1.0 if (i % 2 == 0) else None,
                     ct.default_double_field_name: j * 1.0 if (i % 2 == 0) else None,
                     ct.default_string_field_name: f'{j}' if (i % 2 == 0) else None,
                     ct.default_json_field_name: inserted_data_distribution[i],
                     ct.default_int8_array_field_name: [np.int8(j), np.int8(j)] if (i % 2 == 0) else None,
                     ct.default_int16_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_int32_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_int64_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_bool_array_field_name: [bool(j), bool(j + 1)] if (i % 2 == 0) else None,
                     ct.default_float_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
                     ct.default_double_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
                     ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
                     } for j in range(i * nb_single, (i + 1) * nb_single)]
            assert len(rows) == nb_single
            log.info(rows)
            self.insert(client, collection_name=collection_name, data=rows)
            log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
        # 3. flush if specified
        if is_flush:
            self.flush(client, collection_name)
        # 4. query when there is no index under all expressions
        express_list = cf.gen_field_expressions_all_single_operator_each_field(expr_field)
        compare_dict = {}
        for i in range(len(express_list)):
            json_list = []
            id_list = []
            log.info(f"query with filter '{express_list[i]}' before scalar index is:")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=["count(*)"])[0]
            count = res[0]['count(*)']
            log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=[f"{expr_field}"])[0]
            for single in res:
                id_list.append(single[f"{default_primary_key_field_name}"])
                json_list.append(single[f"{expr_field}"])
            assert count == len(id_list)
            assert count == len(json_list)
            compare_dict.setdefault(f'{i}', {})
            compare_dict[f'{i}']["id_list"] = id_list
            compare_dict[f'{i}']["json_list"] = json_list
        # 5. release if specified
        if is_release:
            self.release_collection(client, collection_name)
            self.drop_index(client, collection_name, default_vector_field_name)
        # 6. prepare index params with all supported scalar indexes and json path indexes
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=ct.default_bool_field_name, index_type=supported_bool_scalar_index)
        index_params.add_index(field_name=ct.default_int8_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_int16_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_int32_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_int64_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_float_field_name, index_type=supported_numeric_float_double_index)
        index_params.add_index(field_name=ct.default_double_field_name, index_type=supported_numeric_float_double_index)
        index_params.add_index(field_name=ct.default_string_field_name, index_type=supported_varchar_scalar_index)
        index_params.add_index(field_name=ct.default_int8_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_int16_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_int32_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_int64_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_bool_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_float_array_field_name,
                               index_type=supported_array_double_float_scalar_index)
        index_params.add_index(field_name=ct.default_double_array_field_name,
                               index_type=supported_array_double_float_scalar_index)
        index_params.add_index(field_name=ct.default_string_array_field_name, index_type=supported_array_scalar_index)
        json_index_name = "json_index_name"
        json_path_list = [f"{ct.default_json_field_name}",
                          f"{ct.default_json_field_name}[0]",
                          f"{ct.default_json_field_name}[1]",
                          f"{ct.default_json_field_name}[6]",
                          f"{ct.default_json_field_name}[10000]",
                          f"{ct.default_json_field_name}['a']",
                          f"{ct.default_json_field_name}['a']['b']",
                          f"{ct.default_json_field_name}['a'][0]",
                          f"{ct.default_json_field_name}['a'][6]",
                          f"{ct.default_json_field_name}['a'][0]['b']",
                          f"{ct.default_json_field_name}['a']['b']['c']",
                          f"{ct.default_json_field_name}['a']['b'][0]['d']",
                          f"{ct.default_json_field_name}['a']['c'][0]['d']"]
        for i in range(len(json_path_list)):
            index_params.add_index(field_name=ct.default_json_field_name, index_name=json_index_name + f'{i}',
                                   index_type=supported_json_path_index,
                                   params={"json_cast_type": "DOUBLE",
                                           "json_path": json_path_list[i]})
        # 7. create index
        self.create_index(client, collection_name, index_params)
        # 8. create the same index twice
        self.create_index(client, collection_name, index_params)
        # 9. reload collection if released before to make sure the new index loads successfully
        if is_release:
            self.load_collection(client, collection_name)
        else:
            # 10. sleep for 60s to make sure the new index loads successfully without release and reload
            time.sleep(60)
        # 11. query after the index is built under all expressions, which should return
        # the same result as without index
        for i in range(len(express_list)):
            json_list = []
            id_list = []
            log.info(f"query with filter '{express_list[i]}' after index is:")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=["count(*)"])[0]
            count = res[0]['count(*)']
            log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=[f"{expr_field}"])[0]
            for single in res:
                id_list.append(single[f"{default_primary_key_field_name}"])
                json_list.append(single[f"{expr_field}"])
            if len(json_list) != len(compare_dict[f'{i}']["json_list"]):
                log.debug(f"the field {expr_field} value after index {supported_array_scalar_index} under expression '{express_list[i]}' is:")
                log.debug(json_list)
                log.debug(f"the field {expr_field} value before index to be compared under expression '{express_list[i]}' is:")
                log.debug(compare_dict[f'{i}']["json_list"])
            assert json_list == compare_dict[f'{i}']["json_list"]
            if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
                log.debug(f"primary key field {default_primary_key_field_name} after index {supported_array_scalar_index} under expression '{express_list[i]}' is:")
                log.debug(id_list)
                log.debug(f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is:")
                log.debug(compare_dict[f'{i}']["id_list"])
            assert id_list == compare_dict[f'{i}']["id_list"]
            log.info(f"PASS with expression {express_list[i]}")
        self.drop_collection(client, collection_name)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L3)
|
||||
@pytest.mark.parametrize("enable_dynamic_field", [False])
|
||||
@pytest.mark.parametrize("is_flush", [True, False])
|
||||
@pytest.mark.parametrize("is_release", [True, False])
|
||||
@pytest.mark.parametrize("single_data_num", [50])
|
||||
@pytest.mark.parametrize("expr_field", [ct.default_int8_field_name, ct.default_int16_field_name,
|
||||
ct.default_int32_field_name, ct.default_int64_field_name,
|
||||
ct.default_float_field_name, ct.default_double_field_name,
|
||||
ct.default_string_field_name, ct.default_bool_field_name,
|
||||
ct.default_int8_array_field_name, ct.default_int16_array_field_name,
|
||||
ct.default_int32_array_field_name,ct.default_int64_array_field_name,
|
||||
ct.default_bool_array_field_name, ct.default_float_array_field_name,
|
||||
ct.default_double_array_field_name, ct.default_string_array_field_name])
|
||||
def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_auto_index(self, enable_dynamic_field,
|
||||
supported_json_path_index,
|
||||
is_flush, is_release,
|
||||
single_data_num, expr_field):
|
||||
"""
|
||||
target: test query using expression fields with all supported field type after all supported scalar index
|
||||
with all supported basic expressions
|
||||
method: Query using expression on all supported fields after all scalar indexes with all supported basic expressions
|
||||
step: 1. create collection
|
||||
2. insert with different data distribution
|
||||
3. flush if specified
|
||||
4. query when there is no index applying on each field under all supported expressions
|
||||
5. release if specified
|
||||
6. prepare index params with all supported scalar index on all scalar fields
|
||||
7. create index
|
||||
8. create same index twice
|
||||
9. reload collection if released before to make sure the new index load successfully
|
||||
10. sleep for 60s to make sure the new index load successfully without release and reload operations
|
||||
11. query after there is index applying on each supported field under all supported expressions
|
||||
which should get the same result with that without index
|
||||
expected: query successfully after there is index applying on each supported field under all expressions which
|
||||
should get the same result with that without index
|
||||
"""
|
||||
client = self._client()
|
||||
collection_name = cf.gen_unique_str(prefix)
|
||||
# 1. create collection
|
||||
default_dim = 5
|
||||
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
|
||||
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
|
||||
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
|
||||
if not enable_dynamic_field:
|
||||
schema.add_field(ct.default_bool_field_name, DataType.BOOL, nullable=True)
|
||||
schema.add_field(ct.default_int8_field_name, DataType.INT8, nullable=True)
|
||||
schema.add_field(ct.default_int16_field_name, DataType.INT16, nullable=True)
|
||||
schema.add_field(ct.default_int32_field_name, DataType.INT32, nullable=True)
|
||||
schema.add_field(ct.default_int64_field_name, DataType.INT64, nullable=True)
|
||||
schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
|
||||
schema.add_field(ct.default_double_field_name, DataType.DOUBLE, nullable=True)
|
||||
schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, nullable=True)
|
||||
schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=True)
|
||||
schema.add_field(ct.default_int8_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT8,
|
||||
max_capacity=5, nullable=True)
|
||||
schema.add_field(ct.default_int16_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT16,
|
||||
max_capacity=5, nullable=True)
|
||||
schema.add_field(ct.default_int32_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT32,
|
||||
max_capacity=5, nullable=True)
|
||||
schema.add_field(ct.default_int64_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT64,
|
||||
max_capacity=5, nullable=True)
|
||||
schema.add_field(ct.default_bool_array_field_name, datatype=DataType.ARRAY, element_type=DataType.BOOL,
|
||||
max_capacity=5, nullable=True)
|
||||
schema.add_field(ct.default_float_array_field_name, datatype=DataType.ARRAY, element_type=DataType.FLOAT,
|
||||
max_capacity=5, nullable=True)
|
||||
schema.add_field(ct.default_double_array_field_name, datatype=DataType.ARRAY, element_type=DataType.DOUBLE,
|
||||
max_capacity=5, nullable=True)
|
||||
schema.add_field(ct.default_string_array_field_name, datatype=DataType.ARRAY, element_type=DataType.VARCHAR,
|
||||
max_capacity=5, max_length=100, nullable=True)
|
||||
index_params = self.prepare_index_params(client)[0]
|
||||
index_params.add_index(default_vector_field_name, metric_type="COSINE")
|
||||
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
|
||||
# 2. insert with different data distribution
|
||||
vectors = cf.gen_vectors(default_nb+60, default_dim)
|
||||
inserted_data_distribution = ct.get_all_kind_data_distribution
|
||||
nb_single = single_data_num
|
||||
for i in range(len(inserted_data_distribution)):
|
||||
rows = [{default_primary_key_field_name: j, default_vector_field_name: vectors[j],
|
||||
ct.default_bool_field_name: bool(j) if (i % 2 == 0) else None,
|
||||
ct.default_int8_field_name: np.int8(j) if (i % 2 == 0) else None,
|
||||
ct.default_int16_field_name: np.int16(j) if (i % 2 == 0) else None,
|
||||
ct.default_int32_field_name: np.int32(j) if (i % 2 == 0) else None,
|
||||
ct.default_int64_field_name: j if (i % 2 == 0) else None,
|
||||
ct.default_float_field_name: j*1.0 if (i % 2 == 0) else None,
|
||||
ct.default_double_field_name: j*1.0 if (i % 2 == 0) else None,
|
||||
ct.default_string_field_name: f'{j}' if (i % 2 == 0) else None,
|
||||
ct.default_json_field_name: inserted_data_distribution[i],
|
||||
ct.default_int8_array_field_name: [np.int8(j), np.int8(j)] if (i % 2 == 0) else None,
|
||||
ct.default_int16_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
|
||||
ct.default_int32_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
|
||||
ct.default_int64_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
|
||||
ct.default_bool_array_field_name: [bool(j), bool(j + 1)] if (i % 2 == 0) else None,
|
||||
ct.default_float_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
|
||||
ct.default_double_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
|
||||
ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
|
||||
} for j in range(i * nb_single, (i + 1) * nb_single)]
|
||||
assert len(rows) == nb_single
|
||||
log.info(rows)
|
||||
self.insert(client, collection_name=collection_name, data=rows)
|
||||
log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
|
||||
# 3. flush if specified
|
||||
if is_flush:
|
||||
self.flush(client, collection_name)
|
||||
        # 4. query under all expressions while there is no index
        express_list = cf.gen_field_expressions_all_single_operator_each_field(expr_field)
        compare_dict = {}
        for i in range(len(express_list)):
            json_list = []
            id_list = []
            log.info(f"query with filter '{express_list[i]}' before scalar index is:")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=["count(*)"])[0]
            count = res[0]['count(*)']
            log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=[f"{expr_field}"])[0]
            for single in res:
                id_list.append(single[f"{default_primary_key_field_name}"])
                json_list.append(single[f"{expr_field}"])
            assert count == len(id_list)
            assert count == len(json_list)
            compare_dict.setdefault(f'{i}', {})
            compare_dict[f'{i}']["id_list"] = id_list
            compare_dict[f'{i}']["json_list"] = json_list
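        # compare_dict now holds the no-index query results per expression and serves as
        # the ground truth for the post-index queries in step 11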
        # 5. release if specified
        if is_release:
            self.release_collection(client, collection_name)
            self.drop_index(client, collection_name, default_vector_field_name)
        # 6. prepare index params with scalar indexes and json path index
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=ct.default_bool_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_int8_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_int16_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_int32_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_int64_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_float_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_double_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_string_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_int8_array_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_int16_array_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_int32_array_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_int64_array_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_bool_array_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_float_array_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_double_array_field_name, index_type="AUTOINDEX")
        index_params.add_index(field_name=ct.default_string_array_field_name, index_type="AUTOINDEX")
json_index_name = "json_index_name"
|
||||
json_path_list = [f"{ct.default_json_field_name}",
|
||||
f"{ct.default_json_field_name}[0]",
|
||||
f"{ct.default_json_field_name}[1]",
|
||||
f"{ct.default_json_field_name}[6]",
|
||||
f"{ct.default_json_field_name}[10000]",
|
||||
f"{ct.default_json_field_name}['a']",
|
||||
f"{ct.default_json_field_name}['a']['b']",
|
||||
f"{ct.default_json_field_name}['a'][0]",
|
||||
f"{ct.default_json_field_name}['a'][6]",
|
||||
f"{ct.default_json_field_name}['a'][0]['b']",
|
||||
f"{ct.default_json_field_name}['a']['b']['c']",
|
||||
f"{ct.default_json_field_name}['a']['b'][0]['d']",
|
||||
f"{ct.default_json_field_name}['a']['c'][0]['d']"]
|
||||
for i in range(len(json_path_list)):
|
||||
index_params.add_index(field_name=ct.default_json_field_name, index_name=json_index_name + f'{i}',
|
||||
index_type=supported_json_path_index,
|
||||
params={"json_cast_type": "DOUBLE",
|
||||
"json_path": json_path_list[i]})
|
||||
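        # each json path above gets a dedicated index entry; "json_cast_type": "DOUBLE" casts the
        # indexed values to double, so non-numeric comparisons on these paths are presumably
        # answered by falling back to a scan rather than the index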
        # 7. create index
        self.create_index(client, collection_name, index_params)
        # 8. create the same index twice
        self.create_index(client, collection_name, index_params)
        # 9. reload collection if released before to make sure the new index loads successfully
        if is_release:
            self.load_collection(client, collection_name)
        else:
            # 10. sleep for 60s to make sure the new index loads successfully without release and reload
            time.sleep(60)
        # 11. query after the index is built under all expressions, which should return
        # the same result as without the index
        for i in range(len(express_list)):
            json_list = []
            id_list = []
            log.info(f"query with filter '{express_list[i]}' after index is:")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=["count(*)"])[0]
            # extract the scalar count instead of keeping the raw result list
            count = res[0]['count(*)']
            log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=[f"{expr_field}"])[0]
            for single in res:
                id_list.append(single[f"{default_primary_key_field_name}"])
                json_list.append(single[f"{expr_field}"])
            if len(json_list) != len(compare_dict[f'{i}']["json_list"]):
                log.debug(f"the field {expr_field} value after index 'AUTOINDEX' under expression '{express_list[i]}' is:")
                log.debug(json_list)
                log.debug(f"the field {expr_field} value before index to be compared under expression '{express_list[i]}' is:")
                log.debug(compare_dict[f'{i}']["json_list"])
            assert json_list == compare_dict[f'{i}']["json_list"]
            if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
                log.debug(f"primary key field {default_primary_key_field_name} after index 'AUTOINDEX' under expression '{express_list[i]}' is:")
                log.debug(id_list)
                log.debug(f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is:")
                log.debug(compare_dict[f'{i}']["id_list"])
            assert id_list == compare_dict[f'{i}']["id_list"]
            log.info(f"PASS with expression {express_list[i]}")
        self.drop_collection(client, collection_name)

    @pytest.mark.tags(CaseLabel.L3)
    @pytest.mark.parametrize("enable_dynamic_field", [False])
    @pytest.mark.parametrize("is_flush", [True, False])
    @pytest.mark.parametrize("is_release", [True, False])
    @pytest.mark.parametrize("single_data_num", [50])
    @pytest.mark.parametrize("random_filter_field_number", [2, 6, 16])
    def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_multiple_fields(
            self, enable_dynamic_field, supported_bool_scalar_index, supported_numeric_float_double_index,
            supported_numeric_scalar_index, supported_varchar_scalar_index, supported_json_path_index,
            supported_array_scalar_index, supported_array_double_float_scalar_index, is_flush, is_release,
            single_data_num, random_filter_field_number):
"""
|
||||
target: test query using expression fields with all supported field type after all supported scalar index
|
||||
with all supported basic expressions
|
||||
method: Query using expression on all supported fields after all scalar indexes with all supported basic expressions
|
||||
step: 1. create collection
|
||||
2. insert with different data distribution
|
||||
3. flush if specified
|
||||
4. query when there is no index applying on each field under all supported expressions
|
||||
5. release if specified
|
||||
6. prepare index params with all supported scalar index on all scalar fields
|
||||
7. create index
|
||||
8. create same index twice
|
||||
9. reload collection if released before to make sure the new index load successfully
|
||||
10. sleep for 60s to make sure the new index load successfully without release and reload operations
|
||||
11. query after there is index applying on each supported field under all supported expressions
|
||||
which should get the same result with that without index
|
||||
expected: query successfully after there is index applying on each supported field under all expressions which
|
||||
should get the same result with that without index
|
||||
"""
|
||||
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection
        default_dim = 5
        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
        if not enable_dynamic_field:
            schema.add_field(ct.default_bool_field_name, DataType.BOOL, nullable=True)
            schema.add_field(ct.default_int8_field_name, DataType.INT8, nullable=True)
            schema.add_field(ct.default_int16_field_name, DataType.INT16, nullable=True)
            schema.add_field(ct.default_int32_field_name, DataType.INT32, nullable=True)
            schema.add_field(ct.default_int64_field_name, DataType.INT64, nullable=True)
            schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
            schema.add_field(ct.default_double_field_name, DataType.DOUBLE, nullable=True)
            schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, nullable=True)
            schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=True)
            schema.add_field(ct.default_int8_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT8,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_int16_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT16,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_int32_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT32,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_int64_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT64,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_bool_array_field_name, datatype=DataType.ARRAY, element_type=DataType.BOOL,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_float_array_field_name, datatype=DataType.ARRAY, element_type=DataType.FLOAT,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_double_array_field_name, datatype=DataType.ARRAY, element_type=DataType.DOUBLE,
                             max_capacity=5, nullable=True)
            schema.add_field(ct.default_string_array_field_name, datatype=DataType.ARRAY, element_type=DataType.VARCHAR,
                             max_capacity=5, max_length=100, nullable=True)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        self.create_collection(client, collection_name, schema=schema, index_params=index_params)
        # 2. insert with different data distribution
        vectors = cf.gen_vectors(default_nb + 60, default_dim)
        inserted_data_distribution = ct.get_all_kind_data_distribution
        nb_single = single_data_num
        for i in range(len(inserted_data_distribution)):
            rows = [{default_primary_key_field_name: j, default_vector_field_name: vectors[j],
                     ct.default_bool_field_name: bool(j) if (i % 2 == 0) else None,
                     ct.default_int8_field_name: np.int8(j) if (i % 2 == 0) else None,
                     ct.default_int16_field_name: np.int16(j) if (i % 2 == 0) else None,
                     ct.default_int32_field_name: np.int32(j) if (i % 2 == 0) else None,
                     ct.default_int64_field_name: j if (i % 2 == 0) else None,
                     ct.default_float_field_name: j * 1.0 if (i % 2 == 0) else None,
                     ct.default_double_field_name: j * 1.0 if (i % 2 == 0) else None,
                     ct.default_string_field_name: f'{j}' if (i % 2 == 0) else None,
                     ct.default_json_field_name: inserted_data_distribution[i],
                     ct.default_int8_array_field_name: [np.int8(j), np.int8(j)] if (i % 2 == 0) else None,
                     ct.default_int16_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_int32_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_int64_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_bool_array_field_name: [bool(j), bool(j + 1)] if (i % 2 == 0) else None,
                     ct.default_float_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
                     ct.default_double_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
                     ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
                     } for j in range(i * nb_single, (i + 1) * nb_single)]
            assert len(rows) == nb_single
            self.insert(client, collection_name=collection_name, data=rows)
            log.debug(f"inserted {nb_single} {inserted_data_distribution[i]}")
        # 3. flush if specified
        if is_flush:
            self.flush(client, collection_name)
        # 4. query under all expressions while there is no index
        express_list, field_lists = cf.gen_multiple_field_expressions(random_field_number=random_filter_field_number)
        compare_dict = {}
        for i in range(len(express_list)):
            id_list = []
            log.info(f"query with filter '{express_list[i]}' before scalar index is:")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=["count(*)"])[0]
            count = res[0]['count(*)']
            log.info(f"The count(*) after query with filter '{express_list[i]}' before scalar index is: {count}")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=field_lists)[0]
            # init the compared dict with one empty list per filtered field
            one_dict = {}
            for field_name in field_lists:
                one_dict.setdefault(f'{field_name}', [])
            compare_dict.setdefault(f'{i}', one_dict)
            # extract and store the ids and output field values used for comparison after indexing
            for single in res:
                id_list.append(single[f"{default_primary_key_field_name}"])
                for field_name in field_lists:
                    compare_dict[f'{i}'][f'{field_name}'].append(single[f"{field_name}"])
            assert count == len(id_list)
            for field_name in field_lists:
                assert count == len(compare_dict[f'{i}'][f'{field_name}'])
            compare_dict[f'{i}']['id_list'] = id_list
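        # compare_dict layout: expression index -> {field_name: [values...], 'id_list': [pks...]},
        # all captured before any scalar index exists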
        # 5. release if specified
        if is_release:
            self.release_collection(client, collection_name)
            self.drop_index(client, collection_name, default_vector_field_name)
        # 6. prepare index params with all supported scalar indexes and json path index
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        index_params.add_index(field_name=ct.default_bool_field_name, index_type=supported_bool_scalar_index)
        index_params.add_index(field_name=ct.default_int8_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_int16_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_int32_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_int64_field_name, index_type=supported_numeric_scalar_index)
        index_params.add_index(field_name=ct.default_float_field_name, index_type=supported_numeric_float_double_index)
        index_params.add_index(field_name=ct.default_double_field_name, index_type=supported_numeric_float_double_index)
        index_params.add_index(field_name=ct.default_string_field_name, index_type=supported_varchar_scalar_index)
        index_params.add_index(field_name=ct.default_int8_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_int16_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_int32_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_int64_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_bool_array_field_name, index_type=supported_array_scalar_index)
        index_params.add_index(field_name=ct.default_float_array_field_name,
                               index_type=supported_array_double_float_scalar_index)
        index_params.add_index(field_name=ct.default_double_array_field_name,
                               index_type=supported_array_double_float_scalar_index)
        index_params.add_index(field_name=ct.default_string_array_field_name, index_type=supported_array_scalar_index)
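        # the index types above come from pytest fixtures, so every parametrized run covers a
        # different combination of the supported scalar index types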
json_index_name = "json_index_name"
|
||||
json_path_list = [f"{ct.default_json_field_name}",
|
||||
f"{ct.default_json_field_name}[0]",
|
||||
f"{ct.default_json_field_name}[1]",
|
||||
f"{ct.default_json_field_name}[6]",
|
||||
f"{ct.default_json_field_name}[10000]",
|
||||
f"{ct.default_json_field_name}['a']",
|
||||
f"{ct.default_json_field_name}['a']['b']",
|
||||
f"{ct.default_json_field_name}['a'][0]",
|
||||
f"{ct.default_json_field_name}['a'][6]",
|
||||
f"{ct.default_json_field_name}['a'][0]['b']",
|
||||
f"{ct.default_json_field_name}['a']['b']['c']",
|
||||
f"{ct.default_json_field_name}['a']['b'][0]['d']",
|
||||
f"{ct.default_json_field_name}['a']['c'][0]['d']"]
|
||||
for i in range(len(json_path_list)):
|
||||
index_params.add_index(field_name=ct.default_json_field_name, index_name=json_index_name + f'{i}',
|
||||
index_type=supported_json_path_index,
|
||||
params={"json_cast_type": "DOUBLE",
|
||||
"json_path": json_path_list[i]})
|
||||
        # 7. create index
        self.create_index(client, collection_name, index_params)
        # # 8. create the same index twice
        # self.create_index(client, collection_name, index_params)
        # 9. reload collection if released before to make sure the new index loads successfully
        if is_release:
            self.load_collection(client, collection_name)
        else:
            # 10. sleep for 60s to make sure the new index loads successfully without release and reload
            time.sleep(60)
        # 11. query after the index is built under all expressions, which should return
        # the same result as without the index
        for i in range(len(express_list)):
            id_list = []
            log.info(f"query with filter '{express_list[i]}' after index is:")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=["count(*)"])[0]
            # extract the scalar count instead of keeping the raw result list
            count = res[0]['count(*)']
            log.info(f"The count(*) after query with filter '{express_list[i]}' after index is: {count}")
            res = self.query(client, collection_name=collection_name, filter=express_list[i],
                             output_fields=field_lists)[0]
            # compare each filtered field before and after index
            for field_name in field_lists:
                json_list = []
                for single in res:
                    json_list.append(single[f"{field_name}"])
                if len(json_list) != len(compare_dict[f'{i}'][f'{field_name}']):
                    log.debug(f"the field {field_name} value after index under expression '{express_list[i]}' is: {json_list}")
                    log.debug(f"the field {field_name} value before index to be compared under expression '{express_list[i]}' is: {compare_dict[f'{i}'][f'{field_name}']}")
                assert json_list == compare_dict[f'{i}'][f'{field_name}']
            # compare ids before and after index
            for single in res:
                id_list.append(single[f"{default_primary_key_field_name}"])
            if len(id_list) != len(compare_dict[f'{i}']['id_list']):
                log.debug(f"primary key field {default_primary_key_field_name} after index under expression '{express_list[i]}' is: {id_list}")
                log.debug(f"primary key field {default_primary_key_field_name} before index to be compared under expression '{express_list[i]}' is: {compare_dict[f'{i}']['id_list']}")
            assert id_list == compare_dict[f'{i}']['id_list']
            log.info(f"PASS with expression {express_list[i]}")
        self.drop_collection(client, collection_name)
@ -0,0 +1,246 @@
import pytest

from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
import numpy as np

prefix = "milvus_client_api_query"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name

@pytest.mark.xdist_group("TestStaticFieldNoIndexAllExpr")
|
||||
class TestStaticFieldNoIndexAllExpr(TestMilvusClientV2Base):
|
||||
"""
|
||||
Scalar fields are not indexed, and verify DQL requests
|
||||
"""
|
||||
|
||||
def setup_class(self):
|
||||
super().setup_class(self)
|
||||
# init params
|
||||
self.collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
|
||||
self.enable_dynamic_field = False
|
||||
self.ground_truth = {}
|
||||
|
||||
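    # the xdist_group marker lets pytest-xdist schedule all tests of this class on the same
    # worker, so they can safely share the collection prepared once in prepare_data below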
@pytest.fixture(scope="class", autouse=True)
|
||||
def prepare_data(self, request):
|
||||
"""
|
||||
Initialize collection before test class runs
|
||||
"""
|
||||
# Get client connection
|
||||
client = self._client()
|
||||
|
||||
# Create collection
|
||||
# create schema
|
||||
        schema = self.create_schema(client, enable_dynamic_field=self.enable_dynamic_field)[0]
        schema.add_field(ct.default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
        schema.add_field(ct.default_vector_field_name, DataType.FLOAT_VECTOR, dim=ct.default_dim)
        schema.add_field(ct.default_bool_field_name, DataType.BOOL, nullable=True)
        schema.add_field(ct.default_int8_field_name, DataType.INT8, nullable=True)
        schema.add_field(ct.default_int16_field_name, DataType.INT16, nullable=True)
        schema.add_field(ct.default_int32_field_name, DataType.INT32, nullable=True)
        schema.add_field(ct.default_int64_field_name, DataType.INT64, nullable=True)
        schema.add_field(ct.default_float_field_name, DataType.FLOAT, nullable=True)
        schema.add_field(ct.default_double_field_name, DataType.DOUBLE, nullable=True)
        schema.add_field(ct.default_string_field_name, DataType.VARCHAR, max_length=100, nullable=True)
        schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=True)
        schema.add_field(ct.default_int8_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT8,
                         max_capacity=5, nullable=True)
        schema.add_field(ct.default_int16_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT16,
                         max_capacity=5, nullable=True)
        schema.add_field(ct.default_int32_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT32,
                         max_capacity=5, nullable=True)
        schema.add_field(ct.default_int64_array_field_name, datatype=DataType.ARRAY, element_type=DataType.INT64,
                         max_capacity=5, nullable=True)
        schema.add_field(ct.default_bool_array_field_name, datatype=DataType.ARRAY, element_type=DataType.BOOL,
                         max_capacity=5, nullable=True)
        schema.add_field(ct.default_float_array_field_name, datatype=DataType.ARRAY, element_type=DataType.FLOAT,
                         max_capacity=5, nullable=True)
        schema.add_field(ct.default_double_array_field_name, datatype=DataType.ARRAY, element_type=DataType.DOUBLE,
                         max_capacity=5, nullable=True)
        schema.add_field(ct.default_string_array_field_name, datatype=DataType.ARRAY, element_type=DataType.VARCHAR,
                         max_capacity=5, max_length=100, nullable=True)
        # prepare index params
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
        # create collection with the above schema and index params
        self.create_collection(client, self.collection_name, schema=schema,
                               index_params=index_params, force_teardown=False)
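        # force_teardown=False presumably tells the test base not to drop the collection in its
        # own teardown; cleanup is handled by the finalizer registered at the end of this fixture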
        # Generate vectors and all scalar data
        vectors = cf.gen_vectors(default_nb + 60, default_dim)
        inserted_data_distribution = ct.get_all_kind_data_distribution
        nb_single = 50
        rows_list = []
        for i in range(len(inserted_data_distribution)):
            rows = [{ct.default_primary_key_field_name: j, ct.default_vector_field_name: vectors[j],
                     ct.default_bool_field_name: bool(j) if (i % 2 == 0) else None,
                     ct.default_int8_field_name: np.int8(j) if (i % 2 == 0) else None,
                     ct.default_int16_field_name: np.int16(j) if (i % 2 == 0) else None,
                     ct.default_int32_field_name: np.int32(j) if (i % 2 == 0) else None,
                     ct.default_int64_field_name: j if (i % 2 == 0) else None,
                     ct.default_float_field_name: j * 1.0 if (i % 2 == 0) else None,
                     ct.default_double_field_name: j * 1.0 if (i % 2 == 0) else None,
                     ct.default_string_field_name: f'{j}' if (i % 2 == 0) else None,
                     ct.default_json_field_name: inserted_data_distribution[i],
                     ct.default_int8_array_field_name: [np.int8(j), np.int8(j)] if (i % 2 == 0) else None,
                     ct.default_int16_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_int32_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_int64_array_field_name: [j, j + 1] if (i % 2 == 0) else None,
                     ct.default_bool_array_field_name: [bool(j), bool(j + 1)] if (i % 2 == 0) else None,
                     ct.default_float_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
                     ct.default_double_array_field_name: [j * 1.0, (j + 1) * 1.0] if (i % 2 == 0) else None,
                     ct.default_string_array_field_name: [f'{j}', f'{j + 1}'] if (i % 2 == 0) else None
                     } for j in range(i * nb_single, (i + 1) * nb_single)]
            assert len(rows) == nb_single
            # insert
            self.insert(client, collection_name=self.collection_name, data=rows)
            log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
            rows_list.extend(rows)
        assert len(rows_list) == nb_single * len(inserted_data_distribution)
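        # rows_list is the client-side copy of everything inserted; the eval-based ground
        # truth below is computed from it instead of querying the server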
        # calculate the ground truth for all fields with their supported expressions
        expr_fields = ct.all_expr_fields
        compare_dict = {}
        for field in expr_fields:
            globals()[field] = rows_list[0][field]
        for field in expr_fields:
            express_list = cf.gen_field_expressions_all_single_operator_each_field(field)
            for i in range(len(express_list)):
                expression = express_list[i].replace("&&", "and").replace("||", "or")
                compare_dict.setdefault(field, {})
                # use a fresh dict per expression; sharing a single dict across expressions
                # would mix the expected results of different filters
                compare_dict[field].setdefault(f'{i}', {f'{field}': []})
                compare_dict[field][f'{i}'].setdefault("id_list", [])
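                # walk every inserted row, bind the current field's value to its global name,
                # then evaluate the python-compatible expression to build the expected results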
                for j in range(nb_single * len(inserted_data_distribution)):
                    globals()[field] = rows_list[j][field]
                    if (int8 is None) or (int16 is None) or (int32 is None) or (int64 is None) \
                            or (float is None) or (double is None) or (varchar is None) or (bool_field is None) \
                            or (int8_array is None) or (int16_array is None) or (int32_array is None) \
                            or (int64_array is None) or (bool_array is None) or (float_array is None) \
                            or (double_array is None) or (string_array is None):
                        # a null row only matches the "is null" expressions
                        if ("is null" in expression) or ("IS NULL" in expression):
                            compare_dict[field][f'{i}'][field].append(rows_list[j][field])
                            compare_dict[field][f'{i}']["id_list"].append(
                                rows_list[j][ct.default_primary_key_field_name])
                        continue
                    else:
                        # a non-null row matches "is not null" and never matches "is null"
                        if ("is not null" in expression) or ("IS NOT NULL" in expression):
                            compare_dict[field][f'{i}'][field].append(rows_list[j][field])
                            compare_dict[field][f'{i}']["id_list"].append(
                                rows_list[j][ct.default_primary_key_field_name])
                            continue
                        if ("is null" in expression) or ("IS NULL" in expression):
                            continue
                    if not expression or eval(expression):
                        compare_dict[field][f'{i}'][field].append(rows_list[j][field])
                        compare_dict[field][f'{i}']["id_list"].append(
                            rows_list[j][ct.default_primary_key_field_name])
        self.ground_truth = compare_dict
        # flush collection, segment sealed
        self.flush(client, self.collection_name)
        # load collection
        self.load_collection(client, self.collection_name)
        def teardown():
            self.drop_collection(self._client(), self.collection_name)

        request.addfinalizer(teardown)

def check_query_res(self, res, expr_field: str) -> list:
|
||||
""" Ensure that primary key field values are unique """
|
||||
real_data = {x[0]: x[1] for x in zip(self.insert_data.get(self.primary_field),
|
||||
self.insert_data.get(expr_field))}
|
||||
|
||||
if len(real_data) != len(self.insert_data.get(self.primary_field)):
|
||||
log.warning("[TestNoIndexDQLExpr] The primary key values are not unique, " +
|
||||
"only check whether the res value is within the inserted data")
|
||||
return [(r.get(self.primary_field), r.get(expr_field)) for r in res if
|
||||
r.get(expr_field) not in self.insert_data.get(expr_field)]
|
||||
|
||||
return [(r[self.primary_field], r[expr_field], real_data[r[self.primary_field]]) for r in res if
|
||||
r[expr_field] != real_data[r[self.primary_field]]]
|
||||
|
||||
    @pytest.mark.tags(CaseLabel.L3)
    @pytest.mark.parametrize("expr_field", ct.all_expr_fields)
    def test_milvus_client_query_all_field_type_all_data_distribution_all_expressions_array_all(self, expr_field):
"""
|
||||
target: test query using expression fields with all supported field type after all supported scalar index
|
||||
with all supported basic expressions
|
||||
method: Query using expression on all supported fields after all scalar indexes with all supported basic expressions
|
||||
step: 1. create collection
|
||||
2. insert with different data distribution
|
||||
3. flush if specified
|
||||
4. query when there is no index applying on each field under all supported expressions
|
||||
5. release if specified
|
||||
6. prepare index params with all supported scalar index on all scalar fields
|
||||
7. create index
|
||||
8. create same index twice
|
||||
9. reload collection if released before to make sure the new index load successfully
|
||||
10. sleep for 60s to make sure the new index load successfully without release and reload operations
|
||||
11. query after there is index applying on each supported field under all supported expressions
|
||||
which should get the same result with that without index
|
||||
expected: query successfully after there is index applying on each supported field under all expressions which
|
||||
should get the same result with that without index
|
||||
"""
|
||||
        client = self._client()

        express_list = cf.gen_field_expressions_all_single_operator_each_field(expr_field)
        compare_dict = self.ground_truth[expr_field]
        for i in range(len(express_list)):
            expression = express_list[i]
            json_list = []
            id_list = []
            log.info(f"query with filter '{expression}' without scalar index is:")
            res = self.query(client, collection_name=self.collection_name, filter=expression,
                             output_fields=["count(*)"])[0]
            # extract the scalar count from the result list before comparing it with an int
            count = res[0]['count(*)']
            log.info(f"The count(*) after query with filter '{expression}' without scalar index is: {count}")
            assert count == len(compare_dict[f'{i}']["id_list"])
            res = self.query(client, collection_name=self.collection_name, filter=expression,
                             output_fields=[expr_field])[0]
            for single in res:
                id_list.append(single[f"{default_primary_key_field_name}"])
                json_list.append(single[expr_field])
            if len(json_list) != len(compare_dict[f'{i}'][expr_field]):
                log.debug(f"the field {expr_field} value without scalar index under expression '{expression}' is:")
                log.debug(json_list)
                log.debug(f"the field {expr_field} value without scalar index to be compared under expression '{expression}' is:")
                log.debug(compare_dict[f'{i}'][expr_field])
            assert json_list == compare_dict[f'{i}'][expr_field]
            if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
                log.debug(f"primary key field {default_primary_key_field_name} without scalar index under expression '{expression}' is:")
                log.debug(id_list)
                log.debug(f"primary key field {default_primary_key_field_name} without scalar index to be compared under expression '{expression}' is:")
                log.debug(compare_dict[f'{i}']["id_list"])
            assert id_list == compare_dict[f'{i}']["id_list"]
            log.info(f"PASS with expression {expression}")

@ -2750,8 +2750,13 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("nullable", [True, False])
    @pytest.mark.parametrize("is_flush", [True, False])
    @pytest.mark.parametrize("is_release", [True, False])
    @pytest.mark.parametrize("is_scalar_index", [True, False])
    @pytest.mark.parametrize("scalar_index_type", ["AUTOINDEX", "INVERTED", "BITMAP"])
    def test_milvus_client_search_null_expr_array(self, nullable, null_expr_op):
    def test_milvus_client_search_null_expr_array(self, nullable, null_expr_op, is_flush, is_release,
                                                  is_scalar_index, scalar_index_type):
        """
        target: test search with null expression on array fields
        method: create connection, collection, insert and search
@ -2759,7 +2764,7 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
        """
        client = self._client()
        collection_name = cf.gen_collection_name_by_testcase_name()
        dim = 5
        dim = 128
        # 1. create collection
        nullable_field_name = "nullable_field"
        schema = self.create_schema(client, enable_dynamic_field=False)[0]
@ -2771,6 +2776,8 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
                         max_length=64, nullable=nullable)
        index_params = self.prepare_index_params(client)[0]
        index_params.add_index(default_vector_field_name, metric_type="COSINE")
        if is_scalar_index:
            index_params.add_index(nullable_field_name, index_type=scalar_index_type)
        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
        # 2. insert
        rng = np.random.default_rng(seed=19530)
@ -2781,6 +2788,18 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
        rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
                 default_string_field_name: str(i), "nullable_field": [1, 2]} for i in range(default_nb)]
        self.insert(client, collection_name, rows)
        if is_flush:
            self.flush(client, collection_name)
        if is_release:
            self.release_collection(client, collection_name)
            self.drop_index(client, collection_name, default_vector_field_name)
            self.drop_index(client, collection_name, nullable_field_name)
            index_params = self.prepare_index_params(client)[0]
            index_params.add_index(default_vector_field_name, metric_type="COSINE")
            if is_scalar_index:
                index_params.add_index(nullable_field_name, index_type=scalar_index_type)
            self.create_index(client, collection_name, index_params)
            self.load_collection(client, collection_name)
        # 3. search
        vectors_to_search = rng.random((1, dim))
        insert_ids = [str(i) for i in range(default_nb)]
@ -2789,7 +2808,6 @@ class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base):
        if "not" in null_expr or "NOT" in null_expr:
            insert_ids = []
            limit = 0

        else:
            limit = default_limit
        else: