From a47bb93c582b596af4d0693ee35b310ea5171992 Mon Sep 17 00:00:00 2001 From: wt Date: Thu, 3 Apr 2025 14:36:22 +0800 Subject: [PATCH] test: [cp] add more varchar expressions for query (#41083) pr: https://github.com/milvus-io/milvus/pull/39122 Signed-off-by: wangting0128 --- tests/python_client/common/common_func.py | 33 ++ .../testcases/test_mix_scenes.py | 282 ++++++++++++++---- 2 files changed, 257 insertions(+), 58 deletions(-) diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index f0a09349fa..eea6636783 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -2591,6 +2591,26 @@ def gen_modulo_expression(expr_fields): return exprs +def count_match_expr(values_l: list, rex_l: str, op: str, values_r: list, rex_r: str) -> list: + if len(values_l) != len(values_r): + raise ValueError(f"[count_match_expr] values not equal: {len(values_l)} != {len(values_r)}") + + res = [] + if op in ['and', '&&']: + for i in range(len(values_l)): + if re.search(rex_l, values_l[i]) and re.search(rex_r, values_r[i]): + res.append(i) + + elif op in ['or', '||']: + for i in range(len(values_l)): + if re.search(rex_l, values_l[i]) or re.search(rex_r, values_r[i]): + res.append(i) + + else: + raise ValueError(f"[count_match_expr] Not support op: {op}") + return res + + def gen_varchar_expression(expr_fields): exprs = [] for field in expr_fields: @@ -2605,6 +2625,19 @@ def gen_varchar_expression(expr_fields): return exprs +def gen_varchar_operation(expr_fields): + exprs = [] + for field in expr_fields: + exprs.extend([ + (Expr.EQ(field, '"a"').value, field, r'a'), + (Expr.GT(field, '"a"').value, field, r'[^a]'), + (Expr.GE(field, '"a"').value, field, r'.*'), + (Expr.LT(field, '"z"').value, field, r'[^z]'), + (Expr.LE(field, '"z"').value, field, r'.*') + ]) + return exprs + + def gen_varchar_unicode_expression(expr_fields): exprs = [] for field in expr_fields: diff --git a/tests/python_client/testcases/test_mix_scenes.py b/tests/python_client/testcases/test_mix_scenes.py index b9cf546fe5..081eb13d9a 100644 --- a/tests/python_client/testcases/test_mix_scenes.py +++ b/tests/python_client/testcases/test_mix_scenes.py @@ -41,7 +41,8 @@ class TestNoIndexDQLExpr(TestCaseClassBase): name=cf.gen_unique_str("test_no_index_dql_expr"), schema=cf.set_collection_schema( fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name, - DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields], + DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, + 'VARCHAR_1', *self().all_scalar_fields], field_params={ self.primary_field: FieldParams(is_primary=True).to_dict, DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict, @@ -52,7 +53,9 @@ class TestNoIndexDQLExpr(TestCaseClassBase): ) # prepare data (> 1024 triggering index building) - self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb) + self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={ + 'VARCHAR_1': cf.gen_varchar_data(1, self.nb) + }) @pytest.fixture(scope="class", autouse=True) def prepare_data(self): @@ -89,7 +92,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase): return [(r[self.primary_field], r[expr_field], real_data[r[self.primary_field]]) for r in res if r[expr_field] != real_data[r[self.primary_field]]] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expr, output_fields", [ 
(Expr.In(Expr.MOD('INT8', 13).subset, [0, 1, 2]).value, ['INT8']), (Expr.Nin(Expr.MOD('INT16', 100).subset, [10, 20, 30, 40]).value, ['INT16']), @@ -134,8 +137,9 @@ class TestNoIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, expr_field, rex", + cf.gen_varchar_expression(['VARCHAR']) + cf.gen_varchar_operation(['VARCHAR_1'])) @pytest.mark.parametrize("limit", [1, 10, 3000]) def test_no_index_query_with_string(self, expr, expr_field, limit, rex): """ @@ -158,7 +162,37 @@ class TestNoIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr_l, expr_field_l, rex_l", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.parametrize("expr_r, expr_field_r, rex_r", cf.gen_varchar_operation(['VARCHAR_1'])) + @pytest.mark.parametrize("expr_obj, op", [(Expr.AND, 'and'), (Expr.OR, 'or')]) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_no_index_query_with_mix_string( + self, expr_l, expr_field_l, rex_l, expr_r, expr_field_r, rex_r, expr_obj, op, limit): + """ + target: + 1. check mix string fields expression + method: + 1. prepare some data + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len(cf.count_match_expr(self.insert_data.get(expr_field_l, []), rex_l, op, + self.insert_data.get(expr_field_r, []), rex_r)) + + # query + res, _ = self.collection_wrap.query(expr=expr_obj(f"({expr_l})", f"({expr_r})").value, limit=limit, + output_fields=[expr_field_l, expr_field_r]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + # check query response data + assert self.check_query_res(res=res, expr_field=expr_field_l) == [] + assert self.check_query_res(res=res, expr_field=expr_field_r) == [] + + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize( "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE'])) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -183,7 +217,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)]) @pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64']) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -247,7 +281,8 @@ class TestHybridIndexDQLExpr(TestCaseClassBase): # init params self.primary_field, self.nb = "int64_pk", 10000 self.all_fields = [self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name, - DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields] + DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, + 'VARCHAR_1', *self().all_scalar_fields] # create a collection with fields self.collection_wrap.init_collection( @@ -266,6 +301,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase): # prepare data (> 1024 
triggering index building) self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={ 'VARCHAR': cf.gen_varchar_data(3, self.nb), + 'VARCHAR_1': cf.gen_varchar_data(1, self.nb), 'ARRAY_VARCHAR': [cf.gen_varchar_data(length=2, nb=random.randint(0, 10)) for _ in range(self.nb)] }) @@ -284,7 +320,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase): **DefaultVectorIndexParams.SPARSE_INVERTED_INDEX(DataType.SPARSE_FLOAT_VECTOR.name), **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name), # build Hybrid index - **DefaultScalarIndexParams.list_default([self.primary_field] + self.all_index_scalar_fields) + **DefaultScalarIndexParams.list_default([self.primary_field, 'VARCHAR_1'] + self.all_index_scalar_fields) } self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) @@ -332,8 +368,9 @@ class TestHybridIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, expr_field, rex", + cf.gen_varchar_expression(['VARCHAR']) + cf.gen_varchar_operation(['VARCHAR_1'])) @pytest.mark.parametrize("limit", [1, 10, 3000]) def test_hybrid_index_query_with_string(self, expr, expr_field, limit, rex): """ @@ -356,7 +393,37 @@ class TestHybridIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr_l, expr_field_l, rex_l", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.parametrize("expr_r, expr_field_r, rex_r", cf.gen_varchar_operation(['VARCHAR_1'])) + @pytest.mark.parametrize("expr_obj, op", [(Expr.AND, 'and'), (Expr.OR, 'or')]) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_hybrid_index_query_with_mix_string( + self, expr_l, expr_field_l, rex_l, expr_r, expr_field_r, rex_r, expr_obj, op, limit): + """ + target: + 1. check mix string fields expression + method: + 1. prepare some data + 2. query with the different expr and limit + 3. check query result + expected: + 1. 
query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len(cf.count_match_expr(self.insert_data.get(expr_field_l, []), rex_l, op, + self.insert_data.get(expr_field_r, []), rex_r)) + + # query + res, _ = self.collection_wrap.query(expr=expr_obj(f"({expr_l})", f"({expr_r})").value, limit=limit, + output_fields=[expr_field_l, expr_field_r]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + # check query response data + assert self.check_query_res(res=res, expr_field=expr_field_l) == [] + assert self.check_query_res(res=res, expr_field=expr_field_r) == [] + + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize( "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE'])) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -381,7 +448,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)]) @pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64']) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -427,7 +494,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase): check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": not_in_count}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("range_num, counts", [([1, 3], 50), ([2, 5], 50), ([3, 3], 100)]) @pytest.mark.parametrize("limit", [1, 10, 3000]) @pytest.mark.parametrize("expr_field", ['VARCHAR']) @@ -473,7 +540,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase): check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": not_in_count}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("length", [0, 5, 11]) @pytest.mark.parametrize("expr_obj", [Expr.array_length, Expr.ARRAY_LENGTH]) @pytest.mark.parametrize("expr_field", ['ARRAY_VARCHAR']) @@ -513,7 +580,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase): self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": self.nb}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) def test_hybrid_index_search_output_fields(self): """ target: @@ -550,7 +617,8 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase): # init params self.primary_field, self.nb = "int64_pk", 10000 self.all_fields = [self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name, - DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields] + DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, + 'VARCHAR_1', *self().all_scalar_fields] # create a collection with fields self.collection_wrap.init_collection( @@ -569,6 +637,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase): # prepare data (> 1024 triggering index building) self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={ 'VARCHAR': cf.gen_varchar_data(3, self.nb), + 'VARCHAR_1': cf.gen_varchar_data(1, self.nb), 'ARRAY_VARCHAR': [cf.gen_varchar_data(length=2, nb=random.randint(0, 10)) for _ in range(self.nb)] }) @@ -587,7 +656,8 @@ class 
TestInvertedIndexDQLExpr(TestCaseClassBase): **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name), **DefaultVectorIndexParams.BIN_FLAT(DataType.BINARY_VECTOR.name), # build INVERTED index - **DefaultScalarIndexParams.list_inverted([self.primary_field] + self.inverted_support_dtype_names) + **DefaultScalarIndexParams.list_inverted( + [self.primary_field, 'VARCHAR_1'] + self.inverted_support_dtype_names) } self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) @@ -635,8 +705,9 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, expr_field, rex", + cf.gen_varchar_expression(['VARCHAR']) + cf.gen_varchar_operation(['VARCHAR_1'])) @pytest.mark.parametrize("limit", [1, 10, 3000]) def test_inverted_index_query_with_string(self, expr, expr_field, limit, rex): """ @@ -659,7 +730,37 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr_l, expr_field_l, rex_l", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.parametrize("expr_r, expr_field_r, rex_r", cf.gen_varchar_operation(['VARCHAR_1'])) + @pytest.mark.parametrize("expr_obj, op", [(Expr.AND, 'and'), (Expr.OR, 'or')]) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_inverted_index_query_with_mix_string( + self, expr_l, expr_field_l, rex_l, expr_r, expr_field_r, rex_r, expr_obj, op, limit): + """ + target: + 1. check mix string fields expression + method: + 1. prepare some data + 2. query with the different expr and limit + 3. check query result + expected: + 1. 
query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len(cf.count_match_expr(self.insert_data.get(expr_field_l, []), rex_l, op, + self.insert_data.get(expr_field_r, []), rex_r)) + + # query + res, _ = self.collection_wrap.query(expr=expr_obj(f"({expr_l})", f"({expr_r})").value, limit=limit, + output_fields=[expr_field_l, expr_field_r]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + # check query response data + assert self.check_query_res(res=res, expr_field=expr_field_l) == [] + assert self.check_query_res(res=res, expr_field=expr_field_r) == [] + + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize( "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE'])) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -684,7 +785,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)]) @pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64']) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -730,7 +831,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase): check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": not_in_count}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("range_num, counts", [([1, 3], 50), ([2, 5], 50), ([3, 3], 100)]) @pytest.mark.parametrize("limit", [1, 10, 3000]) @pytest.mark.parametrize("expr_field", ['VARCHAR']) @@ -776,7 +877,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase): check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": not_in_count}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("length", [0, 5, 11]) @pytest.mark.parametrize("expr_obj", [Expr.array_length, Expr.ARRAY_LENGTH]) @pytest.mark.parametrize("expr_field", ['ARRAY_VARCHAR']) @@ -818,7 +919,8 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): # init params self.primary_field, self.nb = "int64_pk", 10000 self.all_fields = [self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name, - DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields] + DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, + "VARCHAR_1", *self().all_scalar_fields] # create a collection with fields self.collection_wrap.init_collection( @@ -837,6 +939,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): # prepare data (> 1024 triggering index building) self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={ 'VARCHAR': cf.gen_varchar_data(3, self.nb), + 'VARCHAR_1': cf.gen_varchar_data(1, self.nb), 'ARRAY_VARCHAR': [cf.gen_varchar_data(length=2, nb=random.randint(0, 10)) for _ in range(self.nb)] }) @@ -855,7 +958,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name), **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name), # build BITMAP index - **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) + **DefaultScalarIndexParams.list_bitmap(["VARCHAR_1"] + self.bitmap_support_dtype_names) } 
self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) @@ -927,8 +1030,9 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, expr_field, rex", + cf.gen_varchar_expression(['VARCHAR']) + cf.gen_varchar_operation(['VARCHAR_1'])) @pytest.mark.parametrize("limit", [1, 10, 3000]) def test_bitmap_index_query_with_string(self, expr, expr_field, limit, rex): """ @@ -951,7 +1055,37 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr_l, expr_field_l, rex_l", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.parametrize("expr_r, expr_field_r, rex_r", cf.gen_varchar_operation(['VARCHAR_1'])) + @pytest.mark.parametrize("expr_obj, op", [(Expr.AND, 'and'), (Expr.OR, 'or')]) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_bitmap_index_query_with_mix_string( + self, expr_l, expr_field_l, rex_l, expr_r, expr_field_r, rex_r, expr_obj, op, limit): + """ + target: + 1. check mix string fields expression + method: + 1. prepare some data + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len(cf.count_match_expr(self.insert_data.get(expr_field_l, []), rex_l, op, + self.insert_data.get(expr_field_r, []), rex_r)) + + # query + res, _ = self.collection_wrap.query(expr=expr_obj(f"({expr_l})", f"({expr_r})").value, limit=limit, + output_fields=[expr_field_l, expr_field_r]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + # check query response data + assert self.check_query_res(res=res, expr_field=expr_field_l) == [] + assert self.check_query_res(res=res, expr_field=expr_field_r) == [] + + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize( "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE'])) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -976,7 +1110,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)]) @pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64']) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -1022,7 +1156,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": not_in_count}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("range_num, counts", [([1, 3], 50), ([2, 5], 50), ([3, 3], 100)]) @pytest.mark.parametrize("limit", [1, 10, 3000]) @pytest.mark.parametrize("expr_field", ['VARCHAR']) @@ -1068,7 +1202,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): check_task=CheckTasks.check_query_results, check_items={"exp_res": 
[{"count(*)": not_in_count}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("length", [0, 5, 11]) @pytest.mark.parametrize("expr_obj", [Expr.array_length, Expr.ARRAY_LENGTH]) @pytest.mark.parametrize("expr_field", ['ARRAY_VARCHAR']) @@ -1108,7 +1242,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": self.nb}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("limit", [10, 1000]) @pytest.mark.parametrize("group_by_field", ['INT8', 'INT16', 'INT32', 'INT64', 'BOOL', 'VARCHAR']) @pytest.mark.parametrize( @@ -1138,7 +1272,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): assert len(values) == len(set(values)), f"values: {values}, output_values:{output_values}" - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("batch_size", [10, 1000]) def test_bitmap_index_search_iterator(self, batch_size): """ @@ -1156,7 +1290,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field, search_params, batch_size, expr='INT16 > 15', check_task=CheckTasks.check_search_iterator, check_items={"batch_size": batch_size}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) def test_bitmap_index_search_output_fields(self): """ target: @@ -1175,7 +1309,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase): check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field), "limit": limit, "output_fields": self.all_fields}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) def test_bitmap_index_hybrid_search(self): """ target: @@ -1228,7 +1362,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): # init params self.primary_field, self.nb = "int64_pk", 3000 - self.all_fields = [self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields] + self.all_fields = [self.primary_field, DataType.FLOAT_VECTOR.name, 'VARCHAR_1', *self().all_scalar_fields] # create a collection with fields self.collection_wrap.init_collection( @@ -1244,6 +1378,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): # prepare data (> 1024 triggering index building) self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={ 'VARCHAR': cf.gen_varchar_data(3, self.nb), + 'VARCHAR_1': cf.gen_varchar_data(1, self.nb), 'ARRAY_VARCHAR': [cf.gen_varchar_data(length=2, nb=random.randint(0, 10)) for _ in range(self.nb)] }) @@ -1258,13 +1393,13 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): index_params = { **DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name), # build BITMAP index - **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) + **DefaultScalarIndexParams.list_bitmap(['VARCHAR_1'] + self.bitmap_support_dtype_names) } self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) # enable offset cache - for index_name in self.bitmap_support_dtype_names: + for index_name in ['VARCHAR_1'] + self.bitmap_support_dtype_names: self.collection_wrap.alter_index(index_name=index_name, extra_params=AlterIndexParams.index_offset_cache()) # load collection @@ -1284,7 +1419,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): return [(r[self.primary_field], r[expr_field], 
real_data[r[self.primary_field]]) for r in res if r[expr_field] != real_data[r[self.primary_field]]] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64'])) @pytest.mark.parametrize("limit", [1, 10]) def test_bitmap_offset_cache_query_with_modulo(self, expr, expr_field, limit): @@ -1309,8 +1444,9 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr, expr_field, rex", + cf.gen_varchar_expression(['VARCHAR']) + cf.gen_varchar_operation(['VARCHAR_1'])) @pytest.mark.parametrize("limit", [1, 10]) def test_bitmap_offset_cache_query_with_string(self, expr, expr_field, limit, rex): """ @@ -1333,7 +1469,37 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("expr_l, expr_field_l, rex_l", cf.gen_varchar_expression(['VARCHAR'])) + @pytest.mark.parametrize("expr_r, expr_field_r, rex_r", cf.gen_varchar_operation(['VARCHAR_1'])) + @pytest.mark.parametrize("expr_obj, op", [(Expr.AND, 'and'), (Expr.OR, 'or')]) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_bitmap_offset_cache_query_with_mix_string( + self, expr_l, expr_field_l, rex_l, expr_r, expr_field_r, rex_r, expr_obj, op, limit): + """ + target: + 1. check mix string fields expression + method: + 1. prepare some data + 2. query with the different expr and limit + 3. check query result + expected: + 1. 
query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len(cf.count_match_expr(self.insert_data.get(expr_field_l, []), rex_l, op, + self.insert_data.get(expr_field_r, []), rex_r)) + + # query + res, _ = self.collection_wrap.query(expr=expr_obj(f"({expr_l})", f"({expr_r})").value, limit=limit, + output_fields=[expr_field_l, expr_field_r]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + # check query response data + assert self.check_query_res(res=res, expr_field=expr_field_l) == [] + assert self.check_query_res(res=res, expr_field=expr_field_r) == [] + + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize( "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE'])) @pytest.mark.parametrize("limit", [1, 10]) @@ -1358,7 +1524,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)]) @pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64']) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -1404,7 +1570,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": not_in_count}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("range_num, counts", [([1, 3], 50), ([2, 5], 50), ([3, 3], 100)]) @pytest.mark.parametrize("limit", [1, 10, 3000]) @pytest.mark.parametrize("expr_field", ['VARCHAR']) @@ -1450,7 +1616,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": not_in_count}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("length", [0, 5, 11]) @pytest.mark.parametrize("expr_obj", [Expr.array_length, Expr.ARRAY_LENGTH]) @pytest.mark.parametrize("expr_field", ['ARRAY_VARCHAR']) @@ -1474,7 +1640,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): self.collection_wrap.query(expr=expr, output_fields=['count(*)'], check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": expr_count}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) def test_bitmap_offset_cache_query_count(self): """ target: @@ -1490,7 +1656,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": self.nb}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) def test_bitmap_offset_cache_search_output_fields(self): """ target: @@ -1509,7 +1675,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase): check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field), "limit": limit, "output_fields": self.all_fields}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) def test_bitmap_offset_cache_hybrid_search(self): """ target: @@ -1609,7 +1775,7 @@ class TestBitmapIndexMmap(TestCaseClassBase): return [(r[self.primary_field], r[expr_field], real_data[r[self.primary_field]]) for r in res if r[expr_field] != real_data[r[self.primary_field]]] - @pytest.mark.tags(CaseLabel.L2) + 
@pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64'])) @pytest.mark.parametrize("limit", [1, 10]) def test_bitmap_mmap_query_with_modulo(self, expr, expr_field, limit): @@ -1634,7 +1800,7 @@ class TestBitmapIndexMmap(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR'])) @pytest.mark.parametrize("limit", [1, 10]) def test_bitmap_mmap_query_with_string(self, expr, expr_field, limit, rex): @@ -1658,7 +1824,7 @@ class TestBitmapIndexMmap(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize( "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE'])) @pytest.mark.parametrize("limit", [1, 10]) @@ -1683,7 +1849,7 @@ class TestBitmapIndexMmap(TestCaseClassBase): # check query response data assert self.check_query_res(res=res, expr_field=expr_field) == [] - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)]) @pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64']) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -1729,7 +1895,7 @@ class TestBitmapIndexMmap(TestCaseClassBase): check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": not_in_count}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) def test_bitmap_mmap_query_count(self): """ target: @@ -1745,7 +1911,7 @@ class TestBitmapIndexMmap(TestCaseClassBase): self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results, check_items={"exp_res": [{"count(*)": self.nb}]}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) def test_bitmap_mmap_search_output_fields(self): """ target: @@ -1764,7 +1930,7 @@ class TestBitmapIndexMmap(TestCaseClassBase): check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field), "limit": limit, "output_fields": self.all_fields}) - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) def test_bitmap_mmap_hybrid_search(self): """ target: @@ -1863,7 +2029,7 @@ class TestIndexUnicodeString(TestCaseClassBase): # load collection self.collection_wrap.load() - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_unicode_expression(['VARCHAR_BITMAP', 'VARCHAR_INVERTED'])) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -1885,7 +2051,7 @@ class TestIndexUnicodeString(TestCaseClassBase): res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" - @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("obj", cf.gen_varchar_unicode_expression_array( ['ARRAY_VARCHAR_BITMAP', 'ARRAY_VARCHAR_INVERTED', 'ARRAY_VARCHAR_NoIndex'])) @pytest.mark.parametrize("limit", [1, 10, 3000])
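# --- Illustrative sketch (assumption: standalone example for this review, not part of the patch) ---
# The mixed-string test cases added above compute their expected row count with the new
# cf.count_match_expr helper introduced in the common_func.py hunk: it walks two equally long
# value lists and keeps the indices where both regexes match ('and'/'&&') or where at least one
# matches ('or'/'||'). Below is a condensed, behavior-equivalent version with toy stand-ins for
# the generated VARCHAR / VARCHAR_1 columns; the sample values, regexes, and printed results are
# hypothetical and only demonstrate the helper's contract.
import re

def count_match_expr(values_l, rex_l, op, values_r, rex_r):
    # mirror the patch's precondition: both columns must have the same number of rows
    if len(values_l) != len(values_r):
        raise ValueError(f"[count_match_expr] values not equal: {len(values_l)} != {len(values_r)}")
    if op in ('and', '&&'):
        keep = lambda l, r: re.search(rex_l, l) and re.search(rex_r, r)
    elif op in ('or', '||'):
        keep = lambda l, r: re.search(rex_l, l) or re.search(rex_r, r)
    else:
        raise ValueError(f"[count_match_expr] Not support op: {op}")
    # return the row indices whose values satisfy the combined predicate
    return [i for i, (l, r) in enumerate(zip(values_l, values_r)) if keep(l, r)]

varchar = ["abc", "bcd", "aaa", "xyz"]    # toy stand-in for the 3-char VARCHAR column
varchar_1 = ["a", "b", "a", "z"]          # toy stand-in for the 1-char VARCHAR_1 column

print(count_match_expr(varchar, r'^a', 'and', varchar_1, r'a'))     # -> [0, 2]
print(count_match_expr(varchar, r'^a', 'or', varchar_1, r'[^a]'))   # -> [0, 1, 2, 3]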