From eecf229b59239ceb620d13c5459b8d5c8fb84874 Mon Sep 17 00:00:00 2001
From: nico <109071306+NicoYuan1986@users.noreply.github.com>
Date: Fri, 21 Jul 2023 18:38:59 +0800
Subject: [PATCH] Add test cases of json contain and binary index (#25808)

Signed-off-by: nico
---
 tests/python_client/testcases/test_index.py  |  20 +++
 tests/python_client/testcases/test_query.py  | 125 +++++++++++++++++++
 tests/python_client/testcases/test_search.py | 123 ++++++++++++++++--
 3 files changed, 260 insertions(+), 8 deletions(-)

diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py
index 33d72509b1..bfb55d52bc 100644
--- a/tests/python_client/testcases/test_index.py
+++ b/tests/python_client/testcases/test_index.py
@@ -1156,6 +1156,26 @@ class TestNewIndexBinary(TestcaseBase):
                                   check_items={ct.err_code: 1,
                                                ct.err_msg: "Invalid metric_type: L2, which does not match the index type: BIN_IVF_FLAT"})
 
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE", "JACCARD", "HAMMING", "TANIMOTO"])
+    def test_create_binary_index_HNSW(self, metric_type):
+        """
+        target: test create binary index hnsw
+        method: create an HNSW index on the binary vector field with each parametrized metric type
+        expected: succeed and report the same params, except TANIMOTO which is rejected with an error
+        """
+        c_name = cf.gen_unique_str(prefix)
+        collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
+        binary_index_params = {'index_type': 'HNSW', "M": '18', "efConstruction": '240', 'metric_type': metric_type}
+        if metric_type == "TANIMOTO":
+            collection_w.create_index(default_binary_vec_field_name, binary_index_params,
+                                      check_task=CheckTasks.err_res,
+                                      check_items={ct.err_code: 1,
+                                                   ct.err_msg: "metric type not found or not supported"})
+        else:
+            collection_w.create_index(default_binary_vec_field_name, binary_index_params)
+            assert collection_w.index()[0].params == binary_index_params
+
     """
     ******************************************************************
       The following cases are used to test `drop_index` function
diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py
index e37dd1708e..8c8db08bcb 100644
--- a/tests/python_client/testcases/test_query.py
+++ b/tests/python_client/testcases/test_query.py
@@ -556,6 +556,131 @@ class TestQueryParams(TestcaseBase):
         term_expr = f'{ct.default_int64_field_name} in [{constant}]'
         collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
 
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_query_expr_json_contains(self, enable_dynamic_field):
+        """
+        target: test query with json_contains on a list nested inside the JSON field
+        method: insert rows whose json_field['list'] is range(i, i + limit), then query json_contains(..., 1000)
+        expected: `limit` rows are returned for both the lower-case and upper-case function name
+        """
+        # 1. initialize with data
+        collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]
+
+        # 2. insert data
+        limit = 99
+        array = []
+        for i in range(ct.default_nb):
+            data = {
+                ct.default_int64_field_name: i,
+                ct.default_float_field_name: i * 1.0,
+                ct.default_string_field_name: str(i),
+                ct.default_json_field_name: {"number": i, "list": [m for m in range(i, i + limit)]},
+                ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0]
+            }
+            array.append(data)
+        collection_w.insert(array)
+
+        # 3. query
+        collection_w.load()
+        expressions = ["json_contains(json_field['list'], 1000)", "JSON_CONTAINS(json_field['list'], 1000)"]
+        for expression in expressions:
+            res = collection_w.query(expression)[0]
+            assert len(res) == limit
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_query_expr_list_json_contains(self):
+        """
+        target: test query with json_contains when the JSON field itself is a list of strings
+        method: insert rows whose json_field is [str(m) for m in range(i, i + limit)], then count matches of '1000'
+        expected: count(*) equals `limit`
+        """
+        # 1. initialize with data
+        collection_w = self.init_collection_general(prefix, enable_dynamic_field=True)[0]
+
+        # 2. insert data
+        limit = ct.default_nb // 4
+        array = []
+        for i in range(ct.default_nb):
+            data = {
+                ct.default_int64_field_name: i,
+                ct.default_json_field_name: [str(m) for m in range(i, i + limit)],
+                ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0]
+            }
+            array.append(data)
+        collection_w.insert(array)
+
+        # 3. query
+        collection_w.load()
+        expressions = ["json_contains(json_field, '1000')", "JSON_CONTAINS(json_field, '1000')"]
+        for expression in expressions:
+            res = collection_w.query(expression, output_fields=["count(*)"])[0]
+            assert res[0]["count(*)"] == limit
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_query_expr_json_contains_combined_with_normal(self, enable_dynamic_field):
+        """
+        target: test query with json_contains combined with a scalar comparison
+        method: query with json_contains(json_field['list'], tar) && float > tar - limit // 2
+        expected: limit // 2 rows are returned
+        """
+        # 1. initialize with data
+        collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]
+
+        # 2. insert data
+        limit = ct.default_nb // 3
+        array = []
+        for i in range(ct.default_nb):
+            data = {
+                ct.default_int64_field_name: i,
+                ct.default_float_field_name: i * 1.0,
+                ct.default_string_field_name: str(i),
+                ct.default_json_field_name: {"number": i, "list": [m for m in range(i, i + limit)]},
+                ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0]
+            }
+            array.append(data)
+        collection_w.insert(array)
+
+        # 3. query
+        collection_w.load()
+        tar = 1000
+        expressions = [f"json_contains(json_field['list'], {tar}) && float > {tar - limit // 2}",
+                       f"JSON_CONTAINS(json_field['list'], {tar}) && float > {tar - limit // 2}"]
+        for expression in expressions:
+            res = collection_w.query(expression)[0]
+            assert len(res) == limit // 2
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_query_expr_json_contains_pagination(self, enable_dynamic_field):
+        """
+        target: test query with json_contains together with limit and offset
+        method: query json_contains(json_field['list'], 1000) with limit=limit and a random offset in [1, limit]
+        expected: limit - offset rows are returned
+        """
+        # 1. initialize with data
+        collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]
+
+        # 2. insert data
+        limit = ct.default_nb // 3
+        array = []
+        for i in range(ct.default_nb):
+            data = {
+                ct.default_int64_field_name: i,
+                ct.default_float_field_name: i * 1.0,
+                ct.default_string_field_name: str(i),
+                ct.default_json_field_name: {"number": i, "list": [m for m in range(i, i + limit)]},
+                ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0]
+            }
+            array.append(data)
+        collection_w.insert(array)
+
+        # 3. query
+        collection_w.load()
+        expressions = ["json_contains(json_field['list'], 1000)", "JSON_CONTAINS(json_field['list'], 1000)"]
+        offset = random.randint(1, limit)
+        for expression in expressions:
+            res = collection_w.query(expression, limit=limit, offset=offset)[0]
+            assert len(res) == limit - offset
+
     @pytest.mark.tags(CaseLabel.L1)
     def test_query_output_field_none_or_empty(self, enable_dynamic_field):
         """
diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py
index f3d7a040b3..5aac6ebd1f 100644
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -40,6 +40,7 @@ default_int64_field_name = ct.default_int64_field_name
 default_float_field_name = ct.default_float_field_name
 default_bool_field_name = ct.default_bool_field_name
 default_string_field_name = ct.default_string_field_name
+default_json_field_name = ct.default_json_field_name
 default_index_params = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}}
 vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
 range_search_supported_index = ct.all_index_types[:6]
@@ -3028,6 +3029,112 @@ class TestCollectionSearch(TestcaseBase):
             ids = hits.ids
             assert set(ids).issubset(filter_ids_set)
 
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_search_with_expression_json_contains(self, enable_dynamic_field):
+        """
+        target: test search with expression using json_contains
+        method: search with json_contains(json_field['list'], 100), each row storing the list [i, i+1, i+2]
+        expected: search successfully; results are checked against nq=default_nq and limit=3
+        """
+        # 1. initialize with data
+        collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]
+
+        # 2. insert data
+        array = []
+        for i in range(default_nb):
+            data = {
+                default_int64_field_name: i,
+                default_float_field_name: i*1.0,
+                default_string_field_name: str(i),
+                default_json_field_name: {"number": i, "list": [i, i+1, i+2]},
+                default_float_vec_field_name: gen_vectors(1, default_dim)[0]
+            }
+            array.append(data)
+        collection_w.insert(array)
+
+        # 3. search
+        collection_w.load()
+        log.info("test_search_with_expression_json_contains: Searching collection %s" % collection_w.name)
+        expressions = ["json_contains(json_field['list'], 100)", "JSON_CONTAINS(json_field['list'], 100)"]
+        for expression in expressions:
+            collection_w.search(vectors[:default_nq], default_search_field,
+                                default_search_params, default_limit, expression,
+                                check_task=CheckTasks.check_search_results,
+                                check_items={"nq": default_nq,
+                                             "limit": 3})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_search_with_expression_json_contains_list(self, auto_id):
+        """
+        target: test search with json_contains when the JSON field itself is a list
+        method: search with json_contains(json_field, 100), each row storing range(i, i + limit) as the JSON value
+        expected: search successfully; results are checked against nq=default_nq and limit=limit
+        """
+        # 1. initialize with data
+        collection_w = self.init_collection_general(prefix, auto_id=auto_id, enable_dynamic_field=True)[0]
+
+        # 2. insert data
+        limit = 100
+        array = []
+        for i in range(default_nb):
+            data = {
+                default_int64_field_name: i,
+                default_json_field_name: [j for j in range(i, i + limit)],
+                default_float_vec_field_name: gen_vectors(1, default_dim)[0]
+            }
+            if auto_id:
+                data.pop(default_int64_field_name, None)
+            array.append(data)
+        collection_w.insert(array)
+
+        # 3. search
+        collection_w.load()
+        log.info("test_search_with_expression_json_contains_list: Searching collection %s" % collection_w.name)
+        expressions = ["json_contains(json_field, 100)", "JSON_CONTAINS(json_field, 100)"]
+        for expression in expressions:
+            collection_w.search(vectors[:default_nq], default_search_field,
+                                default_search_params, limit, expression,
+                                check_task=CheckTasks.check_search_results,
+                                check_items={"nq": default_nq,
+                                             "limit": limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_search_expression_json_contains_combined_with_normal(self, enable_dynamic_field):
+        """
+        target: test search with json_contains combined with a scalar comparison
+        method: search with json_contains(json_field['list'], '<tar>') && int64 > tar - limit // 2
+        expected: search successfully; results are checked against nq=default_nq and limit=limit // 2
+        """
+        # 1. initialize with data
+        collection_w = self.init_collection_general(prefix, enable_dynamic_field=enable_dynamic_field)[0]
+
+        # 2. insert data
+        limit = 100
+        array = []
+        for i in range(default_nb):
+            data = {
+                default_int64_field_name: i,
+                default_float_field_name: i * 1.0,
+                default_string_field_name: str(i),
+                default_json_field_name: {"number": i, "list": [str(j) for j in range(i, i + limit)]},
+                default_float_vec_field_name: gen_vectors(1, default_dim)[0]
+            }
+            array.append(data)
+        collection_w.insert(array)
+
+        # 3. search
+        collection_w.load()
+        log.info("test_search_expression_json_contains_combined_with_normal: Searching collection %s" % collection_w.name)
+        tar = 1000
+        expressions = [f"json_contains(json_field['list'], '{tar}') && int64 > {tar - limit // 2}",
+                       f"JSON_CONTAINS(json_field['list'], '{tar}') && int64 > {tar - limit // 2}"]
+        for expression in expressions:
+            collection_w.search(vectors[:default_nq], default_search_field,
+                                default_search_params, limit, expression,
+                                check_task=CheckTasks.check_search_results,
+                                check_items={"nq": default_nq,
+                                             "limit": limit // 2})
+
     @pytest.mark.tags(CaseLabel.L2)
     def test_search_expression_all_data_type(self, nb, nq, dim, auto_id, _async, enable_dynamic_field):
         """
@@ -3289,8 +3396,7 @@ class TestCollectionSearch(TestcaseBase):
                                          "output_fields": [field_name]})
 
     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip(reason="issue #23661")
-    @pytest.mark.parametrize("index", ct.all_index_types[6:8])
+    @pytest.mark.parametrize("index", ["HNSW", "BIN_FLAT", "BIN_IVF_FLAT"])
     def test_search_output_field_vector_after_binary_index(self, index):
         """
         target: test search with output vector field after binary index
@@ -3306,19 +3412,20 @@ class TestCollectionSearch(TestcaseBase):
         collection_w.insert(data)
 
         # 2. create index and load
-        default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "JACCARD"}
+        default_index = {"index_type": index, "metric_type": "JACCARD",
+                         "params": {"nlist": 128, "efConstruction": 64, "M": 10}}
         collection_w.create_index(binary_field_name, default_index)
         collection_w.load()
 
         # 3. search with output field vector
-        search_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
+        search_params = {"metric_type": "JACCARD"}
         binary_vectors = cf.gen_binary_vectors(1, default_dim)[1]
         res = collection_w.search(binary_vectors, binary_field_name,
-                                  ct.default_search_binary_params, 2, default_search_exp,
+                                  search_params, 2, default_search_exp,
                                   output_fields=[binary_field_name])[0]
 
         # 4. check the result vectors should be equal to the inserted
-        assert res[0][0].entity.binary_vector == data[binary_field_name][res[0][0].id]
+        assert res[0][0].entity.binary_vector == [data[binary_field_name][res[0][0].id]]
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("dim", [32, 128, 768])
@@ -5037,8 +5144,8 @@ class TestSearchPagination(TestcaseBase):
                                          default_search_exp, _async=_async,
                                          check_task=CheckTasks.check_search_results,
                                          check_items={"nq": default_nq,
-                                                     "limit": limit,
-                                                     "_async": _async})[0]
+                                                      "limit": limit,
+                                                      "_async": _async})[0]
         # 3. search with offset+limit
         res = collection_w.search(vectors[:default_nq], default_search_field,
                                   default_search_params, limit+offset, default_search_exp, _async=_async)[0]