From 345538d10a4f9860875fefc4038ec64444e60f36 Mon Sep 17 00:00:00 2001 From: binbin <83755740+binbinlv@users.noreply.github.com> Date: Mon, 10 Mar 2025 14:30:07 +0800 Subject: [PATCH] test: Add test cases for part of json path index (#40445) Signed-off-by: binbin lv --- tests/python_client/base/client_v2_base.py | 206 +++- tests/python_client/check/func_check.py | 34 + tests/python_client/common/common_type.py | 2 + .../test_milvus_client_collection.py | 4 +- .../test_milvus_client_compact.py | 173 +++ .../test_milvus_client_database.py | 3 +- .../test_milvus_client_hybrid_search.py | 392 +++++++ .../milvus_client/test_milvus_client_index.py | 795 ++++++++++++- .../test_milvus_client_insert.py | 33 + .../test_milvus_client_search.py | 1015 ++++++++++++++++- tests/python_client/requirements.txt | 4 +- tests/python_client/testcases/test_index.py | 8 +- tests/python_client/testcases/test_search.py | 1 - 13 files changed, 2629 insertions(+), 41 deletions(-) create mode 100644 tests/python_client/milvus_client/test_milvus_client_compact.py create mode 100644 tests/python_client/milvus_client/test_milvus_client_hybrid_search.py diff --git a/tests/python_client/base/client_v2_base.py b/tests/python_client/base/client_v2_base.py index 2ecd6d5e9e..7e37acfddc 100644 --- a/tests/python_client/base/client_v2_base.py +++ b/tests/python_client/base/client_v2_base.py @@ -144,6 +144,7 @@ class TestMilvusClientV2Base(Base): timeout=None, check_task=None, check_items=None, **kwargs): timeout = TIMEOUT if timeout is None else timeout kwargs.update({"timeout": timeout}) + func_name = sys._getframe().f_code.co_name res, check = api_request([client.search, collection_name, data, filter, limit, output_fields, search_params], **kwargs) @@ -153,11 +154,26 @@ class TestMilvusClientV2Base(Base): **kwargs).run() return res, check_result + @trace() + def hybrid_search(self, client, collection_name, reqs, ranker, limit=10, output_fields=None, timeout=None, + check_task=None, check_items=None, **kwargs): + timeout = TIMEOUT if timeout is None else timeout + kwargs.update({"timeout": timeout}) + + func_name = sys._getframe().f_code.co_name + res, check = api_request([client.hybrid_search, collection_name, reqs, ranker, limit, + output_fields], **kwargs) + check_result = ResponseChecker(res, func_name, check_task, check_items, check, + collection_name=collection_name, reqs=reqs, ranker=ranker, limit=limit, + output_fields=output_fields, **kwargs).run() + return res, check_result + @trace() def search_iterator(self, client, collection_name, data, batch_size, limit=-1, filter=None, output_fields=None, search_params=None, timeout=None, check_task=None, check_items=None, **kwargs): timeout = TIMEOUT if timeout is None else timeout kwargs.update({"timeout": timeout}) + func_name = sys._getframe().f_code.co_name res, check = api_request([client.search_iterator, collection_name, data, batch_size, filter, limit, output_fields, search_params], **kwargs) @@ -754,6 +770,19 @@ class TestMilvusClientV2Base(Base): object_name=object_name, db_name=db_name, **kwargs).run() return res, check_result + @trace() + def grant_privilege_v2(self, client, role_name, privilege, collection_name, db_name=None, + timeout=None, check_task=None, check_items=None, **kwargs): + timeout = TIMEOUT if timeout is None else timeout + kwargs.update({"timeout": timeout}) + func_name = sys._getframe().f_code.co_name + res, check = api_request([client.grant_privilege_v2, role_name, privilege, collection_name, + db_name], **kwargs) + check_result = 
ResponseChecker(res, func_name, check_task, check_items, check, + role_name=role_name, privilege=privilege, + collection_name=collection_name, db_name=db_name, **kwargs).run() + return res, check_result + @trace() def revoke_privilege(self, client, role_name, object_type, privilege, object_name, db_name="", timeout=None, check_task=None, check_items=None, **kwargs): @@ -767,37 +796,89 @@ class TestMilvusClientV2Base(Base): object_name=object_name, db_name=db_name, **kwargs).run() return res, check_result - def create_privilege_group(self, client, privilege_group: str, check_task=None, check_items=None, **kwargs): + @trace() + def create_privilege_group(self, client, privilege_group: str, timeout=None, check_task=None, check_items=None, **kwargs): + timeout = TIMEOUT if timeout is None else timeout + kwargs.update({"timeout": timeout}) + func_name = sys._getframe().f_code.co_name res, check = api_request([client.create_privilege_group, privilege_group], **kwargs) - check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() + check_result = ResponseChecker(res, func_name, check_task, check_items, check, + privilege_group=privilege_group, **kwargs).run() return res, check_result - def drop_privilege_group(self, client, privilege_group: str, check_task=None, check_items=None, **kwargs): + @trace() + def drop_privilege_group(self, client, privilege_group: str, timeout=None, check_task=None, check_items=None, **kwargs): + timeout = TIMEOUT if timeout is None else timeout + kwargs.update({"timeout": timeout}) + func_name = sys._getframe().f_code.co_name res, check = api_request([client.drop_privilege_group, privilege_group], **kwargs) - check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() + check_result = ResponseChecker(res, func_name, check_task, check_items, check, + privilege_group=privilege_group, **kwargs).run() return res, check_result - def list_privilege_groups(self, client, check_task=None, check_items=None, **kwargs): + @trace() + def list_privilege_groups(self, client, timeout=None, check_task=None, check_items=None, **kwargs): + timeout = TIMEOUT if timeout is None else timeout + kwargs.update({"timeout": timeout}) + func_name = sys._getframe().f_code.co_name res, check = api_request([client.list_privilege_groups], **kwargs) check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() return res, check_result - def add_privileges_to_group(self, client, privilege_group: str, privileges: list, check_task=None, check_items=None, - **kwargs): + @trace() + def add_privileges_to_group(self, client, privilege_group: str, privileges: list, timeout=None, + check_task=None, check_items=None, **kwargs): + timeout = TIMEOUT if timeout is None else timeout + kwargs.update({"timeout": timeout}) + func_name = sys._getframe().f_code.co_name res, check = api_request([client.add_privileges_to_group, privilege_group, privileges], **kwargs) - check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() + check_result = ResponseChecker(res, func_name, check_task, check_items, check, + privilege_group=privilege_group, privileges=privileges, **kwargs).run() return res, check_result - def remove_privileges_from_group(self, client, privilege_group: str, privileges: list, check_task=None, check_items=None, - **kwargs): + @trace() + def remove_privileges_from_group(self, client, privilege_group: str, privileges: list, timeout=None, + check_task=None, 
check_items=None, **kwargs): + timeout = TIMEOUT if timeout is None else timeout + kwargs.update({"timeout": timeout}) + func_name = sys._getframe().f_code.co_name res, check = api_request([client.remove_privileges_from_group, privilege_group, privileges], **kwargs) - check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() + check_result = ResponseChecker(res, func_name, check_task, check_items, check, + privilege_group=privilege_group, privileges=privileges, **kwargs).run() + return res, check_result + + @trace() + def grant_privilege_v2(self, client, role_name: str, privilege: str, collection_name: str, db_name=None, timeout=None, + check_task=None, check_items=None, **kwargs): + timeout = TIMEOUT if timeout is None else timeout + kwargs.update({"timeout": timeout}) + + func_name = sys._getframe().f_code.co_name + res, check = api_request([client.grant_privilege_v2, role_name, privilege, collection_name, db_name], + **kwargs) + check_result = ResponseChecker(res, func_name, check_task, check_items, check, + role_name=role_name, privilege=privilege, + collection_name=collection_name, db_name=db_name, **kwargs).run() + return res, check_result + + @trace() + def revoke_privilege_v2(self, client, role_name: str, privilege: str, collection_name: str, db_name=None, + timeout=None, check_task=None, check_items=None, **kwargs): + timeout = TIMEOUT if timeout is None else timeout + kwargs.update({"timeout": timeout}) + + func_name = sys._getframe().f_code.co_name + res, check = api_request([client.revoke_privilege_v2, role_name, privilege, collection_name, db_name], + **kwargs) + check_result = ResponseChecker(res, func_name, check_task, check_items, check, + role_name=role_name, privilege=privilege, + collection_name=collection_name, db_name=db_name, **kwargs).run() return res, check_result @trace() @@ -849,7 +930,7 @@ class TestMilvusClientV2Base(Base): @trace() def alter_collection_field(self, client, collection_name, field_name, field_params, timeout=None, - check_task=None, check_items=None, **kwargs): + check_task=None, check_items=None, **kwargs): timeout = TIMEOUT if timeout is None else timeout func_name = sys._getframe().f_code.co_name @@ -860,7 +941,7 @@ class TestMilvusClientV2Base(Base): @trace() def alter_database_properties(self, client, db_name, properties, timeout=None, - check_task=None, check_items=None, **kwargs): + check_task=None, check_items=None, **kwargs): timeout = TIMEOUT if timeout is None else timeout kwargs.update({"timeout": timeout}) @@ -871,7 +952,7 @@ class TestMilvusClientV2Base(Base): @trace() def drop_database_properties(self, client, db_name, property_keys, timeout=None, - check_task=None, check_items=None, **kwargs): + check_task=None, check_items=None, **kwargs): timeout = TIMEOUT if timeout is None else timeout kwargs.update({"timeout": timeout}) @@ -880,16 +961,6 @@ class TestMilvusClientV2Base(Base): check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() return res, check_result - @trace() - def create_database(self, client, db_name, timeout=None, check_task=None, check_items=None, **kwargs): - timeout = TIMEOUT if timeout is None else timeout - kwargs.update({"timeout": timeout}) - - func_name = sys._getframe().f_code.co_name - res, check = api_request([client.create_database, db_name], **kwargs) - check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run() - return res, check_result - @trace() def describe_database(self, client, db_name, 
 timeout=None, check_task=None, check_items=None, **kwargs):
         timeout = TIMEOUT if timeout is None else timeout
@@ -930,4 +1001,91 @@ class TestMilvusClientV2Base(Base):
                                        analyzer_params=analyzer_params,
                                        **kwargs).run()
         return res, check_result
 
+    @trace()
+    def compact(self, client, collection_name, is_clustering=False, timeout=None, check_task=None,
+                check_items=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        kwargs.update({"timeout": timeout})
+        func_name = sys._getframe().f_code.co_name
+        res, check = api_request([client.compact, collection_name, is_clustering], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task, check_items, check,
+                                       collection_name=collection_name, is_clustering=is_clustering, **kwargs).run()
+        return res, check_result
+
+    @trace()
+    def get_compaction_state(self, client, job_id, timeout=None, check_task=None, check_items=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        kwargs.update({"timeout": timeout})
+
+        func_name = sys._getframe().f_code.co_name
+        res, check = api_request([client.get_compaction_state, job_id], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task, check_items, check, job_id=job_id, **kwargs).run()
+        return res, check_result
+
+    @trace()
+    def create_resource_group(self, client, name, timeout=None, check_task=None, check_items=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        kwargs.update({"timeout": timeout})
+
+        func_name = sys._getframe().f_code.co_name
+        res, check = api_request([client.create_resource_group, name], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task, check_items, check,
+                                       name=name, **kwargs).run()
+        return res, check_result
+
+    @trace()
+    def update_resource_groups(self, client, configs, timeout=None, check_task=None, check_items=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        kwargs.update({"timeout": timeout})
+
+        func_name = sys._getframe().f_code.co_name
+        res, check = api_request([client.update_resource_groups, configs], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task, check_items, check,
+                                       configs=configs, **kwargs).run()
+        return res, check_result
+
+    @trace()
+    def drop_resource_group(self, client, name, timeout=None, check_task=None, check_items=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        kwargs.update({"timeout": timeout})
+
+        func_name = sys._getframe().f_code.co_name
+        res, check = api_request([client.drop_resource_group, name], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task, check_items, check,
+                                       name=name, **kwargs).run()
+        return res, check_result
+
+    @trace()
+    def describe_resource_group(self, client, name, timeout=None, check_task=None, check_items=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        kwargs.update({"timeout": timeout})
+
+        func_name = sys._getframe().f_code.co_name
+        res, check = api_request([client.describe_resource_group, name], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task, check_items, check,
+                                       name=name, **kwargs).run()
+        return res, check_result
+
+    @trace()
+    def list_resource_groups(self, client, timeout=None, check_task=None, check_items=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        kwargs.update({"timeout": timeout})
+
+        func_name = sys._getframe().f_code.co_name
+        res, check = api_request([client.list_resource_groups], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task, check_items, check,
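# A minimal usage sketch for the compact/get_compaction_state wrappers above, against a raw
# pymilvus MilvusClient (hedged: the URI and collection name are placeholders, and the exact
# representation of the returned state is server-defined):
import time
from pymilvus import MilvusClient

client = MilvusClient("http://localhost:19530")
job_id = client.compact("my_collection", is_clustering=False)  # returns the compaction job id
time.sleep(1)
print(client.get_compaction_state(job_id))  # poll until the job reports completion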
+                                       **kwargs).run()
+        return res, check_result
+
+    @trace()
+    def transfer_replica(self, client, source_group, target_group, collection_name, num_replicas,
+                         timeout=None, check_task=None, check_items=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+        kwargs.update({"timeout": timeout})
+
+        func_name = sys._getframe().f_code.co_name
+        res, check = api_request([client.transfer_replica, source_group, target_group, collection_name,
+                                  num_replicas], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task, check_items, check,
+                                       source_group=source_group, target_group=target_group,
+                                       collection_name=collection_name, num_replicas=num_replicas,
+                                       **kwargs).run()
+        return res, check_result
\ No newline at end of file
diff --git a/tests/python_client/check/func_check.py b/tests/python_client/check/func_check.py
index 8759632b86..e82c666bd6 100644
--- a/tests/python_client/check/func_check.py
+++ b/tests/python_client/check/func_check.py
@@ -123,6 +123,9 @@ class ResponseChecker:
         elif self.check_task == CheckTasks.check_insert_result:
             # check `insert` interface response
             result = self.check_insert_response(check_items=self.check_items)
+        elif self.check_task == CheckTasks.check_describe_index_property:
+            # describe index interface (high level api) response check
+            result = self.check_describe_index_property(self.response, self.func_name, self.check_items)
 
         # Add check_items here if something new need verify
 
@@ -732,3 +735,34 @@ class ResponseChecker:
         assert self.response.insert_count == real, error_message.format(self.response.insert_count, real)
 
         return True
+
+    @staticmethod
+    def check_describe_index_property(res, func_name, check_items):
+        """
+        According to the check_items, check the index properties of res, which is returned by describe_index
+        :param res: actual response of describe_index
+        :type res: dict
+
+        :param func_name: name of the API that produced res, expected to be describe_index
+        :type func_name: str
+
+        :param check_items: which items are expected to be checked, including json_cast_type, index_type,
+                            json_path, field_name and index_name
+        :type check_items: dict, {check_key: expected_value}
+        """
+        exp_func_name = "describe_index"
+        if func_name != exp_func_name:
+            log.warning("The function name is {} rather than {}".format(func_name, exp_func_name))
+        if len(check_items) == 0:
+            raise Exception("No expect values found in the check task")
+        if check_items.get("json_cast_type", None) is not None:
+            assert res["json_cast_type"] == check_items.get("json_cast_type")
+        if check_items.get("index_type", None) is not None:
+            assert res["index_type"] == check_items.get("index_type")
+        if check_items.get("json_path", None) is not None:
+            assert res["json_path"] == check_items.get("json_path")
+        if check_items.get("field_name", None) is not None:
+            assert res["field_name"] == check_items.get("field_name")
+        if check_items.get("index_name", None) is not None:
+            assert res["index_name"] == check_items.get("index_name")
+
+        return True
\ No newline at end of file
diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py
index 722b78b38e..4c796af320 100644
--- a/tests/python_client/common/common_type.py
+++ b/tests/python_client/common/common_type.py
@@ -13,6 +13,7 @@ default_top_k = 10
 default_nq = 2
 default_limit = 10
 default_batch_size = 1000
+min_limit = 1
 max_limit = 16384
 max_top_k = 16384
 max_partition_num = 1024
@@ -309,6 +310,7 @@ class CheckTasks:
     check_describe_database_property = "check_describe_database_property"
     check_insert_result = "check_insert_result"
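# Hedged excerpt (not standalone; assumes the TestMilvusClientV2Base harness above) showing how
# a test is expected to drive the new check task; the index and collection names are placeholders:
self.describe_index(client, collection_name, "json_index",
                    check_task=CheckTasks.check_describe_index_property,
                    check_items={"field_name": "my_json",
                                 "index_type": "INVERTED",
                                 "json_path": "my_json['a']['b']",
                                 "index_name": "json_index"})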
check_collection_fields_properties = "check_collection_fields_properties" + check_describe_index_property = "check_describe_index_property" class BulkLoadStates: diff --git a/tests/python_client/milvus_client/test_milvus_client_collection.py b/tests/python_client/milvus_client/test_milvus_client_collection.py index cdb30f1c02..f0d9843341 100644 --- a/tests/python_client/milvus_client/test_milvus_client_collection.py +++ b/tests/python_client/milvus_client/test_milvus_client_collection.py @@ -1201,8 +1201,8 @@ class TestMilvusClientCollectionPropertiesInvalid(TestMilvusClientV2Base): properties = {'mmap.enabled': True} error = {ct.err_code: 100, ct.err_msg: f"collection not found[database=default][collection={drop_name}]"} self.drop_collection_properties(client, drop_name, properties, - check_task=CheckTasks.err_res, - check_items=error) + check_task=CheckTasks.err_res, + check_items=error) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("property_keys", ["", {}, []]) diff --git a/tests/python_client/milvus_client/test_milvus_client_compact.py b/tests/python_client/milvus_client/test_milvus_client_compact.py new file mode 100644 index 0000000000..28c77262b1 --- /dev/null +++ b/tests/python_client/milvus_client/test_milvus_client_compact.py @@ -0,0 +1,173 @@ +import pytest + +from base.client_v2_base import TestMilvusClientV2Base +from utils.util_log import test_log as log +from common import common_func as cf +from common import common_type as ct +from common.common_type import CaseLabel, CheckTasks +from utils.util_pymilvus import * +from common.constants import * +from pymilvus import DataType +from pymilvus import AnnSearchRequest +from pymilvus import WeightedRanker + +prefix = "client_hybrid_search" +epsilon = ct.epsilon +default_nb = ct.default_nb +default_nb_medium = ct.default_nb_medium +default_nq = ct.default_nq +default_dim = ct.default_dim +default_limit = ct.default_limit +default_search_exp = "id >= 0" +exp_res = "exp_res" +default_search_string_exp = "varchar >= \"0\"" +default_search_mix_exp = "int64 >= 0 && varchar >= \"0\"" +default_invaild_string_exp = "varchar >= 0" +default_json_search_exp = "json_field[\"number\"] >= 0" +perfix_expr = 'varchar like "0%"' +default_search_field = ct.default_float_vec_field_name +default_search_params = ct.default_search_params +default_primary_key_field_name = "id" +default_vector_field_name = "vector" +default_float_field_name = ct.default_float_field_name +default_bool_field_name = ct.default_bool_field_name +default_string_field_name = ct.default_string_field_name +default_int32_array_field_name = ct.default_int32_array_field_name +default_string_array_field_name = ct.default_string_array_field_name + + +class TestMilvusClientCompactInvalid(TestMilvusClientV2Base): + """ Test case of compact interface """ + + """ + ****************************************************************** + # The following are invalid base cases + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="pymilvus issue 2588") + @pytest.mark.parametrize("name", [1, "12-s", "12 s", "(mn)", "中文", "%$#"]) + def test_milvus_client_compact_invalid_collection_name_string(self, name): + """ + target: test compact with invalid collection name + method: create connection, collection, insert and hybrid search with invalid collection name + expected: Raise exception + """ + client = self._client() + error = {ct.err_code: 1100, + ct.err_msg: f"Invalid collection name: {name}. 
the first character of a collection name "
+                             f"must be an underscore or letter: invalid parameter"}
+        self.compact(client, name,
+                     check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="pymilvus issue 2587")
+    @pytest.mark.parametrize("name", [1])
+    def test_milvus_client_compact_invalid_collection_name_non_string(self, name):
+        """
+        target: test compact with a non-string collection name
+        method: create connection, then call compact with a non-string collection name
+        expected: Raise exception
+        """
+        client = self._client()
+        error = {ct.err_code: 1100,
+                 ct.err_msg: f"Invalid collection name: {name}. the first character of a collection name "
+                             f"must be an underscore or letter: invalid parameter"}
+        self.compact(client, name,
+                     check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("invalid_clustering", ["12-s", "12 s", "(mn)", "中文", "%$#"])
+    def test_milvus_client_compact_invalid_is_clustering(self, invalid_clustering):
+        """
+        target: test compact with an invalid is_clustering value
+        method: create connection, collection, then call compact with an invalid is_clustering value
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        error = {ct.err_code: 1,
+                 ct.err_msg: f"is_clustering value {invalid_clustering} is illegal"}
+        self.compact(client, collection_name, is_clustering=invalid_clustering,
+                     check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("invalid_job_id", ["12-s"])
+    def test_milvus_client_get_compact_state_invalid_job_id(self, invalid_job_id):
+        """
+        target: test get_compaction_state with an invalid job id
+        method: create connection, collection, then query the compaction state with an invalid job id
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        error = {ct.err_code: 1,
+                 ct.err_msg: f"compaction_id value {invalid_job_id} is illegal"}
+        self.get_compaction_state(client, invalid_job_id,
+                                  check_task=CheckTasks.err_res, check_items=error)
+
+
+class TestMilvusClientCompactValid(TestMilvusClientV2Base):
+    """ Test case of compact interface """
+
+    @pytest.fixture(scope="function", params=[False, True])
+    def is_clustering(self, request):
+        yield request.param
+
+    """
+    ******************************************************************
+    # The following are valid base cases
+    ******************************************************************
+    """
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_compact_normal(self, is_clustering):
+        """
+        target: test compact with the default normal case (2 vector fields)
+        method: create connection, collection, insert, flush and compact
+        expected: compact successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 128
+        # 1. 
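# The is_clustering fixture above runs this test in both modes; a standalone sketch of the
# difference (hedged: the URI and collection name are placeholders):
from pymilvus import MilvusClient

client = MilvusClient("http://localhost:19530")
client.create_collection("compact_demo", dimension=8)
client.insert("compact_demo", [{"id": i, "vector": [0.1] * 8} for i in range(1000)])
regular_job = client.compact("compact_demo", is_clustering=False)     # merge small segments
clustering_job = client.compact("compact_demo", is_clustering=True)   # clustering compaction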
create collection + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) + schema.add_field(default_vector_field_name+"new", DataType.FLOAT_VECTOR, dim=dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + index_params.add_index(default_vector_field_name+"new", metric_type="L2") + self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) + # 2. insert + rng = np.random.default_rng(seed=19530) + rows = [ + {default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_vector_field_name+"new": list(rng.random((1, default_dim))[0]), + default_string_field_name: str(i)} for i in range(default_nb)] + self.insert(client, collection_name, rows) + self.flush(client, collection_name) + # 3. hybrid search + res = self.compact(client, collection_name, is_clustering=is_clustering)[0] + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_compact_empty_collection(self, is_clustering): + """ + target: test compact to empty collection + method: create connection, collection, compact + expected: successfully + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. compact + self.compact(client, collection_name, is_clustering=is_clustering) + self.drop_collection(client, collection_name) diff --git a/tests/python_client/milvus_client/test_milvus_client_database.py b/tests/python_client/milvus_client/test_milvus_client_database.py index bf9da177cd..88c13683ab 100644 --- a/tests/python_client/milvus_client/test_milvus_client_database.py +++ b/tests/python_client/milvus_client/test_milvus_client_database.py @@ -106,6 +106,7 @@ class TestMilvusClientDatabaseInvalid(TestMilvusClientV2Base): self.drop_database(client, db_name) @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.skip(reason="pymilvus issue 2683") @pytest.mark.parametrize("properties", ["hhh", []]) def test_milvus_client_create_database_with_invalid_properties(self, properties): """ @@ -120,7 +121,7 @@ class TestMilvusClientDatabaseInvalid(TestMilvusClientV2Base): db_name = cf.gen_unique_str(db_prefix) error = {ct.err_code: 1, ct.err_msg: f"Unexpected error, message="} self.create_database(client, db_name, properties, - check_task=CheckTasks.err_res, check_items=error) + check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("properties", [{"database.rep.number": 3}]) diff --git a/tests/python_client/milvus_client/test_milvus_client_hybrid_search.py b/tests/python_client/milvus_client/test_milvus_client_hybrid_search.py new file mode 100644 index 0000000000..96e37e5912 --- /dev/null +++ b/tests/python_client/milvus_client/test_milvus_client_hybrid_search.py @@ -0,0 +1,392 @@ +import pytest + +from base.client_v2_base import TestMilvusClientV2Base +from utils.util_log import test_log as log +from common import common_func as cf +from common import common_type as ct +from common.common_type import CaseLabel, CheckTasks +from utils.util_pymilvus import * +from common.constants 
import * +from pymilvus import DataType +from pymilvus import AnnSearchRequest +from pymilvus import WeightedRanker + +prefix = "client_hybrid_search" +epsilon = ct.epsilon +default_nb = ct.default_nb +default_nb_medium = ct.default_nb_medium +default_nq = ct.default_nq +default_dim = ct.default_dim +default_limit = ct.default_limit +default_search_exp = "id >= 0" +exp_res = "exp_res" +default_search_string_exp = "varchar >= \"0\"" +default_search_mix_exp = "int64 >= 0 && varchar >= \"0\"" +default_invaild_string_exp = "varchar >= 0" +default_json_search_exp = "json_field[\"number\"] >= 0" +perfix_expr = 'varchar like "0%"' +default_search_field = ct.default_float_vec_field_name +default_search_params = ct.default_search_params +default_primary_key_field_name = "id" +default_vector_field_name = "vector" +default_float_field_name = ct.default_float_field_name +default_bool_field_name = ct.default_bool_field_name +default_string_field_name = ct.default_string_field_name +default_int32_array_field_name = ct.default_int32_array_field_name +default_string_array_field_name = ct.default_string_array_field_name + + +class TestMilvusClientHybridSearchInvalid(TestMilvusClientV2Base): + """ Test case of hybrid search interface """ + + """ + ****************************************************************** + # The following are invalid base cases + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) + def test_milvus_client_hybrid_search_invalid_collection_name_string(self, name): + """ + target: test hybrid search with invalid collection name + method: create connection, collection, insert and hybrid search with invalid collection name + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. hybrid search + rng = np.random.default_rng(seed=19530) + vectors_to_search = rng.random((1, 8)) + sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100") + ranker = WeightedRanker(0.2, 0.8) + error = {ct.err_code: 100, + ct.err_msg: f"collection not found[database=default][collection={name}]"} + self.hybrid_search(client, name, [sub_search1], ranker, limit=default_limit, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.skip(reason="pymilvus issue 2587") + @pytest.mark.parametrize("name", [1]) + def test_milvus_client_hybrid_search_invalid_collection_name_non_string(self, name): + """ + target: test hybrid search with invalid collection name + method: create connection, collection, insert and hybrid search with invalid collection name + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. 
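# The sub-request objects these tests construct, in isolation (standalone sketch; only numpy
# and pymilvus are assumed):
import numpy as np
from pymilvus import AnnSearchRequest, WeightedRanker

vectors = np.random.default_rng(seed=19530).random((1, 8))
req = AnnSearchRequest(data=vectors, anns_field="embeddings",
                       param={"level": 1}, limit=20, expr="id<100")
ranker = WeightedRanker(0.2, 0.8)  # one weight per sub-request at hybrid_search time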
hybrid search + rng = np.random.default_rng(seed=19530) + vectors_to_search = rng.random((1, 8)) + sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100") + ranker = WeightedRanker(0.2, 0.8) + error = {ct.err_code: 100, + ct.err_msg: f"collection not found[database=default][collection={name}]"} + self.hybrid_search(client, name, [sub_search1], ranker, limit=default_limit, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.skip(reason="pymilvus issue 2588") + @pytest.mark.parametrize("reqs", ["12-s", 1]) + def test_milvus_client_hybrid_search_invalid_reqs(self, reqs): + """ + target: test hybrid search with invalid reqs + method: create connection, collection, insert and hybrid search with invalid reqs + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. hybrid search + ranker = WeightedRanker(0.2, 0.8) + error = {ct.err_code: 100, + ct.err_msg: f"collection not found[database=default][collection=1]"} + self.hybrid_search(client, collection_name, reqs, ranker, limit=default_limit, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.skip(reason="pymilvus issue 2588") + @pytest.mark.parametrize("invalid_ranker", [1]) + def test_milvus_client_hybrid_search_invalid_ranker(self, invalid_ranker): + """ + target: test hybrid search with invalid ranker + method: create connection, collection, insert and hybrid search with invalid ranker + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. hybrid search + rng = np.random.default_rng(seed=19530) + vectors_to_search = rng.random((1, 8)) + sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100") + ranker = WeightedRanker(0.2, 0.8) + error = {ct.err_code: 100, + ct.err_msg: f"collection not found[database=default][collection=1]"} + self.hybrid_search(client, collection_name, [sub_search1], invalid_ranker, limit=default_limit, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("invalid_limit", [-1, ct.min_limit-1, "1", "12-s", "中文", "%$#"]) + def test_milvus_client_hybrid_search_invalid_limit(self, invalid_limit): + """ + target: test hybrid search with invalid limit + method: create connection, collection, insert and hybrid search with invalid limit + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. 
hybrid search + rng = np.random.default_rng(seed=19530) + vectors_to_search = rng.random((1, 8)) + sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100") + ranker = WeightedRanker(0.2, 0.8) + error = {ct.err_code: 1, + ct.err_msg: f"`limit` value {invalid_limit} is illegal"} + self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=invalid_limit, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("invalid_limit", [ct.max_limit+1]) + def test_milvus_client_hybrid_search_limit_out_of_range(self, invalid_limit): + """ + target: test hybrid search with invalid limit (out of range) + method: create connection, collection, insert and hybrid search with invalid limit + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. hybrid search + rng = np.random.default_rng(seed=19530) + vectors_to_search = rng.random((1, 8)) + sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100") + ranker = WeightedRanker(0.2, 0.8) + error = {ct.err_code: 65535, + ct.err_msg: "invalid max query result window, (offset+limit) should be in range [1, 16384], but got 16385"} + self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=invalid_limit, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("invalid_output_fields", [1, "1"]) + def test_milvus_client_hybrid_search_invalid_output_fields(self, invalid_output_fields): + """ + target: test hybrid search with invalid output_fields + method: create connection, collection, insert and hybrid search with invalid output_fields + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. hybrid search + rng = np.random.default_rng(seed=19530) + vectors_to_search = rng.random((1, 8)) + sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100") + ranker = WeightedRanker(0.2, 0.8) + error = {ct.err_code: 1, + ct.err_msg: f"`output_fields` value {invalid_output_fields} is illegal"} + self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit, + output_fields=invalid_output_fields, check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.skip(reason="pymilvus issue 2589") + @pytest.mark.parametrize("invalid_partition_names", [1, "1"]) + def test_milvus_client_hybrid_search_invalid_partition_names(self, invalid_partition_names): + """ + target: test hybrid search with invalid partition names + method: create connection, collection, insert and hybrid search with invalid partition names + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. 
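# The window rule that the two limit tests above encode (hedged: paraphrased from the server
# error string, not an official API):
MAX_WINDOW = 16384  # == ct.max_limit in this suite

def window_ok(offset: int, limit: int) -> bool:
    return 1 <= offset + limit <= MAX_WINDOW

assert window_ok(0, 16384) and not window_ok(0, 16385)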
hybrid search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100")
+        ranker = WeightedRanker(0.2, 0.8)
+        error = {ct.err_code: 1,
+                 ct.err_msg: f"`partition_name_array` value {invalid_partition_names} is illegal"}
+        self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit,
+                           partition_names=invalid_partition_names, check_task=CheckTasks.err_res,
+                           check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("invalid_partition_names", ["not_exist"])
+    def test_milvus_client_hybrid_search_not_exist_partition_names(self, invalid_partition_names):
+        """
+        target: test hybrid search with non-existent partition names
+        method: create connection, collection, insert and hybrid search with non-existent partition names
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. hybrid search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100")
+        ranker = WeightedRanker(0.2, 0.8)
+        error = {ct.err_code: 65535,
+                 ct.err_msg: f"partition name {invalid_partition_names} not found"}
+        self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit,
+                           partition_names=[invalid_partition_names], check_task=CheckTasks.err_res,
+                           check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_hybrid_search_not_exist_vector_name(self):
+        """
+        target: test hybrid search with a non-existent vector field name
+        method: create connection, collection, then hybrid search on a non-existent vector field
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. hybrid search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        not_exist_vector_field = "not_exist_vector_field"
+        sub_search1 = AnnSearchRequest(vectors_to_search, not_exist_vector_field, {"level": 1}, 20, expr="id<100")
+        ranker = WeightedRanker(0.2, 0.8)
+        error = {ct.err_code: 1100,
+                 ct.err_msg: f"failed to create query plan: failed to get field schema by name: "
+                             f"fieldName({not_exist_vector_field}) not found: invalid parameter"}
+        self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit,
+                           check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_hybrid_search_requests_mismatch(self):
+        """
+        target: test hybrid search when the length of the weights param mismatches the ann search requests
+        method: create connection, collection, then hybrid search with 2 weights but only 1 request
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. 
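# As the partition-name tests above show, hybrid_search forwards partition_names to the
# server; a hedged standalone sketch (URI, collection and data are placeholders):
from pymilvus import MilvusClient, AnnSearchRequest, WeightedRanker

client = MilvusClient("http://localhost:19530")
req = AnnSearchRequest([[0.1] * 8], "vector", {"level": 1}, 20)
client.hybrid_search("demo_collection", [req], WeightedRanker(1.0),
                     limit=10, partition_names=["_default"])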
hybrid search + rng = np.random.default_rng(seed=19530) + vectors_to_search = rng.random((1, default_dim)) + sub_search1 = AnnSearchRequest(vectors_to_search, "vector", {"level": 1}, 20, expr="id<100") + ranker = WeightedRanker(0.2, 0.8) + error = {ct.err_code: 1100, + ct.err_msg: "the length of weights param mismatch with ann search requests: " + "invalid parameter[expected=1][actual=2]"} + self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + +class TestMilvusClientHybridSearchValid(TestMilvusClientV2Base): + """ Test case of hybrid search interface """ + + @pytest.fixture(scope="function", params=[False, True]) + def auto_id(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["COSINE", "L2"]) + def metric_type(self, request): + yield request.param + + """ + ****************************************************************** + # The following are valid base cases + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_hybrid_search_default(self): + """ + target: test hybrid search with default normal case (2 vector fields) + method: create connection, collection, insert and hybrid search + expected: successfully + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + dim = 128 + # 1. create collection + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) + schema.add_field(default_vector_field_name+"new", DataType.FLOAT_VECTOR, dim=dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + index_params.add_index(default_vector_field_name+"new", metric_type="L2") + self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) + # 2. insert + rng = np.random.default_rng(seed=19530) + rows = [ + {default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_vector_field_name+"new": list(rng.random((1, default_dim))[0]), + default_string_field_name: str(i)} for i in range(default_nb)] + self.insert(client, collection_name, rows) + # 3. 
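# Shape of the call made below (names as in this test; not standalone): the ranker's weights
# must line up one-to-one with the sub-requests, else the server raises the "length of weights
# param mismatch" error exercised by the previous test.
reqs = [AnnSearchRequest(vectors_to_search, "vector", {"level": 1}, 20, expr="id>=0"),
        AnnSearchRequest(vectors_to_search, "vectornew", {"level": 1}, 20, expr="id>=0")]
assert len(reqs) == 2  # WeightedRanker(0.2, 0.8) carries exactly two weights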
hybrid search + vectors_to_search = rng.random((1, default_dim)) + insert_ids = [i for i in range(default_nb)] + sub_search1 = AnnSearchRequest(vectors_to_search, default_vector_field_name, {"level": 1}, 20, expr="id>=0") + sub_search2 = AnnSearchRequest(vectors_to_search, default_vector_field_name+"new", {"level": 1}, 20, expr="id>=0") + ranker = WeightedRanker(0.2, 0.8) + self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker, limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "limit": default_limit}) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_hybrid_search_single_vector(self): + """ + target: test hybrid search with just one vector field + method: create connection, collection, insert and hybrid search + expected: successfully + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. insert + rng = np.random.default_rng(seed=19530) + rows = [ + {default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0])} + for i in range(default_nb)] + self.insert(client, collection_name, rows) + # 3. hybrid search + rng = np.random.default_rng(seed=19530) + insert_ids = [i for i in range(default_nb)] + vectors_to_search = rng.random((1, default_dim)) + sub_search1 = AnnSearchRequest(vectors_to_search, "vector", {"level": 1}, 20, expr="id<100") + ranker = WeightedRanker(1) + self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "limit": default_limit}) + self.drop_collection(client, collection_name) diff --git a/tests/python_client/milvus_client/test_milvus_client_index.py b/tests/python_client/milvus_client/test_milvus_client_index.py index c654f637a9..3292a91d4c 100644 --- a/tests/python_client/milvus_client/test_milvus_client_index.py +++ b/tests/python_client/milvus_client/test_milvus_client_index.py @@ -68,8 +68,7 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): index_params = self.prepare_index_params(client)[0] index_params.add_index(field_name="vector") # 3. create index - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {name}. the first character of a collection " - f"name must be an underscore or letter: invalid parameter"} + error = {ct.err_code: 100, ct.err_msg: f"collection not found[database=default][collection={name}"} self.create_index(client, name, index_params, check_task=CheckTasks.err_res, check_items=error) self.drop_collection(client, collection_name) @@ -92,8 +91,7 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): index_params = self.prepare_index_params(client)[0] index_params.add_index(field_name="vector") # 3. create index - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {name}. 
the length of a collection name " - f"must be less than 255 characters: invalid parameter"} + error = {ct.err_code: 100, ct.err_msg: f"collection not found[database=default][collection={name}]"} self.create_index(client, name, index_params, check_task=CheckTasks.err_res, check_items=error) self.drop_collection(client, collection_name) @@ -117,7 +115,7 @@ class TestMilvusClientIndexInvalid(TestMilvusClientV2Base): index_params.add_index(field_name="vector") # 3. create index error = {ct.err_code: 100, - ct.err_msg: f"can't find collection[database=default][collection={not_existed_collection_name}]"} + ct.err_msg: f"collection not found[database=default][collection={not_existed_collection_name}]"} self.create_index(client, not_existed_collection_name, index_params, check_task=CheckTasks.err_res, check_items=error) self.drop_collection(client, collection_name) @@ -632,4 +630,789 @@ class TestMilvusClientIndexValid(TestMilvusClientV2Base): check_items={exp_res: rows, "with_vec": True, "primary_field": default_primary_key_field_name}) - self.drop_collection(client, collection_name) \ No newline at end of file + self.drop_collection(client, collection_name) + + +class TestMilvusClientJsonPathIndexInvalid(TestMilvusClientV2Base): + """ Test case of search interface """ + + @pytest.fixture(scope="function", params=["TRIE", "STL_SORT", "BITMAP"]) + def not_supported_varchar_scalar_index(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["INVERTED"]) + def supported_varchar_scalar_index(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[DataType.JSON, DataType.ARRAY, DataType.FLOAT_VECTOR, + DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR, + DataType.SPARSE_FLOAT_VECTOR, DataType.INT8_VECTOR]) + def not_supported_json_cast_type(self, request): + yield request.param + + """ + ****************************************************************** + # The following are invalid base cases + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_json_path_index_no_index_params(self): + """ + target: test json path index with: + 1. no all index params + 2. no json_cast_type + 3. no json_path + method: create index on invalid collection name + expected: raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + self.release_collection(client, collection_name) + self.drop_index(client, collection_name, "vector") + # 2. prepare index params with no index params + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE") + index_params.add_index(field_name="my_json", index_type="INVERTED") + # 3. create index + error = {ct.err_code: 1100, ct.err_msg: "json index must specify cast type: missing parameter" + "[missing_param=json_cast_type]: invalid parameter" + "[expected=valid index params][actual=invalid index params]"} + self.create_index(client, collection_name, index_params, + check_task=CheckTasks.err_res, check_items=error) + # 4. 
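# For contrast with the missing-parameter failures above, a fully specified JSON path index
# (hedged excerpt; reuses the harness objects from this test):
ok_params = self.prepare_index_params(client)[0]
ok_params.add_index(field_name="my_json", index_type="INVERTED",
                    params={"json_cast_type": DataType.VARCHAR,
                            "json_path": "my_json['a']['b']"})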
prepare index params with no json_cast_type + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE") + index_params.add_index(field_name="my_json", index_type="INVERTED", params={"json_path": "my_json['a']['b']"}) + # 5. create index + error = {ct.err_code: 1100, ct.err_msg: "json index must specify cast type: missing parameter" + "[missing_param=json_cast_type]: invalid parameter" + "[expected=valid index params][actual=invalid index params]"} + self.create_index(client, collection_name, index_params, + check_task=CheckTasks.err_res, check_items=error) + # 6. prepare index params with no json_path + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE") + index_params.add_index(field_name="my_json", index_type="INVERTED", params={"json_cast_type": DataType.VARCHAR}) + # 7. create index + error = {ct.err_code: 1100, ct.err_msg: "json index must specify json path: missing parameter" + "[missing_param=json_path]: invalid parameter[expected=valid index params]" + "[actual=invalid index params]"} + self.create_index(client, collection_name, index_params, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("invalid_index_type", ["12-s", "12 s", "(mn)", "中文", "%$#"]) + def test_milvus_client_json_path_index_invalid_index_type(self, invalid_index_type): + """ + target: test json path index with invalid index type + method: create json path index with invalid index type + expected: raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + self.release_collection(client, collection_name) + self.drop_index(client, collection_name, "vector") + # 2. prepare index params with invalid index type + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE") + index_params.add_index(field_name="my_json", index_type=invalid_index_type, params={"json_cast_type": DataType.INT64, + "json_path": "my_json['a']['b']"}) + # 3. create index + error = {ct.err_code: 1100, ct.err_msg: f"invalid parameter[expected=valid index]" + f"[actual=invalid index type: {invalid_index_type}]"} + self.create_index(client, collection_name, index_params, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + def test_milvus_client_json_path_index_not_support_index_type(self, enable_dynamic_field, not_supported_varchar_scalar_index): + """ + target: test json path index with not supported index type + method: create json path index with not supported index type + expected: raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. 
create collection + json_field_name = "my_json" + schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + if not enable_dynamic_field: + schema.add_field(json_field_name, DataType.JSON) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, default_dim) + # 2. prepare index params with invalid json index type + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE") + index_params.add_index(field_name=json_field_name, index_type=not_supported_varchar_scalar_index, + params={"json_cast_type": DataType.INT64, "json_path": "my_json['a']['b']"}) + # 3. create index + if not_supported_varchar_scalar_index == "TRIE": + supported_field_type = "varchar" + if not_supported_varchar_scalar_index == "STL_SORT": + supported_field_type = "numeric" + if not_supported_varchar_scalar_index == "BITMAP": + supported_field_type = "bool, int, string and array" + not_supported_varchar_scalar_index = "bitmap index" + error = {ct.err_code: 1100, ct.err_msg: f"{not_supported_varchar_scalar_index} are only supported on " + f"{supported_field_type} field: invalid parameter[expected=valid " + f"index params][actual=invalid index params]"} + self.create_index(client, collection_name, index_params, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.skip(reason="issue 40420") + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("invalid_json_cast_type", ["12-s", "12 s", "(mn)", "中文", "%$#"]) + def test_milvus_client_json_path_index_invalid_json_cast_type(self, enable_dynamic_field, invalid_json_cast_type, + supported_varchar_scalar_index): + """ + target: test json path index with invalid json_cast_type + method: create json path index with invalid json_cast_type + expected: raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + json_field_name = "my_json" + schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + if not enable_dynamic_field: + schema.add_field(json_field_name, DataType.JSON) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, default_dim) + # 2. prepare index params with invalid json index type + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE") + index_params.add_index(field_name=json_field_name, index_name="json_index", index_type=supported_varchar_scalar_index, + params={"json_cast_type": invalid_json_cast_type, "json_path": f"{json_field_name}['a']['b']"}) + # 3. 
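# The support matrix that the previous test's error strings encode (hedged: paraphrased from
# current server messages, not an official API contract):
SCALAR_INDEX_FIELD_SUPPORT = {
    "TRIE": "varchar",
    "STL_SORT": "numeric",
    "BITMAP": "bool, int, string and array",
    "INVERTED": "scalar fields, including JSON paths",
}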
create index
+        error = {ct.err_code: 1100, ct.err_msg: f"index params][actual=invalid index params]"}
+        self.create_index(client, collection_name, index_params,
+                          check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="issue 40420")
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_milvus_client_json_path_index_not_supported_json_cast_type(self, enable_dynamic_field, not_supported_json_cast_type,
+                                                                        supported_varchar_scalar_index):
+        """
+        target: test json path index with a not supported json_cast_type
+        method: create json path index with a not supported json_cast_type
+        expected: raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        json_field_name = "my_json"
+        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        if not enable_dynamic_field:
+            schema.add_field(json_field_name, DataType.JSON)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, default_dim)
+        # 2. prepare index params with a not supported json_cast_type
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
+        index_params.add_index(field_name=json_field_name, index_name="json_index", index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": not_supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
+        # 3. create index
+        error = {ct.err_code: 1100, ct.err_msg: f"index params][actual=invalid index params]"}
+        self.create_index(client, collection_name, index_params,
+                          check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="issue 40423")
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    @pytest.mark.parametrize("invalid_json_path", ["12-s", "12 s", "(mn)", "中文", "%$#"])
+    def test_milvus_client_json_path_index_invalid_json_path(self, enable_dynamic_field, invalid_json_path,
+                                                             supported_varchar_scalar_index):
+        """
+        target: test json path index with an invalid json_path
+        method: create json path index with an invalid json_path
+        expected: raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        json_field_name = "my_json"
+        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        if not enable_dynamic_field:
+            schema.add_field(json_field_name, DataType.JSON)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, default_dim)
+        # 2. 
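# Hedged examples of well-formed json_path values (bracket notation rooted at the JSON field
# name; the array-subscript form is an assumption based on Milvus documentation):
valid_paths = ["my_json['a']['b']", "my_json['a'][0]['b']"]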
prepare index params with invalid json_path
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
+        index_params.add_index(field_name=json_field_name, index_name="json_index",
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": DataType.INT64, "json_path": invalid_json_path})
+        # 3. create index
+        error = {ct.err_code: 1100, ct.err_msg: "index params][actual=invalid index params]"}
+        self.create_index(client, collection_name, index_params,
+                          check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_json_path_index_not_exist_field_non_dynamic(self, supported_varchar_scalar_index):
+        """
+        target: test json path index on a not existed field with dynamic field disabled
+        method: create json path index on a not existed field with enable_dynamic_field disabled
+        expected: raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        json_field_name = "my_json"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
+        index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": DataType.INT64, "json_path": f"{json_field_name}['a']"})
+        error = {ct.err_code: 65535, ct.err_msg: f"cannot create index on non-exist field: {json_field_name}"}
+        self.create_collection(client, collection_name, schema=schema, index_params=index_params,
+                               check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_milvus_client_different_index_same_json_path(self, enable_dynamic_field, supported_varchar_scalar_index):
+        """
+        target: test creating two json path indexes on the same json path with different json_cast_type
+        method: create a json path index, then create another index on the same json path
+                with a different json_cast_type under the same index name
+        expected: raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        json_field_name = "my_json"
+        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        if not enable_dynamic_field:
+            schema.add_field(json_field_name, DataType.JSON)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, default_dim)
+        # 2.
prepare index params
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
+        index_params.add_index(field_name=json_field_name, index_name="json_index",
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": DataType.INT64, "json_path": f"{json_field_name}['a']"})
+        self.create_index(client, collection_name, index_params)
+        # 3. prepare another index params with a different json_cast_type
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=json_field_name, index_name="json_index",
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": DataType.VARCHAR, "json_path": f"{json_field_name}['a']"})
+        # 4. create index
+        error = {ct.err_code: 65535, ct.err_msg: "CreateIndex failed: at most one distinct index is allowed per field"}
+        self.create_index(client, collection_name, index_params,
+                          check_task=CheckTasks.err_res, check_items=error)
+
+
+class TestMilvusClientJsonPathIndexValid(TestMilvusClientV2Base):
+    """ Test case of json path index """
+
+    @pytest.fixture(scope="function", params=["TRIE", "STL_SORT", "BITMAP"])
+    def not_supported_varchar_scalar_index(self, request):
+        yield request.param
+
+    @pytest.fixture(scope="function", params=["INVERTED"])
+    def supported_varchar_scalar_index(self, request):
+        yield request.param
+
+    @pytest.fixture(scope="function", params=[DataType.BOOL, DataType.INT8, DataType.INT16, DataType.INT32,
+                                              DataType.INT64, DataType.FLOAT, DataType.DOUBLE, DataType.VARCHAR])
+    def supported_json_cast_type(self, request):
+        yield request.param
+
+    """
+    ******************************************************************
+    #  The following are valid base cases
+    ******************************************************************
+    """
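+    # A minimal sketch of the json path index flow these cases exercise, assuming a
+    # standalone MilvusClient and illustrative collection/field names:
+    #
+    #   from pymilvus import MilvusClient, DataType
+    #   client = MilvusClient("http://localhost:19530")
+    #   index_params = client.prepare_index_params()
+    #   index_params.add_index(field_name="my_json", index_type="INVERTED",
+    #                          params={"json_cast_type": DataType.DOUBLE,
+    #                                  "json_path": "my_json['a']['b']"})
+    #   client.create_index("my_collection", index_params)
+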
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_milvus_client_json_path_index_default(self, enable_dynamic_field, supported_json_cast_type,
+                                                   supported_varchar_scalar_index):
+        """
+        target: test json path index with all supported json_cast_type
+        method: create json path index on different json paths after inserting data
+                with different data distributions
+        expected: create index successfully and describe index correctly
+        """
+        if enable_dynamic_field:
+            pytest.skip('need to fix the field name when enabling dynamic field')
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        json_field_name = "my_json"
+        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        if not enable_dynamic_field:
+            schema.add_field(json_field_name, DataType.JSON)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, schema=schema, index_params=index_params)
+        # 2. insert with different data distribution
+        vectors = cf.gen_vectors(default_nb+50, default_dim)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in
+                range(default_nb)]
+        self.insert(client, collection_name, rows)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: i} for i in
+                range(default_nb, default_nb+10)]
+        self.insert(client, collection_name, rows)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {}} for i in
+                range(default_nb+10, default_nb+20)]
+        self.insert(client, collection_name, rows)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in
+                range(default_nb + 20, default_nb + 30)]
+        self.insert(client, collection_name, rows)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in
+                range(default_nb + 30, default_nb + 40)]
+        self.insert(client, collection_name, rows)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in
+                range(default_nb + 40, default_nb + 50)]
+        self.insert(client, collection_name, rows)
+        # 3. prepare index params
+        index_name = "json_index"
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
+        index_params.add_index(field_name=json_field_name, index_name=index_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']['b']"})
+        index_params.add_index(field_name=json_field_name, index_name=index_name + '1',
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']"})
+        index_params.add_index(field_name=json_field_name, index_name=index_name + '2',
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}"})
+        index_params.add_index(field_name=json_field_name, index_name=index_name + '3',
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a'][0]['b']"})
+        index_params.add_index(field_name=json_field_name, index_name=index_name + '4',
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a'][0]"})
+        # 4. create index
+        self.create_index(client, collection_name, index_params)
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}['a']['b']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name})
+        # 5. create json index on different json path
+        self.create_index(client, collection_name, index_params)
+        # 6. create same json index twice
+        self.create_index(client, collection_name, index_params)
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}['a']['b']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name})
+        self.describe_index(client, collection_name, index_name + '1',
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}['a']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name + '1'})
+        self.describe_index(client, collection_name, index_name + '2',
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name + '2'})
+        self.describe_index(client, collection_name, index_name + '3',
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}['a'][0]['b']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name + '3'})
+        self.describe_index(client, collection_name, index_name + '4',
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}['a'][0]",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name + '4'})
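+    # When index_name is omitted, the index name is derived from the json path, e.g.
+    # an index on my_json['a']['b'] ends up as "my_json/a/b" (as asserted in the case
+    # below; illustrative names, a sketch rather than part of this suite):
+    #
+    #   index_params.add_index(field_name="my_json", index_type="INVERTED",
+    #                          params={"json_cast_type": DataType.DOUBLE,
+    #                                  "json_path": "my_json['a']['b']"})
+    #   client.create_index("my_collection", index_params)
+    #   client.describe_index("my_collection", "my_json/a/b")
+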
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_milvus_client_json_path_index_default_index_name(self, enable_dynamic_field, supported_json_cast_type,
+                                                              supported_varchar_scalar_index):
+        """
+        target: test json path index with default index name
+        method: create json path index without specifying the index name
+        expected: create index successfully with the name derived from the json path
+        """
+        if enable_dynamic_field:
+            pytest.skip('issue 40374')
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        json_field_name = "my_json"
+        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        if not enable_dynamic_field:
+            schema.add_field(json_field_name, DataType.JSON)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, schema=schema, index_params=index_params)
+        # 2. insert
+        vectors = cf.gen_vectors(default_nb, default_dim)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. prepare index params
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
+        index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']['b']"})
+        # 4. create index
+        index_name = json_field_name + '/a/b'
+        self.create_index(client, collection_name, index_params)
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}['a']['b']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_milvus_client_json_path_index_on_non_json_field(self, supported_json_cast_type,
+                                                             supported_varchar_scalar_index):
+        """
+        target: test json path index params on a non-json (varchar) field
+        method: create an index with json_cast_type and json_path params on a varchar field
+        expected: create index successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, schema=schema, index_params=index_params)
+        # 2. insert
+        vectors = cf.gen_vectors(default_nb, default_dim)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i)} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. prepare index params
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
+        index_params.add_index(field_name=default_string_field_name, index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{default_string_field_name}['a']['b']"})
+        # 4. create index
+        index_name = default_string_field_name
+        self.create_index(client, collection_name, index_params)
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{default_string_field_name}['a']['b']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": default_string_field_name,
+                                "index_name": index_name})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_milvus_client_different_json_path_index_same_field_same_index_name(self, enable_dynamic_field,
+                                                                                supported_json_cast_type,
+                                                                                supported_varchar_scalar_index):
+        """
+        target: test different json path indexes with the same index name at the same time
+        method: add different json path indexes with the same index name to one index_params object
+        expected: create index successfully using the last index params with the same index name
+        """
+        if enable_dynamic_field:
+            pytest.skip('need to fix the field name when enabling dynamic field')
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        json_field_name = "my_json"
+        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        if not enable_dynamic_field:
+            schema.add_field(json_field_name, DataType.JSON)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, schema=schema, index_params=index_params)
+        # 2. insert
+        vectors = cf.gen_vectors(default_nb, default_dim)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. prepare index params
+        index_name = "json_index"
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
+        index_params.add_index(field_name=json_field_name, index_name=index_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']['b']"})
+        index_params.add_index(field_name=json_field_name, index_name=index_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']"})
+        index_params.add_index(field_name=json_field_name, index_name=index_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}"})
+        # 4. create index
+        self.create_index(client, collection_name, index_params)
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="issue 40442")
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_milvus_client_different_json_path_index_same_field_different_index_name(self, enable_dynamic_field,
+                                                                                     supported_json_cast_type,
+                                                                                     supported_varchar_scalar_index):
+        """
+        target: test different json path indexes with different default index names at the same time
+        method: add different json path indexes to one index_params object
+        expected: create index successfully using the last index params with the same index name
+        """
+        if enable_dynamic_field:
+            pytest.skip('need to fix the field name when enabling dynamic field')
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        json_field_name = "my_json"
+        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        if not enable_dynamic_field:
+            schema.add_field(json_field_name, DataType.JSON)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, schema=schema, index_params=index_params)
+        # 2. insert
+        vectors = cf.gen_vectors(default_nb, default_dim)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. prepare index params
+        index_name = "json_index"
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
+        index_params.add_index(field_name=json_field_name, index_name=index_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']['b']"})
+        index_params.add_index(field_name=json_field_name, index_name=index_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']"})
+        index_params.add_index(field_name=json_field_name, index_name=index_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}"})
+        # 4. create index
+        self.create_index(client, collection_name, index_params)
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_milvus_client_diff_index_same_field_diff_index_name_diff_index_params(self, enable_dynamic_field,
+                                                                                   supported_json_cast_type,
+                                                                                   supported_varchar_scalar_index):
+        """
+        target: test different json path indexes on the same field with default index names
+        method: create json path indexes on different json paths of the same field
+                using separate create_index calls without specifying index names
+        expected: create all indexes successfully with names derived from the json paths
+        """
+        if enable_dynamic_field:
+            pytest.skip('need to fix the field name when enabling dynamic field')
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        json_field_name = "my_json"
+        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        if not enable_dynamic_field:
+            schema.add_field(json_field_name, DataType.JSON)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, schema=schema, index_params=index_params)
+        # 2. insert
+        vectors = cf.gen_vectors(default_nb, default_dim)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. create index on different json paths one by one
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']['b']"})
+        self.create_index(client, collection_name, index_params)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=json_field_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']"})
+        self.create_index(client, collection_name, index_params)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=json_field_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}"})
+        self.create_index(client, collection_name, index_params)
+        # 4. check the created indexes
+        index_name = f"{json_field_name}/a/b"
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": "my_json['a']['b']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name})
+        index_name = f"{json_field_name}/a"
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": "my_json['a']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name})
+        index_name = f"{json_field_name}/"
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": "my_json",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                # "index_name": index_name  # issue 40441
+                                })
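+    # Once a json path index exists, filters can target the same path in search, e.g.
+    # (illustrative names, a sketch rather than part of this suite):
+    #
+    #   client.search("my_collection", data=[query_vector], limit=10,
+    #                 filter="my_json['a']['b'] > 5", output_fields=["my_json"])
+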
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
+    def test_milvus_client_json_index_same_json_path_diff_field(self, enable_dynamic_field, supported_json_cast_type,
+                                                                supported_varchar_scalar_index):
+        """
+        target: test json path indexes with the same json path on different json fields
+        method: create json path indexes with the same json path on two json fields
+        expected: create both indexes successfully
+        """
+        if enable_dynamic_field:
+            pytest.skip('need to fix the field name when enabling dynamic field')
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        json_field_name = "my_json"
+        schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        if not enable_dynamic_field:
+            schema.add_field(json_field_name, DataType.JSON)
+            schema.add_field(json_field_name + "1", DataType.JSON)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, schema=schema, index_params=index_params)
+        # 2. insert
+        vectors = cf.gen_vectors(default_nb, default_dim)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': {'b': i}},
+                 json_field_name + "1": {'a': {'b': i}}} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. create index on the same json path of different json fields
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']['b']"})
+        self.create_index(client, collection_name, index_params)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=json_field_name + "1",
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}1['a']['b']"})
+        self.create_index(client, collection_name, index_params)
+        # 4. check the created indexes
+        index_name = f"{json_field_name}/a/b"
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}['a']['b']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name})
+        index_name = f"{json_field_name}1/a/b"
+        self.describe_index(client, collection_name, index_name,
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                # "json_cast_type": supported_json_cast_type,  # issue 40426
+                                "json_path": f"{json_field_name}1['a']['b']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name + "1",
+                                "index_name": index_name})
diff --git a/tests/python_client/milvus_client/test_milvus_client_insert.py b/tests/python_client/milvus_client/test_milvus_client_insert.py
index 65db0c9abe..54ea5a67bc 100644
--- a/tests/python_client/milvus_client/test_milvus_client_insert.py
+++ b/tests/python_client/milvus_client/test_milvus_client_insert.py
@@ -793,6 +793,39 @@ class TestMilvusClientUpsertInvalid(TestMilvusClientV2Base):
         self.upsert(client, collection_name, data=rows, partition_name=partition_name,
                     check_task=CheckTasks.err_res, check_items=error)
 
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("nullable", [True, False])
+    def test_milvus_client_insert_array_element_null(self, nullable):
+        """
+        target: test insert an array field containing a None element
+        method: create a collection with an int64 array field, then insert rows
+                whose array value contains None
+        expected: raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        nullable_field_name = "nullable_field"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(nullable_field_name, DataType.ARRAY, element_type=DataType.INT64, max_capacity=12,
+                         max_length=64, nullable=nullable)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2.
insert + vectors = cf.gen_vectors(default_nb, dim) + rows = [{default_primary_key_field_name: str(i), default_vector_field_name: vectors[i], + nullable_field_name: [None, 2, 3]} for i in range(default_nb)] + error = {ct.err_code: 1, + ct.err_msg: "The Input data type is inconsistent with defined schema, {nullable_field} field " + "should be a array, but got a {} instead."} + self.insert(client, collection_name, rows, + check_task=CheckTasks.err_res, + check_items=error) + class TestMilvusClientUpsertValid(TestMilvusClientV2Base): """ Test case of search interface """ diff --git a/tests/python_client/milvus_client/test_milvus_client_search.py b/tests/python_client/milvus_client/test_milvus_client_search.py index c6564c70af..ae8520a05a 100644 --- a/tests/python_client/milvus_client/test_milvus_client_search.py +++ b/tests/python_client/milvus_client/test_milvus_client_search.py @@ -52,8 +52,246 @@ class TestMilvusClientSearchInvalid(TestMilvusClientV2Base): ****************************************************************** """ + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("invalid_collection_name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) + def test_milvus_client_search_invalid_collection_name_string(self, invalid_collection_name): + """ + target: test search with invalid collection name + method: create connection, collection, insert and search with invalid collection name + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. search + rng = np.random.default_rng(seed=19530) + vectors_to_search = rng.random((1, 8)) + error = {ct.err_code: 100, + ct.err_msg: f"collection not found[database=default][collection={invalid_collection_name}]"} + self.search(client, invalid_collection_name, vectors_to_search, limit=default_limit, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.skip(reason="pymilvus issue 2587") + @pytest.mark.parametrize("invalid_collection_name", [1]) + def test_milvus_client_search_invalid_collection_name_non_string(self, invalid_collection_name): + """ + target: test search with invalid collection name + method: create connection, collection, insert and search with invalid collection name + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. search + rng = np.random.default_rng(seed=19530) + vectors_to_search = rng.random((1, 8)) + error = {ct.err_code: 100, + ct.err_msg: f"collection not found[database=default][collection={invalid_collection_name}]"} + self.search(client, invalid_collection_name, vectors_to_search, limit=default_limit, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("invalid_data", [1, "12-s","中文", "% $#"]) + def test_milvus_client_search_invalid_data(self, invalid_data): + """ + target: test search with invalid data + method: create connection, collection, insert and search with invalid data + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. 
search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        error = {ct.err_code: 100,
+                 ct.err_msg: f"`search_data` value {invalid_data} is illegal"}
+        self.search(client, collection_name, invalid_data, limit=default_limit,
+                    check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("invalid_limit", [-1, ct.min_limit-1, "1", "12-s", "中文", "%$#"])
+    def test_milvus_client_search_invalid_limit(self, invalid_limit):
+        """
+        target: test search with invalid limit
+        method: create connection, collection, insert and search with invalid limit
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        error = {ct.err_code: 1,
+                 ct.err_msg: f"`limit` value {invalid_limit} is illegal"}
+        self.search(client, collection_name, vectors_to_search, limit=invalid_limit,
+                    check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("invalid_limit", [ct.max_limit+1])
+    def test_milvus_client_search_limit_out_of_range(self, invalid_limit):
+        """
+        target: test search with limit out of range
+        method: create connection, collection, insert and search with limit larger than the max topk
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        error = {ct.err_code: 65535,
+                 ct.err_msg: "topk [16385] is invalid, it should be in range [1, 16384], but got 16385"}
+        self.search(client, collection_name, vectors_to_search, limit=invalid_limit,
+                    check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("invalid_filter", ["12-s"])
+    def test_milvus_client_search_invalid_filter(self, invalid_filter):
+        """
+        target: test search with invalid filter
+        method: create connection, collection, insert and search with invalid filter
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        error = {ct.err_code: 1100,
+                 ct.err_msg: f"failed to create query plan: predicate is not a boolean expression: {invalid_filter}, "
+                             f"data type: Int64: invalid parameter"}
+        self.search(client, collection_name, vectors_to_search, filter=invalid_filter, limit=default_limit,
+                    check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("invalid_output_fields", [1, "1"])
+    def test_milvus_client_search_invalid_output_fields(self, invalid_output_fields):
+        """
+        target: test search with invalid output_fields
+        method: create connection, collection, insert and search with invalid output_fields
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1.
create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        error = {ct.err_code: 1,
+                 ct.err_msg: f"`output_fields` value {invalid_output_fields} is illegal"}
+        self.search(client, collection_name, vectors_to_search, limit=default_limit,
+                    output_fields=invalid_output_fields,
+                    check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="pymilvus issue 2588")
+    @pytest.mark.parametrize("invalid_search_params", [1, "1"])
+    def test_milvus_client_search_invalid_search_params(self, invalid_search_params):
+        """
+        target: test search with invalid search_params
+        method: create connection, collection, insert and search with invalid search_params
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        error = {ct.err_code: 1,
+                 ct.err_msg: f"`search_params` value {invalid_search_params} is illegal"}
+        self.search(client, collection_name, vectors_to_search, limit=default_limit,
+                    search_params=invalid_search_params,
+                    check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("invalid_partition_names", [1, "1"])
+    def test_milvus_client_search_invalid_partition_names(self, invalid_partition_names):
+        """
+        target: test search with invalid partition_names
+        method: create connection, collection, insert and search with invalid partition_names
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        error = {ct.err_code: 1,
+                 ct.err_msg: f"`partition_name_array` value {invalid_partition_names} is illegal"}
+        self.search(client, collection_name, vectors_to_search, limit=default_limit,
+                    partition_names=invalid_partition_names,
+                    check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
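+    # For contrast with the invalid inputs in these cases, a well-formed call shape
+    # for the optional search parameters exercised here (illustrative values only):
+    #
+    #   client.search(collection_name, data=[query_vector], limit=10,
+    #                 anns_field="vector", partition_names=["_default"],
+    #                 search_params={"metric_type": "COSINE"},
+    #                 output_fields=["id"])
+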
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("invalid_anns_field", [1])
+    def test_milvus_client_search_invalid_anns_field(self, invalid_anns_field):
+        """
+        target: test search with invalid anns_field
+        method: create connection, collection, insert and search with invalid anns_field
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        error = {ct.err_code: 1,
+                 ct.err_msg: f"`anns_field` value {invalid_anns_field} is illegal"}
+        self.search(client, collection_name, vectors_to_search, limit=default_limit,
+                    anns_field=invalid_anns_field,
+                    check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("invalid_anns_field", ["not_exist_field"])
+    def test_milvus_client_search_not_exist_anns_field(self, invalid_anns_field):
+        """
+        target: test search with a not existed anns_field
+        method: create connection, collection, insert and search with an anns_field not in the schema
+        expected: Raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        # 1. create collection
+        self.create_collection(client, collection_name, default_dim)
+        # 2. search
+        rng = np.random.default_rng(seed=19530)
+        vectors_to_search = rng.random((1, 8))
+        error = {ct.err_code: 1100,
+                 ct.err_msg: f"failed to create query plan: failed to get field schema by name: "
+                             f"fieldName({invalid_anns_field}) not found: invalid parameter"}
+        self.search(client, collection_name, vectors_to_search, limit=default_limit,
+                    anns_field=invalid_anns_field,
+                    check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.xfail(reason="pymilvus issue 1554")
+    @pytest.mark.skip(reason="pymilvus issue 1554")
     def test_milvus_client_collection_invalid_primary_field(self):
         """
         target: test high level api: client.create_collection
@@ -127,6 +365,162 @@
         check_task=CheckTasks.err_res, check_items=error)
         self.drop_collection(client, collection_name)
 
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_vector_field(self, null_expr_op):
+        """
+        target: test search with null expression on vector field
+        method: create connection, collection, insert and search
+        expected: raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                 default_string_field_name: str(i)} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3.
search
+        vectors_to_search = rng.random((1, dim))
+        null_expr = default_vector_field_name + " " + null_expr_op
+        log.info(null_expr)
+        error = {ct.err_code: 65535,
+                 ct.err_msg: "unsupported data type: VECTOR_FLOAT"}
+        self.search(client, collection_name, vectors_to_search,
+                    filter=null_expr,
+                    check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_not_exist_field(self, null_expr_op):
+        """
+        target: test search with null expression on a not existed field
+        method: create connection, collection, insert and search
+        expected: raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                 default_string_field_name: str(i)} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. search
+        vectors_to_search = rng.random((1, dim))
+        not_exist_field_name = "not_exist_field"
+        null_expr = not_exist_field_name + " " + null_expr_op
+        log.info(null_expr)
+        error = {ct.err_code: 1100,
+                 ct.err_msg: f"failed to create query plan: cannot parse expression: "
+                             f"{null_expr}, error: field {not_exist_field_name} not exist: invalid parameter"}
+        self.search(client, collection_name, vectors_to_search,
+                    filter=null_expr,
+                    check_task=CheckTasks.err_res, check_items=error)
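+    # A null expression has the form "<field> is null" / "<field> is not null"
+    # (case-insensitive); a valid usage sketch with illustrative names:
+    #
+    #   client.search("my_collection", data=[query_vector], limit=10,
+    #                 filter="nullable_field is null")
+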
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("nullable", [True, False])
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_json_key(self, nullable, null_expr_op):
+        """
+        target: test search with null expression on each key of json
+        method: create connection, collection, insert and search
+        expected: raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        nullable_field_name = "nullable_field"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(nullable_field_name, DataType.JSON, nullable=nullable)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        vectors = cf.gen_vectors(default_nb, dim)
+        if nullable:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: vectors[i],
+                     nullable_field_name: {'a': None}} for i in range(default_nb)]
+            null_expr = nullable_field_name + "['a']" + " " + null_expr_op
+        else:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: vectors[i],
+                     nullable_field_name: {'a': 1, 'b': None}} for i in range(default_nb)]
+            null_expr = nullable_field_name + "['b']" + " " + null_expr_op
+        self.insert(client, collection_name, rows)
+        # 3. search
+        log.info(null_expr)
+        error = {ct.err_code: 1100,
+                 ct.err_msg: f"failed to create query plan: cannot parse expression: {null_expr}, "
+                             f"error: invalid expression: {null_expr}: invalid parameter"}
+        self.search(client, collection_name, [vectors[0]],
+                    filter=null_expr,
+                    check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("nullable", [True, False])
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_array_element(self, nullable, null_expr_op):
+        """
+        target: test search with null expression on an array element
+        method: create connection, collection, insert and search
+        expected: raise exception
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        nullable_field_name = "nullable_field"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(nullable_field_name, DataType.ARRAY, element_type=DataType.INT64, max_capacity=12,
+                         max_length=64, nullable=nullable)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        vectors = cf.gen_vectors(default_nb, dim)
+        if nullable:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: vectors[i],
+                     nullable_field_name: None} for i in range(default_nb)]
+        else:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: vectors[i],
+                     nullable_field_name: [1, 2, 3]} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
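+        # array element access (e.g. nullable_field[0]) is not supported in null
+        # expressions, hence the parse error asserted below
+        # 3.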
search + null_expr = nullable_field_name + "[0]" + " " + null_expr_op + log.info(null_expr) + error = {ct.err_code: 1100, + ct.err_msg: f"failed to create query plan: cannot parse expression: {null_expr}, " + f"error: invalid expression: {null_expr}: invalid parameter"} + self.search(client, collection_name, [vectors[0]], + filter=null_expr, + check_task=CheckTasks.err_res, check_items=error) + class TestMilvusClientSearchValid(TestMilvusClientV2Base): """ Test case of search interface """ @@ -538,3 +932,622 @@ class TestMilvusClientSearchValid(TestMilvusClientV2Base): 'params': cf.get_search_params_params('IVF_FLAT')} self.search(client, collection_name, data=[search_vector], filter='id >= 10', search_params=search_params, check_task=CheckTasks.err_res, check_items=error) + + +class TestMilvusClientSearchNullExpr(TestMilvusClientV2Base): + """ Test case of search interface """ + + @pytest.fixture(scope="function", params=[False, True]) + def auto_id(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["COSINE", "L2"]) + def metric_type(self, request): + yield request.param + + """ + ****************************************************************** + # The following are valid base cases + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L0) + @pytest.mark.parametrize("nullable", [True, False]) + @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"]) + def test_milvus_client_search_null_expr(self, nullable, null_expr_op): + """ + target: test search with null expression on int64 fields + method: create connection, collection, insert and search + expected: search/query successfully + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + dim = 5 + # 1. create collection + nullable_field_name = "nullable_field" + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True, + auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + schema.add_field(nullable_field_name, DataType.INT64, nullable=nullable) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) + # 2. insert + rng = np.random.default_rng(seed=19530) + if nullable: + rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]), + default_string_field_name: str(i), "nullable_field": None} for i in range(default_nb)] + else: + rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]), + default_string_field_name: str(i), "nullable_field": i} for i in range(default_nb)] + self.insert(client, collection_name, rows) + # 3. 
search + vectors_to_search = rng.random((1, dim)) + insert_ids = [str(i) for i in range(default_nb)] + null_expr = nullable_field_name + " " + null_expr_op + log.info(null_expr) + if nullable: + if "not" in null_expr or "NOT" in null_expr: + insert_ids = [] + limit = 0 + + else: + limit = default_limit + else: + if "not" in null_expr or "NOT" in null_expr: + limit = default_limit + else: + insert_ids = [] + limit = 0 + self.search(client, collection_name, vectors_to_search, + filter=null_expr, + consistency_level = "Strong", + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "limit": limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("nullable", [True, False]) + @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"]) + def test_milvus_client_search_null_expr_int8(self, nullable, null_expr_op): + """ + target: test search with null expression on int8 fields + method: create connection, collection, insert and search + expected: search/query successfully + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + dim = 5 + # 1. create collection + nullable_field_name = "nullable_field" + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True, + auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + schema.add_field(nullable_field_name, DataType.INT8, nullable=nullable) + # schema.add_field("array_field", DataType.ARRAY, element_type=DataType.INT64, max_capacity=12, + # max_length=64, nullable=nullable) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) + # 2. insert + rng = np.random.default_rng(seed=19530) + if nullable: + rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]), + default_string_field_name: str(i), "nullable_field": None} for i in range(default_nb)] + else: + rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]), + default_string_field_name: str(i), "nullable_field": np.int8(i)} for i in range(default_nb)] + self.insert(client, collection_name, rows) + # 3. 
search + vectors_to_search = rng.random((1, dim)) + insert_ids = [str(i) for i in range(default_nb)] + null_expr = nullable_field_name + " " + null_expr_op + log.info(null_expr) + if nullable: + if "not" in null_expr or "NOT" in null_expr: + insert_ids = [] + limit = 0 + + else: + limit = default_limit + else: + if "not" in null_expr or "NOT" in null_expr: + limit = default_limit + else: + insert_ids = [] + limit = 0 + self.search(client, collection_name, vectors_to_search, + filter=null_expr, + consistency_level = "Strong", + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "limit": limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("nullable", [True, False]) + @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"]) + def test_milvus_client_search_null_expr_int16(self, nullable, null_expr_op): + """ + target: test search with null expression on int16 fields + method: create connection, collection, insert and search + expected: search/query successfully + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + dim = 5 + # 1. create collection + nullable_field_name = "nullable_field" + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True, + auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + schema.add_field(nullable_field_name, DataType.INT16, nullable=nullable) + # schema.add_field("array_field", DataType.ARRAY, element_type=DataType.INT64, max_capacity=12, + # max_length=64, nullable=nullable) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) + # 2. insert + rng = np.random.default_rng(seed=19530) + if nullable: + rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]), + default_string_field_name: str(i), "nullable_field": None} for i in range(default_nb)] + else: + rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]), + default_string_field_name: str(i), "nullable_field": np.int16(i)} for i in range(default_nb)] + self.insert(client, collection_name, rows) + # 3. 
+        vectors_to_search = rng.random((1, dim))
+        insert_ids = [str(i) for i in range(default_nb)]
+        null_expr = nullable_field_name + " " + null_expr_op
+        log.info(null_expr)
+        if nullable:
+            if "not" in null_expr or "NOT" in null_expr:
+                insert_ids = []
+                limit = 0
+            else:
+                limit = default_limit
+        else:
+            if "not" in null_expr or "NOT" in null_expr:
+                limit = default_limit
+            else:
+                insert_ids = []
+                limit = 0
+        self.search(client, collection_name, vectors_to_search,
+                    filter=null_expr,
+                    consistency_level="Strong",
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "limit": limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("nullable", [True, False])
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_int32(self, nullable, null_expr_op):
+        """
+        target: test search with null expression on int32 fields
+        method: create connection, collection, insert and search
+        expected: search/query successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        nullable_field_name = "nullable_field"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        schema.add_field(nullable_field_name, DataType.INT32, nullable=nullable)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        rng = np.random.default_rng(seed=19530)
+        if nullable:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": None} for i in range(default_nb)]
+        else:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": np.int32(i)} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. search
+        vectors_to_search = rng.random((1, dim))
+        insert_ids = [str(i) for i in range(default_nb)]
+        null_expr = nullable_field_name + " " + null_expr_op
+        log.info(null_expr)
+        if nullable:
+            if "not" in null_expr or "NOT" in null_expr:
+                insert_ids = []
+                limit = 0
+            else:
+                limit = default_limit
+        else:
+            if "not" in null_expr or "NOT" in null_expr:
+                limit = default_limit
+            else:
+                insert_ids = []
+                limit = 0
+        self.search(client, collection_name, vectors_to_search,
+                    filter=null_expr,
+                    consistency_level="Strong",
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "limit": limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("nullable", [True, False])
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_float(self, nullable, null_expr_op):
+        """
+        target: test search with null expression on float fields
+        method: create connection, collection, insert and search
+        expected: search/query successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        nullable_field_name = "nullable_field"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        schema.add_field(nullable_field_name, DataType.FLOAT, nullable=nullable)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        rng = np.random.default_rng(seed=19530)
+        if nullable:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": None} for i in range(default_nb)]
+        else:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": i * 1.0} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. search
+        vectors_to_search = rng.random((1, dim))
+        insert_ids = [str(i) for i in range(default_nb)]
+        null_expr = nullable_field_name + " " + null_expr_op
+        log.info(null_expr)
+        if nullable:
+            if "not" in null_expr or "NOT" in null_expr:
+                insert_ids = []
+                limit = 0
+            else:
+                limit = default_limit
+        else:
+            if "not" in null_expr or "NOT" in null_expr:
+                limit = default_limit
+            else:
+                insert_ids = []
+                limit = 0
+        self.search(client, collection_name, vectors_to_search,
+                    filter=null_expr,
+                    consistency_level="Strong",
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "limit": limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("nullable", [True, False])
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_double(self, nullable, null_expr_op):
+        """
+        target: test search with null expression on double fields
+        method: create connection, collection, insert and search
+        expected: search/query successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        nullable_field_name = "nullable_field"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        schema.add_field(nullable_field_name, DataType.DOUBLE, nullable=nullable)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        rng = np.random.default_rng(seed=19530)
+        if nullable:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": None} for i in range(default_nb)]
+        else:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": np.double(i)} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. search
+        vectors_to_search = rng.random((1, dim))
+        insert_ids = [str(i) for i in range(default_nb)]
+        null_expr = nullable_field_name + " " + null_expr_op
+        log.info(null_expr)
+        if nullable:
+            if "not" in null_expr or "NOT" in null_expr:
+                insert_ids = []
+                limit = 0
+            else:
+                limit = default_limit
+        else:
+            if "not" in null_expr or "NOT" in null_expr:
+                limit = default_limit
+            else:
+                insert_ids = []
+                limit = 0
+        self.search(client, collection_name, vectors_to_search,
+                    filter=null_expr,
+                    consistency_level="Strong",
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "limit": limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("nullable", [True, False])
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_bool(self, nullable, null_expr_op):
+        """
+        target: test search with null expression on bool fields
+        method: create connection, collection, insert and search
+        expected: search/query successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        nullable_field_name = "nullable_field"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        schema.add_field(nullable_field_name, DataType.BOOL, nullable=nullable)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        rng = np.random.default_rng(seed=19530)
+        if nullable:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": None} for i in range(default_nb)]
+        else:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": np.bool_(i)} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. search
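+        # Note: np.bool_(i) stores False for row 0 and True elsewhere; null
+        # expressions depend only on null-ness, never on the stored value, so
+        # the expected hit counts are identical to the numeric cases above.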
+        vectors_to_search = rng.random((1, dim))
+        insert_ids = [str(i) for i in range(default_nb)]
+        null_expr = nullable_field_name + " " + null_expr_op
+        log.info(null_expr)
+        if nullable:
+            if "not" in null_expr or "NOT" in null_expr:
+                insert_ids = []
+                limit = 0
+            else:
+                limit = default_limit
+        else:
+            if "not" in null_expr or "NOT" in null_expr:
+                limit = default_limit
+            else:
+                insert_ids = []
+                limit = 0
+        self.search(client, collection_name, vectors_to_search,
+                    filter=null_expr,
+                    consistency_level="Strong",
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "limit": limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("nullable", [True, False])
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_varchar(self, nullable, null_expr_op):
+        """
+        target: test search with null expression on varchar fields
+        method: create connection, collection, insert and search
+        expected: search/query successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        nullable_field_name = "nullable_field"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        schema.add_field(nullable_field_name, DataType.VARCHAR, nullable=nullable, max_length=128)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        rng = np.random.default_rng(seed=19530)
+        if nullable:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": None} for i in range(default_nb)]
+        else:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": str(i)} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. search
+        vectors_to_search = rng.random((1, dim))
+        insert_ids = [str(i) for i in range(default_nb)]
+        null_expr = nullable_field_name + " " + null_expr_op
+        log.info(null_expr)
+        if nullable:
+            if "not" in null_expr or "NOT" in null_expr:
+                insert_ids = []
+                limit = 0
+            else:
+                limit = default_limit
+        else:
+            if "not" in null_expr or "NOT" in null_expr:
+                limit = default_limit
+            else:
+                insert_ids = []
+                limit = 0
+        self.search(client, collection_name, vectors_to_search,
+                    filter=null_expr,
+                    consistency_level="Strong",
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "limit": limit})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("nullable", [True, False])
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_json(self, nullable, null_expr_op):
+        """
+        target: test search with null expression on json fields
+        method: create connection, collection, insert and search
+        expected: search/query successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        nullable_field_name = "nullable_field"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        schema.add_field(nullable_field_name, DataType.JSON, nullable=nullable)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        index_params.add_index(field_name=nullable_field_name, index_name="json_index", index_type="INVERTED",
+                               params={"json_cast_type": DataType.INT64, "json_path": f"{nullable_field_name}['a']['b']"})
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        rng = np.random.default_rng(seed=19530)
+        if nullable:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), nullable_field_name: None} for i in range(default_nb)]
+        else:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), nullable_field_name: {'a': {'b': i, 'c': None}}} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. search
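+        # Unlike the scalar cases above, this collection also carries an
+        # INVERTED json-path index (json_cast_type=INT64 on
+        # nullable_field['a']['b']); the null-expression search below is
+        # expected to return the same hits with the index in place, and
+        # output_fields surfaces the raw JSON value of each hit.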
+        vectors_to_search = rng.random((1, dim))
+        insert_ids = [str(i) for i in range(default_nb)]
+        null_expr = nullable_field_name + " " + null_expr_op
+        log.info(null_expr)
+        if nullable:
+            if "not" in null_expr or "NOT" in null_expr:
+                insert_ids = []
+                limit = 0
+            else:
+                limit = default_limit
+        else:
+            if "not" in null_expr or "NOT" in null_expr:
+                limit = default_limit
+            else:
+                insert_ids = []
+                limit = 0
+        self.search(client, collection_name, vectors_to_search,
+                    filter=null_expr,
+                    output_fields=[nullable_field_name],
+                    consistency_level="Strong",
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "limit": limit})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("nullable", [True, False])
+    @pytest.mark.parametrize("null_expr_op", ["is null", "IS NULL", "is not null", "IS NOT NULL"])
+    def test_milvus_client_search_null_expr_array(self, nullable, null_expr_op):
+        """
+        target: test search with null expression on array fields
+        method: create connection, collection, insert and search
+        expected: search/query successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        dim = 5
+        # 1. create collection
+        nullable_field_name = "nullable_field"
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
+                         auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
+        schema.add_field(nullable_field_name, DataType.ARRAY, element_type=DataType.INT64, max_capacity=12,
+                         max_length=64, nullable=nullable)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_vector_field_name, metric_type="COSINE")
+        self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
+        # 2. insert
+        rng = np.random.default_rng(seed=19530)
+        if nullable:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": None} for i in range(default_nb)]
+        else:
+            rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_string_field_name: str(i), "nullable_field": [1, 2]} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        # 3. search
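+        # Null semantics for ARRAY fields mirror the scalar cases: only the
+        # null-ness of the field matters, not its elements, and output_fields
+        # exposes the (possibly null) array value for each hit.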
+        vectors_to_search = rng.random((1, dim))
+        insert_ids = [str(i) for i in range(default_nb)]
+        null_expr = nullable_field_name + " " + null_expr_op
+        log.info(null_expr)
+        if nullable:
+            if "not" in null_expr or "NOT" in null_expr:
+                insert_ids = []
+                limit = 0
+            else:
+                limit = default_limit
+        else:
+            if "not" in null_expr or "NOT" in null_expr:
+                limit = default_limit
+            else:
+                insert_ids = []
+                limit = 0
+        self.search(client, collection_name, vectors_to_search,
+                    filter=null_expr,
+                    output_fields=[nullable_field_name],
+                    consistency_level="Strong",
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "limit": limit})
\ No newline at end of file
diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt
index b329fc050f..f188bfb96e 100644
--- a/tests/python_client/requirements.txt
+++ b/tests/python_client/requirements.txt
@@ -28,8 +28,8 @@ pytest-parallel
 pytest-random-order
 
 # pymilvus
-pymilvus==2.6.0rc81
-pymilvus[bulk_writer]==2.6.0rc81
+pymilvus==2.6.0rc84
+pymilvus[bulk_writer]==2.6.0rc84
 
 
 # for customize config test
diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py
index f86176d11c..5fb10a260c 100644
--- a/tests/python_client/testcases/test_index.py
+++ b/tests/python_client/testcases/test_index.py
@@ -84,8 +84,8 @@ class TestIndexParams(TestcaseBase):
         fieldname = "non_existing"
         self.index_wrap.init_index(collection_w.collection, fieldname, default_index_params,
                                    check_task=CheckTasks.err_res,
-                                   check_items={ct.err_code: 999,
-                                                ct.err_msg: "cannot create index on non-existed field"})
+                                   check_items={ct.err_code: 65535,
+                                                ct.err_msg: f"cannot create index on non-exist field: {fieldname}"})
 
     @pytest.mark.tags(CaseLabel.L0)
     @pytest.mark.parametrize("index_type", ["non_exiting_type", 100])
@@ -459,8 +459,8 @@ class TestNewIndexBase(TestcaseBase):
         collection_w.create_index(ct.default_int8_field_name, default_index_params,
                                   index_name=ct.default_index_name,
                                   check_task=CheckTasks.err_res,
-                                  check_items={ct.err_code: 999,
-                                               ct.err_msg: "cannot create index on non-existed field: int8"}
+                                  check_items={ct.err_code: 65535,
+                                               ct.err_msg: "cannot create index on non-exist field: int8"}
                                   )
 
     @pytest.mark.tags(CaseLabel.L1)
diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py
index d85da326d5..74c55aeec4 100644
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -11260,7 +11260,6 @@ class TestCollectionHybridSearchValid(TestcaseBase):
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
-    @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36273")
     def test_hybrid_search_overall_limit_larger_sum_each_limit(self, nq, primary_field, metric_type):
         """