diff --git a/tests/_helm/values/e2e/distributed-pulsar b/tests/_helm/values/e2e/distributed-pulsar index ce59e74cea..7a66b42eb4 100644 --- a/tests/_helm/values/e2e/distributed-pulsar +++ b/tests/_helm/values/e2e/distributed-pulsar @@ -21,7 +21,7 @@ proxy: dataNode: resources: limits: - cpu: "2" + cpu: "4" memory: 8Gi requests: cpu: "0.5" diff --git a/tests/python_client/base/client_v2_base.py b/tests/python_client/base/client_v2_base.py index 5f00e0d8b1..d1fddaa6af 100644 --- a/tests/python_client/base/client_v2_base.py +++ b/tests/python_client/base/client_v2_base.py @@ -261,17 +261,18 @@ class TestMilvusClientV2Base(Base): **kwargs).run() return res, check_result - @trace() - def num_entities(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs): - timeout = TIMEOUT if timeout is None else timeout - kwargs.update({"timeout": timeout}) + # No client.num_entities method + # @trace() + # def num_entities(self, client, collection_name, timeout=None, check_task=None, check_items=None, **kwargs): + # timeout = TIMEOUT if timeout is None else timeout + # kwargs.update({"timeout": timeout}) - func_name = sys._getframe().f_code.co_name - res, check = api_request([client.num_entities, collection_name], **kwargs) - check_result = ResponseChecker(res, func_name, check_task, check_items, check, - collection_name=collection_name, - **kwargs).run() - return res, check_result + # func_name = sys._getframe().f_code.co_name + # res, check = api_request([client.num_entities, collection_name], **kwargs) + # check_result = ResponseChecker(res, func_name, check_task, check_items, check, + # collection_name=collection_name, + # **kwargs).run() + # return res, check_result @trace() def delete(self, client, collection_name, ids=None, timeout=None, filter=None, partition_name=None, diff --git a/tests/python_client/milvus_client/test_milvus_client_alter.py b/tests/python_client/milvus_client/test_milvus_client_alter.py index 30ccaf3d9b..fec1b11ba1 100644 --- a/tests/python_client/milvus_client/test_milvus_client_alter.py +++ b/tests/python_client/milvus_client/test_milvus_client_alter.py @@ -38,7 +38,7 @@ class TestMilvusClientAlterIndex(TestMilvusClientV2Base): expected: alter successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() self.create_collection(client, collection_name, ct.default_dim, consistency_level="Strong") idx_names, _ = self.list_indexes(client, collection_name, field_name=default_vector_field_name) self.load_collection(client, collection_name) @@ -69,7 +69,7 @@ class TestMilvusClientAlterIndex(TestMilvusClientV2Base): expected: raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. 
create collection schema = self.create_schema(client, enable_dynamic_field=False)[0] dim = 32 @@ -112,7 +112,7 @@ class TestMilvusClientAlterIndex(TestMilvusClientV2Base): expected: raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() self.create_collection(client, collection_name, ct.default_dim, consistency_level="Strong") idx_names, _ = self.list_indexes(client, collection_name, field_name=default_vector_field_name) self.release_collection(client, collection_name) @@ -141,7 +141,7 @@ class TestMilvusClientAlterCollection(TestMilvusClientV2Base): expected: alter successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() self.create_collection(client, collection_name, ct.default_dim, consistency_level="Strong") self.load_collection(client, collection_name) res1 = self.describe_collection(client, collection_name)[0] @@ -295,6 +295,114 @@ class TestMilvusClientAlterCollection(TestMilvusClientV2Base): res = self.describe_collection(client, collection_name)[0] assert res.get('enable_dynamic_field', None) is new_dynamic_flag + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("pk_field_type", [DataType.INT64, DataType.VARCHAR]) + def test_milvus_client_alter_allow_insert_auto_id(self, pk_field_type): + """ + target: test alter collection allow insert auto id + method: + 1. create collection with auto_id=True + 2. try to insert data with primary key + 3. verify insert failed + 4. alter collection allow_insert_auto_id=True + 5. insert data with customized primary key + 6. verify insert successfully + 7. verify the new inserted data's primary keys are customized + 8. verify the collection info + 9. drop the collection properties allow_insert_auto_id + 10. alter collection allow_insert_auto_id=False + 11. verify the collection info + 12. alter collection allow_insert_auto_id=True with string value + 13. verify the collection info + 14. insert data with customized primary key + 15. verify insert successfully + expected: insert successfully + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + dim = 8 + # 1. create collection + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, pk_field_type, max_length=64, is_primary=True, auto_id=True) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) + # 2. 
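+        # note: the collection is created with auto_id=True, so the server is
+        # expected to generate primary keys itself and to reject rows that
+        # already carry a pk value until allow_insert_auto_id is enabled below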
try to insert data with primary key + rows_with_pk = [{ + default_primary_key_field_name: i, + default_vector_field_name: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)[0] + } for i in range(100)] + if pk_field_type == DataType.VARCHAR: + rows_with_pk = [{ + default_primary_key_field_name: f"id_{i}", + default_vector_field_name: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)[0] + } for i in range(100)] + error = {ct.err_code: 999, ct.err_msg: f"more fieldData has pass in"} + self.insert(client, collection_name, rows_with_pk, check_task=CheckTasks.err_res, check_items=error) + + rows_without_pk = cf.gen_row_data_by_schema(nb=100, schema=schema) + self.insert(client, collection_name, rows_without_pk) + self.flush(client, collection_name) + num_entities = self.get_collection_stats(client, collection_name)[0] + assert num_entities.get("row_count", None) == 100 + + self.load_collection(client, collection_name) + + filter = f"{default_primary_key_field_name} in [10, 20,90]" + if pk_field_type == DataType.VARCHAR: + filter = f"{default_primary_key_field_name} in ['id_10', 'id_20', 'id_90']" + res = self.query(client, collection_name, filter=filter, + output_fields=[default_primary_key_field_name])[0] + assert (len(res)) == 0 + + # 3. alter collection allow_insert_auto_id=True + self.alter_collection_properties(client, collection_name, properties={"allow_insert_auto_id": True}) + # 4. insert data with customized primary key + self.insert(client, collection_name, rows_with_pk) + # 5. verify insert successfully + self.flush(client, collection_name) + num_entities = self.get_collection_stats(client, collection_name)[0] + assert num_entities.get("row_count", None) == 100 * 2 + # 6. verify the new inserted data's primary keys are customized + res = self.query(client, collection_name, filter=filter, + output_fields=[default_primary_key_field_name])[0] + assert (len(res)) == 3 + + # check the collection info + res = self.describe_collection(client, collection_name)[0] + assert res.get('properties').get('allow_insert_auto_id', None) == 'True' + + # drop the collection properties allow_insert_auto_id + self.drop_collection_properties(client, collection_name, property_keys=["allow_insert_auto_id"]) + res = self.describe_collection(client, collection_name)[0] + assert res.get('properties').get('allow_insert_auto_id', None) is None + self.insert(client, collection_name, rows_with_pk, check_task=CheckTasks.err_res, check_items=error) + + # alter collection allow_insert_auto_id=False + self.alter_collection_properties(client, collection_name, properties={"allow_insert_auto_id": False}) + res = self.describe_collection(client, collection_name)[0] + assert res.get('properties').get('allow_insert_auto_id', None) == 'False' + self.insert(client, collection_name, rows_with_pk, check_task=CheckTasks.err_res, check_items=error) + + # alter collection allow_insert_auto_id=True with string value + self.alter_collection_properties(client, collection_name, properties={"allow_insert_auto_id": "True"}) + res = self.describe_collection(client, collection_name)[0] + assert res.get('properties').get('allow_insert_auto_id', None) == 'True' + rows_with_pk = [{ + default_primary_key_field_name: i, + default_vector_field_name: cf.gen_vectors(1, dim, vector_data_type=DataType.FLOAT_VECTOR)[0] + } for i in range(100, 200)] + if pk_field_type == DataType.VARCHAR: + rows_with_pk = [{ + default_primary_key_field_name: f"id_{i}", + default_vector_field_name: cf.gen_vectors(1, dim, 
vector_data_type=DataType.FLOAT_VECTOR)[0] + } for i in range(100, 200)] + self.insert(client, collection_name, rows_with_pk) + self.flush(client, collection_name) + num_entities = self.get_collection_stats(client, collection_name)[0] + assert num_entities.get("row_count", None) == 100 * 3 + class TestMilvusClientAlterCollectionField(TestMilvusClientV2Base): @pytest.mark.tags(CaseLabel.L0) @@ -306,7 +414,7 @@ class TestMilvusClientAlterCollectionField(TestMilvusClientV2Base): expected: alter successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection schema = self.create_schema(client, enable_dynamic_field=False)[0] dim = 32 @@ -452,7 +560,7 @@ class TestMilvusClientAlterDatabase(TestMilvusClientV2Base): expected: alter successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() self.create_collection(client, collection_name, ct.default_dim, consistency_level="Strong") self.release_collection(client, collection_name) default_db = 'default' diff --git a/tests/python_client/milvus_client/test_milvus_client_insert.py b/tests/python_client/milvus_client/test_milvus_client_insert.py index 581bb18cf5..0874ed130d 100644 --- a/tests/python_client/milvus_client/test_milvus_client_insert.py +++ b/tests/python_client/milvus_client/test_milvus_client_insert.py @@ -51,9 +51,23 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): # The following are invalid base cases ****************************************************************** """ + @pytest.mark.tags(CaseLabel.L2) + def test_milvus_client_insert_after_client_closed(self): + """ + target: test insert after client is closed + method: insert after client is closed + expected: raise exception + """ + client = self._client(alias='my_client') + collection_name = cf.gen_collection_name_by_testcase_name() + self.create_collection(client, collection_name, default_dim) + self.close(client) - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason="pymilvus issue 1883") + data = cf.gen_default_list_data(10) + error = {ct.err_code: 999, ct.err_msg: 'should create connection first'} + self.insert(client, collection_name, data, check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L2) def test_milvus_client_insert_column_data(self): """ target: test insert column data @@ -61,13 +75,14 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: raise error """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection self.create_collection(client, collection_name, default_dim) # 2. 
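+        # MilvusClient.insert() takes row-based data (a dict or a list of dicts);
+        # the column-based payload built below should be rejected with the
+        # schema-inconsistency error asserted underneath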
insert vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nb)] data = [[i for i in range(default_nb)], vectors] - error = {ct.err_code: 1, ct.err_msg: "Unexpected error, message=<'list' object has no attribute 'items'"} + error = {ct.err_code: 999, + ct.err_msg: "The Input data type is inconsistent with defined schema, please check it."} self.insert(client, collection_name, data, check_task=CheckTasks.err_res, check_items=error) self.drop_collection(client, collection_name) @@ -88,7 +103,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("collection_name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) def test_milvus_client_insert_invalid_collection_name(self, collection_name): """ @@ -105,7 +120,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.tags(CaseLabel.L2) def test_milvus_client_insert_collection_name_over_max_length(self): """ target: test high level api: client.create_collection @@ -121,7 +136,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.tags(CaseLabel.L2) def test_milvus_client_insert_not_exist_collection_name(self): """ target: test high level api: client.create_collection @@ -129,7 +144,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: Raise exception """ client = self._client() - collection_name = cf.gen_unique_str("insert_not_exist") + collection_name = cf.gen_collection_name_by_testcase_name() rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] @@ -138,7 +153,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("data", ["12-s", "12 s", "(mn)", "中文", "%$#", " ", ""]) + @pytest.mark.parametrize("data", ["12-s", "中文", "%$#", " ", ""]) def test_milvus_client_insert_data_invalid_type(self, data): """ target: test high level api: client.create_collection @@ -146,7 +161,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: Raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection self.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. insert @@ -163,7 +178,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: Raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection self.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. 
insert @@ -184,7 +199,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: Raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection self.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. insert @@ -204,7 +219,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: Raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection self.create_collection(client, collection_name, default_dim, enable_dynamic_field=False) # 2. insert @@ -224,7 +239,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: Raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection self.create_collection(client, collection_name, default_dim) # 2. insert @@ -244,7 +259,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: Raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection self.create_collection(client, collection_name, default_dim) # 2. insert @@ -267,7 +282,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: Raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection self.create_collection(client, collection_name, default_dim) # 2. insert @@ -288,7 +303,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: Raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection self.create_collection(client, collection_name, default_dim) # 2. insert @@ -308,8 +323,8 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base): expected: Raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) - another_collection_name = cf.gen_unique_str(prefix + "another") + collection_name = cf.gen_collection_name_by_testcase_name() + another_collection_name = cf.gen_collection_name_by_testcase_name() partition_name = cf.gen_unique_str("partition") # 1. create collection self.create_collection(client, collection_name, default_dim) @@ -358,7 +373,7 @@ class TestMilvusClientInsertValid(TestMilvusClientV2Base): expected: search/query successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection dim = 8 # 1. create collection @@ -405,7 +420,7 @@ class TestMilvusClientInsertValid(TestMilvusClientV2Base): expected: search/query successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. 
create collection self.create_collection(client, collection_name, default_dim, consistency_level="Strong") collections = self.list_collections(client)[0] @@ -446,7 +461,7 @@ class TestMilvusClientInsertValid(TestMilvusClientV2Base): expected: search/query successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() # 1. create collection self.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. insert @@ -473,8 +488,8 @@ class TestMilvusClientInsertValid(TestMilvusClientV2Base): expected: create collection with default schema, index, and load successfully """ client = self._client() - collection_name = cf.gen_unique_str(prefix) - partition_name = cf.gen_unique_str(prefix) + collection_name = cf.gen_collection_name_by_testcase_name() + partition_name = cf.gen_unique_str('partition') # 1. create collection self.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. create partition @@ -577,1977 +592,797 @@ class TestMilvusClientInsertValid(TestMilvusClientV2Base): self.drop_collection(client, collection_name) -class TestMilvusClientUpsertInvalid(TestMilvusClientV2Base): - """ Test case of search interface """ +class TestInsertOperation(TestMilvusClientV2Base): + """ + ****************************************************************** + The following cases are used to test insert interface operations + ****************************************************************** + """ + + @pytest.fixture(scope="function", params=[8, 4096]) + def dim(self, request): + yield request.param @pytest.fixture(scope="function", params=[False, True]) def auto_id(self, request): yield request.param - @pytest.fixture(scope="function", params=["COSINE", "L2"]) - def metric_type(self, request): + @pytest.fixture(scope="function", params=[ct.default_int64_field_name, ct.default_string_field_name]) + def pk_field(self, request): yield request.param - """ - ****************************************************************** - # The following are invalid base cases - ****************************************************************** - """ - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.skip(reason="pymilvus issue 1883") - def test_milvus_client_upsert_column_data(self): - """ - target: test insert column data - method: create connection, collection, insert and search - expected: raise error - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim) - # 2. 
insert - vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nb)] - data = [[i for i in range(default_nb)], vectors] - error = {ct.err_code: 1, ct.err_msg: "Unexpected error, message=<'list' object has no attribute 'items'"} - self.upsert(client, collection_name, data, - check_task=CheckTasks.err_res, check_items=error) - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_upsert_empty_collection_name(self): - """ - target: test high level api: client.create_collection - method: create collection with invalid primary field - expected: Raise exception - """ - client = self._client() - collection_name = "" - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1, ct.err_msg: f"`collection_name` value {collection_name} is illegal"} - self.upsert(client, collection_name, rows, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("collection_name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) - def test_milvus_client_upsert_invalid_collection_name(self, collection_name): - """ - target: test high level api: client.create_collection - method: create collection with invalid primary field - expected: Raise exception - """ - client = self._client() - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {collection_name}. 
the first character of a " - f"collection name must be an underscore or letter: invalid parameter"} - self.upsert(client, collection_name, rows, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_upsert_collection_name_over_max_length(self): - """ - target: test high level api: client.create_collection - method: create collection with invalid primary field - expected: Raise exception - """ - client = self._client() - collection_name = "a".join("a" for i in range(256)) - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection name must be less than 255 characters"} - self.upsert(client, collection_name, rows, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_upsert_not_exist_collection_name(self): - """ - target: test high level api: client.create_collection - method: create collection with invalid primary field - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str("insert_not_exist") - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 100, ct.err_msg: f"can't find collection[database=default][collection={collection_name}]"} - self.upsert(client, collection_name, rows, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("data", ["12-s", "12 s", "(mn)", "中文", "%$#", " "]) - def test_milvus_client_upsert_data_invalid_type(self, data): - """ - target: test high level api: client.create_collection - method: create collection with invalid primary field - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim, consistency_level="Strong") - # 2. insert - error = {ct.err_code: 1, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict'"} - self.upsert(client, collection_name, data, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_upsert_data_empty(self): - """ - target: test high level api: client.create_collection - method: create collection with invalid primary field - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim, consistency_level="Strong") - # 2. insert - error = {ct.err_code: 1, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict'"} - self.upsert(client, collection_name, data="", - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_upsert_data_vector_field_missing(self): - """ - target: test high level api: client.create_collection - method: create collection with invalid primary field - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. 
create collection - self.create_collection(client, collection_name, default_dim, consistency_level="Strong") - # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)] - error = {ct.err_code: 1, - ct.err_msg: "Insert missed an field `vector` to collection without set nullable==true or set default_value"} - self.upsert(client, collection_name, data=rows, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_upsert_data_id_field_missing(self): - """ - target: test high level api: client.create_collection - method: create collection with invalid primary field - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim, consistency_level="Strong") - # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(20)] - error = {ct.err_code: 1, - ct.err_msg: f"Insert missed an field `id` to collection without set nullable==true or set default_value"} - self.upsert(client, collection_name, data=rows, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_upsert_data_extra_field(self): - """ - target: test milvus client: insert extra field than schema - method: insert extra field than schema when enable_dynamic_field is False - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - dim = 32 - self.create_collection(client, collection_name, dim, enable_dynamic_field=False) - # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)] - error = {ct.err_code: 1, - ct.err_msg: f"Attempt to insert an unexpected field `float` to collection without enabling dynamic field"} - self.upsert(client, collection_name, data=rows, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_upsert_data_dim_not_match(self): - """ - target: test milvus client: insert extra field than schema - method: insert extra field than schema when enable_dynamic_field is False - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim) - # 2. 
insert - rng = np.random.default_rng(seed=19530) - rows = [ - {default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim + 1))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 65536, ct.err_msg: f"of float data should divide the dim({default_dim})"} - self.upsert(client, collection_name, data=rows, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_upsert_not_matched_data(self): - """ - target: test milvus client: insert not matched data then defined - method: insert string to int primary field - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim) - # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [ - {default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 1, - ct.err_msg: "The Input data type is inconsistent with defined schema, {id} field should be a int64"} - self.upsert(client, collection_name, data=rows, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("partition_name", ["12 s", "(mn)", "中文", "%$#", " "]) - def test_milvus_client_upsert_invalid_partition_name(self, partition_name): - """ - target: test milvus client: insert extra field than schema - method: insert extra field than schema when enable_dynamic_field is False - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim) - # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}"} - if partition_name == " ": - error = {ct.err_code: 1, ct.err_msg: f"Invalid partition name: . Partition name should not be empty."} - self.upsert(client, collection_name, data=rows, partition_name=partition_name, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_upsert_not_exist_partition_name(self): - """ - target: test milvus client: insert extra field than schema - method: insert extra field than schema when enable_dynamic_field is False - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim) - # 2. 
insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - partition_name = cf.gen_unique_str("partition_not_exist") - error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"} - self.upsert(client, collection_name, data=rows, partition_name=partition_name, - check_task=CheckTasks.err_res, check_items=error) - @pytest.mark.tags(CaseLabel.L2) - def test_milvus_client_upsert_collection_partition_not_match(self): + def test_insert_without_connection(self): """ - target: test milvus client: insert extra field than schema - method: insert extra field than schema when enable_dynamic_field is False - expected: Raise exception - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - another_collection_name = cf.gen_unique_str(prefix + "another") - partition_name = cf.gen_unique_str("partition") - # 1. create collection - self.create_collection(client, collection_name, default_dim) - self.create_collection(client, another_collection_name, default_dim) - self.create_partition(client, another_collection_name, partition_name) - # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"} - self.upsert(client, collection_name, data=rows, partition_name=partition_name, - check_task=CheckTasks.err_res, check_items=error) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("nullable", [True, False]) - def test_milvus_client_insert_array_element_null(self, nullable): - """ - target: test search with null expression on each key of json - method: create connection, collection, insert and search + target: test insert without connection + method: insert after remove connection expected: raise exception """ client = self._client() - collection_name = cf.gen_unique_str(prefix) - dim = 5 - # 1. create collection - nullable_field_name = "nullable_field" - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True, - auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) - schema.add_field(nullable_field_name, DataType.ARRAY, element_type=DataType.INT64, max_capacity=12, - max_length=64, nullable=nullable) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_vector_field_name, metric_type="COSINE") - self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) - # 2. 
insert
-        vectors = cf.gen_vectors(default_nb, dim)
-        rows = [{default_primary_key_field_name: str(i), default_vector_field_name: vectors[i],
-                 nullable_field_name: [None, 2, 3]} for i in range(default_nb)]
-        error = {ct.err_code: 1,
-                 ct.err_msg: "The Input data type is inconsistent with defined schema, {nullable_field} field "
-                             "should be a array, but got a {} instead."}
-        self.insert(client, collection_name, rows,
-                    check_task=CheckTasks.err_res,
-                    check_items=error)
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        self.create_collection(client, collection_name, default_dim)
+        self.close(client)
+
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)]
+        error = {ct.err_code: 999, ct.err_msg: 'should create connection first'}
+        self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error)
-
-class TestMilvusClientUpsertValid(TestMilvusClientV2Base):
-    """ Test case of search interface """
-
-    @pytest.fixture(scope="function", params=[False, True])
-    def auto_id(self, request):
-        yield request.param
-
-    @pytest.fixture(scope="function", params=["COSINE", "L2"])
-    def metric_type(self, request):
-        yield request.param
-
-    """
-    ******************************************************************
-    #  The following are valid base cases
-    ******************************************************************
-    """
-
-    @pytest.mark.tags(CaseLabel.L0)
-    def test_milvus_client_upsert_default(self):
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_default_partition(self):
        """
-        target: test search (high level api) normal case
-        method: create connection, collection, insert and search
-        expected: search/query successfully
+        target: test insert entities into a partition
+        method: create a partition and insert entities into the collection
+        expected: the insert count equals to nb
        """
        client = self._client()
        collection_name = cf.gen_unique_str(prefix)
-        # 1. create collection
-        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
+        partition_name = cf.gen_unique_str("partition")
+
+        self.create_collection(client, collection_name, default_dim)
+        self.create_partition(client, collection_name, partition_name)
+
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(ct.default_nb)]
+        results = self.insert(client, collection_name, rows, partition_name=partition_name)[0]
+        assert results['insert_count'] == ct.default_nb
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_partition_not_existed(self):
+        """
+        target: test insert entities in collection created before
+        method: create collection and insert entities in it, with a partition_name that does not exist
+        expected: error raised
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        self.create_collection(client, collection_name, default_dim)
+
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)]
+        error = {ct.err_code: 200, ct.err_msg: "partition not found[partition=p]"}
+        self.insert(client, collection_name, rows, partition_name="p",
+                    check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_partition_repeatedly(self):
+        """
+        target: test insert entities in collection created before
+        method: create collection and insert entities in it repeatedly, with the partition_name param
+        expected: the insert count of each partition equals to nb
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        partition_name_1 = cf.gen_unique_str("partition1")
+        partition_name_2 = cf.gen_unique_str("partition2")
+
+        self.create_collection(client, collection_name, default_dim)
+        self.create_partition(client, collection_name, partition_name_1)
+        self.create_partition(client, collection_name, partition_name_2)
+
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(ct.default_nb)]
+        result_1 = self.insert(client, collection_name, rows, partition_name=partition_name_1)[0]
+        result_2 = self.insert(client, collection_name, rows, partition_name=partition_name_2)[0]
+        assert result_1['insert_count'] == ct.default_nb
+        assert result_2['insert_count'] == ct.default_nb
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_insert_partition_with_ids(self):
+        """
+        target: test insert entities in collection created before, insert with ids
+        method: create collection and insert entities in it, with the partition_name param
+        expected: the insert count equals to nb
+        """
+        client = self._client()
+        collection_name = cf.gen_unique_str(prefix)
+        partition_name = cf.gen_unique_str("partition")
+
+        self.create_collection(client, collection_name, default_dim)
+        self.create_partition(client, collection_name, partition_name)
+
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(ct.default_nb)]
+        results = self.insert(client, collection_name, rows, partition_name=partition_name)[0]
+        assert results['insert_count'] == ct.default_nb
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_exceed_varchar_limit(self):
+        """
+        target: test insert exceed varchar limit
+        method: create a collection with varchar max_length=2 and insert data that exceeds it
+        expected: error raised
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+
+        # Create schema with varchar limit
+        schema = self.create_schema(client, auto_id=True, enable_dynamic_field=False)[0]
+        schema.add_field("id", DataType.INT64, is_primary=True, auto_id=True)
+        schema.add_field("vector", DataType.FLOAT_VECTOR, dim=ct.default_dim)
+        schema.add_field("small_limit", DataType.VARCHAR, max_length=2)
+        schema.add_field("big_limit", DataType.VARCHAR, max_length=65530)
+
+        self.create_collection(client, collection_name, dimension=ct.default_dim, schema=schema)
+
+        # Insert data exceeding varchar limit
+        rows = [
+            {"vector": list(cf.gen_vectors(1, ct.default_dim)[0]), "small_limit": "limit_1___________", "big_limit": "1"},
+            {"vector": list(cf.gen_vectors(1, ct.default_dim)[0]), "small_limit": "limit_2___________", "big_limit": "2"}
+        ]
+        error = {ct.err_code: 999, ct.err_msg: "length of varchar field small_limit exceeds max length"}
+        self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_insert_with_no_vector_field_dtype(self):
+        """
+        target: test insert entities with no vector field
+        method: vector field is missing in data
+        expected: error raised
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        self.create_collection(client, collection_name, default_dim)
+
+        # Generate data without vector field
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: i,
+                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)]
+        error = {ct.err_code: 1, ct.err_msg: f"Insert missed an field `vector` to collection"}
+        self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_insert_with_vector_field_dismatch_dtype(self):
+        """
+        target: test insert entities with a mismatched vector field data type
+        method: pass a scalar instead of a vector list for the vector field
+        expected: error raised
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        self.create_collection(client, collection_name, default_dim)
+
+        # Generate data with wrong vector type (scalar instead of list)
+        rows = [{default_primary_key_field_name: 0, default_vector_field_name: 0.0001,
+                 default_float_field_name: 0.0, default_string_field_name: "0"}]
+        error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"}
+        self.insert(client, collection_name, rows, check_task=CheckTasks.err_res, check_items=error)
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_drop_collection(self):
+        """
+        target: test insert and drop
+        method: insert data and drop collection
+        expected: verify the collection no longer exists after the drop
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        self.create_collection(client, collection_name, default_dim)
+        collections = self.list_collections(client)[0]
        assert collection_name in collections
-        self.describe_collection(client, collection_name,
-                                 check_task=CheckTasks.check_describe_collection_property,
-                                 check_items={"collection_name": collection_name,
-                                              "dim": default_dim,
-                                              "consistency_level": 0})
-        # 2. insert
+        rng = np.random.default_rng(seed=19530)
        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
-                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
-        results = self.upsert(client, collection_name, rows)[0]
-        assert results['upsert_count'] == default_nb
-        # 3. search
-        vectors_to_search = rng.random((1, default_dim))
-        insert_ids = [i for i in range(default_nb)]
-        self.search(client, collection_name, vectors_to_search,
-                    check_task=CheckTasks.check_search_results,
-                    check_items={"enable_milvus_client_api": True,
-                                 "nq": len(vectors_to_search),
-                                 "ids": insert_ids,
-                                 "limit": default_limit,
-                                 "pk_name": default_primary_key_field_name})
-        # 4. 
query - self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - check_items={exp_res: rows, - "with_vec": True, - "pk_name": default_primary_key_field_name}) - self.release_collection(client, collection_name) + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(ct.default_nb)] + self.insert(client, collection_name, rows) + self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L2) - def test_milvus_client_upsert_empty_data(self): - """ - target: test search (high level api) normal case - method: create connection, collection, insert and search - expected: search/query successfully - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim, consistency_level="Strong") - # 2. insert - rows = [] - results = self.upsert(client, collection_name, rows)[0] - assert results['upsert_count'] == 0 - # 3. search - rng = np.random.default_rng(seed=19530) - vectors_to_search = rng.random((1, default_dim)) - self.search(client, collection_name, vectors_to_search, - check_task=CheckTasks.check_search_results, - check_items={"enable_milvus_client_api": True, - "nq": len(vectors_to_search), - "ids": [], - "pk_name": default_primary_key_field_name, - "limit": 0}) - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L2) - def test_milvus_client_upsert_partition(self): - """ - target: test fast create collection normal case - method: create collection - expected: create collection with default schema, index, and load successfully - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - partition_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim, consistency_level="Strong") - # 2. create partition - self.create_partition(client, collection_name, partition_name) - partitions = self.list_partitions(client, collection_name)[0] - assert partition_name in partitions - index = self.list_indexes(client, collection_name)[0] - assert index == ['vector'] - # load_state = self.get_load_state(collection_name)[0] - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - # 3. upsert to default partition - results = self.upsert(client, collection_name, rows, partition_name=partitions[0])[0] - assert results['upsert_count'] == default_nb - # 4. upsert to non-default partition - results = self.upsert(client, collection_name, rows, partition_name=partition_name)[0] - assert results['upsert_count'] == default_nb - # 5. 
search - vectors_to_search = rng.random((1, default_dim)) - insert_ids = [i for i in range(default_nb)] - self.search(client, collection_name, vectors_to_search, - check_task=CheckTasks.check_search_results, - check_items={"enable_milvus_client_api": True, - "nq": len(vectors_to_search), - "ids": insert_ids, - "limit": default_limit, - "pk_name": default_primary_key_field_name}) - # partition_number = self.get_partition_stats(client, collection_name, "_default")[0] - # assert partition_number == default_nb - # partition_number = self.get_partition_stats(client, collection_name, partition_name)[0] - # assert partition_number[0]['value'] == 0 - if self.has_partition(client, collection_name, partition_name)[0]: - self.release_partitions(client, collection_name, partition_name) - self.drop_partition(client, collection_name, partition_name) - if self.has_collection(client, collection_name)[0]: - self.drop_collection(client, collection_name) + collections = self.list_collections(client)[0] + assert collection_name not in collections @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_insert_upsert(self): + def test_insert_create_index(self): """ - target: test fast create collection normal case - method: create collection - expected: create collection with default schema, index, and load successfully + target: test insert and create index + method: 1. insert 2. create index + expected: verify num entities and index """ client = self._client() - collection_name = cf.gen_unique_str(prefix) - partition_name = cf.gen_unique_str(prefix) - # 1. create collection - self.create_collection(client, collection_name, default_dim, consistency_level="Strong") - # 2. create partition - self.create_partition(client, collection_name, partition_name) - partitions = self.list_partitions(client, collection_name)[0] - assert partition_name in partitions - index = self.list_indexes(client, collection_name)[0] - assert index == ['vector'] - # load_state = self.get_load_state(collection_name)[0] - # 3. insert and upsert + collection_name = cf.gen_collection_name_by_testcase_name() + self.create_collection(client, collection_name, default_dim) + rng = np.random.default_rng(seed=19530) rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] - results = self.insert(client, collection_name, rows, partition_name=partition_name)[0] - assert results['insert_count'] == default_nb - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), - default_float_field_name: i * 1.0, "new_diff_str_field": str(i)} for i in range(default_nb)] - results = self.upsert(client, collection_name, rows, partition_name=partition_name)[0] - assert results['upsert_count'] == default_nb - # 3. 
search - vectors_to_search = rng.random((1, default_dim)) - insert_ids = [i for i in range(default_nb)] - self.search(client, collection_name, vectors_to_search, - check_task=CheckTasks.check_search_results, - check_items={"enable_milvus_client_api": True, - "nq": len(vectors_to_search), - "ids": insert_ids, - "limit": default_limit, - "pk_name": default_primary_key_field_name}) - if self.has_partition(client, collection_name, partition_name)[0]: - self.release_partitions(client, collection_name, partition_name) - self.drop_partition(client, collection_name, partition_name) - if self.has_collection(client, collection_name)[0]: - self.drop_collection(client, collection_name) + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(ct.default_nb)] + self.insert(client, collection_name, rows) + self.flush(client, collection_name) - -class TestMilvusClientInsertJsonPathIndexValid(TestMilvusClientV2Base): - """ Test case of insert interface """ - - @pytest.fixture(scope="function", params=["INVERTED"]) - def supported_varchar_scalar_index(self, request): - yield request.param - - @pytest.fixture(scope="function", params=["BOOL", "Double", "Varchar", "json"]) - def supported_json_cast_type(self, request): - yield request.param - - """ - ****************************************************************** - # The following are valid base cases - ****************************************************************** - """ - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("enable_dynamic_field", [True, False]) - def test_milvus_client_insert_before_json_path_index(self, enable_dynamic_field, supported_json_cast_type, - supported_varchar_scalar_index): - """ - target: test insert and then create json path index - method: create json path index after insert - steps: 1. create schema - 2. create collection - 3. insert - 4. prepare json path index params with parameter "json_cast_type" and "json_path" - 5. create index - expected: insert and create json path index successfully - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. create collection - json_field_name = "my_json" - schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) - if not enable_dynamic_field: - schema.add_field(json_field_name, DataType.JSON) + num_entities = self.get_collection_stats(client, collection_name)[0] + assert num_entities.get("row_count", None) == ct.default_nb + + # Create index (note: quick setup collection already has index) index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_vector_field_name, metric_type="COSINE") - self.create_collection(client, collection_name, schema=schema, index_params=index_params) - # 2. 
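        # each batch below uses a different json value shape (nested dict, scalar,
        # empty dict, array, array of dicts) so that every json_path cast branch
        # of the index is exercised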
insert with different data distribution - vectors = cf.gen_vectors(default_nb+50, default_dim) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in - range(default_nb)] - self.insert(client, collection_name, rows) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: i} for i in - range(default_nb, default_nb+10)] - self.insert(client, collection_name, rows) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: {}} for i in - range(default_nb+10, default_nb+20)] - self.insert(client, collection_name, rows) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in - range(default_nb + 20, default_nb + 30)] - self.insert(client, collection_name, rows) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in - range(default_nb + 20, default_nb + 30)] - self.insert(client, collection_name, rows) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in - range(default_nb + 30, default_nb + 40)] - self.insert(client, collection_name, rows) - # 2. prepare index params - index_name = "json_index" - index_params = self.prepare_index_params(client)[0] - index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE") - index_params.add_index(field_name=json_field_name, index_name=index_name, index_type=supported_varchar_scalar_index, - params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"}) - index_params.add_index(field_name=json_field_name, index_name=index_name + '1', - index_type=supported_varchar_scalar_index, - params={"json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a']"}) - index_params.add_index(field_name=json_field_name, index_name=index_name + '2', - index_type=supported_varchar_scalar_index, - params={"json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}"}) - index_params.add_index(field_name=json_field_name, index_name=index_name + '3', - index_type=supported_varchar_scalar_index, - params={"json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a'][0]['b']"}) - index_params.add_index(field_name=json_field_name, index_name=index_name + '4', - index_type=supported_varchar_scalar_index, - params={"json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a'][0]"}) - # 3. 
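        # create all five json path indexes at once, then verify each one's
        # cast type and path via describe_index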
create index - self.create_index(client, collection_name, index_params) - self.describe_index(client, collection_name, index_name, - check_task=CheckTasks.check_describe_index_property, - check_items={ - "json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a']['b']", - "index_type": supported_varchar_scalar_index, - "field_name": json_field_name, - "index_name": index_name}) - self.describe_index(client, collection_name, index_name + '1', - check_task=CheckTasks.check_describe_index_property, - check_items={ - "json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a']", - "index_type": supported_varchar_scalar_index, - "field_name": json_field_name, - "index_name": index_name + '1'}) - self.describe_index(client, collection_name, index_name +'2', - check_task=CheckTasks.check_describe_index_property, - check_items={ - "json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}", - "index_type": supported_varchar_scalar_index, - "field_name": json_field_name, - "index_name": index_name + '2'}) - self.describe_index(client, collection_name, index_name + '3', - check_task=CheckTasks.check_describe_index_property, - check_items={ - "json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a'][0]['b']", - "index_type": supported_varchar_scalar_index, - "field_name": json_field_name, - "index_name": index_name + '3'}) - self.describe_index(client, collection_name, index_name + '4', - check_task=CheckTasks.check_describe_index_property, - check_items={ - "json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a'][0]", - "index_type": supported_varchar_scalar_index, - "field_name": json_field_name, - "index_name": index_name + '4'}) - - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("enable_dynamic_field", [True, False]) - def test_milvus_client_insert_after_json_path_index(self, enable_dynamic_field, supported_json_cast_type, - supported_varchar_scalar_index): - """ - target: test insert after create json path index - method: create json path index after insert - steps: 1. create schema - 2. create all the index parameters including json path index - 3. create collection with schema and index params - 4. insert - 5. check the index - expected: insert successfully after create json path index - """ - client = self._client() - collection_name = cf.gen_unique_str(prefix) - # 1. 
create collection with schema and all the index parameters - json_field_name = "my_json" - schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) - if not enable_dynamic_field: - schema.add_field(json_field_name, DataType.JSON) - index_name = "json_index" - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_vector_field_name, metric_type="COSINE") - index_params.add_index(field_name=json_field_name, index_name=index_name, index_type=supported_varchar_scalar_index, - params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"}) - index_params.add_index(field_name=json_field_name, index_name=index_name + '1', - index_type=supported_varchar_scalar_index, - params={"json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a']"}) - index_params.add_index(field_name=json_field_name, index_name=index_name + '2', - index_type=supported_varchar_scalar_index, - params={"json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}"}) - index_params.add_index(field_name=json_field_name, index_name=index_name + '3', - index_type=supported_varchar_scalar_index, - params={"json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a'][0]['b']"}) - index_params.add_index(field_name=json_field_name, index_name=index_name + '4', - index_type=supported_varchar_scalar_index, - params={"json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a'][0]"}) - self.create_collection(client, collection_name, schema=schema, index_params=index_params) - # 2. insert with different data distribution - vectors = cf.gen_vectors(default_nb+50, default_dim) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in - range(default_nb)] - self.insert(client, collection_name, rows) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: i} for i in - range(default_nb, default_nb+10)] - self.insert(client, collection_name, rows) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: {}} for i in - range(default_nb+10, default_nb+20)] - self.insert(client, collection_name, rows) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in - range(default_nb + 20, default_nb + 30)] - self.insert(client, collection_name, rows) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in - range(default_nb + 20, default_nb + 30)] - self.insert(client, collection_name, rows) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in - range(default_nb + 30, default_nb + 40)] - self.insert(client, collection_name, rows) - # 3. 
check the json path index - self.describe_index(client, collection_name, index_name, - check_task=CheckTasks.check_describe_index_property, - check_items={ - "json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a']['b']", - "index_type": supported_varchar_scalar_index, - "field_name": json_field_name, - "index_name": index_name}) - self.describe_index(client, collection_name, index_name + '1', - check_task=CheckTasks.check_describe_index_property, - check_items={ - "json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a']", - "index_type": supported_varchar_scalar_index, - "field_name": json_field_name, - "index_name": index_name + '1'}) - self.describe_index(client, collection_name, index_name +'2', - check_task=CheckTasks.check_describe_index_property, - check_items={ - "json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}", - "index_type": supported_varchar_scalar_index, - "field_name": json_field_name, - "index_name": index_name + '2'}) - self.describe_index(client, collection_name, index_name + '3', - check_task=CheckTasks.check_describe_index_property, - check_items={ - "json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a'][0]['b']", - "index_type": supported_varchar_scalar_index, - "field_name": json_field_name, - "index_name": index_name + '3'}) - self.describe_index(client, collection_name, index_name + '4', - check_task=CheckTasks.check_describe_index_property, - check_items={ - "json_cast_type": supported_json_cast_type, - "json_path": f"{json_field_name}['a'][0]", - "index_type": supported_varchar_scalar_index, - "field_name": json_field_name, - "index_name": index_name + '4'}) - - -class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base): - """ Test case of partial update interface """ - @pytest.fixture(scope="function", params=[False, True]) - def auto_id(self, request): - yield request.param - - @pytest.fixture(scope="function", params=["COSINE", "L2"]) - def metric_type(self, request): - yield request.param - - """ - ****************************************************************** - # The following are valid base cases - ****************************************************************** - """ - @pytest.mark.tags(CaseLabel.L0) - def test_milvus_client_partial_update(self): - """ - target: test basic function of partial update - method: - 1. create collection - 2. insert a full row of data using partial update - 3. 
partial update data - expected: both step 2 and 3 should be successful - """ - # Step 1: create collection - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_string_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) + self.create_index(client, collection_name, index_params) - # Step 2: insert full rows of data using partial update - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.upsert(client, collection_name, rows, partial_update=True) - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - check_items={exp_res: rows, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb - - # Step 3: partial update data - new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, - desired_field_names=[default_primary_key_field_name, default_string_field_name]) - self.upsert(client, collection_name, new_row, partial_update=True) - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - output_fields=[default_string_field_name], - check_items={exp_res: new_row, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb - + indexes = self.list_indexes(client, collection_name)[0] + assert default_vector_field_name in indexes self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_with_all_datatype(self): + def test_insert_after_create_index(self): """ - target: test partial update with all datatype - method: - 1. create collection with all datatype schema - 2. insert data - 3. partial update data - expected: both step 2 and 3 should be successful + target: test insert after create index + method: 1. create index 2. 
insert data + expected: verify index and num entities """ - # step 1: create collection with all datatype schema client = self._client() - schema = cf.gen_all_datatype_collection_schema(dim=default_dim) + collection_name = cf.gen_collection_name_by_testcase_name() + self.create_collection(client, collection_name, default_dim) + + # Create index first index_params = self.prepare_index_params(client)[0] - text_sparse_emb_field_name = "text_sparse_emb" - - for i in range(len(schema.fields)): - field_name = schema.fields[i].name - if field_name == "json_field": - index_params.add_index(field_name, index_type="AUTOINDEX", - params={"json_cast_type": "json"}) - elif field_name == text_sparse_emb_field_name: - index_params.add_index(field_name, index_type="AUTOINDEX", metric_type="BM25") - else: - index_params.add_index(field_name, index_type="AUTOINDEX") - - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + self.create_index(client, collection_name, index_params) - # step 2: insert data - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.upsert(client, collection_name, rows, partial_update=True) + indexes = self.list_indexes(client, collection_name)[0] + assert default_vector_field_name in indexes - # step 3: partial update data - for field in schema.fields: - if field.is_primary: - primary_key_field_name = field.name - break + # Then insert data + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(ct.default_nb)] + self.insert(client, collection_name, rows) - vector_field_type = [DataType.FLOAT16_VECTOR, - DataType.BFLOAT16_VECTOR, - DataType.INT8_VECTOR] - # fields to be updated - update_fields_name = [] - scalar_update_name = [] - vector_update = [] # this stores field object - for field in schema.fields: - field_name = field.name - if field_name != text_sparse_emb_field_name: - update_fields_name.append(field_name) - if field.dtype not in vector_field_type: - scalar_update_name.append(field_name) - else: - vector_update.append(field) - - # PU scalar fields and vector fields together - new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, - desired_field_names=update_fields_name) - self.upsert(client, collection_name, new_rows, partial_update=True) - # expected scalar result - expected = [{field: new_rows[i][field] for field in scalar_update_name} - for i in range(default_nb)] - - result = self.query(client, collection_name, filter=f"{primary_key_field_name} >= 0", - check_task=CheckTasks.check_query_results, - output_fields=scalar_update_name, - check_items={exp_res: expected, - "with_vec": True, - "pk_name": primary_key_field_name})[0] - assert len(result) == default_nb - - # expected vector result - for field in vector_update: - expected = [{primary_key_field_name: data[primary_key_field_name], - field.name: data[field.name]} for data in new_rows] - result = self.query(client, collection_name, filter=f"{primary_key_field_name} >= 0", - check_task=CheckTasks.check_query_results, - output_fields=[field.name], - check_items={exp_res: expected, - "with_vec": True, - "vector_type": field.dtype, - "vector_field": field.name, - "pk_name": 
primary_key_field_name})[0] - assert len(result) == default_nb - + self.flush(client, collection_name) + num_entities = self.get_collection_stats(client, collection_name)[0] + assert num_entities.get("row_count", None) == ct.default_nb self.drop_collection(client, collection_name) - + @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_new_field_with_dynamic_field(self): + def test_insert_binary_after_index(self): """ - target: Test PU will success when provided empty data - method: - 1. Create a collection - 2. partial upsert new field - expected: Step 2 should result success + target: test insert binary after index + method: 1.create index 2.insert binary data + expected: 1.index ok 2.num entities correct """ - # step 1: create collection client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # Create binary vector collection schema = self.create_schema(client, enable_dynamic_field=True)[0] schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(ct.default_binary_vec_field_name, DataType.BINARY_VECTOR, dim=default_dim) + schema.add_field(default_float_field_name, DataType.FLOAT) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length) + index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) + index_params.add_index(ct.default_binary_vec_field_name, index_type="BIN_IVF_FLAT", metric_type="HAMMING") - # step 2: partial upsert new field - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.upsert(client, collection_name, rows, partial_update=True) - new_rows = [{default_primary_key_field_name: i, default_int32_field_name: 99} for i in range(default_nb)] - self.upsert(client, collection_name, new_rows, partial_update=True) - - self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: new_rows, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] + self.create_collection(client, collection_name, dimension=default_dim, schema=schema, index_params=index_params) - self.drop_collection(client, collection_name) - - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_partition(self): - """ - target: test PU can successfully update data in a partition - method: - 1. Create a collection - 2. Insert data into a partition - 3. 
Partial update data in the partition - expected: Step 3 should result success - """ - # step 1: create collection - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) + indexes = self.list_indexes(client, collection_name)[0] + assert ct.default_binary_vec_field_name in indexes - # step 2: insert data into a partition - num_of_partitions = 10 - partition_names = [] - for _ in range(num_of_partitions): - partition_name = cf.gen_unique_str("partition") - self.create_partition(client, collection_name, partition_name) - partition_names.append(partition_name) - - # step 3: insert data into a partition - # partition 0: 0, 1, 2, ..., 199 - # partition 1: 200, 201, 202, ..., 399 - # partition 2: 400, 401, 402, ..., 599 - gap = default_nb // num_of_partitions # 200 - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - for i, partition in enumerate(partition_names): - self.upsert(client, collection_name, rows[i*gap:i*gap+gap], partition_name=partition, partial_update=True) - - # step 4: partial update data in the partition - # i*200+i = 0, 201, 402, 603, ..., 1809 - new_value = np.int32(99) - for i, partition_name in enumerate(partition_names): - new_row = [{default_primary_key_field_name: i*gap+i, default_int32_field_name: new_value}] - self.upsert(client, collection_name, new_row, partition_name=partition_name, partial_update=True) - self.query(client, collection_name, - check_task=CheckTasks.check_query_results, - partition_names=[partition_name], - ids = [i*gap+i], - output_fields=[default_int32_field_name], - check_items={exp_res: new_row, - "with_vec": True, - "pk_name": default_primary_key_field_name}) - - result = self.query(client, collection_name, filter=default_search_exp)[0] - assert len(result) == default_nb - - self.drop_collection(client, collection_name) - - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_partition_insert_update(self): - """ - target: test PU can successfully update data in a partition and insert data into a partition - method: - 1. Create a collection - 2. Insert data into a partitions - 3. Partial update data in the partition - 4. 
Insert data into a different partition - expected: Step 3 and 4 should result success - Visualization: - rows: [0-------------default_nb] - new_rows: [extra_nb-------------default_nb+extra_nb] - they overlap from extra_nb to default_nb - rows is inserted into partition 0 - new_rows is upserted into partition 0 & 1 - """ - # step 1: create collection - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: insert data into partitions - num_of_partitions = 2 - partition_names = [] - for _ in range(num_of_partitions): - partition_name = cf.gen_unique_str("partition") - self.create_partition(client, collection_name, partition_name) - partition_names.append(partition_name) - - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.insert(client, collection_name, rows, partition_name=partition_names[0]) - - # step 3: partial update data in the partition - extra_nb = default_nb // num_of_partitions - new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, start=extra_nb) - - for partition_name in partition_names: - self.upsert(client, collection_name, new_rows, partition_name=partition_name, partial_update=True) - result = self.query(client, collection_name, - check_task=CheckTasks.check_query_results, - partition_names=[partition_name], - filter=f"{default_primary_key_field_name} >= {extra_nb}", - check_items={exp_res: new_rows, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb - - result =self.delete(client, collection_name, partition_names=[partition_name], - filter=f"{default_primary_key_field_name} >= 0")[0] - if partition_name == partition_names[0]: - assert result["delete_count"] == default_nb + extra_nb - else: - assert result["delete_count"] == default_nb - - self.drop_collection(client, collection_name) - - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_insert_delete_upsert(self): - """ - target: Test PU will success and query will success - method: - 1. Create a collection - 2. Insert rows - 3. Delete the rows - 4. 
Upsert the rows - expected: Step 2,3,4 should success - """ - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: Insert rows - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.insert(client, collection_name, rows) - - # step 3: Delete the rows - delete_result = self.delete(client, collection_name, filter=default_search_exp)[0] - query_result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_nothing)[0] - - # step 4: Upsert the rows - self.upsert(client, collection_name, new_rows, partial_update=True) - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - check_items={exp_res: new_rows, - "pk_name": default_primary_key_field_name})[0] - - assert delete_result["delete_count"] == default_nb - assert len(query_result) == 0 - assert len(result) == default_nb - - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_insert_delete_upsert_with_flush(self): - """ - target: Test PU will success and query will success - method: - 1. Create a collection - 2. Insert rows - 3. Delete the 1/2 rows and flush - 4. Upsert the default_nbrows and flush - 5. 
query the rows - expected: Step 2-5 should success - """ - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: Insert rows - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.insert(client, collection_name, rows) - - # step 3: Delete the rows and flush - delete_result = self.delete(client, collection_name, - filter=f"{default_primary_key_field_name} < {default_nb//2}")[0] - self.flush(client, collection_name) - query_result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_nothing)[0] - - # step 4: Upsert the rows and flush - self.upsert(client, collection_name, new_rows, partial_update=True) - self.flush(client, collection_name) - - # step 5: query the rows - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - check_items={exp_res: new_rows, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - - assert delete_result["delete_count"] == default_nb//2 - assert len(query_result) == default_nb//2 - assert len(result) == default_nb - - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_insert_upsert_delete_upsert_flush(self): - """ - target: Test PU will success and query will success - method: - 1. Create a collection - 2. Insert rows - 3. Delete the rows and upsert new rows, immediate flush - 4. 
Query the rows - expected: Step 2-4 should success - """ - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: Insert rows - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - partial_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, - desired_field_names=[default_primary_key_field_name, default_int32_field_name]) + # Insert binary data + rows = cf.gen_row_data_by_schema(nb=ct.default_nb, schema=schema) self.insert(client, collection_name, rows) - # step 3: partial update rows then delete 1/2 rows and upsert new rows, flush - self.upsert(client, collection_name, partial_rows, partial_update=True) - delete_result = self.delete(client, collection_name, - filter=f"{default_primary_key_field_name} < {default_nb//2}")[0] - self.upsert(client, collection_name, new_rows, partial_update=True) self.flush(client, collection_name) - - # step 4: Query the rows - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - check_items={exp_res: new_rows, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - - assert delete_result["delete_count"] == default_nb//2 - assert len(result) == default_nb - + num_entities = self.get_collection_stats(client, collection_name)[0] + assert num_entities.get("row_count", None) == ct.default_nb self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_insert_upsert_flush_delete_upsert_flush(self): + def test_insert_auto_id_create_index(self): """ - target: Test PU will success and query will success - method: - 1. Create a collection - 2. Insert rows - 3. Upsert the rows - 4. Delete the rows - 5. Upsert the rows - 6. Flush the collection - 7. 
Query the rows - expected: Step 2-7 should success + target: test create index in auto_id=True collection + method: 1.create auto_id=True collection and insert + 2.create index + expected: index correct """ client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) + collection_name = cf.gen_collection_name_by_testcase_name() - # step 2: Insert rows - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - partial_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, - desired_field_names=[default_primary_key_field_name, default_int32_field_name]) - new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + # Create schema with auto_id=True + schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=True) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_float_field_name, DataType.FLOAT) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length) + + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + + self.create_collection(client, collection_name, dimension=default_dim, schema=schema, + index_params=index_params, auto_id=True) + + # Insert without primary key (auto_id) + rng = np.random.default_rng(seed=19530) + rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(ct.default_nb)] + results = self.insert(client, collection_name, rows)[0] + assert results['insert_count'] == ct.default_nb + + self.flush(client, collection_name) + num_entities = self.get_collection_stats(client, collection_name)[0] + assert num_entities.get("row_count", None) == ct.default_nb + + indexes = self.list_indexes(client, collection_name)[0] + assert default_vector_field_name in indexes + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L2) + def test_insert_auto_id_true(self, pk_field): + """ + target: test insert ids fields values when auto_id=True + method: 1.create collection with auto_id=True 2.insert without ids + expected: verify primary_keys and num_entities + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + + # Create schema with auto_id=True and specific primary field + schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0] + if pk_field == ct.default_int64_field_name: + schema.add_field(pk_field, DataType.INT64, is_primary=True, auto_id=True) + else: + schema.add_field(pk_field, DataType.VARCHAR, max_length=ct.default_length, 
is_primary=True, auto_id=True)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_float_field_name, DataType.FLOAT)
+        if pk_field != ct.default_string_field_name:
+            schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
+
+        self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=True)
+
+        # Insert without primary key (auto_id)
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0} for i in range(ct.default_nb)]
+        if pk_field != ct.default_string_field_name:
+            for i, row in enumerate(rows):
+                row[default_string_field_name] = str(i)
+
+        results = self.insert(client, collection_name, rows)[0]
+        assert results['insert_count'] == ct.default_nb
+
+        self.flush(client, collection_name)
+        num_entities = self.get_collection_stats(client, collection_name)[0]
+        assert num_entities.get("row_count", None) == ct.default_nb
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_twice_auto_id_true(self, pk_field):
+        """
+        target: test insert the same rows twice when auto_id=True
+        method: 1.create collection with auto_id=True 2.insert twice
+        expected: verify primary_keys unique
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        nb = 10
+
+        # Create schema with auto_id=True and specific primary field
+        schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0]
+        if pk_field == ct.default_int64_field_name:
+            schema.add_field(pk_field, DataType.INT64, is_primary=True, auto_id=True)
+        else:
+            schema.add_field(pk_field, DataType.VARCHAR, max_length=ct.default_length, is_primary=True, auto_id=True)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_float_field_name, DataType.FLOAT)
+        if pk_field != ct.default_string_field_name:
+            schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
+
+        self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=True)
+
+        # Insert twice
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0} for i in range(nb)]
+        if pk_field != ct.default_string_field_name:
+            for i, row in enumerate(rows):
+                row[default_string_field_name] = str(i)
+
+        results_1 = self.insert(client, collection_name, rows)[0]
+        assert results_1['insert_count'] == nb
+
+        results_2 = self.insert(client, collection_name, rows)[0]
+        assert results_2['insert_count'] == nb
+
+        self.flush(client, collection_name)
+        num_entities = self.get_collection_stats(client, collection_name)[0]
+        assert num_entities.get("row_count", None) == nb * 2
+        self.drop_collection(client, collection_name)
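For context on the two tests above: with auto_id=True the server assigns the primary keys, and MilvusClient.insert returns them in its result dict, which is what makes the "primary_keys unique" expectation checkable at all. A minimal standalone sketch, assuming a reachable Milvus at the default URI; the collection name "auto_id_demo" and the field names are illustrative only, not taken from this suite:

# Sketch: primary keys generated by two inserts of identical rows never collide.
from pymilvus import MilvusClient, DataType

client = MilvusClient(uri="http://localhost:19530")
schema = client.create_schema(auto_id=True)
schema.add_field("id", DataType.INT64, is_primary=True, auto_id=True)
schema.add_field("vec", DataType.FLOAT_VECTOR, dim=8)
client.create_collection("auto_id_demo", schema=schema)

rows = [{"vec": [0.1] * 8} for _ in range(10)]
ids_1 = client.insert("auto_id_demo", rows)["ids"]  # server-assigned keys
ids_2 = client.insert("auto_id_demo", rows)["ids"]
assert set(ids_1).isdisjoint(ids_2)  # same rows, fresh primary keys
client.drop_collection("auto_id_demo")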
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_insert_auto_id_true_list_data(self, pk_field):
+        """
+        target: test insert without ids field values when auto_id=True
+        method: 1.create collection with auto_id=True 2.insert rows without ids field values
+        expected: assert num entities
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+
+        # Create schema with auto_id=True and specific primary field
+        schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0]
+        if pk_field == ct.default_int64_field_name:
+            schema.add_field(pk_field, DataType.INT64, is_primary=True, auto_id=True)
+        else:
+            schema.add_field(pk_field, DataType.VARCHAR, max_length=ct.default_length, is_primary=True, auto_id=True)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_float_field_name, DataType.FLOAT)
+        if pk_field != ct.default_string_field_name:
+            schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
+
+        self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=True)
+
+        # Insert without primary key (auto_id)
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0} for i in range(ct.default_nb)]
+        if pk_field != ct.default_string_field_name:
+            for i, row in enumerate(rows):
+                row[default_string_field_name] = str(i)
+
+        results = self.insert(client, collection_name, rows)[0]
+        assert results['insert_count'] == ct.default_nb
+
+        self.flush(client, collection_name)
+        num_entities = self.get_collection_stats(client, collection_name)[0]
+        assert num_entities.get("row_count", None) == ct.default_nb
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_auto_id_true_with_dataframe_values(self, pk_field):
+        """
+        target: test insert with dataframe data
+        method: 1.create collection with auto_id=True 2.insert a pandas DataFrame
+        expected: milvus client does not support insert with dataframe
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+
+        # Create schema with auto_id=True
+        schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0]
+        if pk_field == ct.default_int64_field_name:
+            schema.add_field(pk_field, DataType.INT64, is_primary=True, auto_id=True)
+        else:
+            schema.add_field(pk_field, DataType.VARCHAR, max_length=ct.default_length, is_primary=True, auto_id=True)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_float_field_name, DataType.FLOAT)
+        if pk_field != ct.default_string_field_name:
+            schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
+
+        self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=True)
+
+        # Try to insert a pandas DataFrame (should fail)
+        df = cf.gen_default_dataframe_data(nb=100, auto_id=True)
+        error = {ct.err_code: 999,
+                 ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict', got 'DataFrame'"}
+        self.insert(client, collection_name, df, check_task=CheckTasks.err_res, check_items=error)
+
+        self.flush(client, collection_name)
+        num_entities = self.get_collection_stats(client, collection_name)[0]
+        assert num_entities.get("row_count", None) == 0
+        self.drop_collection(client, collection_name)
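On the DataFrame case just above: MilvusClient.insert accepts only a dict or a list of dicts, which is why the test expects the err_code 999 rejection rather than a successful insert. Where older ORM-style code passed a pandas DataFrame, the usual migration is a one-line conversion; a hedged sketch (the column names here are illustrative and would have to match the collection schema):

import pandas as pd

df = pd.DataFrame({"id": [0, 1], "vector": [[0.1] * 8, [0.2] * 8]})
rows = df.to_dict(orient="records")  # list of row dicts, the shape insert() accepts
# client.insert(collection_name, rows)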
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_insert_auto_id_true_with_list_values(self, pk_field):
+        """
+        target: test insert with auto_id=True
+        method: 1.create collection with auto_id=True 2.insert rows without ids field values
+        expected: verify num entities
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        nb = 100
+
+        # Create schema with auto_id=True
+        schema = self.create_schema(client, auto_id=True, enable_dynamic_field=True)[0]
+        if pk_field == ct.default_int64_field_name:
+            schema.add_field(pk_field, DataType.INT64, is_primary=True, auto_id=True)
+        else:
+            schema.add_field(pk_field, DataType.VARCHAR, max_length=ct.default_length, is_primary=True, auto_id=True)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_float_field_name, DataType.FLOAT)
+        if pk_field != ct.default_string_field_name:
+            schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length)
+
+        self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=True)
+
+        # Insert without primary key (auto_id)
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0} for i in range(nb)]
+        if pk_field != ct.default_string_field_name:
+            for i, row in enumerate(rows):
+                row[default_string_field_name] = str(i)
+
+        self.insert(client, collection_name, rows)
+        self.flush(client, collection_name)
+        num_entities = self.get_collection_stats(client, collection_name)[0]
+        assert num_entities.get("row_count", None) == nb
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_auto_id_false_same_values(self):
+        """
+        target: test insert same ids with auto_id false
+        method: 1.create collection with auto_id=False 2.insert same int64 field values
+        expected: insert successfully, duplicate primary keys are accepted
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        nb = 100
+
+        self.create_collection(client, collection_name, default_dim, auto_id=False)
+
+        # Insert with same primary key values
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: 1, default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(nb)]
+        results = self.insert(client, collection_name, rows)[0]
+        assert results['insert_count'] == nb
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_auto_id_false_negative_values(self):
+        """
+        target: test insert negative ids with auto_id false
+        method: auto_id=False, primary field values are negative
+        expected: verify num entities
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        nb = 100
+
+        self.create_collection(client, collection_name, default_dim, auto_id=False)
+
+        # Insert with negative primary key values
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: -i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(nb)]
+        results = self.insert(client, collection_name, rows)[0]
+        assert results['insert_count'] == nb
+
+        self.flush(client, collection_name)
+        num_entities = self.get_collection_stats(client, collection_name)[0]
+        assert num_entities.get("row_count", None) == nb
+        self.drop_collection(client, collection_name)
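One note on the duplicate-key case above: Milvus does not enforce primary-key uniqueness at insert time, so inserting nb rows that all carry primary key 1 is accepted, which is why the docstring expects success rather than an error. Deduplication is the job of upsert. A small sketch of the distinction, against a hypothetical collection "pk_demo" with an INT64 pk "id" and an 8-dim vector "vec" (both names illustrative):

# insert() appends rows without checking the primary key:
client.insert("pk_demo", [{"id": 1, "vec": [0.1] * 8}])
client.insert("pk_demo", [{"id": 1, "vec": [0.2] * 8}])  # accepted duplicate

# upsert() removes any existing entity with pk 1 before writing the new row:
client.upsert("pk_demo", [{"id": 1, "vec": [0.3] * 8}])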
+
+    @pytest.mark.tags(CaseLabel.L1)
+    # @pytest.mark.xfail(reason="issue 15416")
+    def test_insert_multi_threading(self):
+        """
+        target: test concurrent insert
+        method: multi threads insert
+        expected: verify num entities
+        """
+        import threading
+
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
+
+        thread_num = 4
+        threads = []
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(ct.default_nb)]
+
+        def insert(thread_i):
+            log.debug(f'In thread-{thread_i}')
+            # Adjust primary keys to be unique per thread
+            thread_rows = [{default_primary_key_field_name: i + thread_i * ct.default_nb,
+                            default_vector_field_name: row[default_vector_field_name],
+                            default_float_field_name: row[default_float_field_name],
+                            default_string_field_name: row[default_string_field_name]} for i, row in enumerate(rows)]
+            results = self.insert(client, collection_name, thread_rows)[0]
+            assert results['insert_count'] == ct.default_nb
+
+        for i in range(thread_num):
+            x = threading.Thread(target=insert, args=(i,))
+            threads.append(x)
+            x.start()
+        for t in threads:
+            t.join()
+
+        self.flush(client, collection_name)
+        num_entities = self.get_collection_stats(client, collection_name)[0]
+        assert num_entities.get("row_count", None) == ct.default_nb * thread_num
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_multi_times(self, dim):
+        """
+        target: test insert multi times
+        method: insert data multi times
+        expected: verify num entities
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        step = 120
+        nb = 12000
+
+        self.create_collection(client, collection_name, dim, auto_id=False)
+
+        rng = np.random.default_rng(seed=19530)
+        start_id = 0
+        for _ in range(nb // step):
+            rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, dim))[0]),
+                     default_float_field_name: i * 1.0, default_string_field_name: str(i)}
+                    for i in range(start_id, start_id + step)]
+            results = self.insert(client, collection_name, rows)[0]
+            assert results['insert_count'] == step
+            start_id += step
+
+        self.flush(client, collection_name)
+        num_entities = self.get_collection_stats(client, collection_name)[0]
+        assert num_entities.get("row_count", None) == nb
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_all_datatype_collection(self):
+        """
+        target: test insert into collection that contains all datatype fields
+        method: 1.create all datatype collection 2.insert data
+        expected: verify num entities
+        """
+        # MilvusClient doesn't support construct_from_dataframe, skip this test
+        # or reimplement using schema with all data types
+        pytest.skip("MilvusClient doesn't support construct_from_dataframe")
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_insert_equal_to_resource_limit(self):
+        """
+        target: test insert data equal to RPC limitation 64MB (67108864)
+        method: calculate the critical value and insert the equivalent amount of data
+        expected: insert successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        # nb = 127583 without json field
+        nb = 108993
+
+        self.create_collection(client, collection_name, default_dim, auto_id=False)
+
+        rng = np.random.default_rng(seed=19530)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
+                 default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(nb)]
         self.insert(client, collection_name, rows)
-        # step 3: Upsert the rows
-        upsert_result = self.upsert(client, collection_name, partial_rows, partial_update=True)[0]
         self.flush(client, collection_name)
+        num_entities = self.get_collection_stats(client, collection_name)[0]
+        assert num_entities.get("row_count", None) == nb
+        self.drop_collection(client, collection_name)
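For anyone checking the calibration constants in the test above: the 64 MB gRPC limit is 64 * 1024 * 1024 = 67108864 bytes, and with default_dim = 128 a float vector alone costs 128 * 4 = 512 bytes per row. The two nb values then imply a rough per-row payload; the arithmetic below reproduces them (the per-row figures are estimates of serialized size, not exact protocol accounting):

RPC_LIMIT = 64 * 1024 * 1024  # 67108864 bytes
VECTOR_BYTES = 128 * 4        # FLOAT_VECTOR: 4 bytes per dimension

print(RPC_LIMIT / 127583)  # ~526 bytes/row, matching the "without json field" nb
print(RPC_LIMIT / 108993)  # ~616 bytes/row, matching nb = 108993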
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("nullable", [True, False])
+    @pytest.mark.parametrize("default_value_type", ["empty", "none"])
+    def test_insert_one_field_using_default_value(self, default_value_type, nullable, auto_id):
+        """
+        target: test insert with one field using default value
+        method: 1. create a collection with one field using default value
+                2. insert rows that omit the field or set it to None so the default value applies
+        expected: insert successfully
+        """
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
-        # step 4: Delete the rows
-        delete_result = self.delete(client, collection_name,
-                                    filter=f"{default_primary_key_field_name} < {default_nb//2}")[0]
-        self.upsert(client, collection_name, new_rows, partial_update=True)
+        # Create schema with default value field
+        schema = self.create_schema(client, auto_id=auto_id, enable_dynamic_field=False)[0]
+        if not auto_id:
+            schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        else:
+            schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=True)
+        schema.add_field(default_float_field_name, DataType.FLOAT)
+        schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=ct.default_length,
+                         default_value="abc", nullable=nullable)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
-        # step 5: Flush the collection
+        self.create_collection(client, collection_name, dimension=default_dim, schema=schema, auto_id=auto_id)
+
+        # Insert data with None or omitting the default value field
+        rng = np.random.default_rng(seed=19530)
+        rows = []
+        for i in range(ct.default_nb):
+            row = {default_float_field_name: float(i),
+                   default_vector_field_name: list(rng.random((1, default_dim))[0])}
+            if not auto_id:
+                row[default_primary_key_field_name] = i
+            if default_value_type == "none":
+                row[default_string_field_name] = None
+            # If default_value_type == "empty", we don't include the field at all
+            rows.append(row)
+
+        self.insert(client, collection_name, rows)
         self.flush(client, collection_name)
-
-        # step 6: Query the rows
-        result = self.query(client, collection_name, filter=default_search_exp,
-                            check_task=CheckTasks.check_query_results,
-                            check_items={exp_res: new_rows,
-                                         "with_vec": True,
-                                         "pk_name": default_primary_key_field_name})[0]
-
-        assert upsert_result["upsert_count"] == default_nb
-        assert delete_result["delete_count"] == default_nb//2
-        assert len(result) == default_nb
-
+        num_entities = self.get_collection_stats(client, collection_name)[0]
+        assert num_entities.get("row_count", None) == ct.default_nb
         self.drop_collection(client, collection_name)

-    """
-    ******************************************************************
-    # The following are valid cases for nullable fields
-    ******************************************************************
-    """
-    @pytest.mark.tags(CaseLabel.L0)
-    def test_milvus_client_partial_update_nullable_field(self):
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_insert_multi_fields_none_with_default_value(self):
         """
-        Target: test PU without nullable field, the field will keep its value
-        Method:
-        1. Create collection, enable nullable fields.
-        2. Insert a row while assigning a value to nullable field (using partial update)
-        3. PU nullable field and other fields
-        Expected: values should be updated
+        target: test insert with multi fields include array using none value
+        method: 1. 
create a collection with multi fields using default value + 2. insert using none value to replace the field value + expected: insert successfully """ - # Step 1: create collection with nullable fields client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) + collection_name = cf.gen_collection_name_by_testcase_name() + schema = self.create_schema(client)[0] + dim = 16 + nb = 100 + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True) + schema.add_field(default_int32_field_name, DataType.INT32, default_value=np.int32(1), nullable=True) + schema.add_field(default_float_field_name, DataType.FLOAT, default_value=np.float32(1.0), nullable=True) + schema.add_field(default_string_field_name, DataType.VARCHAR, default_value="abc", max_length=100, nullable=True) + schema.add_field('int32_array', datatype=DataType.ARRAY, element_type=DataType.INT32, max_capacity=20, nullable=True) + schema.add_field('float_array', datatype=DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=20, nullable=True) + schema.add_field('string_array', datatype=DataType.ARRAY, element_type=DataType.VARCHAR, max_capacity=20, max_length=100, nullable=True) + schema.add_field('json', DataType.JSON, nullable=True) + schema.add_field(default_float_vec_field_name, DataType.FLOAT_VECTOR, dim=dim) + self.create_collection(client, collection_name, schema=schema) + + rows = [{ + default_primary_key_field_name: i, + default_int32_field_name: None, + default_float_field_name: None, + default_string_field_name: None, + 'int32_array': None, + 'float_array': None, + 'string_array': None, + 'json': None, + default_float_vec_field_name: cf.gen_vectors(1, dim=dim)[0] + } for i in range(nb)] + self.insert(client, collection_name, rows) + self.flush(client, collection_name) + num_entities = self.get_collection_stats(client, collection_name)[0] + assert num_entities.get("row_count", None) == nb + + # build index and load index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) + index_params.add_index(default_float_vec_field_name, metric_type="L2") + self.create_index(client, collection_name, index_params) + self.load_collection(client, collection_name) - # Step 2: insert a row while assigning a value to nullable field - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) - self.upsert(client, collection_name, rows, partial_update=True) + # try to query None value entities, should be empty + res, _ = self.query(client, collection_name, filter=f"{default_string_field_name} is null") + assert len(res) == 0 - # Step 3: PU other fields - # Even index: update int32 field to new value - # Odd index: update vector field to random value - # also update rows to keep track of changes so we can query the result - new_value = np.int32(99) - 
vector_rows = [] - int32_rows = [] - for i, row in enumerate(rows): - if i % 2 == 0: - int32_rows.append({default_primary_key_field_name: row[default_primary_key_field_name], - default_int32_field_name: new_value}) - rows[i][default_int32_field_name] = new_value - else: - new_vector = [random.random() for _ in range(default_dim)] - vector_rows.append({default_primary_key_field_name: row[default_primary_key_field_name], - default_vector_field_name: new_vector}) - rows[i][default_vector_field_name] = new_vector - rows[i][default_int32_field_name] = None + # try to query default value entities, should be not empty + res, _ = self.query(client, collection_name, filter=f"{default_string_field_name}=='abc'") + assert len(res) == nb - self.upsert(client, collection_name, int32_rows, partial_update=True) - self.upsert(client, collection_name, vector_rows, partial_update=True) - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - output_fields=[default_vector_field_name, default_int32_field_name], - check_items={exp_res: rows, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb + # try to query None value entities on json field, should not be empty + res, _ = self.query(client, collection_name, filter=f"json is null") + assert len(res) == nb + + res, _ = self.query(client, collection_name, filter=f"int32_array is null") + assert len(res) == nb self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_null_to_value(self): - """ - Target: test PU can successfully update null to a value - Method: - 1. Create a collection, enable nullable fields init null - 2. Partial update nullable field - 3. 
Query null field - Expected: Nullfield should have the same value as updated - """ - # step 1: create collection with nullable fields init null - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) - self.upsert(client, collection_name, rows, partial_update=True) - - # step 2: Partial update nullable field - new_value = np.int32(99) - new_rows = [{default_primary_key_field_name: row[default_primary_key_field_name], - default_int32_field_name: new_value} for row in rows] - self.upsert(client, collection_name, new_rows, partial_update=True) - - # step 3: Query null field - #self.load_collection(client, collection_name) - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: new_rows, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb - - self.drop_collection(client, collection_name) - - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_default_to_value(self): - """ - Target: test PU can successfully update a default to a value - Method: - 1. Create a collection, enable nullable fields init default value - 2. Partial update nullable field - 3. 
Query null field - Expected: Nullfield should have the same value as updated - """ - # step 1: create collection with nullable fields init default value - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True, default_value=default_int32_value) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) - self.upsert(client, collection_name, rows, partial_update=True) - - # step 2: Partial update nullable field - new_value = 99 - new_row = [{default_primary_key_field_name: i, - default_int32_field_name: new_value} for i in range(default_nb)] - self.upsert(client, collection_name, new_row, partial_update=True) - - # step 3: Query null field - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: new_row, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb - - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_value_to_null(self): - """ - Target: test PU can successfully update a value to null - Method: - 1. Create a collection, enable nullable fields init value - 2. Partial update nullable field - 3. 
Query null field - Expected: Nullfield should have the same value as updated - """ - # step 1: create collection with nullable fields init value - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.upsert(client, collection_name, rows, partial_update=True) - - # step 2: Partial update nullable field - new_value = None - new_row = [{default_primary_key_field_name: i, - default_int32_field_name: new_value} for i in range(default_nb)] - self.upsert(client, collection_name, new_row, partial_update=True) - - # step 3: Query null field - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: new_row, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb - - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_null_to_null(self): - """ - Target: test PU can successfully update a null to null - Method: - 1. Create a collection, enable nullable fields - 2. Insert default_nb rows to the collection - 3. Partial Update the nullable field with null - 4. 
Query the collection to check the value of nullable field - Expected: query should have correct value and number of entities - """ - # step 1: create collection with nullable fields - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: insert default_nb rows to the collection - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) - self.upsert(client, collection_name, rows, partial_update=True) - - # step 3: Partial Update the nullable field with null - new_value = None - new_row = [{default_primary_key_field_name: i, - default_int32_field_name: new_value} for i in range(default_nb)] - self.upsert(client, collection_name, new_row, partial_update=True) - - # step 4: Query the collection to check the value of nullable field - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: new_row, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - - assert len(result) == default_nb - - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_various_value_to_nullable_field(self): - """ - Target: test PU can successfully update various value to a nullable field - Method: - 1. Create a collection, enable nullable fields - 2. Insert default_nb rows to the collection - 3. Partial Update the nullable field with various value - 4. 
Query the collection to check the value of nullable field - Expected: query should have correct value - """ - # step 1: create collection with nullable fields - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: insert default_nb rows to the collection - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) - self.upsert(client, collection_name, rows, partial_update=True) - - # step 3: Partial Update the nullable field with various value - new_value = 99 - new_row = [{default_primary_key_field_name: i, - default_int32_field_name: new_value if i % 2 == 0 else None} - for i in range(default_nb)] - self.upsert(client, collection_name, new_row, partial_update=True) - - # step 4: Query the collection to check the value of nullable field - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: new_row, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - - assert len(result) == default_nb - - self.drop_collection(client, collection_name) - - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_filter_by_null(self): - """ - target: Test PU will success and query will success - method: - 1. Create a collection - 2. partial upsert data with nullable field - 3. Query the collection with filter by nullable field - 4. partial update nullable field back to null - 5. 
Query the collection with filter by nullable field - expected: Step 2,3,4,5 should success - """ - # step 1: create collection with nullable fields - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: partial upsert data with nullable field - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) - self.upsert(client, collection_name, rows, partial_update=True) - result = self.query(client, collection_name, filter=f"{default_int32_field_name} IS NULL", - check_task=CheckTasks.check_query_results, - output_fields=[default_vector_field_name], - check_items={exp_res: rows, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb - - # update first half of the dataset with nullable field value - new_value = np.int32(99) - new_row = [{default_primary_key_field_name: i, - default_int32_field_name: new_value} for i in range(default_nb//2)] - self.upsert(client, collection_name, new_row, partial_update=True) - - # step 3: Query the collection with filter by nullable field - result = self.query(client, collection_name, filter=f"{default_int32_field_name} IS NOT NULL", - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: new_row, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb//2 - # query with == filter - result = self.query(client, collection_name, filter=f"{default_int32_field_name} == {new_value}", - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: new_row, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb//2 - - # step 4: partial update nullable field back to null - null_row = [{default_primary_key_field_name: i, - default_int32_field_name: None} for i in range(default_nb)] - self.upsert(client, collection_name, null_row, partial_update=True) - - # step 5: Query the collection with filter by nullable field - result = self.query(client, collection_name, filter=f"{default_int32_field_name} IS NULL", - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: null_row, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb - - self.drop_collection(client, collection_name) - - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_same_pk_same_field(self): - """ - target: Test PU will success and query will success - method: - 1. Create a collection - 2. Insert rows - 3. Upsert the rows with same pk and same field - 4. Query the rows - 5. 
Upsert the rows with same pk and different field - expected: Step 2 -> 4 should success 5 should fail - """ - # step 1: create collection - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: Insert rows - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.upsert(client, collection_name, rows, partial_update=True) - - # step 3: Upsert the rows with same pk and same field - new_rows = [{default_primary_key_field_name: 0, - default_int32_field_name: i} for i in range(default_nb)] - self.upsert(client, collection_name, new_rows, partial_update=True) - - # step 4: Query the rows - result = self.query(client, collection_name, filter=f"{default_primary_key_field_name} == 0", - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: [new_rows[-1]], - "pk_name": default_primary_key_field_name})[0] - assert len(result) == 1 - - self.drop_collection(client, collection_name) - - -class TestMilvusClientPartialUpdateInvalid(TestMilvusClientV2Base): - """ Test case of partial update interface """ - @pytest.fixture(scope="function", params=[False, True]) - def auto_id(self, request): - yield request.param - - @pytest.fixture(scope="function", params=["COSINE", "L2"]) - def metric_type(self, request): - yield request.param - - """ - ****************************************************************** - # The following are invalid base cases - ****************************************************************** - """ - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_new_pk_with_missing_field(self): - """ - target: Test PU will return error when provided new pk and partial field - method: - 1. Create a collection - 2. 
partial upsert a new pk with only partial field - expected: Step 2 should result fail - """ - # step 1: create collection - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: partial upsert a new pk with only partial field - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, - desired_field_names=[default_primary_key_field_name, default_int32_field_name]) - error = {ct.err_code: 1100, ct.err_msg: - f"fieldSchema({default_vector_field_name}) has no corresponding fieldData pass in: invalid parameter"} - self.upsert(client, collection_name, rows, partial_update=True, - check_task=CheckTasks.err_res, check_items=error) - - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_new_field_without_dynamic_field(self): - """ - target: Test PU will return error when provided new field without dynamic field - method: - 1. Create a collection with dynamic field - 2. partial upsert a new field - expected: Step 2 should result fail - """ - # step 1: create collection with dynamic field - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: partial upsert a new field - row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.upsert(client, collection_name, row, partial_update=True) - - new_row = [{default_primary_key_field_name: i, default_int32_field_name: 99} for i in range(default_nb)] - error = {ct.err_code: 1, - ct.err_msg: f"Attempt to insert an unexpected field `{default_int32_field_name}` to collection without enabling dynamic field"} - self.upsert(client, collection_name, new_row, partial_update=True, check_task=CheckTasks.err_res, check_items=error) - - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_after_release_collection(self): - """ - target: test basic function of partial update - method: - 1. create collection - 2. insert a full row of data using partial update - 3. partial update data - 4. release collection - 5. 
partial update data - expected: step 5 should fail - """ - # Step 1: create collection - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_string_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # Step 2: insert a full row of data using partial update - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.upsert(client, collection_name, rows, partial_update=True) - - # Step 3: partial update data - new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, - desired_field_names=[default_primary_key_field_name, default_string_field_name]) - self.upsert(client, collection_name, new_row, partial_update=True) - - # Step 4: release collection - self.release_collection(client, collection_name) - - # Step 5: partial update data - new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, - desired_field_names=[default_primary_key_field_name, default_string_field_name]) - error = {ct.err_code: 101, - ct.err_msg: f"failed to query: collection not loaded"} - self.upsert(client, collection_name, new_row, partial_update=True, - check_task=CheckTasks.err_res, check_items=error) - - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_same_pk_after_delete(self): - """ - target: test PU will fail when provided same pk and partial field - method: - 1. Create a collection with dynamic field - 2. Insert rows - 3. delete the rows - 4. 
upsert the rows with same pk and partial field - expected: step 4 should fail - """ - # Step 1: create collection - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # Step 2: insert rows - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.upsert(client, collection_name, rows, partial_update=True) - - # Step 3: delete the rows - result = self.delete(client, collection_name, filter=default_search_exp)[0] - assert result["delete_count"] == default_nb - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_nothing)[0] - assert len(result) == 0 - - # Step 4: upsert the rows with same pk and partial field - new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, - desired_field_names=[default_primary_key_field_name, default_vector_field_name]) - error = {ct.err_code: 1100, - ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"} - self.upsert(client, collection_name, new_rows, partial_update=True, - check_task=CheckTasks.err_res, check_items=error) - - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_pk_in_wrong_partition(self): - """ - target: test PU will fail when provided pk in wrong partition - method: - 1. Create a collection - 2. Create 2 partitions - 3. Insert rows - 4. 
upsert the rows with pk in wrong partition - expected: step 4 should fail - """ - # Step 1: create collection - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # Step 2: Create 2 partitions - num_of_partitions = 2 - partition_names = [] - for _ in range(num_of_partitions): - partition_name = cf.gen_unique_str("partition") - self.create_partition(client, collection_name, partition_name) - partition_names.append(partition_name) - - # Step 3: Insert rows - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - gap = default_nb // num_of_partitions - for i, partition in enumerate(partition_names): - self.upsert(client, collection_name, rows[i*gap:(i+1)*gap], partition_name=partition, partial_update=True) - - # Step 4: upsert the rows with pk in wrong partition - new_rows = cf.gen_row_data_by_schema(nb=gap, schema=schema, - desired_field_names=[default_primary_key_field_name, default_vector_field_name]) - error = {ct.err_code: 1100, - ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"} - self.upsert(client, collection_name, new_rows, partition_name=partition_names[-1], partial_update=True, - check_task=CheckTasks.err_res, check_items=error) - - self.drop_collection(client, collection_name) - - @pytest.mark.tags(CaseLabel.L1) - def test_milvus_client_partial_update_same_pk_multiple_fields(self): - """ - target: Test PU will success and query will success - method: - 1. Create a collection - 2. Insert rows - 3. 
Upsert the rows with same pk and different field - expected: Step 3 should fail - """ - # step 1: create collection - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: Insert rows - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) - self.upsert(client, collection_name, rows, partial_update=True) - - # step 3: Upsert the rows with same pk and different field - new_rows = [] - for i in range(default_nb): - data = {} - if i % 2 == 0: - data[default_int32_field_name] = i + 1000 - data[default_primary_key_field_name] = 0 - else: - data[default_vector_field_name] = [random.random() for _ in range(default_dim)] - data[default_primary_key_field_name] = 0 - new_rows.append(data) - - error = {ct.err_code: 1, - ct.err_msg: f"The data fields length is inconsistent. previous length is 2000, current length is 1000"} - self.upsert(client, collection_name, new_rows, partial_update=True, - check_task=CheckTasks.err_res, check_items=error) - - self.drop_collection(client, collection_name) \ No newline at end of file diff --git a/tests/python_client/milvus_client/test_milvus_client_json_path_index.py b/tests/python_client/milvus_client/test_milvus_client_json_path_index.py new file mode 100644 index 0000000000..229c2f1e62 --- /dev/null +++ b/tests/python_client/milvus_client/test_milvus_client_json_path_index.py @@ -0,0 +1,556 @@ +import pytest +import numpy as np + +from base.client_v2_base import TestMilvusClientV2Base +from utils.util_log import test_log as log +from common import common_func as cf +from common import common_type as ct +from common.common_type import CaseLabel, CheckTasks +from utils.util_pymilvus import * + +prefix = "client_insert" +epsilon = ct.epsilon +default_nb = ct.default_nb +default_nb_medium = ct.default_nb_medium +default_nq = ct.default_nq +default_dim = ct.default_dim +default_limit = ct.default_limit +default_search_exp = "id >= 0" +exp_res = "exp_res" +default_search_string_exp = "varchar >= \"0\"" +default_search_mix_exp = "int64 >= 0 && varchar >= \"0\"" +default_invaild_string_exp = "varchar >= 0" +default_json_search_exp = "json_field[\"number\"] >= 0" +perfix_expr = 'varchar like "0%"' +default_search_field = ct.default_float_vec_field_name +default_search_params = ct.default_search_params +default_primary_key_field_name = "id" +default_vector_field_name = "vector" +default_dynamic_field_name = "field_new" +default_float_field_name = ct.default_float_field_name +default_bool_field_name = ct.default_bool_field_name +default_string_field_name = ct.default_string_field_name +default_int32_array_field_name = ct.default_int32_array_field_name +default_string_array_field_name = ct.default_string_array_field_name +default_int32_field_name = 
ct.default_int32_field_name +default_int32_value = ct.default_int32_value + + +class TestMilvusClientInsertJsonPathIndexValid(TestMilvusClientV2Base): + """ Test case of insert interface """ + + @pytest.fixture(scope="function", params=["INVERTED"]) + def supported_varchar_scalar_index(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["BOOL", "Double", "Varchar", "json"]) + def supported_json_cast_type(self, request): + yield request.param + + """ + ****************************************************************** + # The following are valid base cases + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + def test_milvus_client_insert_before_json_path_index(self, enable_dynamic_field, supported_json_cast_type, + supported_varchar_scalar_index): + """ + target: test insert and then create json path index + method: create json path index after insert + steps: 1. create schema + 2. create collection + 3. insert + 4. prepare json path index params with parameter "json_cast_type" and "json_path" + 5. create index + expected: insert and create json path index successfully + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + json_field_name = "my_json" + schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + if not enable_dynamic_field: + schema.add_field(json_field_name, DataType.JSON) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, schema=schema, index_params=index_params) + # 2. 
insert with different data distribution + vectors = cf.gen_vectors(default_nb+50, default_dim) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in + range(default_nb)] + self.insert(client, collection_name, rows) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: i} for i in + range(default_nb, default_nb+10)] + self.insert(client, collection_name, rows) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: {}} for i in + range(default_nb+10, default_nb+20)] + self.insert(client, collection_name, rows) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in + range(default_nb + 20, default_nb + 30)] + self.insert(client, collection_name, rows) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in + range(default_nb + 30, default_nb + 40)] + self.insert(client, collection_name, rows) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in + range(default_nb + 40, default_nb + 50)] + self.insert(client, collection_name, rows) + # 3. prepare index params + index_name = "json_index" + index_params = self.prepare_index_params(client)[0] + index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE") + index_params.add_index(field_name=json_field_name, index_name=index_name, index_type=supported_varchar_scalar_index, + params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"}) + index_params.add_index(field_name=json_field_name, index_name=index_name + '1', + index_type=supported_varchar_scalar_index, + params={"json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a']"}) + index_params.add_index(field_name=json_field_name, index_name=index_name + '2', + index_type=supported_varchar_scalar_index, + params={"json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}"}) + index_params.add_index(field_name=json_field_name, index_name=index_name + '3', + index_type=supported_varchar_scalar_index, + params={"json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a'][0]['b']"}) + index_params.add_index(field_name=json_field_name, index_name=index_name + '4', + index_type=supported_varchar_scalar_index, + params={"json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a'][0]"})
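# The six insert batches above spread the JSON field over a nested object ({'a': {'b': i}}), a bare scalar, + # an empty object, a plain array, an array of objects, and an array holding a null member, so every + # json_path declared just above has both matching and non-matching rows; a filter such as + # my_json['a']['b'] >= 0 is then expected to be served by the matching json-path index (the parametrized + # INVERTED index with the given json_cast_type) rather than by brute-force JSON parsing. + # 4. 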
create index + self.create_index(client, collection_name, index_params) + self.describe_index(client, collection_name, index_name, + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a']['b']", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name}) + self.describe_index(client, collection_name, index_name + '1', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a']", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '1'}) + self.describe_index(client, collection_name, index_name + '2', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '2'}) + self.describe_index(client, collection_name, index_name + '3', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a'][0]['b']", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '3'}) + self.describe_index(client, collection_name, index_name + '4', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a'][0]", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '4'}) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + def test_milvus_client_insert_after_json_path_index(self, enable_dynamic_field, supported_json_cast_type, + supported_varchar_scalar_index): + """ + target: test insert after create json path index + method: create json path index first, then insert + steps: 1. create schema + 2. create all the index parameters including json path index + 3. create collection with schema and index params + 4. insert + 5. check the index + expected: insert successfully after create json path index + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name()
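# Unlike the insert-before case, all index parameters (the vector index plus five json-path indexes) are + # prepared before the collection exists and passed straight to create_collection, so no separate + # create_index call is needed in this test. + # 1. 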
create collection with schema and all the index parameters + json_field_name = "my_json" + schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + if not enable_dynamic_field: + schema.add_field(json_field_name, DataType.JSON) + index_name = "json_index" + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + index_params.add_index(field_name=json_field_name, index_name=index_name, index_type=supported_varchar_scalar_index, + params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"}) + index_params.add_index(field_name=json_field_name, index_name=index_name + '1', + index_type=supported_varchar_scalar_index, + params={"json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a']"}) + index_params.add_index(field_name=json_field_name, index_name=index_name + '2', + index_type=supported_varchar_scalar_index, + params={"json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}"}) + index_params.add_index(field_name=json_field_name, index_name=index_name + '3', + index_type=supported_varchar_scalar_index, + params={"json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a'][0]['b']"}) + index_params.add_index(field_name=json_field_name, index_name=index_name + '4', + index_type=supported_varchar_scalar_index, + params={"json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a'][0]"}) + self.create_collection(client, collection_name, schema=schema, index_params=index_params) + # 2. insert with different data distribution + vectors = cf.gen_vectors(default_nb+50, default_dim) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in + range(default_nb)] + self.insert(client, collection_name, rows) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: i} for i in + range(default_nb, default_nb+10)] + self.insert(client, collection_name, rows) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: {}} for i in + range(default_nb+10, default_nb+20)] + self.insert(client, collection_name, rows) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in + range(default_nb + 20, default_nb + 30)] + self.insert(client, collection_name, rows) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in + range(default_nb + 30, default_nb + 40)] + self.insert(client, collection_name, rows) + rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in + range(default_nb + 40, default_nb + 50)] + self.insert(client, collection_name, rows)
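# Here the rows arrive after the json-path indexes were declared at collection creation, so this is + # expected to exercise the build-on-insert path: the server should index each JSON value as it lands, + # and describe_index below should report the same five index definitions as in the insert-before case. + # 3. 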
check the json path index + self.describe_index(client, collection_name, index_name, + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a']['b']", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name}) + self.describe_index(client, collection_name, index_name + '1', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a']", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '1'}) + self.describe_index(client, collection_name, index_name + '2', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '2'}) + self.describe_index(client, collection_name, index_name + '3', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a'][0]['b']", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '3'}) + self.describe_index(client, collection_name, index_name + '4', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a'][0]", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '4'}) + + + class TestMilvusClientPartialUpdateInvalid(TestMilvusClientV2Base): + """ Test case of partial update interface """ + @pytest.fixture(scope="function", params=[False, True]) + def auto_id(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["COSINE", "L2"]) + def metric_type(self, request): + yield request.param + + """ + ****************************************************************** + # The following are invalid base cases + ****************************************************************** + """ + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_new_pk_with_missing_field(self): + """ + target: Test PU will return error when provided new pk and partial field + method: + 1. Create a collection + 2. 
partial upsert a new pk with only partial field + expected: step 2 should fail + """ + # step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: partial upsert a new pk with only partial field + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_int32_field_name]) + error = {ct.err_code: 1100, ct.err_msg: + f"fieldSchema({default_vector_field_name}) has no corresponding fieldData pass in: invalid parameter"} + self.upsert(client, collection_name, rows, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_new_field_without_dynamic_field(self): + """ + target: Test PU will return error when provided new field without dynamic field + method: + 1. Create a collection without dynamic field + 2. partial upsert a new field + expected: step 2 should fail + """ + # step 1: create collection without dynamic field + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: partial upsert a new field + row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, row, partial_update=True) + + new_row = [{default_primary_key_field_name: i, default_int32_field_name: 99} for i in range(default_nb)] + error = {ct.err_code: 1, + ct.err_msg: f"Attempt to insert an unexpected field `{default_int32_field_name}` to collection without enabling dynamic field"} + self.upsert(client, collection_name, new_row, partial_update=True, check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_after_release_collection(self): + """ + target: test basic function of partial update + method: + 1. create collection + 2. insert a full row of data using partial update + 3. partial update data + 4. release collection + 5. 
partial update data + expected: step 5 should fail + """ + # Step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_string_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: insert a full row of data using partial update + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + + # Step 3: partial update data + new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_string_field_name]) + self.upsert(client, collection_name, new_row, partial_update=True) + + # Step 4: release collection + self.release_collection(client, collection_name) + + # Step 5: partial update data + new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_string_field_name]) + error = {ct.err_code: 101, + ct.err_msg: f"failed to query: collection not loaded"} + self.upsert(client, collection_name, new_row, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_same_pk_after_delete(self): + """ + target: test PU will fail when provided same pk and partial field + method: + 1. Create a collection + 2. Insert rows + 3. delete the rows + 4. 
upsert the rows with same pk and partial field + expected: step 4 should fail + """ + # Step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + + # Step 3: delete the rows + result = self.delete(client, collection_name, filter=default_search_exp)[0] + assert result["delete_count"] == default_nb + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_nothing)[0] + assert len(result) == 0 + + # Step 4: upsert the rows with same pk and partial field + new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_vector_field_name]) + error = {ct.err_code: 1100, + ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"} + self.upsert(client, collection_name, new_rows, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_pk_in_wrong_partition(self): + """ + target: test PU will fail when provided pk in wrong partition + method: + 1. Create a collection + 2. Create 2 partitions + 3. Insert rows + 4. 
upsert the rows with pk in wrong partition + expected: step 4 should fail + """ + # Step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: Create 2 partitions + num_of_partitions = 2 + partition_names = [] + for _ in range(num_of_partitions): + partition_name = cf.gen_unique_str("partition") + self.create_partition(client, collection_name, partition_name) + partition_names.append(partition_name) + + # Step 3: Insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + gap = default_nb // num_of_partitions + for i, partition in enumerate(partition_names): + self.upsert(client, collection_name, rows[i*gap:(i+1)*gap], partition_name=partition, partial_update=True) + + # Step 4: upsert the rows with pk in wrong partition + new_rows = cf.gen_row_data_by_schema(nb=gap, schema=schema, + desired_field_names=[default_primary_key_field_name, default_vector_field_name]) + error = {ct.err_code: 1100, + ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"} + self.upsert(client, collection_name, new_rows, partition_name=partition_names[-1], partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_same_pk_multiple_fields(self): + """ + target: Test PU fails when one upsert batch mixes different field sets for the same pk + method: + 1. Create a collection + 2. Insert rows + 3. 
Upsert the rows with same pk and different field + expected: Step 3 should fail + """ + # step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: Insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + + # step 3: Upsert the rows with same pk and different field + new_rows = [] + for i in range(default_nb): + data = {} + if i % 2 == 0: + data[default_int32_field_name] = i + 1000 + data[default_primary_key_field_name] = 0 + else: + data[default_vector_field_name] = [random.random() for _ in range(default_dim)] + data[default_primary_key_field_name] = 0 + new_rows.append(data) + + error = {ct.err_code: 1, + ct.err_msg: f"The data fields length is inconsistent. previous length is 2000, current length is 1000"} + self.upsert(client, collection_name, new_rows, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) \ No newline at end of file diff --git a/tests/python_client/milvus_client/test_milvus_client_partial_update.py b/tests/python_client/milvus_client/test_milvus_client_partial_update.py new file mode 100644 index 0000000000..d69d947fca --- /dev/null +++ b/tests/python_client/milvus_client/test_milvus_client_partial_update.py @@ -0,0 +1,1533 @@ +import pytest +import numpy as np + +from base.client_v2_base import TestMilvusClientV2Base +from utils.util_log import test_log as log +from common import common_func as cf +from common import common_type as ct +from common.common_type import CaseLabel, CheckTasks +from utils.util_pymilvus import * + +prefix = "client_insert" +epsilon = ct.epsilon +default_nb = ct.default_nb +default_nb_medium = ct.default_nb_medium +default_nq = ct.default_nq +default_dim = ct.default_dim +default_limit = ct.default_limit +default_search_exp = "id >= 0" +exp_res = "exp_res" +default_search_string_exp = "varchar >= \"0\"" +default_search_mix_exp = "int64 >= 0 && varchar >= \"0\"" +default_invaild_string_exp = "varchar >= 0" +default_json_search_exp = "json_field[\"number\"] >= 0" +perfix_expr = 'varchar like "0%"' +default_search_field = ct.default_float_vec_field_name +default_search_params = ct.default_search_params +default_primary_key_field_name = "id" +default_vector_field_name = "vector" +default_dynamic_field_name = "field_new" +default_float_field_name = ct.default_float_field_name +default_bool_field_name = ct.default_bool_field_name +default_string_field_name = ct.default_string_field_name +default_int32_array_field_name = ct.default_int32_array_field_name +default_string_array_field_name = ct.default_string_array_field_name +default_int32_field_name = 
ct.default_int32_field_name +default_int32_value = ct.default_int32_value + + +class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base): + """ Test case of partial update interface """ + @pytest.fixture(scope="function", params=[False, True]) + def auto_id(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["COSINE", "L2"]) + def metric_type(self, request): + yield request.param + + """ + ****************************************************************** + # The following are valid base cases + ****************************************************************** + """ + @pytest.mark.tags(CaseLabel.L0) + def test_milvus_client_partial_update(self): + """ + target: test basic function of partial update + method: + 1. create collection + 2. insert a full row of data using partial update + 3. partial update data + expected: both step 2 and 3 should be successful + """ + # Step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_string_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: insert full rows of data using partial update + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + check_items={exp_res: rows, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + assert len(result) == default_nb + + # Step 3: partial update data + new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_string_field_name]) + self.upsert(client, collection_name, new_row, partial_update=True) + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + output_fields=[default_string_field_name], + check_items={exp_res: new_row, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_with_all_datatype(self): + """ + target: test partial update with all datatype + method: + 1. create collection with all datatype schema + 2. insert data + 3. 
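# Illustrative sketch (editor's addition, not part of the patch): the core call
# pattern these cases exercise, reduced to standalone form. The URI and the names
# "demo" and "tag" are invented; partial_update is the flag this PR introduces.
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")
client.create_collection("demo", dimension=8)  # quick setup: pk "id", vector field "vector"
client.insert("demo", [{"id": 0, "vector": [0.0] * 8, "tag": 1}])
# send only the pk plus the fields to change; every other field keeps its value
client.upsert("demo", [{"id": 0, "tag": 2}], partial_update=True)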
partial update data + expected: both step 2 and 3 should be successful + """ + # step 1: create collection with all datatype schema + client = self._client() + schema = cf.gen_all_datatype_collection_schema(dim=default_dim) + index_params = self.prepare_index_params(client)[0] + text_sparse_emb_field_name = "text_sparse_emb" + + for i in range(len(schema.fields)): + field_name = schema.fields[i].name + if field_name == "json_field": + index_params.add_index(field_name, index_type="AUTOINDEX", + params={"json_cast_type": "json"}) + elif field_name == text_sparse_emb_field_name: + index_params.add_index(field_name, index_type="AUTOINDEX", metric_type="BM25") + else: + index_params.add_index(field_name, index_type="AUTOINDEX") + + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: insert data + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + + # step 3: partial update data + for field in schema.fields: + if field.is_primary: + primary_key_field_name = field.name + break + + vector_field_type = [DataType.FLOAT16_VECTOR, + DataType.BFLOAT16_VECTOR, + DataType.INT8_VECTOR] + # fields to be updated + update_fields_name = [] + scalar_update_name = [] + vector_update = [] # this stores field object + for field in schema.fields: + field_name = field.name + if field_name != text_sparse_emb_field_name: + update_fields_name.append(field_name) + if field.dtype not in vector_field_type: + scalar_update_name.append(field_name) + else: + vector_update.append(field) + + # PU scalar fields and vector fields together + new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=update_fields_name) + self.upsert(client, collection_name, new_rows, partial_update=True) + # expected scalar result + expected = [{field: new_rows[i][field] for field in scalar_update_name} + for i in range(default_nb)] + + result = self.query(client, collection_name, filter=f"{primary_key_field_name} >= 0", + check_task=CheckTasks.check_query_results, + output_fields=scalar_update_name, + check_items={exp_res: expected, + "with_vec": True, + "pk_name": primary_key_field_name})[0] + assert len(result) == default_nb + + # expected vector result + for field in vector_update: + expected = [{primary_key_field_name: data[primary_key_field_name], + field.name: data[field.name]} for data in new_rows] + result = self.query(client, collection_name, filter=f"{primary_key_field_name} >= 0", + check_task=CheckTasks.check_query_results, + output_fields=[field.name], + check_items={exp_res: expected, + "with_vec": True, + "vector_type": field.dtype, + "vector_field": field.name, + "pk_name": primary_key_field_name})[0] + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L0) + def test_partial_update_all_field_types_one_by_one(self): + """ + Test partial update functionality with all field types + 1. Create collection with all data types + 2. Insert initial data + 3. Perform partial update for each field type + 4. 
Verify all updates work correctly
+        """
+        client = self._client()
+        dim = 64
+        collection_name = cf.gen_collection_name_by_testcase_name()
+
+        # Create schema with all data types
+        schema = cf.gen_all_datatype_collection_schema(dim=dim)
+
+        # Create index parameters
+        index_params = self.prepare_index_params(client)[0]
+        for i in range(len(schema.fields)):
+            field_name = schema.fields[i].name
+            log.debug(f"adding index for field: {field_name}")
+            if field_name == "json_field":
+                index_params.add_index(field_name, index_type="AUTOINDEX",
+                                       params={"json_cast_type": "json"})
+            elif field_name == "text_sparse_emb":
+                index_params.add_index(field_name, index_type="AUTOINDEX", metric_type="BM25")
+            else:
+                index_params.add_index(field_name, index_type="AUTOINDEX")
+
+        # Create collection
+        self.create_collection(client, collection_name, default_dim, consistency_level="Strong",
+                               schema=schema, index_params=index_params)
+
+        # Load collection
+        self.load_collection(client, collection_name)
+
+        # Insert initial data
+        nb = 1000
+        rows = cf.gen_row_data_by_schema(nb=nb, schema=schema)
+        self.upsert(client, collection_name, rows, partial_update=True)
+        log.info(f"Inserted {nb} initial records")
+
+        # Partially update each non-pk field in turn (the pk is always included)
+        primary_key_field_name = schema.fields[0].name
+        for i in range(len(schema.fields)):
+            update_field_name = schema.fields[i if i != 0 else 1].name
+            new_row = cf.gen_partial_row_data_by_schema(nb=nb, schema=schema,
+                                                        desired_field_names=[primary_key_field_name, update_field_name])
+            self.upsert(client, collection_name, new_row, partial_update=True)
+
+        log.info("Partial update test for all field types passed successfully")
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_partial_update_simple_demo(self):
+        """
+        Test simple partial update demo with nullable fields
+        1. Create collection with explicit schema including nullable fields
+        2. Insert initial data with some null values
+        3. Perform partial updates with different field combinations
+           (a standalone sketch of this behavior follows below)
+        4. 
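# Illustrative sketch (editor's addition, not part of the patch): how the flag
# changes upsert semantics for an omitted nullable field. Assumes a loaded
# collection "demo" with pk "id", an 8-dim "vector" and a nullable "price".
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")
client.upsert("demo", [{"id": 1, "vector": [0.0] * 8, "price": 9.5}])
# a plain upsert rewrites the whole row, so the omitted "price" resets to null
client.upsert("demo", [{"id": 1, "vector": [0.1] * 8}])
# a partial update touches only the listed fields, so "price" keeps its value
client.upsert("demo", [{"id": 1, "vector": [0.2] * 8}], partial_update=True)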
Verify partial update behavior preserves unchanged fields + """ + client = self._client() + dim = 3 + collection_name = cf.gen_collection_name_by_testcase_name() + + # Create schema with nullable fields + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False) + schema.add_field("vector", DataType.FLOAT_VECTOR, dim=dim) + schema.add_field("name", DataType.VARCHAR, max_length=100, nullable=True) + schema.add_field("price", DataType.FLOAT, nullable=True) + schema.add_field("category", DataType.VARCHAR, max_length=50, nullable=True) + + # Create collection + self.create_collection(client, collection_name, schema=schema) + + # Create index + index_params = self.prepare_index_params(client)[0] + index_params.add_index("vector", index_type="AUTOINDEX", metric_type="L2") + self.create_index(client, collection_name, index_params=index_params) + + # Load collection + self.load_collection(client, collection_name) + + # Insert initial data with some null values + initial_data = [ + { + "id": 1, + "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), + "name": "Product A", + "price": 100.0, + "category": "Electronics" + }, + { + "id": 2, + "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), + "name": "Product B", + "price": None, # Null price + "category": "Home" + }, + { + "id": 3, + "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), + "name": "Product C", + "price": None, # Null price + "category": "Books" + } + ] + + self.upsert(client, collection_name, initial_data, partial_update=False) + log.info("Inserted initial data with null values") + + # Verify initial state + results = self.query(client, collection_name, filter="id > 0", output_fields=["*"])[0] + assert len(results) == 3 + + initial_data_map = {data['id']: data for data in results} + assert initial_data_map[1]['name'] == "Product A" + assert initial_data_map[1]['price'] == 100.0 + assert initial_data_map[1]['category'] == "Electronics" + assert initial_data_map[2]['name'] == "Product B" + assert initial_data_map[2]['price'] is None + assert initial_data_map[2]['category'] == "Home" + assert initial_data_map[3]['name'] == "Product C" + assert initial_data_map[3]['price'] is None + assert initial_data_map[3]['category'] == "Books" + + log.info("Initial data verification passed") + + # First partial update - update all fields + log.info("First partial update - updating all fields...") + first_update_data = [ + { + "id": 1, + "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), + "name": "Product A-Update", + "price": 111.1, + "category": "Electronics-Update" + }, + { + "id": 2, + "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), + "name": "Product B-Update", + "price": 222.2, + "category": "Home-Update" + }, + { + "id": 3, + "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), + "name": "Product C-Update", + "price": None, # Still null + "category": "Books-Update" + } + ] + + self.upsert(client, collection_name, first_update_data, partial_update=True) + + # Verify first update + results = self.query(client, collection_name, filter="id > 0", output_fields=["*"])[0] + assert len(results) == 3 + + first_update_map = {data['id']: data for data in results} + assert first_update_map[1]['name'] 
== "Product A-Update" + assert abs(first_update_map[1]['price'] - 111.1) < 0.001 + assert first_update_map[1]['category'] == "Electronics-Update" + assert first_update_map[2]['name'] == "Product B-Update" + assert abs(first_update_map[2]['price'] - 222.2) < 0.001 + assert first_update_map[2]['category'] == "Home-Update" + assert first_update_map[3]['name'] == "Product C-Update" + assert first_update_map[3]['price'] is None + assert first_update_map[3]['category'] == "Books-Update" + + log.info("First partial update verification passed") + + # Second partial update - update only specific fields + log.info("Second partial update - updating specific fields...") + second_update_data = [ + { + "id": 1, + "name": "Product A-Update-Again", + "price": 1111.1, + "category": "Electronics-Update-Again" + }, + { + "id": 2, + "name": "Product B-Update-Again", + "price": None, # Set back to null + "category": "Home-Update-Again" + }, + { + "id": 3, + "name": "Product C-Update-Again", + "price": 3333.3, # Set price from null to value + "category": "Books-Update-Again" + } + ] + + self.upsert(client, collection_name, second_update_data, partial_update=True) + + # Verify second update + results = self.query(client, collection_name, filter="id > 0", output_fields=["*"])[0] + assert len(results) == 3 + + second_update_map = {data['id']: data for data in results} + + # Verify ID 1: all fields updated + assert second_update_map[1]['name'] == "Product A-Update-Again" + assert abs(second_update_map[1]['price'] - 1111.1) < 0.001 + assert second_update_map[1]['category'] == "Electronics-Update-Again" + + # Verify ID 2: all fields updated, price set to null + assert second_update_map[2]['name'] == "Product B-Update-Again" + assert second_update_map[2]['price'] is None + assert second_update_map[2]['category'] == "Home-Update-Again" + + # Verify ID 3: all fields updated, price set from null to value + assert second_update_map[3]['name'] == "Product C-Update-Again" + assert abs(second_update_map[3]['price'] - 3333.3) < 0.001 + assert second_update_map[3]['category'] == "Books-Update-Again" + + # Verify vector fields were preserved from first update (not updated in second update) + # Note: Vector comparison might be complex, so we just verify they exist + assert 'vector' in second_update_map[1] + assert 'vector' in second_update_map[2] + assert 'vector' in second_update_map[3] + + log.info("Second partial update verification passed") + log.info("Simple partial update demo test completed successfully") + + @pytest.mark.tags(CaseLabel.L0) + def test_milvus_client_partial_update_null_to_null(self): + """ + Target: test PU can successfully update a null to null + Method: + 1. Create a collection, enable nullable fields + 2. Insert default_nb rows to the collection + 3. Partial Update the nullable field with null + 4. 
Query the collection to check the value of nullable field
+        Expected: query should have correct value and number of entities
+        """
+        # step 1: create collection with nullable fields
+        client = self._client()
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_int32_field_name, DataType.INT32, nullable=True)
+
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
+
+        collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
+        self.create_collection(client, collection_name, default_dim, schema=schema,
+                               consistency_level="Strong", index_params=index_params)
+
+        # step 2: insert default_nb rows to the collection
+        rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name])
+        self.upsert(client, collection_name, rows, partial_update=True)
+
+        # step 3: Partial Update the nullable field with null
+        new_row = cf.gen_partial_row_data_by_schema(
+            nb=default_nb,
+            schema=schema,
+            desired_field_names=[default_primary_key_field_name, default_int32_field_name],
+            start=0
+        )
+
+        # Set the nullable field to None
+        for data in new_row:
+            data[default_int32_field_name] = None
+
+        self.upsert(client, collection_name, new_row, partial_update=True)
+
+        # step 4: Query the collection to check the value of nullable field
+        result = self.query(client, collection_name, filter=default_search_exp,
+                            check_task=CheckTasks.check_query_results,
+                            output_fields=[default_int32_field_name],
+                            check_items={exp_res: new_row,
+                                         "with_vec": True,
+                                         "pk_name": default_primary_key_field_name})[0]
+        assert len(result) == default_nb
+
+        # Verify that all nullable fields are indeed null
+        for data in result:
+            assert data[default_int32_field_name] is None, f"Expected null value for {default_int32_field_name}, got {data[default_int32_field_name]}"
+
+        log.info("Partial update null to null test completed successfully")
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_partial_update_new_field_with_dynamic_field(self):
+        """
+        target: Test PU succeeds when upserting a field that is not in the schema
+                while the dynamic field is enabled (sketch below)
+        method:
+            1. Create a collection with enable_dynamic_field=True
+            2. 
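# Illustrative sketch (editor's addition, not part of the patch): the dynamic-field
# path the next case exercises. "demo_dynamic" and "tag" are invented names;
# quick-setup collections enable the dynamic field by default.
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")
client.create_collection("demo_dynamic", dimension=8)
client.insert("demo_dynamic", [{"id": 0, "vector": [0.0] * 8}])
# "tag" is not in the schema; with the dynamic field enabled it lands in $meta
client.upsert("demo_dynamic", [{"id": 0, "tag": 99}], partial_update=True)
print(client.query("demo_dynamic", filter="id == 0", output_fields=["tag"]))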
partial upsert new field + expected: Step 2 should result success + """ + # step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=True)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: partial upsert new field + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + new_rows = [{default_primary_key_field_name: i, default_int32_field_name: 99} for i in range(default_nb)] + self.upsert(client, collection_name, new_rows, partial_update=True) + + self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + output_fields=[default_int32_field_name], + check_items={exp_res: new_rows, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + + self.drop_collection(client, collection_name) + + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_partition(self): + """ + target: test PU can successfully update data in a partition + method: + 1. Create a collection + 2. Insert data into a partition + 3. Partial update data in the partition + expected: Step 3 should result success + """ + # step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: insert data into a partition + num_of_partitions = 10 + partition_names = [] + for _ in range(num_of_partitions): + partition_name = cf.gen_unique_str("partition") + self.create_partition(client, collection_name, partition_name) + partition_names.append(partition_name) + + # step 3: insert data into a partition + # partition 0: 0, 1, 2, ..., 199 + # partition 1: 200, 201, 202, ..., 399 + # partition 2: 400, 401, 402, ..., 599 + gap = default_nb // num_of_partitions # 200 + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + for i, partition in enumerate(partition_names): + self.upsert(client, collection_name, rows[i*gap:i*gap+gap], partition_name=partition, partial_update=True) + + # step 4: partial update data in the partition + # i*200+i = 0, 201, 402, 603, ..., 1809 + new_value = np.int32(99) + for i, partition_name in enumerate(partition_names): + new_row = 
[{default_primary_key_field_name: i*gap+i, default_int32_field_name: new_value}] + self.upsert(client, collection_name, new_row, partition_name=partition_name, partial_update=True) + self.query(client, collection_name, + check_task=CheckTasks.check_query_results, + partition_names=[partition_name], + ids = [i*gap+i], + output_fields=[default_int32_field_name], + check_items={exp_res: new_row, + "with_vec": True, + "pk_name": default_primary_key_field_name}) + + result = self.query(client, collection_name, filter=default_search_exp)[0] + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_partition_insert_update(self): + """ + target: test PU can successfully update data in a partition and insert data into a partition + method: + 1. Create a collection + 2. Insert data into a partitions + 3. Partial update data in the partition + 4. Insert data into a different partition + expected: Step 3 and 4 should result success + Visualization: + rows: [0-------------default_nb] + new_rows: [extra_nb-------------default_nb+extra_nb] + they overlap from extra_nb to default_nb + rows is inserted into partition 0 + new_rows is upserted into partition 0 & 1 + """ + # step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: insert data into partitions + num_of_partitions = 2 + partition_names = [] + for _ in range(num_of_partitions): + partition_name = cf.gen_unique_str("partition") + self.create_partition(client, collection_name, partition_name) + partition_names.append(partition_name) + + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.insert(client, collection_name, rows, partition_name=partition_names[0]) + + # step 3: partial update data in the partition + extra_nb = default_nb // num_of_partitions + new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, start=extra_nb) + + for partition_name in partition_names: + self.upsert(client, collection_name, new_rows, partition_name=partition_name, partial_update=True) + result = self.query(client, collection_name, + check_task=CheckTasks.check_query_results, + partition_names=[partition_name], + filter=f"{default_primary_key_field_name} >= {extra_nb}", + check_items={exp_res: new_rows, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + assert len(result) == default_nb + + result =self.delete(client, collection_name, partition_names=[partition_name], + filter=f"{default_primary_key_field_name} >= 0")[0] + if partition_name == partition_names[0]: + assert result["delete_count"] == default_nb + extra_nb + else: + assert result["delete_count"] == default_nb + + self.drop_collection(client, 
collection_name) + + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_insert_delete_upsert(self): + """ + target: Test PU will success and query will success + method: + 1. Create a collection + 2. Insert rows + 3. Delete the rows + 4. Upsert the rows + expected: Step 2,3,4 should success + """ + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: Insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.insert(client, collection_name, rows) + + # step 3: Delete the rows + delete_result = self.delete(client, collection_name, filter=default_search_exp)[0] + query_result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_nothing)[0] + + # step 4: Upsert the rows + self.upsert(client, collection_name, new_rows, partial_update=True) + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + check_items={exp_res: new_rows, + "pk_name": default_primary_key_field_name})[0] + + assert delete_result["delete_count"] == default_nb + assert len(query_result) == 0 + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_insert_delete_upsert_with_flush(self): + """ + target: Test PU will success and query will success + method: + 1. Create a collection + 2. Insert rows + 3. Delete the 1/2 rows and flush + 4. Upsert the default_nbrows and flush + 5. 
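# Illustrative sketch (editor's addition, not part of the patch): the
# delete / flush / upsert sequence in standalone form. Names are invented, and
# flush on MilvusClient is assumed to be available in the tested pymilvus version.
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")
client.delete("demo", filter="id < 1000")  # tombstone half of the rows
client.flush("demo")                       # persist the segment with the deletes
# a partial-update upsert of a deleted pk behaves like a fresh insert,
# so the batch has to carry every required field again
full_rows = [{"id": i, "vector": [0.0] * 8, "tag": i} for i in range(1000)]
client.upsert("demo", full_rows, partial_update=True)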
query the rows + expected: Step 2-5 should success + """ + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: Insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.insert(client, collection_name, rows) + + # step 3: Delete the rows and flush + delete_result = self.delete(client, collection_name, + filter=f"{default_primary_key_field_name} < {default_nb//2}")[0] + self.flush(client, collection_name) + query_result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_nothing)[0] + + # step 4: Upsert the rows and flush + self.upsert(client, collection_name, new_rows, partial_update=True) + self.flush(client, collection_name) + + # step 5: query the rows + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + check_items={exp_res: new_rows, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + + assert delete_result["delete_count"] == default_nb//2 + assert len(query_result) == default_nb//2 + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_insert_upsert_delete_upsert_flush(self): + """ + target: Test PU will success and query will success + method: + 1. Create a collection + 2. Insert rows + 3. Delete the rows and upsert new rows, immediate flush + 4. 
Query the rows + expected: Step 2-4 should success + """ + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: Insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + partial_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_int32_field_name]) + self.insert(client, collection_name, rows) + + # step 3: partial update rows then delete 1/2 rows and upsert new rows, flush + self.upsert(client, collection_name, partial_rows, partial_update=True) + delete_result = self.delete(client, collection_name, + filter=f"{default_primary_key_field_name} < {default_nb//2}")[0] + self.upsert(client, collection_name, new_rows, partial_update=True) + self.flush(client, collection_name) + + # step 4: Query the rows + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + check_items={exp_res: new_rows, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + + assert delete_result["delete_count"] == default_nb//2 + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_insert_upsert_flush_delete_upsert_flush(self): + """ + target: Test PU will success and query will success + method: + 1. Create a collection + 2. Insert rows + 3. Upsert the rows + 4. Delete the rows + 5. Upsert the rows + 6. Flush the collection + 7. 
Query the rows + expected: Step 2-7 should success + """ + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: Insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + partial_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_int32_field_name]) + new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.insert(client, collection_name, rows) + + # step 3: Upsert the rows + upsert_result = self.upsert(client, collection_name, partial_rows, partial_update=True)[0] + self.flush(client, collection_name) + + # step 4: Delete the rows + delete_result = self.delete(client, collection_name, + filter=f"{default_primary_key_field_name} < {default_nb//2}")[0] + self.upsert(client, collection_name, new_rows, partial_update=True) + + # step 5: Flush the collection + self.flush(client, collection_name) + + # step 6: Query the rows + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + check_items={exp_res: new_rows, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + + assert upsert_result["upsert_count"] == default_nb + assert delete_result["delete_count"] == default_nb//2 + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + """ + ****************************************************************** + # The following are valid cases for nullable fields + ****************************************************************** + """ + @pytest.mark.tags(CaseLabel.L0) + def test_milvus_client_partial_update_nullable_field(self): + """ + Target: test PU without nullable field, the field will keep its value + Method: + 1. Create collection, enable nullable fields. + 2. Insert a row while assigning a value to nullable field (using partial update) + 3. 
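# Illustrative sketch (editor's addition, not part of the patch): what "keeps its
# value" means for a field left out of the batch. Assumes a loaded collection
# "demo" with pk "id", an 8-dim "vector" and a nullable INT32 "tag".
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")
client.upsert("demo", [{"id": 7, "vector": [0.3] * 8, "tag": 5}], partial_update=True)
# the batch below never mentions "tag", so row 7 still reads back tag == 5
client.upsert("demo", [{"id": 7, "vector": [0.4] * 8}], partial_update=True)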
PU nullable field and other fields + Expected: values should be updated + """ + # Step 1: create collection with nullable fields + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: insert a row while assigning a value to nullable field + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) + self.upsert(client, collection_name, rows, partial_update=True) + + # Step 3: PU other fields + # Even index: update int32 field to new value + # Odd index: update vector field to random value + # also update rows to keep track of changes so we can query the result + new_value = np.int32(99) + vector_rows = [] + int32_rows = [] + for i, row in enumerate(rows): + if i % 2 == 0: + int32_rows.append({default_primary_key_field_name: row[default_primary_key_field_name], + default_int32_field_name: new_value}) + rows[i][default_int32_field_name] = new_value + else: + new_vector = [random.random() for _ in range(default_dim)] + vector_rows.append({default_primary_key_field_name: row[default_primary_key_field_name], + default_vector_field_name: new_vector}) + rows[i][default_vector_field_name] = new_vector + rows[i][default_int32_field_name] = None + + self.upsert(client, collection_name, int32_rows, partial_update=True) + self.upsert(client, collection_name, vector_rows, partial_update=True) + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + output_fields=[default_vector_field_name, default_int32_field_name], + check_items={exp_res: rows, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_null_to_value(self): + """ + Target: test PU can successfully update null to a value + Method: + 1. Create a collection, enable nullable fields init null + 2. Partial update nullable field + 3. 
Query null field
+        Expected: the null field should return the updated value
+        """
+        # step 1: create collection with nullable fields init null
+        client = self._client()
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_int32_field_name, DataType.INT32, nullable=True)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
+        collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
+        self.create_collection(client, collection_name, default_dim, schema=schema,
+                               consistency_level="Strong", index_params=index_params)
+        rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name])
+        self.upsert(client, collection_name, rows, partial_update=True)
+
+        # step 2: Partial update nullable field
+        new_value = np.int32(99)
+        new_rows = [{default_primary_key_field_name: row[default_primary_key_field_name],
+                     default_int32_field_name: new_value} for row in rows]
+        self.upsert(client, collection_name, new_rows, partial_update=True)
+
+        # step 3: Query null field
+        result = self.query(client, collection_name, filter=default_search_exp,
+                            check_task=CheckTasks.check_query_results,
+                            output_fields=[default_int32_field_name],
+                            check_items={exp_res: new_rows,
+                                         "with_vec": True,
+                                         "pk_name": default_primary_key_field_name})[0]
+        assert len(result) == default_nb
+
+        self.drop_collection(client, collection_name)
+
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_partial_update_default_to_value(self):
+        """
+        Target: test PU can successfully update a default value to a new value
+        Method:
+            1. Create a collection, enable nullable fields init default value
+            2. Partial update nullable field
+            3. 
Query null field + Expected: Nullfield should have the same value as updated + """ + # step 1: create collection with nullable fields init default value + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32, nullable=True, default_value=default_int32_value) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) + self.upsert(client, collection_name, rows, partial_update=True) + + # step 2: Partial update nullable field + new_value = 99 + new_row = [{default_primary_key_field_name: i, + default_int32_field_name: new_value} for i in range(default_nb)] + self.upsert(client, collection_name, new_row, partial_update=True) + + # step 3: Query null field + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + output_fields=[default_int32_field_name], + check_items={exp_res: new_row, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_value_to_null(self): + """ + Target: test PU can successfully update a value to null + Method: + 1. Create a collection, enable nullable fields init value + 2. Partial update nullable field + 3. 
Query null field
+        Expected: the nullable field should return null after the update
+        """
+        # step 1: create collection with nullable fields init value
+        client = self._client()
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_int32_field_name, DataType.INT32, nullable=True)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
+        collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
+        self.create_collection(client, collection_name, default_dim, schema=schema,
+                               consistency_level="Strong", index_params=index_params)
+        rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
+        self.upsert(client, collection_name, rows, partial_update=True)
+
+        # step 2: Partial update nullable field
+        new_value = None
+        new_row = [{default_primary_key_field_name: i,
+                    default_int32_field_name: new_value} for i in range(default_nb)]
+        self.upsert(client, collection_name, new_row, partial_update=True)
+
+        # step 3: Query null field
+        result = self.query(client, collection_name, filter=default_search_exp,
+                            check_task=CheckTasks.check_query_results,
+                            output_fields=[default_int32_field_name],
+                            check_items={exp_res: new_row,
+                                         "with_vec": True,
+                                         "pk_name": default_primary_key_field_name})[0]
+        assert len(result) == default_nb
+
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_partial_update_null_to_null_with_dict_rows(self):
+        """
+        Target: test PU can successfully update a null to null
+                (same scenario as test_milvus_client_partial_update_null_to_null,
+                but driven by hand-built dict rows)
+        Method:
+            1. Create a collection, enable nullable fields
+            2. Insert default_nb rows to the collection
+            3. Partial Update the nullable field with null
+            4. 
Query the collection to check the value of nullable field + Expected: query should have correct value and number of entities + """ + # step 1: create collection with nullable fields + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: insert default_nb rows to the collection + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) + self.upsert(client, collection_name, rows, partial_update=True) + + # step 3: Partial Update the nullable field with null + new_value = None + new_row = [{default_primary_key_field_name: i, + default_int32_field_name: new_value} for i in range(default_nb)] + self.upsert(client, collection_name, new_row, partial_update=True) + + # step 4: Query the collection to check the value of nullable field + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + output_fields=[default_int32_field_name], + check_items={exp_res: new_row, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_various_value_to_nullable_field(self): + """ + Target: test PU can successfully update various value to a nullable field + Method: + 1. Create a collection, enable nullable fields + 2. Insert default_nb rows to the collection + 3. Partial Update the nullable field with various value + 4. 
Query the collection to check the value of nullable field + Expected: query should have correct value + """ + # step 1: create collection with nullable fields + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: insert default_nb rows to the collection + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) + self.upsert(client, collection_name, rows, partial_update=True) + + # step 3: Partial Update the nullable field with various value + new_value = 99 + new_row = [{default_primary_key_field_name: i, + default_int32_field_name: new_value if i % 2 == 0 else None} + for i in range(default_nb)] + self.upsert(client, collection_name, new_row, partial_update=True) + + # step 4: Query the collection to check the value of nullable field + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + output_fields=[default_int32_field_name], + check_items={exp_res: new_row, + "with_vec": True, + "pk_name": default_primary_key_field_name})[0] + + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_filter_by_null(self): + """ + target: Test PU will success and query will success + method: + 1. Create a collection + 2. partial upsert data with nullable field + 3. Query the collection with filter by nullable field + 4. partial update nullable field back to null + 5. 
Query the collection with filter by nullable field + expected: Step 2,3,4,5 should success + """ + # step 1: create collection with nullable fields + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: partial upsert data with nullable field + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) + self.upsert(client, collection_name, rows, partial_update=True) + result = self.query(client, collection_name, filter=f"{default_int32_field_name} IS NULL", + check_task=CheckTasks.check_query_results, + output_fields=[default_vector_field_name], + check_items={exp_res: rows, + "pk_name": default_primary_key_field_name})[0] + assert len(result) == default_nb + + # update first half of the dataset with nullable field value + new_value = np.int32(99) + new_row = [{default_primary_key_field_name: i, + default_int32_field_name: new_value} for i in range(default_nb//2)] + self.upsert(client, collection_name, new_row, partial_update=True) + + # step 3: Query the collection with filter by nullable field + result = self.query(client, collection_name, filter=f"{default_int32_field_name} IS NOT NULL", + check_task=CheckTasks.check_query_results, + output_fields=[default_int32_field_name], + check_items={exp_res: new_row, + "pk_name": default_primary_key_field_name})[0] + assert len(result) == default_nb//2 + # query with == filter + result = self.query(client, collection_name, filter=f"{default_int32_field_name} == {new_value}", + check_task=CheckTasks.check_query_results, + output_fields=[default_int32_field_name], + check_items={exp_res: new_row, + "pk_name": default_primary_key_field_name})[0] + assert len(result) == default_nb//2 + + # step 4: partial update nullable field back to null + null_row = [{default_primary_key_field_name: i, + default_int32_field_name: None} for i in range(default_nb)] + self.upsert(client, collection_name, null_row, partial_update=True) + + # step 5: Query the collection with filter by nullable field + result = self.query(client, collection_name, filter=f"{default_int32_field_name} IS NULL", + check_task=CheckTasks.check_query_results, + output_fields=[default_int32_field_name], + check_items={exp_res: null_row, + "pk_name": default_primary_key_field_name})[0] + assert len(result) == default_nb + + self.drop_collection(client, collection_name) + + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_same_pk_same_field(self): + """ + target: Test PU will success and query will success + method: + 1. Create a collection + 2. Insert rows + 3. Upsert the rows with same pk and same field + 4. Query the rows + 5. 
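# Illustrative sketch (editor's addition, not part of the patch): duplicate pks
# inside one partial-update batch resolve to the last write, which is what the
# query in the next case checks. "demo" and "tag" are invented names.
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")
batch = [{"id": 0, "tag": i} for i in range(3)]  # three writes to pk 0
client.upsert("demo", batch, partial_update=True)
# the surviving row carries tag == 2, the value from the final element
print(client.query("demo", filter="id == 0", output_fields=["tag"]))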
Verify only the last upserted value is kept for the duplicated pk
+        expected: steps 2-5 should succeed
+        """
+        # step 1: create collection
+        client = self._client()
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_int32_field_name, DataType.INT32, nullable=True)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
+        collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
+        self.create_collection(client, collection_name, default_dim, schema=schema,
+                               consistency_level="Strong", index_params=index_params)
+
+        # step 2: Insert rows
+        rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
+        self.upsert(client, collection_name, rows, partial_update=True)
+
+        # step 3: Upsert the rows with same pk and same field
+        new_rows = [{default_primary_key_field_name: 0,
+                     default_int32_field_name: i} for i in range(default_nb)]
+        self.upsert(client, collection_name, new_rows, partial_update=True)
+
+        # step 4: Query the rows; only the last value written for pk 0 survives
+        result = self.query(client, collection_name, filter=f"{default_primary_key_field_name} == 0",
+                            check_task=CheckTasks.check_query_results,
+                            output_fields=[default_int32_field_name],
+                            check_items={exp_res: [new_rows[-1]],
+                                         "pk_name": default_primary_key_field_name})[0]
+        assert len(result) == 1
+
+        self.drop_collection(client, collection_name)
+
+
+class TestMilvusClientPartialUpdateInvalid(TestMilvusClientV2Base):
+    """ Test case of partial update interface """
+    @pytest.fixture(scope="function", params=[False, True])
+    def auto_id(self, request):
+        yield request.param
+
+    @pytest.fixture(scope="function", params=["COSINE", "L2"])
+    def metric_type(self, request):
+        yield request.param
+
+    """
+    ******************************************************************
+    # The following are invalid base cases
+    ******************************************************************
+    """
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_partial_update_new_pk_with_missing_field(self):
+        """
+        target: Test PU returns an error when a new pk arrives with only a partial field set
+        method:
+            1. Create a collection
+            2. 
partial upsert a new pk with only a partial field set
+        expected: Step 2 should fail
+        """
+        # step 1: create collection
+        client = self._client()
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_int32_field_name, DataType.INT32, nullable=True)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_int32_field_name, index_type="AUTOINDEX")
+        collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
+        self.create_collection(client, collection_name, default_dim, schema=schema,
+                               consistency_level="Strong", index_params=index_params)
+
+        # step 2: partial upsert a new pk with only a partial field set
+        rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema,
+                                         desired_field_names=[default_primary_key_field_name, default_int32_field_name])
+        error = {ct.err_code: 1100, ct.err_msg:
+                 f"fieldSchema({default_vector_field_name}) has no corresponding fieldData pass in: invalid parameter"}
+        self.upsert(client, collection_name, rows, partial_update=True,
+                    check_task=CheckTasks.err_res, check_items=error)
+
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_partial_update_new_field_without_dynamic_field(self):
+        """
+        target: Test PU will return error when provided new field without dynamic field
+        method:
+            1. Create a collection without dynamic field
+            2. partial upsert a new field
+        expected: Step 2 should fail
+        """
+        # step 1: create collection without dynamic field
+        client = self._client()
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
+        collection_name = cf.gen_collection_name_by_testcase_name(module_index=1)
+        self.create_collection(client, collection_name, default_dim, schema=schema,
+                               consistency_level="Strong", index_params=index_params)
+
+        # step 2: partial upsert a new field
+        row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
+        self.upsert(client, collection_name, row, partial_update=True)
+
+        new_row = [{default_primary_key_field_name: i, default_int32_field_name: 99} for i in range(default_nb)]
+        error = {ct.err_code: 1,
+                 ct.err_msg: f"Attempt to insert an unexpected field `{default_int32_field_name}` to collection without enabling dynamic field"}
+        self.upsert(client, collection_name, new_row, partial_update=True, check_task=CheckTasks.err_res, check_items=error)
+
+        self.drop_collection(client, collection_name)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_partial_update_after_release_collection(self):
+        """
+        target: test partial update fails after the collection is released
+        method:
+            1. create collection
+            2. insert a full row of data using partial update
+            3. partial update data
+            4. release collection
+            5. 
@pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_new_field_without_dynamic_field(self): + """ + target: Test PU will return error when provided new field without dynamic field + method: + 1. Create a collection without dynamic field + 2. partial upsert a new field + expected: Step 2 should fail + """ + # step 1: create collection without dynamic field + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: partial upsert a new field + row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, row, partial_update=True) + + new_row = [{default_primary_key_field_name: i, default_int32_field_name: 99} for i in range(default_nb)] + error = {ct.err_code: 1, + ct.err_msg: f"Attempt to insert an unexpected field `{default_int32_field_name}` to collection without enabling dynamic field"} + self.upsert(client, collection_name, new_row, partial_update=True, check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_after_release_collection(self): + """ + target: test partial update fails after the collection is released + method: + 1. create collection + 2. insert a full row of data using partial update + 3. partial update data + 4. release collection + 5. partial update data + expected: step 5 should fail + """ + # Step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_string_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: insert a full row of data using partial update + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + + # Step 3: partial update data + new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_string_field_name]) + self.upsert(client, collection_name, new_row, partial_update=True) + + # Step 4: release collection + self.release_collection(client, collection_name) + + # Step 5: partial update data + new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_string_field_name]) + error = {ct.err_code: 101, + ct.err_msg: f"failed to query: collection not loaded"} + self.upsert(client, collection_name, new_row, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_same_pk_after_delete(self): + """ + target: test PU will fail when provided same pk and partial field + method: + 1. Create a collection + 2. Insert rows + 3. delete the rows + 4. 
upsert the rows with same pk and partial field + expected: step 4 should fail + """ + # Step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + + # Step 3: delete the rows + result = self.delete(client, collection_name, filter=default_search_exp)[0] + assert result["delete_count"] == default_nb + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_nothing)[0] + assert len(result) == 0 + + # Step 4: upsert the rows with same pk and partial field + new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_vector_field_name]) + error = {ct.err_code: 1100, + ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"} + self.upsert(client, collection_name, new_rows, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_pk_in_wrong_partition(self): + """ + target: test PU will fail when provided pk in wrong partition + method: + 1. Create a collection + 2. Create 2 partitions + 3. Insert rows + 4. 
upsert the rows with pk in wrong partition + expected: step 4 should fail + """ + # Step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: Create 2 partitions + num_of_partitions = 2 + partition_names = [] + for _ in range(num_of_partitions): + partition_name = cf.gen_unique_str("partition") + self.create_partition(client, collection_name, partition_name) + partition_names.append(partition_name) + + # Step 3: Insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + gap = default_nb // num_of_partitions + for i, partition in enumerate(partition_names): + self.upsert(client, collection_name, rows[i*gap:(i+1)*gap], partition_name=partition, partial_update=True) + + # Step 4: upsert the rows with pk in wrong partition + new_rows = cf.gen_row_data_by_schema(nb=gap, schema=schema, + desired_field_names=[default_primary_key_field_name, default_vector_field_name]) + error = {ct.err_code: 1100, + ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"} + self.upsert(client, collection_name, new_rows, partition_name=partition_names[-1], partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_same_pk_multiple_fields(self): + """ + target: Test PU will fail when rows in one request update different fields + method: + 1. Create a collection + 2. Insert rows + 3. 
Upsert the rows with same pk and different field + expected: Step 3 should fail + """ + # step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: Insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + + # step 3: Upsert the rows with same pk and different field + new_rows = [] + for i in range(default_nb): + data = {} + if i % 2 == 0: + data[default_int32_field_name] = i + 1000 + data[default_primary_key_field_name] = 0 + else: + data[default_vector_field_name] = [random.random() for _ in range(default_dim)] + data[default_primary_key_field_name] = 0 + new_rows.append(data) + + error = {ct.err_code: 1, + ct.err_msg: f"The data fields length is inconsistent. previous length is 2000, current length is 1000"} + self.upsert(client, collection_name, new_rows, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) \ No newline at end of file diff --git a/tests/python_client/milvus_client/test_milvus_client_upsert.py b/tests/python_client/milvus_client/test_milvus_client_upsert.py new file mode 100644 index 0000000000..7688897f66 --- /dev/null +++ b/tests/python_client/milvus_client/test_milvus_client_upsert.py @@ -0,0 +1,824 @@ +import pytest +import numpy as np + +from base.client_v2_base import TestMilvusClientV2Base +from utils.util_log import test_log as log +from common import common_func as cf +from common import common_type as ct +from common.common_type import CaseLabel, CheckTasks +from utils.util_pymilvus import * + +prefix = "client_upsert" +epsilon = ct.epsilon +default_nb = ct.default_nb +default_nb_medium = ct.default_nb_medium +default_nq = ct.default_nq +default_dim = ct.default_dim +default_limit = ct.default_limit +default_search_exp = "id >= 0" +exp_res = "exp_res" +default_search_string_exp = "varchar >= \"0\"" +default_search_mix_exp = "int64 >= 0 && varchar >= \"0\"" +default_invalid_string_exp = "varchar >= 0" +default_json_search_exp = "json_field[\"number\"] >= 0" +prefix_expr = 'varchar like "0%"' +default_search_field = ct.default_float_vec_field_name +default_search_params = ct.default_search_params +default_primary_key_field_name = "id" +default_vector_field_name = "vector" +default_dynamic_field_name = "field_new" +default_float_field_name = ct.default_float_field_name +default_bool_field_name = ct.default_bool_field_name +default_string_field_name = ct.default_string_field_name +default_int32_array_field_name = ct.default_int32_array_field_name +default_string_array_field_name = ct.default_string_array_field_name +default_int32_field_name = ct.default_int32_field_name +default_int32_value = ct.default_int32_value
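Before the cases themselves, the basic call under test: MilvusClient.upsert takes row-based data, one dict per entity keyed by field name, and reports the affected row count. A self-contained sketch against the quick-setup schema these tests use (int64 pk "id", float vector "vector"), with an illustrative collection name:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")
    client.create_collection("upsert_demo", dimension=8)  # quick setup: auto schema, index, load
    res = client.upsert("upsert_demo", [{"id": i, "vector": [0.1 * i] * 8} for i in range(3)])
    assert res["upsert_count"] == 3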
class TestMilvusClientUpsertInvalid(TestMilvusClientV2Base): + """ Test case of upsert interface """ + + @pytest.fixture(scope="function", params=[False, True]) + def auto_id(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["COSINE", "L2"]) + def metric_type(self, request): + yield request.param + + """ + ****************************************************************** + # The following are invalid base cases + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L2) + def test_milvus_client_upsert_column_data(self): + """ + target: test upsert with column-based data + method: create connection and collection, then upsert column-based data + expected: raise error + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. upsert + vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nb)] + data = [[i for i in range(default_nb)], vectors] + error = {ct.err_code: 999, + ct.err_msg: "The Input data type is inconsistent with defined schema, please check it."} + self.upsert(client, collection_name, data, + check_task=CheckTasks.err_res, check_items=error) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_upsert_empty_collection_name(self): + """ + target: test upsert with empty collection name + method: upsert rows with an empty string as collection name + expected: Raise exception + """ + client = self._client() + collection_name = "" + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + error = {ct.err_code: 1, ct.err_msg: f"`collection_name` value {collection_name} is illegal"} + self.upsert(client, collection_name, rows, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("collection_name", ["12-s", "12 s", "(mn)", "中文", "%$#"]) + def test_milvus_client_upsert_invalid_collection_name(self, collection_name): + """ + target: test upsert with invalid collection name + method: upsert rows using illegal collection names + expected: Raise exception + """ + client = self._client() + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {collection_name}. 
the first character of a " + f"collection name must be an underscore or letter: invalid parameter"} + self.upsert(client, collection_name, rows, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_upsert_collection_name_over_max_length(self): + """ + target: test upsert with collection name over max length + method: upsert rows into a collection whose name exceeds the 255-character limit + expected: Raise exception + """ + client = self._client() + collection_name = "a".join("a" for i in range(256)) + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + error = {ct.err_code: 1100, ct.err_msg: f"the length of a collection name must be less than 255 characters"} + self.upsert(client, collection_name, rows, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_upsert_not_exist_collection_name(self): + """ + target: test upsert into a collection that does not exist + method: upsert rows without creating the collection first + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_unique_str("insert_not_exist") + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + error = {ct.err_code: 100, ct.err_msg: f"can't find collection[database=default][collection={collection_name}]"} + self.upsert(client, collection_name, rows, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("data", ["12-s", "12 s", "(mn)", "中文", "%$#", " "]) + def test_milvus_client_upsert_data_invalid_type(self, data): + """ + target: test upsert with invalid data type + method: upsert data that is neither a dict nor a list of dicts + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + # 2. upsert + error = {ct.err_code: 1, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict'"} + self.upsert(client, collection_name, data, + check_task=CheckTasks.err_res, check_items=error)
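As the cases around this note exercise, the data argument is validated client-side before any RPC: it must be a dict or a list of dicts, and anything else (strings, numbers, an empty string) is rejected with the "wrong type of argument 'data'" error. A sketch of the accepted shapes, reusing the illustrative collection from above:

    client.upsert("upsert_demo", {"id": 0, "vector": [0.0] * 8})      # a single row
    client.upsert("upsert_demo", [{"id": 1, "vector": [0.1] * 8}])    # a batch of rows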
@pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_upsert_data_empty(self): + """ + target: test upsert with empty data + method: upsert an empty string as the data argument + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + # 2. upsert + error = {ct.err_code: 1, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict'"} + self.upsert(client, collection_name, data="", + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_upsert_data_vector_field_missing(self): + """ + target: test upsert with the vector field missing + method: upsert rows that do not carry the vector field + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + # 2. upsert + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)] + error = {ct.err_code: 1, + ct.err_msg: "Insert missed an field `vector` to collection without set nullable==true or set default_value"} + self.upsert(client, collection_name, data=rows, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_upsert_data_id_field_missing(self): + """ + target: test upsert with the primary key field missing + method: upsert rows that do not carry the primary key field + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + # 2. upsert + rng = np.random.default_rng(seed=19530) + rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(20)] + error = {ct.err_code: 1, + ct.err_msg: f"Insert missed an field `id` to collection without set nullable==true or set default_value"} + self.upsert(client, collection_name, data=rows, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_upsert_data_extra_field(self): + """ + target: test upsert with an extra field beyond the schema + method: upsert rows with an extra field while enable_dynamic_field is False + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + dim = 32 + self.create_collection(client, collection_name, dim, enable_dynamic_field=False) + # 2. upsert + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(10)] + error = {ct.err_code: 1, + ct.err_msg: f"Attempt to insert an unexpected field `float` to collection without enabling dynamic field"} + self.upsert(client, collection_name, data=rows, + check_task=CheckTasks.err_res, check_items=error)
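The extra-field rejection above only applies because the collection was created with enable_dynamic_field=False; the quick-setup default keeps the dynamic field on, which is why other cases in this file can upsert "float" and "varchar" keys that are not in the schema. A sketch of the contrast, with an illustrative collection name:

    client.create_collection("dyn_demo", dimension=8)  # dynamic field enabled by default
    # Unknown keys are accepted and routed into the hidden $meta JSON field.
    client.upsert("dyn_demo", [{"id": 0, "vector": [0.0] * 8, "float": 1.0}])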
@pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_upsert_data_dim_not_match(self): + """ + target: test upsert with vector dimension mismatch + method: upsert vectors whose dimension differs from the schema + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. upsert + rng = np.random.default_rng(seed=19530) + rows = [ + {default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim + 1))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + error = {ct.err_code: 65536, ct.err_msg: f"of float data should divide the dim({default_dim})"} + self.upsert(client, collection_name, data=rows, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_upsert_not_matched_data(self): + """ + target: test upsert with data that does not match the schema + method: upsert string values into an int64 primary key field + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. upsert + rng = np.random.default_rng(seed=19530) + rows = [ + {default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + error = {ct.err_code: 1, + ct.err_msg: "The Input data type is inconsistent with defined schema, {id} field should be a int64"} + self.upsert(client, collection_name, data=rows, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("partition_name", ["12 s", "(mn)", "中文", "%$#", " "]) + def test_milvus_client_upsert_invalid_partition_name(self, partition_name): + """ + target: test upsert with invalid partition name + method: upsert rows into partitions with illegal names + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. upsert + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}"} + if partition_name == " ": + error = {ct.err_code: 1, ct.err_msg: f"Invalid partition name: . Partition name should not be empty."} + self.upsert(client, collection_name, data=rows, partition_name=partition_name, + check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_upsert_not_exist_partition_name(self): + """ + target: test upsert into a partition that does not exist + method: upsert rows into a partition that was never created + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim) + # 2. upsert + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + partition_name = cf.gen_unique_str("partition_not_exist") + error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"} + self.upsert(client, collection_name, data=rows, partition_name=partition_name, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L2) + def test_milvus_client_upsert_collection_partition_not_match(self): + """ + target: test upsert with a partition that belongs to another collection + method: upsert rows into a partition created on a different collection + expected: Raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + another_collection_name = cf.gen_unique_str(prefix + "another") + partition_name = cf.gen_unique_str("partition") + # 1. create collection + self.create_collection(client, collection_name, default_dim) + self.create_collection(client, another_collection_name, default_dim) + self.create_partition(client, another_collection_name, partition_name) + # 2. upsert + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"} + self.upsert(client, collection_name, data=rows, partition_name=partition_name, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("nullable", [True, False]) + def test_milvus_client_insert_array_element_null(self, nullable): + """ + target: test insert with a null element inside an array field + method: create a collection with an array field, then insert rows whose array contains None + expected: raise exception + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + dim = 5 + # 1. create collection + nullable_field_name = "nullable_field" + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True, + auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) + schema.add_field(nullable_field_name, DataType.ARRAY, element_type=DataType.INT64, max_capacity=12, + max_length=64, nullable=nullable) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) + # 2. insert + vectors = cf.gen_vectors(default_nb, dim) + rows = [{default_primary_key_field_name: str(i), default_vector_field_name: vectors[i], + nullable_field_name: [None, 2, 3]} for i in range(default_nb)] + error = {ct.err_code: 1, + ct.err_msg: "The Input data type is inconsistent with defined schema, {nullable_field} field " + "should be a array, but got a {} instead."} + self.insert(client, collection_name, rows, + check_task=CheckTasks.err_res, + check_items=error)
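The array case above separates two kinds of null: with nullable=True the whole array value may be None, but a None element inside the array is rejected regardless of the nullability setting. Sketched under the same schema assumptions (varchar pk, dim-5 vector, nullable int64 array) and an illustrative collection name:

    # Accepted when the field is nullable: the entire array is null.
    client.insert("array_demo", [{"id": "0", "vector": [0.0] * 5, "nullable_field": None}])
    # Rejected either way: a null element inside the array.
    # client.insert("array_demo", [{"id": "1", "vector": [0.0] * 5, "nullable_field": [None, 2, 3]}])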
class TestMilvusClientUpsertValid(TestMilvusClientV2Base): + """ Test case of upsert interface """ + + @pytest.fixture(scope="function", params=[False, True]) + def auto_id(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["COSINE", "L2"]) + def metric_type(self, request): + yield request.param + + """ + ****************************************************************** + # The following are valid base cases + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L0) + def test_milvus_client_upsert_default(self): + """ + target: test upsert (high level api) normal case + method: create connection, collection, upsert, search and query + expected: search/query successfully + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + collections = self.list_collections(client)[0] + assert collection_name in collections + self.describe_collection(client, collection_name, + check_task=CheckTasks.check_describe_collection_property, + check_items={"collection_name": collection_name, + "dim": default_dim, + "consistency_level": 0}) + # 2. upsert + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + results = self.upsert(client, collection_name, rows)[0] + assert results['upsert_count'] == default_nb + # 3. search + vectors_to_search = rng.random((1, default_dim)) + insert_ids = [i for i in range(default_nb)] + self.search(client, collection_name, vectors_to_search, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "limit": default_limit, + "pk_name": default_primary_key_field_name}) + # 4. query + self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_query_results, + check_items={exp_res: rows, + "with_vec": True, + "pk_name": default_primary_key_field_name}) + self.release_collection(client, collection_name) + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L2) + def test_milvus_client_upsert_empty_data(self): + """ + target: test upsert with an empty rows list + method: create connection, collection, upsert empty data and search + expected: upsert count is 0 and search returns no results + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + # 1. create collection + self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + # 2. upsert + rows = [] + results = self.upsert(client, collection_name, rows)[0] + assert results['upsert_count'] == 0 + # 3. search + rng = np.random.default_rng(seed=19530) + vectors_to_search = rng.random((1, default_dim)) + self.search(client, collection_name, vectors_to_search, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": [], + "pk_name": default_primary_key_field_name, + "limit": 0}) + self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2) + def test_milvus_client_upsert_partition(self): + """ + target: test upsert into specific partitions + method: create collection and partition, then upsert into the default and the new partition + expected: upsert and search successfully + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + partition_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + # 2. create partition + self.create_partition(client, collection_name, partition_name) + partitions = self.list_partitions(client, collection_name)[0] + assert partition_name in partitions + index = self.list_indexes(client, collection_name)[0] + assert index == ['vector'] + # load_state = self.get_load_state(collection_name)[0] + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + # 3. upsert to default partition + results = self.upsert(client, collection_name, rows, partition_name=partitions[0])[0] + assert results['upsert_count'] == default_nb + # 4. upsert to non-default partition + results = self.upsert(client, collection_name, rows, partition_name=partition_name)[0] + assert results['upsert_count'] == default_nb + # 5. search + vectors_to_search = rng.random((1, default_dim)) + insert_ids = [i for i in range(default_nb)] + self.search(client, collection_name, vectors_to_search, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "limit": default_limit, + "pk_name": default_primary_key_field_name}) + # partition_number = self.get_partition_stats(client, collection_name, "_default")[0] + # assert partition_number == default_nb + # partition_number = self.get_partition_stats(client, collection_name, partition_name)[0] + # assert partition_number[0]['value'] == 0 + if self.has_partition(client, collection_name, partition_name)[0]: + self.release_partitions(client, collection_name, partition_name) + self.drop_partition(client, collection_name, partition_name) + if self.has_collection(client, collection_name)[0]: + self.drop_collection(client, collection_name)
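The next case chains the two write paths; the practical difference is that insert always appends (a duplicate pk produces a second row rather than an update), while upsert deletes any existing row with the same pk before writing, so it is the idempotent choice. A sketch, reusing the illustrative collection from above:

    res = client.insert("upsert_demo", [{"id": 5, "vector": [0.5] * 8}])
    assert res["insert_count"] == 1
    res = client.upsert("upsert_demo", [{"id": 5, "vector": [0.6] * 8}])
    assert res["upsert_count"] == 1  # same pk: the row is replaced, not duplicated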
@pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_insert_upsert(self): + """ + target: test insert followed by upsert on the same pks + method: create collection and partition, insert rows, then upsert rows with the same pks + expected: insert and upsert successfully + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + partition_name = cf.gen_unique_str(prefix) + # 1. create collection + self.create_collection(client, collection_name, default_dim, consistency_level="Strong") + # 2. create partition + self.create_partition(client, collection_name, partition_name) + partitions = self.list_partitions(client, collection_name)[0] + assert partition_name in partitions + index = self.list_indexes(client, collection_name)[0] + assert index == ['vector'] + # load_state = self.get_load_state(collection_name)[0] + # 3. insert and upsert + rng = np.random.default_rng(seed=19530) + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] + results = self.insert(client, collection_name, rows, partition_name=partition_name)[0] + assert results['insert_count'] == default_nb + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_float_field_name: i * 1.0, "new_diff_str_field": str(i)} for i in range(default_nb)] + results = self.upsert(client, collection_name, rows, partition_name=partition_name)[0] + assert results['upsert_count'] == default_nb + # 4. search + vectors_to_search = rng.random((1, default_dim)) + insert_ids = [i for i in range(default_nb)] + self.search(client, collection_name, vectors_to_search, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "limit": default_limit, + "pk_name": default_primary_key_field_name}) + if self.has_partition(client, collection_name, partition_name)[0]: + self.release_partitions(client, collection_name, partition_name) + self.drop_partition(client, collection_name, partition_name) + if self.has_collection(client, collection_name)[0]: + self.drop_collection(client, collection_name) + + +class TestMilvusClientPartialUpdateInvalid(TestMilvusClientV2Base): + """ Test case of partial update interface """ + @pytest.fixture(scope="function", params=[False, True]) + def auto_id(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["COSINE", "L2"]) + def metric_type(self, request): + yield request.param + + """ + ****************************************************************** + # The following are invalid base cases + ****************************************************************** + """ + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_new_pk_with_missing_field(self): + """ + target: Test PU will return error when a new pk is provided with only partial fields + method: + 1. Create a collection + 2. 
partial upsert a new pk with only partial field + expected: Step 2 should fail + """ + # step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: partial upsert a new pk with only partial field + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_int32_field_name]) + error = {ct.err_code: 1100, ct.err_msg: + f"fieldSchema({default_vector_field_name}) has no corresponding fieldData pass in: invalid parameter"} + self.upsert(client, collection_name, rows, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_new_field_without_dynamic_field(self): + """ + target: Test PU will return error when provided new field without dynamic field + method: + 1. Create a collection without dynamic field + 2. partial upsert a new field + expected: Step 2 should fail + """ + # step 1: create collection without dynamic field + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: partial upsert a new field + row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, row, partial_update=True) + + new_row = [{default_primary_key_field_name: i, default_int32_field_name: 99} for i in range(default_nb)] + error = {ct.err_code: 1, + ct.err_msg: f"Attempt to insert an unexpected field `{default_int32_field_name}` to collection without enabling dynamic field"} + self.upsert(client, collection_name, new_row, partial_update=True, check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_after_release_collection(self): + """ + target: test partial update fails after the collection is released + method: + 1. create collection + 2. insert a full row of data using partial update + 3. partial update data + 4. release collection + 5. 
partial update data + expected: step 5 should fail + """ + # Step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_string_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: insert a full row of data using partial update + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + + # Step 3: partial update data + new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_string_field_name]) + self.upsert(client, collection_name, new_row, partial_update=True) + + # Step 4: release collection + self.release_collection(client, collection_name) + + # Step 5: partial update data + new_row = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_string_field_name]) + error = {ct.err_code: 101, + ct.err_msg: f"failed to query: collection not loaded"} + self.upsert(client, collection_name, new_row, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_same_pk_after_delete(self): + """ + target: test PU will fail when provided same pk and partial field + method: + 1. Create a collection + 2. Insert rows + 3. delete the rows + 4. 
upsert the rows with same pk and partial field + expected: step 4 should fail + """ + # Step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + + # Step 3: delete the rows + result = self.delete(client, collection_name, filter=default_search_exp)[0] + assert result["delete_count"] == default_nb + result = self.query(client, collection_name, filter=default_search_exp, + check_task=CheckTasks.check_nothing)[0] + assert len(result) == 0 + + # Step 4: upsert the rows with same pk and partial field + new_rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, + desired_field_names=[default_primary_key_field_name, default_vector_field_name]) + error = {ct.err_code: 1100, + ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"} + self.upsert(client, collection_name, new_rows, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_pk_in_wrong_partition(self): + """ + target: test PU will fail when provided pk in wrong partition + method: + 1. Create a collection + 2. Create 2 partitions + 3. Insert rows + 4. 
upsert the rows with pk in wrong partition + expected: step 4 should fail + """ + # Step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # Step 2: Create 2 partitions + num_of_partitions = 2 + partition_names = [] + for _ in range(num_of_partitions): + partition_name = cf.gen_unique_str("partition") + self.create_partition(client, collection_name, partition_name) + partition_names.append(partition_name) + + # Step 3: Insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + gap = default_nb // num_of_partitions + for i, partition in enumerate(partition_names): + self.upsert(client, collection_name, rows[i*gap:(i+1)*gap], partition_name=partition, partial_update=True) + + # Step 4: upsert the rows with pk in wrong partition + new_rows = cf.gen_row_data_by_schema(nb=gap, schema=schema, + desired_field_names=[default_primary_key_field_name, default_vector_field_name]) + error = {ct.err_code: 1100, + ct.err_msg: f"fieldSchema({default_int32_field_name}) has no corresponding fieldData pass in: invalid parameter"} + self.upsert(client, collection_name, new_rows, partition_name=partition_names[-1], partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) + + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_partial_update_same_pk_multiple_fields(self): + """ + target: Test PU will fail when rows in one request update different fields + method: + 1. Create a collection + 2. Insert rows + 3. 
Upsert the rows with same pk and different field + expected: Step 3 should fail + """ + # step 1: create collection + client = self._client() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") + collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: Insert rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.upsert(client, collection_name, rows, partial_update=True) + + # step 3: Upsert the rows with same pk and different field + new_rows = [] + for i in range(default_nb): + data = {} + if i % 2 == 0: + data[default_int32_field_name] = i + 1000 + data[default_primary_key_field_name] = 0 + else: + data[default_vector_field_name] = [random.random() for _ in range(default_dim)] + data[default_primary_key_field_name] = 0 + new_rows.append(data) + + error = {ct.err_code: 1, + ct.err_msg: f"The data fields length is inconsistent. previous length is 2000, current length is 1000"} + self.upsert(client, collection_name, new_rows, partial_update=True, + check_task=CheckTasks.err_res, check_items=error) + + self.drop_collection(client, collection_name) \ No newline at end of file diff --git a/tests/python_client/milvus_client_v2/test_milvus_client_partial_update.py b/tests/python_client/milvus_client_v2/test_milvus_client_partial_update.py deleted file mode 100644 index f81e32d536..0000000000 --- a/tests/python_client/milvus_client_v2/test_milvus_client_partial_update.py +++ /dev/null @@ -1,314 +0,0 @@ -import pytest -import time -import random -import numpy as np -from common.common_type import CaseLabel, CheckTasks -from common import common_func as cf -from common import common_type as ct -from utils.util_log import test_log as log -from utils.util_pymilvus import * -from base.client_v2_base import TestMilvusClientV2Base -from pymilvus import DataType, FieldSchema, CollectionSchema -from sklearn import preprocessing - -# Test parameters -default_nb = ct.default_nb -default_nq = ct.default_nq -default_limit = ct.default_limit -default_search_exp = "id >= 0" -exp_res = "exp_res" -default_primary_key_field_name = "id" -default_vector_field_name = "vector" -default_int32_field_name = ct.default_int32_field_name - - -class TestMilvusClientPartialUpdate(TestMilvusClientV2Base): - """ Test case of partial update functionality """ - - @pytest.mark.tags(CaseLabel.L0) - def test_partial_update_all_field_types(self): - """ - Test partial update functionality with all field types - 1. Create collection with all data types - 2. Insert initial data - 3. Perform partial update for each field type - 4. 
Verify all updates work correctly - """ - client = self._client() - dim = 64 - collection_name = cf.gen_collection_name_by_testcase_name() - - # Create schema with all data types - schema = cf.gen_all_datatype_collection_schema(dim=dim) - - # Create index parameters - index_params = client.prepare_index_params() - for i in range(len(schema.fields)): - field_name = schema.fields[i].name - print(f"field_name: {field_name}") - if field_name == "json_field": - index_params.add_index(field_name, index_type="AUTOINDEX", - params={"json_cast_type": "json"}) - elif field_name == "text_sparse_emb": - index_params.add_index(field_name, index_type="AUTOINDEX", metric_type="BM25") - else: - index_params.add_index(field_name, index_type="AUTOINDEX") - - # Create collection - client.create_collection(collection_name, default_dim, consistency_level="Strong", schema=schema, index_params=index_params) - - # Load collection - self.load_collection(client, collection_name) - - # Insert initial data - nb = 1000 - rows = cf.gen_row_data_by_schema(nb=nb, schema=schema) - self.upsert(client, collection_name, rows, partial_update=True) - log.info(f"Inserted {nb} initial records") - - primary_key_field_name = schema.fields[0].name - for i in range(len(schema.fields)): - update_field_name = schema.fields[i if i != 0 else 1].name - new_row = cf.gen_partial_row_data_by_schema(nb=nb, schema=schema, - desired_field_names=[primary_key_field_name, update_field_name]) - client.upsert(collection_name, new_row, partial_update=True) - - log.info("Partial update test for all field types passed successfully") - - @pytest.mark.tags(CaseLabel.L0) - def test_partial_update_simple_demo(self): - """ - Test simple partial update demo with nullable fields - 1. Create collection with explicit schema including nullable fields - 2. Insert initial data with some null values - 3. Perform partial updates with different field combinations - 4. 
Verify partial update behavior preserves unchanged fields - """ - client = self._client() - dim = 3 - collection_name = cf.gen_collection_name_by_testcase_name() - - # Create schema with nullable fields - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False) - schema.add_field("vector", DataType.FLOAT_VECTOR, dim=dim) - schema.add_field("name", DataType.VARCHAR, max_length=100, nullable=True) - schema.add_field("price", DataType.FLOAT, nullable=True) - schema.add_field("category", DataType.VARCHAR, max_length=50, nullable=True) - - # Create collection - self.create_collection(client, collection_name, schema=schema) - - # Create index - index_params = self.prepare_index_params(client)[0] - index_params.add_index("vector", index_type="AUTOINDEX", metric_type="L2") - self.create_index(client, collection_name, index_params=index_params) - - # Load collection - self.load_collection(client, collection_name) - - # Insert initial data with some null values - initial_data = [ - { - "id": 1, - "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), - "name": "Product A", - "price": 100.0, - "category": "Electronics" - }, - { - "id": 2, - "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), - "name": "Product B", - "price": None, # Null price - "category": "Home" - }, - { - "id": 3, - "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), - "name": "Product C", - "price": None, # Null price - "category": "Books" - } - ] - - self.upsert(client, collection_name, initial_data, partial_update=False) - log.info("Inserted initial data with null values") - - # Verify initial state - results = self.query(client, collection_name, filter="id > 0", output_fields=["*"])[0] - assert len(results) == 3 - - initial_data_map = {data['id']: data for data in results} - assert initial_data_map[1]['name'] == "Product A" - assert initial_data_map[1]['price'] == 100.0 - assert initial_data_map[1]['category'] == "Electronics" - assert initial_data_map[2]['name'] == "Product B" - assert initial_data_map[2]['price'] is None - assert initial_data_map[2]['category'] == "Home" - assert initial_data_map[3]['name'] == "Product C" - assert initial_data_map[3]['price'] is None - assert initial_data_map[3]['category'] == "Books" - - log.info("Initial data verification passed") - - # First partial update - update all fields - log.info("First partial update - updating all fields...") - first_update_data = [ - { - "id": 1, - "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), - "name": "Product A-Update", - "price": 111.1, - "category": "Electronics-Update" - }, - { - "id": 2, - "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), - "name": "Product B-Update", - "price": 222.2, - "category": "Home-Update" - }, - { - "id": 3, - "vector": preprocessing.normalize([np.array([random.random() for j in range(dim)])])[0].tolist(), - "name": "Product C-Update", - "price": None, # Still null - "category": "Books-Update" - } - ] - - self.upsert(client, collection_name, first_update_data, partial_update=True) - - # Verify first update - results = self.query(client, collection_name, filter="id > 0", output_fields=["*"])[0] - assert len(results) == 3 - - first_update_map = {data['id']: data for data in results} - assert first_update_map[1]['name'] 
== "Product A-Update" - assert abs(first_update_map[1]['price'] - 111.1) < 0.001 - assert first_update_map[1]['category'] == "Electronics-Update" - assert first_update_map[2]['name'] == "Product B-Update" - assert abs(first_update_map[2]['price'] - 222.2) < 0.001 - assert first_update_map[2]['category'] == "Home-Update" - assert first_update_map[3]['name'] == "Product C-Update" - assert first_update_map[3]['price'] is None - assert first_update_map[3]['category'] == "Books-Update" - - log.info("First partial update verification passed") - - # Second partial update - update only specific fields - log.info("Second partial update - updating specific fields...") - second_update_data = [ - { - "id": 1, - "name": "Product A-Update-Again", - "price": 1111.1, - "category": "Electronics-Update-Again" - }, - { - "id": 2, - "name": "Product B-Update-Again", - "price": None, # Set back to null - "category": "Home-Update-Again" - }, - { - "id": 3, - "name": "Product C-Update-Again", - "price": 3333.3, # Set price from null to value - "category": "Books-Update-Again" - } - ] - - self.upsert(client, collection_name, second_update_data, partial_update=True) - - # Verify second update - results = self.query(client, collection_name, filter="id > 0", output_fields=["*"])[0] - assert len(results) == 3 - - second_update_map = {data['id']: data for data in results} - - # Verify ID 1: all fields updated - assert second_update_map[1]['name'] == "Product A-Update-Again" - assert abs(second_update_map[1]['price'] - 1111.1) < 0.001 - assert second_update_map[1]['category'] == "Electronics-Update-Again" - - # Verify ID 2: all fields updated, price set to null - assert second_update_map[2]['name'] == "Product B-Update-Again" - assert second_update_map[2]['price'] is None - assert second_update_map[2]['category'] == "Home-Update-Again" - - # Verify ID 3: all fields updated, price set from null to value - assert second_update_map[3]['name'] == "Product C-Update-Again" - assert abs(second_update_map[3]['price'] - 3333.3) < 0.001 - assert second_update_map[3]['category'] == "Books-Update-Again" - - # Verify vector fields were preserved from first update (not updated in second update) - # Note: Vector comparison might be complex, so we just verify they exist - assert 'vector' in second_update_map[1] - assert 'vector' in second_update_map[2] - assert 'vector' in second_update_map[3] - - log.info("Second partial update verification passed") - log.info("Simple partial update demo test completed successfully") - - @pytest.mark.tags(CaseLabel.L0) - def test_milvus_client_partial_update_null_to_null(self): - """ - Target: test PU can successfully update a null to null - Method: - 1. Create a collection, enable nullable fields - 2. Insert default_nb rows to the collection - 3. Partial Update the nullable field with null - 4. 
Query the collection to check the value of nullable field - Expected: query should have correct value and number of entities - """ - # step 1: create collection with nullable fields - client = self._client() - schema = self.create_schema(client, enable_dynamic_field=False)[0] - schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) - schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) - schema.add_field(default_int32_field_name, DataType.INT32, nullable=True) - - index_params = self.prepare_index_params(client)[0] - index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") - index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") - index_params.add_index(default_int32_field_name, index_type="AUTOINDEX") - - collection_name = cf.gen_collection_name_by_testcase_name(module_index=1) - self.create_collection(client, collection_name, default_dim, schema=schema, - consistency_level="Strong", index_params=index_params) - - # step 2: insert default_nb rows to the collection - rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema, skip_field_names=[default_int32_field_name]) - self.upsert(client, collection_name, rows, partial_update=True) - - # step 3: Partial Update the nullable field with null - new_row = cf.gen_partial_row_data_by_schema( - nb=default_nb, - schema=schema, - desired_field_names=[default_primary_key_field_name, default_int32_field_name], - start=0 - ) - - # Set the nullable field to None - for data in new_row: - data[default_int32_field_name] = None - - self.upsert(client, collection_name, new_row, partial_update=True) - - # step 4: Query the collection to check the value of nullable field - result = self.query(client, collection_name, filter=default_search_exp, - check_task=CheckTasks.check_query_results, - output_fields=[default_int32_field_name], - check_items={exp_res: new_row, - "with_vec": True, - "pk_name": default_primary_key_field_name})[0] - assert len(result) == default_nb - - # Verify that all nullable fields are indeed null - for data in result: - assert data[default_int32_field_name] is None, f"Expected null value for {default_int32_field_name}, got {data[default_int32_field_name]}" - - log.info("Partial update null to null test completed successfully") - self.drop_collection(client, collection_name) diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py index c99497c8c0..f0627fc84b 100644 --- a/tests/python_client/testcases/test_insert.py +++ b/tests/python_client/testcases/test_insert.py @@ -719,24 +719,6 @@ class TestInsertOperation(TestcaseBase): assert cf._check_primary_keys(mutation_res.primary_keys, ct.default_nb) assert collection_w.num_entities == ct.default_nb - @pytest.mark.tags(CaseLabel.L1) - def test_insert_auto_id_true_with_dataframe_values(self, pk_field): - """ - target: test insert with auto_id=True - method: create collection with auto_id=True - expected: 1.verify num entities 2.verify ids - """ - c_name = cf.gen_unique_str(prefix) - schema = cf.gen_default_collection_schema( - primary_field=pk_field, auto_id=True) - collection_w = self.init_collection_wrap(name=c_name, schema=schema) - df = cf.gen_default_dataframe_data(nb=100) - error = {ct.err_code: 999, - ct.err_msg: f"Expect no data for auto_id primary field: {pk_field}"} - collection_w.insert( - data=df, check_task=CheckTasks.err_res, check_items=error) - assert collection_w.is_empty - @pytest.mark.tags(CaseLabel.L2) def 
     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_auto_id_true_with_list_values(self, pk_field):
         """
@@ -888,107 +870,6 @@ class TestInsertOperation(TestcaseBase):
         collection_w.insert(data)
         assert collection_w.num_entities == ct.default_nb
 
-    @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.parametrize("enable_partition_key", [True, False])
-    @pytest.mark.parametrize("default_value", [[], [None for _ in range(ct.default_nb)]])
-    def test_insert_multi_fields_using_none_data(self, enable_partition_key, default_value, auto_id):
-        """
-        target: test insert with multiple fields, including arrays, using none values
-        method: 1. create a collection with multiple fields that use default values
-                2. insert with none values in place of the field values
-        expected: insert successfully
-        """
-        json_embedded_object = "json_embedded_object"
-        fields = [
-            cf.gen_int64_field(is_primary=True),
-            cf.gen_int32_field(default_value=np.int32(1), nullable=True),
-            cf.gen_float_field(default_value=np.float32(1.0), nullable=True),
-            cf.gen_string_field(default_value="abc", enable_partition_key=enable_partition_key, nullable=True),
-            cf.gen_array_field(name=ct.default_int32_array_field_name, element_type=DataType.INT32, nullable=True),
-            cf.gen_array_field(name=ct.default_float_array_field_name, element_type=DataType.FLOAT, nullable=True),
-            cf.gen_array_field(name=ct.default_string_array_field_name, element_type=DataType.VARCHAR, max_length=100, nullable=True),
-            cf.gen_json_field(name=json_embedded_object, nullable=True),
-            cf.gen_float_vec_field()
-        ]
-        schema = cf.gen_collection_schema(fields, auto_id=auto_id)
-        collection_w = self.init_collection_wrap(schema=schema)
-        # default value fields, [] or [None]
-        data = [
-            [i for i in range(ct.default_nb)],
-            default_value,
-            default_value,
-            default_value,
-            [[np.int32(j) for j in range(10)] for _ in range(ct.default_nb)],
-            [[np.float32(j) for j in range(10)] for _ in range(ct.default_nb)],
-            default_value,
-            default_value,
-            cf.gen_vectors(ct.default_nb, ct.default_dim)
-        ]
-        if auto_id:
-            del data[0]
-        collection_w.insert(data=data)
-        assert collection_w.num_entities == ct.default_nb
-
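The column-based test removed above and the two row-based tests removed below all lean on the same server-side rule: for a field with a `default_value`, passing no data (an empty column) or `None` stores the default, while a nullable field without a default stores a real null. A compact sketch of that rule with the ORM API, under assumed names and a local server, and assuming a pymilvus version where `nullable` and `default_value` are supported:

```python
import numpy as np
from pymilvus import connections, Collection, CollectionSchema, FieldSchema, DataType

connections.connect(uri="http://localhost:19530")  # assumed local Milvus
fields = [
    FieldSchema("id", DataType.INT64, is_primary=True),
    # default_value: inserting None falls back to "abc"
    FieldSchema("tag", DataType.VARCHAR, max_length=16, default_value="abc", nullable=True),
    # nullable without default_value: inserting None stores null
    FieldSchema("score", DataType.FLOAT, nullable=True),
    FieldSchema("vec", DataType.FLOAT_VECTOR, dim=4),
]
coll = Collection("none_vs_default_demo", CollectionSchema(fields))  # hypothetical name

rows = []
for i in range(10):
    rows.append({
        "id": i,
        "tag": None if i % 2 == 0 else "xyz",   # even rows fall back to "abc"
        "score": None if i % 2 == 0 else 1.5,   # even rows stay null
        "vec": np.random.random(4).tolist(),
    })
coll.insert(rows)
# After indexing and loading, `tag == "abc"` should match exactly the 5 even
# rows, whose `score` comes back as None - the same half-of-nb counting the
# removed tests assert with len(res) == ct.default_nb / 2.
```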
-    @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.parametrize("enable_partition_key", [True, False])
-    @pytest.mark.parametrize("nullable", [True, False])
-    def test_insert_multi_fields_by_rows_using_default(self, enable_partition_key, nullable):
-        """
-        target: test insert multiple fields by rows with default values
-        method: 1. create a collection with fields that use default values
-                2. insert rows relying on the default value to replace missing field values
-        expected: insert successfully
-        """
-        # 1. initialize with data
-        if enable_partition_key is True and nullable is True:
-            pytest.skip("partition key field does not support nullable")
-        fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(default_value=np.float32(3.14), nullable=nullable),
-                  cf.gen_string_field(default_value="abc", is_partition_key=enable_partition_key, nullable=nullable),
-                  cf.gen_json_field(), cf.gen_float_vec_field()]
-        schema = cf.gen_collection_schema(fields)
-        collection_w = self.init_collection_wrap(schema=schema)
-
-        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
-        collection_w.load()
-
-        # 2. insert data
-        array = cf.gen_default_rows_data()
-        for i in range(0, ct.default_nb, 2):
-            array[i][ct.default_string_field_name] = None
-        collection_w.insert(array)
-
-        exp = f"{ct.default_string_field_name} == 'abc'"
-        res = collection_w.query(exp, output_fields=[ct.default_float_field_name, ct.default_string_field_name])[0]
-        assert len(res) == ct.default_nb / 2
-
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_insert_multi_fields_by_rows_using_none(self):
-        """
-        target: test insert multiple fields by rows with none values
-        method: 1. create a collection with nullable fields
-                2. insert rows with none in place of the field values
-        expected: insert successfully
-        """
-        # 1. initialize with data
-        fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(nullable=True),
-                  cf.gen_string_field(default_value="abc", nullable=True), cf.gen_json_field(), cf.gen_float_vec_field()]
-        schema = cf.gen_collection_schema(fields)
-        collection_w = self.init_collection_wrap(schema=schema)
-
-        collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
-        collection_w.load()
-
-        # 2. insert data
-        array = cf.gen_default_rows_data()
-        for i in range(0, ct.default_nb, 2):
-            array[i][ct.default_float_field_name] = None
-            array[i][ct.default_string_field_name] = None
-        collection_w.insert(array)
-
-        exp = f"{ct.default_string_field_name} == 'abc'"
-        res = collection_w.query(exp, output_fields=[ct.default_float_field_name, ct.default_string_field_name])[0]
-        assert len(res) == ct.default_nb / 2
-        assert res[0][ct.default_float_field_name] is None
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("enable_partition_key", [True, False])