From 9228ed7b8f2bc4a39c10f9a73cd605f5cd9414a6 Mon Sep 17 00:00:00 2001 From: qixuan <673771573@qq.com> Date: Sat, 13 Sep 2025 19:55:57 +0800 Subject: [PATCH] test: add case about enable dynamic schema (#44355) related issue: #42126 Signed-off-by: qixuan <673771573@qq.com> --- .../milvus_client/test_add_field_feature.py | 6 +- .../milvus_client/test_milvus_client_alter.py | 124 ++++++++++++++++++ .../milvus_client/test_milvus_client_index.py | 121 +++++++++++++++++ 3 files changed, 248 insertions(+), 3 deletions(-) diff --git a/tests/python_client/milvus_client/test_add_field_feature.py b/tests/python_client/milvus_client/test_add_field_feature.py index 8ead4a8796..8d5dd7b9dd 100644 --- a/tests/python_client/milvus_client/test_add_field_feature.py +++ b/tests/python_client/milvus_client/test_add_field_feature.py @@ -349,9 +349,8 @@ class TestMilvusClientAddFieldFeature(TestMilvusClientV2Base): self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) # 2. insert initial data before adding analyzer field - vectors = cf.gen_vectors(default_nb, dim, vector_data_type=DataType.FLOAT_VECTOR) - rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], - default_string_field_name: str(i)} for i in range(default_nb)] + schema_info = self.describe_collection(client, collection_name)[0] + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema_info) results = self.insert(client, collection_name, rows)[0] assert results['insert_count'] == default_nb @@ -371,6 +370,7 @@ class TestMilvusClientAddFieldFeature(TestMilvusClientV2Base): "Database performance and scalability features" ] rows_with_analyzer = [] + vectors = cf.gen_vectors(default_nb, dim, vector_data_type=DataType.FLOAT_VECTOR) for i in range(default_nb, default_nb + len(text_data)): rows_with_analyzer.append({ default_primary_key_field_name: i, diff --git a/tests/python_client/milvus_client/test_milvus_client_alter.py b/tests/python_client/milvus_client/test_milvus_client_alter.py index ca3b734d53..e9edaf74cf 100644 --- a/tests/python_client/milvus_client/test_milvus_client_alter.py +++ b/tests/python_client/milvus_client/test_milvus_client_alter.py @@ -10,6 +10,15 @@ import numpy as np prefix = "alter" default_vector_field_name = "vector" +default_primary_key_field_name = "id" +default_string_field_name = "varchar" +default_float_field_name = "float" +default_new_field_name = "field_new" +default_dynamic_field_name = "dynamic_field" +exp_res = "exp_res" +default_nb = 20 +default_dim = 128 +default_limit = 10 class TestMilvusClientAlterIndex(TestMilvusClientV2Base): @@ -145,12 +154,21 @@ class TestMilvusClientAlterCollection(TestMilvusClientV2Base): check_task=CheckTasks.err_res, check_items=error) self.alter_collection_properties(client, collection_name, properties={"lazyload.enabled": True}, check_task=CheckTasks.err_res, check_items=error) + error = {ct.err_code: 999, + ct.err_msg: "dynamic schema cannot supported to be disabled: invalid parameter"} + self.alter_collection_properties(client, collection_name, properties={"dynamicfield.enabled": False}, + check_task=CheckTasks.err_res, check_items=error) error = {ct.err_code: 999, ct.err_msg: "can not delete mmap properties if collection loaded"} self.drop_collection_properties(client, collection_name, property_keys=["mmap.enabled"], check_task=CheckTasks.err_res, check_items=error) self.drop_collection_properties(client, collection_name, property_keys=["lazyload.enabled"], check_task=CheckTasks.err_res, 
check_items=error) + # TODO + # error = {ct.err_code: 999, + # ct.err_msg: "can not delete dynamicfield properties"} + # self.drop_collection_properties(client, collection_name, property_keys=["dynamicfield.enabled"], + # check_task=CheckTasks.err_res, check_items=error) res3 = self.describe_collection(client, collection_name)[0] assert res3.get('properties', None) == {} self.drop_collection_properties(client, collection_name, property_keys=["collection.ttl.seconds"]) @@ -171,6 +189,112 @@ class TestMilvusClientAlterCollection(TestMilvusClientV2Base): res3 = self.describe_collection(client, collection_name)[0] assert res3.get('properties', None) == {} + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_alter_enable_dynamic_collection_field(self): + """ + target: test enable dynamic field and mixed field operations + method: create collection, add field, enable dynamic field, insert mixed data, query/search + expected: dynamic field works with new field and static field + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + dim = 8 + # 1. create collection + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, max_length=64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) + # 2. Prepare and insert data + schema_info = self.describe_collection(client, collection_name)[0] + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema_info) + results = self.insert(client, collection_name, rows)[0] + assert results['insert_count'] == default_nb + # 3. add new field + default_value = 100 + self.add_collection_field(client, collection_name, field_name="field_new", data_type=DataType.INT64, + nullable=True, default_value=default_value) + # 4. alter collection dynamic field enable + self.alter_collection_properties(client, collection_name, {"dynamicfield.enabled": True}) + res = self.describe_collection(client, collection_name)[0] + assert res.get('enable_dynamic_field', None) is True + # 5. insert data with dynamic field and new field + vectors = cf.gen_vectors(default_nb, dim, vector_data_type=DataType.FLOAT_VECTOR) + rows_new = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i], + default_string_field_name: str(i), default_new_field_name: i, + default_dynamic_field_name: i} for i in range(default_nb)] + self.insert(client, collection_name, rows_new) + # 6. query using filter with dynamic field and new field + res = self.query(client, collection_name, + filter="{} >= 0 and field_new < {}".format(default_dynamic_field_name, default_value), + output_fields=[default_dynamic_field_name], + check_task=CheckTasks.check_query_results, + check_items={exp_res: [{"id": item["id"], + default_dynamic_field_name: item[default_dynamic_field_name]} + for item in rows_new]})[0] + assert set(res[0].keys()) == {default_dynamic_field_name, default_primary_key_field_name} + # 7. 
search using filter with dynamic field and new field + vectors_to_search = [vectors[0]] + insert_ids = [i for i in range(default_nb)] + self.search(client, collection_name, vectors_to_search, + filter="{} >= 0 and field_new < {}".format(default_dynamic_field_name, default_value), + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "pk_name": default_primary_key_field_name, + "limit": default_limit}) + # 8. add new field same as dynamic field name + self.add_collection_field(client, collection_name, field_name=default_dynamic_field_name, + data_type=DataType.INT64, nullable=True, default_value=default_value) + # 9. query using filter with dynamic field and new field + res = self.query(client, collection_name, + filter='$meta["{}"] >= 0 and {} == {}'.format(default_dynamic_field_name, + default_dynamic_field_name, default_value), + output_fields=[default_dynamic_field_name, f'$meta["{default_dynamic_field_name}"]'], + check_task=CheckTasks.check_query_results, + check_items={exp_res: [{"id": item["id"], default_dynamic_field_name: default_value} + for item in rows_new]})[0] + # dynamic field same as new field name, output_fields contain dynamic field, result do not contain dynamic field + # https://github.com/milvus-io/milvus/issues/41702 + assert set(res[0].keys()) == {default_dynamic_field_name, default_primary_key_field_name} + # 10. search using filter with dynamic field and new field + self.search(client, collection_name, vectors_to_search, + filter='$meta["{}"] >= 0 and {} == {}'.format(default_dynamic_field_name, + default_dynamic_field_name, default_value), + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "pk_name": default_primary_key_field_name, + "limit": default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("old_dynamic_flag, new_dynamic_flag", [(True, True), (False, False)]) + def test_milvus_client_alter_dynamic_collection_field_no_op(self, old_dynamic_flag, new_dynamic_flag): + """ + target: test dynamic field no-op alter operations + method: create collection with dynamic flag, alter to same flag, verify unchanged + expected: no-op alter succeeds without state change + """ + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + dim = 8 + # 1. create collection + schema = self.create_schema(client, enable_dynamic_field=old_dynamic_flag)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, max_length=64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) + # 2. 
alter collection dynamic field + self.alter_collection_properties(client, collection_name, properties={"dynamicfield.enabled": new_dynamic_flag}) + res = self.describe_collection(client, collection_name)[0] + assert res.get('enable_dynamic_field', None) is new_dynamic_flag + class TestMilvusClientAlterCollectionField(TestMilvusClientV2Base): @pytest.mark.tags(CaseLabel.L0) diff --git a/tests/python_client/milvus_client/test_milvus_client_index.py b/tests/python_client/milvus_client/test_milvus_client_index.py index ad2074a7f4..2deeba983c 100644 --- a/tests/python_client/milvus_client/test_milvus_client_index.py +++ b/tests/python_client/milvus_client/test_milvus_client_index.py @@ -1691,3 +1691,124 @@ class TestMilvusClientJsonPathIndexValid(TestMilvusClientV2Base): "field_name": json_field_name, "index_name": index_name + '4'}) + @pytest.mark.tags(CaseLabel.L2) + def test_milvus_client_json_path_index_after_enable_dynamic_field(self, supported_json_cast_type, + supported_varchar_scalar_index): + """ + target: test json path index after enabling dynamic field + method: create collection, enable dynamic field, create json path index, insert + expected: json path index works with dynamic field data + """ + client = self._client() + collection_name = cf.gen_unique_str(prefix) + # 1. create collection + json_field_name = "dynamic_field" + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_vector_field_name, metric_type="COSINE") + self.create_collection(client, collection_name, schema=schema, index_params=index_params) + # 2. alter collection dynamic field enable + self.alter_collection_properties(client, collection_name, {"dynamicfield.enabled": True}) + res = self.describe_collection(client, collection_name)[0] + assert res.get('enable_dynamic_field', None) is True + # 3. 
insert with different data distribution
+        vectors = cf.gen_vectors(default_nb+50, default_dim)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
+        self.insert(client, collection_name, rows)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: i} for i in range(default_nb, default_nb+10)]
+        self.insert(client, collection_name, rows)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {}} for i in range(default_nb+10, default_nb+20)]
+        self.insert(client, collection_name, rows)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}}
+                for i in range(default_nb + 20, default_nb + 30)]
+        self.insert(client, collection_name, rows)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}}
+                for i in range(default_nb + 30, default_nb + 40)]
+        self.insert(client, collection_name, rows)
+        rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
+                 default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}}
+                for i in range(default_nb + 40, default_nb + 50)]
+        self.insert(client, collection_name, rows)
+        # 4. prepare index params
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
+        index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
+        index_params.add_index(field_name=json_field_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a']"})
+        index_params.add_index(field_name=json_field_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}"})
+        index_params.add_index(field_name=json_field_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a'][0]['b']"})
+        index_params.add_index(field_name=json_field_name,
+                               index_type=supported_varchar_scalar_index,
+                               params={"json_cast_type": supported_json_cast_type,
+                                       "json_path": f"{json_field_name}['a'][0]"})
+        # 5. create index
+        self.create_index(client, collection_name, index_params)
+        self.list_indexes(client, collection_name)
+        index_name = "$meta/" + json_field_name
+        self.describe_index(client, collection_name, index_name + "/a/b",
+                            check_task=CheckTasks.check_describe_index_property,
+                            check_items={
+                                "json_cast_type": supported_json_cast_type,
+                                "json_path": f"{json_field_name}['a']['b']",
+                                "index_type": supported_varchar_scalar_index,
+                                "field_name": json_field_name,
+                                "index_name": index_name + "/a/b"})
+        # 6. 
create same json index twice + self.create_index(client, collection_name, index_params) + self.describe_index(client, collection_name, index_name + "/a/b", + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a']['b']", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + "/a/b"}) + self.describe_index(client, collection_name, index_name + '/a', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a']", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '/a'}) + self.describe_index(client, collection_name, index_name, + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name}) + self.describe_index(client, collection_name, index_name + '/a/0/b', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a'][0]['b']", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '/a/0/b'}) + self.describe_index(client, collection_name, index_name + '/a/0', + check_task=CheckTasks.check_describe_index_property, + check_items={ + "json_cast_type": supported_json_cast_type, + "json_path": f"{json_field_name}['a'][0]", + "index_type": supported_varchar_scalar_index, + "field_name": json_field_name, + "index_name": index_name + '/a/0'}) \ No newline at end of file
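
Outside the test harness, the flow these cases exercise comes down to a handful of MilvusClient calls. The snippet below is a minimal sketch, not part of the patch: the URI, the "dynamic_demo" collection name, dim=8, and the sample row are illustrative assumptions; the "dynamicfield.enabled" property key and the describe_collection/enable_dynamic_field check mirror the tests above.

    # Minimal sketch: enable dynamic schema on an existing collection (assumed local deployment).
    from pymilvus import MilvusClient, DataType

    client = MilvusClient(uri="http://localhost:19530")

    # Collection starts with dynamic schema disabled.
    schema = client.create_schema(enable_dynamic_field=False)
    schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False)
    schema.add_field("vector", DataType.FLOAT_VECTOR, dim=8)

    index_params = client.prepare_index_params()
    index_params.add_index(field_name="vector", index_type="AUTOINDEX", metric_type="COSINE")
    client.create_collection("dynamic_demo", schema=schema, index_params=index_params)

    # Turn dynamic schema on through the collection property used in the tests.
    client.alter_collection_properties("dynamic_demo", properties={"dynamicfield.enabled": True})
    assert client.describe_collection("dynamic_demo").get("enable_dynamic_field") is True

    # "extra" is not declared in the schema, so it is stored in the $meta dynamic field.
    client.load_collection("dynamic_demo")
    client.insert("dynamic_demo", [{"id": 0, "vector": [0.1] * 8, "extra": 42}])
    print(client.query("dynamic_demo", filter="extra == 42", output_fields=["extra"]))

The property only works in the enabling direction: the alter-collection error case added above expects "dynamic schema cannot supported to be disabled" when the flag is set to False, while the parametrized no-op test confirms that re-applying the current value succeeds without changing the collection state.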