test: add more test cases and add bulk insert scenario (#45770)

Issue: #45756

1. add bulk insert scenario
2. fix small issues in e2e cases
3. add search group-by test case
4. add timestamptz to gen_all_datatype_collection_schema
5. modify partial-update test case to ensure correct results from the timestamptz field

On branch feature/timestamps
Changes to be committed:
    modified: common/bulk_insert_data.py
    modified: common/common_func.py
    modified: common/common_type.py
    modified: milvus_client/test_milvus_client_partial_update.py
    modified: milvus_client/test_milvus_client_timestamptz.py
    modified: pytest.ini
    modified: testcases/test_bulk_insert.py

Signed-off-by: Eric Hou <eric.hou@zilliz.com>
Co-authored-by: Eric Hou <eric.hou@zilliz.com>
This commit is contained in:
    parent 2134f83aa3
    commit 228eb0f5d0
diff --git a/common/bulk_insert_data.py b/common/bulk_insert_data.py
@@ -12,6 +12,7 @@ import uuid
 from faker import Faker
 from sklearn import preprocessing
 from common.common_func import gen_unique_str
+from common.common_func import gen_timestamptz_str
 from common.minio_comm import copy_files_to_minio
 from utils.util_log import test_log as log
 import pyarrow as pa
@@ -46,6 +47,7 @@ class DataField:
     array_string_field = "array_string"
     new_field = "new_field"
     geo_field = "geo"
+    timestamp_field = "timestamptz"


 class DataErrorType:
@@ -700,6 +702,11 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128
             data = gen_wkt_geometry(rows)
         else:
             data = [None for _ in range(start, rows + start)]
+    elif data_field == DataField.timestamp_field:
+        if not nullable:
+            data = [gen_timestamptz_str() for _ in range(start, rows + start)]
+        else:
+            data = [None for _ in range(start, rows + start)]
     else:
         raise Exception("unsupported field name")

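Note: the new branch fills timestamptz columns with strings from gen_timestamptz_str(), whose body is not part of this diff. A minimal sketch of what such a generator could look like, assuming it returns RFC 3339 strings with an explicit UTC offset (the _sketch name and all values are illustrative, not the repo's implementation):

    import random
    from datetime import datetime, timedelta, timezone

    def gen_timestamptz_str_sketch() -> str:
        """Return a random RFC 3339 timestamp string with an explicit UTC offset."""
        base = datetime(2020, 1, 1, tzinfo=timezone.utc)
        jitter = timedelta(seconds=random.randint(0, 5 * 365 * 24 * 3600))
        return (base + jitter).isoformat()  # e.g. "2023-06-18T07:42:11+00:00"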
@@ -867,6 +874,11 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, dim=128
                 d[data_field] = gen_wkt_geometry(1)[0]
             else:
                 d[data_field] = None
+        elif data_field == DataField.timestamp_field:
+            if not nullable:
+                d[data_field] = gen_timestamptz_str()
+            else:
+                d[data_field] = None
         else:
             raise Exception("unsupported field name")
     if enable_dynamic_field:
diff --git a/common/common_func.py b/common/common_func.py
@@ -687,6 +687,9 @@ def gen_geometry_field(name=ct.default_geometry_field_name, description=ct.default_desc
 def gen_geometry_field(name="geo", description=ct.default_desc, is_primary=False, **kwargs):
     return gen_scalar_field(DataType.GEOMETRY, name=name, description=description, is_primary=is_primary, **kwargs)


+def gen_timestamptz_field(name=ct.default_timestamptz_field_name, description=ct.default_desc, is_primary=False, **kwargs):
+    return gen_scalar_field(DataType.TIMESTAMPTZ, name=name, description=description, is_primary=is_primary, **kwargs)
+
 def gen_array_field(name=ct.default_array_field_name, element_type=DataType.INT64, max_capacity=ct.default_max_capacity,
                     description=ct.default_desc, is_primary=False, **kwargs):
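Note: the new helper slots into schema construction the same way the other gen_*_field helpers do, with extra kwargs such as nullable forwarded through **kwargs to gen_scalar_field (the bulk-insert hunks below rely on this). A hypothetical call site, assuming the usual "from common import common_func as cf" import; the "ts" name is made up:

    from common import common_func as cf

    fields = [
        cf.gen_timestamptz_field(),                           # uses ct.default_timestamptz_field_name
        cf.gen_timestamptz_field(name="ts", nullable=True),   # nullable variant
    ]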
@@ -859,6 +862,7 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_field
                      analyzer_params=analyzer_params)
     schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=nullable)
     schema.add_field(ct.default_geometry_field_name, DataType.GEOMETRY, nullable=nullable)
+    schema.add_field(ct.default_timestamptz_field_name, DataType.TIMESTAMPTZ, nullable=nullable)
     schema.add_field("array_int", DataType.ARRAY, element_type=DataType.INT64, max_capacity=ct.default_max_capacity)
     schema.add_field("array_float", DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=ct.default_max_capacity)
     schema.add_field("array_varchar", DataType.ARRAY, element_type=DataType.VARCHAR, max_length=200, max_capacity=ct.default_max_capacity)
@@ -867,7 +871,6 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_field
     schema.add_field("image_emb", DataType.INT8_VECTOR, dim=dim)
     schema.add_field("text_sparse_emb", DataType.SPARSE_FLOAT_VECTOR)
     # schema.add_field("voice_emb", DataType.FLOAT_VECTOR, dim=dim)
-    # schema.add_field("timestamptz", DataType.TIMESTAMPTZ, nullable=nullable)

     # Add struct array field
     if enable_struct_array_field:
diff --git a/common/common_type.py b/common/common_type.py
@@ -41,6 +41,7 @@ default_double_field_name = "double"
 default_string_field_name = "varchar"
 default_json_field_name = "json_field"
 default_geometry_field_name = "geometry_field"
+default_timestamptz_field_name = "timestamptz_field"
 default_array_field_name = "int_array"
 default_int8_array_field_name = "int8_array"
 default_int16_array_field_name = "int16_array"
diff --git a/milvus_client/test_milvus_client_partial_update.py b/milvus_client/test_milvus_client_partial_update.py
@@ -141,7 +141,8 @@ class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base):

         vector_field_type = [DataType.FLOAT16_VECTOR,
                              DataType.BFLOAT16_VECTOR,
-                             DataType.INT8_VECTOR]
+                             DataType.INT8_VECTOR,
+                             DataType.FLOAT_VECTOR]
         # fields to be updated
         update_fields_name = []
         scalar_update_name = []
@@ -163,6 +164,7 @@ class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base):
         expected = [{field: new_rows[i][field] for field in scalar_update_name}
                     for i in range(default_nb)]

+        expected = cf.convert_timestamptz(expected, ct.default_timestamptz_field_name, "UTC")
         result = self.query(client, collection_name, filter=f"{primary_key_field_name} >= 0",
                             check_task=CheckTasks.check_query_results,
                             output_fields=scalar_update_name,
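Note: cf.convert_timestamptz itself is outside this diff. Judging from its call sites (rows, field name, target timezone), it re-renders stored timestamp strings in the given zone so that expected values match what the server returns. A minimal sketch under that assumption (Python 3.9+ for zoneinfo):

    from datetime import datetime
    from zoneinfo import ZoneInfo

    def convert_timestamptz_sketch(rows, field, tz):
        """Re-render each row's timestamptz string in the target timezone."""
        for row in rows:
            if row.get(field) is not None:
                dt = datetime.fromisoformat(row[field])
                row[field] = dt.astimezone(ZoneInfo(tz)).isoformat()
        return rows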
diff --git a/milvus_client/test_milvus_client_timestamptz.py b/milvus_client/test_milvus_client_timestamptz.py
@@ -602,6 +602,50 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):

         self.drop_collection(client, collection_name)

+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_timestamptz_search_group_by(self):
+        """
+        target: test search with group by and timestamptz
+        method:
+            1. Create a collection
+            2. Generate rows with timestamptz and insert the rows
+            3. Search with group by timestamptz
+        expected: Step 3 should succeed
+        """
+        # step 1: create collection
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_timestamp_field_name, DataType.TIMESTAMPTZ, nullable=True)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_timestamp_field_name, index_type="AUTOINDEX")
+        self.create_collection(client, collection_name, default_dim, schema=schema,
+                               consistency_level="Strong", index_params=index_params)
+
+        # step 2: generate rows with timestamptz and insert the rows
+        rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
+        self.insert(client, collection_name, rows)
+
+        # step 3: search with group by timestamptz
+        vectors_to_search = cf.gen_vectors(1, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
+        insert_ids = [i for i in range(default_nb)]
+        self.search(client, collection_name, vectors_to_search,
+                    timezone="Asia/Shanghai",
+                    time_fields="year, month, day, hour, minute, second, microsecond",
+                    group_by_field=default_timestamp_field_name,
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
+                                 "limit": default_limit})
+
+        self.drop_collection(client, collection_name)
+
     @pytest.mark.tags(CaseLabel.L1)
     def test_milvus_client_timestamptz_query(self):
         """
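Note: at the pymilvus level, the wrapped self.search call corresponds to a grouping search. A sketch against a bare MilvusClient; the collection name, URI, and vector are illustrative, and the timezone/time_fields kwargs come from this feature branch, so only group_by_field is shown here:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")  # assumed local deployment
    res = client.search(
        "my_collection",
        data=[[0.1] * 128],                    # one 128-dim query vector
        limit=10,
        group_by_field="timestamptz_field",    # at most one hit per distinct timestamp value
    )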
@@ -763,7 +807,8 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
             2. insert rows
             3. add field with timestamptz
             4. compact
-        expected: Step 4 should success
+            5. query the rows
+        expected: Steps 4 and 5 should succeed
         """
         # step 1: create collection
         client = self._client()
@@ -802,6 +847,24 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
             if time.time() - start > cost:
                 raise Exception(1, f"Compact after index cost more than {cost}s")

+        # step 5: query the rows
+        # first release the collection
+        self.release_collection(client, collection_name)
+        # then load the collection
+        self.load_collection(client, collection_name)
+        # then query the rows
+        for row in rows:
+            row[default_timestamp_field_name] = None
+        self.query(client, collection_name, filter=f"0 <= {default_primary_key_field_name} < {default_nb}",
+                   check_task=CheckTasks.check_query_results,
+                   check_items={exp_res: rows,
+                                "pk_name": default_primary_key_field_name})
+
+        new_rows = cf.convert_timestamptz(new_rows, default_timestamp_field_name, "UTC")
+        self.query(client, collection_name, filter=f"{default_primary_key_field_name} >= {default_nb}",
+                   check_task=CheckTasks.check_query_results,
+                   check_items={exp_res: new_rows,
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)

     @pytest.mark.tags(CaseLabel.L1)
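Note: the release/load cycle is what makes a freshly added field visible to queries, and rows inserted before the field existed come back with None for it (hence the rows are nulled before comparison). The equivalent sequence against a bare MilvusClient; collection name, filter, and URI are assumed:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")      # assumed local deployment
    client.release_collection("my_collection")               # drop the loaded copy
    client.load_collection("my_collection")                  # reload with the new field
    rows = client.query("my_collection", filter="id >= 0")   # old rows: new field is None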
@@ -1092,7 +1155,7 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
         self.insert(client, collection_name, rows)

         # step 3: query the rows from different client in different timezone
-        client2 = self._client()
+        client2 = self._client(alias="client2_alias")
         UTC_time_row = cf.convert_timestamptz(rows, default_timestamp_field_name, "UTC")
         shanghai_rows = cf.convert_timestamptz(UTC_time_row, default_timestamp_field_name, "Asia/Shanghai")
         LA_rows = cf.convert_timestamptz(UTC_time_row, default_timestamp_field_name, "America/Los_Angeles")
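Note: pymilvus caches connections per alias, so calling self._client() twice would hand back the same underlying connection; the explicit alias forces a genuinely separate client for the cross-timezone check. The same idea with the low-level API (URI assumed):

    from pymilvus import connections

    connections.connect(alias="default", uri="http://localhost:19530")
    connections.connect(alias="client2_alias", uri="http://localhost:19530")  # distinct connection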
@@ -1111,7 +1174,6 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
         self.drop_collection(client, collection_name)


-
 class TestMilvusClientTimestamptzInvalid(TestMilvusClientV2Base):

     """
diff --git a/pytest.ini b/pytest.ini
@@ -1,7 +1,7 @@
 [pytest]


-addopts = --host 10.104.31.219 --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level INFO
+addopts = --host localhost --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level INFO
 # python3 -W ignore -m pytest

 log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s)
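Note: reverting addopts from the internal CI address back to localhost keeps the checked-in default runnable anywhere. With it, a typical local run of the new cases looks like the comment already in the file suggests (the -k filter below is illustrative):

    python3 -W ignore -m pytest milvus_client/test_milvus_client_timestamptz.py -k group_by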
diff --git a/testcases/test_bulk_insert.py b/testcases/test_bulk_insert.py
@@ -790,6 +790,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
             cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL, nullable=nullable),
             cf.gen_geometry_field(name=df.geo_field),
+            cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable),
             cf.gen_float_vec_field(name=df.float_vec_field, dim=float_vec_field_dim),
             cf.gen_binary_vec_field(name=df.binary_vec_field, dim=binary_vec_field_dim),
             cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=bf16_vec_field_dim),
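Note: since the bulk-insert schema now includes the timestamptz field, generated data files must carry a matching column. A minimal sketch of writing such a column to a parquet chunk with pyarrow (which bulk_insert_data.py already imports); path, row count, and the constant value are illustrative:

    import pyarrow as pa
    import pyarrow.parquet as pq

    rows = 1000
    table = pa.Table.from_pydict({
        "timestamptz": ["2024-01-01T08:00:00+08:00"] * rows,  # RFC 3339 strings
    })
    pq.write_table(table, "/tmp/bulk_insert_timestamptz.parquet")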
@@ -1168,6 +1169,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
             cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL, nullable=nullable),
             cf.gen_geometry_field(name=df.geo_field),
+            cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable),
             cf.gen_float_vec_field(name=df.float_vec_field, dim=float_vec_field_dim),
             cf.gen_binary_vec_field(name=df.binary_vec_field, dim=binary_vec_field_dim),
             cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=bf16_vec_field_dim),
@@ -2191,6 +2193,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_float_field(name=df.float_field, nullable=nullable),
             cf.gen_string_field(name=df.string_field, nullable=nullable),
             cf.gen_json_field(name=df.json_field, nullable=nullable),
+            cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable),
             cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64, nullable=nullable),
             cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT, nullable=nullable),
             cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
@@ -2227,6 +2230,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             df.float_field: 1.0 if not (nullable and random.random() < 0.5) else None,
             df.string_field: "string" if not (nullable and random.random() < 0.5) else None,
             df.json_field: json_value[i%len(json_value)] if not (nullable and random.random() < 0.5) else None,
+            df.timestamp_field: cf.gen_timestamptz_str() if not (nullable and random.random() < 0.5) else None,
             df.array_int_field: [1, 2] if not (nullable and random.random() < 0.5) else None,
             df.array_float_field: [1.0, 2.0] if not (nullable and random.random() < 0.5) else None,
             df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None,