diff --git a/tests/python_client/common/bulk_insert_data.py b/tests/python_client/common/bulk_insert_data.py index e682756ae9..a9df3dac2e 100644 --- a/tests/python_client/common/bulk_insert_data.py +++ b/tests/python_client/common/bulk_insert_data.py @@ -12,6 +12,7 @@ import uuid from faker import Faker from sklearn import preprocessing from common.common_func import gen_unique_str +from common.common_func import gen_timestamptz_str from common.minio_comm import copy_files_to_minio from utils.util_log import test_log as log import pyarrow as pa @@ -46,6 +47,7 @@ class DataField: array_string_field = "array_string" new_field = "new_field" geo_field = "geo" + timestamp_field = "timestamptz" class DataErrorType: @@ -700,6 +702,11 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128 data = gen_wkt_geometry(rows) else: data = [None for _ in range(start, rows + start)] + elif data_field == DataField.timestamp_field: + if not nullable: + data = [gen_timestamptz_str() for _ in range(start, rows + start)] + else: + data = [None for _ in range(start, rows + start)] else: raise Exception("unsupported field name") @@ -867,6 +874,11 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, d d[data_field] = gen_wkt_geometry(1)[0] else: d[data_field] = None + elif data_field == DataField.timestamp_field: + if not nullable: + d[data_field] = gen_timestamptz_str() + else: + d[data_field] = None else: raise Exception("unsupported field name") if enable_dynamic_field: diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index ab9b22df31..6d679961a1 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -687,6 +687,9 @@ def gen_geometry_field(name=ct.default_geometry_field_name, description=ct.defau def gen_geometry_field(name="geo", description=ct.default_desc, is_primary=False, **kwargs): return gen_scalar_field(DataType.GEOMETRY, 
name=name, description=description, is_primary=is_primary, **kwargs) +def gen_timestamptz_field(name=ct.default_timestamptz_field_name, description=ct.default_desc, is_primary=False, **kwargs): + return gen_scalar_field(DataType.TIMESTAMPTZ, name=name, description=description, is_primary=is_primary, **kwargs) + def gen_array_field(name=ct.default_array_field_name, element_type=DataType.INT64, max_capacity=ct.default_max_capacity, description=ct.default_desc, is_primary=False, **kwargs): @@ -859,6 +862,7 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_fiel analyzer_params=analyzer_params) schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=nullable) schema.add_field(ct.default_geometry_field_name, DataType.GEOMETRY, nullable=nullable) + schema.add_field(ct.default_timestamptz_field_name, DataType.TIMESTAMPTZ, nullable=nullable) schema.add_field("array_int", DataType.ARRAY, element_type=DataType.INT64, max_capacity=ct.default_max_capacity) schema.add_field("array_float", DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=ct.default_max_capacity) schema.add_field("array_varchar", DataType.ARRAY, element_type=DataType.VARCHAR, max_length=200, max_capacity=ct.default_max_capacity) @@ -867,7 +871,6 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_fiel schema.add_field("image_emb", DataType.INT8_VECTOR, dim=dim) schema.add_field("text_sparse_emb", DataType.SPARSE_FLOAT_VECTOR) # schema.add_field("voice_emb", DataType.FLOAT_VECTOR, dim=dim) - # schema.add_field("timestamptz", DataType.TIMESTAMPTZ, nullable=nullable) # Add struct array field if enable_struct_array_field: diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py index 2286cd4a51..4685a99d5f 100644 --- a/tests/python_client/common/common_type.py +++ b/tests/python_client/common/common_type.py @@ -41,6 +41,7 @@ default_double_field_name = "double" default_string_field_name = "varchar" 
default_json_field_name = "json_field" default_geometry_field_name = "geometry_field" +default_timestamptz_field_name = "timestamptz_field" default_array_field_name = "int_array" default_int8_array_field_name = "int8_array" default_int16_array_field_name = "int16_array" diff --git a/tests/python_client/milvus_client/test_milvus_client_partial_update.py b/tests/python_client/milvus_client/test_milvus_client_partial_update.py index 6ca37d945c..3dcec78a78 100644 --- a/tests/python_client/milvus_client/test_milvus_client_partial_update.py +++ b/tests/python_client/milvus_client/test_milvus_client_partial_update.py @@ -141,7 +141,8 @@ class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base): vector_field_type = [DataType.FLOAT16_VECTOR, DataType.BFLOAT16_VECTOR, - DataType.INT8_VECTOR] + DataType.INT8_VECTOR, + DataType.FLOAT_VECTOR] # fields to be updated update_fields_name = [] scalar_update_name = [] @@ -163,6 +164,7 @@ class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base): expected = [{field: new_rows[i][field] for field in scalar_update_name} for i in range(default_nb)] + expected = cf.convert_timestamptz(expected, ct.default_timestamptz_field_name, "UTC") result = self.query(client, collection_name, filter=f"{primary_key_field_name} >= 0", check_task=CheckTasks.check_query_results, output_fields=scalar_update_name, diff --git a/tests/python_client/milvus_client/test_milvus_client_timestamptz.py b/tests/python_client/milvus_client/test_milvus_client_timestamptz.py index 8c86d33cbd..d5b4960a5b 100644 --- a/tests/python_client/milvus_client/test_milvus_client_timestamptz.py +++ b/tests/python_client/milvus_client/test_milvus_client_timestamptz.py @@ -602,6 +602,50 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base): self.drop_collection(client, collection_name) + @pytest.mark.tags(CaseLabel.L1) + def test_milvus_client_timestamptz_search_group_by(self): + """ + target: test search with group by and timestamptz + method: + 1. 
Create a collection + 2. Generate rows with timestamptz and insert the rows + 3. Search with group by timestamptz + expected: Step 3 should succeed + """ + # step 1: create collection + client = self._client() + collection_name = cf.gen_collection_name_by_testcase_name() + schema = self.create_schema(client, enable_dynamic_field=False)[0] + schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False) + schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim) + schema.add_field(default_timestamp_field_name, DataType.TIMESTAMPTZ, nullable=True) + index_params = self.prepare_index_params(client)[0] + index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX") + index_params.add_index(default_vector_field_name, index_type="AUTOINDEX") + index_params.add_index(default_timestamp_field_name, index_type="AUTOINDEX") + self.create_collection(client, collection_name, default_dim, schema=schema, + consistency_level="Strong", index_params=index_params) + + # step 2: generate rows with timestamptz and insert the rows + rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema) + self.insert(client, collection_name, rows) + + # step 3: search with group by timestamptz + vectors_to_search = cf.gen_vectors(1, default_dim, vector_data_type=DataType.FLOAT_VECTOR) + insert_ids = [i for i in range(default_nb)] + self.search(client, collection_name, vectors_to_search, + timezone="Asia/Shanghai", + time_fields="year, month, day, hour, minute, second, microsecond", + group_by_field=default_timestamp_field_name, + check_task=CheckTasks.check_search_results, + check_items={"enable_milvus_client_api": True, + "nq": len(vectors_to_search), + "ids": insert_ids, + "pk_name": default_primary_key_field_name, + "limit": default_limit}) + + self.drop_collection(client, collection_name) + @pytest.mark.tags(CaseLabel.L1) def test_milvus_client_timestamptz_query(self): """ @@ -763,7 +807,8 @@ class 
TestMilvusClientTimestamptzValid(TestMilvusClientV2Base): 2. insert rows 3. add field with timestamptz 4. compact - expected: Step 4 should success + 5. query the rows + expected: Step 4 and Step 5 should succeed """ # step 1: create collection client = self._client() @@ -802,6 +847,24 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base): if time.time() - start > cost: raise Exception(1, f"Compact after index cost more than {cost}s") + # step 5: query the rows + # first release the collection + self.release_collection(client, collection_name) + # then load the collection + self.load_collection(client, collection_name) + # then query the rows + for row in rows: + row[default_timestamp_field_name] = None + self.query(client, collection_name, filter=f"0 <= {default_primary_key_field_name} < {default_nb}", + check_task=CheckTasks.check_query_results, + check_items={exp_res: rows, + "pk_name": default_primary_key_field_name}) + + new_rows = cf.convert_timestamptz(new_rows, default_timestamp_field_name, "UTC") + self.query(client, collection_name, filter=f"{default_primary_key_field_name} >= {default_nb}", + check_task=CheckTasks.check_query_results, + check_items={exp_res: new_rows, + "pk_name": default_primary_key_field_name}) self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L1) @@ -1092,7 +1155,7 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base): self.insert(client, collection_name, rows) # step 3: query the rows from different client in different timezone - client2 = self._client() + client2 = self._client(alias="client2_alias") UTC_time_row = cf.convert_timestamptz(rows, default_timestamp_field_name, "UTC") shanghai_rows = cf.convert_timestamptz(UTC_time_row, default_timestamp_field_name, "Asia/Shanghai") LA_rows = cf.convert_timestamptz(UTC_time_row, default_timestamp_field_name, "America/Los_Angeles") @@ -1111,7 +1174,6 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base): 
self.drop_collection(client, collection_name) - class TestMilvusClientTimestamptzInvalid(TestMilvusClientV2Base): """ diff --git a/tests/python_client/pytest.ini b/tests/python_client/pytest.ini index 5300ebcdc0..cfcc15d0d1 100644 --- a/tests/python_client/pytest.ini +++ b/tests/python_client/pytest.ini @@ -1,7 +1,7 @@ [pytest] -addopts = --host 10.104.31.219 --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level INFO +addopts = --host localhost --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level INFO # python3 -W ignore -m pytest log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s) diff --git a/tests/python_client/testcases/test_bulk_insert.py b/tests/python_client/testcases/test_bulk_insert.py index 15b3428e30..4a1620fd40 100644 --- a/tests/python_client/testcases/test_bulk_insert.py +++ b/tests/python_client/testcases/test_bulk_insert.py @@ -790,6 +790,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert): cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable), cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL, nullable=nullable), cf.gen_geometry_field(name=df.geo_field), + cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable), cf.gen_float_vec_field(name=df.float_vec_field, dim=float_vec_field_dim), cf.gen_binary_vec_field(name=df.binary_vec_field, dim=binary_vec_field_dim), cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=bf16_vec_field_dim), @@ -1168,6 +1169,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert): cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable), cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL, nullable=nullable), cf.gen_geometry_field(name=df.geo_field), + cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable), cf.gen_float_vec_field(name=df.float_vec_field, 
dim=float_vec_field_dim), cf.gen_binary_vec_field(name=df.binary_vec_field, dim=binary_vec_field_dim), cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=bf16_vec_field_dim), @@ -2191,6 +2193,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert): cf.gen_float_field(name=df.float_field, nullable=nullable), cf.gen_string_field(name=df.string_field, nullable=nullable), cf.gen_json_field(name=df.json_field, nullable=nullable), + cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable), cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64, nullable=nullable), cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT, nullable=nullable), cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable), @@ -2227,6 +2230,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert): df.float_field: 1.0 if not (nullable and random.random() < 0.5) else None, df.string_field: "string" if not (nullable and random.random() < 0.5) else None, df.json_field: json_value[i%len(json_value)] if not (nullable and random.random() < 0.5) else None, + df.timestamp_field: cf.gen_timestamptz_str() if not (nullable and random.random() < 0.5) else None, df.array_int_field: [1, 2] if not (nullable and random.random() < 0.5) else None, df.array_float_field: [1.0, 2.0] if not (nullable and random.random() < 0.5) else None, df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None,