test: add more test cases and add bulk insert scenario (#45770)
Issue: #45756

1. add bulk insert scenario
2. fix small issues in e2e cases
3. add search group by test case
4. add timestamptz to gen_all_datatype_collection_schema
5. modify partial update test case to ensure correct results from the timestamptz field

On branch feature/timestamps
Changes to be committed:
    modified: common/bulk_insert_data.py
    modified: common/common_func.py
    modified: common/common_type.py
    modified: milvus_client/test_milvus_client_partial_update.py
    modified: milvus_client/test_milvus_client_timestamptz.py
    modified: pytest.ini
    modified: testcases/test_bulk_insert.py

Signed-off-by: Eric Hou <eric.hou@zilliz.com>
Co-authored-by: Eric Hou <eric.hou@zilliz.com>
This commit: 228eb0f5d0 (parent: 2134f83aa3)
common/bulk_insert_data.py

@@ -12,6 +12,7 @@ import uuid
 from faker import Faker
 from sklearn import preprocessing
 from common.common_func import gen_unique_str
+from common.common_func import gen_timestamptz_str
 from common.minio_comm import copy_files_to_minio
 from utils.util_log import test_log as log
 import pyarrow as pa
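The helper gen_timestamptz_str is imported here but its body is not part of this diff. A minimal sketch of what such a generator could look like, assuming it only needs to return a random RFC 3339 timestamp string with an explicit UTC offset (the name and parameters below are hypothetical):

    # Hypothetical sketch -- not the actual common_func implementation.
    import random
    from datetime import datetime, timedelta, timezone

    def gen_timestamptz_str_sketch(start_year=2000, span_days=9000):
        """Return a random RFC 3339 timestamp string with a UTC offset."""
        base = datetime(start_year, 1, 1, tzinfo=timezone.utc)
        ts = base + timedelta(days=random.randint(0, span_days),
                              seconds=random.randint(0, 86399),
                              microseconds=random.randint(0, 999_999))
        return ts.isoformat()  # e.g. "2013-07-21T08:15:30.123456+00:00"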
@@ -46,6 +47,7 @@ class DataField:
     array_string_field = "array_string"
     new_field = "new_field"
     geo_field = "geo"
+    timestamp_field = "timestamptz"


 class DataErrorType:
@@ -700,6 +702,11 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128
             data = gen_wkt_geometry(rows)
         else:
             data = [None for _ in range(start, rows + start)]
+    elif data_field == DataField.timestamp_field:
+        if not nullable:
+            data = [gen_timestamptz_str() for _ in range(start, rows + start)]
+        else:
+            data = [None for _ in range(start, rows + start)]
     else:
         raise Exception("unsupported field name")

@@ -867,6 +874,11 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, d
                 d[data_field] = gen_wkt_geometry(1)[0]
             else:
                 d[data_field] = None
+        elif data_field == DataField.timestamp_field:
+            if not nullable:
+                d[data_field] = gen_timestamptz_str()
+            else:
+                d[data_field] = None
         else:
             raise Exception("unsupported field name")
     if enable_dynamic_field:
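The two hunks above are the column-wise (gen_data_by_data_field) and row-wise (gen_dict_data_by_data_field) forms of the same generation logic; in miniature (helper names hypothetical):

    # Illustrative only: one per-field generator consumed column-wise and row-wise.
    def gen_value(field):
        return "2024-01-01T00:00:00+00:00" if field == "timestamptz" else 0

    def gen_columns(fields, rows):
        return {f: [gen_value(f) for _ in range(rows)] for f in fields}

    def gen_rows(fields, rows):
        return [{f: gen_value(f) for f in fields} for _ in range(rows)]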
common/common_func.py

@@ -687,6 +687,9 @@ def gen_geometry_field(name=ct.default_geometry_field_name, description=ct.defau
 def gen_geometry_field(name="geo", description=ct.default_desc, is_primary=False, **kwargs):
     return gen_scalar_field(DataType.GEOMETRY, name=name, description=description, is_primary=is_primary, **kwargs)

+def gen_timestamptz_field(name=ct.default_timestamptz_field_name, description=ct.default_desc, is_primary=False, **kwargs):
+    return gen_scalar_field(DataType.TIMESTAMPTZ, name=name, description=description, is_primary=is_primary, **kwargs)
+

 def gen_array_field(name=ct.default_array_field_name, element_type=DataType.INT64, max_capacity=ct.default_max_capacity,
                     description=ct.default_desc, is_primary=False, **kwargs):
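Like the other gen_*_field helpers, the new factory just curries gen_scalar_field with DataType.TIMESTAMPTZ. A usage sketch of how a schema field list might pull it in, assuming the repo's common_func helpers are importable (mirroring the bulk-insert hunks further down):

    # Usage sketch, assuming the test repo's helpers; field names are arbitrary.
    from common import common_func as cf

    fields = [
        cf.gen_int64_field(name="id", is_primary=True),
        cf.gen_timestamptz_field(name="timestamptz", nullable=True),
        cf.gen_float_vec_field(name="float_vec", dim=128),
    ]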
@@ -859,6 +862,7 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_fiel
                                        analyzer_params=analyzer_params)
     schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=nullable)
     schema.add_field(ct.default_geometry_field_name, DataType.GEOMETRY, nullable=nullable)
+    schema.add_field(ct.default_timestamptz_field_name, DataType.TIMESTAMPTZ, nullable=nullable)
     schema.add_field("array_int", DataType.ARRAY, element_type=DataType.INT64, max_capacity=ct.default_max_capacity)
     schema.add_field("array_float", DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=ct.default_max_capacity)
     schema.add_field("array_varchar", DataType.ARRAY, element_type=DataType.VARCHAR, max_length=200, max_capacity=ct.default_max_capacity)
@@ -867,7 +871,6 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_fiel
     schema.add_field("image_emb", DataType.INT8_VECTOR, dim=dim)
     schema.add_field("text_sparse_emb", DataType.SPARSE_FLOAT_VECTOR)
     # schema.add_field("voice_emb", DataType.FLOAT_VECTOR, dim=dim)
-    # schema.add_field("timestamptz", DataType.TIMESTAMPTZ, nullable=nullable)

     # Add struct array field
     if enable_struct_array_field:
common/common_type.py

@@ -41,6 +41,7 @@ default_double_field_name = "double"
 default_string_field_name = "varchar"
 default_json_field_name = "json_field"
 default_geometry_field_name = "geometry_field"
+default_timestamptz_field_name = "timestamptz_field"
 default_array_field_name = "int_array"
 default_int8_array_field_name = "int8_array"
 default_int16_array_field_name = "int16_array"
milvus_client/test_milvus_client_partial_update.py

@@ -141,7 +141,8 @@ class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base):

         vector_field_type = [DataType.FLOAT16_VECTOR,
                              DataType.BFLOAT16_VECTOR,
-                             DataType.INT8_VECTOR]
+                             DataType.INT8_VECTOR,
+                             DataType.FLOAT_VECTOR]
         # fields to be updated
         update_fields_name = []
         scalar_update_name = []
@@ -163,6 +164,7 @@ class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base):
         expected = [{field: new_rows[i][field] for field in scalar_update_name}
                     for i in range(default_nb)]

+        expected = cf.convert_timestamptz(expected, ct.default_timestamptz_field_name, "UTC")
         result = self.query(client, collection_name, filter=f"{primary_key_field_name} >= 0",
                             check_task=CheckTasks.check_query_results,
                             output_fields=scalar_update_name,
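convert_timestamptz normalizes the expected rows into the timezone the server will answer in, so the equality check in check_query_results holds. Its body is not in this diff; a sketch of the behavior the tests appear to rely on, assuming the stored values are ISO 8601 strings with offsets (name and signature hypothetical):

    # Hypothetical sketch of the normalization the tests rely on.
    from datetime import datetime
    from zoneinfo import ZoneInfo

    def convert_timestamptz_sketch(rows, field, tz_name):
        tz = ZoneInfo(tz_name)
        out = []
        for row in rows:
            row = dict(row)  # leave the caller's rows untouched
            if row.get(field) is not None:
                row[field] = datetime.fromisoformat(row[field]).astimezone(tz).isoformat()
            out.append(row)
        return out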
milvus_client/test_milvus_client_timestamptz.py

@@ -602,6 +602,50 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):

         self.drop_collection(client, collection_name)

+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_timestamptz_search_group_by(self):
+        """
+        target: test search with group by and timestamptz
+        method:
+        1. Create a collection
+        2. Generate rows with timestamptz and insert the rows
+        3. Search with group by timestamptz
+        expected: Step 3 should succeed
+        """
+        # step 1: create collection
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_timestamp_field_name, DataType.TIMESTAMPTZ, nullable=True)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_timestamp_field_name, index_type="AUTOINDEX")
+        self.create_collection(client, collection_name, default_dim, schema=schema,
+                               consistency_level="Strong", index_params=index_params)
+
+        # step 2: generate rows with timestamptz and insert the rows
+        rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
+        self.insert(client, collection_name, rows)
+
+        # step 3: search with group by timestamptz
+        vectors_to_search = cf.gen_vectors(1, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
+        insert_ids = [i for i in range(default_nb)]
+        self.search(client, collection_name, vectors_to_search,
+                    timezone="Asia/Shanghai",
+                    time_fields="year, month, day, hour, minute, second, microsecond",
+                    group_by_field=default_timestamp_field_name,
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
+                                 "limit": default_limit})
+
+        self.drop_collection(client, collection_name)
+
     @pytest.mark.tags(CaseLabel.L1)
     def test_milvus_client_timestamptz_query(self):
         """
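Grouping the search by the timestamptz field returns at most one best hit per distinct timestamp value; the reduction in miniature (illustrative only, not the server's implementation):

    # Illustrative: keep only the best-scoring hit per group value.
    def group_top1(hits):
        """hits: (distance, pk, group_value) tuples; lower distance is better."""
        best = {}
        for dist, pk, group in sorted(hits):
            best.setdefault(group, (pk, dist))  # first seen = smallest distance
        return best

    print(group_top1([(0.1, 7, "2024-01-01T00:00:00+00:00"),
                      (0.3, 9, "2024-01-01T00:00:00+00:00"),
                      (0.2, 4, "2024-01-02T00:00:00+00:00")]))
    # -> one entry per distinct timestamptz value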
@@ -763,7 +807,8 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
         2. insert rows
         3. add field with timestamptz
         4. compact
-        expected: Step 4 should success
+        5. query the rows
+        expected: Step 4 and Step 5 should succeed
         """
         # step 1: create collection
         client = self._client()
@@ -802,6 +847,24 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
             if time.time() - start > cost:
                 raise Exception(1, f"Compact after index cost more than {cost}s")

+        # step 5: query the rows
+        # first release the collection
+        self.release_collection(client, collection_name)
+        # then load the collection
+        self.load_collection(client, collection_name)
+        # then query the rows
+        for row in rows:
+            row[default_timestamp_field_name] = None
+        self.query(client, collection_name, filter=f"0 <= {default_primary_key_field_name} < {default_nb}",
+                   check_task=CheckTasks.check_query_results,
+                   check_items={exp_res: rows,
+                                "pk_name": default_primary_key_field_name})
+
+        new_rows = cf.convert_timestamptz(new_rows, default_timestamp_field_name, "UTC")
+        self.query(client, collection_name, filter=f"{default_primary_key_field_name} >= {default_nb}",
+                   check_task=CheckTasks.check_query_results,
+                   check_items={exp_res: new_rows,
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)

     @pytest.mark.tags(CaseLabel.L1)
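The compact wait above is a bounded polling loop; the same guard factored into a reusable helper (illustrative, not part of the diff):

    # Illustrative helper: poll a condition with a hard timeout, as the compact wait does.
    import time

    def wait_until(predicate, timeout_s, interval_s=1.0):
        start = time.time()
        while not predicate():
            if time.time() - start > timeout_s:
                raise TimeoutError(f"condition not met within {timeout_s}s")
            time.sleep(interval_s)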
@@ -1092,7 +1155,7 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
         self.insert(client, collection_name, rows)

         # step 3: query the rows from different clients in different timezones
-        client2 = self._client()
+        client2 = self._client(alias="client2_alias")
         UTC_time_row = cf.convert_timestamptz(rows, default_timestamp_field_name, "UTC")
         shanghai_rows = cf.convert_timestamptz(UTC_time_row, default_timestamp_field_name, "Asia/Shanghai")
         LA_rows = cf.convert_timestamptz(UTC_time_row, default_timestamp_field_name, "America/Los_Angeles")
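The alias change matters because pymilvus keys connections by alias, so two clients created with the same default alias would share one underlying connection instead of acting as independent clients. The idea in miniature (an illustrative cache, not the pymilvus internals):

    # Illustrative cache, not the pymilvus internals.
    _connections = {}

    def get_connection(alias="default"):
        if alias not in _connections:
            _connections[alias] = object()  # stand-in for a real gRPC channel
        return _connections[alias]

    assert get_connection() is get_connection()                     # shared handle
    assert get_connection() is not get_connection("client2_alias")  # separate handle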
@@ -1111,7 +1174,6 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
         self.drop_collection(client, collection_name)

-

 class TestMilvusClientTimestamptzInvalid(TestMilvusClientV2Base):

     """
pytest.ini

@@ -1,7 +1,7 @@
 [pytest]


-addopts = --host 10.104.31.219 --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level INFO
+addopts = --host localhost --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level INFO
 # python3 -W ignore -m pytest

 log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s)
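--host is not a built-in pytest flag; it is presumably registered by the suite's conftest.py. A sketch of how such an option is typically wired up and consumed (hypothetical conftest code, using pytest's standard addoption API):

    # Hypothetical conftest.py sketch: register and consume a custom --host option.
    import pytest

    def pytest_addoption(parser):
        parser.addoption("--host", action="store", default="localhost",
                         help="Milvus server host for this test session")

    @pytest.fixture(scope="session")
    def milvus_uri(request):
        host = request.config.getoption("--host")
        return f"http://{host}:19530"  # 19530 is Milvus's default gRPC port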
testcases/test_bulk_insert.py

@@ -790,6 +790,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
             cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL, nullable=nullable),
             cf.gen_geometry_field(name=df.geo_field),
+            cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable),
             cf.gen_float_vec_field(name=df.float_vec_field, dim=float_vec_field_dim),
             cf.gen_binary_vec_field(name=df.binary_vec_field, dim=binary_vec_field_dim),
             cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=bf16_vec_field_dim),
@@ -1168,6 +1169,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
             cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL, nullable=nullable),
             cf.gen_geometry_field(name=df.geo_field),
+            cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable),
             cf.gen_float_vec_field(name=df.float_vec_field, dim=float_vec_field_dim),
             cf.gen_binary_vec_field(name=df.binary_vec_field, dim=binary_vec_field_dim),
             cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=bf16_vec_field_dim),
@@ -2191,6 +2193,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_float_field(name=df.float_field, nullable=nullable),
             cf.gen_string_field(name=df.string_field, nullable=nullable),
             cf.gen_json_field(name=df.json_field, nullable=nullable),
+            cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable),
             cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64, nullable=nullable),
             cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT, nullable=nullable),
             cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
@@ -2227,6 +2230,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
                 df.float_field: 1.0 if not (nullable and random.random() < 0.5) else None,
                 df.string_field: "string" if not (nullable and random.random() < 0.5) else None,
                 df.json_field: json_value[i % len(json_value)] if not (nullable and random.random() < 0.5) else None,
+                df.timestamp_field: cf.gen_timestamptz_str() if not (nullable and random.random() < 0.5) else None,
                 df.array_int_field: [1, 2] if not (nullable and random.random() < 0.5) else None,
                 df.array_float_field: [1.0, 2.0] if not (nullable and random.random() < 0.5) else None,
                 df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None,
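Each nullable column above uses the same 50% coin-flip; factored out as a tiny helper it reads (illustrative refactor, not part of the diff):

    # Illustrative refactor of the repeated null pattern above.
    import random

    def maybe_null(value, nullable):
        """Return value, or None half the time when the field is nullable."""
        return value if not (nullable and random.random() < 0.5) else None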