test: add more test cases and add bulk insert scenario (#45770)

Issue: #45756

1. add bulk insert scenario
2. fix small issues in e2e cases
3. add search group-by test case
4. add timestamptz to gen_all_datatype_collection_schema
5. modify partial-update test case to ensure correct results from the timestamptz field

On branch feature/timestamps
Changes to be committed:
    modified: common/bulk_insert_data.py
    modified: common/common_func.py
    modified: common/common_type.py
    modified: milvus_client/test_milvus_client_partial_update.py
    modified: milvus_client/test_milvus_client_timestamptz.py
    modified: pytest.ini
    modified: testcases/test_bulk_insert.py

Signed-off-by: Eric Hou <eric.hou@zilliz.com>
Co-authored-by: Eric Hou <eric.hou@zilliz.com>
This commit is contained in:
    parent 2134f83aa3
    commit 228eb0f5d0
diff --git a/common/bulk_insert_data.py b/common/bulk_insert_data.py
@@ -12,6 +12,7 @@ import uuid
 from faker import Faker
 from sklearn import preprocessing
 from common.common_func import gen_unique_str
+from common.common_func import gen_timestamptz_str
 from common.minio_comm import copy_files_to_minio
 from utils.util_log import test_log as log
 import pyarrow as pa
@@ -46,6 +47,7 @@ class DataField:
     array_string_field = "array_string"
     new_field = "new_field"
     geo_field = "geo"
+    timestamp_field = "timestamptz"


 class DataErrorType:
@@ -700,6 +702,11 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128
             data = gen_wkt_geometry(rows)
         else:
             data = [None for _ in range(start, rows + start)]
+    elif data_field == DataField.timestamp_field:
+        if not nullable:
+            data = [gen_timestamptz_str() for _ in range(start, rows + start)]
+        else:
+            data = [None for _ in range(start, rows + start)]
     else:
         raise Exception("unsupported field name")

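Note: the new branch fills timestamptz columns with strings from gen_timestamptz_str(), whose body is not part of this diff. A minimal sketch of what such a generator could look like, assuming it returns RFC 3339 strings with an explicit UTC offset (the _sketch name and all values are illustrative, not the repo's implementation):

    import random
    from datetime import datetime, timedelta, timezone

    def gen_timestamptz_str_sketch() -> str:
        """Return a random RFC 3339 timestamp string with an explicit UTC offset."""
        base = datetime(2020, 1, 1, tzinfo=timezone.utc)
        jitter = timedelta(seconds=random.randint(0, 5 * 365 * 24 * 3600))
        return (base + jitter).isoformat()  # e.g. "2023-06-18T07:42:11+00:00"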
@@ -867,6 +874,11 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, dim=128
                 d[data_field] = gen_wkt_geometry(1)[0]
             else:
                 d[data_field] = None
+        elif data_field == DataField.timestamp_field:
+            if not nullable:
+                d[data_field] = gen_timestamptz_str()
+            else:
+                d[data_field] = None
         else:
             raise Exception("unsupported field name")
     if enable_dynamic_field:
diff --git a/common/common_func.py b/common/common_func.py
@@ -687,6 +687,9 @@ def gen_geometry_field(name=ct.default_geometry_field_name, description=ct.default_desc
 def gen_geometry_field(name="geo", description=ct.default_desc, is_primary=False, **kwargs):
     return gen_scalar_field(DataType.GEOMETRY, name=name, description=description, is_primary=is_primary, **kwargs)


+def gen_timestamptz_field(name=ct.default_timestamptz_field_name, description=ct.default_desc, is_primary=False, **kwargs):
+    return gen_scalar_field(DataType.TIMESTAMPTZ, name=name, description=description, is_primary=is_primary, **kwargs)
+
 def gen_array_field(name=ct.default_array_field_name, element_type=DataType.INT64, max_capacity=ct.default_max_capacity,
                     description=ct.default_desc, is_primary=False, **kwargs):
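Note: the new helper slots into schema construction the same way the other gen_*_field helpers do, with extra kwargs such as nullable forwarded through **kwargs to gen_scalar_field (the bulk-insert hunks below rely on this). A hypothetical call site, assuming the usual "from common import common_func as cf" import; the "ts" name is made up:

    from common import common_func as cf

    fields = [
        cf.gen_timestamptz_field(),                           # uses ct.default_timestamptz_field_name
        cf.gen_timestamptz_field(name="ts", nullable=True),   # nullable variant
    ]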
@@ -859,6 +862,7 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_field
                      analyzer_params=analyzer_params)
     schema.add_field(ct.default_json_field_name, DataType.JSON, nullable=nullable)
     schema.add_field(ct.default_geometry_field_name, DataType.GEOMETRY, nullable=nullable)
+    schema.add_field(ct.default_timestamptz_field_name, DataType.TIMESTAMPTZ, nullable=nullable)
     schema.add_field("array_int", DataType.ARRAY, element_type=DataType.INT64, max_capacity=ct.default_max_capacity)
     schema.add_field("array_float", DataType.ARRAY, element_type=DataType.FLOAT, max_capacity=ct.default_max_capacity)
     schema.add_field("array_varchar", DataType.ARRAY, element_type=DataType.VARCHAR, max_length=200, max_capacity=ct.default_max_capacity)
@@ -867,7 +871,6 @@ def gen_all_datatype_collection_schema(description=ct.default_desc, primary_field
     schema.add_field("image_emb", DataType.INT8_VECTOR, dim=dim)
     schema.add_field("text_sparse_emb", DataType.SPARSE_FLOAT_VECTOR)
     # schema.add_field("voice_emb", DataType.FLOAT_VECTOR, dim=dim)
-    # schema.add_field("timestamptz", DataType.TIMESTAMPTZ, nullable=nullable)

     # Add struct array field
     if enable_struct_array_field:
diff --git a/common/common_type.py b/common/common_type.py
@@ -41,6 +41,7 @@ default_double_field_name = "double"
 default_string_field_name = "varchar"
 default_json_field_name = "json_field"
 default_geometry_field_name = "geometry_field"
+default_timestamptz_field_name = "timestamptz_field"
 default_array_field_name = "int_array"
 default_int8_array_field_name = "int8_array"
 default_int16_array_field_name = "int16_array"
diff --git a/milvus_client/test_milvus_client_partial_update.py b/milvus_client/test_milvus_client_partial_update.py
@@ -141,7 +141,8 @@ class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base):

         vector_field_type = [DataType.FLOAT16_VECTOR,
                              DataType.BFLOAT16_VECTOR,
-                             DataType.INT8_VECTOR]
+                             DataType.INT8_VECTOR,
+                             DataType.FLOAT_VECTOR]
         # fields to be updated
         update_fields_name = []
         scalar_update_name = []
@@ -163,6 +164,7 @@ class TestMilvusClientPartialUpdateValid(TestMilvusClientV2Base):
         expected = [{field: new_rows[i][field] for field in scalar_update_name}
                     for i in range(default_nb)]

+        expected = cf.convert_timestamptz(expected, ct.default_timestamptz_field_name, "UTC")
         result = self.query(client, collection_name, filter=f"{primary_key_field_name} >= 0",
                             check_task=CheckTasks.check_query_results,
                             output_fields=scalar_update_name,
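Note: cf.convert_timestamptz itself is outside this diff. Judging from its call sites (rows, field name, target timezone), it re-renders stored timestamp strings in the given zone so that expected values match what the server returns. A minimal sketch under that assumption (Python 3.9+ for zoneinfo):

    from datetime import datetime
    from zoneinfo import ZoneInfo

    def convert_timestamptz_sketch(rows, field, tz):
        """Re-render each row's timestamptz string in the target timezone."""
        for row in rows:
            if row.get(field) is not None:
                dt = datetime.fromisoformat(row[field])
                row[field] = dt.astimezone(ZoneInfo(tz)).isoformat()
        return rows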
diff --git a/milvus_client/test_milvus_client_timestamptz.py b/milvus_client/test_milvus_client_timestamptz.py
@@ -602,6 +602,50 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):

         self.drop_collection(client, collection_name)

+    @pytest.mark.tags(CaseLabel.L1)
+    def test_milvus_client_timestamptz_search_group_by(self):
+        """
+        target: test search with group by and timestamptz
+        method:
+            1. Create a collection
+            2. Generate rows with timestamptz and insert the rows
+            3. Search with group by timestamptz
+        expected: Step 3 should succeed
+        """
+        # step 1: create collection
+        client = self._client()
+        collection_name = cf.gen_collection_name_by_testcase_name()
+        schema = self.create_schema(client, enable_dynamic_field=False)[0]
+        schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
+        schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
+        schema.add_field(default_timestamp_field_name, DataType.TIMESTAMPTZ, nullable=True)
+        index_params = self.prepare_index_params(client)[0]
+        index_params.add_index(default_primary_key_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_vector_field_name, index_type="AUTOINDEX")
+        index_params.add_index(default_timestamp_field_name, index_type="AUTOINDEX")
+        self.create_collection(client, collection_name, default_dim, schema=schema,
+                               consistency_level="Strong", index_params=index_params)
+
+        # step 2: generate rows with timestamptz and insert the rows
+        rows = cf.gen_row_data_by_schema(nb=default_nb, schema=schema)
+        self.insert(client, collection_name, rows)
+
+        # step 3: search with group by timestamptz
+        vectors_to_search = cf.gen_vectors(1, default_dim, vector_data_type=DataType.FLOAT_VECTOR)
+        insert_ids = [i for i in range(default_nb)]
+        self.search(client, collection_name, vectors_to_search,
+                    timezone="Asia/Shanghai",
+                    time_fields="year, month, day, hour, minute, second, microsecond",
+                    group_by_field=default_timestamp_field_name,
+                    check_task=CheckTasks.check_search_results,
+                    check_items={"enable_milvus_client_api": True,
+                                 "nq": len(vectors_to_search),
+                                 "ids": insert_ids,
+                                 "pk_name": default_primary_key_field_name,
+                                 "limit": default_limit})
+
+        self.drop_collection(client, collection_name)
+
     @pytest.mark.tags(CaseLabel.L1)
     def test_milvus_client_timestamptz_query(self):
         """
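Note: at the pymilvus level, the wrapped self.search call corresponds to a grouping search. A sketch against a bare MilvusClient; the collection name, URI, and vector are illustrative, and the timezone/time_fields kwargs come from this feature branch, so only group_by_field is shown here:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")  # assumed local deployment
    res = client.search(
        "my_collection",
        data=[[0.1] * 128],                    # one 128-dim query vector
        limit=10,
        group_by_field="timestamptz_field",    # at most one hit per distinct timestamp value
    )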
@@ -763,7 +807,8 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
             2. insert rows
             3. add field with timestamptz
             4. compact
-        expected: Step 4 should success
+            5. query the rows
+        expected: Steps 4 and 5 should succeed
         """
         # step 1: create collection
         client = self._client()
@@ -802,6 +847,24 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
             if time.time() - start > cost:
                 raise Exception(1, f"Compact after index cost more than {cost}s")

+        # step 5: query the rows
+        # first release the collection
+        self.release_collection(client, collection_name)
+        # then load the collection
+        self.load_collection(client, collection_name)
+        # then query the rows
+        for row in rows:
+            row[default_timestamp_field_name] = None
+        self.query(client, collection_name, filter=f"0 <= {default_primary_key_field_name} < {default_nb}",
+                   check_task=CheckTasks.check_query_results,
+                   check_items={exp_res: rows,
+                                "pk_name": default_primary_key_field_name})
+
+        new_rows = cf.convert_timestamptz(new_rows, default_timestamp_field_name, "UTC")
+        self.query(client, collection_name, filter=f"{default_primary_key_field_name} >= {default_nb}",
+                   check_task=CheckTasks.check_query_results,
+                   check_items={exp_res: new_rows,
+                                "pk_name": default_primary_key_field_name})
         self.drop_collection(client, collection_name)

     @pytest.mark.tags(CaseLabel.L1)
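Note: the release/load cycle is what makes a freshly added field visible to queries, and rows inserted before the field existed come back with None for it (hence the rows are nulled before comparison). The equivalent sequence against a bare MilvusClient; collection name, filter, and URI are assumed:

    from pymilvus import MilvusClient

    client = MilvusClient(uri="http://localhost:19530")      # assumed local deployment
    client.release_collection("my_collection")               # drop the loaded copy
    client.load_collection("my_collection")                  # reload with the new field
    rows = client.query("my_collection", filter="id >= 0")   # old rows: new field is None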
@@ -1092,7 +1155,7 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
         self.insert(client, collection_name, rows)

         # step 3: query the rows from different client in different timezone
-        client2 = self._client()
+        client2 = self._client(alias="client2_alias")
         UTC_time_row = cf.convert_timestamptz(rows, default_timestamp_field_name, "UTC")
         shanghai_rows = cf.convert_timestamptz(UTC_time_row, default_timestamp_field_name, "Asia/Shanghai")
         LA_rows = cf.convert_timestamptz(UTC_time_row, default_timestamp_field_name, "America/Los_Angeles")
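Note: pymilvus caches connections per alias, so calling self._client() twice would hand back the same underlying connection; the explicit alias forces a genuinely separate client for the cross-timezone check. The same idea with the low-level API (URI assumed):

    from pymilvus import connections

    connections.connect(alias="default", uri="http://localhost:19530")
    connections.connect(alias="client2_alias", uri="http://localhost:19530")  # distinct connection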
@@ -1111,7 +1174,6 @@ class TestMilvusClientTimestamptzValid(TestMilvusClientV2Base):
         self.drop_collection(client, collection_name)


-
 class TestMilvusClientTimestamptzInvalid(TestMilvusClientV2Base):

     """
diff --git a/pytest.ini b/pytest.ini
@@ -1,7 +1,7 @@
 [pytest]


-addopts = --host 10.104.31.219 --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level INFO
+addopts = --host localhost --html=/tmp/ci_logs/report.html --self-contained-html -v --log-cli-level INFO
 # python3 -W ignore -m pytest

 log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s)
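Note: reverting addopts from the internal CI address back to localhost keeps the checked-in default runnable anywhere. With it, a typical local run of the new cases looks like the comment already in the file suggests (the -k filter below is illustrative):

    python3 -W ignore -m pytest milvus_client/test_milvus_client_timestamptz.py -k group_by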
diff --git a/testcases/test_bulk_insert.py b/testcases/test_bulk_insert.py
@@ -790,6 +790,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
             cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL, nullable=nullable),
             cf.gen_geometry_field(name=df.geo_field),
+            cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable),
             cf.gen_float_vec_field(name=df.float_vec_field, dim=float_vec_field_dim),
             cf.gen_binary_vec_field(name=df.binary_vec_field, dim=binary_vec_field_dim),
             cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=bf16_vec_field_dim),
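Note: since the bulk-insert schema now includes the timestamptz field, generated data files must carry a matching column. A minimal sketch of writing such a column to a parquet chunk with pyarrow (which bulk_insert_data.py already imports); path, row count, and the constant value are illustrative:

    import pyarrow as pa
    import pyarrow.parquet as pq

    rows = 1000
    table = pa.Table.from_pydict({
        "timestamptz": ["2024-01-01T08:00:00+08:00"] * rows,  # RFC 3339 strings
    })
    pq.write_table(table, "/tmp/bulk_insert_timestamptz.parquet")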
@@ -1168,6 +1169,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
             cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL, nullable=nullable),
             cf.gen_geometry_field(name=df.geo_field),
+            cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable),
             cf.gen_float_vec_field(name=df.float_vec_field, dim=float_vec_field_dim),
             cf.gen_binary_vec_field(name=df.binary_vec_field, dim=binary_vec_field_dim),
             cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=bf16_vec_field_dim),
@@ -2191,6 +2193,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_float_field(name=df.float_field, nullable=nullable),
             cf.gen_string_field(name=df.string_field, nullable=nullable),
             cf.gen_json_field(name=df.json_field, nullable=nullable),
+            cf.gen_timestamptz_field(name=df.timestamp_field, nullable=nullable),
             cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64, nullable=nullable),
             cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT, nullable=nullable),
             cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100, nullable=nullable),
@@ -2227,6 +2230,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             df.float_field: 1.0 if not (nullable and random.random() < 0.5) else None,
             df.string_field: "string" if not (nullable and random.random() < 0.5) else None,
             df.json_field: json_value[i%len(json_value)] if not (nullable and random.random() < 0.5) else None,
+            df.timestamp_field: cf.gen_timestamptz_str() if not (nullable and random.random() < 0.5) else None,
             df.array_int_field: [1, 2] if not (nullable and random.random() < 0.5) else None,
             df.array_float_field: [1.0, 2.0] if not (nullable and random.random() < 0.5) else None,
             df.array_string_field: ["string1", "string2"] if not (nullable and random.random() < 0.5) else None,