test: unify schema retrieval to use get_schema() method in chaos checker (#45985)

/kind improvement


Replace direct self.schema access and describe_collection() calls with the
get_schema() method to ensure consistent schema handling with complete
struct_fields information. Also fix FlushChecker error handling so that
insert/flush failures are logged and returned to the caller instead of being
silently swallowed, and lower the schema log from info to debug level.
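Not shown in this diff is the get_schema() implementation itself. As a rough
sketch of the contract the description implies (hypothetical body, inferred
only from the calls it replaces):

    # Hypothetical sketch of the accessor this commit standardizes on; the
    # real implementation lives outside this diff. Assumption: it returns the
    # server-side schema, so struct_fields are always present, unlike a
    # possibly stale locally cached self.schema.
    def get_schema(self):
        return self.milvus_client.describe_collection(collection_name=self.c_name)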

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
Author: zhuwenxing <wenxing.zhu@zilliz.com>
Date: 2025-12-02 09:43:10 +08:00 (committed by GitHub)
Commit: f68bd44f35 (parent: fbfbd3bce2)

@@ -390,7 +390,7 @@ class Checker:
         enable_dynamic_field = kwargs.get("enable_dynamic_field", True)
         schema = cf.gen_all_datatype_collection_schema(dim=dim, enable_struct_array_field=enable_struct_array_field, enable_dynamic_field=enable_dynamic_field) if schema is None else schema
-        log.info(f"schema: {schema}")
+        log.debug(f"schema: {schema}")
         self.schema = schema
         self.dim = cf.get_dim_by_schema(schema=schema)
         self.int64_field_name = cf.get_int64_field_name(schema=schema)
@@ -601,8 +601,7 @@ class Checker:
     def insert_data(self, nb=constants.DELTA_PER_INS, partition_name=None):
         partition_name = self.p_name if partition_name is None else partition_name
-        client_schema = self.milvus_client.describe_collection(collection_name=self.c_name)
-        data = cf.gen_row_data_by_schema(nb=nb, schema=client_schema)
+        data = cf.gen_row_data_by_schema(nb=nb, schema=self.get_schema())
         ts_data = []
         for i in range(nb):
             time.sleep(0.001)
@@ -807,7 +806,7 @@ class CollectionRenameChecker(Checker):
             result = self.milvus_client.has_collection(collection_name=new_collection_name)
             if result:
                 self.c_name = new_collection_name
-                data = cf.gen_row_data_by_schema(nb=1, schema=self.schema)
+                data = cf.gen_row_data_by_schema(nb=1, schema=self.get_schema())
                 self.milvus_client.insert(collection_name=new_collection_name, data=data)
         return res, result
@@ -1104,7 +1103,7 @@ class InsertFlushChecker(Checker):
         try:
             self.milvus_client.insert(
                 collection_name=self.c_name,
-                data=cf.gen_row_data_by_schema(nb=constants.ENTITIES_FOR_SEARCH, schema=self.schema),
+                data=cf.gen_row_data_by_schema(nb=constants.ENTITIES_FOR_SEARCH, schema=self.get_schema()),
                 timeout=timeout
             )
             insert_result = True
@@ -1162,14 +1161,14 @@ class FlushChecker(Checker):
         try:
             self.milvus_client.insert(
                 collection_name=self.c_name,
-                data=cf.gen_row_data_by_schema(nb=constants.ENTITIES_FOR_SEARCH, schema=self.schema),
+                data=cf.gen_row_data_by_schema(nb=constants.ENTITIES_FOR_SEARCH, schema=self.get_schema()),
                 timeout=timeout
             )
-            result = True
-        except Exception:
-            result = False
-        res, result = self.flush()
-        return res, result
+            res, result = self.flush()
+            return res, result
+        except Exception as e:
+            log.error(f"run task error: {e}")
+            return str(e), False

     def keep_running(self):
         while self._keep_running:
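The FlushChecker hunk is more than a schema swap: previously an insert
failure was swallowed and flush() still ran unconditionally, masking the
original error. A minimal sketch of the new control flow, following the hunk
above (the method name run_task and the timeout parameter are assumptions,
since the enclosing signature is outside the hunk):

    # Sketch of the new FlushChecker flow: flush() now runs inside the try
    # block, so any insert or flush failure is logged and surfaced to the
    # caller instead of being silently discarded.
    def run_task(self, timeout=120):   # name/signature assumed
        try:
            self.milvus_client.insert(
                collection_name=self.c_name,
                data=cf.gen_row_data_by_schema(nb=constants.ENTITIES_FOR_SEARCH,
                                               schema=self.get_schema()),
                timeout=timeout,
            )
            res, result = self.flush()   # only reached if insert succeeded
            return res, result
        except Exception as e:
            log.error(f"run task error: {e}")
            return str(e), False         # report the failure upstream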
@@ -1239,9 +1238,7 @@ class InsertChecker(Checker):
     @trace()
     def insert_entities(self):
-        # Use describe_collection directly to preserve struct_fields information
-        schema = self.milvus_client.describe_collection(self.c_name)
-        data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=schema)
+        data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=self.get_schema())
         rows = len(data)
         ts_data = []
         for i in range(constants.DELTA_PER_INS):
@@ -1327,8 +1324,7 @@ class InsertFreshnessChecker(Checker):
         self.file_name = f"/tmp/ci_logs/insert_data_{uuid.uuid4()}.parquet"

     def insert_entities(self):
-        schema = self.get_schema()
-        data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=schema)
+        data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=self.get_schema())
         ts_data = []
         for i in range(constants.DELTA_PER_INS):
             time.sleep(0.001)
@@ -1386,8 +1382,7 @@ class UpsertChecker(Checker):
         if collection_name is None:
             collection_name = cf.gen_unique_str("UpsertChecker_")
         super().__init__(collection_name=collection_name, shards_num=shards_num, schema=schema)
-        schema = self.milvus_client.describe_collection(collection_name=self.c_name)
-        self.data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=schema)
+        self.data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=self.get_schema())

     @trace()
     def upsert_entities(self):
@@ -1405,8 +1400,7 @@ class UpsertChecker(Checker):
         # half of the data is upsert, the other half is insert
         rows = len(self.data)
         pk_old = [d[self.int64_field_name] for d in self.data[:rows // 2]]
-        schema = self.milvus_client.describe_collection(collection_name=self.c_name)
-        self.data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=schema)
+        self.data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=self.get_schema())
         pk_new = [d[self.int64_field_name] for d in self.data[rows // 2:]]
         pk_update = pk_old + pk_new
         for i in range(rows):
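As the surrounding context shows, each upsert batch reuses the first half of
the previous batch's primary keys and takes fresh keys for the second half,
so every call exercises both the update and the insert path of upsert. A
small self-contained illustration of that key arithmetic (pure Python, no
Milvus calls; the field name "int64" stands in for self.int64_field_name):

    # Illustration of the half-update / half-insert key split used above.
    prev_batch = [{"int64": i} for i in range(10)]        # previous self.data
    new_batch = [{"int64": i} for i in range(100, 110)]   # freshly generated rows

    rows = len(prev_batch)
    pk_old = [d["int64"] for d in prev_batch[:rows // 2]]  # keys that already exist
    pk_new = [d["int64"] for d in new_batch[rows // 2:]]   # keys not yet inserted
    pk_update = pk_old + pk_new

    for i, d in enumerate(new_batch):
        d["int64"] = pk_update[i]   # first half updates, second half inserts

    assert [d["int64"] for d in new_batch] == [0, 1, 2, 3, 4, 105, 106, 107, 108, 109]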
@@ -1429,8 +1423,7 @@ class UpsertFreshnessChecker(Checker):
         if collection_name is None:
             collection_name = cf.gen_unique_str("UpsertChecker_")
         super().__init__(collection_name=collection_name, shards_num=shards_num, schema=schema)
-        schema = self.get_schema()
-        self.data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=schema)
+        self.data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=self.get_schema())

     def upsert_entities(self):
         try:
@@ -1463,8 +1456,7 @@ class UpsertFreshnessChecker(Checker):
         # half of the data is upsert, the other half is insert
         rows = len(self.data[0])
         pk_old = self.data[0][:rows // 2]
-        schema = self.get_schema()
-        self.data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=schema)
+        self.data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=self.get_schema())
         pk_new = self.data[0][rows // 2:]
         pk_update = pk_old + pk_new
         self.data[0] = pk_update
@@ -1486,8 +1478,7 @@ class PartialUpdateChecker(Checker):
         if collection_name is None:
             collection_name = cf.gen_unique_str("PartialUpdateChecker_")
         super().__init__(collection_name=collection_name, shards_num=shards_num, schema=schema, enable_struct_array_field=False)
-        schema = self.get_schema()
-        self.data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=schema)
+        self.data = cf.gen_row_data_by_schema(nb=constants.DELTA_PER_INS, schema=self.get_schema())

     @trace()
     def partial_update_entities(self):
@@ -1772,7 +1763,7 @@ class IndexCreateChecker(Checker):
         super().__init__(collection_name=collection_name, schema=schema)
         for i in range(5):
             self.milvus_client.insert(collection_name=self.c_name,
-                                      data=cf.gen_row_data_by_schema(nb=constants.ENTITIES_FOR_SEARCH, schema=self.schema),
+                                      data=cf.gen_row_data_by_schema(nb=constants.ENTITIES_FOR_SEARCH, schema=self.get_schema()),
                                       timeout=timeout)
         # do as a flush before indexing
         stats = self.milvus_client.get_collection_stats(collection_name=self.c_name)
@@ -1812,7 +1803,7 @@ class IndexDropChecker(Checker):
             collection_name = cf.gen_unique_str("IndexChecker_")
         super().__init__(collection_name=collection_name, schema=schema)
         for i in range(5):
-            self.milvus_client.insert(collection_name=self.c_name, data=cf.gen_row_data_by_schema(nb=constants.ENTITIES_FOR_SEARCH, schema=self.schema),
+            self.milvus_client.insert(collection_name=self.c_name, data=cf.gen_row_data_by_schema(nb=constants.ENTITIES_FOR_SEARCH, schema=self.get_schema()),
                                       timeout=timeout)
         # do as a flush before indexing
         stats = self.milvus_client.get_collection_stats(collection_name=self.c_name)
@@ -2272,7 +2263,7 @@ class BulkInsertChecker(Checker):
         ) as remote_writer:
             for _ in range(data_size):
-                row = cf.gen_row_data_by_schema(nb=1, schema=self.schema)[0]
+                row = cf.gen_row_data_by_schema(nb=1, schema=self.get_schema())[0]
                 remote_writer.append_row(row)
             remote_writer.commit()
         batch_files = remote_writer.batch_files
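The bulk-insert path builds its import files through a writer object; the
hunk shows row generation switching to get_schema() there too. A rough
sketch of the surrounding writer loop, assuming pymilvus's RemoteBulkWriter
(only the lines between "as remote_writer:" and "batch_files" mirror the
diff; the constructor arguments and wrapper are assumptions):

    from pymilvus.bulk_writer import RemoteBulkWriter, BulkFileType

    def prepare_bulk_files(self, data_size):
        # Assumed wrapper around the loop visible in the hunk above.
        with RemoteBulkWriter(
            schema=self.schema,                # assumed: collection schema object
            remote_path="bulk_data",           # assumed: object-storage prefix
            connect_param=self.connect_param,  # assumed: MinIO/S3 connection info
            file_type=BulkFileType.PARQUET,    # assumed output format
        ) as remote_writer:
            for _ in range(data_size):
                row = cf.gen_row_data_by_schema(nb=1, schema=self.get_schema())[0]
                remote_writer.append_row(row)  # buffer one generated row
            remote_writer.commit()             # flush buffered rows to storage
        return remote_writer.batch_files       # file groups for bulk import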