test: add unique error message collection in chaos checker (#46262)

/kind improvement

- Add normalize_error_message function to extract and normalize error
text
- Collect unique error messages during chaos test execution
- Display error details in assertion messages for better debugging

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
This commit is contained in:
zhuwenxing 2025-12-11 13:49:12 +08:00 committed by GitHub
parent 75d6f0d509
commit 3aa0b769e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 45 additions and 1 deletions

View File

@ -109,6 +109,7 @@ def assert_statistic(
succ_rate = checkers[k].succ_rate()
total = checkers[k].total()
average_time = checkers[k].average_time
error_messages = getattr(checkers[k], 'error_messages', set())
if expectations.get(k, "") == constants.FAIL:
log.info(
f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}"
@ -121,7 +122,12 @@ def assert_statistic(
log.info(
f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}"
)
# Build assertion message with error details
assert_msg = f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}"
if error_messages:
error_details = "; ".join(error_messages)
assert_msg += f", unique errors({len(error_messages)}): [{error_details}]"
pytest.assume(
succ_rate >= succ_rate_threshold and total > 2,
f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}",
assert_msg,
)

View File

@ -2,6 +2,7 @@ import pytest
import unittest
from enum import Enum
import random
import re
import time
import threading
import uuid
@ -264,6 +265,33 @@ def create_index_params_from_dict(field_name: str, index_param_dict: dict) -> In
return index_params
def _keep_letters_only(text):
    """Replace every non-letter character with a space, then collapse whitespace."""
    letters = re.sub(r'[^a-zA-Z\s]', ' ', text)
    return re.sub(r'\s+', ' ', letters).strip()


def normalize_error_message(error_msg):
    """
    Normalize an error into a letters-only key so similar errors group together.

    Extraction is attempted in this order, and the first attempt that yields
    a non-empty result wins:

    1. the values of every ``message=`` / ``message:`` field (case-insensitive),
    2. the values of every ``details = "..."`` field,
    3. the whole stringified error.

    Digits and punctuation are dropped, so errors that differ only in ids,
    counts or timestamps map to the same key.

    :param error_msg: any object; it is converted with ``str()`` first
    :return: normalized text containing only ASCII letters and single spaces
    """
    msg = str(error_msg)
    field_patterns = (
        (r'message[=:]\s*["\']?([^"\'>,\)]+)', re.IGNORECASE),
        (r'details\s*=\s*"([^"]+)"', 0),
    )
    for pattern, flags in field_patterns:
        values = re.findall(pattern, msg, flags)
        if not values:
            continue
        normalized = _keep_letters_only(' '.join(values))
        # A field holding only digits/punctuation normalizes to "", which is
        # useless as a grouping key; fall through to the next strategy instead.
        if normalized:
            return normalized
    # Last fallback: keep only letters from the entire message
    return _keep_letters_only(msg)
def trace(fmt=DEFAULT_FMT, prefix='test', flag=True):
def decorate(func):
@functools.wraps(func)
@ -298,6 +326,14 @@ def trace(fmt=DEFAULT_FMT, prefix='test', flag=True):
else:
self._fail += 1
self.fail_records.append(("failure", self._succ + self._fail, start_time, start_time_ts))
# Collect unique error messages (normalized to group similar errors)
if hasattr(res, 'message'):
normalized_msg = normalize_error_message(res.message)
elif res is not None:
normalized_msg = normalize_error_message(str(res))
else:
normalized_msg = "Unknown error"
self.error_messages.add(normalized_msg)
return res, result
return inner_wrapper
@ -346,6 +382,7 @@ class Checker:
self._succ = 0
self._fail = 0
self.fail_records = []
self.error_messages = set() # Store unique error messages
self._keep_running = True
self.rsp_times = []
self.average_time = 0
@ -675,6 +712,7 @@ class Checker:
self._fail = 0
self.rsp_times = []
self.fail_records = []
self.error_messages = set()
self.average_time = 0
def get_rto(self):