From 3aa0b769e52470d085e89d28488f97f69cfaf5f3 Mon Sep 17 00:00:00 2001 From: zhuwenxing Date: Thu, 11 Dec 2025 13:49:12 +0800 Subject: [PATCH] test: add unique error message collection in chaos checker (#46262) /kind improvement - Add normalize_error_message function to extract and normalize error text - Collect unique error messages during chaos test execution - Display error details in assertion messages for better debugging Signed-off-by: zhuwenxing --- tests/python_client/chaos/chaos_commons.py | 8 ++++- tests/python_client/chaos/checker.py | 38 ++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/tests/python_client/chaos/chaos_commons.py b/tests/python_client/chaos/chaos_commons.py index bc45c98cc4..b22e49e5ce 100644 --- a/tests/python_client/chaos/chaos_commons.py +++ b/tests/python_client/chaos/chaos_commons.py @@ -109,6 +109,7 @@ def assert_statistic( succ_rate = checkers[k].succ_rate() total = checkers[k].total() average_time = checkers[k].average_time + error_messages = getattr(checkers[k], 'error_messages', set()) if expectations.get(k, "") == constants.FAIL: log.info( f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}" @@ -121,7 +122,12 @@ def assert_statistic( log.info( f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}" ) + # Build assertion message with error details + assert_msg = f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}" + if error_messages: + error_details = "; ".join(error_messages) + assert_msg += f", unique errors({len(error_messages)}): [{error_details}]" pytest.assume( succ_rate >= succ_rate_threshold and total > 2, - f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}", + assert_msg, ) diff --git a/tests/python_client/chaos/checker.py b/tests/python_client/chaos/checker.py index ddb37dbcad..26292ae0c4 100644 --- 
def _letters_only(text):
    """Replace every non-letter, non-whitespace character with a space, then
    collapse whitespace runs into single spaces and strip the ends."""
    return re.sub(r'\s+', ' ', re.sub(r'[^a-zA-Z\s]', ' ', text)).strip()


def normalize_error_message(error_msg):
    """
    Normalize an error into a letters-only string so similar errors group together.

    The input is converted with ``str()`` and then reduced using the first of
    these sources that yields a non-empty result:

    1. the values of all ``message=`` / ``message:`` fields (case-insensitive),
    2. the values of all double-quoted ``details = "..."`` fields,
    3. the entire text.

    Digits and punctuation are dropped, so variable parts such as ids,
    timestamps or counters do not produce distinct entries.

    :param error_msg: any object; its ``str()`` representation is parsed
    :return: the normalized text; an empty string only when the whole input
        contains no ASCII letters
    """
    msg = str(error_msg)
    # Ordered from most to least specific; each entry is (pattern, regex flags).
    field_patterns = (
        (r'message[=:]\s*["\']?([^"\'>,\)]+)', re.IGNORECASE),
        (r'details\s*=\s*"([^"]+)"', 0),
    )
    for pattern, flags in field_patterns:
        normalized = _letters_only(' '.join(re.findall(pattern, msg, flags)))
        # A field holding only digits/punctuation normalizes to ''. Returning
        # that would merge unrelated errors under one empty key, so fall
        # through to the next, broader source instead.
        if normalized:
            return normalized
    # Last fallback: keep only letters from the entire message
    return _letters_only(msg)
@@ class Checker: self._succ = 0 self._fail = 0 self.fail_records = [] + self.error_messages = set() # Store unique error messages self._keep_running = True self.rsp_times = [] self.average_time = 0 @@ -675,6 +712,7 @@ class Checker: self._fail = 0 self.rsp_times = [] self.fail_records = [] + self.error_messages = set() self.average_time = 0 def get_rto(self):