From 33de788ba8d0bfc908dfc72a22c4afcefc23cf60 Mon Sep 17 00:00:00 2001
From: zhuwenxing
Date: Tue, 14 Feb 2023 10:06:34 +0800
Subject: [PATCH] [test]Add rolling update test (#22144)

Signed-off-by: zhuwenxing
Co-authored-by: Sheldon
---
 tests/python_client/chaos/chaos_commons.py    |  6 +--
 tests/python_client/chaos/checker.py          | 13 +++--
 .../test_single_request_operation.py          |  2 +-
 .../scripts/breakdown_rolling_update.py       | 53 +++++++++++++++++++
 tests/python_client/requirements.txt          |  5 +-
 tests/scripts/breakdown_rolling_update.py     | 37 +++++++++++++
 6 files changed, 108 insertions(+), 8 deletions(-)
 create mode 100644 tests/python_client/deploy/scripts/breakdown_rolling_update.py
 create mode 100644 tests/scripts/breakdown_rolling_update.py

diff --git a/tests/python_client/chaos/chaos_commons.py b/tests/python_client/chaos/chaos_commons.py
index 4d38be28d5..39a2e2f05d 100644
--- a/tests/python_client/chaos/chaos_commons.py
+++ b/tests/python_client/chaos/chaos_commons.py
@@ -87,7 +87,7 @@ def reconnect(connections, alias='default', timeout=360):
     return connections.connect(alias)
 
 
-def assert_statistic(checkers, expectations={}):
+def assert_statistic(checkers, expectations={}, succ_rate_threshold=0.95, fail_rate_threshold=0.49):
     for k in checkers.keys():
         # expect succ if no expectations
         succ_rate = checkers[k].succ_rate()
@@ -95,9 +95,9 @@ def assert_statistic(checkers, expectations={}):
         average_time = checkers[k].average_time
         if expectations.get(k, '') == constants.FAIL:
             log.info(f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
-            expect(succ_rate < 0.49 or total < 2,
+            expect(succ_rate < fail_rate_threshold or total < 2,
                    f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
         else:
             log.info(f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
-            expect(succ_rate > 0.90 and total > 2,
+            expect(succ_rate > succ_rate_threshold and total > 2,
                    f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
\ No newline at end of file
diff --git a/tests/python_client/chaos/checker.py b/tests/python_client/chaos/checker.py
index 4afbfb4fa8..5e77832f09 100644
--- a/tests/python_client/chaos/checker.py
+++ b/tests/python_client/chaos/checker.py
@@ -39,14 +39,14 @@ def trace(fmt=DEFAULT_FMT, prefix='chaos-test', flag=True):
     def decorate(func):
         @functools.wraps(func)
         def inner_wrapper(self, *args, **kwargs):
-            start_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
+            start_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
             t0 = time.perf_counter()
             res, result = func(self, *args, **kwargs)
             elapsed = time.perf_counter() - t0
-            end_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
+            end_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
+            operation_name = func.__name__
             if flag:
                 collection_name = self.c_wrap.name
-                operation_name = func.__name__
                 log_str = f"[{prefix}]" + fmt.format(**locals())
                 # TODO: add report function in this place, like uploading to influxdb
                 # it is better a async way to do this, in case of blocking the request processing
@@ -56,8 +56,12 @@ def trace(fmt=DEFAULT_FMT, prefix='chaos-test', flag=True):
                 self.average_time = (
                     elapsed + self.average_time * self._succ) / (self._succ + 1)
                 self._succ += 1
+                if len(self.fail_records) > 0 and self.fail_records[-1][0] == "failure" and \
+                        self._succ + self._fail == self.fail_records[-1][1] + 1:
+                    self.fail_records.append(("success", self._succ + self._fail, start_time))
             else:
                 self._fail += 1
+                self.fail_records.append(("failure", self._succ + self._fail, start_time))
             return res, result
         return inner_wrapper
     return decorate
@@ -91,6 +95,7 @@ class Checker:
     def __init__(self, collection_name=None, shards_num=2, dim=ct.default_dim):
         self._succ = 0
         self._fail = 0
+        self.fail_records = []
         self._keep_running = True
         self.rsp_times = []
         self.average_time = 0
@@ -126,6 +131,8 @@ class Checker:
         checkers_result = f"{checker_name}, succ_rate: {succ_rate:.2f}, total: {total:03d}, average_time: {average_time:.4f}, max_time: {max_time:.4f}, min_time: {min_time:.4f}"
         log.info(checkers_result)
         log.info(f"{checker_name} rsp times: {self.rsp_times}")
+        if len(self.fail_records) > 0:
+            log.info(f"{checker_name} failed at {self.fail_records}")
         return checkers_result
 
     def terminate(self):
diff --git a/tests/python_client/chaos/testcases/test_single_request_operation.py b/tests/python_client/chaos/testcases/test_single_request_operation.py
index 51d56feb74..558ae7ae95 100644
--- a/tests/python_client/chaos/testcases/test_single_request_operation.py
+++ b/tests/python_client/chaos/testcases/test_single_request_operation.py
@@ -78,6 +78,6 @@ class TestOperations(TestBase):
         for k,v in self.health_checkers.items():
             v.check_result()
         if is_check:
-            assert_statistic(self.health_checkers)
+            assert_statistic(self.health_checkers, succ_rate_threshold=0.98)
             assert_expectations()
         log.info("*********************Chaos Test Completed**********************")
diff --git a/tests/python_client/deploy/scripts/breakdown_rolling_update.py b/tests/python_client/deploy/scripts/breakdown_rolling_update.py
new file mode 100644
index 0000000000..d3ce3cb37a
--- /dev/null
+++ b/tests/python_client/deploy/scripts/breakdown_rolling_update.py
@@ -0,0 +1,53 @@
+import psutil
+import time
+from loguru import logger
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='config for rolling update process')
+    parser.add_argument('--wait_time', type=int, default=60, help='wait time after rolling update started')
+    args = parser.parse_args()
+    wait_time = args.wait_time
+    logger.info("start to watch rolling update process")
+    start_time = time.time()
+    end_time = time.time()
+    flag = False
+    while not flag and end_time - start_time < 360:
+        process_list = [p.info for p in psutil.process_iter(attrs=['pid', 'name','cmdline'])]
+        for process in process_list:
+            logger.debug(process)
+        logger.debug("##"*30)
+        for process in process_list:
+            if isinstance(process.get("cmdline", []), list):
+                cmdline_list = process.get("cmdline", [])
+                for cmdline in cmdline_list:
+                    if "rollingUpdate.sh" in cmdline:
+                        logger.info(f"rolling update process: {process} started")
+                        flag = True
+                        break
+            if flag:
+                break
+        time.sleep(0.5)
+        end_time = time.time()
+    if not flag:
+        logger.info(f"rolling update process not found, wait for {end_time - start_time} seconds")
+    else:
+        logger.info(f"rolling update process {process} found, wait for {end_time - start_time} seconds")
+    if flag:
+        logger.info(f"wait {wait_time}s to kill rolling update process")
+        time.sleep(wait_time)
+        logger.info("start to kill rolling update process")
+        try:
+            p = psutil.Process(process["pid"])
+            p.terminate()
+            logger.info(f"rolling update process: {process} killed")
+        except Exception as e:
+            logger.error(f"rolling update process: {process} kill failed, {e}")
+    else:
+        logger.info("all process info")
+        for process in process_list:
+            logger.info(process)
+
+
+
diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt
index 4e6392ed19..99482cbc84 100644
--- a/tests/python_client/requirements.txt
+++ b/tests/python_client/requirements.txt
@@ -31,7 +31,7 @@ pytest-random-order
 python-benedict==0.24.3
 timeout-decorator==0.5.0
 
-# for bulk load test
+# for bulk insert test
 minio==7.1.5
 
 # for benchmark
@@ -39,3 +39,6 @@ h5py==3.7.0
 
 # for log
 loguru==0.6.0
+
+# util
+psutil==5.8.0
\ No newline at end of file
diff --git a/tests/scripts/breakdown_rolling_update.py b/tests/scripts/breakdown_rolling_update.py
new file mode 100644
index 0000000000..ad32576bdc
--- /dev/null
+++ b/tests/scripts/breakdown_rolling_update.py
@@ -0,0 +1,37 @@
+import psutil
+import time
+from loguru import logger
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='config for rolling update process')
+    parser.add_argument('--wait_time', type=int, default=60, help='wait time after rolling update started')
+    args = parser.parse_args()
+    wait_time = args.wait_time
+    logger.info("start to watch rolling update process")
+    start_time = time.time()
+    end_time = time.time()
+    flag = True
+    while flag and end_time - start_time < 360:
+        process_list = [p.info for p in psutil.process_iter(attrs=['pid', 'name','cmdline'])]
+        for process in process_list:
+            if isinstance(process.get("cmdline", []), list):
+                if "rollingUpdate.sh" in process.get("cmdline", []):
+                    logger.info(f"rolling update process: {process} started")
+                    flag = False
+                    break
+        time.sleep(0.5)
+        end_time = time.time()
+    if flag:
+        logger.info(f"rolling update process not found, wait for {end_time - start_time} seconds")
+    logger.info(f"wait {wait_time}s to kill rolling update process")
+    time.sleep(wait_time)
+    logger.info("start to kill rolling update process")
+    try:
+        p = psutil.Process(process["pid"])
+        p.terminate()
+        logger.info(f"rolling update process: {process} killed")
+    except Exception as e:
+        logger.error(f"rolling update process: {process} kill failed, {e}")
+
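
The fail_records mechanism added to checker.py above appends one ("failure", operation_count, start_time) tuple for every failed request and one ("success", operation_count, start_time) tuple for the request that first succeeds again immediately after a failure, so the "failed at" log line shows when each checker lost and regained service during the rolling update. The sketch below is illustrative only and is not part of the patch: the failure_windows helper and the sample records are hypothetical, while the tuple shape and the '%Y-%m-%dT%H:%M:%S.%fZ' timestamp format are taken from the diff.

# Illustrative sketch (not part of the patch): turn Checker.fail_records tuples
# into rough unavailability windows observed during a rolling update.
from datetime import datetime

TS_FMT = '%Y-%m-%dT%H:%M:%S.%fZ'  # same format the trace decorator uses for start_time


def failure_windows(fail_records):
    """Pair the first failure of each streak with the recovery success that follows it."""
    windows = []
    start = None
    for kind, op_count, ts in fail_records:
        if kind == "failure" and start is None:
            # remember the first failure of a streak
            start = (op_count, datetime.strptime(ts, TS_FMT))
        elif kind == "success" and start is not None:
            # recovery: report (first failed op, recovering op, seconds unavailable)
            end = datetime.strptime(ts, TS_FMT)
            windows.append((start[0], op_count, (end - start[1]).total_seconds()))
            start = None
    return windows


if __name__ == '__main__':
    # fabricated sample records, only to show the expected shape
    records = [
        ("failure", 101, "2023-02-14T02:00:01.000000Z"),
        ("failure", 102, "2023-02-14T02:00:03.000000Z"),
        ("success", 103, "2023-02-14T02:00:16.500000Z"),
    ]
    print(failure_windows(records))  # [(101, 103, 15.5)]

Run against a checker's fail_records after a test, a helper like this gives a quick view of how long each service interruption lasted relative to the rolling update window.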