[test]Add rolling update test (#22144)

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
Co-authored-by: Sheldon <chuanfeng.liu@zilliz.com>
This commit is contained in:
zhuwenxing 2023-02-14 10:06:34 +08:00 committed by GitHub
parent e5a6d90e93
commit 33de788ba8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 108 additions and 8 deletions

View File

@ -87,7 +87,7 @@ def reconnect(connections, alias='default', timeout=360):
return connections.connect(alias)
def assert_statistic(checkers, expectations={}):
def assert_statistic(checkers, expectations={}, succ_rate_threshold=0.95, fail_rate_threshold=0.49):
for k in checkers.keys():
# expect succ if no expectations
succ_rate = checkers[k].succ_rate()
@ -95,9 +95,9 @@ def assert_statistic(checkers, expectations={}):
average_time = checkers[k].average_time
if expectations.get(k, '') == constants.FAIL:
log.info(f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
expect(succ_rate < 0.49 or total < 2,
expect(succ_rate < fail_rate_threshold or total < 2,
f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
else:
log.info(f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
expect(succ_rate > 0.90 and total > 2,
expect(succ_rate > succ_rate_threshold and total > 2,
f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")

View File

@ -39,14 +39,14 @@ def trace(fmt=DEFAULT_FMT, prefix='chaos-test', flag=True):
def decorate(func):
@functools.wraps(func)
def inner_wrapper(self, *args, **kwargs):
start_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
start_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
t0 = time.perf_counter()
res, result = func(self, *args, **kwargs)
elapsed = time.perf_counter() - t0
end_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
end_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
operation_name = func.__name__
if flag:
collection_name = self.c_wrap.name
operation_name = func.__name__
log_str = f"[{prefix}]" + fmt.format(**locals())
# TODO: add report function in this place, like uploading to influxdb
# it is better a async way to do this, in case of blocking the request processing
@ -56,8 +56,12 @@ def trace(fmt=DEFAULT_FMT, prefix='chaos-test', flag=True):
self.average_time = (
elapsed + self.average_time * self._succ) / (self._succ + 1)
self._succ += 1
if len(self.fail_records) > 0 and self.fail_records[-1][0] == "failure" and \
self._succ + self._fail == self.fail_records[-1][1] + 1:
self.fail_records.append(("success", self._succ + self._fail, start_time))
else:
self._fail += 1
self.fail_records.append(("failure", self._succ + self._fail, start_time))
return res, result
return inner_wrapper
return decorate
@ -91,6 +95,7 @@ class Checker:
def __init__(self, collection_name=None, shards_num=2, dim=ct.default_dim):
self._succ = 0
self._fail = 0
self.fail_records = []
self._keep_running = True
self.rsp_times = []
self.average_time = 0
@ -126,6 +131,8 @@ class Checker:
checkers_result = f"{checker_name}, succ_rate: {succ_rate:.2f}, total: {total:03d}, average_time: {average_time:.4f}, max_time: {max_time:.4f}, min_time: {min_time:.4f}"
log.info(checkers_result)
log.info(f"{checker_name} rsp times: {self.rsp_times}")
if len(self.fail_records) > 0:
log.info(f"{checker_name} failed at {self.fail_records}")
return checkers_result
def terminate(self):

View File

@ -78,6 +78,6 @@ class TestOperations(TestBase):
for k,v in self.health_checkers.items():
v.check_result()
if is_check:
assert_statistic(self.health_checkers)
assert_statistic(self.health_checkers, succ_rate_threshold=0.98)
assert_expectations()
log.info("*********************Chaos Test Completed**********************")

View File

@ -0,0 +1,53 @@
import psutil
import time

from loguru import logger

if __name__ == '__main__':
    import argparse

    # CLI: how long to let the rolling update run before killing it.
    parser = argparse.ArgumentParser(description='config for rolling update process')
    parser.add_argument('--wait_time', type=int, default=60, help='wait time after rolling update started')
    args = parser.parse_args()
    wait_time = args.wait_time

    logger.info("start to watch rolling update process")
    start_time = time.time()
    end_time = time.time()
    found = False
    # Poll the process table (at most 360s) until a rollingUpdate.sh process appears.
    while not found and end_time - start_time < 360:
        snapshot = [p.info for p in psutil.process_iter(attrs=['pid', 'name', 'cmdline'])]
        for process in snapshot:
            logger.debug(process)
        logger.debug("##" * 30)
        for process in snapshot:
            cmd_parts = process.get("cmdline", [])
            # cmdline may be None (e.g. access denied); only iterate real lists.
            if isinstance(cmd_parts, list):
                for part in cmd_parts:
                    # Substring match so "bash /x/rollingUpdate.sh" also counts.
                    if "rollingUpdate.sh" in part:
                        logger.info(f"rolling update process: {process} started")
                        found = True
                        break
            if found:
                break
        time.sleep(0.5)
        end_time = time.time()
        if found:
            logger.info(f"rolling update process {process} found, wait for {end_time - start_time} seconds")
        else:
            logger.info(f"rolling update process not found, wait for {end_time - start_time} seconds")
    if found:
        # Let the rolling update proceed for wait_time seconds, then terminate it.
        logger.info(f"wait {wait_time}s to kill rolling update process")
        time.sleep(wait_time)
        logger.info("start to kill rolling update process")
        try:
            p = psutil.Process(process["pid"])
            p.terminate()
            logger.info(f"rolling update process: {process} killed")
        except Exception as e:
            logger.error(f"rolling update process: {process} kill failed, {e}")
    else:
        # Timed out: dump the last snapshot for debugging.
        logger.info("all process info")
        for process in snapshot:
            logger.info(process)

View File

@ -31,7 +31,7 @@ pytest-random-order
python-benedict==0.24.3
timeout-decorator==0.5.0
# for bulk load test
# for bulk insert test
minio==7.1.5
# for benchmark
@ -39,3 +39,6 @@ h5py==3.7.0
# for log
loguru==0.6.0
# util
psutil==5.8.0

View File

@ -0,0 +1,37 @@
import psutil
import time

from loguru import logger

if __name__ == '__main__':
    import argparse

    # CLI: how long to let the rolling update run before killing it.
    parser = argparse.ArgumentParser(description='config for rolling update process')
    parser.add_argument('--wait_time', type=int, default=60, help='wait time after rolling update started')
    args = parser.parse_args()
    wait_time = args.wait_time
    logger.info("start to watch rolling update process")
    start_time = time.time()
    end_time = time.time()
    # flag == True means the rolling update process has NOT been found yet.
    flag = True
    target = None  # process-info dict of the rollingUpdate.sh process once found
    # Poll the process table (at most 360s) until a rollingUpdate.sh process appears.
    while flag and end_time - start_time < 360:
        process_list = [p.info for p in psutil.process_iter(attrs=['pid', 'name', 'cmdline'])]
        for process in process_list:
            cmdline = process.get("cmdline", [])
            # cmdline may be None (e.g. access denied); only iterate real lists.
            if isinstance(cmdline, list):
                # Substring match per argv element (consistent with the watcher
                # script): catches "bash /x/rollingUpdate.sh", not only a bare
                # "rollingUpdate.sh" element, which an exact `in` test would miss.
                if any("rollingUpdate.sh" in c for c in cmdline):
                    logger.info(f"rolling update process: {process} started")
                    flag = False
                    target = process
                    break
        time.sleep(0.5)
        end_time = time.time()
    if flag:
        # Timed out without finding the process: do NOT kill anything.
        # (Previously the script fell through and terminated an arbitrary
        # process left over from the last snapshot.)
        logger.info(f"rolling update process not found, wait for {end_time - start_time} seconds")
    else:
        process = target
        logger.info(f"wait {wait_time}s to kill rolling update process")
        time.sleep(wait_time)
        logger.info("start to kill rolling update process")
        try:
            p = psutil.Process(process["pid"])
            p.terminate()
            logger.info(f"rolling update process: {process} killed")
        except Exception as e:
            logger.error(f"rolling update process: {process} kill failed, {e}")