[test]Add rolling update test (#22144)
Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
Co-authored-by: Sheldon <chuanfeng.liu@zilliz.com>

Parent: e5a6d90e93
Commit: 33de788ba8
@@ -87,7 +87,7 @@ def reconnect(connections, alias='default', timeout=360):
     return connections.connect(alias)
 
 
-def assert_statistic(checkers, expectations={}):
+def assert_statistic(checkers, expectations={}, succ_rate_threshold=0.95, fail_rate_threshold=0.49):
     for k in checkers.keys():
         # expect succ if no expectations
         succ_rate = checkers[k].succ_rate()
@@ -95,9 +95,9 @@ def assert_statistic(checkers, expectations={}):
         average_time = checkers[k].average_time
         if expectations.get(k, '') == constants.FAIL:
             log.info(f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
-            expect(succ_rate < 0.49 or total < 2,
+            expect(succ_rate < fail_rate_threshold or total < 2,
                    f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
         else:
             log.info(f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
-            expect(succ_rate > 0.90 and total > 2,
+            expect(succ_rate > succ_rate_threshold and total > 2,
                    f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")
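The two new keyword arguments make the pass/fail cut-offs tunable per test instead of the previously hard-coded 0.90 and 0.49. A minimal usage sketch, assuming a `health_checkers` dict of Checker objects and the Op/constants helpers from the surrounding chaos test package (names not shown in this diff):

# Hedged sketch: `health_checkers` is assumed to be a dict such as
# {Op.insert: InsertChecker(), Op.search: SearchChecker()} built by the
# chaos tests; Op and constants come from the same package.
assert_statistic(health_checkers)                              # defaults: succ rate > 0.95, fail rate < 0.49
assert_statistic(health_checkers, succ_rate_threshold=0.98)    # stricter bar, as used for rolling update
assert_statistic(health_checkers,
                 expectations={Op.insert: constants.FAIL})     # an operation expected to fail during chaos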
@@ -39,14 +39,14 @@ def trace(fmt=DEFAULT_FMT, prefix='chaos-test', flag=True):
     def decorate(func):
         @functools.wraps(func)
         def inner_wrapper(self, *args, **kwargs):
-            start_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
+            start_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
             t0 = time.perf_counter()
             res, result = func(self, *args, **kwargs)
             elapsed = time.perf_counter() - t0
-            end_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
+            end_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
+            operation_name = func.__name__
             if flag:
                 collection_name = self.c_wrap.name
-                operation_name = func.__name__
                 log_str = f"[{prefix}]" + fmt.format(**locals())
                 # TODO: add report function in this place, like uploading to influxdb
                 # it is better a async way to do this, in case of blocking the request processing
@@ -56,8 +56,12 @@ def trace(fmt=DEFAULT_FMT, prefix='chaos-test', flag=True):
                 self.average_time = (
                     elapsed + self.average_time * self._succ) / (self._succ + 1)
                 self._succ += 1
+                if len(self.fail_records) > 0 and self.fail_records[-1][0] == "failure" and \
+                        self._succ + self._fail == self.fail_records[-1][1] + 1:
+                    self.fail_records.append(("success", self._succ + self._fail, start_time))
             else:
                 self._fail += 1
+                self.fail_records.append(("failure", self._succ + self._fail, start_time))
             return res, result
         return inner_wrapper
     return decorate
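The fail_records list keeps a compact timeline of failures: every failed request appends a ("failure", request_index, start_time) tuple, and the first request that succeeds immediately after a recorded failure appends a matching ("success", ...) entry, marking the recovery point. A standalone illustration of that bookkeeping with made-up request outcomes:

# Standalone sketch of the fail_records bookkeeping added above;
# `outcomes` is hypothetical data standing in for per-request results.
from datetime import datetime

fail_records, succ, fail = [], 0, 0
outcomes = [True, True, False, False, True, True]
for ok in outcomes:
    start_time = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%fZ')
    if ok:
        succ += 1
        # the first success right after a recorded failure marks recovery
        if len(fail_records) > 0 and fail_records[-1][0] == "failure" and \
                succ + fail == fail_records[-1][1] + 1:
            fail_records.append(("success", succ + fail, start_time))
    else:
        fail += 1
        fail_records.append(("failure", succ + fail, start_time))
print(fail_records)  # -> [("failure", 3, ...), ("failure", 4, ...), ("success", 5, ...)]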
@@ -91,6 +95,7 @@ class Checker:
     def __init__(self, collection_name=None, shards_num=2, dim=ct.default_dim):
         self._succ = 0
         self._fail = 0
+        self.fail_records = []
         self._keep_running = True
         self.rsp_times = []
         self.average_time = 0
@@ -126,6 +131,8 @@ class Checker:
         checkers_result = f"{checker_name}, succ_rate: {succ_rate:.2f}, total: {total:03d}, average_time: {average_time:.4f}, max_time: {max_time:.4f}, min_time: {min_time:.4f}"
         log.info(checkers_result)
         log.info(f"{checker_name} rsp times: {self.rsp_times}")
+        if len(self.fail_records) > 0:
+            log.info(f"{checker_name} failed at {self.fail_records}")
         return checkers_result
 
     def terminate(self):
@@ -78,6 +78,6 @@ class TestOperations(TestBase):
         for k,v in self.health_checkers.items():
             v.check_result()
         if is_check:
-            assert_statistic(self.health_checkers)
+            assert_statistic(self.health_checkers, succ_rate_threshold=0.98)
             assert_expectations()
         log.info("*********************Chaos Test Completed**********************")
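Raising the bar to 0.98 means each checker must keep virtually all of its requests succeeding while the pods are being restarted. A quick worked example with hypothetical counts:

# What the 0.98 bar means in practice for a made-up run of 500 requests:
total, failures = 500, 9
succ_rate = (total - failures) / total       # 0.982
assert succ_rate > 0.98 and total > 2        # passes; a 10th failure (exactly 0.98) would not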
@@ -0,0 +1,53 @@
+import psutil
+import time
+from loguru import logger
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='config for rolling update process')
+    parser.add_argument('--wait_time', type=int, default=60, help='wait time after rolling update started')
+    args = parser.parse_args()
+    wait_time = args.wait_time
+    logger.info("start to watch rolling update process")
+    start_time = time.time()
+    end_time = time.time()
+    flag = False
+    while not flag and end_time - start_time < 360:
+        process_list = [p.info for p in psutil.process_iter(attrs=['pid', 'name','cmdline'])]
+        for process in process_list:
+            logger.debug(process)
+        logger.debug("##"*30)
+        for process in process_list:
+            if isinstance(process.get("cmdline", []), list):
+                cmdline_list = process.get("cmdline", [])
+                for cmdline in cmdline_list:
+                    if "rollingUpdate.sh" in cmdline:
+                        logger.info(f"rolling update process: {process} started")
+                        flag = True
+                        break
+            if flag:
+                break
+        time.sleep(0.5)
+        end_time = time.time()
+        if not flag:
+            logger.info(f"rolling update process not found, wait for {end_time - start_time} seconds")
+        else:
+            logger.info(f"rolling update process {process} found, wait for {end_time - start_time} seconds")
+    if flag:
+        logger.info(f"wait {wait_time}s to kill rolling update process")
+        time.sleep(wait_time)
+        logger.info("start to kill rolling update process")
+        try:
+            p = psutil.Process(process["pid"])
+            p.terminate()
+            logger.info(f"rolling update process: {process} killed")
+        except Exception as e:
+            logger.error(f"rolling update process: {process} kill failed, {e}")
+    else:
+        logger.info("all process info")
+        for process in process_list:
+            logger.info(process)
+
+
+
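This helper polls the process table until something whose command line mentions rollingUpdate.sh appears (for at most 360 s), waits --wait_time further seconds, and then terminates it to simulate an interrupted upgrade. The core psutil pattern it relies on, pulled out as a small standalone sketch (the helper name is made up here):

# Hedged sketch of the psutil lookup the script uses: list every process with
# its cmdline, then match a substring against each argument.
import psutil

def find_by_cmdline(needle):
    for p in psutil.process_iter(attrs=['pid', 'name', 'cmdline']):
        cmdline = p.info.get('cmdline') or []
        if any(needle in arg for arg in cmdline):
            return p.info
    return None

target = find_by_cmdline("rollingUpdate.sh")
if target is not None:
    psutil.Process(target["pid"]).terminate()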
@@ -31,7 +31,7 @@ pytest-random-order
 python-benedict==0.24.3
 timeout-decorator==0.5.0
 
-# for bulk load test
+# for bulk insert test
 minio==7.1.5
 
 # for benchmark
@@ -39,3 +39,6 @@ h5py==3.7.0
 
 # for log
 loguru==0.6.0
+
+# util
+psutil==5.8.0
tests/scripts/breakdown_rolling_update.py (new file, 37 lines)
@@ -0,0 +1,37 @@
+import psutil
+import time
+from loguru import logger
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='config for rolling update process')
+    parser.add_argument('--wait_time', type=int, default=60, help='wait time after rolling update started')
+    args = parser.parse_args()
+    wait_time = args.wait_time
+    logger.info("start to watch rolling update process")
+    start_time = time.time()
+    end_time = time.time()
+    flag = True
+    while flag and end_time - start_time < 360:
+        process_list = [p.info for p in psutil.process_iter(attrs=['pid', 'name','cmdline'])]
+        for process in process_list:
+            if isinstance(process.get("cmdline", []), list):
+                if "rollingUpdate.sh" in process.get("cmdline", []):
+                    logger.info(f"rolling update process: {process} started")
+                    flag = False
+                    break
+        time.sleep(0.5)
+        end_time = time.time()
+    if flag:
+        logger.info(f"rolling update process not found, wait for {end_time - start_time} seconds")
+    logger.info(f"wait {wait_time}s to kill rolling update process")
+    time.sleep(wait_time)
+    logger.info("start to kill rolling update process")
+    try:
+        p = psutil.Process(process["pid"])
+        p.terminate()
+        logger.info(f"rolling update process: {process} killed")
+    except Exception as e:
+        logger.error(f"rolling update process: {process} kill failed, {e}")
+
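The breakdown variant is invoked the same way, e.g. python tests/scripts/breakdown_rolling_update.py --wait_time 60. Unlike the longer watcher above, it inverts the flag (True while still searching) and tests membership of the whole cmdline list rather than each argument, so it only matches when the script path appears as a standalone argument; a toy comparison with a made-up cmdline:

# Hypothetical cmdline used only to illustrate the matching difference.
cmdline = ["/bin/bash", "/scripts/rollingUpdate.sh"]
"rollingUpdate.sh" in cmdline                        # False: exact element match only
any("rollingUpdate.sh" in arg for arg in cmdline)    # True: per-argument substring match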