[skip e2e]Add response time metric for chaos test (#15900)

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
This commit is contained in:
zhuwenxing 2022-03-07 16:20:04 +08:00 committed by GitHub
parent be30ccfac9
commit fd0b67d1fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 55 additions and 22 deletions

View File

@ -37,6 +37,7 @@ class Checker:
def __init__(self):
self._succ = 0
self._fail = 0
self.rsp_times = []
self.average_time = 0
self.c_wrap = ApiCollectionWrapper()
self.c_wrap.init_collection(name=cf.gen_unique_str('Checker_'),
@ -54,9 +55,20 @@ class Checker:
def succ_rate(self):
return self._succ / self.total() if self.total() != 0 else 0
def check_result(self):
succ_rate = self.succ_rate()
total = self.total()
rsp_times = self.rsp_times
average_time = 0 if len(rsp_times) == 0 else sum(rsp_times) / len(rsp_times)
max_time = 0 if len(rsp_times) == 0 else max(rsp_times)
min_time = 0 if len(rsp_times) == 0 else min(rsp_times)
checkers_result = f"succ_rate: {succ_rate:.2f}, total: {total:03d}, average_time: {average_time:.4f}, max_time: {max_time:.4f}, min_time: {min_time:.4f}"
return checkers_result
def reset(self):
self._succ = 0
self._fail = 0
self.rsp_times = []
self.average_time = 0
@ -81,6 +93,7 @@ class SearchChecker(Checker):
)
t1 = time.time()
if result:
self.rsp_times.append(t1 - t0)
self.average_time = ((t1 - t0) + self.average_time * self._succ) / (self._succ + 1)
self._succ += 1
log.debug(f"search success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}")
@ -108,6 +121,7 @@ class InsertFlushChecker(Checker):
t1 = time.time()
if not self._flush:
if insert_result:
self.rsp_times.append(t1 - t0)
self.average_time = ((t1 - t0) + self.average_time * self._succ) / (self._succ + 1)
self._succ += 1
log.debug(f"insert success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}")
@ -120,6 +134,7 @@ class InsertFlushChecker(Checker):
num_entities = self.c_wrap.num_entities
t1 = time.time()
if num_entities == (self.initial_entities + constants.DELTA_PER_INS):
self.rsp_times.append(t1 - t0)
self.average_time = ((t1 - t0) + self.average_time * self._succ) / (self._succ + 1)
self._succ += 1
log.debug(f"flush success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}")
@ -145,6 +160,7 @@ class CreateChecker(Checker):
check_task=CheckTasks.check_nothing)
t1 = time.time()
if result:
self.rsp_times.append(t1 - t0)
self.average_time = ((t1 - t0) + self.average_time * self._succ) / (self._succ + 1)
self._succ += 1
log.debug(f"create success, time: {t1 - t0:.4f}, average_time: {self.average_time:4f}")
@ -175,6 +191,7 @@ class IndexChecker(Checker):
check_task=CheckTasks.check_nothing)
t1 = time.time()
if result:
self.rsp_times.append(t1 - t0)
self.average_time = ((t1 - t0) + self.average_time * self._succ) / (self._succ + 1)
self._succ += 1
log.debug(f"index success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}")
@ -202,6 +219,7 @@ class QueryChecker(Checker):
check_task=CheckTasks.check_nothing)
t1 = time.time()
if result:
self.rsp_times.append(t1 - t0)
self.average_time = ((t1 - t0) + self.average_time * self._succ) / (self._succ + 1)
self._succ += 1
log.debug(f"query success, time: {t1 - t0:.4f}, average_time: {self.average_time:.4f}")
@ -215,11 +233,13 @@ def assert_statistic(checkers, expectations={}):
# expect succ if no expectations
succ_rate = checkers[k].succ_rate()
total = checkers[k].total()
checker_result = k.check_result()
if expectations.get(k, '') == constants.FAIL:
log.info(f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}")
log.info(f"Expect Fail: {str(k)} {checker_result}")
expect(succ_rate < 0.49 or total < 2,
f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {total}")
f"Expect Fail: {str(k)} {checker_result}")
else:
log.info(f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}")
log.info(f"Expect Succ: {str(k)} {checker_result}")
expect(succ_rate > 0.90 or total > 2,
f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}")
f"Expect Succ: {str(k)} {checker_result}")

View File

@ -45,8 +45,8 @@ def check_cluster_nodes(chaos_config):
selector = findkeys(chaos_config, "selector")
selector = list(selector)
log.info(f"chaos target selector: {selector}")
assert len(selector) == 1
selector = selector[0]
# assert len(selector) == 1
selector = selector[0] # chaos yaml file must place the effected pod selector in the first position
namespace = selector["namespaces"][0]
labels_dict = selector["labelSelectors"]
labels_list = []
@ -59,10 +59,8 @@ def check_cluster_nodes(chaos_config):
def record_results(checkers):
res = ""
for k in checkers.keys():
# expect succ if no expectations
succ_rate = checkers[k].succ_rate()
total = checkers[k].total()
res += f"{str(k):10} succ rate: {succ_rate:.2f} total: {total:02d}\n"
check_result = checkers[k].check_result()
res += f"{str(k):10} {check_result}\n"
return res
@ -175,11 +173,14 @@ class TestChaos(TestChaosBase):
# assert statistic:all ops 100% succ
log.info("******1st assert before chaos: ")
assert_statistic(self.health_checkers)
with open(file_name, "a+") as f:
ts = time.strftime("%Y-%m-%d %H:%M:%S")
f.write(f"{meta_name}-{ts}\n")
f.write("1st assert before chaos:\n")
f.write(record_results(self.health_checkers))
try:
with open(file_name, "a+") as f:
ts = time.strftime("%Y-%m-%d %H:%M:%S")
f.write(f"{meta_name}-{ts}\n")
f.write("1st assert before chaos:\n")
f.write(record_results(self.health_checkers))
except Exception as e:
log.info(f"Fail to write to file: {e}")
# apply chaos object
chaos_res = CusResource(kind=chaos_config['kind'],
group=constants.CHAOS_GROUP,
@ -188,6 +189,8 @@ class TestChaos(TestChaosBase):
chaos_res.create(chaos_config)
log.info("chaos injected")
log.info(f"chaos information: {chaos_res.get(meta_name)}")
res = chaos_res.get(meta_name)
log.info(f"chaos crd list: {res}")
sleep(constants.WAIT_PER_OP * 2)
# reset counting
cc.reset_counting(self.health_checkers)
@ -207,12 +210,19 @@ class TestChaos(TestChaosBase):
Op.search: self.expect_search,
Op.query: self.expect_query
})
with open(file_name, "a+") as f:
f.write("2nd assert after chaos injected:\n")
f.write(record_results(self.health_checkers))
try:
with open(file_name, "a+") as f:
f.write("2nd assert after chaos injected:\n")
f.write(record_results(self.health_checkers))
except Exception as e:
log.error(f"Fail to write the report: {e}")
# delete chaos
chaos_res.delete(meta_name)
# get chaos crd, expect it is deleted
res = chaos_res.get(meta_name)
log.info(f"chaos crd list: {res}")
log.info("chaos deleted")
log.info(f'Alive threads: {threading.enumerate()}')
sleep(2)
# wait all pods ready
@ -231,10 +241,13 @@ class TestChaos(TestChaosBase):
# assert statistic: all ops success again
log.info("******3rd assert after chaos deleted: ")
assert_statistic(self.health_checkers)
with open(file_name, "a+") as f:
f.write("3rd assert after chaos deleted:\n")
f.write(record_results(self.health_checkers))
try:
with open(file_name, "a+") as f:
f.write("3rd assert after chaos deleted:\n")
f.write(record_results(self.health_checkers))
except Exception as e:
log.info(f"Fail to write the report: {e}")
# assert all expectations
assert_expectations()
log.info("*********************Chaos Test Completed**********************")
log.info("*********************Chaos Test Completed**********************")