[test]Update chaos apply and chaos scope (#26873)

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
This commit is contained in:
zhuwenxing 2023-09-06 14:03:14 +08:00 committed by GitHub
parent 528948559f
commit bd4b842153
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 37 additions and 2 deletions

View File

@ -102,6 +102,8 @@ class RequestRecords(metaclass=Singleton):
self.buffer = []
def sink(self):
if len(self.buffer) == 0:
return
df = pd.DataFrame(self.buffer)
if not self.created_file:
with request_lock:

View File

@ -6,6 +6,7 @@ def pytest_addoption(parser):
parser.addoption("--role_type", action="store", default="activated", help="role_type")
parser.addoption("--target_component", action="store", default="querynode", help="target_component")
parser.addoption("--target_pod", action="store", default="etcd_leader", help="target_pod")
parser.addoption("--target_scope", action="store", default="all", help="target_scope")
parser.addoption("--target_number", action="store", default="1", help="target_number")
parser.addoption("--chaos_duration", action="store", default="1m", help="chaos_duration")
parser.addoption("--chaos_interval", action="store", default="10s", help="chaos_interval")
@ -34,6 +35,11 @@ def target_pod(request):
return request.config.getoption("--target_pod")
@pytest.fixture
def target_scope(request):
    """Expose the value of the ``--target_scope`` command-line option.

    The value is read from the pytest config attached to ``request`` and
    returned unchanged.
    """
    scope_option = request.config.getoption("--target_scope")
    return scope_option
@pytest.fixture
def target_number(request):
    """Expose the value of the ``--target_number`` command-line option.

    The value is read from the pytest config attached to ``request`` and
    returned unchanged (no type conversion is applied here).
    """
    number_option = request.config.getoption("--target_number")
    return number_option

View File

@ -56,7 +56,7 @@ class TestChaosApply:
chaos_res.delete(meta_name, raise_ex=False)
sleep(2)
def test_chaos_apply(self, chaos_type, target_component, target_number, chaos_duration, chaos_interval, wait_signal):
def test_chaos_apply(self, chaos_type, target_component, target_scope, target_number, chaos_duration, chaos_interval, wait_signal):
# start the monitor threads to check the milvus ops
log.info("*********************Chaos Test Start**********************")
if wait_signal:
@ -78,6 +78,7 @@ class TestChaosApply:
update_key_value(chaos_config, "app.kubernetes.io/instance", release_name)
update_key_value(chaos_config, "namespaces", [self.milvus_ns])
update_key_value(chaos_config, "value", target_number)
update_key_value(chaos_config, "mode", target_scope)
self.chaos_config = chaos_config
if "s" in chaos_interval:
schedule = f"*/{chaos_interval[:-1]} * * * * *"

View File

@ -2,6 +2,8 @@ import pytest
import time
from time import sleep
from pathlib import Path
from datetime import datetime
import json
from pymilvus import connections
from common.cus_resource_opts import CustomResourceOperations as CusResource
from common.milvus_sys import MilvusSys
@ -9,6 +11,7 @@ from chaos import chaos_commons as cc
import logging as log
from utils.util_k8s import (wait_pods_ready, get_milvus_instance_name,
get_milvus_deploy_tool, get_etcd_leader, get_etcd_followers)
from utils.util_common import wait_signal_to_apply_chaos
import constants
@ -54,9 +57,17 @@ class TestChaosApply:
chaos_res.delete(meta_name, raise_ex=False)
sleep(2)
def test_chaos_apply(self, chaos_type, target_pod, chaos_duration, chaos_interval):
def test_chaos_apply(self, chaos_type, target_pod, chaos_duration, chaos_interval, wait_signal):
# start the monitor threads to check the milvus ops
log.info("*********************Chaos Test Start**********************")
if wait_signal:
log.info("need wait signal to start chaos")
ready_for_chaos = wait_signal_to_apply_chaos()
if not ready_for_chaos:
log.info("did not get the signal to apply chaos")
raise Exception
else:
log.info("get the signal to apply chaos")
log.info(connections.get_connection_addr('default'))
release_name = self.release_name
deploy_tool = get_milvus_deploy_tool(self.milvus_ns, self.milvus_sys)
@ -85,6 +96,7 @@ class TestChaosApply:
version=constants.CHAOS_VERSION,
namespace=constants.CHAOS_NAMESPACE)
chaos_res.create(chaos_config)
create_time = datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S.%f')
log.info("chaos injected")
res = chaos_res.list_all()
chaos_list = [r['metadata']['name'] for r in res['items']]
@ -96,6 +108,7 @@ class TestChaosApply:
sleep(chaos_duration)
# delete chaos
chaos_res.delete(meta_name)
delete_time = datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S.%f')
log.info("chaos deleted")
res = chaos_res.list_all()
chaos_list = [r['metadata']['name'] for r in res['items']]
@ -113,6 +126,19 @@ class TestChaosApply:
log.info("all pods are ready")
pods_ready_time = time.time() - t0
log.info(f"pods ready time: {pods_ready_time}")
recovery_time = datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S.%f')
event_records = {
"chaos_type": chaos_type,
"target_component": target_pod,
"meta_name": meta_name,
"create_time": create_time,
"delete_time": delete_time,
"recovery_time": recovery_time
}
# save event records to json file
with open(constants.CHAOS_INFO_SAVE_PATH, 'w') as f:
json.dump(event_records, f)
# reconnect to test the service healthy
start_time = time.time()
end_time = start_time + 120