[skip ci] Add Chaos tests (#5579)

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
This commit is contained in:
yanliang567 2021-06-04 10:33:34 +08:00 committed by GitHub
parent 0b00eacd90
commit f9e03c5468
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 356 additions and 0 deletions

1
tests20/chaos/README.md Normal file
View File

@ -0,0 +1 @@
Chaos Tests

View File

@ -0,0 +1,11 @@
# PodChaos definition (presumably consumed by Chaos Mesh — confirm): applies the
# `pod-kill` action to pods in the `milvus` namespace that match the label
# selector, on an "@every 20s" schedule.
chaos:
kind: PodChaos
spec:
action: pod-kill
selector:
namespaces:
- milvus
labelSelectors:
# NOTE(review): Kubernetes label values may only contain alphanumerics, '-', '_'
# and '.', so "data node" (with a space) cannot match a real pod label —
# confirm the actual label carried by the data node pods.
"app.kubernetes.io/name": data node
scheduler:
cron: "@every 20s"

View File

@ -0,0 +1,11 @@
# PodChaos definition (presumably consumed by Chaos Mesh — confirm): applies the
# `pod-kill` action to pods in the `milvus` namespace that match the label
# selector, on an "@every 20s" schedule.
chaos:
kind: PodChaos
spec:
action: pod-kill
selector:
namespaces:
- milvus
labelSelectors:
# NOTE(review): Kubernetes label values may only contain alphanumerics, '-', '_'
# and '.', so "query node" (with a space) cannot match a real pod label —
# confirm the actual label carried by the query node pods.
"app.kubernetes.io/name": query node
scheduler:
cron: "@every 20s"

View File

@ -0,0 +1,64 @@
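# All chaos test cases in one file: each entry names a test case, the chaos definition file it applies, and (optionally) the expected result per operation and deployment mode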
Collections:
-
testcase: test_querynode_podkill
chaos: chaos_querynode_podkill.yaml
expectation:
single:
create: succ # succ by default if not specified
drop: succ
query: fail
insert: succ
index: succ
cluster_1_node:
query: fail
cluster_n_nodes:
query: degrade # keep functional, but performance degraded
-
testcase: test_queryservice_podkill
chaos: chaos_queryservice_podkill.yaml
-
testcase: test_datanode_podkill
chaos: chaos_datanode_podkill.yaml
expectation:
single:
insert: fail
cluster_1_node:
insert: fail
cluster_n_nodes:
insert: degrade
-
testcase: test_dataservice_podkill
chaos: chaos_dataservice_podkill.yaml
-
testcase: test_indexnode_podkill
chaos: chaos_indexnode_podkill.yaml
-
testcase: test_indexservice_podkill
chaos: chaos_indexservice_podkill.yaml
-
testcase: test_proxy_podkill
chaos: chaos_proxy_podkill.yaml
-
testcase: test_master_podkill
chaos: chaos_master_podkill.yaml
-
testcase: test_etcd_podkill
chaos: chaos_etcd_podkill.yaml
-
testcase: test_minio_podkill
chaos: chaos_minio_podkill.yaml
-
testcase: test_querynode_cpu100p
chaos: chaos_querynode_cpu100p.yaml
# and 10 more for the other pods
-
testcase: test_querynode_mem100p
chaos: chaos_querynode_mem100p.yaml
# and 10 more for the other pods
-
testcase: test_querynode_network_isolation
chaos: chaos_querynode_network_isolation.yaml
# and 10 more for the other pods

106
tests20/chaos/checker.py Normal file
View File

@ -0,0 +1,106 @@
import sys
import threading
from time import sleep
from common import common_func as cf
from common import common_type as ct
# NOTE(review): `nums` is not referenced by any class visible in this module —
# confirm whether it is still needed.
nums = 0
class Checker:
    """Base class that tallies successful and failed operations.

    The counters live in ``_succ`` and ``_fail``; ``_running`` is the flag a
    checking loop polls to decide whether to keep going.
    """

    def __init__(self):
        self._succ = 0
        self._fail = 0
        self._running = True

    def total(self):
        """Return the number of attempts recorded so far."""
        return self._succ + self._fail

    def statics(self):
        """Return the success ratio, or 0 when nothing has been recorded."""
        attempts = self.total()
        if attempts == 0:
            return 0
        return self._succ / attempts

    def terminate(self):
        """Clear the running flag so a polling loop stops."""
        self._running = False

    def reset(self):
        """Zero both counters; the running flag is left untouched."""
        self._succ = 0
        self._fail = 0
class SearchChecker(Checker):
    """Checker that searches a collection in a loop and tallies the outcomes."""

    def __init__(self, collection_wrapper):
        super().__init__()
        self.c_wrapper = collection_wrapper

    def keep_searching(self):
        """Search through the wrapper until the running flag is cleared.

        Each iteration asks ``cf.gen_vectors(5, ct.default_dim)`` for query
        data, searches with ``nprobe`` 32 and ``limit`` 1, and counts the
        attempt as a success only when the second element returned by the
        wrapper is ``True``.
        """
        # NOTE(review): CreateChecker passes check_res="check_nothing" while
        # this call passes "nothing" — confirm which value the wrapper expects.
        while self._running is True:
            query_vectors = cf.gen_vectors(5, ct.default_dim)
            _res, succeeded = self.c_wrapper.search(
                data=query_vectors,
                params={"nprobe": 32},
                limit=1,
                check_res="nothing",
            )
            if succeeded is not True:
                self._fail += 1
                continue
            self._succ += 1
class InsertChecker(Checker):
    """Checker that inserts into a collection in a loop and tallies the outcomes."""

    def __init__(self, collection_wrapper):
        super().__init__()
        self.c_wrapper = collection_wrapper

    def keep_inserting(self):
        """Insert through the wrapper until the running flag is cleared.

        Each iteration sleeps one second, inserts the data returned by
        ``cf.gen_default_list_data()``, and counts the attempt as a success
        only when the second element returned by the wrapper is ``True``.
        """
        # NOTE(review): CreateChecker passes check_res="check_nothing" while
        # this call passes "nothing" — confirm which value the wrapper expects.
        while self._running is True:
            sleep(1)
            _res, succeeded = self.c_wrapper.insert(
                data=cf.gen_default_list_data(),
                check_res="nothing",
            )
            if succeeded is not True:
                self._fail += 1
                continue
            self._succ += 1
class CreateChecker(Checker):
    """Checker that creates, then drops, collections in a loop and tallies the outcomes."""

    def __init__(self, collection_wrapper):
        super().__init__()
        self.c_wrapper = collection_wrapper
        # NOTE(review): `num` is never read in the visible code — confirm it is needed.
        self.num = 0

    def keep_creating(self):
        """Create collections through the wrapper until the running flag is cleared.

        Each iteration initialises a collection with a name from
        ``cf.gen_unique_str()`` and the schema from
        ``cf.gen_default_collection_schema()``. When the second element
        returned by the wrapper is ``True`` the attempt counts as a success
        and the wrapper's ``drop`` is called; otherwise it counts as a failure.
        """
        while self._running is True:
            _collection, created = self.c_wrapper.collection_init(
                name=cf.gen_unique_str(),
                schema=cf.gen_default_collection_schema(),
                check_res="check_nothing",
            )
            if created is not True:
                self._fail += 1
                continue
            self._succ += 1
            self.c_wrapper.drop(check_res="check_nothing")
class IndexChecker(Checker):
    """Placeholder checker for index operations; the loop is not implemented yet."""

    def __init__(self):
        super().__init__()

    def keep_indexing(self):
        """Do nothing (not implemented yet)."""
        return None
class DropChecker(Checker):
    """Placeholder checker for drop operations; the loop is not implemented yet."""

    def __init__(self):
        super().__init__()

    def keep_dropping(self):
        """Do nothing (not implemented yet)."""
        return None
class FlushChecker(Checker):
    """Placeholder checker for flush operations; the loop is not implemented yet."""

    def __init__(self):
        super().__init__()

    def keep_flushing(self):
        """Do nothing (not implemented yet)."""
        return None

55
tests20/chaos/conftest.py Normal file
View File

@ -0,0 +1,55 @@
import pytest
def pytest_addoption(parser):
    """Register the command-line options read by the fixtures in this file.

    Args:
        parser: the pytest option parser; only its ``addoption`` method is used.
    """
    # (flag, default, help) for every value-storing option, in registration order.
    store_options = (
        ("--ip", "localhost", "service's ip"),
        ("--host", "localhost", "service's ip"),
        ("--service", "", "service address"),
        ("--port", 19530, "service's port"),
        ("--http_port", 19121, "http's port"),
        ("--handler", "GRPC", "handler of request"),
        ("--tag", "all", "only run tests matching the tag."),
    )
    for flag, default_value, help_text in store_options:
        parser.addoption(flag, action="store", default=default_value, help=help_text)

    # --dry_run is a boolean switch rather than a stored value.
    parser.addoption('--dry_run', action='store_true', default=False, help="")
@pytest.fixture
def ip(request):
    """Provide the value of the ``--ip`` command-line option."""
    config = request.config
    return config.getoption("--ip")
@pytest.fixture
def host(request):
    """Provide the value of the ``--host`` command-line option."""
    config = request.config
    return config.getoption("--host")
@pytest.fixture
def service(request):
    """Provide the value of the ``--service`` command-line option."""
    config = request.config
    return config.getoption("--service")
@pytest.fixture
def port(request):
    """Provide the value of the ``--port`` command-line option."""
    config = request.config
    return config.getoption("--port")
@pytest.fixture
def http_port(request):
    """Provide the value of the ``--http_port`` command-line option."""
    config = request.config
    return config.getoption("--http_port")
@pytest.fixture
def handler(request):
    """Provide the value of the ``--handler`` command-line option."""
    config = request.config
    return config.getoption("--handler")
@pytest.fixture
def tag(request):
    """Provide the value of the ``--tag`` command-line option."""
    config = request.config
    return config.getoption("--tag")
@pytest.fixture
def dry_run(request):
    """Provide the value of the ``--dry_run`` command-line switch."""
    config = request.config
    return config.getoption("--dry_run")

5
tests20/chaos/pytest.ini Normal file
View File

@ -0,0 +1,5 @@
[pytest]
# NOTE(review): the --host address and the --html report path below look specific
# to one developer's machine — confirm/override them before running elsewhere.
addopts = --host 192.168.1.239 --html=/Users/yanliang/Document/report.html
-;addopts = --host 172.28.255.155 --html=/tmp/report.html
# python3 -W ignore -m pytest

103
tests20/chaos/test_chaos.py Normal file
View File

@ -0,0 +1,103 @@
import logging
import pytest
import sys
import threading
from time import sleep
from base.client_request import ApiReq
from pymilvus_orm import connections
from checker import CreateChecker, SearchChecker, InsertChecker
from base.client_request import ApiCollection
from common import common_func as cf
from common import common_type as ct
from utils.util_log import test_log as log
class TestsChaos:
    """Chaos test scaffold.

    Runs a health checker in a background thread, prints its success count
    and success rate at a few points, and stops the checker at the end.
    Applying and deleting the chaos object and asserting on the statistics
    are still outlined as comments inside ``test_chaos``.
    """

    # NOTE(review): the endpoint is hard-coded; conftest.py in this directory
    # registers --host/--port options that could supply it instead.
    _HOST = "192.168.1.239"
    _PORT = 19530

    @staticmethod
    def _connect():
        """Configure and create the 'default' connection.

        Raises:
            Exception: if ``connections.create_connection`` returns ``None``.
        """
        connections.configure(default={"host": TestsChaos._HOST, "port": TestsChaos._PORT})
        res = connections.create_connection(alias='default')
        if res is None:
            raise Exception("no connections")

    @staticmethod
    def _new_collection_wrapper():
        """Initialise a collection with a unique name and the default schema.

        Returns:
            A ``(wrapper, init_outcome)`` pair, where ``init_outcome`` is
            whatever ``collection_init`` returned.
        """
        c_wrapper = ApiCollection()
        init_outcome = c_wrapper.collection_init(name=cf.gen_unique_str(),
                                                 schema=cf.gen_default_collection_schema(),
                                                 check_res="check_nothing")
        return c_wrapper, init_outcome

    @pytest.fixture(scope="function", autouse=True)
    def coll_wrapper_4_insert(self):
        """Return a collection wrapper for the insert/create checkers."""
        self._connect()
        c_wrapper, _ = self._new_collection_wrapper()
        return c_wrapper

    @pytest.fixture(scope="function", autouse=True)
    def coll_wrapper_4_search(self):
        """Return a collection wrapper for the search checker."""
        self._connect()
        c_wrapper, init_outcome = self._new_collection_wrapper()
        _, result = init_outcome
        if result is False:
            # NOTE(review): if `log` is a standard logging.Logger, log.log()
            # also needs a level argument — confirm this call works as intended.
            log.log("result: ")
        # TODO: pre-load data for searching, e.g.
        # for _ in range(10):
        #     c_wrapper.insert(data=cf.gen_default_list_data(nb=ct.default_nb*10),
        #                      check_res="check_nothing")
        return c_wrapper

    @pytest.fixture(scope="function", autouse=True)
    def health_checkers(self, coll_wrapper_4_insert, coll_wrapper_4_search):
        """Return the checkers to run, keyed by operation name.

        Only the "create" checker is enabled at the moment.
        """
        checkers = {}
        # TODO: enable the search and insert checkers:
        # checkers["search"] = SearchChecker(collection_wrapper=coll_wrapper_4_search)
        # checkers["insert"] = InsertChecker(collection_wrapper=coll_wrapper_4_insert)
        checkers["create"] = CreateChecker(collection_wrapper=coll_wrapper_4_insert)
        return checkers

    def test_chaos(self, health_checkers):
        """Run the create checker in the background and print its statistics."""
        create_ch = health_checkers["create"]
        # TODO: start threads for the search and insert checkers once enabled.
        create_t = threading.Thread(target=create_ch.keep_creating, args=())
        create_t.start()
        try:
            # parse chaos object
            # find the testcase by chaos ops in testcases
            # parse the test expectations

            # wait 120s
            print("test_chaos starting...")
            sleep(2)
            print(f"succ count1: {create_ch._succ}")
            print(f"succ rate1: {create_ch.statics()}")

            # assert statistic: all ops 100% succ
            # reset counting
            # apply chaos object
            # wait 300s (varies by chaos)
            create_ch.reset()
            print(f"succ count2: {create_ch._succ}")
            print(f"succ rate2: {create_ch.statics()}")
            sleep(2)
            print(f"succ count3: {create_ch._succ}")
            print(f"succ rate3: {create_ch.statics()}")

            # assert statistic: the target ops succ <50% and the other keep 100% succ
            # delete chaos
            # wait 300s (varies by feature)
            # assert statistic: the target ops succ >90% and the other keep 100% succ
        finally:
            # Always stop the checkers, even when the body above raises;
            # otherwise the non-daemon worker thread would keep looping.
            for ch in health_checkers.values():
                ch.terminate()
            # Wait for the worker to finish its current iteration and exit.
            create_t.join()