From ce23e6c77c2276351224441d05eedc8a8fe8df00 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Tue, 17 Sep 2019 12:46:29 +0800 Subject: [PATCH 001/126] init commit --- .gitignore | 3 + __init__.py | 1 + connections.py | 105 +++++++++++++++++++++++++++++ exception_codes.py | 3 + exceptions.py | 11 ++++ settings.py | 31 +++++++++ utils/__init__.py | 0 utils/logger_helper.py | 145 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 299 insertions(+) create mode 100644 .gitignore create mode 100644 __init__.py create mode 100644 connections.py create mode 100644 exception_codes.py create mode 100644 exceptions.py create mode 100644 settings.py create mode 100644 utils/__init__.py create mode 100644 utils/logger_helper.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..624eb4fa58 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.env + +__pycache__/ diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000..7db5c41bd0 --- /dev/null +++ b/__init__.py @@ -0,0 +1 @@ +import settings diff --git a/connections.py b/connections.py new file mode 100644 index 0000000000..727864ef98 --- /dev/null +++ b/connections.py @@ -0,0 +1,105 @@ +import logging +from milvus import Milvus +from functools import wraps +from contextlib import contextmanager + +import exceptions + +logger = logging.getLogger(__name__) + +class Connection: + def __init__(self, name, uri, max_retry=1, error_handlers=None, **kwargs): + self.name = name + self.uri = uri + self.max_retry = max_retry + self.retried = 0 + self.conn = Milvus() + self.error_handlers = [] if not error_handlers else error_handlers + self.on_retry_func = kwargs.get('on_retry_func', None) + + def __str__(self): + return 'Connection:name=\"{}\";uri=\"{}\"'.format(self.name, self.uri) + + def _connect(self): + try: + self.conn.connect(uri=self.uri) + except Exception as e: + if not self.error_handlers: + raise exceptions.ConnectionConnectError(message='') + for handler in self.error_handlers: 
+ handler(e) + + @property + def can_retry(self): + return self.retried <= self.max_retry + + @property + def connected(self): + return self.conn.connected() + + def on_retry(self): + if self.on_retry_func: + self.on_retry_func(self) + else: + logger.warn('{} is retrying {}'.format(self, self.retried)) + + def on_connect(self): + while not self.connected and self.can_retry: + self.retried += 1 + self.on_retry() + self._connect() + + if not self.can_retry and not self.connected: + raise exceptions.ConnectionConnectError(message='Max retry {} reached!'.format(self.max_retry)) + + self.retried = 0 + + def connect(self, func, exception_handler=None): + @wraps(func) + def inner(*args, **kwargs): + self.on_connect() + try: + return func(*args, **kwargs) + except Exception as e: + if exception_handler: + exception_handler(e) + else: + raise e + return inner + +if __name__ == '__main__': + class Conn: + def __init__(self, state): + self.state = state + + def connect(self, uri): + return self.state + + def connected(self): + return self.state + + fail_conn = Conn(False) + success_conn = Conn(True) + + class Retry: + def __init__(self): + self.times = 0 + + def __call__(self, conn): + self.times += 1 + print('Retrying {}'.format(self.times)) + + + retry_obj = Retry() + c = Connection('client', uri='localhost', on_retry_func=retry_obj) + c.conn = fail_conn + + def f(): + print('ffffffff') + + # m = c.connect(func=f) + # m() + + c.conn = success_conn + m = c.connect(func=f) + m() diff --git a/exception_codes.py b/exception_codes.py new file mode 100644 index 0000000000..5369389e84 --- /dev/null +++ b/exception_codes.py @@ -0,0 +1,3 @@ +INVALID_CODE = -1 + +CONNECT_ERROR_CODE = 10001 diff --git a/exceptions.py b/exceptions.py new file mode 100644 index 0000000000..7178c4ebdc --- /dev/null +++ b/exceptions.py @@ -0,0 +1,11 @@ +import exception_codes as codes + +class BaseException(Exception): + code = codes.INVALID_CODE + message = 'BaseException' + def __init__(self, 
message='', code=None): + self.message = self.__class__.__name__ if not message else message + self.code = self.code if code is None else code + +class ConnectionConnectError(BaseException): + code = codes.CONNECT_ERROR_CODE diff --git a/settings.py b/settings.py new file mode 100644 index 0000000000..e1a45262c8 --- /dev/null +++ b/settings.py @@ -0,0 +1,31 @@ +import sys +import os + +from environs import Env + +env = Env() +env.read_env() + +DEBUG = env.bool('DEBUG', False) +TESTING = env.bool('TESTING', False) + +METADATA_URI = env.str('METADATA_URI', '') + +LOG_LEVEL = env.str('LOG_LEVEL', 'DEBUG' if DEBUG else 'INFO') +LOG_PATH = env.str('LOG_PATH', '/tmp/mishards') +LOG_NAME = env.str('LOG_NAME', 'logfile') +TIMEZONE = env.str('TIMEZONE', 'UTC') + +from utils.logger_helper import config +config(LOG_LEVEL, LOG_PATH, LOG_NAME, TIMEZONE) + +TIMEOUT = env.int('TIMEOUT', 60) + + +if __name__ == '__main__': + import logging + logger = logging.getLogger(__name__) + logger.debug('DEBUG') + logger.info('INFO') + logger.warn('WARN') + logger.error('ERROR') diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/utils/logger_helper.py b/utils/logger_helper.py new file mode 100644 index 0000000000..1b59aa40ec --- /dev/null +++ b/utils/logger_helper.py @@ -0,0 +1,145 @@ +import os +import datetime +from pytz import timezone +from logging import Filter +import logging.config + + +class InfoFilter(logging.Filter): + def filter(self, rec): + return rec.levelno == logging.INFO + +class DebugFilter(logging.Filter): + def filter(self, rec): + return rec.levelno == logging.DEBUG + +class WarnFilter(logging.Filter): + def filter(self, rec): + return rec.levelno == logging.WARN + +class ErrorFilter(logging.Filter): + def filter(self, rec): + return rec.levelno == logging.ERROR + +class CriticalFilter(logging.Filter): + def filter(self, rec): + return rec.levelno == logging.CRITICAL + + +COLORS = { + 'HEADER': '\033[95m', + 
'INFO': '\033[92m', + 'DEBUG': '\033[94m', + 'WARNING': '\033[93m', + 'ERROR': '\033[95m', + 'CRITICAL': '\033[91m', + 'ENDC': '\033[0m', +} + +class ColorFulFormatColMixin: + def format_col(self, message_str, level_name): + if level_name in COLORS.keys(): + message_str = COLORS.get(level_name) + message_str + COLORS.get( + 'ENDC') + return message_str + +class ColorfulFormatter(logging.Formatter, ColorFulFormatColMixin): + def format(self, record): + message_str = super(ColorfulFormatter, self).format(record) + + return self.format_col(message_str, level_name=record.levelname) + +def config(log_level, log_path, name, tz='UTC'): + def build_log_file(level, log_path, name, tz): + utc_now = datetime.datetime.utcnow() + utc_tz = timezone('UTC') + local_tz = timezone(tz) + tznow = utc_now.replace(tzinfo=utc_tz).astimezone(local_tz) + return '{}-{}-{}.log'.format(os.path.join(log_path, name), tznow.strftime("%m-%d-%Y-%H:%M:%S"), + level) + + if not os.path.exists(log_path): + os.makedirs(log_path) + + LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'default': { + 'format': '[%(asctime)s-%(levelname)s-%(name)s]: %(message)s (%(filename)s:%(lineno)s)' + }, + 'colorful_console': { + 'format': '[%(asctime)s-%(levelname)s-%(name)s]: %(message)s (%(filename)s:%(lineno)s)', + '()': ColorfulFormatter, + }, + }, + 'filters': { + 'InfoFilter': { + '()': InfoFilter, + }, + 'DebugFilter': { + '()': DebugFilter, + }, + 'WarnFilter': { + '()': WarnFilter, + }, + 'ErrorFilter': { + '()': ErrorFilter, + }, + 'CriticalFilter': { + '()': CriticalFilter, + }, + }, + 'handlers': { + 'milvus_celery_console': { + 'class': 'logging.StreamHandler', + 'formatter': 'colorful_console', + }, + 'milvus_debug_file': { + 'level': 'DEBUG', + 'filters': ['DebugFilter'], + 'class': 'logging.handlers.RotatingFileHandler', + 'formatter': 'default', + 'filename': build_log_file('debug', log_path, name, tz) + }, + 'milvus_info_file': { + 'level': 'INFO', + 'filters': 
['InfoFilter'], + 'class': 'logging.handlers.RotatingFileHandler', + 'formatter': 'default', + 'filename': build_log_file('info', log_path, name, tz) + }, + 'milvus_warn_file': { + 'level': 'WARN', + 'filters': ['WarnFilter'], + 'class': 'logging.handlers.RotatingFileHandler', + 'formatter': 'default', + 'filename': build_log_file('warn', log_path, name, tz) + }, + 'milvus_error_file': { + 'level': 'ERROR', + 'filters': ['ErrorFilter'], + 'class': 'logging.handlers.RotatingFileHandler', + 'formatter': 'default', + 'filename': build_log_file('error', log_path, name, tz) + }, + 'milvus_critical_file': { + 'level': 'CRITICAL', + 'filters': ['CriticalFilter'], + 'class': 'logging.handlers.RotatingFileHandler', + 'formatter': 'default', + 'filename': build_log_file('critical', log_path, name, tz) + }, + }, + 'loggers': { + '': { + 'handlers': ['milvus_celery_console', 'milvus_info_file', 'milvus_debug_file', 'milvus_warn_file', \ + 'milvus_error_file', 'milvus_critical_file'], + 'level': log_level, + 'propagate': False + }, + }, + 'propagate': False, + } + + logging.config.dictConfig(LOGGING) From 17bb7841843403516acf803157a6e5820511db19 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Tue, 17 Sep 2019 12:52:32 +0800 Subject: [PATCH 002/126] (exception): change exception definition --- connections.py | 6 +++--- exceptions.py | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/connections.py b/connections.py index 727864ef98..ea446d5ad3 100644 --- a/connections.py +++ b/connections.py @@ -25,7 +25,7 @@ class Connection: self.conn.connect(uri=self.uri) except Exception as e: if not self.error_handlers: - raise exceptions.ConnectionConnectError(message='') + raise exceptions.ConnectionConnectError() for handler in self.error_handlers: handler(e) @@ -97,8 +97,8 @@ if __name__ == '__main__': def f(): print('ffffffff') - # m = c.connect(func=f) - # m() + m = c.connect(func=f) + m() c.conn = success_conn m = c.connect(func=f) diff --git a/exceptions.py 
b/exceptions.py index 7178c4ebdc..50db4474c4 100644 --- a/exceptions.py +++ b/exceptions.py @@ -3,9 +3,8 @@ import exception_codes as codes class BaseException(Exception): code = codes.INVALID_CODE message = 'BaseException' - def __init__(self, message='', code=None): + def __init__(self, message=''): self.message = self.__class__.__name__ if not message else message - self.code = self.code if code is None else code class ConnectionConnectError(BaseException): code = codes.CONNECT_ERROR_CODE From 052d79a58da5fc91b1d36089947634c7d7528e2c Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Tue, 17 Sep 2019 14:28:34 +0800 Subject: [PATCH 003/126] (feat): update connections --- connections.py | 105 ++++++++++++++++++++++++++++++++++++++++++--- exception_codes.py | 1 + exceptions.py | 3 ++ service_handler.py | 11 +++++ settings.py | 1 + utils/__init__.py | 10 +++++ 6 files changed, 126 insertions(+), 5 deletions(-) create mode 100644 service_handler.py diff --git a/connections.py b/connections.py index ea446d5ad3..c52a1c5f85 100644 --- a/connections.py +++ b/connections.py @@ -1,9 +1,12 @@ import logging -from milvus import Milvus +import threading from functools import wraps from contextlib import contextmanager +from milvus import Milvus +import settings import exceptions +from utils import singleton logger = logging.getLogger(__name__) @@ -16,6 +19,7 @@ class Connection: self.conn = Milvus() self.error_handlers = [] if not error_handlers else error_handlers self.on_retry_func = kwargs.get('on_retry_func', None) + self._connect() def __str__(self): return 'Connection:name=\"{}\";uri=\"{}\"'.format(self.name, self.uri) @@ -67,6 +71,79 @@ class Connection: raise e return inner +@singleton +class ConnectionMgr: + def __init__(self): + self.metas = {} + self.conns = {} + + def conn(self, name, throw=False): + c = self.conns.get(name, None) + if not c: + url = self.metas.get(name, None) + if not url: + if not throw: + return None + raise 
exceptions.ConnectionNotFoundError('Connection {} not found'.format(name)) + this_conn = Connection(name=name, uri=url, max_retry=settings.MAX_RETRY) + threaded = { + threading.get_ident() : this_conn + } + c[name] = threaded + return this_conn + + tid = threading.get_ident() + rconn = c.get(tid, None) + if not rconn: + url = self.metas.get(name, None) + if not url: + if not throw: + return None + raise exceptions.ConnectionNotFoundError('Connection {} not found'.format(name)) + this_conn = Connection(name=name, uri=url, max_retry=settings.MAX_RETRY) + c[tid] = this_conn + return this_conn + + return rconn + + def on_new_meta(self, name, url): + self.metas[name] = url + + def on_duplicate_meta(self, name, url): + if self.metas[name] == url: + return self.on_same_meta(name, url) + + return self.on_diff_meta(name, url) + + def on_same_meta(self, name, url): + logger.warn('Register same meta: {}:{}'.format(name, url)) + + def on_diff_meta(self, name, url): + logger.warn('Received {} with diff url={}'.format(name, url)) + self.metas[name] = url + self.conns[name] = {} + + def on_unregister_meta(self, name, url): + logger.info('Unregister name={};url={}'.format(name, url)) + self.conns.pop(name, None) + + def on_nonexisted_meta(self, name): + logger.warn('Non-existed meta: {}'.format(name)) + + def register(self, name, url): + meta = self.metas.get(name) + if not meta: + return self.on_new_meta(name, url) + else: + return self.on_duplicate_meta(name, url) + + def unregister(self, name): + url = self.metas.pop(name, None) + if url is None: + return self.on_nonexisted_meta(name) + return self.on_unregister_meta(name, url) + + if __name__ == '__main__': class Conn: def __init__(self, state): @@ -91,15 +168,33 @@ if __name__ == '__main__': retry_obj = Retry() - c = Connection('client', uri='localhost', on_retry_func=retry_obj) - c.conn = fail_conn + c = Connection('client', uri='', on_retry_func=retry_obj) def f(): print('ffffffff') - m = c.connect(func=f) - m() + # c.conn 
= fail_conn + # m = c.connect(func=f) + # m() c.conn = success_conn m = c.connect(func=f) m() + + mgr = ConnectionMgr() + mgr.register('pod1', '111') + mgr.register('pod2', '222') + mgr.register('pod2', '222') + mgr.register('pod2', 'tcp://127.0.0.1:19530') + + pod3 = mgr.conn('pod3') + print(pod3) + + pod2 = mgr.conn('pod2') + print(pod2) + print(pod2.connected) + + mgr.unregister('pod1') + + logger.info(mgr.metas) + logger.info(mgr.conns) diff --git a/exception_codes.py b/exception_codes.py index 5369389e84..c8cfd81dab 100644 --- a/exception_codes.py +++ b/exception_codes.py @@ -1,3 +1,4 @@ INVALID_CODE = -1 CONNECT_ERROR_CODE = 10001 +CONNECTTION_NOT_FOUND_CODE = 10002 diff --git a/exceptions.py b/exceptions.py index 50db4474c4..a25fb2c4ae 100644 --- a/exceptions.py +++ b/exceptions.py @@ -8,3 +8,6 @@ class BaseException(Exception): class ConnectionConnectError(BaseException): code = codes.CONNECT_ERROR_CODE + +class ConnectionNotFoundError(BaseException): + code = codes.CONNECTTION_NOT_FOUND_CODE diff --git a/service_handler.py b/service_handler.py new file mode 100644 index 0000000000..d5018a54d8 --- /dev/null +++ b/service_handler.py @@ -0,0 +1,11 @@ +import logging + +import grpco +from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 + +logger = logging.getLogger(__name__) + + +class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): + def __init__(self, connections, *args, **kwargs): + self.connections = self.connections diff --git a/settings.py b/settings.py index e1a45262c8..4ad00e66cb 100644 --- a/settings.py +++ b/settings.py @@ -20,6 +20,7 @@ from utils.logger_helper import config config(LOG_LEVEL, LOG_PATH, LOG_NAME, TIMEZONE) TIMEOUT = env.int('TIMEOUT', 60) +MAX_RETRY = env.int('MAX_RETRY', 3) if __name__ == '__main__': diff --git a/utils/__init__.py b/utils/__init__.py index e69de29bb2..ec7f32bcbc 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -0,0 +1,10 @@ +from functools import wraps + +def singleton(cls): + 
instances = {} + @wraps(cls) + def getinstance(*args, **kw): + if cls not in instances: + instances[cls] = cls(*args, **kw) + return instances[cls] + return getinstance From 4fc6f0a520159ed09d3e4513a547c0ab6fddde3d Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Tue, 17 Sep 2019 20:48:08 +0800 Subject: [PATCH 004/126] add grpc server --- __init__.py | 1 - mishards/__init__.py | 6 + connections.py => mishards/connections.py | 2 +- .../exception_codes.py | 0 exceptions.py => mishards/exceptions.py | 0 mishards/grpc_utils/__init__.py | 0 mishards/grpc_utils/grpc_args_parser.py | 101 ++++++ mishards/grpc_utils/grpc_args_wrapper.py | 4 + mishards/main.py | 14 + mishards/server.py | 47 +++ mishards/service_handler.py | 327 ++++++++++++++++++ settings.py => mishards/settings.py | 2 + {utils => mishards/utils}/__init__.py | 0 {utils => mishards/utils}/logger_helper.py | 0 service_handler.py | 11 - 15 files changed, 502 insertions(+), 13 deletions(-) delete mode 100644 __init__.py create mode 100644 mishards/__init__.py rename connections.py => mishards/connections.py (99%) rename exception_codes.py => mishards/exception_codes.py (100%) rename exceptions.py => mishards/exceptions.py (100%) create mode 100644 mishards/grpc_utils/__init__.py create mode 100644 mishards/grpc_utils/grpc_args_parser.py create mode 100644 mishards/grpc_utils/grpc_args_wrapper.py create mode 100644 mishards/main.py create mode 100644 mishards/server.py create mode 100644 mishards/service_handler.py rename settings.py => mishards/settings.py (90%) rename {utils => mishards/utils}/__init__.py (100%) rename {utils => mishards/utils}/logger_helper.py (100%) delete mode 100644 service_handler.py diff --git a/__init__.py b/__init__.py deleted file mode 100644 index 7db5c41bd0..0000000000 --- a/__init__.py +++ /dev/null @@ -1 +0,0 @@ -import settings diff --git a/mishards/__init__.py b/mishards/__init__.py new file mode 100644 index 0000000000..700dd4238c --- /dev/null +++ b/mishards/__init__.py @@ -0,0 
+1,6 @@ +import settings +from connections import ConnectionMgr +connect_mgr = ConnectionMgr() + +from server import Server +grpc_server = Server(conn_mgr=connect_mgr) diff --git a/connections.py b/mishards/connections.py similarity index 99% rename from connections.py rename to mishards/connections.py index c52a1c5f85..06d5f3ff16 100644 --- a/connections.py +++ b/mishards/connections.py @@ -89,7 +89,7 @@ class ConnectionMgr: threaded = { threading.get_ident() : this_conn } - c[name] = threaded + self.conns[name] = threaded return this_conn tid = threading.get_ident() diff --git a/exception_codes.py b/mishards/exception_codes.py similarity index 100% rename from exception_codes.py rename to mishards/exception_codes.py diff --git a/exceptions.py b/mishards/exceptions.py similarity index 100% rename from exceptions.py rename to mishards/exceptions.py diff --git a/mishards/grpc_utils/__init__.py b/mishards/grpc_utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mishards/grpc_utils/grpc_args_parser.py b/mishards/grpc_utils/grpc_args_parser.py new file mode 100644 index 0000000000..c8dc9d71d9 --- /dev/null +++ b/mishards/grpc_utils/grpc_args_parser.py @@ -0,0 +1,101 @@ +from milvus import Status +from functools import wraps + + +def error_status(func): + @wraps(func) + def inner(*args, **kwargs): + try: + results = func(*args, **kwargs) + except Exception as e: + return Status(code=Status.UNEXPECTED_ERROR, message=str(e)), None + + return Status(code=0, message="Success"), results + + return inner + + +class GrpcArgsParser(object): + + @classmethod + @error_status + def parse_proto_TableSchema(cls, param): + _table_schema = { + 'table_name': param.table_name.table_name, + 'dimension': param.dimension, + 'index_file_size': param.index_file_size, + 'metric_type': param.metric_type + } + + return _table_schema + + @classmethod + @error_status + def parse_proto_TableName(cls, param): + return param.table_name + + @classmethod + @error_status + 
def parse_proto_Index(cls, param): + _index = { + 'index_type': param.index_type, + 'nlist': param.nlist + } + + return _index + + @classmethod + @error_status + def parse_proto_IndexParam(cls, param): + _table_name = param.table_name.table_name + _status, _index = cls.parse_proto_Index(param.index) + + if not _status.OK(): + raise Exception("Argument parse error") + + return _table_name, _index + + @classmethod + @error_status + def parse_proto_Command(cls, param): + _cmd = param.cmd + + return _cmd + + @classmethod + @error_status + def parse_proto_Range(cls, param): + _start_value = param.start_value + _end_value = param.end_value + + return _start_value, _end_value + + @classmethod + @error_status + def parse_proto_RowRecord(cls, param): + return list(param.vector_data) + + @classmethod + @error_status + def parse_proto_SearchParam(cls, param): + _table_name = param.table_name + _topk = param.topk + _nprobe = param.nprobe + _status, _range = cls.parse_proto_Range(param.query_range_array) + + if not _status.OK(): + raise Exception("Argument parse error") + + _row_record = param.query_record_array + + return _table_name, _row_record, _range, _topk + + @classmethod + @error_status + def parse_proto_DeleteByRangeParam(cls, param): + _table_name = param.table_name + _range = param.range + _start_value = _range.start_value + _end_value = _range.end_value + + return _table_name, _start_value, _end_value diff --git a/mishards/grpc_utils/grpc_args_wrapper.py b/mishards/grpc_utils/grpc_args_wrapper.py new file mode 100644 index 0000000000..a864b1e400 --- /dev/null +++ b/mishards/grpc_utils/grpc_args_wrapper.py @@ -0,0 +1,4 @@ +# class GrpcArgsWrapper(object): + + # @classmethod + # def proto_TableName(cls): \ No newline at end of file diff --git a/mishards/main.py b/mishards/main.py new file mode 100644 index 0000000000..0185e6ac1d --- /dev/null +++ b/mishards/main.py @@ -0,0 +1,14 @@ +import sys +import os 
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import settings +from mishards import connect_mgr, grpc_server as server + +def main(): + connect_mgr.register('WOSERVER', settings.WOSERVER) + server.run(port=settings.SERVER_PORT) + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/mishards/server.py b/mishards/server.py new file mode 100644 index 0000000000..59ea7db46b --- /dev/null +++ b/mishards/server.py @@ -0,0 +1,47 @@ +import logging +import grpc +import time +from concurrent import futures +from grpc._cython import cygrpc +from milvus.grpc_gen.milvus_pb2_grpc import add_MilvusServiceServicer_to_server +from service_handler import ServiceHandler +import settings + +logger = logging.getLogger(__name__) + + +class Server: + def __init__(self, conn_mgr, port=19530, max_workers=10, **kwargs): + self.exit_flag = False + self.port = int(port) + self.conn_mgr = conn_mgr + self.server_impl = grpc.server( + thread_pool=futures.ThreadPoolExecutor(max_workers=max_workers), + options=[(cygrpc.ChannelArgKey.max_send_message_length, -1), + (cygrpc.ChannelArgKey.max_receive_message_length, -1)] + ) + + def start(self, port=None): + add_MilvusServiceServicer_to_server(ServiceHandler(conn_mgr=self.conn_mgr), self.server_impl) + self.server_impl.add_insecure_port("[::]:{}".format(str(port or self._port))) + self.server_impl.start() + + def run(self, port): + logger.info('Milvus server start ......') + port = port or self.port + + self.start(port) + logger.info('Successfully') + logger.info('Listening on port {}'.format(port)) + + try: + while not self.exit_flag: + time.sleep(5) + except KeyboardInterrupt: + self.stop() + + def stop(self): + logger.info('Server is shuting down ......') + self.exit_flag = True + self.server.stop(0) + logger.info('Server is closed') diff --git a/mishards/service_handler.py b/mishards/service_handler.py new file mode 100644 index 0000000000..ead8d14d88 --- /dev/null +++ 
b/mishards/service_handler.py @@ -0,0 +1,327 @@ +import logging +from contextlib import contextmanager +from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 + +from grpc_utils.grpc_args_parser import GrpcArgsParser as Parser + +logger = logging.getLogger(__name__) + + +class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): + def __init__(self, conn_mgr, *args, **kwargs): + self.conn_mgr = conn_mgr + self.table_meta = {} + + @property + def connection(self): + conn = self.conn_mgr.conn('WOSERVER') + if conn: + conn.on_connect() + return conn.conn + + def CreateTable(self, request, context): + _status, _table_schema = Parser.parse_proto_TableSchema(request) + + if not _status.OK(): + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + logger.info('CreateTable {}'.format(_table_schema['table_name'])) + + _status = self.connection.create_table(_table_schema) + + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + def HasTable(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return milvus_pb2.BoolReply( + status=status_pb2.Status(error_code=_status.code, reason=_status.message), + bool_reply=False + ) + + logger.info('HasTable {}'.format(_table_name)) + + _bool = self.connection.has_table(_table_name) + + return milvus_pb2.BoolReply( + status=status_pb2.Status(error_code=status_pb2.SUCCESS, reason="OK"), + bool_reply=_bool + ) + + def DropTable(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + logger.info('DropTable {}'.format(_table_name)) + + _status = self.connection.delete_table(_table_name) + + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + def CreateIndex(self, request, context): + _status, unpacks = Parser.parse_proto_IndexParam(request) + + if not 
_status.OK(): + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + _table_name, _index = unpacks + + logger.info('CreateIndex {}'.format(_table_name)) + + # TODO: interface create_table incompleted + _status = self.connection.create_index(_table_name, _index) + + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + def Insert(self, request, context): + logger.info('Insert') + # TODO: Ths SDK interface add_vectors() could update, add a key 'row_id_array' + _status, _ids = self.connection.add_vectors(None, None, insert_param=request) + return milvus_pb2.VectorIds( + status=status_pb2.Status(error_code=_status.code, reason=_status.message), + vector_id_array=_ids + ) + + def Search(self, request, context): + + try: + table_name = request.table_name + + topk = request.topk + nprobe = request.nprobe + + logger.info('Search {}: topk={} nprobe={}'.format(table_name, topk, nprobe)) + + if nprobe > 2048 or nprobe <= 0: + raise exceptions.GRPCInvlidArgument('Invalid nprobe: {}'.format(nprobe)) + + table_meta = self.table_meta.get(table_name, None) + if not table_meta: + status, info = self.connection.describe_table(table_name) + if not status.OK(): + raise TableNotFoundException(table_name) + + self.table_meta[table_name] = info + table_meta = info + + start = time.time() + + query_record_array = [] + + for query_record in request.query_record_array: + query_record_array.append(list(query_record.vector_data)) + + query_range_array = [] + for query_range in request.query_range_array: + query_range_array.append( + Range(query_range.start_value, query_range.end_value)) + except (TableNotFoundException, exceptions.GRPCInvlidArgument) as exc: + return milvus_pb2.TopKQueryResultList( + status=status_pb2.Status(error_code=exc.code, reason=exc.message) + ) + except Exception as e: + return milvus_pb2.TopKQueryResultList( + status=status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, reason=str(e)) + ) + + try: + results = 
workflow.query_vectors(table_name, table_meta, query_record_array, topk, + nprobe, query_range_array) + except (exceptions.GRPCQueryInvalidRangeException, TableNotFoundException) as exc: + return milvus_pb2.TopKQueryResultList( + status=status_pb2.Status(error_code=exc.code, reason=exc.message) + ) + except exceptions.ServiceNotFoundException as exc: + return milvus_pb2.TopKQueryResultList( + status=status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, reason=exc.message) + ) + except Exception as e: + logger.error(e) + results = workflow.query_vectors(table_name, table_meta, query_record_array, + topk, nprobe, query_range_array) + + now = time.time() + logger.info('SearchVector Ends @{}'.format(now)) + logger.info('SearchVector takes: {}'.format(now - start)) + + topk_result_list = milvus_pb2.TopKQueryResultList( + status=status_pb2.Status(error_code=status_pb2.SUCCESS, reason="Success"), + topk_query_result=results + ) + return topk_result_list + + def SearchInFiles(self, request, context): + try: + file_id_array = list(request.file_id_array) + search_param = request.search_param + table_name = search_param.table_name + topk = search_param.topk + nprobe = search_param.nprobe + + query_record_array = [] + + for query_record in search_param.query_record_array: + query_record_array.append(list(query_record)) + + query_range_array = [] + for query_range in search_param.query_range_array: + query_range_array.append("") + except Exception as e: + milvus_pb2.TopKQueryResultList( + status=status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, reason=str(e)), + ) + + res = search_vector_in_files.delay(table_name=table_name, + file_id_array=file_id_array, + query_record_array=query_record_array, + query_range_array=query_range_array, + topk=topk, + nprobe=nprobe) + status, result = res.get(timeout=1) + + if not status.OK(): + raise ThriftException(code=status.code, reason=status.message) + res = TopKQueryResult() + for top_k_query_results in result: + 
res.query_result_arrays.append([QueryResult(id=qr.id, distance=qr.distance) + for qr in top_k_query_results]) + return res + + def DescribeTable(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + table_name = milvus_pb2.TableName( + status=status_pb2.Status(error_code=_status.code, reason=_status.message) + ) + return milvus_pb2.TableSchema( + table_name=table_name + ) + + logger.info('DescribeTable {}'.format(_table_name)) + _status, _table = self.connection.describe_table(_table_name) + + if _status.OK(): + _grpc_table_name = milvus_pb2.TableName( + status=status_pb2.Status(error_code=_status.code, reason=_status.message), + table_name=_table.table_name + ) + + return milvus_pb2.TableSchema( + table_name=_grpc_table_name, + index_file_size=_table.index_file_size, + dimension=_table.dimension, + metric_type=_table.metric_type + ) + + return milvus_pb2.TableSchema( + table_name=milvus_pb2.TableName( + status=status_pb2.Status(error_code=_status.code, reason=_status.message) + ) + ) + + def CountTable(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + status = status_pb2.Status(error_code=_status.code, reason=_status.message) + + return milvus_pb2.TableRowCount( + status=status + ) + + logger.info('CountTable {}'.format(_table_name)) + + _status, _count = self.connection.get_table_row_count(_table_name) + + return milvus_pb2.TableRowCount( + status=status_pb2.Status(error_code=_status.code, reason=_status.message), + table_row_count=_count if isinstance(_count, int) else -1) + + def Cmd(self, request, context): + _status, _cmd = Parser.parse_proto_Command(request) + logger.info('Cmd: {}'.format(_cmd)) + + if not _status.OK(): + return milvus_pb2.StringReply( + status_pb2.Status(error_code=_status.code, reason=_status.message) + ) + + if _cmd == 'version': + _status, _reply = self.connection.server_version() + else: + _status, _reply = 
self.connection.server_status() + + return milvus_pb2.StringReply( + status=status_pb2.Status(error_code=_status.code, reason=_status.message), + string_reply=_reply + ) + + def ShowTables(self, request, context): + logger.info('ShowTables') + _status, _results = self.connection.show_tables() + + if not _status.OK(): + _results = [] + + for _result in _results: + yield milvus_pb2.TableName( + status=status_pb2.Status(error_code=_status.code, reason=_status.message), + table_name=_result + ) + + def DeleteByRange(self, request, context): + _status, unpacks = \ + Parser.parse_proto_DeleteByRangeParam(request) + + if not _status.OK(): + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + _table_name, _start_date, _end_date = unpacks + + logger.info('DeleteByRange {}: {} {}'.format(_table_name, _start_date, _end_date)) + _status = self.connection.delete_vectors_by_range(_table_name, _start_date, _end_date) + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + def PreloadTable(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + logger.info('PreloadTable {}'.format(_table_name)) + _status = self.connection.preload_table(_table_name) + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + def DescribeIndex(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return milvus_pb2.IndexParam( + table_name=milvus_pb2.TableName( + status=status_pb2.Status(error_code=_status.code, reason=_status.message) + ) + ) + + logger.info('DescribeIndex {}'.format(_table_name)) + _status, _index_param = self.connection.describe_index(_table_name) + + _index = milvus_pb2.Index(index_type=_index_param._index_type, nlist=_index_param._nlist) + _tablename = milvus_pb2.TableName( + 
status=status_pb2.Status(error_code=_status.code, reason=_status.message), + table_name=_table_name) + + return milvus_pb2.IndexParam(table_name=_tablename, index=_index) + + def DropIndex(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return status_pb2.Status(error_code=_status.code, reason=_status.message) + + logger.info('DropIndex {}'.format(_table_name)) + _status = self.connection.drop_index(_table_name) + return status_pb2.Status(error_code=_status.code, reason=_status.message) diff --git a/settings.py b/mishards/settings.py similarity index 90% rename from settings.py rename to mishards/settings.py index 4ad00e66cb..0566cf066f 100644 --- a/settings.py +++ b/mishards/settings.py @@ -22,6 +22,8 @@ config(LOG_LEVEL, LOG_PATH, LOG_NAME, TIMEZONE) TIMEOUT = env.int('TIMEOUT', 60) MAX_RETRY = env.int('MAX_RETRY', 3) +SERVER_PORT = env.int('SERVER_PORT', 19530) +WOSERVER = env.str('WOSERVER') if __name__ == '__main__': import logging diff --git a/utils/__init__.py b/mishards/utils/__init__.py similarity index 100% rename from utils/__init__.py rename to mishards/utils/__init__.py diff --git a/utils/logger_helper.py b/mishards/utils/logger_helper.py similarity index 100% rename from utils/logger_helper.py rename to mishards/utils/logger_helper.py diff --git a/service_handler.py b/service_handler.py deleted file mode 100644 index d5018a54d8..0000000000 --- a/service_handler.py +++ /dev/null @@ -1,11 +0,0 @@ -import logging - -import grpco -from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 - -logger = logging.getLogger(__name__) - - -class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): - def __init__(self, connections, *args, **kwargs): - self.connections = self.connections From 86a893cb0462f7822aa1d4da2aef3f478b67db83 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 18 Sep 2019 11:56:00 +0800 Subject: [PATCH 005/126] impl part of search --- mishards/exception_codes.py | 
2 + mishards/exceptions.py | 3 + mishards/main.py | 1 + mishards/service_handler.py | 232 +++++++++++++++++++++++------------- mishards/settings.py | 1 + 5 files changed, 157 insertions(+), 82 deletions(-) diff --git a/mishards/exception_codes.py b/mishards/exception_codes.py index c8cfd81dab..32b29bdfab 100644 --- a/mishards/exception_codes.py +++ b/mishards/exception_codes.py @@ -2,3 +2,5 @@ INVALID_CODE = -1 CONNECT_ERROR_CODE = 10001 CONNECTTION_NOT_FOUND_CODE = 10002 + +TABLE_NOT_FOUND_CODE = 20001 diff --git a/mishards/exceptions.py b/mishards/exceptions.py index a25fb2c4ae..1445d18769 100644 --- a/mishards/exceptions.py +++ b/mishards/exceptions.py @@ -11,3 +11,6 @@ class ConnectionConnectError(BaseException): class ConnectionNotFoundError(BaseException): code = codes.CONNECTTION_NOT_FOUND_CODE + +class TableNotFoundError(BaseException): + code = codes.TABLE_NOT_FOUND_CODE diff --git a/mishards/main.py b/mishards/main.py index 0185e6ac1d..2ba3f14697 100644 --- a/mishards/main.py +++ b/mishards/main.py @@ -7,6 +7,7 @@ from mishards import connect_mgr, grpc_server as server def main(): connect_mgr.register('WOSERVER', settings.WOSERVER) + connect_mgr.register('TEST', 'tcp://127.0.0.1:19530') server.run(port=settings.SERVER_PORT) return 0 diff --git a/mishards/service_handler.py b/mishards/service_handler.py index ead8d14d88..89ae2cd36c 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -1,13 +1,22 @@ import logging +import time +import datetime from contextlib import contextmanager -from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 +from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor +from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 +from milvus.grpc_gen.milvus_pb2 import TopKQueryResult +from milvus.client import types + +import settings from grpc_utils.grpc_args_parser import GrpcArgsParser as Parser logger = logging.getLogger(__name__) class 
ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): + MAX_NPROBE = 2048 def __init__(self, conn_mgr, *args, **kwargs): self.conn_mgr = conn_mgr self.table_meta = {} @@ -19,6 +28,99 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): conn.on_connect() return conn.conn + def query_conn(self, name): + conn = self.conn_mgr.conn(name) + conn and conn.on_connect() + return conn.conn + + def _format_date(self, start, end): + return ((start.year-1900)*10000 + (start.month-1)*100 + start.day + , (end.year-1900)*10000 + (end.month-1)*100 + end.day) + + def _range_to_date(self, range_obj): + try: + start = datetime.datetime.strptime(range_obj.start_date, '%Y-%m-%d') + end = datetime.datetime.strptime(range_obj.end_date, '%Y-%m-%d') + assert start >= end + except (ValueError, AssertionError): + raise exceptions.InvalidRangeError('Invalid time range: {} {}'.format( + range_obj.start_date, range_obj.end_date + )) + + return self._format_date(start, end) + + def _get_routing_file_ids(self, table_id, range_array): + return { + 'TEST': { + 'table_id': table_id, + 'file_ids': [123] + } + } + + def _do_merge(self, files_n_topk_results, topk, reverse=False): + if not files_n_topk_results: + return [] + + request_results = defaultdict(list) + + calc_time = time.time() + for files_collection in files_n_topk_results: + for request_pos, each_request_results in enumerate(files_collection.topk_query_result): + request_results[request_pos].extend(each_request_results.query_result_arrays) + request_results[request_pos] = sorted(request_results[request_pos], key=lambda x: x.distance, + reverse=reverse)[:topk] + + calc_time = time.time() - calc_time + logger.info('Merge takes {}'.format(calc_time)) + + results = sorted(request_results.items()) + topk_query_result = [] + + for result in results: + query_result = TopKQueryResult(query_result_arrays=result[1]) + topk_query_result.append(query_result) + + return topk_query_result + + def _do_query(self, table_id, table_meta, vectors, 
topk, nprobe, range_array=None, **kwargs): + range_array = [self._range_to_date(r) for r in range_array] if range_array else None + routing = self._get_routing_file_ids(table_id, range_array) + logger.debug(routing) + + rs = [] + all_topk_results = [] + + workers = settings.SEARCH_WORKER_SIZE + + def search(addr, query_params, vectors, topk, nprobe, **kwargs): + logger.info('Send Search Request: addr={};params={};nq={};topk={};nprobe={}'.format( + addr, query_params, len(vectors), topk, nprobe + )) + + conn = self.query_conn(addr) + start = time.time() + ret = conn.search_vectors_in_files(table_name=query_params['table_id'], + file_ids=query_params['file_ids'], + query_records=vectors, + top_k=topk, + nprobe=nprobe, + lazy=True) + end = time.time() + logger.info('search_vectors_in_files takes: {}'.format(end - start)) + + all_topk_results.append(ret) + + with ThreadPoolExecutor(max_workers=workers) as pool: + for addr, params in routing.items(): + res = pool.submit(search, addr, params, vectors, topk, nprobe) + rs.append(res) + + for res in rs: + res.result() + + reverse = table_meta.metric_type == types.MetricType.L2 + return self._do_merge(all_topk_results, topk, reverse=reverse) + def CreateTable(self, request, context): _status, _table_schema = Parser.parse_proto_TableSchema(request) @@ -87,64 +189,64 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def Search(self, request, context): - try: - table_name = request.table_name + table_name = request.table_name - topk = request.topk - nprobe = request.nprobe + topk = request.topk + nprobe = request.nprobe - logger.info('Search {}: topk={} nprobe={}'.format(table_name, topk, nprobe)) + logger.info('Search {}: topk={} nprobe={}'.format(table_name, topk, nprobe)) - if nprobe > 2048 or nprobe <= 0: - raise exceptions.GRPCInvlidArgument('Invalid nprobe: {}'.format(nprobe)) + if nprobe > self.MAX_NPROBE or nprobe <= 0: + raise exceptions.GRPCInvlidArgument('Invalid nprobe: {}'.format(nprobe)) - table_meta 
= self.table_meta.get(table_name, None) - if not table_meta: - status, info = self.connection.describe_table(table_name) - if not status.OK(): - raise TableNotFoundException(table_name) + table_meta = self.table_meta.get(table_name, None) + if not table_meta: + status, info = self.connection.describe_table(table_name) + if not status.OK(): + raise exceptions.TableNotFoundError(table_name) - self.table_meta[table_name] = info - table_meta = info + self.table_meta[table_name] = info + table_meta = info - start = time.time() + start = time.time() - query_record_array = [] + query_record_array = [] - for query_record in request.query_record_array: - query_record_array.append(list(query_record.vector_data)) + for query_record in request.query_record_array: + query_record_array.append(list(query_record.vector_data)) - query_range_array = [] - for query_range in request.query_range_array: - query_range_array.append( - Range(query_range.start_value, query_range.end_value)) - except (TableNotFoundException, exceptions.GRPCInvlidArgument) as exc: - return milvus_pb2.TopKQueryResultList( - status=status_pb2.Status(error_code=exc.code, reason=exc.message) - ) - except Exception as e: - return milvus_pb2.TopKQueryResultList( - status=status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, reason=str(e)) - ) + query_range_array = [] + for query_range in request.query_range_array: + query_range_array.append( + Range(query_range.start_value, query_range.end_value)) + # except (TableNotFoundException, exceptions.GRPCInvlidArgument) as exc: + # return milvus_pb2.TopKQueryResultList( + # status=status_pb2.Status(error_code=exc.code, reason=exc.message) + # ) + # except Exception as e: + # return milvus_pb2.TopKQueryResultList( + # status=status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, reason=str(e)) + # ) - try: - results = workflow.query_vectors(table_name, table_meta, query_record_array, topk, - nprobe, query_range_array) - except 
(exceptions.GRPCQueryInvalidRangeException, TableNotFoundException) as exc: - return milvus_pb2.TopKQueryResultList( - status=status_pb2.Status(error_code=exc.code, reason=exc.message) - ) - except exceptions.ServiceNotFoundException as exc: - return milvus_pb2.TopKQueryResultList( - status=status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, reason=exc.message) - ) - except Exception as e: - logger.error(e) - results = workflow.query_vectors(table_name, table_meta, query_record_array, - topk, nprobe, query_range_array) + results = self._do_query(table_name, table_meta, query_record_array, topk, + nprobe, query_range_array) + # try: + # results = workflow.query_vectors(table_name, table_meta, query_record_array, topk, + # nprobe, query_range_array) + # except (exceptions.GRPCQueryInvalidRangeException, TableNotFoundException) as exc: + # return milvus_pb2.TopKQueryResultList( + # status=status_pb2.Status(error_code=exc.code, reason=exc.message) + # ) + # except exceptions.ServiceNotFoundException as exc: + # return milvus_pb2.TopKQueryResultList( + # status=status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, reason=exc.message) + # ) + # except Exception as e: + # logger.error(e) + # results = workflow.query_vectors(table_name, table_meta, query_record_array, + # topk, nprobe, query_range_array) now = time.time() - logger.info('SearchVector Ends @{}'.format(now)) logger.info('SearchVector takes: {}'.format(now - start)) topk_result_list = milvus_pb2.TopKQueryResultList( @@ -154,41 +256,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return topk_result_list def SearchInFiles(self, request, context): - try: - file_id_array = list(request.file_id_array) - search_param = request.search_param - table_name = search_param.table_name - topk = search_param.topk - nprobe = search_param.nprobe - - query_record_array = [] - - for query_record in search_param.query_record_array: - query_record_array.append(list(query_record)) - - query_range_array = 
[] - for query_range in search_param.query_range_array: - query_range_array.append("") - except Exception as e: - milvus_pb2.TopKQueryResultList( - status=status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, reason=str(e)), - ) - - res = search_vector_in_files.delay(table_name=table_name, - file_id_array=file_id_array, - query_record_array=query_record_array, - query_range_array=query_range_array, - topk=topk, - nprobe=nprobe) - status, result = res.get(timeout=1) - - if not status.OK(): - raise ThriftException(code=status.code, reason=status.message) - res = TopKQueryResult() - for top_k_query_results in result: - res.query_result_arrays.append([QueryResult(id=qr.id, distance=qr.distance) - for qr in top_k_query_results]) - return res + raise NotImplemented() def DescribeTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) diff --git a/mishards/settings.py b/mishards/settings.py index 0566cf066f..4d87e69fe3 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -21,6 +21,7 @@ config(LOG_LEVEL, LOG_PATH, LOG_NAME, TIMEZONE) TIMEOUT = env.int('TIMEOUT', 60) MAX_RETRY = env.int('MAX_RETRY', 3) +SEARCH_WORKER_SIZE = env.int('SEARCH_WORKER_SIZE', 10) SERVER_PORT = env.int('SERVER_PORT', 19530) WOSERVER = env.str('WOSERVER') From deb4a5fb62ff540eb06003d9b2940d09b8aeeb16 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 18 Sep 2019 14:50:36 +0800 Subject: [PATCH 006/126] update for service discovery --- mishards/__init__.py | 8 ++ mishards/connections.py | 9 +- mishards/main.py | 16 ++- mishards/server.py | 2 +- mishards/service_founder.py | 273 ++++++++++++++++++++++++++++++++++++ mishards/service_handler.py | 7 +- mishards/settings.py | 11 +- 7 files changed, 315 insertions(+), 11 deletions(-) create mode 100644 mishards/service_founder.py diff --git a/mishards/__init__.py b/mishards/__init__.py index 700dd4238c..b3a14cf7e3 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -2,5 +2,13 @@ import 
settings from connections import ConnectionMgr connect_mgr = ConnectionMgr() +from service_founder import ServiceFounder +discover = ServiceFounder(namespace=settings.SD_NAMESPACE, + conn_mgr=connect_mgr, + pod_patt=settings.SD_ROSERVER_POD_PATT, + label_selector=settings.SD_LABEL_SELECTOR, + in_cluster=settings.SD_IN_CLUSTER, + poll_interval=settings.SD_POLL_INTERVAL) + from server import Server grpc_server = Server(conn_mgr=connect_mgr) diff --git a/mishards/connections.py b/mishards/connections.py index 06d5f3ff16..82dd082eac 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -29,7 +29,7 @@ class Connection: self.conn.connect(uri=self.uri) except Exception as e: if not self.error_handlers: - raise exceptions.ConnectionConnectError() + raise exceptions.ConnectionConnectError(e) for handler in self.error_handlers: handler(e) @@ -77,6 +77,10 @@ class ConnectionMgr: self.metas = {} self.conns = {} + @property + def conn_names(self): + return set(self.metas.keys()) - set(['WOSERVER']) + def conn(self, name, throw=False): c = self.conns.get(name, None) if not c: @@ -116,7 +120,8 @@ class ConnectionMgr: return self.on_diff_meta(name, url) def on_same_meta(self, name, url): - logger.warn('Register same meta: {}:{}'.format(name, url)) + # logger.warn('Register same meta: {}:{}'.format(name, url)) + pass def on_diff_meta(self, name, url): logger.warn('Received {} with diff url={}'.format(name, url)) diff --git a/mishards/main.py b/mishards/main.py index 2ba3f14697..0526f87ff8 100644 --- a/mishards/main.py +++ b/mishards/main.py @@ -3,13 +3,19 @@ import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import settings -from mishards import connect_mgr, grpc_server as server +from mishards import (connect_mgr, + discover, + grpc_server as server) def main(): - connect_mgr.register('WOSERVER', settings.WOSERVER) - connect_mgr.register('TEST', 'tcp://127.0.0.1:19530') - server.run(port=settings.SERVER_PORT) - return 0 + try: + 
discover.start() + connect_mgr.register('WOSERVER', settings.WOSERVER if not settings.TESTING else settings.TESTING_WOSERVER) + server.run(port=settings.SERVER_PORT) + return 0 + except Exception as e: + logger.error(e) + return 1 if __name__ == '__main__': sys.exit(main()) diff --git a/mishards/server.py b/mishards/server.py index 59ea7db46b..d2f88cf592 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -43,5 +43,5 @@ class Server: def stop(self): logger.info('Server is shuting down ......') self.exit_flag = True - self.server.stop(0) + self.server_impl.stop(0) logger.info('Server is closed') diff --git a/mishards/service_founder.py b/mishards/service_founder.py new file mode 100644 index 0000000000..7fc47639e7 --- /dev/null +++ b/mishards/service_founder.py @@ -0,0 +1,273 @@ +import os, sys +if __name__ == '__main__': + sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import re +import logging +import time +import copy +import threading +import queue +from functools import wraps +from kubernetes import client, config, watch + +from mishards.utils import singleton + +logger = logging.getLogger(__name__) + +incluster_namespace_path = '/var/run/secrets/kubernetes.io/serviceaccount/namespace' + + +class K8SMixin: + def __init__(self, namespace, in_cluster=False, **kwargs): + self.namespace = namespace + self.in_cluster = in_cluster + self.kwargs = kwargs + self.v1 = kwargs.get('v1', None) + if not self.namespace: + self.namespace = open(incluster_namespace_path).read() + + if not self.v1: + config.load_incluster_config() if self.in_cluster else config.load_kube_config() + self.v1 = client.CoreV1Api() + + +class K8SServiceDiscover(threading.Thread, K8SMixin): + def __init__(self, message_queue, namespace, label_selector, in_cluster=False, **kwargs): + K8SMixin.__init__(self, namespace=namespace, in_cluster=in_cluster, **kwargs) + threading.Thread.__init__(self) + self.queue = message_queue + self.terminate = False + 
self.label_selector = label_selector + self.poll_interval = kwargs.get('poll_interval', 5) + + def run(self): + while not self.terminate: + try: + pods = self.v1.list_namespaced_pod(namespace=self.namespace, label_selector=self.label_selector) + event_message = { + 'eType': 'PodHeartBeat', + 'events': [] + } + for item in pods.items: + pod = self.v1.read_namespaced_pod(name=item.metadata.name, namespace=self.namespace) + name = pod.metadata.name + ip = pod.status.pod_ip + phase = pod.status.phase + reason = pod.status.reason + message = pod.status.message + ready = True if phase == 'Running' else False + + pod_event = dict( + pod=name, + ip=ip, + ready=ready, + reason=reason, + message=message + ) + + event_message['events'].append(pod_event) + + self.queue.put(event_message) + + + except Exception as exc: + logger.error(exc) + + time.sleep(self.poll_interval) + + def stop(self): + self.terminate = True + + +class K8SEventListener(threading.Thread, K8SMixin): + def __init__(self, message_queue, namespace, in_cluster=False, **kwargs): + K8SMixin.__init__(self, namespace=namespace, in_cluster=in_cluster, **kwargs) + threading.Thread.__init__(self) + self.queue = message_queue + self.terminate = False + self.at_start_up = True + self._stop_event = threading.Event() + + def stop(self): + self.terminate = True + self._stop_event.set() + + def run(self): + resource_version = '' + w = watch.Watch() + for event in w.stream(self.v1.list_namespaced_event, namespace=self.namespace, + field_selector='involvedObject.kind=Pod'): + if self.terminate: + break + + resource_version = int(event['object'].metadata.resource_version) + + info = dict( + eType='WatchEvent', + pod=event['object'].involved_object.name, + reason=event['object'].reason, + message=event['object'].message, + start_up=self.at_start_up, + ) + self.at_start_up = False + # logger.info('Received event: {}'.format(info)) + self.queue.put(info) + + +class EventHandler(threading.Thread): + def __init__(self, mgr, 
message_queue, namespace, pod_patt, **kwargs): + threading.Thread.__init__(self) + self.mgr = mgr + self.queue = message_queue + self.kwargs = kwargs + self.terminate = False + self.pod_patt = re.compile(pod_patt) + self.namespace = namespace + + def stop(self): + self.terminate = True + + def on_drop(self, event, **kwargs): + pass + + def on_pod_started(self, event, **kwargs): + try_cnt = 3 + pod = None + while try_cnt > 0: + try_cnt -= 1 + try: + pod = self.mgr.v1.read_namespaced_pod(name=event['pod'], namespace=self.namespace) + if not pod.status.pod_ip: + time.sleep(0.5) + continue + break + except client.rest.ApiException as exc: + time.sleep(0.5) + + if try_cnt <= 0 and not pod: + if not event['start_up']: + logger.error('Pod {} is started but cannot read pod'.format(event['pod'])) + return + elif try_cnt <= 0 and not pod.status.pod_ip: + logger.warn('NoPodIPFoundError') + return + + logger.info('Register POD {} with IP {}'.format(pod.metadata.name, pod.status.pod_ip)) + self.mgr.add_pod(name=pod.metadata.name, ip=pod.status.pod_ip) + + def on_pod_killing(self, event, **kwargs): + logger.info('Unregister POD {}'.format(event['pod'])) + self.mgr.delete_pod(name=event['pod']) + + def on_pod_heartbeat(self, event, **kwargs): + names = self.mgr.conn_mgr.conn_names + + running_names = set() + for each_event in event['events']: + if each_event['ready']: + self.mgr.add_pod(name=each_event['pod'], ip=each_event['ip']) + running_names.add(each_event['pod']) + else: + self.mgr.delete_pod(name=each_event['pod']) + + to_delete = names - running_names + for name in to_delete: + self.mgr.delete_pod(name) + + logger.info(self.mgr.conn_mgr.conn_names) + + def handle_event(self, event): + if event['eType'] == 'PodHeartBeat': + return self.on_pod_heartbeat(event) + + if not event or (event['reason'] not in ('Started', 'Killing')): + return self.on_drop(event) + + if not re.match(self.pod_patt, event['pod']): + return self.on_drop(event) + + logger.info('Handling event: 
{}'.format(event)) + + if event['reason'] == 'Started': + return self.on_pod_started(event) + + return self.on_pod_killing(event) + + def run(self): + while not self.terminate: + try: + event = self.queue.get(timeout=1) + self.handle_event(event) + except queue.Empty: + continue + +@singleton +class ServiceFounder(object): + def __init__(self, conn_mgr, namespace, pod_patt, label_selector, in_cluster=False, **kwargs): + self.namespace = namespace + self.kwargs = kwargs + self.queue = queue.Queue() + self.in_cluster = in_cluster + + self.conn_mgr = conn_mgr + + if not self.namespace: + self.namespace = open(incluster_namespace_path).read() + + config.load_incluster_config() if self.in_cluster else config.load_kube_config() + self.v1 = client.CoreV1Api() + + self.listener = K8SEventListener( + message_queue=self.queue, + namespace=self.namespace, + in_cluster=self.in_cluster, + v1=self.v1, + **kwargs + ) + + self.pod_heartbeater = K8SServiceDiscover( + message_queue=self.queue, + namespace=namespace, + label_selector=label_selector, + in_cluster=self.in_cluster, + v1=self.v1, + **kwargs + ) + + self.event_handler = EventHandler(mgr=self, + message_queue=self.queue, + namespace=self.namespace, + pod_patt=pod_patt, **kwargs) + + def add_pod(self, name, ip): + self.conn_mgr.register(name, 'tcp://{}:19530'.format(ip)) + + def delete_pod(self, name): + self.conn_mgr.unregister(name) + + def start(self): + self.listener.daemon = True + self.listener.start() + self.event_handler.start() + while self.listener.at_start_up: + time.sleep(1) + + self.pod_heartbeater.start() + + def stop(self): + self.listener.stop() + self.pod_heartbeater.stop() + self.event_handler.stop() + + +if __name__ == '__main__': + from mishards import connect_mgr + logging.basicConfig(level=logging.INFO) + t = ServiceFounder(namespace='xp', conn_mgr=connect_mgr, pod_patt=".*-ro-servers-.*", label_selector='tier=ro-servers', in_cluster=False) + t.start() + cnt = 2 + while cnt > 0: + time.sleep(2) + cnt 
-= 1 + t.stop() diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 89ae2cd36c..516359f27c 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -11,6 +11,7 @@ from milvus.client import types import settings from grpc_utils.grpc_args_parser import GrpcArgsParser as Parser +import exceptions logger = logging.getLogger(__name__) @@ -30,7 +31,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def query_conn(self, name): conn = self.conn_mgr.conn(name) - conn and conn.on_connect() + if not conn: + raise exceptions.ConnectionNotFoundError(name) + conn.on_connect() return conn.conn def _format_date(self, start, end): @@ -51,7 +54,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def _get_routing_file_ids(self, table_id, range_array): return { - 'TEST': { + 'milvus-ro-servers-0': { 'table_id': table_id, 'file_ids': [123] } diff --git a/mishards/settings.py b/mishards/settings.py index 4d87e69fe3..c4466da6ec 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -7,7 +7,6 @@ env = Env() env.read_env() DEBUG = env.bool('DEBUG', False) -TESTING = env.bool('TESTING', False) METADATA_URI = env.str('METADATA_URI', '') @@ -26,6 +25,16 @@ SEARCH_WORKER_SIZE = env.int('SEARCH_WORKER_SIZE', 10) SERVER_PORT = env.int('SERVER_PORT', 19530) WOSERVER = env.str('WOSERVER') +SD_NAMESPACE = env.str('SD_NAMESPACE', '') +SD_IN_CLUSTER = env.bool('SD_IN_CLUSTER', False) +SD_POLL_INTERVAL = env.int('SD_POLL_INTERVAL', 5) +SD_ROSERVER_POD_PATT = env.str('SD_ROSERVER_POD_PATT', '') +SD_LABEL_SELECTOR = env.str('SD_LABEL_SELECTOR', '') + +TESTING = env.bool('TESTING', False) +TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') + + if __name__ == '__main__': import logging logger = logging.getLogger(__name__) From 099317edeeea5db14be23709736a8a13ffe4933a Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 18 Sep 2019 15:43:42 +0800 Subject: [PATCH 007/126] add models --- manager.py | 14 
+++++++ mishards/__init__.py | 13 +++++-- mishards/connections.py | 5 +-- mishards/db_base.py | 27 +++++++++++++ mishards/exceptions.py | 2 +- mishards/main.py | 20 ++++------ mishards/models.py | 75 +++++++++++++++++++++++++++++++++++++ mishards/server.py | 4 +- mishards/service_handler.py | 6 +-- mishards/settings.py | 4 +- 10 files changed, 144 insertions(+), 26 deletions(-) create mode 100644 manager.py create mode 100644 mishards/db_base.py create mode 100644 mishards/models.py diff --git a/manager.py b/manager.py new file mode 100644 index 0000000000..0a2acad26f --- /dev/null +++ b/manager.py @@ -0,0 +1,14 @@ +import fire +from mishards import db + +class DBHandler: + @classmethod + def create_all(cls): + db.create_all() + + @classmethod + def drop_all(cls): + db.drop_all() + +if __name__ == '__main__': + fire.Fire(DBHandler) diff --git a/mishards/__init__.py b/mishards/__init__.py index b3a14cf7e3..c799e42fa4 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -1,8 +1,13 @@ -import settings -from connections import ConnectionMgr +from mishards import settings + +from mishards.db_base import DB +db = DB() +db.init_db(uri=settings.SQLALCHEMY_DATABASE_URI) + +from mishards.connections import ConnectionMgr connect_mgr = ConnectionMgr() -from service_founder import ServiceFounder +from mishards.service_founder import ServiceFounder discover = ServiceFounder(namespace=settings.SD_NAMESPACE, conn_mgr=connect_mgr, pod_patt=settings.SD_ROSERVER_POD_PATT, @@ -10,5 +15,5 @@ discover = ServiceFounder(namespace=settings.SD_NAMESPACE, in_cluster=settings.SD_IN_CLUSTER, poll_interval=settings.SD_POLL_INTERVAL) -from server import Server +from mishards.server import Server grpc_server = Server(conn_mgr=connect_mgr) diff --git a/mishards/connections.py b/mishards/connections.py index 82dd082eac..9201ea2b08 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -4,9 +4,8 @@ from functools import wraps from contextlib import contextmanager from 
milvus import Milvus -import settings -import exceptions -from utils import singleton +from mishards import (settings, exceptions) +from mishards.utils import singleton logger = logging.getLogger(__name__) diff --git a/mishards/db_base.py b/mishards/db_base.py new file mode 100644 index 0000000000..702c9e57e9 --- /dev/null +++ b/mishards/db_base.py @@ -0,0 +1,27 @@ +from sqlalchemy import create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker, scoped_session + +class DB: + Model = declarative_base() + def __init__(self, uri=None): + uri and self.init_db(uri) + + def init_db(self, uri): + self.engine = create_engine(uri, pool_size=100, pool_recycle=5, pool_timeout=30, + pool_pre_ping=True, + max_overflow=0) + self.uri = uri + session = sessionmaker() + session.configure(bind=self.engine) + self.db_session = session() + + @property + def Session(self): + return self.db_session + + def drop_all(self): + self.Model.metadata.drop_all(self.engine) + + def create_all(self): + self.Model.metadata.create_all(self.engine) diff --git a/mishards/exceptions.py b/mishards/exceptions.py index 1445d18769..0f89ecb52d 100644 --- a/mishards/exceptions.py +++ b/mishards/exceptions.py @@ -1,4 +1,4 @@ -import exception_codes as codes +import mishards.exception_codes as codes class BaseException(Exception): code = codes.INVALID_CODE diff --git a/mishards/main.py b/mishards/main.py index 0526f87ff8..5d96d8b499 100644 --- a/mishards/main.py +++ b/mishards/main.py @@ -1,21 +1,17 @@ -import sys -import os +import os, sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import settings -from mishards import (connect_mgr, +from mishards import ( + settings, + db, connect_mgr, discover, grpc_server as server) def main(): - try: - discover.start() - connect_mgr.register('WOSERVER', settings.WOSERVER if not settings.TESTING else settings.TESTING_WOSERVER) - server.run(port=settings.SERVER_PORT) - return 0 - 
except Exception as e: - logger.error(e) - return 1 + discover.start() + connect_mgr.register('WOSERVER', settings.WOSERVER if not settings.TESTING else settings.TESTING_WOSERVER) + server.run(port=settings.SERVER_PORT) + return 0 if __name__ == '__main__': sys.exit(main()) diff --git a/mishards/models.py b/mishards/models.py new file mode 100644 index 0000000000..c699f490dd --- /dev/null +++ b/mishards/models.py @@ -0,0 +1,75 @@ +import logging +from sqlalchemy import (Integer, Boolean, Text, + String, BigInteger, func, and_, or_, + Column) +from sqlalchemy.orm import relationship, backref + +from mishards import db + +logger = logging.getLogger(__name__) + +class TableFiles(db.Model): + FILE_TYPE_NEW = 0 + FILE_TYPE_RAW = 1 + FILE_TYPE_TO_INDEX = 2 + FILE_TYPE_INDEX = 3 + FILE_TYPE_TO_DELETE = 4 + FILE_TYPE_NEW_MERGE = 5 + FILE_TYPE_NEW_INDEX = 6 + FILE_TYPE_BACKUP = 7 + + __tablename__ = 'TableFiles' + + id = Column(BigInteger, primary_key=True, autoincrement=True) + table_id = Column(String(50)) + engine_type = Column(Integer) + file_id = Column(String(50)) + file_type = Column(Integer) + file_size = Column(Integer, default=0) + row_count = Column(Integer, default=0) + updated_time = Column(BigInteger) + created_on = Column(BigInteger) + date = Column(Integer) + + table = relationship( + 'Table', + primaryjoin='and_(foreign(TableFile.table_id) == Table.table_id)', + backref=backref('files', uselist=True, lazy='dynamic') + ) + + +class Tables(db.Model): + TO_DELETE = 1 + NORMAL = 0 + + __tablename__ = 'Tables' + + id = Column(BigInteger, primary_key=True, autoincrement=True) + table_id = Column(String(50), unique=True) + state = Column(Integer) + dimension = Column(Integer) + created_on = Column(Integer) + flag = Column(Integer, default=0) + index_file_size = Column(Integer) + engine_type = Column(Integer) + nlist = Column(Integer) + metric_type = Column(Integer) + + def files_to_search(self, date_range=None): + cond = or_( + 
TableFile.file_type==TableFile.FILE_TYPE_RAW, + TableFile.file_type==TableFile.FILE_TYPE_TO_INDEX, + TableFile.file_type==TableFile.FILE_TYPE_INDEX, + ) + if date_range: + cond = and_( + cond, + or_( + and_(TableFile.date>=d[0], TableFile.date Date: Wed, 18 Sep 2019 16:59:04 +0800 Subject: [PATCH 008/126] update for models --- manager.py | 13 ++++ mishards/__init__.py | 2 +- mishards/db_base.py | 11 ++- mishards/factories.py | 49 ++++++++++++ mishards/hash_ring.py | 150 ++++++++++++++++++++++++++++++++++++ mishards/models.py | 12 +-- mishards/service_founder.py | 4 +- mishards/service_handler.py | 39 ++++++++-- mishards/settings.py | 1 + 9 files changed, 262 insertions(+), 19 deletions(-) create mode 100644 mishards/factories.py create mode 100644 mishards/hash_ring.py diff --git a/manager.py b/manager.py index 0a2acad26f..31f5894d2d 100644 --- a/manager.py +++ b/manager.py @@ -1,5 +1,6 @@ import fire from mishards import db +from sqlalchemy import and_ class DBHandler: @classmethod @@ -10,5 +11,17 @@ class DBHandler: def drop_all(cls): db.drop_all() + @classmethod + def fun(cls, tid): + from mishards.factories import TablesFactory, TableFilesFactory, Tables + f = db.Session.query(Tables).filter(and_( + Tables.table_id==tid, + Tables.state!=Tables.TO_DELETE) + ).first() + print(f) + + # f1 = TableFilesFactory() + + if __name__ == '__main__': fire.Fire(DBHandler) diff --git a/mishards/__init__.py b/mishards/__init__.py index c799e42fa4..a792cd5ce9 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -2,7 +2,7 @@ from mishards import settings from mishards.db_base import DB db = DB() -db.init_db(uri=settings.SQLALCHEMY_DATABASE_URI) +db.init_db(uri=settings.SQLALCHEMY_DATABASE_URI, echo=settings.SQL_ECHO) from mishards.connections import ConnectionMgr connect_mgr = ConnectionMgr() diff --git a/mishards/db_base.py b/mishards/db_base.py index 702c9e57e9..5ad1c394d7 100644 --- a/mishards/db_base.py +++ b/mishards/db_base.py @@ -1,15 +1,20 @@ +import logging 
from sqlalchemy import create_engine from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker, scoped_session +logger = logging.getLogger(__name__) + class DB: Model = declarative_base() - def __init__(self, uri=None): - uri and self.init_db(uri) + def __init__(self, uri=None, echo=False): + self.echo = echo + uri and self.init_db(uri, echo) - def init_db(self, uri): + def init_db(self, uri, echo=False): self.engine = create_engine(uri, pool_size=100, pool_recycle=5, pool_timeout=30, pool_pre_ping=True, + echo=echo, max_overflow=0) self.uri = uri session = sessionmaker() diff --git a/mishards/factories.py b/mishards/factories.py new file mode 100644 index 0000000000..5bd059654a --- /dev/null +++ b/mishards/factories.py @@ -0,0 +1,49 @@ +import time +import datetime +import random +import factory +from factory.alchemy import SQLAlchemyModelFactory +from faker import Faker +from faker.providers import BaseProvider + +from mishards import db +from mishards.models import Tables, TableFiles + +class FakerProvider(BaseProvider): + def this_date(self): + t = datetime.datetime.today() + return (t.year - 1900) * 10000 + (t.month-1)*100 + t.day + +factory.Faker.add_provider(FakerProvider) + +class TablesFactory(SQLAlchemyModelFactory): + class Meta: + model = Tables + sqlalchemy_session = db.Session + sqlalchemy_session_persistence = 'commit' + + id = factory.Faker('random_number', digits=16, fix_len=True) + table_id = factory.Faker('uuid4') + state = factory.Faker('random_element', elements=(0,1,2,3)) + dimension = factory.Faker('random_element', elements=(256,512)) + created_on = int(time.time()) + index_file_size = 0 + engine_type = factory.Faker('random_element', elements=(0,1,2,3)) + metric_type = factory.Faker('random_element', elements=(0,1)) + nlist = 16384 + +class TableFilesFactory(SQLAlchemyModelFactory): + class Meta: + model = TableFiles + sqlalchemy_session = db.Session + sqlalchemy_session_persistence = 'commit' + + id = 
factory.Faker('random_number', digits=16, fix_len=True) + table = factory.SubFactory(TablesFactory) + engine_type = factory.Faker('random_element', elements=(0,1,2,3)) + file_id = factory.Faker('uuid4') + file_type = factory.Faker('random_element', elements=(0,1,2,3,4)) + file_size = factory.Faker('random_number') + updated_time = int(time.time()) + created_on = int(time.time()) + date = factory.Faker('this_date') diff --git a/mishards/hash_ring.py b/mishards/hash_ring.py new file mode 100644 index 0000000000..bfec108c5c --- /dev/null +++ b/mishards/hash_ring.py @@ -0,0 +1,150 @@ +import math +import sys +from bisect import bisect + +if sys.version_info >= (2, 5): + import hashlib + md5_constructor = hashlib.md5 +else: + import md5 + md5_constructor = md5.new + +class HashRing(object): + + def __init__(self, nodes=None, weights=None): + """`nodes` is a list of objects that have a proper __str__ representation. + `weights` is dictionary that sets weights to the nodes. The default + weight is that all nodes are equal. + """ + self.ring = dict() + self._sorted_keys = [] + + self.nodes = nodes + + if not weights: + weights = {} + self.weights = weights + + self._generate_circle() + + def _generate_circle(self): + """Generates the circle. + """ + total_weight = 0 + for node in self.nodes: + total_weight += self.weights.get(node, 1) + + for node in self.nodes: + weight = 1 + + if node in self.weights: + weight = self.weights.get(node) + + factor = math.floor((40*len(self.nodes)*weight) / total_weight); + + for j in range(0, int(factor)): + b_key = self._hash_digest( '%s-%s' % (node, j) ) + + for i in range(0, 3): + key = self._hash_val(b_key, lambda x: x+i*4) + self.ring[key] = node + self._sorted_keys.append(key) + + self._sorted_keys.sort() + + def get_node(self, string_key): + """Given a string key a corresponding node in the hash ring is returned. + + If the hash ring is empty, `None` is returned. 
+ """ + pos = self.get_node_pos(string_key) + if pos is None: + return None + return self.ring[ self._sorted_keys[pos] ] + + def get_node_pos(self, string_key): + """Given a string key a corresponding node in the hash ring is returned + along with it's position in the ring. + + If the hash ring is empty, (`None`, `None`) is returned. + """ + if not self.ring: + return None + + key = self.gen_key(string_key) + + nodes = self._sorted_keys + pos = bisect(nodes, key) + + if pos == len(nodes): + return 0 + else: + return pos + + def iterate_nodes(self, string_key, distinct=True): + """Given a string key it returns the nodes as a generator that can hold the key. + + The generator iterates one time through the ring + starting at the correct position. + + if `distinct` is set, then the nodes returned will be unique, + i.e. no virtual copies will be returned. + """ + if not self.ring: + yield None, None + + returned_values = set() + def distinct_filter(value): + if str(value) not in returned_values: + returned_values.add(str(value)) + return value + + pos = self.get_node_pos(string_key) + for key in self._sorted_keys[pos:]: + val = distinct_filter(self.ring[key]) + if val: + yield val + + for i, key in enumerate(self._sorted_keys): + if i < pos: + val = distinct_filter(self.ring[key]) + if val: + yield val + + def gen_key(self, key): + """Given a string key it returns a long value, + this long value represents a place on the hash ring. + + md5 is currently used because it mixes well. 
+ """ + b_key = self._hash_digest(key) + return self._hash_val(b_key, lambda x: x) + + def _hash_val(self, b_key, entry_fn): + return (( b_key[entry_fn(3)] << 24) + |(b_key[entry_fn(2)] << 16) + |(b_key[entry_fn(1)] << 8) + | b_key[entry_fn(0)] ) + + def _hash_digest(self, key): + m = md5_constructor() + key = key.encode() + m.update(key) + return m.digest() + +if __name__ == '__main__': + from collections import defaultdict + servers = ['192.168.0.246:11212', + '192.168.0.247:11212', + '192.168.0.248:11212', + '192.168.0.249:11212'] + + ring = HashRing(servers) + keys = ['{}'.format(i) for i in range(100)] + mapped = defaultdict(list) + for k in keys: + server = ring.get_node(k) + mapped[server].append(k) + + for k,v in mapped.items(): + print(k, v) diff --git a/mishards/models.py b/mishards/models.py index c699f490dd..0f7bb603ae 100644 --- a/mishards/models.py +++ b/mishards/models.py @@ -32,8 +32,8 @@ class TableFiles(db.Model): date = Column(Integer) table = relationship( - 'Table', - primaryjoin='and_(foreign(TableFile.table_id) == Table.table_id)', + 'Tables', + primaryjoin='and_(foreign(TableFiles.table_id) == Tables.table_id)', backref=backref('files', uselist=True, lazy='dynamic') ) @@ -57,15 +57,15 @@ class Tables(db.Model): def files_to_search(self, date_range=None): cond = or_( - TableFile.file_type==TableFile.FILE_TYPE_RAW, - TableFile.file_type==TableFile.FILE_TYPE_TO_INDEX, - TableFile.file_type==TableFile.FILE_TYPE_INDEX, + TableFiles.file_type==TableFiles.FILE_TYPE_RAW, + TableFiles.file_type==TableFiles.FILE_TYPE_TO_INDEX, + TableFiles.file_type==TableFiles.FILE_TYPE_INDEX, ) if date_range: cond = and_( cond, or_( - and_(TableFile.date>=d[0], TableFile.date=d[0], TableFiles.date Date: Wed, 18 Sep 2019 17:09:03 +0800 Subject: [PATCH 009/126] fix session bug --- mishards/db_base.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mishards/db_base.py b/mishards/db_base.py index 5ad1c394d7..ffbe29f94f 100644 --- 
a/mishards/db_base.py +++ b/mishards/db_base.py @@ -17,13 +17,12 @@ class DB: echo=echo, max_overflow=0) self.uri = uri - session = sessionmaker() - session.configure(bind=self.engine) - self.db_session = session() + self.session = sessionmaker() + self.session.configure(bind=self.engine) @property def Session(self): - return self.db_session + return self.session() def drop_all(self): self.Model.metadata.drop_all(self.engine) From f22204878a1b7fefda9cb258ce4002c01100a86f Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 18 Sep 2019 17:09:38 +0800 Subject: [PATCH 010/126] fix session bug --- mishards/service_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 7dd4380d97..eb2951be5e 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -60,7 +60,6 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): Tables.table_id==table_id, Tables.state!=Tables.TO_DELETE )).first() - logger.error(table) if not table: raise exceptions.TableNotFoundError(table_id) From 0ad5c32c46f29fd5486d02e30f74cc06f17c4eb6 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 18 Sep 2019 18:00:30 +0800 Subject: [PATCH 011/126] update requirements.txt --- requirements.txt | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000..8cedabdf7b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,33 @@ +environs==4.2.0 +factory-boy==2.12.0 +Faker==1.0.7 +fire==0.1.3 +google-auth==1.6.3 +grpcio==1.22.0 +grpcio-tools==1.22.0 +kubernetes==10.0.1 +MarkupSafe==1.1.1 +marshmallow==2.19.5 +pymysql==0.9.3 +protobuf==3.9.1 +py==1.8.0 +pyasn1==0.4.7 +pyasn1-modules==0.2.6 +pylint==2.3.1 +#pymilvus-test==0.2.15 +pymilvus==0.2.0 +pyparsing==2.4.0 +pytest==4.6.3 +pytest-level==0.1.1 +pytest-print==0.1.2 +pytest-repeat==0.8.0 +pytest-timeout==1.3.3 +python-dateutil==2.8.0 
+python-dotenv==0.10.3 +pytz==2019.1 +requests==2.22.0 +requests-oauthlib==1.2.0 +rsa==4.0 +six==1.12.0 +SQLAlchemy==1.3.5 +urllib3==1.25.3 From c042d2f3234038e01a00e7bc0631b2e653387642 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 18 Sep 2019 18:16:51 +0800 Subject: [PATCH 012/126] add dockerfile --- Dockerfile | 10 ++++++++++ build.sh | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 Dockerfile create mode 100755 build.sh diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000..594640619e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.6 +RUN apt update && apt install -y \ + less \ + telnet +RUN mkdir /source +WORKDIR /source +ADD ./requirements.txt ./ +RUN pip install -r requirements.txt +COPY . . +CMD python mishards/main.py diff --git a/build.sh b/build.sh new file mode 100755 index 0000000000..2b3c89bbf9 --- /dev/null +++ b/build.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +BOLD=`tput bold` +NORMAL=`tput sgr0` +YELLOW='\033[1;33m' +ENDC='\033[0m' + +function build_image() { + dockerfile=$1 + remote_registry=$2 + tagged=$2 + buildcmd="docker build -t ${tagged} -f ${dockerfile} ." + echo -e "${BOLD}$buildcmd${NORMAL}" + $buildcmd + pushcmd="docker push ${remote_registry}" + echo -e "${BOLD}$pushcmd${NORMAL}" + $pushcmd + echo -e "${YELLOW}${BOLD}Image: ${remote_registry}${NORMAL}${ENDC}" +} + +case "$1" in + +all) + version="" + [[ ! -z $2 ]] && version=":${2}" + build_image "Dockerfile" "registry.zilliz.com/milvus/mishards${version}" "registry.zilliz.com/milvus/mishards" + ;; +*) + echo "Usage: [option...] 
{base | apps}" + echo "all, Usage: build.sh all [tagname|] => registry.zilliz.com/milvus/mishards:\${tagname}" + ;; +esac From dd59127e9722fcdc9d4b19f17358fb65a73691d4 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 18 Sep 2019 18:17:51 +0800 Subject: [PATCH 013/126] add env example --- mishards/.env.example | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 mishards/.env.example diff --git a/mishards/.env.example b/mishards/.env.example new file mode 100644 index 0000000000..22406c7f34 --- /dev/null +++ b/mishards/.env.example @@ -0,0 +1,14 @@ +DEBUG=False + +WOSERVER=tcp://127.0.0.1:19530 +TESTING_WOSERVER=tcp://127.0.0.1:19530 +SERVER_PORT=19531 + +SD_NAMESPACE=xp +SD_IN_CLUSTER=False +SD_POLL_INTERVAL=5 +SD_ROSERVER_POD_PATT=.*-ro-servers-.* +SD_LABEL_SELECTOR=tier=ro-servers + +SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 +SQL_ECHO=True From cee3d7e20ce1141eb01091d6c262d6e0a771fbf1 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 18 Sep 2019 20:16:02 +0800 Subject: [PATCH 014/126] remove dummy settings --- mishards/settings.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mishards/settings.py b/mishards/settings.py index 62948e2fa9..2bf7e96a8f 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -8,8 +8,6 @@ env.read_env() DEBUG = env.bool('DEBUG', False) -METADATA_URI = env.str('METADATA_URI', '') - LOG_LEVEL = env.str('LOG_LEVEL', 'DEBUG' if DEBUG else 'INFO') LOG_PATH = env.str('LOG_PATH', '/tmp/mishards') LOG_NAME = env.str('LOG_NAME', 'logfile') From e04e00df4b0d5c1358da941267203880c5f2bd96 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 18 Sep 2019 20:16:18 +0800 Subject: [PATCH 015/126] add docker ignore file --- .dockerignore | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000..d1012a3afd --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +.git +.gitignore +.env 
+ +mishards/.env From e242a1cc91fe4b3afea1dc88f4a42b1817f5b5b2 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 18 Sep 2019 20:16:46 +0800 Subject: [PATCH 016/126] temp support dns addr --- mishards/connections.py | 1 + mishards/main.py | 8 +++++++- mishards/service_handler.py | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/mishards/connections.py b/mishards/connections.py index 9201ea2b08..c6323f66f8 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -1,5 +1,6 @@ import logging import threading +import socket from functools import wraps from contextlib import contextmanager from milvus import Milvus diff --git a/mishards/main.py b/mishards/main.py index 5d96d8b499..e9c47f9edf 100644 --- a/mishards/main.py +++ b/mishards/main.py @@ -1,6 +1,9 @@ import os, sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from urllib.parse import urlparse +import socket + from mishards import ( settings, db, connect_mgr, @@ -9,7 +12,10 @@ from mishards import ( def main(): discover.start() - connect_mgr.register('WOSERVER', settings.WOSERVER if not settings.TESTING else settings.TESTING_WOSERVER) + woserver = settings.WOSERVER if not settings.TESTING else settings.TESTING_WOSERVER + url = urlparse(woserver) + connect_mgr.register('WOSERVER', + '{}://{}:{}'.format(url.scheme, socket.gethostbyname(url.hostname), url.port)) server.run(port=settings.SERVER_PORT) return 0 diff --git a/mishards/service_handler.py b/mishards/service_handler.py index eb2951be5e..ac70440c47 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -145,7 +145,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): for res in rs: res.result() - reverse = table_meta.metric_type == types.MetricType.L2 + reverse = table_meta.metric_type == types.MetricType.IP return self._do_merge(all_topk_results, topk, reverse=reverse) def CreateTable(self, request, context): From 512e2b31c46708401c3cba3f3f65c0cc092feef6 
Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 19 Sep 2019 10:17:00 +0800 Subject: [PATCH 017/126] add pre run handlers --- mishards/main.py | 10 ---------- mishards/server.py | 28 +++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/mishards/main.py b/mishards/main.py index e9c47f9edf..7fac55dfa2 100644 --- a/mishards/main.py +++ b/mishards/main.py @@ -1,21 +1,11 @@ import os, sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from urllib.parse import urlparse -import socket - from mishards import ( settings, - db, connect_mgr, - discover, grpc_server as server) def main(): - discover.start() - woserver = settings.WOSERVER if not settings.TESTING else settings.TESTING_WOSERVER - url = urlparse(woserver) - connect_mgr.register('WOSERVER', - '{}://{}:{}'.format(url.scheme, socket.gethostbyname(url.hostname), url.port)) server.run(port=settings.SERVER_PORT) return 0 diff --git a/mishards/server.py b/mishards/server.py index 185ed3c957..19cca2c18a 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -1,17 +1,21 @@ import logging import grpc import time +import socket +from urllib.parse import urlparse +from functools import wraps from concurrent import futures from grpc._cython import cygrpc from milvus.grpc_gen.milvus_pb2_grpc import add_MilvusServiceServicer_to_server from mishards.service_handler import ServiceHandler -import mishards.settings +from mishards import settings, discover logger = logging.getLogger(__name__) class Server: def __init__(self, conn_mgr, port=19530, max_workers=10, **kwargs): + self.pre_run_handlers = set() self.exit_flag = False self.port = int(port) self.conn_mgr = conn_mgr @@ -21,6 +25,27 @@ class Server: (cygrpc.ChannelArgKey.max_receive_message_length, -1)] ) + self.register_pre_run_handler(self.pre_run_handler) + + def pre_run_handler(self): + woserver = settings.WOSERVER if not settings.TESTING else settings.TESTING_WOSERVER + url = urlparse(woserver) 
+ ip = socket.gethostbyname(url.hostname) + logger.error(ip) + socket.inet_pton(socket.AF_INET, ip) + self.conn_mgr.register('WOSERVER', + '{}://{}:{}'.format(url.scheme, ip, url.port)) + + def register_pre_run_handler(self, func): + logger.info('Regiterring {} into server pre_run_handlers'.format(func)) + self.pre_run_handlers.add(func) + return func + + def on_pre_run(self): + for handler in self.pre_run_handlers: + handler() + discover.start() + def start(self, port=None): add_MilvusServiceServicer_to_server(ServiceHandler(conn_mgr=self.conn_mgr), self.server_impl) self.server_impl.add_insecure_port("[::]:{}".format(str(port or self._port))) @@ -29,6 +54,7 @@ class Server: def run(self, port): logger.info('Milvus server start ......') port = port or self.port + self.on_pre_run() self.start(port) logger.info('Successfully') From d3e79f539ea64e78e6b05910fd607f16c1221e71 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 19 Sep 2019 10:18:46 +0800 Subject: [PATCH 018/126] add pre run handlers --- mishards/server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mishards/server.py b/mishards/server.py index 19cca2c18a..9966360d47 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -31,7 +31,6 @@ class Server: woserver = settings.WOSERVER if not settings.TESTING else settings.TESTING_WOSERVER url = urlparse(woserver) ip = socket.gethostbyname(url.hostname) - logger.error(ip) socket.inet_pton(socket.AF_INET, ip) self.conn_mgr.register('WOSERVER', '{}://{}:{}'.format(url.scheme, ip, url.port)) From 5249b80b0da577bde03da99f884957a5e6d3aad0 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 19 Sep 2019 10:22:07 +0800 Subject: [PATCH 019/126] remove dummy commented code --- mishards/service_handler.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index ac70440c47..f88655d2d6 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -246,32 +246,9 @@ class 
ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): for query_range in request.query_range_array: query_range_array.append( Range(query_range.start_value, query_range.end_value)) - # except (TableNotFoundException, exceptions.GRPCInvlidArgument) as exc: - # return milvus_pb2.TopKQueryResultList( - # status=status_pb2.Status(error_code=exc.code, reason=exc.message) - # ) - # except Exception as e: - # return milvus_pb2.TopKQueryResultList( - # status=status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, reason=str(e)) - # ) results = self._do_query(table_name, table_meta, query_record_array, topk, nprobe, query_range_array) - # try: - # results = workflow.query_vectors(table_name, table_meta, query_record_array, topk, - # nprobe, query_range_array) - # except (exceptions.GRPCQueryInvalidRangeException, TableNotFoundException) as exc: - # return milvus_pb2.TopKQueryResultList( - # status=status_pb2.Status(error_code=exc.code, reason=exc.message) - # ) - # except exceptions.ServiceNotFoundException as exc: - # return milvus_pb2.TopKQueryResultList( - # status=status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, reason=exc.message) - # ) - # except Exception as e: - # logger.error(e) - # results = workflow.query_vectors(table_name, table_meta, query_record_array, - # topk, nprobe, query_range_array) now = time.time() logger.info('SearchVector takes: {}'.format(now - start)) From 09d3e7844936dfcab6ad99e93218a581e4eb095c Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 19 Sep 2019 19:41:20 +0800 Subject: [PATCH 020/126] add exception handler --- mishards/connections.py | 21 ++++++++------ mishards/exception_handlers.py | 35 +++++++++++++++++++++++ mishards/exceptions.py | 3 +- mishards/server.py | 10 +++++++ mishards/service_handler.py | 51 ++++++++++++++++++++-------------- 5 files changed, 89 insertions(+), 31 deletions(-) create mode 100644 mishards/exception_handlers.py diff --git a/mishards/connections.py b/mishards/connections.py index 
c6323f66f8..365dc60125 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -24,14 +24,14 @@ class Connection: def __str__(self): return 'Connection:name=\"{}\";uri=\"{}\"'.format(self.name, self.uri) - def _connect(self): + def _connect(self, metadata=None): try: self.conn.connect(uri=self.uri) except Exception as e: if not self.error_handlers: - raise exceptions.ConnectionConnectError(e) + raise exceptions.ConnectionConnectError(message=str(e), metadata=metadata) for handler in self.error_handlers: - handler(e) + handler(e, metadata=metadata) @property def can_retry(self): @@ -47,14 +47,15 @@ class Connection: else: logger.warn('{} is retrying {}'.format(self, self.retried)) - def on_connect(self): + def on_connect(self, metadata=None): while not self.connected and self.can_retry: self.retried += 1 self.on_retry() - self._connect() + self._connect(metadata=metadata) if not self.can_retry and not self.connected: - raise exceptions.ConnectionConnectError(message='Max retry {} reached!'.format(self.max_retry)) + raise exceptions.ConnectionConnectError(message='Max retry {} reached!'.format(self.max_retry, + metadata=metadata)) self.retried = 0 @@ -81,14 +82,15 @@ class ConnectionMgr: def conn_names(self): return set(self.metas.keys()) - set(['WOSERVER']) - def conn(self, name, throw=False): + def conn(self, name, metadata, throw=False): c = self.conns.get(name, None) if not c: url = self.metas.get(name, None) if not url: if not throw: return None - raise exceptions.ConnectionNotFoundError('Connection {} not found'.format(name)) + raise exceptions.ConnectionNotFoundError(message='Connection {} not found'.format(name), + metadata=metadata) this_conn = Connection(name=name, uri=url, max_retry=settings.MAX_RETRY) threaded = { threading.get_ident() : this_conn @@ -103,7 +105,8 @@ class ConnectionMgr: if not url: if not throw: return None - raise exceptions.ConnectionNotFoundError('Connection {} not found'.format(name)) + raise 
exceptions.ConnectionNotFoundError('Connection {} not found'.format(name), + metadata=metadata) this_conn = Connection(name=name, uri=url, max_retry=settings.MAX_RETRY) c[tid] = this_conn return this_conn diff --git a/mishards/exception_handlers.py b/mishards/exception_handlers.py new file mode 100644 index 0000000000..3de0918be4 --- /dev/null +++ b/mishards/exception_handlers.py @@ -0,0 +1,35 @@ +import logging +from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 +from mishards import server, exceptions + +logger = logging.getLogger(__name__) + +def resp_handler(err, error_code): + if not isinstance(err, exceptions.BaseException): + return status_pb2.Status(error_code=error_code, reason=str(err)) + + status = status_pb2.Status(error_code=error_code, reason=err.message) + + if err.metadata is None: + return status + + resp_class = err.metadata.get('resp_class', None) + if not resp_class: + return status + + if resp_class == milvus_pb2.BoolReply: + return resp_class(status=status, bool_reply=False) + + if resp_class == milvus_pb2.VectorIds: + return resp_class(status=status, vector_id_array=[]) + + if resp_class == milvus_pb2.TopKQueryResultList: + return resp_class(status=status, topk_query_result=[]) + + status.error_code = status_pb2.UNEXPECTED_ERROR + return status + +@server.error_handler(exceptions.TableNotFoundError) +def TableNotFoundErrorHandler(err): + logger.error(err) + return resp_handler(err, status_pb2.TABLE_NOT_EXISTS) diff --git a/mishards/exceptions.py b/mishards/exceptions.py index 0f89ecb52d..1579fefcf4 100644 --- a/mishards/exceptions.py +++ b/mishards/exceptions.py @@ -3,8 +3,9 @@ import mishards.exception_codes as codes class BaseException(Exception): code = codes.INVALID_CODE message = 'BaseException' - def __init__(self, message=''): + def __init__(self, message='', metadata=None): self.message = self.__class__.__name__ if not message else message + self.metadata = metadata class ConnectionConnectError(BaseException): code = 
codes.CONNECT_ERROR_CODE diff --git a/mishards/server.py b/mishards/server.py index 9966360d47..b000016e29 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -2,6 +2,7 @@ import logging import grpc import time import socket +import inspect from urllib.parse import urlparse from functools import wraps from concurrent import futures @@ -16,6 +17,7 @@ logger = logging.getLogger(__name__) class Server: def __init__(self, conn_mgr, port=19530, max_workers=10, **kwargs): self.pre_run_handlers = set() + self.error_handler = {} self.exit_flag = False self.port = int(port) self.conn_mgr = conn_mgr @@ -40,6 +42,14 @@ class Server: self.pre_run_handlers.add(func) return func + def errorhandler(self, exception): + if inspect.isclass(exception) and issubclass(exception, Exception): + def wrapper(func): + self.error_handlers[exception] = func + return func + return wrapper + return exception + def on_pre_run(self): for handler in self.pre_run_handlers: handler() diff --git a/mishards/service_handler.py b/mishards/service_handler.py index f88655d2d6..5346be91d8 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -25,18 +25,17 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): self.conn_mgr = conn_mgr self.table_meta = {} - @property - def connection(self): + def connection(self, metadata=None): conn = self.conn_mgr.conn('WOSERVER') if conn: - conn.on_connect() + conn.on_connect(metadata=metadata) return conn.conn - def query_conn(self, name): - conn = self.conn_mgr.conn(name) + def query_conn(self, name, metadata=None): + conn = self.conn_mgr.conn(name, metadata=metadata) if not conn: - raise exceptions.ConnectionNotFoundError(name) - conn.on_connect() + raise exceptions.ConnectionNotFoundError(name, metadata=metadata) + conn.on_connect(metadata=metadata) return conn.conn def _format_date(self, start, end): @@ -55,14 +54,14 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return self._format_date(start, end) - def 
_get_routing_file_ids(self, table_id, range_array): + def _get_routing_file_ids(self, table_id, range_array, metadata=None): table = db.Session.query(Tables).filter(and_( Tables.table_id==table_id, Tables.state!=Tables.TO_DELETE )).first() if not table: - raise exceptions.TableNotFoundError(table_id) + raise exceptions.TableNotFoundError(table_id, metadata=metadata) files = table.files_to_search(range_array) servers = self.conn_mgr.conn_names @@ -84,7 +83,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return routing - def _do_merge(self, files_n_topk_results, topk, reverse=False): + def _do_merge(self, files_n_topk_results, topk, reverse=False, **kwargs): if not files_n_topk_results: return [] @@ -111,9 +110,11 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def _do_query(self, table_id, table_meta, vectors, topk, nprobe, range_array=None, **kwargs): range_array = [self._range_to_date(r) for r in range_array] if range_array else None - routing = self._get_routing_file_ids(table_id, range_array) + routing = self._get_routing_file_ids(table_id, range_array, metadata=metadata) logger.info('Routing: {}'.format(routing)) + metadata = kwargs.get('metadata', None) + rs = [] all_topk_results = [] @@ -124,7 +125,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): addr, query_params, len(vectors), topk, nprobe )) - conn = self.query_conn(addr) + conn = self.query_conn(addr, metadata=metadata) start = time.time() ret = conn.search_vectors_in_files(table_name=query_params['table_id'], file_ids=query_params['file_ids'], @@ -146,7 +147,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): res.result() reverse = table_meta.metric_type == types.MetricType.IP - return self._do_merge(all_topk_results, topk, reverse=reverse) + return self._do_merge(all_topk_results, topk, reverse=reverse, metadata=metadata) def CreateTable(self, request, context): _status, _table_schema = Parser.parse_proto_TableSchema(request) @@ -156,7 
+157,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('CreateTable {}'.format(_table_schema['table_name'])) - _status = self.connection.create_table(_table_schema) + _status = self.connection().create_table(_table_schema) return status_pb2.Status(error_code=_status.code, reason=_status.message) @@ -171,7 +172,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('HasTable {}'.format(_table_name)) - _bool = self.connection.has_table(_table_name) + _bool = self.connection(metadata={ + 'resp_class': milvus_pb2.BoolReply + }).has_table(_table_name) return milvus_pb2.BoolReply( status=status_pb2.Status(error_code=status_pb2.SUCCESS, reason="OK"), @@ -186,7 +189,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('DropTable {}'.format(_table_name)) - _status = self.connection.delete_table(_table_name) + _status = self.connection().delete_table(_table_name) return status_pb2.Status(error_code=_status.code, reason=_status.message) @@ -201,14 +204,16 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('CreateIndex {}'.format(_table_name)) # TODO: interface create_table incompleted - _status = self.connection.create_index(_table_name, _index) + _status = self.connection().create_index(_table_name, _index) return status_pb2.Status(error_code=_status.code, reason=_status.message) def Insert(self, request, context): logger.info('Insert') # TODO: Ths SDK interface add_vectors() could update, add a key 'row_id_array' - _status, _ids = self.connection.add_vectors(None, None, insert_param=request) + _status, _ids = self.connection(metadata={ + 'resp_class': milvus_pb2.VectorIds + }).add_vectors(None, None, insert_param=request) return milvus_pb2.VectorIds( status=status_pb2.Status(error_code=_status.code, reason=_status.message), vector_id_array=_ids @@ -227,10 +232,14 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): raise exceptions.GRPCInvlidArgument('Invalid 
nprobe: {}'.format(nprobe)) table_meta = self.table_meta.get(table_name, None) + + metadata = { + 'resp_class': milvus_pb2.TopKQueryResultList + } if not table_meta: - status, info = self.connection.describe_table(table_name) + status, info = self.connection(metadata=metadata).describe_table(table_name) if not status.OK(): - raise exceptions.TableNotFoundError(table_name) + raise exceptions.TableNotFoundError(table_name, metadata=metadata) self.table_meta[table_name] = info table_meta = info @@ -248,7 +257,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): Range(query_range.start_value, query_range.end_value)) results = self._do_query(table_name, table_meta, query_record_array, topk, - nprobe, query_range_array) + nprobe, query_range_array, metadata=metadata) now = time.time() logger.info('SearchVector takes: {}'.format(now - start)) From eb9174f2d91355c218c4e256a7361d68e776b79e Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 21 Sep 2019 09:56:19 +0800 Subject: [PATCH 021/126] optimize exception handlers --- mishards/__init__.py | 2 ++ mishards/exception_codes.py | 1 + mishards/exception_handlers.py | 12 +++++++++-- mishards/exceptions.py | 3 +++ mishards/grpc_utils/__init__.py | 3 +++ mishards/server.py | 26 ++++++++++++++++++++++-- mishards/service_handler.py | 36 ++++++++++++++++++++++++++------- 7 files changed, 72 insertions(+), 11 deletions(-) diff --git a/mishards/__init__.py b/mishards/__init__.py index a792cd5ce9..8105e7edc8 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -17,3 +17,5 @@ discover = ServiceFounder(namespace=settings.SD_NAMESPACE, from mishards.server import Server grpc_server = Server(conn_mgr=connect_mgr) + +from mishards import exception_handlers diff --git a/mishards/exception_codes.py b/mishards/exception_codes.py index 32b29bdfab..37492f25d4 100644 --- a/mishards/exception_codes.py +++ b/mishards/exception_codes.py @@ -4,3 +4,4 @@ CONNECT_ERROR_CODE = 10001 CONNECTTION_NOT_FOUND_CODE = 10002 
TABLE_NOT_FOUND_CODE = 20001 +INVALID_ARGUMENT = 20002 diff --git a/mishards/exception_handlers.py b/mishards/exception_handlers.py index 3de0918be4..6207f2088c 100644 --- a/mishards/exception_handlers.py +++ b/mishards/exception_handlers.py @@ -1,6 +1,6 @@ import logging from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 -from mishards import server, exceptions +from mishards import grpc_server as server, exceptions logger = logging.getLogger(__name__) @@ -26,10 +26,18 @@ def resp_handler(err, error_code): if resp_class == milvus_pb2.TopKQueryResultList: return resp_class(status=status, topk_query_result=[]) + if resp_class == milvus_pb2.TableRowCount: + return resp_class(status=status, table_row_count=-1) + status.error_code = status_pb2.UNEXPECTED_ERROR return status -@server.error_handler(exceptions.TableNotFoundError) +@server.errorhandler(exceptions.TableNotFoundError) def TableNotFoundErrorHandler(err): logger.error(err) return resp_handler(err, status_pb2.TABLE_NOT_EXISTS) + +@server.errorhandler(exceptions.InvalidArgumentError) +def InvalidArgumentErrorHandler(err): + logger.error(err) + return resp_handler(err, status_pb2.ILLEGAL_ARGUMENT) diff --git a/mishards/exceptions.py b/mishards/exceptions.py index 1579fefcf4..4686cf674f 100644 --- a/mishards/exceptions.py +++ b/mishards/exceptions.py @@ -15,3 +15,6 @@ class ConnectionNotFoundError(BaseException): class TableNotFoundError(BaseException): code = codes.TABLE_NOT_FOUND_CODE + +class InvalidArgumentError(BaseException): + code = codes.INVALID_ARGUMENT diff --git a/mishards/grpc_utils/__init__.py b/mishards/grpc_utils/__init__.py index e69de29bb2..959d5549c7 100644 --- a/mishards/grpc_utils/__init__.py +++ b/mishards/grpc_utils/__init__.py @@ -0,0 +1,3 @@ +def mark_grpc_method(func): + setattr(func, 'grpc_method', True) + return func diff --git a/mishards/server.py b/mishards/server.py index b000016e29..9cca096b6b 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -7,6 +7,7 @@ 
from urllib.parse import urlparse from functools import wraps from concurrent import futures from grpc._cython import cygrpc +from grpc._channel import _Rendezvous, _UnaryUnaryMultiCallable from milvus.grpc_gen.milvus_pb2_grpc import add_MilvusServiceServicer_to_server from mishards.service_handler import ServiceHandler from mishards import settings, discover @@ -17,7 +18,8 @@ logger = logging.getLogger(__name__) class Server: def __init__(self, conn_mgr, port=19530, max_workers=10, **kwargs): self.pre_run_handlers = set() - self.error_handler = {} + self.grpc_methods = set() + self.error_handlers = {} self.exit_flag = False self.port = int(port) self.conn_mgr = conn_mgr @@ -42,6 +44,18 @@ class Server: self.pre_run_handlers.add(func) return func + def wrap_method_with_errorhandler(self, func): + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as e: + if e.__class__ in self.error_handlers: + return self.error_handlers[e.__class__](e) + raise + + return wrapper + def errorhandler(self, exception): if inspect.isclass(exception) and issubclass(exception, Exception): def wrapper(func): @@ -56,7 +70,8 @@ class Server: discover.start() def start(self, port=None): - add_MilvusServiceServicer_to_server(ServiceHandler(conn_mgr=self.conn_mgr), self.server_impl) + handler_class = self.add_error_handlers(ServiceHandler) + add_MilvusServiceServicer_to_server(handler_class(conn_mgr=self.conn_mgr), self.server_impl) self.server_impl.add_insecure_port("[::]:{}".format(str(port or self._port))) self.server_impl.start() @@ -80,3 +95,10 @@ class Server: self.exit_flag = True self.server_impl.stop(0) logger.info('Server is closed') + + def add_error_handlers(self, target): + for key, attr in target.__dict__.items(): + is_grpc_method = getattr(attr, 'grpc_method', False) + if is_grpc_method: + setattr(target, key, self.wrap_method_with_errorhandler(attr)) + return target diff --git a/mishards/service_handler.py 
b/mishards/service_handler.py index 5346be91d8..acc04c5eee 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -12,6 +12,7 @@ from milvus.grpc_gen.milvus_pb2 import TopKQueryResult from milvus.client import types from mishards import (db, settings, exceptions) +from mishards.grpc_utils import mark_grpc_method from mishards.grpc_utils.grpc_args_parser import GrpcArgsParser as Parser from mishards.models import Tables, TableFiles from mishards.hash_ring import HashRing @@ -24,9 +25,10 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def __init__(self, conn_mgr, *args, **kwargs): self.conn_mgr = conn_mgr self.table_meta = {} + self.error_handlers = {} def connection(self, metadata=None): - conn = self.conn_mgr.conn('WOSERVER') + conn = self.conn_mgr.conn('WOSERVER', metadata=metadata) if conn: conn.on_connect(metadata=metadata) return conn.conn @@ -149,6 +151,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): reverse = table_meta.metric_type == types.MetricType.IP return self._do_merge(all_topk_results, topk, reverse=reverse, metadata=metadata) + @mark_grpc_method def CreateTable(self, request, context): _status, _table_schema = Parser.parse_proto_TableSchema(request) @@ -161,6 +164,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status_pb2.Status(error_code=_status.code, reason=_status.message) + @mark_grpc_method def HasTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -181,6 +185,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): bool_reply=_bool ) + @mark_grpc_method def DropTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -193,6 +198,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status_pb2.Status(error_code=_status.code, reason=_status.message) + @mark_grpc_method def CreateIndex(self, request, context): _status, unpacks = 
Parser.parse_proto_IndexParam(request) @@ -208,6 +214,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status_pb2.Status(error_code=_status.code, reason=_status.message) + @mark_grpc_method def Insert(self, request, context): logger.info('Insert') # TODO: Ths SDK interface add_vectors() could update, add a key 'row_id_array' @@ -219,6 +226,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): vector_id_array=_ids ) + @mark_grpc_method def Search(self, request, context): table_name = request.table_name @@ -228,14 +236,16 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('Search {}: topk={} nprobe={}'.format(table_name, topk, nprobe)) - if nprobe > self.MAX_NPROBE or nprobe <= 0: - raise exceptions.GRPCInvlidArgument('Invalid nprobe: {}'.format(nprobe)) - - table_meta = self.table_meta.get(table_name, None) - metadata = { 'resp_class': milvus_pb2.TopKQueryResultList } + + if nprobe > self.MAX_NPROBE or nprobe <= 0: + raise exceptions.InvalidArgumentError(message='Invalid nprobe: {}'.format(nprobe), + metadata=metadata) + + table_meta = self.table_meta.get(table_name, None) + if not table_meta: status, info = self.connection(metadata=metadata).describe_table(table_name) if not status.OK(): @@ -268,9 +278,11 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): ) return topk_result_list + @mark_grpc_method def SearchInFiles(self, request, context): raise NotImplemented() + @mark_grpc_method def DescribeTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -304,6 +316,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): ) ) + @mark_grpc_method def CountTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -316,12 +329,16 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('CountTable {}'.format(_table_name)) - _status, _count = self.connection.get_table_row_count(_table_name) + 
metadata = { + 'resp_class': milvus_pb2.TableRowCount + } + _status, _count = self.connection(metadata=metadata).get_table_row_count(_table_name) return milvus_pb2.TableRowCount( status=status_pb2.Status(error_code=_status.code, reason=_status.message), table_row_count=_count if isinstance(_count, int) else -1) + @mark_grpc_method def Cmd(self, request, context): _status, _cmd = Parser.parse_proto_Command(request) logger.info('Cmd: {}'.format(_cmd)) @@ -341,6 +358,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): string_reply=_reply ) + @mark_grpc_method def ShowTables(self, request, context): logger.info('ShowTables') _status, _results = self.connection.show_tables() @@ -354,6 +372,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): table_name=_result ) + @mark_grpc_method def DeleteByRange(self, request, context): _status, unpacks = \ Parser.parse_proto_DeleteByRangeParam(request) @@ -367,6 +386,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status = self.connection.delete_vectors_by_range(_table_name, _start_date, _end_date) return status_pb2.Status(error_code=_status.code, reason=_status.message) + @mark_grpc_method def PreloadTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -377,6 +397,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status = self.connection.preload_table(_table_name) return status_pb2.Status(error_code=_status.code, reason=_status.message) + @mark_grpc_method def DescribeIndex(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -397,6 +418,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return milvus_pb2.IndexParam(table_name=_tablename, index=_index) + @mark_grpc_method def DropIndex(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) From 1144f6798dcef8ec6422a373f169ba72ddd11f34 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 21 Sep 2019 
10:20:25 +0800 Subject: [PATCH 022/126] fix bug in service handler --- mishards/service_handler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index acc04c5eee..128667d9b6 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -112,6 +112,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def _do_query(self, table_id, table_meta, vectors, topk, nprobe, range_array=None, **kwargs): range_array = [self._range_to_date(r) for r in range_array] if range_array else None + metadata = kwargs.get('metadata', None) routing = self._get_routing_file_ids(table_id, range_array, metadata=metadata) logger.info('Routing: {}'.format(routing)) From 33fe3b1bdee22e56a4288a1f65cff50263323954 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 21 Sep 2019 10:44:26 +0800 Subject: [PATCH 023/126] add more exception handlers --- mishards/exception_codes.py | 4 +++- mishards/exception_handlers.py | 13 +++++++++++++ mishards/exceptions.py | 8 +++++++- mishards/service_handler.py | 25 +++++++++++++++++-------- 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/mishards/exception_codes.py b/mishards/exception_codes.py index 37492f25d4..ecb2469562 100644 --- a/mishards/exception_codes.py +++ b/mishards/exception_codes.py @@ -2,6 +2,8 @@ INVALID_CODE = -1 CONNECT_ERROR_CODE = 10001 CONNECTTION_NOT_FOUND_CODE = 10002 +DB_ERROR_CODE = 10003 TABLE_NOT_FOUND_CODE = 20001 -INVALID_ARGUMENT = 20002 +INVALID_ARGUMENT_CODE = 20002 +INVALID_DATE_RANGE_CODE = 20003 diff --git a/mishards/exception_handlers.py b/mishards/exception_handlers.py index 6207f2088c..2518b64b3e 100644 --- a/mishards/exception_handlers.py +++ b/mishards/exception_handlers.py @@ -29,6 +29,9 @@ def resp_handler(err, error_code): if resp_class == milvus_pb2.TableRowCount: return resp_class(status=status, table_row_count=-1) + if resp_class == milvus_pb2.TableName: + return resp_class(status=status, table_name=[]) + 
status.error_code = status_pb2.UNEXPECTED_ERROR return status @@ -41,3 +44,13 @@ def TableNotFoundErrorHandler(err): def InvalidArgumentErrorHandler(err): logger.error(err) return resp_handler(err, status_pb2.ILLEGAL_ARGUMENT) + +@server.errorhandler(exceptions.DBError) +def DBErrorHandler(err): + logger.error(err) + return resp_handler(err, status_pb2.UNEXPECTED_ERROR) + +@server.errorhandler(exceptions.InvalidRangeError) +def InvalidArgumentErrorHandler(err): + logger.error(err) + return resp_handler(err, status_pb2.ILLEGAL_RANGE) diff --git a/mishards/exceptions.py b/mishards/exceptions.py index 4686cf674f..2aa2b39eb9 100644 --- a/mishards/exceptions.py +++ b/mishards/exceptions.py @@ -13,8 +13,14 @@ class ConnectionConnectError(BaseException): class ConnectionNotFoundError(BaseException): code = codes.CONNECTTION_NOT_FOUND_CODE +class DBError(BaseException): + code = codes.DB_ERROR_CODE + class TableNotFoundError(BaseException): code = codes.TABLE_NOT_FOUND_CODE class InvalidArgumentError(BaseException): - code = codes.INVALID_ARGUMENT + code = codes.INVALID_ARGUMENT_CODE + +class InvalidRangeError(BaseException): + code = codes.INVALID_DATE_RANGE_CODE diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 128667d9b6..536a17c4e3 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -5,10 +5,12 @@ from contextlib import contextmanager from collections import defaultdict from sqlalchemy import and_ +from sqlalchemy import exc as sqlalchemy_exc from concurrent.futures import ThreadPoolExecutor from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 from milvus.grpc_gen.milvus_pb2 import TopKQueryResult +from milvus.client.Abstract import Range from milvus.client import types from mishards import (db, settings, exceptions) @@ -44,7 +46,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return ((start.year-1900)*10000 + (start.month-1)*100 + start.day , (end.year-1900)*10000 + (end.month-1)*100 
+ end.day) - def _range_to_date(self, range_obj): + def _range_to_date(self, range_obj, metadata=None): try: start = datetime.datetime.strptime(range_obj.start_date, '%Y-%m-%d') end = datetime.datetime.strptime(range_obj.end_date, '%Y-%m-%d') @@ -52,15 +54,19 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): except (ValueError, AssertionError): raise exceptions.InvalidRangeError('Invalid time range: {} {}'.format( range_obj.start_date, range_obj.end_date - )) + ), metadata=metadata) return self._format_date(start, end) def _get_routing_file_ids(self, table_id, range_array, metadata=None): - table = db.Session.query(Tables).filter(and_( - Tables.table_id==table_id, - Tables.state!=Tables.TO_DELETE - )).first() + # PXU TODO: Implement Thread-local Context + try: + table = db.Session.query(Tables).filter(and_( + Tables.table_id==table_id, + Tables.state!=Tables.TO_DELETE + )).first() + except sqlalchemy_exc.SQLAlchemyError as e: + raise exceptions.DBError(message=str(e), metadata=metadata) if not table: raise exceptions.TableNotFoundError(table_id, metadata=metadata) @@ -111,8 +117,8 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return topk_query_result def _do_query(self, table_id, table_meta, vectors, topk, nprobe, range_array=None, **kwargs): - range_array = [self._range_to_date(r) for r in range_array] if range_array else None metadata = kwargs.get('metadata', None) + range_array = [self._range_to_date(r, metadata=metadata) for r in range_array] if range_array else None routing = self._get_routing_file_ids(table_id, range_array, metadata=metadata) logger.info('Routing: {}'.format(routing)) @@ -362,7 +368,10 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): @mark_grpc_method def ShowTables(self, request, context): logger.info('ShowTables') - _status, _results = self.connection.show_tables() + metadata = { + 'resp_class': milvus_pb2.TableName + } + _status, _results = self.connection(metadata=metadata).show_tables() if 
not _status.OK(): _results = [] From 1e2cc2eb6622a46aaa0ff17d230350605b430687 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 21 Sep 2019 11:00:35 +0800 Subject: [PATCH 024/126] refactor sd --- mishards/__init__.py | 2 +- sd/__init__.py | 0 {mishards => sd}/service_founder.py | 0 3 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 sd/__init__.py rename {mishards => sd}/service_founder.py (100%) diff --git a/mishards/__init__.py b/mishards/__init__.py index 8105e7edc8..3158afa5b3 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -7,7 +7,7 @@ db.init_db(uri=settings.SQLALCHEMY_DATABASE_URI, echo=settings.SQL_ECHO) from mishards.connections import ConnectionMgr connect_mgr = ConnectionMgr() -from mishards.service_founder import ServiceFounder +from sd.service_founder import ServiceFounder discover = ServiceFounder(namespace=settings.SD_NAMESPACE, conn_mgr=connect_mgr, pod_patt=settings.SD_ROSERVER_POD_PATT, diff --git a/sd/__init__.py b/sd/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mishards/service_founder.py b/sd/service_founder.py similarity index 100% rename from mishards/service_founder.py rename to sd/service_founder.py From 8569309644e752b128af402fa95d5575e3096604 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 21 Sep 2019 11:08:14 +0800 Subject: [PATCH 025/126] refactor utils --- mishards/connections.py | 2 +- mishards/settings.py | 2 +- sd/service_founder.py | 2 +- {mishards/utils => utils}/__init__.py | 0 {mishards/utils => utils}/logger_helper.py | 0 5 files changed, 3 insertions(+), 3 deletions(-) rename {mishards/utils => utils}/__init__.py (100%) rename {mishards/utils => utils}/logger_helper.py (100%) diff --git a/mishards/connections.py b/mishards/connections.py index 365dc60125..7307c2a489 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -6,7 +6,7 @@ from contextlib import contextmanager from milvus import Milvus from mishards import (settings, exceptions) -from 
mishards.utils import singleton +from utils import singleton logger = logging.getLogger(__name__) diff --git a/mishards/settings.py b/mishards/settings.py index 2bf7e96a8f..f99bd3b3c6 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -13,7 +13,7 @@ LOG_PATH = env.str('LOG_PATH', '/tmp/mishards') LOG_NAME = env.str('LOG_NAME', 'logfile') TIMEZONE = env.str('TIMEZONE', 'UTC') -from mishards.utils.logger_helper import config +from utils.logger_helper import config config(LOG_LEVEL, LOG_PATH, LOG_NAME, TIMEZONE) SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') diff --git a/sd/service_founder.py b/sd/service_founder.py index f1a37a440b..79292d452f 100644 --- a/sd/service_founder.py +++ b/sd/service_founder.py @@ -11,7 +11,7 @@ import queue from functools import wraps from kubernetes import client, config, watch -from mishards.utils import singleton +from utils import singleton logger = logging.getLogger(__name__) diff --git a/mishards/utils/__init__.py b/utils/__init__.py similarity index 100% rename from mishards/utils/__init__.py rename to utils/__init__.py diff --git a/mishards/utils/logger_helper.py b/utils/logger_helper.py similarity index 100% rename from mishards/utils/logger_helper.py rename to utils/logger_helper.py From b4ed4b2e35c3119290b29f1539c2cf37aca7cebd Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 21 Sep 2019 12:17:13 +0800 Subject: [PATCH 026/126] refactor kubernetes service provider --- mishards/__init__.py | 11 ++-- mishards/settings.py | 16 +++-- sd/__init__.py | 27 ++++++++ ...vice_founder.py => kubernetes_provider.py} | 62 ++++++++++++++----- 4 files changed, 90 insertions(+), 26 deletions(-) rename sd/{service_founder.py => kubernetes_provider.py} (83%) diff --git a/mishards/__init__.py b/mishards/__init__.py index 3158afa5b3..55b24c082c 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -7,13 +7,10 @@ db.init_db(uri=settings.SQLALCHEMY_DATABASE_URI, echo=settings.SQL_ECHO) from mishards.connections 
import ConnectionMgr connect_mgr = ConnectionMgr() -from sd.service_founder import ServiceFounder -discover = ServiceFounder(namespace=settings.SD_NAMESPACE, - conn_mgr=connect_mgr, - pod_patt=settings.SD_ROSERVER_POD_PATT, - label_selector=settings.SD_LABEL_SELECTOR, - in_cluster=settings.SD_IN_CLUSTER, - poll_interval=settings.SD_POLL_INTERVAL) +from sd import ProviderManager + +sd_proiver_class = ProviderManager.get_provider(settings.SD_PROVIDER) +discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, conn_mgr=connect_mgr) from mishards.server import Server grpc_server = Server(conn_mgr=connect_mgr) diff --git a/mishards/settings.py b/mishards/settings.py index f99bd3b3c6..046508f92c 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -26,11 +26,17 @@ SEARCH_WORKER_SIZE = env.int('SEARCH_WORKER_SIZE', 10) SERVER_PORT = env.int('SERVER_PORT', 19530) WOSERVER = env.str('WOSERVER') -SD_NAMESPACE = env.str('SD_NAMESPACE', '') -SD_IN_CLUSTER = env.bool('SD_IN_CLUSTER', False) -SD_POLL_INTERVAL = env.int('SD_POLL_INTERVAL', 5) -SD_ROSERVER_POD_PATT = env.str('SD_ROSERVER_POD_PATT', '') -SD_LABEL_SELECTOR = env.str('SD_LABEL_SELECTOR', '') +SD_PROVIDER_SETTINGS = None +SD_PROVIDER = env.str('SD_PROVIDER', 'Kubernetes') +if SD_PROVIDER == 'Kubernetes': + from sd.kubernetes_provider import KubernetesProviderSettings + SD_PROVIDER_SETTINGS = KubernetesProviderSettings( + namespace=env.str('SD_NAMESPACE', ''), + in_cluster=env.bool('SD_IN_CLUSTER', False), + poll_interval=env.int('SD_POLL_INTERVAL', 5), + pod_patt=env.str('SD_ROSERVER_POD_PATT', ''), + label_selector=env.str('SD_LABEL_SELECTOR', '') + ) TESTING = env.bool('TESTING', False) TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') diff --git a/sd/__init__.py b/sd/__init__.py index e69de29bb2..5c37bc621b 100644 --- a/sd/__init__.py +++ b/sd/__init__.py @@ -0,0 +1,27 @@ +import logging +import inspect +# from utils import singleton + +logger = logging.getLogger(__name__) 
+ + +class ProviderManager: + PROVIDERS = {} + + @classmethod + def register_service_provider(cls, target): + if inspect.isfunction(target): + cls.PROVIDERS[target.__name__] = target + elif inspect.isclass(target): + name = target.__dict__.get('NAME', None) + name = name if name else target.__class__.__name__ + cls.PROVIDERS[name] = target + else: + assert False, 'Cannot register_service_provider for: {}'.format(target) + return target + + @classmethod + def get_provider(cls, name): + return cls.PROVIDERS.get(name, None) + +from sd import kubernetes_provider diff --git a/sd/service_founder.py b/sd/kubernetes_provider.py similarity index 83% rename from sd/service_founder.py rename to sd/kubernetes_provider.py index 79292d452f..51665a0cb5 100644 --- a/sd/service_founder.py +++ b/sd/kubernetes_provider.py @@ -12,6 +12,7 @@ from functools import wraps from kubernetes import client, config, watch from utils import singleton +from sd import ProviderManager logger = logging.getLogger(__name__) @@ -32,7 +33,7 @@ class K8SMixin: self.v1 = client.CoreV1Api() -class K8SServiceDiscover(threading.Thread, K8SMixin): +class K8SHeartbeatHandler(threading.Thread, K8SMixin): def __init__(self, message_queue, namespace, label_selector, in_cluster=False, **kwargs): K8SMixin.__init__(self, namespace=namespace, in_cluster=in_cluster, **kwargs) threading.Thread.__init__(self) @@ -202,13 +203,26 @@ class EventHandler(threading.Thread): except queue.Empty: continue -@singleton -class ServiceFounder(object): - def __init__(self, conn_mgr, namespace, pod_patt, label_selector, in_cluster=False, **kwargs): +class KubernetesProviderSettings: + def __init__(self, namespace, pod_patt, label_selector, in_cluster, poll_interval, **kwargs): self.namespace = namespace + self.pod_patt = pod_patt + self.label_selector = label_selector + self.in_cluster = in_cluster + self.poll_interval = poll_interval + +@singleton +@ProviderManager.register_service_provider +class KubernetesProvider(object): + NAME = 
'Kubernetes' + def __init__(self, settings, conn_mgr, **kwargs): + self.namespace = settings.namespace + self.pod_patt = settings.pod_patt + self.label_selector = settings.label_selector + self.in_cluster = settings.in_cluster + self.poll_interval = settings.poll_interval self.kwargs = kwargs self.queue = queue.Queue() - self.in_cluster = in_cluster self.conn_mgr = conn_mgr @@ -226,19 +240,20 @@ class ServiceFounder(object): **kwargs ) - self.pod_heartbeater = K8SServiceDiscover( + self.pod_heartbeater = K8SHeartbeatHandler( message_queue=self.queue, - namespace=namespace, - label_selector=label_selector, + namespace=self.namespace, + label_selector=self.label_selector, in_cluster=self.in_cluster, v1=self.v1, + poll_interval=self.poll_interval, **kwargs ) self.event_handler = EventHandler(mgr=self, message_queue=self.queue, namespace=self.namespace, - pod_patt=pod_patt, **kwargs) + pod_patt=self.pod_patt, **kwargs) def add_pod(self, name, ip): self.conn_mgr.register(name, 'tcp://{}:19530'.format(ip)) @@ -250,8 +265,6 @@ class ServiceFounder(object): self.listener.daemon = True self.listener.start() self.event_handler.start() - # while self.listener.at_start_up: - # time.sleep(1) self.pod_heartbeater.start() @@ -262,11 +275,32 @@ class ServiceFounder(object): if __name__ == '__main__': - from mishards import connect_mgr logging.basicConfig(level=logging.INFO) - t = ServiceFounder(namespace='xp', conn_mgr=connect_mgr, pod_patt=".*-ro-servers-.*", label_selector='tier=ro-servers', in_cluster=False) + class Connect: + def register(self, name, value): + logger.error('Register: {} - {}'.format(name, value)) + def unregister(self, name): + logger.error('Unregister: {}'.format(name)) + + @property + def conn_names(self): + return set() + + connect_mgr = Connect() + + settings = KubernetesProviderSettings( + namespace='xp', + pod_patt=".*-ro-servers-.*", + label_selector='tier=ro-servers', + poll_interval=5, + in_cluster=False) + + provider_class = 
ProviderManager.get_provider('Kubernetes') + t = provider_class(conn_mgr=connect_mgr, + settings=settings + ) t.start() - cnt = 2 + cnt = 100 while cnt > 0: time.sleep(2) cnt -= 1 From 6acddae13095080d8a60abfcafa6e6cca354a6bf Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 21 Sep 2019 12:53:13 +0800 Subject: [PATCH 027/126] add static provider --- mishards/settings.py | 5 +++++ sd/__init__.py | 2 +- sd/static_provider.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 sd/static_provider.py diff --git a/mishards/settings.py b/mishards/settings.py index 046508f92c..46221c5f98 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -37,6 +37,11 @@ if SD_PROVIDER == 'Kubernetes': pod_patt=env.str('SD_ROSERVER_POD_PATT', ''), label_selector=env.str('SD_LABEL_SELECTOR', '') ) +elif SD_PROVIDER == 'Static': + from sd.static_provider import StaticProviderSettings + SD_PROVIDER_SETTINGS = StaticProviderSettings( + hosts=env.list('SD_STATIC_HOSTS', []) + ) TESTING = env.bool('TESTING', False) TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') diff --git a/sd/__init__.py b/sd/__init__.py index 5c37bc621b..6dfba5ddc1 100644 --- a/sd/__init__.py +++ b/sd/__init__.py @@ -24,4 +24,4 @@ class ProviderManager: def get_provider(cls, name): return cls.PROVIDERS.get(name, None) -from sd import kubernetes_provider +from sd import kubernetes_provider, static_provider diff --git a/sd/static_provider.py b/sd/static_provider.py new file mode 100644 index 0000000000..73ae483b34 --- /dev/null +++ b/sd/static_provider.py @@ -0,0 +1,32 @@ +import os, sys +if __name__ == '__main__': + sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from utils import singleton +from sd import ProviderManager + +class StaticProviderSettings: + def __init__(self, hosts): + self.hosts = hosts + +@singleton +@ProviderManager.register_service_provider +class KubernetesProvider(object): + NAME = 
'Static' + def __init__(self, settings, conn_mgr, **kwargs): + self.conn_mgr = conn_mgr + self.hosts = settings.hosts + + def start(self): + for host in self.hosts: + self.add_pod(host, host) + + def stop(self): + for host in self.hosts: + self.delete_pod(host) + + def add_pod(self, name, ip): + self.conn_mgr.register(name, 'tcp://{}:19530'.format(ip)) + + def delete_pod(self, name): + self.conn_mgr.unregister(name) From ce95b50143ed4a57cacd414eeece12cb6d1fe638 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 21 Sep 2019 13:32:29 +0800 Subject: [PATCH 028/126] support sqlite --- mishards/db_base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mishards/db_base.py b/mishards/db_base.py index ffbe29f94f..3b2c699864 100644 --- a/mishards/db_base.py +++ b/mishards/db_base.py @@ -1,5 +1,6 @@ import logging from sqlalchemy import create_engine +from sqlalchemy.engine.url import make_url from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker, scoped_session @@ -12,7 +13,11 @@ class DB: uri and self.init_db(uri, echo) def init_db(self, uri, echo=False): - self.engine = create_engine(uri, pool_size=100, pool_recycle=5, pool_timeout=30, + url = make_url(uri) + if url.get_backend_name() == 'sqlite': + self.engine = create_engine(url) + else: + self.engine = create_engine(uri, pool_size=100, pool_recycle=5, pool_timeout=30, pool_pre_ping=True, echo=echo, max_overflow=0) From 76eb24484765a3771797701f3498af7ab37b744e Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 21 Sep 2019 14:08:57 +0800 Subject: [PATCH 029/126] fix exception handler used in service handler --- mishards/exception_handlers.py | 18 ++++++++++++++++++ mishards/service_handler.py | 28 ++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/mishards/exception_handlers.py b/mishards/exception_handlers.py index 2518b64b3e..a2659f91af 100644 --- a/mishards/exception_handlers.py +++ 
b/mishards/exception_handlers.py @@ -32,6 +32,24 @@ def resp_handler(err, error_code): if resp_class == milvus_pb2.TableName: return resp_class(status=status, table_name=[]) + if resp_class == milvus_pb2.StringReply: + return resp_class(status=status, string_reply='') + + if resp_class == milvus_pb2.TableSchema: + table_name = milvus_pb2.TableName( + status=status + ) + return milvus_pb2.TableSchema( + table_name=table_name + ) + + if resp_class == milvus_pb2.IndexParam: + return milvus_pb2.IndexParam( + table_name=milvus_pb2.TableName( + status=status + ) + ) + status.error_code = status_pb2.UNEXPECTED_ERROR return status diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 536a17c4e3..f39ad3ef46 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -50,7 +50,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): try: start = datetime.datetime.strptime(range_obj.start_date, '%Y-%m-%d') end = datetime.datetime.strptime(range_obj.end_date, '%Y-%m-%d') - assert start >= end + assert start < end except (ValueError, AssertionError): raise exceptions.InvalidRangeError('Invalid time range: {} {}'.format( range_obj.start_date, range_obj.end_date @@ -301,8 +301,12 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): table_name=table_name ) + metadata = { + 'resp_class': milvus_pb2.TableSchema + } + logger.info('DescribeTable {}'.format(_table_name)) - _status, _table = self.connection.describe_table(_table_name) + _status, _table = self.connection(metadata=metadata).describe_table(_table_name) if _status.OK(): _grpc_table_name = milvus_pb2.TableName( @@ -355,10 +359,14 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): status_pb2.Status(error_code=_status.code, reason=_status.message) ) + metadata = { + 'resp_class': milvus_pb2.StringReply + } + if _cmd == 'version': - _status, _reply = self.connection.server_version() + _status, _reply = self.connection(metadata=metadata).server_version() 
else: - _status, _reply = self.connection.server_status() + _status, _reply = self.connection(metadata=metadata).server_status() return milvus_pb2.StringReply( status=status_pb2.Status(error_code=_status.code, reason=_status.message), @@ -393,7 +401,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _table_name, _start_date, _end_date = unpacks logger.info('DeleteByRange {}: {} {}'.format(_table_name, _start_date, _end_date)) - _status = self.connection.delete_vectors_by_range(_table_name, _start_date, _end_date) + _status = self.connection().delete_vectors_by_range(_table_name, _start_date, _end_date) return status_pb2.Status(error_code=_status.code, reason=_status.message) @mark_grpc_method @@ -404,7 +412,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status_pb2.Status(error_code=_status.code, reason=_status.message) logger.info('PreloadTable {}'.format(_table_name)) - _status = self.connection.preload_table(_table_name) + _status = self.connection().preload_table(_table_name) return status_pb2.Status(error_code=_status.code, reason=_status.message) @mark_grpc_method @@ -418,8 +426,12 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): ) ) + metadata = { + 'resp_class': milvus_pb2.IndexParam + } + logger.info('DescribeIndex {}'.format(_table_name)) - _status, _index_param = self.connection.describe_index(_table_name) + _status, _index_param = self.connection(metadata=metadata).describe_index(_table_name) _index = milvus_pb2.Index(index_type=_index_param._index_type, nlist=_index_param._nlist) _tablename = milvus_pb2.TableName( @@ -436,5 +448,5 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status_pb2.Status(error_code=_status.code, reason=_status.message) logger.info('DropIndex {}'.format(_table_name)) - _status = self.connection.drop_index(_table_name) + _status = self.connection().drop_index(_table_name) return status_pb2.Status(error_code=_status.code, reason=_status.message) From 
bc056a282929dab4b0e45f2101b3dbef8a28e0a7 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 21 Sep 2019 14:13:53 +0800 Subject: [PATCH 030/126] add more print info at startup --- mishards/connections.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mishards/connections.py b/mishards/connections.py index 7307c2a489..35c5d6c3bd 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -139,6 +139,7 @@ class ConnectionMgr: logger.warn('Non-existed meta: {}'.format(name)) def register(self, name, url): + logger.info('Register Connection: name={};url={}'.format(name, url)) meta = self.metas.get(name) if not meta: return self.on_new_meta(name, url) @@ -146,6 +147,7 @@ class ConnectionMgr: return self.on_duplicate_meta(name, url) def unregister(self, name): + logger.info('Unregister Connection: name={}'.format(name)) url = self.metas.pop(name, None) if url is None: return self.on_nonexisted_meta(name) From a0a5965fc6c826accf02a64c743d45e636f5b687 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 25 Sep 2019 16:23:02 +0800 Subject: [PATCH 031/126] add tracing --- mishards/server.py | 34 ++++++++++++++++++++++++++++++++++ mishards/settings.py | 9 +++++++++ requirements.txt | 2 ++ 3 files changed, 45 insertions(+) diff --git a/mishards/server.py b/mishards/server.py index 9cca096b6b..4e44731f0e 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -8,12 +8,17 @@ from functools import wraps from concurrent import futures from grpc._cython import cygrpc from grpc._channel import _Rendezvous, _UnaryUnaryMultiCallable +from jaeger_client import Config +from grpc_opentracing import open_tracing_server_interceptor +from grpc_opentracing.grpcext import intercept_server from milvus.grpc_gen.milvus_pb2_grpc import add_MilvusServiceServicer_to_server from mishards.service_handler import ServiceHandler from mishards import settings, discover logger = logging.getLogger(__name__) +def empty_server_interceptor_decorator(target_server, interceptor): + return 
target_server class Server: def __init__(self, conn_mgr, port=19530, max_workers=10, **kwargs): @@ -23,12 +28,40 @@ class Server: self.exit_flag = False self.port = int(port) self.conn_mgr = conn_mgr + tracer_interceptor = None + self.tracer = None + interceptor_decorator = empty_server_interceptor_decorator + + if settings.TRACING_ENABLED: + tracer_config = Config(config={ + 'sampler': { + 'type': 'const', + 'param': 1, + }, + 'local_agent': { + 'reporting_host': settings.TracingConfig.TRACING_REPORTING_HOST, + 'reporting_port': settings.TracingConfig.TRACING_REPORTING_PORT + }, + 'logging': settings.TracingConfig.TRACING_LOGGING, + }, + service_name=settings.TracingConfig.TRACING_SERVICE_NAME, + validate=settings.TracingConfig.TRACING_VALIDATE + ) + + self.tracer = tracer_config.initialize_tracer() + tracer_interceptor = open_tracing_server_interceptor(self.tracer, + log_payloads=settings.TracingConfig.TRACING_LOG_PAYLOAD) + + interceptor_decorator = intercept_server + self.server_impl = grpc.server( thread_pool=futures.ThreadPoolExecutor(max_workers=max_workers), options=[(cygrpc.ChannelArgKey.max_send_message_length, -1), (cygrpc.ChannelArgKey.max_receive_message_length, -1)] ) + self.server_impl = interceptor_decorator(self.server_impl, tracer_interceptor) + self.register_pre_run_handler(self.pre_run_handler) def pre_run_handler(self): @@ -94,6 +127,7 @@ class Server: logger.info('Server is shuting down ......') self.exit_flag = True self.server_impl.stop(0) + self.tracer and self.tracer.close() logger.info('Server is closed') def add_error_handlers(self, target): diff --git a/mishards/settings.py b/mishards/settings.py index 46221c5f98..94b8998881 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -46,6 +46,15 @@ elif SD_PROVIDER == 'Static': TESTING = env.bool('TESTING', False) TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') +TRACING_ENABLED = env.bool('TRACING_ENABLED', False) +class TracingConfig: + TRACING_LOGGING = 
env.bool('TRACING_LOGGING', True), + TRACING_SERVICE_NAME = env.str('TRACING_SERVICE_NAME', 'mishards') + TRACING_VALIDATE = env.bool('TRACING_VALIDATE', True) + TRACING_LOG_PAYLOAD = env.bool('TRACING_LOG_PAYLOAD', DEBUG) + TRACING_REPORTING_HOST = env.str('TRACING_REPORTING_HOST', '127.0.0.1') + TRACING_REPORTING_PORT = env.str('TRACING_REPORTING_PORT', '5775') + if __name__ == '__main__': import logging diff --git a/requirements.txt b/requirements.txt index 8cedabdf7b..03db7aeed3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,3 +31,5 @@ rsa==4.0 six==1.12.0 SQLAlchemy==1.3.5 urllib3==1.25.3 +jaeger-client>=3.4.0 +grpcio-opentracing>=1.0 From d4fb05688aa819f0761ed1017717a74e52a78873 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 25 Sep 2019 17:14:18 +0800 Subject: [PATCH 032/126] refactor tracing --- mishards/__init__.py | 5 ++++- mishards/server.py | 35 ++++------------------------------- mishards/settings.py | 2 +- tracing/__init__.py | 17 +++++++++++++++++ tracing/factory.py | 39 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 65 insertions(+), 33 deletions(-) create mode 100644 tracing/__init__.py create mode 100644 tracing/factory.py diff --git a/mishards/__init__.py b/mishards/__init__.py index 55b24c082c..640293c265 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -12,7 +12,10 @@ from sd import ProviderManager sd_proiver_class = ProviderManager.get_provider(settings.SD_PROVIDER) discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, conn_mgr=connect_mgr) +from tracing.factory import TracerFactory +tracer = TracerFactory.new_tracer(settings.TRACING_TYPE, settings.TracingConfig) + from mishards.server import Server -grpc_server = Server(conn_mgr=connect_mgr) +grpc_server = Server(conn_mgr=connect_mgr, tracer=tracer) from mishards import exception_handlers diff --git a/mishards/server.py b/mishards/server.py index 4e44731f0e..93d7e38826 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -9,19 
+9,15 @@ from concurrent import futures from grpc._cython import cygrpc from grpc._channel import _Rendezvous, _UnaryUnaryMultiCallable from jaeger_client import Config -from grpc_opentracing import open_tracing_server_interceptor -from grpc_opentracing.grpcext import intercept_server from milvus.grpc_gen.milvus_pb2_grpc import add_MilvusServiceServicer_to_server from mishards.service_handler import ServiceHandler from mishards import settings, discover logger = logging.getLogger(__name__) -def empty_server_interceptor_decorator(target_server, interceptor): - return target_server class Server: - def __init__(self, conn_mgr, port=19530, max_workers=10, **kwargs): + def __init__(self, conn_mgr, tracer, port=19530, max_workers=10, **kwargs): self.pre_run_handlers = set() self.grpc_methods = set() self.error_handlers = {} @@ -29,30 +25,7 @@ class Server: self.port = int(port) self.conn_mgr = conn_mgr tracer_interceptor = None - self.tracer = None - interceptor_decorator = empty_server_interceptor_decorator - - if settings.TRACING_ENABLED: - tracer_config = Config(config={ - 'sampler': { - 'type': 'const', - 'param': 1, - }, - 'local_agent': { - 'reporting_host': settings.TracingConfig.TRACING_REPORTING_HOST, - 'reporting_port': settings.TracingConfig.TRACING_REPORTING_PORT - }, - 'logging': settings.TracingConfig.TRACING_LOGGING, - }, - service_name=settings.TracingConfig.TRACING_SERVICE_NAME, - validate=settings.TracingConfig.TRACING_VALIDATE - ) - - self.tracer = tracer_config.initialize_tracer() - tracer_interceptor = open_tracing_server_interceptor(self.tracer, - log_payloads=settings.TracingConfig.TRACING_LOG_PAYLOAD) - - interceptor_decorator = intercept_server + self.tracer = tracer self.server_impl = grpc.server( thread_pool=futures.ThreadPoolExecutor(max_workers=max_workers), @@ -60,7 +33,7 @@ class Server: (cygrpc.ChannelArgKey.max_receive_message_length, -1)] ) - self.server_impl = interceptor_decorator(self.server_impl, tracer_interceptor) + 
self.server_impl = self.tracer.decorate(self.server_impl) self.register_pre_run_handler(self.pre_run_handler) @@ -127,7 +100,7 @@ class Server: logger.info('Server is shuting down ......') self.exit_flag = True self.server_impl.stop(0) - self.tracer and self.tracer.close() + self.tracer.close() logger.info('Server is closed') def add_error_handlers(self, target): diff --git a/mishards/settings.py b/mishards/settings.py index 94b8998881..9a8e770f11 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -46,7 +46,7 @@ elif SD_PROVIDER == 'Static': TESTING = env.bool('TESTING', False) TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') -TRACING_ENABLED = env.bool('TRACING_ENABLED', False) +TRACING_TYPE = env.str('TRACING_TYPE', '') class TracingConfig: TRACING_LOGGING = env.bool('TRACING_LOGGING', True), TRACING_SERVICE_NAME = env.str('TRACING_SERVICE_NAME', 'mishards') diff --git a/tracing/__init__.py b/tracing/__init__.py new file mode 100644 index 0000000000..3edddea9df --- /dev/null +++ b/tracing/__init__.py @@ -0,0 +1,17 @@ + +def empty_server_interceptor_decorator(target_server, interceptor): + return target_server + +class Tracer: + def __init__(self, tracer=None, + interceptor=None, + server_decorator=empty_server_interceptor_decorator): + self.tracer = tracer + self.interceptor = interceptor + self.server_decorator=server_decorator + + def decorate(self, server): + return self.server_decorator(server, self.interceptor) + + def close(self): + self.tracer and self.tracer.close() diff --git a/tracing/factory.py b/tracing/factory.py new file mode 100644 index 0000000000..f00a537e78 --- /dev/null +++ b/tracing/factory.py @@ -0,0 +1,39 @@ +import logging +from jaeger_client import Config +from grpc_opentracing.grpcext import intercept_server +from grpc_opentracing import open_tracing_server_interceptor + +from tracing import Tracer, empty_server_interceptor_decorator + +logger = logging.getLogger(__name__) + + +class TracerFactory: + 
@classmethod + def new_tracer(cls, tracer_type, tracer_config, **kwargs): + if not tracer_type: + return Tracer() + + if tracer_type.lower() == 'jaeger': + config = Config(config={ + 'sampler': { + 'type': 'const', + 'param': 1, + }, + 'local_agent': { + 'reporting_host': tracer_config.TRACING_REPORTING_HOST, + 'reporting_port': tracer_config.TRACING_REPORTING_PORT + }, + 'logging': tracer_config.TRACING_LOGGING, + }, + service_name=tracer_config.TRACING_SERVICE_NAME, + validate=tracer_config.TRACING_VALIDATE + ) + + tracer = config.initialize_tracer() + tracer_interceptor = open_tracing_server_interceptor(tracer, + log_payloads=tracer_config.TRACING_LOG_PAYLOAD) + + return Tracer(tracer, tracer_interceptor, intercept_server) + + assert False, 'Unsupported tracer type: {}'.format(tracer_type) From 63d3372b4c8931bc0258f378dee00509dc1080ef Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 25 Sep 2019 18:36:19 +0800 Subject: [PATCH 033/126] convert hostname to ip to avoid pymilvus dns domain name parse bug --- mishards/server.py | 2 +- sd/static_provider.py | 3 ++- start_services.yml | 28 ++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 start_services.yml diff --git a/mishards/server.py b/mishards/server.py index 93d7e38826..679d5f996e 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -43,7 +43,7 @@ class Server: ip = socket.gethostbyname(url.hostname) socket.inet_pton(socket.AF_INET, ip) self.conn_mgr.register('WOSERVER', - '{}://{}:{}'.format(url.scheme, ip, url.port)) + '{}://{}:{}'.format(url.scheme, ip, url.port or 80)) def register_pre_run_handler(self, func): logger.info('Regiterring {} into server pre_run_handlers'.format(func)) diff --git a/sd/static_provider.py b/sd/static_provider.py index 73ae483b34..423d6c4d60 100644 --- a/sd/static_provider.py +++ b/sd/static_provider.py @@ -2,6 +2,7 @@ import os, sys if __name__ == '__main__': 
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import socket from utils import singleton from sd import ProviderManager @@ -15,7 +16,7 @@ class KubernetesProvider(object): NAME = 'Static' def __init__(self, settings, conn_mgr, **kwargs): self.conn_mgr = conn_mgr - self.hosts = settings.hosts + self.hosts = [socket.gethostbyname(host) for host in settings.hosts] def start(self): for host in self.hosts: diff --git a/start_services.yml b/start_services.yml new file mode 100644 index 0000000000..e2cd0653c3 --- /dev/null +++ b/start_services.yml @@ -0,0 +1,28 @@ +version: "2.3" +services: + milvus: + runtime: nvidia + restart: always + image: registry.zilliz.com/milvus/engine:branch-0.4.0-release-c58ca6 + # ports: + # - "0.0.0.0:19530:19530" + volumes: + - /tmp/milvus/db:/opt/milvus/db + + mishards: + restart: always + image: registry.zilliz.com/milvus/mishards:v0.0.2 + ports: + - "0.0.0.0:19530:19531" + - "0.0.0.0:19532:19532" + volumes: + - /tmp/milvus/db:/tmp/milvus/db + - /tmp/mishards_env:/source/mishards/.env + command: ["python", "mishards/main.py"] + environment: + DEBUG: 'true' + SERVER_PORT: 19531 + WOSERVER: tcp://milvus:19530 + SD_STATIC_HOSTS: milvus + depends_on: + - milvus From d96e601ab83f8b62992e0d16e66741cf2c0d59a5 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 25 Sep 2019 19:37:25 +0800 Subject: [PATCH 034/126] add jaeger in start_services.yml --- start_services.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/start_services.yml b/start_services.yml index e2cd0653c3..5c779c5b82 100644 --- a/start_services.yml +++ b/start_services.yml @@ -9,6 +9,16 @@ services: volumes: - /tmp/milvus/db:/opt/milvus/db + jaeger: + restart: always + image: jaegertracing/all-in-one:1.14 + ports: + - "0.0.0.0:5775:5775/udp" + - "0.0.0.0:16686:16686" + - "0.0.0.0:9441:9441" + environment: + COLLECTOR_ZIPKIN_HTTP_PORT: 9411 + mishards: restart: always image: registry.zilliz.com/milvus/mishards:v0.0.2 @@ -24,5 +34,11 
@@ services: SERVER_PORT: 19531 WOSERVER: tcp://milvus:19530 SD_STATIC_HOSTS: milvus + TRACING_TYPE: jaeger + TRACING_SERVICE_NAME: mishards-demo + TRACING_REPORTING_HOST: jaeger + TRACING_REPORTING_PORT: 5775 + depends_on: - milvus + - jaeger From dc2a60f0808701521c3876edf26b5ac26eab90b8 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 26 Sep 2019 10:33:38 +0800 Subject: [PATCH 035/126] fix bug in jaeger tracing settings --- mishards/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mishards/settings.py b/mishards/settings.py index 9a8e770f11..eb6e1e5964 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -48,7 +48,7 @@ TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') TRACING_TYPE = env.str('TRACING_TYPE', '') class TracingConfig: - TRACING_LOGGING = env.bool('TRACING_LOGGING', True), + TRACING_LOGGING = env.bool('TRACING_LOGGING', True) TRACING_SERVICE_NAME = env.str('TRACING_SERVICE_NAME', 'mishards') TRACING_VALIDATE = env.bool('TRACING_VALIDATE', True) TRACING_LOG_PAYLOAD = env.bool('TRACING_LOG_PAYLOAD', DEBUG) From 4c9cd6dc8ed1ba440bd9839e097c507668b1743f Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 26 Sep 2019 11:02:05 +0800 Subject: [PATCH 036/126] add span decorator --- mishards/server.py | 1 - tracing/__init__.py | 12 ++++++++++++ tracing/factory.py | 7 +++++-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/mishards/server.py b/mishards/server.py index 679d5f996e..9dc09d6f05 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -24,7 +24,6 @@ class Server: self.exit_flag = False self.port = int(port) self.conn_mgr = conn_mgr - tracer_interceptor = None self.tracer = tracer self.server_impl = grpc.server( diff --git a/tracing/__init__.py b/tracing/__init__.py index 3edddea9df..04975c4cfd 100644 --- a/tracing/__init__.py +++ b/tracing/__init__.py @@ -1,3 +1,15 @@ +from grpc_opentracing import SpanDecorator + +class GrpcSpanDecorator(SpanDecorator): + def 
__call__(self, span, rpc_info): + if rpc_info.response.status.error_code == 0: + return + span.set_tag('error', True) + error_log = {'event': 'error', + 'error.kind': str(rpc_info.response.status.error_code), + 'message': rpc_info.response.status.reason + } + span.log_kv(error_log) def empty_server_interceptor_decorator(target_server, interceptor): return target_server diff --git a/tracing/factory.py b/tracing/factory.py index f00a537e78..f692563e7b 100644 --- a/tracing/factory.py +++ b/tracing/factory.py @@ -3,7 +3,9 @@ from jaeger_client import Config from grpc_opentracing.grpcext import intercept_server from grpc_opentracing import open_tracing_server_interceptor -from tracing import Tracer, empty_server_interceptor_decorator +from tracing import (Tracer, + GrpcSpanDecorator, + empty_server_interceptor_decorator) logger = logging.getLogger(__name__) @@ -32,7 +34,8 @@ class TracerFactory: tracer = config.initialize_tracer() tracer_interceptor = open_tracing_server_interceptor(tracer, - log_payloads=tracer_config.TRACING_LOG_PAYLOAD) + log_payloads=tracer_config.TRACING_LOG_PAYLOAD, + span_decorator=GrpcSpanDecorator()) return Tracer(tracer, tracer_interceptor, intercept_server) From 48f172facb6db3f27684fd8be4c8c3936cb6e148 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 26 Sep 2019 11:38:31 +0800 Subject: [PATCH 037/126] refactor tracing --- mishards/__init__.py | 4 +++- mishards/grpc_utils/__init__.py | 21 +++++++++++++++++++++ mishards/settings.py | 17 ++++++++++++----- tracing/__init__.py | 13 ------------- tracing/factory.py | 17 +++-------------- 5 files changed, 39 insertions(+), 33 deletions(-) diff --git a/mishards/__init__.py b/mishards/__init__.py index 640293c265..c1cea84861 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -13,7 +13,9 @@ sd_proiver_class = ProviderManager.get_provider(settings.SD_PROVIDER) discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, conn_mgr=connect_mgr) from tracing.factory import 
TracerFactory -tracer = TracerFactory.new_tracer(settings.TRACING_TYPE, settings.TracingConfig) +from grpc_utils import GrpcSpanDecorator +tracer = TracerFactory.new_tracer(settings.TRACING_TYPE, settings.TracingConfig, + span_decorator=GrpcSpanDecorator()) from mishards.server import Server grpc_server = Server(conn_mgr=connect_mgr, tracer=tracer) diff --git a/mishards/grpc_utils/__init__.py b/mishards/grpc_utils/__init__.py index 959d5549c7..9ee7d22f37 100644 --- a/mishards/grpc_utils/__init__.py +++ b/mishards/grpc_utils/__init__.py @@ -1,3 +1,24 @@ +from grpc_opentracing import SpanDecorator +from milvus.grpc_gen import status_pb2 + + +class GrpcSpanDecorator(SpanDecorator): + def __call__(self, span, rpc_info): + status = None + if isinstance(rpc_info.response, status_pb2.Status): + status = rpc_info.response + else: + status = rpc_info.response.status + if status.error_code == 0: + return + span.set_tag('error', True) + span.set_tag('error_code', status.error_code) + error_log = {'event': 'error', + 'request': rpc_info.request, + 'response': rpc_info.response + } + span.log_kv(error_log) + def mark_grpc_method(func): setattr(func, 'grpc_method', True) return func diff --git a/mishards/settings.py b/mishards/settings.py index eb6e1e5964..4a70d44561 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -48,13 +48,20 @@ TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') TRACING_TYPE = env.str('TRACING_TYPE', '') class TracingConfig: - TRACING_LOGGING = env.bool('TRACING_LOGGING', True) TRACING_SERVICE_NAME = env.str('TRACING_SERVICE_NAME', 'mishards') TRACING_VALIDATE = env.bool('TRACING_VALIDATE', True) - TRACING_LOG_PAYLOAD = env.bool('TRACING_LOG_PAYLOAD', DEBUG) - TRACING_REPORTING_HOST = env.str('TRACING_REPORTING_HOST', '127.0.0.1') - TRACING_REPORTING_PORT = env.str('TRACING_REPORTING_PORT', '5775') - + TRACING_LOG_PAYLOAD = env.bool('TRACING_LOG_PAYLOAD', False) + TRACING_CONFIG = { + 'sampler': { + 'type': 
env.str('TRACING_SAMPLER_TYPE', 'const'), + 'param': env.str('TRACING_SAMPLER_PARAM', "1"), + }, + 'local_agent': { + 'reporting_host': env.str('TRACING_REPORTING_HOST', '127.0.0.1'), + 'reporting_port': env.str('TRACING_REPORTING_PORT', '5775') + }, + 'logging': env.bool('TRACING_LOGGING', True) + } if __name__ == '__main__': import logging diff --git a/tracing/__init__.py b/tracing/__init__.py index 04975c4cfd..0aebf6ffba 100644 --- a/tracing/__init__.py +++ b/tracing/__init__.py @@ -1,16 +1,3 @@ -from grpc_opentracing import SpanDecorator - -class GrpcSpanDecorator(SpanDecorator): - def __call__(self, span, rpc_info): - if rpc_info.response.status.error_code == 0: - return - span.set_tag('error', True) - error_log = {'event': 'error', - 'error.kind': str(rpc_info.response.status.error_code), - 'message': rpc_info.response.status.reason - } - span.log_kv(error_log) - def empty_server_interceptor_decorator(target_server, interceptor): return target_server diff --git a/tracing/factory.py b/tracing/factory.py index f692563e7b..fd06fe3cac 100644 --- a/tracing/factory.py +++ b/tracing/factory.py @@ -4,7 +4,6 @@ from grpc_opentracing.grpcext import intercept_server from grpc_opentracing import open_tracing_server_interceptor from tracing import (Tracer, - GrpcSpanDecorator, empty_server_interceptor_decorator) logger = logging.getLogger(__name__) @@ -12,22 +11,12 @@ logger = logging.getLogger(__name__) class TracerFactory: @classmethod - def new_tracer(cls, tracer_type, tracer_config, **kwargs): + def new_tracer(cls, tracer_type, tracer_config, span_decorator=None, **kwargs): if not tracer_type: return Tracer() if tracer_type.lower() == 'jaeger': - config = Config(config={ - 'sampler': { - 'type': 'const', - 'param': 1, - }, - 'local_agent': { - 'reporting_host': tracer_config.TRACING_REPORTING_HOST, - 'reporting_port': tracer_config.TRACING_REPORTING_PORT - }, - 'logging': tracer_config.TRACING_LOGGING, - }, + config = Config(config=tracer_config.TRACING_CONFIG, 
service_name=tracer_config.TRACING_SERVICE_NAME, validate=tracer_config.TRACING_VALIDATE ) @@ -35,7 +24,7 @@ class TracerFactory: tracer = config.initialize_tracer() tracer_interceptor = open_tracing_server_interceptor(tracer, log_payloads=tracer_config.TRACING_LOG_PAYLOAD, - span_decorator=GrpcSpanDecorator()) + span_decorator=span_decorator) return Tracer(tracer, tracer_interceptor, intercept_server) From bdbb70f63f2c72e070b98330e3ced1d959d9c366 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 26 Sep 2019 12:06:38 +0800 Subject: [PATCH 038/126] change grpc decorator --- mishards/grpc_utils/__init__.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mishards/grpc_utils/__init__.py b/mishards/grpc_utils/__init__.py index 9ee7d22f37..ba9a5e175d 100644 --- a/mishards/grpc_utils/__init__.py +++ b/mishards/grpc_utils/__init__.py @@ -5,18 +5,24 @@ from milvus.grpc_gen import status_pb2 class GrpcSpanDecorator(SpanDecorator): def __call__(self, span, rpc_info): status = None + if not rpc_info.response: + return if isinstance(rpc_info.response, status_pb2.Status): status = rpc_info.response else: - status = rpc_info.response.status + try: + status = rpc_info.response.status + except Exception as e: + status = status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, + reason='Should not happen') + if status.error_code == 0: return - span.set_tag('error', True) - span.set_tag('error_code', status.error_code) error_log = {'event': 'error', 'request': rpc_info.request, 'response': rpc_info.response } + span.set_tag('error', True) span.log_kv(error_log) def mark_grpc_method(func): From 11ba6beb40f2e6b9ef4351cbcffa1b4810b7e5d9 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 26 Sep 2019 15:22:00 +0800 Subject: [PATCH 039/126] update for search error handling --- mishards/service_handler.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 
f39ad3ef46..cb904f4e42 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -92,13 +92,17 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return routing def _do_merge(self, files_n_topk_results, topk, reverse=False, **kwargs): + status=status_pb2.Status(error_code=status_pb2.SUCCESS, reason="Success") if not files_n_topk_results: - return [] + return status, [] request_results = defaultdict(list) calc_time = time.time() for files_collection in files_n_topk_results: + if isinstance(files_collection, tuple): + status, _ = files_collection + return status, [] for request_pos, each_request_results in enumerate(files_collection.topk_query_result): request_results[request_pos].extend(each_request_results.query_result_arrays) request_results[request_pos] = sorted(request_results[request_pos], key=lambda x: x.distance, @@ -114,7 +118,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): query_result = TopKQueryResult(query_result_arrays=result[1]) topk_query_result.append(query_result) - return topk_query_result + return status, topk_query_result def _do_query(self, table_id, table_meta, vectors, topk, nprobe, range_array=None, **kwargs): metadata = kwargs.get('metadata', None) @@ -273,14 +277,14 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): query_range_array.append( Range(query_range.start_value, query_range.end_value)) - results = self._do_query(table_name, table_meta, query_record_array, topk, + status, results = self._do_query(table_name, table_meta, query_record_array, topk, nprobe, query_range_array, metadata=metadata) now = time.time() logger.info('SearchVector takes: {}'.format(now - start)) topk_result_list = milvus_pb2.TopKQueryResultList( - status=status_pb2.Status(error_code=status_pb2.SUCCESS, reason="Success"), + status=status, topk_query_result=results ) return topk_result_list From 110e56c1b7f20574db351eea6a3c3d812ad21fc3 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 26 Sep 2019 
18:34:02 +0800 Subject: [PATCH 040/126] add more child span for search --- mishards/server.py | 2 +- mishards/service_handler.py | 31 ++++++++++++++++++------------- tracing/__init__.py | 6 ++++++ 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/mishards/server.py b/mishards/server.py index 9dc09d6f05..876424089c 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -76,7 +76,7 @@ class Server: def start(self, port=None): handler_class = self.add_error_handlers(ServiceHandler) - add_MilvusServiceServicer_to_server(handler_class(conn_mgr=self.conn_mgr), self.server_impl) + add_MilvusServiceServicer_to_server(handler_class(conn_mgr=self.conn_mgr, tracer=self.tracer), self.server_impl) self.server_impl.add_insecure_port("[::]:{}".format(str(port or self._port))) self.server_impl.start() diff --git a/mishards/service_handler.py b/mishards/service_handler.py index cb904f4e42..72ae73932c 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -24,10 +24,11 @@ logger = logging.getLogger(__name__) class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): MAX_NPROBE = 2048 - def __init__(self, conn_mgr, *args, **kwargs): + def __init__(self, conn_mgr, tracer, *args, **kwargs): self.conn_mgr = conn_mgr self.table_meta = {} self.error_handlers = {} + self.tracer = tracer def connection(self, metadata=None): conn = self.conn_mgr.conn('WOSERVER', metadata=metadata) @@ -120,7 +121,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status, topk_query_result - def _do_query(self, table_id, table_meta, vectors, topk, nprobe, range_array=None, **kwargs): + def _do_query(self, context, table_id, table_meta, vectors, topk, nprobe, range_array=None, **kwargs): metadata = kwargs.get('metadata', None) range_array = [self._range_to_date(r, metadata=metadata) for r in range_array] if range_array else None routing = self._get_routing_file_ids(table_id, range_array, metadata=metadata) @@ -140,16 +141,18 @@ class 
ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): conn = self.query_conn(addr, metadata=metadata) start = time.time() - ret = conn.search_vectors_in_files(table_name=query_params['table_id'], - file_ids=query_params['file_ids'], - query_records=vectors, - top_k=topk, - nprobe=nprobe, - lazy=True) - end = time.time() - logger.info('search_vectors_in_files takes: {}'.format(end - start)) + with self.tracer.start_span('search_{}_span'.format(addr), + child_of=context.get_active_span().context): + ret = conn.search_vectors_in_files(table_name=query_params['table_id'], + file_ids=query_params['file_ids'], + query_records=vectors, + top_k=topk, + nprobe=nprobe, + lazy=True) + end = time.time() + logger.info('search_vectors_in_files takes: {}'.format(end - start)) - all_topk_results.append(ret) + all_topk_results.append(ret) with ThreadPoolExecutor(max_workers=workers) as pool: for addr, params in routing.items(): @@ -160,7 +163,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): res.result() reverse = table_meta.metric_type == types.MetricType.IP - return self._do_merge(all_topk_results, topk, reverse=reverse, metadata=metadata) + with self.tracer.start_span('do_merge', + child_of=context.get_active_span().context): + return self._do_merge(all_topk_results, topk, reverse=reverse, metadata=metadata) @mark_grpc_method def CreateTable(self, request, context): @@ -277,7 +282,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): query_range_array.append( Range(query_range.start_value, query_range.end_value)) - status, results = self._do_query(table_name, table_meta, query_record_array, topk, + status, results = self._do_query(context, table_name, table_meta, query_record_array, topk, nprobe, query_range_array, metadata=metadata) now = time.time() diff --git a/tracing/__init__.py b/tracing/__init__.py index 0aebf6ffba..27c57473db 100644 --- a/tracing/__init__.py +++ b/tracing/__init__.py @@ -14,3 +14,9 @@ class Tracer: def close(self): 
self.tracer and self.tracer.close() + + def start_span(self, operation_name=None, + child_of=None, references=None, tags=None, + start_time=None, ignore_active_span=False): + return self.tracer.start_span(operation_name, child_of, + references, tags, start_time, ignore_active_span) From a6a1ff2f13dbdadb178ae91582a50b50df12e9a2 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 26 Sep 2019 19:23:15 +0800 Subject: [PATCH 041/126] add routing span --- mishards/service_handler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 72ae73932c..cafe4be60f 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -124,7 +124,11 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def _do_query(self, context, table_id, table_meta, vectors, topk, nprobe, range_array=None, **kwargs): metadata = kwargs.get('metadata', None) range_array = [self._range_to_date(r, metadata=metadata) for r in range_array] if range_array else None - routing = self._get_routing_file_ids(table_id, range_array, metadata=metadata) + + routing = {} + with self.tracer.start_span('get_routing', + child_of=context.get_active_span().context): + routing = self._get_routing_file_ids(table_id, range_array, metadata=metadata) logger.info('Routing: {}'.format(routing)) metadata = kwargs.get('metadata', None) From 81a78a40cb9647d78b59505997f0e02ba936e737 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 27 Sep 2019 10:21:17 +0800 Subject: [PATCH 042/126] more detail tracing in search --- mishards/service_handler.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index cafe4be60f..ddff2903b8 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -145,7 +145,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): conn = self.query_conn(addr, metadata=metadata) start = 
time.time() - with self.tracer.start_span('search_{}_span'.format(addr), + span = kwargs.get('span', None) + span = span if span else context.get_active_span().context + with self.tracer.start_span('search_{}'.format(addr), child_of=context.get_active_span().context): ret = conn.search_vectors_in_files(table_name=query_params['table_id'], file_ids=query_params['file_ids'], @@ -158,13 +160,15 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): all_topk_results.append(ret) - with ThreadPoolExecutor(max_workers=workers) as pool: - for addr, params in routing.items(): - res = pool.submit(search, addr, params, vectors, topk, nprobe) - rs.append(res) + with self.tracer.start_span('do_search', + child_of=context.get_active_span().context) as span: + with ThreadPoolExecutor(max_workers=workers) as pool: + for addr, params in routing.items(): + res = pool.submit(search, addr, params, vectors, topk, nprobe, span=span) + rs.append(res) - for res in rs: - res.result() + for res in rs: + res.result() reverse = table_meta.metric_type == types.MetricType.IP with self.tracer.start_span('do_merge', From 98d49b803d76daf40a3bfc5c2f142ba29ddc0433 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 27 Sep 2019 11:29:22 +0800 Subject: [PATCH 043/126] update for proto update --- mishards/exception_handlers.py | 5 +---- mishards/grpc_utils/grpc_args_parser.py | 5 +++-- mishards/service_handler.py | 30 ++++++++----------------- requirements.txt | 4 ++-- 4 files changed, 15 insertions(+), 29 deletions(-) diff --git a/mishards/exception_handlers.py b/mishards/exception_handlers.py index a2659f91af..16ba34a3b1 100644 --- a/mishards/exception_handlers.py +++ b/mishards/exception_handlers.py @@ -36,11 +36,8 @@ def resp_handler(err, error_code): return resp_class(status=status, string_reply='') if resp_class == milvus_pb2.TableSchema: - table_name = milvus_pb2.TableName( - status=status - ) return milvus_pb2.TableSchema( - table_name=table_name + status=status ) if resp_class == 
milvus_pb2.IndexParam: diff --git a/mishards/grpc_utils/grpc_args_parser.py b/mishards/grpc_utils/grpc_args_parser.py index c8dc9d71d9..039299803d 100644 --- a/mishards/grpc_utils/grpc_args_parser.py +++ b/mishards/grpc_utils/grpc_args_parser.py @@ -21,7 +21,8 @@ class GrpcArgsParser(object): @error_status def parse_proto_TableSchema(cls, param): _table_schema = { - 'table_name': param.table_name.table_name, + 'status': param.status, + 'table_name': param.table_name, 'dimension': param.dimension, 'index_file_size': param.index_file_size, 'metric_type': param.metric_type @@ -47,7 +48,7 @@ class GrpcArgsParser(object): @classmethod @error_status def parse_proto_IndexParam(cls, param): - _table_name = param.table_name.table_name + _table_name = param.table_name _status, _index = cls.parse_proto_Index(param.index) if not _status.OK(): diff --git a/mishards/service_handler.py b/mishards/service_handler.py index ddff2903b8..81217b52be 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -311,11 +311,8 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _table_name = Parser.parse_proto_TableName(request) if not _status.OK(): - table_name = milvus_pb2.TableName( - status=status_pb2.Status(error_code=_status.code, reason=_status.message) - ) return milvus_pb2.TableSchema( - table_name=table_name + status=status_pb2.Status(error_code=_status.code, reason=_status.message), ) metadata = { @@ -326,22 +323,17 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _table = self.connection(metadata=metadata).describe_table(_table_name) if _status.OK(): - _grpc_table_name = milvus_pb2.TableName( - status=status_pb2.Status(error_code=_status.code, reason=_status.message), - table_name=_table.table_name - ) - return milvus_pb2.TableSchema( - table_name=_grpc_table_name, + table_name=_table_name, index_file_size=_table.index_file_size, dimension=_table.dimension, - metric_type=_table.metric_type + 
metric_type=_table.metric_type, + status=status_pb2.Status(error_code=_status.code, reason=_status.message), ) return milvus_pb2.TableSchema( - table_name=milvus_pb2.TableName( - status=status_pb2.Status(error_code=_status.code, reason=_status.message) - ) + table_name=_table_name, + status=status_pb2.Status(error_code=_status.code, reason=_status.message), ) @mark_grpc_method @@ -398,14 +390,10 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): } _status, _results = self.connection(metadata=metadata).show_tables() - if not _status.OK(): - _results = [] - - for _result in _results: - yield milvus_pb2.TableName( + return milvus_pb2.TableNameList( status=status_pb2.Status(error_code=_status.code, reason=_status.message), - table_name=_result - ) + table_names=_results + ) @mark_grpc_method def DeleteByRange(self, request, context): diff --git a/requirements.txt b/requirements.txt index 03db7aeed3..e94f8d1597 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,8 +14,8 @@ py==1.8.0 pyasn1==0.4.7 pyasn1-modules==0.2.6 pylint==2.3.1 -#pymilvus-test==0.2.15 -pymilvus==0.2.0 +pymilvus-test==0.2.15 +#pymilvus==0.2.0 pyparsing==2.4.0 pytest==4.6.3 pytest-level==0.1.1 From 76581d0641f55907f0dd7d8a5b35b4f8b1175e11 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 27 Sep 2019 11:39:24 +0800 Subject: [PATCH 044/126] update DecribeIndex for proto changes --- mishards/service_handler.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 81217b52be..60d64cef37 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -426,9 +426,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): if not _status.OK(): return milvus_pb2.IndexParam( - table_name=milvus_pb2.TableName( status=status_pb2.Status(error_code=_status.code, reason=_status.message) - ) ) metadata = { @@ -439,11 +437,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): 
_status, _index_param = self.connection(metadata=metadata).describe_index(_table_name) _index = milvus_pb2.Index(index_type=_index_param._index_type, nlist=_index_param._nlist) - _tablename = milvus_pb2.TableName( - status=status_pb2.Status(error_code=_status.code, reason=_status.message), - table_name=_table_name) - return milvus_pb2.IndexParam(table_name=_tablename, index=_index) + return milvus_pb2.IndexParam(status=status_pb2.Status(error_code=_status.code, reason=_status.message), + table_name=_table_name, index=_index) @mark_grpc_method def DropIndex(self, request, context): From 663f9a2312997fda9dad71135a49dd307b20898e Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 27 Sep 2019 14:03:46 +0800 Subject: [PATCH 045/126] small refactor in server --- mishards/grpc_utils/__init__.py | 5 +++++ mishards/server.py | 14 +++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/mishards/grpc_utils/__init__.py b/mishards/grpc_utils/__init__.py index ba9a5e175d..550913ed60 100644 --- a/mishards/grpc_utils/__init__.py +++ b/mishards/grpc_utils/__init__.py @@ -28,3 +28,8 @@ class GrpcSpanDecorator(SpanDecorator): def mark_grpc_method(func): setattr(func, 'grpc_method', True) return func + +def is_grpc_method(func): + if not func: + return False + return getattr(func, 'grpc_method', False) diff --git a/mishards/server.py b/mishards/server.py index 876424089c..1f72a8812d 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -10,6 +10,7 @@ from grpc._cython import cygrpc from grpc._channel import _Rendezvous, _UnaryUnaryMultiCallable from jaeger_client import Config from milvus.grpc_gen.milvus_pb2_grpc import add_MilvusServiceServicer_to_server +from mishards.grpc_utils import is_grpc_method from mishards.service_handler import ServiceHandler from mishards import settings, discover @@ -75,7 +76,7 @@ class Server: discover.start() def start(self, port=None): - handler_class = self.add_error_handlers(ServiceHandler) + handler_class = 
self.decorate_handler(ServiceHandler) add_MilvusServiceServicer_to_server(handler_class(conn_mgr=self.conn_mgr, tracer=self.tracer), self.server_impl) self.server_impl.add_insecure_port("[::]:{}".format(str(port or self._port))) self.server_impl.start() @@ -102,9 +103,8 @@ class Server: self.tracer.close() logger.info('Server is closed') - def add_error_handlers(self, target): - for key, attr in target.__dict__.items(): - is_grpc_method = getattr(attr, 'grpc_method', False) - if is_grpc_method: - setattr(target, key, self.wrap_method_with_errorhandler(attr)) - return target + def decorate_handler(self, handler): + for key, attr in handler.__dict__.items(): + if is_grpc_method(attr): + setattr(handler, key, self.wrap_method_with_errorhandler(attr)) + return handler From 7220af2cd172ac6a4304b75f6f5e48d409671e70 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 28 Sep 2019 11:01:52 +0800 Subject: [PATCH 046/126] refactor settings --- mishards/__init__.py | 39 +++++++++++++++++++++++---------------- mishards/main.py | 4 ++-- mishards/server.py | 9 ++++++--- mishards/settings.py | 12 +++++++++--- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/mishards/__init__.py b/mishards/__init__.py index c1cea84861..76f3168b51 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -2,22 +2,29 @@ from mishards import settings from mishards.db_base import DB db = DB() -db.init_db(uri=settings.SQLALCHEMY_DATABASE_URI, echo=settings.SQL_ECHO) - -from mishards.connections import ConnectionMgr -connect_mgr = ConnectionMgr() - -from sd import ProviderManager - -sd_proiver_class = ProviderManager.get_provider(settings.SD_PROVIDER) -discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, conn_mgr=connect_mgr) - -from tracing.factory import TracerFactory -from grpc_utils import GrpcSpanDecorator -tracer = TracerFactory.new_tracer(settings.TRACING_TYPE, settings.TracingConfig, - span_decorator=GrpcSpanDecorator()) from mishards.server import Server 
-grpc_server = Server(conn_mgr=connect_mgr, tracer=tracer) +grpc_server = Server() -from mishards import exception_handlers +def create_app(testing_config=None): + config = testing_config if testing_config else settings.DefaultConfig + db.init_db(uri=config.SQLALCHEMY_DATABASE_URI, echo=config.SQL_ECHO) + + from mishards.connections import ConnectionMgr + connect_mgr = ConnectionMgr() + + from sd import ProviderManager + + sd_proiver_class = ProviderManager.get_provider(settings.SD_PROVIDER) + discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, conn_mgr=connect_mgr) + + from tracing.factory import TracerFactory + from grpc_utils import GrpcSpanDecorator + tracer = TracerFactory.new_tracer(settings.TRACING_TYPE, settings.TracingConfig, + span_decorator=GrpcSpanDecorator()) + + grpc_server.init_app(conn_mgr=connect_mgr, tracer=tracer, discover=discover) + + from mishards import exception_handlers + + return grpc_server diff --git a/mishards/main.py b/mishards/main.py index 7fac55dfa2..9197fbf598 100644 --- a/mishards/main.py +++ b/mishards/main.py @@ -2,10 +2,10 @@ import os, sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from mishards import ( - settings, - grpc_server as server) + settings, create_app) def main(): + server = create_app() server.run(port=settings.SERVER_PORT) return 0 diff --git a/mishards/server.py b/mishards/server.py index 1f72a8812d..0ca4a8f866 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -12,20 +12,23 @@ from jaeger_client import Config from milvus.grpc_gen.milvus_pb2_grpc import add_MilvusServiceServicer_to_server from mishards.grpc_utils import is_grpc_method from mishards.service_handler import ServiceHandler -from mishards import settings, discover +from mishards import settings logger = logging.getLogger(__name__) class Server: - def __init__(self, conn_mgr, tracer, port=19530, max_workers=10, **kwargs): + def __init__(self): self.pre_run_handlers = set() self.grpc_methods = 
set() self.error_handlers = {} self.exit_flag = False + + def init_app(self, conn_mgr, tracer, discover, port=19530, max_workers=10, **kwargs): self.port = int(port) self.conn_mgr = conn_mgr self.tracer = tracer + self.discover = discover self.server_impl = grpc.server( thread_pool=futures.ThreadPoolExecutor(max_workers=max_workers), @@ -73,7 +76,7 @@ class Server: def on_pre_run(self): for handler in self.pre_run_handlers: handler() - discover.start() + self.discover.start() def start(self, port=None): handler_class = self.decorate_handler(ServiceHandler) diff --git a/mishards/settings.py b/mishards/settings.py index 4a70d44561..b42cb791f6 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -16,9 +16,6 @@ TIMEZONE = env.str('TIMEZONE', 'UTC') from utils.logger_helper import config config(LOG_LEVEL, LOG_PATH, LOG_NAME, TIMEZONE) -SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') -SQL_ECHO = env.bool('SQL_ECHO', False) - TIMEOUT = env.int('TIMEOUT', 60) MAX_RETRY = env.int('MAX_RETRY', 3) SEARCH_WORKER_SIZE = env.int('SEARCH_WORKER_SIZE', 10) @@ -63,6 +60,15 @@ class TracingConfig: 'logging': env.bool('TRACING_LOGGING', True) } +class DefaultConfig: + SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') + SQL_ECHO = env.bool('SQL_ECHO', False) + +# class TestingConfig(DefaultConfig): +# SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI') +# SQL_ECHO = env.bool('SQL_TEST_ECHO', False) + + if __name__ == '__main__': import logging logger = logging.getLogger(__name__) From 4051cf7e07b54d79c6303f8b0fb7f9311aadd850 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 28 Sep 2019 11:22:46 +0800 Subject: [PATCH 047/126] update for testing config --- mishards/__init__.py | 4 ++++ mishards/db_base.py | 4 ++++ mishards/main.py | 2 +- mishards/server.py | 1 - mishards/settings.py | 8 +++++--- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/mishards/__init__.py b/mishards/__init__.py index 76f3168b51..8682b6eba6 100644 
--- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -7,8 +7,12 @@ from mishards.server import Server grpc_server = Server() def create_app(testing_config=None): + import logging + logger = logging.getLogger() + config = testing_config if testing_config else settings.DefaultConfig db.init_db(uri=config.SQLALCHEMY_DATABASE_URI, echo=config.SQL_ECHO) + logger.info(db) from mishards.connections import ConnectionMgr connect_mgr = ConnectionMgr() diff --git a/mishards/db_base.py b/mishards/db_base.py index 3b2c699864..1006f21f55 100644 --- a/mishards/db_base.py +++ b/mishards/db_base.py @@ -24,6 +24,10 @@ class DB: self.uri = uri self.session = sessionmaker() self.session.configure(bind=self.engine) + self.url = url + + def __str__(self): + return ''.format(self.url.get_backend_name(), self.url.database) @property def Session(self): diff --git a/mishards/main.py b/mishards/main.py index 9197fbf598..5d8db0a179 100644 --- a/mishards/main.py +++ b/mishards/main.py @@ -5,7 +5,7 @@ from mishards import ( settings, create_app) def main(): - server = create_app() + server = create_app(settings.TestingConfig if settings.TESTING else settings.DefaultConfig) server.run(port=settings.SERVER_PORT) return 0 diff --git a/mishards/server.py b/mishards/server.py index 0ca4a8f866..c044bbb7ad 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -90,7 +90,6 @@ class Server: self.on_pre_run() self.start(port) - logger.info('Successfully') logger.info('Listening on port {}'.format(port)) try: diff --git a/mishards/settings.py b/mishards/settings.py index b42cb791f6..71e94b76a2 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -64,9 +64,11 @@ class DefaultConfig: SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') SQL_ECHO = env.bool('SQL_ECHO', False) -# class TestingConfig(DefaultConfig): -# SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI') -# SQL_ECHO = env.bool('SQL_TEST_ECHO', False) +TESTING = env.bool('TESTING', False) +if TESTING: + 
class TestingConfig(DefaultConfig): + SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI') + SQL_ECHO = env.bool('SQL_TEST_ECHO', False) if __name__ == '__main__': From 71231205659444422fcc505c4cd7d5cadae70aa7 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 28 Sep 2019 15:54:55 +0800 Subject: [PATCH 048/126] update db session and related factory impl --- mishards/__init__.py | 7 +++---- mishards/db_base.py | 13 ++++++++++--- mishards/factories.py | 4 ++-- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/mishards/__init__.py b/mishards/__init__.py index 8682b6eba6..b351986cba 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -1,4 +1,6 @@ +import logging from mishards import settings +logger = logging.getLogger() from mishards.db_base import DB db = DB() @@ -7,9 +9,6 @@ from mishards.server import Server grpc_server = Server() def create_app(testing_config=None): - import logging - logger = logging.getLogger() - config = testing_config if testing_config else settings.DefaultConfig db.init_db(uri=config.SQLALCHEMY_DATABASE_URI, echo=config.SQL_ECHO) logger.info(db) @@ -23,7 +22,7 @@ def create_app(testing_config=None): discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, conn_mgr=connect_mgr) from tracing.factory import TracerFactory - from grpc_utils import GrpcSpanDecorator + from mishards.grpc_utils import GrpcSpanDecorator tracer = TracerFactory.new_tracer(settings.TRACING_TYPE, settings.TracingConfig, span_decorator=GrpcSpanDecorator()) diff --git a/mishards/db_base.py b/mishards/db_base.py index 1006f21f55..b1492aa8f5 100644 --- a/mishards/db_base.py +++ b/mishards/db_base.py @@ -3,14 +3,23 @@ from sqlalchemy import create_engine from sqlalchemy.engine.url import make_url from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker, scoped_session +from sqlalchemy.orm.session import Session as SessionBase logger = logging.getLogger(__name__) + +class 
LocalSession(SessionBase): + def __init__(self, db, autocommit=False, autoflush=True, **options): + self.db = db + bind = options.pop('bind', None) or db.engine + SessionBase.__init__(self, autocommit=autocommit, autoflush=autoflush, bind=bind, **options) + class DB: Model = declarative_base() def __init__(self, uri=None, echo=False): self.echo = echo uri and self.init_db(uri, echo) + self.session_factory = scoped_session(sessionmaker(class_=LocalSession, db=self)) def init_db(self, uri, echo=False): url = make_url(uri) @@ -22,8 +31,6 @@ class DB: echo=echo, max_overflow=0) self.uri = uri - self.session = sessionmaker() - self.session.configure(bind=self.engine) self.url = url def __str__(self): @@ -31,7 +38,7 @@ class DB: @property def Session(self): - return self.session() + return self.session_factory() def drop_all(self): self.Model.metadata.drop_all(self.engine) diff --git a/mishards/factories.py b/mishards/factories.py index 5bd059654a..26e9ab2619 100644 --- a/mishards/factories.py +++ b/mishards/factories.py @@ -19,7 +19,7 @@ factory.Faker.add_provider(FakerProvider) class TablesFactory(SQLAlchemyModelFactory): class Meta: model = Tables - sqlalchemy_session = db.Session + sqlalchemy_session = db.session_factory sqlalchemy_session_persistence = 'commit' id = factory.Faker('random_number', digits=16, fix_len=True) @@ -35,7 +35,7 @@ class TablesFactory(SQLAlchemyModelFactory): class TableFilesFactory(SQLAlchemyModelFactory): class Meta: model = TableFiles - sqlalchemy_session = db.Session + sqlalchemy_session = db.session_factory sqlalchemy_session_persistence = 'commit' id = factory.Faker('random_number', digits=16, fix_len=True) From 13bad105e201172d6a072174ffb07ecddf326bfa Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 28 Sep 2019 16:42:05 +0800 Subject: [PATCH 049/126] add unit test --- conftest.py | 22 +++++++++++++++++++++ mishards/test_connections.py | 0 mishards/test_models.py | 38 ++++++++++++++++++++++++++++++++++++ setup.cfg | 4 ++++ 4 files 
changed, 64 insertions(+) create mode 100644 conftest.py create mode 100644 mishards/test_connections.py create mode 100644 mishards/test_models.py create mode 100644 setup.cfg diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000000..630ff0ba31 --- /dev/null +++ b/conftest.py @@ -0,0 +1,22 @@ +import logging +import pytest +from mishards import settings, db, create_app + +logger = logging.getLogger(__name__) + +def clear_data(session): + meta = db.metadata + for table in reversed(meta.sorted_tables): + session.execute(table.delete()) + session.commit() + +# @pytest.fixture(scope="module") +@pytest.fixture +def app(request): + app = create_app(settings.TestingConfig) + db.drop_all() + db.create_all() + + yield app + + db.drop_all() diff --git a/mishards/test_connections.py b/mishards/test_connections.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mishards/test_models.py b/mishards/test_models.py new file mode 100644 index 0000000000..85dcc246aa --- /dev/null +++ b/mishards/test_models.py @@ -0,0 +1,38 @@ +import logging +import pytest +from mishards.factories import TableFiles, Tables, TableFilesFactory, TablesFactory +from mishards import db, create_app, settings +from mishards.factories import ( + Tables, TableFiles, + TablesFactory, TableFilesFactory + ) + +logger = logging.getLogger(__name__) + +@pytest.mark.usefixtures('app') +class TestModels: + def test_files_to_search(self): + table = TablesFactory() + new_files_cnt = 5 + to_index_cnt = 10 + raw_cnt = 20 + backup_cnt = 12 + to_delete_cnt = 9 + index_cnt = 8 + new_index_cnt = 6 + new_merge_cnt = 11 + + new_files = TableFilesFactory.create_batch(new_files_cnt, table=table, file_type=TableFiles.FILE_TYPE_NEW, date=110) + to_index_files = TableFilesFactory.create_batch(to_index_cnt, table=table, file_type=TableFiles.FILE_TYPE_TO_INDEX, date=110) + raw_files = TableFilesFactory.create_batch(raw_cnt, table=table, file_type=TableFiles.FILE_TYPE_RAW, date=120) + backup_files = 
TableFilesFactory.create_batch(backup_cnt, table=table, file_type=TableFiles.FILE_TYPE_BACKUP, date=110) + index_files = TableFilesFactory.create_batch(index_cnt, table=table, file_type=TableFiles.FILE_TYPE_INDEX, date=110) + new_index_files = TableFilesFactory.create_batch(new_index_cnt, table=table, file_type=TableFiles.FILE_TYPE_NEW_INDEX, date=110) + new_merge_files = TableFilesFactory.create_batch(new_merge_cnt, table=table, file_type=TableFiles.FILE_TYPE_NEW_MERGE, date=110) + to_delete_files = TableFilesFactory.create_batch(to_delete_cnt, table=table, file_type=TableFiles.FILE_TYPE_TO_DELETE, date=110) + assert table.files_to_search().count() == raw_cnt + index_cnt + to_index_cnt + + assert table.files_to_search([(100, 115)]).count() == index_cnt + to_index_cnt + assert table.files_to_search([(111, 120)]).count() == 0 + assert table.files_to_search([(111, 121)]).count() == raw_cnt + assert table.files_to_search([(110, 121)]).count() == raw_cnt + index_cnt + to_index_cnt diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000..4a88432914 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,4 @@ +[tool:pytest] +testpaths = mishards +log_cli=true +log_cli_level=info From dd38d54d647816516479782404a9c71805cf05b9 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 28 Sep 2019 17:26:28 +0800 Subject: [PATCH 050/126] add connection tests --- conftest.py | 7 ---- mishards/connections.py | 10 ++--- mishards/test_connections.py | 73 ++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 12 deletions(-) diff --git a/conftest.py b/conftest.py index 630ff0ba31..c4fed5cc7e 100644 --- a/conftest.py +++ b/conftest.py @@ -4,13 +4,6 @@ from mishards import settings, db, create_app logger = logging.getLogger(__name__) -def clear_data(session): - meta = db.metadata - for table in reversed(meta.sorted_tables): - session.execute(table.delete()) - session.commit() - -# @pytest.fixture(scope="module") @pytest.fixture def app(request): app = 
create_app(settings.TestingConfig) diff --git a/mishards/connections.py b/mishards/connections.py index 35c5d6c3bd..caaf9629dd 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -35,7 +35,7 @@ class Connection: @property def can_retry(self): - return self.retried <= self.max_retry + return self.retried < self.max_retry @property def connected(self): @@ -45,7 +45,7 @@ class Connection: if self.on_retry_func: self.on_retry_func(self) else: - logger.warn('{} is retrying {}'.format(self, self.retried)) + logger.warning('{} is retrying {}'.format(self, self.retried)) def on_connect(self, metadata=None): while not self.connected and self.can_retry: @@ -123,11 +123,11 @@ class ConnectionMgr: return self.on_diff_meta(name, url) def on_same_meta(self, name, url): - # logger.warn('Register same meta: {}:{}'.format(name, url)) + # logger.warning('Register same meta: {}:{}'.format(name, url)) pass def on_diff_meta(self, name, url): - logger.warn('Received {} with diff url={}'.format(name, url)) + logger.warning('Received {} with diff url={}'.format(name, url)) self.metas[name] = url self.conns[name] = {} @@ -136,7 +136,7 @@ class ConnectionMgr: self.conns.pop(name, None) def on_nonexisted_meta(self, name): - logger.warn('Non-existed meta: {}'.format(name)) + logger.warning('Non-existed meta: {}'.format(name)) def register(self, name, url): logger.info('Register Connection: name={};url={}'.format(name, url)) diff --git a/mishards/test_connections.py b/mishards/test_connections.py index e69de29bb2..1f46b60f8b 100644 --- a/mishards/test_connections.py +++ b/mishards/test_connections.py @@ -0,0 +1,73 @@ +import logging +import pytest + +from mishards.connections import (ConnectionMgr, Connection) +from mishards import exceptions + +logger = logging.getLogger(__name__) + +@pytest.mark.usefixtures('app') +class TestConnection: + def test_manager(self): + mgr = ConnectionMgr() + + mgr.register('pod1', '111') + mgr.register('pod2', '222') + mgr.register('pod2', 
'222') + mgr.register('pod2', '2222') + assert len(mgr.conn_names) == 2 + + mgr.unregister('pod1') + assert len(mgr.conn_names) == 1 + + mgr.unregister('pod2') + assert len(mgr.conn_names) == 0 + + mgr.register('WOSERVER', 'xxxx') + assert len(mgr.conn_names) == 0 + + def test_connection(self): + class Conn: + def __init__(self, state): + self.state = state + def connect(self, uri): + return self.state + def connected(self): + return self.state + FAIL_CONN = Conn(False) + PASS_CONN = Conn(True) + + class Retry: + def __init__(self): + self.times = 0 + + def __call__(self, conn): + self.times += 1 + logger.info('Retrying {}'.format(self.times)) + + class Func(): + def __init__(self): + self.executed = False + def __call__(self): + self.executed = True + + max_retry = 3 + + RetryObj = Retry() + c = Connection('client', uri='', + max_retry=max_retry, + on_retry_func=RetryObj) + c.conn = FAIL_CONN + ff = Func() + this_connect = c.connect(func=ff) + with pytest.raises(exceptions.ConnectionConnectError): + this_connect() + assert RetryObj.times == max_retry + assert not ff.executed + RetryObj = Retry() + + c.conn = PASS_CONN + this_connect = c.connect(func=ff) + this_connect() + assert ff.executed + assert RetryObj.times == 0 From 7d1590c691a8aa518290614de6f9df2ca3af21af Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 28 Sep 2019 17:27:33 +0800 Subject: [PATCH 051/126] remove dummy code --- mishards/connections.py | 56 ----------------------------------------- 1 file changed, 56 deletions(-) diff --git a/mishards/connections.py b/mishards/connections.py index caaf9629dd..22524c3a20 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -152,59 +152,3 @@ class ConnectionMgr: if url is None: return self.on_nonexisted_meta(name) return self.on_unregister_meta(name, url) - - -if __name__ == '__main__': - class Conn: - def __init__(self, state): - self.state = state - - def connect(self, uri): - return self.state - - def connected(self): - return self.state 
- - fail_conn = Conn(False) - success_conn = Conn(True) - - class Retry: - def __init__(self): - self.times = 0 - - def __call__(self, conn): - self.times += 1 - print('Retrying {}'.format(self.times)) - - - retry_obj = Retry() - c = Connection('client', uri='', on_retry_func=retry_obj) - - def f(): - print('ffffffff') - - # c.conn = fail_conn - # m = c.connect(func=f) - # m() - - c.conn = success_conn - m = c.connect(func=f) - m() - - mgr = ConnectionMgr() - mgr.register('pod1', '111') - mgr.register('pod2', '222') - mgr.register('pod2', '222') - mgr.register('pod2', 'tcp://127.0.0.1:19530') - - pod3 = mgr.conn('pod3') - print(pod3) - - pod2 = mgr.conn('pod2') - print(pod2) - print(pod2.connected) - - mgr.unregister('pod1') - - logger.info(mgr.metas) - logger.info(mgr.conns) From 6d25b23e39e8233b18ec2ac95371aa3abb0f4716 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 28 Sep 2019 18:21:33 +0800 Subject: [PATCH 052/126] update env example --- mishards/.env.example | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/mishards/.env.example b/mishards/.env.example index 22406c7f34..76b1810759 100644 --- a/mishards/.env.example +++ b/mishards/.env.example @@ -1,8 +1,10 @@ -DEBUG=False +DEBUG=True WOSERVER=tcp://127.0.0.1:19530 TESTING_WOSERVER=tcp://127.0.0.1:19530 -SERVER_PORT=19531 +SERVER_PORT=19532 + +SD_PROVIDER=Static SD_NAMESPACE=xp SD_IN_CLUSTER=False @@ -10,5 +12,21 @@ SD_POLL_INTERVAL=5 SD_ROSERVER_POD_PATT=.*-ro-servers-.* SD_LABEL_SELECTOR=tier=ro-servers +SD_STATIC_HOSTS=127.0.0.1 + SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 +#SQLALCHEMY_DATABASE_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False SQL_ECHO=True + +TESTING=True +#SQLALCHEMY_DATABASE_TEST_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 +SQLALCHEMY_DATABASE_TEST_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False +SQL_TEST_ECHO=False + +TRACING_TYPE=jaeger 
+TRACING_SERVICE_NAME=fortest +TRACING_SAMPLER_TYPE=const +TRACING_SAMPLER_PARAM=1 +TRACING_LOG_PAYLOAD=True +#TRACING_SAMPLER_TYPE=probabilistic +#TRACING_SAMPLER_PARAM=0.5 From 498f3e9c8c89a916a8af44f491ccffb8ccd5a068 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 11:20:45 +0800 Subject: [PATCH 053/126] load env example by default --- mishards/settings.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mishards/settings.py b/mishards/settings.py index 71e94b76a2..f5028cbbc7 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -1,10 +1,12 @@ import sys import os -from environs import Env +from dotenv import load_dotenv +load_dotenv('./mishards/.env.example') +from environs import Env env = Env() -env.read_env() +env.read_env(override=True) DEBUG = env.bool('DEBUG', False) From bef93edab9921f04d15747a7e245f8649597e4a7 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 11:29:41 +0800 Subject: [PATCH 054/126] update default sql url --- mishards/.env.example | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mishards/.env.example b/mishards/.env.example index 76b1810759..47a4549f04 100644 --- a/mishards/.env.example +++ b/mishards/.env.example @@ -14,8 +14,8 @@ SD_LABEL_SELECTOR=tier=ro-servers SD_STATIC_HOSTS=127.0.0.1 -SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 -#SQLALCHEMY_DATABASE_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False +#SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 +SQLALCHEMY_DATABASE_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False SQL_ECHO=True TESTING=True From 71c67f59a3b1d348c0e27c49a642bf64b0227a5a Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 13:42:12 +0800 Subject: [PATCH 055/126] update for code style --- conftest.py | 1 + manager.py | 7 ++- mishards/__init__.py | 5 +- mishards/connections.py | 10 ++-- mishards/db_base.py | 8 
++- mishards/exception_handlers.py | 5 ++ mishards/exceptions.py | 8 +++ mishards/factories.py | 18 +++--- mishards/grpc_utils/__init__.py | 10 ++-- mishards/grpc_utils/grpc_args_wrapper.py | 4 +- mishards/hash_ring.py | 28 +++++----- mishards/main.py | 11 ++-- mishards/models.py | 15 ++--- mishards/server.py | 4 +- mishards/service_handler.py | 64 ++++++++++----------- mishards/settings.py | 10 +++- mishards/test_connections.py | 8 ++- mishards/test_models.py | 7 ++- sd/__init__.py | 1 + sd/kubernetes_provider.py | 71 +++++++++++++----------- sd/static_provider.py | 6 +- tracing/__init__.py | 13 +++-- tracing/factory.py | 12 ++-- utils/__init__.py | 1 + utils/logger_helper.py | 17 ++++-- 25 files changed, 201 insertions(+), 143 deletions(-) diff --git a/conftest.py b/conftest.py index c4fed5cc7e..d6c9f3acc7 100644 --- a/conftest.py +++ b/conftest.py @@ -4,6 +4,7 @@ from mishards import settings, db, create_app logger = logging.getLogger(__name__) + @pytest.fixture def app(request): app = create_app(settings.TestingConfig) diff --git a/manager.py b/manager.py index 31f5894d2d..931c90ebc8 100644 --- a/manager.py +++ b/manager.py @@ -2,6 +2,7 @@ import fire from mishards import db from sqlalchemy import and_ + class DBHandler: @classmethod def create_all(cls): @@ -15,9 +16,9 @@ class DBHandler: def fun(cls, tid): from mishards.factories import TablesFactory, TableFilesFactory, Tables f = db.Session.query(Tables).filter(and_( - Tables.table_id==tid, - Tables.state!=Tables.TO_DELETE) - ).first() + Tables.table_id == tid, + Tables.state != Tables.TO_DELETE) + ).first() print(f) # f1 = TableFilesFactory() diff --git a/mishards/__init__.py b/mishards/__init__.py index b351986cba..47d8adb6e3 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -1,4 +1,4 @@ -import logging +import logging from mishards import settings logger = logging.getLogger() @@ -8,6 +8,7 @@ db = DB() from mishards.server import Server grpc_server = Server() + def 
create_app(testing_config=None): config = testing_config if testing_config else settings.DefaultConfig db.init_db(uri=config.SQLALCHEMY_DATABASE_URI, echo=config.SQL_ECHO) @@ -24,7 +25,7 @@ def create_app(testing_config=None): from tracing.factory import TracerFactory from mishards.grpc_utils import GrpcSpanDecorator tracer = TracerFactory.new_tracer(settings.TRACING_TYPE, settings.TracingConfig, - span_decorator=GrpcSpanDecorator()) + span_decorator=GrpcSpanDecorator()) grpc_server.init_app(conn_mgr=connect_mgr, tracer=tracer, discover=discover) diff --git a/mishards/connections.py b/mishards/connections.py index 22524c3a20..ccd8e7e81b 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -10,6 +10,7 @@ from utils import singleton logger = logging.getLogger(__name__) + class Connection: def __init__(self, name, uri, max_retry=1, error_handlers=None, **kwargs): self.name = name @@ -55,7 +56,7 @@ class Connection: if not self.can_retry and not self.connected: raise exceptions.ConnectionConnectError(message='Max retry {} reached!'.format(self.max_retry, - metadata=metadata)) + metadata=metadata)) self.retried = 0 @@ -72,6 +73,7 @@ class Connection: raise e return inner + @singleton class ConnectionMgr: def __init__(self): @@ -90,10 +92,10 @@ class ConnectionMgr: if not throw: return None raise exceptions.ConnectionNotFoundError(message='Connection {} not found'.format(name), - metadata=metadata) + metadata=metadata) this_conn = Connection(name=name, uri=url, max_retry=settings.MAX_RETRY) threaded = { - threading.get_ident() : this_conn + threading.get_ident(): this_conn } self.conns[name] = threaded return this_conn @@ -106,7 +108,7 @@ class ConnectionMgr: if not throw: return None raise exceptions.ConnectionNotFoundError('Connection {} not found'.format(name), - metadata=metadata) + metadata=metadata) this_conn = Connection(name=name, uri=url, max_retry=settings.MAX_RETRY) c[tid] = this_conn return this_conn diff --git a/mishards/db_base.py 
b/mishards/db_base.py index b1492aa8f5..6fb3aef4e1 100644 --- a/mishards/db_base.py +++ b/mishards/db_base.py @@ -14,8 +14,10 @@ class LocalSession(SessionBase): bind = options.pop('bind', None) or db.engine SessionBase.__init__(self, autocommit=autocommit, autoflush=autoflush, bind=bind, **options) + class DB: Model = declarative_base() + def __init__(self, uri=None, echo=False): self.echo = echo uri and self.init_db(uri, echo) @@ -27,9 +29,9 @@ class DB: self.engine = create_engine(url) else: self.engine = create_engine(uri, pool_size=100, pool_recycle=5, pool_timeout=30, - pool_pre_ping=True, - echo=echo, - max_overflow=0) + pool_pre_ping=True, + echo=echo, + max_overflow=0) self.uri = uri self.url = url diff --git a/mishards/exception_handlers.py b/mishards/exception_handlers.py index 16ba34a3b1..1e5ffb3529 100644 --- a/mishards/exception_handlers.py +++ b/mishards/exception_handlers.py @@ -4,6 +4,7 @@ from mishards import grpc_server as server, exceptions logger = logging.getLogger(__name__) + def resp_handler(err, error_code): if not isinstance(err, exceptions.BaseException): return status_pb2.Status(error_code=error_code, reason=str(err)) @@ -50,21 +51,25 @@ def resp_handler(err, error_code): status.error_code = status_pb2.UNEXPECTED_ERROR return status + @server.errorhandler(exceptions.TableNotFoundError) def TableNotFoundErrorHandler(err): logger.error(err) return resp_handler(err, status_pb2.TABLE_NOT_EXISTS) + @server.errorhandler(exceptions.InvalidArgumentError) def InvalidArgumentErrorHandler(err): logger.error(err) return resp_handler(err, status_pb2.ILLEGAL_ARGUMENT) + @server.errorhandler(exceptions.DBError) def DBErrorHandler(err): logger.error(err) return resp_handler(err, status_pb2.UNEXPECTED_ERROR) + @server.errorhandler(exceptions.InvalidRangeError) def InvalidArgumentErrorHandler(err): logger.error(err) diff --git a/mishards/exceptions.py b/mishards/exceptions.py index 2aa2b39eb9..acd9372d6a 100644 --- a/mishards/exceptions.py +++ 
b/mishards/exceptions.py @@ -1,26 +1,34 @@ import mishards.exception_codes as codes + class BaseException(Exception): code = codes.INVALID_CODE message = 'BaseException' + def __init__(self, message='', metadata=None): self.message = self.__class__.__name__ if not message else message self.metadata = metadata + class ConnectionConnectError(BaseException): code = codes.CONNECT_ERROR_CODE + class ConnectionNotFoundError(BaseException): code = codes.CONNECTTION_NOT_FOUND_CODE + class DBError(BaseException): code = codes.DB_ERROR_CODE + class TableNotFoundError(BaseException): code = codes.TABLE_NOT_FOUND_CODE + class InvalidArgumentError(BaseException): code = codes.INVALID_ARGUMENT_CODE + class InvalidRangeError(BaseException): code = codes.INVALID_DATE_RANGE_CODE diff --git a/mishards/factories.py b/mishards/factories.py index 26e9ab2619..c4037fe2d7 100644 --- a/mishards/factories.py +++ b/mishards/factories.py @@ -9,13 +9,16 @@ from faker.providers import BaseProvider from mishards import db from mishards.models import Tables, TableFiles + class FakerProvider(BaseProvider): def this_date(self): t = datetime.datetime.today() - return (t.year - 1900) * 10000 + (t.month-1)*100 + t.day + return (t.year - 1900) * 10000 + (t.month - 1) * 100 + t.day + factory.Faker.add_provider(FakerProvider) + class TablesFactory(SQLAlchemyModelFactory): class Meta: model = Tables @@ -24,14 +27,15 @@ class TablesFactory(SQLAlchemyModelFactory): id = factory.Faker('random_number', digits=16, fix_len=True) table_id = factory.Faker('uuid4') - state = factory.Faker('random_element', elements=(0,1,2,3)) - dimension = factory.Faker('random_element', elements=(256,512)) + state = factory.Faker('random_element', elements=(0, 1, 2, 3)) + dimension = factory.Faker('random_element', elements=(256, 512)) created_on = int(time.time()) index_file_size = 0 - engine_type = factory.Faker('random_element', elements=(0,1,2,3)) - metric_type = factory.Faker('random_element', elements=(0,1)) + engine_type = 
factory.Faker('random_element', elements=(0, 1, 2, 3)) + metric_type = factory.Faker('random_element', elements=(0, 1)) nlist = 16384 + class TableFilesFactory(SQLAlchemyModelFactory): class Meta: model = TableFiles @@ -40,9 +44,9 @@ class TableFilesFactory(SQLAlchemyModelFactory): id = factory.Faker('random_number', digits=16, fix_len=True) table = factory.SubFactory(TablesFactory) - engine_type = factory.Faker('random_element', elements=(0,1,2,3)) + engine_type = factory.Faker('random_element', elements=(0, 1, 2, 3)) file_id = factory.Faker('uuid4') - file_type = factory.Faker('random_element', elements=(0,1,2,3,4)) + file_type = factory.Faker('random_element', elements=(0, 1, 2, 3, 4)) file_size = factory.Faker('random_number') updated_time = int(time.time()) created_on = int(time.time()) diff --git a/mishards/grpc_utils/__init__.py b/mishards/grpc_utils/__init__.py index 550913ed60..f5225b2a66 100644 --- a/mishards/grpc_utils/__init__.py +++ b/mishards/grpc_utils/__init__.py @@ -14,21 +14,23 @@ class GrpcSpanDecorator(SpanDecorator): status = rpc_info.response.status except Exception as e: status = status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, - reason='Should not happen') + reason='Should not happen') if status.error_code == 0: return error_log = {'event': 'error', - 'request': rpc_info.request, - 'response': rpc_info.response - } + 'request': rpc_info.request, + 'response': rpc_info.response + } span.set_tag('error', True) span.log_kv(error_log) + def mark_grpc_method(func): setattr(func, 'grpc_method', True) return func + def is_grpc_method(func): if not func: return False diff --git a/mishards/grpc_utils/grpc_args_wrapper.py b/mishards/grpc_utils/grpc_args_wrapper.py index a864b1e400..7447dbd995 100644 --- a/mishards/grpc_utils/grpc_args_wrapper.py +++ b/mishards/grpc_utils/grpc_args_wrapper.py @@ -1,4 +1,4 @@ # class GrpcArgsWrapper(object): - # @classmethod - # def proto_TableName(cls): \ No newline at end of file +# @classmethod +# def 
proto_TableName(cls): diff --git a/mishards/hash_ring.py b/mishards/hash_ring.py index bfec108c5c..a97f3f580e 100644 --- a/mishards/hash_ring.py +++ b/mishards/hash_ring.py @@ -9,8 +9,8 @@ else: import md5 md5_constructor = md5.new -class HashRing(object): +class HashRing(object): def __init__(self, nodes=None, weights=None): """`nodes` is a list of objects that have a proper __str__ representation. `weights` is dictionary that sets weights to the nodes. The default @@ -40,13 +40,13 @@ class HashRing(object): if node in self.weights: weight = self.weights.get(node) - factor = math.floor((40*len(self.nodes)*weight) / total_weight); + factor = math.floor((40 * len(self.nodes) * weight) / total_weight) for j in range(0, int(factor)): - b_key = self._hash_digest( '%s-%s' % (node, j) ) + b_key = self._hash_digest('%s-%s' % (node, j)) for i in range(0, 3): - key = self._hash_val(b_key, lambda x: x+i*4) + key = self._hash_val(b_key, lambda x: x + i * 4) self.ring[key] = node self._sorted_keys.append(key) @@ -60,7 +60,7 @@ class HashRing(object): pos = self.get_node_pos(string_key) if pos is None: return None - return self.ring[ self._sorted_keys[pos] ] + return self.ring[self._sorted_keys[pos]] def get_node_pos(self, string_key): """Given a string key a corresponding node in the hash ring is returned @@ -94,6 +94,7 @@ class HashRing(object): yield None, None returned_values = set() + def distinct_filter(value): if str(value) not in returned_values: returned_values.add(str(value)) @@ -121,10 +122,8 @@ class HashRing(object): return self._hash_val(b_key, lambda x: x) def _hash_val(self, b_key, entry_fn): - return (( b_key[entry_fn(3)] << 24) - |(b_key[entry_fn(2)] << 16) - |(b_key[entry_fn(1)] << 8) - | b_key[entry_fn(0)] ) + return (b_key[entry_fn(3)] << 24) | (b_key[entry_fn(2)] << 16) | ( + b_key[entry_fn(1)] << 8) | b_key[entry_fn(0)] def _hash_digest(self, key): m = md5_constructor() @@ -132,12 +131,13 @@ class HashRing(object): m.update(key) return m.digest() + if 
__name__ == '__main__': from collections import defaultdict - servers = ['192.168.0.246:11212', - '192.168.0.247:11212', - '192.168.0.248:11212', - '192.168.0.249:11212'] + servers = [ + '192.168.0.246:11212', '192.168.0.247:11212', '192.168.0.248:11212', + '192.168.0.249:11212' + ] ring = HashRing(servers) keys = ['{}'.format(i) for i in range(100)] @@ -146,5 +146,5 @@ if __name__ == '__main__': server = ring.get_node(k) mapped[server].append(k) - for k,v in mapped.items(): + for k, v in mapped.items(): print(k, v) diff --git a/mishards/main.py b/mishards/main.py index 5d8db0a179..3f69484ee4 100644 --- a/mishards/main.py +++ b/mishards/main.py @@ -1,13 +1,16 @@ -import os, sys +import os +import sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from mishards import ( - settings, create_app) +from mishards import (settings, create_app) + def main(): - server = create_app(settings.TestingConfig if settings.TESTING else settings.DefaultConfig) + server = create_app( + settings.TestingConfig if settings.TESTING else settings.DefaultConfig) server.run(port=settings.SERVER_PORT) return 0 + if __name__ == '__main__': sys.exit(main()) diff --git a/mishards/models.py b/mishards/models.py index 0f7bb603ae..54cf5f8ed9 100644 --- a/mishards/models.py +++ b/mishards/models.py @@ -1,13 +1,14 @@ import logging from sqlalchemy import (Integer, Boolean, Text, - String, BigInteger, func, and_, or_, - Column) + String, BigInteger, func, and_, or_, + Column) from sqlalchemy.orm import relationship, backref from mishards import db logger = logging.getLogger(__name__) + class TableFiles(db.Model): FILE_TYPE_NEW = 0 FILE_TYPE_RAW = 1 @@ -57,16 +58,16 @@ class Tables(db.Model): def files_to_search(self, date_range=None): cond = or_( - TableFiles.file_type==TableFiles.FILE_TYPE_RAW, - TableFiles.file_type==TableFiles.FILE_TYPE_TO_INDEX, - TableFiles.file_type==TableFiles.FILE_TYPE_INDEX, + TableFiles.file_type == TableFiles.FILE_TYPE_RAW, + 
TableFiles.file_type == TableFiles.FILE_TYPE_TO_INDEX, + TableFiles.file_type == TableFiles.FILE_TYPE_INDEX, ) if date_range: cond = and_( cond, or_( - and_(TableFiles.date>=d[0], TableFiles.date<d[1]) for d in date_range - ) + and_(TableFiles.date >= d[0], TableFiles.date < d[1]) for d in date_range + ) ) files = self.files.filter(cond) diff --git a/mishards/server.py b/mishards/server.py index c044bbb7ad..032d101cba 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -33,7 +33,7 @@ class Server: self.server_impl = grpc.server( thread_pool=futures.ThreadPoolExecutor(max_workers=max_workers), options=[(cygrpc.ChannelArgKey.max_send_message_length, -1), - (cygrpc.ChannelArgKey.max_receive_message_length, -1)] + (cygrpc.ChannelArgKey.max_receive_message_length, -1)] ) self.server_impl = self.tracer.decorate(self.server_impl) @@ -46,7 +46,7 @@ class Server: ip = socket.gethostbyname(url.hostname) socket.inet_pton(socket.AF_INET, ip) self.conn_mgr.register('WOSERVER', - '{}://{}:{}'.format(url.scheme, ip, url.port or 80)) + '{}://{}:{}'.format(url.scheme, ip, url.port or 80)) def register_pre_run_handler(self, func): logger.info('Regiterring {} into server pre_run_handlers'.format(func)) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 60d64cef37..2a1e0eef02 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -11,7 +11,7 @@ from concurrent.futures import ThreadPoolExecutor from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 from milvus.grpc_gen.milvus_pb2 import TopKQueryResult from milvus.client.Abstract import Range -from milvus.client import types +from milvus.client import types as Types from mishards import (db, settings, exceptions) from mishards.grpc_utils import mark_grpc_method @@ -24,6 +24,7 @@ logger = logging.getLogger(__name__) class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): MAX_NPROBE = 2048 + def __init__(self, conn_mgr, tracer, *args, **kwargs): self.conn_mgr = conn_mgr self.table_meta = {} @@ -44,8 +45,7 @@ class
ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return conn.conn def _format_date(self, start, end): - return ((start.year-1900)*10000 + (start.month-1)*100 + start.day - , (end.year-1900)*10000 + (end.month-1)*100 + end.day) + return ((start.year - 1900) * 10000 + (start.month - 1) * 100 + start.day, (end.year - 1900) * 10000 + (end.month - 1) * 100 + end.day) def _range_to_date(self, range_obj, metadata=None): try: @@ -54,8 +54,8 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): assert start < end except (ValueError, AssertionError): raise exceptions.InvalidRangeError('Invalid time range: {} {}'.format( - range_obj.start_date, range_obj.end_date - ), metadata=metadata) + range_obj.start_date, range_obj.end_date + ), metadata=metadata) return self._format_date(start, end) @@ -63,9 +63,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): # PXU TODO: Implement Thread-local Context try: table = db.Session.query(Tables).filter(and_( - Tables.table_id==table_id, - Tables.state!=Tables.TO_DELETE - )).first() + Tables.table_id == table_id, + Tables.state != Tables.TO_DELETE + )).first() except sqlalchemy_exc.SQLAlchemyError as e: raise exceptions.DBError(message=str(e), metadata=metadata) @@ -93,7 +93,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return routing def _do_merge(self, files_n_topk_results, topk, reverse=False, **kwargs): - status=status_pb2.Status(error_code=status_pb2.SUCCESS, reason="Success") + status = status_pb2.Status(error_code=status_pb2.SUCCESS, reason="Success") if not files_n_topk_results: return status, [] @@ -107,7 +107,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): for request_pos, each_request_results in enumerate(files_collection.topk_query_result): request_results[request_pos].extend(each_request_results.query_result_arrays) request_results[request_pos] = sorted(request_results[request_pos], key=lambda x: x.distance, - reverse=reverse)[:topk] + reverse=reverse)[:topk] 
calc_time = time.time() - calc_time logger.info('Merge takes {}'.format(calc_time)) @@ -127,7 +127,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): routing = {} with self.tracer.start_span('get_routing', - child_of=context.get_active_span().context): + child_of=context.get_active_span().context): routing = self._get_routing_file_ids(table_id, range_array, metadata=metadata) logger.info('Routing: {}'.format(routing)) @@ -140,28 +140,28 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def search(addr, query_params, vectors, topk, nprobe, **kwargs): logger.info('Send Search Request: addr={};params={};nq={};topk={};nprobe={}'.format( - addr, query_params, len(vectors), topk, nprobe - )) + addr, query_params, len(vectors), topk, nprobe + )) conn = self.query_conn(addr, metadata=metadata) start = time.time() span = kwargs.get('span', None) span = span if span else context.get_active_span().context with self.tracer.start_span('search_{}'.format(addr), - child_of=context.get_active_span().context): + child_of=context.get_active_span().context): ret = conn.search_vectors_in_files(table_name=query_params['table_id'], - file_ids=query_params['file_ids'], - query_records=vectors, - top_k=topk, - nprobe=nprobe, - lazy=True) + file_ids=query_params['file_ids'], + query_records=vectors, + top_k=topk, + nprobe=nprobe, + lazy=True) end = time.time() logger.info('search_vectors_in_files takes: {}'.format(end - start)) all_topk_results.append(ret) with self.tracer.start_span('do_search', - child_of=context.get_active_span().context) as span: + child_of=context.get_active_span().context) as span: with ThreadPoolExecutor(max_workers=workers) as pool: for addr, params in routing.items(): res = pool.submit(search, addr, params, vectors, topk, nprobe, span=span) @@ -170,9 +170,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): for res in rs: res.result() - reverse = table_meta.metric_type == types.MetricType.IP + reverse = 
table_meta.metric_type == Types.MetricType.IP with self.tracer.start_span('do_merge', - child_of=context.get_active_span().context): + child_of=context.get_active_span().context): return self._do_merge(all_topk_results, topk, reverse=reverse, metadata=metadata) @mark_grpc_method @@ -201,8 +201,8 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('HasTable {}'.format(_table_name)) _bool = self.connection(metadata={ - 'resp_class': milvus_pb2.BoolReply - }).has_table(_table_name) + 'resp_class': milvus_pb2.BoolReply + }).has_table(_table_name) return milvus_pb2.BoolReply( status=status_pb2.Status(error_code=status_pb2.SUCCESS, reason="OK"), @@ -244,7 +244,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): # TODO: Ths SDK interface add_vectors() could update, add a key 'row_id_array' _status, _ids = self.connection(metadata={ 'resp_class': milvus_pb2.VectorIds - }).add_vectors(None, None, insert_param=request) + }).add_vectors(None, None, insert_param=request) return milvus_pb2.VectorIds( status=status_pb2.Status(error_code=_status.code, reason=_status.message), vector_id_array=_ids @@ -266,7 +266,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): if nprobe > self.MAX_NPROBE or nprobe <= 0: raise exceptions.InvalidArgumentError(message='Invalid nprobe: {}'.format(nprobe), - metadata=metadata) + metadata=metadata) table_meta = self.table_meta.get(table_name, None) @@ -332,8 +332,8 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): ) return milvus_pb2.TableSchema( - table_name=_table_name, - status=status_pb2.Status(error_code=_status.code, reason=_status.message), + table_name=_table_name, + status=status_pb2.Status(error_code=_status.code, reason=_status.message), ) @mark_grpc_method @@ -391,8 +391,8 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _results = self.connection(metadata=metadata).show_tables() return milvus_pb2.TableNameList( - 
status=status_pb2.Status(error_code=_status.code, reason=_status.message), - table_names=_results + status=status_pb2.Status(error_code=_status.code, reason=_status.message), + table_names=_results ) @mark_grpc_method @@ -426,7 +426,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): if not _status.OK(): return milvus_pb2.IndexParam( - status=status_pb2.Status(error_code=_status.code, reason=_status.message) + status=status_pb2.Status(error_code=_status.code, reason=_status.message) ) metadata = { @@ -439,7 +439,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _index = milvus_pb2.Index(index_type=_index_param._index_type, nlist=_index_param._nlist) return milvus_pb2.IndexParam(status=status_pb2.Status(error_code=_status.code, reason=_status.message), - table_name=_table_name, index=_index) + table_name=_table_name, index=_index) @mark_grpc_method def DropIndex(self, request, context): diff --git a/mishards/settings.py b/mishards/settings.py index f5028cbbc7..4563538a08 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -39,13 +39,15 @@ if SD_PROVIDER == 'Kubernetes': elif SD_PROVIDER == 'Static': from sd.static_provider import StaticProviderSettings SD_PROVIDER_SETTINGS = StaticProviderSettings( - hosts=env.list('SD_STATIC_HOSTS', []) - ) + hosts=env.list('SD_STATIC_HOSTS', []) + ) TESTING = env.bool('TESTING', False) TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') TRACING_TYPE = env.str('TRACING_TYPE', '') + + class TracingConfig: TRACING_SERVICE_NAME = env.str('TRACING_SERVICE_NAME', 'mishards') TRACING_VALIDATE = env.bool('TRACING_VALIDATE', True) @@ -54,7 +56,7 @@ class TracingConfig: 'sampler': { 'type': env.str('TRACING_SAMPLER_TYPE', 'const'), 'param': env.str('TRACING_SAMPLER_PARAM', "1"), - }, + }, 'local_agent': { 'reporting_host': env.str('TRACING_REPORTING_HOST', '127.0.0.1'), 'reporting_port': env.str('TRACING_REPORTING_PORT', '5775') @@ -62,10 +64,12 @@ class TracingConfig: 'logging': 
env.bool('TRACING_LOGGING', True) } + class DefaultConfig: SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') SQL_ECHO = env.bool('SQL_ECHO', False) + TESTING = env.bool('TESTING', False) if TESTING: class TestingConfig(DefaultConfig): diff --git a/mishards/test_connections.py b/mishards/test_connections.py index 1f46b60f8b..f1c54f0c61 100644 --- a/mishards/test_connections.py +++ b/mishards/test_connections.py @@ -6,6 +6,7 @@ from mishards import exceptions logger = logging.getLogger(__name__) + @pytest.mark.usefixtures('app') class TestConnection: def test_manager(self): @@ -30,8 +31,10 @@ class TestConnection: class Conn: def __init__(self, state): self.state = state + def connect(self, uri): return self.state + def connected(self): return self.state FAIL_CONN = Conn(False) @@ -48,6 +51,7 @@ class TestConnection: class Func(): def __init__(self): self.executed = False + def __call__(self): self.executed = True @@ -55,8 +59,8 @@ class TestConnection: RetryObj = Retry() c = Connection('client', uri='', - max_retry=max_retry, - on_retry_func=RetryObj) + max_retry=max_retry, + on_retry_func=RetryObj) c.conn = FAIL_CONN ff = Func() this_connect = c.connect(func=ff) diff --git a/mishards/test_models.py b/mishards/test_models.py index 85dcc246aa..d60b62713e 100644 --- a/mishards/test_models.py +++ b/mishards/test_models.py @@ -3,12 +3,13 @@ import pytest from mishards.factories import TableFiles, Tables, TableFilesFactory, TablesFactory from mishards import db, create_app, settings from mishards.factories import ( - Tables, TableFiles, - TablesFactory, TableFilesFactory - ) + Tables, TableFiles, + TablesFactory, TableFilesFactory +) logger = logging.getLogger(__name__) + @pytest.mark.usefixtures('app') class TestModels: def test_files_to_search(self): diff --git a/sd/__init__.py b/sd/__init__.py index 6dfba5ddc1..7943887d0f 100644 --- a/sd/__init__.py +++ b/sd/__init__.py @@ -24,4 +24,5 @@ class ProviderManager: def get_provider(cls, name): return 
cls.PROVIDERS.get(name, None) + from sd import kubernetes_provider, static_provider diff --git a/sd/kubernetes_provider.py b/sd/kubernetes_provider.py index 51665a0cb5..924f1fc8a4 100644 --- a/sd/kubernetes_provider.py +++ b/sd/kubernetes_provider.py @@ -1,4 +1,5 @@ -import os, sys +import os +import sys if __name__ == '__main__': sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -71,7 +72,6 @@ class K8SHeartbeatHandler(threading.Thread, K8SMixin): self.queue.put(event_message) - except Exception as exc: logger.error(exc) @@ -98,18 +98,18 @@ class K8SEventListener(threading.Thread, K8SMixin): resource_version = '' w = watch.Watch() for event in w.stream(self.v1.list_namespaced_event, namespace=self.namespace, - field_selector='involvedObject.kind=Pod'): + field_selector='involvedObject.kind=Pod'): if self.terminate: break resource_version = int(event['object'].metadata.resource_version) info = dict( - eType='WatchEvent', - pod=event['object'].involved_object.name, - reason=event['object'].reason, - message=event['object'].message, - start_up=self.at_start_up, + eType='WatchEvent', + pod=event['object'].involved_object.name, + reason=event['object'].reason, + message=event['object'].message, + start_up=self.at_start_up, ) self.at_start_up = False # logger.info('Received event: {}'.format(info)) @@ -135,7 +135,7 @@ class EventHandler(threading.Thread): def on_pod_started(self, event, **kwargs): try_cnt = 3 pod = None - while try_cnt > 0: + while try_cnt > 0: try_cnt -= 1 try: pod = self.mgr.v1.read_namespaced_pod(name=event['pod'], namespace=self.namespace) @@ -203,6 +203,7 @@ class EventHandler(threading.Thread): except queue.Empty: continue + class KubernetesProviderSettings: def __init__(self, namespace, pod_patt, label_selector, in_cluster, poll_interval, **kwargs): self.namespace = namespace @@ -211,10 +212,12 @@ class KubernetesProviderSettings: self.in_cluster = in_cluster self.poll_interval = poll_interval + @singleton 
@ProviderManager.register_service_provider class KubernetesProvider(object): NAME = 'Kubernetes' + def __init__(self, settings, conn_mgr, **kwargs): self.namespace = settings.namespace self.pod_patt = settings.pod_patt @@ -233,27 +236,27 @@ class KubernetesProvider(object): self.v1 = client.CoreV1Api() self.listener = K8SEventListener( - message_queue=self.queue, - namespace=self.namespace, - in_cluster=self.in_cluster, - v1=self.v1, - **kwargs - ) + message_queue=self.queue, + namespace=self.namespace, + in_cluster=self.in_cluster, + v1=self.v1, + **kwargs + ) self.pod_heartbeater = K8SHeartbeatHandler( - message_queue=self.queue, - namespace=self.namespace, - label_selector=self.label_selector, - in_cluster=self.in_cluster, - v1=self.v1, - poll_interval=self.poll_interval, - **kwargs - ) + message_queue=self.queue, + namespace=self.namespace, + label_selector=self.label_selector, + in_cluster=self.in_cluster, + v1=self.v1, + poll_interval=self.poll_interval, + **kwargs + ) self.event_handler = EventHandler(mgr=self, - message_queue=self.queue, - namespace=self.namespace, - pod_patt=self.pod_patt, **kwargs) + message_queue=self.queue, + namespace=self.namespace, + pod_patt=self.pod_patt, **kwargs) def add_pod(self, name, ip): self.conn_mgr.register(name, 'tcp://{}:19530'.format(ip)) @@ -276,9 +279,11 @@ class KubernetesProvider(object): if __name__ == '__main__': logging.basicConfig(level=logging.INFO) + class Connect: def register(self, name, value): logger.error('Register: {} - {}'.format(name, value)) + def unregister(self, name): logger.error('Unregister: {}'.format(name)) @@ -289,16 +294,16 @@ if __name__ == '__main__': connect_mgr = Connect() settings = KubernetesProviderSettings( - namespace='xp', - pod_patt=".*-ro-servers-.*", - label_selector='tier=ro-servers', - poll_interval=5, - in_cluster=False) + namespace='xp', + pod_patt=".*-ro-servers-.*", + label_selector='tier=ro-servers', + poll_interval=5, + in_cluster=False) provider_class = 
ProviderManager.get_provider('Kubernetes') t = provider_class(conn_mgr=connect_mgr, - settings=settings - ) + settings=settings + ) t.start() cnt = 100 while cnt > 0: diff --git a/sd/static_provider.py b/sd/static_provider.py index 423d6c4d60..5c97c4efd0 100644 --- a/sd/static_provider.py +++ b/sd/static_provider.py @@ -1,4 +1,5 @@ -import os, sys +import os +import sys if __name__ == '__main__': sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -6,14 +7,17 @@ import socket from utils import singleton from sd import ProviderManager + class StaticProviderSettings: def __init__(self, hosts): self.hosts = hosts + @singleton @ProviderManager.register_service_provider class KubernetesProvider(object): NAME = 'Static' + def __init__(self, settings, conn_mgr, **kwargs): self.conn_mgr = conn_mgr self.hosts = [socket.gethostbyname(host) for host in settings.hosts] diff --git a/tracing/__init__.py b/tracing/__init__.py index 27c57473db..5014309a52 100644 --- a/tracing/__init__.py +++ b/tracing/__init__.py @@ -1,13 +1,14 @@ def empty_server_interceptor_decorator(target_server, interceptor): return target_server + class Tracer: def __init__(self, tracer=None, - interceptor=None, - server_decorator=empty_server_interceptor_decorator): + interceptor=None, + server_decorator=empty_server_interceptor_decorator): self.tracer = tracer self.interceptor = interceptor - self.server_decorator=server_decorator + self.server_decorator = server_decorator def decorate(self, server): return self.server_decorator(server, self.interceptor) @@ -16,7 +17,7 @@ class Tracer: self.tracer and self.tracer.close() def start_span(self, operation_name=None, - child_of=None, references=None, tags=None, - start_time=None, ignore_active_span=False): + child_of=None, references=None, tags=None, + start_time=None, ignore_active_span=False): return self.tracer.start_span(operation_name, child_of, - references, tags, start_time, ignore_active_span) + references, tags, start_time, 
ignore_active_span) diff --git a/tracing/factory.py b/tracing/factory.py index fd06fe3cac..648dfa291e 100644 --- a/tracing/factory.py +++ b/tracing/factory.py @@ -4,7 +4,7 @@ from grpc_opentracing.grpcext import intercept_server from grpc_opentracing import open_tracing_server_interceptor from tracing import (Tracer, - empty_server_interceptor_decorator) + empty_server_interceptor_decorator) logger = logging.getLogger(__name__) @@ -17,14 +17,14 @@ class TracerFactory: if tracer_type.lower() == 'jaeger': config = Config(config=tracer_config.TRACING_CONFIG, - service_name=tracer_config.TRACING_SERVICE_NAME, - validate=tracer_config.TRACING_VALIDATE - ) + service_name=tracer_config.TRACING_SERVICE_NAME, + validate=tracer_config.TRACING_VALIDATE + ) tracer = config.initialize_tracer() tracer_interceptor = open_tracing_server_interceptor(tracer, - log_payloads=tracer_config.TRACING_LOG_PAYLOAD, - span_decorator=span_decorator) + log_payloads=tracer_config.TRACING_LOG_PAYLOAD, + span_decorator=span_decorator) return Tracer(tracer, tracer_interceptor, intercept_server) diff --git a/utils/__init__.py b/utils/__init__.py index ec7f32bcbc..c1d55e76c0 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -1,5 +1,6 @@ from functools import wraps + def singleton(cls): instances = {} @wraps(cls) diff --git a/utils/logger_helper.py b/utils/logger_helper.py index 1b59aa40ec..55ce3206ab 100644 --- a/utils/logger_helper.py +++ b/utils/logger_helper.py @@ -9,18 +9,22 @@ class InfoFilter(logging.Filter): def filter(self, rec): return rec.levelno == logging.INFO + class DebugFilter(logging.Filter): def filter(self, rec): return rec.levelno == logging.DEBUG + class WarnFilter(logging.Filter): def filter(self, rec): return rec.levelno == logging.WARN + class ErrorFilter(logging.Filter): def filter(self, rec): return rec.levelno == logging.ERROR + class CriticalFilter(logging.Filter): def filter(self, rec): return rec.levelno == logging.CRITICAL @@ -36,6 +40,7 @@ COLORS = { 'ENDC': 
'\033[0m', } + class ColorFulFormatColMixin: def format_col(self, message_str, level_name): if level_name in COLORS.keys(): @@ -43,12 +48,14 @@ class ColorFulFormatColMixin: 'ENDC') return message_str + class ColorfulFormatter(logging.Formatter, ColorFulFormatColMixin): def format(self, record): message_str = super(ColorfulFormatter, self).format(record) return self.format_col(message_str, level_name=record.levelname) + def config(log_level, log_path, name, tz='UTC'): def build_log_file(level, log_path, name, tz): utc_now = datetime.datetime.utcnow() @@ -56,7 +63,7 @@ def config(log_level, log_path, name, tz='UTC'): local_tz = timezone(tz) tznow = utc_now.replace(tzinfo=utc_tz).astimezone(local_tz) return '{}-{}-{}.log'.format(os.path.join(log_path, name), tznow.strftime("%m-%d-%Y-%H:%M:%S"), - level) + level) if not os.path.exists(log_path): os.makedirs(log_path) @@ -66,10 +73,10 @@ def config(log_level, log_path, name, tz='UTC'): 'disable_existing_loggers': False, 'formatters': { 'default': { - 'format': '[%(asctime)s-%(levelname)s-%(name)s]: %(message)s (%(filename)s:%(lineno)s)' + 'format': '[%(asctime)s-%(levelname)s-%(name)s]: %(message)s (%(filename)s:%(lineno)s)' }, 'colorful_console': { - 'format': '[%(asctime)s-%(levelname)s-%(name)s]: %(message)s (%(filename)s:%(lineno)s)', + 'format': '[%(asctime)s-%(levelname)s-%(name)s]: %(message)s (%(filename)s:%(lineno)s)', '()': ColorfulFormatter, }, }, @@ -133,8 +140,8 @@ def config(log_level, log_path, name, tz='UTC'): }, 'loggers': { '': { - 'handlers': ['milvus_celery_console', 'milvus_info_file', 'milvus_debug_file', 'milvus_warn_file', \ - 'milvus_error_file', 'milvus_critical_file'], + 'handlers': ['milvus_celery_console', 'milvus_info_file', 'milvus_debug_file', 'milvus_warn_file', + 'milvus_error_file', 'milvus_critical_file'], 'level': log_level, 'propagate': False }, From 4455f539fab8fbf0343b7678a1b1182ac7afb2a3 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 13:54:37 +0800 Subject: 
[PATCH 056/126] code refactor for unused import --- mishards/connections.py | 2 -- mishards/models.py | 2 +- mishards/server.py | 2 -- mishards/service_handler.py | 1 - sd/kubernetes_provider.py | 5 ++--- 5 files changed, 3 insertions(+), 9 deletions(-) diff --git a/mishards/connections.py b/mishards/connections.py index ccd8e7e81b..22263e9e7e 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -1,8 +1,6 @@ import logging import threading -import socket from functools import wraps -from contextlib import contextmanager from milvus import Milvus from mishards import (settings, exceptions) diff --git a/mishards/models.py b/mishards/models.py index 54cf5f8ed9..4b6c8f9ef4 100644 --- a/mishards/models.py +++ b/mishards/models.py @@ -1,6 +1,6 @@ import logging from sqlalchemy import (Integer, Boolean, Text, - String, BigInteger, func, and_, or_, + String, BigInteger, and_, or_, Column) from sqlalchemy.orm import relationship, backref diff --git a/mishards/server.py b/mishards/server.py index 032d101cba..feb2176e86 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -7,8 +7,6 @@ from urllib.parse import urlparse from functools import wraps from concurrent import futures from grpc._cython import cygrpc -from grpc._channel import _Rendezvous, _UnaryUnaryMultiCallable -from jaeger_client import Config from milvus.grpc_gen.milvus_pb2_grpc import add_MilvusServiceServicer_to_server from mishards.grpc_utils import is_grpc_method from mishards.service_handler import ServiceHandler diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 2a1e0eef02..9d851ecfcb 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -1,7 +1,6 @@ import logging import time import datetime -from contextlib import contextmanager from collections import defaultdict from sqlalchemy import and_ diff --git a/sd/kubernetes_provider.py b/sd/kubernetes_provider.py index 924f1fc8a4..8ee1588ec4 100644 --- a/sd/kubernetes_provider.py +++ 
b/sd/kubernetes_provider.py @@ -9,7 +9,6 @@ import time import copy import threading import queue -from functools import wraps from kubernetes import client, config, watch from utils import singleton @@ -17,7 +16,7 @@ from sd import ProviderManager logger = logging.getLogger(__name__) -incluster_namespace_path = '/var/run/secrets/kubernetes.io/serviceaccount/namespace' +INCLUSTER_NAMESPACE_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/namespace' class K8SMixin: @@ -27,7 +26,7 @@ class K8SMixin: self.kwargs = kwargs self.v1 = kwargs.get('v1', None) if not self.namespace: - self.namespace = open(incluster_namespace_path).read() + self.namespace = open(INCLUSTER_NAMESPACE_PATH).read() if not self.v1: config.load_incluster_config() if self.in_cluster else config.load_kube_config() From 7ccab1640f78ceb1555cc3633d5d6d140f693f7f Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 15:04:37 +0800 Subject: [PATCH 057/126] update pymilvus version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e94f8d1597..ea338d0723 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ py==1.8.0 pyasn1==0.4.7 pyasn1-modules==0.2.6 pylint==2.3.1 -pymilvus-test==0.2.15 +pymilvus-test==0.2.21 #pymilvus==0.2.0 pyparsing==2.4.0 pytest==4.6.3 From f32d269eed453aa8dab638fc05c6d2f051fa7bd4 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 15:04:55 +0800 Subject: [PATCH 058/126] update for docker-compose --- start_services.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/start_services.yml b/start_services.yml index 5c779c5b82..b2d4d97cb6 100644 --- a/start_services.yml +++ b/start_services.yml @@ -3,7 +3,7 @@ services: milvus: runtime: nvidia restart: always - image: registry.zilliz.com/milvus/engine:branch-0.4.0-release-c58ca6 + image: registry.zilliz.com/milvus/engine:branch-0.5.0-release-4316de # ports: # - "0.0.0.0:19530:19530" volumes: @@ 
-21,13 +21,13 @@ services: mishards: restart: always - image: registry.zilliz.com/milvus/mishards:v0.0.2 + image: registry.zilliz.com/milvus/mishards:v0.0.3 ports: - "0.0.0.0:19530:19531" - "0.0.0.0:19532:19532" volumes: - /tmp/milvus/db:/tmp/milvus/db - - /tmp/mishards_env:/source/mishards/.env + # - /tmp/mishards_env:/source/mishards/.env command: ["python", "mishards/main.py"] environment: DEBUG: 'true' From fd735cc62efbd29980839454e1113afe95633178 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 15:17:08 +0800 Subject: [PATCH 059/126] change read .env and read .env.example --- mishards/.env.example | 2 +- mishards/settings.py | 18 ++++++++++-------- start_services.yml | 1 + 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/mishards/.env.example b/mishards/.env.example index 47a4549f04..bfea0a3edc 100644 --- a/mishards/.env.example +++ b/mishards/.env.example @@ -18,7 +18,7 @@ SD_STATIC_HOSTS=127.0.0.1 SQLALCHEMY_DATABASE_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False SQL_ECHO=True -TESTING=True +TESTING=False #SQLALCHEMY_DATABASE_TEST_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 SQLALCHEMY_DATABASE_TEST_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False SQL_TEST_ECHO=False diff --git a/mishards/settings.py b/mishards/settings.py index 4563538a08..1982a508e7 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -1,12 +1,15 @@ import sys import os -from dotenv import load_dotenv -load_dotenv('./mishards/.env.example') - from environs import Env env = Env() -env.read_env(override=True) + +FROM_EXAMPLE = env.bool('FROM_EXAMPLE', False) +if FROM_EXAMPLE: + from dotenv import load_dotenv + load_dotenv('./mishards/.env.example') +else: + env.read_env() DEBUG = env.bool('DEBUG', False) @@ -34,13 +37,11 @@ if SD_PROVIDER == 'Kubernetes': in_cluster=env.bool('SD_IN_CLUSTER', False), poll_interval=env.int('SD_POLL_INTERVAL', 5), pod_patt=env.str('SD_ROSERVER_POD_PATT', ''), - 
label_selector=env.str('SD_LABEL_SELECTOR', '') - ) + label_selector=env.str('SD_LABEL_SELECTOR', '')) elif SD_PROVIDER == 'Static': from sd.static_provider import StaticProviderSettings SD_PROVIDER_SETTINGS = StaticProviderSettings( - hosts=env.list('SD_STATIC_HOSTS', []) - ) + hosts=env.list('SD_STATIC_HOSTS', [])) TESTING = env.bool('TESTING', False) TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') @@ -72,6 +73,7 @@ class DefaultConfig: TESTING = env.bool('TESTING', False) if TESTING: + class TestingConfig(DefaultConfig): SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI') SQL_ECHO = env.bool('SQL_TEST_ECHO', False) diff --git a/start_services.yml b/start_services.yml index b2d4d97cb6..c7a3c36f51 100644 --- a/start_services.yml +++ b/start_services.yml @@ -30,6 +30,7 @@ services: # - /tmp/mishards_env:/source/mishards/.env command: ["python", "mishards/main.py"] environment: + FROM_EXAMPLE: 'true' DEBUG: 'true' SERVER_PORT: 19531 WOSERVER: tcp://milvus:19530 From 4dd19f607d4ff23276864bd1b935fe415eaaa515 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 15:35:29 +0800 Subject: [PATCH 060/126] update build.sh --- build.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index 2b3c89bbf9..c46b6a8ea9 100755 --- a/build.sh +++ b/build.sh @@ -5,6 +5,8 @@ NORMAL=`tput sgr0` YELLOW='\033[1;33m' ENDC='\033[0m' +echo -e "${BOLD}MISHARDS_REGISTRY=${MISHARDS_REGISTRY}${ENDC}" + function build_image() { dockerfile=$1 remote_registry=$2 @@ -21,12 +23,17 @@ function build_image() { case "$1" in all) + [[ -z $MISHARDS_REGISTRY ]] && { + echo -e "${YELLOW}Error: Please set docker registry first:${ENDC}\n\t${BOLD}export MISHARDS_REGISTRY=xxxx${ENDC}" + exit 1 + } + version="" [[ ! 
-z $2 ]] && version=":${2}" - build_image "Dockerfile" "registry.zilliz.com/milvus/mishards${version}" "registry.zilliz.com/milvus/mishards" + build_image "Dockerfile" "${MISHARDS_REGISTRY}${version}" "${MISHARDS_REGISTRY}" ;; *) echo "Usage: [option...] {base | apps}" - echo "all, Usage: build.sh all [tagname|] => registry.zilliz.com/milvus/mishards:\${tagname}" + echo "all, Usage: build.sh all [tagname|] => ${MISHARDS_REGISTRY}:\${tagname}" ;; esac From 66fc20ee54f3040f22ee3b4a5f48d11e84c21056 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 15:35:29 +0800 Subject: [PATCH 061/126] update build.sh update build.sh --- build.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index 2b3c89bbf9..8e142d0115 100755 --- a/build.sh +++ b/build.sh @@ -21,12 +21,17 @@ function build_image() { case "$1" in all) + [[ -z $MISHARDS_REGISTRY ]] && { + echo -e "${YELLOW}Error: Please set docker registry first:${ENDC}\n\t${BOLD}export MISHARDS_REGISTRY=xxxx\n${ENDC}" + exit 1 + } + version="" [[ ! -z $2 ]] && version=":${2}" - build_image "Dockerfile" "registry.zilliz.com/milvus/mishards${version}" "registry.zilliz.com/milvus/mishards" + build_image "Dockerfile" "${MISHARDS_REGISTRY}${version}" "${MISHARDS_REGISTRY}" ;; *) echo "Usage: [option...] 
{base | apps}" - echo "all, Usage: build.sh all [tagname|] => registry.zilliz.com/milvus/mishards:\${tagname}" + echo "all, Usage: build.sh all [tagname|] => {docker_registry}:\${tagname}" ;; esac From 8a432bc472d903e7d783d71f84e2d61768813518 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 15:56:47 +0800 Subject: [PATCH 062/126] update k8s provider for sd --- sd/kubernetes_provider.py | 108 ++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 45 deletions(-) diff --git a/sd/kubernetes_provider.py b/sd/kubernetes_provider.py index 8ee1588ec4..9a15b2fa78 100644 --- a/sd/kubernetes_provider.py +++ b/sd/kubernetes_provider.py @@ -1,7 +1,8 @@ import os import sys if __name__ == '__main__': - sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + sys.path.append(os.path.dirname(os.path.dirname( + os.path.abspath(__file__)))) import re import logging @@ -9,6 +10,7 @@ import time import copy import threading import queue +import enum from kubernetes import client, config, watch from utils import singleton @@ -19,6 +21,11 @@ logger = logging.getLogger(__name__) INCLUSTER_NAMESPACE_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/namespace' +class EventType(enum.Enum): + PodHeartBeat = 1 + Watch = 2 + + class K8SMixin: def __init__(self, namespace, in_cluster=False, **kwargs): self.namespace = namespace @@ -29,13 +36,22 @@ class K8SMixin: self.namespace = open(INCLUSTER_NAMESPACE_PATH).read() if not self.v1: - config.load_incluster_config() if self.in_cluster else config.load_kube_config() + config.load_incluster_config( + ) if self.in_cluster else config.load_kube_config() self.v1 = client.CoreV1Api() class K8SHeartbeatHandler(threading.Thread, K8SMixin): - def __init__(self, message_queue, namespace, label_selector, in_cluster=False, **kwargs): - K8SMixin.__init__(self, namespace=namespace, in_cluster=in_cluster, **kwargs) + def __init__(self, + message_queue, + namespace, + label_selector, + 
in_cluster=False, + **kwargs): + K8SMixin.__init__(self, + namespace=namespace, + in_cluster=in_cluster, + **kwargs) threading.Thread.__init__(self) self.queue = message_queue self.terminate = False @@ -45,13 +61,13 @@ class K8SHeartbeatHandler(threading.Thread, K8SMixin): def run(self): while not self.terminate: try: - pods = self.v1.list_namespaced_pod(namespace=self.namespace, label_selector=self.label_selector) - event_message = { - 'eType': 'PodHeartBeat', - 'events': [] - } + pods = self.v1.list_namespaced_pod( + namespace=self.namespace, + label_selector=self.label_selector) + event_message = {'eType': EventType.PodHeartBeat, 'events': []} for item in pods.items: - pod = self.v1.read_namespaced_pod(name=item.metadata.name, namespace=self.namespace) + pod = self.v1.read_namespaced_pod(name=item.metadata.name, + namespace=self.namespace) name = pod.metadata.name ip = pod.status.pod_ip phase = pod.status.phase @@ -59,13 +75,11 @@ class K8SHeartbeatHandler(threading.Thread, K8SMixin): message = pod.status.message ready = True if phase == 'Running' else False - pod_event = dict( - pod=name, - ip=ip, - ready=ready, - reason=reason, - message=message - ) + pod_event = dict(pod=name, + ip=ip, + ready=ready, + reason=reason, + message=message) event_message['events'].append(pod_event) @@ -82,7 +96,10 @@ class K8SHeartbeatHandler(threading.Thread, K8SMixin): class K8SEventListener(threading.Thread, K8SMixin): def __init__(self, message_queue, namespace, in_cluster=False, **kwargs): - K8SMixin.__init__(self, namespace=namespace, in_cluster=in_cluster, **kwargs) + K8SMixin.__init__(self, + namespace=namespace, + in_cluster=in_cluster, + **kwargs) threading.Thread.__init__(self) self.queue = message_queue self.terminate = False @@ -96,7 +113,8 @@ class K8SEventListener(threading.Thread, K8SMixin): def run(self): resource_version = '' w = watch.Watch() - for event in w.stream(self.v1.list_namespaced_event, namespace=self.namespace, + for event in 
w.stream(self.v1.list_namespaced_event, + namespace=self.namespace, field_selector='involvedObject.kind=Pod'): if self.terminate: break @@ -104,7 +122,7 @@ class K8SEventListener(threading.Thread, K8SMixin): resource_version = int(event['object'].metadata.resource_version) info = dict( - eType='WatchEvent', + eType=EventType.Watch, pod=event['object'].involved_object.name, reason=event['object'].reason, message=event['object'].message, @@ -137,7 +155,8 @@ class EventHandler(threading.Thread): while try_cnt > 0: try_cnt -= 1 try: - pod = self.mgr.v1.read_namespaced_pod(name=event['pod'], namespace=self.namespace) + pod = self.mgr.v1.read_namespaced_pod(name=event['pod'], + namespace=self.namespace) if not pod.status.pod_ip: time.sleep(0.5) continue @@ -147,13 +166,15 @@ class EventHandler(threading.Thread): if try_cnt <= 0 and not pod: if not event['start_up']: - logger.error('Pod {} is started but cannot read pod'.format(event['pod'])) + logger.error('Pod {} is started but cannot read pod'.format( + event['pod'])) return elif try_cnt <= 0 and not pod.status.pod_ip: logger.warn('NoPodIPFoundError') return - logger.info('Register POD {} with IP {}'.format(pod.metadata.name, pod.status.pod_ip)) + logger.info('Register POD {} with IP {}'.format( + pod.metadata.name, pod.status.pod_ip)) self.mgr.add_pod(name=pod.metadata.name, ip=pod.status.pod_ip) def on_pod_killing(self, event, **kwargs): @@ -178,7 +199,7 @@ class EventHandler(threading.Thread): logger.info(self.mgr.conn_mgr.conn_names) def handle_event(self, event): - if event['eType'] == 'PodHeartBeat': + if event['eType'] == EventType.PodHeartBeat: return self.on_pod_heartbeat(event) if not event or (event['reason'] not in ('Started', 'Killing')): @@ -204,7 +225,8 @@ class EventHandler(threading.Thread): class KubernetesProviderSettings: - def __init__(self, namespace, pod_patt, label_selector, in_cluster, poll_interval, **kwargs): + def __init__(self, namespace, pod_patt, label_selector, in_cluster, + 
poll_interval, **kwargs): self.namespace = namespace self.pod_patt = pod_patt self.label_selector = label_selector @@ -231,16 +253,15 @@ class KubernetesProvider(object): if not self.namespace: self.namespace = open(incluster_namespace_path).read() - config.load_incluster_config() if self.in_cluster else config.load_kube_config() + config.load_incluster_config( + ) if self.in_cluster else config.load_kube_config() self.v1 = client.CoreV1Api() - self.listener = K8SEventListener( - message_queue=self.queue, - namespace=self.namespace, - in_cluster=self.in_cluster, - v1=self.v1, - **kwargs - ) + self.listener = K8SEventListener(message_queue=self.queue, + namespace=self.namespace, + in_cluster=self.in_cluster, + v1=self.v1, + **kwargs) self.pod_heartbeater = K8SHeartbeatHandler( message_queue=self.queue, @@ -249,13 +270,13 @@ class KubernetesProvider(object): in_cluster=self.in_cluster, v1=self.v1, poll_interval=self.poll_interval, - **kwargs - ) + **kwargs) self.event_handler = EventHandler(mgr=self, message_queue=self.queue, namespace=self.namespace, - pod_patt=self.pod_patt, **kwargs) + pod_patt=self.pod_patt, + **kwargs) def add_pod(self, name, ip): self.conn_mgr.register(name, 'tcp://{}:19530'.format(ip)) @@ -292,17 +313,14 @@ if __name__ == '__main__': connect_mgr = Connect() - settings = KubernetesProviderSettings( - namespace='xp', - pod_patt=".*-ro-servers-.*", - label_selector='tier=ro-servers', - poll_interval=5, - in_cluster=False) + settings = KubernetesProviderSettings(namespace='xp', + pod_patt=".*-ro-servers-.*", + label_selector='tier=ro-servers', + poll_interval=5, + in_cluster=False) provider_class = ProviderManager.get_provider('Kubernetes') - t = provider_class(conn_mgr=connect_mgr, - settings=settings - ) + t = provider_class(conn_mgr=connect_mgr, settings=settings) t.start() cnt = 100 while cnt > 0: From c4f7b7c4b2d206f0051cf79ac193ffa3500f7b58 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 16:11:40 +0800 Subject: [PATCH 
063/126] update docker and git ignore --- .dockerignore | 2 ++ .gitignore | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.dockerignore b/.dockerignore index d1012a3afd..7f608f71d6 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,5 +1,7 @@ .git .gitignore .env +.coverage +cov_html/ mishards/.env diff --git a/.gitignore b/.gitignore index 624eb4fa58..8919efeb01 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .env +.coverage +cov_html/ __pycache__/ From 8ad5d6c2d95a06df5e39200d6e7c9419789ecc2e Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 14 Oct 2019 17:05:11 +0800 Subject: [PATCH 064/126] add test_grpc --- mishards/grpc_utils/test_grpc.py | 77 ++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 mishards/grpc_utils/test_grpc.py diff --git a/mishards/grpc_utils/test_grpc.py b/mishards/grpc_utils/test_grpc.py new file mode 100644 index 0000000000..068ee391e7 --- /dev/null +++ b/mishards/grpc_utils/test_grpc.py @@ -0,0 +1,77 @@ +import logging +import opentracing +from mishards.grpc_utils import GrpcSpanDecorator, is_grpc_method +from milvus.grpc_gen import status_pb2, milvus_pb2 + + +logger = logging.getLogger(__name__) + + +class TestTracer(opentracing.Tracer): + pass + +class TestSpan(opentracing.Span): + def __init__(self, context, tracer, **kwargs): + super(TestSpan, self).__init__(tracer, context) + self.reset() + + def set_tag(self, key, value): + self.tags.append({key:value}) + + def log_kv(self, key_values, timestamp=None): + self.logs.append(key_values) + + def reset(self): + self.tags = [] + self.logs = [] + + +class TestRpcInfo: + def __init__(self, request, response): + self.request = request + self.response = response + + +class TestGrpcUtils: + def test_span_deco(self): + request = 'request' + OK = status_pb2.Status(error_code=status_pb2.SUCCESS, reason='Success') + response = OK + rpc_info = TestRpcInfo(request=request, response=response) + span = TestSpan(context=None, tracer=TestTracer()) + 
span_deco = GrpcSpanDecorator() + span_deco(span, rpc_info) + assert len(span.logs) == 0 + assert len(span.tags) == 0 + + response = milvus_pb2.BoolReply(status=OK, bool_reply=False) + rpc_info = TestRpcInfo(request=request, response=response) + span = TestSpan(context=None, tracer=TestTracer()) + span_deco = GrpcSpanDecorator() + span_deco(span, rpc_info) + assert len(span.logs) == 0 + assert len(span.tags) == 0 + + response = 1 + rpc_info = TestRpcInfo(request=request, response=response) + span = TestSpan(context=None, tracer=TestTracer()) + span_deco = GrpcSpanDecorator() + span_deco(span, rpc_info) + logger.error(span.logs) + assert len(span.logs) == 1 + assert len(span.tags) == 1 + + response = 0 + rpc_info = TestRpcInfo(request=request, response=response) + span = TestSpan(context=None, tracer=TestTracer()) + span_deco = GrpcSpanDecorator() + span_deco(span, rpc_info) + logger.error(span.logs) + assert len(span.logs) == 0 + assert len(span.tags) == 0 + + def test_is_grpc_method(self): + target = 1 + assert not is_grpc_method(target) + target = None + assert not is_grpc_method(target) From 4aa29968a68ad16abefe29941e43c5148c99164b Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 16 Oct 2019 14:19:01 +0800 Subject: [PATCH 065/126] update for TESTING changes --- conftest.py | 10 ++++++++++ mishards/__init__.py | 2 +- mishards/connections.py | 2 +- mishards/grpc_utils/test_grpc.py | 2 -- mishards/main.py | 3 +-- mishards/server.py | 2 +- mishards/settings.py | 22 ++++++++++++---------- mishards/test_connections.py | 26 +++++++++++++++++++++++++- requirements.txt | 1 + tracing/factory.py | 12 ++++++++---- 10 files changed, 60 insertions(+), 22 deletions(-) diff --git a/conftest.py b/conftest.py index d6c9f3acc7..1aba5b32cf 100644 --- a/conftest.py +++ b/conftest.py @@ -1,5 +1,6 @@ import logging import pytest +import grpc from mishards import settings, db, create_app logger = logging.getLogger(__name__) @@ -14,3 +15,12 @@ def app(request): yield app 
db.drop_all() + +@pytest.fixture +def started_app(app): + app.on_pre_run() + app.start(app.port) + + yield app + + app.stop() diff --git a/mishards/__init__.py b/mishards/__init__.py index 47d8adb6e3..4bd77d8c60 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -24,7 +24,7 @@ def create_app(testing_config=None): from tracing.factory import TracerFactory from mishards.grpc_utils import GrpcSpanDecorator - tracer = TracerFactory.new_tracer(settings.TRACING_TYPE, settings.TracingConfig, + tracer = TracerFactory.new_tracer(config.TRACING_TYPE, settings.TracingConfig, span_decorator=GrpcSpanDecorator()) grpc_server.init_app(conn_mgr=connect_mgr, tracer=tracer, discover=discover) diff --git a/mishards/connections.py b/mishards/connections.py index 22263e9e7e..7db271381c 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -18,7 +18,7 @@ class Connection: self.conn = Milvus() self.error_handlers = [] if not error_handlers else error_handlers self.on_retry_func = kwargs.get('on_retry_func', None) - self._connect() + # self._connect() def __str__(self): return 'Connection:name=\"{}\";uri=\"{}\"'.format(self.name, self.uri) diff --git a/mishards/grpc_utils/test_grpc.py b/mishards/grpc_utils/test_grpc.py index 068ee391e7..d8511c8d6c 100644 --- a/mishards/grpc_utils/test_grpc.py +++ b/mishards/grpc_utils/test_grpc.py @@ -57,7 +57,6 @@ class TestGrpcUtils: span = TestSpan(context=None, tracer=TestTracer()) span_deco = GrpcSpanDecorator() span_deco(span, rpc_info) - logger.error(span.logs) assert len(span.logs) == 1 assert len(span.tags) == 1 @@ -66,7 +65,6 @@ class TestGrpcUtils: span = TestSpan(context=None, tracer=TestTracer()) span_deco = GrpcSpanDecorator() span_deco(span, rpc_info) - logger.error(span.logs) assert len(span.logs) == 0 assert len(span.tags) == 0 diff --git a/mishards/main.py b/mishards/main.py index 3f69484ee4..c0d142607b 100644 --- a/mishards/main.py +++ b/mishards/main.py @@ -6,8 +6,7 @@ from mishards import (settings, 
create_app) def main(): - server = create_app( - settings.TestingConfig if settings.TESTING else settings.DefaultConfig) + server = create_app(settings.DefaultConfig) server.run(port=settings.SERVER_PORT) return 0 diff --git a/mishards/server.py b/mishards/server.py index feb2176e86..dcaacd0fbc 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -39,7 +39,7 @@ class Server: self.register_pre_run_handler(self.pre_run_handler) def pre_run_handler(self): - woserver = settings.WOSERVER if not settings.TESTING else settings.TESTING_WOSERVER + woserver = settings.WOSERVER url = urlparse(woserver) ip = socket.gethostbyname(url.hostname) socket.inet_pton(socket.AF_INET, ip) diff --git a/mishards/settings.py b/mishards/settings.py index 1982a508e7..c9b62717d4 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -43,10 +43,7 @@ elif SD_PROVIDER == 'Static': SD_PROVIDER_SETTINGS = StaticProviderSettings( hosts=env.list('SD_STATIC_HOSTS', [])) -TESTING = env.bool('TESTING', False) -TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') - -TRACING_TYPE = env.str('TRACING_TYPE', '') +# TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') class TracingConfig: @@ -64,19 +61,24 @@ class TracingConfig: }, 'logging': env.bool('TRACING_LOGGING', True) } + DEFAULT_TRACING_CONFIG = { + 'sampler': { + 'type': env.str('TRACING_SAMPLER_TYPE', 'const'), + 'param': env.str('TRACING_SAMPLER_PARAM', "0"), + } + } class DefaultConfig: SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') SQL_ECHO = env.bool('SQL_ECHO', False) + TRACING_TYPE = env.str('TRACING_TYPE', '') -TESTING = env.bool('TESTING', False) -if TESTING: - - class TestingConfig(DefaultConfig): - SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI') - SQL_ECHO = env.bool('SQL_TEST_ECHO', False) +class TestingConfig(DefaultConfig): + SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI') + SQL_ECHO = env.bool('SQL_TEST_ECHO', False) + TRACING_TYPE 
= env.str('TRACING_TEST_TYPE', '') if __name__ == '__main__': diff --git a/mishards/test_connections.py b/mishards/test_connections.py index f1c54f0c61..819d2e03da 100644 --- a/mishards/test_connections.py +++ b/mishards/test_connections.py @@ -1,6 +1,8 @@ import logging import pytest +import mock +from milvus import Milvus from mishards.connections import (ConnectionMgr, Connection) from mishards import exceptions @@ -27,6 +29,12 @@ class TestConnection: mgr.register('WOSERVER', 'xxxx') assert len(mgr.conn_names) == 0 + assert not mgr.conn('XXXX', None) + with pytest.raises(exceptions.ConnectionNotFoundError): + mgr.conn('XXXX', None, True) + + mgr.conn('WOSERVER', None) + def test_connection(self): class Conn: def __init__(self, state): @@ -37,6 +45,7 @@ class TestConnection: def connected(self): return self.state + FAIL_CONN = Conn(False) PASS_CONN = Conn(True) @@ -58,7 +67,9 @@ class TestConnection: max_retry = 3 RetryObj = Retry() - c = Connection('client', uri='', + + c = Connection('client', + uri='xx', max_retry=max_retry, on_retry_func=RetryObj) c.conn = FAIL_CONN @@ -75,3 +86,16 @@ class TestConnection: this_connect() assert ff.executed assert RetryObj.times == 0 + + this_connect = c.connect(func=None) + with pytest.raises(TypeError): + this_connect() + + errors = [] + + def error_handler(err): + errors.append(err) + + this_connect = c.connect(func=None, exception_handler=error_handler) + this_connect() + assert len(errors) == 1 diff --git a/requirements.txt b/requirements.txt index ea338d0723..133cfac8ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,3 +33,4 @@ SQLAlchemy==1.3.5 urllib3==1.25.3 jaeger-client>=3.4.0 grpcio-opentracing>=1.0 +mock==2.0.0 diff --git a/tracing/factory.py b/tracing/factory.py index 648dfa291e..0c14d9d536 100644 --- a/tracing/factory.py +++ b/tracing/factory.py @@ -12,13 +12,17 @@ logger = logging.getLogger(__name__) class TracerFactory: @classmethod def new_tracer(cls, tracer_type, tracer_config, 
span_decorator=None, **kwargs): + config = tracer_config.TRACING_CONFIG + service_name = tracer_config.TRACING_SERVICE_NAME + validate=tracer_config.TRACING_VALIDATE if not tracer_type: - return Tracer() + tracer_type = 'jaeger' + config = tracer_config.DEFAULT_TRACING_CONFIG if tracer_type.lower() == 'jaeger': - config = Config(config=tracer_config.TRACING_CONFIG, - service_name=tracer_config.TRACING_SERVICE_NAME, - validate=tracer_config.TRACING_VALIDATE + config = Config(config=config, + service_name=service_name, + validate=validate ) tracer = config.initialize_tracer() From 9012f47a101228f956d04cc2eae804f38ca4e50e Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 16 Oct 2019 17:38:34 +0800 Subject: [PATCH 066/126] changes for unit test --- mishards/grpc_utils/test_grpc.py | 24 +++++++-------- mishards/service_handler.py | 51 ++++++++++++++++++++++++-------- tracing/factory.py | 8 +++-- 3 files changed, 56 insertions(+), 27 deletions(-) diff --git a/mishards/grpc_utils/test_grpc.py b/mishards/grpc_utils/test_grpc.py index d8511c8d6c..314fccfe00 100644 --- a/mishards/grpc_utils/test_grpc.py +++ b/mishards/grpc_utils/test_grpc.py @@ -7,12 +7,12 @@ from milvus.grpc_gen import status_pb2, milvus_pb2 logger = logging.getLogger(__name__) -class TestTracer(opentracing.Tracer): +class FakeTracer(opentracing.Tracer): pass -class TestSpan(opentracing.Span): +class FakeSpan(opentracing.Span): def __init__(self, context, tracer, **kwargs): - super(TestSpan, self).__init__(tracer, context) + super(FakeSpan, self).__init__(tracer, context) self.reset() def set_tag(self, key, value): @@ -26,7 +26,7 @@ class TestSpan(opentracing.Span): self.logs = [] -class TestRpcInfo: +class FakeRpcInfo: def __init__(self, request, response): self.request = request self.response = response @@ -37,32 +37,32 @@ class TestGrpcUtils: request = 'request' OK = status_pb2.Status(error_code=status_pb2.SUCCESS, reason='Success') response = OK - rpc_info = TestRpcInfo(request=request, 
response=response) - span = TestSpan(context=None, tracer=TestTracer()) + rpc_info = FakeRpcInfo(request=request, response=response) + span = FakeSpan(context=None, tracer=FakeTracer()) span_deco = GrpcSpanDecorator() span_deco(span, rpc_info) assert len(span.logs) == 0 assert len(span.tags) == 0 response = milvus_pb2.BoolReply(status=OK, bool_reply=False) - rpc_info = TestRpcInfo(request=request, response=response) - span = TestSpan(context=None, tracer=TestTracer()) + rpc_info = FakeRpcInfo(request=request, response=response) + span = FakeSpan(context=None, tracer=FakeTracer()) span_deco = GrpcSpanDecorator() span_deco(span, rpc_info) assert len(span.logs) == 0 assert len(span.tags) == 0 response = 1 - rpc_info = TestRpcInfo(request=request, response=response) - span = TestSpan(context=None, tracer=TestTracer()) + rpc_info = FakeRpcInfo(request=request, response=response) + span = FakeSpan(context=None, tracer=FakeTracer()) span_deco = GrpcSpanDecorator() span_deco(span, rpc_info) assert len(span.logs) == 1 assert len(span.tags) == 1 response = 0 - rpc_info = TestRpcInfo(request=request, response=response) - span = TestSpan(context=None, tracer=TestTracer()) + rpc_info = FakeRpcInfo(request=request, response=response) + span = FakeSpan(context=None, tracer=FakeTracer()) span_deco = GrpcSpanDecorator() span_deco(span, rpc_info) assert len(span.logs) == 0 diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 9d851ecfcb..113ec3ca20 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -237,13 +237,15 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status_pb2.Status(error_code=_status.code, reason=_status.message) + def _add_vectors(self, param, metadata=None): + return self.connection(metadata=metadata).add_vectors(None, None, insert_param=param) + @mark_grpc_method def Insert(self, request, context): logger.info('Insert') # TODO: Ths SDK interface add_vectors() could update, add a key 
'row_id_array' - _status, _ids = self.connection(metadata={ - 'resp_class': milvus_pb2.VectorIds - }).add_vectors(None, None, insert_param=request) + _status, _ids = self._add_vectors(metadata={ + 'resp_class': milvus_pb2.VectorIds}, param=request) return milvus_pb2.VectorIds( status=status_pb2.Status(error_code=_status.code, reason=_status.message), vector_id_array=_ids @@ -305,6 +307,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def SearchInFiles(self, request, context): raise NotImplemented() + def _describe_table(self, table_name, metadata=None): + return self.connection(metadata=metadata).describe_table(table_name) + @mark_grpc_method def DescribeTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -319,7 +324,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): } logger.info('DescribeTable {}'.format(_table_name)) - _status, _table = self.connection(metadata=metadata).describe_table(_table_name) + _status, _table = self._describe_table(metadata=metadata, table_name=_table_name) if _status.OK(): return milvus_pb2.TableSchema( @@ -335,6 +340,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): status=status_pb2.Status(error_code=_status.code, reason=_status.message), ) + def _count_table(self, table_name, metadata=None): + return self.connection(metadata=metadata).get_table_row_count(table_name) + @mark_grpc_method def CountTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -351,12 +359,16 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): metadata = { 'resp_class': milvus_pb2.TableRowCount } - _status, _count = self.connection(metadata=metadata).get_table_row_count(_table_name) + _status, _count = self._count_table(_table_name, metadata=metadata) return milvus_pb2.TableRowCount( status=status_pb2.Status(error_code=_status.code, reason=_status.message), table_row_count=_count if isinstance(_count, int) else -1) + + def 
_get_server_version(self, metadata=None): + return self.connection(metadata=metadata).server_version() + @mark_grpc_method def Cmd(self, request, context): _status, _cmd = Parser.parse_proto_Command(request) @@ -364,7 +376,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): if not _status.OK(): return milvus_pb2.StringReply( - status_pb2.Status(error_code=_status.code, reason=_status.message) + status=status_pb2.Status(error_code=_status.code, reason=_status.message) ) metadata = { @@ -372,7 +384,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): } if _cmd == 'version': - _status, _reply = self.connection(metadata=metadata).server_version() + _status, _reply = self._get_server_version(metadata=metadata) else: _status, _reply = self.connection(metadata=metadata).server_status() @@ -381,19 +393,25 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): string_reply=_reply ) + def _show_tables(self): + return self.connection(metadata=metadata).show_tables() + @mark_grpc_method def ShowTables(self, request, context): logger.info('ShowTables') metadata = { 'resp_class': milvus_pb2.TableName } - _status, _results = self.connection(metadata=metadata).show_tables() + _status, _results = self._show_tables() return milvus_pb2.TableNameList( status=status_pb2.Status(error_code=_status.code, reason=_status.message), table_names=_results ) + def _delete_by_range(self, table_name, start_date, end_date): + return self.connection().delete_vectors_by_range(table_name, start_date, end_date) + @mark_grpc_method def DeleteByRange(self, request, context): _status, unpacks = \ @@ -405,9 +423,12 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _table_name, _start_date, _end_date = unpacks logger.info('DeleteByRange {}: {} {}'.format(_table_name, _start_date, _end_date)) - _status = self.connection().delete_vectors_by_range(_table_name, _start_date, _end_date) + _status = self._delete_by_range(_table_name, _start_date, _end_date) return 
status_pb2.Status(error_code=_status.code, reason=_status.message) + def _preload_table(self, table_name): + return self.connection().preload_table(table_name) + @mark_grpc_method def PreloadTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -416,9 +437,12 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status_pb2.Status(error_code=_status.code, reason=_status.message) logger.info('PreloadTable {}'.format(_table_name)) - _status = self.connection().preload_table(_table_name) + _status = self._preload_table(_table_name) return status_pb2.Status(error_code=_status.code, reason=_status.message) + def _describe_index(self, table_name, metadata=None): + return self.connection(metadata=metadata).describe_index(table_name) + @mark_grpc_method def DescribeIndex(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -433,13 +457,16 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): } logger.info('DescribeIndex {}'.format(_table_name)) - _status, _index_param = self.connection(metadata=metadata).describe_index(_table_name) + _status, _index_param = self._describe_index(table_name=_table_name, metadata=metadata) _index = milvus_pb2.Index(index_type=_index_param._index_type, nlist=_index_param._nlist) return milvus_pb2.IndexParam(status=status_pb2.Status(error_code=_status.code, reason=_status.message), table_name=_table_name, index=_index) + def _drop_index(self, table_name): + return self.connection().drop_index(table_name) + @mark_grpc_method def DropIndex(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -448,5 +475,5 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status_pb2.Status(error_code=_status.code, reason=_status.message) logger.info('DropIndex {}'.format(_table_name)) - _status = self.connection().drop_index(_table_name) + _status = self._drop_index(_table_name) return 
status_pb2.Status(error_code=_status.code, reason=_status.message) diff --git a/tracing/factory.py b/tracing/factory.py index 0c14d9d536..61cd75fcd6 100644 --- a/tracing/factory.py +++ b/tracing/factory.py @@ -12,12 +12,14 @@ logger = logging.getLogger(__name__) class TracerFactory: @classmethod def new_tracer(cls, tracer_type, tracer_config, span_decorator=None, **kwargs): + if not tracer_type: + return Tracer() config = tracer_config.TRACING_CONFIG service_name = tracer_config.TRACING_SERVICE_NAME validate=tracer_config.TRACING_VALIDATE - if not tracer_type: - tracer_type = 'jaeger' - config = tracer_config.DEFAULT_TRACING_CONFIG + # if not tracer_type: + # tracer_type = 'jaeger' + # config = tracer_config.DEFAULT_TRACING_CONFIG if tracer_type.lower() == 'jaeger': config = Config(config=config, From e0498e081df88eecb646c9d86cf744412f908902 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 17 Oct 2019 14:13:50 +0800 Subject: [PATCH 067/126] update for server test update for server test --- mishards/factories.py | 5 +- mishards/service_handler.py | 37 +++-- mishards/test_server.py | 279 ++++++++++++++++++++++++++++++++++++ tracing/__init__.py | 13 ++ 4 files changed, 320 insertions(+), 14 deletions(-) create mode 100644 mishards/test_server.py diff --git a/mishards/factories.py b/mishards/factories.py index c4037fe2d7..52c0253b39 100644 --- a/mishards/factories.py +++ b/mishards/factories.py @@ -6,6 +6,7 @@ from factory.alchemy import SQLAlchemyModelFactory from faker import Faker from faker.providers import BaseProvider +from milvus.client.types import MetricType from mishards import db from mishards.models import Tables, TableFiles @@ -27,12 +28,12 @@ class TablesFactory(SQLAlchemyModelFactory): id = factory.Faker('random_number', digits=16, fix_len=True) table_id = factory.Faker('uuid4') - state = factory.Faker('random_element', elements=(0, 1, 2, 3)) + state = factory.Faker('random_element', elements=(0, 1)) dimension = factory.Faker('random_element', 
elements=(256, 512)) created_on = int(time.time()) index_file_size = 0 engine_type = factory.Faker('random_element', elements=(0, 1, 2, 3)) - metric_type = factory.Faker('random_element', elements=(0, 1)) + metric_type = factory.Faker('random_element', elements=(MetricType.L2, MetricType.IP)) nlist = 16384 diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 113ec3ca20..e04965c12a 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -125,8 +125,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): range_array = [self._range_to_date(r, metadata=metadata) for r in range_array] if range_array else None routing = {} + p_span = None if self.tracer.empty else context.get_active_span().context with self.tracer.start_span('get_routing', - child_of=context.get_active_span().context): + child_of=p_span): routing = self._get_routing_file_ids(table_id, range_array, metadata=metadata) logger.info('Routing: {}'.format(routing)) @@ -145,9 +146,10 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): conn = self.query_conn(addr, metadata=metadata) start = time.time() span = kwargs.get('span', None) - span = span if span else context.get_active_span().context + span = span if span else (None if self.tracer.empty else context.get_active_span().context) + with self.tracer.start_span('search_{}'.format(addr), - child_of=context.get_active_span().context): + child_of=span): ret = conn.search_vectors_in_files(table_name=query_params['table_id'], file_ids=query_params['file_ids'], query_records=vectors, @@ -160,7 +162,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): all_topk_results.append(ret) with self.tracer.start_span('do_search', - child_of=context.get_active_span().context) as span: + child_of=p_span) as span: with ThreadPoolExecutor(max_workers=workers) as pool: for addr, params in routing.items(): res = pool.submit(search, addr, params, vectors, topk, nprobe, span=span) @@ -171,9 +173,12 @@ 
class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): reverse = table_meta.metric_type == Types.MetricType.IP with self.tracer.start_span('do_merge', - child_of=context.get_active_span().context): + child_of=p_span): return self._do_merge(all_topk_results, topk, reverse=reverse, metadata=metadata) + def _create_table(self, table_schema): + return self.connection().create_table(table_schema) + @mark_grpc_method def CreateTable(self, request, context): _status, _table_schema = Parser.parse_proto_TableSchema(request) @@ -183,10 +188,13 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('CreateTable {}'.format(_table_schema['table_name'])) - _status = self.connection().create_table(_table_schema) + _status = self._create_table(_table_schema) return status_pb2.Status(error_code=_status.code, reason=_status.message) + def _has_table(self, table_name, metadata=None): + return self.connection(metadata=metadata).has_table(table_name) + @mark_grpc_method def HasTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -199,15 +207,17 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('HasTable {}'.format(_table_name)) - _bool = self.connection(metadata={ - 'resp_class': milvus_pb2.BoolReply - }).has_table(_table_name) + _bool = self._has_table(_table_name, metadata={ + 'resp_class': milvus_pb2.BoolReply}) return milvus_pb2.BoolReply( status=status_pb2.Status(error_code=status_pb2.SUCCESS, reason="OK"), bool_reply=_bool ) + def _delete_table(self, table_name): + return self.connection().delete_table(table_name) + @mark_grpc_method def DropTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) @@ -217,10 +227,13 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('DropTable {}'.format(_table_name)) - _status = self.connection().delete_table(_table_name) + _status = self._delete_table(_table_name) return 
status_pb2.Status(error_code=_status.code, reason=_status.message) + def _create_index(self, table_name, index): + return self.connection().create_index(table_name, index) + @mark_grpc_method def CreateIndex(self, request, context): _status, unpacks = Parser.parse_proto_IndexParam(request) @@ -233,7 +246,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('CreateIndex {}'.format(_table_name)) # TODO: interface create_table incompleted - _status = self.connection().create_index(_table_name, _index) + _status = self._create_index(_table_name, _index) return status_pb2.Status(error_code=_status.code, reason=_status.message) @@ -298,7 +311,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('SearchVector takes: {}'.format(now - start)) topk_result_list = milvus_pb2.TopKQueryResultList( - status=status, + status=status_pb2.Status(error_code=status.error_code, reason=status.reason), topk_query_result=results ) return topk_result_list diff --git a/mishards/test_server.py b/mishards/test_server.py new file mode 100644 index 0000000000..e9a7c0d878 --- /dev/null +++ b/mishards/test_server.py @@ -0,0 +1,279 @@ +import logging +import pytest +import mock +import datetime +import random +import faker +import inspect +from milvus import Milvus +from milvus.client.types import Status, IndexType, MetricType +from milvus.client.Abstract import IndexParam, TableSchema +from milvus.grpc_gen import status_pb2, milvus_pb2 +from mishards import db, create_app, settings +from mishards.service_handler import ServiceHandler +from mishards.grpc_utils.grpc_args_parser import GrpcArgsParser as Parser +from mishards.factories import TableFilesFactory, TablesFactory, TableFiles, Tables + +logger = logging.getLogger(__name__) + +OK = Status(code=Status.SUCCESS, message='Success') +BAD = Status(code=Status.PERMISSION_DENIED, message='Fail') + + +@pytest.mark.usefixtures('started_app') +class TestServer: + def client(self, port): + m = Milvus() + 
m.connect(host='localhost', port=port) + return m + + def test_server_start(self, started_app): + assert started_app.conn_mgr.metas.get('WOSERVER') == settings.WOSERVER + + def test_cmd(self, started_app): + ServiceHandler._get_server_version = mock.MagicMock(return_value=(OK, + '')) + status, _ = self.client(started_app.port).server_version() + assert status.OK() + + Parser.parse_proto_Command = mock.MagicMock(return_value=(BAD, 'cmd')) + status, _ = self.client(started_app.port).server_version() + assert not status.OK() + + def test_drop_index(self, started_app): + table_name = inspect.currentframe().f_code.co_name + ServiceHandler._drop_index = mock.MagicMock(return_value=OK) + status = self.client(started_app.port).drop_index(table_name) + assert status.OK() + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + status = self.client(started_app.port).drop_index(table_name) + assert not status.OK() + + def test_describe_index(self, started_app): + table_name = inspect.currentframe().f_code.co_name + index_type = IndexType.FLAT + nlist = 1 + index_param = IndexParam(table_name=table_name, + index_type=index_type, + nlist=nlist) + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_name)) + ServiceHandler._describe_index = mock.MagicMock( + return_value=(OK, index_param)) + status, ret = self.client(started_app.port).describe_index(table_name) + assert status.OK() + assert ret._table_name == index_param._table_name + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + status, _ = self.client(started_app.port).describe_index(table_name) + assert not status.OK() + + def test_preload(self, started_app): + table_name = inspect.currentframe().f_code.co_name + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_name)) + ServiceHandler._preload_table = mock.MagicMock(return_value=OK) + status = self.client(started_app.port).preload_table(table_name) + assert 
status.OK() + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + status = self.client(started_app.port).preload_table(table_name) + assert not status.OK() + + def test_delete_by_range(self, started_app): + table_name = inspect.currentframe().f_code.co_name + + unpacked = table_name, datetime.datetime.today( + ), datetime.datetime.today() + + Parser.parse_proto_DeleteByRangeParam = mock.MagicMock( + return_value=(OK, unpacked)) + ServiceHandler._delete_by_range = mock.MagicMock(return_value=OK) + status = self.client(started_app.port).delete_vectors_by_range( + *unpacked) + assert status.OK() + + Parser.parse_proto_DeleteByRangeParam = mock.MagicMock( + return_value=(BAD, unpacked)) + status = self.client(started_app.port).delete_vectors_by_range( + *unpacked) + assert not status.OK() + + def test_count_table(self, started_app): + table_name = inspect.currentframe().f_code.co_name + count = random.randint(100, 200) + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_name)) + ServiceHandler._count_table = mock.MagicMock(return_value=(OK, count)) + status, ret = self.client( + started_app.port).get_table_row_count(table_name) + assert status.OK() + assert ret == count + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + status, _ = self.client( + started_app.port).get_table_row_count(table_name) + assert not status.OK() + + def test_show_tables(self, started_app): + tables = ['t1', 't2'] + ServiceHandler._show_tables = mock.MagicMock(return_value=(OK, tables)) + status, ret = self.client(started_app.port).show_tables() + assert status.OK() + assert ret == tables + + def test_describe_table(self, started_app): + table_name = inspect.currentframe().f_code.co_name + dimension = 128 + nlist = 1 + table_schema = TableSchema(table_name=table_name, + index_file_size=100, + metric_type=MetricType.L2, + dimension=dimension) + Parser.parse_proto_TableName = mock.MagicMock( + 
return_value=(OK, table_schema.table_name)) + ServiceHandler._describe_table = mock.MagicMock( + return_value=(OK, table_schema)) + status, _ = self.client(started_app.port).describe_table(table_name) + assert status.OK() + + ServiceHandler._describe_table = mock.MagicMock( + return_value=(BAD, table_schema)) + status, _ = self.client(started_app.port).describe_table(table_name) + assert not status.OK() + + Parser.parse_proto_TableName = mock.MagicMock(return_value=(BAD, + 'cmd')) + status, ret = self.client(started_app.port).describe_table(table_name) + assert not status.OK() + + def test_insert(self, started_app): + table_name = inspect.currentframe().f_code.co_name + vectors = [[random.random() for _ in range(16)] for _ in range(10)] + ids = [random.randint(1000000, 20000000) for _ in range(10)] + ServiceHandler._add_vectors = mock.MagicMock(return_value=(OK, ids)) + status, ret = self.client(started_app.port).add_vectors( + table_name=table_name, records=vectors) + assert status.OK() + assert ids == ret + + def test_create_index(self, started_app): + table_name = inspect.currentframe().f_code.co_name + unpacks = table_name, None + Parser.parse_proto_IndexParam = mock.MagicMock(return_value=(OK, + unpacks)) + ServiceHandler._create_index = mock.MagicMock(return_value=OK) + status = self.client( + started_app.port).create_index(table_name=table_name) + assert status.OK() + + Parser.parse_proto_IndexParam = mock.MagicMock(return_value=(BAD, + None)) + status = self.client( + started_app.port).create_index(table_name=table_name) + assert not status.OK() + + def test_drop_table(self, started_app): + table_name = inspect.currentframe().f_code.co_name + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_name)) + ServiceHandler._delete_table = mock.MagicMock(return_value=OK) + status = self.client( + started_app.port).delete_table(table_name=table_name) + assert status.OK() + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, 
table_name)) + status = self.client( + started_app.port).delete_table(table_name=table_name) + assert not status.OK() + + def test_has_table(self, started_app): + table_name = inspect.currentframe().f_code.co_name + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_name)) + ServiceHandler._has_table = mock.MagicMock(return_value=True) + has = self.client(started_app.port).has_table(table_name=table_name) + assert has + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + has = self.client(started_app.port).has_table(table_name=table_name) + assert not has + + def test_create_table(self, started_app): + table_name = inspect.currentframe().f_code.co_name + dimension = 128 + table_schema = dict(table_name=table_name, + index_file_size=100, + metric_type=MetricType.L2, + dimension=dimension) + + ServiceHandler._create_table = mock.MagicMock(return_value=OK) + status = self.client(started_app.port).create_table(table_schema) + assert status.OK() + + Parser.parse_proto_TableSchema = mock.MagicMock(return_value=(BAD, + None)) + status = self.client(started_app.port).create_table(table_schema) + assert not status.OK() + + def random_data(self, n, dimension): + return [[random.random() for _ in range(dimension)] for _ in range(n)] + + def test_search(self, started_app): + table_name = inspect.currentframe().f_code.co_name + to_index_cnt = random.randint(10, 20) + table = TablesFactory(table_id=table_name, state=Tables.NORMAL) + to_index_files = TableFilesFactory.create_batch( + to_index_cnt, table=table, file_type=TableFiles.FILE_TYPE_TO_INDEX) + topk = random.randint(5, 10) + nq = random.randint(5, 10) + param = { + 'table_name': table_name, + 'query_records': self.random_data(nq, table.dimension), + 'top_k': topk, + 'nprobe': 2049 + } + + result = [ + milvus_pb2.TopKQueryResult(query_result_arrays=[ + milvus_pb2.QueryResult(id=i, distance=random.random()) + for i in range(topk) + ]) for i in range(nq) + ] + + 
mock_results = milvus_pb2.TopKQueryResultList(status=status_pb2.Status( + error_code=status_pb2.SUCCESS, reason="Success"), + topk_query_result=result) + + table_schema = TableSchema(table_name=table_name, + index_file_size=table.index_file_size, + metric_type=table.metric_type, + dimension=table.dimension) + + status, _ = self.client(started_app.port).search_vectors(**param) + assert status.code == Status.ILLEGAL_ARGUMENT + + param['nprobe'] = 2048 + Milvus.describe_table = mock.MagicMock(return_value=(BAD, + table_schema)) + status, ret = self.client(started_app.port).search_vectors(**param) + assert status.code == Status.TABLE_NOT_EXISTS + + Milvus.describe_table = mock.MagicMock(return_value=(OK, table_schema)) + Milvus.search_vectors_in_files = mock.MagicMock( + return_value=mock_results) + + status, ret = self.client(started_app.port).search_vectors(**param) + assert status.OK() + assert len(ret) == nq diff --git a/tracing/__init__.py b/tracing/__init__.py index 5014309a52..a1974e2204 100644 --- a/tracing/__init__.py +++ b/tracing/__init__.py @@ -1,6 +1,13 @@ +from contextlib import contextmanager + def empty_server_interceptor_decorator(target_server, interceptor): return target_server +@contextmanager +def EmptySpan(*args, **kwargs): + yield None + return + class Tracer: def __init__(self, tracer=None, @@ -13,11 +20,17 @@ class Tracer: def decorate(self, server): return self.server_decorator(server, self.interceptor) + @property + def empty(self): + return self.tracer is None + def close(self): self.tracer and self.tracer.close() def start_span(self, operation_name=None, child_of=None, references=None, tags=None, start_time=None, ignore_active_span=False): + if self.empty: + return EmptySpan() return self.tracer.start_span(operation_name, child_of, references, tags, start_time, ignore_active_span) From 24b2e73e5ae132f0e2f0a391895b3031165098e7 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 17 Oct 2019 14:20:09 +0800 Subject: [PATCH 068/126] code style 
format --- conftest.py | 1 + mishards/grpc_utils/test_grpc.py | 4 ++-- mishards/service_handler.py | 3 +-- mishards/test_server.py | 2 +- tracing/__init__.py | 19 +++++++++++++------ tracing/factory.py | 21 ++++++++++++--------- 6 files changed, 30 insertions(+), 20 deletions(-) diff --git a/conftest.py b/conftest.py index 1aba5b32cf..ebe8276cea 100644 --- a/conftest.py +++ b/conftest.py @@ -16,6 +16,7 @@ def app(request): db.drop_all() + @pytest.fixture def started_app(app): app.on_pre_run() diff --git a/mishards/grpc_utils/test_grpc.py b/mishards/grpc_utils/test_grpc.py index 314fccfe00..9af09e5d0d 100644 --- a/mishards/grpc_utils/test_grpc.py +++ b/mishards/grpc_utils/test_grpc.py @@ -3,20 +3,20 @@ import opentracing from mishards.grpc_utils import GrpcSpanDecorator, is_grpc_method from milvus.grpc_gen import status_pb2, milvus_pb2 - logger = logging.getLogger(__name__) class FakeTracer(opentracing.Tracer): pass + class FakeSpan(opentracing.Span): def __init__(self, context, tracer, **kwargs): super(FakeSpan, self).__init__(tracer, context) self.reset() def set_tag(self, key, value): - self.tags.append({key:value}) + self.tags.append({key: value}) def log_kv(self, key_values, timestamp=None): self.logs.append(key_values) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index e04965c12a..0172f73126 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -232,7 +232,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status_pb2.Status(error_code=_status.code, reason=_status.message) def _create_index(self, table_name, index): - return self.connection().create_index(table_name, index) + return self.connection().create_index(table_name, index) @mark_grpc_method def CreateIndex(self, request, context): @@ -378,7 +378,6 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): status=status_pb2.Status(error_code=_status.code, reason=_status.message), table_row_count=_count if isinstance(_count, 
int) else -1) - def _get_server_version(self, metadata=None): return self.connection(metadata=metadata).server_version() diff --git a/mishards/test_server.py b/mishards/test_server.py index e9a7c0d878..a2677847da 100644 --- a/mishards/test_server.py +++ b/mishards/test_server.py @@ -254,7 +254,7 @@ class TestServer: mock_results = milvus_pb2.TopKQueryResultList(status=status_pb2.Status( error_code=status_pb2.SUCCESS, reason="Success"), - topk_query_result=result) + topk_query_result=result) table_schema = TableSchema(table_name=table_name, index_file_size=table.index_file_size, diff --git a/tracing/__init__.py b/tracing/__init__.py index a1974e2204..64a5b50d15 100644 --- a/tracing/__init__.py +++ b/tracing/__init__.py @@ -1,8 +1,10 @@ from contextlib import contextmanager + def empty_server_interceptor_decorator(target_server, interceptor): return target_server + @contextmanager def EmptySpan(*args, **kwargs): yield None @@ -10,7 +12,8 @@ def EmptySpan(*args, **kwargs): class Tracer: - def __init__(self, tracer=None, + def __init__(self, + tracer=None, interceptor=None, server_decorator=empty_server_interceptor_decorator): self.tracer = tracer @@ -27,10 +30,14 @@ class Tracer: def close(self): self.tracer and self.tracer.close() - def start_span(self, operation_name=None, - child_of=None, references=None, tags=None, - start_time=None, ignore_active_span=False): + def start_span(self, + operation_name=None, + child_of=None, + references=None, + tags=None, + start_time=None, + ignore_active_span=False): if self.empty: return EmptySpan() - return self.tracer.start_span(operation_name, child_of, - references, tags, start_time, ignore_active_span) + return self.tracer.start_span(operation_name, child_of, references, + tags, start_time, ignore_active_span) diff --git a/tracing/factory.py b/tracing/factory.py index 61cd75fcd6..14fcde2eb3 100644 --- a/tracing/factory.py +++ b/tracing/factory.py @@ -3,20 +3,23 @@ from jaeger_client import Config from 
grpc_opentracing.grpcext import intercept_server from grpc_opentracing import open_tracing_server_interceptor -from tracing import (Tracer, - empty_server_interceptor_decorator) +from tracing import (Tracer, empty_server_interceptor_decorator) logger = logging.getLogger(__name__) class TracerFactory: @classmethod - def new_tracer(cls, tracer_type, tracer_config, span_decorator=None, **kwargs): + def new_tracer(cls, + tracer_type, + tracer_config, + span_decorator=None, + **kwargs): if not tracer_type: return Tracer() config = tracer_config.TRACING_CONFIG service_name = tracer_config.TRACING_SERVICE_NAME - validate=tracer_config.TRACING_VALIDATE + validate = tracer_config.TRACING_VALIDATE # if not tracer_type: # tracer_type = 'jaeger' # config = tracer_config.DEFAULT_TRACING_CONFIG @@ -24,13 +27,13 @@ class TracerFactory: if tracer_type.lower() == 'jaeger': config = Config(config=config, service_name=service_name, - validate=validate - ) + validate=validate) tracer = config.initialize_tracer() - tracer_interceptor = open_tracing_server_interceptor(tracer, - log_payloads=tracer_config.TRACING_LOG_PAYLOAD, - span_decorator=span_decorator) + tracer_interceptor = open_tracing_server_interceptor( + tracer, + log_payloads=tracer_config.TRACING_LOG_PAYLOAD, + span_decorator=span_decorator) return Tracer(tracer, tracer_interceptor, intercept_server) From 560c4310ae15a8326ca90e1df153e89fc4befb6b Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 18 Oct 2019 10:19:39 +0800 Subject: [PATCH 069/126] small refactor --- mishards/service_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 0172f73126..1396466568 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -405,7 +405,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): string_reply=_reply ) - def _show_tables(self): + def _show_tables(self, metadata=None): return 
self.connection(metadata=metadata).show_tables() @mark_grpc_method @@ -414,7 +414,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): metadata = { 'resp_class': milvus_pb2.TableName } - _status, _results = self._show_tables() + _status, _results = self._show_tables(metadata=metadata) return milvus_pb2.TableNameList( status=status_pb2.Status(error_code=_status.code, reason=_status.message), From a3409be0dc4330923dd5bab2d647d1f11dc3d538 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 18 Oct 2019 13:38:19 +0800 Subject: [PATCH 070/126] add router in impl --- mishards/__init__.py | 5 +- mishards/routings.py | 81 +++++++++ mishards/server.py | 27 ++- mishards/service_handler.py | 331 +++++++++++++++++------------------- mishards/settings.py | 2 + mishards/utilities.py | 20 +++ 6 files changed, 287 insertions(+), 179 deletions(-) create mode 100644 mishards/routings.py create mode 100644 mishards/utilities.py diff --git a/mishards/__init__.py b/mishards/__init__.py index 4bd77d8c60..759e8c2e5a 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -27,7 +27,10 @@ def create_app(testing_config=None): tracer = TracerFactory.new_tracer(config.TRACING_TYPE, settings.TracingConfig, span_decorator=GrpcSpanDecorator()) - grpc_server.init_app(conn_mgr=connect_mgr, tracer=tracer, discover=discover) + from mishards.routings import RouterFactory + router = RouterFactory.new_router(config.ROUTER_CLASS_NAME, connect_mgr) + + grpc_server.init_app(conn_mgr=connect_mgr, tracer=tracer, router=router, discover=discover) from mishards import exception_handlers diff --git a/mishards/routings.py b/mishards/routings.py new file mode 100644 index 0000000000..a61352f40b --- /dev/null +++ b/mishards/routings.py @@ -0,0 +1,81 @@ +import logging +from sqlalchemy import exc as sqlalchemy_exc +from sqlalchemy import and_ + +from mishards import exceptions, db +from mishards.hash_ring import HashRing +from mishards.models import Tables + +logger = 
logging.getLogger(__name__) + + +class RouteManager: + ROUTER_CLASSES = {} + + @classmethod + def register_router_class(cls, target): + name = target.__dict__.get('NAME', None) + name = name if name else target.__class__.__name__ + cls.ROUTER_CLASSES[name] = target + return target + + @classmethod + def get_router_class(cls, name): + return cls.ROUTER_CLASSES.get(name, None) + + +class RouterFactory: + @classmethod + def new_router(cls, name, conn_mgr, **kwargs): + router_class = RouteManager.get_router_class(name) + assert router_class + return router_class(conn_mgr, **kwargs) + + +class RouterMixin: + def __init__(self, conn_mgr): + self.conn_mgr = conn_mgr + + def routing(self, table_name, metadata=None, **kwargs): + raise NotImplemented() + + +@RouteManager.register_router_class +class FileBasedHashRingRouter(RouterMixin): + NAME = 'FileBasedHashRingRouter' + + def __init__(self, conn_mgr, **kwargs): + super(FileBasedHashRingRouter, self).__init__(conn_mgr) + + def routing(self, table_name, metadata=None, **kwargs): + range_array = kwargs.pop('range_array', None) + return self._route(table_name, range_array, metadata, **kwargs) + + def _route(self, table_name, range_array, metadata=None, **kwargs): + # PXU TODO: Implement Thread-local Context + try: + table = db.Session.query(Tables).filter( + and_(Tables.table_id == table_name, + Tables.state != Tables.TO_DELETE)).first() + except sqlalchemy_exc.SQLAlchemyError as e: + raise exceptions.DBError(message=str(e), metadata=metadata) + + if not table: + raise exceptions.TableNotFoundError(table_name, metadata=metadata) + files = table.files_to_search(range_array) + + servers = self.conn_mgr.conn_names + logger.info('Available servers: {}'.format(servers)) + + ring = HashRing(servers) + + routing = {} + + for f in files: + target_host = ring.get_node(str(f.id)) + sub = routing.get(target_host, None) + if not sub: + routing[target_host] = {'table_id': table_name, 'file_ids': []} + 
routing[target_host]['file_ids'].append(str(f.id)) + + return routing diff --git a/mishards/server.py b/mishards/server.py index dcaacd0fbc..20be8f1746 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -22,17 +22,24 @@ class Server: self.error_handlers = {} self.exit_flag = False - def init_app(self, conn_mgr, tracer, discover, port=19530, max_workers=10, **kwargs): + def init_app(self, + conn_mgr, + tracer, + router, + discover, + port=19530, + max_workers=10, + **kwargs): self.port = int(port) self.conn_mgr = conn_mgr self.tracer = tracer + self.router = router self.discover = discover self.server_impl = grpc.server( thread_pool=futures.ThreadPoolExecutor(max_workers=max_workers), options=[(cygrpc.ChannelArgKey.max_send_message_length, -1), - (cygrpc.ChannelArgKey.max_receive_message_length, -1)] - ) + (cygrpc.ChannelArgKey.max_receive_message_length, -1)]) self.server_impl = self.tracer.decorate(self.server_impl) @@ -43,8 +50,8 @@ class Server: url = urlparse(woserver) ip = socket.gethostbyname(url.hostname) socket.inet_pton(socket.AF_INET, ip) - self.conn_mgr.register('WOSERVER', - '{}://{}:{}'.format(url.scheme, ip, url.port or 80)) + self.conn_mgr.register( + 'WOSERVER', '{}://{}:{}'.format(url.scheme, ip, url.port or 80)) def register_pre_run_handler(self, func): logger.info('Regiterring {} into server pre_run_handlers'.format(func)) @@ -65,9 +72,11 @@ class Server: def errorhandler(self, exception): if inspect.isclass(exception) and issubclass(exception, Exception): + def wrapper(func): self.error_handlers[exception] = func return func + return wrapper return exception @@ -78,8 +87,12 @@ class Server: def start(self, port=None): handler_class = self.decorate_handler(ServiceHandler) - add_MilvusServiceServicer_to_server(handler_class(conn_mgr=self.conn_mgr, tracer=self.tracer), self.server_impl) - self.server_impl.add_insecure_port("[::]:{}".format(str(port or self._port))) + add_MilvusServiceServicer_to_server( + 
handler_class(conn_mgr=self.conn_mgr, + tracer=self.tracer, + router=self.router), self.server_impl) + self.server_impl.add_insecure_port("[::]:{}".format( + str(port or self._port))) self.server_impl.start() def run(self, port): diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 1396466568..e26f2bfd74 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -3,9 +3,6 @@ import time import datetime from collections import defaultdict -from sqlalchemy import and_ -from sqlalchemy import exc as sqlalchemy_exc - from concurrent.futures import ThreadPoolExecutor from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 from milvus.grpc_gen.milvus_pb2 import TopKQueryResult @@ -15,8 +12,7 @@ from milvus.client import types as Types from mishards import (db, settings, exceptions) from mishards.grpc_utils import mark_grpc_method from mishards.grpc_utils.grpc_args_parser import GrpcArgsParser as Parser -from mishards.models import Tables, TableFiles -from mishards.hash_ring import HashRing +from mishards import utilities logger = logging.getLogger(__name__) @@ -24,11 +20,12 @@ logger = logging.getLogger(__name__) class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): MAX_NPROBE = 2048 - def __init__(self, conn_mgr, tracer, *args, **kwargs): + def __init__(self, conn_mgr, tracer, router, *args, **kwargs): self.conn_mgr = conn_mgr self.table_meta = {} self.error_handlers = {} self.tracer = tracer + self.router = router def connection(self, metadata=None): conn = self.conn_mgr.conn('WOSERVER', metadata=metadata) @@ -43,56 +40,9 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): conn.on_connect(metadata=metadata) return conn.conn - def _format_date(self, start, end): - return ((start.year - 1900) * 10000 + (start.month - 1) * 100 + start.day, (end.year - 1900) * 10000 + (end.month - 1) * 100 + end.day) - - def _range_to_date(self, range_obj, metadata=None): - try: - start = 
datetime.datetime.strptime(range_obj.start_date, '%Y-%m-%d') - end = datetime.datetime.strptime(range_obj.end_date, '%Y-%m-%d') - assert start < end - except (ValueError, AssertionError): - raise exceptions.InvalidRangeError('Invalid time range: {} {}'.format( - range_obj.start_date, range_obj.end_date - ), metadata=metadata) - - return self._format_date(start, end) - - def _get_routing_file_ids(self, table_id, range_array, metadata=None): - # PXU TODO: Implement Thread-local Context - try: - table = db.Session.query(Tables).filter(and_( - Tables.table_id == table_id, - Tables.state != Tables.TO_DELETE - )).first() - except sqlalchemy_exc.SQLAlchemyError as e: - raise exceptions.DBError(message=str(e), metadata=metadata) - - if not table: - raise exceptions.TableNotFoundError(table_id, metadata=metadata) - files = table.files_to_search(range_array) - - servers = self.conn_mgr.conn_names - logger.info('Available servers: {}'.format(servers)) - - ring = HashRing(servers) - - routing = {} - - for f in files: - target_host = ring.get_node(str(f.id)) - sub = routing.get(target_host, None) - if not sub: - routing[target_host] = { - 'table_id': table_id, - 'file_ids': [] - } - routing[target_host]['file_ids'].append(str(f.id)) - - return routing - def _do_merge(self, files_n_topk_results, topk, reverse=False, **kwargs): - status = status_pb2.Status(error_code=status_pb2.SUCCESS, reason="Success") + status = status_pb2.Status(error_code=status_pb2.SUCCESS, + reason="Success") if not files_n_topk_results: return status, [] @@ -103,10 +53,14 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): if isinstance(files_collection, tuple): status, _ = files_collection return status, [] - for request_pos, each_request_results in enumerate(files_collection.topk_query_result): - request_results[request_pos].extend(each_request_results.query_result_arrays) - request_results[request_pos] = sorted(request_results[request_pos], key=lambda x: x.distance, - 
reverse=reverse)[:topk] + for request_pos, each_request_results in enumerate( + files_collection.topk_query_result): + request_results[request_pos].extend( + each_request_results.query_result_arrays) + request_results[request_pos] = sorted( + request_results[request_pos], + key=lambda x: x.distance, + reverse=reverse)[:topk] calc_time = time.time() - calc_time logger.info('Merge takes {}'.format(calc_time)) @@ -120,15 +74,27 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status, topk_query_result - def _do_query(self, context, table_id, table_meta, vectors, topk, nprobe, range_array=None, **kwargs): + def _do_query(self, + context, + table_id, + table_meta, + vectors, + topk, + nprobe, + range_array=None, + **kwargs): metadata = kwargs.get('metadata', None) - range_array = [self._range_to_date(r, metadata=metadata) for r in range_array] if range_array else None + range_array = [ + utilities.range_to_date(r, metadata=metadata) for r in range_array + ] if range_array else None routing = {} - p_span = None if self.tracer.empty else context.get_active_span().context - with self.tracer.start_span('get_routing', - child_of=p_span): - routing = self._get_routing_file_ids(table_id, range_array, metadata=metadata) + p_span = None if self.tracer.empty else context.get_active_span( + ).context + with self.tracer.start_span('get_routing', child_of=p_span): + routing = self.router.routing(table_id, + range_array=range_array, + metadata=metadata) logger.info('Routing: {}'.format(routing)) metadata = kwargs.get('metadata', None) @@ -139,42 +105,51 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): workers = settings.SEARCH_WORKER_SIZE def search(addr, query_params, vectors, topk, nprobe, **kwargs): - logger.info('Send Search Request: addr={};params={};nq={};topk={};nprobe={}'.format( - addr, query_params, len(vectors), topk, nprobe - )) + logger.info( + 'Send Search Request: addr={};params={};nq={};topk={};nprobe={}' + .format(addr, 
query_params, len(vectors), topk, nprobe)) conn = self.query_conn(addr, metadata=metadata) start = time.time() span = kwargs.get('span', None) - span = span if span else (None if self.tracer.empty else context.get_active_span().context) + span = span if span else (None if self.tracer.empty else + context.get_active_span().context) with self.tracer.start_span('search_{}'.format(addr), child_of=span): - ret = conn.search_vectors_in_files(table_name=query_params['table_id'], - file_ids=query_params['file_ids'], - query_records=vectors, - top_k=topk, - nprobe=nprobe, - lazy=True) + ret = conn.search_vectors_in_files( + table_name=query_params['table_id'], + file_ids=query_params['file_ids'], + query_records=vectors, + top_k=topk, + nprobe=nprobe, + lazy=True) end = time.time() logger.info('search_vectors_in_files takes: {}'.format(end - start)) all_topk_results.append(ret) - with self.tracer.start_span('do_search', - child_of=p_span) as span: + with self.tracer.start_span('do_search', child_of=p_span) as span: with ThreadPoolExecutor(max_workers=workers) as pool: for addr, params in routing.items(): - res = pool.submit(search, addr, params, vectors, topk, nprobe, span=span) + res = pool.submit(search, + addr, + params, + vectors, + topk, + nprobe, + span=span) rs.append(res) for res in rs: res.result() reverse = table_meta.metric_type == Types.MetricType.IP - with self.tracer.start_span('do_merge', - child_of=p_span): - return self._do_merge(all_topk_results, topk, reverse=reverse, metadata=metadata) + with self.tracer.start_span('do_merge', child_of=p_span): + return self._do_merge(all_topk_results, + topk, + reverse=reverse, + metadata=metadata) def _create_table(self, table_schema): return self.connection().create_table(table_schema) @@ -184,13 +159,15 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _table_schema = Parser.parse_proto_TableSchema(request) if not _status.OK(): - return status_pb2.Status(error_code=_status.code, 
reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) logger.info('CreateTable {}'.format(_table_schema['table_name'])) _status = self._create_table(_table_schema) - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) def _has_table(self, table_name, metadata=None): return self.connection(metadata=metadata).has_table(table_name) @@ -200,20 +177,18 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _table_name = Parser.parse_proto_TableName(request) if not _status.OK(): - return milvus_pb2.BoolReply( - status=status_pb2.Status(error_code=_status.code, reason=_status.message), - bool_reply=False - ) + return milvus_pb2.BoolReply(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + bool_reply=False) logger.info('HasTable {}'.format(_table_name)) - _bool = self._has_table(_table_name, metadata={ - 'resp_class': milvus_pb2.BoolReply}) + _bool = self._has_table(_table_name, + metadata={'resp_class': milvus_pb2.BoolReply}) - return milvus_pb2.BoolReply( - status=status_pb2.Status(error_code=status_pb2.SUCCESS, reason="OK"), - bool_reply=_bool - ) + return milvus_pb2.BoolReply(status=status_pb2.Status( + error_code=status_pb2.SUCCESS, reason="OK"), + bool_reply=_bool) def _delete_table(self, table_name): return self.connection().delete_table(table_name) @@ -223,13 +198,15 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _table_name = Parser.parse_proto_TableName(request) if not _status.OK(): - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) logger.info('DropTable {}'.format(_table_name)) _status = self._delete_table(_table_name) - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, 
+ reason=_status.message) def _create_index(self, table_name, index): return self.connection().create_index(table_name, index) @@ -239,7 +216,8 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, unpacks = Parser.parse_proto_IndexParam(request) if not _status.OK(): - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) _table_name, _index = unpacks @@ -248,21 +226,22 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): # TODO: interface create_table incompleted _status = self._create_index(_table_name, _index) - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) def _add_vectors(self, param, metadata=None): - return self.connection(metadata=metadata).add_vectors(None, None, insert_param=param) + return self.connection(metadata=metadata).add_vectors( + None, None, insert_param=param) @mark_grpc_method def Insert(self, request, context): logger.info('Insert') # TODO: Ths SDK interface add_vectors() could update, add a key 'row_id_array' - _status, _ids = self._add_vectors(metadata={ - 'resp_class': milvus_pb2.VectorIds}, param=request) - return milvus_pb2.VectorIds( - status=status_pb2.Status(error_code=_status.code, reason=_status.message), - vector_id_array=_ids - ) + _status, _ids = self._add_vectors( + metadata={'resp_class': milvus_pb2.VectorIds}, param=request) + return milvus_pb2.VectorIds(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + vector_id_array=_ids) @mark_grpc_method def Search(self, request, context): @@ -272,22 +251,23 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): topk = request.topk nprobe = request.nprobe - logger.info('Search {}: topk={} nprobe={}'.format(table_name, topk, nprobe)) + logger.info('Search {}: topk={} nprobe={}'.format( + table_name, topk, nprobe)) - 
metadata = { - 'resp_class': milvus_pb2.TopKQueryResultList - } + metadata = {'resp_class': milvus_pb2.TopKQueryResultList} if nprobe > self.MAX_NPROBE or nprobe <= 0: - raise exceptions.InvalidArgumentError(message='Invalid nprobe: {}'.format(nprobe), - metadata=metadata) + raise exceptions.InvalidArgumentError( + message='Invalid nprobe: {}'.format(nprobe), metadata=metadata) table_meta = self.table_meta.get(table_name, None) if not table_meta: - status, info = self.connection(metadata=metadata).describe_table(table_name) + status, info = self.connection( + metadata=metadata).describe_table(table_name) if not status.OK(): - raise exceptions.TableNotFoundError(table_name, metadata=metadata) + raise exceptions.TableNotFoundError(table_name, + metadata=metadata) self.table_meta[table_name] = info table_meta = info @@ -304,16 +284,22 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): query_range_array.append( Range(query_range.start_value, query_range.end_value)) - status, results = self._do_query(context, table_name, table_meta, query_record_array, topk, - nprobe, query_range_array, metadata=metadata) + status, results = self._do_query(context, + table_name, + table_meta, + query_record_array, + topk, + nprobe, + query_range_array, + metadata=metadata) now = time.time() logger.info('SearchVector takes: {}'.format(now - start)) topk_result_list = milvus_pb2.TopKQueryResultList( - status=status_pb2.Status(error_code=status.error_code, reason=status.reason), - topk_query_result=results - ) + status=status_pb2.Status(error_code=status.error_code, + reason=status.reason), + topk_query_result=results) return topk_result_list @mark_grpc_method @@ -328,16 +314,14 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _table_name = Parser.parse_proto_TableName(request) if not _status.OK(): - return milvus_pb2.TableSchema( - status=status_pb2.Status(error_code=_status.code, reason=_status.message), - ) + return 
milvus_pb2.TableSchema(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), ) - metadata = { - 'resp_class': milvus_pb2.TableSchema - } + metadata = {'resp_class': milvus_pb2.TableSchema} logger.info('DescribeTable {}'.format(_table_name)) - _status, _table = self._describe_table(metadata=metadata, table_name=_table_name) + _status, _table = self._describe_table(metadata=metadata, + table_name=_table_name) if _status.OK(): return milvus_pb2.TableSchema( @@ -345,37 +329,38 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): index_file_size=_table.index_file_size, dimension=_table.dimension, metric_type=_table.metric_type, - status=status_pb2.Status(error_code=_status.code, reason=_status.message), + status=status_pb2.Status(error_code=_status.code, + reason=_status.message), ) return milvus_pb2.TableSchema( table_name=_table_name, - status=status_pb2.Status(error_code=_status.code, reason=_status.message), + status=status_pb2.Status(error_code=_status.code, + reason=_status.message), ) def _count_table(self, table_name, metadata=None): - return self.connection(metadata=metadata).get_table_row_count(table_name) + return self.connection( + metadata=metadata).get_table_row_count(table_name) @mark_grpc_method def CountTable(self, request, context): _status, _table_name = Parser.parse_proto_TableName(request) if not _status.OK(): - status = status_pb2.Status(error_code=_status.code, reason=_status.message) + status = status_pb2.Status(error_code=_status.code, + reason=_status.message) - return milvus_pb2.TableRowCount( - status=status - ) + return milvus_pb2.TableRowCount(status=status) logger.info('CountTable {}'.format(_table_name)) - metadata = { - 'resp_class': milvus_pb2.TableRowCount - } + metadata = {'resp_class': milvus_pb2.TableRowCount} _status, _count = self._count_table(_table_name, metadata=metadata) return milvus_pb2.TableRowCount( - status=status_pb2.Status(error_code=_status.code, reason=_status.message), + 
status=status_pb2.Status(error_code=_status.code, + reason=_status.message), table_row_count=_count if isinstance(_count, int) else -1) def _get_server_version(self, metadata=None): @@ -387,23 +372,20 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('Cmd: {}'.format(_cmd)) if not _status.OK(): - return milvus_pb2.StringReply( - status=status_pb2.Status(error_code=_status.code, reason=_status.message) - ) + return milvus_pb2.StringReply(status=status_pb2.Status( + error_code=_status.code, reason=_status.message)) - metadata = { - 'resp_class': milvus_pb2.StringReply - } + metadata = {'resp_class': milvus_pb2.StringReply} if _cmd == 'version': _status, _reply = self._get_server_version(metadata=metadata) else: - _status, _reply = self.connection(metadata=metadata).server_status() + _status, _reply = self.connection( + metadata=metadata).server_status() - return milvus_pb2.StringReply( - status=status_pb2.Status(error_code=_status.code, reason=_status.message), - string_reply=_reply - ) + return milvus_pb2.StringReply(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + string_reply=_reply) def _show_tables(self, metadata=None): return self.connection(metadata=metadata).show_tables() @@ -411,18 +393,17 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): @mark_grpc_method def ShowTables(self, request, context): logger.info('ShowTables') - metadata = { - 'resp_class': milvus_pb2.TableName - } + metadata = {'resp_class': milvus_pb2.TableName} _status, _results = self._show_tables(metadata=metadata) - return milvus_pb2.TableNameList( - status=status_pb2.Status(error_code=_status.code, reason=_status.message), - table_names=_results - ) + return milvus_pb2.TableNameList(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + table_names=_results) def _delete_by_range(self, table_name, start_date, end_date): - return self.connection().delete_vectors_by_range(table_name, start_date, 
end_date) + return self.connection().delete_vectors_by_range(table_name, + start_date, + end_date) @mark_grpc_method def DeleteByRange(self, request, context): @@ -430,13 +411,16 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): Parser.parse_proto_DeleteByRangeParam(request) if not _status.OK(): - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) _table_name, _start_date, _end_date = unpacks - logger.info('DeleteByRange {}: {} {}'.format(_table_name, _start_date, _end_date)) + logger.info('DeleteByRange {}: {} {}'.format(_table_name, _start_date, + _end_date)) _status = self._delete_by_range(_table_name, _start_date, _end_date) - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) def _preload_table(self, table_name): return self.connection().preload_table(table_name) @@ -446,11 +430,13 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _table_name = Parser.parse_proto_TableName(request) if not _status.OK(): - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) logger.info('PreloadTable {}'.format(_table_name)) _status = self._preload_table(_table_name) - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) def _describe_index(self, table_name, metadata=None): return self.connection(metadata=metadata).describe_index(table_name) @@ -460,21 +446,22 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _table_name = Parser.parse_proto_TableName(request) if not _status.OK(): - return milvus_pb2.IndexParam( - status=status_pb2.Status(error_code=_status.code, reason=_status.message) - ) + return 
milvus_pb2.IndexParam(status=status_pb2.Status( + error_code=_status.code, reason=_status.message)) - metadata = { - 'resp_class': milvus_pb2.IndexParam - } + metadata = {'resp_class': milvus_pb2.IndexParam} logger.info('DescribeIndex {}'.format(_table_name)) - _status, _index_param = self._describe_index(table_name=_table_name, metadata=metadata) + _status, _index_param = self._describe_index(table_name=_table_name, + metadata=metadata) - _index = milvus_pb2.Index(index_type=_index_param._index_type, nlist=_index_param._nlist) + _index = milvus_pb2.Index(index_type=_index_param._index_type, + nlist=_index_param._nlist) - return milvus_pb2.IndexParam(status=status_pb2.Status(error_code=_status.code, reason=_status.message), - table_name=_table_name, index=_index) + return milvus_pb2.IndexParam(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + table_name=_table_name, + index=_index) def _drop_index(self, table_name): return self.connection().drop_index(table_name) @@ -484,8 +471,10 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _table_name = Parser.parse_proto_TableName(request) if not _status.OK(): - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) logger.info('DropIndex {}'.format(_table_name)) _status = self._drop_index(_table_name) - return status_pb2.Status(error_code=_status.code, reason=_status.message) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) diff --git a/mishards/settings.py b/mishards/settings.py index c9b62717d4..5e81a1a8ad 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -73,12 +73,14 @@ class DefaultConfig: SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') SQL_ECHO = env.bool('SQL_ECHO', False) TRACING_TYPE = env.str('TRACING_TYPE', '') + ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_NAME', 'FileBasedHashRingRouter') class 
TestingConfig(DefaultConfig): SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI') SQL_ECHO = env.bool('SQL_TEST_ECHO', False) TRACING_TYPE = env.str('TRACING_TEST_TYPE', '') + ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_TEST_NAME', 'FileBasedHashRingRouter') if __name__ == '__main__': diff --git a/mishards/utilities.py b/mishards/utilities.py new file mode 100644 index 0000000000..c08d0d42df --- /dev/null +++ b/mishards/utilities.py @@ -0,0 +1,20 @@ +import datetime +from mishards import exceptions + + +def format_date(self, start, end): + return ((start.year - 1900) * 10000 + (start.month - 1) * 100 + start.day, + (end.year - 1900) * 10000 + (end.month - 1) * 100 + end.day) + + +def range_to_date(self, range_obj, metadata=None): + try: + start = datetime.datetime.strptime(range_obj.start_date, '%Y-%m-%d') + end = datetime.datetime.strptime(range_obj.end_date, '%Y-%m-%d') + assert start < end + except (ValueError, AssertionError): + raise exceptions.InvalidRangeError('Invalid time range: {} {}'.format( + range_obj.start_date, range_obj.end_date), + metadata=metadata) + + return self.format_date(start, end) From fb5e6ab3b809754fd425770fd5cf48a704135ad0 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 18 Oct 2019 13:46:09 +0800 Subject: [PATCH 071/126] refactor max workers in handler --- mishards/service_handler.py | 8 ++++---- mishards/settings.py | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index e26f2bfd74..669d96802a 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -3,6 +3,7 @@ import time import datetime from collections import defaultdict +import multiprocessing from concurrent.futures import ThreadPoolExecutor from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 from milvus.grpc_gen.milvus_pb2 import TopKQueryResult @@ -20,12 +21,13 @@ logger = logging.getLogger(__name__) class 
ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): MAX_NPROBE = 2048 - def __init__(self, conn_mgr, tracer, router, *args, **kwargs): + def __init__(self, conn_mgr, tracer, router, max_workers=multiprocessing.cpu_count(), **kwargs): self.conn_mgr = conn_mgr self.table_meta = {} self.error_handlers = {} self.tracer = tracer self.router = router + self.max_workers = max_workers def connection(self, metadata=None): conn = self.conn_mgr.conn('WOSERVER', metadata=metadata) @@ -102,8 +104,6 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): rs = [] all_topk_results = [] - workers = settings.SEARCH_WORKER_SIZE - def search(addr, query_params, vectors, topk, nprobe, **kwargs): logger.info( 'Send Search Request: addr={};params={};nq={};topk={};nprobe={}' @@ -130,7 +130,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): all_topk_results.append(ret) with self.tracer.start_span('do_search', child_of=p_span) as span: - with ThreadPoolExecutor(max_workers=workers) as pool: + with ThreadPoolExecutor(max_workers=self.max_workers) as pool: for addr, params in routing.items(): res = pool.submit(search, addr, diff --git a/mishards/settings.py b/mishards/settings.py index 5e81a1a8ad..fd07d9d436 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -23,7 +23,6 @@ config(LOG_LEVEL, LOG_PATH, LOG_NAME, TIMEZONE) TIMEOUT = env.int('TIMEOUT', 60) MAX_RETRY = env.int('MAX_RETRY', 3) -SEARCH_WORKER_SIZE = env.int('SEARCH_WORKER_SIZE', 10) SERVER_PORT = env.int('SERVER_PORT', 19530) WOSERVER = env.str('WOSERVER') From bafa336410619817bb733c805f90ba3428c4cdf1 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 18 Oct 2019 13:55:22 +0800 Subject: [PATCH 072/126] change retry count logic --- mishards/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mishards/connections.py b/mishards/connections.py index 7db271381c..915454711f 100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -44,7 +44,7 @@ class 
Connection: if self.on_retry_func: self.on_retry_func(self) else: - logger.warning('{} is retrying {}'.format(self, self.retried)) + self.retried > 1 and logger.warning('{} is retrying {}'.format(self, self.retried)) def on_connect(self, metadata=None): while not self.connected and self.can_retry: From 3fb602c83fffea7dd39dd46cdd93a00b3ed98c32 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 18 Oct 2019 13:55:34 +0800 Subject: [PATCH 073/126] change log format --- utils/logger_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/logger_helper.py b/utils/logger_helper.py index 55ce3206ab..b4e3b9c5b6 100644 --- a/utils/logger_helper.py +++ b/utils/logger_helper.py @@ -73,10 +73,10 @@ def config(log_level, log_path, name, tz='UTC'): 'disable_existing_loggers': False, 'formatters': { 'default': { - 'format': '[%(asctime)s-%(levelname)s-%(name)s]: %(message)s (%(filename)s:%(lineno)s)' + 'format': '%(asctime)s | %(levelname)s | %(name)s | %(threadName)s: %(message)s (%(filename)s:%(lineno)s)', }, 'colorful_console': { - 'format': '[%(asctime)s-%(levelname)s-%(name)s]: %(message)s (%(filename)s:%(lineno)s)', + 'format': '%(asctime)s | %(levelname)s | %(name)s | %(threadName)s: %(message)s (%(filename)s:%(lineno)s)', '()': ColorfulFormatter, }, }, From 4231328e0e75cdcc4cba55e2f340c09d40e5d34f Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 18 Oct 2019 13:57:57 +0800 Subject: [PATCH 074/126] smaill code changes for logging --- mishards/__init__.py | 1 - sd/kubernetes_provider.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/mishards/__init__.py b/mishards/__init__.py index 759e8c2e5a..7db3d8cb5e 100644 --- a/mishards/__init__.py +++ b/mishards/__init__.py @@ -12,7 +12,6 @@ grpc_server = Server() def create_app(testing_config=None): config = testing_config if testing_config else settings.DefaultConfig db.init_db(uri=config.SQLALCHEMY_DATABASE_URI, echo=config.SQL_ECHO) - logger.info(db) from mishards.connections 
import ConnectionMgr connect_mgr = ConnectionMgr() diff --git a/sd/kubernetes_provider.py b/sd/kubernetes_provider.py index 9a15b2fa78..ca593a3682 100644 --- a/sd/kubernetes_provider.py +++ b/sd/kubernetes_provider.py @@ -170,7 +170,7 @@ class EventHandler(threading.Thread): event['pod'])) return elif try_cnt <= 0 and not pod.status.pod_ip: - logger.warn('NoPodIPFoundError') + logger.warning('NoPodIPFoundError') return logger.info('Register POD {} with IP {}'.format( From 2b8a6f43debb99e904968fb13cc351b5d0b32dbd Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 18 Oct 2019 16:26:53 +0800 Subject: [PATCH 075/126] set test sql uri default value --- mishards/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mishards/settings.py b/mishards/settings.py index fd07d9d436..773c04f083 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -76,7 +76,7 @@ class DefaultConfig: class TestingConfig(DefaultConfig): - SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI') + SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI', '') SQL_ECHO = env.bool('SQL_TEST_ECHO', False) TRACING_TYPE = env.str('TRACING_TEST_TYPE', '') ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_TEST_NAME', 'FileBasedHashRingRouter') From 9b2a9193908443f1a5c545cc01b5e5953e969383 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 18 Oct 2019 17:12:30 +0800 Subject: [PATCH 076/126] ignore pyc files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8919efeb01..60d9da8c38 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .env .coverage +*.pyc cov_html/ __pycache__/ From c40b72df960b464756c62e52a9a18c89e3c3a40b Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 18 Oct 2019 17:12:58 +0800 Subject: [PATCH 077/126] change heartbeat log --- mishards/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mishards/connections.py b/mishards/connections.py index 915454711f..618690a099 
100644 --- a/mishards/connections.py +++ b/mishards/connections.py @@ -114,6 +114,7 @@ class ConnectionMgr: return rconn def on_new_meta(self, name, url): + logger.info('Register Connection: name={};url={}'.format(name, url)) self.metas[name] = url def on_duplicate_meta(self, name, url): @@ -139,7 +140,6 @@ class ConnectionMgr: logger.warning('Non-existed meta: {}'.format(name)) def register(self, name, url): - logger.info('Register Connection: name={};url={}'.format(name, url)) meta = self.metas.get(name) if not meta: return self.on_new_meta(name, url) From bdff52021d115facf1a6f4ce8c54759b370e1a60 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 18 Oct 2019 17:13:28 +0800 Subject: [PATCH 078/126] db session bug fix for multi-threading scenario --- mishards/db_base.py | 3 +++ mishards/routings.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/mishards/db_base.py b/mishards/db_base.py index 6fb3aef4e1..5f2eee9ba1 100644 --- a/mishards/db_base.py +++ b/mishards/db_base.py @@ -42,6 +42,9 @@ class DB: def Session(self): return self.session_factory() + def remove_session(self): + self.session_factory.remove() + def drop_all(self): self.Model.metadata.drop_all(self.engine) diff --git a/mishards/routings.py b/mishards/routings.py index a61352f40b..f04f3d2484 100644 --- a/mishards/routings.py +++ b/mishards/routings.py @@ -53,6 +53,7 @@ class FileBasedHashRingRouter(RouterMixin): def _route(self, table_name, range_array, metadata=None, **kwargs): # PXU TODO: Implement Thread-local Context + # PXU TODO: Session life mgt try: table = db.Session.query(Tables).filter( and_(Tables.table_id == table_name, @@ -63,6 +64,7 @@ class FileBasedHashRingRouter(RouterMixin): if not table: raise exceptions.TableNotFoundError(table_name, metadata=metadata) files = table.files_to_search(range_array) + db.remove_session() servers = self.conn_mgr.conn_names logger.info('Available servers: {}'.format(servers)) From 46210920818662372a22d184823dd0370cbf7f27 Mon Sep 17 00:00:00 2001 
From: "peng.xu" Date: Sat, 19 Oct 2019 11:21:53 +0800 Subject: [PATCH 079/126] remove conn_mgr from handler --- mishards/routings.py | 13 ++++++++++ mishards/server.py | 3 +-- mishards/service_handler.py | 48 +++++++++++++------------------------ 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/mishards/routings.py b/mishards/routings.py index f04f3d2484..823972726f 100644 --- a/mishards/routings.py +++ b/mishards/routings.py @@ -39,6 +39,19 @@ class RouterMixin: def routing(self, table_name, metadata=None, **kwargs): raise NotImplemented() + def connection(self, metadata=None): + conn = self.conn_mgr.conn('WOSERVER', metadata=metadata) + if conn: + conn.on_connect(metadata=metadata) + return conn.conn + + def query_conn(self, name, metadata=None): + conn = self.conn_mgr.conn(name, metadata=metadata) + if not conn: + raise exceptions.ConnectionNotFoundError(name, metadata=metadata) + conn.on_connect(metadata=metadata) + return conn.conn + @RouteManager.register_router_class class FileBasedHashRingRouter(RouterMixin): diff --git a/mishards/server.py b/mishards/server.py index 20be8f1746..6eb0e92582 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -88,8 +88,7 @@ class Server: def start(self, port=None): handler_class = self.decorate_handler(ServiceHandler) add_MilvusServiceServicer_to_server( - handler_class(conn_mgr=self.conn_mgr, - tracer=self.tracer, + handler_class(tracer=self.tracer, router=self.router), self.server_impl) self.server_impl.add_insecure_port("[::]:{}".format( str(port or self._port))) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 669d96802a..04e74415a1 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -21,27 +21,13 @@ logger = logging.getLogger(__name__) class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): MAX_NPROBE = 2048 - def __init__(self, conn_mgr, tracer, router, max_workers=multiprocessing.cpu_count(), **kwargs): - self.conn_mgr = conn_mgr + def 
__init__(self, tracer, router, max_workers=multiprocessing.cpu_count(), **kwargs): self.table_meta = {} self.error_handlers = {} self.tracer = tracer self.router = router self.max_workers = max_workers - def connection(self, metadata=None): - conn = self.conn_mgr.conn('WOSERVER', metadata=metadata) - if conn: - conn.on_connect(metadata=metadata) - return conn.conn - - def query_conn(self, name, metadata=None): - conn = self.conn_mgr.conn(name, metadata=metadata) - if not conn: - raise exceptions.ConnectionNotFoundError(name, metadata=metadata) - conn.on_connect(metadata=metadata) - return conn.conn - def _do_merge(self, files_n_topk_results, topk, reverse=False, **kwargs): status = status_pb2.Status(error_code=status_pb2.SUCCESS, reason="Success") @@ -109,7 +95,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): 'Send Search Request: addr={};params={};nq={};topk={};nprobe={}' .format(addr, query_params, len(vectors), topk, nprobe)) - conn = self.query_conn(addr, metadata=metadata) + conn = self.router.query_conn(addr, metadata=metadata) start = time.time() span = kwargs.get('span', None) span = span if span else (None if self.tracer.empty else @@ -152,7 +138,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): metadata=metadata) def _create_table(self, table_schema): - return self.connection().create_table(table_schema) + return self.router.connection().create_table(table_schema) @mark_grpc_method def CreateTable(self, request, context): @@ -170,7 +156,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): reason=_status.message) def _has_table(self, table_name, metadata=None): - return self.connection(metadata=metadata).has_table(table_name) + return self.router.connection(metadata=metadata).has_table(table_name) @mark_grpc_method def HasTable(self, request, context): @@ -191,7 +177,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): bool_reply=_bool) def _delete_table(self, table_name): - return 
self.connection().delete_table(table_name) + return self.router.connection().delete_table(table_name) @mark_grpc_method def DropTable(self, request, context): @@ -209,7 +195,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): reason=_status.message) def _create_index(self, table_name, index): - return self.connection().create_index(table_name, index) + return self.router.connection().create_index(table_name, index) @mark_grpc_method def CreateIndex(self, request, context): @@ -230,7 +216,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): reason=_status.message) def _add_vectors(self, param, metadata=None): - return self.connection(metadata=metadata).add_vectors( + return self.router.connection(metadata=metadata).add_vectors( None, None, insert_param=param) @mark_grpc_method @@ -263,7 +249,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): table_meta = self.table_meta.get(table_name, None) if not table_meta: - status, info = self.connection( + status, info = self.router.connection( metadata=metadata).describe_table(table_name) if not status.OK(): raise exceptions.TableNotFoundError(table_name, @@ -307,7 +293,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): raise NotImplemented() def _describe_table(self, table_name, metadata=None): - return self.connection(metadata=metadata).describe_table(table_name) + return self.router.connection(metadata=metadata).describe_table(table_name) @mark_grpc_method def DescribeTable(self, request, context): @@ -340,7 +326,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): ) def _count_table(self, table_name, metadata=None): - return self.connection( + return self.router.connection( metadata=metadata).get_table_row_count(table_name) @mark_grpc_method @@ -364,7 +350,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): table_row_count=_count if isinstance(_count, int) else -1) def _get_server_version(self, metadata=None): - return 
self.connection(metadata=metadata).server_version() + return self.router.connection(metadata=metadata).server_version() @mark_grpc_method def Cmd(self, request, context): @@ -380,7 +366,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): if _cmd == 'version': _status, _reply = self._get_server_version(metadata=metadata) else: - _status, _reply = self.connection( + _status, _reply = self.router.connection( metadata=metadata).server_status() return milvus_pb2.StringReply(status=status_pb2.Status( @@ -388,7 +374,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): string_reply=_reply) def _show_tables(self, metadata=None): - return self.connection(metadata=metadata).show_tables() + return self.router.connection(metadata=metadata).show_tables() @mark_grpc_method def ShowTables(self, request, context): @@ -401,7 +387,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): table_names=_results) def _delete_by_range(self, table_name, start_date, end_date): - return self.connection().delete_vectors_by_range(table_name, + return self.router.connection().delete_vectors_by_range(table_name, start_date, end_date) @@ -423,7 +409,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): reason=_status.message) def _preload_table(self, table_name): - return self.connection().preload_table(table_name) + return self.router.connection().preload_table(table_name) @mark_grpc_method def PreloadTable(self, request, context): @@ -439,7 +425,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): reason=_status.message) def _describe_index(self, table_name, metadata=None): - return self.connection(metadata=metadata).describe_index(table_name) + return self.router.connection(metadata=metadata).describe_index(table_name) @mark_grpc_method def DescribeIndex(self, request, context): @@ -464,7 +450,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): index=_index) def _drop_index(self, table_name): - return 
self.connection().drop_index(table_name) + return self.router.connection().drop_index(table_name) @mark_grpc_method def DropIndex(self, request, context): From 43bc2cc60c8b1c5428cb990f7300c91f81a63ead Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 19 Oct 2019 11:21:53 +0800 Subject: [PATCH 080/126] remove conn_mgr from handler remove conn_mgr from handler --- mishards/service_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 04e74415a1..485aa8b211 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -388,8 +388,8 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def _delete_by_range(self, table_name, start_date, end_date): return self.router.connection().delete_vectors_by_range(table_name, - start_date, - end_date) + start_date, + end_date) @mark_grpc_method def DeleteByRange(self, request, context): From 3ddd181dd2225c1166d3989249d984ae7677538a Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 19 Oct 2019 14:02:48 +0800 Subject: [PATCH 081/126] update for better test --- conftest.py | 2 +- mishards/server.py | 2 +- mishards/settings.py | 7 +++++-- sd/kubernetes_provider.py | 6 ++++-- sd/static_provider.py | 6 ++++-- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/conftest.py b/conftest.py index ebe8276cea..34e22af693 100644 --- a/conftest.py +++ b/conftest.py @@ -20,7 +20,7 @@ def app(request): @pytest.fixture def started_app(app): app.on_pre_run() - app.start(app.port) + app.start(settings.SERVER_TEST_PORT) yield app diff --git a/mishards/server.py b/mishards/server.py index 6eb0e92582..599a00e455 100644 --- a/mishards/server.py +++ b/mishards/server.py @@ -91,7 +91,7 @@ class Server: handler_class(tracer=self.tracer, router=self.router), self.server_impl) self.server_impl.add_insecure_port("[::]:{}".format( - str(port or self._port))) + str(port or self.port))) self.server_impl.start() def run(self, port): 
diff --git a/mishards/settings.py b/mishards/settings.py index 773c04f083..21a3bb7a65 100644 --- a/mishards/settings.py +++ b/mishards/settings.py @@ -25,6 +25,7 @@ TIMEOUT = env.int('TIMEOUT', 60) MAX_RETRY = env.int('MAX_RETRY', 3) SERVER_PORT = env.int('SERVER_PORT', 19530) +SERVER_TEST_PORT = env.int('SERVER_TEST_PORT', 19530) WOSERVER = env.str('WOSERVER') SD_PROVIDER_SETTINGS = None @@ -36,11 +37,13 @@ if SD_PROVIDER == 'Kubernetes': in_cluster=env.bool('SD_IN_CLUSTER', False), poll_interval=env.int('SD_POLL_INTERVAL', 5), pod_patt=env.str('SD_ROSERVER_POD_PATT', ''), - label_selector=env.str('SD_LABEL_SELECTOR', '')) + label_selector=env.str('SD_LABEL_SELECTOR', ''), + port=env.int('SD_PORT', 19530)) elif SD_PROVIDER == 'Static': from sd.static_provider import StaticProviderSettings SD_PROVIDER_SETTINGS = StaticProviderSettings( - hosts=env.list('SD_STATIC_HOSTS', [])) + hosts=env.list('SD_STATIC_HOSTS', []), + port=env.int('SD_STATIC_PORT', 19530)) # TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') diff --git a/sd/kubernetes_provider.py b/sd/kubernetes_provider.py index ca593a3682..eb113db007 100644 --- a/sd/kubernetes_provider.py +++ b/sd/kubernetes_provider.py @@ -226,12 +226,13 @@ class EventHandler(threading.Thread): class KubernetesProviderSettings: def __init__(self, namespace, pod_patt, label_selector, in_cluster, - poll_interval, **kwargs): + poll_interval, port=None, **kwargs): self.namespace = namespace self.pod_patt = pod_patt self.label_selector = label_selector self.in_cluster = in_cluster self.poll_interval = poll_interval + self.port = int(port) if port else 19530 @singleton @@ -245,6 +246,7 @@ class KubernetesProvider(object): self.label_selector = settings.label_selector self.in_cluster = settings.in_cluster self.poll_interval = settings.poll_interval + self.port = settings.port self.kwargs = kwargs self.queue = queue.Queue() @@ -279,7 +281,7 @@ class KubernetesProvider(object): **kwargs) def add_pod(self, name, ip): 
- self.conn_mgr.register(name, 'tcp://{}:19530'.format(ip)) + self.conn_mgr.register(name, 'tcp://{}:{}'.format(ip, self.port)) def delete_pod(self, name): self.conn_mgr.unregister(name) diff --git a/sd/static_provider.py b/sd/static_provider.py index 5c97c4efd0..e88780740f 100644 --- a/sd/static_provider.py +++ b/sd/static_provider.py @@ -9,8 +9,9 @@ from sd import ProviderManager class StaticProviderSettings: - def __init__(self, hosts): + def __init__(self, hosts, port=None): self.hosts = hosts + self.port = int(port) if port else 19530 @singleton @@ -21,6 +22,7 @@ class KubernetesProvider(object): def __init__(self, settings, conn_mgr, **kwargs): self.conn_mgr = conn_mgr self.hosts = [socket.gethostbyname(host) for host in settings.hosts] + self.port = settings.port def start(self): for host in self.hosts: @@ -31,7 +33,7 @@ class KubernetesProvider(object): self.delete_pod(host) def add_pod(self, name, ip): - self.conn_mgr.register(name, 'tcp://{}:19530'.format(ip)) + self.conn_mgr.register(name, 'tcp://{}:{}'.format(ip, self.port)) def delete_pod(self, name): self.conn_mgr.unregister(name) From 9dc45d650c713caa8876b7693d526e66922db629 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 19 Oct 2019 14:03:06 +0800 Subject: [PATCH 082/126] update test_server --- mishards/test_server.py | 70 ++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/mishards/test_server.py b/mishards/test_server.py index a2677847da..2f24a1167b 100644 --- a/mishards/test_server.py +++ b/mishards/test_server.py @@ -13,6 +13,7 @@ from mishards import db, create_app, settings from mishards.service_handler import ServiceHandler from mishards.grpc_utils.grpc_args_parser import GrpcArgsParser as Parser from mishards.factories import TableFilesFactory, TablesFactory, TableFiles, Tables +from mishards.routings import RouterMixin logger = logging.getLogger(__name__) @@ -22,9 +23,10 @@ BAD = Status(code=Status.PERMISSION_DENIED, message='Fail') 
@pytest.mark.usefixtures('started_app') class TestServer: - def client(self, port): + @property + def client(self): m = Milvus() - m.connect(host='localhost', port=port) + m.connect(host='localhost', port=settings.SERVER_TEST_PORT) return m def test_server_start(self, started_app): @@ -33,22 +35,22 @@ class TestServer: def test_cmd(self, started_app): ServiceHandler._get_server_version = mock.MagicMock(return_value=(OK, '')) - status, _ = self.client(started_app.port).server_version() + status, _ = self.client.server_version() assert status.OK() Parser.parse_proto_Command = mock.MagicMock(return_value=(BAD, 'cmd')) - status, _ = self.client(started_app.port).server_version() + status, _ = self.client.server_version() assert not status.OK() def test_drop_index(self, started_app): table_name = inspect.currentframe().f_code.co_name ServiceHandler._drop_index = mock.MagicMock(return_value=OK) - status = self.client(started_app.port).drop_index(table_name) + status = self.client.drop_index(table_name) assert status.OK() Parser.parse_proto_TableName = mock.MagicMock( return_value=(BAD, table_name)) - status = self.client(started_app.port).drop_index(table_name) + status = self.client.drop_index(table_name) assert not status.OK() def test_describe_index(self, started_app): @@ -62,13 +64,13 @@ class TestServer: return_value=(OK, table_name)) ServiceHandler._describe_index = mock.MagicMock( return_value=(OK, index_param)) - status, ret = self.client(started_app.port).describe_index(table_name) + status, ret = self.client.describe_index(table_name) assert status.OK() assert ret._table_name == index_param._table_name Parser.parse_proto_TableName = mock.MagicMock( return_value=(BAD, table_name)) - status, _ = self.client(started_app.port).describe_index(table_name) + status, _ = self.client.describe_index(table_name) assert not status.OK() def test_preload(self, started_app): @@ -77,12 +79,12 @@ class TestServer: Parser.parse_proto_TableName = mock.MagicMock( return_value=(OK, 
table_name)) ServiceHandler._preload_table = mock.MagicMock(return_value=OK) - status = self.client(started_app.port).preload_table(table_name) + status = self.client.preload_table(table_name) assert status.OK() Parser.parse_proto_TableName = mock.MagicMock( return_value=(BAD, table_name)) - status = self.client(started_app.port).preload_table(table_name) + status = self.client.preload_table(table_name) assert not status.OK() def test_delete_by_range(self, started_app): @@ -94,13 +96,13 @@ class TestServer: Parser.parse_proto_DeleteByRangeParam = mock.MagicMock( return_value=(OK, unpacked)) ServiceHandler._delete_by_range = mock.MagicMock(return_value=OK) - status = self.client(started_app.port).delete_vectors_by_range( + status = self.client.delete_vectors_by_range( *unpacked) assert status.OK() Parser.parse_proto_DeleteByRangeParam = mock.MagicMock( return_value=(BAD, unpacked)) - status = self.client(started_app.port).delete_vectors_by_range( + status = self.client.delete_vectors_by_range( *unpacked) assert not status.OK() @@ -111,21 +113,19 @@ class TestServer: Parser.parse_proto_TableName = mock.MagicMock( return_value=(OK, table_name)) ServiceHandler._count_table = mock.MagicMock(return_value=(OK, count)) - status, ret = self.client( - started_app.port).get_table_row_count(table_name) + status, ret = self.client.get_table_row_count(table_name) assert status.OK() assert ret == count Parser.parse_proto_TableName = mock.MagicMock( return_value=(BAD, table_name)) - status, _ = self.client( - started_app.port).get_table_row_count(table_name) + status, _ = self.client.get_table_row_count(table_name) assert not status.OK() def test_show_tables(self, started_app): tables = ['t1', 't2'] ServiceHandler._show_tables = mock.MagicMock(return_value=(OK, tables)) - status, ret = self.client(started_app.port).show_tables() + status, ret = self.client.show_tables() assert status.OK() assert ret == tables @@ -141,17 +141,17 @@ class TestServer: return_value=(OK, 
table_schema.table_name)) ServiceHandler._describe_table = mock.MagicMock( return_value=(OK, table_schema)) - status, _ = self.client(started_app.port).describe_table(table_name) + status, _ = self.client.describe_table(table_name) assert status.OK() ServiceHandler._describe_table = mock.MagicMock( return_value=(BAD, table_schema)) - status, _ = self.client(started_app.port).describe_table(table_name) + status, _ = self.client.describe_table(table_name) assert not status.OK() Parser.parse_proto_TableName = mock.MagicMock(return_value=(BAD, 'cmd')) - status, ret = self.client(started_app.port).describe_table(table_name) + status, ret = self.client.describe_table(table_name) assert not status.OK() def test_insert(self, started_app): @@ -159,7 +159,7 @@ class TestServer: vectors = [[random.random() for _ in range(16)] for _ in range(10)] ids = [random.randint(1000000, 20000000) for _ in range(10)] ServiceHandler._add_vectors = mock.MagicMock(return_value=(OK, ids)) - status, ret = self.client(started_app.port).add_vectors( + status, ret = self.client.add_vectors( table_name=table_name, records=vectors) assert status.OK() assert ids == ret @@ -170,14 +170,12 @@ class TestServer: Parser.parse_proto_IndexParam = mock.MagicMock(return_value=(OK, unpacks)) ServiceHandler._create_index = mock.MagicMock(return_value=OK) - status = self.client( - started_app.port).create_index(table_name=table_name) + status = self.client.create_index(table_name=table_name) assert status.OK() Parser.parse_proto_IndexParam = mock.MagicMock(return_value=(BAD, None)) - status = self.client( - started_app.port).create_index(table_name=table_name) + status = self.client.create_index(table_name=table_name) assert not status.OK() def test_drop_table(self, started_app): @@ -186,14 +184,12 @@ class TestServer: Parser.parse_proto_TableName = mock.MagicMock( return_value=(OK, table_name)) ServiceHandler._delete_table = mock.MagicMock(return_value=OK) - status = self.client( - 
started_app.port).delete_table(table_name=table_name) + status = self.client.delete_table(table_name=table_name) assert status.OK() Parser.parse_proto_TableName = mock.MagicMock( return_value=(BAD, table_name)) - status = self.client( - started_app.port).delete_table(table_name=table_name) + status = self.client.delete_table(table_name=table_name) assert not status.OK() def test_has_table(self, started_app): @@ -202,12 +198,12 @@ class TestServer: Parser.parse_proto_TableName = mock.MagicMock( return_value=(OK, table_name)) ServiceHandler._has_table = mock.MagicMock(return_value=True) - has = self.client(started_app.port).has_table(table_name=table_name) + has = self.client.has_table(table_name=table_name) assert has Parser.parse_proto_TableName = mock.MagicMock( return_value=(BAD, table_name)) - has = self.client(started_app.port).has_table(table_name=table_name) + has = self.client.has_table(table_name=table_name) assert not has def test_create_table(self, started_app): @@ -219,12 +215,12 @@ class TestServer: dimension=dimension) ServiceHandler._create_table = mock.MagicMock(return_value=OK) - status = self.client(started_app.port).create_table(table_schema) + status = self.client.create_table(table_schema) assert status.OK() Parser.parse_proto_TableSchema = mock.MagicMock(return_value=(BAD, None)) - status = self.client(started_app.port).create_table(table_schema) + status = self.client.create_table(table_schema) assert not status.OK() def random_data(self, n, dimension): @@ -261,19 +257,21 @@ class TestServer: metric_type=table.metric_type, dimension=table.dimension) - status, _ = self.client(started_app.port).search_vectors(**param) + status, _ = self.client.search_vectors(**param) assert status.code == Status.ILLEGAL_ARGUMENT param['nprobe'] = 2048 + RouterMixin.connection = mock.MagicMock(return_value=Milvus()) + RouterMixin.query_conn = mock.MagicMock(return_value=Milvus()) Milvus.describe_table = mock.MagicMock(return_value=(BAD, table_schema)) - status, 
ret = self.client(started_app.port).search_vectors(**param) + status, ret = self.client.search_vectors(**param) assert status.code == Status.TABLE_NOT_EXISTS Milvus.describe_table = mock.MagicMock(return_value=(OK, table_schema)) Milvus.search_vectors_in_files = mock.MagicMock( return_value=mock_results) - status, ret = self.client(started_app.port).search_vectors(**param) + status, ret = self.client.search_vectors(**param) assert status.OK() assert len(ret) == nq From 4efa4506a99e044cd6a3d39e7713f1ef78fc4877 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 19 Oct 2019 14:06:35 +0800 Subject: [PATCH 083/126] update .env.example --- mishards/.env.example | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mishards/.env.example b/mishards/.env.example index bfea0a3edc..0a23c0cf56 100644 --- a/mishards/.env.example +++ b/mishards/.env.example @@ -1,8 +1,8 @@ DEBUG=True WOSERVER=tcp://127.0.0.1:19530 -TESTING_WOSERVER=tcp://127.0.0.1:19530 SERVER_PORT=19532 +SERVER_TEST_PORT=19888 SD_PROVIDER=Static @@ -13,16 +13,17 @@ SD_ROSERVER_POD_PATT=.*-ro-servers-.* SD_LABEL_SELECTOR=tier=ro-servers SD_STATIC_HOSTS=127.0.0.1 +SD_STATIC_PORT=19530 #SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 SQLALCHEMY_DATABASE_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False SQL_ECHO=True -TESTING=False #SQLALCHEMY_DATABASE_TEST_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 SQLALCHEMY_DATABASE_TEST_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False SQL_TEST_ECHO=False +# TRACING_TEST_TYPE=jaeger TRACING_TYPE=jaeger TRACING_SERVICE_NAME=fortest TRACING_SAMPLER_TYPE=const From a27eef278b538ed21010a0719885c49c7ec597e2 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 21 Oct 2019 11:42:54 +0800 Subject: [PATCH 084/126] update for new sdk --- mishards/service_handler.py | 4 ++-- mishards/test_server.py | 6 ++++-- requirements.txt | 2 +- 3 files changed, 7 insertions(+), 5 
deletions(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 485aa8b211..4519afbaa0 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -7,7 +7,7 @@ import multiprocessing from concurrent.futures import ThreadPoolExecutor from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 from milvus.grpc_gen.milvus_pb2 import TopKQueryResult -from milvus.client.Abstract import Range +from milvus.client.abstract import Range from milvus.client import types as Types from mishards import (db, settings, exceptions) @@ -109,7 +109,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): query_records=vectors, top_k=topk, nprobe=nprobe, - lazy=True) + lazy_=True) end = time.time() logger.info('search_vectors_in_files takes: {}'.format(end - start)) diff --git a/mishards/test_server.py b/mishards/test_server.py index 2f24a1167b..a7fec615c9 100644 --- a/mishards/test_server.py +++ b/mishards/test_server.py @@ -7,7 +7,7 @@ import faker import inspect from milvus import Milvus from milvus.client.types import Status, IndexType, MetricType -from milvus.client.Abstract import IndexParam, TableSchema +from milvus.client.abstract import IndexParam, TableSchema from milvus.grpc_gen import status_pb2, milvus_pb2 from mishards import db, create_app, settings from mishards.service_handler import ServiceHandler @@ -87,6 +87,7 @@ class TestServer: status = self.client.preload_table(table_name) assert not status.OK() + @pytest.mark.skip def test_delete_by_range(self, started_app): table_name = inspect.currentframe().f_code.co_name @@ -203,7 +204,8 @@ class TestServer: Parser.parse_proto_TableName = mock.MagicMock( return_value=(BAD, table_name)) - has = self.client.has_table(table_name=table_name) + status, has = self.client.has_table(table_name=table_name) + assert not status.OK() assert not has def test_create_table(self, started_app): diff --git a/requirements.txt b/requirements.txt index 133cfac8ab..ae224e92ed 100644 
--- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ py==1.8.0 pyasn1==0.4.7 pyasn1-modules==0.2.6 pylint==2.3.1 -pymilvus-test==0.2.21 +pymilvus-test==0.2.28 #pymilvus==0.2.0 pyparsing==2.4.0 pytest==4.6.3 From 703371efa379c9eba1c0c36004db25e7e9b22521 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 21 Oct 2019 12:08:45 +0800 Subject: [PATCH 085/126] check return index param in DescribeIndex --- mishards/service_handler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 4519afbaa0..0c6b41ece6 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -441,6 +441,10 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): _status, _index_param = self._describe_index(table_name=_table_name, metadata=metadata) + if not _index_param: + return milvus_pb2.IndexParam(status=status_pb2.Status( + error_code=_status.code, reason=_status.message)) + _index = milvus_pb2.Index(index_type=_index_param._index_type, nlist=_index_param._nlist) From 26b3adfcc37d4b0e18b953786d47f9fcb39c89a3 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 21 Oct 2019 14:34:12 +0800 Subject: [PATCH 086/126] update for new sdk changes --- mishards/service_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 0c6b41ece6..44e1d8cf7b 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -169,11 +169,11 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('HasTable {}'.format(_table_name)) - _bool = self._has_table(_table_name, + _status, _bool = self._has_table(_table_name, metadata={'resp_class': milvus_pb2.BoolReply}) return milvus_pb2.BoolReply(status=status_pb2.Status( - error_code=status_pb2.SUCCESS, reason="OK"), + error_code=_status.code, reason=_status.message), bool_reply=_bool) def _delete_table(self, table_name): From 
c4a5c5c69b5f2bb4d8b7f016e230a74d5ddfd2d5 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 21 Oct 2019 14:58:39 +0800 Subject: [PATCH 087/126] bug fix for time range and topk check in search --- mishards/exception_codes.py | 1 + mishards/exception_handlers.py | 6 ++++++ mishards/exceptions.py | 4 ++++ mishards/service_handler.py | 5 +++++ mishards/utilities.py | 6 +++--- 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/mishards/exception_codes.py b/mishards/exception_codes.py index ecb2469562..bdd4572dd5 100644 --- a/mishards/exception_codes.py +++ b/mishards/exception_codes.py @@ -7,3 +7,4 @@ DB_ERROR_CODE = 10003 TABLE_NOT_FOUND_CODE = 20001 INVALID_ARGUMENT_CODE = 20002 INVALID_DATE_RANGE_CODE = 20003 +INVALID_TOPK_CODE = 20004 diff --git a/mishards/exception_handlers.py b/mishards/exception_handlers.py index 1e5ffb3529..c79a6db5a3 100644 --- a/mishards/exception_handlers.py +++ b/mishards/exception_handlers.py @@ -58,6 +58,12 @@ def TableNotFoundErrorHandler(err): return resp_handler(err, status_pb2.TABLE_NOT_EXISTS) +@server.errorhandler(exceptions.InvalidTopKError) +def InvalidTopKErrorHandler(err): + logger.error(err) + return resp_handler(err, status_pb2.ILLEGAL_TOPK) + + @server.errorhandler(exceptions.InvalidArgumentError) def InvalidArgumentErrorHandler(err): logger.error(err) diff --git a/mishards/exceptions.py b/mishards/exceptions.py index acd9372d6a..72839f88d2 100644 --- a/mishards/exceptions.py +++ b/mishards/exceptions.py @@ -26,6 +26,10 @@ class TableNotFoundError(BaseException): code = codes.TABLE_NOT_FOUND_CODE +class InvalidTopKError(BaseException): + code = codes.INVALID_TOPK_CODE + + class InvalidArgumentError(BaseException): code = codes.INVALID_ARGUMENT_CODE diff --git a/mishards/service_handler.py b/mishards/service_handler.py index 44e1d8cf7b..5e91c14f14 100644 --- a/mishards/service_handler.py +++ b/mishards/service_handler.py @@ -20,6 +20,7 @@ logger = logging.getLogger(__name__) class 
ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): MAX_NPROBE = 2048 + MAX_TOPK = 2048 def __init__(self, tracer, router, max_workers=multiprocessing.cpu_count(), **kwargs): self.table_meta = {} @@ -246,6 +247,10 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): raise exceptions.InvalidArgumentError( message='Invalid nprobe: {}'.format(nprobe), metadata=metadata) + if topk > self.MAX_TOPK or topk <= 0: + raise exceptions.InvalidTopKError( + message='Invalid topk: {}'.format(topk), metadata=metadata) + table_meta = self.table_meta.get(table_name, None) if not table_meta: diff --git a/mishards/utilities.py b/mishards/utilities.py index c08d0d42df..42e982b5f1 100644 --- a/mishards/utilities.py +++ b/mishards/utilities.py @@ -2,12 +2,12 @@ import datetime from mishards import exceptions -def format_date(self, start, end): +def format_date(start, end): return ((start.year - 1900) * 10000 + (start.month - 1) * 100 + start.day, (end.year - 1900) * 10000 + (end.month - 1) * 100 + end.day) -def range_to_date(self, range_obj, metadata=None): +def range_to_date(range_obj, metadata=None): try: start = datetime.datetime.strptime(range_obj.start_date, '%Y-%m-%d') end = datetime.datetime.strptime(range_obj.end_date, '%Y-%m-%d') @@ -17,4 +17,4 @@ def range_to_date(self, range_obj, metadata=None): range_obj.start_date, range_obj.end_date), metadata=metadata) - return self.format_date(start, end) + return format_date(start, end) From e47f3ec28a89715745be8949c160e81f416fcd9f Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 21 Oct 2019 15:06:58 +0800 Subject: [PATCH 088/126] update to latest image --- start_services.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/start_services.yml b/start_services.yml index c7a3c36f51..57fe061bb7 100644 --- a/start_services.yml +++ b/start_services.yml @@ -21,7 +21,7 @@ services: mishards: restart: always - image: registry.zilliz.com/milvus/mishards:v0.0.3 + image: registry.zilliz.com/milvus/mishards:v0.0.4 
ports: - "0.0.0.0:19530:19531" - "0.0.0.0:19532:19532" From 7b0a731e047b571c1154ca0dba37f8be8f867c8d Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 21 Oct 2019 15:45:25 +0800 Subject: [PATCH 089/126] fix bug in test_server --- mishards/test_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mishards/test_server.py b/mishards/test_server.py index a7fec615c9..efd3912076 100644 --- a/mishards/test_server.py +++ b/mishards/test_server.py @@ -198,7 +198,7 @@ class TestServer: Parser.parse_proto_TableName = mock.MagicMock( return_value=(OK, table_name)) - ServiceHandler._has_table = mock.MagicMock(return_value=True) + ServiceHandler._has_table = mock.MagicMock(return_value=(OK, True)) has = self.client.has_table(table_name=table_name) assert has From 9a4c732563323cd8814a11a5eda8891745e264ba Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 21 Oct 2019 16:20:29 +0800 Subject: [PATCH 090/126] fix bug in test_server --- Dockerfile | 10 - build.sh | 39 -- conftest.py | 27 -- manager.py | 28 -- mishards/.env.example | 33 -- mishards/__init__.py | 36 -- mishards/connections.py | 154 -------- mishards/db_base.py | 52 --- mishards/exception_codes.py | 10 - mishards/exception_handlers.py | 82 ---- mishards/exceptions.py | 38 -- mishards/factories.py | 54 --- mishards/grpc_utils/__init__.py | 37 -- mishards/grpc_utils/grpc_args_parser.py | 102 ----- mishards/grpc_utils/grpc_args_wrapper.py | 4 - mishards/grpc_utils/test_grpc.py | 75 ---- mishards/hash_ring.py | 150 ------- mishards/main.py | 15 - mishards/models.py | 76 ---- mishards/routings.py | 96 ----- mishards/server.py | 122 ------ mishards/service_handler.py | 475 ----------------------- mishards/settings.py | 94 ----- mishards/test_connections.py | 101 ----- mishards/test_models.py | 39 -- mishards/test_server.py | 279 ------------- mishards/utilities.py | 20 - requirements.txt | 36 -- sd/__init__.py | 28 -- sd/kubernetes_provider.py | 331 ---------------- sd/static_provider.py | 39 -- 
setup.cfg | 4 - start_services.yml | 45 --- tracing/__init__.py | 43 -- tracing/factory.py | 40 -- utils/__init__.py | 11 - utils/logger_helper.py | 152 -------- 37 files changed, 2977 deletions(-) delete mode 100644 Dockerfile delete mode 100755 build.sh delete mode 100644 conftest.py delete mode 100644 manager.py delete mode 100644 mishards/.env.example delete mode 100644 mishards/__init__.py delete mode 100644 mishards/connections.py delete mode 100644 mishards/db_base.py delete mode 100644 mishards/exception_codes.py delete mode 100644 mishards/exception_handlers.py delete mode 100644 mishards/exceptions.py delete mode 100644 mishards/factories.py delete mode 100644 mishards/grpc_utils/__init__.py delete mode 100644 mishards/grpc_utils/grpc_args_parser.py delete mode 100644 mishards/grpc_utils/grpc_args_wrapper.py delete mode 100644 mishards/grpc_utils/test_grpc.py delete mode 100644 mishards/hash_ring.py delete mode 100644 mishards/main.py delete mode 100644 mishards/models.py delete mode 100644 mishards/routings.py delete mode 100644 mishards/server.py delete mode 100644 mishards/service_handler.py delete mode 100644 mishards/settings.py delete mode 100644 mishards/test_connections.py delete mode 100644 mishards/test_models.py delete mode 100644 mishards/test_server.py delete mode 100644 mishards/utilities.py delete mode 100644 requirements.txt delete mode 100644 sd/__init__.py delete mode 100644 sd/kubernetes_provider.py delete mode 100644 sd/static_provider.py delete mode 100644 setup.cfg delete mode 100644 start_services.yml delete mode 100644 tracing/__init__.py delete mode 100644 tracing/factory.py delete mode 100644 utils/__init__.py delete mode 100644 utils/logger_helper.py diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 594640619e..0000000000 --- a/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM python:3.6 -RUN apt update && apt install -y \ - less \ - telnet -RUN mkdir /source -WORKDIR /source -ADD ./requirements.txt ./ -RUN pip 
install -r requirements.txt -COPY . . -CMD python mishards/main.py diff --git a/build.sh b/build.sh deleted file mode 100755 index fad30518f2..0000000000 --- a/build.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -BOLD=`tput bold` -NORMAL=`tput sgr0` -YELLOW='\033[1;33m' -ENDC='\033[0m' - -echo -e "${BOLD}MISHARDS_REGISTRY=${MISHARDS_REGISTRY}${ENDC}" - -function build_image() { - dockerfile=$1 - remote_registry=$2 - tagged=$2 - buildcmd="docker build -t ${tagged} -f ${dockerfile} ." - echo -e "${BOLD}$buildcmd${NORMAL}" - $buildcmd - pushcmd="docker push ${remote_registry}" - echo -e "${BOLD}$pushcmd${NORMAL}" - $pushcmd - echo -e "${YELLOW}${BOLD}Image: ${remote_registry}${NORMAL}${ENDC}" -} - -case "$1" in - -all) - [[ -z $MISHARDS_REGISTRY ]] && { - echo -e "${YELLOW}Error: Please set docker registry first:${ENDC}\n\t${BOLD}export MISHARDS_REGISTRY=xxxx\n${ENDC}" - exit 1 - } - - version="" - [[ ! -z $2 ]] && version=":${2}" - build_image "Dockerfile" "${MISHARDS_REGISTRY}${version}" "${MISHARDS_REGISTRY}" - ;; -*) - echo "Usage: [option...] 
{base | apps}" - echo "all, Usage: build.sh all [tagname|] => {docker_registry}:\${tagname}" - ;; -esac diff --git a/conftest.py b/conftest.py deleted file mode 100644 index 34e22af693..0000000000 --- a/conftest.py +++ /dev/null @@ -1,27 +0,0 @@ -import logging -import pytest -import grpc -from mishards import settings, db, create_app - -logger = logging.getLogger(__name__) - - -@pytest.fixture -def app(request): - app = create_app(settings.TestingConfig) - db.drop_all() - db.create_all() - - yield app - - db.drop_all() - - -@pytest.fixture -def started_app(app): - app.on_pre_run() - app.start(settings.SERVER_TEST_PORT) - - yield app - - app.stop() diff --git a/manager.py b/manager.py deleted file mode 100644 index 931c90ebc8..0000000000 --- a/manager.py +++ /dev/null @@ -1,28 +0,0 @@ -import fire -from mishards import db -from sqlalchemy import and_ - - -class DBHandler: - @classmethod - def create_all(cls): - db.create_all() - - @classmethod - def drop_all(cls): - db.drop_all() - - @classmethod - def fun(cls, tid): - from mishards.factories import TablesFactory, TableFilesFactory, Tables - f = db.Session.query(Tables).filter(and_( - Tables.table_id == tid, - Tables.state != Tables.TO_DELETE) - ).first() - print(f) - - # f1 = TableFilesFactory() - - -if __name__ == '__main__': - fire.Fire(DBHandler) diff --git a/mishards/.env.example b/mishards/.env.example deleted file mode 100644 index 0a23c0cf56..0000000000 --- a/mishards/.env.example +++ /dev/null @@ -1,33 +0,0 @@ -DEBUG=True - -WOSERVER=tcp://127.0.0.1:19530 -SERVER_PORT=19532 -SERVER_TEST_PORT=19888 - -SD_PROVIDER=Static - -SD_NAMESPACE=xp -SD_IN_CLUSTER=False -SD_POLL_INTERVAL=5 -SD_ROSERVER_POD_PATT=.*-ro-servers-.* -SD_LABEL_SELECTOR=tier=ro-servers - -SD_STATIC_HOSTS=127.0.0.1 -SD_STATIC_PORT=19530 - -#SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 -SQLALCHEMY_DATABASE_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False -SQL_ECHO=True - 
-#SQLALCHEMY_DATABASE_TEST_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 -SQLALCHEMY_DATABASE_TEST_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False -SQL_TEST_ECHO=False - -# TRACING_TEST_TYPE=jaeger -TRACING_TYPE=jaeger -TRACING_SERVICE_NAME=fortest -TRACING_SAMPLER_TYPE=const -TRACING_SAMPLER_PARAM=1 -TRACING_LOG_PAYLOAD=True -#TRACING_SAMPLER_TYPE=probabilistic -#TRACING_SAMPLER_PARAM=0.5 diff --git a/mishards/__init__.py b/mishards/__init__.py deleted file mode 100644 index 7db3d8cb5e..0000000000 --- a/mishards/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -import logging -from mishards import settings -logger = logging.getLogger() - -from mishards.db_base import DB -db = DB() - -from mishards.server import Server -grpc_server = Server() - - -def create_app(testing_config=None): - config = testing_config if testing_config else settings.DefaultConfig - db.init_db(uri=config.SQLALCHEMY_DATABASE_URI, echo=config.SQL_ECHO) - - from mishards.connections import ConnectionMgr - connect_mgr = ConnectionMgr() - - from sd import ProviderManager - - sd_proiver_class = ProviderManager.get_provider(settings.SD_PROVIDER) - discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, conn_mgr=connect_mgr) - - from tracing.factory import TracerFactory - from mishards.grpc_utils import GrpcSpanDecorator - tracer = TracerFactory.new_tracer(config.TRACING_TYPE, settings.TracingConfig, - span_decorator=GrpcSpanDecorator()) - - from mishards.routings import RouterFactory - router = RouterFactory.new_router(config.ROUTER_CLASS_NAME, connect_mgr) - - grpc_server.init_app(conn_mgr=connect_mgr, tracer=tracer, router=router, discover=discover) - - from mishards import exception_handlers - - return grpc_server diff --git a/mishards/connections.py b/mishards/connections.py deleted file mode 100644 index 618690a099..0000000000 --- a/mishards/connections.py +++ /dev/null @@ -1,154 +0,0 @@ -import logging -import threading -from functools import wraps 
-from milvus import Milvus - -from mishards import (settings, exceptions) -from utils import singleton - -logger = logging.getLogger(__name__) - - -class Connection: - def __init__(self, name, uri, max_retry=1, error_handlers=None, **kwargs): - self.name = name - self.uri = uri - self.max_retry = max_retry - self.retried = 0 - self.conn = Milvus() - self.error_handlers = [] if not error_handlers else error_handlers - self.on_retry_func = kwargs.get('on_retry_func', None) - # self._connect() - - def __str__(self): - return 'Connection:name=\"{}\";uri=\"{}\"'.format(self.name, self.uri) - - def _connect(self, metadata=None): - try: - self.conn.connect(uri=self.uri) - except Exception as e: - if not self.error_handlers: - raise exceptions.ConnectionConnectError(message=str(e), metadata=metadata) - for handler in self.error_handlers: - handler(e, metadata=metadata) - - @property - def can_retry(self): - return self.retried < self.max_retry - - @property - def connected(self): - return self.conn.connected() - - def on_retry(self): - if self.on_retry_func: - self.on_retry_func(self) - else: - self.retried > 1 and logger.warning('{} is retrying {}'.format(self, self.retried)) - - def on_connect(self, metadata=None): - while not self.connected and self.can_retry: - self.retried += 1 - self.on_retry() - self._connect(metadata=metadata) - - if not self.can_retry and not self.connected: - raise exceptions.ConnectionConnectError(message='Max retry {} reached!'.format(self.max_retry, - metadata=metadata)) - - self.retried = 0 - - def connect(self, func, exception_handler=None): - @wraps(func) - def inner(*args, **kwargs): - self.on_connect() - try: - return func(*args, **kwargs) - except Exception as e: - if exception_handler: - exception_handler(e) - else: - raise e - return inner - - -@singleton -class ConnectionMgr: - def __init__(self): - self.metas = {} - self.conns = {} - - @property - def conn_names(self): - return set(self.metas.keys()) - set(['WOSERVER']) - - def 
conn(self, name, metadata, throw=False): - c = self.conns.get(name, None) - if not c: - url = self.metas.get(name, None) - if not url: - if not throw: - return None - raise exceptions.ConnectionNotFoundError(message='Connection {} not found'.format(name), - metadata=metadata) - this_conn = Connection(name=name, uri=url, max_retry=settings.MAX_RETRY) - threaded = { - threading.get_ident(): this_conn - } - self.conns[name] = threaded - return this_conn - - tid = threading.get_ident() - rconn = c.get(tid, None) - if not rconn: - url = self.metas.get(name, None) - if not url: - if not throw: - return None - raise exceptions.ConnectionNotFoundError('Connection {} not found'.format(name), - metadata=metadata) - this_conn = Connection(name=name, uri=url, max_retry=settings.MAX_RETRY) - c[tid] = this_conn - return this_conn - - return rconn - - def on_new_meta(self, name, url): - logger.info('Register Connection: name={};url={}'.format(name, url)) - self.metas[name] = url - - def on_duplicate_meta(self, name, url): - if self.metas[name] == url: - return self.on_same_meta(name, url) - - return self.on_diff_meta(name, url) - - def on_same_meta(self, name, url): - # logger.warning('Register same meta: {}:{}'.format(name, url)) - pass - - def on_diff_meta(self, name, url): - logger.warning('Received {} with diff url={}'.format(name, url)) - self.metas[name] = url - self.conns[name] = {} - - def on_unregister_meta(self, name, url): - logger.info('Unregister name={};url={}'.format(name, url)) - self.conns.pop(name, None) - - def on_nonexisted_meta(self, name): - logger.warning('Non-existed meta: {}'.format(name)) - - def register(self, name, url): - meta = self.metas.get(name) - if not meta: - return self.on_new_meta(name, url) - else: - return self.on_duplicate_meta(name, url) - - def unregister(self, name): - logger.info('Unregister Connection: name={}'.format(name)) - url = self.metas.pop(name, None) - if url is None: - return self.on_nonexisted_meta(name) - return 
self.on_unregister_meta(name, url) diff --git a/mishards/db_base.py b/mishards/db_base.py deleted file mode 100644 index 5f2eee9ba1..0000000000 --- a/mishards/db_base.py +++ /dev/null @@ -1,52 +0,0 @@ -import logging -from sqlalchemy import create_engine -from sqlalchemy.engine.url import make_url -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker, scoped_session -from sqlalchemy.orm.session import Session as SessionBase - -logger = logging.getLogger(__name__) - - -class LocalSession(SessionBase): - def __init__(self, db, autocommit=False, autoflush=True, **options): - self.db = db - bind = options.pop('bind', None) or db.engine - SessionBase.__init__(self, autocommit=autocommit, autoflush=autoflush, bind=bind, **options) - - -class DB: - Model = declarative_base() - - def __init__(self, uri=None, echo=False): - self.echo = echo - uri and self.init_db(uri, echo) - self.session_factory = scoped_session(sessionmaker(class_=LocalSession, db=self)) - - def init_db(self, uri, echo=False): - url = make_url(uri) - if url.get_backend_name() == 'sqlite': - self.engine = create_engine(url) - else: - self.engine = create_engine(uri, pool_size=100, pool_recycle=5, pool_timeout=30, - pool_pre_ping=True, - echo=echo, - max_overflow=0) - self.uri = uri - self.url = url - - def __str__(self): - return ''.format(self.url.get_backend_name(), self.url.database) - - @property - def Session(self): - return self.session_factory() - - def remove_session(self): - self.session_factory.remove() - - def drop_all(self): - self.Model.metadata.drop_all(self.engine) - - def create_all(self): - self.Model.metadata.create_all(self.engine) diff --git a/mishards/exception_codes.py b/mishards/exception_codes.py deleted file mode 100644 index bdd4572dd5..0000000000 --- a/mishards/exception_codes.py +++ /dev/null @@ -1,10 +0,0 @@ -INVALID_CODE = -1 - -CONNECT_ERROR_CODE = 10001 -CONNECTTION_NOT_FOUND_CODE = 10002 -DB_ERROR_CODE = 10003 - 
-TABLE_NOT_FOUND_CODE = 20001 -INVALID_ARGUMENT_CODE = 20002 -INVALID_DATE_RANGE_CODE = 20003 -INVALID_TOPK_CODE = 20004 diff --git a/mishards/exception_handlers.py b/mishards/exception_handlers.py deleted file mode 100644 index c79a6db5a3..0000000000 --- a/mishards/exception_handlers.py +++ /dev/null @@ -1,82 +0,0 @@ -import logging -from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 -from mishards import grpc_server as server, exceptions - -logger = logging.getLogger(__name__) - - -def resp_handler(err, error_code): - if not isinstance(err, exceptions.BaseException): - return status_pb2.Status(error_code=error_code, reason=str(err)) - - status = status_pb2.Status(error_code=error_code, reason=err.message) - - if err.metadata is None: - return status - - resp_class = err.metadata.get('resp_class', None) - if not resp_class: - return status - - if resp_class == milvus_pb2.BoolReply: - return resp_class(status=status, bool_reply=False) - - if resp_class == milvus_pb2.VectorIds: - return resp_class(status=status, vector_id_array=[]) - - if resp_class == milvus_pb2.TopKQueryResultList: - return resp_class(status=status, topk_query_result=[]) - - if resp_class == milvus_pb2.TableRowCount: - return resp_class(status=status, table_row_count=-1) - - if resp_class == milvus_pb2.TableName: - return resp_class(status=status, table_name=[]) - - if resp_class == milvus_pb2.StringReply: - return resp_class(status=status, string_reply='') - - if resp_class == milvus_pb2.TableSchema: - return milvus_pb2.TableSchema( - status=status - ) - - if resp_class == milvus_pb2.IndexParam: - return milvus_pb2.IndexParam( - table_name=milvus_pb2.TableName( - status=status - ) - ) - - status.error_code = status_pb2.UNEXPECTED_ERROR - return status - - -@server.errorhandler(exceptions.TableNotFoundError) -def TableNotFoundErrorHandler(err): - logger.error(err) - return resp_handler(err, status_pb2.TABLE_NOT_EXISTS) - - -@server.errorhandler(exceptions.InvalidTopKError) -def 
InvalidTopKErrorHandler(err): - logger.error(err) - return resp_handler(err, status_pb2.ILLEGAL_TOPK) - - -@server.errorhandler(exceptions.InvalidArgumentError) -def InvalidArgumentErrorHandler(err): - logger.error(err) - return resp_handler(err, status_pb2.ILLEGAL_ARGUMENT) - - -@server.errorhandler(exceptions.DBError) -def DBErrorHandler(err): - logger.error(err) - return resp_handler(err, status_pb2.UNEXPECTED_ERROR) - - -@server.errorhandler(exceptions.InvalidRangeError) -def InvalidArgumentErrorHandler(err): - logger.error(err) - return resp_handler(err, status_pb2.ILLEGAL_RANGE) diff --git a/mishards/exceptions.py b/mishards/exceptions.py deleted file mode 100644 index 72839f88d2..0000000000 --- a/mishards/exceptions.py +++ /dev/null @@ -1,38 +0,0 @@ -import mishards.exception_codes as codes - - -class BaseException(Exception): - code = codes.INVALID_CODE - message = 'BaseException' - - def __init__(self, message='', metadata=None): - self.message = self.__class__.__name__ if not message else message - self.metadata = metadata - - -class ConnectionConnectError(BaseException): - code = codes.CONNECT_ERROR_CODE - - -class ConnectionNotFoundError(BaseException): - code = codes.CONNECTTION_NOT_FOUND_CODE - - -class DBError(BaseException): - code = codes.DB_ERROR_CODE - - -class TableNotFoundError(BaseException): - code = codes.TABLE_NOT_FOUND_CODE - - -class InvalidTopKError(BaseException): - code = codes.INVALID_TOPK_CODE - - -class InvalidArgumentError(BaseException): - code = codes.INVALID_ARGUMENT_CODE - - -class InvalidRangeError(BaseException): - code = codes.INVALID_DATE_RANGE_CODE diff --git a/mishards/factories.py b/mishards/factories.py deleted file mode 100644 index 52c0253b39..0000000000 --- a/mishards/factories.py +++ /dev/null @@ -1,54 +0,0 @@ -import time -import datetime -import random -import factory -from factory.alchemy import SQLAlchemyModelFactory -from faker import Faker -from faker.providers import BaseProvider - -from milvus.client.types 
import MetricType -from mishards import db -from mishards.models import Tables, TableFiles - - -class FakerProvider(BaseProvider): - def this_date(self): - t = datetime.datetime.today() - return (t.year - 1900) * 10000 + (t.month - 1) * 100 + t.day - - -factory.Faker.add_provider(FakerProvider) - - -class TablesFactory(SQLAlchemyModelFactory): - class Meta: - model = Tables - sqlalchemy_session = db.session_factory - sqlalchemy_session_persistence = 'commit' - - id = factory.Faker('random_number', digits=16, fix_len=True) - table_id = factory.Faker('uuid4') - state = factory.Faker('random_element', elements=(0, 1)) - dimension = factory.Faker('random_element', elements=(256, 512)) - created_on = int(time.time()) - index_file_size = 0 - engine_type = factory.Faker('random_element', elements=(0, 1, 2, 3)) - metric_type = factory.Faker('random_element', elements=(MetricType.L2, MetricType.IP)) - nlist = 16384 - - -class TableFilesFactory(SQLAlchemyModelFactory): - class Meta: - model = TableFiles - sqlalchemy_session = db.session_factory - sqlalchemy_session_persistence = 'commit' - - id = factory.Faker('random_number', digits=16, fix_len=True) - table = factory.SubFactory(TablesFactory) - engine_type = factory.Faker('random_element', elements=(0, 1, 2, 3)) - file_id = factory.Faker('uuid4') - file_type = factory.Faker('random_element', elements=(0, 1, 2, 3, 4)) - file_size = factory.Faker('random_number') - updated_time = int(time.time()) - created_on = int(time.time()) - date = factory.Faker('this_date') diff --git a/mishards/grpc_utils/__init__.py b/mishards/grpc_utils/__init__.py deleted file mode 100644 index f5225b2a66..0000000000 --- a/mishards/grpc_utils/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -from grpc_opentracing import SpanDecorator -from milvus.grpc_gen import status_pb2 - - -class GrpcSpanDecorator(SpanDecorator): - def __call__(self, span, rpc_info): - status = None - if not rpc_info.response: - return - if isinstance(rpc_info.response, 
status_pb2.Status): - status = rpc_info.response - else: - try: - status = rpc_info.response.status - except Exception as e: - status = status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR, - reason='Should not happen') - - if status.error_code == 0: - return - error_log = {'event': 'error', - 'request': rpc_info.request, - 'response': rpc_info.response - } - span.set_tag('error', True) - span.log_kv(error_log) - - -def mark_grpc_method(func): - setattr(func, 'grpc_method', True) - return func - - -def is_grpc_method(func): - if not func: - return False - return getattr(func, 'grpc_method', False) diff --git a/mishards/grpc_utils/grpc_args_parser.py b/mishards/grpc_utils/grpc_args_parser.py deleted file mode 100644 index 039299803d..0000000000 --- a/mishards/grpc_utils/grpc_args_parser.py +++ /dev/null @@ -1,102 +0,0 @@ -from milvus import Status -from functools import wraps - - -def error_status(func): - @wraps(func) - def inner(*args, **kwargs): - try: - results = func(*args, **kwargs) - except Exception as e: - return Status(code=Status.UNEXPECTED_ERROR, message=str(e)), None - - return Status(code=0, message="Success"), results - - return inner - - -class GrpcArgsParser(object): - - @classmethod - @error_status - def parse_proto_TableSchema(cls, param): - _table_schema = { - 'status': param.status, - 'table_name': param.table_name, - 'dimension': param.dimension, - 'index_file_size': param.index_file_size, - 'metric_type': param.metric_type - } - - return _table_schema - - @classmethod - @error_status - def parse_proto_TableName(cls, param): - return param.table_name - - @classmethod - @error_status - def parse_proto_Index(cls, param): - _index = { - 'index_type': param.index_type, - 'nlist': param.nlist - } - - return _index - - @classmethod - @error_status - def parse_proto_IndexParam(cls, param): - _table_name = param.table_name - _status, _index = cls.parse_proto_Index(param.index) - - if not _status.OK(): - raise Exception("Argument parse error") - - 
return _table_name, _index - - @classmethod - @error_status - def parse_proto_Command(cls, param): - _cmd = param.cmd - - return _cmd - - @classmethod - @error_status - def parse_proto_Range(cls, param): - _start_value = param.start_value - _end_value = param.end_value - - return _start_value, _end_value - - @classmethod - @error_status - def parse_proto_RowRecord(cls, param): - return list(param.vector_data) - - @classmethod - @error_status - def parse_proto_SearchParam(cls, param): - _table_name = param.table_name - _topk = param.topk - _nprobe = param.nprobe - _status, _range = cls.parse_proto_Range(param.query_range_array) - - if not _status.OK(): - raise Exception("Argument parse error") - - _row_record = param.query_record_array - - return _table_name, _row_record, _range, _topk - - @classmethod - @error_status - def parse_proto_DeleteByRangeParam(cls, param): - _table_name = param.table_name - _range = param.range - _start_value = _range.start_value - _end_value = _range.end_value - - return _table_name, _start_value, _end_value diff --git a/mishards/grpc_utils/grpc_args_wrapper.py b/mishards/grpc_utils/grpc_args_wrapper.py deleted file mode 100644 index 7447dbd995..0000000000 --- a/mishards/grpc_utils/grpc_args_wrapper.py +++ /dev/null @@ -1,4 +0,0 @@ -# class GrpcArgsWrapper(object): - -# @classmethod -# def proto_TableName(cls): diff --git a/mishards/grpc_utils/test_grpc.py b/mishards/grpc_utils/test_grpc.py deleted file mode 100644 index 9af09e5d0d..0000000000 --- a/mishards/grpc_utils/test_grpc.py +++ /dev/null @@ -1,75 +0,0 @@ -import logging -import opentracing -from mishards.grpc_utils import GrpcSpanDecorator, is_grpc_method -from milvus.grpc_gen import status_pb2, milvus_pb2 - -logger = logging.getLogger(__name__) - - -class FakeTracer(opentracing.Tracer): - pass - - -class FakeSpan(opentracing.Span): - def __init__(self, context, tracer, **kwargs): - super(FakeSpan, self).__init__(tracer, context) - self.reset() - - def set_tag(self, key, value): 
- self.tags.append({key: value}) - - def log_kv(self, key_values, timestamp=None): - self.logs.append(key_values) - - def reset(self): - self.tags = [] - self.logs = [] - - -class FakeRpcInfo: - def __init__(self, request, response): - self.request = request - self.response = response - - -class TestGrpcUtils: - def test_span_deco(self): - request = 'request' - OK = status_pb2.Status(error_code=status_pb2.SUCCESS, reason='Success') - response = OK - rpc_info = FakeRpcInfo(request=request, response=response) - span = FakeSpan(context=None, tracer=FakeTracer()) - span_deco = GrpcSpanDecorator() - span_deco(span, rpc_info) - assert len(span.logs) == 0 - assert len(span.tags) == 0 - - response = milvus_pb2.BoolReply(status=OK, bool_reply=False) - rpc_info = FakeRpcInfo(request=request, response=response) - span = FakeSpan(context=None, tracer=FakeTracer()) - span_deco = GrpcSpanDecorator() - span_deco(span, rpc_info) - assert len(span.logs) == 0 - assert len(span.tags) == 0 - - response = 1 - rpc_info = FakeRpcInfo(request=request, response=response) - span = FakeSpan(context=None, tracer=FakeTracer()) - span_deco = GrpcSpanDecorator() - span_deco(span, rpc_info) - assert len(span.logs) == 1 - assert len(span.tags) == 1 - - response = 0 - rpc_info = FakeRpcInfo(request=request, response=response) - span = FakeSpan(context=None, tracer=FakeTracer()) - span_deco = GrpcSpanDecorator() - span_deco(span, rpc_info) - assert len(span.logs) == 0 - assert len(span.tags) == 0 - - def test_is_grpc_method(self): - target = 1 - assert not is_grpc_method(target) - target = None - assert not is_grpc_method(target) diff --git a/mishards/hash_ring.py b/mishards/hash_ring.py deleted file mode 100644 index a97f3f580e..0000000000 --- a/mishards/hash_ring.py +++ /dev/null @@ -1,150 +0,0 @@ -import math -import sys -from bisect import bisect - -if sys.version_info >= (2, 5): - import hashlib - md5_constructor = hashlib.md5 -else: - import md5 - md5_constructor = md5.new - - -class 
HashRing(object): - def __init__(self, nodes=None, weights=None): - """`nodes` is a list of objects that have a proper __str__ representation. - `weights` is dictionary that sets weights to the nodes. The default - weight is that all nodes are equal. - """ - self.ring = dict() - self._sorted_keys = [] - - self.nodes = nodes - - if not weights: - weights = {} - self.weights = weights - - self._generate_circle() - - def _generate_circle(self): - """Generates the circle. - """ - total_weight = 0 - for node in self.nodes: - total_weight += self.weights.get(node, 1) - - for node in self.nodes: - weight = 1 - - if node in self.weights: - weight = self.weights.get(node) - - factor = math.floor((40 * len(self.nodes) * weight) / total_weight) - - for j in range(0, int(factor)): - b_key = self._hash_digest('%s-%s' % (node, j)) - - for i in range(0, 3): - key = self._hash_val(b_key, lambda x: x + i * 4) - self.ring[key] = node - self._sorted_keys.append(key) - - self._sorted_keys.sort() - - def get_node(self, string_key): - """Given a string key a corresponding node in the hash ring is returned. - - If the hash ring is empty, `None` is returned. - """ - pos = self.get_node_pos(string_key) - if pos is None: - return None - return self.ring[self._sorted_keys[pos]] - - def get_node_pos(self, string_key): - """Given a string key a corresponding node in the hash ring is returned - along with it's position in the ring. - - If the hash ring is empty, (`None`, `None`) is returned. - """ - if not self.ring: - return None - - key = self.gen_key(string_key) - - nodes = self._sorted_keys - pos = bisect(nodes, key) - - if pos == len(nodes): - return 0 - else: - return pos - - def iterate_nodes(self, string_key, distinct=True): - """Given a string key it returns the nodes as a generator that can hold the key. - - The generator iterates one time through the ring - starting at the correct position. - - if `distinct` is set, then the nodes returned will be unique, - i.e. 
no virtual copies will be returned. - """ - if not self.ring: - yield None, None - - returned_values = set() - - def distinct_filter(value): - if str(value) not in returned_values: - returned_values.add(str(value)) - return value - - pos = self.get_node_pos(string_key) - for key in self._sorted_keys[pos:]: - val = distinct_filter(self.ring[key]) - if val: - yield val - - for i, key in enumerate(self._sorted_keys): - if i < pos: - val = distinct_filter(self.ring[key]) - if val: - yield val - - def gen_key(self, key): - """Given a string key it returns a long value, - this long value represents a place on the hash ring. - - md5 is currently used because it mixes well. - """ - b_key = self._hash_digest(key) - return self._hash_val(b_key, lambda x: x) - - def _hash_val(self, b_key, entry_fn): - return (b_key[entry_fn(3)] << 24) | (b_key[entry_fn(2)] << 16) | ( - b_key[entry_fn(1)] << 8) | b_key[entry_fn(0)] - - def _hash_digest(self, key): - m = md5_constructor() - key = key.encode() - m.update(key) - return m.digest() - - -if __name__ == '__main__': - from collections import defaultdict - servers = [ - '192.168.0.246:11212', '192.168.0.247:11212', '192.168.0.248:11212', - '192.168.0.249:11212' - ] - - ring = HashRing(servers) - keys = ['{}'.format(i) for i in range(100)] - mapped = defaultdict(list) - for k in keys: - server = ring.get_node(k) - mapped[server].append(k) - - for k, v in mapped.items(): - print(k, v) diff --git a/mishards/main.py b/mishards/main.py deleted file mode 100644 index c0d142607b..0000000000 --- a/mishards/main.py +++ /dev/null @@ -1,15 +0,0 @@ -import os -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from mishards import (settings, create_app) - - -def main(): - server = create_app(settings.DefaultConfig) - server.run(port=settings.SERVER_PORT) - return 0 - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/mishards/models.py b/mishards/models.py deleted file mode 100644 index 
4b6c8f9ef4..0000000000 --- a/mishards/models.py +++ /dev/null @@ -1,76 +0,0 @@ -import logging -from sqlalchemy import (Integer, Boolean, Text, - String, BigInteger, and_, or_, - Column) -from sqlalchemy.orm import relationship, backref - -from mishards import db - -logger = logging.getLogger(__name__) - - -class TableFiles(db.Model): - FILE_TYPE_NEW = 0 - FILE_TYPE_RAW = 1 - FILE_TYPE_TO_INDEX = 2 - FILE_TYPE_INDEX = 3 - FILE_TYPE_TO_DELETE = 4 - FILE_TYPE_NEW_MERGE = 5 - FILE_TYPE_NEW_INDEX = 6 - FILE_TYPE_BACKUP = 7 - - __tablename__ = 'TableFiles' - - id = Column(BigInteger, primary_key=True, autoincrement=True) - table_id = Column(String(50)) - engine_type = Column(Integer) - file_id = Column(String(50)) - file_type = Column(Integer) - file_size = Column(Integer, default=0) - row_count = Column(Integer, default=0) - updated_time = Column(BigInteger) - created_on = Column(BigInteger) - date = Column(Integer) - - table = relationship( - 'Tables', - primaryjoin='and_(foreign(TableFiles.table_id) == Tables.table_id)', - backref=backref('files', uselist=True, lazy='dynamic') - ) - - -class Tables(db.Model): - TO_DELETE = 1 - NORMAL = 0 - - __tablename__ = 'Tables' - - id = Column(BigInteger, primary_key=True, autoincrement=True) - table_id = Column(String(50), unique=True) - state = Column(Integer) - dimension = Column(Integer) - created_on = Column(Integer) - flag = Column(Integer, default=0) - index_file_size = Column(Integer) - engine_type = Column(Integer) - nlist = Column(Integer) - metric_type = Column(Integer) - - def files_to_search(self, date_range=None): - cond = or_( - TableFiles.file_type == TableFiles.FILE_TYPE_RAW, - TableFiles.file_type == TableFiles.FILE_TYPE_TO_INDEX, - TableFiles.file_type == TableFiles.FILE_TYPE_INDEX, - ) - if date_range: - cond = and_( - cond, - or_( - and_(TableFiles.date >= d[0], TableFiles.date < d[1]) for d in date_range - ) - ) - - files = self.files.filter(cond) - - logger.debug('DATE_RANGE: {}'.format(date_range)) - 
return files diff --git a/mishards/routings.py b/mishards/routings.py deleted file mode 100644 index 823972726f..0000000000 --- a/mishards/routings.py +++ /dev/null @@ -1,96 +0,0 @@ -import logging -from sqlalchemy import exc as sqlalchemy_exc -from sqlalchemy import and_ - -from mishards import exceptions, db -from mishards.hash_ring import HashRing -from mishards.models import Tables - -logger = logging.getLogger(__name__) - - -class RouteManager: - ROUTER_CLASSES = {} - - @classmethod - def register_router_class(cls, target): - name = target.__dict__.get('NAME', None) - name = name if name else target.__class__.__name__ - cls.ROUTER_CLASSES[name] = target - return target - - @classmethod - def get_router_class(cls, name): - return cls.ROUTER_CLASSES.get(name, None) - - -class RouterFactory: - @classmethod - def new_router(cls, name, conn_mgr, **kwargs): - router_class = RouteManager.get_router_class(name) - assert router_class - return router_class(conn_mgr, **kwargs) - - -class RouterMixin: - def __init__(self, conn_mgr): - self.conn_mgr = conn_mgr - - def routing(self, table_name, metadata=None, **kwargs): - raise NotImplemented() - - def connection(self, metadata=None): - conn = self.conn_mgr.conn('WOSERVER', metadata=metadata) - if conn: - conn.on_connect(metadata=metadata) - return conn.conn - - def query_conn(self, name, metadata=None): - conn = self.conn_mgr.conn(name, metadata=metadata) - if not conn: - raise exceptions.ConnectionNotFoundError(name, metadata=metadata) - conn.on_connect(metadata=metadata) - return conn.conn - - -@RouteManager.register_router_class -class FileBasedHashRingRouter(RouterMixin): - NAME = 'FileBasedHashRingRouter' - - def __init__(self, conn_mgr, **kwargs): - super(FileBasedHashRingRouter, self).__init__(conn_mgr) - - def routing(self, table_name, metadata=None, **kwargs): - range_array = kwargs.pop('range_array', None) - return self._route(table_name, range_array, metadata, **kwargs) - - def _route(self, table_name, 
range_array, metadata=None, **kwargs): - # PXU TODO: Implement Thread-local Context - # PXU TODO: Session life mgt - try: - table = db.Session.query(Tables).filter( - and_(Tables.table_id == table_name, - Tables.state != Tables.TO_DELETE)).first() - except sqlalchemy_exc.SQLAlchemyError as e: - raise exceptions.DBError(message=str(e), metadata=metadata) - - if not table: - raise exceptions.TableNotFoundError(table_name, metadata=metadata) - files = table.files_to_search(range_array) - db.remove_session() - - servers = self.conn_mgr.conn_names - logger.info('Available servers: {}'.format(servers)) - - ring = HashRing(servers) - - routing = {} - - for f in files: - target_host = ring.get_node(str(f.id)) - sub = routing.get(target_host, None) - if not sub: - routing[target_host] = {'table_id': table_name, 'file_ids': []} - routing[target_host]['file_ids'].append(str(f.id)) - - return routing diff --git a/mishards/server.py b/mishards/server.py deleted file mode 100644 index 599a00e455..0000000000 --- a/mishards/server.py +++ /dev/null @@ -1,122 +0,0 @@ -import logging -import grpc -import time -import socket -import inspect -from urllib.parse import urlparse -from functools import wraps -from concurrent import futures -from grpc._cython import cygrpc -from milvus.grpc_gen.milvus_pb2_grpc import add_MilvusServiceServicer_to_server -from mishards.grpc_utils import is_grpc_method -from mishards.service_handler import ServiceHandler -from mishards import settings - -logger = logging.getLogger(__name__) - - -class Server: - def __init__(self): - self.pre_run_handlers = set() - self.grpc_methods = set() - self.error_handlers = {} - self.exit_flag = False - - def init_app(self, - conn_mgr, - tracer, - router, - discover, - port=19530, - max_workers=10, - **kwargs): - self.port = int(port) - self.conn_mgr = conn_mgr - self.tracer = tracer - self.router = router - self.discover = discover - - self.server_impl = grpc.server( - 
thread_pool=futures.ThreadPoolExecutor(max_workers=max_workers), - options=[(cygrpc.ChannelArgKey.max_send_message_length, -1), - (cygrpc.ChannelArgKey.max_receive_message_length, -1)]) - - self.server_impl = self.tracer.decorate(self.server_impl) - - self.register_pre_run_handler(self.pre_run_handler) - - def pre_run_handler(self): - woserver = settings.WOSERVER - url = urlparse(woserver) - ip = socket.gethostbyname(url.hostname) - socket.inet_pton(socket.AF_INET, ip) - self.conn_mgr.register( - 'WOSERVER', '{}://{}:{}'.format(url.scheme, ip, url.port or 80)) - - def register_pre_run_handler(self, func): - logger.info('Regiterring {} into server pre_run_handlers'.format(func)) - self.pre_run_handlers.add(func) - return func - - def wrap_method_with_errorhandler(self, func): - @wraps(func) - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as e: - if e.__class__ in self.error_handlers: - return self.error_handlers[e.__class__](e) - raise - - return wrapper - - def errorhandler(self, exception): - if inspect.isclass(exception) and issubclass(exception, Exception): - - def wrapper(func): - self.error_handlers[exception] = func - return func - - return wrapper - return exception - - def on_pre_run(self): - for handler in self.pre_run_handlers: - handler() - self.discover.start() - - def start(self, port=None): - handler_class = self.decorate_handler(ServiceHandler) - add_MilvusServiceServicer_to_server( - handler_class(tracer=self.tracer, - router=self.router), self.server_impl) - self.server_impl.add_insecure_port("[::]:{}".format( - str(port or self.port))) - self.server_impl.start() - - def run(self, port): - logger.info('Milvus server start ......') - port = port or self.port - self.on_pre_run() - - self.start(port) - logger.info('Listening on port {}'.format(port)) - - try: - while not self.exit_flag: - time.sleep(5) - except KeyboardInterrupt: - self.stop() - - def stop(self): - logger.info('Server is shuting down ......') - 
self.exit_flag = True - self.server_impl.stop(0) - self.tracer.close() - logger.info('Server is closed') - - def decorate_handler(self, handler): - for key, attr in handler.__dict__.items(): - if is_grpc_method(attr): - setattr(handler, key, self.wrap_method_with_errorhandler(attr)) - return handler diff --git a/mishards/service_handler.py b/mishards/service_handler.py deleted file mode 100644 index 5e91c14f14..0000000000 --- a/mishards/service_handler.py +++ /dev/null @@ -1,475 +0,0 @@ -import logging -import time -import datetime -from collections import defaultdict - -import multiprocessing -from concurrent.futures import ThreadPoolExecutor -from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 -from milvus.grpc_gen.milvus_pb2 import TopKQueryResult -from milvus.client.abstract import Range -from milvus.client import types as Types - -from mishards import (db, settings, exceptions) -from mishards.grpc_utils import mark_grpc_method -from mishards.grpc_utils.grpc_args_parser import GrpcArgsParser as Parser -from mishards import utilities - -logger = logging.getLogger(__name__) - - -class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): - MAX_NPROBE = 2048 - MAX_TOPK = 2048 - - def __init__(self, tracer, router, max_workers=multiprocessing.cpu_count(), **kwargs): - self.table_meta = {} - self.error_handlers = {} - self.tracer = tracer - self.router = router - self.max_workers = max_workers - - def _do_merge(self, files_n_topk_results, topk, reverse=False, **kwargs): - status = status_pb2.Status(error_code=status_pb2.SUCCESS, - reason="Success") - if not files_n_topk_results: - return status, [] - - request_results = defaultdict(list) - - calc_time = time.time() - for files_collection in files_n_topk_results: - if isinstance(files_collection, tuple): - status, _ = files_collection - return status, [] - for request_pos, each_request_results in enumerate( - files_collection.topk_query_result): - request_results[request_pos].extend( - 
each_request_results.query_result_arrays) - request_results[request_pos] = sorted( - request_results[request_pos], - key=lambda x: x.distance, - reverse=reverse)[:topk] - - calc_time = time.time() - calc_time - logger.info('Merge takes {}'.format(calc_time)) - - results = sorted(request_results.items()) - topk_query_result = [] - - for result in results: - query_result = TopKQueryResult(query_result_arrays=result[1]) - topk_query_result.append(query_result) - - return status, topk_query_result - - def _do_query(self, - context, - table_id, - table_meta, - vectors, - topk, - nprobe, - range_array=None, - **kwargs): - metadata = kwargs.get('metadata', None) - range_array = [ - utilities.range_to_date(r, metadata=metadata) for r in range_array - ] if range_array else None - - routing = {} - p_span = None if self.tracer.empty else context.get_active_span( - ).context - with self.tracer.start_span('get_routing', child_of=p_span): - routing = self.router.routing(table_id, - range_array=range_array, - metadata=metadata) - logger.info('Routing: {}'.format(routing)) - - metadata = kwargs.get('metadata', None) - - rs = [] - all_topk_results = [] - - def search(addr, query_params, vectors, topk, nprobe, **kwargs): - logger.info( - 'Send Search Request: addr={};params={};nq={};topk={};nprobe={}' - .format(addr, query_params, len(vectors), topk, nprobe)) - - conn = self.router.query_conn(addr, metadata=metadata) - start = time.time() - span = kwargs.get('span', None) - span = span if span else (None if self.tracer.empty else - context.get_active_span().context) - - with self.tracer.start_span('search_{}'.format(addr), - child_of=span): - ret = conn.search_vectors_in_files( - table_name=query_params['table_id'], - file_ids=query_params['file_ids'], - query_records=vectors, - top_k=topk, - nprobe=nprobe, - lazy_=True) - end = time.time() - logger.info('search_vectors_in_files takes: {}'.format(end - start)) - - all_topk_results.append(ret) - - with 
self.tracer.start_span('do_search', child_of=p_span) as span: - with ThreadPoolExecutor(max_workers=self.max_workers) as pool: - for addr, params in routing.items(): - res = pool.submit(search, - addr, - params, - vectors, - topk, - nprobe, - span=span) - rs.append(res) - - for res in rs: - res.result() - - reverse = table_meta.metric_type == Types.MetricType.IP - with self.tracer.start_span('do_merge', child_of=p_span): - return self._do_merge(all_topk_results, - topk, - reverse=reverse, - metadata=metadata) - - def _create_table(self, table_schema): - return self.router.connection().create_table(table_schema) - - @mark_grpc_method - def CreateTable(self, request, context): - _status, _table_schema = Parser.parse_proto_TableSchema(request) - - if not _status.OK(): - return status_pb2.Status(error_code=_status.code, - reason=_status.message) - - logger.info('CreateTable {}'.format(_table_schema['table_name'])) - - _status = self._create_table(_table_schema) - - return status_pb2.Status(error_code=_status.code, - reason=_status.message) - - def _has_table(self, table_name, metadata=None): - return self.router.connection(metadata=metadata).has_table(table_name) - - @mark_grpc_method - def HasTable(self, request, context): - _status, _table_name = Parser.parse_proto_TableName(request) - - if not _status.OK(): - return milvus_pb2.BoolReply(status=status_pb2.Status( - error_code=_status.code, reason=_status.message), - bool_reply=False) - - logger.info('HasTable {}'.format(_table_name)) - - _status, _bool = self._has_table(_table_name, - metadata={'resp_class': milvus_pb2.BoolReply}) - - return milvus_pb2.BoolReply(status=status_pb2.Status( - error_code=_status.code, reason=_status.message), - bool_reply=_bool) - - def _delete_table(self, table_name): - return self.router.connection().delete_table(table_name) - - @mark_grpc_method - def DropTable(self, request, context): - _status, _table_name = Parser.parse_proto_TableName(request) - - if not _status.OK(): - return 
status_pb2.Status(error_code=_status.code, - reason=_status.message) - - logger.info('DropTable {}'.format(_table_name)) - - _status = self._delete_table(_table_name) - - return status_pb2.Status(error_code=_status.code, - reason=_status.message) - - def _create_index(self, table_name, index): - return self.router.connection().create_index(table_name, index) - - @mark_grpc_method - def CreateIndex(self, request, context): - _status, unpacks = Parser.parse_proto_IndexParam(request) - - if not _status.OK(): - return status_pb2.Status(error_code=_status.code, - reason=_status.message) - - _table_name, _index = unpacks - - logger.info('CreateIndex {}'.format(_table_name)) - - # TODO: interface create_table incompleted - _status = self._create_index(_table_name, _index) - - return status_pb2.Status(error_code=_status.code, - reason=_status.message) - - def _add_vectors(self, param, metadata=None): - return self.router.connection(metadata=metadata).add_vectors( - None, None, insert_param=param) - - @mark_grpc_method - def Insert(self, request, context): - logger.info('Insert') - # TODO: Ths SDK interface add_vectors() could update, add a key 'row_id_array' - _status, _ids = self._add_vectors( - metadata={'resp_class': milvus_pb2.VectorIds}, param=request) - return milvus_pb2.VectorIds(status=status_pb2.Status( - error_code=_status.code, reason=_status.message), - vector_id_array=_ids) - - @mark_grpc_method - def Search(self, request, context): - - table_name = request.table_name - - topk = request.topk - nprobe = request.nprobe - - logger.info('Search {}: topk={} nprobe={}'.format( - table_name, topk, nprobe)) - - metadata = {'resp_class': milvus_pb2.TopKQueryResultList} - - if nprobe > self.MAX_NPROBE or nprobe <= 0: - raise exceptions.InvalidArgumentError( - message='Invalid nprobe: {}'.format(nprobe), metadata=metadata) - - if topk > self.MAX_TOPK or topk <= 0: - raise exceptions.InvalidTopKError( - message='Invalid topk: {}'.format(topk), metadata=metadata) - - 
table_meta = self.table_meta.get(table_name, None) - - if not table_meta: - status, info = self.router.connection( - metadata=metadata).describe_table(table_name) - if not status.OK(): - raise exceptions.TableNotFoundError(table_name, - metadata=metadata) - - self.table_meta[table_name] = info - table_meta = info - - start = time.time() - - query_record_array = [] - - for query_record in request.query_record_array: - query_record_array.append(list(query_record.vector_data)) - - query_range_array = [] - for query_range in request.query_range_array: - query_range_array.append( - Range(query_range.start_value, query_range.end_value)) - - status, results = self._do_query(context, - table_name, - table_meta, - query_record_array, - topk, - nprobe, - query_range_array, - metadata=metadata) - - now = time.time() - logger.info('SearchVector takes: {}'.format(now - start)) - - topk_result_list = milvus_pb2.TopKQueryResultList( - status=status_pb2.Status(error_code=status.error_code, - reason=status.reason), - topk_query_result=results) - return topk_result_list - - @mark_grpc_method - def SearchInFiles(self, request, context): - raise NotImplemented() - - def _describe_table(self, table_name, metadata=None): - return self.router.connection(metadata=metadata).describe_table(table_name) - - @mark_grpc_method - def DescribeTable(self, request, context): - _status, _table_name = Parser.parse_proto_TableName(request) - - if not _status.OK(): - return milvus_pb2.TableSchema(status=status_pb2.Status( - error_code=_status.code, reason=_status.message), ) - - metadata = {'resp_class': milvus_pb2.TableSchema} - - logger.info('DescribeTable {}'.format(_table_name)) - _status, _table = self._describe_table(metadata=metadata, - table_name=_table_name) - - if _status.OK(): - return milvus_pb2.TableSchema( - table_name=_table_name, - index_file_size=_table.index_file_size, - dimension=_table.dimension, - metric_type=_table.metric_type, - status=status_pb2.Status(error_code=_status.code, - 
reason=_status.message), - ) - - return milvus_pb2.TableSchema( - table_name=_table_name, - status=status_pb2.Status(error_code=_status.code, - reason=_status.message), - ) - - def _count_table(self, table_name, metadata=None): - return self.router.connection( - metadata=metadata).get_table_row_count(table_name) - - @mark_grpc_method - def CountTable(self, request, context): - _status, _table_name = Parser.parse_proto_TableName(request) - - if not _status.OK(): - status = status_pb2.Status(error_code=_status.code, - reason=_status.message) - - return milvus_pb2.TableRowCount(status=status) - - logger.info('CountTable {}'.format(_table_name)) - - metadata = {'resp_class': milvus_pb2.TableRowCount} - _status, _count = self._count_table(_table_name, metadata=metadata) - - return milvus_pb2.TableRowCount( - status=status_pb2.Status(error_code=_status.code, - reason=_status.message), - table_row_count=_count if isinstance(_count, int) else -1) - - def _get_server_version(self, metadata=None): - return self.router.connection(metadata=metadata).server_version() - - @mark_grpc_method - def Cmd(self, request, context): - _status, _cmd = Parser.parse_proto_Command(request) - logger.info('Cmd: {}'.format(_cmd)) - - if not _status.OK(): - return milvus_pb2.StringReply(status=status_pb2.Status( - error_code=_status.code, reason=_status.message)) - - metadata = {'resp_class': milvus_pb2.StringReply} - - if _cmd == 'version': - _status, _reply = self._get_server_version(metadata=metadata) - else: - _status, _reply = self.router.connection( - metadata=metadata).server_status() - - return milvus_pb2.StringReply(status=status_pb2.Status( - error_code=_status.code, reason=_status.message), - string_reply=_reply) - - def _show_tables(self, metadata=None): - return self.router.connection(metadata=metadata).show_tables() - - @mark_grpc_method - def ShowTables(self, request, context): - logger.info('ShowTables') - metadata = {'resp_class': milvus_pb2.TableName} - _status, _results = 
self._show_tables(metadata=metadata) - - return milvus_pb2.TableNameList(status=status_pb2.Status( - error_code=_status.code, reason=_status.message), - table_names=_results) - - def _delete_by_range(self, table_name, start_date, end_date): - return self.router.connection().delete_vectors_by_range(table_name, - start_date, - end_date) - - @mark_grpc_method - def DeleteByRange(self, request, context): - _status, unpacks = \ - Parser.parse_proto_DeleteByRangeParam(request) - - if not _status.OK(): - return status_pb2.Status(error_code=_status.code, - reason=_status.message) - - _table_name, _start_date, _end_date = unpacks - - logger.info('DeleteByRange {}: {} {}'.format(_table_name, _start_date, - _end_date)) - _status = self._delete_by_range(_table_name, _start_date, _end_date) - return status_pb2.Status(error_code=_status.code, - reason=_status.message) - - def _preload_table(self, table_name): - return self.router.connection().preload_table(table_name) - - @mark_grpc_method - def PreloadTable(self, request, context): - _status, _table_name = Parser.parse_proto_TableName(request) - - if not _status.OK(): - return status_pb2.Status(error_code=_status.code, - reason=_status.message) - - logger.info('PreloadTable {}'.format(_table_name)) - _status = self._preload_table(_table_name) - return status_pb2.Status(error_code=_status.code, - reason=_status.message) - - def _describe_index(self, table_name, metadata=None): - return self.router.connection(metadata=metadata).describe_index(table_name) - - @mark_grpc_method - def DescribeIndex(self, request, context): - _status, _table_name = Parser.parse_proto_TableName(request) - - if not _status.OK(): - return milvus_pb2.IndexParam(status=status_pb2.Status( - error_code=_status.code, reason=_status.message)) - - metadata = {'resp_class': milvus_pb2.IndexParam} - - logger.info('DescribeIndex {}'.format(_table_name)) - _status, _index_param = self._describe_index(table_name=_table_name, - metadata=metadata) - - if not 
_index_param: - return milvus_pb2.IndexParam(status=status_pb2.Status( - error_code=_status.code, reason=_status.message)) - - _index = milvus_pb2.Index(index_type=_index_param._index_type, - nlist=_index_param._nlist) - - return milvus_pb2.IndexParam(status=status_pb2.Status( - error_code=_status.code, reason=_status.message), - table_name=_table_name, - index=_index) - - def _drop_index(self, table_name): - return self.router.connection().drop_index(table_name) - - @mark_grpc_method - def DropIndex(self, request, context): - _status, _table_name = Parser.parse_proto_TableName(request) - - if not _status.OK(): - return status_pb2.Status(error_code=_status.code, - reason=_status.message) - - logger.info('DropIndex {}'.format(_table_name)) - _status = self._drop_index(_table_name) - return status_pb2.Status(error_code=_status.code, - reason=_status.message) diff --git a/mishards/settings.py b/mishards/settings.py deleted file mode 100644 index 21a3bb7a65..0000000000 --- a/mishards/settings.py +++ /dev/null @@ -1,94 +0,0 @@ -import sys -import os - -from environs import Env -env = Env() - -FROM_EXAMPLE = env.bool('FROM_EXAMPLE', False) -if FROM_EXAMPLE: - from dotenv import load_dotenv - load_dotenv('./mishards/.env.example') -else: - env.read_env() - -DEBUG = env.bool('DEBUG', False) - -LOG_LEVEL = env.str('LOG_LEVEL', 'DEBUG' if DEBUG else 'INFO') -LOG_PATH = env.str('LOG_PATH', '/tmp/mishards') -LOG_NAME = env.str('LOG_NAME', 'logfile') -TIMEZONE = env.str('TIMEZONE', 'UTC') - -from utils.logger_helper import config -config(LOG_LEVEL, LOG_PATH, LOG_NAME, TIMEZONE) - -TIMEOUT = env.int('TIMEOUT', 60) -MAX_RETRY = env.int('MAX_RETRY', 3) - -SERVER_PORT = env.int('SERVER_PORT', 19530) -SERVER_TEST_PORT = env.int('SERVER_TEST_PORT', 19530) -WOSERVER = env.str('WOSERVER') - -SD_PROVIDER_SETTINGS = None -SD_PROVIDER = env.str('SD_PROVIDER', 'Kubernetes') -if SD_PROVIDER == 'Kubernetes': - from sd.kubernetes_provider import KubernetesProviderSettings - 
SD_PROVIDER_SETTINGS = KubernetesProviderSettings( - namespace=env.str('SD_NAMESPACE', ''), - in_cluster=env.bool('SD_IN_CLUSTER', False), - poll_interval=env.int('SD_POLL_INTERVAL', 5), - pod_patt=env.str('SD_ROSERVER_POD_PATT', ''), - label_selector=env.str('SD_LABEL_SELECTOR', ''), - port=env.int('SD_PORT', 19530)) -elif SD_PROVIDER == 'Static': - from sd.static_provider import StaticProviderSettings - SD_PROVIDER_SETTINGS = StaticProviderSettings( - hosts=env.list('SD_STATIC_HOSTS', []), - port=env.int('SD_STATIC_PORT', 19530)) - -# TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') - - -class TracingConfig: - TRACING_SERVICE_NAME = env.str('TRACING_SERVICE_NAME', 'mishards') - TRACING_VALIDATE = env.bool('TRACING_VALIDATE', True) - TRACING_LOG_PAYLOAD = env.bool('TRACING_LOG_PAYLOAD', False) - TRACING_CONFIG = { - 'sampler': { - 'type': env.str('TRACING_SAMPLER_TYPE', 'const'), - 'param': env.str('TRACING_SAMPLER_PARAM', "1"), - }, - 'local_agent': { - 'reporting_host': env.str('TRACING_REPORTING_HOST', '127.0.0.1'), - 'reporting_port': env.str('TRACING_REPORTING_PORT', '5775') - }, - 'logging': env.bool('TRACING_LOGGING', True) - } - DEFAULT_TRACING_CONFIG = { - 'sampler': { - 'type': env.str('TRACING_SAMPLER_TYPE', 'const'), - 'param': env.str('TRACING_SAMPLER_PARAM', "0"), - } - } - - -class DefaultConfig: - SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') - SQL_ECHO = env.bool('SQL_ECHO', False) - TRACING_TYPE = env.str('TRACING_TYPE', '') - ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_NAME', 'FileBasedHashRingRouter') - - -class TestingConfig(DefaultConfig): - SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI', '') - SQL_ECHO = env.bool('SQL_TEST_ECHO', False) - TRACING_TYPE = env.str('TRACING_TEST_TYPE', '') - ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_TEST_NAME', 'FileBasedHashRingRouter') - - -if __name__ == '__main__': - import logging - logger = logging.getLogger(__name__) - logger.debug('DEBUG') - 
logger.info('INFO') - logger.warn('WARN') - logger.error('ERROR') diff --git a/mishards/test_connections.py b/mishards/test_connections.py deleted file mode 100644 index 819d2e03da..0000000000 --- a/mishards/test_connections.py +++ /dev/null @@ -1,101 +0,0 @@ -import logging -import pytest -import mock - -from milvus import Milvus -from mishards.connections import (ConnectionMgr, Connection) -from mishards import exceptions - -logger = logging.getLogger(__name__) - - -@pytest.mark.usefixtures('app') -class TestConnection: - def test_manager(self): - mgr = ConnectionMgr() - - mgr.register('pod1', '111') - mgr.register('pod2', '222') - mgr.register('pod2', '222') - mgr.register('pod2', '2222') - assert len(mgr.conn_names) == 2 - - mgr.unregister('pod1') - assert len(mgr.conn_names) == 1 - - mgr.unregister('pod2') - assert len(mgr.conn_names) == 0 - - mgr.register('WOSERVER', 'xxxx') - assert len(mgr.conn_names) == 0 - - assert not mgr.conn('XXXX', None) - with pytest.raises(exceptions.ConnectionNotFoundError): - mgr.conn('XXXX', None, True) - - mgr.conn('WOSERVER', None) - - def test_connection(self): - class Conn: - def __init__(self, state): - self.state = state - - def connect(self, uri): - return self.state - - def connected(self): - return self.state - - FAIL_CONN = Conn(False) - PASS_CONN = Conn(True) - - class Retry: - def __init__(self): - self.times = 0 - - def __call__(self, conn): - self.times += 1 - logger.info('Retrying {}'.format(self.times)) - - class Func(): - def __init__(self): - self.executed = False - - def __call__(self): - self.executed = True - - max_retry = 3 - - RetryObj = Retry() - - c = Connection('client', - uri='xx', - max_retry=max_retry, - on_retry_func=RetryObj) - c.conn = FAIL_CONN - ff = Func() - this_connect = c.connect(func=ff) - with pytest.raises(exceptions.ConnectionConnectError): - this_connect() - assert RetryObj.times == max_retry - assert not ff.executed - RetryObj = Retry() - - c.conn = PASS_CONN - this_connect = 
c.connect(func=ff) - this_connect() - assert ff.executed - assert RetryObj.times == 0 - - this_connect = c.connect(func=None) - with pytest.raises(TypeError): - this_connect() - - errors = [] - - def error_handler(err): - errors.append(err) - - this_connect = c.connect(func=None, exception_handler=error_handler) - this_connect() - assert len(errors) == 1 diff --git a/mishards/test_models.py b/mishards/test_models.py deleted file mode 100644 index d60b62713e..0000000000 --- a/mishards/test_models.py +++ /dev/null @@ -1,39 +0,0 @@ -import logging -import pytest -from mishards.factories import TableFiles, Tables, TableFilesFactory, TablesFactory -from mishards import db, create_app, settings -from mishards.factories import ( - Tables, TableFiles, - TablesFactory, TableFilesFactory -) - -logger = logging.getLogger(__name__) - - -@pytest.mark.usefixtures('app') -class TestModels: - def test_files_to_search(self): - table = TablesFactory() - new_files_cnt = 5 - to_index_cnt = 10 - raw_cnt = 20 - backup_cnt = 12 - to_delete_cnt = 9 - index_cnt = 8 - new_index_cnt = 6 - new_merge_cnt = 11 - - new_files = TableFilesFactory.create_batch(new_files_cnt, table=table, file_type=TableFiles.FILE_TYPE_NEW, date=110) - to_index_files = TableFilesFactory.create_batch(to_index_cnt, table=table, file_type=TableFiles.FILE_TYPE_TO_INDEX, date=110) - raw_files = TableFilesFactory.create_batch(raw_cnt, table=table, file_type=TableFiles.FILE_TYPE_RAW, date=120) - backup_files = TableFilesFactory.create_batch(backup_cnt, table=table, file_type=TableFiles.FILE_TYPE_BACKUP, date=110) - index_files = TableFilesFactory.create_batch(index_cnt, table=table, file_type=TableFiles.FILE_TYPE_INDEX, date=110) - new_index_files = TableFilesFactory.create_batch(new_index_cnt, table=table, file_type=TableFiles.FILE_TYPE_NEW_INDEX, date=110) - new_merge_files = TableFilesFactory.create_batch(new_merge_cnt, table=table, file_type=TableFiles.FILE_TYPE_NEW_MERGE, date=110) - to_delete_files = 
TableFilesFactory.create_batch(to_delete_cnt, table=table, file_type=TableFiles.FILE_TYPE_TO_DELETE, date=110) - assert table.files_to_search().count() == raw_cnt + index_cnt + to_index_cnt - - assert table.files_to_search([(100, 115)]).count() == index_cnt + to_index_cnt - assert table.files_to_search([(111, 120)]).count() == 0 - assert table.files_to_search([(111, 121)]).count() == raw_cnt - assert table.files_to_search([(110, 121)]).count() == raw_cnt + index_cnt + to_index_cnt diff --git a/mishards/test_server.py b/mishards/test_server.py deleted file mode 100644 index efd3912076..0000000000 --- a/mishards/test_server.py +++ /dev/null @@ -1,279 +0,0 @@ -import logging -import pytest -import mock -import datetime -import random -import faker -import inspect -from milvus import Milvus -from milvus.client.types import Status, IndexType, MetricType -from milvus.client.abstract import IndexParam, TableSchema -from milvus.grpc_gen import status_pb2, milvus_pb2 -from mishards import db, create_app, settings -from mishards.service_handler import ServiceHandler -from mishards.grpc_utils.grpc_args_parser import GrpcArgsParser as Parser -from mishards.factories import TableFilesFactory, TablesFactory, TableFiles, Tables -from mishards.routings import RouterMixin - -logger = logging.getLogger(__name__) - -OK = Status(code=Status.SUCCESS, message='Success') -BAD = Status(code=Status.PERMISSION_DENIED, message='Fail') - - -@pytest.mark.usefixtures('started_app') -class TestServer: - @property - def client(self): - m = Milvus() - m.connect(host='localhost', port=settings.SERVER_TEST_PORT) - return m - - def test_server_start(self, started_app): - assert started_app.conn_mgr.metas.get('WOSERVER') == settings.WOSERVER - - def test_cmd(self, started_app): - ServiceHandler._get_server_version = mock.MagicMock(return_value=(OK, - '')) - status, _ = self.client.server_version() - assert status.OK() - - Parser.parse_proto_Command = mock.MagicMock(return_value=(BAD, 'cmd')) - status, 
_ = self.client.server_version() - assert not status.OK() - - def test_drop_index(self, started_app): - table_name = inspect.currentframe().f_code.co_name - ServiceHandler._drop_index = mock.MagicMock(return_value=OK) - status = self.client.drop_index(table_name) - assert status.OK() - - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(BAD, table_name)) - status = self.client.drop_index(table_name) - assert not status.OK() - - def test_describe_index(self, started_app): - table_name = inspect.currentframe().f_code.co_name - index_type = IndexType.FLAT - nlist = 1 - index_param = IndexParam(table_name=table_name, - index_type=index_type, - nlist=nlist) - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(OK, table_name)) - ServiceHandler._describe_index = mock.MagicMock( - return_value=(OK, index_param)) - status, ret = self.client.describe_index(table_name) - assert status.OK() - assert ret._table_name == index_param._table_name - - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(BAD, table_name)) - status, _ = self.client.describe_index(table_name) - assert not status.OK() - - def test_preload(self, started_app): - table_name = inspect.currentframe().f_code.co_name - - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(OK, table_name)) - ServiceHandler._preload_table = mock.MagicMock(return_value=OK) - status = self.client.preload_table(table_name) - assert status.OK() - - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(BAD, table_name)) - status = self.client.preload_table(table_name) - assert not status.OK() - - @pytest.mark.skip - def test_delete_by_range(self, started_app): - table_name = inspect.currentframe().f_code.co_name - - unpacked = table_name, datetime.datetime.today( - ), datetime.datetime.today() - - Parser.parse_proto_DeleteByRangeParam = mock.MagicMock( - return_value=(OK, unpacked)) - ServiceHandler._delete_by_range = mock.MagicMock(return_value=OK) - status = 
self.client.delete_vectors_by_range( - *unpacked) - assert status.OK() - - Parser.parse_proto_DeleteByRangeParam = mock.MagicMock( - return_value=(BAD, unpacked)) - status = self.client.delete_vectors_by_range( - *unpacked) - assert not status.OK() - - def test_count_table(self, started_app): - table_name = inspect.currentframe().f_code.co_name - count = random.randint(100, 200) - - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(OK, table_name)) - ServiceHandler._count_table = mock.MagicMock(return_value=(OK, count)) - status, ret = self.client.get_table_row_count(table_name) - assert status.OK() - assert ret == count - - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(BAD, table_name)) - status, _ = self.client.get_table_row_count(table_name) - assert not status.OK() - - def test_show_tables(self, started_app): - tables = ['t1', 't2'] - ServiceHandler._show_tables = mock.MagicMock(return_value=(OK, tables)) - status, ret = self.client.show_tables() - assert status.OK() - assert ret == tables - - def test_describe_table(self, started_app): - table_name = inspect.currentframe().f_code.co_name - dimension = 128 - nlist = 1 - table_schema = TableSchema(table_name=table_name, - index_file_size=100, - metric_type=MetricType.L2, - dimension=dimension) - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(OK, table_schema.table_name)) - ServiceHandler._describe_table = mock.MagicMock( - return_value=(OK, table_schema)) - status, _ = self.client.describe_table(table_name) - assert status.OK() - - ServiceHandler._describe_table = mock.MagicMock( - return_value=(BAD, table_schema)) - status, _ = self.client.describe_table(table_name) - assert not status.OK() - - Parser.parse_proto_TableName = mock.MagicMock(return_value=(BAD, - 'cmd')) - status, ret = self.client.describe_table(table_name) - assert not status.OK() - - def test_insert(self, started_app): - table_name = inspect.currentframe().f_code.co_name - vectors = 
[[random.random() for _ in range(16)] for _ in range(10)] - ids = [random.randint(1000000, 20000000) for _ in range(10)] - ServiceHandler._add_vectors = mock.MagicMock(return_value=(OK, ids)) - status, ret = self.client.add_vectors( - table_name=table_name, records=vectors) - assert status.OK() - assert ids == ret - - def test_create_index(self, started_app): - table_name = inspect.currentframe().f_code.co_name - unpacks = table_name, None - Parser.parse_proto_IndexParam = mock.MagicMock(return_value=(OK, - unpacks)) - ServiceHandler._create_index = mock.MagicMock(return_value=OK) - status = self.client.create_index(table_name=table_name) - assert status.OK() - - Parser.parse_proto_IndexParam = mock.MagicMock(return_value=(BAD, - None)) - status = self.client.create_index(table_name=table_name) - assert not status.OK() - - def test_drop_table(self, started_app): - table_name = inspect.currentframe().f_code.co_name - - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(OK, table_name)) - ServiceHandler._delete_table = mock.MagicMock(return_value=OK) - status = self.client.delete_table(table_name=table_name) - assert status.OK() - - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(BAD, table_name)) - status = self.client.delete_table(table_name=table_name) - assert not status.OK() - - def test_has_table(self, started_app): - table_name = inspect.currentframe().f_code.co_name - - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(OK, table_name)) - ServiceHandler._has_table = mock.MagicMock(return_value=(OK, True)) - has = self.client.has_table(table_name=table_name) - assert has - - Parser.parse_proto_TableName = mock.MagicMock( - return_value=(BAD, table_name)) - status, has = self.client.has_table(table_name=table_name) - assert not status.OK() - assert not has - - def test_create_table(self, started_app): - table_name = inspect.currentframe().f_code.co_name - dimension = 128 - table_schema = dict(table_name=table_name, - 
index_file_size=100, - metric_type=MetricType.L2, - dimension=dimension) - - ServiceHandler._create_table = mock.MagicMock(return_value=OK) - status = self.client.create_table(table_schema) - assert status.OK() - - Parser.parse_proto_TableSchema = mock.MagicMock(return_value=(BAD, - None)) - status = self.client.create_table(table_schema) - assert not status.OK() - - def random_data(self, n, dimension): - return [[random.random() for _ in range(dimension)] for _ in range(n)] - - def test_search(self, started_app): - table_name = inspect.currentframe().f_code.co_name - to_index_cnt = random.randint(10, 20) - table = TablesFactory(table_id=table_name, state=Tables.NORMAL) - to_index_files = TableFilesFactory.create_batch( - to_index_cnt, table=table, file_type=TableFiles.FILE_TYPE_TO_INDEX) - topk = random.randint(5, 10) - nq = random.randint(5, 10) - param = { - 'table_name': table_name, - 'query_records': self.random_data(nq, table.dimension), - 'top_k': topk, - 'nprobe': 2049 - } - - result = [ - milvus_pb2.TopKQueryResult(query_result_arrays=[ - milvus_pb2.QueryResult(id=i, distance=random.random()) - for i in range(topk) - ]) for i in range(nq) - ] - - mock_results = milvus_pb2.TopKQueryResultList(status=status_pb2.Status( - error_code=status_pb2.SUCCESS, reason="Success"), - topk_query_result=result) - - table_schema = TableSchema(table_name=table_name, - index_file_size=table.index_file_size, - metric_type=table.metric_type, - dimension=table.dimension) - - status, _ = self.client.search_vectors(**param) - assert status.code == Status.ILLEGAL_ARGUMENT - - param['nprobe'] = 2048 - RouterMixin.connection = mock.MagicMock(return_value=Milvus()) - RouterMixin.query_conn = mock.MagicMock(return_value=Milvus()) - Milvus.describe_table = mock.MagicMock(return_value=(BAD, - table_schema)) - status, ret = self.client.search_vectors(**param) - assert status.code == Status.TABLE_NOT_EXISTS - - Milvus.describe_table = mock.MagicMock(return_value=(OK, table_schema)) - 
Milvus.search_vectors_in_files = mock.MagicMock( - return_value=mock_results) - - status, ret = self.client.search_vectors(**param) - assert status.OK() - assert len(ret) == nq diff --git a/mishards/utilities.py b/mishards/utilities.py deleted file mode 100644 index 42e982b5f1..0000000000 --- a/mishards/utilities.py +++ /dev/null @@ -1,20 +0,0 @@ -import datetime -from mishards import exceptions - - -def format_date(start, end): - return ((start.year - 1900) * 10000 + (start.month - 1) * 100 + start.day, - (end.year - 1900) * 10000 + (end.month - 1) * 100 + end.day) - - -def range_to_date(range_obj, metadata=None): - try: - start = datetime.datetime.strptime(range_obj.start_date, '%Y-%m-%d') - end = datetime.datetime.strptime(range_obj.end_date, '%Y-%m-%d') - assert start < end - except (ValueError, AssertionError): - raise exceptions.InvalidRangeError('Invalid time range: {} {}'.format( - range_obj.start_date, range_obj.end_date), - metadata=metadata) - - return format_date(start, end) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index ae224e92ed..0000000000 --- a/requirements.txt +++ /dev/null @@ -1,36 +0,0 @@ -environs==4.2.0 -factory-boy==2.12.0 -Faker==1.0.7 -fire==0.1.3 -google-auth==1.6.3 -grpcio==1.22.0 -grpcio-tools==1.22.0 -kubernetes==10.0.1 -MarkupSafe==1.1.1 -marshmallow==2.19.5 -pymysql==0.9.3 -protobuf==3.9.1 -py==1.8.0 -pyasn1==0.4.7 -pyasn1-modules==0.2.6 -pylint==2.3.1 -pymilvus-test==0.2.28 -#pymilvus==0.2.0 -pyparsing==2.4.0 -pytest==4.6.3 -pytest-level==0.1.1 -pytest-print==0.1.2 -pytest-repeat==0.8.0 -pytest-timeout==1.3.3 -python-dateutil==2.8.0 -python-dotenv==0.10.3 -pytz==2019.1 -requests==2.22.0 -requests-oauthlib==1.2.0 -rsa==4.0 -six==1.12.0 -SQLAlchemy==1.3.5 -urllib3==1.25.3 -jaeger-client>=3.4.0 -grpcio-opentracing>=1.0 -mock==2.0.0 diff --git a/sd/__init__.py b/sd/__init__.py deleted file mode 100644 index 7943887d0f..0000000000 --- a/sd/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -import logging -import 
inspect -# from utils import singleton - -logger = logging.getLogger(__name__) - - -class ProviderManager: - PROVIDERS = {} - - @classmethod - def register_service_provider(cls, target): - if inspect.isfunction(target): - cls.PROVIDERS[target.__name__] = target - elif inspect.isclass(target): - name = target.__dict__.get('NAME', None) - name = name if name else target.__class__.__name__ - cls.PROVIDERS[name] = target - else: - assert False, 'Cannot register_service_provider for: {}'.format(target) - return target - - @classmethod - def get_provider(cls, name): - return cls.PROVIDERS.get(name, None) - - -from sd import kubernetes_provider, static_provider diff --git a/sd/kubernetes_provider.py b/sd/kubernetes_provider.py deleted file mode 100644 index eb113db007..0000000000 --- a/sd/kubernetes_provider.py +++ /dev/null @@ -1,331 +0,0 @@ -import os -import sys -if __name__ == '__main__': - sys.path.append(os.path.dirname(os.path.dirname( - os.path.abspath(__file__)))) - -import re -import logging -import time -import copy -import threading -import queue -import enum -from kubernetes import client, config, watch - -from utils import singleton -from sd import ProviderManager - -logger = logging.getLogger(__name__) - -INCLUSTER_NAMESPACE_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/namespace' - - -class EventType(enum.Enum): - PodHeartBeat = 1 - Watch = 2 - - -class K8SMixin: - def __init__(self, namespace, in_cluster=False, **kwargs): - self.namespace = namespace - self.in_cluster = in_cluster - self.kwargs = kwargs - self.v1 = kwargs.get('v1', None) - if not self.namespace: - self.namespace = open(INCLUSTER_NAMESPACE_PATH).read() - - if not self.v1: - config.load_incluster_config( - ) if self.in_cluster else config.load_kube_config() - self.v1 = client.CoreV1Api() - - -class K8SHeartbeatHandler(threading.Thread, K8SMixin): - def __init__(self, - message_queue, - namespace, - label_selector, - in_cluster=False, - **kwargs): - K8SMixin.__init__(self, - 
namespace=namespace, - in_cluster=in_cluster, - **kwargs) - threading.Thread.__init__(self) - self.queue = message_queue - self.terminate = False - self.label_selector = label_selector - self.poll_interval = kwargs.get('poll_interval', 5) - - def run(self): - while not self.terminate: - try: - pods = self.v1.list_namespaced_pod( - namespace=self.namespace, - label_selector=self.label_selector) - event_message = {'eType': EventType.PodHeartBeat, 'events': []} - for item in pods.items: - pod = self.v1.read_namespaced_pod(name=item.metadata.name, - namespace=self.namespace) - name = pod.metadata.name - ip = pod.status.pod_ip - phase = pod.status.phase - reason = pod.status.reason - message = pod.status.message - ready = True if phase == 'Running' else False - - pod_event = dict(pod=name, - ip=ip, - ready=ready, - reason=reason, - message=message) - - event_message['events'].append(pod_event) - - self.queue.put(event_message) - - except Exception as exc: - logger.error(exc) - - time.sleep(self.poll_interval) - - def stop(self): - self.terminate = True - - -class K8SEventListener(threading.Thread, K8SMixin): - def __init__(self, message_queue, namespace, in_cluster=False, **kwargs): - K8SMixin.__init__(self, - namespace=namespace, - in_cluster=in_cluster, - **kwargs) - threading.Thread.__init__(self) - self.queue = message_queue - self.terminate = False - self.at_start_up = True - self._stop_event = threading.Event() - - def stop(self): - self.terminate = True - self._stop_event.set() - - def run(self): - resource_version = '' - w = watch.Watch() - for event in w.stream(self.v1.list_namespaced_event, - namespace=self.namespace, - field_selector='involvedObject.kind=Pod'): - if self.terminate: - break - - resource_version = int(event['object'].metadata.resource_version) - - info = dict( - eType=EventType.Watch, - pod=event['object'].involved_object.name, - reason=event['object'].reason, - message=event['object'].message, - start_up=self.at_start_up, - ) - 
self.at_start_up = False - # logger.info('Received event: {}'.format(info)) - self.queue.put(info) - - -class EventHandler(threading.Thread): - def __init__(self, mgr, message_queue, namespace, pod_patt, **kwargs): - threading.Thread.__init__(self) - self.mgr = mgr - self.queue = message_queue - self.kwargs = kwargs - self.terminate = False - self.pod_patt = re.compile(pod_patt) - self.namespace = namespace - - def stop(self): - self.terminate = True - - def on_drop(self, event, **kwargs): - pass - - def on_pod_started(self, event, **kwargs): - try_cnt = 3 - pod = None - while try_cnt > 0: - try_cnt -= 1 - try: - pod = self.mgr.v1.read_namespaced_pod(name=event['pod'], - namespace=self.namespace) - if not pod.status.pod_ip: - time.sleep(0.5) - continue - break - except client.rest.ApiException as exc: - time.sleep(0.5) - - if try_cnt <= 0 and not pod: - if not event['start_up']: - logger.error('Pod {} is started but cannot read pod'.format( - event['pod'])) - return - elif try_cnt <= 0 and not pod.status.pod_ip: - logger.warning('NoPodIPFoundError') - return - - logger.info('Register POD {} with IP {}'.format( - pod.metadata.name, pod.status.pod_ip)) - self.mgr.add_pod(name=pod.metadata.name, ip=pod.status.pod_ip) - - def on_pod_killing(self, event, **kwargs): - logger.info('Unregister POD {}'.format(event['pod'])) - self.mgr.delete_pod(name=event['pod']) - - def on_pod_heartbeat(self, event, **kwargs): - names = self.mgr.conn_mgr.conn_names - - running_names = set() - for each_event in event['events']: - if each_event['ready']: - self.mgr.add_pod(name=each_event['pod'], ip=each_event['ip']) - running_names.add(each_event['pod']) - else: - self.mgr.delete_pod(name=each_event['pod']) - - to_delete = names - running_names - for name in to_delete: - self.mgr.delete_pod(name) - - logger.info(self.mgr.conn_mgr.conn_names) - - def handle_event(self, event): - if event['eType'] == EventType.PodHeartBeat: - return self.on_pod_heartbeat(event) - - if not event or 
(event['reason'] not in ('Started', 'Killing')): - return self.on_drop(event) - - if not re.match(self.pod_patt, event['pod']): - return self.on_drop(event) - - logger.info('Handling event: {}'.format(event)) - - if event['reason'] == 'Started': - return self.on_pod_started(event) - - return self.on_pod_killing(event) - - def run(self): - while not self.terminate: - try: - event = self.queue.get(timeout=1) - self.handle_event(event) - except queue.Empty: - continue - - -class KubernetesProviderSettings: - def __init__(self, namespace, pod_patt, label_selector, in_cluster, - poll_interval, port=None, **kwargs): - self.namespace = namespace - self.pod_patt = pod_patt - self.label_selector = label_selector - self.in_cluster = in_cluster - self.poll_interval = poll_interval - self.port = int(port) if port else 19530 - - -@singleton -@ProviderManager.register_service_provider -class KubernetesProvider(object): - NAME = 'Kubernetes' - - def __init__(self, settings, conn_mgr, **kwargs): - self.namespace = settings.namespace - self.pod_patt = settings.pod_patt - self.label_selector = settings.label_selector - self.in_cluster = settings.in_cluster - self.poll_interval = settings.poll_interval - self.port = settings.port - self.kwargs = kwargs - self.queue = queue.Queue() - - self.conn_mgr = conn_mgr - - if not self.namespace: - self.namespace = open(incluster_namespace_path).read() - - config.load_incluster_config( - ) if self.in_cluster else config.load_kube_config() - self.v1 = client.CoreV1Api() - - self.listener = K8SEventListener(message_queue=self.queue, - namespace=self.namespace, - in_cluster=self.in_cluster, - v1=self.v1, - **kwargs) - - self.pod_heartbeater = K8SHeartbeatHandler( - message_queue=self.queue, - namespace=self.namespace, - label_selector=self.label_selector, - in_cluster=self.in_cluster, - v1=self.v1, - poll_interval=self.poll_interval, - **kwargs) - - self.event_handler = EventHandler(mgr=self, - message_queue=self.queue, - namespace=self.namespace, 
- pod_patt=self.pod_patt, - **kwargs) - - def add_pod(self, name, ip): - self.conn_mgr.register(name, 'tcp://{}:{}'.format(ip, self.port)) - - def delete_pod(self, name): - self.conn_mgr.unregister(name) - - def start(self): - self.listener.daemon = True - self.listener.start() - self.event_handler.start() - - self.pod_heartbeater.start() - - def stop(self): - self.listener.stop() - self.pod_heartbeater.stop() - self.event_handler.stop() - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - - class Connect: - def register(self, name, value): - logger.error('Register: {} - {}'.format(name, value)) - - def unregister(self, name): - logger.error('Unregister: {}'.format(name)) - - @property - def conn_names(self): - return set() - - connect_mgr = Connect() - - settings = KubernetesProviderSettings(namespace='xp', - pod_patt=".*-ro-servers-.*", - label_selector='tier=ro-servers', - poll_interval=5, - in_cluster=False) - - provider_class = ProviderManager.get_provider('Kubernetes') - t = provider_class(conn_mgr=connect_mgr, settings=settings) - t.start() - cnt = 100 - while cnt > 0: - time.sleep(2) - cnt -= 1 - t.stop() diff --git a/sd/static_provider.py b/sd/static_provider.py deleted file mode 100644 index e88780740f..0000000000 --- a/sd/static_provider.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import sys -if __name__ == '__main__': - sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -import socket -from utils import singleton -from sd import ProviderManager - - -class StaticProviderSettings: - def __init__(self, hosts, port=None): - self.hosts = hosts - self.port = int(port) if port else 19530 - - -@singleton -@ProviderManager.register_service_provider -class KubernetesProvider(object): - NAME = 'Static' - - def __init__(self, settings, conn_mgr, **kwargs): - self.conn_mgr = conn_mgr - self.hosts = [socket.gethostbyname(host) for host in settings.hosts] - self.port = settings.port - - def start(self): - for host 
in self.hosts: - self.add_pod(host, host) - - def stop(self): - for host in self.hosts: - self.delete_pod(host) - - def add_pod(self, name, ip): - self.conn_mgr.register(name, 'tcp://{}:{}'.format(ip, self.port)) - - def delete_pod(self, name): - self.conn_mgr.unregister(name) diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 4a88432914..0000000000 --- a/setup.cfg +++ /dev/null @@ -1,4 +0,0 @@ -[tool:pytest] -testpaths = mishards -log_cli=true -log_cli_level=info diff --git a/start_services.yml b/start_services.yml deleted file mode 100644 index 57fe061bb7..0000000000 --- a/start_services.yml +++ /dev/null @@ -1,45 +0,0 @@ -version: "2.3" -services: - milvus: - runtime: nvidia - restart: always - image: registry.zilliz.com/milvus/engine:branch-0.5.0-release-4316de - # ports: - # - "0.0.0.0:19530:19530" - volumes: - - /tmp/milvus/db:/opt/milvus/db - - jaeger: - restart: always - image: jaegertracing/all-in-one:1.14 - ports: - - "0.0.0.0:5775:5775/udp" - - "0.0.0.0:16686:16686" - - "0.0.0.0:9441:9441" - environment: - COLLECTOR_ZIPKIN_HTTP_PORT: 9411 - - mishards: - restart: always - image: registry.zilliz.com/milvus/mishards:v0.0.4 - ports: - - "0.0.0.0:19530:19531" - - "0.0.0.0:19532:19532" - volumes: - - /tmp/milvus/db:/tmp/milvus/db - # - /tmp/mishards_env:/source/mishards/.env - command: ["python", "mishards/main.py"] - environment: - FROM_EXAMPLE: 'true' - DEBUG: 'true' - SERVER_PORT: 19531 - WOSERVER: tcp://milvus:19530 - SD_STATIC_HOSTS: milvus - TRACING_TYPE: jaeger - TRACING_SERVICE_NAME: mishards-demo - TRACING_REPORTING_HOST: jaeger - TRACING_REPORTING_PORT: 5775 - - depends_on: - - milvus - - jaeger diff --git a/tracing/__init__.py b/tracing/__init__.py deleted file mode 100644 index 64a5b50d15..0000000000 --- a/tracing/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -from contextlib import contextmanager - - -def empty_server_interceptor_decorator(target_server, interceptor): - return target_server - - -@contextmanager -def EmptySpan(*args, 
**kwargs): - yield None - return - - -class Tracer: - def __init__(self, - tracer=None, - interceptor=None, - server_decorator=empty_server_interceptor_decorator): - self.tracer = tracer - self.interceptor = interceptor - self.server_decorator = server_decorator - - def decorate(self, server): - return self.server_decorator(server, self.interceptor) - - @property - def empty(self): - return self.tracer is None - - def close(self): - self.tracer and self.tracer.close() - - def start_span(self, - operation_name=None, - child_of=None, - references=None, - tags=None, - start_time=None, - ignore_active_span=False): - if self.empty: - return EmptySpan() - return self.tracer.start_span(operation_name, child_of, references, - tags, start_time, ignore_active_span) diff --git a/tracing/factory.py b/tracing/factory.py deleted file mode 100644 index 14fcde2eb3..0000000000 --- a/tracing/factory.py +++ /dev/null @@ -1,40 +0,0 @@ -import logging -from jaeger_client import Config -from grpc_opentracing.grpcext import intercept_server -from grpc_opentracing import open_tracing_server_interceptor - -from tracing import (Tracer, empty_server_interceptor_decorator) - -logger = logging.getLogger(__name__) - - -class TracerFactory: - @classmethod - def new_tracer(cls, - tracer_type, - tracer_config, - span_decorator=None, - **kwargs): - if not tracer_type: - return Tracer() - config = tracer_config.TRACING_CONFIG - service_name = tracer_config.TRACING_SERVICE_NAME - validate = tracer_config.TRACING_VALIDATE - # if not tracer_type: - # tracer_type = 'jaeger' - # config = tracer_config.DEFAULT_TRACING_CONFIG - - if tracer_type.lower() == 'jaeger': - config = Config(config=config, - service_name=service_name, - validate=validate) - - tracer = config.initialize_tracer() - tracer_interceptor = open_tracing_server_interceptor( - tracer, - log_payloads=tracer_config.TRACING_LOG_PAYLOAD, - span_decorator=span_decorator) - - return Tracer(tracer, tracer_interceptor, intercept_server) - - assert 
False, 'Unsupported tracer type: {}'.format(tracer_type) diff --git a/utils/__init__.py b/utils/__init__.py deleted file mode 100644 index c1d55e76c0..0000000000 --- a/utils/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from functools import wraps - - -def singleton(cls): - instances = {} - @wraps(cls) - def getinstance(*args, **kw): - if cls not in instances: - instances[cls] = cls(*args, **kw) - return instances[cls] - return getinstance diff --git a/utils/logger_helper.py b/utils/logger_helper.py deleted file mode 100644 index b4e3b9c5b6..0000000000 --- a/utils/logger_helper.py +++ /dev/null @@ -1,152 +0,0 @@ -import os -import datetime -from pytz import timezone -from logging import Filter -import logging.config - - -class InfoFilter(logging.Filter): - def filter(self, rec): - return rec.levelno == logging.INFO - - -class DebugFilter(logging.Filter): - def filter(self, rec): - return rec.levelno == logging.DEBUG - - -class WarnFilter(logging.Filter): - def filter(self, rec): - return rec.levelno == logging.WARN - - -class ErrorFilter(logging.Filter): - def filter(self, rec): - return rec.levelno == logging.ERROR - - -class CriticalFilter(logging.Filter): - def filter(self, rec): - return rec.levelno == logging.CRITICAL - - -COLORS = { - 'HEADER': '\033[95m', - 'INFO': '\033[92m', - 'DEBUG': '\033[94m', - 'WARNING': '\033[93m', - 'ERROR': '\033[95m', - 'CRITICAL': '\033[91m', - 'ENDC': '\033[0m', -} - - -class ColorFulFormatColMixin: - def format_col(self, message_str, level_name): - if level_name in COLORS.keys(): - message_str = COLORS.get(level_name) + message_str + COLORS.get( - 'ENDC') - return message_str - - -class ColorfulFormatter(logging.Formatter, ColorFulFormatColMixin): - def format(self, record): - message_str = super(ColorfulFormatter, self).format(record) - - return self.format_col(message_str, level_name=record.levelname) - - -def config(log_level, log_path, name, tz='UTC'): - def build_log_file(level, log_path, name, tz): - utc_now = 
datetime.datetime.utcnow() - utc_tz = timezone('UTC') - local_tz = timezone(tz) - tznow = utc_now.replace(tzinfo=utc_tz).astimezone(local_tz) - return '{}-{}-{}.log'.format(os.path.join(log_path, name), tznow.strftime("%m-%d-%Y-%H:%M:%S"), - level) - - if not os.path.exists(log_path): - os.makedirs(log_path) - - LOGGING = { - 'version': 1, - 'disable_existing_loggers': False, - 'formatters': { - 'default': { - 'format': '%(asctime)s | %(levelname)s | %(name)s | %(threadName)s: %(message)s (%(filename)s:%(lineno)s)', - }, - 'colorful_console': { - 'format': '%(asctime)s | %(levelname)s | %(name)s | %(threadName)s: %(message)s (%(filename)s:%(lineno)s)', - '()': ColorfulFormatter, - }, - }, - 'filters': { - 'InfoFilter': { - '()': InfoFilter, - }, - 'DebugFilter': { - '()': DebugFilter, - }, - 'WarnFilter': { - '()': WarnFilter, - }, - 'ErrorFilter': { - '()': ErrorFilter, - }, - 'CriticalFilter': { - '()': CriticalFilter, - }, - }, - 'handlers': { - 'milvus_celery_console': { - 'class': 'logging.StreamHandler', - 'formatter': 'colorful_console', - }, - 'milvus_debug_file': { - 'level': 'DEBUG', - 'filters': ['DebugFilter'], - 'class': 'logging.handlers.RotatingFileHandler', - 'formatter': 'default', - 'filename': build_log_file('debug', log_path, name, tz) - }, - 'milvus_info_file': { - 'level': 'INFO', - 'filters': ['InfoFilter'], - 'class': 'logging.handlers.RotatingFileHandler', - 'formatter': 'default', - 'filename': build_log_file('info', log_path, name, tz) - }, - 'milvus_warn_file': { - 'level': 'WARN', - 'filters': ['WarnFilter'], - 'class': 'logging.handlers.RotatingFileHandler', - 'formatter': 'default', - 'filename': build_log_file('warn', log_path, name, tz) - }, - 'milvus_error_file': { - 'level': 'ERROR', - 'filters': ['ErrorFilter'], - 'class': 'logging.handlers.RotatingFileHandler', - 'formatter': 'default', - 'filename': build_log_file('error', log_path, name, tz) - }, - 'milvus_critical_file': { - 'level': 'CRITICAL', - 'filters': 
['CriticalFilter'], - 'class': 'logging.handlers.RotatingFileHandler', - 'formatter': 'default', - 'filename': build_log_file('critical', log_path, name, tz) - }, - }, - 'loggers': { - '': { - 'handlers': ['milvus_celery_console', 'milvus_info_file', 'milvus_debug_file', 'milvus_warn_file', - 'milvus_error_file', 'milvus_critical_file'], - 'level': log_level, - 'propagate': False - }, - }, - 'propagate': False, - } - - logging.config.dictConfig(LOGGING) From 8553d1c332352d2b044e1f26136369fa71157247 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 21 Oct 2019 16:21:32 +0800 Subject: [PATCH 091/126] Preparing to merge into milvus --- shards/Dockerfile | 10 + shards/build.sh | 39 ++ shards/conftest.py | 27 + shards/manager.py | 28 ++ shards/mishards/.env.example | 33 ++ shards/mishards/__init__.py | 36 ++ shards/mishards/connections.py | 154 ++++++ shards/mishards/db_base.py | 52 ++ shards/mishards/exception_codes.py | 10 + shards/mishards/exception_handlers.py | 82 +++ shards/mishards/exceptions.py | 38 ++ shards/mishards/factories.py | 54 ++ shards/mishards/grpc_utils/__init__.py | 37 ++ .../mishards/grpc_utils/grpc_args_parser.py | 102 ++++ .../mishards/grpc_utils/grpc_args_wrapper.py | 4 + shards/mishards/grpc_utils/test_grpc.py | 75 +++ shards/mishards/hash_ring.py | 150 ++++++ shards/mishards/main.py | 15 + shards/mishards/models.py | 76 +++ shards/mishards/routings.py | 96 ++++ shards/mishards/server.py | 122 +++++ shards/mishards/service_handler.py | 475 ++++++++++++++++++ shards/mishards/settings.py | 94 ++++ shards/mishards/test_connections.py | 101 ++++ shards/mishards/test_models.py | 39 ++ shards/mishards/test_server.py | 279 ++++++++++ shards/mishards/utilities.py | 20 + shards/requirements.txt | 36 ++ shards/sd/__init__.py | 28 ++ shards/sd/kubernetes_provider.py | 331 ++++++++++++ shards/sd/static_provider.py | 39 ++ shards/setup.cfg | 4 + shards/start_services.yml | 45 ++ shards/tracing/__init__.py | 43 ++ shards/tracing/factory.py | 40 ++ 
shards/utils/__init__.py | 11 + shards/utils/logger_helper.py | 152 ++++++ 37 files changed, 2977 insertions(+) create mode 100644 shards/Dockerfile create mode 100755 shards/build.sh create mode 100644 shards/conftest.py create mode 100644 shards/manager.py create mode 100644 shards/mishards/.env.example create mode 100644 shards/mishards/__init__.py create mode 100644 shards/mishards/connections.py create mode 100644 shards/mishards/db_base.py create mode 100644 shards/mishards/exception_codes.py create mode 100644 shards/mishards/exception_handlers.py create mode 100644 shards/mishards/exceptions.py create mode 100644 shards/mishards/factories.py create mode 100644 shards/mishards/grpc_utils/__init__.py create mode 100644 shards/mishards/grpc_utils/grpc_args_parser.py create mode 100644 shards/mishards/grpc_utils/grpc_args_wrapper.py create mode 100644 shards/mishards/grpc_utils/test_grpc.py create mode 100644 shards/mishards/hash_ring.py create mode 100644 shards/mishards/main.py create mode 100644 shards/mishards/models.py create mode 100644 shards/mishards/routings.py create mode 100644 shards/mishards/server.py create mode 100644 shards/mishards/service_handler.py create mode 100644 shards/mishards/settings.py create mode 100644 shards/mishards/test_connections.py create mode 100644 shards/mishards/test_models.py create mode 100644 shards/mishards/test_server.py create mode 100644 shards/mishards/utilities.py create mode 100644 shards/requirements.txt create mode 100644 shards/sd/__init__.py create mode 100644 shards/sd/kubernetes_provider.py create mode 100644 shards/sd/static_provider.py create mode 100644 shards/setup.cfg create mode 100644 shards/start_services.yml create mode 100644 shards/tracing/__init__.py create mode 100644 shards/tracing/factory.py create mode 100644 shards/utils/__init__.py create mode 100644 shards/utils/logger_helper.py diff --git a/shards/Dockerfile b/shards/Dockerfile new file mode 100644 index 0000000000..594640619e --- 
/dev/null +++ b/shards/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.6 +RUN apt update && apt install -y \ + less \ + telnet +RUN mkdir /source +WORKDIR /source +ADD ./requirements.txt ./ +RUN pip install -r requirements.txt +COPY . . +CMD python mishards/main.py diff --git a/shards/build.sh b/shards/build.sh new file mode 100755 index 0000000000..fad30518f2 --- /dev/null +++ b/shards/build.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +BOLD=`tput bold` +NORMAL=`tput sgr0` +YELLOW='\033[1;33m' +ENDC='\033[0m' + +echo -e "${BOLD}MISHARDS_REGISTRY=${MISHARDS_REGISTRY}${ENDC}" + +function build_image() { + dockerfile=$1 + remote_registry=$2 + tagged=$2 + buildcmd="docker build -t ${tagged} -f ${dockerfile} ." + echo -e "${BOLD}$buildcmd${NORMAL}" + $buildcmd + pushcmd="docker push ${remote_registry}" + echo -e "${BOLD}$pushcmd${NORMAL}" + $pushcmd + echo -e "${YELLOW}${BOLD}Image: ${remote_registry}${NORMAL}${ENDC}" +} + +case "$1" in + +all) + [[ -z $MISHARDS_REGISTRY ]] && { + echo -e "${YELLOW}Error: Please set docker registry first:${ENDC}\n\t${BOLD}export MISHARDS_REGISTRY=xxxx\n${ENDC}" + exit 1 + } + + version="" + [[ ! -z $2 ]] && version=":${2}" + build_image "Dockerfile" "${MISHARDS_REGISTRY}${version}" "${MISHARDS_REGISTRY}" + ;; +*) + echo "Usage: [option...] 
{base | apps}" + echo "all, Usage: build.sh all [tagname|] => {docker_registry}:\${tagname}" + ;; +esac diff --git a/shards/conftest.py b/shards/conftest.py new file mode 100644 index 0000000000..34e22af693 --- /dev/null +++ b/shards/conftest.py @@ -0,0 +1,27 @@ +import logging +import pytest +import grpc +from mishards import settings, db, create_app + +logger = logging.getLogger(__name__) + + +@pytest.fixture +def app(request): + app = create_app(settings.TestingConfig) + db.drop_all() + db.create_all() + + yield app + + db.drop_all() + + +@pytest.fixture +def started_app(app): + app.on_pre_run() + app.start(settings.SERVER_TEST_PORT) + + yield app + + app.stop() diff --git a/shards/manager.py b/shards/manager.py new file mode 100644 index 0000000000..931c90ebc8 --- /dev/null +++ b/shards/manager.py @@ -0,0 +1,28 @@ +import fire +from mishards import db +from sqlalchemy import and_ + + +class DBHandler: + @classmethod + def create_all(cls): + db.create_all() + + @classmethod + def drop_all(cls): + db.drop_all() + + @classmethod + def fun(cls, tid): + from mishards.factories import TablesFactory, TableFilesFactory, Tables + f = db.Session.query(Tables).filter(and_( + Tables.table_id == tid, + Tables.state != Tables.TO_DELETE) + ).first() + print(f) + + # f1 = TableFilesFactory() + + +if __name__ == '__main__': + fire.Fire(DBHandler) diff --git a/shards/mishards/.env.example b/shards/mishards/.env.example new file mode 100644 index 0000000000..0a23c0cf56 --- /dev/null +++ b/shards/mishards/.env.example @@ -0,0 +1,33 @@ +DEBUG=True + +WOSERVER=tcp://127.0.0.1:19530 +SERVER_PORT=19532 +SERVER_TEST_PORT=19888 + +SD_PROVIDER=Static + +SD_NAMESPACE=xp +SD_IN_CLUSTER=False +SD_POLL_INTERVAL=5 +SD_ROSERVER_POD_PATT=.*-ro-servers-.* +SD_LABEL_SELECTOR=tier=ro-servers + +SD_STATIC_HOSTS=127.0.0.1 +SD_STATIC_PORT=19530 + +#SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 
+SQLALCHEMY_DATABASE_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False +SQL_ECHO=True + +#SQLALCHEMY_DATABASE_TEST_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 +SQLALCHEMY_DATABASE_TEST_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False +SQL_TEST_ECHO=False + +# TRACING_TEST_TYPE=jaeger +TRACING_TYPE=jaeger +TRACING_SERVICE_NAME=fortest +TRACING_SAMPLER_TYPE=const +TRACING_SAMPLER_PARAM=1 +TRACING_LOG_PAYLOAD=True +#TRACING_SAMPLER_TYPE=probabilistic +#TRACING_SAMPLER_PARAM=0.5 diff --git a/shards/mishards/__init__.py b/shards/mishards/__init__.py new file mode 100644 index 0000000000..7db3d8cb5e --- /dev/null +++ b/shards/mishards/__init__.py @@ -0,0 +1,36 @@ +import logging +from mishards import settings +logger = logging.getLogger() + +from mishards.db_base import DB +db = DB() + +from mishards.server import Server +grpc_server = Server() + + +def create_app(testing_config=None): + config = testing_config if testing_config else settings.DefaultConfig + db.init_db(uri=config.SQLALCHEMY_DATABASE_URI, echo=config.SQL_ECHO) + + from mishards.connections import ConnectionMgr + connect_mgr = ConnectionMgr() + + from sd import ProviderManager + + sd_proiver_class = ProviderManager.get_provider(settings.SD_PROVIDER) + discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, conn_mgr=connect_mgr) + + from tracing.factory import TracerFactory + from mishards.grpc_utils import GrpcSpanDecorator + tracer = TracerFactory.new_tracer(config.TRACING_TYPE, settings.TracingConfig, + span_decorator=GrpcSpanDecorator()) + + from mishards.routings import RouterFactory + router = RouterFactory.new_router(config.ROUTER_CLASS_NAME, connect_mgr) + + grpc_server.init_app(conn_mgr=connect_mgr, tracer=tracer, router=router, discover=discover) + + from mishards import exception_handlers + + return grpc_server diff --git a/shards/mishards/connections.py b/shards/mishards/connections.py new file mode 100644 index 
0000000000..618690a099 --- /dev/null +++ b/shards/mishards/connections.py @@ -0,0 +1,154 @@ +import logging +import threading +from functools import wraps +from milvus import Milvus + +from mishards import (settings, exceptions) +from utils import singleton + +logger = logging.getLogger(__name__) + + +class Connection: + def __init__(self, name, uri, max_retry=1, error_handlers=None, **kwargs): + self.name = name + self.uri = uri + self.max_retry = max_retry + self.retried = 0 + self.conn = Milvus() + self.error_handlers = [] if not error_handlers else error_handlers + self.on_retry_func = kwargs.get('on_retry_func', None) + # self._connect() + + def __str__(self): + return 'Connection:name=\"{}\";uri=\"{}\"'.format(self.name, self.uri) + + def _connect(self, metadata=None): + try: + self.conn.connect(uri=self.uri) + except Exception as e: + if not self.error_handlers: + raise exceptions.ConnectionConnectError(message=str(e), metadata=metadata) + for handler in self.error_handlers: + handler(e, metadata=metadata) + + @property + def can_retry(self): + return self.retried < self.max_retry + + @property + def connected(self): + return self.conn.connected() + + def on_retry(self): + if self.on_retry_func: + self.on_retry_func(self) + else: + self.retried > 1 and logger.warning('{} is retrying {}'.format(self, self.retried)) + + def on_connect(self, metadata=None): + while not self.connected and self.can_retry: + self.retried += 1 + self.on_retry() + self._connect(metadata=metadata) + + if not self.can_retry and not self.connected: + raise exceptions.ConnectionConnectError(message='Max retry {} reached!'.format(self.max_retry, + metadata=metadata)) + + self.retried = 0 + + def connect(self, func, exception_handler=None): + @wraps(func) + def inner(*args, **kwargs): + self.on_connect() + try: + return func(*args, **kwargs) + except Exception as e: + if exception_handler: + exception_handler(e) + else: + raise e + return inner + + +@singleton +class ConnectionMgr: + def 
__init__(self): + self.metas = {} + self.conns = {} + + @property + def conn_names(self): + return set(self.metas.keys()) - set(['WOSERVER']) + + def conn(self, name, metadata, throw=False): + c = self.conns.get(name, None) + if not c: + url = self.metas.get(name, None) + if not url: + if not throw: + return None + raise exceptions.ConnectionNotFoundError(message='Connection {} not found'.format(name), + metadata=metadata) + this_conn = Connection(name=name, uri=url, max_retry=settings.MAX_RETRY) + threaded = { + threading.get_ident(): this_conn + } + self.conns[name] = threaded + return this_conn + + tid = threading.get_ident() + rconn = c.get(tid, None) + if not rconn: + url = self.metas.get(name, None) + if not url: + if not throw: + return None + raise exceptions.ConnectionNotFoundError('Connection {} not found'.format(name), + metadata=metadata) + this_conn = Connection(name=name, uri=url, max_retry=settings.MAX_RETRY) + c[tid] = this_conn + return this_conn + + return rconn + + def on_new_meta(self, name, url): + logger.info('Register Connection: name={};url={}'.format(name, url)) + self.metas[name] = url + + def on_duplicate_meta(self, name, url): + if self.metas[name] == url: + return self.on_same_meta(name, url) + + return self.on_diff_meta(name, url) + + def on_same_meta(self, name, url): + # logger.warning('Register same meta: {}:{}'.format(name, url)) + pass + + def on_diff_meta(self, name, url): + logger.warning('Received {} with diff url={}'.format(name, url)) + self.metas[name] = url + self.conns[name] = {} + + def on_unregister_meta(self, name, url): + logger.info('Unregister name={};url={}'.format(name, url)) + self.conns.pop(name, None) + + def on_nonexisted_meta(self, name): + logger.warning('Non-existed meta: {}'.format(name)) + + def register(self, name, url): + meta = self.metas.get(name) + if not meta: + return self.on_new_meta(name, url) + else: + return self.on_duplicate_meta(name, url) + + def unregister(self, name): + 
logger.info('Unregister Connection: name={}'.format(name)) + url = self.metas.pop(name, None) + if url is None: + return self.on_nonexisted_meta(name) + return self.on_unregister_meta(name, url) diff --git a/shards/mishards/db_base.py b/shards/mishards/db_base.py new file mode 100644 index 0000000000..5f2eee9ba1 --- /dev/null +++ b/shards/mishards/db_base.py @@ -0,0 +1,52 @@ +import logging +from sqlalchemy import create_engine +from sqlalchemy.engine.url import make_url +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker, scoped_session +from sqlalchemy.orm.session import Session as SessionBase + +logger = logging.getLogger(__name__) + + +class LocalSession(SessionBase): + def __init__(self, db, autocommit=False, autoflush=True, **options): + self.db = db + bind = options.pop('bind', None) or db.engine + SessionBase.__init__(self, autocommit=autocommit, autoflush=autoflush, bind=bind, **options) + + +class DB: + Model = declarative_base() + + def __init__(self, uri=None, echo=False): + self.echo = echo + uri and self.init_db(uri, echo) + self.session_factory = scoped_session(sessionmaker(class_=LocalSession, db=self)) + + def init_db(self, uri, echo=False): + url = make_url(uri) + if url.get_backend_name() == 'sqlite': + self.engine = create_engine(url) + else: + self.engine = create_engine(uri, pool_size=100, pool_recycle=5, pool_timeout=30, + pool_pre_ping=True, + echo=echo, + max_overflow=0) + self.uri = uri + self.url = url + + def __str__(self): + return ''.format(self.url.get_backend_name(), self.url.database) + + @property + def Session(self): + return self.session_factory() + + def remove_session(self): + self.session_factory.remove() + + def drop_all(self): + self.Model.metadata.drop_all(self.engine) + + def create_all(self): + self.Model.metadata.create_all(self.engine) diff --git a/shards/mishards/exception_codes.py b/shards/mishards/exception_codes.py new file mode 100644 index 0000000000..bdd4572dd5 --- 
/dev/null +++ b/shards/mishards/exception_codes.py @@ -0,0 +1,10 @@ +INVALID_CODE = -1 + +CONNECT_ERROR_CODE = 10001 +CONNECTTION_NOT_FOUND_CODE = 10002 +DB_ERROR_CODE = 10003 + +TABLE_NOT_FOUND_CODE = 20001 +INVALID_ARGUMENT_CODE = 20002 +INVALID_DATE_RANGE_CODE = 20003 +INVALID_TOPK_CODE = 20004 diff --git a/shards/mishards/exception_handlers.py b/shards/mishards/exception_handlers.py new file mode 100644 index 0000000000..c79a6db5a3 --- /dev/null +++ b/shards/mishards/exception_handlers.py @@ -0,0 +1,82 @@ +import logging +from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 +from mishards import grpc_server as server, exceptions + +logger = logging.getLogger(__name__) + + +def resp_handler(err, error_code): + if not isinstance(err, exceptions.BaseException): + return status_pb2.Status(error_code=error_code, reason=str(err)) + + status = status_pb2.Status(error_code=error_code, reason=err.message) + + if err.metadata is None: + return status + + resp_class = err.metadata.get('resp_class', None) + if not resp_class: + return status + + if resp_class == milvus_pb2.BoolReply: + return resp_class(status=status, bool_reply=False) + + if resp_class == milvus_pb2.VectorIds: + return resp_class(status=status, vector_id_array=[]) + + if resp_class == milvus_pb2.TopKQueryResultList: + return resp_class(status=status, topk_query_result=[]) + + if resp_class == milvus_pb2.TableRowCount: + return resp_class(status=status, table_row_count=-1) + + if resp_class == milvus_pb2.TableName: + return resp_class(status=status, table_name=[]) + + if resp_class == milvus_pb2.StringReply: + return resp_class(status=status, string_reply='') + + if resp_class == milvus_pb2.TableSchema: + return milvus_pb2.TableSchema( + status=status + ) + + if resp_class == milvus_pb2.IndexParam: + return milvus_pb2.IndexParam( + table_name=milvus_pb2.TableName( + status=status + ) + ) + + status.error_code = status_pb2.UNEXPECTED_ERROR + return status + + 
@server.errorhandler(exceptions.TableNotFoundError)
def TableNotFoundErrorHandler(err):
    """Log ``err`` and reply with a TABLE_NOT_EXISTS status."""
    logger.error(err)
    return resp_handler(err, status_pb2.TABLE_NOT_EXISTS)


@server.errorhandler(exceptions.InvalidTopKError)
def InvalidTopKErrorHandler(err):
    """Log ``err`` and reply with an ILLEGAL_TOPK status."""
    logger.error(err)
    return resp_handler(err, status_pb2.ILLEGAL_TOPK)


@server.errorhandler(exceptions.InvalidArgumentError)
def InvalidArgumentErrorHandler(err):
    """Log ``err`` and reply with an ILLEGAL_ARGUMENT status."""
    logger.error(err)
    return resp_handler(err, status_pb2.ILLEGAL_ARGUMENT)


@server.errorhandler(exceptions.DBError)
def DBErrorHandler(err):
    """Log ``err`` and reply with an UNEXPECTED_ERROR status."""
    logger.error(err)
    return resp_handler(err, status_pb2.UNEXPECTED_ERROR)


# Previously this function reused the name InvalidArgumentErrorHandler and
# shadowed the handler defined above; the registration itself is unchanged.
@server.errorhandler(exceptions.InvalidRangeError)
def InvalidRangeErrorHandler(err):
    """Log ``err`` and reply with an ILLEGAL_RANGE status."""
    logger.error(err)
    return resp_handler(err, status_pb2.ILLEGAL_RANGE)


# patch: diff --git a/shards/mishards/exceptions.py b/shards/mishards/exceptions.py new file mode 100644 index 0000000000..72839f88d2 --- /dev/null +++ b/shards/mishards/exceptions.py @@ -0,0 +1,38 @@
import mishards.exception_codes as codes


class BaseException(Exception):
    """Root of the mishards exception hierarchy.

    Note: the name shadows the builtin ``BaseException`` inside this module;
    it is kept because other modules refer to ``exceptions.BaseException``.

    Attributes:
        code: numeric code from ``exception_codes``.
        message: human-readable reason; defaults to the class name.
        metadata: optional dict passed through to the error handlers.
    """
    code = codes.INVALID_CODE
    message = 'BaseException'

    def __init__(self, message='', metadata=None):
        self.message = self.__class__.__name__ if not message else message
        self.metadata = metadata
        # Hand the message to Exception so str(err) and log output are never
        # blank, even when the exception is built with keyword arguments only.
        super().__init__(self.message)


class ConnectionConnectError(BaseException):
    """Raised with CONNECT_ERROR_CODE."""
    code = codes.CONNECT_ERROR_CODE


class ConnectionNotFoundError(BaseException):
    """Raised with CONNECTTION_NOT_FOUND_CODE."""
    code = codes.CONNECTTION_NOT_FOUND_CODE


class DBError(BaseException):
    """Raised with DB_ERROR_CODE."""
    code = codes.DB_ERROR_CODE


class TableNotFoundError(BaseException):
    """Raised with TABLE_NOT_FOUND_CODE."""
    code = codes.TABLE_NOT_FOUND_CODE


class InvalidTopKError(BaseException):
    """Raised with INVALID_TOPK_CODE."""
    code = codes.INVALID_TOPK_CODE


class InvalidArgumentError(BaseException):
    """Raised with INVALID_ARGUMENT_CODE."""
    code = codes.INVALID_ARGUMENT_CODE


class InvalidRangeError(BaseException):
    """Raised with INVALID_DATE_RANGE_CODE."""
    code = codes.INVALID_DATE_RANGE_CODE


# patch: diff --git a/shards/mishards/factories.py b/shards/mishards/factories.py new file mode 100644 index 0000000000..52c0253b39 --- /dev/null
# patch: +++ b/shards/mishards/factories.py @@ -0,0 +1,54 @@
import time
import datetime
import random
import factory
from factory.alchemy import SQLAlchemyModelFactory
from faker import Faker
from faker.providers import BaseProvider

from milvus.client.types import MetricType
from mishards import db
from mishards.models import Tables, TableFiles


class FakerProvider(BaseProvider):
    """Extra faker provider used by the model factories below."""

    def this_date(self):
        """Return today's date packed into one integer.

        The value is (year - 1900) * 10000 + (month - 1) * 100 + day.
        """
        t = datetime.datetime.today()
        return (t.year - 1900) * 10000 + (t.month - 1) * 100 + t.day


# Makes factory.Faker('this_date') available to the factories below.
factory.Faker.add_provider(FakerProvider)


class TablesFactory(SQLAlchemyModelFactory):
    """Factory producing ``Tables`` rows with randomised field values."""

    class Meta:
        model = Tables
        sqlalchemy_session = db.session_factory
        sqlalchemy_session_persistence = 'commit'

    id = factory.Faker('random_number', digits=16, fix_len=True)
    table_id = factory.Faker('uuid4')
    state = factory.Faker('random_element', elements=(0, 1))
    dimension = factory.Faker('random_element', elements=(256, 512))
    # Evaluated once, when this class body runs; every row built by the
    # factory shares the same timestamp.
    created_on = int(time.time())
    index_file_size = 0
    engine_type = factory.Faker('random_element', elements=(0, 1, 2, 3))
    metric_type = factory.Faker('random_element', elements=(MetricType.L2, MetricType.IP))
    nlist = 16384


class TableFilesFactory(SQLAlchemyModelFactory):
    """Factory producing ``TableFiles`` rows, each with its own ``Tables`` row."""

    class Meta:
        model = TableFiles
        sqlalchemy_session = db.session_factory
        sqlalchemy_session_persistence = 'commit'

    id = factory.Faker('random_number', digits=16, fix_len=True)
    # A parent table is created through TablesFactory for every file.
    table = factory.SubFactory(TablesFactory)
    engine_type = factory.Faker('random_element', elements=(0, 1, 2, 3))
    file_id = factory.Faker('uuid4')
    file_type = factory.Faker('random_element', elements=(0, 1, 2, 3, 4))
    file_size = factory.Faker('random_number')
    # Both timestamps are evaluated once, when this class body runs.
    updated_time = int(time.time())
    created_on = int(time.time())
    date = factory.Faker('this_date')


# patch: diff --git a/shards/mishards/grpc_utils/__init__.py b/shards/mishards/grpc_utils/__init__.py new file mode 100644 index 0000000000..f5225b2a66 --- /dev/null +++ b/shards/mishards/grpc_utils/__init__.py @@ -0,0
# patch: +1,37 @@
from grpc_opentracing import SpanDecorator
from milvus.grpc_gen import status_pb2


class GrpcSpanDecorator(SpanDecorator):
    """Span decorator that tags a span as failed when the reply is an error."""

    def __call__(self, span, rpc_info):
        """Inspect ``rpc_info.response`` and record an error on ``span``.

        Falsy responses and responses whose status has ``error_code == 0``
        leave the span untouched.  A response that is neither a ``Status`` nor
        an object exposing ``.status`` is reported as UNEXPECTED_ERROR.
        """
        if not rpc_info.response:
            return

        if isinstance(rpc_info.response, status_pb2.Status):
            status = rpc_info.response
        else:
            try:
                status = rpc_info.response.status
            except Exception:
                status = status_pb2.Status(error_code=status_pb2.UNEXPECTED_ERROR,
                                           reason='Should not happen')

        if status.error_code == 0:
            return

        span.set_tag('error', True)
        span.log_kv({
            'event': 'error',
            'request': rpc_info.request,
            'response': rpc_info.response,
        })


def mark_grpc_method(func):
    """Flag ``func`` as a gRPC entry point and return it unchanged."""
    func.grpc_method = True
    return func


def is_grpc_method(func):
    """Return the ``grpc_method`` flag of ``func`` (False when absent or falsy func)."""
    return getattr(func, 'grpc_method', False) if func else False


# patch: diff --git a/shards/mishards/grpc_utils/grpc_args_parser.py b/shards/mishards/grpc_utils/grpc_args_parser.py new file mode 100644 index 0000000000..039299803d --- /dev/null +++ b/shards/mishards/grpc_utils/grpc_args_parser.py @@ -0,0 +1,102 @@
from milvus import Status
from functools import wraps


def error_status(func):
    """Wrap ``func`` so that it returns ``(Status, result)`` instead of raising.

    Any exception becomes ``(Status(UNEXPECTED_ERROR, str(exc)), None)``;
    success becomes ``(Status(0, "Success"), result)``.
    """
    @wraps(func)
    def inner(*args, **kwargs):
        try:
            outcome = func(*args, **kwargs)
        except Exception as e:
            return Status(code=Status.UNEXPECTED_ERROR, message=str(e)), None
        else:
            return Status(code=0, message="Success"), outcome

    return inner


class GrpcArgsParser(object):
    """Extracts plain Python values from incoming protobuf request messages.

    Every parser is wrapped by ``error_status`` and therefore returns a
    ``(status, value)`` pair.
    """

    @classmethod
    @error_status
    def parse_proto_TableSchema(cls, param):
        """Return the schema fields of ``param`` as a dict."""
        return {
            'status': param.status,
            'table_name': param.table_name,
            'dimension': param.dimension,
            'index_file_size': param.index_file_size,
            'metric_type': param.metric_type
        }

    @classmethod
    @error_status
    def parse_proto_TableName(cls, param):
        """Return ``param.table_name``."""
        return param.table_name

    @classmethod
    @error_status
    def parse_proto_Index(cls, param):
        """Return ``index_type`` and ``nlist`` of ``param`` as a dict."""
        return {
            'index_type': param.index_type,
            'nlist': param.nlist
        }

    @classmethod
    @error_status
    def parse_proto_IndexParam(cls, param):
        """Return ``(table_name, index_dict)``; fails if the index cannot be parsed."""
        name = param.table_name
        index_status, index = cls.parse_proto_Index(param.index)

        if not index_status.OK():
            raise Exception("Argument parse error")

        return name, index

    @classmethod
    @error_status
    def parse_proto_Command(cls, param):
        """Return ``param.cmd``."""
        return param.cmd

    @classmethod
    @error_status
    def parse_proto_Range(cls, param):
        """Return ``(start_value, end_value)`` of ``param``."""
        return param.start_value, param.end_value

    @classmethod
    @error_status
    def parse_proto_RowRecord(cls, param):
        """Return ``param.vector_data`` as a list."""
        return list(param.vector_data)

    @classmethod
    @error_status
    def parse_proto_SearchParam(cls, param):
        """Return ``(table_name, query_record_array, range, topk)``.

        NOTE(review): ``nprobe`` is read but not returned, and the whole
        ``query_range_array`` is handed to the single-range parser -- confirm
        against the proto definition whether this is intended.
        """
        name = param.table_name
        topk = param.topk
        param.nprobe  # read only; the value is not part of the result
        range_status, parsed_range = cls.parse_proto_Range(param.query_range_array)

        if not range_status.OK():
            raise Exception("Argument parse error")

        return name, param.query_record_array, parsed_range, topk

    @classmethod
    @error_status
    def parse_proto_DeleteByRangeParam(cls, param):
        """Return ``(table_name, start_value, end_value)``."""
        name = param.table_name
        bounds = param.range
        return name, bounds.start_value, bounds.end_value


# patch: diff --git a/shards/mishards/grpc_utils/grpc_args_wrapper.py b/shards/mishards/grpc_utils/grpc_args_wrapper.py new file mode 100644 index 0000000000..7447dbd995 --- /dev/null +++ b/shards/mishards/grpc_utils/grpc_args_wrapper.py @@ -0,0 +1,4 @@
# class GrpcArgsWrapper(object):

#     @classmethod
#     def proto_TableName(cls):


# patch: diff --git a/shards/mishards/grpc_utils/test_grpc.py b/shards/mishards/grpc_utils/test_grpc.py new file mode 100644 index 0000000000..9af09e5d0d --- /dev/null +++ b/shards/mishards/grpc_utils/test_grpc.py @@ -0,0 +1,75 @@
import logging
import opentracing
from mishards.grpc_utils import GrpcSpanDecorator, is_grpc_method
from milvus.grpc_gen import status_pb2, milvus_pb2

logger = logging.getLogger(__name__)


class FakeTracer(opentracing.Tracer):
    """Tracer stand-in with no behaviour of its own."""
    pass


class FakeSpan(opentracing.Span):
    """Span stand-in that records tags and logs in plain lists."""

    def __init__(self, context, tracer, **kwargs):
        super(FakeSpan, self).__init__(tracer, context)
        self.reset()

    def set_tag(self, key, value):
        self.tags.append({key: value})

    def log_kv(self, key_values, timestamp=None):
        self.logs.append(key_values)

    def reset(self):
        self.tags = []
        self.logs = []


class FakeRpcInfo:
    """Minimal object exposing the two attributes the decorator reads."""

    def __init__(self, request, response):
        self.request = request
        self.response = response


class TestGrpcUtils:
    def test_span_deco(self):
        """The span is tagged only for responses that carry an error."""
        request = 'request'
        OK = status_pb2.Status(error_code=status_pb2.SUCCESS, reason='Success')

        # (response, number of tags/logs expected on the span afterwards)
        cases = [
            (OK, 0),
            (milvus_pb2.BoolReply(status=OK, bool_reply=False), 0),
            (1, 1),   # no .status attribute -> reported as unexpected error
            (0, 0),   # falsy response is ignored
        ]

        for response, expected in cases:
            rpc_info = FakeRpcInfo(request=request, response=response)
            span = FakeSpan(context=None, tracer=FakeTracer())
            span_deco = GrpcSpanDecorator()
            span_deco(span, rpc_info)
            assert len(span.logs) == expected
            assert len(span.tags) == expected

    def test_is_grpc_method(self):
        """Objects without the marker, and None, are not gRPC methods."""
        for target in (1, None):
            assert not is_grpc_method(target)


# patch: diff --git a/shards/mishards/hash_ring.py
# patch: b/shards/mishards/hash_ring.py new file mode 100644 index 0000000000..a97f3f580e --- /dev/null +++ b/shards/mishards/hash_ring.py @@ -0,0 +1,150 @@
import math
import sys
from bisect import bisect

if sys.version_info >= (2, 5):
    import hashlib
    md5_constructor = hashlib.md5
else:
    import md5
    md5_constructor = md5.new


class HashRing(object):
    """Consistent-hash ring mapping string keys onto a set of nodes."""

    def __init__(self, nodes=None, weights=None):
        """`nodes` is a list of objects that have a proper __str__ representation.
        `weights` is dictionary that sets weights to the nodes.  The default
        weight is that all nodes are equal.

        Omitting `nodes` (or passing None) builds an empty ring.
        """
        self.ring = dict()
        self._sorted_keys = []

        # The default used to be stored as None, which made
        # _generate_circle() fail with TypeError.
        self.nodes = nodes if nodes is not None else []

        if not weights:
            weights = {}
        self.weights = weights

        self._generate_circle()

    def _generate_circle(self):
        """Generates the circle.

        Each node gets floor(40 * len(nodes) * weight / total_weight) digests,
        and every digest contributes three ring positions.
        """
        total_weight = sum(self.weights.get(node, 1) for node in self.nodes)

        for node in self.nodes:
            weight = self.weights.get(node, 1)

            factor = math.floor((40 * len(self.nodes) * weight) / total_weight)

            for j in range(0, int(factor)):
                b_key = self._hash_digest('%s-%s' % (node, j))

                for i in range(0, 3):
                    # Take the i-th group of four digest bytes.
                    key = self._hash_val(b_key, lambda x: x + i * 4)
                    self.ring[key] = node
                    self._sorted_keys.append(key)

        self._sorted_keys.sort()

    def get_node(self, string_key):
        """Given a string key a corresponding node in the hash ring is returned.

        If the hash ring is empty, `None` is returned.
        """
        pos = self.get_node_pos(string_key)
        if pos is None:
            return None
        return self.ring[self._sorted_keys[pos]]

    def get_node_pos(self, string_key):
        """Given a string key, return the index into the sorted ring keys of
        the position responsible for it.

        If the hash ring is empty, `None` is returned.
        """
        if not self.ring:
            return None

        key = self.gen_key(string_key)

        nodes = self._sorted_keys
        pos = bisect(nodes, key)

        # Past the last key: wrap around to the start of the ring.
        if pos == len(nodes):
            return 0
        return pos

    def iterate_nodes(self, string_key, distinct=True):
        """Given a string key it returns the nodes as a generator that can hold the key.

        The generator iterates one time through the ring
        starting at the correct position.

        if `distinct` is set, then the nodes returned will be unique,
        i.e. no virtual copies will be returned.  With `distinct=False` every
        ring position is reported, including repeated nodes.

        An empty ring yields the single pair (None, None).
        """
        if not self.ring:
            yield None, None
            return

        returned_values = set()

        def distinct_filter(value):
            # Previously the flag was ignored and filtering always applied.
            if not distinct:
                return value
            if str(value) not in returned_values:
                returned_values.add(str(value))
                return value

        pos = self.get_node_pos(string_key)
        # Walk from pos to the end, then wrap around to cover [0, pos).
        for key in self._sorted_keys[pos:] + self._sorted_keys[:pos]:
            val = distinct_filter(self.ring[key])
            if val:
                yield val

    def gen_key(self, key):
        """Given a string key it returns a long value,
        this long value represents a place on the hash ring.

        md5 is currently used because it mixes well.
        """
        b_key = self._hash_digest(key)
        return self._hash_val(b_key, lambda x: x)

    def _hash_val(self, b_key, entry_fn):
        """Combine four digest bytes, selected through `entry_fn`, into one
        integer (byte entry_fn(0) is the least significant)."""
        return (b_key[entry_fn(3)] << 24) | (b_key[entry_fn(2)] << 16) | (
            b_key[entry_fn(1)] << 8) | b_key[entry_fn(0)]

    def _hash_digest(self, key):
        """Return the md5 digest (bytes) of the string `key`."""
        m = md5_constructor()
        key = key.encode()
        m.update(key)
        return m.digest()


if __name__ == '__main__':
    from collections import defaultdict
    servers = [
        '192.168.0.246:11212', '192.168.0.247:11212', '192.168.0.248:11212',
        '192.168.0.249:11212'
    ]

    ring = HashRing(servers)
    keys = ['{}'.format(i) for i in range(100)]
    mapped = defaultdict(list)
    for k in keys:
        server = ring.get_node(k)
        mapped[server].append(k)

    for k, v in mapped.items():
        print(k, v)

# patch: diff --git a/shards/mishards/main.py b/shards/mishards/main.py new file mode 100644 index 0000000000..c0d142607b --- /dev/null +++ b/shards/mishards/main.py @@ -0,0 +1,15 @@
import os
import sys
# Adds the parent of this file's directory to the import path
# (presumably so `mishards` resolves when run as a script -- confirm).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from mishards import (settings, create_app)


def main():
    """Create the server from the default config and run it; returns 0."""
    server = create_app(settings.DefaultConfig)
    server.run(port=settings.SERVER_PORT)
    return 0


if __name__ == '__main__':
    sys.exit(main())

# patch: diff --git a/shards/mishards/models.py b/shards/mishards/models.py new file mode 100644 index 0000000000..4b6c8f9ef4 --- /dev/null +++ b/shards/mishards/models.py @@ -0,0 +1,76 @@
import logging
from sqlalchemy import (Integer, Boolean, Text,
                        String, BigInteger, and_, or_,
                        Column)
from sqlalchemy.orm import relationship, backref

from mishards import db

logger = logging.getLogger(__name__)


class TableFiles(db.Model):
    """ORM mapping of the ``TableFiles`` table."""

    # Values stored in the file_type column.
    FILE_TYPE_NEW = 0
    FILE_TYPE_RAW = 1
    FILE_TYPE_TO_INDEX = 2
    FILE_TYPE_INDEX = 3
    FILE_TYPE_TO_DELETE = 4
    FILE_TYPE_NEW_MERGE = 5
    FILE_TYPE_NEW_INDEX = 6
    FILE_TYPE_BACKUP = 7

    __tablename__ = 'TableFiles'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    table_id = Column(String(50))
    engine_type = Column(Integer)
    file_id = Column(String(50))
    file_type = Column(Integer)
    file_size = Column(Integer, default=0)
    row_count = Column(Integer, default=0)
    updated_time = Column(BigInteger)
    created_on = Column(BigInteger)
    date = Column(Integer)

    # Joined on table_id without a database-level foreign key; the backref
    # gives Tables a dynamic (query-returning) ``files`` attribute.
    table = relationship(
        'Tables',
        primaryjoin='and_(foreign(TableFiles.table_id) == Tables.table_id)',
        backref=backref('files', uselist=True, lazy='dynamic')
    )


class Tables(db.Model):
    """ORM mapping of the ``Tables`` table."""

    # Values stored in the state column.
    TO_DELETE = 1
    NORMAL = 0

    __tablename__ = 'Tables'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    table_id = Column(String(50), unique=True)
    state = Column(Integer)
    dimension = Column(Integer)
    created_on = Column(Integer)
    flag = Column(Integer, default=0)
    index_file_size = Column(Integer)
    engine_type = Column(Integer)
    nlist = Column(Integer)
    metric_type = Column(Integer)

    def files_to_search(self, date_range=None):
        """Return a query over this table's RAW, TO_INDEX and INDEX files.

        Args:
            date_range: optional iterable of ``(start, end)`` pairs; a file
                matches when ``start <= date < end`` for at least one pair.

        Returns:
            The filtered ``files`` query (not yet executed).
        """
        cond = or_(
            TableFiles.file_type == TableFiles.FILE_TYPE_RAW,
            TableFiles.file_type == TableFiles.FILE_TYPE_TO_INDEX,
            TableFiles.file_type == TableFiles.FILE_TYPE_INDEX,
        )
        if date_range:
            # or_() takes its clauses as positional arguments.
            in_any_range = or_(*[
                and_(TableFiles.date >= d[0], TableFiles.date < d[1])
                for d in date_range
            ])
            cond = and_(cond, in_any_range)

        files = self.files.filter(cond)

        logger.debug('DATE_RANGE: {}'.format(date_range))
        return files

# patch: diff --git a/shards/mishards/routings.py b/shards/mishards/routings.py new file mode 100644 index 0000000000..823972726f --- /dev/null +++ b/shards/mishards/routings.py @@ -0,0 +1,96 @@
import logging
from sqlalchemy import exc as sqlalchemy_exc
from sqlalchemy import and_

from mishards import exceptions, db
from mishards.hash_ring import HashRing
from mishards.models import Tables

logger = logging.getLogger(__name__)


class RouteManager:
    """Registry of router classes, keyed by name."""

    ROUTER_CLASSES = {}

    @classmethod
    def register_router_class(cls, target):
        """Class decorator: register ``target`` under its ``NAME`` attribute.

        Falls back to the class's own name when ``NAME`` is missing or empty.
        """
        name = target.__dict__.get('NAME', None)
        # target is a class, so target.__class__.__name__ would be 'type'.
        name = name if name else target.__name__
        cls.ROUTER_CLASSES[name] = target
        return target

    @classmethod
    def get_router_class(cls, name):
        """Return the class registered as ``name``, or None."""
        return cls.ROUTER_CLASSES.get(name, None)


class RouterFactory:
    @classmethod
    def new_router(cls, name, conn_mgr, **kwargs):
        """Instantiate the router registered as ``name``."""
        router_class = RouteManager.get_router_class(name)
        assert router_class
        return router_class(conn_mgr, **kwargs)


class RouterMixin:
    """Connection helpers shared by router implementations."""

    def __init__(self, conn_mgr):
        self.conn_mgr = conn_mgr

    def routing(self, table_name, metadata=None, **kwargs):
        """Return the routing table for ``table_name``; subclasses override."""
        # NotImplemented is a constant, not an exception; calling it raised
        # TypeError instead of signalling an abstract method.
        raise NotImplementedError()

    def connection(self, metadata=None):
        """Return the client connection registered as 'WOSERVER'.

        Raises:
            exceptions.ConnectionNotFoundError: no such connection exists.
        """
        return self.query_conn('WOSERVER', metadata=metadata)

    def query_conn(self, name, metadata=None):
        """Return the client connection registered as ``name``.

        Raises:
            exceptions.ConnectionNotFoundError: no such connection exists.
        """
        conn = self.conn_mgr.conn(name, metadata=metadata)
        if not conn:
            raise exceptions.ConnectionNotFoundError(name, metadata=metadata)
        conn.on_connect(metadata=metadata)
        return conn.conn


@RouteManager.register_router_class
class FileBasedHashRingRouter(RouterMixin):
    """Spreads a table's searchable files over the known servers by hash ring."""

    NAME = 'FileBasedHashRingRouter'

    def __init__(self, conn_mgr, **kwargs):
        super(FileBasedHashRingRouter, self).__init__(conn_mgr)

    def routing(self, table_name, metadata=None, **kwargs):
        """Return ``{server: {'table_id': ..., 'file_ids': [...]}}``.

        ``range_array`` (keyword, optional) restricts the files by date.
        """
        range_array = kwargs.pop('range_array', None)
        return self._route(table_name, range_array, metadata, **kwargs)

    def _route(self, table_name, range_array, metadata=None, **kwargs):
        """Look up the table's files and assign each one to a server.

        Raises:
            exceptions.DBError: the metadata query failed.
            exceptions.TableNotFoundError: no live table named ``table_name``.
        """
        # PXU TODO: Implement Thread-local Context
        # PXU TODO: Session life mgt
        try:
            table = db.Session.query(Tables).filter(
                and_(Tables.table_id == table_name,
                     Tables.state != Tables.TO_DELETE)).first()
            if not table:
                raise exceptions.TableNotFoundError(table_name, metadata=metadata)
            # Run the lazy file query while the session is still open.
            files = table.files_to_search(range_array).all()
        except sqlalchemy_exc.SQLAlchemyError as e:
            raise exceptions.DBError(message=str(e), metadata=metadata) from e
        finally:
            # Release the scoped session on every path, including errors.
            db.remove_session()

        servers = self.conn_mgr.conn_names
        logger.info('Available servers: {}'.format(servers))

        ring = HashRing(servers)

        routing = {}

        for f in files:
            file_id = str(f.id)
            target_host = ring.get_node(file_id)
            sub = routing.setdefault(
                target_host, {'table_id': table_name, 'file_ids': []})
            sub['file_ids'].append(file_id)

        return routing

# patch: diff --git a/shards/mishards/server.py b/shards/mishards/server.py new file mode 100644 index 0000000000..599a00e455 --- /dev/null +++ b/shards/mishards/server.py @@ -0,0 +1,122 @@
import logging
import grpc
import time
import socket
import inspect
from urllib.parse import urlparse
from functools import wraps
from concurrent import futures
from grpc._cython import cygrpc
from milvus.grpc_gen.milvus_pb2_grpc import add_MilvusServiceServicer_to_server
from mishards.grpc_utils import is_grpc_method
from mishards.service_handler import ServiceHandler
from mishards import settings

logger = logging.getLogger(__name__)


class Server:
    """gRPC front-end: owns the grpc server, pre-run hooks and error handlers."""

    def __init__(self):
        self.pre_run_handlers = set()
        self.grpc_methods = set()
        self.error_handlers = {}
        self.exit_flag = False

    def init_app(self,
                 conn_mgr,
                 tracer,
                 router,
                 discover,
                 port=19530,
                 max_workers=10,
                 **kwargs):
        """Attach collaborators and build the underlying grpc server."""
        self.port = int(port)
        self.conn_mgr = conn_mgr
        self.tracer = tracer
        self.router = router
        self.discover = discover

        # -1 lifts the message size limits in both directions.
        self.server_impl = grpc.server(
            thread_pool=futures.ThreadPoolExecutor(max_workers=max_workers),
            options=[(cygrpc.ChannelArgKey.max_send_message_length, -1),
                     (cygrpc.ChannelArgKey.max_receive_message_length, -1)])

        self.server_impl = self.tracer.decorate(self.server_impl)

        self.register_pre_run_handler(self.pre_run_handler)

    def pre_run_handler(self):
        """Resolve settings.WOSERVER and register it as connection 'WOSERVER'."""
        woserver = settings.WOSERVER
        url = urlparse(woserver)
        ip = socket.gethostbyname(url.hostname)
        # Raises if the resolved address is not a valid IPv4 string.
        socket.inet_pton(socket.AF_INET, ip)
        self.conn_mgr.register(
            'WOSERVER', '{}://{}:{}'.format(url.scheme, ip, url.port or 80))

    def register_pre_run_handler(self, func):
        """Add ``func`` to the callables run by ``on_pre_run``; returns ``func``."""
        logger.info('Regiterring {} into server pre_run_handlers'.format(func))
        self.pre_run_handlers.add(func)
        return func

    def wrap_method_with_errorhandler(self, func):
        """Return ``func`` wrapped so registered handlers turn errors into replies.

        The handler for the exception's own class is preferred, then handlers
        registered for its base classes (in MRO order).  Exceptions with no
        matching handler propagate unchanged.
        """
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                for klass in type(e).__mro__:
                    handler = self.error_handlers.get(klass)
                    if handler is not None:
                        return handler(e)
                raise

        return wrapper

    def errorhandler(self, exception):
        """Decorator factory: ``@server.errorhandler(SomeError)`` registers the
        decorated function as the handler for ``SomeError``.

        When ``exception`` is not an Exception subclass it is returned as-is
        and nothing is registered.
        """
        if inspect.isclass(exception) and issubclass(exception, Exception):

            def wrapper(func):
                self.error_handlers[exception] = func
                return func

            return wrapper
        return exception

    def on_pre_run(self):
        """Run every pre-run handler, then start service discovery."""
        for handler in self.pre_run_handlers:
            handler()
        self.discover.start()

    def start(self, port=None):
        """Register the service handler and start listening (non-blocking)."""
        handler_class = self.decorate_handler(ServiceHandler)
        add_MilvusServiceServicer_to_server(
            handler_class(tracer=self.tracer,
                          router=self.router), self.server_impl)
        self.server_impl.add_insecure_port("[::]:{}".format(
            str(port or self.port)))
        self.server_impl.start()

    def run(self, port=None):
        """Start the server and block until ``stop`` is called or Ctrl-C.

        ``port`` falls back to the port given to ``init_app``.
        """
        logger.info('Milvus server start ......')
        port = port or self.port
        self.on_pre_run()

        self.start(port)
        logger.info('Listening on port {}'.format(port))

        try:
            while not self.exit_flag:
                time.sleep(5)
        except KeyboardInterrupt:
            self.stop()

    def stop(self):
        """Stop the grpc server immediately and close the tracer."""
        logger.info('Server is shuting down ......')
        self.exit_flag = True
        self.server_impl.stop(0)
        self.tracer.close()
        logger.info('Server is closed')

    def decorate_handler(self, handler):
        """Wrap every method of ``handler`` marked as a gRPC method with the
        error-handling wrapper; returns ``handler``."""
        # Snapshot the items: the class is modified inside the loop.
        for key, attr in list(handler.__dict__.items()):
            if is_grpc_method(attr):
                setattr(handler, key, self.wrap_method_with_errorhandler(attr))
        return handler

# patch: diff --git a/shards/mishards/service_handler.py b/shards/mishards/service_handler.py new file mode 100644 index 0000000000..5e91c14f14 --- /dev/null +++ b/shards/mishards/service_handler.py @@ -0,0 +1,475 @@
import logging
import time
import datetime
from collections import defaultdict

import multiprocessing
# patch: +from concurrent.futures   (this import statement is split by the dump; its remainder starts the following line)
import ThreadPoolExecutor +from milvus.grpc_gen import milvus_pb2, milvus_pb2_grpc, status_pb2 +from milvus.grpc_gen.milvus_pb2 import TopKQueryResult +from milvus.client.abstract import Range +from milvus.client import types as Types + +from mishards import (db, settings, exceptions) +from mishards.grpc_utils import mark_grpc_method +from mishards.grpc_utils.grpc_args_parser import GrpcArgsParser as Parser +from mishards import utilities + +logger = logging.getLogger(__name__) + + +class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): + MAX_NPROBE = 2048 + MAX_TOPK = 2048 + + def __init__(self, tracer, router, max_workers=multiprocessing.cpu_count(), **kwargs): + self.table_meta = {} + self.error_handlers = {} + self.tracer = tracer + self.router = router + self.max_workers = max_workers + + def _do_merge(self, files_n_topk_results, topk, reverse=False, **kwargs): + status = status_pb2.Status(error_code=status_pb2.SUCCESS, + reason="Success") + if not files_n_topk_results: + return status, [] + + request_results = defaultdict(list) + + calc_time = time.time() + for files_collection in files_n_topk_results: + if isinstance(files_collection, tuple): + status, _ = files_collection + return status, [] + for request_pos, each_request_results in enumerate( + files_collection.topk_query_result): + request_results[request_pos].extend( + each_request_results.query_result_arrays) + request_results[request_pos] = sorted( + request_results[request_pos], + key=lambda x: x.distance, + reverse=reverse)[:topk] + + calc_time = time.time() - calc_time + logger.info('Merge takes {}'.format(calc_time)) + + results = sorted(request_results.items()) + topk_query_result = [] + + for result in results: + query_result = TopKQueryResult(query_result_arrays=result[1]) + topk_query_result.append(query_result) + + return status, topk_query_result + + def _do_query(self, + context, + table_id, + table_meta, + vectors, + topk, + nprobe, + range_array=None, + **kwargs): + metadata = 
kwargs.get('metadata', None) + range_array = [ + utilities.range_to_date(r, metadata=metadata) for r in range_array + ] if range_array else None + + routing = {} + p_span = None if self.tracer.empty else context.get_active_span( + ).context + with self.tracer.start_span('get_routing', child_of=p_span): + routing = self.router.routing(table_id, + range_array=range_array, + metadata=metadata) + logger.info('Routing: {}'.format(routing)) + + metadata = kwargs.get('metadata', None) + + rs = [] + all_topk_results = [] + + def search(addr, query_params, vectors, topk, nprobe, **kwargs): + logger.info( + 'Send Search Request: addr={};params={};nq={};topk={};nprobe={}' + .format(addr, query_params, len(vectors), topk, nprobe)) + + conn = self.router.query_conn(addr, metadata=metadata) + start = time.time() + span = kwargs.get('span', None) + span = span if span else (None if self.tracer.empty else + context.get_active_span().context) + + with self.tracer.start_span('search_{}'.format(addr), + child_of=span): + ret = conn.search_vectors_in_files( + table_name=query_params['table_id'], + file_ids=query_params['file_ids'], + query_records=vectors, + top_k=topk, + nprobe=nprobe, + lazy_=True) + end = time.time() + logger.info('search_vectors_in_files takes: {}'.format(end - start)) + + all_topk_results.append(ret) + + with self.tracer.start_span('do_search', child_of=p_span) as span: + with ThreadPoolExecutor(max_workers=self.max_workers) as pool: + for addr, params in routing.items(): + res = pool.submit(search, + addr, + params, + vectors, + topk, + nprobe, + span=span) + rs.append(res) + + for res in rs: + res.result() + + reverse = table_meta.metric_type == Types.MetricType.IP + with self.tracer.start_span('do_merge', child_of=p_span): + return self._do_merge(all_topk_results, + topk, + reverse=reverse, + metadata=metadata) + + def _create_table(self, table_schema): + return self.router.connection().create_table(table_schema) + + @mark_grpc_method + def CreateTable(self, 
request, context): + _status, _table_schema = Parser.parse_proto_TableSchema(request) + + if not _status.OK(): + return status_pb2.Status(error_code=_status.code, + reason=_status.message) + + logger.info('CreateTable {}'.format(_table_schema['table_name'])) + + _status = self._create_table(_table_schema) + + return status_pb2.Status(error_code=_status.code, + reason=_status.message) + + def _has_table(self, table_name, metadata=None): + return self.router.connection(metadata=metadata).has_table(table_name) + + @mark_grpc_method + def HasTable(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return milvus_pb2.BoolReply(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + bool_reply=False) + + logger.info('HasTable {}'.format(_table_name)) + + _status, _bool = self._has_table(_table_name, + metadata={'resp_class': milvus_pb2.BoolReply}) + + return milvus_pb2.BoolReply(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + bool_reply=_bool) + + def _delete_table(self, table_name): + return self.router.connection().delete_table(table_name) + + @mark_grpc_method + def DropTable(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return status_pb2.Status(error_code=_status.code, + reason=_status.message) + + logger.info('DropTable {}'.format(_table_name)) + + _status = self._delete_table(_table_name) + + return status_pb2.Status(error_code=_status.code, + reason=_status.message) + + def _create_index(self, table_name, index): + return self.router.connection().create_index(table_name, index) + + @mark_grpc_method + def CreateIndex(self, request, context): + _status, unpacks = Parser.parse_proto_IndexParam(request) + + if not _status.OK(): + return status_pb2.Status(error_code=_status.code, + reason=_status.message) + + _table_name, _index = unpacks + + logger.info('CreateIndex 
{}'.format(_table_name)) + + # TODO: interface create_table incompleted + _status = self._create_index(_table_name, _index) + + return status_pb2.Status(error_code=_status.code, + reason=_status.message) + + def _add_vectors(self, param, metadata=None): + return self.router.connection(metadata=metadata).add_vectors( + None, None, insert_param=param) + + @mark_grpc_method + def Insert(self, request, context): + logger.info('Insert') + # TODO: Ths SDK interface add_vectors() could update, add a key 'row_id_array' + _status, _ids = self._add_vectors( + metadata={'resp_class': milvus_pb2.VectorIds}, param=request) + return milvus_pb2.VectorIds(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + vector_id_array=_ids) + + @mark_grpc_method + def Search(self, request, context): + + table_name = request.table_name + + topk = request.topk + nprobe = request.nprobe + + logger.info('Search {}: topk={} nprobe={}'.format( + table_name, topk, nprobe)) + + metadata = {'resp_class': milvus_pb2.TopKQueryResultList} + + if nprobe > self.MAX_NPROBE or nprobe <= 0: + raise exceptions.InvalidArgumentError( + message='Invalid nprobe: {}'.format(nprobe), metadata=metadata) + + if topk > self.MAX_TOPK or topk <= 0: + raise exceptions.InvalidTopKError( + message='Invalid topk: {}'.format(topk), metadata=metadata) + + table_meta = self.table_meta.get(table_name, None) + + if not table_meta: + status, info = self.router.connection( + metadata=metadata).describe_table(table_name) + if not status.OK(): + raise exceptions.TableNotFoundError(table_name, + metadata=metadata) + + self.table_meta[table_name] = info + table_meta = info + + start = time.time() + + query_record_array = [] + + for query_record in request.query_record_array: + query_record_array.append(list(query_record.vector_data)) + + query_range_array = [] + for query_range in request.query_range_array: + query_range_array.append( + Range(query_range.start_value, query_range.end_value)) + + status, 
results = self._do_query(context, + table_name, + table_meta, + query_record_array, + topk, + nprobe, + query_range_array, + metadata=metadata) + + now = time.time() + logger.info('SearchVector takes: {}'.format(now - start)) + + topk_result_list = milvus_pb2.TopKQueryResultList( + status=status_pb2.Status(error_code=status.error_code, + reason=status.reason), + topk_query_result=results) + return topk_result_list + + @mark_grpc_method + def SearchInFiles(self, request, context): + raise NotImplemented() + + def _describe_table(self, table_name, metadata=None): + return self.router.connection(metadata=metadata).describe_table(table_name) + + @mark_grpc_method + def DescribeTable(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return milvus_pb2.TableSchema(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), ) + + metadata = {'resp_class': milvus_pb2.TableSchema} + + logger.info('DescribeTable {}'.format(_table_name)) + _status, _table = self._describe_table(metadata=metadata, + table_name=_table_name) + + if _status.OK(): + return milvus_pb2.TableSchema( + table_name=_table_name, + index_file_size=_table.index_file_size, + dimension=_table.dimension, + metric_type=_table.metric_type, + status=status_pb2.Status(error_code=_status.code, + reason=_status.message), + ) + + return milvus_pb2.TableSchema( + table_name=_table_name, + status=status_pb2.Status(error_code=_status.code, + reason=_status.message), + ) + + def _count_table(self, table_name, metadata=None): + return self.router.connection( + metadata=metadata).get_table_row_count(table_name) + + @mark_grpc_method + def CountTable(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + status = status_pb2.Status(error_code=_status.code, + reason=_status.message) + + return milvus_pb2.TableRowCount(status=status) + + logger.info('CountTable {}'.format(_table_name)) 
+ + metadata = {'resp_class': milvus_pb2.TableRowCount} + _status, _count = self._count_table(_table_name, metadata=metadata) + + return milvus_pb2.TableRowCount( + status=status_pb2.Status(error_code=_status.code, + reason=_status.message), + table_row_count=_count if isinstance(_count, int) else -1) + + def _get_server_version(self, metadata=None): + return self.router.connection(metadata=metadata).server_version() + + @mark_grpc_method + def Cmd(self, request, context): + _status, _cmd = Parser.parse_proto_Command(request) + logger.info('Cmd: {}'.format(_cmd)) + + if not _status.OK(): + return milvus_pb2.StringReply(status=status_pb2.Status( + error_code=_status.code, reason=_status.message)) + + metadata = {'resp_class': milvus_pb2.StringReply} + + if _cmd == 'version': + _status, _reply = self._get_server_version(metadata=metadata) + else: + _status, _reply = self.router.connection( + metadata=metadata).server_status() + + return milvus_pb2.StringReply(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + string_reply=_reply) + + def _show_tables(self, metadata=None): + return self.router.connection(metadata=metadata).show_tables() + + @mark_grpc_method + def ShowTables(self, request, context): + logger.info('ShowTables') + metadata = {'resp_class': milvus_pb2.TableName} + _status, _results = self._show_tables(metadata=metadata) + + return milvus_pb2.TableNameList(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + table_names=_results) + + def _delete_by_range(self, table_name, start_date, end_date): + return self.router.connection().delete_vectors_by_range(table_name, + start_date, + end_date) + + @mark_grpc_method + def DeleteByRange(self, request, context): + _status, unpacks = \ + Parser.parse_proto_DeleteByRangeParam(request) + + if not _status.OK(): + return status_pb2.Status(error_code=_status.code, + reason=_status.message) + + _table_name, _start_date, _end_date = unpacks + + 
logger.info('DeleteByRange {}: {} {}'.format(_table_name, _start_date, + _end_date)) + _status = self._delete_by_range(_table_name, _start_date, _end_date) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) + + def _preload_table(self, table_name): + return self.router.connection().preload_table(table_name) + + @mark_grpc_method + def PreloadTable(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return status_pb2.Status(error_code=_status.code, + reason=_status.message) + + logger.info('PreloadTable {}'.format(_table_name)) + _status = self._preload_table(_table_name) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) + + def _describe_index(self, table_name, metadata=None): + return self.router.connection(metadata=metadata).describe_index(table_name) + + @mark_grpc_method + def DescribeIndex(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return milvus_pb2.IndexParam(status=status_pb2.Status( + error_code=_status.code, reason=_status.message)) + + metadata = {'resp_class': milvus_pb2.IndexParam} + + logger.info('DescribeIndex {}'.format(_table_name)) + _status, _index_param = self._describe_index(table_name=_table_name, + metadata=metadata) + + if not _index_param: + return milvus_pb2.IndexParam(status=status_pb2.Status( + error_code=_status.code, reason=_status.message)) + + _index = milvus_pb2.Index(index_type=_index_param._index_type, + nlist=_index_param._nlist) + + return milvus_pb2.IndexParam(status=status_pb2.Status( + error_code=_status.code, reason=_status.message), + table_name=_table_name, + index=_index) + + def _drop_index(self, table_name): + return self.router.connection().drop_index(table_name) + + @mark_grpc_method + def DropIndex(self, request, context): + _status, _table_name = Parser.parse_proto_TableName(request) + + if not _status.OK(): + return 
status_pb2.Status(error_code=_status.code, + reason=_status.message) + + logger.info('DropIndex {}'.format(_table_name)) + _status = self._drop_index(_table_name) + return status_pb2.Status(error_code=_status.code, + reason=_status.message) diff --git a/shards/mishards/settings.py b/shards/mishards/settings.py new file mode 100644 index 0000000000..21a3bb7a65 --- /dev/null +++ b/shards/mishards/settings.py @@ -0,0 +1,94 @@ +import sys +import os + +from environs import Env +env = Env() + +FROM_EXAMPLE = env.bool('FROM_EXAMPLE', False) +if FROM_EXAMPLE: + from dotenv import load_dotenv + load_dotenv('./mishards/.env.example') +else: + env.read_env() + +DEBUG = env.bool('DEBUG', False) + +LOG_LEVEL = env.str('LOG_LEVEL', 'DEBUG' if DEBUG else 'INFO') +LOG_PATH = env.str('LOG_PATH', '/tmp/mishards') +LOG_NAME = env.str('LOG_NAME', 'logfile') +TIMEZONE = env.str('TIMEZONE', 'UTC') + +from utils.logger_helper import config +config(LOG_LEVEL, LOG_PATH, LOG_NAME, TIMEZONE) + +TIMEOUT = env.int('TIMEOUT', 60) +MAX_RETRY = env.int('MAX_RETRY', 3) + +SERVER_PORT = env.int('SERVER_PORT', 19530) +SERVER_TEST_PORT = env.int('SERVER_TEST_PORT', 19530) +WOSERVER = env.str('WOSERVER') + +SD_PROVIDER_SETTINGS = None +SD_PROVIDER = env.str('SD_PROVIDER', 'Kubernetes') +if SD_PROVIDER == 'Kubernetes': + from sd.kubernetes_provider import KubernetesProviderSettings + SD_PROVIDER_SETTINGS = KubernetesProviderSettings( + namespace=env.str('SD_NAMESPACE', ''), + in_cluster=env.bool('SD_IN_CLUSTER', False), + poll_interval=env.int('SD_POLL_INTERVAL', 5), + pod_patt=env.str('SD_ROSERVER_POD_PATT', ''), + label_selector=env.str('SD_LABEL_SELECTOR', ''), + port=env.int('SD_PORT', 19530)) +elif SD_PROVIDER == 'Static': + from sd.static_provider import StaticProviderSettings + SD_PROVIDER_SETTINGS = StaticProviderSettings( + hosts=env.list('SD_STATIC_HOSTS', []), + port=env.int('SD_STATIC_PORT', 19530)) + +# TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') + + +class 
TracingConfig:
+    TRACING_SERVICE_NAME = env.str('TRACING_SERVICE_NAME', 'mishards')
+    TRACING_VALIDATE = env.bool('TRACING_VALIDATE', True)
+    TRACING_LOG_PAYLOAD = env.bool('TRACING_LOG_PAYLOAD', False)
+    TRACING_CONFIG = {
+        'sampler': {
+            'type': env.str('TRACING_SAMPLER_TYPE', 'const'),
+            'param': env.str('TRACING_SAMPLER_PARAM', "1"),
+        },
+        'local_agent': {
+            'reporting_host': env.str('TRACING_REPORTING_HOST', '127.0.0.1'),
+            'reporting_port': env.str('TRACING_REPORTING_PORT', '5775')
+        },
+        'logging': env.bool('TRACING_LOGGING', True)
+    }
+    DEFAULT_TRACING_CONFIG = {
+        'sampler': {
+            'type': env.str('TRACING_SAMPLER_TYPE', 'const'),
+            'param': env.str('TRACING_SAMPLER_PARAM', "0"),
+        }
+    }
+
+
+class DefaultConfig:
+    SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI')  # no default given here
+    SQL_ECHO = env.bool('SQL_ECHO', False)
+    TRACING_TYPE = env.str('TRACING_TYPE', '')
+    ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_NAME', 'FileBasedHashRingRouter')
+
+
+class TestingConfig(DefaultConfig):
+    SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI', '')
+    SQL_ECHO = env.bool('SQL_TEST_ECHO', False)
+    TRACING_TYPE = env.str('TRACING_TEST_TYPE', '')
+    ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_TEST_NAME', 'FileBasedHashRingRouter')
+
+
+if __name__ == '__main__':
+    import logging
+    logger = logging.getLogger(__name__)
+    logger.debug('DEBUG')
+    logger.info('INFO')
+    logger.warning('WARN')  # Logger.warn is a deprecated alias of warning
+    logger.error('ERROR')
diff --git a/shards/mishards/test_connections.py b/shards/mishards/test_connections.py
new file mode 100644
index 0000000000..819d2e03da
--- /dev/null
+++ b/shards/mishards/test_connections.py
@@ -0,0 +1,101 @@
+import logging
+import pytest
+import mock
+
+from milvus import Milvus
+from mishards.connections import (ConnectionMgr, Connection)
+from mishards import exceptions
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.usefixtures('app')
+class TestConnection:
+    def test_manager(self):
+        mgr = ConnectionMgr()
+
+        mgr.register('pod1', '111')
+
mgr.register('pod2', '222') + mgr.register('pod2', '222') + mgr.register('pod2', '2222') + assert len(mgr.conn_names) == 2 + + mgr.unregister('pod1') + assert len(mgr.conn_names) == 1 + + mgr.unregister('pod2') + assert len(mgr.conn_names) == 0 + + mgr.register('WOSERVER', 'xxxx') + assert len(mgr.conn_names) == 0 + + assert not mgr.conn('XXXX', None) + with pytest.raises(exceptions.ConnectionNotFoundError): + mgr.conn('XXXX', None, True) + + mgr.conn('WOSERVER', None) + + def test_connection(self): + class Conn: + def __init__(self, state): + self.state = state + + def connect(self, uri): + return self.state + + def connected(self): + return self.state + + FAIL_CONN = Conn(False) + PASS_CONN = Conn(True) + + class Retry: + def __init__(self): + self.times = 0 + + def __call__(self, conn): + self.times += 1 + logger.info('Retrying {}'.format(self.times)) + + class Func(): + def __init__(self): + self.executed = False + + def __call__(self): + self.executed = True + + max_retry = 3 + + RetryObj = Retry() + + c = Connection('client', + uri='xx', + max_retry=max_retry, + on_retry_func=RetryObj) + c.conn = FAIL_CONN + ff = Func() + this_connect = c.connect(func=ff) + with pytest.raises(exceptions.ConnectionConnectError): + this_connect() + assert RetryObj.times == max_retry + assert not ff.executed + RetryObj = Retry() + + c.conn = PASS_CONN + this_connect = c.connect(func=ff) + this_connect() + assert ff.executed + assert RetryObj.times == 0 + + this_connect = c.connect(func=None) + with pytest.raises(TypeError): + this_connect() + + errors = [] + + def error_handler(err): + errors.append(err) + + this_connect = c.connect(func=None, exception_handler=error_handler) + this_connect() + assert len(errors) == 1 diff --git a/shards/mishards/test_models.py b/shards/mishards/test_models.py new file mode 100644 index 0000000000..d60b62713e --- /dev/null +++ b/shards/mishards/test_models.py @@ -0,0 +1,39 @@ +import logging +import pytest +from mishards.factories import 
TableFiles, Tables, TableFilesFactory, TablesFactory +from mishards import db, create_app, settings +from mishards.factories import ( + Tables, TableFiles, + TablesFactory, TableFilesFactory +) + +logger = logging.getLogger(__name__) + + +@pytest.mark.usefixtures('app') +class TestModels: + def test_files_to_search(self): + table = TablesFactory() + new_files_cnt = 5 + to_index_cnt = 10 + raw_cnt = 20 + backup_cnt = 12 + to_delete_cnt = 9 + index_cnt = 8 + new_index_cnt = 6 + new_merge_cnt = 11 + + new_files = TableFilesFactory.create_batch(new_files_cnt, table=table, file_type=TableFiles.FILE_TYPE_NEW, date=110) + to_index_files = TableFilesFactory.create_batch(to_index_cnt, table=table, file_type=TableFiles.FILE_TYPE_TO_INDEX, date=110) + raw_files = TableFilesFactory.create_batch(raw_cnt, table=table, file_type=TableFiles.FILE_TYPE_RAW, date=120) + backup_files = TableFilesFactory.create_batch(backup_cnt, table=table, file_type=TableFiles.FILE_TYPE_BACKUP, date=110) + index_files = TableFilesFactory.create_batch(index_cnt, table=table, file_type=TableFiles.FILE_TYPE_INDEX, date=110) + new_index_files = TableFilesFactory.create_batch(new_index_cnt, table=table, file_type=TableFiles.FILE_TYPE_NEW_INDEX, date=110) + new_merge_files = TableFilesFactory.create_batch(new_merge_cnt, table=table, file_type=TableFiles.FILE_TYPE_NEW_MERGE, date=110) + to_delete_files = TableFilesFactory.create_batch(to_delete_cnt, table=table, file_type=TableFiles.FILE_TYPE_TO_DELETE, date=110) + assert table.files_to_search().count() == raw_cnt + index_cnt + to_index_cnt + + assert table.files_to_search([(100, 115)]).count() == index_cnt + to_index_cnt + assert table.files_to_search([(111, 120)]).count() == 0 + assert table.files_to_search([(111, 121)]).count() == raw_cnt + assert table.files_to_search([(110, 121)]).count() == raw_cnt + index_cnt + to_index_cnt diff --git a/shards/mishards/test_server.py b/shards/mishards/test_server.py new file mode 100644 index 0000000000..efd3912076 
--- /dev/null +++ b/shards/mishards/test_server.py @@ -0,0 +1,279 @@ +import logging +import pytest +import mock +import datetime +import random +import faker +import inspect +from milvus import Milvus +from milvus.client.types import Status, IndexType, MetricType +from milvus.client.abstract import IndexParam, TableSchema +from milvus.grpc_gen import status_pb2, milvus_pb2 +from mishards import db, create_app, settings +from mishards.service_handler import ServiceHandler +from mishards.grpc_utils.grpc_args_parser import GrpcArgsParser as Parser +from mishards.factories import TableFilesFactory, TablesFactory, TableFiles, Tables +from mishards.routings import RouterMixin + +logger = logging.getLogger(__name__) + +OK = Status(code=Status.SUCCESS, message='Success') +BAD = Status(code=Status.PERMISSION_DENIED, message='Fail') + + +@pytest.mark.usefixtures('started_app') +class TestServer: + @property + def client(self): + m = Milvus() + m.connect(host='localhost', port=settings.SERVER_TEST_PORT) + return m + + def test_server_start(self, started_app): + assert started_app.conn_mgr.metas.get('WOSERVER') == settings.WOSERVER + + def test_cmd(self, started_app): + ServiceHandler._get_server_version = mock.MagicMock(return_value=(OK, + '')) + status, _ = self.client.server_version() + assert status.OK() + + Parser.parse_proto_Command = mock.MagicMock(return_value=(BAD, 'cmd')) + status, _ = self.client.server_version() + assert not status.OK() + + def test_drop_index(self, started_app): + table_name = inspect.currentframe().f_code.co_name + ServiceHandler._drop_index = mock.MagicMock(return_value=OK) + status = self.client.drop_index(table_name) + assert status.OK() + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + status = self.client.drop_index(table_name) + assert not status.OK() + + def test_describe_index(self, started_app): + table_name = inspect.currentframe().f_code.co_name + index_type = IndexType.FLAT + nlist = 1 + 
index_param = IndexParam(table_name=table_name, + index_type=index_type, + nlist=nlist) + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_name)) + ServiceHandler._describe_index = mock.MagicMock( + return_value=(OK, index_param)) + status, ret = self.client.describe_index(table_name) + assert status.OK() + assert ret._table_name == index_param._table_name + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + status, _ = self.client.describe_index(table_name) + assert not status.OK() + + def test_preload(self, started_app): + table_name = inspect.currentframe().f_code.co_name + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_name)) + ServiceHandler._preload_table = mock.MagicMock(return_value=OK) + status = self.client.preload_table(table_name) + assert status.OK() + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + status = self.client.preload_table(table_name) + assert not status.OK() + + @pytest.mark.skip + def test_delete_by_range(self, started_app): + table_name = inspect.currentframe().f_code.co_name + + unpacked = table_name, datetime.datetime.today( + ), datetime.datetime.today() + + Parser.parse_proto_DeleteByRangeParam = mock.MagicMock( + return_value=(OK, unpacked)) + ServiceHandler._delete_by_range = mock.MagicMock(return_value=OK) + status = self.client.delete_vectors_by_range( + *unpacked) + assert status.OK() + + Parser.parse_proto_DeleteByRangeParam = mock.MagicMock( + return_value=(BAD, unpacked)) + status = self.client.delete_vectors_by_range( + *unpacked) + assert not status.OK() + + def test_count_table(self, started_app): + table_name = inspect.currentframe().f_code.co_name + count = random.randint(100, 200) + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_name)) + ServiceHandler._count_table = mock.MagicMock(return_value=(OK, count)) + status, ret = self.client.get_table_row_count(table_name) + 
assert status.OK() + assert ret == count + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + status, _ = self.client.get_table_row_count(table_name) + assert not status.OK() + + def test_show_tables(self, started_app): + tables = ['t1', 't2'] + ServiceHandler._show_tables = mock.MagicMock(return_value=(OK, tables)) + status, ret = self.client.show_tables() + assert status.OK() + assert ret == tables + + def test_describe_table(self, started_app): + table_name = inspect.currentframe().f_code.co_name + dimension = 128 + nlist = 1 + table_schema = TableSchema(table_name=table_name, + index_file_size=100, + metric_type=MetricType.L2, + dimension=dimension) + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_schema.table_name)) + ServiceHandler._describe_table = mock.MagicMock( + return_value=(OK, table_schema)) + status, _ = self.client.describe_table(table_name) + assert status.OK() + + ServiceHandler._describe_table = mock.MagicMock( + return_value=(BAD, table_schema)) + status, _ = self.client.describe_table(table_name) + assert not status.OK() + + Parser.parse_proto_TableName = mock.MagicMock(return_value=(BAD, + 'cmd')) + status, ret = self.client.describe_table(table_name) + assert not status.OK() + + def test_insert(self, started_app): + table_name = inspect.currentframe().f_code.co_name + vectors = [[random.random() for _ in range(16)] for _ in range(10)] + ids = [random.randint(1000000, 20000000) for _ in range(10)] + ServiceHandler._add_vectors = mock.MagicMock(return_value=(OK, ids)) + status, ret = self.client.add_vectors( + table_name=table_name, records=vectors) + assert status.OK() + assert ids == ret + + def test_create_index(self, started_app): + table_name = inspect.currentframe().f_code.co_name + unpacks = table_name, None + Parser.parse_proto_IndexParam = mock.MagicMock(return_value=(OK, + unpacks)) + ServiceHandler._create_index = mock.MagicMock(return_value=OK) + status = 
self.client.create_index(table_name=table_name) + assert status.OK() + + Parser.parse_proto_IndexParam = mock.MagicMock(return_value=(BAD, + None)) + status = self.client.create_index(table_name=table_name) + assert not status.OK() + + def test_drop_table(self, started_app): + table_name = inspect.currentframe().f_code.co_name + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_name)) + ServiceHandler._delete_table = mock.MagicMock(return_value=OK) + status = self.client.delete_table(table_name=table_name) + assert status.OK() + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + status = self.client.delete_table(table_name=table_name) + assert not status.OK() + + def test_has_table(self, started_app): + table_name = inspect.currentframe().f_code.co_name + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(OK, table_name)) + ServiceHandler._has_table = mock.MagicMock(return_value=(OK, True)) + has = self.client.has_table(table_name=table_name) + assert has + + Parser.parse_proto_TableName = mock.MagicMock( + return_value=(BAD, table_name)) + status, has = self.client.has_table(table_name=table_name) + assert not status.OK() + assert not has + + def test_create_table(self, started_app): + table_name = inspect.currentframe().f_code.co_name + dimension = 128 + table_schema = dict(table_name=table_name, + index_file_size=100, + metric_type=MetricType.L2, + dimension=dimension) + + ServiceHandler._create_table = mock.MagicMock(return_value=OK) + status = self.client.create_table(table_schema) + assert status.OK() + + Parser.parse_proto_TableSchema = mock.MagicMock(return_value=(BAD, + None)) + status = self.client.create_table(table_schema) + assert not status.OK() + + def random_data(self, n, dimension): + return [[random.random() for _ in range(dimension)] for _ in range(n)] + + def test_search(self, started_app): + table_name = inspect.currentframe().f_code.co_name + to_index_cnt = 
random.randint(10, 20) + table = TablesFactory(table_id=table_name, state=Tables.NORMAL) + to_index_files = TableFilesFactory.create_batch( + to_index_cnt, table=table, file_type=TableFiles.FILE_TYPE_TO_INDEX) + topk = random.randint(5, 10) + nq = random.randint(5, 10) + param = { + 'table_name': table_name, + 'query_records': self.random_data(nq, table.dimension), + 'top_k': topk, + 'nprobe': 2049 + } + + result = [ + milvus_pb2.TopKQueryResult(query_result_arrays=[ + milvus_pb2.QueryResult(id=i, distance=random.random()) + for i in range(topk) + ]) for i in range(nq) + ] + + mock_results = milvus_pb2.TopKQueryResultList(status=status_pb2.Status( + error_code=status_pb2.SUCCESS, reason="Success"), + topk_query_result=result) + + table_schema = TableSchema(table_name=table_name, + index_file_size=table.index_file_size, + metric_type=table.metric_type, + dimension=table.dimension) + + status, _ = self.client.search_vectors(**param) + assert status.code == Status.ILLEGAL_ARGUMENT + + param['nprobe'] = 2048 + RouterMixin.connection = mock.MagicMock(return_value=Milvus()) + RouterMixin.query_conn = mock.MagicMock(return_value=Milvus()) + Milvus.describe_table = mock.MagicMock(return_value=(BAD, + table_schema)) + status, ret = self.client.search_vectors(**param) + assert status.code == Status.TABLE_NOT_EXISTS + + Milvus.describe_table = mock.MagicMock(return_value=(OK, table_schema)) + Milvus.search_vectors_in_files = mock.MagicMock( + return_value=mock_results) + + status, ret = self.client.search_vectors(**param) + assert status.OK() + assert len(ret) == nq diff --git a/shards/mishards/utilities.py b/shards/mishards/utilities.py new file mode 100644 index 0000000000..42e982b5f1 --- /dev/null +++ b/shards/mishards/utilities.py @@ -0,0 +1,20 @@ +import datetime +from mishards import exceptions + + +def format_date(start, end): + return ((start.year - 1900) * 10000 + (start.month - 1) * 100 + start.day, + (end.year - 1900) * 10000 + (end.month - 1) * 100 + end.day) + + 
+def range_to_date(range_obj, metadata=None):
+    try:
+        start = datetime.datetime.strptime(range_obj.start_date, '%Y-%m-%d')
+        end = datetime.datetime.strptime(range_obj.end_date, '%Y-%m-%d')
+        if start >= end:  # explicit check: an `assert` would be stripped under `python -O`
+            raise ValueError('start_date must be earlier than end_date')
+    except ValueError:  # unparsable date, or an empty/inverted range
+        raise exceptions.InvalidRangeError('Invalid time range: {} {}'.format(
+            range_obj.start_date, range_obj.end_date), metadata=metadata)
+
+    return format_date(start, end)
diff --git a/shards/requirements.txt b/shards/requirements.txt
new file mode 100644
index 0000000000..ae224e92ed
--- /dev/null
+++ b/shards/requirements.txt
@@ -0,0 +1,36 @@
+environs==4.2.0
+factory-boy==2.12.0
+Faker==1.0.7
+fire==0.1.3
+google-auth==1.6.3
+grpcio==1.22.0
+grpcio-tools==1.22.0
+kubernetes==10.0.1
+MarkupSafe==1.1.1
+marshmallow==2.19.5
+pymysql==0.9.3
+protobuf==3.9.1
+py==1.8.0
+pyasn1==0.4.7
+pyasn1-modules==0.2.6
+pylint==2.3.1
+pymilvus-test==0.2.28
+#pymilvus==0.2.0
+pyparsing==2.4.0
+pytest==4.6.3
+pytest-level==0.1.1
+pytest-print==0.1.2
+pytest-repeat==0.8.0
+pytest-timeout==1.3.3
+python-dateutil==2.8.0
+python-dotenv==0.10.3
+pytz==2019.1
+requests==2.22.0
+requests-oauthlib==1.2.0
+rsa==4.0
+six==1.12.0
+SQLAlchemy==1.3.5
+urllib3==1.25.3
+jaeger-client>=3.4.0
+grpcio-opentracing>=1.0
+mock==2.0.0
diff --git a/shards/sd/__init__.py b/shards/sd/__init__.py
new file mode 100644
index 0000000000..7943887d0f
--- /dev/null
+++ b/shards/sd/__init__.py
@@ -0,0 +1,28 @@
+import logging
+import inspect
+# from utils import singleton
+
+logger = logging.getLogger(__name__)
+
+
+class ProviderManager:
+    PROVIDERS = {}  # registry: provider name -> registered function or class
+
+    @classmethod
+    def register_service_provider(cls, target):
+        if inspect.isfunction(target):
+            cls.PROVIDERS[target.__name__] = target
+        elif inspect.isclass(target):
+            name = target.__dict__.get('NAME', None)
+            name = name if name else target.__name__  # the class's own name; target.__class__ is its metaclass
+            cls.PROVIDERS[name] = target
+        else:
+            raise AssertionError('Cannot register_service_provider for: {}'.format(target))
+        return target
+
+    @classmethod
+    def
get_provider(cls, name): + return cls.PROVIDERS.get(name, None) + + +from sd import kubernetes_provider, static_provider diff --git a/shards/sd/kubernetes_provider.py b/shards/sd/kubernetes_provider.py new file mode 100644 index 0000000000..eb113db007 --- /dev/null +++ b/shards/sd/kubernetes_provider.py @@ -0,0 +1,331 @@ +import os +import sys +if __name__ == '__main__': + sys.path.append(os.path.dirname(os.path.dirname( + os.path.abspath(__file__)))) + +import re +import logging +import time +import copy +import threading +import queue +import enum +from kubernetes import client, config, watch + +from utils import singleton +from sd import ProviderManager + +logger = logging.getLogger(__name__) + +INCLUSTER_NAMESPACE_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/namespace' + + +class EventType(enum.Enum): + PodHeartBeat = 1 + Watch = 2 + + +class K8SMixin: + def __init__(self, namespace, in_cluster=False, **kwargs): + self.namespace = namespace + self.in_cluster = in_cluster + self.kwargs = kwargs + self.v1 = kwargs.get('v1', None) + if not self.namespace: + self.namespace = open(INCLUSTER_NAMESPACE_PATH).read() + + if not self.v1: + config.load_incluster_config( + ) if self.in_cluster else config.load_kube_config() + self.v1 = client.CoreV1Api() + + +class K8SHeartbeatHandler(threading.Thread, K8SMixin): + def __init__(self, + message_queue, + namespace, + label_selector, + in_cluster=False, + **kwargs): + K8SMixin.__init__(self, + namespace=namespace, + in_cluster=in_cluster, + **kwargs) + threading.Thread.__init__(self) + self.queue = message_queue + self.terminate = False + self.label_selector = label_selector + self.poll_interval = kwargs.get('poll_interval', 5) + + def run(self): + while not self.terminate: + try: + pods = self.v1.list_namespaced_pod( + namespace=self.namespace, + label_selector=self.label_selector) + event_message = {'eType': EventType.PodHeartBeat, 'events': []} + for item in pods.items: + pod = 
self.v1.read_namespaced_pod(name=item.metadata.name, + namespace=self.namespace) + name = pod.metadata.name + ip = pod.status.pod_ip + phase = pod.status.phase + reason = pod.status.reason + message = pod.status.message + ready = True if phase == 'Running' else False + + pod_event = dict(pod=name, + ip=ip, + ready=ready, + reason=reason, + message=message) + + event_message['events'].append(pod_event) + + self.queue.put(event_message) + + except Exception as exc: + logger.error(exc) + + time.sleep(self.poll_interval) + + def stop(self): + self.terminate = True + + +class K8SEventListener(threading.Thread, K8SMixin): + def __init__(self, message_queue, namespace, in_cluster=False, **kwargs): + K8SMixin.__init__(self, + namespace=namespace, + in_cluster=in_cluster, + **kwargs) + threading.Thread.__init__(self) + self.queue = message_queue + self.terminate = False + self.at_start_up = True + self._stop_event = threading.Event() + + def stop(self): + self.terminate = True + self._stop_event.set() + + def run(self): + resource_version = '' + w = watch.Watch() + for event in w.stream(self.v1.list_namespaced_event, + namespace=self.namespace, + field_selector='involvedObject.kind=Pod'): + if self.terminate: + break + + resource_version = int(event['object'].metadata.resource_version) + + info = dict( + eType=EventType.Watch, + pod=event['object'].involved_object.name, + reason=event['object'].reason, + message=event['object'].message, + start_up=self.at_start_up, + ) + self.at_start_up = False + # logger.info('Received event: {}'.format(info)) + self.queue.put(info) + + +class EventHandler(threading.Thread): + def __init__(self, mgr, message_queue, namespace, pod_patt, **kwargs): + threading.Thread.__init__(self) + self.mgr = mgr + self.queue = message_queue + self.kwargs = kwargs + self.terminate = False + self.pod_patt = re.compile(pod_patt) + self.namespace = namespace + + def stop(self): + self.terminate = True + + def on_drop(self, event, **kwargs): + pass + + def 
on_pod_started(self, event, **kwargs):
+        try_cnt = 3  # number of read attempts before giving up
+        pod = None
+        while try_cnt > 0:
+            try_cnt -= 1
+            try:
+                pod = self.mgr.v1.read_namespaced_pod(name=event['pod'],
+                                                      namespace=self.namespace)
+                if not pod.status.pod_ip:  # no IP reported yet: wait and read again
+                    time.sleep(0.5)
+                    continue
+                break
+            except client.rest.ApiException:
+                time.sleep(0.5)
+
+        if try_cnt <= 0 and not pod:
+            if not event['start_up']:
+                logger.error('Pod {} is started but cannot read pod'.format(
+                    event['pod']))
+            return
+        elif try_cnt <= 0 and not pod.status.pod_ip:
+            logger.warning('NoPodIPFoundError')
+            return
+
+        logger.info('Register POD {} with IP {}'.format(
+            pod.metadata.name, pod.status.pod_ip))
+        self.mgr.add_pod(name=pod.metadata.name, ip=pod.status.pod_ip)
+
+    def on_pod_killing(self, event, **kwargs):
+        logger.info('Unregister POD {}'.format(event['pod']))
+        self.mgr.delete_pod(name=event['pod'])
+
+    def on_pod_heartbeat(self, event, **kwargs):
+        names = self.mgr.conn_mgr.conn_names
+
+        running_names = set()
+        for each_event in event['events']:
+            if each_event['ready']:
+                self.mgr.add_pod(name=each_event['pod'], ip=each_event['ip'])
+                running_names.add(each_event['pod'])
+            else:
+                self.mgr.delete_pod(name=each_event['pod'])
+
+        to_delete = names - running_names  # registered names absent from this heartbeat's ready pods
+        for name in to_delete:
+            self.mgr.delete_pod(name)
+
+        logger.info(self.mgr.conn_mgr.conn_names)
+
+    def handle_event(self, event):
+        if event and event['eType'] == EventType.PodHeartBeat:  # test emptiness before subscripting
+            return self.on_pod_heartbeat(event)
+
+        if not event or (event['reason'] not in ('Started', 'Killing')):
+            return self.on_drop(event)
+
+        if not re.match(self.pod_patt, event['pod']):
+            return self.on_drop(event)
+
+        logger.info('Handling event: {}'.format(event))
+
+        if event['reason'] == 'Started':
+            return self.on_pod_started(event)
+
+        return self.on_pod_killing(event)
+
+    def run(self):
+        while not self.terminate:
+            try:
+                event = self.queue.get(timeout=1)  # 1 s timeout so `terminate` is re-checked
+                self.handle_event(event)
+            except queue.Empty:
+                continue
+
+
+class KubernetesProviderSettings:
+    def
__init__(self, namespace, pod_patt, label_selector, in_cluster,
+                 poll_interval, port=None, **kwargs):
+        self.namespace = namespace
+        self.pod_patt = pod_patt
+        self.label_selector = label_selector
+        self.in_cluster = in_cluster
+        self.poll_interval = poll_interval
+        self.port = int(port) if port else 19530  # falls back to 19530 when port is None/0/empty
+
+
+@singleton
+@ProviderManager.register_service_provider
+class KubernetesProvider(object):
+    NAME = 'Kubernetes'
+
+    def __init__(self, settings, conn_mgr, **kwargs):
+        self.namespace = settings.namespace
+        self.pod_patt = settings.pod_patt
+        self.label_selector = settings.label_selector
+        self.in_cluster = settings.in_cluster
+        self.poll_interval = settings.poll_interval
+        self.port = settings.port
+        self.kwargs = kwargs
+        self.queue = queue.Queue()  # shared by the listener, the heartbeater and the event handler
+
+        self.conn_mgr = conn_mgr
+
+        if not self.namespace:
+            self.namespace = open(INCLUSTER_NAMESPACE_PATH).read()  # module-level constant; the lowercase name is undefined
+
+        config.load_incluster_config(
+        ) if self.in_cluster else config.load_kube_config()
+        self.v1 = client.CoreV1Api()
+
+        self.listener = K8SEventListener(message_queue=self.queue,
+                                         namespace=self.namespace,
+                                         in_cluster=self.in_cluster,
+                                         v1=self.v1,
+                                         **kwargs)
+
+        self.pod_heartbeater = K8SHeartbeatHandler(
+            message_queue=self.queue,
+            namespace=self.namespace,
+            label_selector=self.label_selector,
+            in_cluster=self.in_cluster,
+            v1=self.v1,
+            poll_interval=self.poll_interval,
+            **kwargs)
+
+        self.event_handler = EventHandler(mgr=self,
+                                          message_queue=self.queue,
+                                          namespace=self.namespace,
+                                          pod_patt=self.pod_patt,
+                                          **kwargs)
+
+    def add_pod(self, name, ip):
+        self.conn_mgr.register(name, 'tcp://{}:{}'.format(ip, self.port))
+
+    def delete_pod(self, name):
+        self.conn_mgr.unregister(name)
+
+    def start(self):
+        self.listener.daemon = True
+        self.listener.start()
+        self.event_handler.start()
+
+        self.pod_heartbeater.start()
+
+    def stop(self):
+        self.listener.stop()
+        self.pod_heartbeater.stop()
+        self.event_handler.stop()
+
+
+if __name__ == '__main__':
+
logging.basicConfig(level=logging.INFO) + + class Connect: + def register(self, name, value): + logger.error('Register: {} - {}'.format(name, value)) + + def unregister(self, name): + logger.error('Unregister: {}'.format(name)) + + @property + def conn_names(self): + return set() + + connect_mgr = Connect() + + settings = KubernetesProviderSettings(namespace='xp', + pod_patt=".*-ro-servers-.*", + label_selector='tier=ro-servers', + poll_interval=5, + in_cluster=False) + + provider_class = ProviderManager.get_provider('Kubernetes') + t = provider_class(conn_mgr=connect_mgr, settings=settings) + t.start() + cnt = 100 + while cnt > 0: + time.sleep(2) + cnt -= 1 + t.stop() diff --git a/shards/sd/static_provider.py b/shards/sd/static_provider.py new file mode 100644 index 0000000000..e88780740f --- /dev/null +++ b/shards/sd/static_provider.py @@ -0,0 +1,39 @@ +import os +import sys +if __name__ == '__main__': + sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import socket +from utils import singleton +from sd import ProviderManager + + +class StaticProviderSettings: + def __init__(self, hosts, port=None): + self.hosts = hosts + self.port = int(port) if port else 19530 + + +@singleton +@ProviderManager.register_service_provider +class KubernetesProvider(object): + NAME = 'Static' + + def __init__(self, settings, conn_mgr, **kwargs): + self.conn_mgr = conn_mgr + self.hosts = [socket.gethostbyname(host) for host in settings.hosts] + self.port = settings.port + + def start(self): + for host in self.hosts: + self.add_pod(host, host) + + def stop(self): + for host in self.hosts: + self.delete_pod(host) + + def add_pod(self, name, ip): + self.conn_mgr.register(name, 'tcp://{}:{}'.format(ip, self.port)) + + def delete_pod(self, name): + self.conn_mgr.unregister(name) diff --git a/shards/setup.cfg b/shards/setup.cfg new file mode 100644 index 0000000000..4a88432914 --- /dev/null +++ b/shards/setup.cfg @@ -0,0 +1,4 @@ +[tool:pytest] +testpaths = 
mishards +log_cli=true +log_cli_level=info diff --git a/shards/start_services.yml b/shards/start_services.yml new file mode 100644 index 0000000000..57fe061bb7 --- /dev/null +++ b/shards/start_services.yml @@ -0,0 +1,45 @@ +version: "2.3" +services: + milvus: + runtime: nvidia + restart: always + image: registry.zilliz.com/milvus/engine:branch-0.5.0-release-4316de + # ports: + # - "0.0.0.0:19530:19530" + volumes: + - /tmp/milvus/db:/opt/milvus/db + + jaeger: + restart: always + image: jaegertracing/all-in-one:1.14 + ports: + - "0.0.0.0:5775:5775/udp" + - "0.0.0.0:16686:16686" + - "0.0.0.0:9441:9441" + environment: + COLLECTOR_ZIPKIN_HTTP_PORT: 9411 + + mishards: + restart: always + image: registry.zilliz.com/milvus/mishards:v0.0.4 + ports: + - "0.0.0.0:19530:19531" + - "0.0.0.0:19532:19532" + volumes: + - /tmp/milvus/db:/tmp/milvus/db + # - /tmp/mishards_env:/source/mishards/.env + command: ["python", "mishards/main.py"] + environment: + FROM_EXAMPLE: 'true' + DEBUG: 'true' + SERVER_PORT: 19531 + WOSERVER: tcp://milvus:19530 + SD_STATIC_HOSTS: milvus + TRACING_TYPE: jaeger + TRACING_SERVICE_NAME: mishards-demo + TRACING_REPORTING_HOST: jaeger + TRACING_REPORTING_PORT: 5775 + + depends_on: + - milvus + - jaeger diff --git a/shards/tracing/__init__.py b/shards/tracing/__init__.py new file mode 100644 index 0000000000..64a5b50d15 --- /dev/null +++ b/shards/tracing/__init__.py @@ -0,0 +1,43 @@ +from contextlib import contextmanager + + +def empty_server_interceptor_decorator(target_server, interceptor): + return target_server + + +@contextmanager +def EmptySpan(*args, **kwargs): + yield None + return + + +class Tracer: + def __init__(self, + tracer=None, + interceptor=None, + server_decorator=empty_server_interceptor_decorator): + self.tracer = tracer + self.interceptor = interceptor + self.server_decorator = server_decorator + + def decorate(self, server): + return self.server_decorator(server, self.interceptor) + + @property + def empty(self): + return self.tracer is 
None + + def close(self): + self.tracer and self.tracer.close() + + def start_span(self, + operation_name=None, + child_of=None, + references=None, + tags=None, + start_time=None, + ignore_active_span=False): + if self.empty: + return EmptySpan() + return self.tracer.start_span(operation_name, child_of, references, + tags, start_time, ignore_active_span) diff --git a/shards/tracing/factory.py b/shards/tracing/factory.py new file mode 100644 index 0000000000..14fcde2eb3 --- /dev/null +++ b/shards/tracing/factory.py @@ -0,0 +1,40 @@ +import logging +from jaeger_client import Config +from grpc_opentracing.grpcext import intercept_server +from grpc_opentracing import open_tracing_server_interceptor + +from tracing import (Tracer, empty_server_interceptor_decorator) + +logger = logging.getLogger(__name__) + + +class TracerFactory: + @classmethod + def new_tracer(cls, + tracer_type, + tracer_config, + span_decorator=None, + **kwargs): + if not tracer_type: + return Tracer() + config = tracer_config.TRACING_CONFIG + service_name = tracer_config.TRACING_SERVICE_NAME + validate = tracer_config.TRACING_VALIDATE + # if not tracer_type: + # tracer_type = 'jaeger' + # config = tracer_config.DEFAULT_TRACING_CONFIG + + if tracer_type.lower() == 'jaeger': + config = Config(config=config, + service_name=service_name, + validate=validate) + + tracer = config.initialize_tracer() + tracer_interceptor = open_tracing_server_interceptor( + tracer, + log_payloads=tracer_config.TRACING_LOG_PAYLOAD, + span_decorator=span_decorator) + + return Tracer(tracer, tracer_interceptor, intercept_server) + + assert False, 'Unsupported tracer type: {}'.format(tracer_type) diff --git a/shards/utils/__init__.py b/shards/utils/__init__.py new file mode 100644 index 0000000000..c1d55e76c0 --- /dev/null +++ b/shards/utils/__init__.py @@ -0,0 +1,11 @@ +from functools import wraps + + +def singleton(cls): + instances = {} + @wraps(cls) + def getinstance(*args, **kw): + if cls not in instances: + 
instances[cls] = cls(*args, **kw) + return instances[cls] + return getinstance diff --git a/shards/utils/logger_helper.py b/shards/utils/logger_helper.py new file mode 100644 index 0000000000..b4e3b9c5b6 --- /dev/null +++ b/shards/utils/logger_helper.py @@ -0,0 +1,152 @@ +import os +import datetime +from pytz import timezone +from logging import Filter +import logging.config + + +class InfoFilter(logging.Filter): + def filter(self, rec): + return rec.levelno == logging.INFO + + +class DebugFilter(logging.Filter): + def filter(self, rec): + return rec.levelno == logging.DEBUG + + +class WarnFilter(logging.Filter): + def filter(self, rec): + return rec.levelno == logging.WARN + + +class ErrorFilter(logging.Filter): + def filter(self, rec): + return rec.levelno == logging.ERROR + + +class CriticalFilter(logging.Filter): + def filter(self, rec): + return rec.levelno == logging.CRITICAL + + +COLORS = { + 'HEADER': '\033[95m', + 'INFO': '\033[92m', + 'DEBUG': '\033[94m', + 'WARNING': '\033[93m', + 'ERROR': '\033[95m', + 'CRITICAL': '\033[91m', + 'ENDC': '\033[0m', +} + + +class ColorFulFormatColMixin: + def format_col(self, message_str, level_name): + if level_name in COLORS.keys(): + message_str = COLORS.get(level_name) + message_str + COLORS.get( + 'ENDC') + return message_str + + +class ColorfulFormatter(logging.Formatter, ColorFulFormatColMixin): + def format(self, record): + message_str = super(ColorfulFormatter, self).format(record) + + return self.format_col(message_str, level_name=record.levelname) + + +def config(log_level, log_path, name, tz='UTC'): + def build_log_file(level, log_path, name, tz): + utc_now = datetime.datetime.utcnow() + utc_tz = timezone('UTC') + local_tz = timezone(tz) + tznow = utc_now.replace(tzinfo=utc_tz).astimezone(local_tz) + return '{}-{}-{}.log'.format(os.path.join(log_path, name), tznow.strftime("%m-%d-%Y-%H:%M:%S"), + level) + + if not os.path.exists(log_path): + os.makedirs(log_path) + + LOGGING = { + 'version': 1, + 
'disable_existing_loggers': False, + 'formatters': { + 'default': { + 'format': '%(asctime)s | %(levelname)s | %(name)s | %(threadName)s: %(message)s (%(filename)s:%(lineno)s)', + }, + 'colorful_console': { + 'format': '%(asctime)s | %(levelname)s | %(name)s | %(threadName)s: %(message)s (%(filename)s:%(lineno)s)', + '()': ColorfulFormatter, + }, + }, + 'filters': { + 'InfoFilter': { + '()': InfoFilter, + }, + 'DebugFilter': { + '()': DebugFilter, + }, + 'WarnFilter': { + '()': WarnFilter, + }, + 'ErrorFilter': { + '()': ErrorFilter, + }, + 'CriticalFilter': { + '()': CriticalFilter, + }, + }, + 'handlers': { + 'milvus_celery_console': { + 'class': 'logging.StreamHandler', + 'formatter': 'colorful_console', + }, + 'milvus_debug_file': { + 'level': 'DEBUG', + 'filters': ['DebugFilter'], + 'class': 'logging.handlers.RotatingFileHandler', + 'formatter': 'default', + 'filename': build_log_file('debug', log_path, name, tz) + }, + 'milvus_info_file': { + 'level': 'INFO', + 'filters': ['InfoFilter'], + 'class': 'logging.handlers.RotatingFileHandler', + 'formatter': 'default', + 'filename': build_log_file('info', log_path, name, tz) + }, + 'milvus_warn_file': { + 'level': 'WARN', + 'filters': ['WarnFilter'], + 'class': 'logging.handlers.RotatingFileHandler', + 'formatter': 'default', + 'filename': build_log_file('warn', log_path, name, tz) + }, + 'milvus_error_file': { + 'level': 'ERROR', + 'filters': ['ErrorFilter'], + 'class': 'logging.handlers.RotatingFileHandler', + 'formatter': 'default', + 'filename': build_log_file('error', log_path, name, tz) + }, + 'milvus_critical_file': { + 'level': 'CRITICAL', + 'filters': ['CriticalFilter'], + 'class': 'logging.handlers.RotatingFileHandler', + 'formatter': 'default', + 'filename': build_log_file('critical', log_path, name, tz) + }, + }, + 'loggers': { + '': { + 'handlers': ['milvus_celery_console', 'milvus_info_file', 'milvus_debug_file', 'milvus_warn_file', + 'milvus_error_file', 'milvus_critical_file'], + 'level': log_level, 
+ 'propagate': False + }, + }, + 'propagate': False, + } + + logging.config.dictConfig(LOGGING) From f89e5cfc4e912600fcfc3209cc4b6c08602e6193 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 25 Oct 2019 10:20:37 +0800 Subject: [PATCH 092/126] update manager --- shards/manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/shards/manager.py b/shards/manager.py index 931c90ebc8..d9d303d2df 100644 --- a/shards/manager.py +++ b/shards/manager.py @@ -1,6 +1,6 @@ import fire -from mishards import db from sqlalchemy import and_ +from mishards import db, settings class DBHandler: @@ -25,4 +25,5 @@ class DBHandler: if __name__ == '__main__': + db.init_db(settings.DefaultConfig.SQLALCHEMY_DATABASE_URI) fire.Fire(DBHandler) From de25bd08a859749b4b6295a48f3d027982f8b241 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 25 Oct 2019 14:32:08 +0800 Subject: [PATCH 093/126] update manager --- shards/manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/shards/manager.py b/shards/manager.py index d9d303d2df..666ddd377e 100644 --- a/shards/manager.py +++ b/shards/manager.py @@ -26,4 +26,5 @@ class DBHandler: if __name__ == '__main__': db.init_db(settings.DefaultConfig.SQLALCHEMY_DATABASE_URI) + from mishards import models fire.Fire(DBHandler) From 0a172fc8f6a98d9570c2f1a8cfafc1687bf6ea58 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 25 Oct 2019 14:32:23 +0800 Subject: [PATCH 094/126] fix conftest --- shards/conftest.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/shards/conftest.py b/shards/conftest.py index 34e22af693..24ec19da1d 100644 --- a/shards/conftest.py +++ b/shards/conftest.py @@ -1,10 +1,19 @@ +import os import logging import pytest import grpc +import tempfile +import shutil from mishards import settings, db, create_app logger = logging.getLogger(__name__) +tpath = tempfile.mkdtemp() +dirpath = '{}/db'.format(tpath) +filepath = '{}/meta.sqlite'.format(dirpath) +os.makedirs(dirpath, 0o777) 
+settings.TestingConfig.SQLALCHEMY_DATABASE_URI = 'sqlite:///{}?check_same_thread=False'.format( + filepath) @pytest.fixture def app(request): @@ -15,6 +24,7 @@ def app(request): yield app db.drop_all() + # shutil.rmtree(tpath) @pytest.fixture From 822a9c960b882b722d33befc16daf90b0f2aaf07 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 25 Oct 2019 14:32:23 +0800 Subject: [PATCH 095/126] fix conftest fix conftest --- shards/conftest.py | 1 + shards/mishards/service_handler.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/shards/conftest.py b/shards/conftest.py index 24ec19da1d..aa4d409979 100644 --- a/shards/conftest.py +++ b/shards/conftest.py @@ -15,6 +15,7 @@ os.makedirs(dirpath, 0o777) settings.TestingConfig.SQLALCHEMY_DATABASE_URI = 'sqlite:///{}?check_same_thread=False'.format( filepath) + @pytest.fixture def app(request): app = create_app(settings.TestingConfig) diff --git a/shards/mishards/service_handler.py b/shards/mishards/service_handler.py index 5e91c14f14..2f19152ae6 100644 --- a/shards/mishards/service_handler.py +++ b/shards/mishards/service_handler.py @@ -171,7 +171,7 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): logger.info('HasTable {}'.format(_table_name)) _status, _bool = self._has_table(_table_name, - metadata={'resp_class': milvus_pb2.BoolReply}) + metadata={'resp_class': milvus_pb2.BoolReply}) return milvus_pb2.BoolReply(status=status_pb2.Status( error_code=_status.code, reason=_status.message), From c73d1d8342e8832a3c0ca13eee17bf7e3a717570 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 25 Oct 2019 14:44:39 +0800 Subject: [PATCH 096/126] update CN tutorial --- shards/Tutorial_CN.md | 132 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 shards/Tutorial_CN.md diff --git a/shards/Tutorial_CN.md b/shards/Tutorial_CN.md new file mode 100644 index 0000000000..74ddd5ef78 --- /dev/null +++ b/shards/Tutorial_CN.md @@ -0,0 +1,132 @@ +# Mishards使用文档 +--- 
+Milvus 旨在帮助用户实现海量非结构化数据的近似检索和分析。单个 Milvus 实例可处理十亿级数据规模,而对于百亿或者千亿规模数据的需求,则需要一个 Milvus 集群实例,该实例对于上层应用可以像单机实例一样使用,同时满足海量数据低延迟,高并发业务需求。mishards就是一个集群中间件,其内部处理请求转发,读写分离,水平扩展,动态扩容,为用户提供内存和算力可以无限扩容的 Milvus 实例。 + +## 运行环境 +--- + +### 单机快速启动实例 +**`python >= 3.4`环境** + +``` +1. cd milvus/shards +2. pip install -r requirements.txt +3. nvidia-docker run --rm -d -p 19530:19530 -v /tmp/milvus/db:/opt/milvus/db milvusdb/milvus:0.5.0-d102119-ede20b +4. sudo chown -R $USER:$USER /tmp/milvus +5. cp mishards/.env.example mishards/.env +6. 确认mishards/.env中配置mishards监听19532端口 +7. python mishards/main.py +``` + +### 容器启动实例 +`all_in_one`会在服务器上开启两个milvus实例,一个mishards实例,一个jaeger链路追踪实例 + +**启动** +``` +1. 安装docker-compose +2. cd milvus/shards/all_in_one +3. docker-compose -f all_in_one.yml up -d #监听19531端口 +``` + +**打开Jaeger UI** +``` +浏览器打开 "http://127.0.0.1:16686/" +``` + +### kubernetes中快速启动 +**准备** +``` +- kubernetes集群 +- 安装nvidia-docker +- 共享存储 +- 安装kubectl并能访问集群 +``` + +**步骤** +``` +1. cd milvus/shards/kubernetes_demo/ +2. ./start.sh allup +3. watch -n 1 kubectl get pods -n milvus -o wide 查看所有pod状态,等待所有pod都处于Running状态 +4. kubectl get service -n milvus 查看milvus-proxy-servers的EXTERNAL-IP和PORT, 这就是mishards集群的服务地址 +``` + +**扩容计算实例** +``` +./start.sh scale-ro-server 2 扩容计算实例到2 +``` + +**扩容代理服务器实例** +``` +./start.sh scale-proxy 2 扩容代理服务器实例到2 +``` + +**查看日志** +``` +kubectl logs -f --tail=1000 -n milvus milvus-ro-servers-0 查看计算节点milvus-ro-servers-0日志 +``` + +## 测试 + +**启动单元测试** +``` +1. cd milvus/shards +2. 
pytest +``` + +**单元测试覆盖率** +``` +pytest --cov-report html:cov_html --cov=mishards +``` + +## mishards配置详解 + +### 全局 +| Name | Required | Type | Default Value | Explanation | +| --------------------------- | -------- | -------- | ------------- | ------------- | +| Debug | No | bool | True | 是否Debug工作模式 | +| TIMEZONE | No | string | "UTC" | 时区 | +| MAX_RETRY | No | int | 3 | 最大连接重试次数 | +| SERVER_PORT | No | int | 19530 | 配置服务端口 | +| WOSERVER | **Yes** | str | - | 配置后台可写Milvus实例地址。目前只支持静态设置,例"tcp://127.0.0.1:19530" | + +### 元数据 +| Name | Required | Type | Default Value | Explanation | +| --------------------------- | -------- | -------- | ------------- | ------------- | +| SQLALCHEMY_DATABASE_URI | **Yes** | string | - | 配置元数据存储数据库地址 | +| SQL_ECHO | No | bool | False | 是否打印Sql详细语句 | +| SQLALCHEMY_DATABASE_TEST_URI | No | string | - | 配置测试环境下元数据存储数据库地址 | +| SQL_TEST_ECHO | No | bool | False | 配置测试环境下是否打印Sql详细语句 | + +### 服务发现 +| Name | Required | Type | Default Value | Explanation | +| --------------------------- | -------- | -------- | ------------- | ------------- | +| SD_PROVIDER | No | string | "Kubernetes" | 配置服务发现服务类型,目前只有Static, Kubernetes可选 | +| SD_STATIC_HOSTS | No | list | [] | **SD_PROVIDER** 为**Static**时,配置服务地址列表,例"192.168.1.188,192.168.1.190"| +| SD_STATIC_PORT | No | int | 19530 | **SD_PROVIDER** 为**Static**时,配置Hosts监听端口 | +| SD_NAMESPACE | No | string | - | **SD_PROVIDER** 为**Kubernetes**时,配置集群namespace | +| SD_IN_CLUSTER | No | bool | False | **SD_PROVIDER** 为**Kubernetes**时,标明服务发现是否在集群中运行 | +| SD_POLL_INTERVAL | No | int | 5 | **SD_PROVIDER** 为**Kubernetes**时,标明服务发现监听服务列表频率,单位Second | +| SD_ROSERVER_POD_PATT | No | string | - | **SD_PROVIDER** 为**Kubernetes**时,匹配可读Milvus实例的正则表达式 | +| SD_LABEL_SELECTOR | No | string | - | **SD_PROVIDER** 为**Kubernetes**时,匹配可读Milvus实例的标签选择 | + +### 链路追踪 +| Name | Required | Type | Default Value | Explanation | +| --------------------------- | -------- | -------- | ------------- | ------------- | +| TRACING_TYPE | No | 
string | "" | 链路追踪方案选择,目前只有Jaeger, 默认不使用| +| TRACING_SERVICE_NAME | No | string | "mishards" | **TRACING_TYPE** 为**Jaeger**时,链路追踪服务名 | +| TRACING_SAMPLER_TYPE | No | string | "const" | **TRACING_TYPE** 为**Jaeger**时,链路追踪采样类型 | +| TRACING_SAMPLER_PARAM | No | int | 1 | **TRACING_TYPE** 为**Jaeger**时,链路追踪采样频率 | +| TRACING_LOG_PAYLOAD | No | bool | False | **TRACING_TYPE** 为**Jaeger**时,链路追踪是否采集Payload | + +### 日志 +| Name | Required | Type | Default Value | Explanation | +| --------------------------- | -------- | -------- | ------------- | ------------- | +| LOG_LEVEL | No | string | "DEBUG" if Debug is ON else "INFO" | 日志记录级别 | +| LOG_PATH | No | string | "/tmp/mishards" | 日志记录路径 | +| LOG_NAME | No | string | "logfile" | 日志记录名 | + +### 路由 +| Name | Required | Type | Default Value | Explanation | +| --------------------------- | -------- | -------- | ------------- | ------------- | +| ROUTER_CLASS_NAME | No | string | FileBasedHashRingRouter | 处理请求路由类名, 可注册自定义类 | +| ROUTER_CLASS_TEST_NAME | No | string | FileBasedHashRingRouter | 测试环境下处理请求路由类名, 可注册自定义类 | From 74429c902d0404483cababf902d480d07159e7f9 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 25 Oct 2019 20:24:03 +0800 Subject: [PATCH 097/126] implement plugin framework for tracer --- shards/mishards/__init__.py | 15 +++++--- shards/mishards/settings.py | 1 + shards/requirements.txt | 1 + shards/{tracing => tracer}/__init__.py | 0 shards/tracer/factory.py | 48 +++++++++++++++++++++++++ shards/tracer/plugins/jaeger_factory.py | 33 +++++++++++++++++ shards/tracing/factory.py | 40 --------------------- 7 files changed, 93 insertions(+), 45 deletions(-) rename shards/{tracing => tracer}/__init__.py (100%) create mode 100644 shards/tracer/factory.py create mode 100644 shards/tracer/plugins/jaeger_factory.py delete mode 100644 shards/tracing/factory.py diff --git a/shards/mishards/__init__.py b/shards/mishards/__init__.py index 7db3d8cb5e..c5ecbe93fc 100644 --- a/shards/mishards/__init__.py +++ 
b/shards/mishards/__init__.py @@ -19,17 +19,22 @@ def create_app(testing_config=None): from sd import ProviderManager sd_proiver_class = ProviderManager.get_provider(settings.SD_PROVIDER) - discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, conn_mgr=connect_mgr) + discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, + conn_mgr=connect_mgr) - from tracing.factory import TracerFactory from mishards.grpc_utils import GrpcSpanDecorator - tracer = TracerFactory.new_tracer(config.TRACING_TYPE, settings.TracingConfig, - span_decorator=GrpcSpanDecorator()) + from tracer.factory import TracerFactory + tracer = TracerFactory(config.TRACING_PLUGIN_PATH).create(config.TRACING_TYPE, + settings.TracingConfig, + span_decorator=GrpcSpanDecorator()) from mishards.routings import RouterFactory router = RouterFactory.new_router(config.ROUTER_CLASS_NAME, connect_mgr) - grpc_server.init_app(conn_mgr=connect_mgr, tracer=tracer, router=router, discover=discover) + grpc_server.init_app(conn_mgr=connect_mgr, + tracer=tracer, + router=router, + discover=discover) from mishards import exception_handlers diff --git a/shards/mishards/settings.py b/shards/mishards/settings.py index 21a3bb7a65..08550374ad 100644 --- a/shards/mishards/settings.py +++ b/shards/mishards/settings.py @@ -74,6 +74,7 @@ class TracingConfig: class DefaultConfig: SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') SQL_ECHO = env.bool('SQL_ECHO', False) + TRACING_PLUGIN_PATH = env.str('TRACING_PLUGIN_PATH', '') TRACING_TYPE = env.str('TRACING_TYPE', '') ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_NAME', 'FileBasedHashRingRouter') diff --git a/shards/requirements.txt b/shards/requirements.txt index ae224e92ed..14bdde2a06 100644 --- a/shards/requirements.txt +++ b/shards/requirements.txt @@ -34,3 +34,4 @@ urllib3==1.25.3 jaeger-client>=3.4.0 grpcio-opentracing>=1.0 mock==2.0.0 +pluginbase==1.0.0 diff --git a/shards/tracing/__init__.py b/shards/tracer/__init__.py similarity index 100% 
rename from shards/tracing/__init__.py rename to shards/tracer/__init__.py diff --git a/shards/tracer/factory.py b/shards/tracer/factory.py new file mode 100644 index 0000000000..7ffed32bd0 --- /dev/null +++ b/shards/tracer/factory.py @@ -0,0 +1,48 @@ +import os +import logging +from functools import partial +from pluginbase import PluginBase + + +logger = logging.getLogger(__name__) + +here = os.path.abspath(os.path.dirname(__file__)) +get_path = partial(os.path.join, here) + +PLUGIN_PACKAGE_NAME = 'tracer.plugins' +plugin_base = PluginBase(package=PLUGIN_PACKAGE_NAME, + searchpath=[get_path('./plugins')]) + +class TracerFactory(object): + def __init__(self, searchpath=None): + self.plugin_package_name = PLUGIN_PACKAGE_NAME + self.tracer_map = {} + searchpath = searchpath if searchpath else [] + searchpath = [searchpath] if isinstance(searchpath, str) else searchpath + self.source = plugin_base.make_plugin_source( + searchpath=searchpath, identifier=self.__class__.__name__) + + for plugin_name in self.source.list_plugins(): + plugin = self.source.load_plugin(plugin_name) + plugin.setup(self) + + def on_plugin_setup(self, plugin_class): + name = getattr(plugin_class, 'name', plugin_class.__name__) + self.tracer_map[name.lower()] = plugin_class + + def plugin(self, name): + return self.tracer_map.get(name, None) + + def create(self, + tracer_type, + tracer_config, + span_decorator=None, + **kwargs): + if not tracer_type: + return Tracer() + plugin_class = self.plugin(tracer_type.lower()) + if not plugin_class: + raise RuntimeError('Tracer Plugin \'{}\' not installed!'.format(tracer_type)) + + tracer = plugin_class.create(tracer_config, span_decorator=span_decorator, **kwargs) + return tracer diff --git a/shards/tracer/plugins/jaeger_factory.py b/shards/tracer/plugins/jaeger_factory.py new file mode 100644 index 0000000000..ec71fe427f --- /dev/null +++ b/shards/tracer/plugins/jaeger_factory.py @@ -0,0 +1,33 @@ +import logging +from jaeger_client import Config +from 
grpc_opentracing.grpcext import intercept_server +from grpc_opentracing import open_tracing_server_interceptor +from tracer import Tracer + +logger = logging.getLogger(__name__) + +PLUGIN_NAME = __name__ + +class JaegerFactory: + name = 'jaeger' + @classmethod + def create(cls, tracer_config, span_decorator=None, **kwargs): + tracing_config = tracer_config.TRACING_CONFIG + service_name = tracer_config.TRACING_SERVICE_NAME + validate = tracer_config.TRACING_VALIDATE + config = Config(config=tracing_config, + service_name=service_name, + validate=validate) + + tracer = config.initialize_tracer() + tracer_interceptor = open_tracing_server_interceptor( + tracer, + log_payloads=tracer_config.TRACING_LOG_PAYLOAD, + span_decorator=span_decorator) + + return Tracer(tracer, tracer_interceptor, intercept_server) + + +def setup(app): + logger.debug('Plugin \'{}\' Installed In Package: {}'.format(PLUGIN_NAME, app.plugin_package_name)) + app.on_plugin_setup(JaegerFactory) diff --git a/shards/tracing/factory.py b/shards/tracing/factory.py deleted file mode 100644 index 14fcde2eb3..0000000000 --- a/shards/tracing/factory.py +++ /dev/null @@ -1,40 +0,0 @@ -import logging -from jaeger_client import Config -from grpc_opentracing.grpcext import intercept_server -from grpc_opentracing import open_tracing_server_interceptor - -from tracing import (Tracer, empty_server_interceptor_decorator) - -logger = logging.getLogger(__name__) - - -class TracerFactory: - @classmethod - def new_tracer(cls, - tracer_type, - tracer_config, - span_decorator=None, - **kwargs): - if not tracer_type: - return Tracer() - config = tracer_config.TRACING_CONFIG - service_name = tracer_config.TRACING_SERVICE_NAME - validate = tracer_config.TRACING_VALIDATE - # if not tracer_type: - # tracer_type = 'jaeger' - # config = tracer_config.DEFAULT_TRACING_CONFIG - - if tracer_type.lower() == 'jaeger': - config = Config(config=config, - service_name=service_name, - validate=validate) - - tracer = 
config.initialize_tracer() - tracer_interceptor = open_tracing_server_interceptor( - tracer, - log_payloads=tracer_config.TRACING_LOG_PAYLOAD, - span_decorator=span_decorator) - - return Tracer(tracer, tracer_interceptor, intercept_server) - - assert False, 'Unsupported tracer type: {}'.format(tracer_type) From cf6df18446035bfaa46bd60434fb861ee5893e4f Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 25 Oct 2019 20:24:03 +0800 Subject: [PATCH 098/126] implement plugin framework for tracer implement plugin framework for tracer --- shards/tracer/plugins/jaeger_factory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shards/tracer/plugins/jaeger_factory.py b/shards/tracer/plugins/jaeger_factory.py index ec71fe427f..384dbecaba 100644 --- a/shards/tracer/plugins/jaeger_factory.py +++ b/shards/tracer/plugins/jaeger_factory.py @@ -6,7 +6,7 @@ from tracer import Tracer logger = logging.getLogger(__name__) -PLUGIN_NAME = __name__ +PLUGIN_NAME = __file__ class JaegerFactory: name = 'jaeger' @@ -29,5 +29,5 @@ class JaegerFactory: def setup(app): - logger.debug('Plugin \'{}\' Installed In Package: {}'.format(PLUGIN_NAME, app.plugin_package_name)) + logger.info('Plugin \'{}\' Installed In Package: {}'.format(PLUGIN_NAME, app.plugin_package_name)) app.on_plugin_setup(JaegerFactory) From 2ab0e0eb93e655b2c7f71c73c1acb111f3b6a398 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Fri, 25 Oct 2019 20:39:44 +0800 Subject: [PATCH 099/126] fix unit test and code style changes --- shards/conftest.py | 1 + shards/mishards/__init__.py | 4 ++-- shards/tracer/factory.py | 16 +++++++++------- shards/tracer/plugins/jaeger_factory.py | 1 + 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/shards/conftest.py b/shards/conftest.py index aa4d409979..4cdcbdbe0c 100644 --- a/shards/conftest.py +++ b/shards/conftest.py @@ -25,6 +25,7 @@ def app(request): yield app db.drop_all() + app.stop() # shutil.rmtree(tpath) diff --git a/shards/mishards/__init__.py 
b/shards/mishards/__init__.py index c5ecbe93fc..72431c9b57 100644 --- a/shards/mishards/__init__.py +++ b/shards/mishards/__init__.py @@ -25,8 +25,8 @@ def create_app(testing_config=None): from mishards.grpc_utils import GrpcSpanDecorator from tracer.factory import TracerFactory tracer = TracerFactory(config.TRACING_PLUGIN_PATH).create(config.TRACING_TYPE, - settings.TracingConfig, - span_decorator=GrpcSpanDecorator()) + settings.TracingConfig, + span_decorator=GrpcSpanDecorator()) from mishards.routings import RouterFactory router = RouterFactory.new_router(config.ROUTER_CLASS_NAME, connect_mgr) diff --git a/shards/tracer/factory.py b/shards/tracer/factory.py index 7ffed32bd0..662ae29244 100644 --- a/shards/tracer/factory.py +++ b/shards/tracer/factory.py @@ -2,6 +2,7 @@ import os import logging from functools import partial from pluginbase import PluginBase +from tracer import Tracer logger = logging.getLogger(__name__) @@ -11,7 +12,8 @@ get_path = partial(os.path.join, here) PLUGIN_PACKAGE_NAME = 'tracer.plugins' plugin_base = PluginBase(package=PLUGIN_PACKAGE_NAME, - searchpath=[get_path('./plugins')]) + searchpath=[get_path('./plugins')]) + class TracerFactory(object): def __init__(self, searchpath=None): @@ -19,8 +21,8 @@ class TracerFactory(object): self.tracer_map = {} searchpath = searchpath if searchpath else [] searchpath = [searchpath] if isinstance(searchpath, str) else searchpath - self.source = plugin_base.make_plugin_source( - searchpath=searchpath, identifier=self.__class__.__name__) + self.source = plugin_base.make_plugin_source(searchpath=searchpath, + identifier=self.__class__.__name__) for plugin_name in self.source.list_plugins(): plugin = self.source.load_plugin(plugin_name) @@ -34,10 +36,10 @@ class TracerFactory(object): return self.tracer_map.get(name, None) def create(self, - tracer_type, - tracer_config, - span_decorator=None, - **kwargs): + tracer_type, + tracer_config, + span_decorator=None, + **kwargs): if not tracer_type: return 
Tracer() plugin_class = self.plugin(tracer_type.lower()) diff --git a/shards/tracer/plugins/jaeger_factory.py b/shards/tracer/plugins/jaeger_factory.py index 384dbecaba..7b18a86130 100644 --- a/shards/tracer/plugins/jaeger_factory.py +++ b/shards/tracer/plugins/jaeger_factory.py @@ -8,6 +8,7 @@ logger = logging.getLogger(__name__) PLUGIN_NAME = __file__ + class JaegerFactory: name = 'jaeger' @classmethod From 63997d55ec142c73f1a84e6996861c316880567d Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 26 Oct 2019 12:49:50 +0800 Subject: [PATCH 100/126] implement router plugins --- shards/mishards/__init__.py | 5 +- shards/mishards/router/__init__.py | 22 ++++++ shards/mishards/router/factory.py | 49 +++++++++++++ .../plugins/file_based_hash_ring_router.py} | 68 +++++-------------- shards/mishards/settings.py | 1 + shards/utils/pluginextension.py | 15 ++++ 6 files changed, 108 insertions(+), 52 deletions(-) create mode 100644 shards/mishards/router/__init__.py create mode 100644 shards/mishards/router/factory.py rename shards/mishards/{routings.py => router/plugins/file_based_hash_ring_router.py} (53%) create mode 100644 shards/utils/pluginextension.py diff --git a/shards/mishards/__init__.py b/shards/mishards/__init__.py index 72431c9b57..0c5ecd4d0e 100644 --- a/shards/mishards/__init__.py +++ b/shards/mishards/__init__.py @@ -28,8 +28,9 @@ def create_app(testing_config=None): settings.TracingConfig, span_decorator=GrpcSpanDecorator()) - from mishards.routings import RouterFactory - router = RouterFactory.new_router(config.ROUTER_CLASS_NAME, connect_mgr) + from mishards.router.factory import RouterFactory + router = RouterFactory(config.ROUTER_PLUGIN_PATH).create(config.ROUTER_CLASS_NAME, + conn_mgr=connect_mgr) grpc_server.init_app(conn_mgr=connect_mgr, tracer=tracer, diff --git a/shards/mishards/router/__init__.py b/shards/mishards/router/__init__.py new file mode 100644 index 0000000000..4150f3b736 --- /dev/null +++ b/shards/mishards/router/__init__.py @@ -0,0 
+1,22 @@ +from mishards import exceptions + + +class RouterMixin: + def __init__(self, conn_mgr): + self.conn_mgr = conn_mgr + + def routing(self, table_name, metadata=None, **kwargs): + raise NotImplemented() + + def connection(self, metadata=None): + conn = self.conn_mgr.conn('WOSERVER', metadata=metadata) + if conn: + conn.on_connect(metadata=metadata) + return conn.conn + + def query_conn(self, name, metadata=None): + conn = self.conn_mgr.conn(name, metadata=metadata) + if not conn: + raise exceptions.ConnectionNotFoundError(name, metadata=metadata) + conn.on_connect(metadata=metadata) + return conn.conn diff --git a/shards/mishards/router/factory.py b/shards/mishards/router/factory.py new file mode 100644 index 0000000000..2671cc3156 --- /dev/null +++ b/shards/mishards/router/factory.py @@ -0,0 +1,49 @@ +import os +import logging +from functools import partial +# from pluginbase import PluginBase +# import importlib +from utils.pluginextension import MiPluginBase + +logger = logging.getLogger(__name__) + +here = os.path.abspath(os.path.dirname(__file__)) +get_path = partial(os.path.join, here) + +PLUGIN_PACKAGE_NAME = 'router.plugins' +plugin_base = MiPluginBase(package=PLUGIN_PACKAGE_NAME, + searchpath=[get_path('./plugins')]) + + +class RouterFactory(object): + PLUGIN_TYPE = 'Router' + + def __init__(self, searchpath=None): + self.plugin_package_name = PLUGIN_PACKAGE_NAME + self.class_map = {} + searchpath = searchpath if searchpath else [] + searchpath = [searchpath] if isinstance(searchpath, str) else searchpath + self.source = plugin_base.make_plugin_source(searchpath=searchpath, + identifier=self.__class__.__name__) + + for plugin_name in self.source.list_plugins(): + plugin = self.source.load_plugin(plugin_name) + plugin.setup(self) + + def on_plugin_setup(self, plugin_class): + name = getattr(plugin_class, 'name', plugin_class.__name__) + self.class_map[name.lower()] = plugin_class + + def plugin(self, name): + return self.class_map.get(name, None) + + 
def create(self, class_name, class_config=None, **kwargs): + if not class_name: + raise RuntimeError('Please specify router class_name first!') + + this_class = self.plugin(class_name.lower()) + if not this_class: + raise RuntimeError('{} Plugin \'{}\' Not Installed!'.format(self.PLUGIN_TYPE, class_name)) + + router = this_class.create(class_config, **kwargs) + return router diff --git a/shards/mishards/routings.py b/shards/mishards/router/plugins/file_based_hash_ring_router.py similarity index 53% rename from shards/mishards/routings.py rename to shards/mishards/router/plugins/file_based_hash_ring_router.py index 823972726f..eddb425cfe 100644 --- a/shards/mishards/routings.py +++ b/shards/mishards/router/plugins/file_based_hash_ring_router.py @@ -1,64 +1,19 @@ import logging from sqlalchemy import exc as sqlalchemy_exc from sqlalchemy import and_ - +from mishards.models import Tables +from mishards.router import RouterMixin from mishards import exceptions, db from mishards.hash_ring import HashRing -from mishards.models import Tables logger = logging.getLogger(__name__) -class RouteManager: - ROUTER_CLASSES = {} - - @classmethod - def register_router_class(cls, target): - name = target.__dict__.get('NAME', None) - name = name if name else target.__class__.__name__ - cls.ROUTER_CLASSES[name] = target - return target - - @classmethod - def get_router_class(cls, name): - return cls.ROUTER_CLASSES.get(name, None) - - -class RouterFactory: - @classmethod - def new_router(cls, name, conn_mgr, **kwargs): - router_class = RouteManager.get_router_class(name) - assert router_class - return router_class(conn_mgr, **kwargs) - - -class RouterMixin: - def __init__(self, conn_mgr): - self.conn_mgr = conn_mgr - - def routing(self, table_name, metadata=None, **kwargs): - raise NotImplemented() - - def connection(self, metadata=None): - conn = self.conn_mgr.conn('WOSERVER', metadata=metadata) - if conn: - conn.on_connect(metadata=metadata) - return conn.conn - - def 
query_conn(self, name, metadata=None): - conn = self.conn_mgr.conn(name, metadata=metadata) - if not conn: - raise exceptions.ConnectionNotFoundError(name, metadata=metadata) - conn.on_connect(metadata=metadata) - return conn.conn - - -@RouteManager.register_router_class -class FileBasedHashRingRouter(RouterMixin): - NAME = 'FileBasedHashRingRouter' +class Factory(RouterMixin): + name = 'FileBasedHashRingRouter' def __init__(self, conn_mgr, **kwargs): - super(FileBasedHashRingRouter, self).__init__(conn_mgr) + super(Factory, self).__init__(conn_mgr) def routing(self, table_name, metadata=None, **kwargs): range_array = kwargs.pop('range_array', None) @@ -94,3 +49,16 @@ class FileBasedHashRingRouter(RouterMixin): routing[target_host]['file_ids'].append(str(f.id)) return routing + + @classmethod + def create(cls, config, **kwargs): + conn_mgr = kwargs.pop('conn_mgr', None) + if not conn_mgr: + raise RuntimeError('Cannot find \'conn_mgr\' to initialize \'{}\''.format(self.name)) + router = cls(conn_mgr, **kwargs) + return router + + +def setup(app): + logger.info('Plugin \'{}\' Installed In Package: {}'.format(__file__, app.plugin_package_name)) + app.on_plugin_setup(Factory) diff --git a/shards/mishards/settings.py b/shards/mishards/settings.py index 08550374ad..c08e1d7a06 100644 --- a/shards/mishards/settings.py +++ b/shards/mishards/settings.py @@ -76,6 +76,7 @@ class DefaultConfig: SQL_ECHO = env.bool('SQL_ECHO', False) TRACING_PLUGIN_PATH = env.str('TRACING_PLUGIN_PATH', '') TRACING_TYPE = env.str('TRACING_TYPE', '') + ROUTER_PLUGIN_PATH = env.str('ROUTER_PLUGIN_PATH', '') ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_NAME', 'FileBasedHashRingRouter') diff --git a/shards/utils/pluginextension.py b/shards/utils/pluginextension.py new file mode 100644 index 0000000000..e5339b4fdc --- /dev/null +++ b/shards/utils/pluginextension.py @@ -0,0 +1,15 @@ +import importlib +from pluginbase import PluginBase, PluginSource + + +class MiPluginSource(PluginSource): + def 
load_plugin(self, name): + if '.' in name: + raise ImportError('Plugin names cannot contain dots.') + with self: + return importlib.import_module(self.base.package + '.' + name) + + +class MiPluginBase(PluginBase): + def make_plugin_source(self, *args, **kwargs): + return MiPluginSource(self, *args, **kwargs) From 4c774a77f5eab004764d46d200bea36360aa8499 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 26 Oct 2019 14:03:51 +0800 Subject: [PATCH 101/126] fix PluginBase import bug --- shards/mishards/router/factory.py | 8 +++----- shards/tracer/factory.py | 3 ++- shards/utils/pluginextension.py | 11 ++++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/shards/mishards/router/factory.py b/shards/mishards/router/factory.py index 2671cc3156..66d549f2a6 100644 --- a/shards/mishards/router/factory.py +++ b/shards/mishards/router/factory.py @@ -1,9 +1,7 @@ import os import logging from functools import partial -# from pluginbase import PluginBase -# import importlib -from utils.pluginextension import MiPluginBase +from utils.pluginextension import MiPluginBase as PluginBase logger = logging.getLogger(__name__) @@ -11,8 +9,8 @@ here = os.path.abspath(os.path.dirname(__file__)) get_path = partial(os.path.join, here) PLUGIN_PACKAGE_NAME = 'router.plugins' -plugin_base = MiPluginBase(package=PLUGIN_PACKAGE_NAME, - searchpath=[get_path('./plugins')]) +plugin_base = PluginBase(package=PLUGIN_PACKAGE_NAME, + searchpath=[get_path('./plugins')]) class RouterFactory(object): diff --git a/shards/tracer/factory.py b/shards/tracer/factory.py index 662ae29244..9342ca3d7e 100644 --- a/shards/tracer/factory.py +++ b/shards/tracer/factory.py @@ -1,7 +1,8 @@ import os import logging from functools import partial -from pluginbase import PluginBase +from utils.pluginextension import MiPluginBase as PluginBase +# from pluginbase import PluginBase from tracer import Tracer diff --git a/shards/utils/pluginextension.py b/shards/utils/pluginextension.py index 
e5339b4fdc..68413a4e55 100644 --- a/shards/utils/pluginextension.py +++ b/shards/utils/pluginextension.py @@ -1,13 +1,14 @@ -import importlib +import importlib.util from pluginbase import PluginBase, PluginSource class MiPluginSource(PluginSource): def load_plugin(self, name): - if '.' in name: - raise ImportError('Plugin names cannot contain dots.') - with self: - return importlib.import_module(self.base.package + '.' + name) + plugin = super().load_plugin(name) + spec = importlib.util.spec_from_file_location(self.base.package + '.' + name, plugin.__file__) + plugin = importlib.util.module_from_spec(spec) + spec.loader.exec_module(plugin) + return plugin class MiPluginBase(PluginBase): From 83818546db7dbc40b5a8d551a169c6a59bb88b9f Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 26 Oct 2019 14:14:27 +0800 Subject: [PATCH 102/126] fix bug for router --- shards/mishards/router/factory.py | 2 +- shards/mishards/test_server.py | 2 +- shards/tracer/factory.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/shards/mishards/router/factory.py b/shards/mishards/router/factory.py index 66d549f2a6..845f3ceabc 100644 --- a/shards/mishards/router/factory.py +++ b/shards/mishards/router/factory.py @@ -8,7 +8,7 @@ logger = logging.getLogger(__name__) here = os.path.abspath(os.path.dirname(__file__)) get_path = partial(os.path.join, here) -PLUGIN_PACKAGE_NAME = 'router.plugins' +PLUGIN_PACKAGE_NAME = 'mishards.router.plugins' plugin_base = PluginBase(package=PLUGIN_PACKAGE_NAME, searchpath=[get_path('./plugins')]) diff --git a/shards/mishards/test_server.py b/shards/mishards/test_server.py index efd3912076..f0cde2184c 100644 --- a/shards/mishards/test_server.py +++ b/shards/mishards/test_server.py @@ -13,7 +13,7 @@ from mishards import db, create_app, settings from mishards.service_handler import ServiceHandler from mishards.grpc_utils.grpc_args_parser import GrpcArgsParser as Parser from mishards.factories import TableFilesFactory, TablesFactory, 
TableFiles, Tables -from mishards.routings import RouterMixin +from mishards.router import RouterMixin logger = logging.getLogger(__name__) diff --git a/shards/tracer/factory.py b/shards/tracer/factory.py index 9342ca3d7e..fff7a885e4 100644 --- a/shards/tracer/factory.py +++ b/shards/tracer/factory.py @@ -2,7 +2,6 @@ import os import logging from functools import partial from utils.pluginextension import MiPluginBase as PluginBase -# from pluginbase import PluginBase from tracer import Tracer From 1d39ec75b09d62604ad827664d4a0412cc4bbe8b Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 26 Oct 2019 16:27:16 +0800 Subject: [PATCH 103/126] implement service discovery plugins --- shards/discovery/__init__.py | 37 +++++++++++++ shards/discovery/factory.py | 53 ++++++++++++++++++ .../plugins}/kubernetes_provider.py | 55 ++++++++++++------- shards/discovery/plugins/static_provider.py | 43 +++++++++++++++ shards/mishards/__init__.py | 8 +-- shards/mishards/settings.py | 21 ++----- shards/sd/__init__.py | 28 ---------- shards/sd/static_provider.py | 39 ------------- shards/utils/__init__.py | 7 +++ 9 files changed, 183 insertions(+), 108 deletions(-) create mode 100644 shards/discovery/__init__.py create mode 100644 shards/discovery/factory.py rename shards/{sd => discovery/plugins}/kubernetes_provider.py (84%) create mode 100644 shards/discovery/plugins/static_provider.py delete mode 100644 shards/sd/__init__.py delete mode 100644 shards/sd/static_provider.py diff --git a/shards/discovery/__init__.py b/shards/discovery/__init__.py new file mode 100644 index 0000000000..a591d1cc1c --- /dev/null +++ b/shards/discovery/__init__.py @@ -0,0 +1,37 @@ +import os +import os +import sys +if __name__ == '__main__': + sys.path.append(os.path.dirname(os.path.dirname( + os.path.abspath(__file__)))) + +import logging +from utils import dotdict + +logger = logging.getLogger(__name__) + + +class DiscoveryConfig(dotdict): + CONFIG_PREFIX = 'DISCOVERY_' + + def dump(self): + 
logger.info('----------- DiscoveryConfig -----------------') + for k, v in self.items(): + logger.info('{}: {}'.format(k, v)) + if len(self) <= 0: + logger.error(' Empty DiscoveryConfig Found! ') + logger.info('---------------------------------------------') + + @classmethod + def Create(cls, **kwargs): + o = cls() + + for k, v in os.environ.items(): + if not k.startswith(cls.CONFIG_PREFIX): + continue + o[k] = v + for k, v in kwargs.items(): + o[k] = v + + o.dump() + return o diff --git a/shards/discovery/factory.py b/shards/discovery/factory.py new file mode 100644 index 0000000000..a5713dcf37 --- /dev/null +++ b/shards/discovery/factory.py @@ -0,0 +1,53 @@ +import os +import logging +from functools import partial +from utils.pluginextension import MiPluginBase as PluginBase +from discovery import DiscoveryConfig + +logger = logging.getLogger(__name__) + +here = os.path.abspath(os.path.dirname(__file__)) +get_path = partial(os.path.join, here) + +PLUGIN_PACKAGE_NAME = 'discovery.plugins' +plugin_base = PluginBase(package=PLUGIN_PACKAGE_NAME, + searchpath=[get_path('./plugins')]) + + +class DiscoveryFactory(object): + PLUGIN_TYPE = 'Discovery' + + def __init__(self, searchpath=None): + self.plugin_package_name = PLUGIN_PACKAGE_NAME + self.class_map = {} + searchpath = searchpath if searchpath else [] + searchpath = [searchpath] if isinstance(searchpath, str) else searchpath + self.source = plugin_base.make_plugin_source(searchpath=searchpath, + identifier=self.__class__.__name__) + + for plugin_name in self.source.list_plugins(): + plugin = self.source.load_plugin(plugin_name) + plugin.setup(self) + + def on_plugin_setup(self, plugin_class): + name = getattr(plugin_class, 'name', plugin_class.__name__) + self.class_map[name.lower()] = plugin_class + + def plugin(self, name): + return self.class_map.get(name, None) + + def create(self, class_name, **kwargs): + conn_mgr = kwargs.pop('conn_mgr', None) + if not conn_mgr: + raise RuntimeError('Please pass conn_mgr to 
create discovery!') + + if not class_name: + raise RuntimeError('Please specify \'{}\' class_name first!'.format(self.PLUGIN_TYPE)) + + plugin_class = self.plugin(class_name.lower()) + if not plugin_class: + raise RuntimeError('{} Plugin \'{}\' Not Installed!'.format(self.PLUGIN_TYPE, class_name)) + + plugin_config = DiscoveryConfig.Create() + plugin = plugin_class.create(plugin_config=plugin_config, conn_mgr=conn_mgr, **kwargs) + return plugin diff --git a/shards/sd/kubernetes_provider.py b/shards/discovery/plugins/kubernetes_provider.py similarity index 84% rename from shards/sd/kubernetes_provider.py rename to shards/discovery/plugins/kubernetes_provider.py index eb113db007..c9d9a3ad5a 100644 --- a/shards/sd/kubernetes_provider.py +++ b/shards/discovery/plugins/kubernetes_provider.py @@ -13,9 +13,6 @@ import queue import enum from kubernetes import client, config, watch -from utils import singleton -from sd import ProviderManager - logger = logging.getLogger(__name__) INCLUSTER_NAMESPACE_PATH = '/var/run/secrets/kubernetes.io/serviceaccount/namespace' @@ -42,6 +39,8 @@ class K8SMixin: class K8SHeartbeatHandler(threading.Thread, K8SMixin): + name = 'kubernetes' + def __init__(self, message_queue, namespace, @@ -235,18 +234,19 @@ class KubernetesProviderSettings: self.port = int(port) if port else 19530 -@singleton -@ProviderManager.register_service_provider class KubernetesProvider(object): - NAME = 'Kubernetes' + name = 'kubernetes' - def __init__(self, settings, conn_mgr, **kwargs): - self.namespace = settings.namespace - self.pod_patt = settings.pod_patt - self.label_selector = settings.label_selector - self.in_cluster = settings.in_cluster - self.poll_interval = settings.poll_interval - self.port = settings.port + def __init__(self, plugin_config, conn_mgr, **kwargs): + self.namespace = plugin_config.DISCOVERY_KUBERNETES_NAMESPACE + self.pod_patt = plugin_config.DISCOVERY_KUBERNETES_POD_PATT + self.label_selector = 
plugin_config.DISCOVERY_KUBERNETES_LABEL_SELECTOR + self.in_cluster = plugin_config.DISCOVERY_KUBERNETES_IN_CLUSTER.lower() + self.in_cluster = self.in_cluster == 'true' + self.poll_interval = plugin_config.DISCOVERY_KUBERNETES_POLL_INTERVAL + self.poll_interval = int(self.poll_interval) if self.poll_interval else 5 + self.port = plugin_config.DISCOVERY_KUBERNETES_PORT + self.port = int(self.port) if self.port else 19530 self.kwargs = kwargs self.queue = queue.Queue() @@ -298,9 +298,23 @@ class KubernetesProvider(object): self.pod_heartbeater.stop() self.event_handler.stop() + @classmethod + def create(cls, conn_mgr, plugin_config, **kwargs): + discovery = cls(plugin_config=plugin_config, conn_mgr=conn_mgr, **kwargs) + return discovery + + +def setup(app): + logger.info('Plugin \'{}\' Installed In Package: {}'.format(__file__, app.plugin_package_name)) + app.on_plugin_setup(KubernetesProvider) + if __name__ == '__main__': logging.basicConfig(level=logging.INFO) + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.abspath(__file__)))))) + sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.abspath(__file__))))) class Connect: def register(self, name, value): @@ -315,14 +329,15 @@ if __name__ == '__main__': connect_mgr = Connect() - settings = KubernetesProviderSettings(namespace='xp', - pod_patt=".*-ro-servers-.*", - label_selector='tier=ro-servers', - poll_interval=5, - in_cluster=False) + from discovery import DiscoveryConfig + settings = DiscoveryConfig(DISCOVERY_KUBERNETES_NAMESPACE='xp', + DISCOVERY_KUBERNETES_POD_PATT=".*-ro-servers-.*", + DISCOVERY_KUBERNETES_LABEL_SELECTOR='tier=ro-servers', + DISCOVERY_KUBERNETES_POLL_INTERVAL=5, + DISCOVERY_KUBERNETES_IN_CLUSTER=False) - provider_class = ProviderManager.get_provider('Kubernetes') - t = provider_class(conn_mgr=connect_mgr, settings=settings) + provider_class = KubernetesProvider + t = provider_class(conn_mgr=connect_mgr, plugin_config=settings) 
t.start() cnt = 100 while cnt > 0: diff --git a/shards/discovery/plugins/static_provider.py b/shards/discovery/plugins/static_provider.py new file mode 100644 index 0000000000..0f8bdb3d25 --- /dev/null +++ b/shards/discovery/plugins/static_provider.py @@ -0,0 +1,43 @@ +import os +import sys +if __name__ == '__main__': + sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import logging +import socket + +logger = logging.getLogger(__name__) + + +class StaticDiscovery(object): + name = 'static' + + def __init__(self, config, conn_mgr, **kwargs): + self.conn_mgr = conn_mgr + hosts = [config.DISCOVERY_STATIC_HOSTS] if isinstance(config.DISCOVERY_STATIC_HOSTS, str) else hosts + self.hosts = [socket.gethostbyname(host) for host in hosts] + self.port = config.DISCOVERY_STATIC_PORT + + def start(self): + for host in self.hosts: + self.add_pod(host, host) + + def stop(self): + for host in self.hosts: + self.delete_pod(host) + + def add_pod(self, name, ip): + self.conn_mgr.register(name, 'tcp://{}:{}'.format(ip, self.port)) + + def delete_pod(self, name): + self.conn_mgr.unregister(name) + + @classmethod + def create(cls, conn_mgr, plugin_config, **kwargs): + discovery = cls(config=plugin_config, conn_mgr=conn_mgr, **kwargs) + return discovery + + +def setup(app): + logger.info('Plugin \'{}\' Installed In Package: {}'.format(__file__, app.plugin_package_name)) + app.on_plugin_setup(StaticDiscovery) diff --git a/shards/mishards/__init__.py b/shards/mishards/__init__.py index 0c5ecd4d0e..e0792348a9 100644 --- a/shards/mishards/__init__.py +++ b/shards/mishards/__init__.py @@ -16,11 +16,9 @@ def create_app(testing_config=None): from mishards.connections import ConnectionMgr connect_mgr = ConnectionMgr() - from sd import ProviderManager - - sd_proiver_class = ProviderManager.get_provider(settings.SD_PROVIDER) - discover = sd_proiver_class(settings=settings.SD_PROVIDER_SETTINGS, - conn_mgr=connect_mgr) + from discovery.factory import DiscoveryFactory + 
discover = DiscoveryFactory(config.DISCOVERY_PLUGIN_PATH).create(config.DISCOVERY_CLASS_NAME, + conn_mgr=connect_mgr) from mishards.grpc_utils import GrpcSpanDecorator from tracer.factory import TracerFactory diff --git a/shards/mishards/settings.py b/shards/mishards/settings.py index c08e1d7a06..6935405091 100644 --- a/shards/mishards/settings.py +++ b/shards/mishards/settings.py @@ -11,6 +11,7 @@ if FROM_EXAMPLE: else: env.read_env() + DEBUG = env.bool('DEBUG', False) LOG_LEVEL = env.str('LOG_LEVEL', 'DEBUG' if DEBUG else 'INFO') @@ -28,22 +29,8 @@ SERVER_PORT = env.int('SERVER_PORT', 19530) SERVER_TEST_PORT = env.int('SERVER_TEST_PORT', 19530) WOSERVER = env.str('WOSERVER') -SD_PROVIDER_SETTINGS = None -SD_PROVIDER = env.str('SD_PROVIDER', 'Kubernetes') -if SD_PROVIDER == 'Kubernetes': - from sd.kubernetes_provider import KubernetesProviderSettings - SD_PROVIDER_SETTINGS = KubernetesProviderSettings( - namespace=env.str('SD_NAMESPACE', ''), - in_cluster=env.bool('SD_IN_CLUSTER', False), - poll_interval=env.int('SD_POLL_INTERVAL', 5), - pod_patt=env.str('SD_ROSERVER_POD_PATT', ''), - label_selector=env.str('SD_LABEL_SELECTOR', ''), - port=env.int('SD_PORT', 19530)) -elif SD_PROVIDER == 'Static': - from sd.static_provider import StaticProviderSettings - SD_PROVIDER_SETTINGS = StaticProviderSettings( - hosts=env.list('SD_STATIC_HOSTS', []), - port=env.int('SD_STATIC_PORT', 19530)) +DISCOVERY_STATIC_HOSTS = env.list('DISCOVERY_STATIC_HOSTS', []) +DISCOVERY_STATIC_PORT = env.int('DISCOVERY_STATIC_PORT', 19530) # TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') @@ -78,6 +65,8 @@ class DefaultConfig: TRACING_TYPE = env.str('TRACING_TYPE', '') ROUTER_PLUGIN_PATH = env.str('ROUTER_PLUGIN_PATH', '') ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_NAME', 'FileBasedHashRingRouter') + DISCOVERY_PLUGIN_PATH = env.str('DISCOVERY_PLUGIN_PATH', '') + DISCOVERY_CLASS_NAME = env.str('DISCOVERY_CLASS_NAME', 'static') class TestingConfig(DefaultConfig): diff --git 
a/shards/sd/__init__.py b/shards/sd/__init__.py deleted file mode 100644 index 7943887d0f..0000000000 --- a/shards/sd/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -import logging -import inspect -# from utils import singleton - -logger = logging.getLogger(__name__) - - -class ProviderManager: - PROVIDERS = {} - - @classmethod - def register_service_provider(cls, target): - if inspect.isfunction(target): - cls.PROVIDERS[target.__name__] = target - elif inspect.isclass(target): - name = target.__dict__.get('NAME', None) - name = name if name else target.__class__.__name__ - cls.PROVIDERS[name] = target - else: - assert False, 'Cannot register_service_provider for: {}'.format(target) - return target - - @classmethod - def get_provider(cls, name): - return cls.PROVIDERS.get(name, None) - - -from sd import kubernetes_provider, static_provider diff --git a/shards/sd/static_provider.py b/shards/sd/static_provider.py deleted file mode 100644 index e88780740f..0000000000 --- a/shards/sd/static_provider.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import sys -if __name__ == '__main__': - sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -import socket -from utils import singleton -from sd import ProviderManager - - -class StaticProviderSettings: - def __init__(self, hosts, port=None): - self.hosts = hosts - self.port = int(port) if port else 19530 - - -@singleton -@ProviderManager.register_service_provider -class KubernetesProvider(object): - NAME = 'Static' - - def __init__(self, settings, conn_mgr, **kwargs): - self.conn_mgr = conn_mgr - self.hosts = [socket.gethostbyname(host) for host in settings.hosts] - self.port = settings.port - - def start(self): - for host in self.hosts: - self.add_pod(host, host) - - def stop(self): - for host in self.hosts: - self.delete_pod(host) - - def add_pod(self, name, ip): - self.conn_mgr.register(name, 'tcp://{}:{}'.format(ip, self.port)) - - def delete_pod(self, name): - self.conn_mgr.unregister(name) diff 
--git a/shards/utils/__init__.py b/shards/utils/__init__.py index c1d55e76c0..cf444c0680 100644 --- a/shards/utils/__init__.py +++ b/shards/utils/__init__.py @@ -9,3 +9,10 @@ def singleton(cls): instances[cls] = cls(*args, **kw) return instances[cls] return getinstance + + +class dotdict(dict): + """dot.notation access to dictionary attributes""" + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ From cd0a112f5583bf2351cc4355dbf7be5ffbce615f Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 26 Oct 2019 16:54:31 +0800 Subject: [PATCH 104/126] add plugins base mixin --- shards/discovery/factory.py | 40 ++++---------------------------- shards/utils/plugins/__init__.py | 39 +++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 36 deletions(-) create mode 100644 shards/utils/plugins/__init__.py diff --git a/shards/discovery/factory.py b/shards/discovery/factory.py index a5713dcf37..80334daf68 100644 --- a/shards/discovery/factory.py +++ b/shards/discovery/factory.py @@ -1,53 +1,21 @@ -import os import logging -from functools import partial -from utils.pluginextension import MiPluginBase as PluginBase from discovery import DiscoveryConfig +from utils.plugins import BaseMixin logger = logging.getLogger(__name__) - -here = os.path.abspath(os.path.dirname(__file__)) -get_path = partial(os.path.join, here) - PLUGIN_PACKAGE_NAME = 'discovery.plugins' -plugin_base = PluginBase(package=PLUGIN_PACKAGE_NAME, - searchpath=[get_path('./plugins')]) -class DiscoveryFactory(object): +class DiscoveryFactory(BaseMixin): PLUGIN_TYPE = 'Discovery' - def __init__(self, searchpath=None): - self.plugin_package_name = PLUGIN_PACKAGE_NAME - self.class_map = {} - searchpath = searchpath if searchpath else [] - searchpath = [searchpath] if isinstance(searchpath, str) else searchpath - self.source = plugin_base.make_plugin_source(searchpath=searchpath, - identifier=self.__class__.__name__) + super().__init__(searchpath=searchpath, 
package_name=PLUGIN_PACKAGE_NAME) - for plugin_name in self.source.list_plugins(): - plugin = self.source.load_plugin(plugin_name) - plugin.setup(self) - - def on_plugin_setup(self, plugin_class): - name = getattr(plugin_class, 'name', plugin_class.__name__) - self.class_map[name.lower()] = plugin_class - - def plugin(self, name): - return self.class_map.get(name, None) - - def create(self, class_name, **kwargs): + def _create(self, plugin_class, **kwargs): conn_mgr = kwargs.pop('conn_mgr', None) if not conn_mgr: raise RuntimeError('Please pass conn_mgr to create discovery!') - if not class_name: - raise RuntimeError('Please specify \'{}\' class_name first!'.format(self.PLUGIN_TYPE)) - - plugin_class = self.plugin(class_name.lower()) - if not plugin_class: - raise RuntimeError('{} Plugin \'{}\' Not Installed!'.format(self.PLUGIN_TYPE, class_name)) - plugin_config = DiscoveryConfig.Create() plugin = plugin_class.create(plugin_config=plugin_config, conn_mgr=conn_mgr, **kwargs) return plugin diff --git a/shards/utils/plugins/__init__.py b/shards/utils/plugins/__init__.py new file mode 100644 index 0000000000..361dda66f9 --- /dev/null +++ b/shards/utils/plugins/__init__.py @@ -0,0 +1,39 @@ +import os +import inspect +from functools import partial +from utils.pluginextension import MiPluginBase as PluginBase + + +class BaseMixin(object): + def __init__(self, package_name, searchpath=None): + self.plugin_package_name = package_name + caller_path = os.path.dirname(inspect.stack()[1][1]) + get_path = partial(os.path.join, caller_path) + plugin_base = PluginBase(package=self.plugin_package_name, + searchpath=[get_path('./plugins')]) + self.class_map = {} + searchpath = searchpath if searchpath else [] + searchpath = [searchpath] if isinstance(searchpath, str) else searchpath + self.source = plugin_base.make_plugin_source(searchpath=searchpath, + identifier=self.__class__.__name__) + + for plugin_name in self.source.list_plugins(): + plugin = 
self.source.load_plugin(plugin_name) + plugin.setup(self) + + def on_plugin_setup(self, plugin_class): + name = getattr(plugin_class, 'name', plugin_class.__name__) + self.class_map[name.lower()] = plugin_class + + def plugin(self, name): + return self.class_map.get(name, None) + + def create(self, class_name, **kwargs): + if not class_name: + raise RuntimeError('Please specify \'{}\' class_name first!'.format(self.PLUGIN_TYPE)) + + plugin_class = self.plugin(class_name.lower()) + if not plugin_class: + raise RuntimeError('{} Plugin \'{}\' Not Installed!'.format(self.PLUGIN_TYPE, class_name)) + + return self._create(plugin_class, **kwargs) From 4ef3e416fa67903d807640556c473f363a0062d9 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 26 Oct 2019 17:02:14 +0800 Subject: [PATCH 105/126] refactor router plugins --- shards/mishards/router/factory.py | 40 +++---------------- .../plugins/file_based_hash_ring_router.py | 2 +- 2 files changed, 6 insertions(+), 36 deletions(-) diff --git a/shards/mishards/router/factory.py b/shards/mishards/router/factory.py index 845f3ceabc..ea29a26a1d 100644 --- a/shards/mishards/router/factory.py +++ b/shards/mishards/router/factory.py @@ -1,47 +1,17 @@ import os import logging -from functools import partial -from utils.pluginextension import MiPluginBase as PluginBase +from utils.plugins import BaseMixin logger = logging.getLogger(__name__) - -here = os.path.abspath(os.path.dirname(__file__)) -get_path = partial(os.path.join, here) - PLUGIN_PACKAGE_NAME = 'mishards.router.plugins' -plugin_base = PluginBase(package=PLUGIN_PACKAGE_NAME, - searchpath=[get_path('./plugins')]) -class RouterFactory(object): +class RouterFactory(BaseMixin): PLUGIN_TYPE = 'Router' def __init__(self, searchpath=None): - self.plugin_package_name = PLUGIN_PACKAGE_NAME - self.class_map = {} - searchpath = searchpath if searchpath else [] - searchpath = [searchpath] if isinstance(searchpath, str) else searchpath - self.source = 
plugin_base.make_plugin_source(searchpath=searchpath, - identifier=self.__class__.__name__) + super().__init__(searchpath=searchpath, package_name=PLUGIN_PACKAGE_NAME) - for plugin_name in self.source.list_plugins(): - plugin = self.source.load_plugin(plugin_name) - plugin.setup(self) - - def on_plugin_setup(self, plugin_class): - name = getattr(plugin_class, 'name', plugin_class.__name__) - self.class_map[name.lower()] = plugin_class - - def plugin(self, name): - return self.class_map.get(name, None) - - def create(self, class_name, class_config=None, **kwargs): - if not class_name: - raise RuntimeError('Please specify router class_name first!') - - this_class = self.plugin(class_name.lower()) - if not this_class: - raise RuntimeError('{} Plugin \'{}\' Not Installed!'.format(self.PLUGIN_TYPE, class_name)) - - router = this_class.create(class_config, **kwargs) + def _create(self, plugin_class, **kwargs): + router = plugin_class.create(**kwargs) return router diff --git a/shards/mishards/router/plugins/file_based_hash_ring_router.py b/shards/mishards/router/plugins/file_based_hash_ring_router.py index eddb425cfe..4697189f35 100644 --- a/shards/mishards/router/plugins/file_based_hash_ring_router.py +++ b/shards/mishards/router/plugins/file_based_hash_ring_router.py @@ -51,7 +51,7 @@ class Factory(RouterMixin): return routing @classmethod - def create(cls, config, **kwargs): + def create(cls, **kwargs): conn_mgr = kwargs.pop('conn_mgr', None) if not conn_mgr: raise RuntimeError('Cannot find \'conn_mgr\' to initialize \'{}\''.format(self.name)) From ccc80808daf9b8ece9dd860a9ac9e5d1305a662e Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 26 Oct 2019 17:19:57 +0800 Subject: [PATCH 106/126] refactor all plugins --- shards/discovery/factory.py | 3 +- .../discovery/plugins/kubernetes_provider.py | 2 +- shards/discovery/plugins/static_provider.py | 2 +- shards/mishards/__init__.py | 2 +- shards/mishards/router/factory.py | 2 +- .../plugins/file_based_hash_ring_router.py 
| 2 +- shards/tracer/factory.py | 53 ++++++------------- shards/tracer/plugins/jaeger_factory.py | 11 ++-- shards/utils/plugins/__init__.py | 3 +- 9 files changed, 30 insertions(+), 50 deletions(-) diff --git a/shards/discovery/factory.py b/shards/discovery/factory.py index 80334daf68..5f5c7fcf95 100644 --- a/shards/discovery/factory.py +++ b/shards/discovery/factory.py @@ -8,6 +8,7 @@ PLUGIN_PACKAGE_NAME = 'discovery.plugins' class DiscoveryFactory(BaseMixin): PLUGIN_TYPE = 'Discovery' + def __init__(self, searchpath=None): super().__init__(searchpath=searchpath, package_name=PLUGIN_PACKAGE_NAME) @@ -17,5 +18,5 @@ class DiscoveryFactory(BaseMixin): raise RuntimeError('Please pass conn_mgr to create discovery!') plugin_config = DiscoveryConfig.Create() - plugin = plugin_class.create(plugin_config=plugin_config, conn_mgr=conn_mgr, **kwargs) + plugin = plugin_class.Create(plugin_config=plugin_config, conn_mgr=conn_mgr, **kwargs) return plugin diff --git a/shards/discovery/plugins/kubernetes_provider.py b/shards/discovery/plugins/kubernetes_provider.py index c9d9a3ad5a..aaf6091f83 100644 --- a/shards/discovery/plugins/kubernetes_provider.py +++ b/shards/discovery/plugins/kubernetes_provider.py @@ -299,7 +299,7 @@ class KubernetesProvider(object): self.event_handler.stop() @classmethod - def create(cls, conn_mgr, plugin_config, **kwargs): + def Create(cls, conn_mgr, plugin_config, **kwargs): discovery = cls(plugin_config=plugin_config, conn_mgr=conn_mgr, **kwargs) return discovery diff --git a/shards/discovery/plugins/static_provider.py b/shards/discovery/plugins/static_provider.py index 0f8bdb3d25..9bea62f2da 100644 --- a/shards/discovery/plugins/static_provider.py +++ b/shards/discovery/plugins/static_provider.py @@ -33,7 +33,7 @@ class StaticDiscovery(object): self.conn_mgr.unregister(name) @classmethod - def create(cls, conn_mgr, plugin_config, **kwargs): + def Create(cls, conn_mgr, plugin_config, **kwargs): discovery = cls(config=plugin_config, conn_mgr=conn_mgr, 
**kwargs) return discovery diff --git a/shards/mishards/__init__.py b/shards/mishards/__init__.py index e0792348a9..96463caa93 100644 --- a/shards/mishards/__init__.py +++ b/shards/mishards/__init__.py @@ -23,7 +23,7 @@ def create_app(testing_config=None): from mishards.grpc_utils import GrpcSpanDecorator from tracer.factory import TracerFactory tracer = TracerFactory(config.TRACING_PLUGIN_PATH).create(config.TRACING_TYPE, - settings.TracingConfig, + plugin_config=settings.TracingConfig, span_decorator=GrpcSpanDecorator()) from mishards.router.factory import RouterFactory diff --git a/shards/mishards/router/factory.py b/shards/mishards/router/factory.py index ea29a26a1d..a8f85c0df8 100644 --- a/shards/mishards/router/factory.py +++ b/shards/mishards/router/factory.py @@ -13,5 +13,5 @@ class RouterFactory(BaseMixin): super().__init__(searchpath=searchpath, package_name=PLUGIN_PACKAGE_NAME) def _create(self, plugin_class, **kwargs): - router = plugin_class.create(**kwargs) + router = plugin_class.Create(**kwargs) return router diff --git a/shards/mishards/router/plugins/file_based_hash_ring_router.py b/shards/mishards/router/plugins/file_based_hash_ring_router.py index 4697189f35..b90935129e 100644 --- a/shards/mishards/router/plugins/file_based_hash_ring_router.py +++ b/shards/mishards/router/plugins/file_based_hash_ring_router.py @@ -51,7 +51,7 @@ class Factory(RouterMixin): return routing @classmethod - def create(cls, **kwargs): + def Create(cls, **kwargs): conn_mgr = kwargs.pop('conn_mgr', None) if not conn_mgr: raise RuntimeError('Cannot find \'conn_mgr\' to initialize \'{}\''.format(self.name)) diff --git a/shards/tracer/factory.py b/shards/tracer/factory.py index fff7a885e4..0e54a5aeb6 100644 --- a/shards/tracer/factory.py +++ b/shards/tracer/factory.py @@ -1,50 +1,27 @@ import os import logging -from functools import partial -from utils.pluginextension import MiPluginBase as PluginBase from tracer import Tracer - +from utils.plugins import BaseMixin logger = 
logging.getLogger(__name__) - -here = os.path.abspath(os.path.dirname(__file__)) -get_path = partial(os.path.join, here) - PLUGIN_PACKAGE_NAME = 'tracer.plugins' -plugin_base = PluginBase(package=PLUGIN_PACKAGE_NAME, - searchpath=[get_path('./plugins')]) -class TracerFactory(object): +class TracerFactory(BaseMixin): + PLUGIN_TYPE = 'Tracer' + def __init__(self, searchpath=None): - self.plugin_package_name = PLUGIN_PACKAGE_NAME - self.tracer_map = {} - searchpath = searchpath if searchpath else [] - searchpath = [searchpath] if isinstance(searchpath, str) else searchpath - self.source = plugin_base.make_plugin_source(searchpath=searchpath, - identifier=self.__class__.__name__) + super().__init__(searchpath=searchpath, package_name=PLUGIN_PACKAGE_NAME) - for plugin_name in self.source.list_plugins(): - plugin = self.source.load_plugin(plugin_name) - plugin.setup(self) - - def on_plugin_setup(self, plugin_class): - name = getattr(plugin_class, 'name', plugin_class.__name__) - self.tracer_map[name.lower()] = plugin_class - - def plugin(self, name): - return self.tracer_map.get(name, None) - - def create(self, - tracer_type, - tracer_config, - span_decorator=None, - **kwargs): - if not tracer_type: + def create(self, class_name, **kwargs): + if not class_name: return Tracer() - plugin_class = self.plugin(tracer_type.lower()) - if not plugin_class: - raise RuntimeError('Tracer Plugin \'{}\' not installed!'.format(tracer_type)) + return super().create(class_name, **kwargs) - tracer = plugin_class.create(tracer_config, span_decorator=span_decorator, **kwargs) - return tracer + def _create(self, plugin_class, **kwargs): + plugin_config = kwargs.pop('plugin_config', None) + if not plugin_config: + raise RuntimeError('\'{}\' Plugin Config is Required!'.format(self.PLUGIN_TYPE)) + + plugin = plugin_class.Create(plugin_config=plugin_config, **kwargs) + return plugin diff --git a/shards/tracer/plugins/jaeger_factory.py b/shards/tracer/plugins/jaeger_factory.py index 
7b18a86130..923f2f805d 100644 --- a/shards/tracer/plugins/jaeger_factory.py +++ b/shards/tracer/plugins/jaeger_factory.py @@ -12,10 +12,11 @@ PLUGIN_NAME = __file__ class JaegerFactory: name = 'jaeger' @classmethod - def create(cls, tracer_config, span_decorator=None, **kwargs): - tracing_config = tracer_config.TRACING_CONFIG - service_name = tracer_config.TRACING_SERVICE_NAME - validate = tracer_config.TRACING_VALIDATE + def Create(cls, plugin_config, **kwargs): + tracing_config = plugin_config.TRACING_CONFIG + span_decorator = kwargs.pop('span_decorator', None) + service_name = plugin_config.TRACING_SERVICE_NAME + validate = plugin_config.TRACING_VALIDATE config = Config(config=tracing_config, service_name=service_name, validate=validate) @@ -23,7 +24,7 @@ class JaegerFactory: tracer = config.initialize_tracer() tracer_interceptor = open_tracing_server_interceptor( tracer, - log_payloads=tracer_config.TRACING_LOG_PAYLOAD, + log_payloads=plugin_config.TRACING_LOG_PAYLOAD, span_decorator=span_decorator) return Tracer(tracer, tracer_interceptor, intercept_server) diff --git a/shards/utils/plugins/__init__.py b/shards/utils/plugins/__init__.py index 361dda66f9..633f1164a7 100644 --- a/shards/utils/plugins/__init__.py +++ b/shards/utils/plugins/__init__.py @@ -5,7 +5,8 @@ from utils.pluginextension import MiPluginBase as PluginBase class BaseMixin(object): - def __init__(self, package_name, searchpath=None): + + def __init__(self, package_name, searchpath=None): self.plugin_package_name = package_name caller_path = os.path.dirname(inspect.stack()[1][1]) get_path = partial(os.path.join, caller_path) From 8630077e8a0ef184506978e301afaa3bd0ae0b8b Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Sat, 26 Oct 2019 17:26:59 +0800 Subject: [PATCH 107/126] refactor tracer settings --- shards/mishards/.env.example | 26 +++++++++++++++----------- shards/mishards/__init__.py | 2 +- shards/mishards/settings.py | 4 ++-- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git 
a/shards/mishards/.env.example b/shards/mishards/.env.example index 0a23c0cf56..8c8e696c31 100644 --- a/shards/mishards/.env.example +++ b/shards/mishards/.env.example @@ -4,17 +4,6 @@ WOSERVER=tcp://127.0.0.1:19530 SERVER_PORT=19532 SERVER_TEST_PORT=19888 -SD_PROVIDER=Static - -SD_NAMESPACE=xp -SD_IN_CLUSTER=False -SD_POLL_INTERVAL=5 -SD_ROSERVER_POD_PATT=.*-ro-servers-.* -SD_LABEL_SELECTOR=tier=ro-servers - -SD_STATIC_HOSTS=127.0.0.1 -SD_STATIC_PORT=19530 - #SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 SQLALCHEMY_DATABASE_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False SQL_ECHO=True @@ -31,3 +20,18 @@ TRACING_SAMPLER_PARAM=1 TRACING_LOG_PAYLOAD=True #TRACING_SAMPLER_TYPE=probabilistic #TRACING_SAMPLER_PARAM=0.5 + +TRACER_PLUGIN_PATH=/tmp/plugins +# TRACER_CLASS_NAME= + +#DISCOVERY_PLUGIN_PATH= +#DISCOVERY_CLASS_NAME=kubernetes + +DISCOVERY_STATIC_HOSTS=127.0.0.1 +DISCOVERY_STATIC_PORT=19530 + +DISCOVERY_KUBERNETES_NAMESPACE=xp +DISCOVERY_KUBERNETES_POD_PATT=.*-ro-servers-.* +DISCOVERY_KUBERNETES_LABEL_SELECTOR=tier=ro-servers +DISCOVERY_KUBERNETES_POLL_INTERVAL=5 +DISCOVERY_KUBERNETES_IN_CLUSTER=False diff --git a/shards/mishards/__init__.py b/shards/mishards/__init__.py index 96463caa93..b4c51cc4f5 100644 --- a/shards/mishards/__init__.py +++ b/shards/mishards/__init__.py @@ -22,7 +22,7 @@ def create_app(testing_config=None): from mishards.grpc_utils import GrpcSpanDecorator from tracer.factory import TracerFactory - tracer = TracerFactory(config.TRACING_PLUGIN_PATH).create(config.TRACING_TYPE, + tracer = TracerFactory(config.TRACER_PLUGIN_PATH).create(config.TRACER_CLASS_NAME, plugin_config=settings.TracingConfig, span_decorator=GrpcSpanDecorator()) diff --git a/shards/mishards/settings.py b/shards/mishards/settings.py index 6935405091..09b7b0713f 100644 --- a/shards/mishards/settings.py +++ b/shards/mishards/settings.py @@ -61,8 +61,8 @@ class TracingConfig: class DefaultConfig: 
SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_URI') SQL_ECHO = env.bool('SQL_ECHO', False) - TRACING_PLUGIN_PATH = env.str('TRACING_PLUGIN_PATH', '') - TRACING_TYPE = env.str('TRACING_TYPE', '') + TRACER_PLUGIN_PATH = env.str('TRACER_PLUGIN_PATH', '') + TRACER_CLASS_NAME = env.str('TRACER_CLASS_NAME', '') ROUTER_PLUGIN_PATH = env.str('ROUTER_PLUGIN_PATH', '') ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_NAME', 'FileBasedHashRingRouter') DISCOVERY_PLUGIN_PATH = env.str('DISCOVERY_PLUGIN_PATH', '') From 9f52316704ac54812bd9e8c5a22456612ccefb62 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 28 Oct 2019 10:29:21 +0800 Subject: [PATCH 108/126] change static discovery init from env --- shards/discovery/plugins/static_provider.py | 6 ++++-- shards/mishards/settings.py | 7 +------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/shards/discovery/plugins/static_provider.py b/shards/discovery/plugins/static_provider.py index 9bea62f2da..fca8c717db 100644 --- a/shards/discovery/plugins/static_provider.py +++ b/shards/discovery/plugins/static_provider.py @@ -5,8 +5,10 @@ if __name__ == '__main__': import logging import socket +from environs import Env logger = logging.getLogger(__name__) +env = Env() class StaticDiscovery(object): @@ -14,9 +16,9 @@ class StaticDiscovery(object): def __init__(self, config, conn_mgr, **kwargs): self.conn_mgr = conn_mgr - hosts = [config.DISCOVERY_STATIC_HOSTS] if isinstance(config.DISCOVERY_STATIC_HOSTS, str) else hosts + hosts = env.list('DISCOVERY_STATIC_HOSTS', []) + self.port = env.int('DISCOVERY_STATIC_PORT', 19530) self.hosts = [socket.gethostbyname(host) for host in hosts] - self.port = config.DISCOVERY_STATIC_PORT def start(self): for host in self.hosts: diff --git a/shards/mishards/settings.py b/shards/mishards/settings.py index 09b7b0713f..2694cd0a1f 100644 --- a/shards/mishards/settings.py +++ b/shards/mishards/settings.py @@ -29,11 +29,6 @@ SERVER_PORT = env.int('SERVER_PORT', 19530) SERVER_TEST_PORT = 
env.int('SERVER_TEST_PORT', 19530) WOSERVER = env.str('WOSERVER') -DISCOVERY_STATIC_HOSTS = env.list('DISCOVERY_STATIC_HOSTS', []) -DISCOVERY_STATIC_PORT = env.int('DISCOVERY_STATIC_PORT', 19530) - -# TESTING_WOSERVER = env.str('TESTING_WOSERVER', 'tcp://127.0.0.1:19530') - class TracingConfig: TRACING_SERVICE_NAME = env.str('TRACING_SERVICE_NAME', 'mishards') @@ -72,7 +67,7 @@ class DefaultConfig: class TestingConfig(DefaultConfig): SQLALCHEMY_DATABASE_URI = env.str('SQLALCHEMY_DATABASE_TEST_URI', '') SQL_ECHO = env.bool('SQL_TEST_ECHO', False) - TRACING_TYPE = env.str('TRACING_TEST_TYPE', '') + TRACER_CLASS_NAME = env.str('TRACER_CLASS_TEST_NAME', '') ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_TEST_NAME', 'FileBasedHashRingRouter') From c2400f3167412ba18b6e15cd097553da8e92c70e Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 28 Oct 2019 10:30:24 +0800 Subject: [PATCH 109/126] change all_in_one.yml for updated source changes --- shards/start_services.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/shards/start_services.yml b/shards/start_services.yml index 57fe061bb7..286230feeb 100644 --- a/shards/start_services.yml +++ b/shards/start_services.yml @@ -33,9 +33,10 @@ services: FROM_EXAMPLE: 'true' DEBUG: 'true' SERVER_PORT: 19531 - WOSERVER: tcp://milvus:19530 - SD_STATIC_HOSTS: milvus - TRACING_TYPE: jaeger + WOSERVER: tcp://milvus_wr:19530 + DISCOVERY_PLUGIN_PATH: static + DISCOVERY_STATIC_HOSTS: milvus_wr,milvus_ro + TRACER_CLASS_NAME: jaeger TRACING_SERVICE_NAME: mishards-demo TRACING_REPORTING_HOST: jaeger TRACING_REPORTING_PORT: 5775 From 3c38ac29da21300de7a3f64766b96a5b5b973f6c Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 28 Oct 2019 10:39:57 +0800 Subject: [PATCH 110/126] update env example --- shards/mishards/.env.example | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/shards/mishards/.env.example b/shards/mishards/.env.example index 8c8e696c31..c8848eaadf 100644 --- 
a/shards/mishards/.env.example +++ b/shards/mishards/.env.example @@ -12,8 +12,10 @@ SQL_ECHO=True SQLALCHEMY_DATABASE_TEST_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False SQL_TEST_ECHO=False +TRACER_PLUGIN_PATH=/tmp/plugins + # TRACING_TEST_TYPE=jaeger -TRACING_TYPE=jaeger +TRACER_CLASS_NAME=jaeger TRACING_SERVICE_NAME=fortest TRACING_SAMPLER_TYPE=const TRACING_SAMPLER_PARAM=1 @@ -21,9 +23,6 @@ TRACING_LOG_PAYLOAD=True #TRACING_SAMPLER_TYPE=probabilistic #TRACING_SAMPLER_PARAM=0.5 -TRACER_PLUGIN_PATH=/tmp/plugins -# TRACER_CLASS_NAME= - #DISCOVERY_PLUGIN_PATH= #DISCOVERY_CLASS_NAME=kubernetes From 783080968439153f3ca5889b7af793b2d730e8c2 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 28 Oct 2019 11:03:41 +0800 Subject: [PATCH 111/126] update CN tutorial --- shards/Tutorial_CN.md | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/shards/Tutorial_CN.md b/shards/Tutorial_CN.md index 74ddd5ef78..ef82342c6a 100644 --- a/shards/Tutorial_CN.md +++ b/shards/Tutorial_CN.md @@ -13,7 +13,7 @@ Milvus 旨在帮助用户实现海量非结构化数据的近似检索和分析 2. pip install -r requirements.txt 3. nvidia-docker run --rm -d -p 19530:19530 -v /tmp/milvus/db:/opt/milvus/db milvusdb/milvus:0.5.0-d102119-ede20b 4. sudo chown -R $USER:$USER /tmp/milvus -5. cp mishards/.env.example to mishards/.env +5. cp mishards/.env.example mishards/.env 6 7. 
在python mishards/main.py #.env配置mishards监听19532端口 ``` @@ -100,23 +100,25 @@ pytest --cov-report html:cov_html --cov=mishards ### 服务发现 | Name | Required | Type | Default Value | Explanation | | --------------------------- | -------- | -------- | ------------- | ------------- | -| SD_PROVIDER | No | string | "Kubernetes" | 配置服务发现服务类型,目前只有Static, Kubernetes可选 | -| SD_STATIC_HOSTS | No | list | [] | **SD_PROVIDER** 为**Static**时,配置服务地址列表,例"192.168.1.188,192.168.1.190"| -| SD_STATIC_PORT | No | int | 19530 | **SD_PROVIDER** 为**Static**时,配置Hosts监听端口 | -| SD_NAMESPACE | No | string | - | **SD_PROVIDER** 为**Kubernetes**时,配置集群namespace | -| SD_IN_CLUSTER | No | bool | False | **SD_PROVIDER** 为**Kubernetes**时,标明服务发现是否在集群中运行 | -| SD_POLL_INTERVAL | No | int | 5 | **SD_PROVIDER** 为**Kubernetes**时,标明服务发现监听服务列表频率,单位Second | -| SD_ROSERVER_POD_PATT | No | string | - | **SD_PROVIDER** 为**Kubernetes**时,匹配可读Milvus实例的正则表达式 | -| SD_LABEL_SELECTOR | No | string | - | **SD_PROVIDER** 为**Kubernetes**时,匹配可读Milvus实例的标签选择 | +| DISCOVERY_PLUGIN_PATH | No | string | - | 用户自定义服务发现插件搜索路径,默认使用系统搜索路径| +| DISCOVERY_CLASS_NAME | No | string | static | 在服务发现插件搜索路径下搜索类并实例化。目前系统提供 **static** 和 **kubernetes** 两种类,默认使用 **static** | +| DISCOVERY_STATIC_HOSTS | No | list | [] | **DISCOVERY_CLASS_NAME** 为 **static** 时,配置服务地址列表,例"192.168.1.188,192.168.1.190"| +| DISCOVERY_STATIC_PORT | No | int | 19530 | **DISCOVERY_CLASS_NAME** 为 **static** 时,配置 Hosts 监听端口 | +| DISCOVERY_KUBERNETES_NAMESPACE | No | string | - | **DISCOVERY_CLASS_NAME** 为 **kubernetes** 时,配置集群 namespace | +| DISCOVERY_KUBERNETES_IN_CLUSTER | No | bool | False | **DISCOVERY_CLASS_NAME** 为 **kubernetes** 时,标明服务发现是否在集群中运行 | +| DISCOVERY_KUBERNETES_POLL_INTERVAL | No | int | 5 | **DISCOVERY_CLASS_NAME** 为 **kubernetes** 时,标明服务发现监听服务列表频率,单位 Second | +| DISCOVERY_KUBERNETES_POD_PATT | No | string | - | **DISCOVERY_CLASS_NAME** 为 **kubernetes** 时,匹配可读 Milvus 实例的正则表达式 | +| DISCOVERY_KUBERNETES_LABEL_SELECTOR | No | string | - | **DISCOVERY_CLASS_NAME** 
为**Kubernetes**时,匹配可读Milvus实例的标签选择 | ### 链路追踪 | Name | Required | Type | Default Value | Explanation | | --------------------------- | -------- | -------- | ------------- | ------------- | -| TRACING_TYPE | No | string | "" | 链路追踪方案选择,目前只有Jaeger, 默认不使用| -| TRACING_SERVICE_NAME | No | string | "mishards" | **TRACING_TYPE** 为**Jaeger**时,链路追踪服务名 | -| TRACING_SAMPLER_TYPE | No | string | "const" | **TRACING_TYPE** 为**Jaeger**时,链路追踪采样类型 | -| TRACING_SAMPLER_PARAM | No | int | 1 | **TRACING_TYPE** 为**Jaeger**时,链路追踪采样频率 | -| TRACING_LOG_PAYLOAD | No | bool | False | **TRACING_TYPE** 为**Jaeger**时,链路追踪是否采集Payload | +| TRACER_PLUGIN_PATH | No | string | - | 用户自定义链路追踪插件搜索路径,默认使用系统搜索路径| +| TRACER_CLASS_NAME | No | string | "" | 链路追踪方案选择,目前只实现 **Jaeger**, 默认不使用| +| TRACING_SERVICE_NAME | No | string | "mishards" | **TRACER_CLASS_NAME** 为 **Jaeger** 时,链路追踪服务名 | +| TRACING_SAMPLER_TYPE | No | string | "const" | **TRACER_CLASS_NAME** 为 **Jaeger** 时,链路追踪采样类型 | +| TRACING_SAMPLER_PARAM | No | int | 1 | **TRACER_CLASS_NAME** 为 **Jaeger** 时,链路追踪采样频率 | +| TRACING_LOG_PAYLOAD | No | bool | False | **TRACER_CLASS_NAME** 为 **Jaeger** 时,链路追踪是否采集 Payload | ### 日志 | Name | Required | Type | Default Value | Explanation | @@ -128,5 +130,6 @@ pytest --cov-report html:cov_html --cov=mishards ### 路由 | Name | Required | Type | Default Value | Explanation | | --------------------------- | -------- | -------- | ------------- | ------------- | -| ROUTER_CLASS_NAME | No | string | FileBasedHashRingRouter | 处理请求路由类名, 可注册自定义类 | +| ROUTER_PLUGIN_PATH | No | string | - | 用户自定义路由插件搜索路径,默认使用系统搜索路径| +| ROUTER_CLASS_NAME | No | string | FileBasedHashRingRouter | 处理请求路由类名, 可注册自定义类。目前系统只提供了类 **FileBasedHashRingRouter** | | ROUTER_CLASS_TEST_NAME | No | string | FileBasedHashRingRouter | 测试环境下处理请求路由类名, 可注册自定义类 | From 3403dcc5a88fadfa9d32c797667a42a40e14f9ea Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 28 Oct 2019 11:11:39 +0800 Subject: [PATCH 112/126] update kubernetes demo for changes --- 
shards/kubernetes_demo/milvus_configmap.yaml | 185 +++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 shards/kubernetes_demo/milvus_configmap.yaml diff --git a/shards/kubernetes_demo/milvus_configmap.yaml b/shards/kubernetes_demo/milvus_configmap.yaml new file mode 100644 index 0000000000..cb751c02f1 --- /dev/null +++ b/shards/kubernetes_demo/milvus_configmap.yaml @@ -0,0 +1,185 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: milvus-mysql-configmap + namespace: milvus +data: + milvus_mysql_config.yml: | + [mysqld] + pid-file = /var/run/mysqld/mysqld.pid + socket = /var/run/mysqld/mysqld.sock + datadir = /data + log-error = /var/log/mysql/error.log # mount out to host + # By default we only accept connections from localhost + bind-address = 0.0.0.0 + # Disabling symbolic-links is recommended to prevent assorted security risks + symbolic-links=0 + character-set-server = utf8mb4 + collation-server = utf8mb4_unicode_ci + init_connect='SET NAMES utf8mb4' + skip-character-set-client-handshake = true + max_connections = 1000 + wait_timeout = 31536000 + +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: milvus-proxy-configmap + namespace: milvus +data: + milvus_proxy_config.yml: | + DEBUG=True + TESTING=False + + WOSERVER=tcp://milvus-wo-servers:19530 + SERVER_PORT=19530 + + DISCOVERY_CLASS_NAME=kubernetes + DISCOVERY_KUBERNETES_NAMESPACE=milvus + DISCOVERY_KUBERNETES_POD_PATT=.*-ro-servers-.* + DISCOVERY_KUBERNETES_LABEL_SELECTOR=tier=ro-servers + DISCOVERY_KUBERNETES_POLL_INTERVAL=10 + DISCOVERY_KUBERNETES_IN_CLUSTER=True + + SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:milvusroot@milvus-mysql:3306/milvus?charset=utf8mb4 + SQLALCHEMY_POOL_SIZE=50 + SQLALCHEMY_POOL_RECYCLE=7200 + + LOG_PATH=/var/log/milvus + TIMEZONE=Asia/Shanghai +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: milvus-roserver-configmap + namespace: milvus +data: + config.yml: | + server_config: + address: 0.0.0.0 + port: 19530 + mode: cluster_readonly + + 
db_config: + primary_path: /var/milvus + backend_url: mysql://root:milvusroot@milvus-mysql:3306/milvus + insert_buffer_size: 2 + + metric_config: + enable_monitor: off # true is on, false is off + + cache_config: + cpu_cache_capacity: 12 # memory pool to hold index data, unit: GB + cpu_cache_free_percent: 0.85 + insert_cache_immediately: false + # gpu_cache_capacity: 4 + # gpu_cache_free_percent: 0.85 + # gpu_ids: + # - 0 + + engine_config: + use_blas_threshold: 800 + + resource_config: + search_resources: + - gpu0 + + log.conf: | + * GLOBAL: + FORMAT = "%datetime | %level | %logger | %msg" + FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-global.log" + ENABLED = true + TO_FILE = true + TO_STANDARD_OUTPUT = true + SUBSECOND_PRECISION = 3 + PERFORMANCE_TRACKING = false + MAX_LOG_FILE_SIZE = 2097152 ## Throw log files away after 2MB + * DEBUG: + FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-debug.log" + ENABLED = true + * WARNING: + FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-warning.log" + * TRACE: + FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-trace.log" + * VERBOSE: + FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg" + TO_FILE = true + TO_STANDARD_OUTPUT = true + ## Error logs + * ERROR: + ENABLED = true + FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-error.log" + * FATAL: + ENABLED = true + FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-fatal.log" + +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: milvus-woserver-configmap + namespace: milvus +data: + config.yml: | + server_config: + address: 0.0.0.0 + port: 19530 + mode: cluster_writable + + db_config: + primary_path: /var/milvus + backend_url: mysql://root:milvusroot@milvus-mysql:3306/milvus + insert_buffer_size: 2 + + metric_config: + enable_monitor: off # true is on, false is off + + cache_config: + cpu_cache_capacity: 2 # memory pool to hold index data, unit: GB + cpu_cache_free_percent: 0.85 + insert_cache_immediately: false + # 
gpu_cache_capacity: 4 + # gpu_cache_free_percent: 0.85 + # gpu_ids: + # - 0 + + engine_config: + use_blas_threshold: 800 + + resource_config: + search_resources: + - gpu0 + + + log.conf: | + * GLOBAL: + FORMAT = "%datetime | %level | %logger | %msg" + FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-global.log" + ENABLED = true + TO_FILE = true + TO_STANDARD_OUTPUT = true + SUBSECOND_PRECISION = 3 + PERFORMANCE_TRACKING = false + MAX_LOG_FILE_SIZE = 2097152 ## Throw log files away after 2MB + * DEBUG: + FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-debug.log" + ENABLED = true + * WARNING: + FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-warning.log" + * TRACE: + FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-trace.log" + * VERBOSE: + FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg" + TO_FILE = true + TO_STANDARD_OUTPUT = true + ## Error logs + * ERROR: + ENABLED = true + FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-error.log" + * FATAL: + ENABLED = true + FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-fatal.log" From 4167cecc9f7bb07be61d2a363e2c2eb3d8ef18b0 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 28 Oct 2019 11:19:46 +0800 Subject: [PATCH 113/126] changes for code style check --- shards/mishards/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shards/mishards/__init__.py b/shards/mishards/__init__.py index b4c51cc4f5..a3c55c4ae3 100644 --- a/shards/mishards/__init__.py +++ b/shards/mishards/__init__.py @@ -23,8 +23,8 @@ def create_app(testing_config=None): from mishards.grpc_utils import GrpcSpanDecorator from tracer.factory import TracerFactory tracer = TracerFactory(config.TRACER_PLUGIN_PATH).create(config.TRACER_CLASS_NAME, - plugin_config=settings.TracingConfig, - span_decorator=GrpcSpanDecorator()) + plugin_config=settings.TracingConfig, + span_decorator=GrpcSpanDecorator()) from mishards.router.factory import RouterFactory router = 
RouterFactory(config.ROUTER_PLUGIN_PATH).create(config.ROUTER_CLASS_NAME, From 2205d0129e3f842b2eeca6cd2fbf8ebdd3d75fd8 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 31 Oct 2019 10:47:46 +0800 Subject: [PATCH 114/126] (mishards): add Makefile --- shards/Makefile | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 shards/Makefile diff --git a/shards/Makefile b/shards/Makefile new file mode 100644 index 0000000000..8c351f05e8 --- /dev/null +++ b/shards/Makefile @@ -0,0 +1,15 @@ +build: + docker build --network=host -t milvusdb/mishards . +push: + docker push milvusdb/mishards +pull: + docker pull milvusdb/mishards +deploy: + cd all_in_one && docker-compose -f all_in_one.yml up -d && cd - +clean: + rm -rf cov_html + cd all_in_one && docker-compose -f all_in_one.yml down && cd - +check_style: + pycodestyle --config=. +make test: + pytest --cov-report html:cov_html --cov=mishards From 084215b2489d98756943bb9bd65d462392f91294 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 31 Oct 2019 11:09:40 +0800 Subject: [PATCH 115/126] (mishards): update for makefile --- shards/Makefile | 12 ++++++++---- shards/Tutorial_CN.md | 15 +++++++++++---- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/shards/Makefile b/shards/Makefile index 8c351f05e8..7ad724ec4c 100644 --- a/shards/Makefile +++ b/shards/Makefile @@ -6,10 +6,14 @@ pull: docker pull milvusdb/mishards deploy: cd all_in_one && docker-compose -f all_in_one.yml up -d && cd - -clean: - rm -rf cov_html +clean_deploy: cd all_in_one && docker-compose -f all_in_one.yml down && cd - -check_style: +clean_coverage: + rm -rf cov_html +clean: clean_coverage clean_deploy +style: pycodestyle --config=. 
-make test: +coverage: pytest --cov-report html:cov_html --cov=mishards +test: + pytest diff --git a/shards/Tutorial_CN.md b/shards/Tutorial_CN.md index ef82342c6a..0c44897aea 100644 --- a/shards/Tutorial_CN.md +++ b/shards/Tutorial_CN.md @@ -24,8 +24,8 @@ Milvus 旨在帮助用户实现海量非结构化数据的近似检索和分析 **启动** ``` 1. 安装docker-compose -1. cd milvus/shards/all_in_one -2. docker-compose -f all_in_one.yml up -d #监听19531端口 +2. make deploy #监听19531端口 +3. make clean_deploy #清理服务 ``` **打开Jaeger UI** @@ -70,12 +70,19 @@ kubectl logs -f --tail=1000 -n milvus milvus-ro-servers-0 查看计算节点milv **启动单元测试** ``` 1. cd milvus/shards -2. pytest +2. make test ``` **单元测试覆盖率** ``` -pytest --cov-report html:cov_html --cov=mishards +1. cd milvus/shards +2. make coverage +``` + +**代码风格检查** +``` +1. cd milvus/shards +2. make style ``` ## mishards配置详解 From f05f7b94df26c55015201e3441dc704bcd02373f Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 31 Oct 2019 11:11:41 +0800 Subject: [PATCH 116/126] (mishards): update for tutorial and all_in_one yml --- shards/Tutorial_CN.md | 5 +++-- shards/start_services.yml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/shards/Tutorial_CN.md b/shards/Tutorial_CN.md index 0c44897aea..261fa64c8c 100644 --- a/shards/Tutorial_CN.md +++ b/shards/Tutorial_CN.md @@ -24,8 +24,9 @@ Milvus 旨在帮助用户实现海量非结构化数据的近似检索和分析 **启动** ``` 1. 安装docker-compose -2. make deploy #监听19531端口 -3. make clean_deploy #清理服务 +2. make build +3. make deploy #监听19531端口 +4. 
make clean_deploy #清理服务 ``` **打开Jaeger UI** diff --git a/shards/start_services.yml b/shards/start_services.yml index 286230feeb..95acdd045e 100644 --- a/shards/start_services.yml +++ b/shards/start_services.yml @@ -21,7 +21,7 @@ services: mishards: restart: always - image: registry.zilliz.com/milvus/mishards:v0.0.4 + image: milvusdb/mishards ports: - "0.0.0.0:19530:19531" - "0.0.0.0:19532:19532" From 4af986acd47417849d1ed702458c5ed189119c97 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 4 Nov 2019 09:35:49 +0800 Subject: [PATCH 117/126] (shards): remove build.sh From 58a31cfe22341d28b4e7d183291683b034e7156b Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 4 Nov 2019 11:15:07 +0800 Subject: [PATCH 118/126] (shards): update makefile --- shards/Makefile | 18 +++++++++++++++++- shards/all_in_one/probe_test.py | 25 +++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 shards/all_in_one/probe_test.py diff --git a/shards/Makefile b/shards/Makefile index 7ad724ec4c..b1cdecdce2 100644 --- a/shards/Makefile +++ b/shards/Makefile @@ -1,13 +1,29 @@ +HOST=$(or $(host),127.0.0.1) +PORT=$(or $(port),19530) + build: docker build --network=host -t milvusdb/mishards . 
push: docker push milvusdb/mishards pull: docker pull milvusdb/mishards -deploy: +deploy: clean_deploy cd all_in_one && docker-compose -f all_in_one.yml up -d && cd - clean_deploy: cd all_in_one && docker-compose -f all_in_one.yml down && cd - +probe_deploy: + docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c "python all_in_one/probe_test.py" +cluster: + cd kubernetes_demo;./start.sh baseup;./start.sh appup;cd - +clean_cluster: + cd kubernetes_demo;./start.sh cleanup;cd - +cluster_status: + kubectl get pods -n milvus -o wide +probe_cluster: + @echo + $(shell kubectl get service -n milvus | grep milvus-proxy-servers | awk {'print $$4,$$5'} | awk -F"[: ]" {'print "docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c \"python all_in_one/probe_test.py --port="$$2" --host="$$1"\""'}) +probe: + docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c "python all_in_one/probe_test.py --port=${PORT} --host=${HOST}" clean_coverage: rm -rf cov_html clean: clean_coverage clean_deploy diff --git a/shards/all_in_one/probe_test.py b/shards/all_in_one/probe_test.py new file mode 100644 index 0000000000..6250465910 --- /dev/null +++ b/shards/all_in_one/probe_test.py @@ -0,0 +1,25 @@ +from milvus import Milvus + +RED = '\033[0;31m' +GREEN = '\033[0;32m' +ENDC = '' + + +def test(host='127.0.0.1', port=19531): + client = Milvus() + try: + status = client.connect(host=host, port=port) + if status.OK(): + print('{}Pass: Connected{}'.format(GREEN, ENDC)) + return 0 + else: + print('{}Error: {}{}'.format(RED, status, ENDC)) + return 1 + except Exception as exc: + print('{}Error: {}{}'.format(RED, exc, ENDC)) + return 1 + + +if __name__ == '__main__': + import fire + fire.Fire(test) From 13c445ccaa2a84369f0d9068b10f063cc19a7e2e Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 4 Nov 2019 11:33:34 +0800 Subject: [PATCH 119/126] (shards): update makefile for cluster --- shards/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/shards/Makefile b/shards/Makefile index b1cdecdce2..a71ef6a70c 100644 --- a/shards/Makefile +++ b/shards/Makefile @@ -14,7 +14,7 @@ clean_deploy: probe_deploy: docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c "python all_in_one/probe_test.py" cluster: - cd kubernetes_demo;./start.sh baseup;./start.sh appup;cd - + cd kubernetes_demo;./start.sh baseup;sleep 10;./start.sh appup;cd - clean_cluster: cd kubernetes_demo;./start.sh cleanup;cd - cluster_status: From 275462eaf93c69d7f4ce01205a8ed1d996f7ebf0 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 4 Nov 2019 11:33:51 +0800 Subject: [PATCH 120/126] (shards): update cn doc --- shards/Tutorial_CN.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/shards/Tutorial_CN.md b/shards/Tutorial_CN.md index 261fa64c8c..192a0fd285 100644 --- a/shards/Tutorial_CN.md +++ b/shards/Tutorial_CN.md @@ -14,8 +14,8 @@ Milvus 旨在帮助用户实现海量非结构化数据的近似检索和分析 3. nvidia-docker run --rm -d -p 19530:19530 -v /tmp/milvus/db:/opt/milvus/db milvusdb/milvus:0.5.0-d102119-ede20b 4. sudo chown -R $USER:$USER /tmp/milvus 5. cp mishards/.env.example mishards/.env -6 -7. 在python mishards/main.py #.env配置mishards监听19532端口 +6. 在python mishards/main.py #.env配置mishards监听19532端口 +7. make probe port=19532 #健康检查 ``` ### 容器启动实例 @@ -23,10 +23,12 @@ Milvus 旨在帮助用户实现海量非结构化数据的近似检索和分析 **启动** ``` +cd milvus/shards 1. 安装docker-compose 2. make build 3. make deploy #监听19531端口 4. make clean_deploy #清理服务 +5. make probe_deploy #健康检查 ``` **打开Jaeger UI** @@ -45,19 +47,21 @@ Milvus 旨在帮助用户实现海量非结构化数据的近似检索和分析 **步骤** ``` -1. cd milvus/shards/kubernetes_demo/ -2. ./start.sh allup -3. watch -n 1 kubectl get pods -n milvus -o wide 查看所有pod状态,等待所有pod都处于Runing状态 -4. kubectl get service -n milvus 查看milvus-proxy-servers的EXTERNAL-IP和PORT, 这就是mishards集群的服务地址 +cd milvus/shards +1. make cluster #启动集群 +2. make probe_cluster #健康检查 +3. 
make clean_cluster #关闭集群 ``` **扩容计算实例** ``` +cd milvus/shards/kubernetes_demo/ ./start.sh scale-ro-server 2 扩容计算实例到2 ``` **扩容代理器实例** ``` +cd milvus/shards/kubernetes_demo/ ./start.sh scale-proxy 2 扩容代理服务器实例到2 ``` From 013566dec4af677d419dba4c487ea86f40fbf8c2 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Mon, 4 Nov 2019 11:34:58 +0800 Subject: [PATCH 121/126] (shards): clean cluster --- shards/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shards/Makefile b/shards/Makefile index a71ef6a70c..c8aa6127f8 100644 --- a/shards/Makefile +++ b/shards/Makefile @@ -26,7 +26,7 @@ probe: docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c "python all_in_one/probe_test.py --port=${PORT} --host=${HOST}" clean_coverage: rm -rf cov_html -clean: clean_coverage clean_deploy +clean: clean_coverage clean_deploy clean_cluster style: pycodestyle --config=. coverage: From df8a018549785b5f7192367f5f183264460fafa5 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Wed, 6 Nov 2019 17:01:16 +0800 Subject: [PATCH 122/126] (shards): all all missing changes after pick-cherry from xupeng's branch --- shards/.dockerignore | 13 + shards/all_in_one/all_in_one.yml | 53 +++ shards/all_in_one/ro_server.yml | 41 ++ shards/kubernetes_demo/README.md | 107 +++++ shards/kubernetes_demo/milvus_auxiliary.yaml | 67 ++++ shards/kubernetes_demo/milvus_data_pvc.yaml | 57 +++ shards/kubernetes_demo/milvus_proxy.yaml | 88 +++++ shards/kubernetes_demo/milvus_rbac.yaml | 24 ++ .../milvus_stateful_servers.yaml | 68 ++++ .../kubernetes_demo/milvus_write_servers.yaml | 70 ++++ shards/kubernetes_demo/start.sh | 368 ++++++++++++++++++ shards/manager.py | 12 - shards/mishards/.env.example | 2 +- shards/mishards/settings.py | 13 +- 14 files changed, 958 insertions(+), 25 deletions(-) create mode 100644 shards/.dockerignore create mode 100644 shards/all_in_one/all_in_one.yml create mode 100644 shards/all_in_one/ro_server.yml create mode 100644 shards/kubernetes_demo/README.md 
create mode 100644 shards/kubernetes_demo/milvus_auxiliary.yaml create mode 100644 shards/kubernetes_demo/milvus_data_pvc.yaml create mode 100644 shards/kubernetes_demo/milvus_proxy.yaml create mode 100644 shards/kubernetes_demo/milvus_rbac.yaml create mode 100644 shards/kubernetes_demo/milvus_stateful_servers.yaml create mode 100644 shards/kubernetes_demo/milvus_write_servers.yaml create mode 100755 shards/kubernetes_demo/start.sh diff --git a/shards/.dockerignore b/shards/.dockerignore new file mode 100644 index 0000000000..e450610057 --- /dev/null +++ b/shards/.dockerignore @@ -0,0 +1,13 @@ +.git +.gitignore +.env +.coverage +.dockerignore +cov_html/ + +.pytest_cache +__pycache__ +*/__pycache__ +*.md +*.yml +*.yaml diff --git a/shards/all_in_one/all_in_one.yml b/shards/all_in_one/all_in_one.yml new file mode 100644 index 0000000000..40473fe8b9 --- /dev/null +++ b/shards/all_in_one/all_in_one.yml @@ -0,0 +1,53 @@ +version: "2.3" +services: + milvus_wr: + runtime: nvidia + restart: always + image: milvusdb/milvus:0.5.0-d102119-ede20b + volumes: + - /tmp/milvus/db:/opt/milvus/db + + milvus_ro: + runtime: nvidia + restart: always + image: milvusdb/milvus:0.5.0-d102119-ede20b + volumes: + - /tmp/milvus/db:/opt/milvus/db + - ./ro_server.yml:/opt/milvus/conf/server_config.yaml + + jaeger: + restart: always + image: jaegertracing/all-in-one:1.14 + ports: + - "0.0.0.0:5775:5775/udp" + - "0.0.0.0:16686:16686" + - "0.0.0.0:9441:9441" + environment: + COLLECTOR_ZIPKIN_HTTP_PORT: 9411 + + mishards: + restart: always + image: milvusdb/mishards + ports: + - "0.0.0.0:19531:19531" + - "0.0.0.0:19532:19532" + volumes: + - /tmp/milvus/db:/tmp/milvus/db + # - /tmp/mishards_env:/source/mishards/.env + command: ["python", "mishards/main.py"] + environment: + FROM_EXAMPLE: 'true' + DEBUG: 'true' + SERVER_PORT: 19531 + WOSERVER: tcp://milvus_wr:19530 + DISCOVERY_PLUGIN_PATH: static + DISCOVERY_STATIC_HOSTS: milvus_wr,milvus_ro + TRACER_CLASS_NAME: jaeger + TRACING_SERVICE_NAME: 
mishards-demo + TRACING_REPORTING_HOST: jaeger + TRACING_REPORTING_PORT: 5775 + + depends_on: + - milvus_wr + - milvus_ro + - jaeger diff --git a/shards/all_in_one/ro_server.yml b/shards/all_in_one/ro_server.yml new file mode 100644 index 0000000000..10cf695448 --- /dev/null +++ b/shards/all_in_one/ro_server.yml @@ -0,0 +1,41 @@ +server_config: + address: 0.0.0.0 # milvus server ip address (IPv4) + port: 19530 # port range: 1025 ~ 65534 + deploy_mode: cluster_readonly # deployment type: single, cluster_readonly, cluster_writable + time_zone: UTC+8 + +db_config: + primary_path: /opt/milvus # path used to store data and meta + secondary_path: # path used to store data only, split by semicolon + + backend_url: sqlite://:@:/ # URI format: dialect://username:password@host:port/database + # Keep 'dialect://:@:/', and replace other texts with real values + # Replace 'dialect' with 'mysql' or 'sqlite' + + insert_buffer_size: 4 # GB, maximum insert buffer size allowed + # sum of insert_buffer_size and cpu_cache_capacity cannot exceed total memory + + preload_table: # preload data at startup, '*' means load all tables, empty value means no preload + # you can specify preload tables like this: table1,table2,table3 + +metric_config: + enable_monitor: false # enable monitoring or not + collector: prometheus # prometheus + prometheus_config: + port: 8080 # port prometheus uses to fetch metrics + +cache_config: + cpu_cache_capacity: 16 # GB, CPU memory used for cache + cpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered + gpu_cache_capacity: 4 # GB, GPU memory used for cache + gpu_cache_threshold: 0.85 # percentage of data that will be kept when cache cleanup is triggered + cache_insert_data: false # whether to load inserted data into cache + +engine_config: + use_blas_threshold: 20 # if nq < use_blas_threshold, use SSE, faster with fluctuated response times + # if nq >= use_blas_threshold, use OpenBlas, slower with stable response 
times + +resource_config: + search_resources: # define the GPUs used for search computation, valid value: gpux + - gpu0 + index_build_device: gpu0 # GPU used for building index diff --git a/shards/kubernetes_demo/README.md b/shards/kubernetes_demo/README.md new file mode 100644 index 0000000000..933fcd56a8 --- /dev/null +++ b/shards/kubernetes_demo/README.md @@ -0,0 +1,107 @@ +This document is a gentle introduction to Milvus Cluster, that does not use complex to understand distributed systems concepts. It provides instructions about how to setup a cluster, test, and operate it, without going into the details that are covered in the Milvus Cluster specification but just describing how the system behaves from the point of view of the user. + +However this tutorial tries to provide information about the availability and consistency characteristics of Milvus Cluster from the point of view of the final user, stated in a simple to understand way. + +If you plan to run a serious Milvus Cluster deployment, the more formal specification is a suggested reading, even if not strictly required. However it is a good idea to start from this document, play with Milvus Cluster some time, and only later read the specification. + +## Milvus Cluster Introduction +### Infrastructure +* Kubenetes Cluster With Nvida GPU Node +* Install Nvida Docker in Cluster + +### Requried Docker Registry +* Milvus Server: ```registry.zilliz.com/milvus/engine:${version>=0.3.1}``` +* Milvus Celery Apps: ```registry.zilliz.com/milvus/celery-apps:${version>=v0.2.1}``` + +### Cluster Ability +* Milvus Cluster provides a way to run a Milvus installation where query requests are automatically sharded across multiple milvus readonly nodes. +* Milvus Cluster provides availability during partitions, that is in pratical terms the ability to continue the operations when some nodes fail or are not able to communicate. 
+ +### Metastore +Milvus supports 2 backend databases for deployment: +* Splite3: Single mode only. +* MySQL: Single/Cluster mode +* ETCD: `TODO` + +### Storage +Milvus supports 2 backend storage for deployment: +* Local filesystem: Convenient for use and deployment but not reliable. +* S3 OOS: Reliable: Need extra configuration. Need external storage service. + +### Message Queue +Milvus supports various MQ backend for deployment: +* Redis +* Rabbitmq +* MySQL/PG/MongoDB + +### Cache +* Milvus supports `Redis` as Cache backend for deployment. To reduce the system complexity, we recommend to use `Redis` as MQ backend. + +### Workflow +* Milvus Cluster use Celery as workflow scheduler. +* Milvus Cluster workflow calculation node can be scaled. +* Milvus Cluster only contains 1 worflow monitor node. Monitor node detects caculation nodes status and provides decision for work scheduling. +* Milvus Cluster supports different workflow result backend and we recommend to use `Redis` as result backend for performance consideration. + +### Writeonly Node +* Milvus can be configured in write-only mode. +* Right now Milvus Cluster only provide 1 write-only node. + +### Readonly Node +* Milvus can be configured in readonly mode. +* Milvus Cluster automatically shard incoming query requests across multiple readonly nodes. +* Milvus Cluster supports readonly nodes scaling. +* Milvus Cluster provides pratical solution to avoid performance degradation during cluster rebalance. + +### Proxy +* Milvus Cluster communicates with clients by proxy. +* Milvus Cluster supports proxy scaling. + +### Monitor +* Milvus Cluster suports metrics monitoring by prometheus. +* Milvus Cluster suports workflow tasks monitoring by flower. +* Milvus Cluster suports cluster monitoring by all kubernetes ecosystem monitoring tools. 
+ +## Milvus Cluster Kubernetes Resources +### PersistentVolumeClaim +* LOG PersistentVolume: `milvus-log-disk` + +### ConfigMap +* Celery workflow configmap: `milvus-celery-configmap`::`milvus_celery_config.yml` +* Proxy configmap: `milvus-proxy-configmap`::`milvus_proxy_config.yml` +* Readonly nodes configmap: `milvus-roserver-configmap`::`config.yml`, `milvus-roserver-configmap`::`log.conf` +* Write-only nodes configmap: `milvus-woserver-configmap`::`config.yml`, `milvus-woserver-configmap`::`log.conf` + +### Services +* Mysql service: `milvus-mysql` +* Redis service: `milvus-redis` +* Rroxy service: `milvus-proxy-servers` +* Write-only servers service: `milvus-wo-servers` + +### StatefulSet +* Readonly stateful servers: `milvus-ro-servers` + +### Deployment +* Worflow monitor: `milvus-monitor` +* Worflow workers: `milvus-workers` +* Write-only servers: `milvus-wo-servers` +* Proxy: `milvus-proxy` + +## Milvus Cluster Configuration +### Write-only server: +```milvus-woserver-configmap::config.yml: + server_config.mode: cluster + db_config.db_backend_url: mysql://${user}:${password}@milvus-mysql/${dbname} +``` +### Readonly server: +```milvus-roserver-configmap::config.yml: + server_config.mode: read_only + db_config.db_backend_url: mysql://\${user}:${password}@milvus-mysql/${dbname} +``` +### Celery workflow: +```milvus-celery-configmap::milvus_celery_config.yml: + DB_URI=mysql+mysqlconnector://${user}:${password}@milvus-mysql/${dbname} +``` +### Proxy workflow: +```milvus-proxy-configmap::milvus_proxy_config.yml: +``` diff --git a/shards/kubernetes_demo/milvus_auxiliary.yaml b/shards/kubernetes_demo/milvus_auxiliary.yaml new file mode 100644 index 0000000000..fff27adc6f --- /dev/null +++ b/shards/kubernetes_demo/milvus_auxiliary.yaml @@ -0,0 +1,67 @@ +kind: Service +apiVersion: v1 +metadata: + name: milvus-mysql + namespace: milvus +spec: + type: ClusterIP + selector: + app: milvus + tier: mysql + ports: + - protocol: TCP + port: 3306 + targetPort: 3306 + name: 
mysql + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: milvus-mysql + namespace: milvus +spec: + selector: + matchLabels: + app: milvus + tier: mysql + replicas: 1 + template: + metadata: + labels: + app: milvus + tier: mysql + spec: + containers: + - name: milvus-mysql + image: mysql:5.7 + imagePullPolicy: IfNotPresent + # lifecycle: + # postStart: + # exec: + # command: ["/bin/sh", "-c", "mysql -h milvus-mysql -uroot -p${MYSQL_ROOT_PASSWORD} -e \"CREATE DATABASE IF NOT EXISTS ${DATABASE};\"; \ + # mysql -uroot -p${MYSQL_ROOT_PASSWORD} -e \"GRANT ALL PRIVILEGES ON ${DATABASE}.* TO 'root'@'%';\""] + env: + - name: MYSQL_ROOT_PASSWORD + value: milvusroot + - name: DATABASE + value: milvus + ports: + - name: mysql-port + containerPort: 3306 + volumeMounts: + - name: milvus-mysql-disk + mountPath: /data + subPath: mysql + - name: milvus-mysql-configmap + mountPath: /etc/mysql/mysql.conf.d/mysqld.cnf + subPath: milvus_mysql_config.yml + + volumes: + - name: milvus-mysql-disk + persistentVolumeClaim: + claimName: milvus-mysql-disk + - name: milvus-mysql-configmap + configMap: + name: milvus-mysql-configmap diff --git a/shards/kubernetes_demo/milvus_data_pvc.yaml b/shards/kubernetes_demo/milvus_data_pvc.yaml new file mode 100644 index 0000000000..480354507d --- /dev/null +++ b/shards/kubernetes_demo/milvus_data_pvc.yaml @@ -0,0 +1,57 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: milvus-db-disk + namespace: milvus +spec: + accessModes: + - ReadWriteMany + storageClassName: default + resources: + requests: + storage: 50Gi + +--- + +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: milvus-log-disk + namespace: milvus +spec: + accessModes: + - ReadWriteMany + storageClassName: default + resources: + requests: + storage: 50Gi + +--- + +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: milvus-mysql-disk + namespace: milvus +spec: + accessModes: + - ReadWriteMany + storageClassName: default + resources: + 
requests: + storage: 50Gi + +--- + +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: milvus-redis-disk + namespace: milvus +spec: + accessModes: + - ReadWriteOnce + storageClassName: default + resources: + requests: + storage: 5Gi diff --git a/shards/kubernetes_demo/milvus_proxy.yaml b/shards/kubernetes_demo/milvus_proxy.yaml new file mode 100644 index 0000000000..13916b7b2b --- /dev/null +++ b/shards/kubernetes_demo/milvus_proxy.yaml @@ -0,0 +1,88 @@ +kind: Service +apiVersion: v1 +metadata: + name: milvus-proxy-servers + namespace: milvus +spec: + type: LoadBalancer + selector: + app: milvus + tier: proxy + ports: + - name: tcp + protocol: TCP + port: 19530 + targetPort: 19530 + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: milvus-proxy + namespace: milvus +spec: + selector: + matchLabels: + app: milvus + tier: proxy + replicas: 1 + template: + metadata: + labels: + app: milvus + tier: proxy + spec: + containers: + - name: milvus-proxy + image: milvusdb/mishards:0.1.0-rc0 + imagePullPolicy: Always + command: ["python", "mishards/main.py"] + resources: + limits: + memory: "3Gi" + cpu: "4" + requests: + memory: "2Gi" + ports: + - name: tcp + containerPort: 5000 + env: + # - name: SQL_ECHO + # value: "True" + - name: DEBUG + value: "False" + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MILVUS_CLIENT + value: "False" + - name: LOG_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: LOG_PATH + value: /var/log/milvus + - name: SD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: SD_ROSERVER_POD_PATT + value: ".*-ro-servers-.*" + volumeMounts: + - name: milvus-proxy-configmap + mountPath: /source/mishards/.env + subPath: milvus_proxy_config.yml + - name: milvus-log-disk + mountPath: /var/log/milvus + subPath: proxylog + # imagePullSecrets: + # - name: regcred + volumes: + - name: milvus-proxy-configmap + configMap: + name: milvus-proxy-configmap + - name: 
milvus-log-disk + persistentVolumeClaim: + claimName: milvus-log-disk diff --git a/shards/kubernetes_demo/milvus_rbac.yaml b/shards/kubernetes_demo/milvus_rbac.yaml new file mode 100644 index 0000000000..e6f302be15 --- /dev/null +++ b/shards/kubernetes_demo/milvus_rbac.yaml @@ -0,0 +1,24 @@ +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: pods-list +rules: +- apiGroups: [""] + resources: ["pods", "events"] + verbs: ["list", "get", "watch"] + +--- + +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: pods-list +subjects: +- kind: ServiceAccount + name: default + namespace: milvus +roleRef: + kind: ClusterRole + name: pods-list + apiGroup: rbac.authorization.k8s.io +--- diff --git a/shards/kubernetes_demo/milvus_stateful_servers.yaml b/shards/kubernetes_demo/milvus_stateful_servers.yaml new file mode 100644 index 0000000000..4ff5045599 --- /dev/null +++ b/shards/kubernetes_demo/milvus_stateful_servers.yaml @@ -0,0 +1,68 @@ +kind: Service +apiVersion: v1 +metadata: + name: milvus-ro-servers + namespace: milvus +spec: + type: ClusterIP + selector: + app: milvus + tier: ro-servers + ports: + - protocol: TCP + port: 19530 + targetPort: 19530 + +--- + +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: milvus-ro-servers + namespace: milvus +spec: + serviceName: "milvus-ro-servers" + replicas: 1 + template: + metadata: + labels: + app: milvus + tier: ro-servers + spec: + terminationGracePeriodSeconds: 11 + containers: + - name: milvus-ro-server + image: milvusdb/milvus:0.5.0-d102119-ede20b + imagePullPolicy: Always + ports: + - containerPort: 19530 + resources: + limits: + memory: "16Gi" + cpu: "8.0" + requests: + memory: "14Gi" + volumeMounts: + - name: milvus-db-disk + mountPath: /var/milvus + subPath: dbdata + - name: milvus-roserver-configmap + mountPath: /opt/milvus/conf/server_config.yaml + subPath: config.yml + - name: milvus-roserver-configmap + mountPath: /opt/milvus/conf/log_config.conf 
+ subPath: log.conf + # imagePullSecrets: + # - name: regcred + # tolerations: + # - key: "worker" + # operator: "Equal" + # value: "performance" + # effect: "NoSchedule" + volumes: + - name: milvus-roserver-configmap + configMap: + name: milvus-roserver-configmap + - name: milvus-db-disk + persistentVolumeClaim: + claimName: milvus-db-disk diff --git a/shards/kubernetes_demo/milvus_write_servers.yaml b/shards/kubernetes_demo/milvus_write_servers.yaml new file mode 100644 index 0000000000..6aec4b0373 --- /dev/null +++ b/shards/kubernetes_demo/milvus_write_servers.yaml @@ -0,0 +1,70 @@ +kind: Service +apiVersion: v1 +metadata: + name: milvus-wo-servers + namespace: milvus +spec: + type: ClusterIP + selector: + app: milvus + tier: wo-servers + ports: + - protocol: TCP + port: 19530 + targetPort: 19530 + +--- + +apiVersion: apps/v1beta1 +kind: Deployment +metadata: + name: milvus-wo-servers + namespace: milvus +spec: + selector: + matchLabels: + app: milvus + tier: wo-servers + replicas: 1 + template: + metadata: + labels: + app: milvus + tier: wo-servers + spec: + containers: + - name: milvus-wo-server + image: milvusdb/milvus:0.5.0-d102119-ede20b + imagePullPolicy: Always + ports: + - containerPort: 19530 + resources: + limits: + memory: "5Gi" + cpu: "1.0" + requests: + memory: "4Gi" + volumeMounts: + - name: milvus-db-disk + mountPath: /var/milvus + subPath: dbdata + - name: milvus-woserver-configmap + mountPath: /opt/milvus/conf/server_config.yaml + subPath: config.yml + - name: milvus-woserver-configmap + mountPath: /opt/milvus/conf/log_config.conf + subPath: log.conf + # imagePullSecrets: + # - name: regcred + # tolerations: + # - key: "worker" + # operator: "Equal" + # value: "performance" + # effect: "NoSchedule" + volumes: + - name: milvus-woserver-configmap + configMap: + name: milvus-woserver-configmap + - name: milvus-db-disk + persistentVolumeClaim: + claimName: milvus-db-disk diff --git a/shards/kubernetes_demo/start.sh b/shards/kubernetes_demo/start.sh 
new file mode 100755 index 0000000000..7441aa5d70 --- /dev/null +++ b/shards/kubernetes_demo/start.sh @@ -0,0 +1,368 @@ +#!/bin/bash + +UL=`tput smul` +NOUL=`tput rmul` +BOLD=`tput bold` +NORMAL=`tput sgr0` +RED='\033[0;31m' +GREEN='\033[0;32m' +BLUE='\033[0;34m' +YELLOW='\033[1;33m' +ENDC='\033[0m' + +function showHelpMessage () { + echo -e "${BOLD}Usage:${NORMAL} ${RED}$0${ENDC} [option...] {cleanup${GREEN}|${ENDC}baseup${GREEN}|${ENDC}appup${GREEN}|${ENDC}appdown${GREEN}|${ENDC}allup}" >&2 + echo + echo " -h, --help show help message" + echo " ${BOLD}cleanup, delete all resources${NORMAL}" + echo " ${BOLD}baseup, start all required base resources${NORMAL}" + echo " ${BOLD}appup, start all pods${NORMAL}" + echo " ${BOLD}appdown, remove all pods${NORMAL}" + echo " ${BOLD}allup, start all base resources and pods${NORMAL}" + echo " ${BOLD}scale-proxy, scale proxy${NORMAL}" + echo " ${BOLD}scale-ro-server, scale readonly servers${NORMAL}" + echo " ${BOLD}scale-worker, scale calculation workers${NORMAL}" +} + +function showscaleHelpMessage () { + echo -e "${BOLD}Usage:${NORMAL} ${RED}$0 $1${ENDC} [option...] 
{1|2|3|4|...}" >&2 + echo + echo " -h, --help show help message" + echo " ${BOLD}number, (int) target scale number" +} + +function PrintScaleSuccessMessage() { + echo -e "${BLUE}${BOLD}Successfully Scaled: ${1} --> ${2}${ENDC}" +} + +function PrintPodStatusMessage() { + echo -e "${BOLD}${1}${NORMAL}" +} + +timeout=60 + +function setUpMysql () { + mysqlUserName=$(kubectl describe configmap -n milvus milvus-roserver-configmap | + grep backend_url | + awk '{print $2}' | + awk '{split($0, level1, ":"); + split(level1[2], level2, "/"); + print level2[3]}') + mysqlPassword=$(kubectl describe configmap -n milvus milvus-roserver-configmap | + grep backend_url | + awk '{print $2}' | + awk '{split($0, level1, ":"); + split(level1[3], level3, "@"); + print level3[1]}') + mysqlDBName=$(kubectl describe configmap -n milvus milvus-roserver-configmap | + grep backend_url | + awk '{print $2}' | + awk '{split($0, level1, ":"); + split(level1[4], level4, "/"); + print level4[2]}') + mysqlContainer=$(kubectl get pods -n milvus | grep milvus-mysql | awk '{print $1}') + + kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "CREATE DATABASE IF NOT EXISTS $mysqlDBName;" + + checkDBExists=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '$mysqlDBName';" | grep -o $mysqlDBName | wc -l) + counter=0 + while [ $checkDBExists -lt 1 ]; do + sleep 1 + let counter=counter+1 + if [ $counter == $timeout ]; then + echo "Creating MySQL database $mysqlDBName timeout" + return 1 + fi + checkDBExists=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '$mysqlDBName';" | grep -o $mysqlDBName | wc -l) + done; + + kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword 
-e "GRANT ALL PRIVILEGES ON $mysqlDBName.* TO '$mysqlUserName'@'%';" + kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "FLUSH PRIVILEGES;" + checkGrant=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SHOW GRANTS for $mysqlUserName;" | grep -o "GRANT ALL PRIVILEGES ON \`$mysqlDBName\`\.\*" | wc -l) + counter=0 + while [ $checkGrant -lt 1 ]; do + sleep 1 + let counter=counter+1 + if [ $counter == $timeout ]; then + echo "Granting all privileges on $mysqlDBName to $mysqlUserName timeout" + return 1 + fi + checkGrant=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SHOW GRANTS for $mysqlUserName;" | grep -o "GRANT ALL PRIVILEGES ON \`$mysqlDBName\`\.\*" | wc -l) + done; +} + +function checkStatefulSevers() { + stateful_replicas=$(kubectl describe statefulset -n milvus milvus-ro-servers | grep "Replicas:" | awk '{print $2}') + stateful_running_pods=$(kubectl describe statefulset -n milvus milvus-ro-servers | grep "Pods Status:" | awk '{print $3}') + + counter=0 + prev=$stateful_running_pods + PrintPodStatusMessage "Running milvus-ro-servers Pods: $stateful_running_pods/$stateful_replicas" + while [ $stateful_replicas != $stateful_running_pods ]; do + echo -e "${YELLOW}Wait another 1 sec --- ${counter}${ENDC}" + sleep 1; + + let counter=counter+1 + if [ $counter -eq $timeout ]; then + return 1; + fi + + stateful_running_pods=$(kubectl describe statefulset -n milvus milvus-ro-servers | grep "Pods Status:" | awk '{print $3}') + if [ $stateful_running_pods -ne $prev ]; then + PrintPodStatusMessage "Running milvus-ro-servers Pods: $stateful_running_pods/$stateful_replicas" + fi + prev=$stateful_running_pods + done; + return 0; +} + +function checkDeployment() { + deployment_name=$1 + replicas=$(kubectl describe deployment -n milvus $deployment_name | grep "Replicas:" | awk '{print $2}') + running=$(kubectl get 
pods -n milvus | grep $deployment_name | grep Running | wc -l) + + counter=0 + prev=$running + PrintPodStatusMessage "Running $deployment_name Pods: $running/$replicas" + while [ $replicas != $running ]; do + echo -e "${YELLOW}Wait another 1 sec --- ${counter}${ENDC}" + sleep 1; + + let counter=counter+1 + if [ $counter == $timeout ]; then + return 1 + fi + + running=$(kubectl get pods -n milvus | grep "$deployment_name" | grep Running | wc -l) + if [ $running -ne $prev ]; then + PrintPodStatusMessage "Running $deployment_name Pods: $running/$replicas" + fi + prev=$running + done +} + + +function startDependencies() { + kubectl apply -f milvus_data_pvc.yaml + kubectl apply -f milvus_configmap.yaml + kubectl apply -f milvus_auxiliary.yaml + + counter=0 + while [ $(kubectl get pvc -n milvus | grep Bound | wc -l) != 4 ]; do + sleep 1; + let counter=counter+1 + if [ $counter == $timeout ]; then + echo "baseup timeout" + return 1 + fi + done + checkDeployment "milvus-mysql" +} + +function startApps() { + counter=0 + errmsg="" + echo -e "${GREEN}${BOLD}Checking required resouces...${NORMAL}${ENDC}" + while [ $counter -lt $timeout ]; do + sleep 1; + if [ $(kubectl get pvc -n milvus 2>/dev/null | grep Bound | wc -l) != 4 ]; then + echo -e "${YELLOW}No pvc. Wait another sec... $counter${ENDC}"; + errmsg='No pvc'; + let counter=counter+1; + continue + fi + if [ $(kubectl get configmap -n milvus 2>/dev/null | grep milvus | wc -l) != 4 ]; then + echo -e "${YELLOW}No configmap. Wait another sec... $counter${ENDC}"; + errmsg='No configmap'; + let counter=counter+1; + continue + fi + if [ "$(kubectl get ep -n milvus 2>/dev/null | grep milvus-mysql | awk '{print $2}')" == "" ]; then + echo -e "${YELLOW}No mysql. Wait another sec... $counter${ENDC}"; + errmsg='No mysql'; + let counter=counter+1; + continue + fi + # if [ $(kubectl get ep -n milvus 2>/dev/null | grep milvus-redis | awk '{print $2}') == "" ]; then + # echo -e "${NORMAL}${YELLOW}No redis. Wait another sec... 
$counter${ENDC}"; + # errmsg='No redis'; + # let counter=counter+1; + # continue + # fi + break; + done + + if [ $counter -ge $timeout ]; then + echo -e "${RED}${BOLD}Start APP Error: $errmsg${NORMAL}${ENDC}" + exit 1; + fi + + echo -e "${GREEN}${BOLD}Setup requried database ...${NORMAL}${ENDC}" + setUpMysql + if [ $? -ne 0 ]; then + echo -e "${RED}${BOLD}Setup MySQL database timeout${NORMAL}${ENDC}" + exit 1 + fi + + echo -e "${GREEN}${BOLD}Start servers ...${NORMAL}${ENDC}" + kubectl apply -f milvus_stateful_servers.yaml + kubectl apply -f milvus_write_servers.yaml + + checkStatefulSevers + if [ $? -ne 0 ]; then + echo -e "${RED}${BOLD}Starting milvus-ro-servers timeout${NORMAL}${ENDC}" + exit 1 + fi + + checkDeployment "milvus-wo-servers" + if [ $? -ne 0 ]; then + echo -e "${RED}${BOLD}Starting milvus-wo-servers timeout${NORMAL}${ENDC}" + exit 1 + fi + + echo -e "${GREEN}${BOLD}Start rolebinding ...${NORMAL}${ENDC}" + kubectl apply -f milvus_rbac.yaml + + echo -e "${GREEN}${BOLD}Start proxies ...${NORMAL}${ENDC}" + kubectl apply -f milvus_proxy.yaml + + checkDeployment "milvus-proxy" + if [ $? -ne 0 ]; then + echo -e "${RED}${BOLD}Starting milvus-proxy timeout${NORMAL}${ENDC}" + exit 1 + fi + + # echo -e "${GREEN}${BOLD}Start flower ...${NORMAL}${ENDC}" + # kubectl apply -f milvus_flower.yaml + # checkDeployment "milvus-flower" + # if [ $? 
-ne 0 ]; then + # echo -e "${RED}${BOLD}Starting milvus-flower timeout${NORMAL}${ENDC}" + # exit 1 + # fi + +} + +function removeApps () { + # kubectl delete -f milvus_flower.yaml 2>/dev/null + kubectl delete -f milvus_proxy.yaml 2>/dev/null + kubectl delete -f milvus_stateful_servers.yaml 2>/dev/null + kubectl delete -f milvus_write_servers.yaml 2>/dev/null + kubectl delete -f milvus_rbac.yaml 2>/dev/null + # kubectl delete -f milvus_monitor.yaml 2>/dev/null +} + +function scaleDeployment() { + deployment_name=$1 + subcommand=$2 + des=$3 + + case $des in + -h|--help|"") + showscaleHelpMessage $subcommand + exit 3 + ;; + esac + + cur=$(kubectl get deployment -n milvus $deployment_name |grep $deployment_name |awk '{split($2, status, "/"); print status[2];}') + echo -e "${GREEN}Current Running ${BOLD}$cur ${GREEN}${deployment_name}, Scaling to ${BOLD}$des ...${ENDC}"; + scalecmd="kubectl scale deployment -n milvus ${deployment_name} --replicas=${des}" + ${scalecmd} + if [ $? -ne 0 ]; then + echo -e "${RED}${BOLD}Scale Error: ${GREEN}${scalecmd}${ENDC}" + exit 1 + fi + + checkDeployment $deployment_name + + if [ $? -ne 0 ]; then + echo -e "${RED}${BOLD}Scale ${deployment_name} timeout${NORMAL}${ENDC}" + scalecmd="kubectl scale deployment -n milvus ${deployment_name} --replicas=${cur}" + ${scalecmd} + if [ $? 
-ne 0 ]; then + echo -e "${RED}${BOLD}Scale Rollback Error: ${GREEN}${scalecmd}${ENDC}" + exit 2 + fi + echo -e "${BLUE}${BOLD}Scale Rollback to ${cur}${ENDC}" + exit 1 + fi + PrintScaleSuccessMessage $cur $des +} + +function scaleROServers() { + subcommand=$1 + des=$2 + case $des in + -h|--help|"") + showscaleHelpMessage $subcommand + exit 3 + ;; + esac + + cur=$(kubectl get statefulset -n milvus milvus-ro-servers |tail -n 1 |awk '{split($2, status, "/"); print status[2];}') + echo -e "${GREEN}Current Running ${BOLD}$cur ${GREEN}Readonly Servers, Scaling to ${BOLD}$des ...${ENDC}"; + scalecmd="kubectl scale sts milvus-ro-servers -n milvus --replicas=${des}" + ${scalecmd} + if [ $? -ne 0 ]; then + echo -e "${RED}${BOLD}Scale Error: ${GREEN}${scalecmd}${ENDC}" + exit 1 + fi + + checkStatefulSevers + if [ $? -ne 0 ]; then + echo -e "${RED}${BOLD}Scale milvus-ro-servers timeout${NORMAL}${ENDC}" + scalecmd="kubectl scale sts milvus-ro-servers -n milvus --replicas=${cur}" + ${scalecmd} + if [ $? -ne 0 ]; then + echo -e "${RED}${BOLD}Scale Rollback Error: ${GREEN}${scalecmd}${ENDC}" + exit 2 + fi + echo -e "${BLUE}${BOLD}Scale Rollback to ${cur}${ENDC}" + exit 1 + fi + + PrintScaleSuccessMessage $cur $des +} + + +case "$1" in + +cleanup) + kubectl delete -f . 
2>/dev/null + echo -e "${BLUE}${BOLD}All resources are removed${NORMAL}${ENDC}" + ;; + +appdown) + removeApps; + echo -e "${BLUE}${BOLD}All pods are removed${NORMAL}${ENDC}" + ;; + +baseup) + startDependencies; + echo -e "${BLUE}${BOLD}All pvc, configmap and services up${NORMAL}${ENDC}" + ;; + +appup) + startApps; + echo -e "${BLUE}${BOLD}All pods up${NORMAL}${ENDC}" + ;; + +allup) + startDependencies; + sleep 2 + startApps; + echo -e "${BLUE}${BOLD}All resources and pods up${NORMAL}${ENDC}" + ;; + +scale-ro-server) + scaleROServers $1 $2 + ;; + +scale-proxy) + scaleDeployment "milvus-proxy" $1 $2 + ;; + +-h|--help|*) + showHelpMessage + ;; + +esac diff --git a/shards/manager.py b/shards/manager.py index 666ddd377e..4157b9343e 100644 --- a/shards/manager.py +++ b/shards/manager.py @@ -1,5 +1,4 @@ import fire -from sqlalchemy import and_ from mishards import db, settings @@ -12,17 +11,6 @@ class DBHandler: def drop_all(cls): db.drop_all() - @classmethod - def fun(cls, tid): - from mishards.factories import TablesFactory, TableFilesFactory, Tables - f = db.Session.query(Tables).filter(and_( - Tables.table_id == tid, - Tables.state != Tables.TO_DELETE) - ).first() - print(f) - - # f1 = TableFilesFactory() - if __name__ == '__main__': db.init_db(settings.DefaultConfig.SQLALCHEMY_DATABASE_URI) diff --git a/shards/mishards/.env.example b/shards/mishards/.env.example index c8848eaadf..f1c812a269 100644 --- a/shards/mishards/.env.example +++ b/shards/mishards/.env.example @@ -6,7 +6,7 @@ SERVER_TEST_PORT=19888 #SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 SQLALCHEMY_DATABASE_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False -SQL_ECHO=True +SQL_ECHO=False #SQLALCHEMY_DATABASE_TEST_URI=mysql+pymysql://root:root@127.0.0.1:3306/milvus?charset=utf8mb4 SQLALCHEMY_DATABASE_TEST_URI=sqlite:////tmp/milvus/db/meta.sqlite?check_same_thread=False diff --git a/shards/mishards/settings.py b/shards/mishards/settings.py index 
2694cd0a1f..8d7361dddc 100644 --- a/shards/mishards/settings.py +++ b/shards/mishards/settings.py @@ -13,6 +13,7 @@ else: DEBUG = env.bool('DEBUG', False) +MAX_RETRY = env.int('MAX_RETRY', 3) LOG_LEVEL = env.str('LOG_LEVEL', 'DEBUG' if DEBUG else 'INFO') LOG_PATH = env.str('LOG_PATH', '/tmp/mishards') @@ -22,9 +23,6 @@ TIMEZONE = env.str('TIMEZONE', 'UTC') from utils.logger_helper import config config(LOG_LEVEL, LOG_PATH, LOG_NAME, TIMEZONE) -TIMEOUT = env.int('TIMEOUT', 60) -MAX_RETRY = env.int('MAX_RETRY', 3) - SERVER_PORT = env.int('SERVER_PORT', 19530) SERVER_TEST_PORT = env.int('SERVER_TEST_PORT', 19530) WOSERVER = env.str('WOSERVER') @@ -69,12 +67,3 @@ class TestingConfig(DefaultConfig): SQL_ECHO = env.bool('SQL_TEST_ECHO', False) TRACER_CLASS_NAME = env.str('TRACER_CLASS_TEST_NAME', '') ROUTER_CLASS_NAME = env.str('ROUTER_CLASS_TEST_NAME', 'FileBasedHashRingRouter') - - -if __name__ == '__main__': - import logging - logger = logging.getLogger(__name__) - logger.debug('DEBUG') - logger.info('INFO') - logger.warn('WARN') - logger.error('ERROR') From 3845d970fe0c83aeb9cdd0ca91efc9431d6dadbc Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 7 Nov 2019 13:54:07 +0800 Subject: [PATCH 123/126] (shards): update change log for #226 --- CHANGELOG.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d46ed6070..7f6a3d37f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Please mark all change in change log and use the ticket from JIRA. ## Feature - \#12 - Pure CPU version for Milvus +- \#226 - Experimental shards middleware for Milvus ## Improvement @@ -84,7 +85,7 @@ Please mark all change in change log and use the ticket from JIRA. 
- MS-658 - Fix SQ8 Hybrid can't search - MS-665 - IVF_SQ8H search crash when no GPU resource in search_resources - \#9 - Change default gpu_cache_capacity to 4 -- \#20 - C++ sdk example get grpc error +- \#20 - C++ sdk example get grpc error - \#23 - Add unittest to improve code coverage - \#31 - make clang-format failed after run build.sh -l - \#39 - Create SQ8H index hang if using github server version @@ -136,7 +137,7 @@ Please mark all change in change log and use the ticket from JIRA. - MS-635 - Add compile option to support customized faiss - MS-660 - add ubuntu_build_deps.sh - \#18 - Add all test cases - + # Milvus 0.4.0 (2019-09-12) ## Bug @@ -345,11 +346,11 @@ Please mark all change in change log and use the ticket from JIRA. - MS-82 - Update server startup welcome message - MS-83 - Update vecwise to Milvus - MS-77 - Performance issue of post-search action -- MS-22 - Enhancement for MemVector size control +- MS-22 - Enhancement for MemVector size control - MS-92 - Unify behavior of debug and release build - MS-98 - Install all unit test to installation directory - MS-115 - Change is_startup of metric_config switch from true to on -- MS-122 - Archive criteria config +- MS-122 - Archive criteria config - MS-124 - HasTable interface - MS-126 - Add more error code - MS-128 - Change default db path From 066952ca23ff81f0f9befbdc6273dd0b1b847132 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 7 Nov 2019 14:01:18 +0800 Subject: [PATCH 124/126] (shards): remove build.sh --- shards/build.sh | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100755 shards/build.sh diff --git a/shards/build.sh b/shards/build.sh deleted file mode 100755 index fad30518f2..0000000000 --- a/shards/build.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -BOLD=`tput bold` -NORMAL=`tput sgr0` -YELLOW='\033[1;33m' -ENDC='\033[0m' - -echo -e "${BOLD}MISHARDS_REGISTRY=${MISHARDS_REGISTRY}${ENDC}" - -function build_image() { - dockerfile=$1 - 
remote_registry=$2 - tagged=$2 - buildcmd="docker build -t ${tagged} -f ${dockerfile} ." - echo -e "${BOLD}$buildcmd${NORMAL}" - $buildcmd - pushcmd="docker push ${remote_registry}" - echo -e "${BOLD}$pushcmd${NORMAL}" - $pushcmd - echo -e "${YELLOW}${BOLD}Image: ${remote_registry}${NORMAL}${ENDC}" -} - -case "$1" in - -all) - [[ -z $MISHARDS_REGISTRY ]] && { - echo -e "${YELLOW}Error: Please set docker registry first:${ENDC}\n\t${BOLD}export MISHARDS_REGISTRY=xxxx\n${ENDC}" - exit 1 - } - - version="" - [[ ! -z $2 ]] && version=":${2}" - build_image "Dockerfile" "${MISHARDS_REGISTRY}${version}" "${MISHARDS_REGISTRY}" - ;; -*) - echo "Usage: [option...] {base | apps}" - echo "all, Usage: build.sh all [tagname|] => {docker_registry}:\${tagname}" - ;; -esac From ca3447fae2af2a749939a18836ffde4f1d04e2a3 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 7 Nov 2019 14:06:05 +0800 Subject: [PATCH 125/126] (shards/refactor): remove README.md under kubernetes_demo --- shards/kubernetes_demo/README.md | 107 ------------------------------- 1 file changed, 107 deletions(-) delete mode 100644 shards/kubernetes_demo/README.md diff --git a/shards/kubernetes_demo/README.md b/shards/kubernetes_demo/README.md deleted file mode 100644 index 933fcd56a8..0000000000 --- a/shards/kubernetes_demo/README.md +++ /dev/null @@ -1,107 +0,0 @@ -This document is a gentle introduction to Milvus Cluster, that does not use complex to understand distributed systems concepts. It provides instructions about how to setup a cluster, test, and operate it, without going into the details that are covered in the Milvus Cluster specification but just describing how the system behaves from the point of view of the user. - -However this tutorial tries to provide information about the availability and consistency characteristics of Milvus Cluster from the point of view of the final user, stated in a simple to understand way. 
- -If you plan to run a serious Milvus Cluster deployment, the more formal specification is a suggested reading, even if not strictly required. However it is a good idea to start from this document, play with Milvus Cluster some time, and only later read the specification. - -## Milvus Cluster Introduction -### Infrastructure -* Kubenetes Cluster With Nvida GPU Node -* Install Nvida Docker in Cluster - -### Requried Docker Registry -* Milvus Server: ```registry.zilliz.com/milvus/engine:${version>=0.3.1}``` -* Milvus Celery Apps: ```registry.zilliz.com/milvus/celery-apps:${version>=v0.2.1}``` - -### Cluster Ability -* Milvus Cluster provides a way to run a Milvus installation where query requests are automatically sharded across multiple milvus readonly nodes. -* Milvus Cluster provides availability during partitions, that is in pratical terms the ability to continue the operations when some nodes fail or are not able to communicate. - -### Metastore -Milvus supports 2 backend databases for deployment: -* Splite3: Single mode only. -* MySQL: Single/Cluster mode -* ETCD: `TODO` - -### Storage -Milvus supports 2 backend storage for deployment: -* Local filesystem: Convenient for use and deployment but not reliable. -* S3 OOS: Reliable: Need extra configuration. Need external storage service. - -### Message Queue -Milvus supports various MQ backend for deployment: -* Redis -* Rabbitmq -* MySQL/PG/MongoDB - -### Cache -* Milvus supports `Redis` as Cache backend for deployment. To reduce the system complexity, we recommend to use `Redis` as MQ backend. - -### Workflow -* Milvus Cluster use Celery as workflow scheduler. -* Milvus Cluster workflow calculation node can be scaled. -* Milvus Cluster only contains 1 worflow monitor node. Monitor node detects caculation nodes status and provides decision for work scheduling. -* Milvus Cluster supports different workflow result backend and we recommend to use `Redis` as result backend for performance consideration. 
- -### Writeonly Node -* Milvus can be configured in write-only mode. -* Right now Milvus Cluster only provide 1 write-only node. - -### Readonly Node -* Milvus can be configured in readonly mode. -* Milvus Cluster automatically shard incoming query requests across multiple readonly nodes. -* Milvus Cluster supports readonly nodes scaling. -* Milvus Cluster provides pratical solution to avoid performance degradation during cluster rebalance. - -### Proxy -* Milvus Cluster communicates with clients by proxy. -* Milvus Cluster supports proxy scaling. - -### Monitor -* Milvus Cluster suports metrics monitoring by prometheus. -* Milvus Cluster suports workflow tasks monitoring by flower. -* Milvus Cluster suports cluster monitoring by all kubernetes ecosystem monitoring tools. - -## Milvus Cluster Kubernetes Resources -### PersistentVolumeClaim -* LOG PersistentVolume: `milvus-log-disk` - -### ConfigMap -* Celery workflow configmap: `milvus-celery-configmap`::`milvus_celery_config.yml` -* Proxy configmap: `milvus-proxy-configmap`::`milvus_proxy_config.yml` -* Readonly nodes configmap: `milvus-roserver-configmap`::`config.yml`, `milvus-roserver-configmap`::`log.conf` -* Write-only nodes configmap: `milvus-woserver-configmap`::`config.yml`, `milvus-woserver-configmap`::`log.conf` - -### Services -* Mysql service: `milvus-mysql` -* Redis service: `milvus-redis` -* Rroxy service: `milvus-proxy-servers` -* Write-only servers service: `milvus-wo-servers` - -### StatefulSet -* Readonly stateful servers: `milvus-ro-servers` - -### Deployment -* Worflow monitor: `milvus-monitor` -* Worflow workers: `milvus-workers` -* Write-only servers: `milvus-wo-servers` -* Proxy: `milvus-proxy` - -## Milvus Cluster Configuration -### Write-only server: -```milvus-woserver-configmap::config.yml: - server_config.mode: cluster - db_config.db_backend_url: mysql://${user}:${password}@milvus-mysql/${dbname} -``` -### Readonly server: -```milvus-roserver-configmap::config.yml: - 
server_config.mode: read_only - db_config.db_backend_url: mysql://\${user}:${password}@milvus-mysql/${dbname} -``` -### Celery workflow: -```milvus-celery-configmap::milvus_celery_config.yml: - DB_URI=mysql+mysqlconnector://${user}:${password}@milvus-mysql/${dbname} -``` -### Proxy workflow: -```milvus-proxy-configmap::milvus_proxy_config.yml: -``` From b7030040b0eecfd71ddcbbc848e09ca929f576a0 Mon Sep 17 00:00:00 2001 From: "peng.xu" Date: Thu, 7 Nov 2019 14:08:17 +0800 Subject: [PATCH 126/126] (shards/refactor): remove start_services.yml --- shards/start_services.yml | 46 --------------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 shards/start_services.yml diff --git a/shards/start_services.yml b/shards/start_services.yml deleted file mode 100644 index 95acdd045e..0000000000 --- a/shards/start_services.yml +++ /dev/null @@ -1,46 +0,0 @@ -version: "2.3" -services: - milvus: - runtime: nvidia - restart: always - image: registry.zilliz.com/milvus/engine:branch-0.5.0-release-4316de - # ports: - # - "0.0.0.0:19530:19530" - volumes: - - /tmp/milvus/db:/opt/milvus/db - - jaeger: - restart: always - image: jaegertracing/all-in-one:1.14 - ports: - - "0.0.0.0:5775:5775/udp" - - "0.0.0.0:16686:16686" - - "0.0.0.0:9441:9441" - environment: - COLLECTOR_ZIPKIN_HTTP_PORT: 9411 - - mishards: - restart: always - image: milvusdb/mishards - ports: - - "0.0.0.0:19530:19531" - - "0.0.0.0:19532:19532" - volumes: - - /tmp/milvus/db:/tmp/milvus/db - # - /tmp/mishards_env:/source/mishards/.env - command: ["python", "mishards/main.py"] - environment: - FROM_EXAMPLE: 'true' - DEBUG: 'true' - SERVER_PORT: 19531 - WOSERVER: tcp://milvus_wr:19530 - DISCOVERY_PLUGIN_PATH: static - DISCOVERY_STATIC_HOSTS: milvus_wr,milvus_ro - TRACER_CLASS_NAME: jaeger - TRACING_SERVICE_NAME: mishards-demo - TRACING_REPORTING_HOST: jaeger - TRACING_REPORTING_PORT: 5775 - - depends_on: - - milvus - - jaeger