diff --git a/shards/Makefile b/shards/Makefile
index 5127882d6f..2bf460c28e 100644
--- a/shards/Makefile
+++ b/shards/Makefile
@@ -27,7 +27,7 @@ cluster_status:
 	kubectl get pods -n milvus -o wide
 probe_cluster:
 	@echo
-	$(shell kubectl get service -n milvus | grep milvus-proxy-servers | awk {'print $$4,$$5'} | awk -F"[: ]" {'print "docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c \"python all_in_one/probe_test.py --port="$$2" --host="$$1"\""'})
+	$(shell kubectl get service -n mishards | grep mishards-proxy-servers | awk {'print $$4,$$5'} | awk -F"[: ]" {'print "docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c \"python all_in_one/probe_test.py --port="$$2" --host="$$1"\""'})
 probe:
 	docker run --rm --name probe --net=host milvusdb/mishards /bin/bash -c "python all_in_one/probe_test.py --port=${PORT} --host=${HOST}"
 clean_coverage:
diff --git a/shards/discovery/plugins/kubernetes_provider.py b/shards/discovery/plugins/kubernetes_provider.py
index 4ab59415b5..311c8dbc1a 100644
--- a/shards/discovery/plugins/kubernetes_provider.py
+++ b/shards/discovery/plugins/kubernetes_provider.py
@@ -11,7 +11,10 @@ import copy
 import threading
 import queue
 import enum
-from kubernetes import client, config, watch
+from functools import partial
+from collections import defaultdict
+from kubernetes import client, config as kconfig, watch
+from mishards.topology import StatusType
 
 logger = logging.getLogger(__name__)
 
@@ -33,8 +36,8 @@ class K8SMixin:
             self.namespace = open(INCLUSTER_NAMESPACE_PATH).read()
 
         if not self.v1:
-            config.load_incluster_config(
-            ) if self.in_cluster else config.load_kube_config()
+            kconfig.load_incluster_config(
+            ) if self.in_cluster else kconfig.load_kube_config()
             self.v1 = client.CoreV1Api()
 
 
@@ -133,6 +136,7 @@ class K8SEventListener(threading.Thread, K8SMixin):
 
 
 class EventHandler(threading.Thread):
+    PENDING_THRESHOLD = 3
     def __init__(self, mgr, message_queue, namespace, pod_patt, **kwargs):
         threading.Thread.__init__(self)
         self.mgr = mgr
@@ -141,6 +145,25 @@ class EventHandler(threading.Thread):
         self.terminate = False
         self.pod_patt = re.compile(pod_patt)
         self.namespace = namespace
+        self.pending_add = defaultdict(int)
+        self.pending_delete = defaultdict(int)
+
+    def record_pending_add(self, pod, true_cb=None):
+        self.pending_add[pod] += 1
+        self.pending_delete.pop(pod, None)
+        if self.pending_add[pod] >= self.PENDING_THRESHOLD:
+            true_cb and true_cb()
+            return True
+        return False
+
+    def record_pending_delete(self, pod, true_cb=None):
+        self.pending_delete[pod] += 1
+        self.pending_add.pop(pod, None)
+        if self.pending_delete[pod] >= 1:
+            true_cb and true_cb()
+            return True
+
+        return False
 
     def stop(self):
         self.terminate = True
@@ -165,37 +188,47 @@ class EventHandler(threading.Thread):
 
         if try_cnt <= 0 and not pod:
             if not event['start_up']:
-                logger.error('Pod {} is started but cannot read pod'.format(
+                logger.warning('Pod {} is started but cannot read pod'.format(
                     event['pod']))
             return
         elif try_cnt <= 0 and not pod.status.pod_ip:
             logger.warning('NoPodIPFoundError')
             return
 
-        logger.info('Register POD {} with IP {}'.format(
-            pod.metadata.name, pod.status.pod_ip))
-        self.mgr.add_pod(name=pod.metadata.name, ip=pod.status.pod_ip)
+        self.record_pending_add(pod.metadata.name,
+                true_cb=partial(self.mgr.add_pod, pod.metadata.name, pod.status.pod_ip))
 
     def on_pod_killing(self, event, **kwargs):
-        logger.info('Unregister POD {}'.format(event['pod']))
-        self.mgr.delete_pod(name=event['pod'])
+        self.record_pending_delete(event['pod'],
+                true_cb=partial(self.mgr.delete_pod, event['pod']))
 
     def on_pod_heartbeat(self, event, **kwargs):
-        names = self.mgr.readonly_topo.group_names
+        names = set(copy.deepcopy(list(self.mgr.readonly_topo.group_names)))
 
-        running_names = set()
+        pods_with_event = set()
         for each_event in event['events']:
+            pods_with_event.add(each_event['pod'])
             if each_event['ready']:
-                self.mgr.add_pod(name=each_event['pod'], ip=each_event['ip'])
-                running_names.add(each_event['pod'])
+                self.record_pending_add(each_event['pod'],
+                        true_cb=partial(self.mgr.add_pod, each_event['pod'], each_event['ip']))
             else:
-                self.mgr.delete_pod(name=each_event['pod'])
+                self.record_pending_delete(each_event['pod'],
+                        true_cb=partial(self.mgr.delete_pod, each_event['pod']))
 
-        to_delete = names - running_names
-        for name in to_delete:
-            self.mgr.delete_pod(name)
+        pods_no_event = names - pods_with_event
+        for name in pods_no_event:
+            self.record_pending_delete(name,
+                    true_cb=partial(self.mgr.delete_pod, name))
 
-        logger.info(self.mgr.readonly_topo.group_names)
+        latest = self.mgr.readonly_topo.group_names
+        deleted = names - latest
+        added = latest - names
+        if deleted:
+            logger.info('Deleted Pods: {}'.format(list(deleted)))
+        if added:
+            logger.info('Added Pods: {}'.format(list(added)))
+
+        logger.debug('All Pods: {}'.format(list(latest)))
 
     def handle_event(self, event):
         if event['eType'] == EventType.PodHeartBeat:
@@ -237,15 +270,15 @@ class KubernetesProviderSettings:
 class KubernetesProvider(object):
     name = 'kubernetes'
 
-    def __init__(self, plugin_config, readonly_topo, **kwargs):
-        self.namespace = plugin_config.DISCOVERY_KUBERNETES_NAMESPACE
-        self.pod_patt = plugin_config.DISCOVERY_KUBERNETES_POD_PATT
-        self.label_selector = plugin_config.DISCOVERY_KUBERNETES_LABEL_SELECTOR
-        self.in_cluster = plugin_config.DISCOVERY_KUBERNETES_IN_CLUSTER.lower()
+    def __init__(self, config, readonly_topo, **kwargs):
+        self.namespace = config.DISCOVERY_KUBERNETES_NAMESPACE
+        self.pod_patt = config.DISCOVERY_KUBERNETES_POD_PATT
+        self.label_selector = config.DISCOVERY_KUBERNETES_LABEL_SELECTOR
+        self.in_cluster = config.DISCOVERY_KUBERNETES_IN_CLUSTER.lower()
         self.in_cluster = self.in_cluster == 'true'
-        self.poll_interval = plugin_config.DISCOVERY_KUBERNETES_POLL_INTERVAL
+        self.poll_interval = config.DISCOVERY_KUBERNETES_POLL_INTERVAL
         self.poll_interval = int(self.poll_interval) if self.poll_interval else 5
-        self.port = plugin_config.DISCOVERY_KUBERNETES_PORT
+        self.port = config.DISCOVERY_KUBERNETES_PORT
         self.port = int(self.port) if self.port else 19530
         self.kwargs = kwargs
         self.queue = queue.Queue()
@@ -255,8 +288,8 @@ class KubernetesProvider(object):
         if not self.namespace:
             self.namespace = open(incluster_namespace_path).read()
 
-        config.load_incluster_config(
-        ) if self.in_cluster else config.load_kube_config()
+        kconfig.load_incluster_config(
+        ) if self.in_cluster else kconfig.load_kube_config()
         self.v1 = client.CoreV1Api()
 
         self.listener = K8SEventListener(message_queue=self.queue,
@@ -281,6 +314,8 @@ class KubernetesProvider(object):
                                          **kwargs)
 
     def add_pod(self, name, ip):
+        logger.debug('Register POD {} with IP {}'.format(
+            name, ip))
         ok = True
         status = StatusType.OK
         try:
@@ -292,8 +327,8 @@ class KubernetesProvider(object):
             ok = False
             logger.error('Connection error to: {}'.format(addr))
 
-        if ok and status == StatusType.OK:
-            logger.info('KubernetesProvider Add Group \"{}\" Of 1 Address: {}'.format(name, uri))
+        # if ok and status == StatusType.OK:
+        #     logger.info('KubernetesProvider Add Group \"{}\" Of 1 Address: {}'.format(name, uri))
         return ok
 
     def delete_pod(self, name):
@@ -306,6 +341,7 @@ class KubernetesProvider(object):
 
         self.event_handler.start()
         self.pod_heartbeater.start()
+        return True
 
     def stop(self):
         self.listener.stop()
diff --git a/shards/kubernetes_demo/milvus_stateful_servers.yaml b/shards/kubernetes_demo/milvus_stateful_servers.yaml
deleted file mode 100644
index 4ff5045599..0000000000
--- a/shards/kubernetes_demo/milvus_stateful_servers.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-kind: Service
-apiVersion: v1
-metadata:
-  name: milvus-ro-servers
-  namespace: milvus
-spec:
-  type: ClusterIP
-  selector:
-    app: milvus
-    tier: ro-servers
-  ports:
-  - protocol: TCP
-    port: 19530
-    targetPort: 19530
-
----
-
-apiVersion: apps/v1beta1
-kind: StatefulSet
-metadata:
-  name: milvus-ro-servers
-  namespace: milvus
-spec:
-  serviceName: "milvus-ro-servers"
-  replicas: 1
-  template:
-    metadata:
-      labels:
-        app: milvus
-        tier: ro-servers
-    spec:
-      terminationGracePeriodSeconds: 11
-      containers:
-      - name: milvus-ro-server
-        image: milvusdb/milvus:0.5.0-d102119-ede20b
-        imagePullPolicy: Always
-        ports:
-        - containerPort: 19530
-        resources:
-          limits:
-            memory: "16Gi"
-            cpu: "8.0"
-          requests:
-            memory: "14Gi"
-        volumeMounts:
-        - name: milvus-db-disk
-          mountPath: /var/milvus
-          subPath: dbdata
-        - name: milvus-roserver-configmap
-          mountPath: /opt/milvus/conf/server_config.yaml
-          subPath: config.yml
-        - name: milvus-roserver-configmap
-          mountPath: /opt/milvus/conf/log_config.conf
-          subPath: log.conf
-      # imagePullSecrets:
-      #   - name: regcred
-      # tolerations:
-      #   - key: "worker"
-      #     operator: "Equal"
-      #     value: "performance"
-      #     effect: "NoSchedule"
-      volumes:
-      - name: milvus-roserver-configmap
-        configMap:
-          name: milvus-roserver-configmap
-      - name: milvus-db-disk
-        persistentVolumeClaim:
-          claimName: milvus-db-disk
diff --git a/shards/kubernetes_demo/milvus_auxiliary.yaml b/shards/kubernetes_demo/mishards_auxiliary.yaml
similarity index 72%
rename from shards/kubernetes_demo/milvus_auxiliary.yaml
rename to shards/kubernetes_demo/mishards_auxiliary.yaml
index fff27adc6f..83d7436715 100644
--- a/shards/kubernetes_demo/milvus_auxiliary.yaml
+++ b/shards/kubernetes_demo/mishards_auxiliary.yaml
@@ -1,12 +1,12 @@
 kind: Service
 apiVersion: v1
 metadata:
-  name: milvus-mysql
-  namespace: milvus
+  name: mishards-mysql
+  namespace: mishards
 spec:
   type: ClusterIP
   selector:
-    app: milvus
+    app: mishards
     tier: mysql
   ports:
   - protocol: TCP
@@ -19,22 +19,22 @@ spec:
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: milvus-mysql
-  namespace: milvus
+  name: mishards-mysql
+  namespace: mishards
 spec:
   selector:
     matchLabels:
-      app: milvus
+      app: mishards
       tier: mysql
   replicas: 1
   template:
     metadata:
       labels:
-        app: milvus
+        app: mishards
         tier: mysql
     spec:
      containers:
-      - name: milvus-mysql
+      - name: mishards-mysql
         image: mysql:5.7
         imagePullPolicy: IfNotPresent
         # lifecycle:
@@ -51,17 +51,17 @@ spec:
         - name: mysql-port
           containerPort: 3306
         volumeMounts:
-        - name: milvus-mysql-disk
+        - name: mishards-mysql-disk
           mountPath: /data
           subPath: mysql
-        - name: milvus-mysql-configmap
+        - name: mishards-mysql-configmap
           mountPath: /etc/mysql/mysql.conf.d/mysqld.cnf
-          subPath: milvus_mysql_config.yml
+          subPath: mishards_mysql_config.yml
       volumes:
-      - name: milvus-mysql-disk
+      - name: mishards-mysql-disk
         persistentVolumeClaim:
-          claimName: milvus-mysql-disk
-      - name: milvus-mysql-configmap
+          claimName: mishards-mysql-disk
+      - name: mishards-mysql-configmap
         configMap:
-          name: milvus-mysql-configmap
+          name: mishards-mysql-configmap
 
diff --git a/shards/kubernetes_demo/milvus_configmap.yaml b/shards/kubernetes_demo/mishards_configmap.yaml
similarity index 53%
rename from shards/kubernetes_demo/milvus_configmap.yaml
rename to shards/kubernetes_demo/mishards_configmap.yaml
index cb751c02f1..c099a438bc 100644
--- a/shards/kubernetes_demo/milvus_configmap.yaml
+++ b/shards/kubernetes_demo/mishards_configmap.yaml
@@ -1,10 +1,10 @@
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: milvus-mysql-configmap
-  namespace: milvus
+  name: mishards-mysql-configmap
+  namespace: mishards
 data:
-  milvus_mysql_config.yml: |
+  mishards_mysql_config.yml: |
     [mysqld]
     pid-file = /var/run/mysqld/mysqld.pid
     socket = /var/run/mysqld/mysqld.sock
@@ -26,71 +26,79 @@ data:
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: milvus-proxy-configmap
-  namespace: milvus
+  name: mishards-proxy-configmap
+  namespace: mishards
 data:
-  milvus_proxy_config.yml: |
+  mishards_proxy_config.yml: |
     DEBUG=True
-    TESTING=False
-    WOSERVER=tcp://milvus-wo-servers:19530
+    WOSERVER=tcp://mishards-wo-servers:19530
     SERVER_PORT=19530
     DISCOVERY_CLASS_NAME=kubernetes
-    DISCOVERY_KUBERNETES_NAMESPACE=milvus
+    DISCOVERY_KUBERNETES_NAMESPACE=mishards
     DISCOVERY_KUBERNETES_POD_PATT=.*-ro-servers-.*
     DISCOVERY_KUBERNETES_LABEL_SELECTOR=tier=ro-servers
     DISCOVERY_KUBERNETES_POLL_INTERVAL=10
     DISCOVERY_KUBERNETES_IN_CLUSTER=True
-    SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:milvusroot@milvus-mysql:3306/milvus?charset=utf8mb4
+    SQLALCHEMY_DATABASE_URI=mysql+pymysql://root:milvusroot@mishards-mysql:3306/milvus?charset=utf8mb4
     SQLALCHEMY_POOL_SIZE=50
     SQLALCHEMY_POOL_RECYCLE=7200
     LOG_PATH=/var/log/milvus
     TIMEZONE=Asia/Shanghai
+
 ---
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: milvus-roserver-configmap
-  namespace: milvus
+  name: mishards-roserver-configmap
+  namespace: mishards
 data:
   config.yml: |
+    version: 0.3
     server_config:
       address: 0.0.0.0
       port: 19530
-      mode: cluster_readonly
+      deploy_mode: cluster_readonly
+      web_port: 19121
 
     db_config:
+      backend_url: mysql://root:milvusroot@mishards-mysql:3306/milvus
+      auto_flush_interval: 1
+
+    storage_config:
       primary_path: /var/milvus
-      backend_url: mysql://root:milvusroot@milvus-mysql:3306/milvus
-      insert_buffer_size: 2
 
     metric_config:
-      enable_monitor: off # true is on, false is off
+      enable_monitor: false
 
     cache_config:
-      cpu_cache_capacity: 12 # memory pool to hold index data, unit: GB
-      cpu_cache_free_percent: 0.85
-      insert_cache_immediately: false
-      # gpu_cache_capacity: 4
-      # gpu_cache_free_percent: 0.85
-      # gpu_ids:
-      #   - 0
+      cpu_cache_capacity: 6 # memory pool to hold index data, unit: GB
+      insert_buffer_size: 1
+      cache_insert_data: false
+
+    gpu_resource_config:
+      enabled: false
+      cache_capacity: 1
+      search_resources:
+        - gpu0
+      build_index_resources:
+        - gpu0
 
     engine_config:
-      use_blas_threshold: 800
+      use_blas_threshold: 800
+      gpu_search_threshold: 1000
 
-    resource_config:
-      search_resources:
-        - gpu0
+    wal_config:
+      enable: false
 
   log.conf: |
     * GLOBAL:
       FORMAT = "%datetime | %level | %logger | %msg"
-      FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-global.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-global.log"
       ENABLED = true
       TO_FILE = true
       TO_STANDARD_OUTPUT = true
@@ -98,12 +106,12 @@ data:
       PERFORMANCE_TRACKING = false
       MAX_LOG_FILE_SIZE = 2097152 ## Throw log files away after 2MB
     * DEBUG:
-      FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-debug.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-debug.log"
       ENABLED = true
     * WARNING:
-      FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-warning.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-warning.log"
     * TRACE:
-      FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-trace.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-trace.log"
     * VERBOSE:
       FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg"
       TO_FILE = true
@@ -111,54 +119,61 @@ data:
       ## Error logs
     * ERROR:
       ENABLED = true
-      FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-error.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-error.log"
     * FATAL:
       ENABLED = true
-      FILENAME = "/var/milvus/logs/milvus-ro-%datetime{%H:%m}-fatal.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-ro-%datetime{%H:%m}-fatal.log"
 
 ---
 
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: milvus-woserver-configmap
-  namespace: milvus
+  name: mishards-woserver-configmap
+  namespace: mishards
 data:
   config.yml: |
+    version: 0.3
     server_config:
       address: 0.0.0.0
       port: 19530
-      mode: cluster_writable
+      deploy_mode: cluster_writable
+      web_port: 19121
 
     db_config:
+      backend_url: mysql://root:milvusroot@mishards-mysql:3306/milvus
+      auto_flush_interval: 1
+
+    storage_config:
       primary_path: /var/milvus
-      backend_url: mysql://root:milvusroot@milvus-mysql:3306/milvus
-      insert_buffer_size: 2
 
     metric_config:
-      enable_monitor: off # true is on, false is off
+      enable_monitor: false
 
     cache_config:
-      cpu_cache_capacity: 2 # memory pool to hold index data, unit: GB
-      cpu_cache_free_percent: 0.85
-      insert_cache_immediately: false
-      # gpu_cache_capacity: 4
-      # gpu_cache_free_percent: 0.85
-      # gpu_ids:
-      #   - 0
+      cpu_cache_capacity: 2 # memory pool to hold index data, unit: GB
+      insert_buffer_size: 2
+      cache_insert_data: false
+
+    gpu_resource_config:
+      enabled: false
+      cache_capacity: 1
+      search_resources:
+        - gpu0
+      build_index_resources:
+        - gpu0
 
     engine_config:
-      use_blas_threshold: 800
-
-    resource_config:
-      search_resources:
-        - gpu0
+      use_blas_threshold: 800
+      gpu_search_threshold: 1000
 
+    wal_config:
+      enable: false
   log.conf: |
     * GLOBAL:
       FORMAT = "%datetime | %level | %logger | %msg"
-      FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-global.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-global.log"
       ENABLED = true
       TO_FILE = true
       TO_STANDARD_OUTPUT = true
@@ -166,12 +181,12 @@ data:
       PERFORMANCE_TRACKING = false
       MAX_LOG_FILE_SIZE = 2097152 ## Throw log files away after 2MB
     * DEBUG:
-      FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-debug.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-debug.log"
       ENABLED = true
     * WARNING:
-      FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-warning.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-warning.log"
     * TRACE:
-      FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-trace.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-trace.log"
     * VERBOSE:
       FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg"
       TO_FILE = true
@@ -179,7 +194,7 @@ data:
       ## Error logs
     * ERROR:
       ENABLED = true
-      FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-error.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-error.log"
     * FATAL:
       ENABLED = true
-      FILENAME = "/var/milvus/logs/milvus-wo-%datetime{%H:%m}-fatal.log"
+      FILENAME = "/var/lib/milvus/logs/milvus-wo-%datetime{%H:%m}-fatal.log"
diff --git a/shards/kubernetes_demo/milvus_data_pvc.yaml b/shards/kubernetes_demo/mishards_data_pvc.yaml
similarity index 77%
rename from shards/kubernetes_demo/milvus_data_pvc.yaml
rename to shards/kubernetes_demo/mishards_data_pvc.yaml
index 480354507d..a05c4bb9e1 100644
--- a/shards/kubernetes_demo/milvus_data_pvc.yaml
+++ b/shards/kubernetes_demo/mishards_data_pvc.yaml
@@ -1,8 +1,8 @@
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: milvus-db-disk
-  namespace: milvus
+  name: mishards-db-disk
+  namespace: mishards
 spec:
   accessModes:
     - ReadWriteMany
@@ -16,8 +16,8 @@ spec:
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: milvus-log-disk
-  namespace: milvus
+  name: mishards-log-disk
+  namespace: mishards
 spec:
   accessModes:
     - ReadWriteMany
@@ -31,8 +31,8 @@ spec:
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: milvus-mysql-disk
-  namespace: milvus
+  name: mishards-mysql-disk
+  namespace: mishards
 spec:
   accessModes:
     - ReadWriteMany
@@ -46,8 +46,8 @@ spec:
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: milvus-redis-disk
-  namespace: milvus
+  name: mishards-redis-disk
+  namespace: mishards
 spec:
   accessModes:
     - ReadWriteOnce
diff --git a/shards/kubernetes_demo/milvus_proxy.yaml b/shards/kubernetes_demo/mishards_proxy.yaml
similarity index 73%
rename from shards/kubernetes_demo/milvus_proxy.yaml
rename to shards/kubernetes_demo/mishards_proxy.yaml
index 13916b7b2b..d36dffb826 100644
--- a/shards/kubernetes_demo/milvus_proxy.yaml
+++ b/shards/kubernetes_demo/mishards_proxy.yaml
@@ -1,12 +1,12 @@
 kind: Service
 apiVersion: v1
 metadata:
-  name: milvus-proxy-servers
-  namespace: milvus
+  name: mishards-proxy-servers
+  namespace: mishards
 spec:
   type: LoadBalancer
   selector:
-    app: milvus
+    app: mishards
     tier: proxy
   ports:
   - name: tcp
@@ -19,29 +19,29 @@ spec:
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: milvus-proxy
-  namespace: milvus
+  name: mishards-proxy
+  namespace: mishards
 spec:
   selector:
     matchLabels:
-      app: milvus
+      app: mishards
       tier: proxy
   replicas: 1
   template:
     metadata:
      labels:
-        app: milvus
+        app: mishards
         tier: proxy
     spec:
       containers:
-      - name: milvus-proxy
-        image: milvusdb/mishards:0.1.0-rc0
+      - name: mishards-proxy
+        image: milvusdb/mishards
         imagePullPolicy: Always
         command: ["python", "mishards/main.py"]
         resources:
           limits:
-            memory: "3Gi"
-            cpu: "4"
+            memory: "2Gi"
+            cpu: "2"
           requests:
             memory: "2Gi"
         ports:
@@ -71,18 +71,18 @@ spec:
         - name: SD_ROSERVER_POD_PATT
          value: ".*-ro-servers-.*"
         volumeMounts:
-        - name: milvus-proxy-configmap
+        - name: mishards-proxy-configmap
           mountPath: /source/mishards/.env
-          subPath: milvus_proxy_config.yml
-        - name: milvus-log-disk
+          subPath: mishards_proxy_config.yml
+        - name: mishards-log-disk
           mountPath: /var/log/milvus
           subPath: proxylog
       # imagePullSecrets:
      #   - name: regcred
       volumes:
-      - name: milvus-proxy-configmap
+      - name: mishards-proxy-configmap
         configMap:
-          name: milvus-proxy-configmap
-      - name: milvus-log-disk
+          name: mishards-proxy-configmap
+      - name: mishards-log-disk
         persistentVolumeClaim:
-          claimName: milvus-log-disk
+          claimName: mishards-log-disk
diff --git a/shards/kubernetes_demo/milvus_rbac.yaml b/shards/kubernetes_demo/mishards_rbac.yaml
similarity index 94%
rename from shards/kubernetes_demo/milvus_rbac.yaml
rename to shards/kubernetes_demo/mishards_rbac.yaml
index e6f302be15..14772ed982 100644
--- a/shards/kubernetes_demo/milvus_rbac.yaml
+++ b/shards/kubernetes_demo/mishards_rbac.yaml
@@ -16,7 +16,7 @@ metadata:
 subjects:
 - kind: ServiceAccount
   name: default
-  namespace: milvus
+  namespace: mishards
 roleRef:
   kind: ClusterRole
   name: pods-list
diff --git a/shards/kubernetes_demo/mishards_stateful_servers.yaml b/shards/kubernetes_demo/mishards_stateful_servers.yaml
new file mode 100644
index 0000000000..5a4ddaa503
--- /dev/null
+++ b/shards/kubernetes_demo/mishards_stateful_servers.yaml
@@ -0,0 +1,74 @@
+kind: Service
+apiVersion: v1
+metadata:
+  name: mishards-ro-servers
+  namespace: mishards
+spec:
+  type: ClusterIP
+  selector:
+    app: mishards
+    tier: ro-servers
+  ports:
+  - protocol: TCP
+    port: 19530
+    targetPort: 19530
+
+---
+
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+  name: mishards-ro-servers
+  namespace: mishards
+spec:
+  serviceName: "mishards-ro-servers"
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: mishards
+        tier: ro-servers
+    spec:
+      terminationGracePeriodSeconds: 11
+      containers:
+      - name: mishards-ro-server
+        image: milvusdb/milvus:0.8.0-gpu-d041520-464400
+        imagePullPolicy: Always
+        ports:
+        - containerPort: 19530
+        resources:
+          limits:
+            memory: "8Gi"
+            cpu: "6.0"
+          requests:
+            memory: "8Gi"
+        volumeMounts:
+        - name: mishards-db-disk
+          mountPath: /var/milvus
+          subPath: dbdata
+        - name: mishards-roserver-configmap
+          mountPath: /var/lib/milvus/conf/server_config.yaml
+          subPath: config.yml
+        - name: mishards-roserver-configmap
+          mountPath: /var/lib/milvus/conf/log_config.conf
+          subPath: log.conf
+        - name: mishards-log-disk
+          mountPath: /var/lib/milvus/logs
+          subPath: rolog
+      # imagePullSecrets:
+      #   - name: regcred
+      # tolerations:
+      #   - key: "worker"
+      #     operator: "Equal"
+      #     value: "performance"
+      #     effect: "NoSchedule"
+      volumes:
+      - name: mishards-roserver-configmap
+        configMap:
+          name: mishards-roserver-configmap
+      - name: mishards-db-disk
+        persistentVolumeClaim:
+          claimName: mishards-db-disk
+      - name: mishards-log-disk
+        persistentVolumeClaim:
+          claimName: mishards-log-disk
diff --git a/shards/kubernetes_demo/milvus_write_servers.yaml b/shards/kubernetes_demo/mishards_write_servers.yaml
similarity index 51%
rename from shards/kubernetes_demo/milvus_write_servers.yaml
rename to shards/kubernetes_demo/mishards_write_servers.yaml
index 6aec4b0373..3c5974cd82 100644
--- a/shards/kubernetes_demo/milvus_write_servers.yaml
+++ b/shards/kubernetes_demo/mishards_write_servers.yaml
@@ -1,12 +1,12 @@
 kind: Service
 apiVersion: v1
 metadata:
-  name: milvus-wo-servers
-  namespace: milvus
+  name: mishards-wo-servers
+  namespace: mishards
 spec:
   type: ClusterIP
   selector:
-    app: milvus
+    app: mishards
     tier: wo-servers
   ports:
   - protocol: TCP
@@ -18,42 +18,45 @@ spec:
 apiVersion: apps/v1beta1
 kind: Deployment
 metadata:
-  name: milvus-wo-servers
-  namespace: milvus
+  name: mishards-wo-servers
+  namespace: mishards
 spec:
   selector:
     matchLabels:
-      app: milvus
+      app: mishards
       tier: wo-servers
   replicas: 1
   template:
     metadata:
       labels:
-        app: milvus
+        app: mishards
         tier: wo-servers
     spec:
       containers:
-      - name: milvus-wo-server
-        image: milvusdb/milvus:0.5.0-d102119-ede20b
+      - name: mishards-wo-server
+        image: milvusdb/milvus:0.8.0-gpu-d041520-464400
         imagePullPolicy: Always
         ports:
         - containerPort: 19530
         resources:
           limits:
-            memory: "5Gi"
-            cpu: "1.0"
+            memory: "4Gi"
+            cpu: "2.0"
           requests:
             memory: "4Gi"
         volumeMounts:
-        - name: milvus-db-disk
+        - name: mishards-db-disk
           mountPath: /var/milvus
           subPath: dbdata
-        - name: milvus-woserver-configmap
-          mountPath: /opt/milvus/conf/server_config.yaml
+        - name: mishards-woserver-configmap
+          mountPath: /var/lib/milvus/conf/server_config.yaml
           subPath: config.yml
-        - name: milvus-woserver-configmap
-          mountPath: /opt/milvus/conf/log_config.conf
+        - name: mishards-woserver-configmap
+          mountPath: /var/lib/milvus/conf/log_config.conf
           subPath: log.conf
+        - name: mishards-log-disk
+          mountPath: /var/lib/milvus/logs
+          subPath: wslog
       # imagePullSecrets:
       #   - name: regcred
       # tolerations:
@@ -62,9 +65,12 @@ spec:
       #     value: "performance"
      #     effect: "NoSchedule"
       volumes:
-      - name: milvus-woserver-configmap
+      - name: mishards-woserver-configmap
         configMap:
-          name: milvus-woserver-configmap
-      - name: milvus-db-disk
+          name: mishards-woserver-configmap
+      - name: mishards-db-disk
         persistentVolumeClaim:
-          claimName: milvus-db-disk
+          claimName: mishards-db-disk
+      - name: mishards-log-disk
+        persistentVolumeClaim:
+          claimName: mishards-log-disk
diff --git a/shards/kubernetes_demo/start.sh b/shards/kubernetes_demo/start.sh
index 7441aa5d70..a191eff162 100755
--- a/shards/kubernetes_demo/start.sh
+++ b/shards/kubernetes_demo/start.sh
@@ -42,29 +42,29 @@ function PrintPodStatusMessage() {
 timeout=60
 
 function setUpMysql () {
-    mysqlUserName=$(kubectl describe configmap -n milvus milvus-roserver-configmap |
-        grep backend_url | awk '{print $2}' |
-        awk '{split($0, level1, ":"); split(level1[2], level2, "/"); print level2[3]}')
+    mysqlUserName=$(kubectl describe configmap -n mishards mishards-roserver-configmap |
+        grep backend_url | awk '{print $2}' |
+        awk '{split($0, level1, ":"); split(level1[2], level2, "/"); print level2[3]}')
-    mysqlPassword=$(kubectl describe configmap -n milvus milvus-roserver-configmap |
-        grep backend_url | awk '{print $2}' |
-        awk '{split($0, level1, ":"); split(level1[3], level3, "@"); print level3[1]}')
+    mysqlPassword=$(kubectl describe configmap -n mishards mishards-roserver-configmap |
+        grep backend_url | awk '{print $2}' |
+        awk '{split($0, level1, ":"); split(level1[3], level3, "@"); print level3[1]}')
-    mysqlDBName=$(kubectl describe configmap -n milvus milvus-roserver-configmap |
-        grep backend_url | awk '{print $2}' |
-        awk '{split($0, level1, ":"); split(level1[4], level4, "/"); print level4[2]}')
+    mysqlDBName=$(kubectl describe configmap -n mishards mishards-roserver-configmap |
+        grep backend_url | awk '{print $2}' |
+        awk '{split($0, level1, ":"); split(level1[4], level4, "/"); print level4[2]}')
 
-    mysqlContainer=$(kubectl get pods -n milvus | grep milvus-mysql | awk '{print $1}')
+    mysqlContainer=$(kubectl get pods -n mishards | grep mishards-mysql | awk '{print $1}')
 
-    kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "CREATE DATABASE IF NOT EXISTS $mysqlDBName;"
+    kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "CREATE DATABASE IF NOT EXISTS $mysqlDBName;"
 
-    checkDBExists=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '$mysqlDBName';" | grep -o $mysqlDBName | wc -l)
+    checkDBExists=$(kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '$mysqlDBName';" | grep -o $mysqlDBName | wc -l)
     counter=0
     while [ $checkDBExists -lt 1 ]; do
         sleep 1
@@ -73,12 +73,12 @@ function setUpMysql () {
             echo "Creating MySQL database $mysqlDBName timeout"
             return 1
         fi
-        checkDBExists=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '$mysqlDBName';" | grep -o $mysqlDBName | wc -l)
+        checkDBExists=$(kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '$mysqlDBName';" | grep -o $mysqlDBName | wc -l)
     done;
 
-    kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "GRANT ALL PRIVILEGES ON $mysqlDBName.* TO '$mysqlUserName'@'%';"
-    kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "FLUSH PRIVILEGES;"
-    checkGrant=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SHOW GRANTS for $mysqlUserName;" | grep -o "GRANT ALL PRIVILEGES ON \`$mysqlDBName\`\.\*" | wc -l)
+    kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "GRANT ALL PRIVILEGES ON $mysqlDBName.* TO '$mysqlUserName'@'%';"
+    kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "FLUSH PRIVILEGES;"
+    checkGrant=$(kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "SHOW GRANTS for $mysqlUserName;" | grep -o "GRANT ALL PRIVILEGES ON \`$mysqlDBName\`\.\*" | wc -l)
     counter=0
     while [ $checkGrant -lt 1 ]; do
         sleep 1
@@ -87,17 +87,17 @@ function setUpMysql () {
             echo "Granting all privileges on $mysqlDBName to $mysqlUserName timeout"
             return 1
         fi
-        checkGrant=$(kubectl exec -n milvus $mysqlContainer -- mysql -h milvus-mysql -u$mysqlUserName -p$mysqlPassword -e "SHOW GRANTS for $mysqlUserName;" | grep -o "GRANT ALL PRIVILEGES ON \`$mysqlDBName\`\.\*" | wc -l)
+        checkGrant=$(kubectl exec -n mishards $mysqlContainer -- mysql -h mishards-mysql -u$mysqlUserName -p$mysqlPassword -e "SHOW GRANTS for $mysqlUserName;" | grep -o "GRANT ALL PRIVILEGES ON \`$mysqlDBName\`\.\*" | wc -l)
     done;
 }
 
 function checkStatefulSevers() {
-    stateful_replicas=$(kubectl describe statefulset -n milvus milvus-ro-servers | grep "Replicas:" | awk '{print $2}')
-    stateful_running_pods=$(kubectl describe statefulset -n milvus milvus-ro-servers | grep "Pods Status:" | awk '{print $3}')
+    stateful_replicas=$(kubectl describe statefulset -n mishards mishards-ro-servers | grep "Replicas:" | awk '{print $3}')
+    stateful_running_pods=$(kubectl describe statefulset -n mishards mishards-ro-servers | grep "Pods Status:" | awk '{print $3}')
 
     counter=0
     prev=$stateful_running_pods
-    PrintPodStatusMessage "Running milvus-ro-servers Pods: $stateful_running_pods/$stateful_replicas"
+    PrintPodStatusMessage "Running mishards-ro-servers Pods: $stateful_running_pods/$stateful_replicas"
     while [ $stateful_replicas != $stateful_running_pods ]; do
         echo -e "${YELLOW}Wait another 1 sec --- ${counter}${ENDC}"
         sleep 1;
@@ -107,9 +107,9 @@ function checkStatefulSevers() {
             return 1;
         fi
 
-        stateful_running_pods=$(kubectl describe statefulset -n milvus milvus-ro-servers | grep "Pods Status:" | awk '{print $3}')
+        stateful_running_pods=$(kubectl describe statefulset -n mishards mishards-ro-servers | grep "Pods Status:" | awk '{print $3}')
         if [ $stateful_running_pods -ne $prev ]; then
-            PrintPodStatusMessage "Running milvus-ro-servers Pods: $stateful_running_pods/$stateful_replicas"
+            PrintPodStatusMessage "Running mishards-ro-servers Pods: $stateful_running_pods/$stateful_replicas"
         fi
         prev=$stateful_running_pods
     done;
@@ -118,8 +118,8 @@ function checkStatefulSevers() {
 
 function checkDeployment() {
     deployment_name=$1
-    replicas=$(kubectl describe deployment -n milvus $deployment_name | grep "Replicas:" | awk '{print $2}')
-    running=$(kubectl get pods -n milvus | grep $deployment_name | grep Running | wc -l)
+    replicas=$(kubectl describe deployment -n mishards $deployment_name | grep "Replicas:" | awk '{print $2}')
+    running=$(kubectl get pods -n mishards | grep $deployment_name | grep Running | wc -l)
 
     counter=0
     prev=$running
@@ -133,7 +133,7 @@ function checkDeployment() {
             return 1
         fi
 
-        running=$(kubectl get pods -n milvus | grep "$deployment_name" | grep Running | wc -l)
+        running=$(kubectl get pods -n mishards | grep "$deployment_name" | grep Running | wc -l)
         if [ $running -ne $prev ]; then
             PrintPodStatusMessage "Running $deployment_name Pods: $running/$replicas"
         fi
@@ -143,12 +143,12 @@ function checkDeployment() {
 
 function startDependencies() {
-    kubectl apply -f milvus_data_pvc.yaml
-    kubectl apply -f milvus_configmap.yaml
-    kubectl apply -f milvus_auxiliary.yaml
+    kubectl apply -f mishards_data_pvc.yaml
+    kubectl apply -f mishards_configmap.yaml
+    kubectl apply -f mishards_auxiliary.yaml
 
     counter=0
-    while [ $(kubectl get pvc -n milvus | grep Bound | wc -l) != 4 ]; do
+    while [ $(kubectl get pvc -n mishards | grep Bound | wc -l) != 4 ]; do
         sleep 1;
         let counter=counter+1
         if [ $counter == $timeout ]; then
@@ -156,7 +156,7 @@ function startDependencies() {
             return 1
         fi
     done
-    checkDeployment "milvus-mysql"
+    checkDeployment "mishards-mysql"
 }
 
 function startApps() {
@@ -165,19 +165,19 @@ function startApps() {
     echo -e "${GREEN}${BOLD}Checking required resouces...${NORMAL}${ENDC}"
     while [ $counter -lt $timeout ]; do
         sleep 1;
-        if [ $(kubectl get pvc -n milvus 2>/dev/null | grep Bound | wc -l) != 4 ]; then
+        if [ $(kubectl get pvc -n mishards 2>/dev/null | grep Bound | wc -l) != 4 ]; then
             echo -e "${YELLOW}No pvc. Wait another sec... $counter${ENDC}";
             errmsg='No pvc';
             let counter=counter+1;
             continue
         fi
-        if [ $(kubectl get configmap -n milvus 2>/dev/null | grep milvus | wc -l) != 4 ]; then
+        if [ $(kubectl get configmap -n mishards 2>/dev/null | grep mishards | wc -l) != 4 ]; then
             echo -e "${YELLOW}No configmap. Wait another sec... $counter${ENDC}";
             errmsg='No configmap';
             let counter=counter+1;
             continue
         fi
-        if [ $(kubectl get ep -n milvus 2>/dev/null | grep milvus-mysql | awk '{print $2}') == "" ]; then
+        if [ $(kubectl get ep -n mishards 2>/dev/null | grep mishards-mysql | awk '{print $2}') == "" ]; then
             echo -e "${YELLOW}No mysql. Wait another sec... $counter${ENDC}";
             errmsg='No mysql';
             let counter=counter+1;
@@ -205,30 +205,30 @@ function startApps() {
     fi
 
     echo -e "${GREEN}${BOLD}Start servers ...${NORMAL}${ENDC}"
-    kubectl apply -f milvus_stateful_servers.yaml
-    kubectl apply -f milvus_write_servers.yaml
+    kubectl apply -f mishards_stateful_servers.yaml
+    kubectl apply -f mishards_write_servers.yaml
 
     checkStatefulSevers
     if [ $? -ne 0 ]; then
-        echo -e "${RED}${BOLD}Starting milvus-ro-servers timeout${NORMAL}${ENDC}"
+        echo -e "${RED}${BOLD}Starting mishards-ro-servers timeout${NORMAL}${ENDC}"
         exit 1
     fi
 
-    checkDeployment "milvus-wo-servers"
+    checkDeployment "mishards-wo-servers"
     if [ $? -ne 0 ]; then
-        echo -e "${RED}${BOLD}Starting milvus-wo-servers timeout${NORMAL}${ENDC}"
+        echo -e "${RED}${BOLD}Starting mishards-wo-servers timeout${NORMAL}${ENDC}"
         exit 1
     fi
 
     echo -e "${GREEN}${BOLD}Start rolebinding ...${NORMAL}${ENDC}"
-    kubectl apply -f milvus_rbac.yaml
+    kubectl apply -f mishards_rbac.yaml
 
     echo -e "${GREEN}${BOLD}Start proxies ...${NORMAL}${ENDC}"
-    kubectl apply -f milvus_proxy.yaml
+    kubectl apply -f mishards_proxy.yaml
 
-    checkDeployment "milvus-proxy"
+    checkDeployment "mishards-proxy"
     if [ $? -ne 0 ]; then
-        echo -e "${RED}${BOLD}Starting milvus-proxy timeout${NORMAL}${ENDC}"
+        echo -e "${RED}${BOLD}Starting mishards-proxy timeout${NORMAL}${ENDC}"
         exit 1
     fi
 
@@ -244,10 +244,10 @@ function startApps() {
 
 function removeApps () {
     # kubectl delete -f milvus_flower.yaml 2>/dev/null
-    kubectl delete -f milvus_proxy.yaml 2>/dev/null
-    kubectl delete -f milvus_stateful_servers.yaml 2>/dev/null
-    kubectl delete -f milvus_write_servers.yaml 2>/dev/null
-    kubectl delete -f milvus_rbac.yaml 2>/dev/null
+    kubectl delete -f mishards_proxy.yaml 2>/dev/null
+    kubectl delete -f mishards_stateful_servers.yaml 2>/dev/null
+    kubectl delete -f mishards_write_servers.yaml 2>/dev/null
+    kubectl delete -f mishards_rbac.yaml 2>/dev/null
     # kubectl delete -f milvus_monitor.yaml 2>/dev/null
 }
 
@@ -263,9 +263,9 @@ function scaleDeployment() {
             ;;
     esac
 
-    cur=$(kubectl get deployment -n milvus $deployment_name |grep $deployment_name |awk '{split($2, status, "/"); print status[2];}')
+    cur=$(kubectl get deployment -n mishards $deployment_name |grep $deployment_name |awk '{split($2, status, "/"); print status[2];}')
     echo -e "${GREEN}Current Running ${BOLD}$cur ${GREEN}${deployment_name}, Scaling to ${BOLD}$des ...${ENDC}";
-    scalecmd="kubectl scale deployment -n milvus ${deployment_name} --replicas=${des}"
+    scalecmd="kubectl scale deployment -n mishards ${deployment_name} --replicas=${des}"
     ${scalecmd}
     if [ $? -ne 0 ]; then
         echo -e "${RED}${BOLD}Scale Error: ${GREEN}${scalecmd}${ENDC}"
@@ -276,7 +276,7 @@ function scaleDeployment() {
 
     if [ $? -ne 0 ]; then
         echo -e "${RED}${BOLD}Scale ${deployment_name} timeout${NORMAL}${ENDC}"
-        scalecmd="kubectl scale deployment -n milvus ${deployment_name} --replicas=${cur}"
+        scalecmd="kubectl scale deployment -n mishards ${deployment_name} --replicas=${cur}"
         ${scalecmd}
         if [ $? -ne 0 ]; then
             echo -e "${RED}${BOLD}Scale Rollback Error: ${GREEN}${scalecmd}${ENDC}"
@@ -298,9 +298,9 @@ function scaleROServers() {
             ;;
     esac
 
-    cur=$(kubectl get statefulset -n milvus milvus-ro-servers |tail -n 1 |awk '{split($2, status, "/"); print status[2];}')
+    cur=$(kubectl get statefulset -n mishards mishards-ro-servers |tail -n 1 |awk '{split($2, status, "/"); print status[2];}')
     echo -e "${GREEN}Current Running ${BOLD}$cur ${GREEN}Readonly Servers, Scaling to ${BOLD}$des ...${ENDC}";
-    scalecmd="kubectl scale sts milvus-ro-servers -n milvus --replicas=${des}"
+    scalecmd="kubectl scale sts mishards-ro-servers -n mishards --replicas=${des}"
     ${scalecmd}
     if [ $? -ne 0 ]; then
         echo -e "${RED}${BOLD}Scale Error: ${GREEN}${scalecmd}${ENDC}"
@@ -309,8 +309,8 @@ function scaleROServers() {
 
     checkStatefulSevers
     if [ $? -ne 0 ]; then
-        echo -e "${RED}${BOLD}Scale milvus-ro-servers timeout${NORMAL}${ENDC}"
-        scalecmd="kubectl scale sts milvus-ro-servers -n milvus --replicas=${cur}"
+        echo -e "${RED}${BOLD}Scale mishards-ro-servers timeout${NORMAL}${ENDC}"
+        scalecmd="kubectl scale sts mishards-ro-servers -n mishards --replicas=${cur}"
         ${scalecmd}
         if [ $? -ne 0 ]; then
             echo -e "${RED}${BOLD}Scale Rollback Error: ${GREEN}${scalecmd}${ENDC}"
@@ -358,7 +358,7 @@ scale-ro-server)
     ;;
 
 scale-proxy)
-    scaleDeployment "milvus-proxy" $1 $2
+    scaleDeployment "mishards-proxy" $1 $2
     ;;
 
 -h|--help|*)
diff --git a/shards/mishards/topology.py b/shards/mishards/topology.py
index 166b37a564..2d5e3277c1 100644
--- a/shards/mishards/topology.py
+++ b/shards/mishards/topology.py
@@ -35,7 +35,8 @@ class TopoGroup:
         self.cv = threading.Condition()
 
     def on_duplicate(self, topo_object):
-        logger.warning('Duplicated topo_object \"{}\" into group \"{}\"'.format(topo_object, self.name))
+        pass
+        # logger.warning('Duplicated topo_object \"{}\" into group \"{}\"'.format(topo_object, self.name))
 
     def on_added(self, topo_object):
         return True
@@ -85,15 +86,15 @@ class Topology:
         self.cv = threading.Condition()
 
     def on_duplicated_group(self, group):
-        logger.warning('Duplicated group \"{}\" found!'.format(group))
+        # logger.warning('Duplicated group \"{}\" found!'.format(group))
         return StatusType.DUPLICATED
 
     def on_pre_add_group(self, group):
-        logger.debug('Pre add group \"{}\"'.format(group))
+        # logger.debug('Pre add group \"{}\"'.format(group))
         return StatusType.OK
 
     def on_post_add_group(self, group):
-        logger.debug('Post add group \"{}\"'.format(group))
+        # logger.debug('Post add group \"{}\"'.format(group))
         return StatusType.OK
 
     def get_group(self, name):
@@ -116,13 +117,16 @@ class Topology:
         return self.on_post_add_group(group)
 
     def on_delete_not_existed_group(self, group):
-        logger.warning('Deleting non-existed group \"{}\"'.format(group))
+        # logger.warning('Deleting non-existed group \"{}\"'.format(group))
+        pass
 
     def on_pre_delete_group(self, group):
-        logger.debug('Pre delete group \"{}\"'.format(group))
+        pass
+        # logger.debug('Pre delete group \"{}\"'.format(group))
 
     def on_post_delete_group(self, group):
-        logger.debug('Post delete group \"{}\"'.format(group))
+        pass
+        # logger.debug('Post delete group \"{}\"'.format(group))
 
     def _delete_group_no_lock(self, group):
         logger.info('Deleting group \"{}\"'.format(group))
@@ -132,7 +136,7 @@ class Topology:
     def delete_group(self, group):
         self.on_pre_delete_group(group)
         with self.cv:
-            deleted_group = self._delete_group_lock(group)
+            deleted_group = self._delete_group_no_lock(group)
             if not deleted_group:
                 return self.on_delete_not_existed_group(group)
             return self.on_post_delete_group(group)
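
Note (reviewer sketch, not part of the patch): the new record_pending_add/record_pending_delete methods give the discovery handler a small debounce. A pod is registered only after PENDING_THRESHOLD consecutive ready sightings, a delete takes effect on the first sighting, and each kind of event resets the opposite pending counter. Below is a minimal standalone illustration of that pattern; the PodTracker class, the pod name, and the sample event stream are hypothetical, and only the two record_* methods mirror the patch:

    from collections import defaultdict
    from functools import partial

    class PodTracker:
        # Require this many consecutive 'ready' sightings before registering a pod.
        PENDING_THRESHOLD = 3

        def __init__(self):
            self.pending_add = defaultdict(int)
            self.pending_delete = defaultdict(int)

        def record_pending_add(self, pod, true_cb=None):
            # One more 'ready' sighting; any pending delete is cancelled.
            self.pending_add[pod] += 1
            self.pending_delete.pop(pod, None)
            if self.pending_add[pod] >= self.PENDING_THRESHOLD:
                true_cb and true_cb()
                return True
            return False

        def record_pending_delete(self, pod, true_cb=None):
            # Deletes take effect on the first sighting; pending adds are discarded.
            self.pending_delete[pod] += 1
            self.pending_add.pop(pod, None)
            if self.pending_delete[pod] >= 1:
                true_cb and true_cb()
                return True
            return False

    registered = set()
    tracker = PodTracker()
    pod = 'milvus-ro-servers-0'  # hypothetical pod name
    for ready in (True, True, False, True, True, True):
        if ready:
            tracker.record_pending_add(pod, true_cb=partial(registered.add, pod))
        else:
            tracker.record_pending_delete(pod, true_cb=partial(registered.discard, pod))
    print(registered)  # {'milvus-ro-servers-0'}: added only after 3 consecutive ready events

A brief flap (ready twice, then not ready) never reaches the threshold, so the proxy topology is not churned; only a stable run of ready heartbeats triggers add_pod. That fits the quieter logging the patch introduces, where per-cycle 'Added Pods'/'Deleted Pods' summaries replace one log line per event.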