diff --git a/.env b/.env index 6271baa25f..eb85d5acc5 100644 --- a/.env +++ b/.env @@ -3,6 +3,8 @@ IMAGE_ARCH=amd64 OS_NAME=ubuntu20.04 DATE_VERSION=20230215-5351fee LATEST_DATE_VERSION=20230215-5351fee +GPU_DATE_VERSION=20230317-a1c7b0c +LATEST_GPU_DATE_VERSION=20230317-a1c7b0c MINIO_ADDRESS=minio:9000 PULSAR_ADDRESS=pulsar://pulsar:6650 -ETCD_ENDPOINTS=etcd:2379 +ETCD_ENDPOINTS=etcd:2379 \ No newline at end of file diff --git a/.github/workflows/publish-builder.yaml b/.github/workflows/publish-builder.yaml index fe59a06779..0ecdd63c07 100644 --- a/.github/workflows/publish-builder.yaml +++ b/.github/workflows/publish-builder.yaml @@ -6,13 +6,13 @@ on: push: # file paths to consider in the event. Optional; defaults to all. paths: - - 'build/docker/builder/**' + - 'build/docker/builder/cpu/**' - '.github/workflows/publish-builder.yaml' - '!**.md' pull_request: # file paths to consider in the event. Optional; defaults to all. paths: - - 'build/docker/builder/**' + - 'build/docker/builder/cpu/**' - '.github/workflows/publish-builder.yaml' - '!**.md' diff --git a/Makefile b/Makefile index f55197d420..b3f4e53c2a 100644 --- a/Makefile +++ b/Makefile @@ -39,8 +39,8 @@ milvus-gpu: build-cpp-gpu print-build-info @echo "Building Milvus-gpu ..." 
@source $(PWD)/scripts/setenv.sh && \ mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && \ - GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ - ${AARCH64_FLAG} -o $(INSTALL_PATH)/milvus-gpu $(PWD)/cmd/main.go 1>/dev/null + GO111MODULE=on $(GO) build -ldflags="-r $${RPATH} -X '$(OBJPREFIX).BuildTags=$(BUILD_TAGS_GPU)' -X '$(OBJPREFIX).BuildTime=$(BUILD_TIME)' -X '$(OBJPREFIX).GitCommit=$(GIT_COMMIT)' -X '$(OBJPREFIX).GoVersion=$(GO_VERSION)'" \ + ${AARCH64_FLAG} -o $(INSTALL_PATH)/milvus $(PWD)/cmd/main.go 1>/dev/null get-build-deps: @(env bash $(PWD)/scripts/install_deps.sh) @@ -122,6 +122,7 @@ integration-test: ${APPLE_SILICON_FLAG} -o $(INSTALL_PATH)/integration-test $(INTERATION_PATH)/ 1>/dev/null BUILD_TAGS = $(shell git describe --tags --always --dirty="-dev") +BUILD_TAGS_GPU = ${BUILD_TAGS}-gpu BUILD_TIME = $(shell date -u) GIT_COMMIT = $(shell git rev-parse --short HEAD) GO_VERSION = $(shell go version) @@ -282,6 +283,16 @@ install: milvus -cp -r -P $(PWD)/internal/core/output/lib64/*.so* $(LIBRARY_PATH) 2>/dev/null @echo "Installation successful." +gpu-install: milvus-gpu + @echo "Installing binary to './bin'" + @mkdir -p $(GOPATH)/bin && cp -f $(PWD)/bin/milvus $(GOPATH)/bin/milvus + @mkdir -p $(LIBRARY_PATH) + -cp -r -P $(PWD)/internal/core/output/lib/*.dylib* $(LIBRARY_PATH) 2>/dev/null + -cp -r -P $(PWD)/internal/core/output/lib/*.so* $(LIBRARY_PATH) 2>/dev/null + -cp -r -P $(PWD)/internal/core/output/lib64/*.so* $(LIBRARY_PATH) 2>/dev/null + @echo "Installation successful." + + clean: @echo "Cleaning up all the generated files" @find . 
-name '*.test' | xargs rm -fv @@ -348,3 +359,4 @@ generate-mockery: getdeps $(PWD)/bin/mockery --name=QueryCoordComponent --dir=$(PWD)/internal/types --output=$(PWD)/internal/types --filename=mock_querycoord.go --with-expecter --structname=MockQueryCoord --outpkg=types --inpackage $(PWD)/bin/mockery --name=QueryNodeComponent --dir=$(PWD)/internal/types --output=$(PWD)/internal/types --filename=mock_querynode.go --with-expecter --structname=MockQueryNode --outpkg=types --inpackage ci-ut: build-cpp-with-coverage generated-proto-go-without-cpp codecov-cpp codecov-go + diff --git a/build/build_image_gpu.sh b/build/build_image_gpu.sh new file mode 100755 index 0000000000..a36f2619c8 --- /dev/null +++ b/build/build_image_gpu.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Licensed to the LF AI & Data foundation under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Exit immediately for non zero status +set -e +# Print commands +set -x + +# Absolute path to the toplevel milvus directory. 
+toplevel=$(dirname "$(cd "$(dirname "${0}")"; pwd)") + +OS_NAME="${OS_NAME:-ubuntu20.04}" +MILVUS_IMAGE_REPO="${MILVUS_IMAGE_REPO:-milvusdb/milvus}" +MILVUS_IMAGE_TAG="${MILVUS_IMAGE_TAG:-gpu-latest}" + +MILVUS_BASE_IMAGE_REPO="${MILVUS_BASE_IMAGE_REPO:-milvusdb/milvus-base}" +MILVUS_BASE_IMAGE_TAG="gpu-local" +BUILD_ARGS="" + +pushd "${toplevel}" +BUILD_BASE_IMAGE=${BUILD_BASE_IMAGE:-"false"} + +# Separate base dockerfile to skip installing dependencies when building the milvus image +if [[ ${OS_NAME} == "ubuntu20.04" && ${BUILD_BASE_IMAGE} == "true" ]]; then + docker build -f "./build/docker/milvus/gpu/${OS_NAME}/Dockerfile.base" -t "${MILVUS_BASE_IMAGE_REPO}:${MILVUS_BASE_IMAGE_TAG}" . + BUILD_ARGS="--build-arg MILVUS_BASE_IMAGE_REPO=${MILVUS_BASE_IMAGE_REPO} --build-arg MILVUS_BASE_IMAGE_TAG=${MILVUS_BASE_IMAGE_TAG}" +fi + +docker build ${BUILD_ARGS} -f "./build/docker/milvus/gpu/${OS_NAME}/Dockerfile" -t "${MILVUS_IMAGE_REPO}:${MILVUS_IMAGE_TAG}" . + +image_size=$(docker inspect ${MILVUS_IMAGE_REPO}:${MILVUS_IMAGE_TAG} -f '{{.Size}}'| awk '{ byte =$1 /1024/1024/1024; print byte " GB" }') + +echo "Image Size for ${MILVUS_IMAGE_REPO}:${MILVUS_IMAGE_TAG} is ${image_size}" + +popd diff --git a/build/builder_gpu.sh b/build/builder_gpu.sh new file mode 100755 index 0000000000..c1927f46dc --- /dev/null +++ b/build/builder_gpu.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Absolute path to the toplevel milvus directory. +toplevel=$(dirname "$(cd "$(dirname "${0}")"; pwd)") + +export OS_NAME="${OS_NAME:-ubuntu20.04}" + +pushd "${toplevel}" + +if [[ "${1-}" == "pull" ]]; then + docker-compose pull --ignore-pull-failures gpubuilder + exit 0 +fi + +if [[ "${1-}" == "down" ]]; then + docker-compose down + exit 0 +fi + +# Attempt to run in the container with the same UID/GID as we have on the host, +# as this results in the correct permissions on files created in the shared +# volumes.
This isn't always possible, however, as IDs less than 100 are +# reserved by Debian, and IDs in the low 100s are dynamically assigned to +# various system users and groups. To be safe, if we see a UID/GID less than +# 500, promote it to 501. This is notably necessary on macOS Lion and later, +# where administrator accounts are created with a GID of 20. This solution is +# not foolproof, but it works well in practice. +uid=$(id -u) +gid=$(id -g) +[ "$uid" -lt 500 ] && uid=501 +[ "$gid" -lt 500 ] && gid=$uid + +mkdir -p "${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}/amd64-${OS_NAME}-ccache" +mkdir -p "${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}/amd64-${OS_NAME}-go-mod" +mkdir -p "${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}/amd64-${OS_NAME}-vscode-extensions" +mkdir -p "${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}/amd64-${OS_NAME}-conan" +chmod -R 777 "${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}" + +docker-compose pull --ignore-pull-failures gpubuilder +if [[ "${CHECK_BUILDER:-}" == "1" ]]; then + docker-compose build gpubuilder +fi + +if [[ "$(id -u)" != "0" ]]; then + docker-compose run --no-deps --rm -u "$uid:$gid" gpubuilder "$@" +else + docker-compose run --no-deps --rm --entrypoint "/tini -- /entrypoint.sh" gpubuilder "$@" +fi + +popd diff --git a/build/docker/builder/gpu/ubuntu20.04/Dockerfile b/build/docker/builder/gpu/ubuntu20.04/Dockerfile new file mode 100644 index 0000000000..8f327e22d8 --- /dev/null +++ b/build/docker/builder/gpu/ubuntu20.04/Dockerfile @@ -0,0 +1,76 @@ +# Copyright (C) 2019-2022 Zilliz. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. 
See the License for the specific language governing permissions and limitations under the License. + +FROM nvidia/cuda:11.6.0-devel-ubuntu20.04 + +RUN apt-get update && apt-get install -y --no-install-recommends wget curl ca-certificates gnupg2 && \ + wget -qO- "https://cmake.org/files/v3.24/cmake-3.24.0-linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local && \ + apt-get update && apt-get install -y --no-install-recommends \ + g++ gcc gfortran git make ccache libssl-dev zlib1g-dev unzip \ + clang-format-10 clang-tidy-10 lcov libtool m4 autoconf automake python3 python3-pip \ + pkg-config uuid-dev libaio-dev libgoogle-perftools-dev libopenblas-dev && \ + apt-get remove --purge -y && \ + rm -rf /var/lib/apt/lists/* + +RUN pip3 install conan==1.58.0 + +# Install openblas +# RUN wget https://github.com/xianyi/OpenBLAS/archive/v0.3.21.tar.gz && \ +# tar zxvf v0.3.21.tar.gz && cd OpenBLAS-0.3.21 && \ +# make NO_STATIC=1 NO_LAPACK=1 NO_LAPACKE=1 NO_CBLAS=1 NO_AFFINITY=1 USE_OPENMP=1 \ +# TARGET=HASWELL DYNAMIC_ARCH=1 \ +# NUM_THREADS=64 MAJOR_VERSION=3 libs shared && \ +# make PREFIX=/usr/local NUM_THREADS=64 MAJOR_VERSION=3 install && \ +# rm -f /usr/local/include/cblas.h /usr/local/include/lapack* && \ +# cd ..
&& rm -rf OpenBLAS-0.3.21 && rm v0.3.21.tar.gz + +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib" + +# Install Go +ENV GOPATH /go +ENV GOROOT /usr/local/go +ENV GO111MODULE on +ENV PATH $GOPATH/bin:$GOROOT/bin:$PATH +RUN mkdir -p /usr/local/go && wget -qO- "https://golang.org/dl/go1.18.3.linux-amd64.tar.gz" | tar --strip-components=1 -xz -C /usr/local/go && \ + mkdir -p "$GOPATH/src" "$GOPATH/bin" && \ + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b ${GOPATH}/bin v1.46.2 && \ + # export GO111MODULE=on && go get github.com/quasilyte/go-ruleguard/cmd/ruleguard@v0.2.1 && \ + go install github.com/ramya-rao-a/go-outline@latest && \ + go install golang.org/x/tools/gopls@latest && \ + go install github.com/uudashr/gopkgs/v2/cmd/gopkgs@latest && \ + go install github.com/go-delve/delve/cmd/dlv@latest && \ + go install honnef.co/go/tools/cmd/staticcheck@2022.1 && \ + go clean --modcache && \ + chmod -R 777 "$GOPATH" && chmod -R a+w $(go env GOTOOLDIR) + +RUN ln -s /go/bin/dlv /go/bin/dlv-dap + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gdb gdbserver && \ + apt-get remove --purge -y && \ + rm -rf /var/lib/apt/lists/* + +RUN echo 'root:root' | chpasswd + +# refer: https://code.visualstudio.com/docs/remote/containers-advanced#_avoiding-extension-reinstalls-on-container-rebuild +RUN mkdir -p /home/milvus/.vscode-server/extensions \ + /home/milvus/.vscode-server-insiders/extensions \ + && chmod -R 777 /home/milvus + +COPY --chown=0:0 build/docker/builder/entrypoint.sh / + +RUN wget -qO- "https://github.com/jeffoverflow/autouseradd/releases/download/1.2.0/autouseradd-1.2.0-amd64.tar.gz" | tar xz -C / --strip-components 1 + +RUN wget -O /tini https://github.com/krallin/tini/releases/download/v0.19.0/tini && \ + chmod +x /tini + +ENTRYPOINT [ "/tini", "--", "autouseradd", "--user", "milvus", "--", "/entrypoint.sh" ] +CMD ["tail", "-f", "/dev/null"] diff --git 
a/build/docker/milvus/gpu/ubuntu20.04/Dockerfile b/build/docker/milvus/gpu/ubuntu20.04/Dockerfile new file mode 100644 index 0000000000..a50c52c2a8 --- /dev/null +++ b/build/docker/milvus/gpu/ubuntu20.04/Dockerfile @@ -0,0 +1,23 @@ +# Copyright (C) 2019-2022 Zilliz. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under the License. +ARG MILVUS_BASE_IMAGE_REPO="milvusdb/milvus-base" +ARG MILVUS_BASE_IMAGE_TAG="gpu-20230317-e62607a" +FROM ${MILVUS_BASE_IMAGE_REPO}:${MILVUS_BASE_IMAGE_TAG} + +COPY ./bin/ /milvus/bin/ + +COPY ./configs/ /milvus/configs/ + +COPY ./lib/ /milvus/lib/ + +ENV PATH=/milvus/bin:$PATH +ENV LD_LIBRARY_PATH=/milvus/lib:$LD_LIBRARY_PATH:/usr/lib +ENV LD_PRELOAD=/milvus/lib/libjemalloc.so diff --git a/build/docker/milvus/gpu/ubuntu20.04/Dockerfile.base b/build/docker/milvus/gpu/ubuntu20.04/Dockerfile.base new file mode 100644 index 0000000000..c42f8cbb15 --- /dev/null +++ b/build/docker/milvus/gpu/ubuntu20.04/Dockerfile.base @@ -0,0 +1,38 @@ +# Copyright (C) 2019-2022 Zilliz. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. 
See the License for the specific language governing permissions and limitations under the License. + +FROM milvusdb/openblas:ubuntu20.04-20220914-179ea77 AS openblas + +#FROM alpine +#FROM ubuntu:focal-20220426 +#ENV DEBIAN_FRONTEND=noninteractive + +FROM nvidia/cuda:11.6.0-runtime-ubuntu20.04 + +ENV TZ=UTC + +RUN apt-get update && \ + apt-get install -y --no-install-recommends curl libtbb-dev gfortran netcat iputils-ping ca-certificates liblapack3 libzstd-dev uuid-dev libaio-dev libboost-program-options-dev libboost-filesystem-dev && \ + apt-get remove --purge -y && \ + rm -rf /var/lib/apt/lists/* + +COPY --from=openblas /usr/local/lib/libopenblasp-r0.3.21.so /usr/lib/ + +RUN ln -s /usr/lib/libopenblasp-r0.3.21.so /usr/lib/libopenblas.so.3 && \ + ln -s /usr/lib/libopenblas.so.3 /usr/lib/libopenblas.so.0 && \ + ln -s /usr/lib/libopenblas.so.0 /usr/lib/libopenblas.so + +# Add Tini +ADD https://github.com/krallin/tini/releases/download/v0.19.0/tini /tini +RUN chmod +x /tini +ENTRYPOINT ["/tini", "--"] + +WORKDIR /milvus/ diff --git a/ci/jenkins/PRGPU.groovy b/ci/jenkins/PRGPU.groovy new file mode 100644 index 0000000000..7bc1f03f65 --- /dev/null +++ b/ci/jenkins/PRGPU.groovy @@ -0,0 +1,239 @@ +#!/usr/bin/env groovy + +int total_timeout_minutes = 60 * 5 +int e2e_timeout_seconds = 70 * 60 +def imageTag='' +int case_timeout_seconds = 10 * 60 +def chart_version='4.0.6' +pipeline { + options { + timestamps() + timeout(time: total_timeout_minutes, unit: 'MINUTES') + // buildDiscarder logRotator(artifactDaysToKeepStr: '30') + // parallelsAlwaysFailFast() + // preserveStashes(buildCount: 5) + // disableConcurrentBuilds(abortPrevious: true) + + } + agent { + kubernetes { + inheritFrom 'default' + defaultContainer 'main' + yamlFile 'ci/jenkins/pod/rte-gpu.yaml' + customWorkspace '/home/jenkins/agent/workspace' + } + } + environment { + PROJECT_NAME = 'milvus' + SEMVER = "${BRANCH_NAME.contains('/') ? 
BRANCH_NAME.substring(BRANCH_NAME.lastIndexOf('/') + 1) : BRANCH_NAME}" + DOCKER_BUILDKIT = 1 + ARTIFACTS = "${env.WORKSPACE}/_artifacts" + CI_DOCKER_CREDENTIAL_ID = "harbor-milvus-io-registry" + MILVUS_HELM_NAMESPACE = "milvus-ci" + DISABLE_KIND = true + HUB = 'harbor.milvus.io/milvus' + JENKINS_BUILD_ID = "${env.BUILD_ID}" + CI_MODE="pr" + SHOW_MILVUS_CONFIGMAP= true + } + + stages { + stage ('Build'){ + steps { + container('main') { + dir ('build'){ + sh './set_docker_mirror.sh' + } + dir ('tests/scripts') { + script { + sh 'printenv' + def date = sh(returnStdout: true, script: 'date +%Y%m%d').trim() + sh 'git config --global --add safe.directory /home/jenkins/agent/workspace' + def gitShortCommit = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim() + imageTag="gpu-${env.BRANCH_NAME}-${date}-${gitShortCommit}" + withCredentials([usernamePassword(credentialsId: "${env.CI_DOCKER_CREDENTIAL_ID}", usernameVariable: 'CI_REGISTRY_USERNAME', passwordVariable: 'CI_REGISTRY_PASSWORD')]){ + sh """ + TAG="${imageTag}" \ + ./e2e-k8s.sh \ + --skip-export-logs \ + --skip-install \ + --skip-cleanup \ + --skip-setup \ + --gpu \ + --skip-test + """ + + // stash imageTag info for rebuild install & E2E Test only + sh "echo ${imageTag} > imageTag.txt" + stash includes: 'imageTag.txt', name: 'imageTag' + + } + } + } + } + } + } + + stage('Install & E2E Test') { + matrix { + axes { + axis { + name 'MILVUS_SERVER_TYPE' + values 'standalone', 'distributed' + } + axis { + name 'MILVUS_CLIENT' + values 'pymilvus' + } + } + + stages { + stage('Install') { + steps { + container('main') { + stash includes: 'tests/**', name: 'testCode', useDefaultExcludes: false + dir ('tests/scripts') { + script { + sh 'printenv' + def clusterEnabled = "false" + if ("${MILVUS_SERVER_TYPE}" == 'distributed') { + clusterEnabled = "true" + } + sh 'git config --global --add safe.directory /home/jenkins/agent/workspace' + + if ("${MILVUS_CLIENT}" == "pymilvus") { + if ("${imageTag}"==''){ + dir 
("imageTag"){ + try{ + unstash 'imageTag' + imageTag=sh(returnStdout: true, script: 'cat imageTag.txt | tr -d \'\n\r\'') + }catch(e){ + print "No Image Tag info remained ,please rerun build to build new image." + exit 1 + } + } + } + withCredentials([usernamePassword(credentialsId: "${env.CI_DOCKER_CREDENTIAL_ID}", usernameVariable: 'CI_REGISTRY_USERNAME', passwordVariable: 'CI_REGISTRY_PASSWORD')]){ + sh """ + MILVUS_CLUSTER_ENABLED=${clusterEnabled} \ + MILVUS_HELM_REPO="http://nexus-nexus-repository-manager.nexus:8081/repository/milvus-proxy" \ + TAG=${imageTag}\ + ./e2e-k8s.sh \ + --skip-export-logs \ + --skip-cleanup \ + --skip-setup \ + --skip-test \ + --skip-build \ + --skip-build-image \ + --install-extra-arg " + --set etcd.metrics.enabled=true \ + --set etcd.metrics.podMonitor.enabled=true \ + --set indexCoordinator.gc.interval=1 \ + --set indexNode.disk.enabled=true \ + --set queryNode.disk.enabled=true \ + --set standalone.disk.enabled=true \ + --version ${chart_version} \ + -f values/ci/pr-gpu.yaml" + """ + } + } else { + error "Error: Unsupported Milvus client: ${MILVUS_CLIENT}" + } + } + } + } + + } + } + stage('E2E Test'){ + options { + skipDefaultCheckout() + } + agent { + kubernetes { + inheritFrom 'default' + defaultContainer 'main' + yamlFile 'ci/jenkins/pod/e2e.yaml' + customWorkspace '/home/jenkins/agent/workspace' + } + } + steps { + container('pytest') { + unstash('testCode') + script { + sh 'ls -lah' + } + dir ('tests/scripts') { + script { + def release_name=sh(returnStdout: true, script: './get_release_name.sh') + def clusterEnabled = 'false' + if ("${MILVUS_SERVER_TYPE}" == "distributed") { + clusterEnabled = "true" + } + if ("${MILVUS_CLIENT}" == "pymilvus") { + sh """ + MILVUS_HELM_RELEASE_NAME="${release_name}" \ + MILVUS_HELM_NAMESPACE="milvus-ci" \ + MILVUS_CLUSTER_ENABLED="${clusterEnabled}" \ + TEST_TIMEOUT="${e2e_timeout_seconds}" \ + ./ci_e2e.sh "-n 6 -x --tags L0 L1 --timeout ${case_timeout_seconds}" + """ + + } else { + error 
"Error: Unsupported Milvus client: ${MILVUS_CLIENT}" + } + } + } + } + } + post{ + always { + container('pytest'){ + dir("${env.ARTIFACTS}") { + sh "tar -zcvf ${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${MILVUS_CLIENT}-pytest-logs.tar.gz /tmp/ci_logs/test --remove-files || true" + archiveArtifacts artifacts: "${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${MILVUS_CLIENT}-pytest-logs.tar.gz ", allowEmptyArchive: true + } + } + } + } + } + } + post{ + always { + container('main') { + dir ('tests/scripts') { + script { + def release_name=sh(returnStdout: true, script: './get_release_name.sh') + sh "kubectl get pods -n ${MILVUS_HELM_NAMESPACE} | grep ${release_name} " + sh "./uninstall_milvus.sh --release-name ${release_name}" + sh "./ci_logs.sh --log-dir /ci-logs --artifacts-name ${env.ARTIFACTS}/artifacts-${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${SEMVER}-${env.BUILD_NUMBER}-${MILVUS_CLIENT}-e2e-logs \ + --release-name ${release_name}" + dir("${env.ARTIFACTS}") { + archiveArtifacts artifacts: "artifacts-${PROJECT_NAME}-${MILVUS_SERVER_TYPE}-${SEMVER}-${env.BUILD_NUMBER}-${MILVUS_CLIENT}-e2e-logs.tar.gz", allowEmptyArchive: true + } + } + } + } + } + } + } + + } + } + // post{ + // unsuccessful { + // container('jnlp') { + // dir ('tests/scripts') { + // script { + // def authorEmail = sh(returnStdout: true, script: './get_author_email.sh ') + // emailext subject: '$DEFAULT_SUBJECT', + // body: '$DEFAULT_CONTENT', + // recipientProviders: [developers(), culprits()], + // replyTo: '$DEFAULT_REPLYTO', + // to: "${authorEmail},devops@zilliz.com" + // } + // } + // } + // } + // } +} diff --git a/ci/jenkins/pod/rte-gpu.yaml b/ci/jenkins/pod/rte-gpu.yaml new file mode 100644 index 0000000000..ec808b84a3 --- /dev/null +++ b/ci/jenkins/pod/rte-gpu.yaml @@ -0,0 +1,77 @@ +apiVersion: v1 +kind: Pod +metadata: + labels: + app: milvus-e2e + namespace: milvus-ci +spec: + enableServiceLinks: false + containers: + - name: main + image: milvusdb/krte:20211213-dcc15e9 + env: + - name: 
DOCKER_IN_DOCKER_ENABLED + value: "true" + - name: DOCKER_VOLUME_DIRECTORY + value: "/mnt/disk/.docker-gpu" + tty: true + securityContext: + privileged: true + args: ["cat"] + resources: + limits: + cpu: "6" + memory: 12Gi + requests: + cpu: "0.5" + memory: 5Gi + volumeMounts: + - mountPath: /docker-graph + name: docker-graph + - mountPath: /var/lib/docker + name: docker-root + - mountPath: /lib/modules + name: modules + readOnly: true + - mountPath: /sys/fs/cgroup + name: cgroup + - mountPath: /mnt/disk/.docker-gpu + name: build-cache + subPath: docker-volume-gpu + - mountPath: /ci-logs + name: ci-logs + - name: pytest + image: harbor.milvus.io/dockerhub/milvusdb/pytest:20230303-0cb8153 + resources: + limits: + cpu: "6" + memory: 12Gi + requests: + cpu: "0.5" + memory: 5Gi + volumeMounts: + - mountPath: /ci-logs + name: ci-logs + volumes: + - emptyDir: {} + name: docker-graph + - emptyDir: {} + name: docker-root + - hostPath: + path: /tmp/krte/cache + type: DirectoryOrCreate + name: build-cache + - hostPath: + path: /lib/modules + type: Directory + name: modules + - hostPath: + path: /sys/fs/cgroup + type: Directory + name: cgroup + - name: ci-logs + nfs: + path: /ci-logs + server: 172.16.70.239 + nodeSelector: + nvidia.com/gpu.present: 'true' diff --git a/docker-compose.yml b/docker-compose.yml index ebb1150edd..12c4366f9b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -44,6 +44,44 @@ services: /bin/bash -c " make check-proto-product && make verifiers && make unittest" + gpubuilder: + # image: harbor.milvus.io/milvus/milvus-env:gpu-amd64-ubuntu20.04-20230317-a1c7b0c + image: ${IMAGE_REPO}/milvus-env:gpu-${IMAGE_ARCH}-${OS_NAME}-${GPU_DATE_VERSION} + # Build devcontainer + build: + context: . 
+ dockerfile: build/docker/builder/gpu/${OS_NAME}/Dockerfile + # dockerfile: build/docker/builder/cpu/${OS_NAME}/Dockerfile + cache_from: + - ${IMAGE_REPO}/milvus-env:${IMAGE_ARCH}-${OS_NAME}-${LATEST_DATE_VERSION} + # user: {{ CURRENT_ID }} + shm_size: 2G + # expose 19530 port so we can directly access milvus inside build container + # ports: + # - "19530:19530" + environment: + <<: *ccache + OS_NAME: ${OS_NAME} + PULSAR_ADDRESS: ${PULSAR_ADDRESS} + ETCD_ENDPOINTS: ${ETCD_ENDPOINTS} + MINIO_ADDRESS: ${MINIO_ADDRESS} + CONAN_USER_HOME: /home/milvus + volumes: &builder-volumes-gpu + - .:/go/src/github.com/milvus-io/milvus:delegated + - ${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}/${IMAGE_ARCH}-${OS_NAME}-ccache:/ccache:delegated + - ${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}/${IMAGE_ARCH}-${OS_NAME}-go-mod:/go/pkg/mod:delegated + - ${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}/${IMAGE_ARCH}-${OS_NAME}-vscode-extensions:/home/milvus/.vscode-server/extensions:delegated + - ${DOCKER_VOLUME_DIRECTORY:-.docker-gpu}/${IMAGE_ARCH}-${OS_NAME}-conan:/home/milvus/.conan:delegated + working_dir: '/go/src/github.com/milvus-io/milvus' + depends_on: + - etcd + - minio + - pulsar + # Command + command: &builder-command-gpu > + /bin/bash -c " + make check-proto-product && make verifiers && make unittest" + etcd: image: milvusdb/etcd:3.5.5-r2 environment: diff --git a/tests/scripts/e2e-k8s.sh b/tests/scripts/e2e-k8s.sh index 0aa5c851d3..d12582d8e6 100755 --- a/tests/scripts/e2e-k8s.sh +++ b/tests/scripts/e2e-k8s.sh @@ -114,6 +114,11 @@ while (( "$#" )); do MANUAL=true shift ;; + --gpu) + GPU_BUILD=true + shift + ;; + --topology) case $2 in SINGLE_CLUSTER | MULTICLUSTER_SINGLE_NETWORK | MULTICLUSTER ) @@ -197,7 +202,17 @@ Use \"$0 --help\" for more information about a given command. 
esac done -export BUILD_COMMAND="${BUILD_COMMAND:-make install}" +if [[ -n "${GPU_BUILD:-}" ]]; then + export BUILD_COMMAND="${BUILD_COMMAND:-make gpu-install}" + export BUILD_SCRIPT="builder_gpu.sh" + export BUILD_IMAGE_SCRIPT="build_image_gpu.sh" + export TAG="${TAG:-gpu-latest}" +else + export BUILD_COMMAND="${BUILD_COMMAND:-make install}" + export BUILD_SCRIPT="builder.sh" + export BUILD_IMAGE_SCRIPT="build_image.sh" + export TAG="${TAG:-latest}" +fi export MANUAL="${MANUAL:-}" @@ -224,7 +239,7 @@ export ARTIFACTS="${ARTIFACTS:-$(mktemp -d)}" export SINGLE_CLUSTER_NAME="${SINGLE_CLUSTER_NAME:-kind}" export HUB="${HUB:-milvusdb}" -export TAG="${TAG:-latest}" + export CI="true" @@ -296,7 +311,7 @@ if [[ -z "${SKIP_BUILD:-}" ]]; then trace "setup kind registry" setup_kind_registry fi pushd "${ROOT}" - trace "build milvus" "${ROOT}/build/builder.sh" /bin/bash -c "${BUILD_COMMAND}" + trace "build milvus" "${ROOT}/build/${BUILD_SCRIPT}" /bin/bash -c "${BUILD_COMMAND}" popd fi @@ -324,7 +339,7 @@ if [[ -z "${SKIP_BUILD_IMAGE:-}" ]]; then pushd "${ROOT}" # Build Milvus Docker Image - trace "build milvus image" "${ROOT}/build/build_image.sh" + trace "build milvus image" "${ROOT}/build/${BUILD_IMAGE_SCRIPT}" trace "push milvus image" docker push "${MILVUS_IMAGE_REPO}:${MILVUS_IMAGE_TAG}" popd fi diff --git a/tests/scripts/values/ci/pr-gpu.yaml b/tests/scripts/values/ci/pr-gpu.yaml new file mode 100644 index 0000000000..acfcbb657e --- /dev/null +++ b/tests/scripts/values/ci/pr-gpu.yaml @@ -0,0 +1,190 @@ +metrics: + serviceMonitor: + enabled: true +proxy: + nodeSelector: + nvidia.com/gpu.present: 'true' + resources: + requests: + cpu: "0.1" + memory: "256Mi" + +rootCoordinator: + nodeSelector: + nvidia.com/gpu.present: 'true' + resources: + requests: + cpu: "0.1" + memory: "256Mi" + +queryCoordinator: + nodeSelector: + nvidia.com/gpu.present: 'true' + resources: + requests: + cpu: "0.4" + memory: "100Mi" + +queryNode: + nodeSelector: + nvidia.com/gpu.present: 'true' + 
resources: + requests: + cpu: "0.5" + memory: "500Mi" + +indexCoordinator: + nodeSelector: + nvidia.com/gpu.present: 'true' + resources: + requests: + cpu: "0.1" + memory: "50Mi" + +indexNode: + nodeSelector: + nvidia.com/gpu.present: 'true' + resources: + requests: + cpu: "0.5" + memory: "500Mi" + +dataCoordinator: + nodeSelector: + nvidia.com/gpu.present: 'true' + resources: + requests: + cpu: "0.1" + memory: "50Mi" + +dataNode: + nodeSelector: + nvidia.com/gpu.present: 'true' + resources: + requests: + cpu: "0.5" + memory: "500Mi" + +pulsar: + proxy: + configData: + PULSAR_MEM: > + -Xms2048m -Xmx2048m + PULSAR_GC: > + -XX:MaxDirectMemorySize=2048m + httpNumThreads: "50" + resources: + requests: + cpu: "0.5" + memory: "2Gi" + # Resources for the websocket proxy + wsResources: + requests: + memory: "512Mi" + cpu: "0.3" + broker: + resources: + requests: + cpu: "0.5" + memory: "4Gi" + configData: + PULSAR_MEM: > + -Xms4096m + -Xmx4096m + -XX:MaxDirectMemorySize=8192m + PULSAR_GC: > + -Dio.netty.leakDetectionLevel=disabled + -Dio.netty.recycler.linkCapacity=1024 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:ParallelGCThreads=32 + -XX:ConcGCThreads=32 + -XX:G1NewSizePercent=50 + -XX:+DisableExplicitGC + -XX:-ResizePLAB + -XX:+ExitOnOutOfMemoryError + maxMessageSize: "104857600" + defaultRetentionTimeInMinutes: "10080" + defaultRetentionSizeInMB: "8192" + backlogQuotaDefaultLimitGB: "8" + backlogQuotaDefaultRetentionPolicy: producer_exception + + bookkeeper: + configData: + PULSAR_MEM: > + -Xms4096m + -Xmx4096m + -XX:MaxDirectMemorySize=8192m + PULSAR_GC: > + -Dio.netty.leakDetectionLevel=disabled + -Dio.netty.recycler.linkCapacity=1024 + -XX:+UseG1GC -XX:MaxGCPauseMillis=10 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:ParallelGCThreads=32 + -XX:ConcGCThreads=32 + -XX:G1NewSizePercent=50 + -XX:+DisableExplicitGC + -XX:-ResizePLAB + -XX:+ExitOnOutOfMemoryError + 
-XX:+PerfDisableSharedMem + -XX:+PrintGCDetails + nettyMaxFrameSizeBytes: "104867840" + resources: + requests: + cpu: "0.5" + memory: "4Gi" + + bastion: + resources: + requests: + cpu: "0.3" + memory: "50Mi" + + autorecovery: + resources: + requests: + cpu: "0.5" + memory: "512Mi" + + zookeeper: + configData: + PULSAR_MEM: > + -Xms1024m + -Xmx1024m + PULSAR_GC: > + -Dcom.sun.management.jmxremote + -Djute.maxbuffer=10485760 + -XX:+ParallelRefProcEnabled + -XX:+UnlockExperimentalVMOptions + -XX:+DoEscapeAnalysis + -XX:+DisableExplicitGC + -XX:+PerfDisableSharedMem + -Dzookeeper.forceSync=no + resources: + requests: + cpu: "0.3" + memory: "1Gi" +etcd: + replicaCount: 1 + resources: + requests: + cpu: "0.1" + memory: "100Mi" +minio: + resources: + requests: + cpu: "0.3" + memory: "512Mi" +standalone: + nodeSelector: + nvidia.com/gpu.present: 'true' + resources: + requests: + nvidia.com/gpu: 1 + cpu: "0.5" + memory: "3.5Gi" + limits: + nvidia.com/gpu: 1