diff --git a/.hadolint.yaml b/.hadolint.yaml index 44fab0b27a..a024f89710 100644 --- a/.hadolint.yaml +++ b/.hadolint.yaml @@ -2,3 +2,5 @@ ignored: - DL3003 - DL3007 - DL3008 +# disable warnings about following sourced files + - SC1091 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index bb4d896fdb..be60dce72a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,17 +7,26 @@ Please mark all change in change log and use the issue from GitHub ## Bug - \#1705 Limit the insert data batch size - \#1776 Error out when index SQ8H run in CPU mode +- \#1925 Fix flush not working when flushing all collections - \#1929 Skip MySQL meta schema field width check - \#1946 Fix load index file CPU2GPU fail during searching - \#1955 Switch create_index operation to background once client break connection - \#1997 Index file missed after compact +- \#2002 Remove log error msg `Attributes is null` - \#2073 Fix CheckDBConfigBackendUrl error message - \#2076 CheckMetricConfigAddress error message +- \#2120 Make search fail as expected when invalid search params are set +- \#2121 Allow regex matching of partition tag when searching - \#2128 Check has_partition params - \#2131 Distance/ID returned is not correct if searching with duplicate ids - \#2141 Fix server start failed if wal directory exist - \#2169 Fix SingleIndexTest.IVFSQHybrid unittest +- \#2194 Fix failure to get collection info - \#2196 Fix server start failed if wal is disabled +- \#2203 Fix id=-1 being returned in 0.8.0 when total count < topk +- \#2228 Fix show partitions failure in http module +- \#2231 Use server_config to define hard-delete delay time for segment files +- \#2261 Re-define result returned by has_collection if collection is in delete state ## Feature - \#1751 Add api SearchByID @@ -30,6 +39,8 @@ Please mark all change in change log and use the issue from GitHub - \#2064 Warn when use SQLite as metadata management - \#2111 Check GPU environment before start server - \#2206 Log file rotating +- \#2240 Obtain information about running RPC requests +- \#2268 Intelligently detect openblas library in system to avoid installing from source every time ## Improvement - \#221 Refactor LOG macro @@ -44,6 +55,9 @@ Please mark all change in change log and use the issue from GitHub - \#2185 Change id to string format in http module - \#2186 Update endpoints in http module - \#2190 Fix memory usage is twice of index size when using GPU searching +- \#2248 Use hostname and port as the instance label for metrics +- \#2252 Upgrade mishards APIs and requirements +- \#2256 k-means clustering algorithm uses only the Euclidean distance metric ## Task diff --git a/ci/docker/centos-7-core.dockerfile b/ci/docker/centos-7-core.dockerfile index c70d21d13e..6b97217b26 100644 --- a/ci/docker/centos-7-core.dockerfile +++ b/ci/docker/centos-7-core.dockerfile @@ -7,17 +7,25 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] RUN yum install -y epel-release centos-release-scl-rh && yum install -y wget curl which && \ wget -qO- "https://cmake.org/files/v3.14/cmake-3.14.3-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local && \ - yum install -y ccache make automake git python3-pip libcurl-devel python3-devel boost-static mysql-devel \ + yum install -y make automake git python3-pip libcurl-devel python3-devel boost-static mysql-devel \ devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran llvm-toolset-7.0-clang llvm-toolset-7.0-clang-tools-extra \ - mysql lcov openblas-devel lapack-devel \ - && \ - rm -rf /var/cache/yum/* - -RUN echo "source scl_source enable devtoolset-7" >> 
/etc/profile.d/devtoolset-7.sh -RUN echo "source scl_source enable llvm-toolset-7.0" >> /etc/profile.d/llvm-toolset-7.sh + mysql lcov && \ + rm -rf /var/cache/yum/* && \ + echo "source scl_source enable devtoolset-7" >> /etc/profile.d/devtoolset-7.sh && \ + echo "source scl_source enable llvm-toolset-7.0" >> /etc/profile.d/llvm-toolset-7.sh ENV CLANG_TOOLS_PATH="/opt/rh/llvm-toolset-7.0/root/usr/bin" +RUN source /etc/profile.d/devtoolset-7.sh && \ + wget https://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz && \ + tar zxvf v0.3.9.tar.gz && cd OpenBLAS-0.3.9 && \ + make TARGET=CORE2 DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 USE_THREAD=0 USE_OPENMP=0 FC=gfortran CC=gcc COMMON_OPT="-O3 -g -fPIC" FCOMMON_OPT="-O3 -g -fPIC -frecursive" NMAX="NUM_THREADS=128" LIBPREFIX="libopenblas" LAPACKE="NO_LAPACKE=1" INTERFACE64=0 NO_STATIC=1 && \ + make PREFIX=/usr install && \ + cd .. && rm -rf OpenBLAS-0.3.9 && rm v0.3.9.tar.gz + +RUN yum install -y ccache && \ + rm -rf /var/cache/yum/* + # use login shell to activate environment un the RUN commands SHELL [ "/bin/bash", "-c", "-l" ] diff --git a/ci/docker/ubuntu-18.04-core.dockerfile b/ci/docker/ubuntu-18.04-core.dockerfile index 57891b8b92..a034674675 100644 --- a/ci/docker/ubuntu-18.04-core.dockerfile +++ b/ci/docker/ubuntu-18.04-core.dockerfile @@ -13,11 +13,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends wget ca-certifi sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && \ wget -qO- "https://cmake.org/files/v3.14/cmake-3.14.3-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local && \ apt-get update && apt-get install -y --no-install-recommends \ - g++ git gfortran lsb-core ccache \ + g++ git gfortran lsb-core \ libboost-serialization-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev \ curl libtool automake libssl-dev pkg-config libcurl4-openssl-dev python3-pip \ clang-format-6.0 clang-tidy-6.0 \ - lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.5-281 intel-mkl-core-2019.5-281 libopenblas-dev liblapack3 && \ + lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.5-281 intel-mkl-core-2019.5-281 && \ apt-get remove --purge -y && \ rm -rf /var/lib/apt/lists/* @@ -26,6 +26,16 @@ RUN ln -s /usr/lib/x86_64-linux-gnu/libmysqlclient.so \ RUN sh -c 'echo export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2019.5.281/linux/mkl/lib/intel64:\$LD_LIBRARY_PATH > /etc/profile.d/mkl.sh' +RUN wget https://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz && \ + tar zxvf v0.3.9.tar.gz && cd OpenBLAS-0.3.9 && \ + make TARGET=CORE2 DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 USE_THREAD=0 USE_OPENMP=0 FC=gfortran CC=gcc COMMON_OPT="-O3 -g -fPIC" FCOMMON_OPT="-O3 -g -fPIC -frecursive" NMAX="NUM_THREADS=128" LIBPREFIX="libopenblas" LAPACKE="NO_LAPACKE=1" INTERFACE64=0 NO_STATIC=1 && \ + make PREFIX=/usr install && \ + cd .. 
&& rm -rf OpenBLAS-0.3.9 && rm v0.3.9.tar.gz + +RUN apt-get update && apt-get install -y --no-install-recommends ccache && \ + apt-get remove --purge -y && \ + rm -rf /var/lib/apt/lists/* + # use login shell to activate environment un the RUN commands SHELL [ "/bin/bash", "-c", "-l" ] diff --git a/ci/jenkins/pod/milvus-cpu-version-centos7-build-env-pod.yaml b/ci/jenkins/pod/milvus-cpu-version-centos7-build-env-pod.yaml index 3a765bb583..31f1f2abd3 100644 --- a/ci/jenkins/pod/milvus-cpu-version-centos7-build-env-pod.yaml +++ b/ci/jenkins/pod/milvus-cpu-version-centos7-build-env-pod.yaml @@ -8,7 +8,7 @@ metadata: spec: containers: - name: milvus-cpu-build-env - image: registry.zilliz.com/milvus/milvus-cpu-build-env:v0.7.0-centos7 + image: registry.zilliz.com/milvus/milvus-cpu-build-env:v0.9.0-centos7 env: - name: POD_IP valueFrom: @@ -17,7 +17,7 @@ spec: - name: OS_NAME value: "centos7" - name: BUILD_ENV_IMAGE_ID - value: "225b4d9c26d67b70b476964b4dd6e216de4b464d7a973a8c0c7ed1313c4d81ad" + value: "f2386d84d312e42891c8c70219b12fde014c21fbdbc0e59bede7e7609b1ba58b" command: - cat tty: true diff --git a/ci/jenkins/pod/milvus-cpu-version-ubuntu18.04-build-env-pod.yaml b/ci/jenkins/pod/milvus-cpu-version-ubuntu18.04-build-env-pod.yaml index 6758c682d1..c7f57226bc 100644 --- a/ci/jenkins/pod/milvus-cpu-version-ubuntu18.04-build-env-pod.yaml +++ b/ci/jenkins/pod/milvus-cpu-version-ubuntu18.04-build-env-pod.yaml @@ -8,7 +8,7 @@ metadata: spec: containers: - name: milvus-cpu-build-env - image: registry.zilliz.com/milvus/milvus-cpu-build-env:v0.7.0-ubuntu18.04 + image: registry.zilliz.com/milvus/milvus-cpu-build-env:v0.9.0-ubuntu18.04 env: - name: POD_IP valueFrom: @@ -17,7 +17,7 @@ spec: - name: OS_NAME value: "ubuntu18.04" - name: BUILD_ENV_IMAGE_ID - value: "23476391bec80c64f10d44a6370c73c71f011a6b95114b10ff82a60e771e11c7" + value: "4719a06f1b77393fed7a4336058baab74745715a431193d3876e9b51262505bd" command: - cat tty: true diff --git a/ci/jenkins/pod/milvus-gpu-version-centos7-build-env-pod.yaml b/ci/jenkins/pod/milvus-gpu-version-centos7-build-env-pod.yaml index 57fd514fd2..fc6bf831b0 100644 --- a/ci/jenkins/pod/milvus-gpu-version-centos7-build-env-pod.yaml +++ b/ci/jenkins/pod/milvus-gpu-version-centos7-build-env-pod.yaml @@ -8,7 +8,7 @@ metadata: spec: containers: - name: milvus-gpu-build-env - image: registry.zilliz.com/milvus/milvus-gpu-build-env:v0.7.0-centos7 + image: registry.zilliz.com/milvus/milvus-gpu-build-env:v0.9.0-centos7 env: - name: POD_IP valueFrom: @@ -17,7 +17,7 @@ spec: - name: OS_NAME value: "centos7" - name: BUILD_ENV_IMAGE_ID - value: "a5ec9914737ea4727d88ae36b4a73ca5d817f19438ba913cc1de6a1ee2ed2336" + value: "7087442c4c5a7a7adbd7324c58b7b1ac19a25acfd86d6017b5752c4c6521f90e" command: - cat tty: true diff --git a/ci/jenkins/pod/milvus-gpu-version-ubuntu18.04-build-env-pod.yaml b/ci/jenkins/pod/milvus-gpu-version-ubuntu18.04-build-env-pod.yaml index cc9812fb67..d0e42a888f 100644 --- a/ci/jenkins/pod/milvus-gpu-version-ubuntu18.04-build-env-pod.yaml +++ b/ci/jenkins/pod/milvus-gpu-version-ubuntu18.04-build-env-pod.yaml @@ -8,7 +8,7 @@ metadata: spec: containers: - name: milvus-gpu-build-env - image: registry.zilliz.com/milvus/milvus-gpu-build-env:v0.7.0-ubuntu18.04 + image: registry.zilliz.com/milvus/milvus-gpu-build-env:v0.9.0-ubuntu18.04 env: - name: POD_IP valueFrom: @@ -17,7 +17,7 @@ spec: - name: OS_NAME value: "ubuntu18.04" - name: BUILD_ENV_IMAGE_ID - value: "da9023b0f858f072672f86483a869aa87e90a5140864f89e5a012ec766d96dea" + value: 
"0aa65ebac377834ceb9644c320f114b97b488d11762948770b994f73e5ae518f" command: - cat tty: true diff --git a/ci/jenkins/step/shardsDevNightlyTest.groovy b/ci/jenkins/step/shardsDevNightlyTest.groovy index 6f9b07229a..c8cc760065 100644 --- a/ci/jenkins/step/shardsDevNightlyTest.groovy +++ b/ci/jenkins/step/shardsDevNightlyTest.groovy @@ -13,7 +13,7 @@ timeout(time: 180, unit: 'MINUTES') { helm status -n milvus ${env.SHARDS_HELM_RELEASE_NAME}" def helmResult = sh script: helmStatusCMD, returnStatus: true if (!helmResult) { - sh "helm uninstall -n milvus ${env.SHARDS_HELM_RELEASE_NAME} || sleep 1m" + sh "helm uninstall -n milvus ${env.SHARDS_HELM_RELEASE_NAME} && sleep 1m" } throw exc } diff --git a/ci/jenkins/step/singleDevNightlyTest.groovy b/ci/jenkins/step/singleDevNightlyTest.groovy index ffb8d48689..285718913c 100644 --- a/ci/jenkins/step/singleDevNightlyTest.groovy +++ b/ci/jenkins/step/singleDevNightlyTest.groovy @@ -13,7 +13,7 @@ timeout(time: 180, unit: 'MINUTES') { helm status -n milvus ${env.HELM_RELEASE_NAME}" def helmResult = sh script: helmStatusCMD, returnStatus: true if (!helmResult) { - sh "helm uninstall -n milvus ${env.HELM_RELEASE_NAME} || sleep 1m" + sh "helm uninstall -n milvus ${env.HELM_RELEASE_NAME} && sleep 1m" } throw exc } @@ -43,7 +43,7 @@ timeout(time: 180, unit: 'MINUTES') { helm status -n milvus ${env.HELM_RELEASE_NAME}" def helmResult = sh script: helmStatusCMD, returnStatus: true if (!helmResult) { - sh "helm uninstall -n milvus ${env.HELM_RELEASE_NAME} || sleep 1m" + sh "helm uninstall -n milvus ${env.HELM_RELEASE_NAME} && sleep 1m" } throw exc } diff --git a/ci/jenkins/step/singleDevTest.groovy b/ci/jenkins/step/singleDevTest.groovy index d21dac7ef5..97faca175f 100644 --- a/ci/jenkins/step/singleDevTest.groovy +++ b/ci/jenkins/step/singleDevTest.groovy @@ -13,7 +13,7 @@ timeout(time: 120, unit: 'MINUTES') { helm status -n milvus ${env.HELM_RELEASE_NAME}" def helmResult = sh script: helmStatusCMD, returnStatus: true if (!helmResult) { - sh "helm uninstall -n milvus ${env.HELM_RELEASE_NAME} || sleep 1m" + sh "helm uninstall -n milvus ${env.HELM_RELEASE_NAME} && sleep 1m" } throw exc } diff --git a/ci/scripts/build.sh b/ci/scripts/build.sh index 66ee6524bd..3260fb13d9 100755 --- a/ci/scripts/build.sh +++ b/ci/scripts/build.sh @@ -123,6 +123,7 @@ CMAKE_CMD="cmake \ -DFAISS_WITH_MKL=${WITH_MKL} \ -DArrow_SOURCE=AUTO \ -DFAISS_SOURCE=AUTO \ +-DOpenBLAS_SOURCE=AUTO \ -DMILVUS_WITH_FIU=${FIU_ENABLE} \ ${MILVUS_CORE_DIR}" echo ${CMAKE_CMD} diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index bd79c3e0e1..62fc5b8e34 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -90,7 +90,7 @@ if (MILVUS_VERSION_MAJOR STREQUAL "" OR MILVUS_VERSION_MINOR STREQUAL "" OR MILVUS_VERSION_PATCH STREQUAL "") message(WARNING "Failed to determine Milvus version from git branch name") - set(MILVUS_VERSION "0.8.0") + set(MILVUS_VERSION "0.9.0") endif () message(STATUS "Build version = ${MILVUS_VERSION}") @@ -217,8 +217,6 @@ else () @ONLY) endif () -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf/log_config.template ${CMAKE_CURRENT_SOURCE_DIR}/conf/log_config.conf) - install(DIRECTORY scripts/ DESTINATION scripts FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ @@ -232,7 +230,6 @@ install(DIRECTORY scripts/migration WORLD_EXECUTE WORLD_READ) install(FILES conf/server_config.yaml - conf/log_config.conf DESTINATION conf) diff --git a/core/build.sh b/core/build.sh index 83287d9d4b..0bccf03308 100755 --- a/core/build.sh +++ b/core/build.sh @@ -16,6 +16,7 @@ FAISS_ROOT="" 
#FAISS root path FAISS_SOURCE="BUNDLED" WITH_PROMETHEUS="ON" FIU_ENABLE="OFF" +BUILD_OPENBLAS="ON" while getopts "p:d:t:f:ulrcghzmei" arg; do case $arg in @@ -112,6 +113,7 @@ CMAKE_CMD="cmake \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DFAISS_ROOT=${FAISS_ROOT} \ -DFAISS_SOURCE=${FAISS_SOURCE} \ +-DOpenBLAS_SOURCE=AUTO \ -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} \ -DBUILD_COVERAGE=${BUILD_COVERAGE} \ -DMILVUS_DB_PATH=${DB_PATH} \ @@ -119,7 +121,7 @@ CMAKE_CMD="cmake \ -DMILVUS_GPU_VERSION=${GPU_VERSION} \ -DFAISS_WITH_MKL=${WITH_MKL} \ -DMILVUS_WITH_PROMETHEUS=${WITH_PROMETHEUS} \ --DMILVUS_WITH_FIU=${FIU_ENABLE} +-DMILVUS_WITH_FIU=${FIU_ENABLE} \ ../" echo ${CMAKE_CMD} ${CMAKE_CMD} diff --git a/core/conf/demo/log_config.conf b/core/conf/demo/log_config.conf deleted file mode 100644 index ce3e7c8eda..0000000000 --- a/core/conf/demo/log_config.conf +++ /dev/null @@ -1,27 +0,0 @@ -* GLOBAL: - FORMAT = "%datetime | %level | %logger | %msg" - FILENAME = "/var/lib/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-global.log" - ENABLED = true - TO_FILE = true - TO_STANDARD_OUTPUT = false - SUBSECOND_PRECISION = 3 - PERFORMANCE_TRACKING = false - MAX_LOG_FILE_SIZE = 209715200 ## Throw log files away after 200MB -* DEBUG: - FILENAME = "/var/lib/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-debug.log" - ENABLED = true -* WARNING: - FILENAME = "/var/lib/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-warning.log" -* TRACE: - FILENAME = "/var/lib/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-trace.log" -* VERBOSE: - FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg" - TO_FILE = false - TO_STANDARD_OUTPUT = false -## Error logs -* ERROR: - ENABLED = true - FILENAME = "/var/lib/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-error.log" -* FATAL: - ENABLED = true - FILENAME = "/var/lib/milvus/logs/milvus-%datetime{%y-%M-%d-%H:%m}-fatal.log" diff --git a/core/conf/demo/server_config.yaml b/core/conf/demo/server_config.yaml index e4a6fa8e47..d72eda34d0 100644 --- a/core/conf/demo/server_config.yaml +++ b/core/conf/demo/server_config.yaml @@ -9,7 +9,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under the License. -version: 0.3 +version: 0.4 #----------------------+------------------------------------------------------------+------------+-----------------+ # Server Config | Description | Type | Default | @@ -68,9 +68,13 @@ db_config: # secondary_path | A semicolon-separated list of secondary directories used | Path | | # | to save vector data and index data. 
| | | #----------------------+------------------------------------------------------------+------------+-----------------+ +# file_cleanup_timeout | Time gap between soft-delete and hard-delete. | Integer | 10 (s) | +# | Range: [0, 3600]. | | | +#----------------------+------------------------------------------------------------+------------+-----------------+ storage_config: primary_path: /var/lib/milvus secondary_path: + file_cleanup_timeout: 10 #----------------------+------------------------------------------------------------+------------+-----------------+ # Metric Config | Description | Type | Default | @@ -186,3 +190,39 @@ wal_config: recovery_error_ignore: true buffer_size: 256 wal_path: /var/lib/milvus/wal + +#----------------------+------------------------------------------------------------+------------+-----------------+ +# Logs | Description | Type | Default | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# trace.enable | Whether to enable trace level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# debug.enable | Whether to enable debug level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# info.enable | Whether to enable info level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# warning.enable | Whether to enable warning level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# error.enable | Whether to enable error level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# fatal.enable | Whether to enable fatal level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# path | Location of log files. | String | | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# max_log_file_size | Max size of a single log file. After exceeding this value, | Integer | 256 (MB) | +# | the file is renamed to xxx.log.n (the nth rotated file). | | | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# delete_exceeds | Milvus will keep up to ${delete_exceeds} log files per | Integer | 10 | +# | level. For example, after xxx.log.11 file is generated, | | | +# | the xxx.log.1 file will be deleted. 
| | | +#----------------------+------------------------------------------------------------+------------+-----------------+ +logs: + trace.enable: true + debug.enable: true + info.enable: true + warning.enable: true + error.enable: true + fatal.enable: true + path: /var/lib/milvus/logs + max_log_file_size: 256 + delete_exceeds: 10 + diff --git a/core/conf/log_config.template b/core/conf/log_config.template deleted file mode 100644 index c5a5be4cd3..0000000000 --- a/core/conf/log_config.template +++ /dev/null @@ -1,30 +0,0 @@ -* GLOBAL: - FORMAT = "[%datetime][%level]%msg" - FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-global.log" - ENABLED = true - TO_FILE = true - TO_STANDARD_OUTPUT = false - SUBSECOND_PRECISION = 3 - PERFORMANCE_TRACKING = false - MAX_LOG_FILE_SIZE = 209715200 ## Throw log files away after 200MB -* INFO: - FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-info.log" - ENABLED = true -* DEBUG: - FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-debug.log" - ENABLED = true -* WARNING: - FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-warning.log" -* TRACE: - FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-trace.log" -* VERBOSE: - FORMAT = "%datetime{%d/%M/%y} | %level-%vlevel | %msg" - TO_FILE = false - TO_STANDARD_OUTPUT = false -## Error logs -* ERROR: - ENABLED = true - FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-error.log" -* FATAL: - ENABLED = true - FILENAME = "@MILVUS_DB_PATH@/logs/milvus-%datetime{%y-%M-%d-%H:%m}-fatal.log" diff --git a/core/conf/server_config.template b/core/conf/server_config.template index 9db7cb0252..85115390cc 100644 --- a/core/conf/server_config.template +++ b/core/conf/server_config.template @@ -9,7 +9,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under the License. -version: 0.3 +version: 0.4 #----------------------+------------------------------------------------------------+------------+-----------------+ # Server Config | Description | Type | Default | @@ -68,9 +68,13 @@ db_config: # secondary_path | A semicolon-separated list of secondary directories used | Path | | # | to save vector data and index data. | | | #----------------------+------------------------------------------------------------+------------+-----------------+ +# file_cleanup_timeout | time gap between soft-delete and hard-delete | Integer | 10 (s) | +# | range [0, 3600] | | | +#----------------------+------------------------------------------------------------+------------+-----------------+ storage_config: primary_path: @MILVUS_DB_PATH@ secondary_path: + file_cleanup_timeout: 10 #----------------------+------------------------------------------------------------+------------+-----------------+ # Metric Config | Description | Type | Default | @@ -187,6 +191,30 @@ wal_config: buffer_size: 256 wal_path: @MILVUS_DB_PATH@/wal +#----------------------+------------------------------------------------------------+------------+-----------------+ +# Logs | Description | Type | Default | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# trace.enable | Whether to enable trace level logging in Milvus. 
| Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# debug.enable | Whether to enable debug level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# info.enable | Whether to enable info level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# warning.enable | Whether to enable warning level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# error.enable | Whether to enable error level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# fatal.enable | Whether to enable fatal level logging in Milvus. | Boolean | true | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# path | Location of log files. | String | | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# max_log_file_size | Max size of a single log file. After exceeding this value, | Integer | 256 (MB) | +# | the file is renamed to xxx.log.n (the nth rotated file). | | | +#----------------------+------------------------------------------------------------+------------+-----------------+ +# delete_exceeds | Milvus will keep up to ${delete_exceeds} log files per | Integer | 10 | +# | level. For example, after xxx.log.11 file is generated, | | | +# | the xxx.log.1 file will be deleted. 
| | | +#----------------------+------------------------------------------------------------+------------+-----------------+ logs: trace.enable: true debug.enable: true diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index 9629f2c9d6..270e83f5cf 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -37,6 +37,7 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/db db_main_files) aux_source_directory(${MILVUS_ENGINE_SRC}/db/engine db_engine_files) aux_source_directory(${MILVUS_ENGINE_SRC}/db/insert db_insert_files) aux_source_directory(${MILVUS_ENGINE_SRC}/db/meta db_meta_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/db/merge db_merge_files) aux_source_directory(${MILVUS_ENGINE_SRC}/db/wal db_wal_files) set(grpc_service_files @@ -143,6 +144,7 @@ set(engine_files ${db_engine_files} ${db_insert_files} ${db_meta_files} + ${db_merge_files} ${db_wal_files} ${metrics_files} ${storage_files} @@ -331,18 +333,10 @@ install(FILES ${CMAKE_BINARY_DIR}/fiu_ep-prefix/src/fiu_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}fiu${CMAKE_SHARED_LIBRARY_SUFFIX}.1.00 DESTINATION lib) -if (CMAKE_BUILD_TYPE STREQUAL "Release") +if(EXISTS ${CMAKE_BINARY_DIR}/src/index/openblas_ep-prefix/src/openblas_ep/lib/) install(FILES ${CMAKE_BINARY_DIR}/src/index/openblas_ep-prefix/src/openblas_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}openblas${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_BINARY_DIR}/src/index/openblas_ep-prefix/src/openblas_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}openblas${CMAKE_SHARED_LIBRARY_SUFFIX}.0 ${CMAKE_BINARY_DIR}/src/index/openblas_ep-prefix/src/openblas_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}openblas${CMAKE_SHARED_LIBRARY_SUFFIX}.0.3 DESTINATION lib) -elseif(CMAKE_BUILD_TYPE STREQUAL "Debug") - install(FILES - ${CMAKE_BINARY_DIR}/src/index/openblas_ep-prefix/src/openblas_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}openblas_d${CMAKE_SHARED_LIBRARY_SUFFIX} - ${CMAKE_BINARY_DIR}/src/index/openblas_ep-prefix/src/openblas_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}openblas_d${CMAKE_SHARED_LIBRARY_SUFFIX}.0 - ${CMAKE_BINARY_DIR}/src/index/openblas_ep-prefix/src/openblas_ep/lib/${CMAKE_SHARED_LIBRARY_PREFIX}openblas_d${CMAKE_SHARED_LIBRARY_SUFFIX}.0.3 - DESTINATION lib) -else() - message("unknown CMAKE_BUILD_TYPE") endif() diff --git a/core/src/codecs/default/DefaultAttrsFormat.cpp b/core/src/codecs/default/DefaultAttrsFormat.cpp index 0a784b64a5..27c6cbe2da 100644 --- a/core/src/codecs/default/DefaultAttrsFormat.cpp +++ b/core/src/codecs/default/DefaultAttrsFormat.cpp @@ -121,8 +121,8 @@ DefaultAttrsFormat::write(const milvus::storage::FSHandlerPtr& fs_ptr, const mil auto it = attrs_ptr->attrs.begin(); if (it == attrs_ptr->attrs.end()) { - std::string err_msg = "Attributes is null"; - LOG_ENGINE_ERROR_ << err_msg; + // std::string err_msg = "Attributes is null"; + // LOG_ENGINE_ERROR_ << err_msg; return; } diff --git a/core/src/config/Config.cpp b/core/src/config/Config.cpp index 0c1f7c2f37..e1e757fcf1 100644 --- a/core/src/config/Config.cpp +++ b/core/src/config/Config.cpp @@ -74,18 +74,20 @@ const char* CONFIG_STORAGE_PRIMARY_PATH = "primary_path"; const char* CONFIG_STORAGE_PRIMARY_PATH_DEFAULT = "/tmp/milvus"; const char* CONFIG_STORAGE_SECONDARY_PATH = "secondary_path"; const char* CONFIG_STORAGE_SECONDARY_PATH_DEFAULT = ""; -const char* CONFIG_STORAGE_S3_ENABLE = "s3_enable"; -const char* CONFIG_STORAGE_S3_ENABLE_DEFAULT = "false"; -const char* CONFIG_STORAGE_S3_ADDRESS = "s3_address"; -const char* CONFIG_STORAGE_S3_ADDRESS_DEFAULT = "127.0.0.1"; -const char* CONFIG_STORAGE_S3_PORT = "s3_port"; -const char* 
CONFIG_STORAGE_S3_PORT_DEFAULT = "9000"; -const char* CONFIG_STORAGE_S3_ACCESS_KEY = "s3_access_key"; -const char* CONFIG_STORAGE_S3_ACCESS_KEY_DEFAULT = "minioadmin"; -const char* CONFIG_STORAGE_S3_SECRET_KEY = "s3_secret_key"; -const char* CONFIG_STORAGE_S3_SECRET_KEY_DEFAULT = "minioadmin"; -const char* CONFIG_STORAGE_S3_BUCKET = "s3_bucket"; -const char* CONFIG_STORAGE_S3_BUCKET_DEFAULT = "milvus-bucket"; +const char* CONFIG_STORAGE_FILE_CLEANUP_TIMEOUT = "file_cleanup_timeout"; +const char* CONFIG_STORAGE_FILE_CLEANUP_TIMEOUT_DEFAULT = "10"; +// const char* CONFIG_STORAGE_S3_ENABLE = "s3_enable"; +// const char* CONFIG_STORAGE_S3_ENABLE_DEFAULT = "false"; +// const char* CONFIG_STORAGE_S3_ADDRESS = "s3_address"; +// const char* CONFIG_STORAGE_S3_ADDRESS_DEFAULT = "127.0.0.1"; +// const char* CONFIG_STORAGE_S3_PORT = "s3_port"; +// const char* CONFIG_STORAGE_S3_PORT_DEFAULT = "9000"; +// const char* CONFIG_STORAGE_S3_ACCESS_KEY = "s3_access_key"; +// const char* CONFIG_STORAGE_S3_ACCESS_KEY_DEFAULT = "minioadmin"; +// const char* CONFIG_STORAGE_S3_SECRET_KEY = "s3_secret_key"; +// const char* CONFIG_STORAGE_S3_SECRET_KEY_DEFAULT = "minioadmin"; +// const char* CONFIG_STORAGE_S3_BUCKET = "s3_bucket"; +// const char* CONFIG_STORAGE_S3_BUCKET_DEFAULT = "milvus-bucket"; /* cache config */ const char* CONFIG_CACHE = "cache_config"; @@ -183,7 +185,7 @@ constexpr int32_t PORT_NUMBER_MIN = 1024; constexpr int32_t PORT_NUMBER_MAX = 65535; static const std::unordered_map milvus_config_version_map( - {{"0.6.0", "0.1"}, {"0.7.0", "0.2"}, {"0.7.1", "0.2"}, {"0.8.0", "0.3"}}); + {{"0.6.0", "0.1"}, {"0.7.0", "0.2"}, {"0.7.1", "0.2"}, {"0.8.0", "0.3"}, {"0.9.0", "0.4"}}); ///////////////////////////////////////////////////////////// Config::Config() { @@ -239,10 +241,7 @@ Config::LoadConfigFile(const std::string& filename) { } ConfigMgr* mgr = YamlConfigMgr::GetInstance(); - Status s = mgr->LoadConfigFile(filename); - if (!s.ok()) { - return s; - } + STATUS_CHECK(mgr->LoadConfigFile(filename)); // store config file path config_file_ = filename; @@ -253,172 +252,172 @@ Config::LoadConfigFile(const std::string& filename) { Status Config::ValidateConfig() { std::string config_version; - CONFIG_CHECK(GetConfigVersion(config_version)); + STATUS_CHECK(GetConfigVersion(config_version)); /* server config */ std::string server_addr; - CONFIG_CHECK(GetServerConfigAddress(server_addr)); + STATUS_CHECK(GetServerConfigAddress(server_addr)); std::string server_port; - CONFIG_CHECK(GetServerConfigPort(server_port)); + STATUS_CHECK(GetServerConfigPort(server_port)); std::string server_mode; - CONFIG_CHECK(GetServerConfigDeployMode(server_mode)); + STATUS_CHECK(GetServerConfigDeployMode(server_mode)); std::string server_time_zone; - CONFIG_CHECK(GetServerConfigTimeZone(server_time_zone)); + STATUS_CHECK(GetServerConfigTimeZone(server_time_zone)); bool server_web_enable; - CONFIG_CHECK(GetServerConfigWebEnable(server_web_enable)); + STATUS_CHECK(GetServerConfigWebEnable(server_web_enable)); std::string server_web_port; - CONFIG_CHECK(GetServerConfigWebPort(server_web_port)); + STATUS_CHECK(GetServerConfigWebPort(server_web_port)); /* db config */ std::string db_backend_url; - CONFIG_CHECK(GetDBConfigBackendUrl(db_backend_url)); + STATUS_CHECK(GetDBConfigBackendUrl(db_backend_url)); std::string db_preload_collection; - CONFIG_CHECK(GetDBConfigPreloadCollection(db_preload_collection)); + STATUS_CHECK(GetDBConfigPreloadCollection(db_preload_collection)); int64_t db_archive_disk_threshold; - 
CONFIG_CHECK(GetDBConfigArchiveDiskThreshold(db_archive_disk_threshold)); + STATUS_CHECK(GetDBConfigArchiveDiskThreshold(db_archive_disk_threshold)); int64_t db_archive_days_threshold; - CONFIG_CHECK(GetDBConfigArchiveDaysThreshold(db_archive_days_threshold)); + STATUS_CHECK(GetDBConfigArchiveDaysThreshold(db_archive_days_threshold)); int64_t auto_flush_interval; - CONFIG_CHECK(GetDBConfigAutoFlushInterval(auto_flush_interval)); + STATUS_CHECK(GetDBConfigAutoFlushInterval(auto_flush_interval)); /* storage config */ std::string storage_primary_path; - CONFIG_CHECK(GetStorageConfigPrimaryPath(storage_primary_path)); + STATUS_CHECK(GetStorageConfigPrimaryPath(storage_primary_path)); std::string storage_secondary_path; - CONFIG_CHECK(GetStorageConfigSecondaryPath(storage_secondary_path)); + STATUS_CHECK(GetStorageConfigSecondaryPath(storage_secondary_path)); - bool storage_s3_enable; - CONFIG_CHECK(GetStorageConfigS3Enable(storage_s3_enable)); - // std::cout << "S3 " << (storage_s3_enable ? "ENABLED !" : "DISABLED !") << std::endl; - - std::string storage_s3_address; - CONFIG_CHECK(GetStorageConfigS3Address(storage_s3_address)); - - std::string storage_s3_port; - CONFIG_CHECK(GetStorageConfigS3Port(storage_s3_port)); - - std::string storage_s3_access_key; - CONFIG_CHECK(GetStorageConfigS3AccessKey(storage_s3_access_key)); - - std::string storage_s3_secret_key; - CONFIG_CHECK(GetStorageConfigS3SecretKey(storage_s3_secret_key)); - - std::string storage_s3_bucket; - CONFIG_CHECK(GetStorageConfigS3Bucket(storage_s3_bucket)); + // bool storage_s3_enable; + // STATUS_CHECK(GetStorageConfigS3Enable(storage_s3_enable)); + // // std::cout << "S3 " << (storage_s3_enable ? "ENABLED !" : "DISABLED !") << std::endl; + // + // std::string storage_s3_address; + // STATUS_CHECK(GetStorageConfigS3Address(storage_s3_address)); + // + // std::string storage_s3_port; + // STATUS_CHECK(GetStorageConfigS3Port(storage_s3_port)); + // + // std::string storage_s3_access_key; + // STATUS_CHECK(GetStorageConfigS3AccessKey(storage_s3_access_key)); + // + // std::string storage_s3_secret_key; + // STATUS_CHECK(GetStorageConfigS3SecretKey(storage_s3_secret_key)); + // + // std::string storage_s3_bucket; + // STATUS_CHECK(GetStorageConfigS3Bucket(storage_s3_bucket)); /* metric config */ bool metric_enable_monitor; - CONFIG_CHECK(GetMetricConfigEnableMonitor(metric_enable_monitor)); + STATUS_CHECK(GetMetricConfigEnableMonitor(metric_enable_monitor)); std::string metric_address; - CONFIG_CHECK(GetMetricConfigAddress(metric_address)); + STATUS_CHECK(GetMetricConfigAddress(metric_address)); std::string metric_port; - CONFIG_CHECK(GetMetricConfigPort(metric_port)); + STATUS_CHECK(GetMetricConfigPort(metric_port)); /* cache config */ int64_t cache_cpu_cache_capacity; - CONFIG_CHECK(GetCacheConfigCpuCacheCapacity(cache_cpu_cache_capacity)); + STATUS_CHECK(GetCacheConfigCpuCacheCapacity(cache_cpu_cache_capacity)); float cache_cpu_cache_threshold; - CONFIG_CHECK(GetCacheConfigCpuCacheThreshold(cache_cpu_cache_threshold)); + STATUS_CHECK(GetCacheConfigCpuCacheThreshold(cache_cpu_cache_threshold)); int64_t cache_insert_buffer_size; - CONFIG_CHECK(GetCacheConfigInsertBufferSize(cache_insert_buffer_size)); + STATUS_CHECK(GetCacheConfigInsertBufferSize(cache_insert_buffer_size)); bool cache_insert_data; - CONFIG_CHECK(GetCacheConfigCacheInsertData(cache_insert_data)); + STATUS_CHECK(GetCacheConfigCacheInsertData(cache_insert_data)); /* engine config */ int64_t engine_use_blas_threshold; - 
CONFIG_CHECK(GetEngineConfigUseBlasThreshold(engine_use_blas_threshold)); + STATUS_CHECK(GetEngineConfigUseBlasThreshold(engine_use_blas_threshold)); int64_t engine_omp_thread_num; - CONFIG_CHECK(GetEngineConfigOmpThreadNum(engine_omp_thread_num)); + STATUS_CHECK(GetEngineConfigOmpThreadNum(engine_omp_thread_num)); std::string engine_simd_type; - CONFIG_CHECK(GetEngineConfigSimdType(engine_simd_type)); + STATUS_CHECK(GetEngineConfigSimdType(engine_simd_type)); #ifdef MILVUS_GPU_VERSION int64_t engine_gpu_search_threshold; - CONFIG_CHECK(GetEngineConfigGpuSearchThreshold(engine_gpu_search_threshold)); + STATUS_CHECK(GetEngineConfigGpuSearchThreshold(engine_gpu_search_threshold)); #endif /* gpu resource config */ #ifdef MILVUS_GPU_VERSION bool gpu_resource_enable; - CONFIG_CHECK(GetGpuResourceConfigEnable(gpu_resource_enable)); + STATUS_CHECK(GetGpuResourceConfigEnable(gpu_resource_enable)); std::cout << "GPU resources " << (gpu_resource_enable ? "ENABLED !" : "DISABLED !") << std::endl; if (gpu_resource_enable) { int64_t resource_cache_capacity; - CONFIG_CHECK(GetGpuResourceConfigCacheCapacity(resource_cache_capacity)); + STATUS_CHECK(GetGpuResourceConfigCacheCapacity(resource_cache_capacity)); float resource_cache_threshold; - CONFIG_CHECK(GetGpuResourceConfigCacheThreshold(resource_cache_threshold)); + STATUS_CHECK(GetGpuResourceConfigCacheThreshold(resource_cache_threshold)); std::vector search_resources; - CONFIG_CHECK(GetGpuResourceConfigSearchResources(search_resources)); + STATUS_CHECK(GetGpuResourceConfigSearchResources(search_resources)); std::vector index_build_resources; - CONFIG_CHECK(GetGpuResourceConfigBuildIndexResources(index_build_resources)); + STATUS_CHECK(GetGpuResourceConfigBuildIndexResources(index_build_resources)); } #endif /* tracing config */ std::string tracing_config_path; - CONFIG_CHECK(GetTracingConfigJsonConfigPath(tracing_config_path)); + STATUS_CHECK(GetTracingConfigJsonConfigPath(tracing_config_path)); /* wal config */ bool enable; - CONFIG_CHECK(GetWalConfigEnable(enable)); + STATUS_CHECK(GetWalConfigEnable(enable)); bool recovery_error_ignore; - CONFIG_CHECK(GetWalConfigRecoveryErrorIgnore(recovery_error_ignore)); + STATUS_CHECK(GetWalConfigRecoveryErrorIgnore(recovery_error_ignore)); int64_t buffer_size; - CONFIG_CHECK(GetWalConfigBufferSize(buffer_size)); + STATUS_CHECK(GetWalConfigBufferSize(buffer_size)); std::string wal_path; - CONFIG_CHECK(GetWalConfigWalPath(wal_path)); + STATUS_CHECK(GetWalConfigWalPath(wal_path)); /* logs config */ bool trace_enable; - CONFIG_CHECK(GetLogsTraceEnable(trace_enable)); + STATUS_CHECK(GetLogsTraceEnable(trace_enable)); bool debug_enable; - CONFIG_CHECK(GetLogsDebugEnable(trace_enable)); + STATUS_CHECK(GetLogsDebugEnable(trace_enable)); bool info_enable; - CONFIG_CHECK(GetLogsInfoEnable(trace_enable)); + STATUS_CHECK(GetLogsInfoEnable(trace_enable)); bool warning_enable; - CONFIG_CHECK(GetLogsWarningEnable(trace_enable)); + STATUS_CHECK(GetLogsWarningEnable(trace_enable)); bool error_enable; - CONFIG_CHECK(GetLogsErrorEnable(trace_enable)); + STATUS_CHECK(GetLogsErrorEnable(trace_enable)); bool fatal_enable; - CONFIG_CHECK(GetLogsFatalEnable(trace_enable)); + STATUS_CHECK(GetLogsFatalEnable(trace_enable)); std::string logs_path; - CONFIG_CHECK(GetLogsPath(logs_path)); + STATUS_CHECK(GetLogsPath(logs_path)); int64_t logs_max_log_file_size; - CONFIG_CHECK(GetLogsMaxLogFileSize(logs_max_log_file_size)); + STATUS_CHECK(GetLogsMaxLogFileSize(logs_max_log_file_size)); int64_t delete_exceeds; - 
CONFIG_CHECK(GetLogsDeleteExceeds(delete_exceeds)); + STATUS_CHECK(GetLogsDeleteExceeds(delete_exceeds)); return Status::OK(); } @@ -426,76 +425,76 @@ Config::ValidateConfig() { Status Config::ResetDefaultConfig() { /* server config */ - CONFIG_CHECK(SetServerConfigAddress(CONFIG_SERVER_ADDRESS_DEFAULT)); - CONFIG_CHECK(SetServerConfigPort(CONFIG_SERVER_PORT_DEFAULT)); - CONFIG_CHECK(SetServerConfigDeployMode(CONFIG_SERVER_DEPLOY_MODE_DEFAULT)); - CONFIG_CHECK(SetServerConfigTimeZone(CONFIG_SERVER_TIME_ZONE_DEFAULT)); - CONFIG_CHECK(SetServerConfigWebEnable(CONFIG_SERVER_WEB_ENABLE_DEFAULT)); - CONFIG_CHECK(SetServerConfigWebPort(CONFIG_SERVER_WEB_PORT_DEFAULT)); + STATUS_CHECK(SetServerConfigAddress(CONFIG_SERVER_ADDRESS_DEFAULT)); + STATUS_CHECK(SetServerConfigPort(CONFIG_SERVER_PORT_DEFAULT)); + STATUS_CHECK(SetServerConfigDeployMode(CONFIG_SERVER_DEPLOY_MODE_DEFAULT)); + STATUS_CHECK(SetServerConfigTimeZone(CONFIG_SERVER_TIME_ZONE_DEFAULT)); + STATUS_CHECK(SetServerConfigWebEnable(CONFIG_SERVER_WEB_ENABLE_DEFAULT)); + STATUS_CHECK(SetServerConfigWebPort(CONFIG_SERVER_WEB_PORT_DEFAULT)); /* db config */ - CONFIG_CHECK(SetDBConfigBackendUrl(CONFIG_DB_BACKEND_URL_DEFAULT)); - CONFIG_CHECK(SetDBConfigPreloadCollection(CONFIG_DB_PRELOAD_COLLECTION_DEFAULT)); - CONFIG_CHECK(SetDBConfigArchiveDiskThreshold(CONFIG_DB_ARCHIVE_DISK_THRESHOLD_DEFAULT)); - CONFIG_CHECK(SetDBConfigArchiveDaysThreshold(CONFIG_DB_ARCHIVE_DAYS_THRESHOLD_DEFAULT)); - CONFIG_CHECK(SetDBConfigAutoFlushInterval(CONFIG_DB_AUTO_FLUSH_INTERVAL_DEFAULT)); + STATUS_CHECK(SetDBConfigBackendUrl(CONFIG_DB_BACKEND_URL_DEFAULT)); + STATUS_CHECK(SetDBConfigPreloadCollection(CONFIG_DB_PRELOAD_COLLECTION_DEFAULT)); + STATUS_CHECK(SetDBConfigArchiveDiskThreshold(CONFIG_DB_ARCHIVE_DISK_THRESHOLD_DEFAULT)); + STATUS_CHECK(SetDBConfigArchiveDaysThreshold(CONFIG_DB_ARCHIVE_DAYS_THRESHOLD_DEFAULT)); + STATUS_CHECK(SetDBConfigAutoFlushInterval(CONFIG_DB_AUTO_FLUSH_INTERVAL_DEFAULT)); /* storage config */ - CONFIG_CHECK(SetStorageConfigPrimaryPath(CONFIG_STORAGE_PRIMARY_PATH_DEFAULT)); - CONFIG_CHECK(SetStorageConfigSecondaryPath(CONFIG_STORAGE_SECONDARY_PATH_DEFAULT)); - CONFIG_CHECK(SetStorageConfigS3Enable(CONFIG_STORAGE_S3_ENABLE_DEFAULT)); - CONFIG_CHECK(SetStorageConfigS3Address(CONFIG_STORAGE_S3_ADDRESS_DEFAULT)); - CONFIG_CHECK(SetStorageConfigS3Port(CONFIG_STORAGE_S3_PORT_DEFAULT)); - CONFIG_CHECK(SetStorageConfigS3AccessKey(CONFIG_STORAGE_S3_ACCESS_KEY_DEFAULT)); - CONFIG_CHECK(SetStorageConfigS3SecretKey(CONFIG_STORAGE_S3_SECRET_KEY_DEFAULT)); - CONFIG_CHECK(SetStorageConfigS3Bucket(CONFIG_STORAGE_S3_BUCKET_DEFAULT)); + STATUS_CHECK(SetStorageConfigPrimaryPath(CONFIG_STORAGE_PRIMARY_PATH_DEFAULT)); + STATUS_CHECK(SetStorageConfigSecondaryPath(CONFIG_STORAGE_SECONDARY_PATH_DEFAULT)); + STATUS_CHECK(SetStorageConfigFileCleanupTimeout(CONFIG_STORAGE_FILE_CLEANUP_TIMEOUT_DEFAULT)); + // STATUS_CHECK(SetStorageConfigS3Enable(CONFIG_STORAGE_S3_ENABLE_DEFAULT)); + // STATUS_CHECK(SetStorageConfigS3Address(CONFIG_STORAGE_S3_ADDRESS_DEFAULT)); + // STATUS_CHECK(SetStorageConfigS3Port(CONFIG_STORAGE_S3_PORT_DEFAULT)); + // STATUS_CHECK(SetStorageConfigS3AccessKey(CONFIG_STORAGE_S3_ACCESS_KEY_DEFAULT)); + // STATUS_CHECK(SetStorageConfigS3SecretKey(CONFIG_STORAGE_S3_SECRET_KEY_DEFAULT)); + // STATUS_CHECK(SetStorageConfigS3Bucket(CONFIG_STORAGE_S3_BUCKET_DEFAULT)); /* metric config */ - CONFIG_CHECK(SetMetricConfigEnableMonitor(CONFIG_METRIC_ENABLE_MONITOR_DEFAULT)); - CONFIG_CHECK(SetMetricConfigAddress(CONFIG_METRIC_ADDRESS_DEFAULT)); - 
CONFIG_CHECK(SetMetricConfigPort(CONFIG_METRIC_PORT_DEFAULT)); + STATUS_CHECK(SetMetricConfigEnableMonitor(CONFIG_METRIC_ENABLE_MONITOR_DEFAULT)); + STATUS_CHECK(SetMetricConfigAddress(CONFIG_METRIC_ADDRESS_DEFAULT)); + STATUS_CHECK(SetMetricConfigPort(CONFIG_METRIC_PORT_DEFAULT)); /* cache config */ - CONFIG_CHECK(SetCacheConfigCpuCacheCapacity(CONFIG_CACHE_CPU_CACHE_CAPACITY_DEFAULT)); - CONFIG_CHECK(SetCacheConfigCpuCacheThreshold(CONFIG_CACHE_CPU_CACHE_THRESHOLD_DEFAULT)); - CONFIG_CHECK(SetCacheConfigInsertBufferSize(CONFIG_CACHE_INSERT_BUFFER_SIZE_DEFAULT)); - CONFIG_CHECK(SetCacheConfigCacheInsertData(CONFIG_CACHE_CACHE_INSERT_DATA_DEFAULT)); + STATUS_CHECK(SetCacheConfigCpuCacheCapacity(CONFIG_CACHE_CPU_CACHE_CAPACITY_DEFAULT)); + STATUS_CHECK(SetCacheConfigCpuCacheThreshold(CONFIG_CACHE_CPU_CACHE_THRESHOLD_DEFAULT)); + STATUS_CHECK(SetCacheConfigInsertBufferSize(CONFIG_CACHE_INSERT_BUFFER_SIZE_DEFAULT)); + STATUS_CHECK(SetCacheConfigCacheInsertData(CONFIG_CACHE_CACHE_INSERT_DATA_DEFAULT)); /* engine config */ - CONFIG_CHECK(SetEngineConfigUseBlasThreshold(CONFIG_ENGINE_USE_BLAS_THRESHOLD_DEFAULT)); - CONFIG_CHECK(SetEngineConfigOmpThreadNum(CONFIG_ENGINE_OMP_THREAD_NUM_DEFAULT)); - CONFIG_CHECK(SetEngineConfigSimdType(CONFIG_ENGINE_SIMD_TYPE_DEFAULT)); - - /* wal config */ - CONFIG_CHECK(SetWalConfigEnable(CONFIG_WAL_ENABLE_DEFAULT)); - CONFIG_CHECK(SetWalConfigRecoveryErrorIgnore(CONFIG_WAL_RECOVERY_ERROR_IGNORE_DEFAULT)); - CONFIG_CHECK(SetWalConfigBufferSize(CONFIG_WAL_BUFFER_SIZE_DEFAULT)); - CONFIG_CHECK(SetWalConfigWalPath(CONFIG_WAL_WAL_PATH_DEFAULT)); - - /* logs config */ - CONFIG_CHECK(SetLogsTraceEnable(CONFIG_LOGS_TRACE_ENABLE_DEFAULT)); - CONFIG_CHECK(SetLogsDebugEnable(CONFIG_LOGS_DEBUG_ENABLE_DEFAULT)); - CONFIG_CHECK(SetLogsInfoEnable(CONFIG_LOGS_INFO_ENABLE_DEFAULT)); - CONFIG_CHECK(SetLogsWarningEnable(CONFIG_LOGS_WARNING_ENABLE_DEFAULT)); - CONFIG_CHECK(SetLogsErrorEnable(CONFIG_LOGS_ERROR_ENABLE_DEFAULT)); - CONFIG_CHECK(SetLogsFatalEnable(CONFIG_LOGS_FATAL_ENABLE_DEFAULT)); - CONFIG_CHECK(SetLogsPath(CONFIG_LOGS_PATH_DEFAULT)); - CONFIG_CHECK(SetLogsMaxLogFileSize(CONFIG_LOGS_MAX_LOG_FILE_SIZE_DEFAULT)); - CONFIG_CHECK(SetLogsDeleteExceeds(CONFIG_LOGS_DELETE_EXCEEDS_DEFAULT)); - + STATUS_CHECK(SetEngineConfigUseBlasThreshold(CONFIG_ENGINE_USE_BLAS_THRESHOLD_DEFAULT)); + STATUS_CHECK(SetEngineConfigOmpThreadNum(CONFIG_ENGINE_OMP_THREAD_NUM_DEFAULT)); + STATUS_CHECK(SetEngineConfigSimdType(CONFIG_ENGINE_SIMD_TYPE_DEFAULT)); #ifdef MILVUS_GPU_VERSION - CONFIG_CHECK(SetEngineConfigGpuSearchThreshold(CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT)); + STATUS_CHECK(SetEngineConfigGpuSearchThreshold(CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT)); #endif /* gpu resource config */ #ifdef MILVUS_GPU_VERSION - CONFIG_CHECK(SetGpuResourceConfigEnable(CONFIG_GPU_RESOURCE_ENABLE_DEFAULT)); - CONFIG_CHECK(SetGpuResourceConfigCacheCapacity(CONFIG_GPU_RESOURCE_CACHE_CAPACITY_DEFAULT)); - CONFIG_CHECK(SetGpuResourceConfigCacheThreshold(CONFIG_GPU_RESOURCE_CACHE_THRESHOLD_DEFAULT)); - CONFIG_CHECK(SetGpuResourceConfigSearchResources(CONFIG_GPU_RESOURCE_SEARCH_RESOURCES_DEFAULT)); - CONFIG_CHECK(SetGpuResourceConfigBuildIndexResources(CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT)); + STATUS_CHECK(SetGpuResourceConfigEnable(CONFIG_GPU_RESOURCE_ENABLE_DEFAULT)); + STATUS_CHECK(SetGpuResourceConfigCacheCapacity(CONFIG_GPU_RESOURCE_CACHE_CAPACITY_DEFAULT)); + STATUS_CHECK(SetGpuResourceConfigCacheThreshold(CONFIG_GPU_RESOURCE_CACHE_THRESHOLD_DEFAULT)); + 
STATUS_CHECK(SetGpuResourceConfigSearchResources(CONFIG_GPU_RESOURCE_SEARCH_RESOURCES_DEFAULT)); + STATUS_CHECK(SetGpuResourceConfigBuildIndexResources(CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT)); #endif + /* wal config */ + STATUS_CHECK(SetWalConfigEnable(CONFIG_WAL_ENABLE_DEFAULT)); + STATUS_CHECK(SetWalConfigRecoveryErrorIgnore(CONFIG_WAL_RECOVERY_ERROR_IGNORE_DEFAULT)); + STATUS_CHECK(SetWalConfigBufferSize(CONFIG_WAL_BUFFER_SIZE_DEFAULT)); + STATUS_CHECK(SetWalConfigWalPath(CONFIG_WAL_WAL_PATH_DEFAULT)); + + /* logs config */ + STATUS_CHECK(SetLogsTraceEnable(CONFIG_LOGS_TRACE_ENABLE_DEFAULT)); + STATUS_CHECK(SetLogsDebugEnable(CONFIG_LOGS_DEBUG_ENABLE_DEFAULT)); + STATUS_CHECK(SetLogsInfoEnable(CONFIG_LOGS_INFO_ENABLE_DEFAULT)); + STATUS_CHECK(SetLogsWarningEnable(CONFIG_LOGS_WARNING_ENABLE_DEFAULT)); + STATUS_CHECK(SetLogsErrorEnable(CONFIG_LOGS_ERROR_ENABLE_DEFAULT)); + STATUS_CHECK(SetLogsFatalEnable(CONFIG_LOGS_FATAL_ENABLE_DEFAULT)); + STATUS_CHECK(SetLogsPath(CONFIG_LOGS_PATH_DEFAULT)); + STATUS_CHECK(SetLogsMaxLogFileSize(CONFIG_LOGS_MAX_LOG_FILE_SIZE_DEFAULT)); + STATUS_CHECK(SetLogsDeleteExceeds(CONFIG_LOGS_DELETE_EXCEEDS_DEFAULT)); + return Status::OK(); } @@ -551,18 +550,18 @@ Config::SetConfigCli(const std::string& parent_key, const std::string& child_key status = SetStorageConfigPrimaryPath(value); } else if (child_key == CONFIG_STORAGE_SECONDARY_PATH) { status = SetStorageConfigSecondaryPath(value); - } else if (child_key == CONFIG_STORAGE_S3_ENABLE) { - status = SetStorageConfigS3Enable(value); - } else if (child_key == CONFIG_STORAGE_S3_ADDRESS) { - status = SetStorageConfigS3Address(value); - } else if (child_key == CONFIG_STORAGE_S3_PORT) { - status = SetStorageConfigS3Port(value); - } else if (child_key == CONFIG_STORAGE_S3_ACCESS_KEY) { - status = SetStorageConfigS3AccessKey(value); - } else if (child_key == CONFIG_STORAGE_S3_SECRET_KEY) { - status = SetStorageConfigS3SecretKey(value); - } else if (child_key == CONFIG_STORAGE_S3_BUCKET) { - status = SetStorageConfigS3Bucket(value); + // } else if (child_key == CONFIG_STORAGE_S3_ENABLE) { + // status = SetStorageConfigS3Enable(value); + // } else if (child_key == CONFIG_STORAGE_S3_ADDRESS) { + // status = SetStorageConfigS3Address(value); + // } else if (child_key == CONFIG_STORAGE_S3_PORT) { + // status = SetStorageConfigS3Port(value); + // } else if (child_key == CONFIG_STORAGE_S3_ACCESS_KEY) { + // status = SetStorageConfigS3AccessKey(value); + // } else if (child_key == CONFIG_STORAGE_S3_SECRET_KEY) { + // status = SetStorageConfigS3SecretKey(value); + // } else if (child_key == CONFIG_STORAGE_S3_BUCKET) { + // status = SetStorageConfigS3Bucket(value); } else { status = Status(SERVER_UNEXPECTED_ERROR, invalid_node_str); } @@ -736,14 +735,12 @@ Config::UpdateFileConfigFromMem(const std::string& parent_key, const std::string // convert value string to standard string stored in yaml file std::string value_str; - if (child_key == CONFIG_CACHE_CACHE_INSERT_DATA || child_key == CONFIG_STORAGE_S3_ENABLE || + if (child_key == CONFIG_CACHE_CACHE_INSERT_DATA || + // child_key == CONFIG_STORAGE_S3_ENABLE || child_key == CONFIG_METRIC_ENABLE_MONITOR || child_key == CONFIG_GPU_RESOURCE_ENABLE || child_key == CONFIG_WAL_ENABLE || child_key == CONFIG_WAL_RECOVERY_ERROR_IGNORE) { bool ok = false; - status = StringHelpFunctions::ConvertToBoolean(value, ok); - if (!status.ok()) { - return status; - } + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(value, ok)); value_str = ok ? 
"true" : "false"; } else if (child_key == CONFIG_GPU_RESOURCE_SEARCH_RESOURCES || child_key == CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES) { @@ -823,7 +820,6 @@ Config::RegisterCallBack(const std::string& node, const std::string& sub_node, c } auto& callback_map = config_callback_.at(cb_node); - callback_map[key] = cb; return Status::OK(); @@ -1089,67 +1085,93 @@ Config::CheckStorageConfigSecondaryPath(const std::string& value) { } Status -Config::CheckStorageConfigS3Enable(const std::string& value) { - if (!ValidationUtil::ValidateStringIsBool(value).ok()) { - std::string msg = - "Invalid storage config: " + value + ". Possible reason: storage_config.s3_enable is not a boolean."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } - return Status::OK(); -} +Config::CheckStorageConfigFileCleanupTimeout(const std::string& value) { + auto status = Status::OK(); -Status -Config::CheckStorageConfigS3Address(const std::string& value) { - if (!ValidationUtil::ValidateIpAddress(value).ok()) { - std::string msg = "Invalid s3 address: " + value + ". Possible reason: storage_config.s3_address is invalid."; - return Status(SERVER_INVALID_ARGUMENT, msg); + if (value.empty()) { + return status; } - return Status::OK(); -} -Status -Config::CheckStorageConfigS3Port(const std::string& value) { if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { - std::string msg = "Invalid s3 port: " + value + ". Possible reason: storage_config.s3_port is not a number."; + std::string msg = "Invalid file cleanup timeout: " + value + + ". Possible reason: storage_config.file_cleanup_timeout is not a positive integer."; return Status(SERVER_INVALID_ARGUMENT, msg); } else { - try { - int32_t port = std::stoi(value); - if (!(port > PORT_NUMBER_MIN && port < PORT_NUMBER_MAX)) { - std::string msg = "Invalid s3 port: " + value + - ". Possible reason: storage_config.s3_port is not in range (1024, 65535)."; - return Status(SERVER_INVALID_ARGUMENT, msg); - } - } catch (...) { - return Status(SERVER_INVALID_ARGUMENT, "Invalid storage_config.s3_port: " + value); + const int64_t min = 0, max = 3600; + int64_t file_cleanup_timeout = std::stoll(value); + if (file_cleanup_timeout < min || file_cleanup_timeout > max) { + std::string msg = "Invalid file cleanup timeout: " + value + + ". Possible reason: storage_config.file_cleanup_timeout is not in range [" + + std::to_string(min) + ", " + std::to_string(max) + "]."; + return Status(SERVER_INVALID_ARGUMENT, msg); } } + return Status::OK(); } -Status -Config::CheckStorageConfigS3AccessKey(const std::string& value) { - if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "storage_config.s3_access_key is empty."); - } - return Status::OK(); -} - -Status -Config::CheckStorageConfigS3SecretKey(const std::string& value) { - if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "storage_config.s3_secret_key is empty."); - } - return Status::OK(); -} - -Status -Config::CheckStorageConfigS3Bucket(const std::string& value) { - if (value.empty()) { - return Status(SERVER_INVALID_ARGUMENT, "storage_config.s3_bucket is empty."); - } - return Status::OK(); -} +// Status +// Config::CheckStorageConfigS3Enable(const std::string& value) { +// if (!ValidationUtil::ValidateStringIsBool(value).ok()) { +// std::string msg = +// "Invalid storage config: " + value + ". 
Possible reason: storage_config.s3_enable is not a boolean."; +// return Status(SERVER_INVALID_ARGUMENT, msg); +// } +// return Status::OK(); +// } +// +// Status +// Config::CheckStorageConfigS3Address(const std::string& value) { +// if (!ValidationUtil::ValidateIpAddress(value).ok()) { +// std::string msg = "Invalid s3 address: " + value + ". Possible reason: storage_config.s3_address is invalid."; +// return Status(SERVER_INVALID_ARGUMENT, msg); +// } +// return Status::OK(); +// } +// +// Status +// Config::CheckStorageConfigS3Port(const std::string& value) { +// if (!ValidationUtil::ValidateStringIsNumber(value).ok()) { +// std::string msg = "Invalid s3 port: " + value + ". Possible reason: storage_config.s3_port is not a number."; +// return Status(SERVER_INVALID_ARGUMENT, msg); +// } else { +// try { +// int32_t port = std::stoi(value); +// if (!(port > PORT_NUMBER_MIN && port < PORT_NUMBER_MAX)) { +// std::string msg = "Invalid s3 port: " + value + +// ". Possible reason: storage_config.s3_port is not in range (1024, 65535)."; +// return Status(SERVER_INVALID_ARGUMENT, msg); +// } +// } catch (...) { +// return Status(SERVER_INVALID_ARGUMENT, "Invalid storage_config.s3_port: " + value); +// } +// } +// return Status::OK(); +// } +// +// Status +// Config::CheckStorageConfigS3AccessKey(const std::string& value) { +// if (value.empty()) { +// return Status(SERVER_INVALID_ARGUMENT, "storage_config.s3_access_key is empty."); +// } +// return Status::OK(); +// } +// +// Status +// Config::CheckStorageConfigS3SecretKey(const std::string& value) { +// if (value.empty()) { +// return Status(SERVER_INVALID_ARGUMENT, "storage_config.s3_secret_key is empty."); +// } +// return Status::OK(); +// } +// +// Status +// Config::CheckStorageConfigS3Bucket(const std::string& value) { +// if (value.empty()) { +// return Status(SERVER_INVALID_ARGUMENT, "storage_config.s3_bucket is empty."); +// } +// return Status::OK(); +// } /* metric config */ Status @@ -1379,7 +1401,7 @@ Config::CheckGpuResourceConfigCacheCapacity(const std::string& value) { } else { int64_t gpu_cache_capacity = std::stoll(value) * GB; std::vector gpu_ids; - CONFIG_CHECK(GetGpuResourceConfigBuildIndexResources(gpu_ids)); + STATUS_CHECK(GetGpuResourceConfigBuildIndexResources(gpu_ids)); for (int64_t gpu_id : gpu_ids) { size_t gpu_memory; @@ -1459,7 +1481,7 @@ Config::CheckGpuResourceConfigSearchResources(const std::vector& va std::unordered_set value_set; for (auto& resource : value) { - CONFIG_CHECK(CheckGpuResource(resource)); + STATUS_CHECK(CheckGpuResource(resource)); value_set.insert(resource); } @@ -1486,7 +1508,7 @@ Config::CheckGpuResourceConfigBuildIndexResources(const std::vector std::unordered_set value_set; for (auto& resource : value) { - CONFIG_CHECK(CheckGpuResource(resource)); + STATUS_CHECK(CheckGpuResource(resource)); value_set.insert(resource); } @@ -1501,6 +1523,7 @@ Config::CheckGpuResourceConfigBuildIndexResources(const std::vector } #endif + /* tracing config */ Status Config::CheckTracingConfigJsonConfigPath(const std::string& value) { @@ -1792,7 +1815,7 @@ Config::GetServerConfigTimeZone(std::string& value) { Status Config::GetServerConfigWebEnable(bool& value) { std::string str = GetConfigStr(CONFIG_SERVER, CONFIG_SERVER_WEB_ENABLE, CONFIG_SERVER_WEB_ENABLE_DEFAULT); - CONFIG_CHECK(CheckServerConfigWebEnable(str)); + STATUS_CHECK(CheckServerConfigWebEnable(str)); return StringHelpFunctions::ConvertToBoolean(str, value); } @@ -1813,7 +1836,7 @@ Status Config::GetDBConfigArchiveDiskThreshold(int64_t& value) 
{ std::string str = GetConfigStr(CONFIG_DB, CONFIG_DB_ARCHIVE_DISK_THRESHOLD, CONFIG_DB_ARCHIVE_DISK_THRESHOLD_DEFAULT); - CONFIG_CHECK(CheckDBConfigArchiveDiskThreshold(str)); + STATUS_CHECK(CheckDBConfigArchiveDiskThreshold(str)); value = std::stoll(str); return Status::OK(); } @@ -1822,7 +1845,7 @@ Status Config::GetDBConfigArchiveDaysThreshold(int64_t& value) { std::string str = GetConfigStr(CONFIG_DB, CONFIG_DB_ARCHIVE_DAYS_THRESHOLD, CONFIG_DB_ARCHIVE_DAYS_THRESHOLD_DEFAULT); - CONFIG_CHECK(CheckDBConfigArchiveDaysThreshold(str)); + STATUS_CHECK(CheckDBConfigArchiveDaysThreshold(str)); value = std::stoll(str); return Status::OK(); } @@ -1836,7 +1859,7 @@ Config::GetDBConfigPreloadCollection(std::string& value) { return Status::OK(); } Status Config::GetDBConfigAutoFlushInterval(int64_t& value) { std::string str = GetConfigStr(CONFIG_DB, CONFIG_DB_AUTO_FLUSH_INTERVAL, CONFIG_DB_AUTO_FLUSH_INTERVAL_DEFAULT); - CONFIG_CHECK(CheckDBConfigAutoFlushInterval(str)); + STATUS_CHECK(CheckDBConfigAutoFlushInterval(str)); value = std::stoll(str); return Status::OK(); } @@ -1855,49 +1878,58 @@ Config::GetStorageConfigSecondaryPath(std::string& value) { } Status -Config::GetStorageConfigS3Enable(bool& value) { - std::string str = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_ENABLE, CONFIG_STORAGE_S3_ENABLE_DEFAULT); - CONFIG_CHECK(CheckStorageConfigS3Enable(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); +Config::GetStorageConfigFileCleanupTimeout(int64_t& value) { + std::string str = + GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_FILE_CLEANUP_TIMEOUT, CONFIG_STORAGE_FILE_CLEANUP_TIMEOUT_DEFAULT); + STATUS_CHECK(CheckStorageConfigFileCleanupTimeout(str)); + value = std::stoll(str); return Status::OK(); } -Status -Config::GetStorageConfigS3Address(std::string& value) { - value = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_ADDRESS, CONFIG_STORAGE_S3_ADDRESS_DEFAULT); - return CheckStorageConfigS3Address(value); -} - -Status -Config::GetStorageConfigS3Port(std::string& value) { - value = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_PORT, CONFIG_STORAGE_S3_PORT_DEFAULT); - return CheckStorageConfigS3Port(value); -} - -Status -Config::GetStorageConfigS3AccessKey(std::string& value) { - value = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_ACCESS_KEY, CONFIG_STORAGE_S3_ACCESS_KEY_DEFAULT); - return Status::OK(); -} - -Status -Config::GetStorageConfigS3SecretKey(std::string& value) { - value = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_SECRET_KEY, CONFIG_STORAGE_S3_SECRET_KEY_DEFAULT); - return Status::OK(); -} - -Status -Config::GetStorageConfigS3Bucket(std::string& value) { - value = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_BUCKET, CONFIG_STORAGE_S3_BUCKET_DEFAULT); - return Status::OK(); -} +// Status +// Config::GetStorageConfigS3Enable(bool& value) { +// std::string str = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_ENABLE, CONFIG_STORAGE_S3_ENABLE_DEFAULT); +// STATUS_CHECK(CheckStorageConfigS3Enable(str)); +// STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); +// return Status::OK(); +// } +// +// Status +// Config::GetStorageConfigS3Address(std::string& value) { +// value = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_ADDRESS, CONFIG_STORAGE_S3_ADDRESS_DEFAULT); +// return CheckStorageConfigS3Address(value); +// } +// +// Status +// Config::GetStorageConfigS3Port(std::string& value) { +// value = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_PORT, CONFIG_STORAGE_S3_PORT_DEFAULT); +// return CheckStorageConfigS3Port(value); +// } +// +// Status +// 
Config::GetStorageConfigS3AccessKey(std::string& value) { +// value = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_ACCESS_KEY, CONFIG_STORAGE_S3_ACCESS_KEY_DEFAULT); +// return Status::OK(); +// } +// +// Status +// Config::GetStorageConfigS3SecretKey(std::string& value) { +// value = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_SECRET_KEY, CONFIG_STORAGE_S3_SECRET_KEY_DEFAULT); +// return Status::OK(); +// } +// +// Status +// Config::GetStorageConfigS3Bucket(std::string& value) { +// value = GetConfigStr(CONFIG_STORAGE, CONFIG_STORAGE_S3_BUCKET, CONFIG_STORAGE_S3_BUCKET_DEFAULT); +// return Status::OK(); +// } /* metric config */ Status Config::GetMetricConfigEnableMonitor(bool& value) { std::string str = GetConfigStr(CONFIG_METRIC, CONFIG_METRIC_ENABLE_MONITOR, CONFIG_METRIC_ENABLE_MONITOR_DEFAULT); - CONFIG_CHECK(CheckMetricConfigEnableMonitor(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); + STATUS_CHECK(CheckMetricConfigEnableMonitor(str)); + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); return Status::OK(); } @@ -1918,7 +1950,7 @@ Status Config::GetCacheConfigCpuCacheCapacity(int64_t& value) { std::string str = GetConfigStr(CONFIG_CACHE, CONFIG_CACHE_CPU_CACHE_CAPACITY, CONFIG_CACHE_CPU_CACHE_CAPACITY_DEFAULT); - CONFIG_CHECK(CheckCacheConfigCpuCacheCapacity(str)); + STATUS_CHECK(CheckCacheConfigCpuCacheCapacity(str)); value = std::stoll(str); return Status::OK(); } @@ -1927,7 +1959,7 @@ Status Config::GetCacheConfigCpuCacheThreshold(float& value) { std::string str = GetConfigStr(CONFIG_CACHE, CONFIG_CACHE_CPU_CACHE_THRESHOLD, CONFIG_CACHE_CPU_CACHE_THRESHOLD_DEFAULT); - CONFIG_CHECK(CheckCacheConfigCpuCacheThreshold(str)); + STATUS_CHECK(CheckCacheConfigCpuCacheThreshold(str)); value = std::stof(str); return Status::OK(); } @@ -1936,7 +1968,7 @@ Status Config::GetCacheConfigInsertBufferSize(int64_t& value) { std::string str = GetConfigStr(CONFIG_CACHE, CONFIG_CACHE_INSERT_BUFFER_SIZE, CONFIG_CACHE_INSERT_BUFFER_SIZE_DEFAULT); - CONFIG_CHECK(CheckCacheConfigInsertBufferSize(str)); + STATUS_CHECK(CheckCacheConfigInsertBufferSize(str)); value = std::stoll(str); return Status::OK(); } @@ -1945,7 +1977,7 @@ Status Config::GetCacheConfigCacheInsertData(bool& value) { std::string str = GetConfigStr(CONFIG_CACHE, CONFIG_CACHE_CACHE_INSERT_DATA, CONFIG_CACHE_CACHE_INSERT_DATA_DEFAULT); - CONFIG_CHECK(CheckCacheConfigCacheInsertData(str)); + STATUS_CHECK(CheckCacheConfigCacheInsertData(str)); std::transform(str.begin(), str.end(), str.begin(), ::tolower); value = (str == "true" || str == "on" || str == "yes" || str == "1"); return Status::OK(); @@ -1956,7 +1988,7 @@ Status Config::GetEngineConfigUseBlasThreshold(int64_t& value) { std::string str = GetConfigStr(CONFIG_ENGINE, CONFIG_ENGINE_USE_BLAS_THRESHOLD, CONFIG_ENGINE_USE_BLAS_THRESHOLD_DEFAULT); - CONFIG_CHECK(CheckEngineConfigUseBlasThreshold(str)); + STATUS_CHECK(CheckEngineConfigUseBlasThreshold(str)); value = std::stoll(str); return Status::OK(); } @@ -1964,7 +1996,7 @@ Config::GetEngineConfigUseBlasThreshold(int64_t& value) { Status Config::GetEngineConfigOmpThreadNum(int64_t& value) { std::string str = GetConfigStr(CONFIG_ENGINE, CONFIG_ENGINE_OMP_THREAD_NUM, CONFIG_ENGINE_OMP_THREAD_NUM_DEFAULT); - CONFIG_CHECK(CheckEngineConfigOmpThreadNum(str)); + STATUS_CHECK(CheckEngineConfigOmpThreadNum(str)); value = std::stoll(str); return Status::OK(); } @@ -1976,16 +2008,14 @@ Config::GetEngineConfigSimdType(std::string& value) { } #ifdef MILVUS_GPU_VERSION - Status 
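Note that GetCacheConfigCacheInsertData above open-codes a tolerant boolean parse instead of calling StringHelpFunctions::ConvertToBoolean like its siblings. Its accept-list, isolated for illustration only:

```cpp
#include <algorithm>
#include <cctype>
#include <string>

// Mirrors the lowercase-then-compare logic in GetCacheConfigCacheInsertData;
// any value outside the accept list is treated as false.
bool ParseLooseBoolean(std::string str) {
    std::transform(str.begin(), str.end(), str.begin(),
                   [](unsigned char c) { return std::tolower(c); });
    return str == "true" || str == "on" || str == "yes" || str == "1";
}
// ParseLooseBoolean("ON") == true; ParseLooseBoolean("0") == false
```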
Config::GetEngineConfigGpuSearchThreshold(int64_t& value) { std::string str = GetConfigStr(CONFIG_ENGINE, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT); - CONFIG_CHECK(CheckEngineConfigGpuSearchThreshold(str)); + STATUS_CHECK(CheckEngineConfigGpuSearchThreshold(str)); value = std::stoll(str); return Status::OK(); } - #endif /* gpu resource config */ @@ -1994,15 +2024,15 @@ Config::GetEngineConfigGpuSearchThreshold(int64_t& value) { Status Config::GetGpuResourceConfigEnable(bool& value) { std::string str = GetConfigStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_ENABLE, CONFIG_GPU_RESOURCE_ENABLE_DEFAULT); - CONFIG_CHECK(CheckGpuResourceConfigEnable(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); + STATUS_CHECK(CheckGpuResourceConfigEnable(str)); + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); return Status::OK(); } Status Config::GetGpuResourceConfigCacheCapacity(int64_t& value) { bool gpu_resource_enable = false; - CONFIG_CHECK(GetGpuResourceConfigEnable(gpu_resource_enable)); + STATUS_CHECK(GetGpuResourceConfigEnable(gpu_resource_enable)); fiu_do_on("Config.GetGpuResourceConfigCacheCapacity.diable_gpu_resource", gpu_resource_enable = false); if (!gpu_resource_enable) { std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable is set to false."; @@ -2010,7 +2040,7 @@ Config::GetGpuResourceConfigCacheCapacity(int64_t& value) { } std::string str = GetConfigStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_CAPACITY, CONFIG_GPU_RESOURCE_CACHE_CAPACITY_DEFAULT); - CONFIG_CHECK(CheckGpuResourceConfigCacheCapacity(str)); + STATUS_CHECK(CheckGpuResourceConfigCacheCapacity(str)); value = std::stoll(str); return Status::OK(); } @@ -2018,7 +2048,7 @@ Config::GetGpuResourceConfigCacheCapacity(int64_t& value) { Status Config::GetGpuResourceConfigCacheThreshold(float& value) { bool gpu_resource_enable = false; - CONFIG_CHECK(GetGpuResourceConfigEnable(gpu_resource_enable)); + STATUS_CHECK(GetGpuResourceConfigEnable(gpu_resource_enable)); fiu_do_on("Config.GetGpuResourceConfigCacheThreshold.diable_gpu_resource", gpu_resource_enable = false); if (!gpu_resource_enable) { std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable is set to false."; @@ -2026,7 +2056,7 @@ Config::GetGpuResourceConfigCacheThreshold(float& value) { } std::string str = GetConfigStr(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_THRESHOLD, CONFIG_GPU_RESOURCE_CACHE_THRESHOLD_DEFAULT); - CONFIG_CHECK(CheckGpuResourceConfigCacheThreshold(str)); + STATUS_CHECK(CheckGpuResourceConfigCacheThreshold(str)); value = std::stof(str); return Status::OK(); } @@ -2034,7 +2064,7 @@ Config::GetGpuResourceConfigCacheThreshold(float& value) { Status Config::GetGpuResourceConfigSearchResources(std::vector& value) { bool gpu_resource_enable = false; - CONFIG_CHECK(GetGpuResourceConfigEnable(gpu_resource_enable)); + STATUS_CHECK(GetGpuResourceConfigEnable(gpu_resource_enable)); fiu_do_on("get_gpu_config_search_resources.disable_gpu_resource_fail", gpu_resource_enable = false); if (!gpu_resource_enable) { std::string msg = "GPU not supported. 
Possible reason: gpu_resource_config.enable is set to false."; @@ -2044,7 +2074,7 @@ Config::GetGpuResourceConfigSearchResources(std::vector<int64_t>& value) { CONFIG_GPU_RESOURCE_DELIMITER, CONFIG_GPU_RESOURCE_SEARCH_RESOURCES_DEFAULT); std::vector<std::string> res_vec; server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); - CONFIG_CHECK(CheckGpuResourceConfigSearchResources(res_vec)); + STATUS_CHECK(CheckGpuResourceConfigSearchResources(res_vec)); value.clear(); for (std::string& res : res_vec) { value.push_back(std::stoll(res.substr(3))); @@ -2055,7 +2085,7 @@ Config::GetGpuResourceConfigSearchResources(std::vector<int64_t>& value) { Status Config::GetGpuResourceConfigBuildIndexResources(std::vector<int64_t>& value) { bool gpu_resource_enable = false; - CONFIG_CHECK(GetGpuResourceConfigEnable(gpu_resource_enable)); + STATUS_CHECK(GetGpuResourceConfigEnable(gpu_resource_enable)); fiu_do_on("get_gpu_config_build_index_resources.disable_gpu_resource_fail", gpu_resource_enable = false); if (!gpu_resource_enable) { std::string msg = "GPU not supported. Possible reason: gpu_resource_config.enable is set to false."; @@ -2066,7 +2096,7 @@ Config::GetGpuResourceConfigBuildIndexResources(std::vector<int64_t>& value) { CONFIG_GPU_RESOURCE_DELIMITER, CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES_DEFAULT); std::vector<std::string> res_vec; server::StringHelpFunctions::SplitStringByDelimeter(str, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); - CONFIG_CHECK(CheckGpuResourceConfigBuildIndexResources(res_vec)); + STATUS_CHECK(CheckGpuResourceConfigBuildIndexResources(res_vec)); value.clear(); for (std::string& res : res_vec) { value.push_back(std::stoll(res.substr(3))); @@ -2096,8 +2126,8 @@ Config::GetTracingConfigJsonConfigPath(std::string& value) { Status Config::GetWalConfigEnable(bool& wal_enable) { std::string str = GetConfigStr(CONFIG_WAL, CONFIG_WAL_ENABLE, CONFIG_WAL_ENABLE_DEFAULT); - CONFIG_CHECK(CheckWalConfigEnable(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, wal_enable)); + STATUS_CHECK(CheckWalConfigEnable(str)); + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, wal_enable)); return Status::OK(); } @@ -2105,15 +2135,15 @@ Status Config::GetWalConfigRecoveryErrorIgnore(bool& recovery_error_ignore) { std::string str = GetConfigStr(CONFIG_WAL, CONFIG_WAL_RECOVERY_ERROR_IGNORE, CONFIG_WAL_RECOVERY_ERROR_IGNORE_DEFAULT); - CONFIG_CHECK(CheckWalConfigRecoveryErrorIgnore(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, recovery_error_ignore)); + STATUS_CHECK(CheckWalConfigRecoveryErrorIgnore(str)); + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, recovery_error_ignore)); return Status::OK(); } Status Config::GetWalConfigBufferSize(int64_t& buffer_size) { std::string str = GetConfigStr(CONFIG_WAL, CONFIG_WAL_BUFFER_SIZE, CONFIG_WAL_BUFFER_SIZE_DEFAULT); - CONFIG_CHECK(CheckWalConfigBufferSize(str)); + STATUS_CHECK(CheckWalConfigBufferSize(str)); buffer_size = std::stoll(str); if (buffer_size > CONFIG_WAL_BUFFER_SIZE_MAX) { buffer_size = CONFIG_WAL_BUFFER_SIZE_MAX; @@ -2126,7 +2156,7 @@ Config::GetWalConfigBufferSize(int64_t& buffer_size) { Status Config::GetWalConfigWalPath(std::string& wal_path) { wal_path = GetConfigStr(CONFIG_WAL, CONFIG_WAL_WAL_PATH, CONFIG_WAL_WAL_PATH_DEFAULT); - CONFIG_CHECK(CheckWalConfigWalPath(wal_path)); + STATUS_CHECK(CheckWalConfigWalPath(wal_path)); return Status::OK(); } @@ -2134,62 +2164,62 @@ Config::GetWalConfigWalPath(std::string& wal_path) { Status Config::GetLogsTraceEnable(bool& value) { std::string str = GetConfigStr(CONFIG_LOGS, 
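Both GPU resource getters above turn entries such as "gpu0" into integer device ids via res.substr(3) plus std::stoll. A self-contained sketch of that split-and-strip step; the comma delimiter and the "gpu" prefix are assumptions inferred from the substr(3) offset, since CONFIG_GPU_RESOURCE_DELIMITER's value is defined outside this diff:

```cpp
#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

// Assumed stand-in for StringHelpFunctions::SplitStringByDelimeter plus the
// substr(3)/stoll loop above; ',' and the "gpu" prefix are assumptions.
std::vector<int64_t> ParseGpuResources(const std::string& str) {
    std::vector<int64_t> ids;
    std::stringstream ss(str);
    std::string token;
    while (std::getline(ss, token, ',')) {
        if (token.rfind("gpu", 0) == 0) {                    // entries look like "gpu0"
            ids.push_back(std::stoll(token.substr(3)));      // drop the "gpu" prefix
        }
    }
    return ids;
}
// ParseGpuResources("gpu0,gpu1") -> {0, 1}
```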
CONFIG_LOGS_TRACE_ENABLE, CONFIG_LOGS_TRACE_ENABLE_DEFAULT); - CONFIG_CHECK(CheckLogsTraceEnable(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); + STATUS_CHECK(CheckLogsTraceEnable(str)); + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); return Status::OK(); } Status Config::GetLogsDebugEnable(bool& value) { std::string str = GetConfigStr(CONFIG_LOGS, CONFIG_LOGS_DEBUG_ENABLE, CONFIG_LOGS_DEBUG_ENABLE_DEFAULT); - CONFIG_CHECK(CheckLogsDebugEnable(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); + STATUS_CHECK(CheckLogsDebugEnable(str)); + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); return Status::OK(); } Status Config::GetLogsInfoEnable(bool& value) { std::string str = GetConfigStr(CONFIG_LOGS, CONFIG_LOGS_INFO_ENABLE, CONFIG_LOGS_INFO_ENABLE_DEFAULT); - CONFIG_CHECK(CheckLogsInfoEnable(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); + STATUS_CHECK(CheckLogsInfoEnable(str)); + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); return Status::OK(); } Status Config::GetLogsWarningEnable(bool& value) { std::string str = GetConfigStr(CONFIG_LOGS, CONFIG_LOGS_WARNING_ENABLE, CONFIG_LOGS_WARNING_ENABLE_DEFAULT); - CONFIG_CHECK(CheckLogsWarningEnable(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); + STATUS_CHECK(CheckLogsWarningEnable(str)); + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); return Status::OK(); } Status Config::GetLogsErrorEnable(bool& value) { std::string str = GetConfigStr(CONFIG_LOGS, CONFIG_LOGS_ERROR_ENABLE, CONFIG_LOGS_ERROR_ENABLE_DEFAULT); - CONFIG_CHECK(CheckLogsErrorEnable(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); + STATUS_CHECK(CheckLogsErrorEnable(str)); + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); return Status::OK(); } Status Config::GetLogsFatalEnable(bool& value) { std::string str = GetConfigStr(CONFIG_LOGS, CONFIG_LOGS_FATAL_ENABLE, CONFIG_LOGS_FATAL_ENABLE_DEFAULT); - CONFIG_CHECK(CheckLogsFatalEnable(str)); - CONFIG_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); + STATUS_CHECK(CheckLogsFatalEnable(str)); + STATUS_CHECK(StringHelpFunctions::ConvertToBoolean(str, value)); return Status::OK(); } Status Config::GetLogsPath(std::string& value) { value = GetConfigStr(CONFIG_LOGS, CONFIG_LOGS_PATH, CONFIG_LOGS_PATH_DEFAULT); - CONFIG_CHECK(CheckLogsPath(value)); + STATUS_CHECK(CheckLogsPath(value)); return Status::OK(); } Status Config::GetLogsMaxLogFileSize(int64_t& value) { std::string str = GetConfigStr(CONFIG_LOGS, CONFIG_LOGS_MAX_LOG_FILE_SIZE, CONFIG_LOGS_MAX_LOG_FILE_SIZE_DEFAULT); - CONFIG_CHECK(CheckLogsMaxLogFileSize(str)); + STATUS_CHECK(CheckLogsMaxLogFileSize(str)); value = std::stoll(str); if (value == 0) { // OFF @@ -2205,7 +2235,7 @@ Config::GetLogsMaxLogFileSize(int64_t& value) { Status Config::GetLogsDeleteExceeds(int64_t& value) { std::string str = GetConfigStr(CONFIG_LOGS, CONFIG_LOGS_DELETE_EXCEEDS, CONFIG_LOGS_DELETE_EXCEEDS_DEFAULT); - CONFIG_CHECK(CheckLogsDeleteExceeds(str)); + STATUS_CHECK(CheckLogsDeleteExceeds(str)); value = std::stoll(str); if (value == 0) { // OFF @@ -2228,304 +2258,222 @@ Config::GetServerRestartRequired(bool& required) { /* server config */ Status Config::SetServerConfigAddress(const std::string& value) { - CONFIG_CHECK(CheckServerConfigAddress(value)); + STATUS_CHECK(CheckServerConfigAddress(value)); return SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_ADDRESS, value); } Status 
Config::SetServerConfigPort(const std::string& value) { - CONFIG_CHECK(CheckServerConfigPort(value)); + STATUS_CHECK(CheckServerConfigPort(value)); return SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_PORT, value); } Status Config::SetServerConfigDeployMode(const std::string& value) { - CONFIG_CHECK(CheckServerConfigDeployMode(value)); + STATUS_CHECK(CheckServerConfigDeployMode(value)); return SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_DEPLOY_MODE, value); } Status Config::SetServerConfigTimeZone(const std::string& value) { - CONFIG_CHECK(CheckServerConfigTimeZone(value)); + STATUS_CHECK(CheckServerConfigTimeZone(value)); return SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_TIME_ZONE, value); } Status Config::SetServerConfigWebEnable(const std::string& value) { - CONFIG_CHECK(CheckServerConfigWebEnable(value)); + STATUS_CHECK(CheckServerConfigWebEnable(value)); return SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_WEB_ENABLE, value); } Status Config::SetServerConfigWebPort(const std::string& value) { - CONFIG_CHECK(CheckServerConfigWebPort(value)); + STATUS_CHECK(CheckServerConfigWebPort(value)); return SetConfigValueInMem(CONFIG_SERVER, CONFIG_SERVER_WEB_PORT, value); } /* db config */ Status Config::SetDBConfigBackendUrl(const std::string& value) { - CONFIG_CHECK(CheckDBConfigBackendUrl(value)); + STATUS_CHECK(CheckDBConfigBackendUrl(value)); return SetConfigValueInMem(CONFIG_DB, CONFIG_DB_BACKEND_URL, value); } Status Config::SetDBConfigPreloadCollection(const std::string& value) { - CONFIG_CHECK(CheckDBConfigPreloadCollection(value)); + STATUS_CHECK(CheckDBConfigPreloadCollection(value)); std::string cor_value = value == "*" ? "\'*\'" : value; return SetConfigValueInMem(CONFIG_DB, CONFIG_DB_PRELOAD_COLLECTION, cor_value); } Status Config::SetDBConfigArchiveDiskThreshold(const std::string& value) { - CONFIG_CHECK(CheckDBConfigArchiveDiskThreshold(value)); + STATUS_CHECK(CheckDBConfigArchiveDiskThreshold(value)); return SetConfigValueInMem(CONFIG_DB, CONFIG_DB_ARCHIVE_DISK_THRESHOLD, value); } Status Config::SetDBConfigArchiveDaysThreshold(const std::string& value) { - CONFIG_CHECK(CheckDBConfigArchiveDaysThreshold(value)); + STATUS_CHECK(CheckDBConfigArchiveDaysThreshold(value)); return SetConfigValueInMem(CONFIG_DB, CONFIG_DB_ARCHIVE_DAYS_THRESHOLD, value); } Status Config::SetDBConfigAutoFlushInterval(const std::string& value) { - CONFIG_CHECK(CheckDBConfigAutoFlushInterval(value)); + STATUS_CHECK(CheckDBConfigAutoFlushInterval(value)); return SetConfigValueInMem(CONFIG_DB, CONFIG_DB_AUTO_FLUSH_INTERVAL, value); } /* storage config */ Status Config::SetStorageConfigPrimaryPath(const std::string& value) { - CONFIG_CHECK(CheckStorageConfigPrimaryPath(value)); + STATUS_CHECK(CheckStorageConfigPrimaryPath(value)); return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_PRIMARY_PATH, value); } Status Config::SetStorageConfigSecondaryPath(const std::string& value) { - CONFIG_CHECK(CheckStorageConfigSecondaryPath(value)); + STATUS_CHECK(CheckStorageConfigSecondaryPath(value)); return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_SECONDARY_PATH, value); } Status -Config::SetStorageConfigS3Enable(const std::string& value) { - CONFIG_CHECK(CheckStorageConfigS3Enable(value)); - return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_ENABLE, value); +Config::SetStorageConfigFileCleanupTimeout(const std::string& value) { + STATUS_CHECK(CheckStorageConfigFileCleanupTimeout(value)); + return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_FILE_CLEANUP_TIMEOUT, value); } 
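Assuming the Config singleton interface declared in Config.h, the new cleanup-timeout option would round-trip roughly as below. This is a usage sketch, not code from the patch; note the getter is spelled GetStorageConfigFileCleanupTimeup in this changeset, so the sketch keeps that spelling:

```cpp
#include <cstdint>
// #include "config/Config.h"  // Config singleton from this patch

// Illustrative usage only; "30" is an arbitrary example value.
void ConfigureCleanupTimeout() {
    auto& config = milvus::server::Config::GetInstance();
    auto status = config.SetStorageConfigFileCleanupTimeout("30");
    if (status.ok()) {
        int64_t timeout = 0;
        config.GetStorageConfigFileCleanupTimeup(timeout);  // getter spelling as declared
        // timeout == 30: segment files are hard-deleted 30s after being soft-deleted
    }
}
```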
-Status -Config::SetStorageConfigS3Address(const std::string& value) { - CONFIG_CHECK(CheckStorageConfigS3Address(value)); - return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_ADDRESS, value); -} - -Status -Config::SetStorageConfigS3Port(const std::string& value) { - CONFIG_CHECK(CheckStorageConfigS3Port(value)); - return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_PORT, value); -} - -Status -Config::SetStorageConfigS3AccessKey(const std::string& value) { - CONFIG_CHECK(CheckStorageConfigS3AccessKey(value)); - return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_ACCESS_KEY, value); -} - -Status -Config::SetStorageConfigS3SecretKey(const std::string& value) { - CONFIG_CHECK(CheckStorageConfigS3SecretKey(value)); - return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_SECRET_KEY, value); -} - -Status -Config::SetStorageConfigS3Bucket(const std::string& value) { - CONFIG_CHECK(CheckStorageConfigS3Bucket(value)); - return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_BUCKET, value); -} +// Status +// Config::SetStorageConfigS3Enable(const std::string& value) { +// STATUS_CHECK(CheckStorageConfigS3Enable(value)); +// return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_ENABLE, value); +// } +// +// Status +// Config::SetStorageConfigS3Address(const std::string& value) { +// STATUS_CHECK(CheckStorageConfigS3Address(value)); +// return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_ADDRESS, value); +// } +// +// Status +// Config::SetStorageConfigS3Port(const std::string& value) { +// STATUS_CHECK(CheckStorageConfigS3Port(value)); +// return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_PORT, value); +// } +// +// Status +// Config::SetStorageConfigS3AccessKey(const std::string& value) { +// STATUS_CHECK(CheckStorageConfigS3AccessKey(value)); +// return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_ACCESS_KEY, value); +// } +// +// Status +// Config::SetStorageConfigS3SecretKey(const std::string& value) { +// STATUS_CHECK(CheckStorageConfigS3SecretKey(value)); +// return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_SECRET_KEY, value); +// } +// +// Status +// Config::SetStorageConfigS3Bucket(const std::string& value) { +// STATUS_CHECK(CheckStorageConfigS3Bucket(value)); +// return SetConfigValueInMem(CONFIG_STORAGE, CONFIG_STORAGE_S3_BUCKET, value); +// } /* metric config */ Status Config::SetMetricConfigEnableMonitor(const std::string& value) { - CONFIG_CHECK(CheckMetricConfigEnableMonitor(value)); + STATUS_CHECK(CheckMetricConfigEnableMonitor(value)); return SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_ENABLE_MONITOR, value); } Status Config::SetMetricConfigAddress(const std::string& value) { - CONFIG_CHECK(CheckMetricConfigAddress(value)); + STATUS_CHECK(CheckMetricConfigAddress(value)); return SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_ADDRESS, value); } Status Config::SetMetricConfigPort(const std::string& value) { - CONFIG_CHECK(CheckMetricConfigPort(value)); + STATUS_CHECK(CheckMetricConfigPort(value)); return SetConfigValueInMem(CONFIG_METRIC, CONFIG_METRIC_PORT, value); } /* cache config */ Status Config::SetCacheConfigCpuCacheCapacity(const std::string& value) { - CONFIG_CHECK(CheckCacheConfigCpuCacheCapacity(value)); - CONFIG_CHECK(SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CPU_CACHE_CAPACITY, value)); + STATUS_CHECK(CheckCacheConfigCpuCacheCapacity(value)); + STATUS_CHECK(SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CPU_CACHE_CAPACITY, value)); return ExecCallBacks(CONFIG_CACHE, 
CONFIG_CACHE_CPU_CACHE_CAPACITY, value); } Status Config::SetCacheConfigCpuCacheThreshold(const std::string& value) { - CONFIG_CHECK(CheckCacheConfigCpuCacheThreshold(value)); + STATUS_CHECK(CheckCacheConfigCpuCacheThreshold(value)); return SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CPU_CACHE_THRESHOLD, value); } Status Config::SetCacheConfigInsertBufferSize(const std::string& value) { - CONFIG_CHECK(CheckCacheConfigInsertBufferSize(value)); - CONFIG_CHECK(SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_INSERT_BUFFER_SIZE, value)); + STATUS_CHECK(CheckCacheConfigInsertBufferSize(value)); + STATUS_CHECK(SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_INSERT_BUFFER_SIZE, value)); return ExecCallBacks(CONFIG_CACHE, CONFIG_CACHE_INSERT_BUFFER_SIZE, value); } Status Config::SetCacheConfigCacheInsertData(const std::string& value) { - CONFIG_CHECK(CheckCacheConfigCacheInsertData(value)); - CONFIG_CHECK(SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CACHE_INSERT_DATA, value)); + STATUS_CHECK(CheckCacheConfigCacheInsertData(value)); + STATUS_CHECK(SetConfigValueInMem(CONFIG_CACHE, CONFIG_CACHE_CACHE_INSERT_DATA, value)); return ExecCallBacks(CONFIG_CACHE, CONFIG_CACHE_CACHE_INSERT_DATA, value); } /* engine config */ Status Config::SetEngineConfigUseBlasThreshold(const std::string& value) { - CONFIG_CHECK(CheckEngineConfigUseBlasThreshold(value)); - CONFIG_CHECK(SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_USE_BLAS_THRESHOLD, value)); + STATUS_CHECK(CheckEngineConfigUseBlasThreshold(value)); + STATUS_CHECK(SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_USE_BLAS_THRESHOLD, value)); return ExecCallBacks(CONFIG_ENGINE, CONFIG_ENGINE_USE_BLAS_THRESHOLD, value); } Status Config::SetEngineConfigOmpThreadNum(const std::string& value) { - CONFIG_CHECK(CheckEngineConfigOmpThreadNum(value)); + STATUS_CHECK(CheckEngineConfigOmpThreadNum(value)); return SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_OMP_THREAD_NUM, value); } Status Config::SetEngineConfigSimdType(const std::string& value) { - CONFIG_CHECK(CheckEngineConfigSimdType(value)); + STATUS_CHECK(CheckEngineConfigSimdType(value)); return SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_SIMD_TYPE, value); } -/* tracing config */ -Status -Config::SetTracingConfigJsonConfigPath(const std::string& value) { - CONFIG_CHECK(CheckTracingConfigJsonConfigPath(value)); - return SetConfigValueInMem(CONFIG_TRACING, CONFIG_TRACING_JSON_CONFIG_PATH, value); -} - -/* wal config */ -Status -Config::SetWalConfigEnable(const std::string& value) { - CONFIG_CHECK(CheckWalConfigEnable(value)); - return SetConfigValueInMem(CONFIG_WAL, CONFIG_WAL_ENABLE, value); -} - -Status -Config::SetWalConfigRecoveryErrorIgnore(const std::string& value) { - CONFIG_CHECK(CheckWalConfigRecoveryErrorIgnore(value)); - return SetConfigValueInMem(CONFIG_WAL, CONFIG_WAL_RECOVERY_ERROR_IGNORE, value); -} - -Status -Config::SetWalConfigBufferSize(const std::string& value) { - CONFIG_CHECK(CheckWalConfigBufferSize(value)); - return SetConfigValueInMem(CONFIG_WAL, CONFIG_WAL_BUFFER_SIZE, value); -} - -Status -Config::SetWalConfigWalPath(const std::string& value) { - CONFIG_CHECK(CheckWalConfigWalPath(value)); - return SetConfigValueInMem(CONFIG_WAL, CONFIG_WAL_WAL_PATH, value); -} - -/* logs config */ -Status -Config::SetLogsTraceEnable(const std::string& value) { - CONFIG_CHECK(CheckLogsTraceEnable(value)); - return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_TRACE_ENABLE, value); -} - -Status -Config::SetLogsDebugEnable(const std::string& value) { - 
CONFIG_CHECK(CheckLogsDebugEnable(value)); - return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_DEBUG_ENABLE, value); -} - -Status -Config::SetLogsInfoEnable(const std::string& value) { - CONFIG_CHECK(CheckLogsInfoEnable(value)); - return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_INFO_ENABLE, value); -} - -Status -Config::SetLogsWarningEnable(const std::string& value) { - CONFIG_CHECK(CheckLogsWarningEnable(value)); - return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_WARNING_ENABLE, value); -} - -Status -Config::SetLogsErrorEnable(const std::string& value) { - CONFIG_CHECK(CheckLogsErrorEnable(value)); - return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_ERROR_ENABLE, value); -} - -Status -Config::SetLogsFatalEnable(const std::string& value) { - CONFIG_CHECK(CheckLogsFatalEnable(value)); - return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_FATAL_ENABLE, value); -} - -Status -Config::SetLogsPath(const std::string& value) { - CONFIG_CHECK(CheckLogsPath(value)); - return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_PATH, value); -} - -Status -Config::SetLogsMaxLogFileSize(const std::string& value) { - CONFIG_CHECK(CheckLogsMaxLogFileSize(value)); - return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_MAX_LOG_FILE_SIZE, value); -} - -Status -Config::SetLogsDeleteExceeds(const std::string& value) { - CONFIG_CHECK(CheckLogsDeleteExceeds(value)); - return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_DELETE_EXCEEDS, value); -} - #ifdef MILVUS_GPU_VERSION Status Config::SetEngineConfigGpuSearchThreshold(const std::string& value) { - CONFIG_CHECK(CheckEngineConfigGpuSearchThreshold(value)); - CONFIG_CHECK(SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD, value)); + STATUS_CHECK(CheckEngineConfigGpuSearchThreshold(value)); + STATUS_CHECK(SetConfigValueInMem(CONFIG_ENGINE, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD, value)); return ExecCallBacks(CONFIG_ENGINE, CONFIG_ENGINE_GPU_SEARCH_THRESHOLD, value); } #endif /* gpu resource config */ #ifdef MILVUS_GPU_VERSION - Status Config::SetGpuResourceConfigEnable(const std::string& value) { - CONFIG_CHECK(CheckGpuResourceConfigEnable(value)); - CONFIG_CHECK(SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_ENABLE, value)); + STATUS_CHECK(CheckGpuResourceConfigEnable(value)); + STATUS_CHECK(SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_ENABLE, value)); return ExecCallBacks(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_ENABLE, value); } Status Config::SetGpuResourceConfigCacheCapacity(const std::string& value) { - CONFIG_CHECK(CheckGpuResourceConfigCacheCapacity(value)); - CONFIG_CHECK(SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_CAPACITY, value)); + STATUS_CHECK(CheckGpuResourceConfigCacheCapacity(value)); + STATUS_CHECK(SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_CAPACITY, value)); return ExecCallBacks(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_CAPACITY, value); } Status Config::SetGpuResourceConfigCacheThreshold(const std::string& value) { - CONFIG_CHECK(CheckGpuResourceConfigCacheThreshold(value)); + STATUS_CHECK(CheckGpuResourceConfigCacheThreshold(value)); return SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_CACHE_THRESHOLD, value); } @@ -2533,8 +2481,8 @@ Status Config::SetGpuResourceConfigSearchResources(const std::string& value) { std::vector res_vec; server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); - CONFIG_CHECK(CheckGpuResourceConfigSearchResources(res_vec)); - 
CONFIG_CHECK(SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_SEARCH_RESOURCES, value)); + STATUS_CHECK(CheckGpuResourceConfigSearchResources(res_vec)); + STATUS_CHECK(SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_SEARCH_RESOURCES, value)); return ExecCallBacks(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_SEARCH_RESOURCES, value); } @@ -2542,12 +2490,98 @@ Status Config::SetGpuResourceConfigBuildIndexResources(const std::string& value) { std::vector res_vec; server::StringHelpFunctions::SplitStringByDelimeter(value, CONFIG_GPU_RESOURCE_DELIMITER, res_vec); - CONFIG_CHECK(CheckGpuResourceConfigBuildIndexResources(res_vec)); - CONFIG_CHECK(SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES, value)); + STATUS_CHECK(CheckGpuResourceConfigBuildIndexResources(res_vec)); + STATUS_CHECK(SetConfigValueInMem(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES, value)); return ExecCallBacks(CONFIG_GPU_RESOURCE, CONFIG_GPU_RESOURCE_BUILD_INDEX_RESOURCES, value); } - #endif +/* tracing config */ +Status +Config::SetTracingConfigJsonConfigPath(const std::string& value) { + STATUS_CHECK(CheckTracingConfigJsonConfigPath(value)); + return SetConfigValueInMem(CONFIG_TRACING, CONFIG_TRACING_JSON_CONFIG_PATH, value); +} + +/* wal config */ +Status +Config::SetWalConfigEnable(const std::string& value) { + STATUS_CHECK(CheckWalConfigEnable(value)); + return SetConfigValueInMem(CONFIG_WAL, CONFIG_WAL_ENABLE, value); +} + +Status +Config::SetWalConfigRecoveryErrorIgnore(const std::string& value) { + STATUS_CHECK(CheckWalConfigRecoveryErrorIgnore(value)); + return SetConfigValueInMem(CONFIG_WAL, CONFIG_WAL_RECOVERY_ERROR_IGNORE, value); +} + +Status +Config::SetWalConfigBufferSize(const std::string& value) { + STATUS_CHECK(CheckWalConfigBufferSize(value)); + return SetConfigValueInMem(CONFIG_WAL, CONFIG_WAL_BUFFER_SIZE, value); +} + +Status +Config::SetWalConfigWalPath(const std::string& value) { + STATUS_CHECK(CheckWalConfigWalPath(value)); + return SetConfigValueInMem(CONFIG_WAL, CONFIG_WAL_WAL_PATH, value); +} + +/* logs config */ +Status +Config::SetLogsTraceEnable(const std::string& value) { + STATUS_CHECK(CheckLogsTraceEnable(value)); + return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_TRACE_ENABLE, value); +} + +Status +Config::SetLogsDebugEnable(const std::string& value) { + STATUS_CHECK(CheckLogsDebugEnable(value)); + return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_DEBUG_ENABLE, value); +} + +Status +Config::SetLogsInfoEnable(const std::string& value) { + STATUS_CHECK(CheckLogsInfoEnable(value)); + return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_INFO_ENABLE, value); +} + +Status +Config::SetLogsWarningEnable(const std::string& value) { + STATUS_CHECK(CheckLogsWarningEnable(value)); + return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_WARNING_ENABLE, value); +} + +Status +Config::SetLogsErrorEnable(const std::string& value) { + STATUS_CHECK(CheckLogsErrorEnable(value)); + return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_ERROR_ENABLE, value); +} + +Status +Config::SetLogsFatalEnable(const std::string& value) { + STATUS_CHECK(CheckLogsFatalEnable(value)); + return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_FATAL_ENABLE, value); +} + +Status +Config::SetLogsPath(const std::string& value) { + STATUS_CHECK(CheckLogsPath(value)); + return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_PATH, value); +} + +Status +Config::SetLogsMaxLogFileSize(const std::string& value) { + STATUS_CHECK(CheckLogsMaxLogFileSize(value)); + return 
SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_MAX_LOG_FILE_SIZE, value); +} + +Status +Config::SetLogsDeleteExceeds(const std::string& value) { + STATUS_CHECK(CheckLogsDeleteExceeds(value)); + return SetConfigValueInMem(CONFIG_LOGS, CONFIG_LOGS_DELETE_EXCEEDS, value); +} + } // namespace server } // namespace milvus diff --git a/core/src/config/Config.h b/core/src/config/Config.h index 6bdd28f471..921b7193ef 100644 --- a/core/src/config/Config.h +++ b/core/src/config/Config.h @@ -25,14 +25,6 @@ namespace server { using ConfigCallBackF = std::function; -#define CONFIG_CHECK(func) \ - do { \ - Status s = func; \ - if (!s.ok()) { \ - return s; \ - } \ - } while (false) - extern const char* CONFIG_NODE_DELIMITER; extern const char* CONFIG_VERSION; @@ -70,18 +62,19 @@ extern const char* CONFIG_STORAGE_PRIMARY_PATH; extern const char* CONFIG_STORAGE_PRIMARY_PATH_DEFAULT; extern const char* CONFIG_STORAGE_SECONDARY_PATH; extern const char* CONFIG_STORAGE_SECONDARY_PATH_DEFAULT; -extern const char* CONFIG_STORAGE_S3_ENABLE; -extern const char* CONFIG_STORAGE_S3_ENABLE_DEFAULT; -extern const char* CONFIG_STORAGE_S3_ADDRESS; -extern const char* CONFIG_STORAGE_S3_ADDRESS_DEFAULT; -extern const char* CONFIG_STORAGE_S3_PORT; -extern const char* CONFIG_STORAGE_S3_PORT_DEFAULT; -extern const char* CONFIG_STORAGE_S3_ACCESS_KEY; -extern const char* CONFIG_STORAGE_S3_ACCESS_KEY_DEFAULT; -extern const char* CONFIG_STORAGE_S3_SECRET_KEY; -extern const char* CONFIG_STORAGE_S3_SECRET_KEY_DEFAULT; -extern const char* CONFIG_STORAGE_S3_BUCKET; -extern const char* CONFIG_STORAGE_S3_BUCKET_DEFAULT; +extern const char* CONFIG_STORAGE_FILE_CLEANUP_TIMEOUT; +// extern const char* CONFIG_STORAGE_S3_ENABLE; +// extern const char* CONFIG_STORAGE_S3_ENABLE_DEFAULT; +// extern const char* CONFIG_STORAGE_S3_ADDRESS; +// extern const char* CONFIG_STORAGE_S3_ADDRESS_DEFAULT; +// extern const char* CONFIG_STORAGE_S3_PORT; +// extern const char* CONFIG_STORAGE_S3_PORT_DEFAULT; +// extern const char* CONFIG_STORAGE_S3_ACCESS_KEY; +// extern const char* CONFIG_STORAGE_S3_ACCESS_KEY_DEFAULT; +// extern const char* CONFIG_STORAGE_S3_SECRET_KEY; +// extern const char* CONFIG_STORAGE_S3_SECRET_KEY_DEFAULT; +// extern const char* CONFIG_STORAGE_S3_BUCKET; +// extern const char* CONFIG_STORAGE_S3_BUCKET_DEFAULT; /* cache config */ extern const char* CONFIG_CACHE; @@ -117,11 +110,7 @@ extern const char* CONFIG_ENGINE_GPU_SEARCH_THRESHOLD_DEFAULT; /* gpu resource config */ extern const char* CONFIG_GPU_RESOURCE; extern const char* CONFIG_GPU_RESOURCE_ENABLE; -#ifdef MILVUS_GPU_VERSION extern const char* CONFIG_GPU_RESOURCE_ENABLE_DEFAULT; -#else -extern const char* CONFIG_GPU_RESOURCE_ENABLE_DEFAULT; -#endif extern const char* CONFIG_GPU_RESOURCE_CACHE_CAPACITY; extern const char* CONFIG_GPU_RESOURCE_CACHE_CAPACITY_DEFAULT; extern const char* CONFIG_GPU_RESOURCE_CACHE_THRESHOLD; @@ -256,17 +245,19 @@ class Config { Status CheckStorageConfigSecondaryPath(const std::string& value); Status - CheckStorageConfigS3Enable(const std::string& value); - Status - CheckStorageConfigS3Address(const std::string& value); - Status - CheckStorageConfigS3Port(const std::string& value); - Status - CheckStorageConfigS3AccessKey(const std::string& value); - Status - CheckStorageConfigS3SecretKey(const std::string& value); - Status - CheckStorageConfigS3Bucket(const std::string& value); + CheckStorageConfigFileCleanupTimeout(const std::string& value); + // Status + // CheckStorageConfigS3Enable(const std::string& value); + // Status + // 
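The CONFIG_CHECK macro deleted just above is replaced throughout by STATUS_CHECK, whose definition is not part of this diff; it presumably lives in a shared utils header. If it mirrors the removed macro one-for-one, it would read:

```cpp
// Presumed shape of STATUS_CHECK, modeled on the deleted CONFIG_CHECK;
// the real definition is outside this diff.
#define STATUS_CHECK(func) \
    do {                   \
        Status s = func;   \
        if (!s.ok()) {     \
            return s;      \
        }                  \
    } while (false)
```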
CheckStorageConfigS3Address(const std::string& value); + // Status + // CheckStorageConfigS3Port(const std::string& value); + // Status + // CheckStorageConfigS3AccessKey(const std::string& value); + // Status + // CheckStorageConfigS3SecretKey(const std::string& value); + // Status + // CheckStorageConfigS3Bucket(const std::string& value); /* metric config */ Status @@ -389,17 +380,19 @@ class Config { Status GetStorageConfigSecondaryPath(std::string& value); Status - GetStorageConfigS3Enable(bool& value); - Status - GetStorageConfigS3Address(std::string& value); - Status - GetStorageConfigS3Port(std::string& value); - Status - GetStorageConfigS3AccessKey(std::string& value); - Status - GetStorageConfigS3SecretKey(std::string& value); - Status - GetStorageConfigS3Bucket(std::string& value); + GetStorageConfigFileCleanupTimeup(int64_t& value); + // Status + // GetStorageConfigS3Enable(bool& value); + // Status + // GetStorageConfigS3Address(std::string& value); + // Status + // GetStorageConfigS3Port(std::string& value); + // Status + // GetStorageConfigS3AccessKey(std::string& value); + // Status + // GetStorageConfigS3SecretKey(std::string& value); + // Status + // GetStorageConfigS3Bucket(std::string& value); /* metric config */ Status @@ -514,17 +507,19 @@ class Config { Status SetStorageConfigSecondaryPath(const std::string& value); Status - SetStorageConfigS3Enable(const std::string& value); - Status - SetStorageConfigS3Address(const std::string& value); - Status - SetStorageConfigS3Port(const std::string& value); - Status - SetStorageConfigS3AccessKey(const std::string& value); - Status - SetStorageConfigS3SecretKey(const std::string& value); - Status - SetStorageConfigS3Bucket(const std::string& value); + SetStorageConfigFileCleanupTimeout(const std::string& value); + // Status + // SetStorageConfigS3Enable(const std::string& value); + // Status + // SetStorageConfigS3Address(const std::string& value); + // Status + // SetStorageConfigS3Port(const std::string& value); + // Status + // SetStorageConfigS3AccessKey(const std::string& value); + // Status + // SetStorageConfigS3SecretKey(const std::string& value); + // Status + // SetStorageConfigS3Bucket(const std::string& value); /* metric config */ Status @@ -551,6 +546,22 @@ class Config { SetEngineConfigOmpThreadNum(const std::string& value); Status SetEngineConfigSimdType(const std::string& value); +#ifdef MILVUS_GPU_VERSION + Status + SetEngineConfigGpuSearchThreshold(const std::string& value); + + /* gpu resource config */ + Status + SetGpuResourceConfigEnable(const std::string& value); + Status + SetGpuResourceConfigCacheCapacity(const std::string& value); + Status + SetGpuResourceConfigCacheThreshold(const std::string& value); + Status + SetGpuResourceConfigSearchResources(const std::string& value); + Status + SetGpuResourceConfigBuildIndexResources(const std::string& value); +#endif /* tracing config */ Status @@ -586,23 +597,6 @@ class Config { Status SetLogsDeleteExceeds(const std::string& value); -#ifdef MILVUS_GPU_VERSION - Status - SetEngineConfigGpuSearchThreshold(const std::string& value); - - /* gpu resource config */ - Status - SetGpuResourceConfigEnable(const std::string& value); - Status - SetGpuResourceConfigCacheCapacity(const std::string& value); - Status - SetGpuResourceConfigCacheThreshold(const std::string& value); - Status - SetGpuResourceConfigSearchResources(const std::string& value); - Status - SetGpuResourceConfigBuildIndexResources(const std::string& value); -#endif - private: bool restart_required_ = 
false; std::string config_file_; diff --git a/core/src/db/DB.h b/core/src/db/DB.h index 94057ccb5d..3cc00d5eab 100644 --- a/core/src/db/DB.h +++ b/core/src/db/DB.h @@ -56,10 +56,10 @@ class DB { DescribeCollection(meta::CollectionSchema& table_schema_) = 0; virtual Status - HasCollection(const std::string& collection_id, bool& has_or_not_) = 0; + HasCollection(const std::string& collection_id, bool& has_or_not) = 0; virtual Status - HasNativeCollection(const std::string& collection_id, bool& has_or_not_) = 0; + HasNativeCollection(const std::string& collection_id, bool& has_or_not) = 0; virtual Status AllCollections(std::vector& table_schema_array) = 0; diff --git a/core/src/db/DBImpl.cpp b/core/src/db/DBImpl.cpp index 7216c3693e..b0abd4abec 100644 --- a/core/src/db/DBImpl.cpp +++ b/core/src/db/DBImpl.cpp @@ -31,6 +31,7 @@ #include "cache/CpuCacheMgr.h" #include "cache/GpuCacheMgr.h" #include "db/IDGenerator.h" +#include "db/merge/MergeManagerFactory.h" #include "engine/EngineFactory.h" #include "index/thirdparty/faiss/utils/distances.h" #include "insert/MemManagerFactory.h" @@ -78,6 +79,7 @@ DBImpl::DBImpl(const DBOptions& options) : options_(options), initialized_(false), merge_thread_pool_(1, 1), index_thread_pool_(1, 1) { meta_ptr_ = MetaFactory::Build(options.meta_, options.mode_); mem_mgr_ = MemManagerFactory::Build(meta_ptr_, options_); + merge_mgr_ptr_ = MergeManagerFactory::Build(meta_ptr_, options_); if (options_.wal_enable_) { wal::MXLogConfiguration mxlog_config; @@ -275,30 +277,16 @@ DBImpl::HasCollection(const std::string& collection_id, bool& has_or_not) { return SHUTDOWN_ERROR; } - return meta_ptr_->HasCollection(collection_id, has_or_not); + return meta_ptr_->HasCollection(collection_id, has_or_not, false); } Status -DBImpl::HasNativeCollection(const std::string& collection_id, bool& has_or_not_) { +DBImpl::HasNativeCollection(const std::string& collection_id, bool& has_or_not) { if (!initialized_.load(std::memory_order_acquire)) { return SHUTDOWN_ERROR; } - engine::meta::CollectionSchema collection_schema; - collection_schema.collection_id_ = collection_id; - auto status = DescribeCollection(collection_schema); - if (!status.ok()) { - has_or_not_ = false; - return status; - } else { - if (!collection_schema.owner_collection_.empty()) { - has_or_not_ = false; - return Status(DB_NOT_FOUND, ""); - } - - has_or_not_ = true; - return Status::OK(); - } + return meta_ptr_->HasCollection(collection_id, has_or_not, true); } Status @@ -1920,101 +1908,6 @@ DBImpl::StartMergeTask() { // LOG_ENGINE_DEBUG_ << "End StartMergeTask"; } -Status -DBImpl::MergeFiles(const std::string& collection_id, meta::FilesHolder& files_holder) { - // const std::lock_guard lock(flush_merge_compact_mutex_); - - LOG_ENGINE_DEBUG_ << "Merge files for collection: " << collection_id; - - // step 1: create collection file - meta::SegmentSchema collection_file; - collection_file.collection_id_ = collection_id; - collection_file.file_type_ = meta::SegmentSchema::NEW_MERGE; - Status status = meta_ptr_->CreateCollectionFile(collection_file); - if (!status.ok()) { - LOG_ENGINE_ERROR_ << "Failed to create collection: " << status.ToString(); - return status; - } - - // step 2: merge files - /* - ExecutionEnginePtr index = - EngineFactory::Build(collection_file.dimension_, collection_file.location_, - (EngineType)collection_file.engine_type_, (MetricType)collection_file.metric_type_, collection_file.nlist_); -*/ - meta::SegmentsSchema updated; - - std::string new_segment_dir; - 
utils::GetParentPath(collection_file.location_, new_segment_dir); - auto segment_writer_ptr = std::make_shared(new_segment_dir); - - // attention: here is a copy, not reference, since files_holder.UnmarkFile will change the array internal - milvus::engine::meta::SegmentsSchema files = files_holder.HoldFiles(); - for (auto& file : files) { - server::CollectMergeFilesMetrics metrics; - std::string segment_dir_to_merge; - utils::GetParentPath(file.location_, segment_dir_to_merge); - segment_writer_ptr->Merge(segment_dir_to_merge, collection_file.file_id_); - - files_holder.UnmarkFile(file); - - auto file_schema = file; - file_schema.file_type_ = meta::SegmentSchema::TO_DELETE; - updated.push_back(file_schema); - auto size = segment_writer_ptr->Size(); - if (size >= file_schema.index_file_size_) { - break; - } - } - - // step 3: serialize to disk - try { - status = segment_writer_ptr->Serialize(); - fiu_do_on("DBImpl.MergeFiles.Serialize_ThrowException", throw std::exception()); - fiu_do_on("DBImpl.MergeFiles.Serialize_ErrorStatus", status = Status(DB_ERROR, "")); - } catch (std::exception& ex) { - std::string msg = "Serialize merged index encounter exception: " + std::string(ex.what()); - LOG_ENGINE_ERROR_ << msg; - status = Status(DB_ERROR, msg); - } - - if (!status.ok()) { - LOG_ENGINE_ERROR_ << "Failed to persist merged segment: " << new_segment_dir << ". Error: " << status.message(); - - // if failed to serialize merge file to disk - // typical error: out of disk space, out of memory or permission denied - collection_file.file_type_ = meta::SegmentSchema::TO_DELETE; - status = meta_ptr_->UpdateCollectionFile(collection_file); - LOG_ENGINE_DEBUG_ << "Failed to update file to index, mark file: " << collection_file.file_id_ - << " to to_delete"; - - return status; - } - - // step 4: update collection files state - // if index type isn't IDMAP, set file type to TO_INDEX if file size exceed index_file_size - // else set file type to RAW, no need to build index - if (!utils::IsRawIndexType(collection_file.engine_type_)) { - collection_file.file_type_ = (segment_writer_ptr->Size() >= collection_file.index_file_size_) - ? 
meta::SegmentSchema::TO_INDEX - : meta::SegmentSchema::RAW; - } else { - collection_file.file_type_ = meta::SegmentSchema::RAW; - } - collection_file.file_size_ = segment_writer_ptr->Size(); - collection_file.row_count_ = segment_writer_ptr->VectorCount(); - updated.push_back(collection_file); - status = meta_ptr_->UpdateCollectionFiles(updated); - LOG_ENGINE_DEBUG_ << "New merged segment " << collection_file.segment_id_ << " of size " - << segment_writer_ptr->Size() << " bytes"; - - if (options_.insert_cache_immediately_) { - segment_writer_ptr->Cache(); - } - - return status; -} - Status DBImpl::MergeHybridFiles(const std::string& collection_id, meta::FilesHolder& files_holder) { // const std::lock_guard lock(flush_merge_compact_mutex_); @@ -2110,44 +2003,22 @@ DBImpl::MergeHybridFiles(const std::string& collection_id, meta::FilesHolder& fi return status; } -Status -DBImpl::BackgroundMergeFiles(const std::string& collection_id) { - const std::lock_guard lock(flush_merge_compact_mutex_); - - meta::FilesHolder files_holder; - auto status = meta_ptr_->FilesToMerge(collection_id, files_holder); - if (!status.ok()) { - LOG_ENGINE_ERROR_ << "Failed to get merge files for collection: " << collection_id; - return status; - } - - if (files_holder.HoldFiles().size() < options_.merge_trigger_number_) { - LOG_ENGINE_TRACE_ << "Files number not greater equal than merge trigger number, skip merge action"; - return Status::OK(); - } - - MergeFiles(collection_id, files_holder); - - if (!initialized_.load(std::memory_order_acquire)) { - LOG_ENGINE_DEBUG_ << "Server will shutdown, skip merge action for collection: " << collection_id; - } - - return Status::OK(); -} - void DBImpl::BackgroundMerge(std::set collection_ids) { // LOG_ENGINE_TRACE_ << " Background merge thread start"; Status status; for (auto& collection_id : collection_ids) { - status = BackgroundMergeFiles(collection_id); + const std::lock_guard lock(flush_merge_compact_mutex_); + + auto status = merge_mgr_ptr_->MergeFiles(collection_id); if (!status.ok()) { - LOG_ENGINE_ERROR_ << "Merge files for collection " << collection_id << " failed: " << status.ToString(); + LOG_ENGINE_ERROR_ << "Failed to get merge files for collection: " << collection_id + << " reason:" << status.message(); } if (!initialized_.load(std::memory_order_acquire)) { - LOG_ENGINE_DEBUG_ << "Server will shutdown, skip merge action"; + LOG_ENGINE_DEBUG_ << "Server will shutdown, skip merge action for collection: " << collection_id; break; } } @@ -2155,11 +2026,8 @@ DBImpl::BackgroundMerge(std::set collection_ids) { meta_ptr_->Archive(); { - uint64_t ttl = 10 * meta::SECOND; // default: file will be hard-deleted few seconds after soft-deleted - if (options_.mode_ == DBOptions::MODE::CLUSTER_WRITABLE) { - ttl = meta::HOUR; - } - + uint64_t timeout = (options_.file_cleanup_timeout_ > 0) ? 
options_.file_cleanup_timeout_ : 10; + uint64_t ttl = timeout * meta::SECOND; // default: file will be hard-deleted few seconds after soft-deleted meta_ptr_->CleanUpFilesWithTTL(ttl); } diff --git a/core/src/db/DBImpl.h b/core/src/db/DBImpl.h index 4e3b86fc8c..4843aaaa63 100644 --- a/core/src/db/DBImpl.h +++ b/core/src/db/DBImpl.h @@ -29,6 +29,7 @@ #include "db/IndexFailedChecker.h" #include "db/Types.h" #include "db/insert/MemManager.h" +#include "db/merge/MergeManager.h" #include "db/meta/FilesHolder.h" #include "utils/ThreadPool.h" #include "wal/WalManager.h" @@ -226,12 +227,6 @@ class DBImpl : public DB, public server::CacheConfigHandler, public server::Engi void StartMergeTask(); - Status - MergeFiles(const std::string& collection_id, meta::FilesHolder& files_holder); - - Status - BackgroundMergeFiles(const std::string& collection_id); - void BackgroundMerge(std::set collection_ids); @@ -290,6 +285,7 @@ class DBImpl : public DB, public server::CacheConfigHandler, public server::Engi meta::MetaPtr meta_ptr_; MemManagerPtr mem_mgr_; + MergeManagerPtr merge_mgr_ptr_; std::shared_ptr wal_mgr_; std::thread bg_wal_thread_; diff --git a/core/src/db/Options.h b/core/src/db/Options.h index 473b18614d..b446d049ec 100644 --- a/core/src/db/Options.h +++ b/core/src/db/Options.h @@ -72,6 +72,7 @@ struct DBOptions { bool insert_cache_immediately_ = false; int64_t auto_flush_interval_ = 1; + int64_t file_cleanup_timeout_ = 10; // wal relative configurations bool wal_enable_ = true; diff --git a/core/src/db/Utils.cpp b/core/src/db/Utils.cpp index 10bcdcd743..f7327f4096 100644 --- a/core/src/db/Utils.cpp +++ b/core/src/db/Utils.cpp @@ -158,15 +158,15 @@ GetCollectionFilePath(const DBMetaOptions& options, meta::SegmentSchema& table_f std::string parent_path = ConstructParentFolder(options.path_, table_file); std::string file_path = parent_path + "/" + table_file.file_id_; - bool s3_enable = false; - server::Config& config = server::Config::GetInstance(); - config.GetStorageConfigS3Enable(s3_enable); - fiu_do_on("GetCollectionFilePath.enable_s3", s3_enable = true); - if (s3_enable) { - /* need not check file existence */ - table_file.location_ = file_path; - return Status::OK(); - } + // bool s3_enable = false; + // server::Config& config = server::Config::GetInstance(); + // config.GetStorageConfigS3Enable(s3_enable); + // fiu_do_on("GetCollectionFilePath.enable_s3", s3_enable = true); + // if (s3_enable) { + // /* need not check file existence */ + // table_file.location_ = file_path; + // return Status::OK(); + // } if (boost::filesystem::exists(parent_path)) { table_file.location_ = file_path; diff --git a/core/src/db/engine/ExecutionEngineImpl.cpp b/core/src/db/engine/ExecutionEngineImpl.cpp index ecf8f007e6..ffe144dabb 100644 --- a/core/src/db/engine/ExecutionEngineImpl.cpp +++ b/core/src/db/engine/ExecutionEngineImpl.cpp @@ -465,7 +465,7 @@ ExecutionEngineImpl::Load(bool to_cache) { bool gpu_enable = false; #ifdef MILVUS_GPU_VERSION server::Config& config = server::Config::GetInstance(); - CONFIG_CHECK(config.GetGpuResourceConfigEnable(gpu_enable)); + STATUS_CHECK(config.GetGpuResourceConfigEnable(gpu_enable)); #endif if (!gpu_enable && index_->index_mode() == knowhere::IndexMode::MODE_GPU) { std::string err_msg = "Index with type " + index_->index_type() + " must be used in GPU mode"; diff --git a/core/src/db/merge/MergeLayeredStrategy.cpp b/core/src/db/merge/MergeLayeredStrategy.cpp new file mode 100644 index 0000000000..dddd114a89 --- /dev/null +++ b/core/src/db/merge/MergeLayeredStrategy.cpp 
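The BackgroundMerge change above replaces the hard-coded single-node/cluster TTLs with the configurable file_cleanup_timeout_. Restated as a pure function, with kSecond standing in for meta::SECOND (assumed to be microseconds per second, consistent with meta::US_PS in the strategy code below):

```cpp
#include <cstdint>

// Restates the TTL selection in DBImpl::BackgroundMerge; kSecond is an
// assumed stand-in for meta::SECOND (microseconds per second).
constexpr uint64_t kSecond = 1000ULL * 1000ULL;

uint64_t CleanupTtlMicros(int64_t file_cleanup_timeout) {
    // fall back to 10s when the option is unset or non-positive,
    // matching the `> 0 ? ... : 10` guard in the patch
    const uint64_t timeout = file_cleanup_timeout > 0
                                 ? static_cast<uint64_t>(file_cleanup_timeout)
                                 : 10;
    return timeout * kSecond;
}
```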
@@ -0,0 +1,138 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License. + +#include "db/merge/MergeLayeredStrategy.h" +#include "db/Utils.h" +#include "db/meta/MetaConsts.h" +#include "utils/Log.h" + +#include <map> +#include <vector> + +namespace milvus { +namespace engine { + +Status +MergeLayeredStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) { + using LayerGroups = std::map<uint64_t, meta::SegmentsSchema>; + // distribute files to groups according to file size (in bytes) + LayerGroups layers = { + {1UL << 22, meta::SegmentsSchema()}, // 4MB + {1UL << 24, meta::SegmentsSchema()}, // 16MB + {1UL << 26, meta::SegmentsSchema()}, // 64MB + {1UL << 28, meta::SegmentsSchema()}, // 256MB + {1UL << 30, meta::SegmentsSchema()}, // 1GB + }; + + meta::SegmentsSchema& files = files_holder.HoldFiles(); + meta::SegmentsSchema huge_files; + // iterate from the end, because typically the files_holder gets files in order from largest to smallest + for (meta::SegmentsSchema::reverse_iterator iter = files.rbegin(); iter != files.rend(); ++iter) { + meta::SegmentSchema& file = *iter; + if (file.index_file_size_ > 0 && file.file_size_ > file.index_file_size_) { + // release file that no need to merge + files_holder.UnmarkFile(file); + continue; + } + + bool match = false; + for (auto& pair : layers) { + if ((*iter).file_size_ < pair.first) { + pair.second.push_back(file); + match = true; + break; + } + } + + if (!match) { + huge_files.push_back(file); + } + } + + const int64_t force_merge_threshold = 60; // force merge files older than 1 minute + auto now = utils::GetMicroSecTimeStamp(); + meta::SegmentsSchema force_merge_file; + for (auto& pair : layers) { + // skip empty layer + if (pair.second.empty()) { + continue; + } + + // layer has multiple files, merge along with the force_merge_file + if (!force_merge_file.empty()) { + for (auto& file : force_merge_file) { + pair.second.push_back(file); + } + force_merge_file.clear(); + } + + // layer only has one file, if the file is too old, force merge it, else no need to merge it + if (pair.second.size() == 1) { + if (now - pair.second[0].created_on_ > force_merge_threshold * meta::US_PS) { + force_merge_file.push_back(pair.second[0]); + pair.second.clear(); + } + } + } + + // if force_merge_file is not allocated by any layer, combine it to huge_files + if (!force_merge_file.empty() && !huge_files.empty()) { + for (auto& file : force_merge_file) { + huge_files.push_back(file); + } + force_merge_file.clear(); + } + + // return result + for (auto& pair : layers) { + if (pair.second.size() == 1) { + // release file that no need to merge + files_holder.UnmarkFile(pair.second[0]); + } else if (pair.second.size() > 1) { + // create group + meta::SegmentsSchema temp_files; + temp_files.swap(pair.second); + files_groups.emplace_back(temp_files); + } + } + + if (huge_files.size() >= 1) { + meta::SegmentsSchema temp_files; + temp_files.swap(huge_files); + for (auto& file : force_merge_file) { + temp_files.push_back(file); + } + + if 
(temp_files.size() >= 2) { + // create group + files_groups.emplace_back(temp_files); + } else { + for (auto& file : huge_files) { + // release file that no need to merge + files_holder.UnmarkFile(file); + } + for (auto& file : force_merge_file) { + // release file that no need to merge + files_holder.UnmarkFile(file); + } + } + } else { + for (auto& file : force_merge_file) { + // release file that no need to merge + files_holder.UnmarkFile(file); + } + } + + return Status::OK(); +} + +} // namespace engine +} // namespace milvus diff --git a/core/src/db/merge/MergeLayeredStrategy.h b/core/src/db/merge/MergeLayeredStrategy.h new file mode 100644 index 0000000000..4442d6a359 --- /dev/null +++ b/core/src/db/merge/MergeLayeredStrategy.h @@ -0,0 +1,29 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License. + +#pragma once + +#include + +#include "db/merge/MergeStrategy.h" +#include "utils/Status.h" + +namespace milvus { +namespace engine { + +class MergeLayeredStrategy : public MergeStrategy { + public: + Status + RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) override; +}; // MergeLayeredStrategy + +} // namespace engine +} // namespace milvus diff --git a/core/src/db/merge/MergeManager.h b/core/src/db/merge/MergeManager.h new file mode 100644 index 0000000000..698e64e5be --- /dev/null +++ b/core/src/db/merge/MergeManager.h @@ -0,0 +1,43 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include "db/Types.h" +#include "db/meta/FilesHolder.h" +#include "utils/Status.h" + +namespace milvus { +namespace engine { + +enum class MergeStrategyType { + SIMPLE = 1, + LAYERED = 2, +}; + +class MergeManager { + public: + virtual Status + UseStrategy(MergeStrategyType type) = 0; + virtual Status + MergeFiles(const std::string& collection_id) = 0; +}; // MergeManager + +using MergeManagerPtr = std::shared_ptr; + +} // namespace engine +} // namespace milvus diff --git a/core/src/db/merge/MergeManagerFactory.cpp b/core/src/db/merge/MergeManagerFactory.cpp new file mode 100644 index 0000000000..b4e2e430ce --- /dev/null +++ b/core/src/db/merge/MergeManagerFactory.cpp @@ -0,0 +1,26 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. 
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#include "db/merge/MergeManagerFactory.h"
+#include "db/merge/MergeManagerImpl.h"
+#include "utils/Exception.h"
+#include "utils/Log.h"
+
+namespace milvus {
+namespace engine {
+
+MergeManagerPtr
+MergeManagerFactory::Build(const meta::MetaPtr& meta_ptr, const DBOptions& options) {
+    return std::make_shared<MergeManagerImpl>(meta_ptr, options, MergeStrategyType::SIMPLE);
+}
+
+}  // namespace engine
+}  // namespace milvus
diff --git a/core/src/db/merge/MergeManagerFactory.h b/core/src/db/merge/MergeManagerFactory.h
new file mode 100644
index 0000000000..533a321161
--- /dev/null
+++ b/core/src/db/merge/MergeManagerFactory.h
@@ -0,0 +1,29 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#pragma once
+
+#include "MergeManager.h"
+#include "db/Options.h"
+
+#include
+
+namespace milvus {
+namespace engine {
+
+class MergeManagerFactory {
+ public:
+    static MergeManagerPtr
+    Build(const meta::MetaPtr& meta_ptr, const DBOptions& options);
+};
+
+}  // namespace engine
+}  // namespace milvus
diff --git a/core/src/db/merge/MergeManagerImpl.cpp b/core/src/db/merge/MergeManagerImpl.cpp
new file mode 100644
index 0000000000..3ba9667ab4
--- /dev/null
+++ b/core/src/db/merge/MergeManagerImpl.cpp
@@ -0,0 +1,89 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
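As a usage sketch only (not part of the patch): this shows how the factory and manager above are meant to compose; the meta_ptr/options arguments and the collection id are hypothetical placeholders.

// Illustrative sketch, assuming a meta_ptr and DBOptions obtained elsewhere.
#include "db/merge/MergeManagerFactory.h"

void MergeCollectionSketch(const milvus::engine::meta::MetaPtr& meta_ptr,
                           const milvus::engine::DBOptions& options) {
    using namespace milvus::engine;
    // Build() currently hard-codes MergeStrategyType::SIMPLE ...
    MergeManagerPtr manager = MergeManagerFactory::Build(meta_ptr, options);
    // ... but a different strategy can be selected at runtime:
    manager->UseStrategy(MergeStrategyType::LAYERED);
    // regroup and merge the files of one collection (id is hypothetical)
    Status s = manager->MergeFiles("my_collection");
    if (!s.ok()) {
        // handle the error in the caller
    }
}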
+ +#include "db/merge/MergeManagerImpl.h" +#include "db/merge/MergeLayeredStrategy.h" +#include "db/merge/MergeSimpleStrategy.h" +#include "db/merge/MergeStrategy.h" +#include "db/merge/MergeTask.h" +#include "utils/Exception.h" +#include "utils/Log.h" + +namespace milvus { +namespace engine { + +MergeManagerImpl::MergeManagerImpl(const meta::MetaPtr& meta_ptr, const DBOptions& options, MergeStrategyType type) + : meta_ptr_(meta_ptr), options_(options) { + UseStrategy(type); +} + +Status +MergeManagerImpl::UseStrategy(MergeStrategyType type) { + switch (type) { + case MergeStrategyType::SIMPLE: { + strategy_ = std::make_shared(); + break; + } + case MergeStrategyType::LAYERED: { + strategy_ = std::make_shared(); + break; + } + default: { + std::string msg = "Unsupported merge strategy type: " + std::to_string((int32_t)type); + LOG_ENGINE_ERROR_ << msg; + throw Exception(DB_ERROR, msg); + } + } + + return Status::OK(); +} + +Status +MergeManagerImpl::MergeFiles(const std::string& collection_id) { + if (strategy_ == nullptr) { + std::string msg = "No merge strategy specified"; + LOG_ENGINE_ERROR_ << msg; + return Status(DB_ERROR, msg); + } + + meta::FilesHolder files_holder; + auto status = meta_ptr_->FilesToMerge(collection_id, files_holder); + if (!status.ok()) { + LOG_ENGINE_ERROR_ << "Failed to get merge files for collection: " << collection_id; + return status; + } + + if (files_holder.HoldFiles().size() < 2) { + return Status::OK(); + } + + MergeFilesGroups files_groups; + status = strategy_->RegroupFiles(files_holder, files_groups); + if (!status.ok()) { + LOG_ENGINE_ERROR_ << "Failed to regroup files for: " << collection_id + << ", continue to merge all files into one"; + + MergeTask task(meta_ptr_, options_, files_holder.HoldFiles()); + return task.Execute(); + } + + for (auto& group : files_groups) { + MergeTask task(meta_ptr_, options_, files_holder.HoldFiles()); + status = task.Execute(); + + files_holder.UnmarkFiles(group); + } + + return status; +} + +} // namespace engine +} // namespace milvus diff --git a/core/src/db/merge/MergeManagerImpl.h b/core/src/db/merge/MergeManagerImpl.h new file mode 100644 index 0000000000..257bf10014 --- /dev/null +++ b/core/src/db/merge/MergeManagerImpl.h @@ -0,0 +1,48 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "db/merge/MergeManager.h" +#include "db/merge/MergeStrategy.h" +#include "utils/Status.h" + +namespace milvus { +namespace engine { + +class MergeManagerImpl : public MergeManager { + public: + MergeManagerImpl(const meta::MetaPtr& meta_ptr, const DBOptions& options, MergeStrategyType type); + + Status + UseStrategy(MergeStrategyType type) override; + + Status + MergeFiles(const std::string& collection_id) override; + + private: + meta::MetaPtr meta_ptr_; + DBOptions options_; + + MergeStrategyPtr strategy_; +}; // MergeManagerImpl + +} // namespace engine +} // namespace milvus diff --git a/core/src/db/merge/MergeSimpleStrategy.cpp b/core/src/db/merge/MergeSimpleStrategy.cpp new file mode 100644 index 0000000000..825d0d4bbe --- /dev/null +++ b/core/src/db/merge/MergeSimpleStrategy.cpp @@ -0,0 +1,25 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License. + +#include "db/merge/MergeSimpleStrategy.h" +#include "utils/Log.h" + +namespace milvus { +namespace engine { + +Status +MergeSimpleStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) { + files_groups.push_back(files_holder.HoldFiles()); + return Status::OK(); +} + +} // namespace engine +} // namespace milvus diff --git a/core/src/db/merge/MergeSimpleStrategy.h b/core/src/db/merge/MergeSimpleStrategy.h new file mode 100644 index 0000000000..3d6406ca29 --- /dev/null +++ b/core/src/db/merge/MergeSimpleStrategy.h @@ -0,0 +1,29 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License. + +#pragma once + +#include + +#include "db/merge/MergeStrategy.h" +#include "utils/Status.h" + +namespace milvus { +namespace engine { + +class MergeSimpleStrategy : public MergeStrategy { + public: + Status + RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) override; +}; // MergeSimpleStrategy + +} // namespace engine +} // namespace milvus diff --git a/core/src/db/merge/MergeStrategy.h b/core/src/db/merge/MergeStrategy.h new file mode 100644 index 0000000000..cb00babb25 --- /dev/null +++ b/core/src/db/merge/MergeStrategy.h @@ -0,0 +1,38 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include "db/Types.h"
+#include "db/meta/FilesHolder.h"
+#include "utils/Status.h"
+
+namespace milvus {
+namespace engine {
+
+using MergeFilesGroups = std::vector<meta::SegmentsSchema>;
+
+class MergeStrategy {
+ public:
+    virtual Status
+    RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) = 0;
+};  // MergeStrategy
+
+using MergeStrategyPtr = std::shared_ptr<MergeStrategy>;
+
+}  // namespace engine
+}  // namespace milvus
diff --git a/core/src/db/merge/MergeTask.cpp b/core/src/db/merge/MergeTask.cpp
new file mode 100644
index 0000000000..113159593a
--- /dev/null
+++ b/core/src/db/merge/MergeTask.cpp
@@ -0,0 +1,128 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#include "db/merge/MergeTask.h"
+#include "db/Utils.h"
+#include "metrics/Metrics.h"
+#include "segment/SegmentReader.h"
+#include "segment/SegmentWriter.h"
+#include "utils/Log.h"
+
+#include
+#include
+
+namespace milvus {
+namespace engine {
+
+MergeTask::MergeTask(const meta::MetaPtr& meta_ptr, const DBOptions& options, meta::SegmentsSchema& files)
+    : meta_ptr_(meta_ptr), options_(options), files_(files) {
+}
+
+Status
+MergeTask::Execute() {
+    if (files_.empty()) {
+        return Status::OK();
+    }
+
+    // check input
+    std::string collection_id = files_.front().collection_id_;
+    for (auto& file : files_) {
+        if (file.collection_id_ != collection_id) {
+            return Status(DB_ERROR, "Cannot merge files across collections");
+        }
+    }
+
+    // step 1: create collection file
+    meta::SegmentSchema collection_file;
+    collection_file.collection_id_ = collection_id;
+    collection_file.file_type_ = meta::SegmentSchema::NEW_MERGE;
+    Status status = meta_ptr_->CreateCollectionFile(collection_file);
+    if (!status.ok()) {
+        LOG_ENGINE_ERROR_ << "Failed to create collection file: " << status.ToString();
+        return status;
+    }
+
+    // step 2: merge files
+    meta::SegmentsSchema updated;
+
+    std::string new_segment_dir;
+    utils::GetParentPath(collection_file.location_, new_segment_dir);
+    auto segment_writer_ptr = std::make_shared<segment::SegmentWriter>(new_segment_dir);
+
+    // note: files_ is a copy, not a reference, because files_holder.UnmarkFile() modifies the underlying array
+    std::string info = "Merge task files size info:";
+    for (auto& file : files_) {
+        info += std::to_string(file.file_size_);
+        info += ", ";
+
+        server::CollectMergeFilesMetrics metrics;
+        std::string segment_dir_to_merge;
+        utils::GetParentPath(file.location_, segment_dir_to_merge);
+        segment_writer_ptr->Merge(segment_dir_to_merge, collection_file.file_id_);
+
+        auto file_schema = file;
+        file_schema.file_type_ = meta::SegmentSchema::TO_DELETE;
+        updated.push_back(file_schema);
+        auto size = segment_writer_ptr->Size();
+        if (size >= file_schema.index_file_size_) {
+            break;
+        }
+    }
+    LOG_ENGINE_DEBUG_ << info;
+
+    // step 3: serialize to disk
+    try {
+        status = segment_writer_ptr->Serialize();
+    } catch (std::exception& ex) {
+        std::string msg = "Serialize merged index encounter exception: " + std::string(ex.what());
+        LOG_ENGINE_ERROR_ << msg;
+        status = Status(DB_ERROR, msg);
+    }
+
+    if (!status.ok()) {
+        LOG_ENGINE_ERROR_ << "Failed to persist merged segment: " << new_segment_dir << ". Error: " << status.message();
+
+        // if failed to serialize merge file to disk
+        // typical error: out of disk space, out of memory or permission denied
+        collection_file.file_type_ = meta::SegmentSchema::TO_DELETE;
+        status = meta_ptr_->UpdateCollectionFile(collection_file);
+        LOG_ENGINE_DEBUG_ << "Failed to update file to index, mark file: " << collection_file.file_id_
+                          << " as to_delete";
+
+        return status;
+    }
+
+    // step 4: update collection files state
+    // if index type isn't IDMAP, set file type to TO_INDEX if file size exceeds index_file_size
+    // else set file type to RAW, no need to build index
+    if (!utils::IsRawIndexType(collection_file.engine_type_)) {
+        collection_file.file_type_ = (segment_writer_ptr->Size() >= collection_file.index_file_size_)
+                                         ? meta::SegmentSchema::TO_INDEX
+                                         : meta::SegmentSchema::RAW;
+    } else {
+        collection_file.file_type_ = meta::SegmentSchema::RAW;
+    }
+    collection_file.file_size_ = segment_writer_ptr->Size();
+    collection_file.row_count_ = segment_writer_ptr->VectorCount();
+    updated.push_back(collection_file);
+    status = meta_ptr_->UpdateCollectionFiles(updated);
+    LOG_ENGINE_DEBUG_ << "New merged segment " << collection_file.segment_id_ << " of size "
+                      << segment_writer_ptr->Size() << " bytes";
+
+    if (options_.insert_cache_immediately_) {
+        segment_writer_ptr->Cache();
+    }
+
+    return status;
+}
+
+}  // namespace engine
+}  // namespace milvus
diff --git a/core/src/db/merge/MergeTask.h b/core/src/db/merge/MergeTask.h
new file mode 100644
index 0000000000..af0933a665
--- /dev/null
+++ b/core/src/db/merge/MergeTask.h
@@ -0,0 +1,36 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
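MergeTask::Execute stops appending segments once the writer's cumulative size reaches the per-collection index_file_size threshold. A self-contained sketch of that accumulate-until-threshold rule (plain integers stand in for segments; this is an illustration, not the patch's code):

#include <cstdint>
#include <vector>

// Sketch of the early-stop rule in MergeTask::Execute(): keep merging segments
// until the merged size reaches the index_file_size threshold.
size_t CountSegmentsToMerge(const std::vector<int64_t>& segment_sizes, int64_t index_file_size) {
    int64_t merged_size = 0;
    size_t count = 0;
    for (int64_t size : segment_sizes) {
        merged_size += size;  // mirrors segment_writer_ptr->Merge(...) growing the writer
        ++count;
        if (merged_size >= index_file_size) {
            break;  // same condition as `if (size >= file_schema.index_file_size_) break;`
        }
    }
    return count;
}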
+ +#pragma once + +#include "db/merge/MergeManager.h" +#include "db/meta/MetaTypes.h" +#include "utils/Status.h" + +namespace milvus { +namespace engine { + +class MergeTask { + public: + MergeTask(const meta::MetaPtr& meta, const DBOptions& options, meta::SegmentsSchema& files); + + Status + Execute(); + + private: + meta::MetaPtr meta_ptr_; + DBOptions options_; + + meta::SegmentsSchema files_; +}; // MergeTask + +} // namespace engine +} // namespace milvus diff --git a/core/src/db/meta/Meta.h b/core/src/db/meta/Meta.h index 1bea3af863..a010c037b1 100644 --- a/core/src/db/meta/Meta.h +++ b/core/src/db/meta/Meta.h @@ -55,7 +55,7 @@ class Meta { DescribeCollection(CollectionSchema& table_schema) = 0; virtual Status - HasCollection(const std::string& collection_id, bool& has_or_not) = 0; + HasCollection(const std::string& collection_id, bool& has_or_not, bool is_root = false) = 0; virtual Status AllCollections(std::vector& table_schema_array) = 0; diff --git a/core/src/db/meta/MySQLMetaImpl.cpp b/core/src/db/meta/MySQLMetaImpl.cpp index 18d61e4e1c..7b35f92ded 100644 --- a/core/src/db/meta/MySQLMetaImpl.cpp +++ b/core/src/db/meta/MySQLMetaImpl.cpp @@ -541,7 +541,7 @@ MySQLMetaImpl::DescribeCollection(CollectionSchema& collection_schema) { } Status -MySQLMetaImpl::HasCollection(const std::string& collection_id, bool& has_or_not) { +MySQLMetaImpl::HasCollection(const std::string& collection_id, bool& has_or_not, bool is_root) { try { server::MetricCollector metric; mysqlpp::StoreQueryResult res; @@ -557,20 +557,23 @@ MySQLMetaImpl::HasCollection(const std::string& collection_id, bool& has_or_not) mysqlpp::Query HasCollectionQuery = connectionPtr->query(); // since collection_id is a unique column we just need to check whether it exists or not - HasCollectionQuery << "SELECT EXISTS" - << " (SELECT 1 FROM " << META_TABLES << " WHERE table_id = " << mysqlpp::quote - << collection_id << " AND state <> " << std::to_string(CollectionSchema::TO_DELETE) - << ")" - << " AS " << mysqlpp::quote << "check" - << ";"; + if (is_root) { + HasCollectionQuery << "SELECT id FROM " << META_TABLES << " WHERE table_id = " << mysqlpp::quote + << collection_id << " AND state <> " << std::to_string(CollectionSchema::TO_DELETE) + << " AND owner_table = " << mysqlpp::quote << "" + << ";"; + } else { + HasCollectionQuery << "SELECT id FROM " << META_TABLES << " WHERE table_id = " << mysqlpp::quote + << collection_id << " AND state <> " << std::to_string(CollectionSchema::TO_DELETE) + << ";"; + } LOG_ENGINE_DEBUG_ << "HasCollection: " << HasCollectionQuery.str(); res = HasCollectionQuery.store(); } // Scoped Connection - int check = res[0]["check"]; - has_or_not = (check == 1); + has_or_not = (res.num_rows() > 0); } catch (std::exception& e) { return HandleException("Failed to check collection existence", e.what()); } @@ -2505,7 +2508,8 @@ MySQLMetaImpl::DropAll() { } mysqlpp::Query statement = connectionPtr->query(); - statement << "DROP TABLE IF EXISTS " << TABLES_SCHEMA.name() << ", " << TABLEFILES_SCHEMA.name() << ";"; + statement << "DROP TABLE IF EXISTS " << TABLES_SCHEMA.name() << ", " << TABLEFILES_SCHEMA.name() << ", " + << ENVIRONMENT_SCHEMA.name() << ", " << FIELDS_SCHEMA.name() << ";"; LOG_ENGINE_DEBUG_ << "DropAll: " << statement.str(); diff --git a/core/src/db/meta/MySQLMetaImpl.h b/core/src/db/meta/MySQLMetaImpl.h index a6412c97dd..0a1084d8c7 100644 --- a/core/src/db/meta/MySQLMetaImpl.h +++ b/core/src/db/meta/MySQLMetaImpl.h @@ -38,7 +38,7 @@ class MySQLMetaImpl : public Meta { 
DescribeCollection(CollectionSchema& collection_schema) override; Status - HasCollection(const std::string& collection_id, bool& has_or_not) override; + HasCollection(const std::string& collection_id, bool& has_or_not, bool is_root = false) override; Status AllCollections(std::vector& collection_schema_array) override; diff --git a/core/src/db/meta/SqliteMetaImpl.cpp b/core/src/db/meta/SqliteMetaImpl.cpp index f677fae364..f6b4eae1bd 100644 --- a/core/src/db/meta/SqliteMetaImpl.cpp +++ b/core/src/db/meta/SqliteMetaImpl.cpp @@ -272,7 +272,7 @@ SqliteMetaImpl::DescribeCollection(CollectionSchema& collection_schema) { } Status -SqliteMetaImpl::HasCollection(const std::string& collection_id, bool& has_or_not) { +SqliteMetaImpl::HasCollection(const std::string& collection_id, bool& has_or_not, bool is_root) { has_or_not = false; try { @@ -281,11 +281,21 @@ SqliteMetaImpl::HasCollection(const std::string& collection_id, bool& has_or_not // multi-threads call sqlite update may get exception('bad logic', etc), so we add a lock here std::lock_guard meta_lock(meta_mutex_); - auto collections = ConnectorPtr->select( - columns(&CollectionSchema::id_), - where(c(&CollectionSchema::collection_id_) == collection_id - and c(&CollectionSchema::state_) != (int)CollectionSchema::TO_DELETE)); - if (collections.size() == 1) { + + auto select_columns = columns(&CollectionSchema::id_, &CollectionSchema::owner_collection_); + decltype(ConnectorPtr->select(select_columns)) selected; + if (is_root) { + selected = ConnectorPtr->select(select_columns, + where(c(&CollectionSchema::collection_id_) == collection_id + and c(&CollectionSchema::state_) != (int)CollectionSchema::TO_DELETE + and c(&CollectionSchema::owner_collection_) == "")); + } else { + selected = ConnectorPtr->select(select_columns, + where(c(&CollectionSchema::collection_id_) == collection_id + and c(&CollectionSchema::state_) != (int)CollectionSchema::TO_DELETE)); + } + + if (selected.size() == 1) { has_or_not = true; } else { has_or_not = false; @@ -1742,6 +1752,8 @@ SqliteMetaImpl::DropAll() { try { ConnectorPtr->drop_table(META_TABLES); ConnectorPtr->drop_table(META_TABLEFILES); + ConnectorPtr->drop_table(META_ENVIRONMENT); + ConnectorPtr->drop_table(META_FIELDS); } catch (std::exception& e) { return HandleException("Encounter exception when drop all meta", e.what()); } diff --git a/core/src/db/meta/SqliteMetaImpl.h b/core/src/db/meta/SqliteMetaImpl.h index 67a0e688b0..82b30861b9 100644 --- a/core/src/db/meta/SqliteMetaImpl.h +++ b/core/src/db/meta/SqliteMetaImpl.h @@ -40,7 +40,7 @@ class SqliteMetaImpl : public Meta { DescribeCollection(CollectionSchema& collection_schema) override; Status - HasCollection(const std::string& collection_id, bool& has_or_not) override; + HasCollection(const std::string& collection_id, bool& has_or_not, bool is_root = false) override; Status AllCollections(std::vector& collection_schema_array) override; diff --git a/core/src/index/archive/KnowhereResource.cpp b/core/src/index/archive/KnowhereResource.cpp index 698a3f7649..4615aff8af 100644 --- a/core/src/index/archive/KnowhereResource.cpp +++ b/core/src/index/archive/KnowhereResource.cpp @@ -36,7 +36,7 @@ Status KnowhereResource::Initialize() { server::Config& config = server::Config::GetInstance(); std::string simd_type; - CONFIG_CHECK(config.GetEngineConfigSimdType(simd_type)); + STATUS_CHECK(config.GetEngineConfigSimdType(simd_type)); if (simd_type == "avx512") { faiss::faiss_use_avx512 = true; faiss::faiss_use_avx2 = false; @@ -64,7 +64,7 @@ 
KnowhereResource::Initialize() { #ifdef MILVUS_GPU_VERSION bool enable_gpu = false; - CONFIG_CHECK(config.GetGpuResourceConfigEnable(enable_gpu)); + STATUS_CHECK(config.GetGpuResourceConfigEnable(enable_gpu)); fiu_do_on("KnowhereResource.Initialize.disable_gpu", enable_gpu = false); if (not enable_gpu) return Status::OK(); @@ -79,7 +79,7 @@ KnowhereResource::Initialize() { // get build index gpu resource std::vector build_index_gpus; - CONFIG_CHECK(config.GetGpuResourceConfigBuildIndexResources(build_index_gpus)); + STATUS_CHECK(config.GetGpuResourceConfigBuildIndexResources(build_index_gpus)); for (auto gpu_id : build_index_gpus) { gpu_resources.insert(std::make_pair(gpu_id, GpuResourceSetting())); @@ -87,7 +87,7 @@ KnowhereResource::Initialize() { // get search gpu resource std::vector search_gpus; - CONFIG_CHECK(config.GetGpuResourceConfigSearchResources(search_gpus)); + STATUS_CHECK(config.GetGpuResourceConfigSearchResources(search_gpus)); for (auto& gpu_id : search_gpus) { gpu_resources.insert(std::make_pair(gpu_id, GpuResourceSetting())); diff --git a/core/src/index/cmake/FindFAISS.cmake b/core/src/index/cmake/FindFAISS.cmake index 2d61592fcb..3aa5f4e6e4 100644 --- a/core/src/index/cmake/FindFAISS.cmake +++ b/core/src/index/cmake/FindFAISS.cmake @@ -38,7 +38,7 @@ if (FAISS_FOUND) set_target_properties( faiss PROPERTIES - INTERFACE_LINK_LIBRARIES ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) + INTERFACE_LINK_LIBRARIES ${OpenBLAS_LIBRARIES}) endif () endif () endif () diff --git a/core/src/index/cmake/FindOpenBLAS.cmake b/core/src/index/cmake/FindOpenBLAS.cmake new file mode 100644 index 0000000000..3a7b307998 --- /dev/null +++ b/core/src/index/cmake/FindOpenBLAS.cmake @@ -0,0 +1,91 @@ + +if (OpenBLAS_FOUND) # the git version propose a OpenBLASConfig.cmake + message(STATUS "OpenBLASConfig found") + set(OpenBLAS_INCLUDE_DIR ${OpenBLAS_INCLUDE_DIRS}) +else() + message("OpenBLASConfig not found") + unset(OpenBLAS_DIR CACHE) + set(OpenBLAS_INCLUDE_SEARCH_PATHS + /usr/local/openblas/include + /usr/include + /usr/include/openblas + /usr/include/openblas-base + /usr/local/include + /usr/local/include/openblas + /usr/local/include/openblas-base + /opt/OpenBLAS/include + /usr/local/opt/openblas/include + $ENV{OpenBLAS_HOME} + $ENV{OpenBLAS_HOME}/include + ) + + set(OpenBLAS_LIB_SEARCH_PATHS + /usr/local/openblas/lib + /lib/ + /lib/openblas-base + /lib64/ + /usr/lib + /usr/lib/openblas-base + /usr/lib64 + /usr/local/lib + /usr/local/lib64 + /usr/local/opt/openblas/lib + /opt/OpenBLAS/lib + $ENV{OpenBLAS} + $ENV{OpenBLAS}/lib + $ENV{OpenBLAS_HOME} + $ENV{OpenBLAS_HOME}/lib + ) + set(DEFAULT_OpenBLAS_LIB_PATH + /usr/local/openblas/lib + ${OPENBLAS_PREFIX}/lib) + + message("DEFAULT_OpenBLAS_LIB_PATH: ${DEFAULT_OpenBLAS_LIB_PATH}") + find_path(OpenBLAS_INCLUDE_DIR NAMES openblas_config.h lapacke.h PATHS ${OpenBLAS_INCLUDE_SEARCH_PATHS}) + find_library(OpenBLAS_LIB NAMES openblas PATHS ${DEFAULT_OpenBLAS_LIB_PATH} NO_DEFAULT_PATH) + find_library(OpenBLAS_LIB NAMES openblas PATHS ${OpenBLAS_LIB_SEARCH_PATHS}) + # mostly for debian + find_library(Lapacke_LIB NAMES lapacke PATHS ${DEFAULT_OpenBLAS_LIB_PATH} NO_DEFAULT_PATH) + find_library(Lapacke_LIB NAMES lapacke PATHS ${OpenBLAS_LIB_SEARCH_PATHS}) + + set(OpenBLAS_FOUND ON) + + # Check include files + if(NOT OpenBLAS_INCLUDE_DIR) + set(OpenBLAS_FOUND OFF) + message(STATUS "Could not find OpenBLAS include. 
Turning OpenBLAS_FOUND off") + else() + message(STATUS "find OpenBLAS include:${OpenBLAS_INCLUDE_DIR} ") + endif() + + # Check libraries + if(NOT OpenBLAS_LIB) + set(OpenBLAS_FOUND OFF) + message(STATUS "Could not find OpenBLAS lib. Turning OpenBLAS_FOUND off") + else() + message(STATUS "find OpenBLAS lib:${OpenBLAS_LIB} ") + endif() + + if (OpenBLAS_FOUND) + set(OpenBLAS_LIBRARIES ${OpenBLAS_LIB}) + STRING(REGEX REPLACE "/libopenblas.so" "" OpenBLAS_LIB_DIR ${OpenBLAS_LIBRARIES}) + message(STATUS "find OpenBLAS libraries:${OpenBLAS_LIBRARIES} ") + if (Lapacke_LIB) + set(OpenBLAS_LIBRARIES ${OpenBLAS_LIBRARIES} ${Lapacke_LIB}) + endif() + if (NOT OpenBLAS_FIND_QUIETLY) + message(STATUS "Found OpenBLAS libraries: ${OpenBLAS_LIBRARIES}") + message(STATUS "Found OpenBLAS include: ${OpenBLAS_INCLUDE_DIR}") + endif() + else() + if (OpenBLAS_FIND_REQUIRED) + message(FATAL_ERROR "Could not find OpenBLAS") + endif() + endif() +endif() + +mark_as_advanced( + OpenBLAS_INCLUDE_DIR + OpenBLAS_LIBRARIES + OpenBLAS_LIB_DIR +) diff --git a/core/src/index/cmake/ThirdPartyPackagesCore.cmake b/core/src/index/cmake/ThirdPartyPackagesCore.cmake index 3da16afe70..9e043e63ab 100644 --- a/core/src/index/cmake/ThirdPartyPackagesCore.cmake +++ b/core/src/index/cmake/ThirdPartyPackagesCore.cmake @@ -10,7 +10,6 @@ # or implied. See the License for the specific language governing permissions and limitations under the License. set(KNOWHERE_THIRDPARTY_DEPENDENCIES - Arrow FAISS GTest @@ -318,18 +317,15 @@ endif () set(OPENBLAS_PREFIX "${INDEX_BINARY_DIR}/openblas_ep-prefix/src/openblas_ep") macro(build_openblas) message(STATUS "Building OpenBLAS-${OPENBLAS_VERSION} from source") - set(OPENBLAS_INCLUDE_DIR "${OPENBLAS_PREFIX}/include") - if (CMAKE_BUILD_TYPE STREQUAL "Release") - set(OPENBLAS_SHARED_LIB - "${OPENBLAS_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}openblas${CMAKE_SHARED_LIBRARY_SUFFIX}") - elseif(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(OPENBLAS_SHARED_LIB - "${OPENBLAS_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}openblas_d${CMAKE_SHARED_LIBRARY_SUFFIX}") - endif() + set(OpenBLAS_INCLUDE_DIR "${OPENBLAS_PREFIX}/include") + set(OpenBLAS_LIB_DIR "${OPENBLAS_PREFIX}/lib") + set(OPENBLAS_SHARED_LIB + "${OPENBLAS_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}openblas${CMAKE_SHARED_LIBRARY_SUFFIX}") set(OPENBLAS_STATIC_LIB "${OPENBLAS_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}") set(OPENBLAS_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON -DTARGET=CORE2 @@ -342,7 +338,7 @@ macro(build_openblas) -DINTERFACE64=0 -DNUM_THREADS=128 -DNO_LAPACKE=1 - "-DVERSION=${VERSION}" + "-DVERSION=${OPENBLAS_VERSION}" "-DCMAKE_INSTALL_PREFIX=${OPENBLAS_PREFIX}" -DCMAKE_INSTALL_LIBDIR=lib) @@ -365,21 +361,23 @@ macro(build_openblas) ${OPENBLAS_SHARED_LIB} ${OPENBLAS_STATIC_LIB}) - file(MAKE_DIRECTORY "${OPENBLAS_INCLUDE_DIR}") + file(MAKE_DIRECTORY "${OpenBLAS_INCLUDE_DIR}") add_library(openblas SHARED IMPORTED) set_target_properties( openblas - PROPERTIES IMPORTED_LOCATION "${OPENBLAS_SHARED_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${OPENBLAS_INCLUDE_DIR}") - + PROPERTIES + IMPORTED_LOCATION "${OPENBLAS_SHARED_LIB}" + LIBRARY_OUTPUT_NAME "openblas" + INTERFACE_INCLUDE_DIRECTORIES "${OpenBLAS_INCLUDE_DIR}") add_dependencies(openblas openblas_ep) + get_target_property(OpenBLAS_INCLUDE_DIR openblas INTERFACE_INCLUDE_DIRECTORIES) + set(OpenBLAS_LIBRARIES "${OPENBLAS_SHARED_LIB}") endmacro() if (KNOWHERE_WITH_OPENBLAS) resolve_dependency(OpenBLAS) 
- get_target_property(OPENBLAS_INCLUDE_DIR openblas INTERFACE_INCLUDE_DIRECTORIES) - include_directories(SYSTEM "${OPENBLAS_INCLUDE_DIR}") - link_directories(SYSTEM ${OPENBLAS_PREFIX}/lib) + include_directories(SYSTEM "${OpenBLAS_INCLUDE_DIR}") + link_directories(SYSTEM "${OpenBLAS_LIB_DIR}") endif() # ---------------------------------------------------------------------- @@ -525,8 +523,13 @@ macro(build_faiss) ) else () message(STATUS "Build Faiss with OpenBlas/LAPACK") - set(FAISS_CONFIGURE_ARGS ${FAISS_CONFIGURE_ARGS} - "LDFLAGS=-L${OPENBLAS_PREFIX}/lib -L${LAPACK_PREFIX}/lib") + if(OpenBLAS_FOUND) + set(FAISS_CONFIGURE_ARGS ${FAISS_CONFIGURE_ARGS} + "LDFLAGS=-L${OpenBLAS_LIB_DIR}") + else() + set(FAISS_CONFIGURE_ARGS ${FAISS_CONFIGURE_ARGS} + "LDFLAGS=-L${OPENBLAS_PREFIX}/lib") + endif() endif () if (KNOWHERE_GPU_VERSION) @@ -577,6 +580,11 @@ macro(build_faiss) ${FAISS_STATIC_LIB}) endif () + if(NOT OpenBLAS_FOUND) + message("add faiss dependencies: openblas_ep") + ExternalProject_Add_StepDependencies(faiss_ep configure openblas_ep) + endif() + file(MAKE_DIRECTORY "${FAISS_INCLUDE_DIR}") add_library(faiss STATIC IMPORTED) @@ -595,11 +603,9 @@ macro(build_faiss) set_target_properties( faiss PROPERTIES -# INTERFACE_LINK_LIBRARIES ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) - INTERFACE_LINK_LIBRARIES "openblas") + INTERFACE_LINK_LIBRARIES "${OpenBLAS_LIBRARIES}") endif () - add_dependencies(faiss faiss_ep) endmacro() diff --git a/core/src/index/knowhere/CMakeLists.txt b/core/src/index/knowhere/CMakeLists.txt index d57f2ff906..28ba1b35df 100644 --- a/core/src/index/knowhere/CMakeLists.txt +++ b/core/src/index/knowhere/CMakeLists.txt @@ -71,7 +71,7 @@ if (FAISS_WITH_MKL) ) else () set(depend_libs ${depend_libs} - ${BLAS_LIBRARIES} + ${OpenBLAS_LIBRARIES} ${LAPACK_LIBRARIES} ) endif () @@ -118,7 +118,7 @@ set(INDEX_INCLUDE_DIRS ${INDEX_SOURCE_DIR}/thirdparty/SPTAG/AnnService # ${ARROW_INCLUDE_DIR} ${FAISS_INCLUDE_DIR} - ${OPENBLAS_INCLUDE_DIR} + ${OpenBLAS_INCLUDE_DIR} ${LAPACK_INCLUDE_DIR} ) diff --git a/core/src/index/thirdparty/faiss/Clustering.cpp b/core/src/index/thirdparty/faiss/Clustering.cpp index 6864b98e26..eb414afa57 100644 --- a/core/src/index/thirdparty/faiss/Clustering.cpp +++ b/core/src/index/thirdparty/faiss/Clustering.cpp @@ -191,7 +191,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) { float err = 0; for (int i = 0; i < niter; i++) { double t0s = getmillisecs(); - index.search (nx, x, 1, dis, assign); + index.assign(nx, x, assign, dis); InterruptCallback::check(); t_search_tot += getmillisecs() - t0s; diff --git a/core/src/index/thirdparty/faiss/Index.cpp b/core/src/index/thirdparty/faiss/Index.cpp index 31da98979c..72d7b76280 100644 --- a/core/src/index/thirdparty/faiss/Index.cpp +++ b/core/src/index/thirdparty/faiss/Index.cpp @@ -36,11 +36,13 @@ void Index::range_search (idx_t , const float *, float, FAISS_THROW_MSG ("range search not implemented"); } -void Index::assign (idx_t n, const float * x, idx_t * labels, idx_t k) +void Index::assign (idx_t n, const float *x, idx_t *labels, float *distance) { - float * distances = new float[n * k]; - ScopeDeleter del(distances); - search (n, x, k, distances, labels); + float *dis_inner = (distance == nullptr) ? 
new float[n] : distance; + search (n, x, 1, dis_inner, labels); + if (distance == nullptr) { + delete[] dis_inner; + } } void Index::add_with_ids(idx_t n, const float* x, const idx_t* xids) { diff --git a/core/src/index/thirdparty/faiss/Index.h b/core/src/index/thirdparty/faiss/Index.h index a4eb987dce..1d461d2d41 100644 --- a/core/src/index/thirdparty/faiss/Index.h +++ b/core/src/index/thirdparty/faiss/Index.h @@ -183,9 +183,9 @@ struct Index { * * This function is identical as search but only return labels of neighbors. * @param x input vectors to search, size n * d - * @param labels output labels of the NNs, size n*k + * @param labels output labels of the NNs, size n */ - void assign (idx_t n, const float * x, idx_t * labels, idx_t k = 1); + virtual void assign (idx_t n, const float *x, idx_t *labels, float *distance = nullptr); /// removes all elements from the database. virtual void reset() = 0; diff --git a/core/src/index/thirdparty/faiss/IndexFlat.cpp b/core/src/index/thirdparty/faiss/IndexFlat.cpp index efd290dbc0..7cc2304881 100644 --- a/core/src/index/thirdparty/faiss/IndexFlat.cpp +++ b/core/src/index/thirdparty/faiss/IndexFlat.cpp @@ -64,6 +64,30 @@ void IndexFlat::search(idx_t n, const float* x, idx_t k, float* distances, idx_t } } +void IndexFlat::assign(idx_t n, const float * x, idx_t * labels, float* distances) +{ + // usually used in IVF k-means algorithm + + float *dis_inner = (distances == nullptr) ? new float[n] : distances; + switch (metric_type) { + case METRIC_INNER_PRODUCT: + case METRIC_L2: { + // ignore the metric_type, both use L2 + elkan_L2_sse(x, xb.data(), d, n, ntotal, labels, dis_inner); + break; + } + default: { + // binary metrics + // There may be something wrong, but maintain the original logic now. + Index::assign(n, x, labels, dis_inner); + break; + } + } + if (distances == nullptr) { + delete[] dis_inner; + } +} + void IndexFlat::range_search (idx_t n, const float *x, float radius, RangeSearchResult *result, ConcurrentBitsetPtr bitset) const diff --git a/core/src/index/thirdparty/faiss/IndexFlat.h b/core/src/index/thirdparty/faiss/IndexFlat.h index 24420c7cf3..6f63d22c2e 100644 --- a/core/src/index/thirdparty/faiss/IndexFlat.h +++ b/core/src/index/thirdparty/faiss/IndexFlat.h @@ -36,6 +36,12 @@ struct IndexFlat: Index { idx_t* labels, ConcurrentBitsetPtr bitset = nullptr) const override; + void assign ( + idx_t n, + const float * x, + idx_t * labels, + float* distances = nullptr) override; + void range_search( idx_t n, const float* x, diff --git a/core/src/index/thirdparty/faiss/c_api/Index_c.cpp b/core/src/index/thirdparty/faiss/c_api/Index_c.cpp index 87085fd192..21d175a15c 100644 --- a/core/src/index/thirdparty/faiss/c_api/Index_c.cpp +++ b/core/src/index/thirdparty/faiss/c_api/Index_c.cpp @@ -57,9 +57,9 @@ int faiss_Index_range_search(const FaissIndex* index, idx_t n, const float* x, f } CATCH_AND_HANDLE } -int faiss_Index_assign(FaissIndex* index, idx_t n, const float * x, idx_t * labels, idx_t k) { +int faiss_Index_assign(FaissIndex* index, idx_t n, const float * x, idx_t * labels) { try { - reinterpret_cast(index)->assign(n, x, labels, k); + reinterpret_cast(index)->assign(n, x, labels); } CATCH_AND_HANDLE } diff --git a/core/src/index/thirdparty/faiss/c_api/Index_c.h b/core/src/index/thirdparty/faiss/c_api/Index_c.h index 5e143211e4..34215036d0 100644 --- a/core/src/index/thirdparty/faiss/c_api/Index_c.h +++ b/core/src/index/thirdparty/faiss/c_api/Index_c.h @@ -106,9 +106,9 @@ int faiss_Index_range_search(const FaissIndex* index, idx_t n, 
const float* x, * This function is identical as search but only return labels of neighbors. * @param index opaque pointer to index object * @param x input vectors to search, size n * d - * @param labels output labels of the NNs, size n*k + * @param labels output labels of the NNs, size n */ -int faiss_Index_assign(FaissIndex* index, idx_t n, const float * x, idx_t * labels, idx_t k); +int faiss_Index_assign(FaissIndex* index, idx_t n, const float * x, idx_t * labels); /** removes all elements from the database. * @param index opaque pointer to index object diff --git a/core/src/index/thirdparty/faiss/utils/distances.cpp b/core/src/index/thirdparty/faiss/utils/distances.cpp index dd8e656511..a80c7c32d7 100644 --- a/core/src/index/thirdparty/faiss/utils/distances.cpp +++ b/core/src/index/thirdparty/faiss/utils/distances.cpp @@ -352,68 +352,6 @@ static void knn_L2sqr_sse ( */ } -static void elkan_L2_sse ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res) { - - if (nx == 0 || ny == 0) { - return; - } - - const size_t bs_y = 1024; - float *data = (float *) malloc((bs_y * (bs_y - 1) / 2) * sizeof (float)); - - for (size_t j0 = 0; j0 < ny; j0 += bs_y) { - size_t j1 = j0 + bs_y; - if (j1 > ny) j1 = ny; - - auto Y = [&](size_t i, size_t j) -> float& { - assert(i != j); - i -= j0, j -= j0; - return (i > j) ? data[j + i * (i - 1) / 2] : data[i + j * (j - 1) / 2]; - }; - -#pragma omp parallel for - for (size_t i = j0 + 1; i < j1; i++) { - const float *y_i = y + i * d; - for (size_t j = j0; j < i; j++) { - const float *y_j = y + j * d; - Y(i, j) = sqrt(fvec_L2sqr(y_i, y_j, d)); - } - } - -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - const float *x_i = x + i * d; - - int64_t ids_i = j0; - float val_i = sqrt(fvec_L2sqr(x_i, y + j0 * d, d)); - float val_i_2 = val_i * 2; - for (size_t j = j0 + 1; j < j1; j++) { - if (val_i_2 <= Y(ids_i, j)) { - continue; - } - const float *y_j = y + j * d; - float disij = sqrt(fvec_L2sqr(x_i, y_j, d)); - if (disij < val_i) { - ids_i = j; - val_i = disij; - val_i_2 = val_i * 2; - } - } - - if (j0 == 0 || res->val[i] > val_i) { - res->val[i] = val_i; - res->ids[i] = ids_i; - } - } - } - - free(data); -} - /** Find the nearest neighbors for nx queries in a set of ny vectors */ static void knn_inner_product_blas ( const float * x, @@ -668,11 +606,7 @@ void knn_L2sqr (const float * x, float_maxheap_array_t * res, ConcurrentBitsetPtr bitset) { - if (bitset == nullptr && res->k == 1 && nx >= ny * 2) { - // Note: L2 but not L2sqr - // usually used in IVF::train - elkan_L2_sse(x, y, d, nx, ny, res); - } else if (d % 4 == 0 && nx < distance_compute_blas_threshold) { + if (d % 4 == 0 && nx < distance_compute_blas_threshold) { knn_L2sqr_sse (x, y, d, nx, ny, res, bitset); } else { NopDistanceCorrection nop; @@ -1067,5 +1001,67 @@ void pairwise_L2sqr (int64_t d, } +void elkan_L2_sse ( + const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + int64_t *ids, float *val) { + + if (nx == 0 || ny == 0) { + return; + } + + const size_t bs_y = 1024; + float *data = (float *) malloc((bs_y * (bs_y - 1) / 2) * sizeof (float)); + + for (size_t j0 = 0; j0 < ny; j0 += bs_y) { + size_t j1 = j0 + bs_y; + if (j1 > ny) j1 = ny; + + auto Y = [&](size_t i, size_t j) -> float& { + assert(i != j); + i -= j0, j -= j0; + return (i > j) ? 
data[j + i * (i - 1) / 2] : data[i + j * (j - 1) / 2];
+        };
+
+#pragma omp parallel for
+        for (size_t i = j0 + 1; i < j1; i++) {
+            const float *y_i = y + i * d;
+            for (size_t j = j0; j < i; j++) {
+                const float *y_j = y + j * d;
+                Y(i, j) = sqrt(fvec_L2sqr(y_i, y_j, d));
+            }
+        }
+
+#pragma omp parallel for
+        for (size_t i = 0; i < nx; i++) {
+            const float *x_i = x + i * d;
+
+            int64_t ids_i = j0;
+            float val_i = sqrt(fvec_L2sqr(x_i, y + j0 * d, d));
+            float val_i_2 = val_i * 2;
+            for (size_t j = j0 + 1; j < j1; j++) {
+                if (val_i_2 <= Y(ids_i, j)) {
+                    continue;
+                }
+                const float *y_j = y + j * d;
+                float disij = sqrt(fvec_L2sqr(x_i, y_j, d));
+                if (disij < val_i) {
+                    ids_i = j;
+                    val_i = disij;
+                    val_i_2 = val_i * 2;
+                }
+            }
+
+            if (j0 == 0 || val[i] > val_i) {
+                val[i] = val_i;
+                ids[i] = ids_i;
+            }
+        }
+    }
+
+    free(data);
+}
+
 } // namespace faiss
diff --git a/core/src/index/thirdparty/faiss/utils/distances.h b/core/src/index/thirdparty/faiss/utils/distances.h
index e227c37514..9d0f5a066a 100644
--- a/core/src/index/thirdparty/faiss/utils/distances.h
+++ b/core/src/index/thirdparty/faiss/utils/distances.h
@@ -247,6 +247,21 @@ void range_search_inner_product (
         RangeSearchResult *result);
 
+/***************************************************************************
+ * elkan
+ ***************************************************************************/
+/** Return the nearest neighbors of each of the nx vectors x among the ny vectors y
+ *
+ * @param x   query vectors, size nx * d
+ * @param y   database vectors, size ny * d
+ * @param ids output array for the nearest id of each query, size nx
+ * @param val output array for the corresponding distance, size nx
+ */
+void elkan_L2_sse (
+        const float * x,
+        const float * y,
+        size_t d, size_t nx, size_t ny,
+        int64_t *ids, float *val);
 
 } // namespace faiss
diff --git a/core/src/index/unittest/CMakeLists.txt b/core/src/index/unittest/CMakeLists.txt
index 631c8355a0..9e6d7c2674 100644
--- a/core/src/index/unittest/CMakeLists.txt
+++ b/core/src/index/unittest/CMakeLists.txt
@@ -17,7 +17,7 @@ if (FAISS_WITH_MKL)
             )
 else ()
     set(depend_libs ${depend_libs}
-            ${BLAS_LIBRARIES}
+            ${OpenBLAS_LIBRARIES}
            ${LAPACK_LIBRARIES}
             )
 endif ()
diff --git a/core/src/index/unittest/faiss_benchmark/CMakeLists.txt b/core/src/index/unittest/faiss_benchmark/CMakeLists.txt
index 90288c0cc7..9c3175506e 100644
--- a/core/src/index/unittest/faiss_benchmark/CMakeLists.txt
+++ b/core/src/index/unittest/faiss_benchmark/CMakeLists.txt
@@ -24,7 +24,7 @@ if (KNOWHERE_GPU_VERSION)
             )
 else ()
     set(depend_libs ${depend_libs}
-            ${BLAS_LIBRARIES}
+            ${OpenBLAS_LIBRARIES}
            ${LAPACK_LIBRARIES}
             )
 endif ()
diff --git a/core/src/index/unittest/faiss_ori/CMakeLists.txt b/core/src/index/unittest/faiss_ori/CMakeLists.txt
index a93824bf19..b612590ad9 100644
--- a/core/src/index/unittest/faiss_ori/CMakeLists.txt
+++ b/core/src/index/unittest/faiss_ori/CMakeLists.txt
@@ -19,7 +19,7 @@ if (KNOWHERE_GPU_VERSION)
             )
 else ()
     set(depend_libs ${depend_libs}
-            ${BLAS_LIBRARIES}
+            ${OpenBLAS_LIBRARIES}
            ${LAPACK_LIBRARIES}
             )
diff --git a/core/src/metrics/prometheus/PrometheusMetrics.cpp b/core/src/metrics/prometheus/PrometheusMetrics.cpp
index 30db0913bb..1fa4a80205 100644
--- a/core/src/metrics/prometheus/PrometheusMetrics.cpp
+++ b/core/src/metrics/prometheus/PrometheusMetrics.cpp
@@ -15,6 +15,7 @@
 #include "metrics/SystemInfo.h"
 #include "utils/Log.h"
 
+#include <unistd.h>
 #include
 #include
 
@@ -25,20 +26,29 @@ Status
 PrometheusMetrics::Init() {
     try {
         Config& config = Config::GetInstance();
-        CONFIG_CHECK(config.GetMetricConfigEnableMonitor(startup_));
+
STATUS_CHECK(config.GetMetricConfigEnableMonitor(startup_)); if (!startup_) { return Status::OK(); } // Following should be read from config file. - std::string push_port, push_address; - CONFIG_CHECK(config.GetMetricConfigPort(push_port)); - CONFIG_CHECK(config.GetMetricConfigAddress(push_address)); + std::string server_port, push_port, push_address; + STATUS_CHECK(config.GetServerConfigPort(server_port)); + STATUS_CHECK(config.GetMetricConfigPort(push_port)); + STATUS_CHECK(config.GetMetricConfigAddress(push_address)); const std::string uri = std::string("/metrics"); // const std::size_t num_threads = 2; - auto labels = prometheus::Gateway::GetInstanceLabel("pushgateway"); + std::string hostportstr; + char hostname[1024]; + if (gethostname(hostname, sizeof(hostname)) == 0) { + hostportstr = std::string(hostname) + ":" + server_port; + } else { + hostportstr = "pushgateway"; + } + + auto labels = prometheus::Gateway::GetInstanceLabel(hostportstr); // Init pushgateway gateway_ = std::make_shared(push_address, push_port, "milvus_metrics", labels); diff --git a/core/src/scheduler/selector/BuildIndexPass.cpp b/core/src/scheduler/selector/BuildIndexPass.cpp index 42a0736b0f..4047f2b999 100644 --- a/core/src/scheduler/selector/BuildIndexPass.cpp +++ b/core/src/scheduler/selector/BuildIndexPass.cpp @@ -47,13 +47,9 @@ BuildIndexPass::Run(const TaskPtr& task) { LOG_SERVER_WARNING_ << "BuildIndexPass cannot get build index gpu!"; return false; } - - if (specified_gpu_id_ >= build_gpus_.size()) { - specified_gpu_id_ = specified_gpu_id_ % build_gpus_.size(); - } - LOG_SERVER_DEBUG_ << "Specify gpu" << specified_gpu_id_ << " to build index!"; - res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, build_gpus_[specified_gpu_id_]); - specified_gpu_id_ = (specified_gpu_id_ + 1) % build_gpus_.size(); + LOG_SERVER_DEBUG_ << "Specify gpu" << build_gpus_[idx_] << " to build index!"; + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, build_gpus_[idx_]); + idx_ = (idx_ + 1) % build_gpus_.size(); } auto label = std::make_shared(std::weak_ptr(res_ptr)); diff --git a/core/src/scheduler/selector/BuildIndexPass.h b/core/src/scheduler/selector/BuildIndexPass.h index 7f3b8047fc..220ac69049 100644 --- a/core/src/scheduler/selector/BuildIndexPass.h +++ b/core/src/scheduler/selector/BuildIndexPass.h @@ -40,7 +40,7 @@ class BuildIndexPass : public Pass, public server::GpuResourceConfigHandler { Run(const TaskPtr& task) override; private: - uint64_t specified_gpu_id_ = 0; + uint64_t idx_ = 0; }; using BuildIndexPassPtr = std::shared_ptr; diff --git a/core/src/scheduler/selector/FaissFlatPass.cpp b/core/src/scheduler/selector/FaissFlatPass.cpp index c3b86ee99b..01b21b9a14 100644 --- a/core/src/scheduler/selector/FaissFlatPass.cpp +++ b/core/src/scheduler/selector/FaissFlatPass.cpp @@ -61,11 +61,10 @@ FaissFlatPass::Run(const TaskPtr& task) { "search", 0); res_ptr = ResMgrInst::GetInstance()->GetResource("cpu"); } else { - auto best_device_id = count_ % search_gpus_.size(); - LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissFlatPass: nq > gpu_search_threshold, specify gpu %d to search!", - "search", 0, best_device_id); - ++count_; - res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[best_device_id]); + LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissFlatPass: nq >= gpu_search_threshold, specify gpu %d to search!", + "search", 0, search_gpus_[idx_]); + res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[idx_]); + idx_ = (idx_ + 1) % 
search_gpus_.size();
     }
     auto label = std::make_shared<SpecResLabel>(res_ptr);
     task->label() = label;
diff --git a/core/src/scheduler/selector/FaissFlatPass.h b/core/src/scheduler/selector/FaissFlatPass.h
index a1ebeb9b47..54231c4b16 100644
--- a/core/src/scheduler/selector/FaissFlatPass.h
+++ b/core/src/scheduler/selector/FaissFlatPass.h
@@ -41,7 +41,7 @@ class FaissFlatPass : public Pass, public server::GpuResourceConfigHandler {
     Run(const TaskPtr& task) override;
 
  private:
-    int64_t count_ = 0;
+    int64_t idx_ = 0;
 };
 
 using FaissFlatPassPtr = std::shared_ptr<FaissFlatPass>;
diff --git a/core/src/scheduler/selector/FaissIVFFlatPass.cpp b/core/src/scheduler/selector/FaissIVFFlatPass.cpp
index 6ba62b3a0c..25ea7f0f49 100644
--- a/core/src/scheduler/selector/FaissIVFFlatPass.cpp
+++ b/core/src/scheduler/selector/FaissIVFFlatPass.cpp
@@ -62,11 +62,10 @@ FaissIVFFlatPass::Run(const TaskPtr& task) {
                                     "search", 0);
         res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
     } else {
-        auto best_device_id = count_ % search_gpus_.size();
-        LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFFlatPass: nq > gpu_search_threshold, specify gpu %d to search!",
-                                    "search", 0, best_device_id);
-        count_++;
-        res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[best_device_id]);
+        LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFFlatPass: nq >= gpu_search_threshold, specify gpu %d to search!",
+                                    "search", 0, search_gpus_[idx_]);
+        res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[idx_]);
+        idx_ = (idx_ + 1) % search_gpus_.size();
     }
     auto label = std::make_shared<SpecResLabel>(res_ptr);
     task->label() = label;
diff --git a/core/src/scheduler/selector/FaissIVFFlatPass.h b/core/src/scheduler/selector/FaissIVFFlatPass.h
index dda315c64a..2c1b4e7ebb 100644
--- a/core/src/scheduler/selector/FaissIVFFlatPass.h
+++ b/core/src/scheduler/selector/FaissIVFFlatPass.h
@@ -41,7 +41,7 @@ class FaissIVFFlatPass : public Pass, public server::GpuResourceConfigHandler {
     Run(const TaskPtr& task) override;
 
  private:
-    int64_t count_ = 0;
+    int64_t idx_ = 0;
 };
 
 using FaissIVFFlatPassPtr = std::shared_ptr<FaissIVFFlatPass>;
diff --git a/core/src/scheduler/selector/FaissIVFPQPass.cpp b/core/src/scheduler/selector/FaissIVFPQPass.cpp
index de213f4079..32acc0c4bc 100644
--- a/core/src/scheduler/selector/FaissIVFPQPass.cpp
+++ b/core/src/scheduler/selector/FaissIVFPQPass.cpp
@@ -64,11 +64,10 @@ FaissIVFPQPass::Run(const TaskPtr& task) {
                                     "search", 0);
         res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
     } else {
-        auto best_device_id = count_ % search_gpus_.size();
-        LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPQPass: nq > gpu_search_threshold, specify gpu %d to search!",
-                                    "search", 0, best_device_id);
-        ++count_;
-        res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[best_device_id]);
+        LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFPQPass: nq >= gpu_search_threshold, specify gpu %d to search!",
+                                    "search", 0, search_gpus_[idx_]);
+        res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[idx_]);
+        idx_ = (idx_ + 1) % search_gpus_.size();
     }
     auto label = std::make_shared<SpecResLabel>(res_ptr);
     task->label() = label;
diff --git a/core/src/scheduler/selector/FaissIVFPQPass.h b/core/src/scheduler/selector/FaissIVFPQPass.h
index f4fca581d2..b4fa296537 100644
--- a/core/src/scheduler/selector/FaissIVFPQPass.h
+++ b/core/src/scheduler/selector/FaissIVFPQPass.h
@@ -41,7 +41,7 @@ class FaissIVFPQPass : public Pass, public server::GpuResourceConfigHandler {
     Run(const TaskPtr& task) override;
 
  private:
-    int64_t count_ = 0;
+    int64_t idx_ = 0;
 };
 
 using FaissIVFPQPassPtr = std::shared_ptr<FaissIVFPQPass>;
diff --git a/core/src/scheduler/selector/FaissIVFSQ8HPass.cpp b/core/src/scheduler/selector/FaissIVFSQ8HPass.cpp
index e1f79a1650..7dc3546ef0 100644
--- a/core/src/scheduler/selector/FaissIVFSQ8HPass.cpp
+++ b/core/src/scheduler/selector/FaissIVFSQ8HPass.cpp
@@ -62,11 +62,10 @@ FaissIVFSQ8HPass::Run(const TaskPtr& task) {
                                     "search", 0);
         res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
     } else {
-        auto best_device_id = count_ % search_gpus_.size();
-        LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFSQ8HPass: nq > gpu_search_threshold, specify gpu %d to search!",
-                                    "search", 0, best_device_id);
-        ++count_;
-        res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[best_device_id]);
+        LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFSQ8HPass: nq >= gpu_search_threshold, specify gpu %d to search!",
+                                    "search", 0, search_gpus_[idx_]);
+        res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[idx_]);
+        idx_ = (idx_ + 1) % search_gpus_.size();
     }
     auto label = std::make_shared<SpecResLabel>(res_ptr);
     task->label() = label;
diff --git a/core/src/scheduler/selector/FaissIVFSQ8HPass.h b/core/src/scheduler/selector/FaissIVFSQ8HPass.h
index c9da0f5829..1050352c01 100644
--- a/core/src/scheduler/selector/FaissIVFSQ8HPass.h
+++ b/core/src/scheduler/selector/FaissIVFSQ8HPass.h
@@ -41,7 +41,7 @@ class FaissIVFSQ8HPass : public Pass, public server::GpuResourceConfigHandler {
     Run(const TaskPtr& task) override;
 
  private:
-    int64_t count_ = 0;
+    int64_t idx_ = 0;
 };
 
 using FaissIVFSQ8HPassPtr = std::shared_ptr<FaissIVFSQ8HPass>;
diff --git a/core/src/scheduler/selector/FaissIVFSQ8Pass.cpp b/core/src/scheduler/selector/FaissIVFSQ8Pass.cpp
index 28a210a61b..c93adbfbe4 100644
--- a/core/src/scheduler/selector/FaissIVFSQ8Pass.cpp
+++ b/core/src/scheduler/selector/FaissIVFSQ8Pass.cpp
@@ -62,11 +62,10 @@ FaissIVFSQ8Pass::Run(const TaskPtr& task) {
                                     "search", 0);
         res_ptr = ResMgrInst::GetInstance()->GetResource("cpu");
     } else {
-        auto best_device_id = count_ % search_gpus_.size();
-        LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFSQ8Pass: nq > gpu_search_threshold, specify gpu %d to search!",
-                                    "search", 0, best_device_id);
-        count_++;
-        res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[best_device_id]);
+        LOG_SERVER_DEBUG_ << LogOut("[%s][%d] FaissIVFSQ8Pass: nq >= gpu_search_threshold, specify gpu %d to search!",
+                                    "search", 0, search_gpus_[idx_]);
+        res_ptr = ResMgrInst::GetInstance()->GetResource(ResourceType::GPU, search_gpus_[idx_]);
+        idx_ = (idx_ + 1) % search_gpus_.size();
     }
     auto label = std::make_shared<SpecResLabel>(res_ptr);
     task->label() = label;
diff --git a/core/src/scheduler/selector/FaissIVFSQ8Pass.h b/core/src/scheduler/selector/FaissIVFSQ8Pass.h
index b3207763f7..99530c7635 100644
--- a/core/src/scheduler/selector/FaissIVFSQ8Pass.h
+++ b/core/src/scheduler/selector/FaissIVFSQ8Pass.h
@@ -41,7 +41,7 @@ class FaissIVFSQ8Pass : public Pass, public server::GpuResourceConfigHandler {
     Run(const TaskPtr& task) override;
 
  private:
-    int64_t count_ = 0;
+    int64_t idx_ = 0;
 };
 
 using FaissIVFSQ8PassPtr = std::shared_ptr<FaissIVFSQ8Pass>;
diff --git a/core/src/server/DBWrapper.cpp b/core/src/server/DBWrapper.cpp
index 1ec00fb17e..f1ca8aa8dc 100644
--- a/core/src/server/DBWrapper.cpp
+++ b/core/src/server/DBWrapper.cpp
@@ -64,6 +64,12 @@ DBWrapper::StartService() {
 
     StringHelpFunctions::SplitStringByDelimeter(db_slave_path, ";", opt.meta_.slave_paths_);
 
+    s = config.GetStorageConfigFileCleanupTimeup(opt.file_cleanup_timeout_);
+    if (!s.ok()) {
+        std::cerr << s.ToString() << std::endl;
+        return s;
+    }
+
     // cache config
     s = config.GetCacheConfigCacheInsertData(opt.insert_cache_immediately_);
     if (!s.ok()) {
@@ -71,6 +77,14 @@ DBWrapper::StartService() {
         return s;
     }
 
+    int64_t insert_buffer_size = 1;  // in GB; converted to bytes below
+    s = config.GetCacheConfigInsertBufferSize(insert_buffer_size);
+    if (!s.ok()) {
+        std::cerr << s.ToString() << std::endl;
+        return s;
+    }
+    opt.insert_buffer_size_ = insert_buffer_size * engine::GB;
+
     std::string mode;
     s = config.GetServerConfigDeployMode(mode);
     if (!s.ok()) {
diff --git a/core/src/server/Server.cpp b/core/src/server/Server.cpp
index 22bfdb3f67..31f6f14a89 100644
--- a/core/src/server/Server.cpp
+++ b/core/src/server/Server.cpp
@@ -159,6 +159,14 @@ Server::Start() {
 
         Config& config = Config::GetInstance();
 
+        std::string meta_uri;
+        STATUS_CHECK(config.GetDBConfigBackendUrl(meta_uri));
+        if (meta_uri.length() > 6 && strcasecmp("sqlite", meta_uri.substr(0, 6).c_str()) == 0) {
+            std::cout << "WARNING: You are using SQLite as metadata management, "
+                         "which cannot be used in production. Please change it to MySQL!"
+                      << std::endl;
+        }
+
         /* Init opentracing tracer from config */
         std::string tracing_config_path;
         s = config.GetTracingConfigJsonConfigPath(tracing_config_path);
@@ -201,58 +209,25 @@ Server::Start() {
             std::string logs_path;
             int64_t max_log_file_size = 0;
             int64_t delete_exceeds = 0;
-            s = config.GetLogsTraceEnable(trace_enable);
-            if (!s.ok()) {
-                return s;
-            }
-            s = config.GetLogsDebugEnable(debug_enable);
-            if (!s.ok()) {
-                return s;
-            }
-            s = config.GetLogsInfoEnable(info_enable);
-            if (!s.ok()) {
-                return s;
-            }
-            s = config.GetLogsWarningEnable(warning_enable);
-            if (!s.ok()) {
-                return s;
-            }
-            s = config.GetLogsErrorEnable(error_enable);
-            if (!s.ok()) {
-                return s;
-            }
-            s = config.GetLogsFatalEnable(fatal_enable);
-            if (!s.ok()) {
-                return s;
-            }
-            s = config.GetLogsPath(logs_path);
-            if (!s.ok()) {
-                return s;
-            }
-            s = config.GetLogsMaxLogFileSize(max_log_file_size);
-            if (!s.ok()) {
-                return s;
-            }
-            s = config.GetLogsDeleteExceeds(delete_exceeds);
-            if (!s.ok()) {
-                return s;
-            }
+            STATUS_CHECK(config.GetLogsTraceEnable(trace_enable));
+            STATUS_CHECK(config.GetLogsDebugEnable(debug_enable));
+            STATUS_CHECK(config.GetLogsInfoEnable(info_enable));
+            STATUS_CHECK(config.GetLogsWarningEnable(warning_enable));
+            STATUS_CHECK(config.GetLogsErrorEnable(error_enable));
+            STATUS_CHECK(config.GetLogsFatalEnable(fatal_enable));
+            STATUS_CHECK(config.GetLogsPath(logs_path));
+            STATUS_CHECK(config.GetLogsMaxLogFileSize(max_log_file_size));
+            STATUS_CHECK(config.GetLogsDeleteExceeds(delete_exceeds));
             InitLog(trace_enable, debug_enable, info_enable, warning_enable, error_enable, fatal_enable, logs_path,
                     max_log_file_size, delete_exceeds);
         }
 
         std::string deploy_mode;
-        s = config.GetServerConfigDeployMode(deploy_mode);
-        if (!s.ok()) {
-            return s;
-        }
+        STATUS_CHECK(config.GetServerConfigDeployMode(deploy_mode));
 
         if (deploy_mode == "single" || deploy_mode == "cluster_writable") {
             std::string db_path;
-            s = config.GetStorageConfigPrimaryPath(db_path);
-            if (!s.ok()) {
-                return s;
-            }
+            STATUS_CHECK(config.GetStorageConfigPrimaryPath(db_path));
 
             try {
                 // True if a new directory was created, otherwise false.
@@ -268,17 +243,11 @@ Server::Start() { } bool wal_enable = false; - s = config.GetWalConfigEnable(wal_enable); - if (!s.ok()) { - return s; - } + STATUS_CHECK(config.GetWalConfigEnable(wal_enable)); if (wal_enable) { std::string wal_path; - s = config.GetWalConfigWalPath(wal_path); - if (!s.ok()) { - return s; - } + STATUS_CHECK(config.GetWalConfigWalPath(wal_path)); try { // True if a new directory was created, otherwise false. @@ -301,21 +270,10 @@ Server::Start() { #else LOG_SERVER_INFO_ << "CPU edition"; #endif - s = StorageChecker::CheckStoragePermission(); - if (!s.ok()) { - return s; - } - - s = CpuChecker::CheckCpuInstructionSet(); - if (!s.ok()) { - return s; - } - + STATUS_CHECK(StorageChecker::CheckStoragePermission()); + STATUS_CHECK(CpuChecker::CheckCpuInstructionSet()); #ifdef MILVUS_GPU_VERSION - s = GpuChecker::CheckGpuEnvironment(); - if (!s.ok()) { - return s; - } + STATUS_CHECK(GpuChecker::CheckGpuEnvironment()); #endif /* record config and hardware information into log */ LogConfigInFile(config_filename_); diff --git a/core/src/server/context/Context.cpp b/core/src/server/context/Context.cpp index a95e7beb9a..424ab98849 100644 --- a/core/src/server/context/Context.cpp +++ b/core/src/server/context/Context.cpp @@ -54,6 +54,16 @@ Context::IsConnectionBroken() const { return context_->IsConnectionBroken(); } +BaseRequest::RequestType +Context::GetRequestType() const { + return request_type_; +} + +void +Context::SetRequestType(BaseRequest::RequestType type) { + request_type_ = type; +} + ///////////////////////////////////////////////////////////////////////////////////////////////// ContextChild::ContextChild(const ContextPtr& context, const std::string& operation_name) { if (context) { diff --git a/core/src/server/context/Context.h b/core/src/server/context/Context.h index 94ea83ea59..72892d335d 100644 --- a/core/src/server/context/Context.h +++ b/core/src/server/context/Context.h @@ -18,6 +18,7 @@ #include #include "server/context/ConnectionContext.h" +#include "server/delivery/request/BaseRequest.h" #include "tracing/TraceContext.h" namespace milvus { @@ -50,8 +51,15 @@ class Context { bool IsConnectionBroken() const; + BaseRequest::RequestType + GetRequestType() const; + + void + SetRequestType(BaseRequest::RequestType type); + private: std::string request_id_; + BaseRequest::RequestType request_type_; std::shared_ptr trace_context_; ConnectionContextPtr context_; }; diff --git a/core/src/server/delivery/request/BaseRequest.cpp b/core/src/server/delivery/request/BaseRequest.cpp index 8427c3165e..558d8e3049 100644 --- a/core/src/server/delivery/request/BaseRequest.cpp +++ b/core/src/server/delivery/request/BaseRequest.cpp @@ -10,12 +10,14 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. 
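
Across the Server.cpp hunks above, every `s = config.Get...; if (!s.ok()) { return s; }` stanza collapses into a `STATUS_CHECK(...)` call; the macro itself is added to `utils/Status.h` further down in this patch. A minimal sketch of the pattern, assuming only a `Status` type with `ok()`:

    #include <cstdio>
    #include <string>

    // Simplified stand-in for milvus::Status.
    struct Status {
        int code;
        std::string message;
        bool ok() const { return code == 0; }
    };

    Status Ok() { return Status{0, ""}; }
    Status Fail(const std::string& why) { return Status{1, why}; }

    // Early-return on failure; do/while(false) makes the macro one statement.
    #define STATUS_CHECK(expr)      \
        do {                        \
            Status s_ = (expr);     \
            if (!s_.ok()) {         \
                return s_;          \
            }                       \
        } while (false)

    Status LoadConfig() {
        STATUS_CHECK(Ok());                   // passes, execution continues
        STATUS_CHECK(Fail("bad logs.path"));  // fails, returns here
        return Ok();                          // never reached
    }

    int main() {
        std::printf("%s\n", LoadConfig().message.c_str());  // bad logs.path
        return 0;
    }

The `do { ... } while (false)` wrapper is what lets the macro sit safely after an unbraced `if`, and it is the same shape the real definition below uses.
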
#include "server/delivery/request/BaseRequest.h" + +#include + +#include "server/context/Context.h" #include "utils/CommonUtil.h" #include "utils/Exception.h" #include "utils/Log.h" -#include - namespace milvus { namespace server { @@ -81,6 +83,9 @@ BaseRequest::BaseRequest(const std::shared_ptr& context bool async) : context_(context), type_(type), async_(async), done_(false) { request_group_ = milvus::server::RequestGroup(type); + if (nullptr != context_) { + context_->SetRequestType(type_); + } } BaseRequest::~BaseRequest() { diff --git a/core/src/server/delivery/request/BaseRequest.h b/core/src/server/delivery/request/BaseRequest.h index 2ac52b86da..8a629942e3 100644 --- a/core/src/server/delivery/request/BaseRequest.h +++ b/core/src/server/delivery/request/BaseRequest.h @@ -17,7 +17,6 @@ #include "grpc/gen-status/status.grpc.pb.h" #include "grpc/gen-status/status.pb.h" #include "query/GeneralQuery.h" -#include "server/context/Context.h" #include "utils/Json.h" #include "utils/Status.h" @@ -103,6 +102,8 @@ struct PartitionParam { } }; +class Context; + class BaseRequest { public: enum RequestType { diff --git a/core/src/server/delivery/request/FlushRequest.cpp b/core/src/server/delivery/request/FlushRequest.cpp index 2f0fd7c6ef..f8bec5effa 100644 --- a/core/src/server/delivery/request/FlushRequest.cpp +++ b/core/src/server/delivery/request/FlushRequest.cpp @@ -49,6 +49,13 @@ FlushRequest::OnExecute() { Status status = Status::OK(); LOG_SERVER_DEBUG_ << hdr; + // flush all collections + if (collection_names_.empty()) { + status = DBWrapper::DB()->Flush(); + return status; + } + + // flush specified collections for (auto& name : collection_names_) { // only process root collection, ignore partition collection engine::meta::CollectionSchema collection_schema; diff --git a/core/src/server/delivery/request/HasCollectionRequest.cpp b/core/src/server/delivery/request/HasCollectionRequest.cpp index 5e16a4f345..10f4c7d5d2 100644 --- a/core/src/server/delivery/request/HasCollectionRequest.cpp +++ b/core/src/server/delivery/request/HasCollectionRequest.cpp @@ -50,20 +50,10 @@ HasCollectionRequest::OnExecute() { status = DBWrapper::DB()->HasNativeCollection(collection_name_, has_collection_); fiu_do_on("HasCollectionRequest.OnExecute.throw_std_exception", throw std::exception()); - // only process root collection, ignore partition collection - if (has_collection_) { - engine::meta::CollectionSchema collection_schema; - collection_schema.collection_id_ = collection_name_; - status = DBWrapper::DB()->DescribeCollection(collection_schema); - if (!collection_schema.owner_collection_.empty()) { - has_collection_ = false; - } - } + return status; } catch (std::exception& ex) { return Status(SERVER_UNEXPECTED_ERROR, ex.what()); } - - return Status::OK(); } } // namespace server diff --git a/core/src/server/delivery/request/SearchCombineRequest.cpp b/core/src/server/delivery/request/SearchCombineRequest.cpp index 996cfaecc3..fa0c666640 100644 --- a/core/src/server/delivery/request/SearchCombineRequest.cpp +++ b/core/src/server/delivery/request/SearchCombineRequest.cpp @@ -384,6 +384,22 @@ SearchCombineRequest::OnExecute() { return status; } + // avoid memcpy crash, check id count = target vector count * topk + if (result_ids.size() != total_count * search_topk_) { + status = Status(DB_ERROR, "Result count doesn't match target vectors count"); + // let all request return + FreeRequests(status); + return status; + } + + // avoid memcpy crash, check distance count = id count + if (result_distances.size() != 
result_ids.size()) { + status = Status(DB_ERROR, "Result distance and id count doesn't match"); + // let all request return + FreeRequests(status); + return status; + } + // step 5: construct result array offset = 0; for (auto& request : request_list_) { diff --git a/core/src/server/delivery/strategy/SearchReqStrategy.cpp b/core/src/server/delivery/strategy/SearchReqStrategy.cpp index 2ab3f6ff1f..3b49ed6964 100644 --- a/core/src/server/delivery/strategy/SearchReqStrategy.cpp +++ b/core/src/server/delivery/strategy/SearchReqStrategy.cpp @@ -41,14 +41,11 @@ SearchReqStrategy::ReScheduleQueue(const BaseRequestPtr& request, std::queueGetRequestType() == BaseRequest::kSearch) { SearchRequestPtr last_search_req = std::static_pointer_cast(last_req); if (SearchCombineRequest::CanCombine(last_search_req, new_search_req)) { - // pop last request - queue.pop(); - // combine request SearchCombineRequestPtr combine_request = std::make_shared(); combine_request->Combine(last_search_req); combine_request->Combine(new_search_req); - queue.push(combine_request); + queue.back() = combine_request; // replace the last request to combine request LOG_SERVER_DEBUG_ << "Combine 2 search request"; } else { // directly put to queue diff --git a/core/src/server/grpc_impl/GrpcRequestHandler.cpp b/core/src/server/grpc_impl/GrpcRequestHandler.cpp index 46b1bfc1e5..3c1d312284 100644 --- a/core/src/server/grpc_impl/GrpcRequestHandler.cpp +++ b/core/src/server/grpc_impl/GrpcRequestHandler.cpp @@ -73,6 +73,25 @@ ErrorMap(ErrorCode code) { } } +std::string +RequestMap(BaseRequest::RequestType request_type) { + static const std::unordered_map request_map = { + {BaseRequest::kInsert, "Insert"}, + {BaseRequest::kCreateIndex, "CreateIndex"}, + {BaseRequest::kSearch, "Search"}, + {BaseRequest::kSearchByID, "SearchByID"}, + {BaseRequest::kHybridSearch, "HybridSearch"}, + {BaseRequest::kFlush, "Flush"}, + {BaseRequest::kCompact, "Compact"}, + }; + + if (request_map.find(request_type) != request_map.end()) { + return request_map.at(request_type); + } else { + return "OtherRequest"; + } +} + namespace { void CopyRowRecords(const google::protobuf::RepeatedPtrField<::milvus::grpc::RowRecord>& grpc_records, @@ -670,8 +689,30 @@ GrpcRequestHandler::Cmd(::grpc::ServerContext* context, const ::milvus::grpc::Co LOG_SERVER_INFO_ << LogOut("Request [%s] %s begin.", GetContext(context)->RequestID().c_str(), __func__); std::string reply; - Status status = request_handler_.Cmd(GetContext(context), request->cmd(), reply); - response->set_string_reply(reply); + Status status; + + std::string cmd = request->cmd(); + std::vector requests; + if (cmd == "requests") { + std::lock_guard lock(context_map_mutex_); + for (auto& iter : context_map_) { + if (nullptr == iter.second) { + continue; + } + if (iter.second->RequestID() == get_request_id(context)) { + continue; + } + auto request_str = RequestMap(iter.second->GetRequestType()) + "-" + iter.second->RequestID(); + requests.emplace_back(request_str); + } + nlohmann::json reply_json; + reply_json["requests"] = requests; + reply = reply_json.dump(); + response->set_string_reply(reply); + } else { + status = request_handler_.Cmd(GetContext(context), cmd, reply); + response->set_string_reply(reply); + } LOG_SERVER_INFO_ << LogOut("Request [%s] %s end.", GetContext(context)->RequestID().c_str(), __func__); SET_RESPONSE(response->mutable_status(), status, context); diff --git a/core/src/server/grpc_impl/GrpcServer.cpp b/core/src/server/grpc_impl/GrpcServer.cpp index 4598bfb5a8..4cc62fe3b5 100644 --- 
a/core/src/server/grpc_impl/GrpcServer.cpp +++ b/core/src/server/grpc_impl/GrpcServer.cpp @@ -77,8 +77,8 @@ GrpcServer::StartService() { Config& config = Config::GetInstance(); std::string address, port; - CONFIG_CHECK(config.GetServerConfigAddress(address)); - CONFIG_CHECK(config.GetServerConfigPort(port)); + STATUS_CHECK(config.GetServerConfigAddress(address)); + STATUS_CHECK(config.GetServerConfigPort(port)); std::string server_address(address + ":" + port); diff --git a/core/src/server/web_impl/WebServer.cpp b/core/src/server/web_impl/WebServer.cpp index cd9f3918c4..d6bba3828e 100644 --- a/core/src/server/web_impl/WebServer.cpp +++ b/core/src/server/web_impl/WebServer.cpp @@ -47,7 +47,7 @@ WebServer::StartService() { Config& config = Config::GetInstance(); std::string port; - CONFIG_CHECK(config.GetServerConfigWebPort(port)); + STATUS_CHECK(config.GetServerConfigWebPort(port)); { AppComponent components = AppComponent(std::stoi(port)); diff --git a/core/src/server/web_impl/component/AppComponent.hpp b/core/src/server/web_impl/component/AppComponent.hpp index 6b04c51ae7..b4af0dab22 100644 --- a/core/src/server/web_impl/component/AppComponent.hpp +++ b/core/src/server/web_impl/component/AppComponent.hpp @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/core/src/server/web_impl/handler/WebRequestHandler.cpp b/core/src/server/web_impl/handler/WebRequestHandler.cpp index 8571e66188..0b5f536bd6 100644 --- a/core/src/server/web_impl/handler/WebRequestHandler.cpp +++ b/core/src/server/web_impl/handler/WebRequestHandler.cpp @@ -1457,7 +1457,35 @@ WebRequestHandler::ShowSegments(const OString& collection_name, const OQueryPara ASSIGN_RETURN_STATUS_DTO(status) } - nlohmann::json result_json = nlohmann::json::parse(info); + nlohmann::json info_json = nlohmann::json::parse(info); + nlohmann::json segments_json = nlohmann::json::array(); + for (auto& par : info_json["partitions"]) { + if (!(all_required || tag.empty() || tag == par["tag"])) { + continue; + } + + auto segments = par["segments"]; + if (!segments.is_null()) { + for (auto& seg : segments) { + seg["partition_tag"] = par["tag"]; + segments_json.push_back(seg); + } + } + } + nlohmann::json result_json; + if (!all_required) { + int64_t size = segments_json.size(); + int iter_begin = std::min(size, offset); + int iter_end = std::min(size, offset + page_size); + + nlohmann::json segments_slice_json = nlohmann::json::array(); + segments_slice_json.insert(segments_slice_json.begin(), segments_json.begin() + iter_begin, + segments_json.begin() + iter_end); + result_json["segments"] = segments_slice_json; // segments_json; + } else { + result_json["segments"] = segments_json; + } + result_json["count"] = segments_json.size(); AddStatusToJson(result_json, status.code(), status.message()); response = result_json.dump().c_str(); @@ -1535,9 +1563,14 @@ WebRequestHandler::Insert(const OString& collection_name, const OString& body, V } auto& id_array = vectors.id_array_; id_array.clear(); - for (auto& id_str : ids_json) { - int64_t id = std::stol(id_str.get()); - id_array.emplace_back(id); + try { + for (auto& id_str : ids_json) { + int64_t id = std::stol(id_str.get()); + id_array.emplace_back(id); + } + } catch (std::exception& e) { + std::string err_msg = std::string("Cannot convert vectors id. 
details: ") + e.what(); + RETURN_STATUS_DTO(SERVER_UNEXPECTED_ERROR, err_msg.c_str()); } } diff --git a/core/src/utils/Status.h b/core/src/utils/Status.h index 6c1c9109b4..e67d6ed048 100644 --- a/core/src/utils/Status.h +++ b/core/src/utils/Status.h @@ -17,6 +17,15 @@ namespace milvus { +class Status; +#define STATUS_CHECK(func) \ + do { \ + Status s = func; \ + if (!s.ok()) { \ + return s; \ + } \ + } while (false) + using StatusCode = ErrorCode; class Status { diff --git a/core/thirdparty/versions.txt b/core/thirdparty/versions.txt index ad1ba3e299..ba5adc8cf9 100644 --- a/core/thirdparty/versions.txt +++ b/core/thirdparty/versions.txt @@ -11,7 +11,7 @@ GRPC_VERSION=master ZLIB_VERSION=v1.2.11 OPENTRACING_VERSION=v1.5.1 FIU_VERSION=1.00 -OATPP_VERSION=1.0.0 +OATPP_VERSION=1.0.1 AWS_VERSION=1.7.250 # vim: set filetype=sh: diff --git a/core/unittest/CMakeLists.txt b/core/unittest/CMakeLists.txt index 93eb9809fc..a3d085a146 100644 --- a/core/unittest/CMakeLists.txt +++ b/core/unittest/CMakeLists.txt @@ -20,6 +20,9 @@ include_directories(${MILVUS_ENGINE_SRC}) include_directories(${MILVUS_THIRDPARTY_SRC}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${MILVUS_ENGINE_SRC}/grpc/gen-status) +include_directories(${MILVUS_ENGINE_SRC}/grpc/gen-milvus) + aux_source_directory(${MILVUS_ENGINE_SRC}/cache cache_files) aux_source_directory(${MILVUS_ENGINE_SRC}/config config_files) aux_source_directory(${MILVUS_ENGINE_SRC}/config/handler config_handler_files) @@ -28,6 +31,7 @@ aux_source_directory(${MILVUS_ENGINE_SRC}/db db_main_files) aux_source_directory(${MILVUS_ENGINE_SRC}/db/engine db_engine_files) aux_source_directory(${MILVUS_ENGINE_SRC}/db/insert db_insert_files) aux_source_directory(${MILVUS_ENGINE_SRC}/db/meta db_meta_files) +aux_source_directory(${MILVUS_ENGINE_SRC}/db/merge db_merge_files) aux_source_directory(${MILVUS_ENGINE_SRC}/db/wal db_wal_files) aux_source_directory(${MILVUS_ENGINE_SRC}/search search_files) aux_source_directory(${MILVUS_ENGINE_SRC}/query query_files) @@ -140,6 +144,7 @@ set(common_files ${db_engine_files} ${db_insert_files} ${db_meta_files} + ${db_merge_files} ${db_wal_files} ${metrics_files} ${thirdparty_files} @@ -149,6 +154,7 @@ set(common_files ${helper_files} ${server_init_files} ${server_context_files} + ${grpc_service_files} ${tracing_files} ${codecs_files} ${codecs_default_files} @@ -157,6 +163,14 @@ set(common_files ${query_files} ) +set(grpc_lib + grpcpp_channelz + grpc++ + grpc + grpc_protobuf + grpc_protoc + ) + set(unittest_libs sqlite libboost_system.a @@ -172,6 +186,7 @@ set(unittest_libs opentracing opentracing_mocktracer fiu + ${grpc_lib} ) if (MILVUS_WITH_AWS) diff --git a/core/unittest/server/CMakeLists.txt b/core/unittest/server/CMakeLists.txt index abd80750de..e8e81299ae 100644 --- a/core/unittest/server/CMakeLists.txt +++ b/core/unittest/server/CMakeLists.txt @@ -23,27 +23,16 @@ set(test_files include_directories("${CUDA_TOOLKIT_ROOT_DIR}/include") link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") -include_directories(${MILVUS_ENGINE_SRC}/grpc/gen-status) -include_directories(${MILVUS_ENGINE_SRC}/grpc/gen-milvus) - set(util_files ${MILVUS_ENGINE_SRC}/utils/StringHelpFunctions.cpp ${MILVUS_ENGINE_SRC}/utils/LogUtil.cpp ${MILVUS_ENGINE_SRC}/utils/SignalUtil.cpp) -set(grpc_service_files - ${MILVUS_ENGINE_SRC}/grpc/gen-milvus/milvus.grpc.pb.cc - ${MILVUS_ENGINE_SRC}/grpc/gen-milvus/milvus.pb.cc - ${MILVUS_ENGINE_SRC}/grpc/gen-status/status.grpc.pb.cc - ${MILVUS_ENGINE_SRC}/grpc/gen-status/status.pb.cc - ) - set(server_test_files 
${common_files} ${server_files} ${server_init_files} ${grpc_server_files} - ${grpc_service_files} ${server_delivery_files} ${web_server_files} ${util_files} @@ -53,19 +42,13 @@ set(server_test_files add_executable(test_server ${server_test_files}) -set(grpc_lib - grpcpp_channelz - grpc++ - grpc - grpc_protobuf - grpc_protoc - ) + target_link_libraries(test_server knowhere metrics stdc++ - ${grpc_lib} + # ${grpc_lib} ${unittest_libs} oatpp ) diff --git a/core/unittest/server/test_config.cpp b/core/unittest/server/test_config.cpp index 423fa43bda..4acf627bd1 100644 --- a/core/unittest/server/test_config.cpp +++ b/core/unittest/server/test_config.cpp @@ -209,35 +209,35 @@ TEST_F(ConfigTest, SERVER_CONFIG_VALID_TEST) { ASSERT_TRUE(config.GetStorageConfigSecondaryPath(str_val).ok()); ASSERT_TRUE(str_val == storage_secondary_path); - bool storage_s3_enable = true; - ASSERT_TRUE(config.SetStorageConfigS3Enable(std::to_string(storage_s3_enable)).ok()); - ASSERT_TRUE(config.GetStorageConfigS3Enable(bool_val).ok()); - ASSERT_TRUE(bool_val == storage_s3_enable); - - std::string storage_s3_addr = "192.168.1.100"; - ASSERT_TRUE(config.SetStorageConfigS3Address(storage_s3_addr).ok()); - ASSERT_TRUE(config.GetStorageConfigS3Address(str_val).ok()); - ASSERT_TRUE(str_val == storage_s3_addr); - - std::string storage_s3_port = "12345"; - ASSERT_TRUE(config.SetStorageConfigS3Port(storage_s3_port).ok()); - ASSERT_TRUE(config.GetStorageConfigS3Port(str_val).ok()); - ASSERT_TRUE(str_val == storage_s3_port); - - std::string storage_s3_access_key = "minioadmin"; - ASSERT_TRUE(config.SetStorageConfigS3AccessKey(storage_s3_access_key).ok()); - ASSERT_TRUE(config.GetStorageConfigS3AccessKey(str_val).ok()); - ASSERT_TRUE(str_val == storage_s3_access_key); - - std::string storage_s3_secret_key = "minioadmin"; - ASSERT_TRUE(config.SetStorageConfigS3SecretKey(storage_s3_secret_key).ok()); - ASSERT_TRUE(config.GetStorageConfigS3SecretKey(str_val).ok()); - ASSERT_TRUE(str_val == storage_s3_secret_key); - - std::string storage_s3_bucket = "s3bucket"; - ASSERT_TRUE(config.SetStorageConfigS3Bucket(storage_s3_bucket).ok()); - ASSERT_TRUE(config.GetStorageConfigS3Bucket(str_val).ok()); - ASSERT_TRUE(str_val == storage_s3_bucket); +// bool storage_s3_enable = true; +// ASSERT_TRUE(config.SetStorageConfigS3Enable(std::to_string(storage_s3_enable)).ok()); +// ASSERT_TRUE(config.GetStorageConfigS3Enable(bool_val).ok()); +// ASSERT_TRUE(bool_val == storage_s3_enable); +// +// std::string storage_s3_addr = "192.168.1.100"; +// ASSERT_TRUE(config.SetStorageConfigS3Address(storage_s3_addr).ok()); +// ASSERT_TRUE(config.GetStorageConfigS3Address(str_val).ok()); +// ASSERT_TRUE(str_val == storage_s3_addr); +// +// std::string storage_s3_port = "12345"; +// ASSERT_TRUE(config.SetStorageConfigS3Port(storage_s3_port).ok()); +// ASSERT_TRUE(config.GetStorageConfigS3Port(str_val).ok()); +// ASSERT_TRUE(str_val == storage_s3_port); +// +// std::string storage_s3_access_key = "minioadmin"; +// ASSERT_TRUE(config.SetStorageConfigS3AccessKey(storage_s3_access_key).ok()); +// ASSERT_TRUE(config.GetStorageConfigS3AccessKey(str_val).ok()); +// ASSERT_TRUE(str_val == storage_s3_access_key); +// +// std::string storage_s3_secret_key = "minioadmin"; +// ASSERT_TRUE(config.SetStorageConfigS3SecretKey(storage_s3_secret_key).ok()); +// ASSERT_TRUE(config.GetStorageConfigS3SecretKey(str_val).ok()); +// ASSERT_TRUE(str_val == storage_s3_secret_key); +// +// std::string storage_s3_bucket = "s3bucket"; +// 
ASSERT_TRUE(config.SetStorageConfigS3Bucket(storage_s3_bucket).ok()); +// ASSERT_TRUE(config.GetStorageConfigS3Bucket(str_val).ok()); +// ASSERT_TRUE(str_val == storage_s3_bucket); /* metric config */ bool metric_enable_monitor = false; @@ -417,9 +417,9 @@ TEST_F(ConfigTest, SERVER_CONFIG_CLI_TEST) { ASSERT_TRUE(s.ok()); /* storage config */ - std::string storage_s3_enable = "true"; - get_cmd = gen_get_command(ms::CONFIG_STORAGE, ms::CONFIG_STORAGE_S3_ENABLE); - set_cmd = gen_set_command(ms::CONFIG_STORAGE, ms::CONFIG_STORAGE_S3_ENABLE, storage_s3_enable); + std::string storage_primary_path = "/tmp/milvus1"; + get_cmd = gen_get_command(ms::CONFIG_STORAGE, ms::CONFIG_STORAGE_PRIMARY_PATH); + set_cmd = gen_set_command(ms::CONFIG_STORAGE, ms::CONFIG_STORAGE_PRIMARY_PATH, storage_primary_path); s = config.ProcessConfigCli(dummy, set_cmd); ASSERT_TRUE(s.ok()); s = config.ProcessConfigCli(result, get_cmd); @@ -599,18 +599,18 @@ TEST_F(ConfigTest, SERVER_CONFIG_INVALID_TEST) { ASSERT_FALSE(config.SetStorageConfigSecondaryPath("../milvus,./zilliz").ok()); ASSERT_FALSE(config.SetStorageConfigSecondaryPath("/home/^^__^^,/zilliz").ok()); - ASSERT_FALSE(config.SetStorageConfigS3Enable("10").ok()); - - ASSERT_FALSE(config.SetStorageConfigS3Address("127.0.0").ok()); - - ASSERT_FALSE(config.SetStorageConfigS3Port("100").ok()); - ASSERT_FALSE(config.SetStorageConfigS3Port("100000").ok()); - - ASSERT_FALSE(config.SetStorageConfigS3AccessKey("").ok()); - - ASSERT_FALSE(config.SetStorageConfigS3SecretKey("").ok()); - - ASSERT_FALSE(config.SetStorageConfigS3Bucket("").ok()); +// ASSERT_FALSE(config.SetStorageConfigS3Enable("10").ok()); +// +// ASSERT_FALSE(config.SetStorageConfigS3Address("127.0.0").ok()); +// +// ASSERT_FALSE(config.SetStorageConfigS3Port("100").ok()); +// ASSERT_FALSE(config.SetStorageConfigS3Port("100000").ok()); +// +// ASSERT_FALSE(config.SetStorageConfigS3AccessKey("").ok()); +// +// ASSERT_FALSE(config.SetStorageConfigS3SecretKey("").ok()); +// +// ASSERT_FALSE(config.SetStorageConfigS3Bucket("").ok()); /* metric config */ ASSERT_FALSE(config.SetMetricConfigEnableMonitor("Y").ok()); diff --git a/core/unittest/server/test_rpc.cpp b/core/unittest/server/test_rpc.cpp index 4294ab3889..ad5de73cfb 100644 --- a/core/unittest/server/test_rpc.cpp +++ b/core/unittest/server/test_rpc.cpp @@ -934,6 +934,10 @@ TEST_F(RpcHandlerTest, CMD_TEST) { handler->Cmd(&context, &command, &reply); ASSERT_EQ(reply.string_reply(), MILVUS_VERSION); + command.set_cmd("requests"); + handler->Cmd(&context, &command, &reply); + ASSERT_EQ(reply.status().error_code(), ::grpc::Status::OK.error_code()); + command.set_cmd("tasktable"); handler->Cmd(&context, &command, &reply); ASSERT_EQ(reply.status().error_code(), ::grpc::Status::OK.error_code()); diff --git a/core/unittest/server/test_web.cpp b/core/unittest/server/test_web.cpp index 7135909730..02e0e38b98 100644 --- a/core/unittest/server/test_web.cpp +++ b/core/unittest/server/test_web.cpp @@ -1026,11 +1026,12 @@ TEST_F(WebControllerTest, SHOW_SEGMENTS) { std::string json_str = response->readBodyToString()->c_str(); auto result_json = nlohmann::json::parse(json_str); - ASSERT_TRUE(result_json.contains("row_count")); - - ASSERT_TRUE(result_json.contains("partitions")); - auto segments_json = result_json["partitions"]; + ASSERT_TRUE(result_json.contains("count")); + ASSERT_TRUE(result_json.contains("segments")); + auto segments_json = result_json["segments"]; ASSERT_TRUE(segments_json.is_array()); + auto seg0_json = segments_json[0]; + 
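
The new CMD_TEST case above exercises the `requests` command that GrpcRequestHandler::Cmd gained earlier in this patch (#2240): the handler walks `context_map_`, skips the caller's own context, and returns one `<RequestType>-<RequestID>` string per in-flight rpc as JSON. A sketch of the reply construction in isolation (nlohmann::json, as in the handler):

    #include <cstdio>
    #include <string>
    #include <vector>
    #include <nlohmann/json.hpp>

    // Serialize the running-request list the way Cmd("requests") does.
    std::string
    BuildRequestsReply(const std::vector<std::string>& running) {
        nlohmann::json reply;
        reply["requests"] = running;
        return reply.dump();
    }

    int main() {
        std::printf("%s\n", BuildRequestsReply({"Insert-42", "Search-43"}).c_str());
        // {"requests":["Insert-42","Search-43"]}
        return 0;
    }

Request types outside the tracked set (Insert, CreateIndex, Search, SearchByID, HybridSearch, Flush, Compact) are reported as "OtherRequest", per the `RequestMap` table above.
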
ASSERT_TRUE(seg0_json.contains("partition_tag")); // ASSERT_EQ(10, segments_json.size()); } @@ -1049,7 +1050,7 @@ TEST_F(WebControllerTest, GET_SEGMENT_INFO) { std::string json_str = response->readBodyToString()->c_str(); auto result_json = nlohmann::json::parse(json_str); - auto segment0_json = result_json["partitions"][0]["segments"][0]; + auto segment0_json = result_json["segments"][0]; std::string segment_name = segment0_json["name"]; // get segment ids @@ -1104,15 +1105,15 @@ TEST_F(WebControllerTest, SEGMENT_FILTER) { std::string json_str = response->readBodyToString()->c_str(); auto result_json = nlohmann::json::parse(json_str); - ASSERT_TRUE(result_json.contains("row_count")); + ASSERT_TRUE(result_json.contains("count")); - ASSERT_TRUE(result_json.contains("partitions")); - auto partitions_json = result_json["partitions"]; - ASSERT_TRUE(partitions_json.is_array()); - for (auto& part : partitions_json) { - ASSERT_TRUE(part.contains("tag")); + ASSERT_TRUE(result_json.contains("segments")); + auto segments_json = result_json["segments"]; + ASSERT_TRUE(segments_json.is_array()); + for (auto& part : segments_json) { + ASSERT_TRUE(part.contains("partition_tag")); } - ASSERT_EQ("_default", partitions_json[0]["tag"].get()); + ASSERT_EQ("_default", segments_json[0]["partition_tag"].get()); } TEST_F(WebControllerTest, SEARCH) { diff --git a/docker/build_env/cpu/centos7/Dockerfile b/docker/build_env/cpu/centos7/Dockerfile index 153946af51..92542ffcf5 100644 --- a/docker/build_env/cpu/centos7/Dockerfile +++ b/docker/build_env/cpu/centos7/Dockerfile @@ -15,17 +15,25 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] RUN yum install -y epel-release centos-release-scl-rh && yum install -y wget curl which && \ wget -qO- "https://cmake.org/files/v3.14/cmake-3.14.3-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local && \ - yum install -y ccache make automake git python3-pip libcurl-devel python3-devel boost-static mysql-devel \ + yum install -y make automake git python3-pip libcurl-devel python3-devel boost-static mysql-devel \ devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran llvm-toolset-7.0-clang llvm-toolset-7.0-clang-tools-extra \ - mysql lcov openblas-devel lapack-devel \ - && \ - rm -rf /var/cache/yum/* - -RUN echo "source scl_source enable devtoolset-7" >> /etc/profile.d/devtoolset-7.sh -RUN echo "source scl_source enable llvm-toolset-7.0" >> /etc/profile.d/llvm-toolset-7.sh + mysql lcov && \ + rm -rf /var/cache/yum/* && \ + echo "source scl_source enable devtoolset-7" >> /etc/profile.d/devtoolset-7.sh && \ + echo "source scl_source enable llvm-toolset-7.0" >> /etc/profile.d/llvm-toolset-7.sh ENV CLANG_TOOLS_PATH="/opt/rh/llvm-toolset-7.0/root/usr/bin" +RUN source /etc/profile.d/devtoolset-7.sh && \ + wget https://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz && \ + tar zxvf v0.3.9.tar.gz && cd OpenBLAS-0.3.9 && \ + make TARGET=CORE2 DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 USE_THREAD=0 USE_OPENMP=0 FC=gfortran CC=gcc COMMON_OPT="-O3 -g -fPIC" FCOMMON_OPT="-O3 -g -fPIC -frecursive" NMAX="NUM_THREADS=128" LIBPREFIX="libopenblas" LAPACKE="NO_LAPACKE=1" INTERFACE64=0 NO_STATIC=1 && \ + make PREFIX=/usr install && \ + cd .. 
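
The segment tests above pin down the reshaped listing: partitions are flattened into one `segments` array whose entries carry a `partition_tag`, a `count` field replaces `row_count`, and `offset`/`page_size` are clamped before slicing (see WebRequestHandler::ShowSegments earlier in the patch). The clamped slice as a standalone sketch:

    #include <algorithm>
    #include <cstdint>
    #include <nlohmann/json.hpp>

    // Return one page of the flattened segment list; out-of-range offsets
    // yield an empty page instead of throwing.
    nlohmann::json
    PageOf(const nlohmann::json& segments, int64_t offset, int64_t page_size) {
        int64_t size = static_cast<int64_t>(segments.size());
        int64_t begin = std::min(size, std::max<int64_t>(0, offset));
        int64_t end = std::min(size, begin + page_size);

        nlohmann::json page = nlohmann::json::array();
        for (int64_t i = begin; i < end; ++i) {
            page.push_back(segments[i]);
        }
        return page;
    }

The iterator-based `insert` in the handler is equivalent; the loop form just makes the clamping explicit. Note that `count` reports the full filtered size, not the page size, so clients can page without a second query.
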
&& rm -rf OpenBLAS-0.3.9 && rm v0.3.9.tar.gz + +RUN yum install -y ccache && \ + rm -rf /var/cache/yum/* + COPY docker-entrypoint.sh /app/docker-entrypoint.sh WORKDIR /root diff --git a/docker/build_env/cpu/ubuntu16.04/Dockerfile b/docker/build_env/cpu/ubuntu16.04/Dockerfile index be428c2bf7..5da66be6cb 100644 --- a/docker/build_env/cpu/ubuntu16.04/Dockerfile +++ b/docker/build_env/cpu/ubuntu16.04/Dockerfile @@ -21,11 +21,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends wget ca-certifi sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && \ wget -qO- "https://cmake.org/files/v3.14/cmake-3.14.3-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local && \ apt-get update && apt-get install -y --no-install-recommends \ - g++ git gfortran lsb-core ccache \ + g++ git gfortran lsb-core \ libboost-serialization-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev \ curl libtool automake libssl-dev pkg-config libcurl4-openssl-dev python3-pip \ clang-format-6.0 clang-tidy-6.0 \ - lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.5-281 intel-mkl-core-2019.5-281 libopenblas-dev liblapack3 && \ + lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.5-281 intel-mkl-core-2019.5-281 && \ apt-get remove --purge -y && \ rm -rf /var/lib/apt/lists/* @@ -34,6 +34,16 @@ RUN ln -s /usr/lib/x86_64-linux-gnu/libmysqlclient.so \ RUN sh -c 'echo export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2019.5.281/linux/mkl/lib/intel64:\$LD_LIBRARY_PATH > /etc/profile.d/mkl.sh' +RUN wget https://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz && \ + tar zxvf v0.3.9.tar.gz && cd OpenBLAS-0.3.9 && \ + make TARGET=CORE2 DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 USE_THREAD=0 USE_OPENMP=0 FC=gfortran CC=gcc COMMON_OPT="-O3 -g -fPIC" FCOMMON_OPT="-O3 -g -fPIC -frecursive" NMAX="NUM_THREADS=128" LIBPREFIX="libopenblas" LAPACKE="NO_LAPACKE=1" INTERFACE64=0 NO_STATIC=1 && \ + make PREFIX=/usr install && \ + cd .. 
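
Every build image in this patch follows the recipe above, repeated once per Dockerfile: drop the distro `openblas-devel`/`libopenblas-dev` packages, compile OpenBLAS 0.3.9 from source with `DYNAMIC_ARCH=1` and `DYNAMIC_OLDER=1` (one shared library carrying kernels for multiple CPU generations, selected at runtime), and install ccache in its own layer so the expensive BLAS layer stays cached (#2268). A quick linkage check for the resulting library (hypothetical smoke test; compile with `-lopenblas`):

    // Confirms the OpenBLAS installed under PREFIX=/usr links and runs.
    #include <cblas.h>
    #include <cstdio>

    int main() {
        double x[3] = {1.0, 2.0, 3.0};
        double y[3] = {4.0, 5.0, 6.0};
        double d = cblas_ddot(3, x, 1, y, 1);  // dense dot product
        std::printf("dot = %.1f\n", d);        // expect dot = 32.0
        return 0;
    }
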
&& rm -rf OpenBLAS-0.3.9 && rm v0.3.9.tar.gz + +RUN apt-get update && apt-get install -y --no-install-recommends ccache && \ + apt-get remove --purge -y && \ + rm -rf /var/lib/apt/lists/* + COPY docker-entrypoint.sh /app/docker-entrypoint.sh WORKDIR /root diff --git a/docker/build_env/cpu/ubuntu18.04/Dockerfile b/docker/build_env/cpu/ubuntu18.04/Dockerfile index 79ddbe4367..81ac022bd8 100644 --- a/docker/build_env/cpu/ubuntu18.04/Dockerfile +++ b/docker/build_env/cpu/ubuntu18.04/Dockerfile @@ -21,11 +21,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends wget ca-certifi sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && \ wget -qO- "https://cmake.org/files/v3.14/cmake-3.14.3-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local && \ apt-get update && apt-get install -y --no-install-recommends \ - g++ git gfortran lsb-core ccache \ + g++ git gfortran lsb-core \ libboost-serialization-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev \ curl libtool automake libssl-dev pkg-config libcurl4-openssl-dev python3-pip \ clang-format-6.0 clang-tidy-6.0 \ - lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.5-281 intel-mkl-core-2019.5-281 libopenblas-dev liblapack3 && \ + lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.5-281 intel-mkl-core-2019.5-281 && \ apt-get remove --purge -y && \ rm -rf /var/lib/apt/lists/* @@ -34,6 +34,16 @@ RUN ln -s /usr/lib/x86_64-linux-gnu/libmysqlclient.so \ RUN sh -c 'echo export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2019.5.281/linux/mkl/lib/intel64:\$LD_LIBRARY_PATH > /etc/profile.d/mkl.sh' +RUN wget https://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz && \ + tar zxvf v0.3.9.tar.gz && cd OpenBLAS-0.3.9 && \ + make TARGET=CORE2 DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 USE_THREAD=0 USE_OPENMP=0 FC=gfortran CC=gcc COMMON_OPT="-O3 -g -fPIC" FCOMMON_OPT="-O3 -g -fPIC -frecursive" NMAX="NUM_THREADS=128" LIBPREFIX="libopenblas" LAPACKE="NO_LAPACKE=1" INTERFACE64=0 NO_STATIC=1 && \ + make PREFIX=/usr install && \ + cd .. 
&& rm -rf OpenBLAS-0.3.9 && rm v0.3.9.tar.gz + +RUN apt-get update && apt-get install -y --no-install-recommends ccache && \ + apt-get remove --purge -y && \ + rm -rf /var/lib/apt/lists/* + COPY docker-entrypoint.sh /app/docker-entrypoint.sh WORKDIR /root diff --git a/docker/build_env/gpu/centos7/Dockerfile b/docker/build_env/gpu/centos7/Dockerfile index ae7bdd83dc..ebccc6750a 100644 --- a/docker/build_env/gpu/centos7/Dockerfile +++ b/docker/build_env/gpu/centos7/Dockerfile @@ -17,17 +17,25 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] RUN yum install -y epel-release centos-release-scl-rh && yum install -y wget curl which && \ wget -qO- "https://cmake.org/files/v3.14/cmake-3.14.3-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local && \ - yum install -y ccache make automake git python3-pip libcurl-devel python3-devel boost-static mysql-devel \ + yum install -y make automake git python3-pip libcurl-devel python3-devel boost-static mysql-devel \ devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran llvm-toolset-7.0-clang llvm-toolset-7.0-clang-tools-extra \ - mysql lcov openblas-devel lapack-devel \ - && \ - rm -rf /var/cache/yum/* - -RUN echo "source scl_source enable devtoolset-7" >> /etc/profile.d/devtoolset-7.sh -RUN echo "source scl_source enable llvm-toolset-7.0" >> /etc/profile.d/llvm-toolset-7.sh + mysql lcov && \ + rm -rf /var/cache/yum/* && \ + echo "source scl_source enable devtoolset-7" >> /etc/profile.d/devtoolset-7.sh && \ + echo "source scl_source enable llvm-toolset-7.0" >> /etc/profile.d/llvm-toolset-7.sh ENV CLANG_TOOLS_PATH="/opt/rh/llvm-toolset-7.0/root/usr/bin" +RUN source /etc/profile.d/devtoolset-7.sh && \ + wget https://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz && \ + tar zxvf v0.3.9.tar.gz && cd OpenBLAS-0.3.9 && \ + make TARGET=CORE2 DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 USE_THREAD=0 USE_OPENMP=0 FC=gfortran CC=gcc COMMON_OPT="-O3 -g -fPIC" FCOMMON_OPT="-O3 -g -fPIC -frecursive" NMAX="NUM_THREADS=128" LIBPREFIX="libopenblas" LAPACKE="NO_LAPACKE=1" INTERFACE64=0 NO_STATIC=1 && \ + make PREFIX=/usr install && \ + cd .. 
&& rm -rf OpenBLAS-0.3.9 && rm v0.3.9.tar.gz + +RUN yum install -y ccache && \ + rm -rf /var/cache/yum/* + COPY docker-entrypoint.sh /app/docker-entrypoint.sh WORKDIR /root diff --git a/docker/build_env/gpu/ubuntu16.04/Dockerfile b/docker/build_env/gpu/ubuntu16.04/Dockerfile index ea229e3f82..2233c4b1fc 100644 --- a/docker/build_env/gpu/ubuntu16.04/Dockerfile +++ b/docker/build_env/gpu/ubuntu16.04/Dockerfile @@ -21,10 +21,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends wget && \ apt-key add /tmp/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && \ sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && \ apt-get update && apt-get install -y --no-install-recommends \ - git flex bison gfortran lsb-core ccache \ + git flex bison gfortran lsb-core \ curl libtool automake libboost1.58-all-dev libssl-dev pkg-config libcurl4-openssl-dev python3-pip \ clang-format-6.0 clang-tidy-6.0 \ - lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.5-281 intel-mkl-core-2019.5-281 libopenblas-dev liblapack3 && \ + lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.5-281 intel-mkl-core-2019.5-281 && \ apt-get remove --purge -y && \ rm -rf /var/lib/apt/lists/* @@ -32,6 +32,16 @@ RUN ln -s /usr/lib/x86_64-linux-gnu/libmysqlclient.so /usr/lib/x86_64-linux-gnu/ RUN sh -c 'echo export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2019.5.281/linux/mkl/lib/intel64:\$LD_LIBRARY_PATH > /etc/profile.d/mkl.sh' +RUN wget https://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz && \ + tar zxvf v0.3.9.tar.gz && cd OpenBLAS-0.3.9 && \ + make TARGET=CORE2 DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 USE_THREAD=0 USE_OPENMP=0 FC=gfortran CC=gcc COMMON_OPT="-O3 -g -fPIC" FCOMMON_OPT="-O3 -g -fPIC -frecursive" NMAX="NUM_THREADS=128" LIBPREFIX="libopenblas" LAPACKE="NO_LAPACKE=1" INTERFACE64=0 NO_STATIC=1 && \ + make PREFIX=/usr install && \ + cd .. 
&& rm -rf OpenBLAS-0.3.9 && rm v0.3.9.tar.gz + +RUN apt-get update && apt-get install -y --no-install-recommends ccache && \ + apt-get remove --purge -y && \ + rm -rf /var/lib/apt/lists/* + COPY docker-entrypoint.sh /app/docker-entrypoint.sh WORKDIR /root diff --git a/docker/build_env/gpu/ubuntu18.04/Dockerfile b/docker/build_env/gpu/ubuntu18.04/Dockerfile index 0cbaecde7f..847a97f2ee 100644 --- a/docker/build_env/gpu/ubuntu18.04/Dockerfile +++ b/docker/build_env/gpu/ubuntu18.04/Dockerfile @@ -21,10 +21,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends wget && \ apt-key add /tmp/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && \ sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && \ apt-get update && apt-get install -y --no-install-recommends \ - git flex bison gfortran lsb-core ccache \ + git flex bison gfortran lsb-core \ curl libtool automake libboost-all-dev libssl-dev pkg-config libcurl4-openssl-dev python3-pip \ clang-format-6.0 clang-tidy-6.0 \ - lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.5-281 intel-mkl-core-2019.5-281 libopenblas-dev liblapack3 && \ + lcov mysql-client libmysqlclient-dev intel-mkl-gnu-2019.5-281 intel-mkl-core-2019.5-281 && \ apt-get remove --purge -y && \ rm -rf /var/lib/apt/lists/* @@ -32,6 +32,16 @@ RUN ln -s /usr/lib/x86_64-linux-gnu/libmysqlclient.so /usr/lib/x86_64-linux-gnu/ RUN sh -c 'echo export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2019.5.281/linux/mkl/lib/intel64:\$LD_LIBRARY_PATH > /etc/profile.d/mkl.sh' +RUN wget https://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz && \ + tar zxvf v0.3.9.tar.gz && cd OpenBLAS-0.3.9 && \ + make TARGET=CORE2 DYNAMIC_ARCH=1 DYNAMIC_OLDER=1 USE_THREAD=0 USE_OPENMP=0 FC=gfortran CC=gcc COMMON_OPT="-O3 -g -fPIC" FCOMMON_OPT="-O3 -g -fPIC -frecursive" NMAX="NUM_THREADS=128" LIBPREFIX="libopenblas" LAPACKE="NO_LAPACKE=1" INTERFACE64=0 NO_STATIC=1 && \ + make PREFIX=/usr install && \ + cd .. 
&& rm -rf OpenBLAS-0.3.9 && rm v0.3.9.tar.gz + +RUN apt-get update && apt-get install -y --no-install-recommends ccache && \ + apt-get remove --purge -y && \ + rm -rf /var/lib/apt/lists/* + COPY docker-entrypoint.sh /app/docker-entrypoint.sh WORKDIR /root diff --git a/sdk/examples/qps/src/ClientTest.cpp b/sdk/examples/qps/src/ClientTest.cpp index 7f6700a7b9..65420b9f2f 100644 --- a/sdk/examples/qps/src/ClientTest.cpp +++ b/sdk/examples/qps/src/ClientTest.cpp @@ -174,7 +174,7 @@ ClientTest::InsertEntities(std::shared_ptr& conn) { milvus_sdk::TimeRecorder rc(title); milvus::Status stat = conn->Insert(parameters_.collection_name_, "", entity_array, record_ids); if (!stat.ok()) { - std::cout << "CreateIndex function call status: " << stat.message() << std::endl; + std::cout << "Insert function call status: " << stat.message() << std::endl; } // std::cout << "InsertEntities function call status: " << stat.message() << std::endl; // std::cout << "Returned id array count: " << record_ids.size() << std::endl; diff --git a/sdk/grpc/ClientProxy.cpp b/sdk/grpc/ClientProxy.cpp index 8d449929e1..b69abbf431 100644 --- a/sdk/grpc/ClientProxy.cpp +++ b/sdk/grpc/ClientProxy.cpp @@ -29,13 +29,10 @@ UriCheck(const std::string& uri) { return (index != std::string::npos); } -template +template void -ConstructSearchParam(const std::string& collection_name, - const std::vector& partition_tag_array, - int64_t topk, - const std::string& extra_params, - T& search_param) { +ConstructSearchParam(const std::string& collection_name, const std::vector& partition_tag_array, + int64_t topk, const std::string& extra_params, T& search_param) { search_param.set_collection_name(collection_name); search_param.set_topk(topk); milvus::grpc::KeyValuePair* kv = search_param.add_extra_params(); @@ -65,12 +62,22 @@ ConstructTopkResult(const ::milvus::grpc::TopKQueryResult& grpc_result, TopKQuer topk_query_result.reserve(grpc_result.row_num()); int64_t nq = grpc_result.row_num(); int64_t topk = grpc_result.ids().size() / nq; - for (int64_t i = 0; i < grpc_result.row_num(); i++) { + for (int64_t i = 0; i < nq; i++) { milvus::QueryResult one_result; one_result.ids.resize(topk); one_result.distances.resize(topk); memcpy(one_result.ids.data(), grpc_result.ids().data() + topk * i, topk * sizeof(int64_t)); memcpy(one_result.distances.data(), grpc_result.distances().data() + topk * i, topk * sizeof(float)); + + int valid_size = one_result.ids.size(); + while (valid_size > 0 && one_result.ids[valid_size - 1] == -1) { + valid_size--; + } + if (valid_size != topk) { + one_result.ids.resize(valid_size); + one_result.distances.resize(valid_size); + } + topk_query_result.emplace_back(one_result); } } @@ -286,8 +293,7 @@ ClientProxy::GetEntityByID(const std::string& collection_name, int64_t entity_id } Status -ClientProxy::GetEntitiesByID(const std::string& collection_name, - const std::vector& id_array, +ClientProxy::GetEntitiesByID(const std::string& collection_name, const std::vector& id_array, std::vector& entities_data) { try { entities_data.clear(); @@ -358,11 +364,7 @@ ClientProxy::Search(const std::string& collection_name, const std::vector& id_array, int64_t topk, - const std::string& extra_params, TopKQueryResult& topk_query_result) { + const std::vector& id_array, int64_t topk, const std::string& extra_params, + TopKQueryResult& topk_query_result) { try { // step 1: convert vectors data ::milvus::grpc::SearchByIDParam search_param; - ConstructSearchParam(collection_name, - partition_tag_array, - topk, - extra_params, - search_param); + 
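
The `ConstructTopkResult` change above fixes #2203: when a query matches fewer than `topk` vectors, the server pads the id array with `-1`, and the SDK now trims that padding so callers only see valid hits. The trimming step in isolation, sketched over plain vectors:

    #include <cstdint>
    #include <vector>

    // Drop the trailing -1 ids the server appends when fewer than topk
    // results exist, keeping ids and distances the same length.
    void
    TrimPadding(std::vector<int64_t>& ids, std::vector<float>& distances) {
        std::size_t valid = ids.size();
        while (valid > 0 && ids[valid - 1] == -1) {
            --valid;
        }
        ids.resize(valid);
        distances.resize(valid);
    }

For `ids = {7, 3, -1, -1}` both arrays shrink to length 2, mirroring the `valid_size` loop above.
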
ConstructSearchParam(collection_name, partition_tag_array, topk, extra_params, search_param); for (auto& id : id_array) { search_param.add_id_array(id); @@ -664,9 +662,7 @@ CopyVectorField(::milvus::grpc::RowRecord* target, const Entity& src) { } Status -ClientProxy::InsertEntity(const std::string& collection_name, - const std::string& partition_tag, - HEntity& entities, +ClientProxy::InsertEntity(const std::string& collection_name, const std::string& partition_tag, HEntity& entities, std::vector& id_array) { Status status; try { @@ -774,10 +770,8 @@ WriteQueryToProto(::milvus::grpc::GeneralQuery* general_query, BooleanQueryPtr b } Status -ClientProxy::HybridSearch(const std::string& collection_name, - const std::vector& partition_list, - BooleanQueryPtr& boolean_query, - const std::string& extra_params, +ClientProxy::HybridSearch(const std::string& collection_name, const std::vector& partition_list, + BooleanQueryPtr& boolean_query, const std::string& extra_params, TopKQueryResult& topk_query_result) { try { // convert boolean_query to proto diff --git a/shards/mishards/connections.py b/shards/mishards/connections.py index ab8c78e302..e94e99f578 100644 --- a/shards/mishards/connections.py +++ b/shards/mishards/connections.py @@ -5,7 +5,7 @@ import threading from functools import wraps from collections import defaultdict from milvus import Milvus -from milvus.client.hooks import BaseSearchHook +# from milvus.client.hooks import BaseSearchHook from mishards import (settings, exceptions, topology) from utils import singleton @@ -13,216 +13,216 @@ from utils import singleton logger = logging.getLogger(__name__) -class Searchook(BaseSearchHook): - - def on_response(self, *args, **kwargs): - return True - - -class Connection: - def __init__(self, name, uri, max_retry=1, error_handlers=None, **kwargs): - self.name = name - self.uri = uri - self.max_retry = max_retry - self.retried = 0 - self.conn = Milvus() - self.error_handlers = [] if not error_handlers else error_handlers - self.on_retry_func = kwargs.get('on_retry_func', None) - - # define search hook - self.conn.set_hook(search_in_file=Searchook()) - # self._connect() - - def __str__(self): - return 'Connection:name=\"{}\";uri=\"{}\"'.format(self.name, self.uri) - - def _connect(self, metadata=None): - try: - self.conn.connect(uri=self.uri) - except Exception as e: - if not self.error_handlers: - raise exceptions.ConnectionConnectError(message=str(e), metadata=metadata) - for handler in self.error_handlers: - handler(e, metadata=metadata) - - @property - def can_retry(self): - return self.retried < self.max_retry - - @property - def connected(self): - return self.conn.connected() - - def on_retry(self): - if self.on_retry_func: - self.on_retry_func(self) - else: - self.retried > 1 and logger.warning('{} is retrying {}'.format(self, self.retried)) - - def on_connect(self, metadata=None): - while not self.connected and self.can_retry: - self.retried += 1 - self.on_retry() - self._connect(metadata=metadata) - - if not self.can_retry and not self.connected: - raise exceptions.ConnectionConnectError(message='Max retry {} reached!'.format(self.max_retry, - metadata=metadata)) - - self.retried = 0 - - def connect(self, func, exception_handler=None): - @wraps(func) - def inner(*args, **kwargs): - self.on_connect() - try: - return func(*args, **kwargs) - except Exception as e: - if exception_handler: - exception_handler(e) - else: - raise e - return inner - - def __str__(self): - return ''.format(self.name, id(self)) - - def __repr__(self): - return 
self.__str__() - - -class Duration: - def __init__(self): - self.start_ts = time.time() - self.end_ts = None - - def stop(self): - if self.end_ts: - return False - - self.end_ts = time.time() - return True - - @property - def value(self): - if not self.end_ts: - return None - - return self.end_ts - self.start_ts - - -class ProxyMixin: - def __getattr__(self, name): - target = self.__dict__.get(name, None) - if target or not self.connection: - return target - return getattr(self.connection, name) - - -class ScopedConnection(ProxyMixin): - def __init__(self, pool, connection): - self.pool = pool - self.connection = connection - self.duration = Duration() - - def __del__(self): - self.release() - - def __str__(self): - return self.connection.__str__() - - def release(self): - if not self.pool or not self.connection: - return - self.pool.release(self.connection) - self.duration.stop() - self.pool.record_duration(self.connection, self.duration) - self.pool = None - self.connection = None - - -class ConnectionPool(topology.TopoObject): - def __init__(self, name, uri, max_retry=1, capacity=-1, **kwargs): - super().__init__(name) - self.capacity = capacity - self.pending_pool = set() - self.active_pool = set() - self.connection_ownership = {} - self.uri = uri - self.max_retry = max_retry - self.kwargs = kwargs - self.cv = threading.Condition() - self.durations = defaultdict(list) - - def record_duration(self, conn, duration): - if len(self.durations[conn]) >= 10000: - self.durations[conn].pop(0) - - self.durations[conn].append(duration) - - def stats(self): - out = {'connections': {}} - connections = out['connections'] - take_time = [] - for conn, durations in self.durations.items(): - total_time = sum(d.value for d in durations) - connections[id(conn)] = { - 'total_time': total_time, - 'called_times': len(durations) - } - take_time.append(total_time) - - out['max-time'] = max(take_time) - out['num'] = len(self.durations) - logger.debug(json.dumps(out, indent=2)) - return out - - def __len__(self): - return len(self.pending_pool) + len(self.active_pool) - - @property - def active_num(self): - return len(self.active_pool) - - def _is_full(self): - if self.capacity < 0: - return False - return len(self) >= self.capacity - - def fetch(self, timeout=1): - with self.cv: - timeout_times = 0 - while (len(self.pending_pool) == 0 and self._is_full() and timeout_times < 1): - self.cv.notifyAll() - self.cv.wait(timeout) - timeout_times += 1 - - connection = None - if timeout_times >= 1: - return connection - - # logger.error('[Connection] Pool \"{}\" SIZE={} ACTIVE={}'.format(self.name, len(self), self.active_num)) - if len(self.pending_pool) == 0: - connection = self.create() - else: - connection = self.pending_pool.pop() - # logger.debug('[Connection] Registerring \"{}\" into pool \"{}\"'.format(connection, self.name)) - self.active_pool.add(connection) - scoped_connection = ScopedConnection(self, connection) - return scoped_connection - - def release(self, connection): - with self.cv: - if connection not in self.active_pool: - raise RuntimeError('\"{}\" not found in pool \"{}\"'.format(connection, self.name)) - # logger.debug('[Connection] Releasing \"{}\" from pool \"{}\"'.format(connection, self.name)) - # logger.debug('[Connection] Pool \"{}\" SIZE={} ACTIVE={}'.format(self.name, len(self), self.active_num)) - self.active_pool.remove(connection) - self.pending_pool.add(connection) - - def create(self): - connection = Connection(name=self.name, uri=self.uri, max_retry=self.max_retry, **self.kwargs) - 
return connection +# class Searchook(BaseSearchHook): +# +# def on_response(self, *args, **kwargs): +# return True +# +# +# class Connection: +# def __init__(self, name, uri, max_retry=1, error_handlers=None, **kwargs): +# self.name = name +# self.uri = uri +# self.max_retry = max_retry +# self.retried = 0 +# self.conn = Milvus() +# self.error_handlers = [] if not error_handlers else error_handlers +# self.on_retry_func = kwargs.get('on_retry_func', None) +# +# # define search hook +# self.conn.set_hook(search_in_file=Searchook()) +# # self._connect() +# +# def __str__(self): +# return 'Connection:name=\"{}\";uri=\"{}\"'.format(self.name, self.uri) +# +# def _connect(self, metadata=None): +# try: +# self.conn.connect(uri=self.uri) +# except Exception as e: +# if not self.error_handlers: +# raise exceptions.ConnectionConnectError(message=str(e), metadata=metadata) +# for handler in self.error_handlers: +# handler(e, metadata=metadata) +# +# @property +# def can_retry(self): +# return self.retried < self.max_retry +# +# @property +# def connected(self): +# return self.conn.connected() +# +# def on_retry(self): +# if self.on_retry_func: +# self.on_retry_func(self) +# else: +# self.retried > 1 and logger.warning('{} is retrying {}'.format(self, self.retried)) +# +# def on_connect(self, metadata=None): +# while not self.connected and self.can_retry: +# self.retried += 1 +# self.on_retry() +# self._connect(metadata=metadata) +# +# if not self.can_retry and not self.connected: +# raise exceptions.ConnectionConnectError(message='Max retry {} reached!'.format(self.max_retry, +# metadata=metadata)) +# +# self.retried = 0 +# +# def connect(self, func, exception_handler=None): +# @wraps(func) +# def inner(*args, **kwargs): +# self.on_connect() +# try: +# return func(*args, **kwargs) +# except Exception as e: +# if exception_handler: +# exception_handler(e) +# else: +# raise e +# return inner +# +# def __str__(self): +# return ''.format(self.name, id(self)) +# +# def __repr__(self): +# return self.__str__() +# +# +# class Duration: +# def __init__(self): +# self.start_ts = time.time() +# self.end_ts = None +# +# def stop(self): +# if self.end_ts: +# return False +# +# self.end_ts = time.time() +# return True +# +# @property +# def value(self): +# if not self.end_ts: +# return None +# +# return self.end_ts - self.start_ts +# +# +# class ProxyMixin: +# def __getattr__(self, name): +# target = self.__dict__.get(name, None) +# if target or not self.connection: +# return target +# return getattr(self.connection, name) +# +# +# class ScopedConnection(ProxyMixin): +# def __init__(self, pool, connection): +# self.pool = pool +# self.connection = connection +# self.duration = Duration() +# +# def __del__(self): +# self.release() +# +# def __str__(self): +# return self.connection.__str__() +# +# def release(self): +# if not self.pool or not self.connection: +# return +# self.pool.release(self.connection) +# self.duration.stop() +# self.pool.record_duration(self.connection, self.duration) +# self.pool = None +# self.connection = None +# +# +# class ConnectionPool(topology.TopoObject): +# def __init__(self, name, uri, max_retry=1, capacity=-1, **kwargs): +# super().__init__(name) +# self.capacity = capacity +# self.pending_pool = set() +# self.active_pool = set() +# self.connection_ownership = {} +# self.uri = uri +# self.max_retry = max_retry +# self.kwargs = kwargs +# self.cv = threading.Condition() +# self.durations = defaultdict(list) +# +# def record_duration(self, conn, duration): +# if 
len(self.durations[conn]) >= 10000: +# self.durations[conn].pop(0) +# +# self.durations[conn].append(duration) +# +# def stats(self): +# out = {'connections': {}} +# connections = out['connections'] +# take_time = [] +# for conn, durations in self.durations.items(): +# total_time = sum(d.value for d in durations) +# connections[id(conn)] = { +# 'total_time': total_time, +# 'called_times': len(durations) +# } +# take_time.append(total_time) +# +# out['max-time'] = max(take_time) +# out['num'] = len(self.durations) +# logger.debug(json.dumps(out, indent=2)) +# return out +# +# def __len__(self): +# return len(self.pending_pool) + len(self.active_pool) +# +# @property +# def active_num(self): +# return len(self.active_pool) +# +# def _is_full(self): +# if self.capacity < 0: +# return False +# return len(self) >= self.capacity +# +# def fetch(self, timeout=1): +# with self.cv: +# timeout_times = 0 +# while (len(self.pending_pool) == 0 and self._is_full() and timeout_times < 1): +# self.cv.notifyAll() +# self.cv.wait(timeout) +# timeout_times += 1 +# +# connection = None +# if timeout_times >= 1: +# return connection +# +# # logger.error('[Connection] Pool \"{}\" SIZE={} ACTIVE={}'.format(self.name, len(self), self.active_num)) +# if len(self.pending_pool) == 0: +# connection = self.create() +# else: +# connection = self.pending_pool.pop() +# # logger.debug('[Connection] Registerring \"{}\" into pool \"{}\"'.format(connection, self.name)) +# self.active_pool.add(connection) +# scoped_connection = ScopedConnection(self, connection) +# return scoped_connection +# +# def release(self, connection): +# with self.cv: +# if connection not in self.active_pool: +# raise RuntimeError('\"{}\" not found in pool \"{}\"'.format(connection, self.name)) +# # logger.debug('[Connection] Releasing \"{}\" from pool \"{}\"'.format(connection, self.name)) +# # logger.debug('[Connection] Pool \"{}\" SIZE={} ACTIVE={}'.format(self.name, len(self), self.active_num)) +# self.active_pool.remove(connection) +# self.pending_pool.add(connection) +# +# def create(self): +# connection = Connection(name=self.name, uri=self.uri, max_retry=self.max_retry, **self.kwargs) +# return connection class ConnectionGroup(topology.TopoGroup): @@ -237,9 +237,9 @@ class ConnectionGroup(topology.TopoGroup): return out def on_pre_add(self, topo_object): - conn = topo_object.fetch() - conn.on_connect(metadata=None) - status, version = conn.conn.server_version() + # conn = topo_object.fetch() + # conn.on_connect(metadata=None) + status, version = topo_object.server_version() if not status.OK(): logger.error('Cannot connect to newly added address: {}. 
Remove it now'.format(topo_object.name)) return False @@ -254,7 +254,7 @@ class ConnectionGroup(topology.TopoGroup): uri = kwargs.get('uri', None) if not uri: raise RuntimeError('\"uri\" is required to create connection pool') - pool = ConnectionPool(name=name, **kwargs) + pool = Milvus(name=name, **kwargs) status = self.add(pool) if status != topology.StatusType.OK: pool = None diff --git a/shards/mishards/grpc_utils/grpc_args_parser.py b/shards/mishards/grpc_utils/grpc_args_parser.py index 67ca043b7f..7a5e36c464 100644 --- a/shards/mishards/grpc_utils/grpc_args_parser.py +++ b/shards/mishards/grpc_utils/grpc_args_parser.py @@ -116,9 +116,9 @@ class GrpcArgsParser(object): @error_status def parse_proto_VectorIdentity(cls, param): _collection_name = param.collection_name - _id = param.id + _ids = list(param.id_array) - return _collection_name, _id + return _collection_name, _ids @classmethod @error_status diff --git a/shards/mishards/router/__init__.py b/shards/mishards/router/__init__.py index 033aa3f5b1..3f064cb3d6 100644 --- a/shards/mishards/router/__init__.py +++ b/shards/mishards/router/__init__.py @@ -10,11 +10,13 @@ class RouterMixin: raise NotImplemented() def connection(self, metadata=None): - conn = self.writable_topo.get_group('default').get('WOSERVER').fetch() - if conn: - conn.on_connect(metadata=metadata) + # conn = self.writable_topo.get_group('default').get('WOSERVER').fetch() + conn = self.writable_topo.get_group('default').get('WOSERVER') + # if conn: + # conn.on_connect(metadata=metadata) # PXU TODO: should return conn - return conn.conn + return conn + # return conn.conn def query_conn(self, name, metadata=None): if not name: @@ -27,9 +29,15 @@ class RouterMixin: raise exceptions.ConnectionNotFoundError( message=f'Conn Group {name} is Empty. 
Please Check your configurations', metadata=metadata) - conn = group.get(name).fetch() - if not conn: - raise exceptions.ConnectionNotFoundError( - message=f'Conn {name} Not Found', metadata=metadata) - conn.on_connect(metadata=metadata) + # conn = group.get(name).fetch() + # if not conn: + # raise exceptions.ConnectionNotFoundError( + # message=f'Conn {name} Not Found', metadata=metadata) + # conn.on_connect(metadata=metadata) + + # conn = self.readonly_topo.get_group(name).get(name).fetch() + conn = self.readonly_topo.get_group(name).get(name) + # if not conn: + # raise exceptions.ConnectionNotFoundError(name, metadata=metadata) + # conn.on_connect(metadata=metadata) return conn diff --git a/shards/mishards/router/plugins/file_based_hash_ring_router.py b/shards/mishards/router/plugins/file_based_hash_ring_router.py index d4c66cce64..8e691075bd 100644 --- a/shards/mishards/router/plugins/file_based_hash_ring_router.py +++ b/shards/mishards/router/plugins/file_based_hash_ring_router.py @@ -1,4 +1,5 @@ import logging +import re from sqlalchemy import exc as sqlalchemy_exc from sqlalchemy import and_, or_ from mishards.models import Tables, TableFiles @@ -31,8 +32,8 @@ class Factory(RouterMixin): else: # TODO: collection default partition is '_default' cond = and_(Tables.state != Tables.TO_DELETE, - Tables.owner_table == collection_name, - Tables.partition_tag.in_(partition_tags)) + Tables.owner_table == collection_name) + # Tables.partition_tag.in_(partition_tags)) if '_default' in partition_tags: default_par_cond = and_(Tables.table_id == collection_name, Tables.state != Tables.TO_DELETE) cond = or_(cond, default_par_cond) @@ -45,7 +46,19 @@ class Factory(RouterMixin): logger.error("Cannot find collection {} / {} in metadata".format(collection_name, partition_tags)) raise exceptions.CollectionNotFoundError('{}:{}'.format(collection_name, partition_tags), metadata=metadata) - collection_list = [str(collection.table_id) for collection in collections] + collection_list = [] + if not partition_tags: + collection_list = [str(collection.table_id) for collection in collections] + else: + for collection in collections: + if collection.table_id == collection_name: + collection_list.append(collection_name) + continue + + for tag in partition_tags: + if re.match(tag, collection.partition_tag): + collection_list.append(collection.table_id) + break file_type_cond = or_( TableFiles.file_type == TableFiles.FILE_TYPE_RAW, diff --git a/shards/mishards/service_handler.py b/shards/mishards/service_handler.py index a0661d8439..51f496d160 100644 --- a/shards/mishards/service_handler.py +++ b/shards/mishards/service_handler.py @@ -122,48 +122,36 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): metadata = kwargs.get('metadata', None) - rs = [] all_topk_results = [] - def search(addr, collection_id, file_ids, vectors, topk, params, **kwargs): - logger.info( - 'Send Search Request: addr={};collection_id={};ids={};nq={};topk={};params={}' - .format(addr, collection_id, file_ids, len(vectors), topk, params)) - - conn = self.router.query_conn(addr, metadata=metadata) - start = time.time() - span = kwargs.get('span', None) - span = span if span else (None if self.tracer.empty else - context.get_active_span().context) - - with self.tracer.start_span('search_{}'.format(addr), - child_of=span): - ret = conn.conn.search_vectors_in_files(collection_name=collection_id, - file_ids=file_ids, - query_records=vectors, - top_k=topk, - params=params) - if ret.status.error_code != 0: - logger.error("Search fail 
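
The router change above implements #2121: partition tags in a search request are treated as patterns and compared with `re.match`, which anchors at the start of the tag without requiring a full match. The same check sketched in C++ for reference (`match_continuous` reproduces the anchored-prefix semantics; this is an illustration, not code from the patch):

    #include <regex>
    #include <string>
    #include <vector>

    // True if any requested pattern matches partition_tag from its first
    // character, mirroring Python's re.match in the router above.
    bool
    MatchesAnyTag(const std::string& partition_tag, const std::vector<std::string>& patterns) {
        for (const auto& pattern : patterns) {
            std::regex re(pattern);
            if (std::regex_search(partition_tag, re, std::regex_constants::match_continuous)) {
                return true;
            }
        }
        return false;
    }

So a request for tag `2020.*` routes to a partition tagged `2020-05`, while a request for `05` does not, exactly as `re.match` behaves.
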
{}".format(ret.status)) - - end = time.time() - all_topk_results.append(ret) - with self.tracer.start_span('do_search', child_of=p_span) as span: - with ThreadPoolExecutor(max_workers=self.max_workers) as pool: + if len(routing) == 0: + logger.warning('SearchVector: partition_tags = {}'.format(partition_tags)) + ft = self.router.connection().search(collection_id, topk, vectors, list(partition_tags), search_params, _async=True) + ret = ft.result(raw=True) + all_topk_results.append(ret) + else: + futures = [] for addr, file_ids in routing.items(): - res = pool.submit(search, - addr, - collection_id, - file_ids, - vectors, - topk, - search_params, - span=span) - rs.append(res) + conn = self.router.query_conn(addr, metadata=metadata) + start = time.time() + span = kwargs.get('span', None) + span = span if span else (None if self.tracer.empty else + context.get_active_span().context) - for res in rs: - res.result() + with self.tracer.start_span('search_{}'.format(addr), + child_of=span): + logger.warning("Search file ids is {}".format(file_ids)) + future = conn.search_vectors_in_files(collection_name=collection_id, + file_ids=file_ids, + query_records=vectors, + top_k=topk, + params=search_params, _async=True) + futures.append(future) + + for f in futures: + ret = f.result(raw=True) + all_topk_results.append(ret) reverse = collection_meta.metric_type == Types.MetricType.IP with self.tracer.start_span('do_merge', child_of=p_span): @@ -231,6 +219,13 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): return status_pb2.Status(error_code=_status.code, reason=_status.message) + @mark_grpc_method + def HasPartition(self, request, context): + _collection_name, _tag = Parser.parse_proto_PartitionParam(request) + _status, _ok = self.router.connection().has_partition(_collection_name, _tag) + return milvus_pb2.BoolReply(status_pb2.Status(error_code=_status.code, + reason=_status.message), bool_reply=_ok) + @mark_grpc_method def ShowPartitions(self, request, context): _status, _collection_name = Parser.parse_proto_CollectionName(request) @@ -370,6 +365,72 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): def SearchInFiles(self, request, context): raise NotImplemented() + @mark_grpc_method + def SearchByID(self, request, context): + metadata = {'resp_class': milvus_pb2.TopKQueryResult} + + collection_name = request.collection_name + + topk = request.topk + + if len(request.extra_params) == 0: + raise exceptions.SearchParamError(message="Search param loss", metadata=metadata) + params = ujson.loads(str(request.extra_params[0].value)) + + logger.info('Search {}: topk={} params={}'.format( + collection_name, topk, params)) + + if topk > self.MAX_TOPK or topk <= 0: + raise exceptions.InvalidTopKError( + message='Invalid topk: {}'.format(topk), metadata=metadata) + + collection_meta = self.collection_meta.get(collection_name, None) + + if not collection_meta: + status, info = self.router.connection( + metadata=metadata).describe_collection(collection_name) + if not status.OK(): + raise exceptions.CollectionNotFoundError(collection_name, + metadata=metadata) + + self.collection_meta[collection_name] = info + collection_meta = info + + start = time.time() + + query_record_array = [] + if int(collection_meta.metric_type) >= MetricType.HAMMING.value: + for query_record in request.query_record_array: + query_record_array.append(bytes(query_record.binary_data)) + else: + for query_record in request.query_record_array: + query_record_array.append(list(query_record.float_data)) + + 
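The service_handler hunk above swaps the old ThreadPoolExecutor fan-out for client-side futures: each shard connection is asked to search only its own segment files with `_async=True`, and the raw per-shard top-k results are gathered afterwards. A minimal sketch of that gather pattern; `router` and `routing` are hypothetical stand-ins for the mishards objects, not the actual implementation:

def fan_out_search(router, routing, collection_id, vectors, topk, params):
    # `routing` maps shard address -> list of segment file ids (assumed shape).
    futures = []
    for addr, file_ids in routing.items():
        conn = router.query_conn(addr)
        # _async=True makes the client return a future instead of blocking
        # on each shard in turn.
        futures.append(conn.search_vectors_in_files(
            collection_name=collection_id,
            file_ids=file_ids,
            query_records=vectors,
            top_k=topk,
            params=params,
            _async=True))
    # Gather the raw per-shard top-k results; merging happens in a later step.
    return [future.result(raw=True) for future in futures]

Letting the client own the futures drops the extra server-side thread pool while the gRPC layer still overlaps the per-shard searches.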
partition_tags = getattr(request, "partition_tag_array", []) + ids = getattr(request, "id_array", []) + search_result = self.router.connection(metadata=metadata).search_by_ids(collection_name, ids, topk, partition_tags, params) + # status, id_results, dis_results = self._do_query(context, + # collection_name, + # collection_meta, + # query_record_array, + # topk, + # params, + # partition_tags=getattr(request, "partition_tag_array", []), + # metadata=metadata) + + now = time.time() + logger.info('SearchVector takes: {}'.format(now - start)) + return search_result + # + # topk_result_list = milvus_pb2.TopKQueryResult( + # status=status_pb2.Status(error_code=status.error_code, + # reason=status.reason), + # row_num=len(request.query_record_array) if len(id_results) else 0, + # ids=id_results, + # distances=dis_results) + # return topk_result_list + # raise NotImplemented() + def _describe_collection(self, collection_name, metadata=None): return self.router.connection(metadata=metadata).describe_collection(collection_name) @@ -416,32 +477,16 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): metadata = {'resp_class': milvus_pb2.CollectionInfo} - logger.info('ShowCollectionInfo {}'.format(_collection_name)) _status, _info = self._collection_info(metadata=metadata, collection_name=_collection_name) + _info_str = ujson.dumps(_info) if _status.OK(): - _collection_info = milvus_pb2.CollectionInfo( + return milvus_pb2.CollectionInfo( status=status_pb2.Status(error_code=_status.code, reason=_status.message), - total_row_count=_info.count + json_info=_info_str ) - for par_stat in _info.partitions_stat: - _par = milvus_pb2.PartitionStat( - tag=par_stat.tag, - total_row_count=par_stat.count - ) - for seg_stat in par_stat.segments_stat: - _par.segments_stat.add( - segment_name=seg_stat.segment_name, - row_count=seg_stat.count, - index_name=seg_stat.index_name, - data_size=seg_stat.data_size, - ) - - _collection_info.partitions_stat.append(_par) - return _collection_info - return milvus_pb2.CollectionInfo( status=status_pb2.Status(error_code=_status.code, reason=_status.message), @@ -564,35 +609,35 @@ class ServiceHandler(milvus_pb2_grpc.MilvusServiceServicer): grpc_index.extra_params.add(key='params', value=ujson.dumps(_index_param._params)) return grpc_index - def _get_vector_by_id(self, collection_name, vec_id, metadata): - return self.router.connection(metadata=metadata).get_vector_by_id(collection_name, vec_id) + def _get_vectors_by_id(self, collection_name, ids, metadata): + return self.router.connection(metadata=metadata).get_vectors_by_ids(collection_name, ids) @mark_grpc_method - def GetVectorByID(self, request, context): + def GetVectorsByID(self, request, context): _status, unpacks = Parser.parse_proto_VectorIdentity(request) if not _status.OK(): return status_pb2.Status(error_code=_status.code, reason=_status.message) - metadata = {'resp_class': milvus_pb2.VectorData} + metadata = {'resp_class': milvus_pb2.VectorsData} - _collection_name, _id = unpacks + _collection_name, _ids = unpacks logger.info('GetVectorByID {}'.format(_collection_name)) - _status, vector = self._get_vector_by_id(_collection_name, _id, metadata) + _status, vectors = self._get_vectors_by_id(_collection_name, _ids, metadata) + _rpc_status = status_pb2.Status(error_code=_status.code, reason=_status.message) + if not vectors: + return milvus_pb2.VectorsData(status=_rpc_status, ) - if not vector: - return milvus_pb2.VectorData(status=status_pb2.Status( - error_code=_status.code, reason=_status.message), ) - - if 
isinstance(vector, bytes): - records = milvus_pb2.RowRecord(binary_data=vector) + if len(vectors) == 0: + return milvus_pb2.VectorsData(status=_rpc_status, vectors_data=[]) + if isinstance(vectors[0], bytes): + records = [milvus_pb2.RowRecord(binary_data=v) for v in vectors] else: - records = milvus_pb2.RowRecord(float_data=vector) + records = [milvus_pb2.RowRecord(float_data=v) for v in vectors] - return milvus_pb2.VectorData(status=status_pb2.Status( - error_code=_status.code, reason=_status.message), - vector_data=records - ) + response = milvus_pb2.VectorsData(status=_rpc_status) + response.vectors_data.extend(records) + return response def _get_vector_ids(self, collection_name, segment_name, metadata): return self.router.connection(metadata=metadata).get_vector_ids(collection_name, segment_name) diff --git a/shards/requirements.txt b/shards/requirements.txt index b4c1921c3f..e291e04969 100644 --- a/shards/requirements.txt +++ b/shards/requirements.txt @@ -14,8 +14,8 @@ py==1.8.0 pyasn1==0.4.7 pyasn1-modules==0.2.6 pylint==2.5.0 -pymilvus==0.2.10 -#pymilvus-test==0.3.3 +#pymilvus==0.2.10 +pymilvus-test==0.3.11 pyparsing==2.4.0 pytest==4.6.3 pytest-level==0.1.1 diff --git a/tests/milvus-java-test/src/main/java/com/TestSearchByIds.java b/tests/milvus-java-test/src/main/java/com/TestSearchByIds.java index 66767d54eb..0391268dd2 100644 --- a/tests/milvus-java-test/src/main/java/com/TestSearchByIds.java +++ b/tests/milvus-java-test/src/main/java/com/TestSearchByIds.java @@ -80,6 +80,8 @@ public class TestSearchByIds { List> res_search = client.searchByIds(searchParam).getQueryResultsList(); // reason: "Failed to query by id in collection L2_FmVKbqSZaN, result doesn\'t match id count" assert (!client.searchByIds(searchParam).getResponse().ok()); +// Assert.assertEquals(res_search.size(), default_ids.size()); +// Assert.assertEquals(res_search.get(0).get(0).getVectorId(), -1); } @Test(dataProvider = "Collection", dataProviderClass = MainClass.class) diff --git a/tests/milvus_python_test/test_add_vectors.py b/tests/milvus_python_test/test_add_vectors.py index 0e9407ff90..6307418d09 100644 --- a/tests/milvus_python_test/test_add_vectors.py +++ b/tests/milvus_python_test/test_add_vectors.py @@ -1,4 +1,5 @@ import time +import pdb import threading import logging import threading @@ -676,6 +677,121 @@ class TestAddBase: status, ids = connect.add_vectors(collection_name=collection_list[i], records=vectors) assert status.OK() +class TestAddAsync: + @pytest.fixture( + scope="function", + params=[ + 1, + 1000 + ], + ) + def insert_count(self, request): + yield request.param + + def check_status(self, status, result): + logging.getLogger().info("In callback check status") + assert status.OK() + + def check_status_not_ok(self, status, result): + logging.getLogger().info("In callback check status") + assert not status.OK() + + + def test_insert_async(self, connect, collection, insert_count): + ''' + target: test add vectors with different length of vectors + method: set different vectors as add method params + expected: length of ids is equal to the length of vectors + ''' + nb = insert_count + insert_vec_list = gen_vectors(nb, dim) + future = connect.add_vectors(collection, insert_vec_list, _async=True) + status, ids = future.result() + connect.flush([collection]) + assert len(ids) == nb + assert status.OK() + + @pytest.mark.level(2) + def test_insert_async_false(self, connect, collection, insert_count): + ''' + target: test add vectors with different length of vectors + method: set different vectors as 
add method params + expected: length of ids is equal to the length of vectors + ''' + nb = insert_count + insert_vec_list = gen_vectors(nb, dim) + status, ids = connect.add_vectors(collection, insert_vec_list, _async=False) + connect.flush([collection]) + assert len(ids) == nb + assert status.OK() + + def test_insert_async_callback(self, connect, collection, insert_count): + ''' + target: test add vectors with different length of vectors + method: set different vectors as add method params + expected: length of ids is equal to the length of vectors + ''' + nb = insert_count + insert_vec_list = gen_vectors(nb, dim) + future = connect.add_vectors(collection, insert_vec_list, _async=True, _callback=self.check_status) + future.done() + + @pytest.mark.level(2) + def test_insert_async_long(self, connect, collection): + ''' + target: test add vectors with different length of vectors + method: set different vectors as add method params + expected: length of ids is equal to the length of vectors + ''' + nb = 50000 + insert_vec_list = gen_vectors(nb, dim) + future = connect.add_vectors(collection, insert_vec_list, _async=True, _callback=self.check_status) + status, result = future.result() + assert status.OK() + assert len(result) == nb + connect.flush([collection]) + status, count = connect.count_collection(collection) + assert status.OK() + logging.getLogger().info(status) + logging.getLogger().info(count) + assert count == nb + + def test_insert_async_callback_timeout(self, connect, collection): + ''' + target: test add vectors with different length of vectors + method: set different vectors as add method params + expected: length of ids is equal to the length of vectors + ''' + nb = 100000 + insert_vec_list = gen_vectors(nb, dim) + future = connect.add_vectors(collection, insert_vec_list, _async=True, _callback=self.check_status, timeout=1) + future.done() + + def test_insert_async_invalid_params(self, connect, collection): + ''' + target: test add vectors with different length of vectors + method: set different vectors as add method params + expected: length of ids is equal to the length of vectors + ''' + insert_vec_list = gen_vectors(nb, dim) + collection_new = gen_unique_str() + future = connect.add_vectors(collection_new, insert_vec_list, _async=True) + status, result = future.result() + assert not status.OK() + + # TODO: add assertion + def test_insert_async_invalid_params_raise_exception(self, connect, collection): + ''' + target: test add vectors with different length of vectors + method: set different vectors as add method params + expected: length of ids is equal to the length of vectors + ''' + insert_vec_list = [] + collection_new = gen_unique_str() + with pytest.raises(Exception) as e: + future = connect.add_vectors(collection_new, insert_vec_list, _async=True) + + class TestAddIP: """ ****************************************************************** diff --git a/tests/milvus_python_test/test_connect.py b/tests/milvus_python_test/test_connect.py index a5ee634f6e..e380d94379 100644 --- a/tests/milvus_python_test/test_connect.py +++ b/tests/milvus_python_test/test_connect.py @@ -104,11 +104,11 @@ class TestConnect: ''' uri_value = "" if self.local_ip(args): - milvus = get_milvus(uri=uri_value, handler=args["handler"]) + milvus = get_milvus(None, None, uri=uri_value, handler=args["handler"]) # assert milvus.connected() else: with pytest.raises(Exception) as e: - milvus = get_milvus(uri=uri_value, handler=args["handler"]) + milvus = get_milvus(None, None, uri=uri_value, 
handler=args["handler"])
             # assert not milvus.connected()
 
     # disable
diff --git a/tests/milvus_python_test/test_flush.py b/tests/milvus_python_test/test_flush.py
index e17ffac191..c32b9ad397 100644
--- a/tests/milvus_python_test/test_flush.py
+++ b/tests/milvus_python_test/test_flush.py
@@ -233,6 +233,44 @@ class TestFlushBase:
         assert res == 0
 
 
+class TestFlushAsync:
+    """
+    ******************************************************************
+    The following cases are used to test `flush` function
+    ******************************************************************
+    """
+    def check_status(self, status, result):
+        logging.getLogger().info("In callback check status")
+        assert status.OK()
+
+    def test_flush_empty_collection(self, connect, collection):
+        '''
+        method: flush a collection that holds no vectors
+        expected: status ok
+        '''
+        future = connect.flush([collection], _async=True)
+        status = future.result()
+        assert status.OK()
+
+    def test_flush_async(self, connect, collection):
+        vectors = gen_vectors(nb, dim)
+        status, ids = connect.add_vectors(collection, vectors)
+        future = connect.flush([collection], _async=True)
+        status = future.result()
+        assert status.OK()
+
+    def test_flush_async_with_callback(self, connect, collection):
+        nb = 100000
+        vectors = gen_vectors(nb, dim)
+        connect.add_vectors(collection, vectors)
+        logging.getLogger().info("before")
+        future = connect.flush([collection], _async=True, _callback=self.check_status)
+        logging.getLogger().info("after")
+        future.done()
+        status = future.result()
+        assert status.OK()
+
+
 class TestCollectionNameInvalid(object):
     """
     Test adding vectors with invalid collection names
diff --git a/tests/milvus_python_test/test_index.py b/tests/milvus_python_test/test_index.py
index 832c89d8b5..3949f38a4d 100644
--- a/tests/milvus_python_test/test_index.py
+++ b/tests/milvus_python_test/test_index.py
@@ -1806,3 +1806,75 @@ class TestCreateIndexParamsInvalid(object):
         logging.getLogger().info(result)
         assert result._collection_name == collection
         assert result._index_type == IndexType.FLAT
+
+class TestIndexAsync:
+    """
+    ******************************************************************
+    The following cases are used to test `create_index` function
+    ******************************************************************
+    """
+    @pytest.fixture(
+        scope="function",
+        params=gen_index()
+    )
+    def get_index(self, request, connect):
+        if str(connect._cmd("mode")[1]) == "CPU":
+            if request.param["index_type"] == IndexType.IVF_SQ8H:
+                pytest.skip("sq8h not support in CPU mode")
+        if str(connect._cmd("mode")[1]) == "GPU":
+            if request.param["index_type"] == IndexType.IVF_PQ:
+                pytest.skip("ivfpq not support in GPU mode")
+        return request.param
+
+    @pytest.fixture(
+        scope="function",
+        params=gen_simple_index()
+    )
+    def get_simple_index(self, request, connect):
+        if str(connect._cmd("mode")[1]) == "CPU":
+            if request.param["index_type"] == IndexType.IVF_SQ8H:
+                pytest.skip("sq8h not support in CPU mode")
+        if str(connect._cmd("mode")[1]) == "GPU":
+            # if request.param["index_type"] == IndexType.IVF_PQ:
+            if request.param["index_type"] not in [IndexType.IVF_FLAT]:
+                # pytest.skip("ivfpq not support in GPU mode")
+                pytest.skip("debug ivf_flat in GPU mode")
+        return request.param
+
+    def check_status(self, status):
+        logging.getLogger().info("In callback check status")
+        assert status.OK()
+
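The async test classes in this patch (add, flush, and index below) all exercise one client contract: pass `_async=True`, optionally a `_callback`, then poll `future.done()` or block on `future.result()`. A minimal sketch of that contract, assuming a pymilvus-0.3.x-style client; `connect`, `collection`, and `vectors` are placeholders here, not fixtures from this suite:

def check_status(status, result):
    # Callback invoked by the client once the async call completes.
    assert status.OK()

def insert_and_flush_async(connect, collection, vectors):
    future = connect.add_vectors(collection, vectors, _async=True,
                                 _callback=check_status)
    status, ids = future.result()       # blocks until the insert finishes
    flush_future = connect.flush([collection], _async=True)
    assert flush_future.result().OK()   # flush futures resolve to a bare status
    return ids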
+    """
+    ******************************************************************
+    The following cases are used to test `create_index` function
+    ******************************************************************
+    """
+
+    @pytest.mark.timeout(BUILD_TIMEOUT)
+    def test_create_index(self, connect, collection, get_simple_index):
+        '''
+        target: test create index interface
+        method: create collection and add vectors in it, create index asynchronously
+        expected: create index status ok
+        '''
+        index_param = get_simple_index["index_param"]
+        index_type = get_simple_index["index_type"]
+        logging.getLogger().info(get_simple_index)
+        vectors = gen_vectors(nb, dim)
+        status, ids = connect.add_vectors(collection, vectors)
+        logging.getLogger().info("start index")
+        # future = connect.create_index(collection, index_type, index_param, _async=True, _callback=self.check_status)
+        future = connect.create_index(collection, index_type, index_param, _async=True)
+        logging.getLogger().info("before result")
+        status = future.result()
+        assert status.OK()
+
+    def test_create_index_with_invalid_collectionname(self, connect):
+        collection_name = " "
+        nlist = NLIST
+        index_param = {"nlist": nlist}
+        future = connect.create_index(collection_name, IndexType.IVF_SQ8, index_param, _async=True)
+        status = future.result()
+        assert not status.OK()
+
diff --git a/tests/milvus_python_test/test_partition.py b/tests/milvus_python_test/test_partition.py
index 5814a3b711..95c2eb3831 100644
--- a/tests/milvus_python_test/test_partition.py
+++ b/tests/milvus_python_test/test_partition.py
@@ -228,6 +228,78 @@ class TestShowBase:
         assert status.OK()
 
 
+class TestHasBase:
+
+    """
+    ******************************************************************
+    The following cases are used to test `has_partition` function
+    ******************************************************************
+    """
+    @pytest.fixture(
+        scope="function",
+        params=gen_invalid_collection_names()
+    )
+    def get_tag_name(self, request):
+        yield request.param
+
+    def test_has_partition(self, connect, collection):
+        '''
+        target: test has_partition, check status and result
+        method: create partition first, then call function: has_partition
+        expected: status ok, result true
+        '''
+        status = connect.create_partition(collection, tag)
+        status, res = connect.has_partition(collection, tag)
+        assert status.OK()
+        logging.getLogger().info(res)
+        assert res
+
+    def test_has_partition_multi_partitions(self, connect, collection):
+        '''
+        target: test has_partition, check status and result
+        method: create multiple partitions first, then call function: has_partition for each tag
+        expected: status ok, result true
+        '''
+        for tag_name in [tag, "tag_new", "tag_new_new"]:
+            status = connect.create_partition(collection, tag_name)
+        for tag_name in [tag, "tag_new", "tag_new_new"]:
+            status, res = connect.has_partition(collection, tag_name)
+            assert status.OK()
+            assert res
+
+    def test_has_partition_tag_not_existed(self, connect, collection):
+        '''
+        target: test has_partition, check status and result
+        method: call function: has_partition with a tag that was never created
+        expected: status ok, result false
+        '''
+        status, res = connect.has_partition(collection, tag)
+        assert status.OK()
+        logging.getLogger().info(res)
+        assert not res
+
+    def test_has_partition_collection_not_existed(self, connect, collection):
+        '''
+        target: test has_partition, check status and result
+        method: call function: has_partition on a collection that does not exist
+        expected: status not ok
+        '''
+        status, res = connect.has_partition("not_existed_collection", tag)
+        assert not status.OK()
+
+    @pytest.mark.level(2)
+    def test_has_partition_with_invalid_tag_name(self, connect, collection, get_tag_name):
+        '''
+        target: test has_partition with an invalid tag name, check the returned status
+        method: call function: has_partition
+        expected: status ok
+        '''
+        tag_name = get_tag_name
+        status = connect.create_partition(collection, tag)
+        status, res = connect.has_partition(collection, tag_name)
+        assert status.OK()
+
+
 class TestDropBase:
 
     """
diff --git a/tests/milvus_python_test/test_search_by_ids.py b/tests/milvus_python_test/test_search_by_ids.py
index dd204b3c4a..79e65c1e14 100755
--- a/tests/milvus_python_test/test_search_by_ids.py
+++ b/tests/milvus_python_test/test_search_by_ids.py
@@ -29,12 +29,14 @@ raw_vectors, binary_vectors = gen_binary_vectors(6000, dim)
 
 class TestSearchBase:
-    @pytest.fixture(scope="function", autouse=True)
-    def skip_check(self, connect):
-        if str(connect._cmd("mode")[1]) == "CPU" or str(connect._cmd("mode")[1]) == "GPU":
-            reason = "GPU mode not support"
-            logging.getLogger().info(reason)
-            pytest.skip(reason)
+    # @pytest.fixture(scope="function", autouse=True)
+    # def skip_check(self, connect):
+    #     if str(connect._cmd("mode")[1]) == "CPU":
+    #         if request.param["index_type"] == IndexType.IVF_SQ8H:
+    #             pytest.skip("sq8h not support in CPU mode")
+    #     if str(connect._cmd("mode")[1]) == "GPU":
+    #         if request.param["index_type"] == IndexType.IVF_PQ:
+    #             pytest.skip("ivfpq not support in GPU mode")
 
     def init_data(self, connect, collection, nb=6000):
         '''
@@ -82,16 +84,6 @@
         connect.flush([collection])
         return add_vectors, ids
 
-    def check_no_result(self, results):
-        if len(results) == 0:
-            return True
-        flag = True
-        for r in results:
-            flag = flag and (r.id == -1)
-            if not flag:
-                return False
-        return flag
-
     def init_data_partition(self, connect, collection, partition_tag, nb=6000):
         '''
         Generate vectors and add them to the collection before searching
@@ -104,6 +96,7 @@
         add_vectors = sklearn.preprocessing.normalize(add_vectors, axis=1, norm='l2')
         add_vectors = add_vectors.tolist()
         status, ids = connect.add_vectors(collection, add_vectors, partition_tag=partition_tag)
+        assert status.OK()
         connect.flush([collection])
         return add_vectors, ids
 
@@ -178,6 +171,22 @@
         assert result[0][0].distance <= epsilon
         assert check_result(result[0], ids[0])
 
+    def test_search_flat_same_ids(self, connect, collection):
+        '''
+        target: test basic search function, all the search params are correct, query with duplicate ids
+        method: search with duplicate vector ids, check the result
+        expected: search status ok, and the length of each result is top_k
+        '''
+        vectors, ids = self.init_data(connect, collection)
+        query_ids = [ids[0], ids[0]]
+        status, result = connect.search_by_ids(collection, query_ids, top_k, params={})
+        assert status.OK()
+        assert len(result[0]) == min(len(vectors), top_k)
+        assert result[0][0].distance <= epsilon
+        assert result[1][0].distance <= epsilon
+        assert check_result(result[0], ids[0])
+        assert check_result(result[1], ids[0])
+
     def test_search_flat_max_topk(self, connect, collection):
         '''
         target: test basic search function, all the search params are correct, change top-k value
@@ -186,7 +195,7 @@
         '''
         top_k = 2049
         vectors, ids = self.init_data(connect, collection)
-        query_ids = ids[0]
+        query_ids = [ids[0]]
         status, result = connect.search_by_ids(collection, query_ids, top_k, params={})
         assert not status.OK()
 
@@ -200,7 +209,7 @@
         query_ids = non_exist_id
         status, result = connect.search_by_ids(collection, query_ids, top_k, params={})
         assert status.OK()
-        
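The assertions rewritten in these hunks encode the new search_by_ids contract: an id with no match now yields an empty per-query hit list instead of a sentinel hit with id == -1, which is why the old check_no_result helper is deleted above. A small checker for the new contract, offered as a sketch rather than as part of the patch:

def expect_hits(result, i, expected_ids, top_k):
    # Under the new contract a missing id produces an empty hit list,
    # not a hit whose id is -1.
    hits = result[i]
    if not expected_ids:
        assert len(hits) == 0
    else:
        assert 0 < len(hits) <= top_k
        assert hits[0].id in expected_ids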
assert len(result[0]) == min(len(vectors), top_k) + assert len(result[0]) == 0 def test_search_collection_empty(self, connect, collection): ''' @@ -209,9 +218,11 @@ class TestSearchBase: expected: search status ok, and the length of the result is top_k ''' query_ids = non_exist_id + logging.getLogger().info(query_ids) + logging.getLogger().info(collection) + logging.getLogger().info(connect.describe_collection(collection)) status, result = connect.search_by_ids(collection, query_ids, top_k, params={}) - assert status.OK() - assert len(result) == 0 + assert not status.OK() def test_search_index_l2(self, connect, collection, get_simple_index): ''' @@ -221,6 +232,8 @@ class TestSearchBase: ''' index_param = get_simple_index["index_param"] index_type = get_simple_index["index_type"] + if index_type == IndexType.IVF_PQ: + pytest.skip("skip pq") vectors, ids = self.init_data(connect, collection) status = connect.create_index(collection, index_type, index_param) query_ids = [ids[0]] @@ -239,6 +252,8 @@ class TestSearchBase: ''' index_param = get_simple_index["index_param"] index_type = get_simple_index["index_type"] + if index_type == IndexType.IVF_PQ: + pytest.skip("skip pq") vectors, ids = self.init_data(connect, collection) status = connect.create_index(collection, index_type, index_param) query_ids = ids[0:nq] @@ -246,7 +261,7 @@ class TestSearchBase: status, result = connect.search_by_ids(collection, query_ids, top_k, params=search_param) assert status.OK() assert len(result) == nq - for i in nq: + for i in range(nq): assert len(result[i]) == min(len(vectors), top_k) assert result[i][0].distance <= epsilon assert check_result(result[i], ids[i]) @@ -259,17 +274,19 @@ class TestSearchBase: ''' index_param = get_simple_index["index_param"] index_type = get_simple_index["index_type"] + if index_type == IndexType.IVF_PQ: + pytest.skip("skip pq") vectors, ids = self.init_data(connect, collection) status = connect.create_index(collection, index_type, index_param) query_ids = ids[0:nq] - query_ids[0] = non_exist_id + query_ids[0] = 1 search_param = get_search_param(index_type) - status, result = connect.search_by_ids(collection, [query_ids], top_k, params=search_param) + status, result = connect.search_by_ids(collection, query_ids, top_k, params=search_param) assert status.OK() assert len(result) == nq - for i in nq: + for i in range(nq): if i == 0: - assert result[i].id == -1 + assert len(result[i]) == 0 else: assert len(result[i]) == min(len(vectors), top_k) assert result[i][0].distance <= epsilon @@ -277,15 +294,16 @@ class TestSearchBase: def test_search_index_delete(self, connect, collection): vectors, ids = self.init_data(connect, collection) - query_ids = ids[0] - status = connect.delete_by_id(collection, [query_ids]) + query_ids = ids[0:nq] + status = connect.delete_by_id(collection, [query_ids[0]]) assert status.OK() - status = connect.flush(collection) - status, result = connect.search_by_ids(collection, [query_ids], top_k, params={}) + status = connect.flush([collection]) + status, result = connect.search_by_ids(collection, query_ids, top_k, params={}) assert status.OK() - assert len(result) == 1 - assert result[0][0].distance <= epsilon - assert result[0][0].id != ids[0] + assert len(result) == nq + assert len(result[0]) == 0 + assert len(result[1]) == top_k + assert result[1][0].distance <= epsilon def test_search_l2_partition_tag_not_existed(self, connect, collection): ''' @@ -295,28 +313,31 @@ class TestSearchBase: ''' status = connect.create_partition(collection, tag) vectors, ids = 
self.init_data(connect, collection) - query_ids = ids[0] - status, result = connect.search_by_ids(collection, query_ids, top_k, partition_tags=[tag], params=search_param) - assert status.OK() + query_ids = [ids[0]] + new_tag = gen_unique_str() + status, result = connect.search_by_ids(collection, query_ids, top_k, partition_tags=[new_tag], params={}) + assert not status.OK() + logging.getLogger().info(status) assert len(result) == 0 - def test_search_l2_partition_other(self, connect, collection): - tag = gen_unique_str() + def test_search_l2_partition_empty(self, connect, collection): status = connect.create_partition(collection, tag) vectors, ids = self.init_data(connect, collection) - query_ids = ids[0] - status, result = connect.search_by_ids(collection, query_ids, top_k, partition_tags=[tag], params=search_param) - assert status.OK() + query_ids = [ids[0]] + status, result = connect.search_by_ids(collection, query_ids, top_k, partition_tags=[tag], params={}) + assert not status.OK() + logging.getLogger().info(status) assert len(result) == 0 def test_search_l2_partition(self, connect, collection): + status = connect.create_partition(collection, tag) vectors, ids = self.init_data_partition(connect, collection, tag) - query_ids = ids[-1] + query_ids = ids[-1:] status, result = connect.search_by_ids(collection, query_ids, top_k, partition_tags=[tag]) assert status.OK() assert len(result) == 1 assert len(result[0]) == min(len(vectors), top_k) - assert check_result(result[0], query_ids) + assert check_result(result[0], query_ids[-1]) def test_search_l2_partition_B(self, connect, collection): status = connect.create_partition(collection, tag) @@ -325,7 +346,7 @@ class TestSearchBase: status, result = connect.search_by_ids(collection, query_ids, top_k, partition_tags=[tag]) assert status.OK() assert len(result) == nq - for i in nq: + for i in range(nq): assert len(result[i]) == min(len(vectors), top_k) assert result[i][0].distance <= epsilon assert check_result(result[i], ids[i]) @@ -338,14 +359,17 @@ class TestSearchBase: vectors, new_ids = self.init_data_partition(connect, collection, new_tag, nb=nb+1) tmp = 2 query_ids = ids[0:tmp] - query_ids.extend(new_ids[0:nq-tmp]) + query_ids.extend(new_ids[tmp:nq]) status, result = connect.search_by_ids(collection, query_ids, top_k, partition_tags=[tag, new_tag], params={}) assert status.OK() assert len(result) == nq - for i in nq: + for i in range(nq): assert len(result[i]) == min(len(vectors), top_k) assert result[i][0].distance <= epsilon - assert check_result(result[i], ids[i]) + if i < tmp: + assert result[i][0].id == ids[i] + else: + assert result[i][0].id == new_ids[i] def test_search_l2_index_partitions_match_one_tag(self, connect, collection): new_tag = "new_tag" @@ -355,18 +379,19 @@ class TestSearchBase: vectors, new_ids = self.init_data_partition(connect, collection, new_tag, nb=nb+1) tmp = 2 query_ids = ids[0:tmp] - query_ids.extend(new_ids[0:nq-tmp]) + query_ids.extend(new_ids[tmp:nq]) status, result = connect.search_by_ids(collection, query_ids, top_k, partition_tags=[new_tag], params={}) assert status.OK() assert len(result) == nq - for i in nq: + for i in range(nq): if i < tmp: assert result[i][0].distance > epsilon assert result[i][0].id != ids[i] else: assert len(result[i]) == min(len(vectors), top_k) assert result[i][0].distance <= epsilon - assert check_result(result[i], ids[i]) + assert result[i][0].id == new_ids[i] + assert result[i][1].distance > epsilon # def test_search_by_ids_without_connect(self, dis_connect, collection): # 
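These partition-tag cases line up with the router change earlier in the patch (file_based_hash_ring_router.py), where each requested tag is now treated as a regular expression and compared against the stored partition tags with re.match. A standalone sketch of that filter:

import re

def match_partitions(partition_tags, stored_tags):
    # re.match anchors at the start of the string, so the plain tag 'tag'
    # also matches 'tag_new' -- the behavior the tests above rely on.
    matched = []
    for stored in stored_tags:
        if any(re.match(tag, stored) for tag in partition_tags):
            matched.append(stored)
    return matched

assert match_partitions(['tag'], ['tag', 'tag_new', 'other']) == ['tag', 'tag_new']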
''' @@ -411,7 +436,7 @@ class TestSearchBase: status, result = connect.search_by_ids(jac_collection, query_ids, top_k, params=search_param) assert status.OK() assert len(result) == nq - for i in nq: + for i in range(nq): assert len(result[i]) == min(len(vectors), top_k) assert result[i][0].distance <= epsilon assert check_result(result[i], ids[i]) @@ -499,7 +524,7 @@ class TestSearchParamsInvalid(object): def check_result(result, id): - if len(result) >= 5: - return id in [x.id for x in result[:5]] + if len(result) >= top_k: + return id in [x.id for x in result[:top_k]] else: return id in (i.id for i in result) diff --git a/tests/milvus_python_test/test_search_vectors.py b/tests/milvus_python_test/test_search_vectors.py index d43b231596..dc887ef849 100644 --- a/tests/milvus_python_test/test_search_vectors.py +++ b/tests/milvus_python_test/test_search_vectors.py @@ -666,7 +666,7 @@ class TestSearchBase: status, result = connect.search_vectors(substructure_collection, top_k, query_vecs, params=search_param) logging.getLogger().info(status) logging.getLogger().info(result) - assert result[0][0].id == -1 + assert len(result[0]) == 0 def test_search_distance_substructure_flat_index_B(self, connect, substructure_collection): ''' @@ -690,12 +690,12 @@ class TestSearchBase: status, result = connect.search_vectors(substructure_collection, top_k, query_vecs, params=search_param) logging.getLogger().info(status) logging.getLogger().info(result) + assert len(result[0]) == 1 + assert len(result[1]) == 1 assert result[0][0].distance <= epsilon assert result[0][0].id == ids[0] assert result[1][0].distance <= epsilon assert result[1][0].id == ids[1] - assert result[0][1].id == -1 - assert result[1][1].id == -1 def test_search_distance_superstructure_flat_index(self, connect, superstructure_collection): ''' @@ -720,7 +720,7 @@ class TestSearchBase: status, result = connect.search_vectors(superstructure_collection, top_k, query_vecs, params=search_param) logging.getLogger().info(status) logging.getLogger().info(result) - assert result[0][0].id == -1 + assert len(result[0]) == 0 def test_search_distance_superstructure_flat_index_B(self, connect, superstructure_collection): ''' @@ -744,12 +744,12 @@ class TestSearchBase: status, result = connect.search_vectors(superstructure_collection, top_k, query_vecs, params=search_param) logging.getLogger().info(status) logging.getLogger().info(result) + assert len(result[0]) == 2 + assert len(result[1]) == 2 assert result[0][0].id in ids assert result[0][0].distance <= epsilon assert result[1][0].id in ids assert result[1][0].distance <= epsilon - assert result[0][2].id == -1 - assert result[1][2].id == -1 def test_search_distance_tanimoto_flat_index(self, connect, tanimoto_collection): '''
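One detail worth keeping in mind when reading the result assertions above: after the per-shard fan-out, service_handler merges the partial top-k lists (the do_merge span), and the merge direction depends on the metric, since the hunks set reverse=True for IP, where larger scores are better. A toy sketch over (id, distance) pairs; the hit layout here is an assumption, not the actual mishards structures:

def merge_topk(shard_results, topk, reverse=False):
    # Flatten the per-shard hit lists, then keep the global top-k.
    # reverse=False suits L2-style distances (smaller is better);
    # reverse=True suits IP similarity (larger is better).
    merged = [hit for hits in shard_results for hit in hits]
    merged.sort(key=lambda hit: hit[1], reverse=reverse)
    return merged[:topk]

# Two shards with L2 distances, topk=3:
print(merge_topk([[(7, 0.1), (3, 0.5)], [(9, 0.2)]], 3))
# -> [(7, 0.1), (9, 0.2), (3, 0.5)]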