diff --git a/.github/workflows/code-checker.yaml b/.github/workflows/code-checker.yaml index c38e61b08a..2066837fd2 100644 --- a/.github/workflows/code-checker.yaml +++ b/.github/workflows/code-checker.yaml @@ -22,32 +22,48 @@ on: jobs: ubuntu: - name: Code Checker AMD64 Ubuntu ${{ matrix.ubuntu }} - runs-on: ubuntu-${{ matrix.ubuntu }} + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} timeout-minutes: 60 strategy: fail-fast: false matrix: - ubuntu: [18.04] + include: + - name: Code Checker AMD64 Ubuntu 18.04 + os: ubuntu-18.04 + - name: Code Checker MacOS 11 + os: macos-11 env: - UBUNTU: ${{ matrix.ubuntu }} + UBUNTU: 18.04 steps: - name: Checkout uses: actions/checkout@v2 - name: Cache CCache Volumes + if: ${{ matrix.os == 'ubuntu-18.04' }} uses: actions/cache@v1 with: - path: .docker/amd64-ubuntu${{ matrix.ubuntu }}-ccache - key: ubuntu${{ matrix.ubuntu }}-ccache-${{ hashFiles('internal/core/**') }} - restore-keys: ubuntu${{ matrix.ubuntu }}-ccache- + path: .docker/amd64-ubuntu18.04-ccache + key: ubuntu18.04-ccache-${{ hashFiles('internal/core/**') }} + restore-keys: ubuntu18.04-ccache- - name: Cache Go Mod Volumes + if: ${{ matrix.os == 'ubuntu-18.04' }} uses: actions/cache@v1 with: - path: .docker/amd64-ubuntu${{ matrix.ubuntu }}-go-mod - key: ubuntu${{ matrix.ubuntu }}-go-mod-${{ hashFiles('**/go.sum') }} - restore-keys: ubuntu${{ matrix.ubuntu }}-go-mod- + path: .docker/amd64-ubuntu18.04-go-mod + key: ubuntu18.04-go-mod-${{ hashFiles('**/go.sum') }} + restore-keys: ubuntu18.04-go-mod- - name: Code Check + if: ${{ matrix.os == 'ubuntu-18.04' }} env: CHECK_BUILDER: "1" run: | - ./build/builder.sh /bin/bash -c "make check-proto-product && make verifiers" + ./build/builder.sh /bin/bash -c "make check-proto-product && make verifiers" + - name: Code Check + if: ${{ matrix.os == 'macos-11' }} + run: | + bash < <(curl -s -S -L https://raw.githubusercontent.com/moovweb/gvm/master/binscripts/gvm-installer) + source ~/.gvm/scripts/gvm + gvm install go1.17.2 + gvm use go1.17.2 + brew install boost libomp ninja tbb + make check-proto-product && make verifiers \ No newline at end of file diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 2e38862e1a..44cbdfa6ae 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -1,6 +1,6 @@ # Development -This document will help to set up your development environment and run tests. If you encounter a problem, please file an issue. +This document will help to set up your Milvus development environment and to run tests. Please [file an issue](https://github.com/milvus-io/milvus/issues/new/choose) if there's a problem. Table of contents ================= @@ -14,27 +14,27 @@ Table of contents - [Docker & Docker Compose](#docker--docker-compose) - [Building Milvus](#building-milvus) - [A Quick Start for Testing Milvus](#a-quick-start-for-testing-milvus) - - [Presubmission Verification](#presubmission-verification) + - [Pre-submission Verification](#pre-submission-verification) - [Unit Tests](#unit-tests) - [Code coverage](#code-coverage) - [E2E Tests](#e2e-tests) - [Test on local branch](#test-on-local-branch) - - [On Linux](#on-linux) + - [With Linux and MacOS](#with-linux-and-macos) - [With docker](#with-docker) - [GitHub Flow](#github-flow) ## Building Milvus with Docker -Official releases are built with Docker containers. To build Milvus with Docker please follow [these instructions](https://github.com/milvus-io/milvus/blob/master/build/README.md). +Our official Milvus versions are releases as Docker images. To build Milvus Docker on your own, please follow [these instructions](https://github.com/milvus-io/milvus/blob/master/build/README.md). ## Building Milvus on a local OS/shell environment -The details below outline the hardware and software requirements for building on Linux. +The details below outline the hardware and software requirements for building on Linux and MacOS. ### Hardware Requirements -Milvus is written in Go and C++, it requires a lot of resources to compile it. We recommend the following for any physical or virtual machine being used for building Milvus. +The following specification (either physical or virtual machine resources) is recommended for Milvus to build and run from source code. ``` - 8GB of RAM @@ -43,50 +43,48 @@ Milvus is written in Go and C++, it requires a lot of resources to compile it. W ### Software Requirements -In fact, all Linux distributions are available to develop Milvus. The following only contains commands on Ubuntu and CentOS, because we mainly use them. If you develop Milvus on other distributions, you are welcome to improve this document. +All Linux distributions are available for Milvus development. However a majority of our contributor worked with Ubuntu or CentOS systems, with a small portion of Mac (both x86_64 and Apple Silicon) contributors. If you would like Milvus to build and run on other distributions, you are more than welcome to file an issue and contribute! -#### Dependencies -- Debian/Ubuntu +Here's a list of verified OS types where Milvus can successfully build and run: -```shell -$ sudo apt update -$ sudo apt install -y build-essential ccache gfortran \ - libssl-dev zlib1g-dev python3-dev libcurl4-openssl-dev libtbb-dev\ - libboost-regex-dev libboost-program-options-dev libboost-system-dev \ - libboost-filesystem-dev libboost-serialization-dev libboost-python-dev +* Debian/Ubuntu +* CentOS +* MacOS (x86_64) +* MacOS (Apple Silicon) + +#### Prerequisites +Linux systems (Recommend Ubuntu 18.04 or later): +```bash +go: >= 1.15 +cmake: >= 3.18 +gcc: 7.5 ``` -- CentOS - -```shell -$ sudo yum install -y epel-release centos-release-scl-rh && \ -$ sudo yum install -y git make automake openssl-devel zlib-devel \ - libcurl-devel python3-devel \ - devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran \ - llvm-toolset-7.0-clang llvm-toolset-7.0-clang-tools-extra \ - ccache lcov - -$ echo "source scl_source enable devtoolset-7" | sudo tee -a /etc/profile.d/devtoolset-7.sh -$ echo "source scl_source enable llvm-toolset-7.0" | sudo tee -a /etc/profile.d/llvm-toolset-7.sh -$ echo "export CLANG_TOOLS_PATH=/opt/rh/llvm-toolset-7.0/root/usr/bin" | sudo tee -a /etc/profile.d/llvm-toolset-7.sh -$ source "/etc/profile.d/llvm-toolset-7.sh" - -# Install tbb -$ git clone https://github.com/wjakob/tbb.git && \ - cd tbb/build && \ - cmake .. && make -j && \ - sudo make install && \ - cd ../../ && rm -rf tbb/ - -# Install boost -$ wget -q https://boostorg.jfrog.io/artifactory/main/release/1.65.1/source/boost_1_65_1.tar.gz && \ - tar zxf boost_1_65_1.tar.gz && cd boost_1_65_1 && \ - ./bootstrap.sh --prefix=/usr/local --with-toolset=gcc --without-libraries=python && \ - sudo ./b2 -j2 --prefix=/usr/local --without-python toolset=gcc install && \ - cd ../ && rm -rf ./boost_1_65_1* - +MacOS systems with x86_64 (Big Sur 11.5 or later recommended): +```bash +go: >= 1.15 +cmake: >= 3.18 +llvm: >= 12 ``` +MacOS systems with Apple Silicon (Monterey 12.0.1 or later recommended): +```bash +go: >= 1.17 (Arch=ARM64) +cmake: >= 3.18 +llvm: >= 13 +``` + + +#### Installing Dependencies + +In the Milvus repository root, simply run: +```bash +$ ./scripts/install_deps.sh +``` + +#### Caveats +* [Google Test](https://github.com/google/googletest.git) is automatically cloned from GitHub, which in some case could conflict with your local google test library. + Once you have finished, confirm that `gcc` and `make` are installed: ```shell @@ -103,7 +101,7 @@ Confirm that cmake is available: ```shell $ cmake --version ``` -Note: 3.18 or higher cmake version is required to build Milvus. +Note: 3.18 or higher cmake version is required to build Milvus. #### Go @@ -114,7 +112,7 @@ Confirm that your `GOPATH` and `GOBIN` environment variables are correctly set a ```shell $ go version ``` -Note: go1.15 is required to build Milvus. +Note: go >= 1.15 is required to build Milvus. #### Docker & Docker Compose @@ -142,11 +140,11 @@ If you want to know more, you can read Makefile. ## A Quick Start for Testing Milvus -### Presubmission Verification +### Pre-submission Verification -Presubmission verification provides a battery of checks and tests to give your pull request the best chance of being accepted. Developers need to run as many verification tests as possible locally. +Pre-submission verification provides a battery of checks and tests to give your pull request the best chance of being accepted. Developers need to run as many verification tests as possible locally. -To run all presubmission verification tests, use this command: +To run all pre-submission verification tests, use this command: ```shell $ make verifiers @@ -154,49 +152,42 @@ $ make verifiers ### Unit Tests -Pull requests need to pass all unit tests. To run every unit test, use this command: +It is required that all pull request candidates should pass all Milvus unit tests. +Beforce running unit tests, you need to first bring up the Milvus deployment environment. +You may set up a local docker environment with our docker compose yaml file to start unit testing. +For Apple Silicon users (Apple M1): ```shell +$ cd deployments/docker/dev +$ docker-compose -f docker-compose-apple-silicon.yml up -d +$ cd ../../../ $ make unittest ``` - -Before using `make unittest` command, we should run a milvus's deployment environment which helps us to do go test. Here we use local docker environment, use the following commands: +For others: ```shell -# Using cluster environment $ cd deployments/docker/dev $ docker-compose up -d $ cd ../../../ $ make unittest - -# Or using standalone environment -$ cd deployments/docker/standalone -$ docker-compose up -d -$ cd ../../../ -$ make unittest ``` -To run only cpp test, we can use this command: +To run only cpp test: ```shell -make test-cpp +$ make test-cpp ``` -To run only go test, we can use this command: +To run only go test: ```shell -make test-go +$ make test-go ``` -To run single test case, for instance, run TestSearchTask in /internal/proxy directory, use +To run a single test case (TestSearchTask in /internal/proxy directory, for example): ```shell $ go test -v ./internal/proxy/ -test.run TestSearchTask ``` ### Code coverage -Before submitting your Pull Request, make sure your code change is covered by unit test. Use the following commands to check code coverage rate: - -Install lcov(cpp code coverage tool): -```shell -$ sudo apt-get install lcov -``` +Before submitting your pull request, make sure your code change is covered by unit test. Use the following commands to check code coverage rate: Run unit test and generate code coverage report: ```shell @@ -242,7 +233,7 @@ $ pytest --tags=L0 -n auto ``` ### Test on local branch -#### On Linux +#### With Linux and MacOS After preparing deployment environment, we can start the cluster on your host machine ```shell @@ -264,3 +255,29 @@ $ install with docker compose ## GitHub Flow To check out code to work on, please refer to the [GitHub Flow](https://guides.github.com/introduction/flow/). + +## FAQs +Q: The go building phase fails on Apple Silicon (Mac M1) machines. + +A: Please double-check that you have [right Go version](https://go.dev/dl/) installed, i.e. with OS=macOS and Arch=ARM64. + +--- +Q: "make" fails with "*ld: library not found for -lSystem*" on MacOS. + +A: There are a couple of things you could try: +1. Use **Software Update** (from **About this Mac** -> **Overview**) to install updates. +2. Try the following commands: +```bash +sudo rm -rf /Library/Developer/CommandLineTools +sudo xcode-select --install +``` + +--- +Q: Rocksdb fails to compile with "*ld: warning: object file was built for newer macOS version (11.6) than being linked (11.0).*" on MacOS. + +A: Use **Software Update** (from **About this Mac** -> **Overview**) to install updates. + +--- +Q: Some Go unit tests failed. + +A: We are aware that some tests can be flaky occasionally. If there's something you believe is abnormal (i.e. tests that fail every single time). You are more than welcome to [file an issue](https://github.com/milvus-io/milvus/issues/new/choose)! \ No newline at end of file diff --git a/Makefile b/Makefile index 0f6d0d1c3c..9e641d23ed 100644 --- a/Makefile +++ b/Makefile @@ -15,10 +15,18 @@ GOPATH := $(shell $(GO) env GOPATH) INSTALL_PATH := $(PWD)/bin LIBRARY_PATH := $(PWD)/lib +OS := $(shell uname -s) +ARCH := $(shell arch) +mode = Release all: build-cpp build-go -mode = Release +pre-proc: + @echo "Running pre-processing" +ifeq ($(OS),Darwin) # MacOS X + @echo "MacOS system identified. Switching to customized gorocksdb fork..." + @go mod edit -replace=github.com/tecbot/gorocksdb=github.com/soothing-rain/gorocksdb@latest +endif get-build-deps: @(env bash $(PWD)/scripts/install_deps.sh) @@ -74,9 +82,19 @@ ifdef GO_DIFF_FILES @${GOPATH}/bin/ruleguard -rules ruleguard.rules.go $(GO_DIFF_FILES) else @echo "Running $@ check" +ifeq ($(OS),Darwin) # MacOS X +ifeq ($(ARCH),arm64) + @${GOPATH}/bin/darwin_arm64/ruleguard -rules ruleguard.rules.go ./internal/... + @${GOPATH}/bin/darwin_arm64/ruleguard -rules ruleguard.rules.go ./cmd/... +else @${GOPATH}/bin/ruleguard -rules ruleguard.rules.go ./internal/... @${GOPATH}/bin/ruleguard -rules ruleguard.rules.go ./cmd/... -# @${GOPATH}/bin/ruleguard -rules ruleguard.rules.go ./tests/go/... +endif +else + @${GOPATH}/bin/ruleguard -rules ruleguard.rules.go ./internal/... + @${GOPATH}/bin/ruleguard -rules ruleguard.rules.go ./cmd/... +endif + #@${GOPATH}/bin/ruleguard -rules ruleguard.rules.go ./tests/go/... endif verifiers: build-cpp getdeps cppcheck fmt static-check ruleguard @@ -87,7 +105,7 @@ binlog: @mkdir -p $(INSTALL_PATH) && go env -w CGO_ENABLED="1" && GO111MODULE=on $(GO) build -o $(INSTALL_PATH)/binlog $(PWD)/cmd/tools/binlog/main.go 1>/dev/null BUILD_TAGS = $(shell git describe --tags --always --dirty="-dev") -BUILD_TIME = $(shell date --utc) +BUILD_TIME = $(shell date -u) GIT_COMMIT = $(shell git rev-parse --short HEAD) GO_VERSION = $(shell go version) @@ -105,17 +123,17 @@ milvus: build-cpp print-build-info build-go: milvus -build-cpp: +build-cpp: pre-proc @echo "Building Milvus cpp library ..." @(env bash $(PWD)/scripts/core_build.sh -t ${mode} -f "$(CUSTOM_THIRDPARTY_PATH)") - @(env bash $(PWD)/scripts/cwrapper_build.sh -t Release -f "$(CUSTOM_THIRDPARTY_PATH)") - @(env bash $(PWD)/scripts/cwrapper_rocksdb_build.sh -t Release -f "$(CUSTOM_THIRDPARTY_PATH)") + @(env bash $(PWD)/scripts/cwrapper_build.sh -t ${mode} -f "$(CUSTOM_THIRDPARTY_PATH)") + @(env bash $(PWD)/scripts/cwrapper_rocksdb_build.sh -t ${mode} -f "$(CUSTOM_THIRDPARTY_PATH)") -build-cpp-with-unittest: +build-cpp-with-unittest: pre-proc @echo "Building Milvus cpp library with unittest ..." - @(env bash $(PWD)/scripts/core_build.sh -t ${mode} -u -c -f "$(CUSTOM_THIRDPARTY_PATH)") - @(env bash $(PWD)/scripts/cwrapper_build.sh -t Release -f "$(CUSTOM_THIRDPARTY_PATH)") - @(env bash $(PWD)/scripts/cwrapper_rocksdb_build.sh -t Release -f "$(CUSTOM_THIRDPARTY_PATH)") + @(env bash $(PWD)/scripts/core_build.sh -t ${mode} -u -c -f "$(CUSTOM_THIRDPARTY_PATH)") + @(env bash $(PWD)/scripts/cwrapper_build.sh -t ${mode} -f "$(CUSTOM_THIRDPARTY_PATH)") + @(env bash $(PWD)/scripts/cwrapper_rocksdb_build.sh -t ${mode} -f "$(CUSTOM_THIRDPARTY_PATH)") # Run the tests. unittest: test-cpp test-go diff --git a/README.md b/README.md index 5d5599aa64..d30e29d37b 100644 --- a/README.md +++ b/README.md @@ -67,11 +67,25 @@ Milvus was released under the [open-source Apache License 2.0](https://github.co Check the requirements first. +Linux systems (Ubuntu 18.04 or later recommended): ```bash -go: 1.15 -cmake: >=3.18 +go: >= 1.15 +cmake: >= 3.18 gcc: 7.5 -protobuf: >=3.7 +``` + +MacOS systems with x86_64 (Big Sur 11.5 or later recommended): +```bash +go: >= 1.15 +cmake: >= 3.18 +llvm: >= 12 +``` + +MacOS systems with Apple Silicon (Monterey 12.0.1 or later recommended): +```bash +go: >= 1.17 (Arch=ARM64) +cmake: >= 3.18 +llvm: >= 13 ``` Clone Milvus repo and build. diff --git a/deployments/docker/dev/docker-compose-apple-silicon.yml b/deployments/docker/dev/docker-compose-apple-silicon.yml new file mode 100644 index 0000000000..03c2a3b225 --- /dev/null +++ b/deployments/docker/dev/docker-compose-apple-silicon.yml @@ -0,0 +1,55 @@ +version: '3.5' + +services: + etcd: + image: quay.io/coreos/etcd:v3.5.0 + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd + command: etcd -listen-peer-urls=http://127.0.0.1:2380 -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379,http://0.0.0.0:4001 -initial-advertise-peer-urls=http://127.0.0.1:2380 --initial-cluster default=http://127.0.0.1:2380 + ports: + - "2379:2379" + - "2380:2380" + - "4001:4001" + + pulsar: + image: milvusdb/pulsar:v2.7.3-m1 + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/pulsar:/pulsar/data + environment: + # bin/apply-config-from-env.py script will modify the configuration file based on the environment variables + # nettyMaxFrameSizeBytes must be calculated from maxMessageSize + 10240 (padding) + - nettyMaxFrameSizeBytes=104867840 # this is 104857600 + 10240 (padding) + - defaultRetentionTimeInMinutes=10080 + - defaultRetentionSizeInMB=8192 + # maxMessageSize is missing from standalone.conf, must use PULSAR_PREFIX_ to get it configured + - PULSAR_PREFIX_maxMessageSize=104857600 + - PULSAR_GC=-XX:+UseG1GC + ports: + - "6650:6650" + - "18080:8080" + + minio: + image: minio/minio:RELEASE.2020-12-03T00-03-10Z + ports: + - "9000:9000" + environment: + MINIO_ACCESS_KEY: minioadmin + MINIO_SECRET_KEY: minioadmin + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data + command: minio server /minio_data + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + jaeger: + image: jaegertracing/all-in-one:latest + ports: + - "6831:6831/udp" + - "16686:16686" + +networks: + default: + name: milvus_dev diff --git a/go.mod b/go.mod index b9dd650ca6..25709f2f49 100644 --- a/go.mod +++ b/go.mod @@ -30,6 +30,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.11.0 github.com/shirou/gopsutil v3.21.8+incompatible + github.com/soothing-rain/gorocksdb v0.0.0-20220113075731-e68e68ed4c62 // indirect github.com/spaolacci/murmur3 v1.1.0 github.com/spf13/cast v1.3.1 github.com/spf13/viper v1.8.1 diff --git a/go.sum b/go.sum index cb5477bf26..f08af8d7b4 100644 --- a/go.sum +++ b/go.sum @@ -483,6 +483,10 @@ github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9 github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= +github.com/soothing-rain/gorocksdb v0.0.0-20220113074926-f4b9b182e17e h1:E1iqJnSWf+BVfBbuBKLaW4rSCZAAMH1ui1uT+IXTgz4= +github.com/soothing-rain/gorocksdb v0.0.0-20220113074926-f4b9b182e17e/go.mod h1:OEi1TEIyGy/kDfY3ZwoHfn+/dIK0ZOOKJ2ReaYf4sao= +github.com/soothing-rain/gorocksdb v0.0.0-20220113075731-e68e68ed4c62 h1:EAmHsU58jrwMjgnVVxgnlqlBG+1uFuv+Om1qXr5GfII= +github.com/soothing-rain/gorocksdb v0.0.0-20220113075731-e68e68ed4c62/go.mod h1:OEi1TEIyGy/kDfY3ZwoHfn+/dIK0ZOOKJ2ReaYf4sao= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= diff --git a/internal/core/CMakeLists.txt b/internal/core/CMakeLists.txt index 627e810b0f..05a45e0db4 100644 --- a/internal/core/CMakeLists.txt +++ b/internal/core/CMakeLists.txt @@ -16,10 +16,53 @@ cmake_minimum_required( VERSION 3.18 ) +if ( APPLE ) + set( CMAKE_CROSSCOMPILING TRUE ) + set( RUN_HAVE_GNU_POSIX_REGEX 0 ) + set( CMAKE_C_COMPILER "/usr/local/opt/llvm/bin/clang" ) + set( CMAKE_CXX_COMPILER "/usr/local/opt/llvm/bin/clang++" ) +endif () + add_definitions(-DELPP_THREAD_SAFE) set(CMAKE_POSITION_INDEPENDENT_CODE ON) message( STATUS "Building using CMake version: ${CMAKE_VERSION}" ) +project(core) +include(CheckCXXCompilerFlag) +if ( APPLE ) + message(STATUS "==============Darwin Environment==============") + check_cxx_compiler_flag(-std=c++11 HAS_STD_CPP11_FLAG) + if(HAS_STD_CPP11_FLAG) + add_compile_options(-std=c++11) + endif() + if(CMAKE_C_COMPILER_ID MATCHES "Clang") + set(OpenMP_C "${CMAKE_C_COMPILER}" CACHE STRING "" FORCE) + set(OpenMP_C_FLAGS "-fopenmp=libomp -Wno-unused-command-line-argument" CACHE STRING "" FORCE) + set(OpenMP_C_LIB_NAMES "libomp" "libgomp" "libiomp5" CACHE STRING "" FORCE) + set(OpenMP_libomp_LIBRARY ${OpenMP_C_LIB_NAMES} CACHE STRING "" FORCE) + set(OpenMP_libgomp_LIBRARY ${OpenMP_C_LIB_NAMES} CACHE STRING "" FORCE) + set(OpenMP_libiomp5_LIBRARY ${OpenMP_C_LIB_NAMES} CACHE STRING "" FORCE) + endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(OpenMP_CXX "${CMAKE_CXX_COMPILER}" CACHE STRING "" FORCE) + set(OpenMP_CXX_FLAGS "-fopenmp=libomp -Wno-unused-command-line-argument" CACHE STRING "" FORCE) + set(OpenMP_CXX_LIB_NAMES "libomp" "libgomp" "libiomp5" CACHE STRING "" FORCE) + set(OpenMP_libomp_LIBRARY ${OpenMP_CXX_LIB_NAMES} CACHE STRING "" FORCE) + set(OpenMP_libgomp_LIBRARY ${OpenMP_CXX_LIB_NAMES} CACHE STRING "" FORCE) + set(OpenMP_libiomp5_LIBRARY ${OpenMP_CXX_LIB_NAMES} CACHE STRING "" FORCE) + endif() +elseif (${CMAKE_SYSTEM_NAME} MATCHES "Linux") + message(STATUS "==============Linux Environment===============") + set(LINUX TRUE) +else () + message(FATAL_ERROR "Unsupported platform!" ) +endif () + +find_package(OpenMP REQUIRED) +if (OPENMP_FOUND) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif() + set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ) include( Utils ) @@ -61,26 +104,43 @@ include( DefineOptions ) using_ccache_if_defined(MILVUS_USE_CCACHE) include( ExternalProject ) +include( GNUInstallDirs ) include( FetchContent ) include_directories(thirdparty) set( FETCHCONTENT_BASE_DIR ${MILVUS_BINARY_DIR}/3rdparty_download ) set(FETCHCONTENT_QUIET OFF) include( ThirdPartyPackages ) -find_package(OpenMP REQUIRED) -if (OPENMP_FOUND) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -endif() # **************************** Compiler arguments **************************** message( STATUS "Building Milvus CPU version" ) +if (LINUX) append_flags( CMAKE_CXX_FLAGS FLAGS - "-fPIC" - "-DELPP_THREAD_SAFE" - "-fopenmp" - "-Werror" - ) + "-fPIC" + "-DELPP_THREAD_SAFE" + "-fopenmp" + "-Werror" + ) +endif () + +if ( APPLE ) + append_flags( CMAKE_CXX_FLAGS + FLAGS + "-fPIC" + "-DELPP_THREAD_SAFE" + "-fopenmp" + "-Wno-error" + "-Wsign-compare" + "-Wall" + "-pedantic" + "-Wno-unused-command-line-argument" + "-Wextra" + "-Wno-unused-parameter" + "-Wno-deprecated" + "-DBOOST_STACKTRACE_GNU_SOURCE_NOT_REQUIRED=1" + ) +endif () # **************************** Coding style check tools **************************** find_package( ClangTools ) @@ -107,20 +167,18 @@ if ( NOT LINT_EXCLUSIONS_FILE ) set( LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt ) endif () -find_program( CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR} ) -message( STATUS "Found cpplint executable at ${CPPLINT_BIN}" ) - -# -# "make lint" targets -# -add_custom_target( lint +add_custom_target(lint ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_cpplint.py --cpplint_binary ${CPPLINT_BIN} --exclude_globs ${LINT_EXCLUSIONS_FILE} - --source_dir ${CMAKE_CURRENT_SOURCE_DIR} + --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src ${MILVUS_LINT_QUIET} ) + +find_program( CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR} ) +message( STATUS "Found cpplint executable at ${CPPLINT_BIN}" ) + # # "make clang-format" and "make check-clang-format" targets # @@ -190,7 +248,9 @@ add_subdirectory( src ) if ( BUILD_UNIT_TEST STREQUAL "ON" ) append_flags( CMAKE_CXX_FLAGS FLAGS "-DELPP_DISABLE_LOGS") add_subdirectory(unittest) - add_subdirectory(bench) + if (LINUX) + add_subdirectory(bench) + endif () endif () add_custom_target( Clean-All COMMAND ${CMAKE_BUILD_TOOL} clean ) @@ -205,7 +265,7 @@ set( GPU_ENABLE "false" ) # Install segcore install( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/segcore/ - DESTINATION include/segcore/ + DESTINATION include/segcore FILES_MATCHING PATTERN "*_c.h" ) @@ -217,7 +277,7 @@ install( # Install indexbuilder install( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/indexbuilder/ - DESTINATION include/indexbuilder/ + DESTINATION include/indexbuilder FILES_MATCHING PATTERN "*_c.h" ) @@ -228,6 +288,6 @@ install(FILES ${CMAKE_BINARY_DIR}/src/indexbuilder/libmilvus_indexbuilder${CMAKE # Install common install( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/common/ - DESTINATION include/common/ + DESTINATION include/common FILES_MATCHING PATTERN "*_c.h" -) +) \ No newline at end of file diff --git a/internal/core/bench/CMakeLists.txt b/internal/core/bench/CMakeLists.txt index f16ccb7c3f..ffadb26b75 100644 --- a/internal/core/bench/CMakeLists.txt +++ b/internal/core/bench/CMakeLists.txt @@ -11,7 +11,6 @@ include_directories(${CMAKE_HOME_DIRECTORY}/src) include_directories(${CMAKE_HOME_DIRECTORY}/unittest) -include_directories(${CMAKE_HOME_DIRECTORY}/src/index/knowhere) set(bench_srcs bench_naive.cpp @@ -37,6 +36,8 @@ target_link_libraries(indexbuilder_bench milvus_indexbuilder log pthread + knowhere + milvus_utils ) target_link_libraries(indexbuilder_bench benchmark::benchmark_main) diff --git a/internal/core/bench/bench_indexbuilder.cpp b/internal/core/bench/bench_indexbuilder.cpp index fc9aae3120..6b3ae5c716 100644 --- a/internal/core/bench/bench_indexbuilder.cpp +++ b/internal/core/bench/bench_indexbuilder.cpp @@ -13,10 +13,10 @@ #include #include #include +#include +#include #include "pb/index_cgo_msg.pb.h" -#include "index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h" -#include "index/knowhere/knowhere/index/vector_index/adapter/VectorAdapter.h" #include "indexbuilder/IndexWrapper.h" #include "indexbuilder/index_c.h" #include "indexbuilder/utils.h" diff --git a/internal/core/build.sh b/internal/core/build.sh index 8f226030c2..56fdbe1d37 100755 --- a/internal/core/build.sh +++ b/internal/core/build.sh @@ -23,8 +23,7 @@ while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symli done SCRIPTS_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - -BUILD_OUTPUT_DIR="${SCRIPTS_DIR}/../../cmake_build" +BUILD_OUTPUT_DIR="./cmake_build" BUILD_TYPE="Release" BUILD_UNITTEST="OFF" INSTALL_PREFIX="${SCRIPTS_DIR}/output" @@ -130,6 +129,19 @@ if [[ ${MAKE_CLEAN} == "ON" ]]; then exit 0 fi +unameOut="$(uname -s)" +case "${unameOut}" in + Darwin*) + llvm_prefix="$(brew --prefix llvm)" + export CLANG_TOOLS_PATH="${llvm_prefix}/bin" + export CC="${llvm_prefix}/bin/clang" + export CXX="${llvm_prefix}/bin/clang++" + export LDFLAGS="-L${llvm_prefix}/lib -L/usr/local/opt/libomp/lib" + export CXXFLAGS="-I${llvm_prefix}/include -I/usr/local/include -I/usr/local/opt/libomp/include" + ;; + *) echo "==System:${unameOut}"; +esac + CMAKE_CMD="cmake \ -DBUILD_UNIT_TEST=${BUILD_UNITTEST} \ -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} @@ -143,6 +155,7 @@ CMAKE_CMD="cmake \ -DMILVUS_WITH_PROMETHEUS=${WITH_PROMETHEUS} \ -DMILVUS_CUDA_ARCH=${CUDA_ARCH} \ -DCUSTOM_THIRDPARTY_DOWNLOAD_PATH=${CUSTOM_THIRDPARTY_PATH} \ +-DKNOWHERE_GPU_VERSION=${SUPPORT_GPU} \ ${SCRIPTS_DIR}" echo ${CMAKE_CMD} ${CMAKE_CMD} diff --git a/internal/core/cmake/ThirdPartyPackages.cmake b/internal/core/cmake/ThirdPartyPackages.cmake index c129b46d63..383e422a36 100644 --- a/internal/core/cmake/ThirdPartyPackages.cmake +++ b/internal/core/cmake/ThirdPartyPackages.cmake @@ -85,7 +85,7 @@ set(THIRDPARTY_DIR "${MILVUS_SOURCE_DIR}/thirdparty") # ---------------------------------------------------------------------- # ExternalProject options -string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) +#string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}") set(EP_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}") diff --git a/internal/core/src/CMakeLists.txt b/internal/core/src/CMakeLists.txt index 62c2399af4..9548f2e440 100644 --- a/internal/core/src/CMakeLists.txt +++ b/internal/core/src/CMakeLists.txt @@ -17,15 +17,7 @@ include_directories(${MILVUS_ENGINE_SRC}) include_directories(${MILVUS_THIRDPARTY_SRC}) - set(FOUND_OPENBLAS "unknown") -add_subdirectory(index) -set(INDEX_INCLUDE_DIRS ${INDEX_INCLUDE_DIRS} PARENT_SCOPE) -foreach (DIR ${INDEX_INCLUDE_DIRS}) - include_directories(${DIR}) -endforeach () - - add_subdirectory( exceptions ) add_subdirectory( utils ) add_subdirectory( log ) diff --git a/internal/core/src/common/CMakeLists.txt b/internal/core/src/common/CMakeLists.txt index 94fb5ee32b..04d23d8938 100644 --- a/internal/core/src/common/CMakeLists.txt +++ b/internal/core/src/common/CMakeLists.txt @@ -18,4 +18,5 @@ set(COMMON_SRC add_library(milvus_common ${COMMON_SRC} ) -target_link_libraries(milvus_common milvus_proto yaml-cpp) + +target_link_libraries(milvus_common knowhere milvus_proto yaml-cpp ) diff --git a/internal/core/src/common/Types.h b/internal/core/src/common/Types.h index ae88df7e0a..de189230ea 100644 --- a/internal/core/src/common/Types.h +++ b/internal/core/src/common/Types.h @@ -24,8 +24,8 @@ #include #include -#include "faiss/utils/BitsetView.h" -#include "faiss/MetricType.h" +#include "knowhere/utils/BitsetView.h" +#include "knowhere/common/MetricType.h" #include "pb/schema.pb.h" #include "utils/Types.h" diff --git a/internal/core/src/config/CMakeLists.txt b/internal/core/src/config/CMakeLists.txt index 47d7b448d5..8b6169aa8b 100644 --- a/internal/core/src/config/CMakeLists.txt +++ b/internal/core/src/config/CMakeLists.txt @@ -22,7 +22,7 @@ add_library(milvus_config ${CONFIG_SRC} ) target_link_libraries(milvus_config + knowhere milvus_proto milvus_utils - knowhere ) diff --git a/internal/core/src/config/ConfigKnowhere.cpp b/internal/core/src/config/ConfigKnowhere.cpp index 0b67f72e7d..e08c9b6319 100644 --- a/internal/core/src/config/ConfigKnowhere.cpp +++ b/internal/core/src/config/ConfigKnowhere.cpp @@ -19,7 +19,6 @@ #include "ConfigKnowhere.h" #include "exceptions/EasyAssert.h" #include "easyloggingpp/easylogging++.h" -#include "faiss/FaissHook.h" #include "log/Log.h" #include "knowhere/archive/KnowhereConfig.h" diff --git a/internal/core/src/index/build.sh b/internal/core/src/index/build.sh old mode 100755 new mode 100644 diff --git a/internal/core/src/index/thirdparty/NGT/lib/CMakeLists.txt b/internal/core/src/index/thirdparty/NGT/lib/CMakeLists.txt deleted file mode 100644 index 59c47c089a..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -if( ${UNIX} ) - add_subdirectory(${PROJECT_SOURCE_DIR}/lib/NGT) -endif() diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/ArrayFile.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/ArrayFile.cpp deleted file mode 100644 index 733c9ea493..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/ArrayFile.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "ArrayFile.h" -#include -#include - -class ItemID { -public: - void serialize(std::ostream &os, NGT::ObjectSpace *ospace = 0) { - os.write((char*)&value, sizeof(value)); - } - void deserialize(std::istream &is, NGT::ObjectSpace *ospace = 0) { - is.read((char*)&value, sizeof(value)); - } - static size_t getSerializedDataSize() { - return sizeof(uint64_t); - } - uint64_t value; -}; - -void -sampleForUsage() { - { - ArrayFile itemIDFile; - itemIDFile.create("test.data", ItemID::getSerializedDataSize()); - itemIDFile.open("test.data"); - ItemID itemID; - size_t id; - - id = 1; - itemID.value = 4910002490100; - itemIDFile.put(id, itemID); - itemID.value = 0; - itemIDFile.get(id, itemID); - std::cerr << "value=" << itemID.value << std::endl; - assert(itemID.value == 4910002490100); - - id = 2; - itemID.value = 4910002490101; - itemIDFile.put(id, itemID); - itemID.value = 0; - itemIDFile.get(id, itemID); - std::cerr << "value=" << itemID.value << std::endl; - assert(itemID.value == 4910002490101); - - itemID.value = 4910002490102; - id = itemIDFile.insert(itemID); - itemID.value = 0; - itemIDFile.get(id, itemID); - std::cerr << "value=" << itemID.value << std::endl; - assert(itemID.value == 4910002490102); - - itemIDFile.close(); - } - { - ArrayFile itemIDFile; - itemIDFile.create("test.data", ItemID::getSerializedDataSize()); - itemIDFile.open("test.data"); - ItemID itemID; - size_t id; - - id = 10; - itemIDFile.get(id, itemID); - std::cerr << "value=" << itemID.value << std::endl; - assert(itemID.value == 4910002490100); - - id = 20; - itemIDFile.get(id, itemID); - std::cerr << "value=" << itemID.value << std::endl; - assert(itemID.value == 4910002490101); - } - -} - - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/ArrayFile.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/ArrayFile.h deleted file mode 100644 index 1293509c0f..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/ArrayFile.h +++ /dev/null @@ -1,220 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace NGT { - class ObjectSpace; -}; - -template -class ArrayFile { - private: - struct FileHeadStruct { - size_t recordSize; - uint64_t extraData; // reserve - }; - - struct RecordStruct { - bool deleteFlag; - uint64_t extraData; // reserve - }; - - bool _isOpen; - std::fstream _stream; - FileHeadStruct _fileHead; - - bool _readFileHead(); - pthread_mutex_t _mutex; - - public: - ArrayFile(); - ~ArrayFile(); - bool create(const std::string &file, size_t recordSize); - bool open(const std::string &file); - void close(); - size_t insert(TYPE &data, NGT::ObjectSpace *objectSpace = 0); - void put(const size_t id, TYPE &data, NGT::ObjectSpace *objectSpace = 0); - bool get(const size_t id, TYPE &data, NGT::ObjectSpace *objectSpace = 0); - void remove(const size_t id); - bool isOpen() const; - size_t size(); - size_t getRecordSize() { return _fileHead.recordSize; } -}; - - -// constructor -template -ArrayFile::ArrayFile() - : _isOpen(false), _mutex((pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER){ - if(pthread_mutex_init(&_mutex, NULL) < 0) throw std::runtime_error("pthread init error."); -} - -// destructor -template -ArrayFile::~ArrayFile() { - pthread_mutex_destroy(&_mutex); - close(); -} - -template -bool ArrayFile::create(const std::string &file, size_t recordSize) { - std::fstream tmpstream; - tmpstream.open(file.c_str()); - if(tmpstream){ - return false; - } - - tmpstream.open(file.c_str(), std::ios::out); - tmpstream.seekp(0, std::ios::beg); - FileHeadStruct fileHead = {recordSize, 0}; - tmpstream.write((char *)(&fileHead), sizeof(FileHeadStruct)); - tmpstream.close(); - - return true; -} - -template -bool ArrayFile::open(const std::string &file) { - _stream.open(file.c_str(), std::ios::in | std::ios::out); - if(!_stream){ - _isOpen = false; - return false; - } - _isOpen = true; - - bool ret = _readFileHead(); - return ret; -} - -template -void ArrayFile::close(){ - _stream.close(); - _isOpen = false; -} - -template -size_t ArrayFile::insert(TYPE &data, NGT::ObjectSpace *objectSpace) { - _stream.seekp(sizeof(RecordStruct), std::ios::end); - int64_t write_pos = _stream.tellg(); - for(size_t i = 0; i < _fileHead.recordSize; i++) { _stream.write("", 1); } - _stream.seekp(write_pos, std::ios::beg); - data.serialize(_stream, objectSpace); - - int64_t offset_pos = _stream.tellg(); - offset_pos -= sizeof(FileHeadStruct); - size_t id = offset_pos / (sizeof(RecordStruct) + _fileHead.recordSize); - if(offset_pos % (sizeof(RecordStruct) + _fileHead.recordSize) == 0){ - id -= 1; - } - - return id; -} - -template -void ArrayFile::put(const size_t id, TYPE &data, NGT::ObjectSpace *objectSpace) { - uint64_t offset_pos = (id * (sizeof(RecordStruct) + _fileHead.recordSize)) + sizeof(FileHeadStruct); - offset_pos += sizeof(RecordStruct); - _stream.seekp(offset_pos, std::ios::beg); - - for(size_t i = 0; i < _fileHead.recordSize; i++) { _stream.write("", 1); } - _stream.seekp(offset_pos, std::ios::beg); - data.serialize(_stream, objectSpace); -} - -template -bool ArrayFile::get(const size_t id, TYPE &data, NGT::ObjectSpace *objectSpace) { - pthread_mutex_lock(&_mutex); - - if( size() <= id ){ - pthread_mutex_unlock(&_mutex); - return false; - } - - uint64_t offset_pos = (id * (sizeof(RecordStruct) + _fileHead.recordSize)) + sizeof(FileHeadStruct); - offset_pos += sizeof(RecordStruct); - _stream.seekg(offset_pos, std::ios::beg); - if (!_stream.fail()) { - data.deserialize(_stream, objectSpace); - } - if (_stream.fail()) { - const int trialCount = 10; - for (int tc = 0; tc < trialCount; tc++) { - _stream.clear(); - _stream.seekg(offset_pos, std::ios::beg); - if (_stream.fail()) { - continue; - } - data.deserialize(_stream, objectSpace); - if (_stream.fail()) { - continue; - } else { - break; - } - } - if (_stream.fail()) { - throw std::runtime_error("ArrayFile::get: Error!"); - } - } - - pthread_mutex_unlock(&_mutex); - return true; -} - -template -void ArrayFile::remove(const size_t id) { - uint64_t offset_pos = (id * (sizeof(RecordStruct) + _fileHead.recordSize)) + sizeof(FileHeadStruct); - _stream.seekp(offset_pos, std::ios::beg); - RecordStruct recordHead = {1, 0}; - _stream.write((char *)(&recordHead), sizeof(RecordStruct)); -} - -template -bool ArrayFile::isOpen() const -{ - return _isOpen; -} - -template -size_t ArrayFile::size() -{ - _stream.seekp(0, std::ios::end); - int64_t offset_pos = _stream.tellg(); - offset_pos -= sizeof(FileHeadStruct); - size_t num = offset_pos / (sizeof(RecordStruct) + _fileHead.recordSize); - - return num; -} - -template -bool ArrayFile::_readFileHead() { - _stream.seekp(0, std::ios::beg); - _stream.read((char *)(&_fileHead), sizeof(FileHeadStruct)); - if(_stream.bad()){ - return false; - } - return true; -} - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/CMakeLists.txt b/internal/core/src/index/thirdparty/NGT/lib/NGT/CMakeLists.txt deleted file mode 100644 index ef241d82ef..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/CMakeLists.txt +++ /dev/null @@ -1,40 +0,0 @@ -if( ${UNIX} ) - option(NGT_SHARED_MEMORY_ALLOCATOR "enable shared memory" OFF) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/defines.h.in ${CMAKE_CURRENT_BINARY_DIR}/defines.h) - include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/lib" "${PROJECT_BINARY_DIR}/lib/") - include_directories("${PROJECT_SOURCE_DIR}/../") - - file(GLOB NGT_SOURCES *.cpp) - file(GLOB HEADER_FILES *.h *.hpp) - file(GLOB NGTQ_HEADER_FILES NGTQ/*.h NGTQ/*.hpp) - - add_library(ngtstatic STATIC ${NGT_SOURCES}) - set_target_properties(ngtstatic PROPERTIES OUTPUT_NAME ngt) - set_target_properties(ngtstatic PROPERTIES COMPILE_FLAGS "-fPIC") - target_link_libraries(ngtstatic) - if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") - target_link_libraries(ngtstatic OpenMP::OpenMP_CXX) - endif() - - add_library(ngt SHARED ${NGT_SOURCES}) - set_target_properties(ngt PROPERTIES VERSION ${ngt_VERSION}) - set_target_properties(ngt PROPERTIES SOVERSION ${ngt_SOVERSION}) - add_dependencies(ngt ngtstatic) - if(${APPLE}) - if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") - target_link_libraries(ngt OpenMP::OpenMP_CXX) - else() - target_link_libraries(ngt gomp) - endif() - else(${APPLE}) - target_link_libraries(ngt gomp rt) - endif(${APPLE}) - - install(TARGETS - ngt - ngtstatic - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) - -endif() diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Capi.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/Capi.cpp deleted file mode 100644 index 9c927739cf..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Capi.cpp +++ /dev/null @@ -1,988 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include -#include -#include - -#include "NGT/Index.h" -#include "NGT/GraphOptimizer.h" -#include "Capi.h" - -static bool operate_error_string_(const std::stringstream &ss, NGTError error){ - if(error != NULL){ - try{ - std::string *error_str = static_cast(error); - *error_str = ss.str(); - }catch(std::exception &err){ - std::cerr << ss.str() << " > " << err.what() << std::endl; - return false; - } - }else{ - std::cerr << ss.str() << std::endl; - } - return true; -} - -NGTIndex ngt_open_index(const char *index_path, NGTError error) { - try{ - std::string index_path_str(index_path); - NGT::Index *index = new NGT::Index(index_path_str); - index->disableLog(); - return static_cast(index); - }catch(std::exception &err){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return NULL; - } -} - -NGTIndex ngt_create_graph_and_tree(const char *database, NGTProperty prop, NGTError error) { - NGT::Index *index = NULL; - try{ - std::string database_str(database); - NGT::Property prop_i = *(static_cast(prop)); - NGT::Index::createGraphAndTree(database_str, prop_i, true); - index = new NGT::Index(database_str); - index->disableLog(); - return static_cast(index); - }catch(std::exception &err){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - delete index; - return NULL; - } -} - -NGTIndex ngt_create_graph_and_tree_in_memory(NGTProperty prop, NGTError error) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << __FUNCTION__ << " is unavailable for shared-memory-type NGT."; - operate_error_string_(ss, error); - return NULL; -#else - try{ - NGT::Index *index = new NGT::GraphAndTreeIndex(*(static_cast(prop))); - index->disableLog(); - return static_cast(index); - }catch(std::exception &err){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return NULL; - } -#endif -} - -NGTProperty ngt_create_property(NGTError error) { - try{ - return static_cast(new NGT::Property()); - }catch(std::exception &err){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return NULL; - } -} - -bool ngt_save_index(const NGTIndex index, const char *database, NGTError error) { - try{ - std::string database_str(database); - (static_cast(index))->saveIndex(database_str); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; -} - -bool ngt_get_property(NGTIndex index, NGTProperty prop, NGTError error) { - if(index == NULL || prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - try{ - (static_cast(index))->getProperty(*(static_cast(prop))); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; -} - -int32_t ngt_get_property_dimension(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return -1; - } - return (*static_cast(prop)).dimension; -} - -bool ngt_set_property_dimension(NGTProperty prop, int32_t value, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - (*static_cast(prop)).dimension = value; - return true; -} - -bool ngt_set_property_edge_size_for_creation(NGTProperty prop, int16_t value, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - (*static_cast(prop)).edgeSizeForCreation = value; - return true; -} - -bool ngt_set_property_edge_size_for_search(NGTProperty prop, int16_t value, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - (*static_cast(prop)).edgeSizeForSearch = value; - return true; -} - -int32_t ngt_get_property_object_type(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return -1; - } - return (*static_cast(prop)).objectType; -} - -bool ngt_is_property_object_type_float(int32_t object_type) { - return (object_type == NGT::ObjectSpace::ObjectType::Float); -} - -bool ngt_is_property_object_type_integer(int32_t object_type) { - return (object_type == NGT::ObjectSpace::ObjectType::Uint8); -} - -bool ngt_set_property_object_type_float(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - (*static_cast(prop)).objectType = NGT::ObjectSpace::ObjectType::Float; - return true; -} - -bool ngt_set_property_object_type_integer(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - (*static_cast(prop)).objectType = NGT::ObjectSpace::ObjectType::Uint8; - return true; -} - -bool ngt_set_property_distance_type_l1(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - (*static_cast(prop)).distanceType = NGT::Index::Property::DistanceType::DistanceTypeL1; - return true; -} - -bool ngt_set_property_distance_type_l2(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - (*static_cast(prop)).distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; - return true; -} - -bool ngt_set_property_distance_type_angle(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - (*static_cast(prop)).distanceType = NGT::Index::Property::DistanceType::DistanceTypeAngle; - return true; -} - -bool ngt_set_property_distance_type_hamming(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - (*static_cast(prop)).distanceType = NGT::Index::Property::DistanceType::DistanceTypeHamming; - return true; -} - -bool ngt_set_property_distance_type_jaccard(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - (*static_cast(prop)).distanceType = NGT::Index::Property::DistanceType::DistanceTypeJaccard; - return true; -} - -bool ngt_set_property_distance_type_cosine(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - (*static_cast(prop)).distanceType = NGT::Index::Property::DistanceType::DistanceTypeCosine; - return true; -} - -bool ngt_set_property_distance_type_normalized_angle(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - (*static_cast(prop)).distanceType = NGT::Index::Property::DistanceType::DistanceTypeNormalizedAngle; - return true; -} - -bool ngt_set_property_distance_type_normalized_cosine(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return false; - } - - (*static_cast(prop)).distanceType = NGT::Index::Property::DistanceType::DistanceTypeNormalizedCosine; - return true; -} - -NGTObjectDistances ngt_create_empty_results(NGTError error) { - try{ - return static_cast(new NGT::ObjectDistances()); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return NULL; - } -} - -static bool ngt_search_index_(NGT::Index* pindex, NGT::Object *ngtquery, size_t size, float epsilon, float radius, NGTObjectDistances results, int edge_size = INT_MIN) { - // set search prameters. - NGT::SearchContainer sc(*ngtquery); // search parametera container. - - sc.setResults(static_cast(results)); // set the result set. - sc.setSize(size); // the number of resultant objects. - sc.setRadius(radius); // search radius. - sc.setEpsilon(epsilon); // set exploration coefficient. - if (edge_size != INT_MIN) { - sc.setEdgeSize(edge_size);// set # of edges for each node - } - - pindex->search(sc); - - // delete the query object. - pindex->deleteObject(ngtquery); - return true; -} - -bool ngt_search_index(NGTIndex index, double *query, int32_t query_dim, size_t size, float epsilon, float radius, NGTObjectDistances results, NGTError error) { - if(index == NULL || query == NULL || results == NULL || query_dim <= 0){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " query = " << query << " results = " << results << " query_dim = " << query_dim; - operate_error_string_(ss, error); - return false; - } - - NGT::Index* pindex = static_cast(index); - NGT::Object *ngtquery = NULL; - - if(radius < 0.0){ - radius = FLT_MAX; - } - - try{ - std::vector vquery(&query[0], &query[query_dim]); - ngtquery = pindex->allocateObject(vquery); - ngt_search_index_(pindex, ngtquery, size, epsilon, radius, results); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - if(ngtquery != NULL){ - pindex->deleteObject(ngtquery); - } - return false; - } - return true; -} - -bool ngt_search_index_as_float(NGTIndex index, float *query, int32_t query_dim, size_t size, float epsilon, float radius, NGTObjectDistances results, NGTError error) { - if(index == NULL || query == NULL || results == NULL || query_dim <= 0){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " query = " << query << " results = " << results << " query_dim = " << query_dim; - operate_error_string_(ss, error); - return false; - } - - NGT::Index* pindex = static_cast(index); - NGT::Object *ngtquery = NULL; - - if(radius < 0.0){ - radius = FLT_MAX; - } - - try{ - std::vector vquery(&query[0], &query[query_dim]); - ngtquery = pindex->allocateObject(vquery); - ngt_search_index_(pindex, ngtquery, size, epsilon, radius, results); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - if(ngtquery != NULL){ - pindex->deleteObject(ngtquery); - } - return false; - } - return true; -} - -bool ngt_search_index_with_query(NGTIndex index, NGTQuery query, NGTObjectDistances results, NGTError error) { - if(index == NULL || query.query == NULL || results == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " query = " << query.query << " results = " << results; - operate_error_string_(ss, error); - return false; - } - - NGT::Index* pindex = static_cast(index); - int32_t dim = pindex->getObjectSpace().getDimension(); - - NGT::Object *ngtquery = NULL; - - if(query.radius < 0.0){ - query.radius = FLT_MAX; - } - - try{ - std::vector vquery(&query.query[0], &query.query[dim]); - ngtquery = pindex->allocateObject(vquery); - ngt_search_index_(pindex, ngtquery, query.size, query.epsilon, query.radius, results, query.edge_size); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - if(ngtquery != NULL){ - pindex->deleteObject(ngtquery); - } - return false; - } - return true; -} - - -// * deprecated * -int32_t ngt_get_size(NGTObjectDistances results, NGTError error) { - if(results == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: results = " << results; - operate_error_string_(ss, error); - return -1; - } - - return (static_cast(results))->size(); -} - -uint32_t ngt_get_result_size(NGTObjectDistances results, NGTError error) { - if(results == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: results = " << results; - operate_error_string_(ss, error); - return 0; - } - - return (static_cast(results))->size(); -} - -NGTObjectDistance ngt_get_result(const NGTObjectDistances results, const uint32_t i, NGTError error) { - try{ - NGT::ObjectDistances objects = *(static_cast(results)); - NGTObjectDistance ret_val = {objects[i].id, objects[i].distance}; - return ret_val; - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - - NGTObjectDistance err_val = {0}; - return err_val; - } -} - -ObjectID ngt_insert_index(NGTIndex index, double *obj, uint32_t obj_dim, NGTError error) { - if(index == NULL || obj == NULL || obj_dim == 0){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " obj = " << obj << " obj_dim = " << obj_dim; - operate_error_string_(ss, error); - return 0; - } - - try{ - NGT::Index* pindex = static_cast(index); - std::vector vobj(&obj[0], &obj[obj_dim]); - return pindex->insert(vobj); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return 0; - } -} - -ObjectID ngt_append_index(NGTIndex index, double *obj, uint32_t obj_dim, NGTError error) { - if(index == NULL || obj == NULL || obj_dim == 0){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " obj = " << obj << " obj_dim = " << obj_dim; - operate_error_string_(ss, error); - return 0; - } - - try{ - NGT::Index* pindex = static_cast(index); - std::vector vobj(&obj[0], &obj[obj_dim]); - return pindex->append(vobj); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return 0; - } -} - -ObjectID ngt_insert_index_as_float(NGTIndex index, float *obj, uint32_t obj_dim, NGTError error) { - if(index == NULL || obj == NULL || obj_dim == 0){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " obj = " << obj << " obj_dim = " << obj_dim; - operate_error_string_(ss, error); - return 0; - } - - try{ - NGT::Index* pindex = static_cast(index); - std::vector vobj(&obj[0], &obj[obj_dim]); - return pindex->insert(vobj); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return 0; - } -} - -ObjectID ngt_append_index_as_float(NGTIndex index, float *obj, uint32_t obj_dim, NGTError error) { - if(index == NULL || obj == NULL || obj_dim == 0){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index << " obj = " << obj << " obj_dim = " << obj_dim; - operate_error_string_(ss, error); - return 0; - } - - try{ - NGT::Index* pindex = static_cast(index); - std::vector vobj(&obj[0], &obj[obj_dim]); - return pindex->append(vobj); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return 0; - } -} - -bool ngt_batch_append_index(NGTIndex index, float *obj, uint32_t data_count, NGTError error) { - try{ - NGT::Index* pindex = static_cast(index); - pindex->append(obj, data_count); - return true; - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } -} - -bool ngt_batch_insert_index(NGTIndex index, float *obj, uint32_t data_count, uint32_t *ids, NGTError error) { - NGT::Index* pindex = static_cast(index); - int32_t dim = pindex->getObjectSpace().getDimension(); - - bool status = true; - float *objptr = obj; - for (size_t idx = 0; idx < data_count; idx++, objptr += dim) { - try{ - std::vector vobj(objptr, objptr + dim); - ids[idx] = pindex->insert(vobj); - }catch(std::exception &err) { - status = false; - ids[idx] = 0; - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - } - } - return status; -} - -bool ngt_create_index(NGTIndex index, uint32_t pool_size, NGTError error) { - if(index == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: idnex = " << index; - operate_error_string_(ss, error); - return false; - } - - try{ - (static_cast(index))->createIndex(pool_size); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; -} - -bool ngt_remove_index(NGTIndex index, ObjectID id, NGTError error) { - if(index == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: idnex = " << index; - operate_error_string_(ss, error); - return false; - } - - try{ - (static_cast(index))->remove(id); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; -} - -NGTObjectSpace ngt_get_object_space(NGTIndex index, NGTError error) { - if(index == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: idnex = " << index; - operate_error_string_(ss, error); - return NULL; - } - - try{ - return static_cast(&(static_cast(index))->getObjectSpace()); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return NULL; - } -} - -float* ngt_get_object_as_float(NGTObjectSpace object_space, ObjectID id, NGTError error) { - if(object_space == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: object_space = " << object_space; - operate_error_string_(ss, error); - return NULL; - } - try{ - return static_cast((static_cast(object_space))->getObject(id)); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return NULL; - } -} - -uint8_t* ngt_get_object_as_integer(NGTObjectSpace object_space, ObjectID id, NGTError error) { - if(object_space == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: object_space = " << object_space; - operate_error_string_(ss, error); - return NULL; - } - try{ - return static_cast((static_cast(object_space))->getObject(id)); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return NULL; - } -} - -void ngt_destroy_results(NGTObjectDistances results) { - if(results == NULL) return; - delete(static_cast(results)); -} - -void ngt_destroy_property(NGTProperty prop) { - if(prop == NULL) return; - delete(static_cast(prop)); -} - -void ngt_close_index(NGTIndex index) { - if(index == NULL) return; - (static_cast(index))->close(); - delete(static_cast(index)); -} - -int16_t ngt_get_property_edge_size_for_creation(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return -1; - } - return (*static_cast(prop)).edgeSizeForCreation; -} - -int16_t ngt_get_property_edge_size_for_search(NGTProperty prop, NGTError error) { - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return -1; - } - return (*static_cast(prop)).edgeSizeForSearch; -} - -int32_t ngt_get_property_distance_type(NGTProperty prop, NGTError error){ - if(prop == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: prop = " << prop; - operate_error_string_(ss, error); - return -1; - } - return (*static_cast(prop)).distanceType; -} - -NGTError ngt_create_error_object() -{ - try{ - std::string *error_str = new std::string(); - return static_cast(error_str); - }catch(std::exception &err){ - std::cerr << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - return NULL; - } -} - -const char *ngt_get_error_string(const NGTError error) -{ - std::string *error_str = static_cast(error); - return error_str->c_str(); -} - -void ngt_clear_error_string(NGTError error) -{ - std::string *error_str = static_cast(error); - *error_str = ""; -} - -void ngt_destroy_error_object(NGTError error) -{ - std::string *error_str = static_cast(error); - delete error_str; -} - -NGTOptimizer ngt_create_optimizer(bool logDisabled, NGTError error) -{ - try{ - return static_cast(new NGT::GraphOptimizer(logDisabled)); - }catch(std::exception &err){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return NULL; - } -} - -bool ngt_optimizer_adjust_search_coefficients(NGTOptimizer optimizer, const char *index, NGTError error) { - if(optimizer == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: optimizer = " << optimizer; - operate_error_string_(ss, error); - return false; - } - try{ - (static_cast(optimizer))->adjustSearchCoefficients(std::string(index)); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; -} - -bool ngt_optimizer_execute(NGTOptimizer optimizer, const char *inIndex, const char *outIndex, NGTError error) { - if(optimizer == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: optimizer = " << optimizer; - operate_error_string_(ss, error); - return false; - } - try{ - (static_cast(optimizer))->execute(std::string(inIndex), std::string(outIndex)); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; -} - -// obsolute because of a lack of a parameter -bool ngt_optimizer_set(NGTOptimizer optimizer, int outgoing, int incoming, int nofqs, - float baseAccuracyFrom, float baseAccuracyTo, - float rateAccuracyFrom, float rateAccuracyTo, - double gte, double m, NGTError error) { - if(optimizer == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: optimizer = " << optimizer; - operate_error_string_(ss, error); - return false; - } - try{ - (static_cast(optimizer))->set(outgoing, incoming, nofqs, baseAccuracyFrom, baseAccuracyTo, - rateAccuracyFrom, rateAccuracyTo, gte, m); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; -} - -bool ngt_optimizer_set_minimum(NGTOptimizer optimizer, int outgoing, int incoming, - int nofqs, int nofrs, NGTError error) { - if(optimizer == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: optimizer = " << optimizer; - operate_error_string_(ss, error); - return false; - } - try{ - (static_cast(optimizer))->set(outgoing, incoming, nofqs, nofrs); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; -} - -bool ngt_optimizer_set_extension(NGTOptimizer optimizer, - float baseAccuracyFrom, float baseAccuracyTo, - float rateAccuracyFrom, float rateAccuracyTo, - double gte, double m, NGTError error) { - if(optimizer == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: optimizer = " << optimizer; - operate_error_string_(ss, error); - return false; - } - try{ - (static_cast(optimizer))->setExtension(baseAccuracyFrom, baseAccuracyTo, - rateAccuracyFrom, rateAccuracyTo, gte, m); - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; -} - -bool ngt_optimizer_set_processing_modes(NGTOptimizer optimizer, bool searchParameter, - bool prefetchParameter, bool accuracyTable, NGTError error) -{ - if(optimizer == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: optimizer = " << optimizer; - operate_error_string_(ss, error); - return false; - } - - (static_cast(optimizer))->setProcessingModes(searchParameter, prefetchParameter, - accuracyTable); - return true; -} - -void ngt_destroy_optimizer(NGTOptimizer optimizer) -{ - if(optimizer == NULL) return; - delete(static_cast(optimizer)); -} - -bool ngt_refine_anng(NGTIndex index, float epsilon, float accuracy, int noOfEdges, int exploreEdgeSize, size_t batchSize, NGTError error) -{ - NGT::Index* pindex = static_cast(index); - try { - NGT::GraphReconstructor::refineANNG(*pindex, true, epsilon, accuracy, noOfEdges, exploreEdgeSize, batchSize); - } catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; -} - -bool ngt_get_edges(NGTIndex index, ObjectID id, NGTObjectDistances edges, NGTError error) -{ - if(index == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index; - operate_error_string_(ss, error); - return false; - } - - NGT::Index* pindex = static_cast(index); - NGT::GraphIndex &graph = static_cast(pindex->getIndex()); - - try { - NGT::ObjectDistances &objects = *static_cast(edges); - objects = *graph.getNode(id); - }catch(std::exception &err){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - - return true; -} - -uint32_t ngt_get_object_repository_size(NGTIndex index, NGTError error) -{ - if(index == NULL){ - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : parametor error: index = " << index; - operate_error_string_(ss, error); - return false; - } - NGT::Index& pindex = *static_cast(index); - return pindex.getObjectRepositorySize(); -} - -NGTAnngEdgeOptimizationParameter ngt_get_anng_edge_optimization_parameter() -{ - NGT::GraphOptimizer::ANNGEdgeOptimizationParameter gp; - NGTAnngEdgeOptimizationParameter parameter; - - parameter.no_of_queries = gp.noOfQueries; - parameter.no_of_results = gp.noOfResults; - parameter.no_of_threads = gp.noOfThreads; - parameter.target_accuracy = gp.targetAccuracy; - parameter.target_no_of_objects = gp.targetNoOfObjects; - parameter.no_of_sample_objects = gp.noOfSampleObjects; - parameter.max_of_no_of_edges = gp.maxNoOfEdges; - parameter.log = false; - - return parameter; -} - -bool ngt_optimize_number_of_edges(const char *indexPath, NGTAnngEdgeOptimizationParameter parameter, NGTError error) -{ - - NGT::GraphOptimizer::ANNGEdgeOptimizationParameter p; - - p.noOfQueries = parameter.no_of_queries; - p.noOfResults = parameter.no_of_results; - p.noOfThreads = parameter.no_of_threads; - p.targetAccuracy = parameter.target_accuracy; - p.targetNoOfObjects = parameter.target_no_of_objects; - p.noOfSampleObjects = parameter.no_of_sample_objects; - p.maxNoOfEdges = parameter.max_of_no_of_edges; - - try { - NGT::GraphOptimizer graphOptimizer(!parameter.log); // false=log - std::string path(indexPath); - auto edge = graphOptimizer.optimizeNumberOfEdgesForANNG(path, p); - if (parameter.log) { - std::cerr << "the optimized number of edges is" << edge.first << "(" << edge.second << ")" << std::endl; - } - }catch(std::exception &err) { - std::stringstream ss; - ss << "Capi : " << __FUNCTION__ << "() : Error: " << err.what(); - operate_error_string_(ss, error); - return false; - } - return true; - -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Capi.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Capi.h deleted file mode 100644 index e8cc7922df..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Capi.h +++ /dev/null @@ -1,210 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -typedef unsigned int ObjectID; -typedef void* NGTIndex; -typedef void* NGTProperty; -typedef void* NGTObjectSpace; -typedef void* NGTObjectDistances; -typedef void* NGTError; -typedef void* NGTOptimizer; - -typedef struct { - ObjectID id; - float distance; -} NGTObjectDistance; - -typedef struct { - float *query; - size_t size; // # of returned objects - float epsilon; - float accuracy; // expected accuracy - float radius; - size_t edge_size; // # of edges to explore for each node -} NGTQuery; - -typedef struct { - size_t no_of_queries; - size_t no_of_results; - size_t no_of_threads; - float target_accuracy; - size_t target_no_of_objects; - size_t no_of_sample_objects; - size_t max_of_no_of_edges; - bool log; -} NGTAnngEdgeOptimizationParameter; - -NGTIndex ngt_open_index(const char *, NGTError); - -NGTIndex ngt_create_graph_and_tree(const char *, NGTProperty, NGTError); - -NGTIndex ngt_create_graph_and_tree_in_memory(NGTProperty, NGTError); - -NGTProperty ngt_create_property(NGTError); - -bool ngt_save_index(const NGTIndex, const char *, NGTError); - -bool ngt_get_property(const NGTIndex, NGTProperty, NGTError); - -int32_t ngt_get_property_dimension(NGTProperty, NGTError); - -bool ngt_set_property_dimension(NGTProperty, int32_t, NGTError); - -bool ngt_set_property_edge_size_for_creation(NGTProperty, int16_t, NGTError); - -bool ngt_set_property_edge_size_for_search(NGTProperty, int16_t, NGTError); - -int32_t ngt_get_property_object_type(NGTProperty, NGTError); - -bool ngt_is_property_object_type_float(int32_t); - -bool ngt_is_property_object_type_integer(int32_t); - -bool ngt_set_property_object_type_float(NGTProperty, NGTError); - -bool ngt_set_property_object_type_integer(NGTProperty, NGTError); - -bool ngt_set_property_distance_type_l1(NGTProperty, NGTError); - -bool ngt_set_property_distance_type_l2(NGTProperty, NGTError); - -bool ngt_set_property_distance_type_angle(NGTProperty, NGTError); - -bool ngt_set_property_distance_type_hamming(NGTProperty, NGTError); - -bool ngt_set_property_distance_type_jaccard(NGTProperty, NGTError); - -bool ngt_set_property_distance_type_cosine(NGTProperty, NGTError); - -bool ngt_set_property_distance_type_normalized_angle(NGTProperty, NGTError); - -bool ngt_set_property_distance_type_normalized_cosine(NGTProperty, NGTError); - -NGTObjectDistances ngt_create_empty_results(NGTError); - -bool ngt_search_index(NGTIndex, double*, int32_t, size_t, float, float, NGTObjectDistances, NGTError); - -bool ngt_search_index_as_float(NGTIndex, float*, int32_t, size_t, float, float, NGTObjectDistances, NGTError); - -bool ngt_search_index_with_query(NGTIndex, NGTQuery, NGTObjectDistances, NGTError); - -int32_t ngt_get_size(NGTObjectDistances, NGTError); // deprecated - -uint32_t ngt_get_result_size(NGTObjectDistances, NGTError); - -NGTObjectDistance ngt_get_result(const NGTObjectDistances, const uint32_t, NGTError); - -ObjectID ngt_insert_index(NGTIndex, double*, uint32_t, NGTError); - -ObjectID ngt_append_index(NGTIndex, double*, uint32_t, NGTError); - -ObjectID ngt_insert_index_as_float(NGTIndex, float*, uint32_t, NGTError); - -ObjectID ngt_append_index_as_float(NGTIndex, float*, uint32_t, NGTError); - -bool ngt_batch_append_index(NGTIndex, float*, uint32_t, NGTError); - -bool ngt_batch_insert_index(NGTIndex, float*, uint32_t, uint32_t *, NGTError); - -bool ngt_create_index(NGTIndex, uint32_t, NGTError); - -bool ngt_remove_index(NGTIndex, ObjectID, NGTError); - -NGTObjectSpace ngt_get_object_space(NGTIndex, NGTError); - -float* ngt_get_object_as_float(NGTObjectSpace, ObjectID, NGTError); - -uint8_t* ngt_get_object_as_integer(NGTObjectSpace, ObjectID, NGTError); - -void ngt_destroy_results(NGTObjectDistances); - -void ngt_destroy_property(NGTProperty); - -void ngt_close_index(NGTIndex); - -int16_t ngt_get_property_edge_size_for_creation(NGTProperty, NGTError); - -int16_t ngt_get_property_edge_size_for_search(NGTProperty, NGTError); - -int32_t ngt_get_property_distance_type(NGTProperty, NGTError); - -NGTError ngt_create_error_object(); - -const char *ngt_get_error_string(const NGTError); - -void ngt_clear_error_string(NGTError); - -void ngt_destroy_error_object(NGTError); - -NGTOptimizer ngt_create_optimizer(bool logDisabled, NGTError); - -bool ngt_optimizer_adjust_search_coefficients(NGTOptimizer, const char *, NGTError); - -bool ngt_optimizer_execute(NGTOptimizer, const char *, const char *, NGTError); - -bool ngt_optimizer_set(NGTOptimizer optimizer, int outgoing, int incoming, int nofqs, - float baseAccuracyFrom, float baseAccuracyTo, - float rateAccuracyFrom, float rateAccuracyTo, - double gte, double m, NGTError error); - -bool ngt_optimizer_set_minimum(NGTOptimizer optimizer, int outgoing, int incoming, - int nofqs, int nofrs, NGTError error); - -bool ngt_optimizer_set_extension(NGTOptimizer optimizer, - float baseAccuracyFrom, float baseAccuracyTo, - float rateAccuracyFrom, float rateAccuracyTo, - double gte, double m, NGTError error); - -bool ngt_optimizer_set_processing_modes(NGTOptimizer optimizer, bool searchParameter, - bool prefetchParameter, bool accuracyTable, NGTError error); - -void ngt_destroy_optimizer(NGTOptimizer); - -// refine: the specified index by searching each node. -// epsilon, exepectedAccuracy and edgeSize: the same as the prameters for search. but if edgeSize is INT_MIN, default is used. -// noOfEdges: if this is not 0, kNNG with k = noOfEdges is build -// batchSize: batch size for parallelism. -bool ngt_refine_anng(NGTIndex index, float epsilon, float expectedAccuracy, - int noOfEdges, int edgeSize, size_t batchSize, NGTError error); - -// get edges of the node that is specified with id. -bool ngt_get_edges(NGTIndex index, ObjectID id, NGTObjectDistances edges, NGTError error); - -// get the size of the specified object repository. -// Since the size includes empty objects, the size is not the number of objects. -// The size is mostly the largest ID of the objects - 1; -uint32_t ngt_get_object_repository_size(NGTIndex index, NGTError error); - -// return parameters for ngt_optimize_number_of_edges. You can customize them before calling ngt_optimize_number_of_edges. -NGTAnngEdgeOptimizationParameter ngt_get_anng_edge_optimization_parameter(); - -// optimize the number of initial edges for ANNG that is specified with indexPath. -// The parameter should be a struct which is returned by nt_get_optimization_parameter. -bool ngt_optimize_number_of_edges(const char *indexPath, NGTAnngEdgeOptimizationParameter parameter, NGTError error); - -#ifdef __cplusplus -} -#endif diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Clustering.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Clustering.h deleted file mode 100644 index 4edd8c2536..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Clustering.h +++ /dev/null @@ -1,880 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include "NGT/Index.h" -#include "defines.h" - -using namespace std; - -#if defined(NGT_AVX_DISABLED) -#define NGT_CLUSTER_NO_AVX -#else -#if defined(__AVX2__) -#define NGT_CLUSTER_AVX2 -#else -#define NGT_CLUSTER_NO_AVX -#endif -#endif - -#if defined(NGT_CLUSTER_NO_AVX) -// #warning "*** SIMD is *NOT* available! ***" -#else -#include -#endif - -#include -#include - -namespace NGT { - -class Clustering { - public: - enum InitializationMode { - InitializationModeHead = 0, - InitializationModeRandom = 1, - InitializationModeKmeansPlusPlus = 2 - }; - - enum ClusteringType { - ClusteringTypeKmeansWithNGT = 0, - ClusteringTypeKmeansWithoutNGT = 1, - ClusteringTypeKmeansWithIteration = 2, - ClusteringTypeKmeansWithNGTForCentroids = 3 - }; - - class Entry { - public: - Entry() : vectorID(0), centroidID(0), distance(0.0) { - } - Entry(size_t vid, size_t cid, double d) : vectorID(vid), centroidID(cid), distance(d) { - } - bool - operator<(const Entry& e) const { - return distance > e.distance; - } - uint32_t vectorID; - uint32_t centroidID; - double distance; - }; - - class DescendingEntry { - public: - DescendingEntry(size_t vid, double d) : vectorID(vid), distance(d) { - } - bool - operator<(const DescendingEntry& e) const { - return distance < e.distance; - } - size_t vectorID; - double distance; - }; - - class Cluster { - public: - Cluster(std::vector& c) : centroid(c), radius(0.0) { - } - Cluster(const Cluster& c) { - *this = c; - } - Cluster& - operator=(const Cluster& c) { - members = c.members; - centroid = c.centroid; - radius = c.radius; - return *this; - } - - std::vector members; - std::vector centroid; - double radius; - }; - - Clustering(InitializationMode im = InitializationModeHead, ClusteringType ct = ClusteringTypeKmeansWithNGT, - size_t mi = 100) - : clusteringType(ct), initializationMode(im), maximumIteration(mi) { - initialize(); - } - - void - initialize() { - epsilonFrom = 0.12; - epsilonTo = epsilonFrom; - epsilonStep = 0.04; - resultSizeCoefficient = 5; - } - - static void - convert(std::vector& strings, std::vector& vector) { - vector.clear(); - for (auto it = strings.begin(); it != strings.end(); ++it) { - vector.push_back(stod(*it)); - } - } - - static void - extractVector(const std::string& str, std::vector& vec) { - std::vector tokens; - NGT::Common::tokenize(str, tokens, " \t"); - convert(tokens, vec); - } - - static void - loadVectors(const std::string& file, std::vector >& vectors) { - std::ifstream is(file); - if (!is) { - throw std::runtime_error("loadVectors::Cannot open " + file); - } - std::string line; - while (getline(is, line)) { - std::vector v; - extractVector(line, v); - vectors.push_back(v); - } - } - - static void - saveVectors(const std::string& file, std::vector >& vectors) { - std::ofstream os(file); - for (auto vit = vectors.begin(); vit != vectors.end(); ++vit) { - std::vector& v = *vit; - for (auto it = v.begin(); it != v.end(); ++it) { - os << std::setprecision(9) << (*it); - if (it + 1 != v.end()) { - os << "\t"; - } - } - os << std::endl; - } - } - - static void - saveVector(const std::string& file, std::vector& vectors) { - std::ofstream os(file); - for (auto vit = vectors.begin(); vit != vectors.end(); ++vit) { - os << *vit << std::endl; - } - } - - static void - loadClusters(const std::string& file, std::vector& clusters, size_t numberOfClusters = 0) { - std::ifstream is(file); - if (!is) { - throw std::runtime_error("loadClusters::Cannot open " + file); - } - std::string line; - while (getline(is, line)) { - std::vector v; - extractVector(line, v); - clusters.push_back(v); - if ((numberOfClusters != 0) && (clusters.size() >= numberOfClusters)) { - break; - } - } - if ((numberOfClusters != 0) && (clusters.size() < numberOfClusters)) { -// std::cerr << "initial cluster data are not enough. " << clusters.size() << ":" << numberOfClusters -// << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("initial cluster data are not enough. " + std::to_string(clusters.size()) + ":" + std::to_string(numberOfClusters)); - exit(1); - } - } -#if !defined(NGT_CLUSTER_NO_AVX) - static double - sumOfSquares(float* a, float* b, size_t size) { - __m256 sum = _mm256_setzero_ps(); - float* last = a + size; - float* lastgroup = last - 7; - while (a < lastgroup) { - __m256 v = _mm256_sub_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b)); - sum = _mm256_add_ps(sum, _mm256_mul_ps(v, v)); - a += 8; - b += 8; - } - __attribute__((aligned(32))) float f[8]; - _mm256_store_ps(f, sum); - double s = f[0] + f[1] + f[2] + f[3] + f[4] + f[5] + f[6] + f[7]; - while (a < last) { - double d = *a++ - *b++; - s += d * d; - } - return s; - } -#else // !defined(NGT_AVX_DISABLED) && defined(__AVX__) - static double - sumOfSquares(float* a, float* b, size_t size) { - double csum = 0.0; - float* x = a; - float* y = b; - for (size_t i = 0; i < size; i++) { - double d = (double)*x++ - (double)*y++; - csum += d * d; - } - return csum; - } -#endif // !defined(NGT_AVX_DISABLED) && defined(__AVX__) - - static double - distanceL2(std::vector& vector1, std::vector& vector2) { - return sqrt(sumOfSquares(&vector1[0], &vector2[0], vector1.size())); - } - - static double - distanceL2(std::vector >& vector1, std::vector >& vector2) { - assert(vector1.size() == vector2.size()); - double distance = 0.0; - for (size_t i = 0; i < vector1.size(); i++) { - distance += distanceL2(vector1[i], vector2[i]); - } - distance /= (double)vector1.size(); - return distance; - } - - static double - meanSumOfSquares(std::vector& vector1, std::vector& vector2) { - return sumOfSquares(&vector1[0], &vector2[0], vector1.size()) / (double)vector1.size(); - } - - static void - subtract(std::vector& a, std::vector& b) { - assert(a.size() == b.size()); - auto bit = b.begin(); - for (auto ait = a.begin(); ait != a.end(); ++ait, ++bit) { - *ait = *ait - *bit; - } - } - - static void - getInitialCentroidsFromHead(std::vector >& vectors, std::vector& clusters, - size_t size) { - size = size > vectors.size() ? vectors.size() : size; - clusters.clear(); - for (size_t i = 0; i < size; i++) { - clusters.push_back(Cluster(vectors[i])); - } - } - - static void - getInitialCentroidsRandomly(std::vector >& vectors, std::vector& clusters, size_t size, - size_t seed) { - clusters.clear(); - std::random_device rnd; - if (seed == 0) { - seed = rnd(); - } - std::mt19937 mt(seed); - - for (size_t i = 0; i < size; i++) { - size_t idx = mt() * vectors.size() / mt.max(); - if (idx >= size) { - i--; - continue; - } - clusters.push_back(Cluster(vectors[idx])); - } - assert(clusters.size() == size); - } - - static void - getInitialCentroidsKmeansPlusPlus(std::vector >& vectors, std::vector& clusters, - size_t size) { - size = size > vectors.size() ? vectors.size() : size; - clusters.clear(); - std::random_device rnd; - std::mt19937 mt(rnd()); - size_t idx = (long long)mt() * (long long)vectors.size() / (long long)mt.max(); - clusters.push_back(Cluster(vectors[idx])); - - NGT::Timer timer; - for (size_t k = 1; k < size; k++) { - double sum = 0; - std::priority_queue sortedObjects; - // get d^2 and sort -#pragma omp parallel for - for (size_t vi = 0; vi < vectors.size(); vi++) { - auto vit = vectors.begin() + vi; - double mind = DBL_MAX; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - double d = distanceL2(*vit, (*cit).centroid); - d *= d; - if (d < mind) { - mind = d; - } - } -#pragma omp critical - { - sortedObjects.push(DescendingEntry(distance(vectors.begin(), vit), mind)); - sum += mind; - } - } - double l = (double)mt() / (double)mt.max() * sum; - while (!sortedObjects.empty()) { - sum -= sortedObjects.top().distance; - if (l >= sum) { - clusters.push_back(Cluster(vectors[sortedObjects.top().vectorID])); - break; - } - sortedObjects.pop(); - } - } - } - - static void - assign(std::vector >& vectors, std::vector& clusters, - size_t clusterSize = std::numeric_limits::max()) { - // compute distances to the nearest clusters, and construct heap by the distances. - NGT::Timer timer; - timer.start(); - - std::vector sortedObjects(vectors.size()); -#pragma omp parallel for - for (size_t vi = 0; vi < vectors.size(); vi++) { - auto vit = vectors.begin() + vi; - { - double mind = DBL_MAX; - size_t mincidx = -1; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - double d = distanceL2(*vit, (*cit).centroid); - if (d < mind) { - mind = d; - mincidx = distance(clusters.begin(), cit); - } - } - sortedObjects[vi] = Entry(vi, mincidx, mind); - } - } - std::sort(sortedObjects.begin(), sortedObjects.end()); - - // clear - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - (*cit).members.clear(); - } - - // distribute objects to the nearest clusters in the same size constraint. - for (auto soi = sortedObjects.rbegin(); soi != sortedObjects.rend();) { - Entry& entry = *soi; - if (entry.centroidID >= clusters.size()) { -// std::cerr << "Something wrong. " << entry.centroidID << ":" << clusters.size() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Something wrong. " + std::to_string(entry.centroidID) + ":" + std::to_string(clusters.size())); - soi++; - continue; - } - if (clusters[entry.centroidID].members.size() < clusterSize) { - clusters[entry.centroidID].members.push_back(entry); - soi++; - } else { - double mind = DBL_MAX; - size_t mincidx = -1; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - if ((*cit).members.size() >= clusterSize) { - continue; - } - double d = distanceL2(vectors[entry.vectorID], (*cit).centroid); - if (d < mind) { - mind = d; - mincidx = distance(clusters.begin(), cit); - } - } - entry = Entry(entry.vectorID, mincidx, mind); - int pt = distance(sortedObjects.rbegin(), soi); - std::sort(sortedObjects.begin(), soi.base()); - soi = sortedObjects.rbegin() + pt; - assert(pt == distance(sortedObjects.rbegin(), soi)); - } - } - - moveFartherObjectsToEmptyClusters(clusters); - } - - static void - moveFartherObjectsToEmptyClusters(std::vector& clusters) { - size_t emptyClusterCount = 0; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - if ((*cit).members.size() == 0) { - emptyClusterCount++; - double max = 0.0; - auto maxit = clusters.begin(); - for (auto scit = clusters.begin(); scit != clusters.end(); ++scit) { - if ((*scit).members.size() >= 2 && (*scit).members.back().distance > max) { - maxit = scit; - max = (*scit).members.back().distance; - } - } - (*cit).members.push_back((*maxit).members.back()); - (*cit).members.back().centroidID = distance(clusters.begin(), cit); - (*maxit).members.pop_back(); - } - } - emptyClusterCount = 0; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - if ((*cit).members.size() == 0) { - emptyClusterCount++; - } - } - } - - static void - assignWithNGT(NGT::Index& index, std::vector >& vectors, std::vector& clusters, - float& radius, size_t& resultSize, float epsilon = 0.12, size_t notRetrievedObjectCount = 0) { - size_t dataSize = vectors.size(); - assert(index.getObjectRepositorySize() - 1 == vectors.size()); - vector > results(clusters.size()); -#pragma omp parallel for - for (size_t ci = 0; ci < clusters.size(); ci++) { - auto cit = clusters.begin() + ci; - NGT::ObjectDistances objects; // result set - NGT::Object* query = 0; - query = index.allocateObject((*cit).centroid); - // set search prameters. - NGT::SearchContainer sc(*query); // search parametera container. - sc.setResults(&objects); // set the result set. - sc.setEpsilon(epsilon); // set exploration coefficient. - if (radius > 0.0) { - sc.setRadius(radius); - sc.setSize(dataSize / 2); - } else { - sc.setSize(resultSize); // the number of resultant objects. - } - index.search(sc); - results[ci].reserve(objects.size()); - for (size_t idx = 0; idx < objects.size(); idx++) { - size_t oidx = objects[idx].id - 1; - results[ci].push_back(Entry(oidx, ci, objects[idx].distance)); - } - - index.deleteObject(query); - } - size_t resultCount = 0; - for (auto ri = results.begin(); ri != results.end(); ++ri) { - resultCount += (*ri).size(); - } - vector sortedResults; - sortedResults.reserve(resultCount); - for (auto ri = results.begin(); ri != results.end(); ++ri) { - auto end = (*ri).begin(); - for (; end != (*ri).end(); ++end) { - } - std::copy((*ri).begin(), end, std::back_inserter(sortedResults)); - } - - vector processedObjects(dataSize, false); - for (auto i = sortedResults.begin(); i != sortedResults.end(); ++i) { - processedObjects[(*i).vectorID] = true; - } - - notRetrievedObjectCount = 0; - vector notRetrievedObjectIDs; - for (size_t idx = 0; idx < dataSize; idx++) { - if (!processedObjects[idx]) { - notRetrievedObjectCount++; - notRetrievedObjectIDs.push_back(idx); - } - } - - sort(sortedResults.begin(), sortedResults.end()); - - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - (*cit).members.clear(); - } - - for (auto i = sortedResults.rbegin(); i != sortedResults.rend(); ++i) { - size_t objectID = (*i).vectorID; - size_t clusterID = (*i).centroidID; - if (processedObjects[objectID]) { - processedObjects[objectID] = false; - clusters[clusterID].members.push_back(*i); - clusters[clusterID].members.back().centroidID = clusterID; - radius = (*i).distance; - } - } - - vector notRetrievedObjects(notRetrievedObjectIDs.size()); - -#pragma omp parallel for - for (size_t vi = 0; vi < notRetrievedObjectIDs.size(); vi++) { - auto vit = notRetrievedObjectIDs.begin() + vi; - { - double mind = DBL_MAX; - size_t mincidx = -1; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - double d = distanceL2(vectors[*vit], (*cit).centroid); - if (d < mind) { - mind = d; - mincidx = distance(clusters.begin(), cit); - } - } - notRetrievedObjects[vi] = Entry(*vit, mincidx, mind); // Entry(vectorID, centroidID, distance) - } - } - - sort(notRetrievedObjects.begin(), notRetrievedObjects.end()); - - for (auto nroit = notRetrievedObjects.begin(); nroit != notRetrievedObjects.end(); ++nroit) { - clusters[(*nroit).centroidID].members.push_back(*nroit); - } - - moveFartherObjectsToEmptyClusters(clusters); - } - - static double - calculateCentroid(std::vector >& vectors, std::vector& clusters) { - double distance = 0; - size_t memberCount = 0; - for (auto it = clusters.begin(); it != clusters.end(); ++it) { - memberCount += (*it).members.size(); - if ((*it).members.size() != 0) { - std::vector mean(vectors[0].size(), 0.0); - for (auto memit = (*it).members.begin(); memit != (*it).members.end(); ++memit) { - auto mit = mean.begin(); - auto& v = vectors[(*memit).vectorID]; - for (auto vit = v.begin(); vit != v.end(); ++vit, ++mit) { - *mit += *vit; - } - } - for (auto mit = mean.begin(); mit != mean.end(); ++mit) { - *mit /= (*it).members.size(); - } - distance += distanceL2((*it).centroid, mean); - (*it).centroid = mean; - } else { -// cerr << "Clustering: Fatal Error. No member!" << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Clustering: Fatal Error. No member!"); - abort(); - } - } - return distance; - } - - static void - saveClusters(const std::string& file, std::vector& clusters) { - std::ofstream os(file); - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - std::vector& v = (*cit).centroid; - for (auto it = v.begin(); it != v.end(); ++it) { - os << std::setprecision(9) << (*it); - if (it + 1 != v.end()) { - os << "\t"; - } - } - os << std::endl; - } - } - - double - kmeansWithoutNGT(std::vector >& vectors, size_t numberOfClusters, - std::vector& clusters) { - size_t clusterSize = std::numeric_limits::max(); - if (clusterSizeConstraint) { - clusterSize = ceil((double)vectors.size() / (double)numberOfClusters); - } - - double diff = 0; - for (size_t i = 0; i < maximumIteration; i++) { -// std::cerr << "iteration=" << i << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("iteration=" + std::to_string(i)); - assign(vectors, clusters, clusterSize); - // centroid is recomputed. - // diff is distance between the current centroids and the previous centroids. - diff = calculateCentroid(vectors, clusters); - if (diff == 0) { - break; - } - } - return diff == 0; - } - - double - kmeansWithNGT(NGT::Index& index, std::vector >& vectors, size_t numberOfClusters, - std::vector& clusters, float epsilon) { - diffHistory.clear(); - NGT::Timer timer; - timer.start(); - float radius; - double diff = 0.0; - size_t resultSize; - resultSize = resultSizeCoefficient * vectors.size() / clusters.size(); - for (size_t i = 0; i < maximumIteration; i++) { - size_t notRetrievedObjectCount = 0; - radius = -1.0; - assignWithNGT(index, vectors, clusters, radius, resultSize, epsilon, notRetrievedObjectCount); - // centroid is recomputed. - // diff is distance between the current centroids and the previous centroids. - std::vector prevClusters = clusters; - diff = calculateCentroid(vectors, clusters); - timer.stop(); -// std::cerr << "iteration=" << i << " time=" << timer << " diff=" << diff << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("iteration=" + std::to_string(i) + " time=" + std::to_string(timer.time)+ " diff=" + std::to_string(diff)); - timer.start(); - diffHistory.push_back(diff); - - if (diff == 0) { - break; - } - } - return diff; - } - - double - kmeansWithNGT(std::vector >& vectors, size_t numberOfClusters, std::vector& clusters) { - pid_t pid = getpid(); - std::stringstream str; - str << "cluster-ngt." << pid; - string database = str.str(); - string dataFile; - size_t dataSize = 0; - size_t dim = clusters.front().centroid.size(); - NGT::Property property; - property.dimension = dim; - property.graphType = NGT::Property::GraphType::GraphTypeANNG; - property.objectType = NGT::Index::Property::ObjectType::Float; - property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; - - NGT::Index::createGraphAndTree(database, property, dataFile, dataSize); - - float* data = new float[vectors.size() * dim]; - float* ptr = data; - dataSize = vectors.size(); - for (auto vi = vectors.begin(); vi != vectors.end(); ++vi) { - memcpy(ptr, &((*vi)[0]), dim * sizeof(float)); - ptr += dim; - } - size_t threadSize = 20; - NGT::Index::append(database, data, dataSize, threadSize); - delete[] data; - - NGT::Index index(database); - - return kmeansWithNGT(index, vectors, numberOfClusters, clusters, epsilonFrom); - } - - double - kmeansWithNGT(NGT::Index& index, size_t numberOfClusters, std::vector& clusters) { - NGT::GraphIndex& graph = static_cast(index.getIndex()); - NGT::ObjectSpace& os = graph.getObjectSpace(); - size_t size = os.getRepository().size(); - std::vector > vectors(size - 1); - for (size_t idx = 1; idx < size; idx++) { - try { - os.getObject(idx, vectors[idx - 1]); - } catch (...) { -// cerr << "Cannot get object " << idx << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Cannot get object " + std::to_string(idx)); - } - } -// cerr << "# of data for clustering=" << vectors.size() << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("# of data for clustering=" + std::to_string(vectors.size())); - double diff = DBL_MAX; - clusters.clear(); - setupInitialClusters(vectors, numberOfClusters, clusters); - for (float epsilon = epsilonFrom; epsilon <= epsilonTo; epsilon += epsilonStep) { -// cerr << "epsilon=" << epsilon << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("epsilon=" + std::to_string(epsilon)); - diff = kmeansWithNGT(index, vectors, numberOfClusters, clusters, epsilon); - if (diff == 0.0) { - return diff; - } - } - return diff; - } - - double - kmeansWithNGT(NGT::Index& index, size_t numberOfClusters, NGT::Index& outIndex) { - std::vector clusters; - double diff = kmeansWithNGT(index, numberOfClusters, clusters); - for (auto i = clusters.begin(); i != clusters.end(); ++i) { - outIndex.insert((*i).centroid); - } - outIndex.createIndex(16); - return diff; - } - - double - kmeansWithNGT(NGT::Index& index, size_t numberOfClusters) { - NGT::Property prop; - index.getProperty(prop); - string path = index.getPath(); - index.save(); - index.close(); - string outIndexName = path; - string inIndexName = path + ".tmp"; - std::rename(outIndexName.c_str(), inIndexName.c_str()); - NGT::Index::createGraphAndTree(outIndexName, prop); - index.open(outIndexName); - NGT::Index inIndex(inIndexName); - double diff = kmeansWithNGT(inIndex, numberOfClusters, index); - inIndex.close(); - NGT::Index::destroy(inIndexName); - return diff; - } - - double - kmeansWithNGT(string& indexName, size_t numberOfClusters) { - NGT::Index inIndex(indexName); - double diff = kmeansWithNGT(inIndex, numberOfClusters); - inIndex.save(); - inIndex.close(); - return diff; - } - - static double - calculateMSE(std::vector >& vectors, std::vector& clusters) { - double mse = 0.0; - size_t count = 0; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - count += (*cit).members.size(); - for (auto mit = (*cit).members.begin(); mit != (*cit).members.end(); ++mit) { - mse += meanSumOfSquares((*cit).centroid, vectors[(*mit).vectorID]); - } - } - assert(vectors.size() == count); - return mse / (double)vectors.size(); - } - - static double - calculateML2(std::vector >& vectors, std::vector& clusters) { - double d = 0.0; - size_t count = 0; - for (auto cit = clusters.begin(); cit != clusters.end(); ++cit) { - count += (*cit).members.size(); - double localD = 0.0; - for (auto mit = (*cit).members.begin(); mit != (*cit).members.end(); ++mit) { - double distance = distanceL2((*cit).centroid, vectors[(*mit).vectorID]); - d += distance; - localD += distance; - } - } - if (vectors.size() != count) { -// std::cerr << "Warning! vectors.size() != count" << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Warning! vectors.size() != count"); - } - - return d / (double)vectors.size(); - } - - static double - calculateML2FromSpecifiedCentroids(std::vector >& vectors, std::vector& clusters, - std::vector& centroidIds) { - double d = 0.0; - size_t count = 0; - for (auto it = centroidIds.begin(); it != centroidIds.end(); ++it) { - Cluster& cluster = clusters[(*it)]; - count += cluster.members.size(); - for (auto mit = cluster.members.begin(); mit != cluster.members.end(); ++mit) { - d += distanceL2(cluster.centroid, vectors[(*mit).vectorID]); - } - } - return d / (double)vectors.size(); - } - - void - setupInitialClusters(std::vector >& vectors, size_t numberOfClusters, - std::vector& clusters) { - if (clusters.empty()) { - switch (initializationMode) { - case InitializationModeHead: { - getInitialCentroidsFromHead(vectors, clusters, numberOfClusters); - break; - } - case InitializationModeRandom: { - getInitialCentroidsRandomly(vectors, clusters, numberOfClusters, 0); - break; - } - case InitializationModeKmeansPlusPlus: { - getInitialCentroidsKmeansPlusPlus(vectors, clusters, numberOfClusters); - break; - } - default: -// std::cerr << "proper initMode is not specified." << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("proper initMode is not specified."); - exit(1); - } - } - } - - bool - kmeans(std::vector >& vectors, size_t numberOfClusters, std::vector& clusters) { - setupInitialClusters(vectors, numberOfClusters, clusters); - - switch (clusteringType) { - case ClusteringTypeKmeansWithoutNGT: - return kmeansWithoutNGT(vectors, numberOfClusters, clusters); - break; - case ClusteringTypeKmeansWithNGT: - return kmeansWithNGT(vectors, numberOfClusters, clusters); - break; - default: -// cerr << "kmeans::fatal error!. invalid clustering type. " << clusteringType << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("kmeans::fatal error!. invalid clustering type. " + std::to_string(clusteringType)); - abort(); - break; - } - } - - static void - evaluate(std::vector >& vectors, std::vector& clusters, char mode, - std::vector centroidIds = std::vector()) { - size_t clusterSize = std::numeric_limits::max(); - assign(vectors, clusters, clusterSize); - -// std::cout << "The number of vectors=" << vectors.size() << std::endl; -// std::cout << "The number of centroids=" << clusters.size() << std::endl; - if (centroidIds.size() == 0) { - switch (mode) { - case 'e': -// std::cout << "MSE=" << calculateMSE(vectors, clusters) << std::endl; - break; - case '2': - default: -// std::cout << "ML2=" << calculateML2(vectors, clusters) << std::endl; - break; - } - } else { - switch (mode) { - case 'e': - break; - case '2': - default: -// std::cout << "ML2=" << calculateML2FromSpecifiedCentroids(vectors, clusters, centroidIds) -// << std::endl; - break; - } - } - } - - ClusteringType clusteringType; - InitializationMode initializationMode; - size_t numberOfClusters; - bool clusterSizeConstraint; - size_t maximumIteration; - float epsilonFrom; - float epsilonTo; - float epsilonStep; - size_t resultSizeCoefficient; - vector diffHistory; -}; - -} // namespace NGT diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Command.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/Command.cpp deleted file mode 100644 index 2dce27e966..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Command.cpp +++ /dev/null @@ -1,1074 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "NGT/Command.h" -#include "NGT/GraphReconstructor.h" -#include "NGT/Optimizer.h" -#include "NGT/GraphOptimizer.h" - - -using namespace std; - - - void - NGT::Command::create(Args &args) - { - const string usage = "Usage: ngt create " - "-d dimension [-p #-of-thread] [-i index-type(t|g)] [-g graph-type(a|k|b|o|i)] " - "[-t truncation-edge-limit] [-E edge-size] [-S edge-size-for-search] [-L edge-size-limit] " - "[-e epsilon] [-o object-type(f|c)] [-D distance-function(1|2|a|A|h|j|c|C)] [-n #-of-inserted-objects] " - "[-P path-adjustment-interval] [-B dynamic-edge-size-base] [-A object-alignment(t|f)] " - "[-T build-time-limit] [-O outgoing x incoming] " -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - "[-N maximum-#-of-inserted-objects] " -#endif - "index(output) [data.tsv(input)]"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "ngt: Error: DB is not specified." << endl; - cerr << usage << endl; - return; - } - string data; - try { - data = args.get("#2"); - } catch (...) {} - - NGT::Property property; - - property.edgeSizeForCreation = args.getl("E", 10); - property.edgeSizeForSearch = args.getl("S", 40); - property.batchSizeForCreation = args.getl("b", 200); - property.insertionRadiusCoefficient = args.getf("e", 0.1) + 1.0; - property.truncationThreshold = args.getl("t", 0); - property.dimension = args.getl("d", 0); - property.threadPoolSize = args.getl("p", 24); - property.pathAdjustmentInterval = args.getl("P", 0); - property.dynamicEdgeSizeBase = args.getl("B", 30); - property.buildTimeLimit = args.getf("T", 0.0); - - if (property.dimension <= 0) { - cerr << "ngt: Error: Specify greater than 0 for # of your data dimension by a parameter -d." << endl; - cerr << usage << endl; - return; - } - - property.objectAlignment = args.getChar("A", 'f') == 't' ? NGT::Property::ObjectAlignmentTrue : NGT::Property::ObjectAlignmentFalse; - - char graphType = args.getChar("g", 'a'); - switch(graphType) { - case 'a': property.graphType = NGT::Property::GraphType::GraphTypeANNG; break; - case 'k': property.graphType = NGT::Property::GraphType::GraphTypeKNNG; break; - case 'b': property.graphType = NGT::Property::GraphType::GraphTypeBKNNG; break; - case 'd': property.graphType = NGT::Property::GraphType::GraphTypeDNNG; break; - case 'o': property.graphType = NGT::Property::GraphType::GraphTypeONNG; break; - case 'i': property.graphType = NGT::Property::GraphType::GraphTypeIANNG; break; - default: - cerr << "ngt: Error: Invalid graph type. " << graphType << endl; - cerr << usage << endl; - return; - } - - if (property.graphType == NGT::Property::GraphType::GraphTypeONNG) { - property.outgoingEdge = 10; - property.incomingEdge = 80; - string str = args.getString("O", "-"); - if (str != "-") { - vector tokens; - NGT::Common::tokenize(str, tokens, "x"); - if (str != "-" && tokens.size() != 2) { - cerr << "ngt: Error: outgoing/incoming edge size specification is invalid. (out)x(in) " << str << endl; - cerr << usage << endl; - return; - } - property.outgoingEdge = NGT::Common::strtod(tokens[0]); - property.incomingEdge = NGT::Common::strtod(tokens[1]); - cerr << "ngt: ONNG out x in=" << property.outgoingEdge << "x" << property.incomingEdge << endl; - } - } - - char seedType = args.getChar("s", '-'); - switch(seedType) { - case 'f': property.seedType = NGT::Property::SeedType::SeedTypeFixedNodes; break; - case '1': property.seedType = NGT::Property::SeedType::SeedTypeFirstNode; break; - case 'r': property.seedType = NGT::Property::SeedType::SeedTypeRandomNodes; break; - case 'l': property.seedType = NGT::Property::SeedType::SeedTypeAllLeafNodes; break; - default: - case '-': property.seedType = NGT::Property::SeedType::SeedTypeNone; break; - } - - char objectType = args.getChar("o", 'f'); - char distanceType = args.getChar("D", '2'); - - size_t dataSize = args.getl("n", 0); - char indexType = args.getChar("i", 't'); - - if (debugLevel >= 1) { - cerr << "edgeSizeForCreation=" << property.edgeSizeForCreation << endl; - cerr << "edgeSizeForSearch=" << property.edgeSizeForSearch << endl; - cerr << "edgeSizeLimit=" << property.edgeSizeLimitForCreation << endl; - cerr << "batch size=" << property.batchSizeForCreation << endl; - cerr << "graphType=" << property.graphType << endl; - cerr << "epsilon=" << property.insertionRadiusCoefficient - 1.0 << endl; - cerr << "thread size=" << property.threadPoolSize << endl; - cerr << "dimension=" << property.dimension << endl; - cerr << "indexType=" << indexType << endl; - } - - switch (objectType) { - case 'f': - property.objectType = NGT::Index::Property::ObjectType::Float; - break; - case 'c': - property.objectType = NGT::Index::Property::ObjectType::Uint8; - break; - default: - cerr << "ngt: Error: Invalid object type. " << objectType << endl; - cerr << usage << endl; - return; - } - - switch (distanceType) { - case '1': - property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL1; - break; - case '2': - property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; - break; - case 'a': - property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeAngle; - break; - case 'A': - property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeNormalizedAngle; - break; - case 'h': - property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeHamming; - break; - case 'j': - property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeJaccard; - break; - case 'J': - property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeSparseJaccard; - break; - case 'c': - property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeCosine; - break; - case 'C': - property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeNormalizedCosine; - break; - default: - cerr << "ngt: Error: Invalid distance type. " << distanceType << endl; - cerr << usage << endl; - return; - } - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - size_t maxNoOfObjects = args.getl("N", 0); - if (maxNoOfObjects > 0) { - property.graphSharedMemorySize - = property.treeSharedMemorySize - = property.objectSharedMemorySize = 512 * ceil(maxNoOfObjects / 50000000); - } -#endif - - switch (indexType) { - case 't': - NGT::Index::createGraphAndTree(database, property, data, dataSize); - break; - case 'g': - NGT::Index::createGraph(database, property, data, dataSize); - break; - } - } - - void - NGT::Command::append(Args &args) - { - const string usage = "Usage: ngt append [-p #-of-thread] [-d dimension] [-n data-size] " - "index(output) [data.tsv(input)]"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "ngt: Error: DB is not specified." << endl; - cerr << usage << endl; - return; - } - string data; - try { - data = args.get("#2"); - } catch (...) { - cerr << "ngt: Warning: No specified object file. Just build an index for the existing objects." << endl; - } - - int threadSize = args.getl("p", 50); - size_t dimension = args.getl("d", 0); - size_t dataSize = args.getl("n", 0); - - if (debugLevel >= 1) { - cerr << "thread size=" << threadSize << endl; - cerr << "dimension=" << dimension << endl; - } - - - try { - NGT::Index::append(database, data, threadSize, dataSize); - } catch (NGT::Exception &err) { - cerr << "ngt: Error " << err.what() << endl; - cerr << usage << endl; - } catch (...) { - cerr << "ngt: Error" << endl; - cerr << usage << endl; - } - } - - - void - NGT::Command::search(NGT::Index &index, NGT::Command::SearchParameter &searchParameter, istream &is, ostream &stream) - { - - if (searchParameter.outputMode[0] == 'e') { - stream << "# Beginning of Evaluation" << endl; - } - - string line; - double totalTime = 0; - size_t queryCount = 0; - while(getline(is, line)) { - if (searchParameter.querySize > 0 && queryCount >= searchParameter.querySize) { - break; - } - NGT::Object *object = index.allocateObject(line, " \t"); - queryCount++; - size_t step = searchParameter.step == 0 ? UINT_MAX : searchParameter.step; - for (size_t n = 0; n <= step; n++) { - NGT::SearchContainer sc(*object); - double epsilon; - if (searchParameter.step != 0) { - epsilon = searchParameter.beginOfEpsilon + (searchParameter.endOfEpsilon - searchParameter.beginOfEpsilon) * n / step; - } else { - epsilon = searchParameter.beginOfEpsilon + searchParameter.stepOfEpsilon * n; - if (epsilon > searchParameter.endOfEpsilon) { - break; - } - } - NGT::ObjectDistances objects; - sc.setResults(&objects); - sc.setSize(searchParameter.size); - sc.setRadius(searchParameter.radius); - if (searchParameter.accuracy > 0.0) { - sc.setExpectedAccuracy(searchParameter.accuracy); - } else { - sc.setEpsilon(epsilon); - } - sc.setEdgeSize(searchParameter.edgeSize); - NGT::Timer timer; - try { - if (searchParameter.outputMode[0] == 'e') { - double time = 0.0; - uint64_t ntime = 0; - double minTime = DBL_MAX; - size_t trial = searchParameter.trial <= 1 ? 2 : searchParameter.trial; - for (size_t t = 0; t < trial; t++) { - switch (searchParameter.indexType) { - case 't': timer.start(); index.search(sc); timer.stop(); break; - case 'g': timer.start(); index.searchUsingOnlyGraph(sc); timer.stop(); break; - case 's': timer.start(); index.linearSearch(sc); timer.stop(); break; - } - if (minTime > timer.time) { - minTime = timer.time; - } - time += timer.time; - ntime += timer.ntime; - } - time /= (double)searchParameter.trial; - ntime /= searchParameter.trial; - timer.time = minTime; - timer.ntime = ntime; - } else { - switch (searchParameter.indexType) { - case 't': timer.start(); index.search(sc); timer.stop(); break; - case 'g': timer.start(); index.searchUsingOnlyGraph(sc); timer.stop(); break; - case 's': timer.start(); index.linearSearch(sc); timer.stop(); break; - } - } - } catch (NGT::Exception &err) { - if (searchParameter.outputMode != "ei") { - // not ignore exceptions - throw err; - } - } - totalTime += timer.time; - if (searchParameter.outputMode[0] == 'e') { - stream << "# Query No.=" << queryCount << endl; - stream << "# Query=" << line.substr(0, 20) + " ..." << endl; - stream << "# Index Type=" << searchParameter.indexType << endl; - stream << "# Size=" << searchParameter.size << endl; - stream << "# Radius=" << searchParameter.radius << endl; - stream << "# Epsilon=" << epsilon << endl; - stream << "# Query Time (msec)=" << timer.time * 1000.0 << endl; - stream << "# Distance Computation=" << sc.distanceComputationCount << endl; - stream << "# Visit Count=" << sc.visitCount << endl; - } else { - stream << "Query No." << queryCount << endl; - stream << "Rank\tID\tDistance" << endl; - } - for (size_t i = 0; i < objects.size(); i++) { - stream << i + 1 << "\t" << objects[i].id << "\t"; - stream << objects[i].distance << endl; - } - if (searchParameter.outputMode[0] == 'e') { - stream << "# End of Search" << endl; - } else { - stream << "Query Time= " << timer.time << " (sec), " << timer.time * 1000.0 << " (msec)" << endl; - } - } // for - index.deleteObject(object); - if (searchParameter.outputMode[0] == 'e') { - stream << "# End of Query" << endl; - } - } // while - if (searchParameter.outputMode[0] == 'e') { - stream << "# Average Query Time (msec)=" << totalTime * 1000.0 / (double)queryCount << endl; - stream << "# Number of queries=" << queryCount << endl; - stream << "# End of Evaluation" << endl; - - if (searchParameter.outputMode == "e+") { - // show graph information - size_t esize = searchParameter.edgeSize; - long double distance = 0.0; - size_t numberOfNodes = 0; - size_t numberOfEdges = 0; - - NGT::GraphIndex &graph = (NGT::GraphIndex&)index.getIndex(); - for (size_t id = 1; id < graph.repository.size(); id++) { - NGT::GraphNode *node = 0; - try { - node = graph.getNode(id); - } catch(NGT::Exception &err) { - cerr << "Graph::search: Warning. Cannot get the node. ID=" << id << ":" << err.what() << " If the node was removed, no problem." << endl; - continue; - } - numberOfNodes++; - if (numberOfNodes % 1000000 == 0) { - cerr << "Processed " << numberOfNodes << endl; - } - for (size_t i = 0; i < node->size(); i++) { - if (esize != 0 && i >= esize) { - break; - } - numberOfEdges++; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - distance += (*node).at(i, graph.repository.allocator).distance; -#else - distance += (*node)[i].distance; -#endif - } - } - - stream << "# # of nodes=" << numberOfNodes << endl; - stream << "# # of edges=" << numberOfEdges << endl; - stream << "# Average number of edges=" << (double)numberOfEdges / (double)numberOfNodes << endl; - stream << "# Average distance of edges=" << setprecision(10) << distance / (double)numberOfEdges << endl; - } - } else { - stream << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " - << totalTime * 1000.0 / (double)queryCount << " (msec), (" - << totalTime << "/" << queryCount << ")" << endl; - } - } - - - void - NGT::Command::search(Args &args) { - const string usage = "Usage: ngt search [-i index-type(g|t|s)] [-n result-size] [-e epsilon] [-E edge-size] " - "[-m open-mode(r|w)] [-o output-mode] index(input) query.tsv(input)"; - - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "ngt: Error: DB is not specified" << endl; - cerr << usage << endl; - return; - } - - SearchParameter searchParameter(args); - - if (debugLevel >= 1) { - cerr << "indexType=" << searchParameter.indexType << endl; - cerr << "size=" << searchParameter.size << endl; - cerr << "edgeSize=" << searchParameter.edgeSize << endl; - cerr << "epsilon=" << searchParameter.beginOfEpsilon << "<->" << searchParameter.endOfEpsilon << "," - << searchParameter.stepOfEpsilon << endl; - } - - try { - NGT::Index index(database, searchParameter.openMode == 'r'); - search(index, searchParameter, cout); - } catch (NGT::Exception &err) { - cerr << "ngt: Error " << err.what() << endl; - cerr << usage << endl; - } catch (...) { - cerr << "ngt: Error" << endl; - cerr << usage << endl; - } - - } - - - void - NGT::Command::remove(Args &args) - { - const string usage = "Usage: ngt remove [-d object-ID-type(f|d)] [-m f] index(input) object-ID(input)"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "ngt: Error: DB is not specified" << endl; - cerr << usage << endl; - return; - } - try { - args.get("#2"); - } catch (...) { - cerr << "ngt: Error: ID is not specified" << endl; - cerr << usage << endl; - return; - } - char dataType = args.getChar("d", 'f'); - char mode = args.getChar("m", '-'); - bool force = false; - if (mode == 'f') { - force = true; - } - if (debugLevel >= 1) { - cerr << "dataType=" << dataType << endl; - } - - try { - vector objects; - if (dataType == 'f') { - string ids; - try { - ids = args.get("#2"); - } catch (...) { - cerr << "ngt: Error: Data file is not specified" << endl; - cerr << usage << endl; - return; - } - ifstream is(ids); - if (!is) { - cerr << "ngt: Error: Cannot open the specified file. " << ids << endl; - cerr << usage << endl; - return; - } - string line; - int count = 0; - while(getline(is, line)) { - count++; - vector tokens; - NGT::Common::tokenize(line, tokens, "\t "); - if (tokens.size() == 0 || tokens[0].size() == 0) { - continue; - } - char *e; - size_t id; - try { - id = strtol(tokens[0].c_str(), &e, 10); - objects.push_back(id); - } catch (...) { - cerr << "Illegal data. " << tokens[0] << endl; - } - if (*e != 0) { - cerr << "Illegal data. " << e << endl; - } - cerr << "removed ID=" << id << endl; - } - } else { - size_t id = args.getl("#2", 0); - cerr << "removed ID=" << id << endl; - objects.push_back(id); - } - NGT::Index::remove(database, objects, force); - } catch (NGT::Exception &err) { - cerr << "ngt: Error " << err.what() << endl; - cerr << usage << endl; - } catch (...) { - cerr << "ngt: Error" << endl; - cerr << usage << endl; - } - } - - void - NGT::Command::exportIndex(Args &args) - { - const string usage = "Usage: ngt export index(input) export-file(output)"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "ngt: Error: DB is not specified" << endl; - cerr << usage << endl; - return; - } - string exportFile; - try { - exportFile = args.get("#2"); - } catch (...) { - cerr << "ngt: Error: ID is not specified" << endl; - cerr << usage << endl; - return; - } - try { - NGT::Index::exportIndex(database, exportFile); - } catch (NGT::Exception &err) { - cerr << "ngt: Error " << err.what() << endl; - cerr << usage << endl; - } catch (...) { - cerr << "ngt: Error" << endl; - cerr << usage << endl; - } - } - - void - NGT::Command::importIndex(Args &args) - { - const string usage = "Usage: ngt import index(output) import-file(input)"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "ngt: Error: DB is not specified" << endl; - cerr << usage << endl; - return; - } - string importFile; - try { - importFile = args.get("#2"); - } catch (...) { - cerr << "ngt: Error: ID is not specified" << endl; - cerr << usage << endl; - return; - } - - try { - NGT::Index::importIndex(database, importFile); - } catch (NGT::Exception &err) { - cerr << "ngt: Error " << err.what() << endl; - cerr << usage << endl; - } catch (...) { - cerr << "ngt: Error" << endl; - cerr << usage << endl; - } - - } - - void - NGT::Command::prune(Args &args) - { - const string usage = "Usage: ngt prune -e #-of-forcedly-pruned-edges -s #-of-selecively-pruned-edge index(in/out)"; - string indexName; - try { - indexName = args.get("#1"); - } catch (...) { - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; - } - - // the number of forcedly pruned edges - size_t forcedlyPrunedEdgeSize = args.getl("e", 0); - // the number of selectively pruned edges - size_t selectivelyPrunedEdgeSize = args.getl("s", 0); - - cerr << "forcedly pruned edge size=" << forcedlyPrunedEdgeSize << endl; - cerr << "selectively pruned edge size=" << selectivelyPrunedEdgeSize << endl; - - if (selectivelyPrunedEdgeSize == 0 && forcedlyPrunedEdgeSize == 0) { - cerr << "prune: Error! Either of selective edge size or remaining edge size should be specified." << endl; - cerr << usage << endl; - return; - } - - if (forcedlyPrunedEdgeSize != 0 && selectivelyPrunedEdgeSize != 0 && selectivelyPrunedEdgeSize >= forcedlyPrunedEdgeSize) { - cerr << "prune: Error! selective edge size is less than remaining edge size." << endl; - cerr << usage << endl; - return; - } - - NGT::Index index(indexName); - cerr << "loaded the input index." << endl; - - NGT::GraphIndex &graph = (NGT::GraphIndex&)index.getIndex(); - - for (size_t id = 1; id < graph.repository.size(); id++) { - try { - NGT::GraphNode &node = *graph.getNode(id); - if (id % 1000000 == 0) { - cerr << "Processed " << id << endl; - } - if (forcedlyPrunedEdgeSize > 0 && node.size() >= forcedlyPrunedEdgeSize) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - node.resize(forcedlyPrunedEdgeSize, graph.repository.allocator); -#else - node.resize(forcedlyPrunedEdgeSize); -#endif - } - if (selectivelyPrunedEdgeSize > 0 && node.size() >= selectivelyPrunedEdgeSize) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - cerr << "not implemented" << endl; - abort(); -#else - size_t rank = 0; - for (NGT::GraphNode::iterator i = node.begin(); i != node.end(); ++rank) { - if (rank >= selectivelyPrunedEdgeSize) { - bool found = false; - for (size_t t1 = 0; t1 < node.size() && found == false; ++t1) { - if (t1 >= selectivelyPrunedEdgeSize) { - break; - } - if (rank == t1) { - continue; - } - NGT::GraphNode &node2 = *graph.getNode(node[t1].id); - for (size_t t2 = 0; t2 < node2.size(); ++t2) { - if (t2 >= selectivelyPrunedEdgeSize) { - break; - } - if (node2[t2].id == (*i).id) { - found = true; - break; - } - } // for - } // for - if (found) { - //remove - i = node.erase(i); - continue; - } - } - i++; - } // for -#endif - } - - } catch(NGT::Exception &err) { - cerr << "Graph::search: Warning. Cannot get the node. ID=" << id << ":" << err.what() << endl; - continue; - } - } - - graph.saveIndex(indexName); - - } - - void - NGT::Command::reconstructGraph(Args &args) - { - const string usage = "Usage: ngt reconstruct-graph [-m mode] [-P path-adjustment-mode] -o #-of-outgoing-edges -i #-of-incoming(reversed)-edges [-q #-of-queries] [-n #-of-results] index(input) index(output)\n" - "\t-m mode\n" - "\t\ts: Edge adjustment. (default)\n" - "\t\tS: Edge adjustment and path adjustment.\n" - "\t\tc: Edge adjustment with the constraint.\n" - "\t\tC: Edge adjustment with the constraint and path adjustment.\n" - "\t\tP: Path adjustment.\n" - "\t-P path-adjustment-mode\n" - "\t\ta: Advanced method. High-speed. Not guarantee the paper's method. (default)\n" - "\t\tothers: Slow and less memory usage, but guarantee the paper's method.\n"; - - string inIndexPath; - try { - inIndexPath = args.get("#1"); - } catch (...) { - cerr << "ngt::reconstructGraph: Input index is not specified." << endl; - cerr << usage << endl; - return; - } - string outIndexPath; - try { - outIndexPath = args.get("#2"); - } catch (...) { - cerr << "ngt::reconstructGraph: Output index is not specified." << endl; - cerr << usage << endl; - return; - } - - char mode = args.getChar("m", 'S'); - size_t nOfQueries = args.getl("q", 100); // # of query objects - size_t nOfResults = args.getl("n", 20); // # of resultant objects - double gtEpsilon = args.getf("e", 0.1); - double margin = args.getf("M", 0.2); - char smode = args.getChar("s", '-'); - - // the number (rank) of original edges - int numOfOutgoingEdges = args.getl("o", -1); - // the number (rank) of reverse edges - int numOfIncomingEdges = args.getl("i", -1); - - NGT::GraphOptimizer graphOptimizer(false); - - if (mode == 'P') { - numOfOutgoingEdges = 0; - numOfIncomingEdges = 0; - std::cerr << "ngt::reconstructGraph: Warning. \'-m P\' and not zero for # of in/out edges are specified at the same time." << std::endl; - } - graphOptimizer.shortcutReduction = (mode == 'S' || mode == 'C' || mode == 'P') ? true : false; - graphOptimizer.searchParameterOptimization = (smode == '-' || smode == 's') ? true : false; - graphOptimizer.prefetchParameterOptimization = (smode == '-' || smode == 'p') ? true : false; - graphOptimizer.accuracyTableGeneration = (smode == '-' || smode == 'a') ? true : false; - graphOptimizer.margin = margin; - graphOptimizer.gtEpsilon = gtEpsilon; - - graphOptimizer.set(numOfOutgoingEdges, numOfIncomingEdges, nOfQueries, nOfResults); - graphOptimizer.execute(inIndexPath, outIndexPath); - - std::cout << "Successfully completed." << std::endl; - } - - void - NGT::Command::optimizeSearchParameters(Args &args) - { - const string usage = "Usage: ngt optimize-search-parameters [-m optimization-target(s|p|a)] [-q #-of-queries] [-n #-of-results] index\n" - "\t-m mode\n" - "\t\ts: optimize search parameters (the number of explored edges).\n" - "\t\tp: optimize prefetch prameters.\n" - "\t\ta: generate an accuracy table to specify an expected accuracy instead of an epsilon for search.\n"; - - string indexPath; - try { - indexPath = args.get("#1"); - } catch (...) { - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; - } - - char mode = args.getChar("m", '-'); - - size_t nOfQueries = args.getl("q", 100); // # of query objects - size_t nOfResults = args.getl("n", 20); // # of resultant objects - - - try { - NGT::GraphOptimizer graphOptimizer(false); - - graphOptimizer.searchParameterOptimization = (mode == '-' || mode == 's') ? true : false; - graphOptimizer.prefetchParameterOptimization = (mode == '-' || mode == 'p') ? true : false; - graphOptimizer.accuracyTableGeneration = (mode == '-' || mode == 'a') ? true : false; - graphOptimizer.numOfQueries = nOfQueries; - graphOptimizer.numOfResults = nOfResults; - - graphOptimizer.set(0, 0, nOfQueries, nOfResults); - graphOptimizer.optimizeSearchParameters(indexPath); - - std::cout << "Successfully completed." << std::endl; - } catch (NGT::Exception &err) { - cerr << "ngt: Error " << err.what() << endl; - cerr << usage << endl; - } - - } - - void - NGT::Command::refineANNG(Args &args) - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "refineANNG. Not implemented." << std::endl; - abort(); -#else - const string usage = "Usage: ngt refine-anng [-e epsilon] [-a expected-accuracy] anng-index refined-anng-index"; - - string inIndexPath; - try { - inIndexPath = args.get("#1"); - } catch (...) { - cerr << "Input index is not specified" << endl; - cerr << usage << endl; - return; - } - - string outIndexPath; - try { - outIndexPath = args.get("#2"); - } catch (...) { - cerr << "Output index is not specified" << endl; - cerr << usage << endl; - return; - } - - NGT::Index index(inIndexPath); - - float epsilon = args.getf("e", 0.1); - float expectedAccuracy = args.getf("a", 0.0); - int noOfEdges = args.getl("k", 0); // to reconstruct kNNG - int exploreEdgeSize = args.getf("E", INT_MIN); - size_t batchSize = args.getl("b", 10000); - - try { - GraphReconstructor::refineANNG(index, epsilon, expectedAccuracy, noOfEdges, exploreEdgeSize, batchSize); - } catch (NGT::Exception &err) { - std::cerr << "Error!! Cannot refine the index. " << err.what() << std::endl; - return; - } - index.saveIndex(outIndexPath); -#endif - } - - void - NGT::Command::repair(Args &args) - { - const string usage = "Usage: ng[ [-m c|r|R] repair index \n" - "\t-m mode\n" - "\t\tc: Check. (default)\n" - "\t\tr: Repair and save it as [index].repair.\n" - "\t\tR: Repair and overwrite into the specified index.\n"; - - string indexPath; - try { - indexPath = args.get("#1"); - } catch (...) { - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; - } - - char mode = args.getChar("m", 'c'); - - bool repair = false; - if (mode == 'r' || mode == 'R') { - repair = true; - } - string path = indexPath; - if (mode == 'r') { - path = indexPath + ".repair"; - const string com = "cp -r " + indexPath + " " + path; - int stat = system(com.c_str()); - if (stat != 0) { - std::cerr << "ngt::repair: Cannot create the specified index. " << path << std::endl; - cerr << usage << endl; - return; - } - } - - NGT::Index index(path); - - NGT::ObjectRepository &objectRepository = index.getObjectSpace().getRepository(); - NGT::GraphIndex &graphIndex = static_cast(index.getIndex()); - NGT::GraphAndTreeIndex &graphAndTreeIndex = static_cast(index.getIndex()); - size_t objSize = objectRepository.size(); - std::cerr << "aggregate removed objects from the repository." << std::endl; - std::set removedIDs; - for (ObjectID id = 1; id < objSize; id++) { - if (objectRepository.isEmpty(id)) { - removedIDs.insert(id); - } - } - - std::cerr << "aggregate objects from the tree." << std::endl; - std::set ids; - graphAndTreeIndex.DVPTree::getAllObjectIDs(ids); - size_t idsSize = ids.size() == 0 ? 0 : (*ids.rbegin()) + 1; - if (objSize < idsSize) { - std::cerr << "The sizes of the repository and tree are inconsistent. " << objSize << ":" << idsSize << std::endl; - } - size_t invalidTreeObjectCount = 0; - size_t uninsertedTreeObjectCount = 0; - std::cerr << "remove invalid objects from the tree." << std::endl; - size_t size = objSize > idsSize ? objSize : idsSize; - for (size_t id = 1; id < size; id++) { - if (ids.find(id) != ids.end()) { - if (removedIDs.find(id) != removedIDs.end() || id >= objSize) { - if (repair) { - graphAndTreeIndex.DVPTree::removeNaively(id); - std::cerr << "Found the removed object in the tree. Removed it from the tree. " << id << std::endl; - } else { - std::cerr << "Found the removed object in the tree. " << id << std::endl; - } - invalidTreeObjectCount++; - } - } else { - if (removedIDs.find(id) == removedIDs.end() && id < objSize) { - std::cerr << "Not found an object in the tree. However, it might be a duplicated object. " << id << std::endl; - uninsertedTreeObjectCount++; - try { - graphIndex.repository.remove(id); - } catch(...) {} - } - } - } - - if (objSize != graphIndex.repository.size()) { - std::cerr << "The sizes of the repository and graph are inconsistent. " << objSize << ":" << graphIndex.repository.size() << std::endl; - } - size_t invalidGraphObjectCount = 0; - size_t uninsertedGraphObjectCount = 0; - size = objSize > graphIndex.repository.size() ? objSize : graphIndex.repository.size(); - std::cerr << "remove invalid objects from the graph." << std::endl; - for (size_t id = 1; id < size; id++) { - try { - graphIndex.getNode(id); - if (removedIDs.find(id) != removedIDs.end() || id >= objSize) { - if (repair) { - graphAndTreeIndex.DVPTree::removeNaively(id); - try { - graphIndex.repository.remove(id); - } catch(...) {} - std::cerr << "Found the removed object in the graph. Removed it from the graph. " << id << std::endl; - } else { - std::cerr << "Found the removed object in the graph. " << id << std::endl; - } - invalidGraphObjectCount++; - } - } catch (...) { - if (removedIDs.find(id) == removedIDs.end() && id < objSize) { - std::cerr << "Not found an object in the graph. It should be inserted into the graph. " << id << std::endl; - uninsertedGraphObjectCount++; - try { - graphAndTreeIndex.DVPTree::removeNaively(id); - } catch(...) {} - } - } - } - - size_t invalidEdgeCount = 0; -//#pragma omp parallel for - for (size_t id = 1; id < graphIndex.repository.size(); id++) { - try { - NGT::GraphNode &node = *graphIndex.getNode(id); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - for (auto n = node.begin(graphIndex.repository.allocator); n != node.end(graphIndex.repository.allocator);) { -#else - for (auto n = node.begin(); n != node.end();) { -#endif - if (removedIDs.find((*n).id) != removedIDs.end() || (*n).id >= objSize) { - - std::cerr << "Not found the destination object of the edge. " << id << ":" << (*n).id << std::endl; - invalidEdgeCount++; - if (repair) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - n = node.erase(n, graphIndex.repository.allocator); -#else - n = node.erase(n); -#endif - continue; - } - } - ++n; - } - } catch(...) {} - } - - if (repair) { - if (objSize < graphIndex.repository.size()) { - graphIndex.repository.resize(objSize); - } - } - - std::cerr << "The number of invalid tree objects=" << invalidTreeObjectCount << std::endl; - std::cerr << "The number of invalid graph objects=" << invalidGraphObjectCount << std::endl; - std::cerr << "The number of uninserted tree objects (Can be ignored)=" << uninsertedTreeObjectCount << std::endl; - std::cerr << "The number of uninserted graph objects=" << uninsertedGraphObjectCount << std::endl; - std::cerr << "The number of invalid edges=" << invalidEdgeCount << std::endl; - - if (repair) { - try { - if (uninsertedGraphObjectCount > 0) { - std::cerr << "Building index." << std::endl; - index.createIndex(16); - } - std::cerr << "Saving index." << std::endl; - index.saveIndex(path); - } catch (NGT::Exception &err) { - cerr << "ngt: Error " << err.what() << endl; - cerr << usage << endl; - return; - } - } - } - - - void - NGT::Command::optimizeNumberOfEdgesForANNG(Args &args) - { - const string usage = "Usage: ngt optimize-#-of-edges [-q #-of-queries] [-k #-of-retrieved-objects] " - "[-p #-of-threads] [-a target-accuracy] [-o target-#-of-objects] [-s #-of-sampe-objects] " - "[-e maximum-#-of-edges] anng-index"; - - string indexPath; - try { - indexPath = args.get("#1"); - } catch (...) { - cerr << "Index is not specified" << endl; - cerr << usage << endl; - return; - } - - GraphOptimizer::ANNGEdgeOptimizationParameter parameter; - - parameter.noOfQueries = args.getl("q", 200); - parameter.noOfResults = args.getl("k", 50); - parameter.noOfThreads = args.getl("p", 16); - parameter.targetAccuracy = args.getf("a", 0.9); - parameter.targetNoOfObjects = args.getl("o", 0); // zero will replaced # of the repository size. - parameter.noOfSampleObjects = args.getl("s", 100000); - parameter.maxNoOfEdges = args.getl("e", 100); - - NGT::GraphOptimizer graphOptimizer(false); // false=log - auto optimizedEdge = graphOptimizer.optimizeNumberOfEdgesForANNG(indexPath, parameter); - std::cout << "The optimized # of edges=" << optimizedEdge.first << "(" << optimizedEdge.second << ")" << std::endl; - std::cout << "Successfully completed." << std::endl; - } - - - - void - NGT::Command::info(Args &args) - { - const string usage = "Usage: ngt info [-E #-of-edges] [-m h|e] index"; - - cerr << "NGT version: " << NGT::Index::getVersion() << endl; - - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "ngt: Error: DB is not specified" << endl; - cerr << usage << endl; - return; - } - - size_t edgeSize = args.getl("E", UINT_MAX); - char mode = args.getChar("m", '-'); - - try { - NGT::Index index(database); - NGT::GraphIndex::showStatisticsOfGraph(static_cast(index.getIndex()), mode, edgeSize); - if (mode == 'v') { - vector status; - index.verify(status); - } - } catch (NGT::Exception &err) { - cerr << "ngt: Error " << err.what() << endl; - cerr << usage << endl; - } catch (...) { - cerr << "ngt: Error" << endl; - cerr << usage << endl; - } - } - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Command.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Command.h deleted file mode 100644 index fd0e3e3985..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Command.h +++ /dev/null @@ -1,127 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#pragma once - -#include "NGT/Index.h" - -namespace NGT { - - -class Command { -public: - class SearchParameter { - public: - SearchParameter() { - openMode = 'r'; - query = ""; - querySize = 0; - indexType = 't'; - size = 20; - edgeSize = -1; - outputMode = "-"; - radius = FLT_MAX; - step = 0; - trial = 1; - beginOfEpsilon = endOfEpsilon = stepOfEpsilon = 0.1; - accuracy = 0.0; - } - SearchParameter(Args &args) { parse(args); } - void parse(Args &args) { - openMode = args.getChar("m", 'r'); - try { - query = args.get("#2"); - } catch (...) { - NGTThrowException("ngt: Error: Query is not specified"); - } - querySize = args.getl("Q", 0); - indexType = args.getChar("i", 't'); - size = args.getl("n", 20); - // edgeSize - // -1(default) : using the size which was specified at the index creation. - // 0 : no limitation for the edge size. - // -2('e') : automatically set it according to epsilon. - if (args.getChar("E", '-') == 'e') { - edgeSize = -2; - } else { - edgeSize = args.getl("E", -1); - } - outputMode = args.getString("o", "-"); - radius = args.getf("r", FLT_MAX); - trial = args.getl("t", 1); - { - beginOfEpsilon = endOfEpsilon = stepOfEpsilon = 0.1; - std::string epsilon = args.getString("e", "0.1"); - std::vector tokens; - NGT::Common::tokenize(epsilon, tokens, ":"); - if (tokens.size() >= 1) { beginOfEpsilon = endOfEpsilon = NGT::Common::strtod(tokens[0]); } - if (tokens.size() >= 2) { endOfEpsilon = NGT::Common::strtod(tokens[1]); } - if (tokens.size() >= 3) { stepOfEpsilon = NGT::Common::strtod(tokens[2]); } - step = 0; - if (tokens.size() >= 4) { step = NGT::Common::strtol(tokens[3]); } - } - accuracy = args.getf("a", 0.0); - } - char openMode; - std::string query; - size_t querySize; - char indexType; - int size; - long edgeSize; - std::string outputMode; - float radius; - float beginOfEpsilon; - float endOfEpsilon; - float stepOfEpsilon; - float accuracy; - size_t step; - size_t trial; - }; - - Command():debugLevel(0) {} - - void create(Args &args); - void append(Args &args); - static void search(NGT::Index &index, SearchParameter &searchParameter, std::ostream &stream) - { - std::ifstream is(searchParameter.query); - if (!is) { - std::cerr << "Cannot open the specified file. " << searchParameter.query << std::endl; - return; - } - search(index, searchParameter, is, stream); - } - static void search(NGT::Index &index, SearchParameter &searchParameter, std::istream &is, std::ostream &stream); - void search(Args &args); - void remove(Args &args); - void exportIndex(Args &args); - void importIndex(Args &args); - void prune(Args &args); - void reconstructGraph(Args &args); - void optimizeSearchParameters(Args &args); - void optimizeNumberOfEdgesForANNG(Args &args); - void refineANNG(Args &args); - void repair(Args &args); - - void info(Args &args); - void setDebugLevel(int level) { debugLevel = level; } - int getDebugLevel() { return debugLevel; } - -protected: - int debugLevel; - -}; - -}; // NGT diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Common.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/Common.cpp deleted file mode 100644 index c8946e55f5..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Common.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "NGT/lib/NGT/Common.h" -#include "NGT/lib/NGT/ObjectSpace.h" - -int64_t -NGT::SearchContainer::memSize() { - auto workres_size = workingResult.size() == 0 ? 0 : workingResult.size() * workingResult.top().memSize(); - return sizeof(size_t) * 3 + sizeof(float) * 3 + result->memSize() + 1 + workres_size + Container::memSize(); -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Common.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Common.h deleted file mode 100644 index a97f05f660..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Common.h +++ /dev/null @@ -1,1941 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "NGT/defines.h" -#include "NGT/SharedMemoryAllocator.h" - -#define ADVANCED_USE_REMOVED_LIST -#define SHARED_REMOVED_LIST - -namespace NGT { - typedef unsigned int ObjectID; - typedef float Distance; - -#define NGTThrowException(MESSAGE) throw NGT::Exception(__FILE__, (size_t)__LINE__, MESSAGE) -#define NGTThrowSpecificException(MESSAGE, TYPE) throw NGT::TYPE(__FILE__, (size_t)__LINE__, MESSAGE) - - class Exception : public std::exception { - public: - Exception():message("No message") {} - Exception(const std::string &file, size_t line, std::stringstream &m) { set(file, line, m.str()); } - Exception(const std::string &file, size_t line, const std::string &m) { set(file, line, m); } - void set(const std::string &file, size_t line, const std::string &m) { - std::stringstream ss; - ss << file << ":" << line << ": " << m; - message = ss.str(); - } - ~Exception() throw() {} - Exception &operator=(const Exception &e) { - message = e.message; - return *this; - } - virtual const char *what() const throw() { - return message.c_str(); - } - std::string &getMessage() { return message; } - protected: - std::string message; - }; - - class Args : public std::map - { - public: - Args() {} - Args(int argc, char **argv) - { - std::vector opts; - int optcount = 0; - insert(std::make_pair(std::string("#-"),std::string(argv[0]))); - for (int i = 1; i < argc; ++i) { - opts.push_back(std::string(argv[i])); - } - for (auto i = opts.begin(); i != opts.end(); ++i) { - std::string &opt = *i; - std::string key, value; - if (opt.size() > 2 && opt.substr(0, 2) == "--") { - auto pos = opt.find('='); - if (pos == std::string::npos) { - key = opt.substr(2); - value = ""; - } else { - key = opt.substr(2, pos - 2); - value = opt.substr(++pos); - } - } else if (opt.size() > 1 && opt[0] == '-') { - if (opt.size() == 2) { - key = opt[1]; - if (key == "h") { - value = ""; - } else { - ++i; - if (i != opts.end()) { - value = *i; - } else { - value = ""; - --i; - } - } - } else { - key = opt[1]; - value = opt.substr(2); - } - } else { - key = "#" + std::to_string(optcount++); - value = opt; - } - auto status = insert(std::make_pair(key,value)); - if (!status.second) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Args: Duplicated options. [" + opt + "]"); -// std::cerr << "Args: Duplicated options. [" << opt << "]" << std::endl; - } - } - } - std::set getUnusedOptions() { - std::set o; - for (auto i = begin(); i != end(); ++i) { - o.insert((*i).first); - } - for (auto i = usedOptions.begin(); i != usedOptions.end(); ++i) { - o.erase(*i); - } - return o; - } - std::string checkUnusedOptions() { - auto uopt = getUnusedOptions(); - std::stringstream msg; - if (!uopt.empty()) { - msg << "Unused options: "; - for (auto i = uopt.begin(); i != uopt.end(); ++i) { - msg << *i << " "; - } - } - return msg.str(); - } - std::string &find(const char *s) { return get(s); } - char getChar(const char *s, char v) { - try { - return get(s)[0]; - } catch (...) { - return v; - } - } - std::string getString(const char *s, const char *v) { - try { - return get(s); - } catch (...) { - return v; - } - } - std::string &get(const char *s) { - Args::iterator ai; - ai = map::find(std::string(s)); - if (ai == this->end()) { - std::stringstream msg; - msg << s << ": Not specified" << std::endl; - NGTThrowException(msg.str()); - } - usedOptions.insert(ai->first); - return ai->second; - } - long getl(const char *s, long v) { - char *e; - long val; - try { - val = strtol(get(s).c_str(), &e, 10); - } catch (...) { - return v; - } - if (*e != 0) { - std::stringstream msg; - msg << "ARGS::getl: Illegal string. Option=-" << s << " Specified value=" << get(s) - << " Illegal string=" << e << std::endl; - NGTThrowException(msg.str()); - } - return val; - } - float getf(const char *s, float v) { - char *e; - float val; - try { - val = strtof(get(s).c_str(), &e); - } catch (...) { - return v; - } - if (*e != 0) { - std::stringstream msg; - msg << "ARGS::getf: Illegal string. Option=-" << s << " Specified value=" << get(s) - << " Illegal string=" << e << std::endl; - NGTThrowException(msg.str()); - } - return val; - } - std::set usedOptions; - }; - - class Common { - public: - static void tokenize(const std::string &str, std::vector &token, const std::string seps) { - std::string::size_type current = 0; - std::string::size_type next; - while ((next = str.find_first_of(seps, current)) != std::string::npos) { - token.push_back(str.substr(current, next - current)); - current = next + 1; - } - std::string t = str.substr(current); - token.push_back(t); - } - - static double strtod(const std::string &str) { - char *e; - double val = std::strtod(str.c_str(), &e); - if (*e != 0) { - std::stringstream msg; - msg << "Invalid string. " << e; - NGTThrowException(msg); - } - return val; - } - - static float strtof(const std::string &str) { - char *e; - double val = std::strtof(str.c_str(), &e); - if (*e != 0) { - std::stringstream msg; - msg << "Invalid string. " << e; - NGTThrowException(msg); - } - return val; - } - - static long strtol(const std::string &str, int base = 10) { - char *e; - long val = std::strtol(str.c_str(), &e, base); - if (*e != 0) { - std::stringstream msg; - msg << "Invalid string. " << e; - NGTThrowException(msg); - } - return val; - } - - - static std::string getProcessStatus(const std::string &stat) { - pid_t pid = getpid(); - std::stringstream str; - str << "/proc/" << pid << "/status"; - std::ifstream procStatus(str.str()); - if (!procStatus.fail()) { - std::string line; - while (getline(procStatus, line)) { - std::vector tokens; - NGT::Common::tokenize(line, tokens, ": \t"); - if (tokens[0] == stat) { - for (size_t i = 1; i < tokens.size(); i++) { - if (tokens[i].empty()) { - continue; - } - return tokens[i]; - } - } - } - } - return "-1"; - } - - // size unit is kbyte - static int getProcessVmSize() { return strtol(getProcessStatus("VmSize")); } - static int getProcessVmPeak() { return strtol(getProcessStatus("VmPeak")); } - static int getProcessVmRSS() { return strtol(getProcessStatus("VmRSS")); } - }; - - class StdOstreamRedirector { - public: - StdOstreamRedirector(bool e = false, const std::string path = "/dev/null", mode_t m = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, int f = 2) { - logFilePath = path; - mode = m; - logFD = -1; - fdNo = f; - enabled = e; - } - ~StdOstreamRedirector() { end(); } - - void enable() { enabled = true; } - void disable() { enabled = false; } - void begin() { - if (!enabled) { - return; - } - if (logFilePath == "/dev/null") { - logFD = open(logFilePath.c_str(), O_WRONLY|O_APPEND, mode); - } else { - logFD = open(logFilePath.c_str(), O_CREAT|O_WRONLY|O_APPEND, mode); - } - if (logFD < 0) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Logger: Cannot begin logging."); -// std::cerr << "Logger: Cannot begin logging." << std::endl; - logFD = -1; - return; - } - savedFdNo = dup(fdNo); - std::cerr << std::flush; - dup2(logFD, fdNo); - } - - void end() { - if (logFD < 0) { - return; - } - std::cerr << std::flush; - dup2(savedFdNo, fdNo); - savedFdNo = -1; - } - - int64_t memSize() { return sizeof(*this); } - - std::string logFilePath; - mode_t mode; - int logFD; - int savedFdNo; - int fdNo; - bool enabled; - }; - - template - class CompactVector { - public: - typedef TYPE * iterator; - - CompactVector() : vector(0), vectorSize(0), allocatedSize(0){} - virtual ~CompactVector() { clear(); } - - void clear() { - if (vector != 0) { - delete[] vector; - } - vector = 0; - vectorSize = 0; - allocatedSize = 0; - } - - TYPE &front() { return vector[0]; } - TYPE &back() { return vector[vectorSize - 1]; } - bool empty() { return vector == 0; } - iterator begin() { return &(vector[0]); } - iterator end() { return begin() + vectorSize; } - TYPE &operator[](size_t idx) const { return vector[idx]; } - - CompactVector &operator=(CompactVector &v) { - assert((vectorSize == v.vectorSize) || (vectorSize == 0)); - if (vectorSize == v.vectorSize) { - for (size_t i = 0; i < vectorSize; i++) { - vector[i] = v[i]; - } - return *this; - } else { - reserve(v.vectorSize); - assert(allocatedSize >= v.vectorSize); - for (size_t i = 0; i < v.vectorSize; i++) { - push_back(v.at(i)); - } - vectorSize = v.vectorSize; - assert(vectorSize == v.vectorSize); - } - return *this; - } - - TYPE &at(size_t idx) const { - if (idx >= vectorSize) { - std::stringstream msg; - msg << "CompactVector: beyond the range. " << idx << ":" << vectorSize; - NGTThrowException(msg); - } - return vector[idx]; - } - - iterator erase(iterator b, iterator e) { - iterator ret; - e = end() < e ? end() : e; - for (iterator i = b; i < e; i++) { - ret = erase(i); - } - return ret; - } - - iterator erase(iterator i) { - iterator back = i; - vectorSize--; - iterator e = end(); - for (; i < e; i++) { - *i = *(i + 1); - } - return ++back; - } - - void pop_back() { - if (vectorSize > 0) { - vectorSize--; - } - } - - iterator insert(iterator &i, const TYPE &data) { - if (size() == 0) { - push_back(data); - return end(); - } - off_t oft = i - begin(); - extend(); - i = begin() + oft; - iterator b = begin(); - for (iterator ci = end(); ci > i && ci != b; ci--) { - *ci = *(ci - 1); - } - *i = data; - vectorSize++; - return i + 1; - } - - void push_back(const TYPE &data) { - extend(); - vector[vectorSize] = data; - vectorSize++; - } - - void reserve(size_t s) { - if (s <= allocatedSize) { - return; - } else { - TYPE *newptr = new TYPE[s]; - TYPE *dstptr = newptr; - TYPE *srcptr = vector; - TYPE *endptr = srcptr + vectorSize; - while (srcptr < endptr) { - *dstptr++ = *srcptr; - (*srcptr).~TYPE(); - srcptr++; - } - allocatedSize = s; - if (vector != 0) { - delete[] vector; - } - vector = newptr; - } - } - - void resize(size_t s, TYPE v = TYPE()) { - if (s > allocatedSize) { - size_t asize = allocatedSize == 0 ? 1 : allocatedSize; - while (asize < s) { - asize <<= 1; - } - reserve(asize); - TYPE *base = vector; - TYPE *dstptr = base + vectorSize; - TYPE *endptr = base + s; - for (; dstptr < endptr; dstptr++) { - *dstptr = v; - } - } - vectorSize = s; - } - - size_t size() const { return (size_t)vectorSize; } - - void extend() { - if (vectorSize == allocatedSize) { - if (vectorSize == 0) { - reserve(2); - } else { - uint64_t size = vectorSize; - size <<= 1; - if (size > 0xffff) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("CompactVector is too big. " + std::to_string(size)); -// std::cerr << "CompactVector is too big. " << size << std::endl; - abort(); - } - reserve(size); - } - } - } - - virtual int64_t memSize() { return vector->memSize() * vectorSize + sizeof(vectorSize) * 2; } - - TYPE *vector; - uint16_t vectorSize; - uint16_t allocatedSize; - }; - - class CompactString { - public: - CompactString():vector(0) { } - - CompactString(const CompactString &v):vector(0) { *this = v; } - - ~CompactString() { clear(); } - - void clear() { - if (vector != 0) { - delete[] vector; - } - vector = 0; - } - - CompactString &operator=(const std::string &v) { return *this = v.c_str(); } - - CompactString &operator=(const CompactString &v) { return *this = v.vector; } - - CompactString &operator=(const char *str) { - if (str == 0 || strlen(str) == 0) { - clear(); - return *this; - } - if (size() != strlen(str)) { - clear(); - vector = new char[strlen(str) + 1]; - } - strcpy(vector, str); - return *this; - } - - char &at(size_t idx) const { - if (idx >= size()) { - NGTThrowException("CompactString: beyond the range"); - } - return vector[idx]; - } - - char *c_str() { return vector; } - size_t size() const { - if (vector == 0) { - return 0; - } else { - return (size_t)strlen(vector); - } - } - - virtual int64_t memSize() { return size(); } - - char *vector; - }; - - // BooleanSet has been already optimized. - class BooleanSet { - public: - BooleanSet(size_t s) { - size = (s >> 6) + 1; // 2^6=64 - size = ((size >> 2) << 2) + 4; - bitvec.resize(size); - } - inline uint64_t getBitString(size_t i) { return (uint64_t)1 << (i & (64 - 1)); } - inline uint64_t &getEntry(size_t i) { return bitvec[i >> 6]; } - inline bool operator[](size_t i) { - return (getEntry(i) & getBitString(i)) != 0; - } - inline void set(size_t i) { - getEntry(i) |= getBitString(i); - } - inline void insert(size_t i) { set(i); } - inline void reset(size_t i) { - getEntry(i) &= ~getBitString(i); - } - inline int64_t memSize() { return size * sizeof(uint64_t); } - std::vector bitvec; - uint64_t size; - }; - - - class PropertySet : public std::map { - public: - void set(const std::string &key, const std::string &value) { - iterator it = find(key); - if (it == end()) { - insert(std::pair(key, value)); - } else { - (*it).second = value; - } - } - template void set(const std::string &key, VALUE_TYPE value) { - std::stringstream vstr; - vstr << value; - iterator it = find(key); - if (it == end()) { - insert(std::pair(key, vstr.str())); - } else { - (*it).second = vstr.str(); - } - } - - std::string get(const std::string &key) { - iterator it = find(key); - if (it != end()) { - return it->second; - } - return ""; - } - float getf(const std::string &key, float defvalue) { - iterator it = find(key); - if (it != end()) { - char *e = 0; - float val = strtof(it->second.c_str(), &e); - if (*e != 0) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Warning: Illegal property. " + key + ":" + it->second + " (" + e + ")"); -// std::cerr << "Warning: Illegal property. " << key << ":" << it->second << " (" << e << ")" << std::endl; - return defvalue; - } - return val; - } - return defvalue; - } - void updateAndInsert(PropertySet &prop) { - for (std::map::iterator i = prop.begin(); i != prop.end(); ++i) { - set((*i).first, (*i).second); - } - } - long getl(const std::string &key, long defvalue) { - iterator it = find(key); - if (it != end()) { - char *e = 0; - float val = strtol(it->second.c_str(), &e, 10); - if (*e != 0) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Warning: Illegal property. " + key + ":" + it->second + " (" + e + ")"); -// std::cerr << "Warning: Illegal property. " << key << ":" << it->second << " (" << e << ")" << std::endl; - } - return val; - } - return defvalue; - } - void load(const std::string &f) { - std::ifstream st(f); - if (!st) { - std::stringstream msg; - msg << "PropertySet::load: Cannot load the property file " << f << "."; - NGTThrowException(msg); - } - load(st); - } - void save(const std::string &f) { - std::ofstream st(f); - if (!st) { - std::stringstream msg; - msg << "PropertySet::save: Cannot save. " << f << std::endl; - NGTThrowException(msg); - } - save(st); - } - void save(std::ofstream &os) { - for (std::map::iterator i = this->begin(); i != this->end(); i++) { - os << i->first << "\t" << i->second << std::endl; - } - } - // for milvus - void save(std::stringstream & prf) - { - for (std::map::iterator i = this->begin(); i != this->end(); i++) - { - prf << i->first << "\t" << i->second << std::endl; - } - } - - // for milvus - void load(std::stringstream & is) - { - std::string line; - while (getline(is, line)) - { - std::vector tokens; - NGT::Common::tokenize(line, tokens, "\t"); - if (tokens.size() != 2) - { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Property file is illegal. " + line); -// std::cerr << "Property file is illegal. " << line << std::endl; - continue; - } - set(tokens[0], tokens[1]); - } - } - - void load(std::ifstream &is) { - std::string line; - while (getline(is, line)) { - std::vector tokens; - NGT::Common::tokenize(line, tokens, "\t"); - if (tokens.size() != 2) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Property file is illegal. " + line); -// std::cerr << "Property file is illegal. " << line << std::endl; - continue; - } - set(tokens[0], tokens[1]); - } - } - }; - - namespace Serializer { - static inline void read(std::istream & is, uint8_t * v, size_t s) { is.read((char *)v, s); } - - static inline void write(std::ostream & os, const uint8_t * v, size_t s) { os.write((const char *)v, s); } - - template - void write(std::ostream & os, const TYPE v) - { - os.write((const char *)&v, sizeof(TYPE)); - } - - template void writeAsText(std::ostream &os, const TYPE v) { - if (typeid(TYPE) == typeid(unsigned char)) { - os << (int)v; - } else { - os << v; - } - } - - template - void read(std::istream & is, TYPE & v) - { - is.read((char *)&v, sizeof(TYPE)); - } - - template void readAsText(std::istream &is, TYPE &v) { - if (typeid(TYPE) == typeid(unsigned char)) { - unsigned int tmp; - is >> tmp; - if (tmp > 255) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Error! Invalid. " + std::to_string(tmp)); -// std::cerr << "Error! Invalid. " << tmp << std::endl; - } - v = (TYPE)tmp; - } else { - is >> v; - } - } - - template void write(std::ostream &os, const std::vector &v) { - unsigned int s = v.size(); - write(os, s); - for (unsigned int i = 0; i < s; i++) { - write(os, v[i]); - } - } - - template void writeAsText(std::ostream &os, const std::vector &v) { - unsigned int s = v.size(); - os << s << " "; - for (unsigned int i = 0; i < s; i++) { - writeAsText(os, v[i]); - os << " "; - } - } - - template void write(std::ostream &os, const CompactVector &v) { - unsigned int s = v.size(); - write(os, s); - for (unsigned int i = 0; i < s; i++) { - write(os, v[i]); - } - } - - template void writeAsText(std::ostream &os, const CompactVector &v) { - unsigned int s = v.size(); - for (unsigned int i = 0; i < s; i++) { - writeAsText(os, v[i]); - os << " "; - } - } - - template void writeAsText(std::ostream &os, TYPE *v, size_t s) { - os << s << " "; - for (unsigned int i = 0; i < s; i++) { - writeAsText(os, v[i]); - os << " "; - } - } - - template void read(std::istream &is, std::vector &v) { - v.clear(); - unsigned int s; - read(is, s); - v.reserve(s); - for (unsigned int i = 0; i < s; i++) { - TYPE val; - read(is, val); - v.push_back(val); - } - } - - template void readAsText(std::istream &is, std::vector &v) { - v.clear(); - unsigned int s; - is >> s; - for (unsigned int i = 0; i < s; i++) { - TYPE val; - Serializer::readAsText(is, val); - v.push_back(val); - } - } - - - template void read(std::istream &is, CompactVector &v) { - v.clear(); - unsigned int s; - read(is, s); - v.reserve(s); - for (unsigned int i = 0; i < s; i++) { - TYPE val; - read(is, val); - v.push_back(val); - } - } - - template void readAsText(std::istream &is, CompactVector &v) { - v.clear(); - unsigned int s; - is >> s; - for (unsigned int i = 0; i < s; i++) { - TYPE val; - Serializer::readAsText(is, val); - v.push_back(val); - } - } - - template void readAsText(std::istream &is, TYPE *v, size_t s) { - unsigned int size; - is >> size; - if (s != size) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("readAsText: something wrong. " + std::to_string(size) + ":" + std::to_string(s)); -// std::cerr << "readAsText: something wrong. " << size << ":" << s << std::endl; - return; - } - for (unsigned int i = 0; i < s; i++) { - TYPE val; - Serializer::readAsText(is, val); - v[i] = val; - } - } - - - } // namespace Serialize - - - class ObjectSpace; - - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - template - class Vector { - public: - typedef TYPE * iterator; - - Vector() : vector(0), vectorSize(0), allocatedSize(0) {} - - void clear(SharedMemoryAllocator &allocator) { - if (vector != 0) { - allocator.free(allocator.getAddr(vector)); - } - vector = 0; - vectorSize = 0; - allocatedSize = 0; - } - - TYPE &front(SharedMemoryAllocator &allocator) { return (*this).at(0, allocator); } - TYPE &back(SharedMemoryAllocator &allocator) { return (*this).at(vectorSize - 1, allocator); } - bool empty() { return vectorSize == 0; } - iterator begin(SharedMemoryAllocator &allocator) { - return (TYPE*)allocator.getAddr((off_t)vector); - } - iterator end(SharedMemoryAllocator &allocator) { - return begin(allocator) + vectorSize; - } - - Vector &operator=(Vector &v) { - assert((vectorSize == v.vectorSize) || (vectorSize == 0)); - if (vectorSize == v.vectorSize) { - for (size_t i = 0; i < vectorSize; i++) { - (*this)[i] = v[i]; - } - return *this; - } else { - reserve(v.vectorSize); - assert(allocatedSize >= v.vectorSize); - for (size_t i = 0; i < v.vectorSize; i++) { - push_back(v.at(i)); - } - vectorSize = v.vectorSize; - assert(vectorSize == v.vectorSize); - } - return *this; - } - - TYPE &at(size_t idx, SharedMemoryAllocator &allocator) { - if (idx >= vectorSize) { - std::stringstream msg; - msg << "Vector: beyond the range. " << idx << ":" << vectorSize; - NGTThrowException(msg); - } - return *(begin(allocator) + idx); - } - - iterator erase(iterator b, iterator e, SharedMemoryAllocator &allocator) { - iterator ret; - e = end(allocator) < e ? end(allocator) : e; - for (iterator i = b; i < e; i++) { - ret = erase(i, allocator); - } - return ret; - } - - iterator erase(iterator i, SharedMemoryAllocator &allocator) { - iterator back = i; - vectorSize--; - iterator e = end(allocator); - for (; i < e; i++) { - *i = *(i + 1); - } - return back; - } - - void pop_back() { - if (vectorSize > 0) { - vectorSize--; - } - } - iterator insert(iterator &i, const TYPE &data, SharedMemoryAllocator &allocator) { - if (size() == 0) { - push_back(data, allocator); - return end(allocator); - } - off_t oft = i - begin(allocator); - extend(allocator); - i = begin(allocator) + oft; - iterator b = begin(allocator); - for (iterator ci = end(allocator); ci > i && ci != b; ci--) { - *ci = *(ci - 1); - } - *i = data; - vectorSize++; - return i + 1; - } - - void push_back(const TYPE &data, SharedMemoryAllocator &allocator) { - extend(allocator); - vectorSize++; - (*this).at(vectorSize - 1, allocator) = data; - } - - void reserve(size_t s, SharedMemoryAllocator &allocator) { - if (s <= allocatedSize) { - return; - } else { - TYPE *newptr = new(allocator) TYPE[s]; - TYPE *dstptr = newptr; - TYPE *srcptr = (TYPE*)allocator.getAddr(vector); - TYPE *endptr = srcptr + vectorSize; - while (srcptr < endptr) { - *dstptr++ = *srcptr; - (*srcptr).~TYPE(); - srcptr++; - } - allocatedSize = s; - if (vector != 0) { - allocator.free(allocator.getAddr(vector)); - } - vector = allocator.getOffset(newptr); - } - } - - void resize(size_t s, SharedMemoryAllocator &allocator, TYPE v = TYPE()) { - if (s > allocatedSize) { - size_t asize = allocatedSize == 0 ? 1 : allocatedSize; - while (asize < s) { - asize <<= 1; - } - reserve(asize, allocator); - TYPE *base = (TYPE*)allocator.getAddr(vector); - TYPE *dstptr = base + vectorSize; - TYPE *endptr = base + s; - for (; dstptr < endptr; dstptr++) { - *dstptr = v; - } - } - vectorSize = s; - } - - void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { - unsigned int s = size(); - os << s << " "; - for (unsigned int i = 0; i < s; i++) { - Serializer::writeAsText(os, (*this)[i]); - os << " "; - } - } - void deserializeAsText(std::istream &is, ObjectSpace *objectspace = 0) { - clear(); - size_t s; - Serializer::readAsText(is, s); - resize(s); - for (unsigned int i = 0; i < s; i++) { - Serializer::readAsText(is, (*this)[i]); - } - } - size_t size() { return vectorSize; } - - public: - void extend(SharedMemoryAllocator &allocator) { - extend(vectorSize, allocator); - } - - void extend(size_t idx, SharedMemoryAllocator &allocator) { - if (idx >= allocatedSize) { - if (idx == 0) { - reserve(2, allocator); - } else { - uint64_t size = allocatedSize == 0 ? 1 : allocatedSize; - do { - size <<= 1; - } while (size <= idx); - if (size > 0xffffffff) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Vector is too big. " + std::to_string(size)); -// std::cerr << "Vector is too big. " << size << std::endl; - abort(); - } - reserve(size, allocator); - } - } - } - - off_t vector; - uint32_t vectorSize; - uint32_t allocatedSize; - }; - -#endif // NGT_SHARED_MEMORY_ALLOCATOR - - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - class ObjectSpace; - template - class PersistentRepository { - public: - typedef Vector ARRAY; - typedef TYPE ** iterator; - - PersistentRepository():array(0) {} - ~PersistentRepository() { close(); } - - void open(const std::string &mapfile, size_t sharedMemorySize) { - assert(array == 0); - SharedMemoryAllocator &allocator = getAllocator(); -#ifdef ADVANCED_USE_REMOVED_LIST - off_t *entryTable = (off_t*)allocator.construct(mapfile, sharedMemorySize); - if (entryTable == 0) { - entryTable = (off_t*)construct(); - allocator.setEntry(entryTable); - } - assert(entryTable != 0); - initialize(entryTable); -#else - void *entry = allocator.construct(mapfile, sharedMemorySize); - if (entry == 0) { - array = (ARRAY*)construct(); - allocator.setEntry(array); - } else { - array = (ARRAY*)entry; - } -#endif - } - - void close() { - getAllocator().destruct(); - } - -#ifdef ADVANCED_USE_REMOVED_LIST - void *construct() { - SharedMemoryAllocator &allocator = getAllocator(); - off_t *entryTable = new(allocator) off_t[2]; - entryTable[0] = allocator.getOffset(new(allocator) ARRAY); - entryTable[1] = allocator.getOffset(new(allocator) Vector); - return entryTable; - } - void initialize(void *e) { - SharedMemoryAllocator &allocator = getAllocator(); - off_t *entryTable = (off_t*)e; - array = (ARRAY*)allocator.getAddr(entryTable[0]); - removedList = (Vector*)allocator.getAddr(entryTable[1]); - } - size_t removedListPop() { - assert(removedList->size() != 0); - size_t idx = removedList->back(allocator); - removedList->pop_back(); - return idx; - } - - void removedListPush(size_t id) { - if (removedList->size() == 0) { - removedList->push_back(id, allocator); - return; - } - Vector::iterator rmi - = std::lower_bound(removedList->begin(allocator), removedList->end(allocator), id, std::greater()); - if ((rmi != removedList->end(allocator)) && ((*rmi) == id)) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("removedListPush: already existed! continue... ID=" + std::to_string(id)); -// std::cerr << "removedListPush: already existed! continue... ID=" << id << std::endl; - return; - } - removedList->insert(rmi, id, allocator); - } -#else - void *construct() { - SharedMemoryAllocator &allocator = getAllocator(); - return new(allocator) ARRAY; - } - void initialize(void *e) { - assert(array == 0); - assert(e != 0); - array = (ARRAY*)e; - } -#endif - TYPE *allocate() { return new(allocator) TYPE(allocator); } - - size_t push(TYPE *n) { - if (size() == 0) { - push_back(0); - } - push_back(n); - return size() - 1; - } - - size_t insert(TYPE *n) { -#ifdef ADVANCED_USE_REMOVED_LIST - if (!removedList->empty()) { - size_t idx = removedListPop(); - put(idx, n); - return idx; - } -#endif - return push(n); - } - - bool isEmpty(size_t idx) { - if (idx < size()) { - return (*array).at(idx, allocator) == 0; - } else { - return true; - } - } - - void put(size_t idx, TYPE *n) { - (*array).extend(idx, allocator); - if (size() <= idx) { - resize(idx + 1); - } - if ((*this)[idx] != 0) { - NGTThrowException("put: Not empty"); - } - set(idx, n); - } - - void erase(size_t idx) { - if (isEmpty(idx)) { - NGTThrowException("erase: Not in-memory or invalid id"); - } - (*this)[idx]->~TYPE(); - allocator.free((*this)[idx]); - set(idx, (TYPE*)0); - } - - void remove(size_t idx) { - erase(idx); -#ifdef ADVANCED_USE_REMOVED_LIST - removedListPush(idx); -#endif - } - - inline TYPE *get(size_t idx) { - if (isEmpty(idx)) { - std::stringstream msg; - msg << "get: Not in-memory or invalid offset of node. " << idx << ":" << array->size(); - NGTThrowException(msg.str()); - } - return (*this)[idx]; - } - - void serialize(std::ofstream &os, ObjectSpace *objectspace = 0) { - NGT::Serializer::write(os, array->size()); - for (size_t idx = 0; idx < array->size(); idx++) { - if ((*this)[idx] == 0) { - NGT::Serializer::write(os, '-'); - } else { - NGT::Serializer::write(os, '+'); - if (objectspace == 0) { - assert(0); - //(*this)[idx]->serialize(os, allocator); - } else { - assert(0); - //(*this)[idx]->serialize(os, allocator, objectspace); - } - } - } - } - - void deserialize(std::ifstream &is, ObjectSpace *objectspace = 0) { - if (!is.is_open()) { - NGTThrowException("NGT::Common: Not open the specified stream yet."); - } - deleteAll(); - (*this).push_back((TYPE*)0); - size_t s; - NGT::Serializer::read(is, s); - for (size_t i = 0; i < s; i++) { - char type; - NGT::Serializer::read(is, type); - switch(type) { - case '-': - { - (*this).push_back((TYPE*)0); -#ifdef ADVANCED_USE_REMOVED_LIST - if (i != 0) { - removedListPush(i); - } -#endif - } - break; - case '+': - { - if (objectspace == 0) { - TYPE *v = new(allocator) TYPE(allocator); - //v->deserialize(is, allocator); - assert(0); - (*this).push_back(v); - } else { - TYPE *v = new(allocator) TYPE(allocator, objectspace); - //v->deserialize(is, allocator, objectspace); - assert(0); - (*this).push_back(v); - } - } - break; - default: - { - assert(type == '-' || type == '+'); - break; - } - } - } - } - - void serializeAsText(std::ofstream &os, ObjectSpace *objectspace = 0) { - os.setf(std::ios_base::fmtflags(0), std::ios_base::floatfield); - os << std::setprecision(8); - - os << array->size() << std::endl; - for (size_t idx = 0; idx < array->size(); idx++) { - if ((*this)[idx] == 0) { - os << idx << " - " << std::endl; - } else { - os << idx << " + "; - if (objectspace == 0) { - (*this)[idx]->serializeAsText(os, allocator); - } else { - (*this)[idx]->serializeAsText(os, allocator, objectspace); - } - os << std::endl; - } - } - os << std::fixed; - } - - - void deserializeAsText(std::ifstream &is, ObjectSpace *objectspace = 0) { - if (!is.is_open()) { - NGTThrowException("NGT::Common: Not open the specified stream yet."); - } - deleteAll(); - size_t s; - NGT::Serializer::readAsText(is, s); - (*this).reserve(s); - for (size_t i = 0; i < s; i++) { - size_t idx; - NGT::Serializer::readAsText(is, idx); - if (i != idx) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("PersistentRepository: Error. index of a specified import file is invalid. " + std::to_string(idx) + ":" + std::to_string(i)); -// std::cerr << "PersistentRepository: Error. index of a specified import file is invalid. " << idx << ":" << i << std::endl; - } - char type; - NGT::Serializer::readAsText(is, type); - switch(type) { - case '-': - { - (*this).push_back((TYPE*)0); -#ifdef ADVANCED_USE_REMOVED_LIST - if (i != 0) { - removedListPush(i); - } -#endif - } - break; - case '+': - { - if (objectspace == 0) { - TYPE *v = new(allocator) TYPE(allocator); - v->deserializeAsText(is, allocator); - (*this).push_back(v); - } else { - TYPE *v = new(allocator) TYPE(allocator, objectspace); - v->deserializeAsText(is, allocator, objectspace); - (*this).push_back(v); - } - } - break; - default: - { - assert(type == '-' || type == '+'); - break; - } - } - } - } - void deleteAll() { - for (size_t i = 0; i < array->size(); i++) { - if ((*array).at(i, allocator) != 0) { - allocator.free((*this)[i]); - } - } - array->clear(allocator); -#ifdef ADVANCED_USE_REMOVED_LIST - while (!removedList->empty()) { removedListPop(); } -#endif - } - - void set(size_t idx, TYPE *n) { - array->at(idx, allocator) = allocator.getOffset(n); - } - SharedMemoryAllocator &getAllocator() { return allocator; } - void clear() { array->clear(allocator); } - iterator begin() { return (iterator)array->begin(allocator); } - iterator end() { return (iterator)array->end(allocator); } - bool empty() { return array->empty(); } - TYPE *operator[](size_t idx) { - return (TYPE*)allocator.getAddr((*array).at(idx, allocator)); - } - TYPE *at(size_t idx) { - return (TYPE*)allocator.getAddr(array->at(idx, allocator)); - } - void push_back(TYPE *data) { - array->push_back(allocator.getOffset(data), allocator); - } - void reserve(size_t s) { array->reserve(s, allocator); } - void resize(size_t s) { array->resize(s, allocator, (off_t)0); } - size_t size() { return array->size(); } - size_t getAllocatedSize() { return array->allocatedSize; } - - ARRAY *array; - - SharedMemoryAllocator allocator; - -#ifdef ADVANCED_USE_REMOVED_LIST - size_t count() { return size() == 0 ? 0 : size() - removedList->size() - 1; } - protected: - Vector *removedList; -#endif - - }; - -#endif // NGT_SHARED_MEMORY_ALLOCATOR - - class ObjectSpace; - - - template - class Repository : public std::vector { - public: - - static TYPE *allocate() { return new TYPE; } - - size_t push(TYPE *n) { - if (std::vector::size() == 0) { - std::vector::push_back(0); - } - std::vector::push_back(n); - return std::vector::size() - 1; - } - - size_t insert(TYPE *n) { -#ifdef ADVANCED_USE_REMOVED_LIST - if (!removedList.empty()) { - size_t idx = removedList.top(); - removedList.pop(); - put(idx, n); - return idx; - } -#endif - return push(n); - } - - bool isEmpty(size_t idx) { - if (idx < std::vector::size()) { - return (*this)[idx] == 0; - } else { - return true; - } - } - - void put(size_t idx, TYPE *n) { - if (std::vector::size() <= idx) { - std::vector::resize(idx + 1, 0); - } - if ((*this)[idx] != 0) { - NGTThrowException("put: Not empty"); - } - (*this)[idx] = n; - } - - void erase(size_t idx) { - if (isEmpty(idx)) { - NGTThrowException("erase: Not in-memory or invalid id"); - } - delete (*this)[idx]; - (*this)[idx] = 0; - } - - void remove(size_t idx) { - erase(idx); -#ifdef ADVANCED_USE_REMOVED_LIST - removedList.push(idx); -#endif - } - - TYPE **getPtr() { return &(*this)[0]; } - - inline TYPE *get(size_t idx) { - if (isEmpty(idx)) { - std::stringstream msg; - msg << "get: Not in-memory or invalid offset of node. idx=" << idx << " size=" << this->size(); - NGTThrowException(msg.str()); - } - return (*this)[idx]; - } - - inline TYPE *getWithoutCheck(size_t idx) { return (*this)[idx]; } - - void serialize(std::ofstream &os, ObjectSpace *objectspace = 0) { - if (!os.is_open()) { - NGTThrowException("NGT::Common: Not open the specified stream yet."); - } - NGT::Serializer::write(os, std::vector::size()); - for (size_t idx = 0; idx < std::vector::size(); idx++) { - if ((*this)[idx] == 0) { - NGT::Serializer::write(os, '-'); - } else { - NGT::Serializer::write(os, '+'); - if (objectspace == 0) { - (*this)[idx]->serialize(os); - } else { - (*this)[idx]->serialize(os, objectspace); - } - } - } - } - - // for milvus - void serialize(std::stringstream & os, ObjectSpace * objectspace = 0) - { - NGT::Serializer::write(os, std::vector::size()); - for (size_t idx = 0; idx < std::vector::size(); idx++) - { - if ((*this)[idx] == 0) - { - NGT::Serializer::write(os, '-'); - } - else - { - NGT::Serializer::write(os, '+'); - if (objectspace == 0) - { - (*this)[idx]->serialize(os); - } - else - { - (*this)[idx]->serialize(os, objectspace); - } - } - } - } - - void deserialize(std::ifstream &is, ObjectSpace *objectspace = 0) { - if (!is.is_open()) { - NGTThrowException("NGT::Common: Not open the specified stream yet."); - } - deleteAll(); - size_t s; - NGT::Serializer::read(is, s); - std::vector::reserve(s); - for (size_t i = 0; i < s; i++) { - char type; - NGT::Serializer::read(is, type); - switch(type) { - case '-': - { - std::vector::push_back(0); -#ifdef ADVANCED_USE_REMOVED_LIST - if (i != 0) { - removedList.push(i); - } -#endif - } - break; - case '+': - { - if (objectspace == 0) { - TYPE *v = new TYPE; - v->deserialize(is); - std::vector::push_back(v); - } else { - TYPE *v = new TYPE(objectspace); - v->deserialize(is, objectspace); - std::vector::push_back(v); - } - } - break; - default: - { - assert(type == '-' || type == '+'); - break; - } - } - } - } - - void deserialize(std::stringstream & is, ObjectSpace * objectspace = 0) - { - deleteAll(); - size_t s; - NGT::Serializer::read(is, s); - std::vector::reserve(s); - for (size_t i = 0; i < s; i++) - { - char type; - NGT::Serializer::read(is, type); - switch (type) - { - case '-': - { - std::vector::push_back(0); -#ifdef ADVANCED_USE_REMOVED_LIST - if (i != 0) { - removedList.push(i); - } -#endif - } - break; - case '+': - { - if (objectspace == 0) { - TYPE *v = new TYPE; - v->deserialize(is); - std::vector::push_back(v); - } else { - TYPE *v = new TYPE(objectspace); - v->deserialize(is, objectspace); - std::vector::push_back(v); - } - } - break; - default: - { - assert(type == '-' || type == '+'); - break; - } - } - } - } - - void serializeAsText(std::ofstream &os, ObjectSpace *objectspace = 0) { - if (!os.is_open()) { - NGTThrowException("NGT::Common: Not open the specified stream yet."); - } - // The format is almost the same as the default and the best in terms of the string length. - os.setf(std::ios_base::fmtflags(0), std::ios_base::floatfield); - os << std::setprecision(8); - - os << std::vector::size() << std::endl; - for (size_t idx = 0; idx < std::vector::size(); idx++) { - if ((*this)[idx] == 0) { - os << idx << " - " << std::endl; - } else { - os << idx << " + "; - if (objectspace == 0) { - (*this)[idx]->serializeAsText(os); - } else { - (*this)[idx]->serializeAsText(os, objectspace); - } - os << std::endl; - } - } - os << std::fixed; - } - - void deserializeAsText(std::ifstream &is, ObjectSpace *objectspace = 0) { - if (!is.is_open()) { - NGTThrowException("NGT::Common: Not open the specified stream yet."); - } - deleteAll(); - size_t s; - NGT::Serializer::readAsText(is, s); - std::vector::reserve(s); - for (size_t i = 0; i < s; i++) { - size_t idx; - NGT::Serializer::readAsText(is, idx); - if (i != idx) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Repository: Error. index of a specified import file is invalid. " + std::to_string(idx) + ":" + std::to_string(i)); -// std::cerr << "Repository: Error. index of a specified import file is invalid. " << idx << ":" << i << std::endl; - } - char type; - NGT::Serializer::readAsText(is, type); - switch(type) { - case '-': - { - std::vector::push_back(0); -#ifdef ADVANCED_USE_REMOVED_LIST - if (i != 0) { - removedList.push(i); - } -#endif - } - break; - case '+': - { - if (objectspace == 0) { - TYPE *v = new TYPE; - v->deserializeAsText(is); - std::vector::push_back(v); - } else { - TYPE *v = new TYPE(objectspace); - v->deserializeAsText(is, objectspace); - std::vector::push_back(v); - } - } - break; - default: - { - assert(type == '-' || type == '+'); - break; - } - } - } - } - - void deleteAll() { - for (size_t i = 0; i < this->size(); i++) { - if ((*this)[i] != 0) { - delete (*this)[i]; - (*this)[i] = 0; - } - } - this->clear(); -#ifdef ADVANCED_USE_REMOVED_LIST - while(!removedList.empty()){ removedList.pop(); }; -#endif - } - - void set(size_t idx, TYPE *n) { - (*this)[idx] = n; - } - -#ifdef ADVANCED_USE_REMOVED_LIST - size_t count() { return std::vector::size() == 0 ? 0 : std::vector::size() - removedList.size() - 1; } - virtual int64_t memSize() { return std::vector::size() == 0 ? 0 : (*this)[1]->memSize() * std::vector::size() + removedList.size() * sizeof(size_t); } - protected: - std::priority_queue, std::greater > removedList; -#endif - }; - -#pragma pack(2) - class ObjectDistance { - public: - ObjectDistance():id(0), distance(0.0) {} - ObjectDistance(unsigned int i, float d):id(i), distance(d) {} - inline bool operator==(const ObjectDistance &o) const { - return (distance == o.distance) && (id == o.id); - } - inline void set(unsigned int i, float d) { id = i; distance = d; } - inline bool operator<(const ObjectDistance &o) const { - if (distance == o.distance) { - return id < o.id; - } else { - return distance < o.distance; - } - } - inline bool operator>(const ObjectDistance &o) const { - if (distance == o.distance) { - return id > o.id; - } else { - return distance > o.distance; - } - } - void serialize(std::ofstream &os) { - NGT::Serializer::write(os, id); - NGT::Serializer::write(os, distance); - } - // for milvus - void serialize(std::stringstream & os) - { - NGT::Serializer::write(os, id); - NGT::Serializer::write(os, distance); - } - void deserialize(std::ifstream &is) { - NGT::Serializer::read(is, id); - NGT::Serializer::read(is, distance); - } - - // for milvus - void deserialize(std::stringstream & is) - { - NGT::Serializer::read(is, id); - NGT::Serializer::read(is, distance); - } - - void serializeAsText(std::ofstream &os) { - os.unsetf(std::ios_base::floatfield); - os << std::setprecision(8) << id << " " << distance; - } - - void deserializeAsText(std::ifstream &is) { - is >> id; - is >> distance; - } - - friend std::ostream &operator<<(std::ostream& os, const ObjectDistance &o) { - os << o.id << " " << o.distance; - return os; - } - friend std::istream &operator>>(std::istream& is, ObjectDistance &o) { - is >> o.id; - is >> o.distance; - return is; - } - int64_t memSize() const { return sizeof(id) + sizeof(distance); } - uint32_t id; - float distance; - }; - -#pragma pack() - - class Object; - class ObjectDistances; - - class Container { - public: - Container(Object &o, ObjectID i):object(o), id(i) {} - Container(Container &c):object(c.object), id(c.id) {} - virtual int64_t memSize() { return sizeof(ObjectID); } - Object &object; - ObjectID id; - }; - - typedef std::priority_queue, std::less > ResultPriorityQueue; - - class SearchContainer : public NGT::Container { - public: - SearchContainer(Object &f, ObjectID i): Container(f, i) { initialize(); } - SearchContainer(Object &f): Container(f, 0) { initialize(); } - SearchContainer(SearchContainer &sc): Container(sc) { *this = sc; } - SearchContainer(SearchContainer &sc, Object &f): Container(f, sc.id) { *this = sc; } - SearchContainer(): Container(*reinterpret_cast(0), 0) { initialize(); } - - SearchContainer &operator=(SearchContainer &sc) { - size = sc.size; - radius = sc.radius; - explorationCoefficient = sc.explorationCoefficient; - result = sc.result; - distanceComputationCount = sc.distanceComputationCount; - edgeSize = sc.edgeSize; - workingResult = sc.workingResult; - useAllNodesInLeaf = sc.useAllNodesInLeaf; - expectedAccuracy = sc.expectedAccuracy; - visitCount = sc.visitCount; - return *this; - } - virtual ~SearchContainer() {} - virtual void initialize() { - size = 10; - radius = FLT_MAX; - explorationCoefficient = 1.1; - result = 0; - edgeSize = -1; // dynamically prune the edges during search. -1 means following the index property. 0 means using all edges. - useAllNodesInLeaf = false; - expectedAccuracy = -1.0; - } - void setSize(size_t s) { size = s; }; - void setResults(ObjectDistances *r) { result = r; } - void setRadius(Distance r) { radius = r; } - void setEpsilon(float e) { explorationCoefficient = e + 1.0; } - void setEdgeSize(int e) { edgeSize = e; } - void setExpectedAccuracy(float a) { expectedAccuracy = a; } - - inline bool resultIsAvailable() { return result != 0; } - ObjectDistances &getResult() { - if (result == 0) { - NGTThrowException("Inner error: results is not set"); - } - return *result; - } - - ResultPriorityQueue &getWorkingResult() { return workingResult; } - - virtual int64_t memSize(); -// virtual int64_t memSize() { -// auto workres_size = workingResult.size() == 0 ? 0 : workingResult.size() * workingResult.top().memSize(); -// return sizeof(size_t) * 3 + sizeof(float) * 3 + result->memSize() + 1 + workres_size + Container::memSize(); -// } - - size_t size; - Distance radius; - float explorationCoefficient; - int edgeSize; - size_t distanceComputationCount; - ResultPriorityQueue workingResult; - bool useAllNodesInLeaf; - size_t visitCount; - float expectedAccuracy; - - private: - ObjectDistances *result; - }; - - class SearchQuery : public NGT::SearchContainer { - public: - template SearchQuery(const std::vector &q):query(0) { setQuery(q); } - template SearchQuery(SearchContainer &sc, const std::vector &q): SearchContainer(sc), query(0) { setQuery(q); } - ~SearchQuery() { deleteQuery(); } - - template void setQuery(const std::vector &q) { - if (query != 0) { - deleteQuery(); - } - query = new std::vector(q); - queryType = &typeid(QTYPE); - if (*queryType != typeid(float) && *queryType != typeid(double) && *queryType != typeid(uint8_t)) { - query = 0; - queryType = 0; - std::stringstream msg; - msg << "NGT::SearchQuery: Invalid query type!"; - NGTThrowException(msg); - } - } - void *getQuery() { return query; } - const std::type_info &getQueryType() { return *queryType; } - virtual int64_t memSize() { return std::strlen((char*)getQuery()) + sizeof(getQueryType()) + SearchContainer::memSize(); } - private: - void deleteQuery() { - if (query == 0) { - return; - } - if (*queryType == typeid(float)) { - delete static_cast*>(query); - } else if (*queryType == typeid(double)) { - delete static_cast*>(query); - } else if (*queryType == typeid(uint8_t)) { - delete static_cast*>(query); - } - query = 0; - queryType = 0; - } - void *query; - const std::type_info *queryType; - }; - - class InsertContainer : public Container { - public: - InsertContainer(Object &f, ObjectID i):Container(f, i) {} - }; - - class Timer { - public: - Timer():time(0) {} - - void reset() { time = 0; ntime = 0; } - - void start() { - struct timespec res; - clock_getres(CLOCK_REALTIME, &res); - reset(); - clock_gettime(CLOCK_REALTIME, &startTime); - } - - void restart() { - clock_gettime(CLOCK_REALTIME, &startTime); - } - - void stop() { - clock_gettime(CLOCK_REALTIME, &stopTime); - sec = stopTime.tv_sec - startTime.tv_sec; - nsec = stopTime.tv_nsec - startTime.tv_nsec; - if (nsec < 0) { - sec -= 1; - nsec += 1000000000L; - } - time += (double)sec + (double)nsec / 1000000000.0; - ntime += sec * 1000000000L + nsec; - } - - friend std::ostream &operator<<(std::ostream &os, Timer &t) { - os << std::setprecision(6) << t.time << " (sec)"; - return os; - } - - struct timespec startTime; - struct timespec stopTime; - - int64_t sec; - int64_t nsec; - int64_t ntime; // nano second - double time; // second - }; - -} // namespace NGT - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/GetCoreNumber.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/GetCoreNumber.cpp deleted file mode 100644 index 2cdc485e57..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/GetCoreNumber.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "NGT/GetCoreNumber.h" - -namespace NGT -{ -int getCoreNumber() -{ -#ifndef __linux__ - SYSTEM_INFO sys_info; - GetSystemInfo(&sys_info); - return sysInfo.dwNumberOfProcessors; -#else - return get_nprocs(); -#endif -} -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/GetCoreNumber.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/GetCoreNumber.h deleted file mode 100644 index 6190735dd1..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/GetCoreNumber.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __linux__ -# include "windows.h" -#else - -# include "sys/sysinfo.h" -# include "unistd.h" -#endif - -namespace NGT -{ -int getCoreNumber(); -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Graph.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/Graph.cpp deleted file mode 100644 index eca46bd288..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Graph.cpp +++ /dev/null @@ -1,1258 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "defines.h" -#include "Graph.h" -#include "Thread.h" -#include "Index.h" - - - -using namespace std; -using namespace NGT; - -void -NeighborhoodGraph::Property::set(NGT::Property &prop) { - if (prop.truncationThreshold != -1) truncationThreshold = prop.truncationThreshold; - if (prop.edgeSizeForCreation != -1) edgeSizeForCreation = prop.edgeSizeForCreation; - if (prop.edgeSizeForSearch != -1) edgeSizeForSearch = prop.edgeSizeForSearch; - if (prop.edgeSizeLimitForCreation != -1) edgeSizeLimitForCreation = prop.edgeSizeLimitForCreation; - if (prop.insertionRadiusCoefficient != -1) insertionRadiusCoefficient = prop.insertionRadiusCoefficient; - if (prop.seedSize != -1) seedSize = prop.seedSize; - if (prop.seedType != SeedTypeNone) seedType = prop.seedType; - if (prop.truncationThreadPoolSize != -1) truncationThreadPoolSize = prop.truncationThreadPoolSize; - if (prop.batchSizeForCreation != -1) batchSizeForCreation = prop.batchSizeForCreation; - if (prop.dynamicEdgeSizeBase != -1) dynamicEdgeSizeBase = prop.dynamicEdgeSizeBase; - if (prop.dynamicEdgeSizeRate != -1) dynamicEdgeSizeRate = prop.dynamicEdgeSizeRate; - if (prop.buildTimeLimit != -1) buildTimeLimit = prop.buildTimeLimit; - if (prop.outgoingEdge != -1) outgoingEdge = prop.outgoingEdge; - if (prop.incomingEdge != -1) incomingEdge = prop.incomingEdge; - if (prop.graphType != GraphTypeNone) graphType = prop.graphType; -} - -void -NeighborhoodGraph::Property::get(NGT::Property &prop) { - prop.truncationThreshold = truncationThreshold; - prop.edgeSizeForCreation = edgeSizeForCreation; - prop.edgeSizeForSearch = edgeSizeForSearch; - prop.edgeSizeLimitForCreation = edgeSizeLimitForCreation; - prop.insertionRadiusCoefficient = insertionRadiusCoefficient; - prop.seedSize = seedSize; - prop.seedType = seedType; - prop.truncationThreadPoolSize = truncationThreadPoolSize; - prop.batchSizeForCreation = batchSizeForCreation; - prop.dynamicEdgeSizeBase = dynamicEdgeSizeBase; - prop.dynamicEdgeSizeRate = dynamicEdgeSizeRate; - prop.graphType = graphType; - prop.buildTimeLimit = buildTimeLimit; - prop.outgoingEdge = outgoingEdge; - prop.incomingEdge = incomingEdge; -} - - -#ifdef NGT_GRAPH_READ_ONLY_GRAPH -void -NeighborhoodGraph::Search::normalizedCosineSimilarityFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::cosineSimilarityFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::normalizedAngleFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::angleFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::l1Float(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::l2Float(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::sparseJaccardFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::l1Uint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::l2Uint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::hammingUint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::jaccardUint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -//// - -void -NeighborhoodGraph::Search::normalizedCosineSimilarityFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::cosineSimilarityFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::normalizedAngleFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::angleFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::l1FloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::l2FloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::sparseJaccardFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::l1Uint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::l2Uint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::hammingUint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - -void -NeighborhoodGraph::Search::jaccardUint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - graph.searchReadOnlyGraph(sc, seeds); -} - - - -#endif - -void -NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds) -{ - ObjectRepository &objectRepository = getObjectRepository(); - NGT::ObjectSpace::Comparator &comparator = objectSpace->getComparator(); - ObjectDistances tmp; - tmp.reserve(seeds.size()); - size_t seedSize = seeds.size(); -#ifndef NGT_PREFETCH_DISABLED - const size_t prefetchSize = objectSpace->getPrefetchSize(); - const size_t prefetchOffset = objectSpace->getPrefetchOffset(); -#if !defined(NGT_SHARED_MEMORY_ALLOCATOR) - PersistentObject **objects = objectRepository.getPtr(); -#endif - size_t poft = prefetchOffset < seedSize ? prefetchOffset : seedSize; - for (size_t i = 0; i < poft; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - MemoryCache::prefetch(reinterpret_cast(objectRepository.get(seeds[i].id)), prefetchSize); -#else - MemoryCache::prefetch(reinterpret_cast(objects[seeds[i].id]), prefetchSize); -#endif - } -#endif - for (size_t i = 0; i < seedSize; i++) { -#ifndef NGT_PREFETCH_DISABLED - if (i + prefetchOffset < seedSize) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - MemoryCache::prefetch(reinterpret_cast(objectRepository.get(seeds[i + prefetchOffset].id)), prefetchSize); -#else - MemoryCache::prefetch(reinterpret_cast(objects[seeds[i + prefetchOffset].id]), prefetchSize); -#endif - } -#endif - if (objectRepository.isEmpty(seeds[i].id)) { - cerr << "setupseeds:warning! unavailable object:" << seeds[i].id << "." << endl; - continue; - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - seeds[i].distance = comparator(sc.object, *objectRepository.get(seeds[i].id)); -#else - seeds[i].distance = comparator(sc.object, *objects[seeds[i].id]); -#endif - } - -#ifdef NGT_DISTANCE_COMPUTATION_COUNT - sc.distanceComputationCount += seeds.size(); -#endif - -} - -void -NeighborhoodGraph::setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds, double (&comparator)(const void*, const void*, size_t)) -{ - ObjectRepository &objectRepository = getObjectRepository(); - const size_t dimension = objectSpace->getPaddedDimension(); - size_t seedSize = seeds.size(); -#ifndef NGT_PREFETCH_DISABLED - const size_t prefetchSize = objectSpace->getPrefetchSize(); - const size_t prefetchOffset = objectSpace->getPrefetchOffset(); -#if !defined(NGT_SHARED_MEMORY_ALLOCATOR) - PersistentObject **objects = objectRepository.getPtr(); -#endif - size_t poft = prefetchOffset < seedSize ? prefetchOffset : seedSize; - for (size_t i = 0; i < poft; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - MemoryCache::prefetch(reinterpret_cast(objectRepository.get(seeds[i].id)), prefetchSize); -#else - MemoryCache::prefetch(reinterpret_cast(objects[seeds[i].id]), prefetchSize); -#endif - } -#endif - for (size_t i = 0; i < seedSize; i++) { -#ifndef NGT_PREFETCH_DISABLED - if (i + prefetchOffset < seedSize) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - MemoryCache::prefetch(reinterpret_cast(objectRepository.get(seeds[i + prefetchOffset].id)), prefetchSize); -#else - MemoryCache::prefetch(reinterpret_cast(objects[seeds[i + prefetchOffset].id]), prefetchSize); -#endif - } -#endif - if (objectRepository.isEmpty(seeds[i].id)) { - cerr << "setupseeds:warning! unavailable object:" << seeds[i].id << "." << endl; - continue; - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - seeds[i].distance = comparator(static_cast(&sc.object[0]), static_cast(objectRepository.get(seeds[i].id)), dimension); -#else - seeds[i].distance = comparator(&sc.object[0], &(*objects[seeds[i].id])[0], dimension); -#endif - } - -#ifdef NGT_DISTANCE_COMPUTATION_COUNT - sc.distanceComputationCount += seeds.size(); -#endif -} - - -void -NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, - UncheckedSet &unchecked, DistanceCheckedSet &distanceChecked) -{ - std::sort(seeds.begin(), seeds.end()); - - for (ObjectDistances::iterator ri = seeds.begin(); ri != seeds.end(); ri++) { - if ((results.size() < (unsigned int)sc.size) && ((*ri).distance <= sc.radius)) { - results.push((*ri)); - } else { - break; - } - } - - if (results.size() >= sc.size) { - sc.radius = results.top().distance; - } - - for (ObjectDistances::iterator ri = seeds.begin(); ri != seeds.end(); ri++) { -#if !defined(NGT_GRAPH_CHECK_VECTOR) || defined(NGT_GRAPH_CHECK_BOOLEANSET) - distanceChecked.insert((*ri).id); -#else - distanceChecked[(*ri).id] = 1; -#endif - unchecked.push(*ri); - } -} - -#if !defined(NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET) -void -NeighborhoodGraph::setupSeeds(NGT::SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, - UncheckedSet &unchecked, DistanceCheckedSetForLargeDataset &distanceChecked) -{ - std::sort(seeds.begin(), seeds.end()); - - for (ObjectDistances::iterator ri = seeds.begin(); ri != seeds.end(); ri++) { - if ((results.size() < (unsigned int)sc.size) && ((*ri).distance <= sc.radius)) { - results.push((*ri)); - } else { - break; - } - } - - if (results.size() >= sc.size) { - sc.radius = results.top().distance; - } - - for (ObjectDistances::iterator ri = seeds.begin(); ri != seeds.end(); ri++) { - distanceChecked.insert((*ri).id); - //distanceChecked[(*ri).id] = 1; - unchecked.push(*ri); - } -} -#endif - -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - - template - void - NeighborhoodGraph::searchReadOnlyGraph(NGT::SearchContainer &sc, ObjectDistances &seeds) - { - if (sc.explorationCoefficient == 0.0) { - sc.explorationCoefficient = NGT_EXPLORATION_COEFFICIENT; - } - - // setup edgeSize - size_t edgeSize = getEdgeSize(sc); - - UncheckedSet unchecked; - - CHECK_LIST distanceChecked(searchRepository.size()); - - ResultSet results; - - setupDistances(sc, seeds, COMPARATOR::compare); - setupSeeds(sc, seeds, results, unchecked, distanceChecked); - - Distance explorationRadius = sc.explorationCoefficient * sc.radius; - const size_t dimension = objectSpace->getPaddedDimension(); - ReadOnlyGraphNode *nodes = &searchRepository.front(); - ReadOnlyGraphNode *neighbors = 0; - ObjectDistance result; - ObjectDistance target; - const size_t prefetchSize = objectSpace->getPrefetchSize(); - const size_t prefetchOffset = objectSpace->getPrefetchOffset(); - pair *neighborptr; - pair *neighborendptr; - while (!unchecked.empty()) { - target = unchecked.top(); - unchecked.pop(); - if (target.distance > explorationRadius) { - break; - } - neighbors = &nodes[target.id]; - neighborptr = &(*neighbors)[0]; - size_t neighborSize = neighbors->size() < edgeSize ? neighbors->size() : edgeSize; - neighborendptr = neighborptr + neighborSize; - - pair* nsPtrs[neighborSize]; - size_t nsPtrsSize = 0; - - for (; neighborptr < neighborendptr; ++neighborptr) { - if (!distanceChecked[(*(neighborptr)).first]) { - nsPtrs[nsPtrsSize] = neighborptr; - if (nsPtrsSize < prefetchOffset) { - unsigned char *ptr = reinterpret_cast((*(neighborptr)).second); - MemoryCache::prefetch(ptr, prefetchSize); - } - nsPtrsSize++; - } - } - for (size_t idx = 0; idx < nsPtrsSize; idx++) { - neighborptr = nsPtrs[idx]; - if (idx + prefetchOffset < nsPtrsSize) { - unsigned char *ptr = reinterpret_cast((*(nsPtrs[idx + prefetchOffset])).second); - MemoryCache::prefetch(ptr, prefetchSize); - } -#ifdef NGT_VISIT_COUNT - sc.visitCount++; -#endif - auto &neighbor = *neighborptr; - distanceChecked.insert(neighbor.first); - -#ifdef NGT_DISTANCE_COMPUTATION_COUNT - sc.distanceComputationCount++; -#endif - - Distance distance = COMPARATOR::compare((void*)&sc.object[0], - (void*)&(*static_cast(neighbor.second))[0], dimension); - - if (distance <= explorationRadius) { - result.set(neighbor.first, distance); - unchecked.push(result); - if (distance <= sc.radius) { - results.push(result); - if (results.size() >= sc.size) { - if (results.size() > sc.size) { - results.pop(); - } - sc.radius = results.top().distance; - explorationRadius = sc.explorationCoefficient * sc.radius; - } - } - } - } - } - - if (sc.resultIsAvailable()) { - ObjectDistances &qresults = sc.getResult(); - qresults.moveFrom(results); - } else { - sc.workingResult = std::move(results); - } - } - -#endif - - void - NeighborhoodGraph::search(NGT::SearchContainer &sc, ObjectDistances &seeds) - { - if (sc.explorationCoefficient == 0.0) { - sc.explorationCoefficient = NGT_EXPLORATION_COEFFICIENT; - } - - // setup edgeSize - size_t edgeSize = getEdgeSize(sc); - - UncheckedSet unchecked; -#if defined(NGT_GRAPH_CHECK_BITSET) - DistanceCheckedSet distanceChecked(0); -#elif defined(NGT_GRAPH_CHECK_BOOLEANSET) - DistanceCheckedSet distanceChecked(repository.size()); -#elif defined(NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET) - DistanceCheckedSet distanceChecked(repository.size()); -#elif defined(NGT_GRAPH_CHECK_VECTOR) - DistanceCheckedSet distanceChecked(repository.size()); -#else - DistanceCheckedSet distanceChecked; -#endif - - ResultSet results; - setupDistances(sc, seeds); - setupSeeds(sc, seeds, results, unchecked, distanceChecked); - Distance explorationRadius = sc.explorationCoefficient * sc.radius; - NGT::ObjectSpace::Comparator &comparator = objectSpace->getComparator(); - ObjectRepository &objectRepository = getObjectRepository(); - const size_t prefetchSize = objectSpace->getPrefetchSize(); - ObjectDistance result; -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - NodeWithPosition target; -#else - ObjectDistance target; -#endif - const size_t prefetchOffset = objectSpace->getPrefetchOffset(); - ObjectDistance *neighborptr; - ObjectDistance *neighborendptr; - while (!unchecked.empty()) { - target = unchecked.top(); - unchecked.pop(); - if (target.distance > explorationRadius) { - break; - } - GraphNode *neighbors = 0; - try { - neighbors = getNode(target.id); - } catch(Exception &err) { - cerr << "Graph::search: Warning. " << err.what() << " ID=" << target.id << endl; - continue; - } - if (neighbors->size() == 0) { - continue; - } -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - uint32_t position = target.position; -#endif -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - neighborptr = &(*neighbors).at(position, repository.allocator); -#else - neighborptr = &(*neighbors).at(0, repository.allocator); -#endif -#else -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - neighborptr = &(*neighbors)[position]; -#else - neighborptr = &(*neighbors)[0]; -#endif -#endif - neighborendptr = neighborptr; - size_t neighborSize = neighbors->size() < edgeSize ? neighbors->size() : edgeSize; - neighborendptr += neighborSize; -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - neighborendptr -= position; -#endif - size_t poft = prefetchOffset < neighborSize ? prefetchOffset : neighborSize; - for (size_t i = 0; i < poft; i++) { - if (!distanceChecked[(*(neighborptr + i)).id]) { - unsigned char *ptr = reinterpret_cast(objectRepository.get((*(neighborptr + i)).id)); - MemoryCache::prefetch(ptr, prefetchSize); - } - } -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - for (; neighborptr < neighborendptr; ++neighborptr, position++) { -#else - for (; neighborptr < neighborendptr; ++neighborptr) { -#endif - if ((neighborptr + prefetchOffset < neighborendptr) && !distanceChecked[(*(neighborptr + prefetchOffset)).id]) { - unsigned char *ptr = reinterpret_cast(objectRepository.get((*(neighborptr + prefetchOffset)).id)); - MemoryCache::prefetch(ptr, prefetchSize); - } - sc.visitCount++; - ObjectDistance &neighbor = *neighborptr; - if (distanceChecked[neighbor.id]) { - continue; - } - distanceChecked.insert(neighbor.id); - -#ifdef NGT_EXPLORATION_COEFFICIENT_OPTIMIZATION - sc.explorationCoefficient = exp(-(double)distanceChecked.size() / 20000.0) / 10.0 + 1.0; -#endif - - Distance distance = comparator(sc.object, *objectRepository.get(neighbor.id)); - sc.distanceComputationCount++; - if (distance <= explorationRadius) { - result.set(neighbor.id, distance); - unchecked.push(result); - if (distance <= sc.radius) { - results.push(result); - if (results.size() >= sc.size) { - if (results.top().distance >= distance) { - if (results.size() > sc.size) { - results.pop(); - } - sc.radius = results.top().distance; - explorationRadius = sc.explorationCoefficient * sc.radius; - } - } - } -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - if ((distance < target.distance) && (distance <= explorationRadius) && ((neighborptr + 2) < neighborendptr)) { - target.position = position + 1; - unchecked.push(target); - break; - } -#endif - } - } - - } - if (sc.resultIsAvailable()) { - ObjectDistances &qresults = sc.getResult(); - qresults.clear(); - qresults.moveFrom(results); - } else { - sc.workingResult = std::move(results); - } - } - - // for milvus - void NeighborhoodGraph::search(NGT::SearchContainer & sc, ObjectDistances & seeds, const faiss::BitsetView bitset) - { - if (sc.explorationCoefficient == 0.0) - { - sc.explorationCoefficient = NGT_EXPLORATION_COEFFICIENT; - } - - // setup edgeSize - size_t edgeSize = getEdgeSize(sc); - - UncheckedSet unchecked; -#if defined(NGT_GRAPH_CHECK_BITSET) - DistanceCheckedSet distanceChecked(0); -#elif defined(NGT_GRAPH_CHECK_BOOLEANSET) - DistanceCheckedSet distanceChecked(repository.size()); -#elif defined(NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET) - DistanceCheckedSet distanceChecked(repository.size()); -#elif defined(NGT_GRAPH_CHECK_VECTOR) - DistanceCheckedSet distanceChecked(repository.size()); -#else - DistanceCheckedSet distanceChecked; -#endif - - ResultSet results; - setupDistances(sc, seeds); - setupSeeds(sc, seeds, results, unchecked, distanceChecked); - Distance explorationRadius = sc.explorationCoefficient * sc.radius; - NGT::ObjectSpace::Comparator & comparator = objectSpace->getComparator(); - ObjectRepository & objectRepository = getObjectRepository(); - const size_t prefetchSize = objectSpace->getPrefetchSize(); - ObjectDistance result; -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - NodeWithPosition target; -#else - ObjectDistance target; -#endif - const size_t prefetchOffset = objectSpace->getPrefetchOffset(); - ObjectDistance * neighborptr; - ObjectDistance * neighborendptr; - while (!unchecked.empty()) - { - target = unchecked.top(); - unchecked.pop(); - if (target.distance > explorationRadius) - { - break; - } - GraphNode * neighbors = 0; - try - { - neighbors = getNode(target.id); - } catch(Exception &err) { - cerr << "Graph::search: Warning. " << err.what() << " ID=" << target.id << endl; - continue; - } - if (neighbors->size() == 0) - { - continue; - } -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - uint32_t position = target.position; -#endif -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - neighborptr = &(*neighbors).at(position, repository.allocator); -#else - neighborptr = &(*neighbors).at(0, repository.allocator); -#endif -#else -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - neighborptr = &(*neighbors)[position]; -#else - neighborptr = &(*neighbors)[0]; -#endif -#endif - neighborendptr = neighborptr; - size_t neighborSize = neighbors->size() < edgeSize ? neighbors->size() : edgeSize; - neighborendptr += neighborSize; -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - neighborendptr -= position; -#endif - size_t poft = prefetchOffset < neighborSize ? prefetchOffset : neighborSize; - for (size_t i = 0; i < poft; i++) - { - if (!distanceChecked[(*(neighborptr + i)).id]) - { - unsigned char * ptr = reinterpret_cast(objectRepository.get((*(neighborptr + i)).id)); - MemoryCache::prefetch(ptr, prefetchSize); - } - } -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - for (; neighborptr < neighborendptr; ++neighborptr, position++) - { -#else - for (; neighborptr < neighborendptr; ++neighborptr) - { -#endif - if ((neighborptr + prefetchOffset < neighborendptr) && !distanceChecked[(*(neighborptr + prefetchOffset)).id]) - { - unsigned char * ptr = reinterpret_cast(objectRepository.get((*(neighborptr + prefetchOffset)).id)); - MemoryCache::prefetch(ptr, prefetchSize); - } - sc.visitCount++; - ObjectDistance & neighbor = *neighborptr; - if (distanceChecked[neighbor.id]) - { - continue; - } - distanceChecked.insert(neighbor.id); - - // judge if id in blacklist - if (!bitset.empty() && bitset.test((int64_t)neighbor.id - 1)) { - continue; - } - -#ifdef NGT_EXPLORATION_COEFFICIENT_OPTIMIZATION - sc.explorationCoefficient = exp(-(double)distanceChecked.size() / 20000.0) / 10.0 + 1.0; -#endif - - Distance distance = comparator(sc.object, *objectRepository.get(neighbor.id)); - sc.distanceComputationCount++; - if (distance <= explorationRadius) - { - result.set(neighbor.id, distance); - unchecked.push(result); - if (distance <= sc.radius) - { - results.push(result); - if (results.size() >= sc.size) - { - if (results.top().distance >= distance) - { - if (results.size() > sc.size) - { - results.pop(); - } - sc.radius = results.top().distance; - explorationRadius = sc.explorationCoefficient * sc.radius; - } - } - } -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - if ((distance < target.distance) && (distance <= explorationRadius) && ((neighborptr + 2) < neighborendptr)) - { - target.position = position + 1; - unchecked.push(target); - break; - } -#endif - } - } - } - if (sc.resultIsAvailable()) - { - ObjectDistances & qresults = sc.getResult(); - qresults.clear(); - qresults.moveFrom(results); - } - else - { - sc.workingResult = std::move(results); - } - } - - - void - NeighborhoodGraph::removeEdgesReliably(ObjectID id) { - GraphNode *nodetmp = 0; - try { - nodetmp = getNode(id); - } catch (Exception &err) { - stringstream msg; - msg << "removeEdgesReliably : cannot find a node. ID=" << id; - msg << ":" << err.what(); - NGTThrowException(msg.str()); - } - if (nodetmp == 0) { - stringstream msg; - msg << "removeEdgesReliably : cannot find a node. ID=" << id; - NGTThrowException(msg.str()); - } - GraphNode &node = *nodetmp; - if (node.size() == 0) { - cerr << "removeEdgesReliably : Warning! : No edges. ID=" << id << endl; - try { - removeNode(id); - } catch (Exception &err) { - stringstream msg; - msg << "removeEdgesReliably : Internal error! : cannot remove a node without edges. ID=" << id; - msg << ":" << err.what(); - NGTThrowException(msg.str()); - } - return; - } - - vector objtbl; - vector nodetbl; - try { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - for (GraphNode::iterator i = node.begin(repository.allocator); i != node.end(repository.allocator);) { -#else - for (GraphNode::iterator i = node.begin(); i != node.end();) { -#endif - if (id == (*i).id) { - cerr << "Graph::removeEdgesReliably: Inner error. Destination nodes include a source node. ID=" - << id << " continue..." << endl; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - i = node.erase(i, repository.allocator); -#else - i = node.erase(i); -#endif - continue; - } - objtbl.push_back(getObjectRepository().get((*i).id)); - GraphNode *n = 0; - try { - n = getNode((*i).id); - } catch (Exception &err) { - cerr << "Graph::removeEdgesReliably: Cannot find edges of a child. ID=" - << (*i).id << " continue..." << endl; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - i = node.erase(i, repository.allocator); -#else - i = node.erase(i); -#endif - continue; - } - nodetbl.push_back(n); - - ObjectDistance edge; - edge.id = id; - edge.distance = (*i).distance; - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - GraphNode::iterator ei = std::lower_bound(n->begin(repository.allocator), n->end(repository.allocator), edge); - if (ei != n->end(repository.allocator) && (*ei).id == id) { - n->erase(ei, repository.allocator); -#else - GraphNode::iterator ei = std::lower_bound(n->begin(), n->end(), edge); - if (ei != n->end() && (*ei).id == id) { - n->erase(ei); -#endif - } else { - stringstream msg; - msg << "removeEdgesReliably : internal error : cannot find an edge. ID=" - << id << " d=" << edge.distance << " in " << (*i).id << endl; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - for (GraphNode::iterator ni = n->begin(repository.allocator); ni != n->end(repository.allocator); ni++) { -#else - for (GraphNode::iterator ni = n->begin(); ni != n->end(); ni++) { -#endif - msg << "check. " << (*ni).id << endl; - } -#ifdef NGT_FORCED_REMOVE - msg << " anyway continue..."; - cerr << msg.str() << endl; -#else - NGTThrowException(msg.str()); -#endif - } - } - i++; - } - for (unsigned int i = 0; i < node.size() - 1; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - assert(node.at(i, repository.allocator).id != id); -#else - assert(node[i].id != id); -#endif - int minj = -1; - Distance mind = FLT_MAX; - for (unsigned int j = i + 1; j < node.size(); j++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - assert(node.at(j, repository.allocator).id != id); -#else - assert(node[j].id != id); -#endif - Distance d = objectSpace->getComparator()(*objtbl[i], *objtbl[j]); - if (d < mind) { - minj = j; - mind = d; - } - } - assert(minj != -1); - bool insertionA = false; - bool insertionB = false; - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - ObjectDistance obj = node.at(minj, repository.allocator); -#else - ObjectDistance obj = node[minj]; -#endif - obj.distance = mind; - GraphNode &n = *nodetbl[i]; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - GraphNode::iterator ei = std::lower_bound(n.begin(repository.allocator), n.end(repository.allocator), obj); - if ((ei == n.end(repository.allocator)) || ((*ei).id != obj.id)) { - n.insert(ei, obj, repository.allocator); - insertionA = true; - } -#else - GraphNode::iterator ei = std::lower_bound(n.begin(), n.end(), obj); - if ((ei == n.end()) || ((*ei).id != obj.id)) { - n.insert(ei, obj); - insertionA = true; - } -#endif - } - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - ObjectDistance obj = node.at(i, repository.allocator); -#else - ObjectDistance obj = node[i]; -#endif - obj.distance = mind; - GraphNode &n = *nodetbl[minj]; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - GraphNode::iterator ei = std::lower_bound(n.begin(repository.allocator), n.end(repository.allocator), obj); - if ((ei == n.end(repository.allocator)) || ((*ei).id != obj.id)) { - n.insert(ei, obj, repository.allocator); - insertionB = true; - } -#else - GraphNode::iterator ei = std::lower_bound(n.begin(), n.end(), obj); - if ((ei == n.end()) || ((*ei).id != obj.id)) { - n.insert(ei, obj); - insertionB = true; - } -#endif - } - if (insertionA != insertionB) { - stringstream msg; - msg << "Graph::removeEdgeReliably:Warning. Lost conectivity! Isn't this ANNG? ID=" << id << "."; -#ifdef NGT_FORCED_REMOVE - msg << " Anyway continue..."; - cerr << msg.str() << endl; -#else - NGTThrowException(msg.str()); -#endif - } - if ((i + 1 < node.size()) && (i + 1 != (unsigned int)minj)) { - ObjectDistance tmpr; - PersistentObject *tmpf; - GraphNode *tmprs; - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - tmpr = node.at(i + 1, repository.allocator); -#else - tmpr = node[i + 1]; -#endif - tmpf = objtbl[i + 1]; - tmprs = nodetbl[i + 1]; - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - node.at(i + 1, repository.allocator) = node.at(minj, repository.allocator); -#else - node[i + 1] = node[minj]; -#endif - objtbl[i + 1] = objtbl[minj]; - nodetbl[i + 1] = nodetbl[minj]; - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - node.at(minj, repository.allocator) = tmpr; -#else - node[minj] = tmpr; -#endif - objtbl[minj] = tmpf; - nodetbl[minj] = tmprs; - } - } - - } catch(Exception &err) { - stringstream msg; - msg << "removeEdgesReliably : Relink error ID=" << id << ":" << err.what(); -#ifdef NGT_FORCED_REMOVE - cerr << msg.str() << " continue..." << endl; -#else - NGTThrowException(msg.str()); -#endif - } - - try { - removeNode(id); - } catch (Exception &err) { - stringstream msg; - msg << "removeEdgesReliably : removeEdges error. ID=" << id << ":" << err.what(); - NGTThrowException(msg.str()); - } - } - -class TruncationSearchJob { -public: - TruncationSearchJob() {} - TruncationSearchJob &operator=(TruncationSearchJob &d) { - idx = d.idx; - object = d.object; - nearest = d.nearest; - start = d.start; - radius = d.radius; - return *this; - } - size_t idx; - PersistentObject *object; - ObjectDistance nearest; - ObjectDistance start; - NGT::Distance radius; -}; - -class TruncationSearchSharedData { -public: - TruncationSearchSharedData(NGT::NeighborhoodGraph &g, NGT::ObjectID id, size_t size, NGT::Distance lr) : - graphIndex(g), targetID(id), resultSize(size), explorationCoefficient(lr) {} - NGT::NeighborhoodGraph &graphIndex; - NGT::ObjectID targetID; - size_t resultSize; - NGT::Distance explorationCoefficient; -}; - -class TruncationSearchThread : public NGT::Thread { -public: - TruncationSearchThread() {} - virtual ~TruncationSearchThread() {} - virtual int run() { - NGT::ThreadPool::Thread &poolThread = - (NGT::ThreadPool::Thread&)*this; - TruncationSearchSharedData &sd = *poolThread.getSharedData(); - for (;;) { - TruncationSearchJob job; - try { - poolThread.getInputJobQueue().popFront(job); - } catch(NGT::ThreadTerminationException &err) { - break; - } catch (NGT::Exception &err) { - cerr << "TruncationSearchThread::run()::Inner error. continue..." << endl; - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Object *po = sd.graphIndex.objectSpace->allocateObject((Object&)*job.object); - NGT::SearchContainer ssc(*po); -#else - NGT::SearchContainer ssc(*job.object); -#endif - - NGT::ObjectDistances srs, results; - - srs.push_back(job.start); - ssc.setResults(&results); - ssc.size = sd.resultSize; - ssc.radius = job.radius; - ssc.explorationCoefficient = sd.explorationCoefficient; - ssc.id = 0; - try { - sd.graphIndex.search(ssc, srs); - } catch(...) { - cerr << "CreateIndexThread::run : Fatal Error!" << endl; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - sd.graphIndex.objectSpace->deleteObject(po); -#endif - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - sd.graphIndex.objectSpace->deleteObject(po); -#endif - job.nearest = results[0]; - poolThread.getOutputJobQueue().pushBack(job); - } - return 0; - } - -}; - -typedef NGT::ThreadPool TruncationSearchThreadPool; - -int -NeighborhoodGraph::truncateEdgesOptimally( - ObjectID id, - GraphNode &results, - size_t truncationSize - ) -{ - - ObjectDistances delNodes; - - size_t osize = results.size(); - - size_t resSize = 2; - TruncationSearchThreadPool threads(property.truncationThreadPoolSize); - TruncationSearchSharedData sd(*this, id, resSize, 1.1); - threads.setSharedData(&sd); - threads.create(); - - try { - for (size_t i = truncationSize; i < results.size(); i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (id == results.at(i, repository.allocator).id) { -#else - if (id == results[i].id) { -#endif - continue; - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - delNodes.push_back(results.at(i, repository.allocator)); -#else - delNodes.push_back(results[i]); -#endif - } - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - GraphNode::iterator ri = results.begin(repository.allocator); - ri += truncationSize; - results.erase(ri, results.end(repository.allocator), repository.allocator); -#else - GraphNode::iterator ri = results.begin(); - ri += truncationSize; - results.erase(ri, results.end()); -#endif - - for (size_t i = 0; i < delNodes.size(); i++) { - GraphNode::iterator j; - GraphNode &res = *getNode(delNodes[i].id); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - for (j = res.begin(repository.allocator); j != res.end(repository.allocator); j++) { -#else - for (j = res.begin(); j != res.end(); j++) { -#endif - if ((*j).id == id) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - res.erase(j, repository.allocator); -#else - res.erase(j); -#endif - break; - } - } - } - bool retry = true; - size_t maxResSize = osize * 2; - size_t batchSize = 20; - TruncationSearchThreadPool::OutputJobQueue &output = threads.getOutputJobQueue(); - TruncationSearchJob job; - - for (; retry == true; resSize = maxResSize) { - retry = false; - sd.resultSize = resSize; - size_t nodeidx = 0; - for (;;) { - size_t nodeSize = 0; - for (; nodeidx < delNodes.size(); nodeidx++) { - if (delNodes[nodeidx].id == 0) { - continue; - } - nodeSize++; - job.object = getObjectRepository().get(delNodes[nodeidx].id); - job.idx = nodeidx; - job.start.id = id; - job.start.distance = delNodes[nodeidx].distance; - job.radius = FLT_MAX; - threads.pushInputQueue(job); - if (nodeSize >= batchSize) { - break; - } - } - if (nodeSize == 0) { - break; - } - threads.waitForFinish(); - - if (output.size() != nodeSize) { - nodeSize = output.size(); - } - size_t cannotMoveCnt = 0; - for (size_t i = 0; i < nodeSize; i++) { - TruncationSearchJob &ojob = output.front(); - ObjectID nearestID = ojob.nearest.id; - size_t idx = ojob.idx; - if (nearestID == delNodes[idx].id) { - delNodes[idx].id = 0; - output.pop_front(); - continue; - } else if (nearestID == id) { - cannotMoveCnt++; - if ((resSize < maxResSize) && (cannotMoveCnt > 1)) { - retry = true; - output.pop_front(); - continue; - } - } else { - } - - ObjectID tid = delNodes[idx].id; - delNodes[idx].id = 0; - - GraphNode &delres = *getNode(tid); - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - GraphNode::iterator ei = std::lower_bound(delres.begin(repository.allocator), delres.end(repository.allocator), ojob.nearest); - if ((*ei).id != ojob.nearest.id) { - delres.insert(ei, ojob.nearest, repository.allocator); -#else - GraphNode::iterator ei = std::lower_bound(delres.begin(), delres.end(), ojob.nearest); - if ((*ei).id != ojob.nearest.id) { - delres.insert(ei, ojob.nearest); -#endif - } else { - output.pop_front(); - continue; - } - } - ObjectDistance r; - r.distance = ojob.nearest.distance; - r.id = tid; - if (nearestID != id) { - GraphNode &rs = *getNode(nearestID); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - rs.push_back(r, repository.allocator); - std::sort(rs.begin(repository.allocator), rs.end(repository.allocator)); -#else - rs.push_back(r); - std::sort(rs.begin(), rs.end()); -#endif - } else { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - results.push_back(r, repository.allocator); - std::sort(results.begin(repository.allocator), results.end(repository.allocator)); -#else - results.push_back(r); - std::sort(results.begin(), results.end()); -#endif - } - output.pop_front(); - } - - } - } - - int cnt = 0; - for (size_t i = 0; i < delNodes.size(); i++) { - if (delNodes[i].id != 0) { - cnt++; - } - } - if (cnt != 0) { - for (size_t i = 0; i < delNodes.size(); i++) { - if (delNodes[i].id != 0) { - } - } - } - threads.terminate(); - } catch (Exception &err) { - threads.terminate(); - Exception e(err); - throw e; - } - - size_t delsize = osize - results.size(); - - return delsize; -} - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Graph.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Graph.h deleted file mode 100644 index 7dd44d8eab..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Graph.h +++ /dev/null @@ -1,967 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include -#include - -#include "NGT/defines.h" -#include "NGT/Common.h" -#include "NGT/ObjectSpaceRepository.h" - -#include "faiss/utils/BitsetView.h" - -#include "NGT/HashBasedBooleanSet.h" - -#ifndef NGT_GRAPH_CHECK_VECTOR -#include -#endif - -#ifdef NGT_GRAPH_UNCHECK_STACK -#include -#endif - -#ifndef NGT_EXPLORATION_COEFFICIENT -#define NGT_EXPLORATION_COEFFICIENT 1.1 -#endif - -#ifndef NGT_INSERTION_EXPLORATION_COEFFICIENT -#define NGT_INSERTION_EXPLORATION_COEFFICIENT 1.1 -#endif - -#ifndef NGT_TRUNCATION_THRESHOLD -#define NGT_TRUNCATION_THRESHOLD 50 -#endif - -#ifndef NGT_SEED_SIZE -#define NGT_SEED_SIZE 10 -#endif - -#ifndef NGT_CREATION_EDGE_SIZE -#define NGT_CREATION_EDGE_SIZE 10 -#endif - -namespace NGT { - class Property; - - typedef GraphNode GRAPH_NODE; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - class GraphRepository: public PersistentRepository { -#else - class GraphRepository: public Repository { -#endif - - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - typedef PersistentRepository VECTOR; -#else - typedef Repository VECTOR; - - GraphRepository() { - prevsize = new vector; - } - virtual ~GraphRepository() { - deleteAll(); - if (prevsize != 0) { - delete prevsize; - } - } -#endif - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - void open(const std::string &file, size_t sharedMemorySize) { - SharedMemoryAllocator &allocator = VECTOR::getAllocator(); - off_t *entryTable = (off_t*)allocator.construct(file, sharedMemorySize); - if (entryTable == 0) { - entryTable = (off_t*)construct(); - allocator.setEntry(entryTable); - } - assert(entryTable != 0); - this->initialize(entryTable); - } - - void *construct() { - SharedMemoryAllocator &allocator = VECTOR::getAllocator(); - off_t *entryTable = new(allocator) off_t[2]; - entryTable[0] = allocator.getOffset(PersistentRepository::construct()); - entryTable[1] = allocator.getOffset(new(allocator) Vector); - return entryTable; - } - - void initialize(void *e) { - SharedMemoryAllocator &allocator = VECTOR::getAllocator(); - off_t *entryTable = (off_t*)e; - array = (ARRAY*)allocator.getAddr(entryTable[0]); - PersistentRepository::initialize(allocator.getAddr(entryTable[0])); - prevsize = (Vector*)allocator.getAddr(entryTable[1]); - } -#endif - - void insert(ObjectID id, ObjectDistances &objects) { - GRAPH_NODE *r = allocate(); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - (*r).copy(objects, VECTOR::getAllocator()); -#else - *r = objects; -#endif - try { - put(id, r); - } catch (Exception &exp) { - delete r; - throw exp; - } - if (id >= prevsize->size()) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - prevsize->resize(id + 1, VECTOR::getAllocator(), 0); -#else - prevsize->resize(id + 1, 0); -#endif - } else { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - (*prevsize).at(id, VECTOR::getAllocator()) = 0; -#else - (*prevsize)[id] = 0; -#endif - } - return; - } - - inline GRAPH_NODE *get(ObjectID fid, size_t &minsize) { - GRAPH_NODE *rs = VECTOR::get(fid); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - minsize = (*prevsize).at(fid, VECTOR::getAllocator()); -#else - minsize = (*prevsize)[fid]; -#endif - return rs; - } - void serialize(std::ofstream &os) { - VECTOR::serialize(os); - Serializer::write(os, *prevsize); - } - // for milvus - void serialize(std::stringstream & grp) - { - VECTOR::serialize(grp); - Serializer::write(grp, *prevsize); - } - void deserialize(std::ifstream &is) { - VECTOR::deserialize(is); - Serializer::read(is, *prevsize); - } - // for milvus - void deserialize(std::stringstream & is) - { - VECTOR::deserialize(is); - Serializer::read(is, *prevsize); - } - void show() { - for (size_t i = 0; i < this->size(); i++) { - std::cout << "Show graph " << i << " "; - if ((*this)[i] == 0) { - std::cout << std::endl; - continue; - } - for (size_t j = 0; j < (*this)[i]->size(); j++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cout << (*this)[i]->at(j, VECTOR::getAllocator()).id << ":" << (*this)[i]->at(j, VECTOR::getAllocator()).distance << " "; -#else - std::cout << (*this)[i]->at(j).id << ":" << (*this)[i]->at(j).distance << " "; -#endif - } - std::cout << std::endl; - } - } - - virtual int64_t memSize() { - int64_t ret = prevsize->size() * sizeof(unsigned short); - for (size_t i = 1; i < this->size(); ++ i) { - ret += (*this)[i]->memSize(); - } - return ret; - } - - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Vector *prevsize; -#else - std::vector *prevsize; -#endif - }; - -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - class ReadOnlyGraphNode : public std::vector> { - public: - ReadOnlyGraphNode():reservedSize(0), usedSize(0) {} - void reserve(size_t s) { - reservedSize = ((s & 7) == 0) ? s : (s & 0xFFFFFFFFFFFFFFF8) + 8; - resize(reservedSize); - for (size_t i = (reservedSize & 0xFFFFFFFFFFFFFFF8); i < reservedSize; i++) { - (*this)[i].first = 0; - } - } - void push_back(std::pair node) { - (*this)[usedSize] = node; - usedSize++; - } - size_t size() { return usedSize; } - virtual int64_t memSize() { return reservedSize * (sizeof(uint64_t) + (*this)[0].second->memSize()); } - size_t reservedSize; - size_t usedSize; - }; - - class SearchGraphRepository : public std::vector { - public: - SearchGraphRepository() {} - bool isEmpty(size_t idx) { return (*this)[idx].empty(); } - virtual int64_t memSize() { - int64_t ret = 0; - for (size_t i = 1; i < this->size(); ++ i) { - ret += (*this)[i].memSize(); - } - return ret; - } - - void deserialize(std::ifstream &is, ObjectRepository &objectRepository) { - if (!is.is_open()) { - NGTThrowException("NGT::SearchGraph: Not open the specified stream yet."); - } - clear(); - size_t s; - NGT::Serializer::read(is, s); - resize(s); - for (size_t id = 0; id < s; id++) { - char type; - NGT::Serializer::read(is, type); - switch(type) { - case '-': - break; - case '+': - { - ObjectDistances node; - node.deserialize(is); - ReadOnlyGraphNode &searchNode = at(id); - searchNode.reserve(node.size()); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - for (auto ni = node.begin(); ni != node.end(); ni++) { - std::cerr << "not implement" << std::endl; - abort(); - } -#else - for (auto ni = node.begin(); ni != node.end(); ni++) { - searchNode.push_back(std::pair((*ni).id, objectRepository.get((*ni).id))); - } -#endif - } - break; - default: - { - assert(type == '-' || type == '+'); - break; - } - } - } - } - - }; - -#endif // NGT_GRAPH_READ_ONLY_GRAPH - - class NeighborhoodGraph { - public: - enum GraphType { - GraphTypeNone = 0, - GraphTypeANNG = 1, - GraphTypeKNNG = 2, - GraphTypeBKNNG = 3, - GraphTypeONNG = 4, - GraphTypeIANNG = 5, // Improved ANNG - GraphTypeDNNG = 6 - }; - - enum SeedType { - SeedTypeNone = 0, - SeedTypeRandomNodes = 1, - SeedTypeFixedNodes = 2, - SeedTypeFirstNode = 3, - SeedTypeAllLeafNodes = 4 - }; - -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - class Search { - public: - static void (*getMethod(NGT::ObjectSpace::DistanceType dtype, NGT::ObjectSpace::ObjectType otype, size_t size))(NGT::NeighborhoodGraph&, NGT::SearchContainer&, NGT::ObjectDistances&) { - if (size < 5000000) { - switch (otype) { - default: - case NGT::ObjectSpace::Float: - switch (dtype) { - case NGT::ObjectSpace::DistanceTypeNormalizedCosine : return normalizedCosineSimilarityFloat; - case NGT::ObjectSpace::DistanceTypeCosine : return cosineSimilarityFloat; - case NGT::ObjectSpace::DistanceTypeNormalizedAngle : return normalizedAngleFloat; - case NGT::ObjectSpace::DistanceTypeAngle : return angleFloat; - case NGT::ObjectSpace::DistanceTypeL2 : return l2Float; - case NGT::ObjectSpace::DistanceTypeL1 : return l1Float; - case NGT::ObjectSpace::DistanceTypeSparseJaccard : return sparseJaccardFloat; - default: return l2Float; - } - break; - case NGT::ObjectSpace::Uint8: - switch (dtype) { - case NGT::ObjectSpace::DistanceTypeHamming : return hammingUint8; - case NGT::ObjectSpace::DistanceTypeJaccard : return jaccardUint8; - case NGT::ObjectSpace::DistanceTypeL2 : return l2Uint8; - case NGT::ObjectSpace::DistanceTypeL1 : return l1Uint8; - default : return l2Uint8; - } - break; - } - return l1Uint8; - } else { - switch (otype) { - default: - case NGT::ObjectSpace::Float: - switch (dtype) { - case NGT::ObjectSpace::DistanceTypeNormalizedCosine : return normalizedCosineSimilarityFloatForLargeDataset; - case NGT::ObjectSpace::DistanceTypeCosine : return cosineSimilarityFloatForLargeDataset; - case NGT::ObjectSpace::DistanceTypeNormalizedAngle : return normalizedAngleFloatForLargeDataset; - case NGT::ObjectSpace::DistanceTypeAngle : return angleFloatForLargeDataset; - case NGT::ObjectSpace::DistanceTypeL2 : return l2FloatForLargeDataset; - case NGT::ObjectSpace::DistanceTypeL1 : return l1FloatForLargeDataset; - case NGT::ObjectSpace::DistanceTypeSparseJaccard : return sparseJaccardFloatForLargeDataset; - default: return l2FloatForLargeDataset; - } - break; - case NGT::ObjectSpace::Uint8: - switch (dtype) { - case NGT::ObjectSpace::DistanceTypeHamming : return hammingUint8ForLargeDataset; - case NGT::ObjectSpace::DistanceTypeJaccard : return jaccardUint8ForLargeDataset; - case NGT::ObjectSpace::DistanceTypeL2 : return l2Uint8ForLargeDataset; - case NGT::ObjectSpace::DistanceTypeL1 : return l1Uint8ForLargeDataset; - default : return l2Uint8ForLargeDataset; - } - break; - } - return l1Uint8ForLargeDataset; - } - } - static void l1Uint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void l2Uint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void l1Float(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void l2Float(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void hammingUint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void jaccardUint8(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void sparseJaccardFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void cosineSimilarityFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void angleFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void normalizedCosineSimilarityFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void normalizedAngleFloat(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - - static void l1Uint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void l2Uint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void l1FloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void l2FloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void hammingUint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void jaccardUint8ForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void sparseJaccardFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void cosineSimilarityFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void angleFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void normalizedCosineSimilarityFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - static void normalizedAngleFloatForLargeDataset(NeighborhoodGraph &graph, NGT::SearchContainer &sc, ObjectDistances &seeds); - - }; -#endif - - class Property { - public: - Property() { setDefault(); } - void setDefault() { - truncationThreshold = 0; - edgeSizeForCreation = NGT_CREATION_EDGE_SIZE; - edgeSizeForSearch = 0; - edgeSizeLimitForCreation = 5; - insertionRadiusCoefficient = NGT_INSERTION_EXPLORATION_COEFFICIENT; - seedSize = NGT_SEED_SIZE; - seedType = SeedTypeNone; - truncationThreadPoolSize = 8; - batchSizeForCreation = 200; - graphType = GraphTypeANNG; - dynamicEdgeSizeBase = 30; - dynamicEdgeSizeRate = 20; - buildTimeLimit = 0.0; - outgoingEdge = 10; - incomingEdge = 80; - } - void clear() { - truncationThreshold = -1; - edgeSizeForCreation = -1; - edgeSizeForSearch = -1; - edgeSizeLimitForCreation = -1; - insertionRadiusCoefficient = -1; - seedSize = -1; - seedType = SeedTypeNone; - truncationThreadPoolSize = -1; - batchSizeForCreation = -1; - graphType = GraphTypeNone; - dynamicEdgeSizeBase = -1; - dynamicEdgeSizeRate = -1; - buildTimeLimit = -1; - outgoingEdge = -1; - incomingEdge = -1; - } - void set(NGT::Property &prop); - void get(NGT::Property &prop); - - void exportProperty(NGT::PropertySet &p) { - p.set("IncrimentalEdgeSizeLimitForTruncation", truncationThreshold); - p.set("EdgeSizeForCreation", edgeSizeForCreation); - p.set("EdgeSizeForSearch", edgeSizeForSearch); - p.set("EdgeSizeLimitForCreation", edgeSizeLimitForCreation); - assert(insertionRadiusCoefficient >= 1.0); - p.set("EpsilonForCreation", insertionRadiusCoefficient - 1.0); - p.set("BatchSizeForCreation", batchSizeForCreation); - p.set("SeedSize", seedSize); - p.set("TruncationThreadPoolSize", truncationThreadPoolSize); - p.set("DynamicEdgeSizeBase", dynamicEdgeSizeBase); - p.set("DynamicEdgeSizeRate", dynamicEdgeSizeRate); - p.set("BuildTimeLimit", buildTimeLimit); - p.set("OutgoingEdge", outgoingEdge); - p.set("IncomingEdge", incomingEdge); - switch (graphType) { - case NeighborhoodGraph::GraphTypeKNNG: p.set("GraphType", "KNNG"); break; - case NeighborhoodGraph::GraphTypeANNG: p.set("GraphType", "ANNG"); break; - case NeighborhoodGraph::GraphTypeBKNNG: p.set("GraphType", "BKNNG"); break; - case NeighborhoodGraph::GraphTypeONNG: p.set("GraphType", "ONNG"); break; - case NeighborhoodGraph::GraphTypeIANNG: p.set("GraphType", "IANNG"); break; - default: std::cerr << "Graph::exportProperty: Fatal error! Invalid Graph Type." << std::endl; abort(); - } - switch (seedType) { - case NeighborhoodGraph::SeedTypeRandomNodes: p.set("SeedType", "RandomNodes"); break; - case NeighborhoodGraph::SeedTypeFixedNodes: p.set("SeedType", "FixedNodes"); break; - case NeighborhoodGraph::SeedTypeFirstNode: p.set("SeedType", "FirstNode"); break; - case NeighborhoodGraph::SeedTypeNone: p.set("SeedType", "None"); break; - case NeighborhoodGraph::SeedTypeAllLeafNodes: p.set("SeedType", "AllLeafNodes"); break; - default: std::cerr << "Graph::exportProperty: Fatal error! Invalid Seed Type." << std::endl; abort(); - } - } - void importProperty(NGT::PropertySet &p) { - setDefault(); - truncationThreshold = p.getl("IncrimentalEdgeSizeLimitForTruncation", truncationThreshold); - edgeSizeForCreation = p.getl("EdgeSizeForCreation", edgeSizeForCreation); - edgeSizeForSearch = p.getl("EdgeSizeForSearch", edgeSizeForSearch); - edgeSizeLimitForCreation = p.getl("EdgeSizeLimitForCreation", edgeSizeLimitForCreation); - insertionRadiusCoefficient = p.getf("EpsilonForCreation", insertionRadiusCoefficient); - insertionRadiusCoefficient += 1.0; - batchSizeForCreation = p.getl("BatchSizeForCreation", batchSizeForCreation); - seedSize = p.getl("SeedSize", seedSize); - truncationThreadPoolSize = p.getl("TruncationThreadPoolSize", truncationThreadPoolSize); - dynamicEdgeSizeBase = p.getl("DynamicEdgeSizeBase", dynamicEdgeSizeBase); - dynamicEdgeSizeRate = p.getl("DynamicEdgeSizeRate", dynamicEdgeSizeRate); - buildTimeLimit = p.getf("BuildTimeLimit", buildTimeLimit); - outgoingEdge = p.getl("OutgoingEdge", outgoingEdge); - incomingEdge = p.getl("IncomingEdge", incomingEdge); - PropertySet::iterator it = p.find("GraphType"); - if (it != p.end()) { - if (it->second == "KNNG") graphType = NeighborhoodGraph::GraphTypeKNNG; - else if (it->second == "ANNG") graphType = NeighborhoodGraph::GraphTypeANNG; - else if (it->second == "BKNNG") graphType = NeighborhoodGraph::GraphTypeBKNNG; - else if (it->second == "ONNG") graphType = NeighborhoodGraph::GraphTypeONNG; - else if (it->second == "IANNG") graphType = NeighborhoodGraph::GraphTypeIANNG; - else { std::cerr << "Graph::importProperty: Fatal error! Invalid Graph Type. " << it->second << std::endl; abort(); } - } - it = p.find("SeedType"); - if (it != p.end()) { - if (it->second == "RandomNodes") seedType = NeighborhoodGraph::SeedTypeRandomNodes; - else if (it->second == "FixedNodes") seedType = NeighborhoodGraph::SeedTypeFixedNodes; - else if (it->second == "FirstNode") seedType = NeighborhoodGraph::SeedTypeFirstNode; - else if (it->second == "None") seedType = NeighborhoodGraph::SeedTypeNone; - else if (it->second == "AllLeafNodes") seedType = NeighborhoodGraph::SeedTypeAllLeafNodes; - else { std::cerr << "Graph::importProperty: Fatal error! Invalid Seed Type. " << it->second << std::endl; abort(); } - } - } - friend std::ostream & operator<<(std::ostream& os, const Property& p) { - os << "truncationThreshold=" << p.truncationThreshold << std::endl; - os << "edgeSizeForCreation=" << p.edgeSizeForCreation << std::endl; - os << "edgeSizeForSearch=" << p.edgeSizeForSearch << std::endl; - os << "edgeSizeLimitForCreation=" << p.edgeSizeLimitForCreation << std::endl; - os << "insertionRadiusCoefficient=" << p.insertionRadiusCoefficient << std::endl; - os << "insertionRadiusCoefficient=" << p.insertionRadiusCoefficient << std::endl; - os << "seedSize=" << p.seedSize << std::endl; - os << "seedType=" << p.seedType << std::endl; - os << "truncationThreadPoolSize=" << p.truncationThreadPoolSize << std::endl; - os << "batchSizeForCreation=" << p.batchSizeForCreation << std::endl; - os << "graphType=" << p.graphType << std::endl; - os << "dynamicEdgeSizeBase=" << p.dynamicEdgeSizeBase << std::endl; - os << "dynamicEdgeSizeRate=" << p.dynamicEdgeSizeRate << std::endl; - os << "outgoingEdge=" << p.outgoingEdge << std::endl; - os << "incomingEdge=" << p.incomingEdge << std::endl; - return os; - } - - int64_t memSize() { return sizeof(*this); } - - int16_t truncationThreshold; - int16_t edgeSizeForCreation; - int16_t edgeSizeForSearch; - int16_t edgeSizeLimitForCreation; - double insertionRadiusCoefficient; - int16_t seedSize; - SeedType seedType; - int16_t truncationThreadPoolSize; - int16_t batchSizeForCreation; - GraphType graphType; - int16_t dynamicEdgeSizeBase; - int16_t dynamicEdgeSizeRate; - float buildTimeLimit; - int16_t outgoingEdge; - int16_t incomingEdge; - }; - - NeighborhoodGraph(): objectSpace(0) { - property.truncationThreshold = NGT_TRUNCATION_THRESHOLD; - // initialize random to generate random seeds -#ifdef NGT_DISABLE_SRAND_FOR_RANDOM - struct timeval randTime; - gettimeofday(&randTime, 0); - srand(randTime.tv_usec); -#endif - } - - inline GraphNode *getNode(ObjectID fid, size_t &minsize) { return repository.get(fid, minsize); } - inline GraphNode *getNode(ObjectID fid) { return repository.VECTOR::get(fid); } - void insertNode(ObjectID id, ObjectDistances &objects) { - switch (property.graphType) { - case GraphTypeANNG: - insertANNGNode(id, objects); - break; - case GraphTypeIANNG: - insertIANNGNode(id, objects); - break; - case GraphTypeONNG: - insertONNGNode(id, objects); - break; - case GraphTypeKNNG: - insertKNNGNode(id, objects); - break; - case GraphTypeBKNNG: - insertBKNNGNode(id, objects); - break; - case GraphTypeNone: - NGTThrowException("NGT::insertNode: GraphType is not specified."); - break; - default: - NGTThrowException("NGT::insertNode: GraphType is invalid."); - break; - } - } - - void insertBKNNGNode(ObjectID id, ObjectDistances &results) { - if (repository.isEmpty(id)) { - repository.insert(id, results); - } else { - GraphNode &rs = *getNode(id); - for (ObjectDistances::iterator ri = results.begin(); ri != results.end(); ri++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - rs.push_back((*ri), repository.allocator); -#else - rs.push_back((*ri)); -#endif - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::sort(rs.begin(repository.allocator), rs.end(repository.allocator)); - ObjectID prev = 0; - for (GraphNode::iterator ri = rs.begin(repository.allocator); ri != rs.end(repository.allocator);) { - if (prev == (*ri).id) { - ri = rs.erase(ri, repository.allocator); - continue; - } - prev = (*ri).id; - ri++; - } -#else - std::sort(rs.begin(), rs.end()); - ObjectID prev = 0; - for (GraphNode::iterator ri = rs.begin(); ri != rs.end();) { - if (prev == (*ri).id) { - ri = rs.erase(ri); - continue; - } - prev = (*ri).id; - ri++; - } -#endif - } - for (ObjectDistances::iterator ri = results.begin(); ri != results.end(); ri++) { - assert(id != (*ri).id); - addBKNNGEdge((*ri).id, id, (*ri).distance); - } - return; - } - - void insertKNNGNode(ObjectID id, ObjectDistances &results) { - repository.insert(id, results); - } - - void insertANNGNode(ObjectID id, ObjectDistances &results) { - repository.insert(id, results); - std::queue truncateQueue; - for (ObjectDistances::iterator ri = results.begin(); ri != results.end(); ri++) { - assert(id != (*ri).id); - if (addEdge((*ri).id, id, (*ri).distance)) { - truncateQueue.push((*ri).id); - } - } - while (!truncateQueue.empty()) { - ObjectID tid = truncateQueue.front(); - truncateEdges(tid); - truncateQueue.pop(); - } - return; - } - - void insertIANNGNode(ObjectID id, ObjectDistances &results) { - repository.insert(id, results); - for (ObjectDistances::iterator ri = results.begin(); ri != results.end(); ri++) { - assert(id != (*ri).id); - addEdgeDeletingExcessEdges((*ri).id, id, (*ri).distance); - } - return; - } - - void insertONNGNode(ObjectID id, ObjectDistances &results) { - if (property.truncationThreshold != 0) { - std::stringstream msg; - msg << "NGT::insertONNGNode: truncation should be disabled!" << std::endl; - NGTThrowException(msg); - } - int count = 0; - for (ObjectDistances::iterator ri = results.begin(); ri != results.end(); ri++, count++) { - assert(id != (*ri).id); - if (count >= property.incomingEdge) { - break; - } - addEdge((*ri).id, id, (*ri).distance); - } - if (static_cast(results.size()) > property.outgoingEdge) { - results.resize(property.outgoingEdge); - } - repository.insert(id, results); - } - - void removeEdgesReliably(ObjectID id); - - int truncateEdgesOptimally(ObjectID id, GraphNode &results, size_t truncationSize); - - int truncateEdges(ObjectID id) { - GraphNode &results = *getNode(id); - if (results.size() == 0) { - return -1; - } - - size_t truncationSize = NGT_TRUNCATION_THRESHOLD; - if (truncationSize < (size_t)property.edgeSizeForCreation) { - truncationSize = property.edgeSizeForCreation; - } - return truncateEdgesOptimally(id, results, truncationSize); - } - - // setup edgeSize - inline size_t getEdgeSize(NGT::SearchContainer &sc) { - size_t edgeSize = INT_MAX; - if (sc.edgeSize < 0) { - if (sc.edgeSize == -2) { - double add = pow(10, (sc.explorationCoefficient - 1.0) * static_cast(property.dynamicEdgeSizeRate)); - edgeSize = add >= static_cast(INT_MAX) ? INT_MAX : property.dynamicEdgeSizeBase + add; - } else { - edgeSize = property.edgeSizeForSearch == 0 ? INT_MAX : property.edgeSizeForSearch; - } - } else { - edgeSize = sc.edgeSize == 0 ? INT_MAX : sc.edgeSize; - } - return edgeSize; - } - - void search(NGT::SearchContainer &sc, ObjectDistances &seeds); - // for milvus - void search(NGT::SearchContainer & sc, ObjectDistances & seeds, const faiss::BitsetView bitset); - -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - template void searchReadOnlyGraph(NGT::SearchContainer &sc, ObjectDistances &seeds); -#endif - - void removeEdge(ObjectID fid, ObjectID rmid) { - GraphNode &rs = *getNode(fid); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - for (GraphNode::iterator ri = rs.begin(repository.allocator); ri != rs.end(repository.allocator); ri++) { - if ((*ri).id == rmid) { - rs.erase(ri, repository.allocator); - break; - } - } -#else - for (GraphNode::iterator ri = rs.begin(); ri != rs.end(); ri++) { - if ((*ri).id == rmid) { - rs.erase(ri); - break; - } - } -#endif - } - - void removeEdge(GraphNode &node, ObjectDistance &edge) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - GraphNode::iterator ni = std::lower_bound(node.begin(repository.allocator), node.end(repository.allocator), edge); - if (ni != node.end(repository.allocator) && (*ni).id == edge.id) { - node.erase(ni, repository.allocator); -#else - GraphNode::iterator ni = std::lower_bound(node.begin(), node.end(), edge); - if (ni != node.end() && (*ni).id == edge.id) { - node.erase(ni); -#endif - return; - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (ni == node.end(repository.allocator)) { -#else - if (ni == node.end()) { -#endif - std::stringstream msg; - msg << "NGT::removeEdge: Cannot found " << edge.id; - NGTThrowException(msg); - } else { - std::stringstream msg; - msg << "NGT::removeEdge: Cannot found " << (*ni).id << ":" << edge.id; - NGTThrowException(msg); - } - } - - void - removeNode(ObjectID id) { - repository.erase(id); - } - - class BooleanVector : public std::vector { - public: - inline BooleanVector(size_t s):std::vector(s, false) {} - inline void insert(size_t i) { std::vector::operator[](i) = true; } - }; - -#ifdef NGT_GRAPH_VECTOR_RESULT - typedef ObjectDistances ResultSet; -#else - typedef std::priority_queue, std::less > ResultSet; -#endif - -#if defined(NGT_GRAPH_CHECK_BOOLEANSET) - typedef BooleanSet DistanceCheckedSet; -#elif defined(NGT_GRAPH_CHECK_VECTOR) - typedef BooleanVector DistanceCheckedSet; -#elif defined(NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET) - typedef HashBasedBooleanSet DistanceCheckedSet; -#else - class DistanceCheckedSet : public unordered_set { - public: - bool operator[](ObjectID id) { return find(id) != end(); } - }; -#endif - - typedef HashBasedBooleanSet DistanceCheckedSetForLargeDataset; - - class NodeWithPosition : public ObjectDistance { - public: - NodeWithPosition(uint32_t p = 0):position(p){} - NodeWithPosition(ObjectDistance &o):ObjectDistance(o), position(0){} - NodeWithPosition &operator=(const NodeWithPosition &n) { - ObjectDistance::operator=(static_cast(n)); - position = n.position; - assert(id != 0); - return *this; - } - uint32_t position; - }; - -#ifdef NGT_GRAPH_UNCHECK_STACK - typedef std::stack UncheckedSet; -#else -#ifdef NGT_GRAPH_BETTER_FIRST_RESTORE - typedef std::priority_queue, std::greater > UncheckedSet; -#else - typedef std::priority_queue, std::greater > UncheckedSet; -#endif -#endif - void setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds); - void setupDistances(NGT::SearchContainer &sc, ObjectDistances &seeds, double (&comparator)(const void*, const void*, size_t)); - - void setupSeeds(SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, - UncheckedSet &unchecked, DistanceCheckedSet &distanceChecked); - -#if !defined(NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET) - void setupSeeds(SearchContainer &sc, ObjectDistances &seeds, ResultSet &results, - UncheckedSet &unchecked, DistanceCheckedSetForLargeDataset &distanceChecked); -#endif - - - int getEdgeSize() {return property.edgeSizeForCreation;} - - ObjectRepository &getObjectRepository() { return objectSpace->getRepository(); } - - ObjectSpace &getObjectSpace() { return *objectSpace; } - - void deleteInMemory() { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - assert(0); -#else - for (std::vector::iterator i = repository.begin(); i != repository.end(); i++) { - if ((*i) != 0) { - delete (*i); - } - } - repository.clear(); -#endif - } - - static double (*getComparator())(const void*, const void*, size_t); - - - protected: - void - addBKNNGEdge(ObjectID target, ObjectID addID, Distance addDistance) { - if (repository.isEmpty(target)) { - ObjectDistances objs; - objs.push_back(ObjectDistance(addID, addDistance)); - repository.insert(target, objs); - return; - } - addEdge(target, addID, addDistance, false); - } - - public: - void addEdge(GraphNode &node, ObjectID addID, Distance addDistance, bool identityCheck = true) { - ObjectDistance obj(addID, addDistance); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - GraphNode::iterator ni = std::lower_bound(node.begin(repository.allocator), node.end(repository.allocator), obj); - if ((ni != node.end(repository.allocator)) && ((*ni).id == addID)) { - if (identityCheck) { - std::stringstream msg; - msg << "NGT::addEdge: already existed! " << (*ni).id << ":" << addID; - NGTThrowException(msg); - } - return; - } -#else - GraphNode::iterator ni = std::lower_bound(node.begin(), node.end(), obj); - if ((ni != node.end()) && ((*ni).id == addID)) { - if (identityCheck) { - std::stringstream msg; - msg << "NGT::addEdge: already existed! " << (*ni).id << ":" << addID; - NGTThrowException(msg); - } - return; - } -#endif -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - node.insert(ni, obj, repository.allocator); -#else - node.insert(ni, obj); -#endif - } - - // identityCheck is checking whether the same edge has already added to the node. - // return whether truncation is needed that means the node has too many edges. - bool addEdge(ObjectID target, ObjectID addID, Distance addDistance, bool identityCheck = true) { - size_t minsize = 0; - GraphNode &node = property.truncationThreshold == 0 ? *getNode(target) : *getNode(target, minsize); - addEdge(node, addID, addDistance, identityCheck); - if ((size_t)property.truncationThreshold != 0 && node.size() - minsize > - (size_t)property.truncationThreshold) { - return true; - } - return false; - } - - void addEdgeDeletingExcessEdges(ObjectID target, ObjectID addID, Distance addDistance, bool identityCheck = true) { - GraphNode &node = *getNode(target); - size_t kEdge = property.edgeSizeForCreation - 1; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - if (node.size() > kEdge && node.at(kEdge, repository.allocator).distance >= addDistance) { - GraphNode &linkedNode = *getNode(node.at(kEdge, repository.allocator).id); - ObjectDistance linkedNodeEdge(target, node.at(kEdge, repository.allocator).distance); - if ((linkedNode.size() > kEdge) && node.at(kEdge, repository.allocator).distance >= - linkedNode.at(kEdge, repository.allocator).distance) { -#else - if (node.size() > kEdge && node[kEdge].distance >= addDistance) { - GraphNode &linkedNode = *getNode(node[kEdge].id); - ObjectDistance linkedNodeEdge(target, node[kEdge].distance); - if ((linkedNode.size() > kEdge) && node[kEdge].distance >= linkedNode[kEdge].distance) { -#endif - try { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - removeEdge(node, node.at(kEdge, repository.allocator)); -#else - removeEdge(node, node[kEdge]); -#endif - } catch (Exception &exp) { - std::stringstream msg; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - msg << "addEdge: Cannot remove. (a) " << target << "," << addID << "," << node.at(kEdge, repository.allocator).id << "," << node.at(kEdge, repository.allocator).distance; -#else - msg << "addEdge: Cannot remove. (a) " << target << "," << addID << "," << node[kEdge].id << "," << node[kEdge].distance; -#endif - msg << ":" << exp.what(); - NGTThrowException(msg.str()); - } - try { - removeEdge(linkedNode, linkedNodeEdge); - } catch (Exception &exp) { - std::stringstream msg; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - msg << "addEdge: Cannot remove. (b) " << target << "," << addID << "," << node.at(kEdge, repository.allocator).id << "," << node.at(kEdge, repository.allocator).distance; -#else - msg << "addEdge: Cannot remove. (b) " << target << "," << addID << "," << node[kEdge].id << "," << node[kEdge].distance; -#endif - msg << ":" << exp.what(); - NGTThrowException(msg.str()); - } - } - } - addEdge(node, addID, addDistance, identityCheck); - } - - -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - void loadSearchGraph(const std::string &database) { - std::ifstream isg(database + "/grp"); - NeighborhoodGraph::searchRepository.deserialize(isg, NeighborhoodGraph::getObjectRepository()); - } -#endif - - public: - - virtual int64_t memSize() { return repository.memSize() + searchRepository.memSize() + property.memSize() + objectSpace->memSize(); } - GraphRepository repository; - ObjectSpace *objectSpace; - -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - SearchGraphRepository searchRepository; -#endif - - NeighborhoodGraph::Property property; - - }; // NeighborhoodGraph - - } // NGT - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/GraphOptimizer.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/GraphOptimizer.h deleted file mode 100644 index 4d6d285600..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/GraphOptimizer.h +++ /dev/null @@ -1,823 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "GraphReconstructor.h" -#include "Optimizer.h" - -namespace NGT { - class GraphOptimizer { - public: - class ANNGEdgeOptimizationParameter { - public: - ANNGEdgeOptimizationParameter() { - initialize(); - } - void initialize() { - noOfQueries = 200; - noOfResults = 50; - noOfThreads = 16; - targetAccuracy = 0.9; // when epsilon is 0.0 and all of the edges are used - targetNoOfObjects = 0; - noOfSampleObjects = 100000; - maxNoOfEdges = 100; - } - size_t noOfQueries; - size_t noOfResults; - size_t noOfThreads; - float targetAccuracy; - size_t targetNoOfObjects; - size_t noOfSampleObjects; - size_t maxNoOfEdges; - }; - - GraphOptimizer(bool unlog = false) { - init(); - logDisabled = unlog; - } - - GraphOptimizer(int outgoing, int incoming, int nofqs, int nofrs, - float baseAccuracyFrom, float baseAccuracyTo, - float rateAccuracyFrom, float rateAccuracyTo, - double gte, double m, - bool unlog // stderr log is disabled. - ) { - init(); - set(outgoing, incoming, nofqs, nofrs, baseAccuracyFrom, baseAccuracyTo, - rateAccuracyFrom, rateAccuracyTo, gte, m); - logDisabled = unlog; - } - - void init() { - numOfOutgoingEdges = 10; - numOfIncomingEdges= 120; - numOfQueries = 100; - numOfResults = 20; - baseAccuracyRange = std::pair(0.30, 0.50); - rateAccuracyRange = std::pair(0.80, 0.90); - gtEpsilon = 0.1; - margin = 0.2; - logDisabled = false; - shortcutReduction = true; - searchParameterOptimization = true; - prefetchParameterOptimization = true; - accuracyTableGeneration = true; - } - - void adjustSearchCoefficients(const std::string indexPath){ - NGT::Index index(indexPath); - NGT::GraphIndex &graph = static_cast(index.getIndex()); - NGT::Optimizer optimizer(index); - if (logDisabled) { - optimizer.disableLog(); - } else { - optimizer.enableLog(); - } - try { - auto coefficients = optimizer.adjustSearchEdgeSize(baseAccuracyRange, rateAccuracyRange, numOfQueries, gtEpsilon, margin); - NGT::NeighborhoodGraph::Property &prop = graph.getGraphProperty(); - prop.dynamicEdgeSizeBase = coefficients.first; - prop.dynamicEdgeSizeRate = coefficients.second; - prop.edgeSizeForSearch = -2; - } catch(NGT::Exception &err) { - std::stringstream msg; - msg << "Optimizer::adjustSearchCoefficients: Cannot adjust the search coefficients. " << err.what(); - NGTThrowException(msg); - } - graph.saveIndex(indexPath); - } - - static double measureQueryTime(NGT::Index &index, size_t start) { - NGT::ObjectSpace &objectSpace = index.getObjectSpace(); - NGT::ObjectRepository &objectRepository = objectSpace.getRepository(); - size_t nQueries = 200; - nQueries = objectRepository.size() - 1 < nQueries ? objectRepository.size() - 1 : nQueries; - - size_t step = objectRepository.size() / nQueries; - assert(step != 0); - std::vector ids; - for (size_t startID = start; startID < step; startID++) { - for (size_t id = startID; id < objectRepository.size(); id += step) { - if (!objectRepository.isEmpty(id)) { - ids.push_back(id); - } - } - if (ids.size() >= nQueries) { - ids.resize(nQueries); - break; - } - } - if (nQueries > ids.size()) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("# of Queries is not enough."); -// std::cerr << "# of Queries is not enough." << std::endl; - return DBL_MAX; - } - - NGT::Timer timer; - timer.reset(); - for (auto id = ids.begin(); id != ids.end(); id++) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - NGT::Object *obj = objectSpace.allocateObject(*objectRepository.get(*id)); - NGT::SearchContainer searchContainer(*obj); -#else - NGT::SearchContainer searchContainer(*objectRepository.get(*id)); -#endif - NGT::ObjectDistances objects; - searchContainer.setResults(&objects); - searchContainer.setSize(10); - searchContainer.setEpsilon(0.1); - timer.restart(); - index.search(searchContainer); - timer.stop(); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - objectSpace.deleteObject(obj); -#endif - } - return timer.time * 1000.0; - } - - static std::pair searchMinimumQueryTime(NGT::Index &index, size_t prefetchOffset, - int maxPrefetchSize, size_t seedID) { - NGT::ObjectSpace &objectSpace = index.getObjectSpace(); - int step = 256; - int prevPrefetchSize = 64; - size_t minPrefetchSize = 0; - double minTime = DBL_MAX; - for (step = 256; step != 32; step /= 2) { - double prevTime = DBL_MAX; - for (int prefetchSize = prevPrefetchSize - step < 64 ? 64 : prevPrefetchSize - step; prefetchSize <= maxPrefetchSize; prefetchSize += step) { - objectSpace.setPrefetchOffset(prefetchOffset); - objectSpace.setPrefetchSize(prefetchSize); - double time = measureQueryTime(index, seedID); - if (prevTime < time) { - break; - } - prevTime = time; - prevPrefetchSize = prefetchSize; - } - if (minTime > prevTime) { - minTime = prevTime; - minPrefetchSize = prevPrefetchSize; - } - } - return std::make_pair(minPrefetchSize, minTime); - } - - static std::pair adjustPrefetchParameters(NGT::Index &index) { - - bool gridSearch = false; - { - double time = measureQueryTime(index, 1); - if (time < 500.0) { - gridSearch = true; - } - } - - size_t prefetchOffset = 0; - size_t prefetchSize = 0; - std::vector> mins; - NGT::ObjectSpace &objectSpace = index.getObjectSpace(); - int maxSize = objectSpace.getByteSizeOfObject() * 4; - maxSize = maxSize < 64 * 28 ? maxSize : 64 * 28; - for (int trial = 0; trial < 10; trial++) { - size_t minps = 0; - size_t minpo = 0; - if (gridSearch) { - double minTime = DBL_MAX; - for (size_t po = 1; po <= 10; po++) { - auto min = searchMinimumQueryTime(index, po, maxSize, trial + 1); - if (minTime > min.second) { - minTime = min.second; - minps = min.first; - minpo = po; - } - } - } else { - double prevTime = DBL_MAX; - for (size_t po = 1; po <= 10; po++) { - auto min = searchMinimumQueryTime(index, po, maxSize, trial + 1); - if (prevTime < min.second) { - break; - } - prevTime = min.second; - minps = min.first; - minpo = po; - } - } - if (std::find(mins.begin(), mins.end(), std::make_pair(minpo, minps)) != mins.end()) { - prefetchOffset = minpo; - prefetchSize = minps; - mins.push_back(std::make_pair(minpo, minps)); - break; - } - mins.push_back(std::make_pair(minpo, minps)); - } - return std::make_pair(prefetchOffset, prefetchSize); - } - - void execute(NGT::Index & index_) - { - NGT::GraphIndex & graphIndex = static_cast(index_.getIndex()); - if (numOfOutgoingEdges > 0 || numOfIncomingEdges > 0) - { - if (!logDisabled) - { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphOptimizer: adjusting outgoing and incoming edges..."); -// std::cerr << "GraphOptimizer: adjusting outgoing and incoming edges..." << std::endl; - } - NGT::Timer timer; - timer.start(); - std::vector graph; - try - { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Optimizer::execute: Extract the graph data."); -// std::cerr << "Optimizer::execute: Extract the graph data." << std::endl; - // extract only edges from the index to reduce the memory usage. - NGT::GraphReconstructor::extractGraph(graph, graphIndex); - NeighborhoodGraph::Property & prop = graphIndex.getGraphProperty(); - if (prop.graphType != NGT::NeighborhoodGraph::GraphTypeANNG) - { - NGT::GraphReconstructor::convertToANNG(graph); - } - NGT::GraphReconstructor::reconstructGraph(graph, graphIndex, numOfOutgoingEdges, numOfIncomingEdges); - timer.stop(); - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Optimizer::execute: Graph reconstruction time=" + std::to_string(timer.time) + " (sec) "); -// std::cerr << "Optimizer::execute: Graph reconstruction time=" << timer.time << " (sec) " << std::endl; - prop.graphType = NGT::NeighborhoodGraph::GraphTypeONNG; - } - catch (NGT::Exception & err) - { - throw(err); - } - } - - if (shortcutReduction) - { - if (!logDisabled) - { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphOptimizer: redusing shortcut edges..."); -// std::cerr << "GraphOptimizer: redusing shortcut edges..." << std::endl; - } - try - { - NGT::Timer timer; - timer.start(); - NGT::GraphReconstructor::adjustPathsEffectively(graphIndex); - timer.stop(); - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Optimizer::execute: Path adjustment time=" + std::to_string(timer.time) + " (sec) "); -// std::cerr << "Optimizer::execute: Path adjustment time=" << timer.time << " (sec) " << std::endl; - } - catch (NGT::Exception & err) - { - throw(err); - } - } - } - - void optimizeSearchParameters(NGT::Index & outIndex) - { - if (searchParameterOptimization) - { - if (!logDisabled) - { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphOptimizer: optimizing search parameters..."); -// std::cerr << "GraphOptimizer: optimizing search parameters..." << std::endl; - } - NGT::GraphIndex & outGraph = static_cast(outIndex.getIndex()); - NGT::Optimizer optimizer(outIndex); - if (logDisabled) - { - optimizer.disableLog(); - } - else - { - optimizer.enableLog(); - } - try - { - auto coefficients = optimizer.adjustSearchEdgeSize(baseAccuracyRange, rateAccuracyRange, numOfQueries, gtEpsilon, margin); - NGT::NeighborhoodGraph::Property & prop = outGraph.getGraphProperty(); - prop.dynamicEdgeSizeBase = coefficients.first; - prop.dynamicEdgeSizeRate = coefficients.second; - prop.edgeSizeForSearch = -2; - } - catch (NGT::Exception & err) - { - std::stringstream msg; - msg << "Optimizer::execute: Cannot adjust the search coefficients. " << err.what(); - NGTThrowException(msg); - } - } - - if (searchParameterOptimization || prefetchParameterOptimization || accuracyTableGeneration) - { - // NGT::GraphIndex & outGraph = static_cast(*outIndex.getIndex()); - if (prefetchParameterOptimization) - { - if (!logDisabled) - { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphOptimizer: optimizing prefetch parameters..."); -// std::cerr << "GraphOptimizer: optimizing prefetch parameters..." << std::endl; - } - try - { - auto prefetch = adjustPrefetchParameters(outIndex); - NGT::Property prop; - outIndex.getProperty(prop); - prop.prefetchOffset = prefetch.first; - prop.prefetchSize = prefetch.second; - outIndex.setProperty(prop); - } - catch (NGT::Exception & err) - { - std::stringstream msg; - msg << "Optimizer::execute: Cannot adjust prefetch parameters. " << err.what(); - NGTThrowException(msg); - } - } - if (accuracyTableGeneration) - { - if (!logDisabled) - { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphOptimizer: generating the accuracy table..."); -// std::cerr << "GraphOptimizer: generating the accuracy table..." << std::endl; - } - try - { - auto table = NGT::Optimizer::generateAccuracyTable(outIndex, numOfResults, numOfQueries); - NGT::Index::AccuracyTable accuracyTable(table); - NGT::Property prop; - outIndex.getProperty(prop); - prop.accuracyTable = accuracyTable.getString(); - outIndex.setProperty(prop); - } - catch (NGT::Exception & err) - { - std::stringstream msg; - msg << "Optimizer::execute: Cannot generate the accuracy table. " << err.what(); - NGTThrowException(msg); - } - } - } - } - - void execute( - const std::string inIndexPath, - const std::string outIndexPath - ){ - if (access(outIndexPath.c_str(), 0) == 0) { - std::stringstream msg; - msg << "Optimizer::execute: The specified index exists. " << outIndexPath; - NGTThrowException(msg); - } - - const std::string com = "cp -r " + inIndexPath + " " + outIndexPath; - int stat = system(com.c_str()); - if (stat != 0) { - std::stringstream msg; - msg << "Optimizer::execute: Cannot create the specified index. " << outIndexPath; - NGTThrowException(msg); - } - - { - NGT::StdOstreamRedirector redirector(logDisabled); - NGT::GraphIndex graphIndex(outIndexPath, false); - if (numOfOutgoingEdges > 0 || numOfIncomingEdges > 0) { - if (!logDisabled) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphOptimizer: adjusting outgoing and incoming edges..."); -// std::cerr << "GraphOptimizer: adjusting outgoing and incoming edges..." << std::endl; - } - redirector.begin(); - NGT::Timer timer; - timer.start(); - std::vector graph; - try { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Optimizer::execute: Extract the graph data."); -// std::cerr << "Optimizer::execute: Extract the graph data." << std::endl; - // extract only edges from the index to reduce the memory usage. - NGT::GraphReconstructor::extractGraph(graph, graphIndex); - NeighborhoodGraph::Property &prop = graphIndex.getGraphProperty(); - if (prop.graphType != NGT::NeighborhoodGraph::GraphTypeANNG) { - NGT::GraphReconstructor::convertToANNG(graph); - } - NGT::GraphReconstructor::reconstructGraph(graph, graphIndex, numOfOutgoingEdges, numOfIncomingEdges); - timer.stop(); - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Optimizer::execute: Graph reconstruction time=" + std::to_string(timer.time) + " (sec) "); -// std::cerr << "Optimizer::execute: Graph reconstruction time=" << timer.time << " (sec) " << std::endl; - graphIndex.saveGraph(outIndexPath); - prop.graphType = NGT::NeighborhoodGraph::GraphTypeONNG; - graphIndex.saveProperty(outIndexPath); - } catch (NGT::Exception &err) { - redirector.end(); - throw(err); - } - } - - if (shortcutReduction) { - if (!logDisabled) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphOptimizer: redusing shortcut edges..."); -// std::cerr << "GraphOptimizer: redusing shortcut edges..." << std::endl; - } - try { - NGT::Timer timer; - timer.start(); - NGT::GraphReconstructor::adjustPathsEffectively(graphIndex); - timer.stop(); - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("optimizer::execute: path adjustment time=" + std::to_string(timer.time) + " (sec) "); -// std::cerr << "optimizer::execute: path adjustment time=" << timer.time << " (sec) " << std::endl; - graphIndex.saveGraph(outIndexPath); - } catch (NGT::Exception &err) { - redirector.end(); - throw(err); - } - } - redirector.end(); - } - - optimizeSearchParameters(outIndexPath); - - } - - void optimizeSearchParameters(const std::string outIndexPath) - { - - if (searchParameterOptimization) { - if (!logDisabled) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphOptimizer: optimizing search parameters..."); -// std::cerr << "GraphOptimizer: optimizing search parameters..." << std::endl; - } - NGT::Index outIndex(outIndexPath); - NGT::GraphIndex &outGraph = static_cast(outIndex.getIndex()); - NGT::Optimizer optimizer(outIndex); - if (logDisabled) { - optimizer.disableLog(); - } else { - optimizer.enableLog(); - } - try { - auto coefficients = optimizer.adjustSearchEdgeSize(baseAccuracyRange, rateAccuracyRange, numOfQueries, gtEpsilon, margin); - NGT::NeighborhoodGraph::Property &prop = outGraph.getGraphProperty(); - prop.dynamicEdgeSizeBase = coefficients.first; - prop.dynamicEdgeSizeRate = coefficients.second; - prop.edgeSizeForSearch = -2; - outGraph.saveProperty(outIndexPath); - } catch(NGT::Exception &err) { - std::stringstream msg; - msg << "Optimizer::execute: Cannot adjust the search coefficients. " << err.what(); - NGTThrowException(msg); - } - } - - if (searchParameterOptimization || prefetchParameterOptimization || accuracyTableGeneration) { - NGT::StdOstreamRedirector redirector(logDisabled); - redirector.begin(); - NGT::Index outIndex(outIndexPath, true); - NGT::GraphIndex &outGraph = static_cast(outIndex.getIndex()); - if (prefetchParameterOptimization) { - if (!logDisabled) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphOptimizer: optimizing prefetch parameters..."); -// std::cerr << "GraphOptimizer: optimizing prefetch parameters..." << std::endl; - } - try { - auto prefetch = adjustPrefetchParameters(outIndex); - NGT::Property prop; - outIndex.getProperty(prop); - prop.prefetchOffset = prefetch.first; - prop.prefetchSize = prefetch.second; - outIndex.setProperty(prop); - outGraph.saveProperty(outIndexPath); - } catch(NGT::Exception &err) { - redirector.end(); - std::stringstream msg; - msg << "Optimizer::execute: Cannot adjust prefetch parameters. " << err.what(); - NGTThrowException(msg); - } - } - if (accuracyTableGeneration) { - if (!logDisabled) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphOptimizer: generating the accuracy table..."); -// std::cerr << "GraphOptimizer: generating the accuracy table..." << std::endl; - } - try { - auto table = NGT::Optimizer::generateAccuracyTable(outIndex, numOfResults, numOfQueries); - NGT::Index::AccuracyTable accuracyTable(table); - NGT::Property prop; - outIndex.getProperty(prop); - prop.accuracyTable = accuracyTable.getString(); - outIndex.setProperty(prop); - } catch(NGT::Exception &err) { - redirector.end(); - std::stringstream msg; - msg << "Optimizer::execute: Cannot generate the accuracy table. " << err.what(); - NGTThrowException(msg); - } - } - try { - outGraph.saveProperty(outIndexPath); - redirector.end(); - } catch(NGT::Exception &err) { - redirector.end(); - std::stringstream msg; - msg << "Optimizer::execute: Cannot save the index. " << outIndexPath << err.what(); - NGTThrowException(msg); - } - - } - } - - static std::tuple // optimized # of edges, accuracy, accuracy gain per edge - optimizeNumberOfEdgesForANNG(NGT::Optimizer &optimizer, std::vector> &queries, - size_t nOfResults, float targetAccuracy, size_t maxNoOfEdges) { - - NGT::Index &index = optimizer.index; - std::stringstream queryStream; - std::stringstream gtStream; - float maxEpsilon = 0.0; - - optimizer.generatePseudoGroundTruth(queries, maxEpsilon, queryStream, gtStream); - - size_t nOfEdges = 0; - double accuracy = 0.0; - size_t prevEdge = 0; - double prevAccuracy = 0.0; - double gain = 0.0; - { - std::vector graph; - NGT::GraphReconstructor::extractGraph(graph, static_cast(index.getIndex())); - float epsilon = 0.0; - for (size_t edgeSize = 5; edgeSize <= maxNoOfEdges; edgeSize += (edgeSize >= 10 ? 10 : 5) ) { - NGT::GraphReconstructor::reconstructANNGFromANNG(graph, index, edgeSize); - NGT::Command::SearchParameter searchParameter; - searchParameter.size = nOfResults; - searchParameter.outputMode = 'e'; - searchParameter.edgeSize = 0; - searchParameter.beginOfEpsilon = searchParameter.endOfEpsilon = epsilon; - queryStream.clear(); - queryStream.seekg(0, std::ios_base::beg); - std::vector acc; - NGT::Optimizer::search(index, queryStream, gtStream, searchParameter, acc); - if (acc.size() == 0) { - NGTThrowException("Fatal error! Cannot get any accuracy value."); - } - accuracy = acc[0].meanAccuracy; - nOfEdges = edgeSize; - if (prevEdge != 0) { - gain = (accuracy - prevAccuracy) / (edgeSize - prevEdge); - } - if (accuracy >= targetAccuracy) { - break; - } - prevEdge = edgeSize; - prevAccuracy = accuracy; - } - } - return std::make_tuple(nOfEdges, accuracy, gain); - } - - static std::pair - optimizeNumberOfEdgesForANNG(NGT::Index &index, ANNGEdgeOptimizationParameter ¶meter) - { - if (parameter.targetNoOfObjects == 0) { - parameter.targetNoOfObjects = index.getObjectRepositorySize(); - } - - NGT::Optimizer optimizer(index, parameter.noOfResults); - - NGT::ObjectRepository &objectRepository = index.getObjectSpace().getRepository(); - NGT::GraphIndex &graphIndex = static_cast(index.getIndex()); - NGT::GraphAndTreeIndex &treeIndex = static_cast(index.getIndex()); - NGT::GraphRepository &graphRepository = graphIndex.NeighborhoodGraph::repository; - //float targetAccuracy = parameter.targetAccuracy + FLT_EPSILON; - - std::vector> queries; - optimizer.extractAndRemoveRandomQueries(parameter.noOfQueries, queries); - { - graphRepository.deleteAll(); - treeIndex.DVPTree::deleteAll(); - treeIndex.DVPTree::insertNode(treeIndex.DVPTree::leafNodes.allocate()); - } - - NGT::NeighborhoodGraph::Property &prop = graphIndex.getGraphProperty(); - prop.edgeSizeForCreation = parameter.maxNoOfEdges; - std::vector>> transition; - size_t targetNo = 12500; - for (;targetNo <= objectRepository.size() && targetNo <= parameter.noOfSampleObjects; - targetNo *= 2) { - ObjectID id = 0; - size_t noOfObjects = 0; - for (id = 1; id < objectRepository.size(); ++id) { - if (!objectRepository.isEmpty(id)) { - noOfObjects++; - } - if (noOfObjects >= targetNo) { - break; - } - } - id++; - index.createIndex(parameter.noOfThreads, id); - auto edge = NGT::GraphOptimizer::optimizeNumberOfEdgesForANNG(optimizer, queries, parameter.noOfResults, parameter.targetAccuracy, parameter.maxNoOfEdges); - transition.push_back(make_pair(noOfObjects, edge)); - } - if (transition.size() < 2) { - std::stringstream msg; - msg << "Optimizer::optimizeNumberOfEdgesForANNG: Cannot optimize the number of edges. Too small object set. # of objects=" << objectRepository.size() << " target No.=" << targetNo; - NGTThrowException(msg); - } - double edgeRate = 0.0; - double accuracyRate = 0.0; - for (auto i = transition.begin(); i != transition.end() - 1; ++i) { - edgeRate += std::get<0>((*(i + 1)).second) - std::get<0>((*i).second); - accuracyRate += std::get<1>((*(i + 1)).second) - std::get<1>((*i).second); - } - edgeRate /= (transition.size() - 1); - accuracyRate /= (transition.size() - 1); - size_t estimatedEdge = std::get<0>(transition[0].second) + - edgeRate * (log2(parameter.targetNoOfObjects) - log2(transition[0].first)); - float estimatedAccuracy = std::get<1>(transition[0].second) + - accuracyRate * (log2(parameter.targetNoOfObjects) - log2(transition[0].first)); - if (estimatedAccuracy < parameter.targetAccuracy) { - estimatedEdge += (parameter.targetAccuracy - estimatedAccuracy) / std::get<2>(transition.back().second); - estimatedAccuracy = parameter.targetAccuracy; - } - - if (estimatedEdge == 0) { - std::stringstream msg; - msg << "Optimizer::optimizeNumberOfEdgesForANNG: Cannot optimize the number of edges. " - << estimatedEdge << ":" << estimatedAccuracy << " # of objects=" << objectRepository.size(); - NGTThrowException(msg); - } - - return std::make_pair(estimatedEdge, estimatedAccuracy); - } - - std::pair - optimizeNumberOfEdgesForANNG(const std::string indexPath, GraphOptimizer::ANNGEdgeOptimizationParameter ¶meter) { - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - NGTThrowException("Not implemented for NGT with the shared memory option."); -#endif - - NGT::StdOstreamRedirector redirector(logDisabled); - redirector.begin(); - - try { - NGT::Index index(indexPath, false); - - auto optimizedEdge = NGT::GraphOptimizer::optimizeNumberOfEdgesForANNG(index, parameter); - - - NGT::GraphIndex &graph = static_cast(index.getIndex()); - size_t noOfEdges = (optimizedEdge.first + 10) / 5 * 5; - if (noOfEdges > parameter.maxNoOfEdges) { - noOfEdges = parameter.maxNoOfEdges; - } - - NGT::NeighborhoodGraph::Property &prop = graph.getGraphProperty(); - prop.edgeSizeForCreation = noOfEdges; - static_cast(index.getIndex()).saveProperty(indexPath); - optimizedEdge.first = noOfEdges; - redirector.end(); - return optimizedEdge; - } catch (NGT::Exception &err) { - redirector.end(); - throw(err); - } - } - - void set(int outgoing, int incoming, int nofqs, int nofrs, - float baseAccuracyFrom, float baseAccuracyTo, - float rateAccuracyFrom, float rateAccuracyTo, - double gte, double m - ) { - set(outgoing, incoming, nofqs, nofrs); - setExtension(baseAccuracyFrom, baseAccuracyTo, rateAccuracyFrom, rateAccuracyTo, gte, m); - } - - void set(int outgoing, int incoming, int nofqs, int nofrs) { - if (outgoing >= 0) { - numOfOutgoingEdges = outgoing; - } - if (incoming >= 0) { - numOfIncomingEdges = incoming; - } - if (nofqs > 0) { - numOfQueries = nofqs; - } - if (nofrs > 0) { - numOfResults = nofrs; - } - } - - void setExtension(float baseAccuracyFrom, float baseAccuracyTo, - float rateAccuracyFrom, float rateAccuracyTo, - double gte, double m - ) { - if (baseAccuracyFrom > 0.0) { - baseAccuracyRange.first = baseAccuracyFrom; - } - if (baseAccuracyTo > 0.0) { - baseAccuracyRange.second = baseAccuracyTo; - } - if (rateAccuracyFrom > 0.0) { - rateAccuracyRange.first = rateAccuracyFrom; - } - if (rateAccuracyTo > 0.0) { - rateAccuracyRange.second = rateAccuracyTo; - } - if (gte >= -1.0) { - gtEpsilon = gte; - } - if (m > 0.0) { - margin = m; - } - } - - // obsolete because of a lack of a parameter - void set(int outgoing, int incoming, int nofqs, - float baseAccuracyFrom, float baseAccuracyTo, - float rateAccuracyFrom, float rateAccuracyTo, - double gte, double m - ) { - if (outgoing >= 0) { - numOfOutgoingEdges = outgoing; - } - if (incoming >= 0) { - numOfIncomingEdges = incoming; - } - if (nofqs > 0) { - numOfQueries = nofqs; - } - if (baseAccuracyFrom > 0.0) { - baseAccuracyRange.first = baseAccuracyFrom; - } - if (baseAccuracyTo > 0.0) { - baseAccuracyRange.second = baseAccuracyTo; - } - if (rateAccuracyFrom > 0.0) { - rateAccuracyRange.first = rateAccuracyFrom; - } - if (rateAccuracyTo > 0.0) { - rateAccuracyRange.second = rateAccuracyTo; - } - if (gte >= -1.0) { - gtEpsilon = gte; - } - if (m > 0.0) { - margin = m; - } - } - - void setProcessingModes(bool shortcut = true, bool searchParameter = true, bool prefetchParameter = true, - bool accuracyTable = true) { - shortcutReduction = shortcut; - searchParameterOptimization = searchParameter; - prefetchParameterOptimization = prefetchParameter; - accuracyTableGeneration = accuracyTable; - } - - size_t numOfOutgoingEdges; - size_t numOfIncomingEdges; - std::pair baseAccuracyRange; - std::pair rateAccuracyRange; - size_t numOfQueries; - size_t numOfResults; - double gtEpsilon; - double margin; - bool logDisabled; - bool shortcutReduction; - bool searchParameterOptimization; - bool prefetchParameterOptimization; - bool accuracyTableGeneration; - }; - -}; // NGT - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/GraphReconstructor.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/GraphReconstructor.h deleted file mode 100644 index 167d7f98b1..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/GraphReconstructor.h +++ /dev/null @@ -1,988 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include -#include -#include -#include "defines.h" - -#ifdef _OPENMP -#include -#else -#warning "*** OMP is *NOT* available! ***" -#endif - -namespace NGT { - -class GraphReconstructor { - public: - static void extractGraph(std::vector &graph, NGT::GraphIndex &graphIndex) { - graph.reserve(graphIndex.repository.size()); - for (size_t id = 1; id < graphIndex.repository.size(); id++) { - if (id % 1000000 == 0) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor::extractGraph: Processed " + std::to_string(id) + " objects."); -// std::cerr << "GraphReconstructor::extractGraph: Processed " << id << " objects." << std::endl; - } - try { - NGT::GraphNode &node = *graphIndex.getNode(id); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::ObjectDistances nd; - nd.reserve(node.size()); - for (auto n = node.begin(graphIndex.repository.allocator); n != node.end(graphIndex.repository.allocator); ++n) { - nd.push_back(ObjectDistance((*n).id, (*n).distance)); - } - graph.push_back(nd); -#else - graph.push_back(node); -#endif - if (graph.back().size() != graph.back().capacity()) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor::extractGraph: Warning! The graph size must be the same as the capacity. " + std::to_string(id)); -// std::cerr << "GraphReconstructor::extractGraph: Warning! The graph size must be the same as the capacity. " << id << std::endl; - } - } catch(NGT::Exception &err) { - graph.push_back(NGT::ObjectDistances()); - continue; - } - } - - } - - - - - static void - adjustPaths(NGT::Index &outIndex) - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("construct index is not implemented."); -// std::cerr << "construct index is not implemented." << std::endl; - exit(1); -#else - NGT::GraphIndex &outGraph = dynamic_cast(outIndex.getIndex()); - size_t rStartRank = 0; - std::list > tmpGraph; - for (size_t id = 1; id < outGraph.repository.size(); id++) { - NGT::GraphNode &node = *outGraph.getNode(id); - tmpGraph.push_back(std::pair(id, node)); - if (node.size() > rStartRank) { - node.resize(rStartRank); - } - } - size_t removeCount = 0; - for (size_t rank = rStartRank; ; rank++) { - bool edge = false; - Timer timer; - for (auto it = tmpGraph.begin(); it != tmpGraph.end();) { - size_t id = (*it).first; - try { - NGT::GraphNode &node = (*it).second; - if (rank >= node.size()) { - it = tmpGraph.erase(it); - continue; - } - edge = true; - if (rank >= 1 && node[rank - 1].distance > node[rank].distance) { -// std::cerr << "distance order is wrong!" << std::endl; -// std::cerr << id << ":" << rank << ":" << node[rank - 1].id << ":" << node[rank].id << std::endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("distance order is wrong!"); - (*NGT_LOG_DEBUG_)(std::to_string(id) + ":" + std::to_string(rank) + ":" + std::to_string(node[rank - 1].id) + ":" + std::to_string(node[rank].id)); - } - } - NGT::GraphNode &tn = *outGraph.getNode(id); - volatile bool found = false; - if (rank < 1000) { - for (size_t tni = 0; tni < tn.size() && !found; tni++) { - if (tn[tni].id == node[rank].id) { - continue; - } - NGT::GraphNode &dstNode = *outGraph.getNode(tn[tni].id); - for (size_t dni = 0; dni < dstNode.size(); dni++) { - if ((dstNode[dni].id == node[rank].id) && (dstNode[dni].distance < node[rank].distance)) { - found = true; - break; - } - } - } - } else { -#ifdef _OPENMP -#pragma omp parallel for num_threads(10) -#endif - for (size_t tni = 0; tni < tn.size(); tni++) { - if (found) { - continue; - } - if (tn[tni].id == node[rank].id) { - continue; - } - NGT::GraphNode &dstNode = *outGraph.getNode(tn[tni].id); - for (size_t dni = 0; dni < dstNode.size(); dni++) { - if ((dstNode[dni].id == node[rank].id) && (dstNode[dni].distance < node[rank].distance)) { - found = true; - } - } - } - } - if (!found) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - outGraph.addEdge(id, node.at(i, outGraph.repository.allocator).id, - node.at(i, outGraph.repository.allocator).distance, true); -#else - tn.push_back(NGT::ObjectDistance(node[rank].id, node[rank].distance)); -#endif - } else { - removeCount++; - } - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - it++; - continue; - } - it++; - } - if (edge == false) { - break; - } - } -#endif // NGT_SHARED_MEMORY_ALLOCATOR - } - - static void - adjustPathsEffectively(NGT::Index &outIndex) - { - NGT::GraphIndex &outGraph = dynamic_cast(outIndex.getIndex()); - adjustPathsEffectively(outGraph); - } - - static bool edgeComp(NGT::ObjectDistance a, NGT::ObjectDistance b) { - return a.id < b.id; - } - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - static void insert(NGT::GraphNode &node, size_t edgeID, NGT::Distance edgeDistance, NGT::GraphIndex &graph) { - NGT::ObjectDistance edge(edgeID, edgeDistance); - GraphNode::iterator ni = std::lower_bound(node.begin(graph.repository.allocator), node.end(graph.repository.allocator), edge, edgeComp); - node.insert(ni, edge, graph.repository.allocator); - } - - static bool hasEdge(NGT::GraphIndex &graph, size_t srcNodeID, size_t dstNodeID) - { - NGT::GraphNode &srcNode = *graph.getNode(srcNodeID); - GraphNode::iterator ni = std::lower_bound(srcNode.begin(graph.repository.allocator), srcNode.end(graph.repository.allocator), ObjectDistance(dstNodeID, 0.0), edgeComp); - return (ni != srcNode.end(graph.repository.allocator)) && ((*ni).id == dstNodeID); - } -#else - static void insert(NGT::GraphNode &node, size_t edgeID, NGT::Distance edgeDistance) { - NGT::ObjectDistance edge(edgeID, edgeDistance); - GraphNode::iterator ni = std::lower_bound(node.begin(), node.end(), edge, edgeComp); - node.insert(ni, edge); - } - - static bool hasEdge(NGT::GraphIndex &graph, size_t srcNodeID, size_t dstNodeID) - { - NGT::GraphNode &srcNode = *graph.getNode(srcNodeID); - GraphNode::iterator ni = std::lower_bound(srcNode.begin(), srcNode.end(), ObjectDistance(dstNodeID, 0.0), edgeComp); - return (ni != srcNode.end()) && ((*ni).id == dstNodeID); - } -#endif - - - static void - adjustPathsEffectively(NGT::GraphIndex &outGraph) - { - Timer timer; - timer.start(); - std::vector tmpGraph; - for (size_t id = 1; id < outGraph.repository.size(); id++) { - try { - NGT::GraphNode &node = *outGraph.getNode(id); - tmpGraph.push_back(node); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - node.clear(outGraph.repository.allocator); -#else - node.clear(); -#endif - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - tmpGraph.push_back(NGT::GraphNode(outGraph.repository.allocator)); -#else - tmpGraph.push_back(NGT::GraphNode()); -#endif - } - } - if (outGraph.repository.size() != tmpGraph.size() + 1) { - std::stringstream msg; - msg << "GraphReconstructor: Fatal inner error. " << outGraph.repository.size() << ":" << tmpGraph.size(); - NGTThrowException(msg); - } - timer.stop(); -// std::cerr << "GraphReconstructor::adjustPaths: graph preparing time=" << timer << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor::adjustPaths: graph preparing time=" + std::to_string(timer.time)); - timer.reset(); - timer.start(); - - std::vector > > removeCandidates(tmpGraph.size()); - int removeCandidateCount = 0; -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (size_t idx = 0; idx < tmpGraph.size(); ++idx) { - auto it = tmpGraph.begin() + idx; - size_t id = idx + 1; - try { - NGT::GraphNode &srcNode = *it; - std::unordered_map > neighbors; - for (size_t sni = 0; sni < srcNode.size(); ++sni) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - neighbors[srcNode.at(sni, outGraph.repository.allocator).id] = std::pair(sni, srcNode.at(sni, outGraph.repository.allocator).distance); -#else - neighbors[srcNode[sni].id] = std::pair(sni, srcNode[sni].distance); -#endif - } - - std::vector > > candidates; - for (size_t sni = 0; sni < srcNode.size(); sni++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::GraphNode &pathNode = tmpGraph[srcNode.at(sni, outGraph.repository.allocator).id - 1]; -#else - NGT::GraphNode &pathNode = tmpGraph[srcNode[sni].id - 1]; -#endif - for (size_t pni = 0; pni < pathNode.size(); pni++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - auto dstNodeID = pathNode.at(pni, outGraph.repository.allocator).id; -#else - auto dstNodeID = pathNode[pni].id; -#endif - auto dstNode = neighbors.find(dstNodeID); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (dstNode != neighbors.end() - && srcNode.at(sni, outGraph.repository.allocator).distance < (*dstNode).second.second - && pathNode.at(pni, outGraph.repository.allocator).distance < (*dstNode).second.second - ) { -#else - if (dstNode != neighbors.end() - && srcNode[sni].distance < (*dstNode).second.second - && pathNode[pni].distance < (*dstNode).second.second - ) { -#endif -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - candidates.push_back(std::pair >((*dstNode).second.first, std::pair(srcNode.at(sni, outGraph.repository.allocator).id, dstNodeID))); -#else - candidates.push_back(std::pair >((*dstNode).second.first, std::pair(srcNode[sni].id, dstNodeID))); -#endif - removeCandidateCount++; - } - } - } - sort(candidates.begin(), candidates.end(), std::greater>>()); - removeCandidates[id - 1].reserve(candidates.size()); - for (size_t i = 0; i < candidates.size(); i++) { - removeCandidates[id - 1].push_back(candidates[i].second); - } - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - continue; - } - } - timer.stop(); -// std::cerr << "GraphReconstructor::adjustPaths: extracting removed edge candidates time=" << timer << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor::adjustPaths: extracting removed edge candidates time=" + std::to_string(timer.time)); - timer.reset(); - timer.start(); - - std::list ids; - for (size_t idx = 0; idx < tmpGraph.size(); ++idx) { - ids.push_back(idx + 1); - } - - int removeCount = 0; - removeCandidateCount = 0; - for (size_t rank = 0; ids.size() != 0; rank++) { - for (auto it = ids.begin(); it != ids.end(); ) { - size_t id = *it; - size_t idx = id - 1; - try { - NGT::GraphNode &srcNode = tmpGraph[idx]; - if (rank >= srcNode.size()) { - if (!removeCandidates[idx].empty()) { -// std::cerr << "Something wrong! ID=" << id << " # of remaining candidates=" << removeCandidates[idx].size() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Something wrong! ID=" + std::to_string(id) + " # of remaining candidates=" + std::to_string(removeCandidates[idx].size())); - abort(); - } -#if !defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::GraphNode empty; - tmpGraph[idx] = empty; -#endif - it = ids.erase(it); - continue; - } - if (removeCandidates[idx].size() > 0) { - removeCandidateCount++; - bool pathExist = false; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - while (!removeCandidates[idx].empty() && (removeCandidates[idx].back().second == srcNode.at(rank, outGraph.repository.allocator).id)) { -#else - while (!removeCandidates[idx].empty() && (removeCandidates[idx].back().second == srcNode[rank].id)) { -#endif - size_t path = removeCandidates[idx].back().first; - size_t dst = removeCandidates[idx].back().second; - removeCandidates[idx].pop_back(); - if (removeCandidates[idx].empty()) { - std::vector> empty; - removeCandidates[idx] = empty; - } - if ((hasEdge(outGraph, id, path)) && (hasEdge(outGraph, path, dst))) { - pathExist = true; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - while (!removeCandidates[idx].empty() && (removeCandidates[idx].back().second == srcNode.at(rank, outGraph.repository.allocator).id)) { -#else - while (!removeCandidates[idx].empty() && (removeCandidates[idx].back().second == srcNode[rank].id)) { -#endif - removeCandidates[idx].pop_back(); - if (removeCandidates[idx].empty()) { - std::vector> empty; - removeCandidates[idx] = empty; - } - } - break; - } - } - if (pathExist) { - removeCount++; - it++; - continue; - } - } - NGT::GraphNode &outSrcNode = *outGraph.getNode(id); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - insert(outSrcNode, srcNode.at(rank, outGraph.repository.allocator).id, srcNode.at(rank, outGraph.repository.allocator).distance, outGraph); -#else - insert(outSrcNode, srcNode[rank].id, srcNode[rank].distance); -#endif - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - it++; - continue; - } - it++; - } - } - for (size_t id = 1; id < outGraph.repository.size(); id++) { - try { - NGT::GraphNode &node = *outGraph.getNode(id); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::sort(node.begin(outGraph.repository.allocator), node.end(outGraph.repository.allocator)); -#else - std::sort(node.begin(), node.end()); -#endif - } catch(...) {} - } - } - - - static - void convertToANNG(std::vector &graph) - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("convertToANNG is not implemented for shared memory."); -// std::cerr << "convertToANNG is not implemented for shared memory." << std::endl; - return; -#else -// std::cerr << "convertToANNG begin" << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("convertToANNG begin"); - for (size_t idx = 0; idx < graph.size(); idx++) { - NGT::GraphNode &node = graph[idx]; - for (auto ni = node.begin(); ni != node.end(); ++ni) { - graph[(*ni).id - 1].push_back(NGT::ObjectDistance(idx + 1, (*ni).distance)); - } - } - for (size_t idx = 0; idx < graph.size(); idx++) { - NGT::GraphNode &node = graph[idx]; - if (node.size() == 0) { - continue; - } - std::sort(node.begin(), node.end()); - NGT::ObjectID prev = 0; - for (auto it = node.begin(); it != node.end();) { - if (prev == (*it).id) { - it = node.erase(it); - continue; - } - prev = (*it).id; - it++; - } - NGT::GraphNode tmp = node; - node.swap(tmp); - } - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("convertToANNG end"); -// std::cerr << "convertToANNG end" << std::endl; -#endif - } - - static - void reconstructGraph(std::vector &graph, NGT::GraphIndex &outGraph, size_t originalEdgeSize, size_t reverseEdgeSize) - { - if (reverseEdgeSize > 10000) { -// std::cerr << "something wrong. Edge size=" << reverseEdgeSize << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("something wrong. Edge size=" + std::to_string(reverseEdgeSize)); - exit(1); - } - - NGT::Timer originalEdgeTimer, reverseEdgeTimer, normalizeEdgeTimer; - originalEdgeTimer.start(); - - for (size_t id = 1; id < outGraph.repository.size(); id++) { - try { - NGT::GraphNode &node = *outGraph.getNode(id); - if (originalEdgeSize == 0) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - node.clear(outGraph.repository.allocator); -#else - NGT::GraphNode empty; - node.swap(empty); -#endif - } else { - NGT::ObjectDistances n = graph[id - 1]; - if (n.size() < originalEdgeSize) { -// std::cerr << "GraphReconstructor: Warning. The edges are too few. " << n.size() << ":" << originalEdgeSize << " for " << id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. The edges are too few. " + std::to_string(n.size()) + ":" + std::to_string(originalEdgeSize) + " for " + std::to_string(id)); - continue; - } - n.resize(originalEdgeSize); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - node.copy(n, outGraph.repository.allocator); -#else - node.swap(n); -#endif - } - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - continue; - } - } - originalEdgeTimer.stop(); - - reverseEdgeTimer.start(); - int insufficientNodeCount = 0; - for (size_t id = 1; id <= graph.size(); ++id) { - try { - NGT::ObjectDistances &node = graph[id - 1]; - size_t rsize = reverseEdgeSize; - if (rsize > node.size()) { - insufficientNodeCount++; - rsize = node.size(); - } - for (size_t i = 0; i < rsize; ++i) { - NGT::Distance distance = node[i].distance; - size_t nodeID = node[i].id; - try { - NGT::GraphNode &n = *outGraph.getNode(nodeID); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - n.push_back(NGT::ObjectDistance(id, distance), outGraph.repository.allocator); -#else - n.push_back(NGT::ObjectDistance(id, distance)); -#endif - } catch(...) {} - } - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - continue; - } - } - reverseEdgeTimer.stop(); - if (insufficientNodeCount != 0) { -// std::cerr << "# of the nodes edges of which are in short = " << insufficientNodeCount << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("# of the nodes edges of which are in short = " + std::to_string(insufficientNodeCount)); - } - - normalizeEdgeTimer.start(); - for (size_t id = 1; id < outGraph.repository.size(); id++) { - try { - NGT::GraphNode &n = *outGraph.getNode(id); - if (id % 100000 == 0) { -// std::cerr << "Processed " << id << " nodes" << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Processed " + std::to_string(id) + " nodes"); - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::sort(n.begin(outGraph.repository.allocator), n.end(outGraph.repository.allocator)); -#else - std::sort(n.begin(), n.end()); -#endif - NGT::ObjectID prev = 0; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - for (auto it = n.begin(outGraph.repository.allocator); it != n.end(outGraph.repository.allocator);) { -#else - for (auto it = n.begin(); it != n.end();) { -#endif - if (prev == (*it).id) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - it = n.erase(it, outGraph.repository.allocator); -#else - it = n.erase(it); -#endif - continue; - } - prev = (*it).id; - it++; - } -#if !defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::GraphNode tmp = n; - n.swap(tmp); -#endif - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - continue; - } - } - normalizeEdgeTimer.stop(); -// std::cerr << "Reconstruction time=" << originalEdgeTimer.time << ":" << reverseEdgeTimer.time -// << ":" << normalizeEdgeTimer.time << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Reconstruction time=" + std::to_string(originalEdgeTimer.time) + ":" + std::to_string(reverseEdgeTimer.time) - + ":" + std::to_string(normalizeEdgeTimer.time)); - - NGT::Property prop; - outGraph.getProperty().get(prop); - prop.graphType = NGT::NeighborhoodGraph::GraphTypeONNG; - outGraph.getProperty().set(prop); - } - - - - static - void reconstructGraphWithConstraint(std::vector &graph, NGT::GraphIndex &outGraph, - size_t originalEdgeSize, size_t reverseEdgeSize, - char mode = 'a') - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("reconstructGraphWithConstraint is not implemented."); -// std::cerr << "reconstructGraphWithConstraint is not implemented." << std::endl; - abort(); -#else - - NGT::Timer originalEdgeTimer, reverseEdgeTimer, normalizeEdgeTimer; - - if (reverseEdgeSize > 10000) { -// std::cerr << "something wrong. Edge size=" << reverseEdgeSize << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("something wrong. Edge size=" + std::to_string(reverseEdgeSize)); - exit(1); - } - - for (size_t id = 1; id < outGraph.repository.size(); id++) { - if (id % 1000000 == 0) { -// std::cerr << "Processed " << id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Processed " + std::to_string(id)); - } - try { - NGT::GraphNode &node = *outGraph.getNode(id); - if (node.size() == 0) { - continue; - } - node.clear(); - NGT::GraphNode empty; - node.swap(empty); - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - continue; - } - } - NGT::GraphIndex::showStatisticsOfGraph(outGraph); - - std::vector reverse(graph.size() + 1); - for (size_t id = 1; id <= graph.size(); ++id) { - try { - NGT::GraphNode &node = graph[id - 1]; - if (id % 100000 == 0) { -// std::cerr << "Processed (summing up) " << id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Processed (summing up) " + std::to_string(id)); - } - for (size_t rank = 0; rank < node.size(); rank++) { - reverse[node[rank].id].push_back(ObjectDistance(id, node[rank].distance)); - } - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - continue; - } - } - - std::vector > reverseSize(graph.size() + 1); - reverseSize[0] = std::pair(0, 0); - for (size_t rid = 1; rid <= graph.size(); ++rid) { - reverseSize[rid] = std::pair(reverse[rid].size(), rid); - } - std::sort(reverseSize.begin(), reverseSize.end()); - - - std::vector indegreeCount(graph.size(), 0); - size_t zeroCount = 0; - for (size_t sizerank = 0; sizerank <= reverseSize.size(); sizerank++) { - - if (reverseSize[sizerank].first == 0) { - zeroCount++; - continue; - } - size_t rid = reverseSize[sizerank].second; - ObjectDistances &rnode = reverse[rid]; - for (auto rni = rnode.begin(); rni != rnode.end(); ++rni) { - if (indegreeCount[(*rni).id] >= reverseEdgeSize) { - continue; - } - NGT::GraphNode &node = *outGraph.getNode(rid); - if (indegreeCount[(*rni).id] > 0 && node.size() >= originalEdgeSize) { - continue; - } - - node.push_back(NGT::ObjectDistance((*rni).id, (*rni).distance)); - indegreeCount[(*rni).id]++; - } - } - reverseEdgeTimer.stop(); -// std::cerr << "The number of nodes with zero outdegree by reverse edges=" << zeroCount << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("The number of nodes with zero outdegree by reverse edges=" + std::to_string(zeroCount)); - NGT::GraphIndex::showStatisticsOfGraph(outGraph); - - normalizeEdgeTimer.start(); - for (size_t id = 1; id < outGraph.repository.size(); id++) { - try { - NGT::GraphNode &n = *outGraph.getNode(id); - if (id % 100000 == 0) { -// std::cerr << "Processed " << id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Processed " + std::to_string(id)); - } - std::sort(n.begin(), n.end()); - NGT::ObjectID prev = 0; - for (auto it = n.begin(); it != n.end();) { - if (prev == (*it).id) { - it = n.erase(it); - continue; - } - prev = (*it).id; - it++; - } - NGT::GraphNode tmp = n; - n.swap(tmp); - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - continue; - } - } - normalizeEdgeTimer.stop(); - NGT::GraphIndex::showStatisticsOfGraph(outGraph); - - originalEdgeTimer.start(); - for (size_t id = 1; id < outGraph.repository.size(); id++) { - if (id % 1000000 == 0) { -// std::cerr << "Processed " << id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Processed " + std::to_string(id)); - } - NGT::GraphNode &node = graph[id - 1]; - try { - NGT::GraphNode &onode = *outGraph.getNode(id); - bool stop = false; - for (size_t rank = 0; (rank < node.size() && rank < originalEdgeSize) && stop == false; rank++) { - switch (mode) { - case 'a': - if (onode.size() >= originalEdgeSize) { - stop = true; - continue; - } - break; - case 'c': - break; - } - NGT::Distance distance = node[rank].distance; - size_t nodeID = node[rank].id; - outGraph.addEdge(id, nodeID, distance, false); - } - } catch(NGT::Exception &err) { -// std::cerr << "GraphReconstructor: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("GraphReconstructor: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - continue; - } - } - originalEdgeTimer.stop(); - NGT::GraphIndex::showStatisticsOfGraph(outGraph); - -// std::cerr << "Reconstruction time=" << originalEdgeTimer.time << ":" << reverseEdgeTimer.time -// << ":" << normalizeEdgeTimer.time << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Reconstruction time=" + std::to_string(originalEdgeTimer.time) + ":" + std::to_string(reverseEdgeTimer.time) - + ":" + std::to_string(normalizeEdgeTimer.time)); - -#endif - } - - // reconstruct a pseudo ANNG with a fewer edges from an actual ANNG with more edges. - // graph is a source ANNG - // index is an index with a reconstructed ANNG - static - void reconstructANNGFromANNG(std::vector &graph, NGT::Index &index, size_t edgeSize) - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("reconstructANNGFromANNG is not implemented."); -// std::cerr << "reconstructANNGFromANNG is not implemented." << std::endl; - abort(); -#else - - NGT::GraphIndex &outGraph = dynamic_cast(index.getIndex()); - - // remove all edges in the index. - for (size_t id = 1; id < outGraph.repository.size(); id++) { - if (id % 1000000 == 0) { -// std::cerr << "Processed " << id << " nodes." << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Processed " + std::to_string(id) + " nodes."); - } - try { - NGT::GraphNode &node = *outGraph.getNode(id); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - node.clear(outGraph.repository.allocator); -#else - NGT::GraphNode empty; - node.swap(empty); -#endif - } catch(NGT::Exception &err) { - } - } - - for (size_t id = 1; id <= graph.size(); ++id) { - size_t edgeCount = 0; - try { - NGT::ObjectDistances &node = graph[id - 1]; - NGT::GraphNode &n = *outGraph.getNode(id); - NGT::Distance prevDistance = 0.0; - assert(n.size() == 0); - for (size_t i = 0; i < node.size(); ++i) { - NGT::Distance distance = node[i].distance; - if (prevDistance > distance) { - NGTThrowException("Edge distance order is invalid"); - } - prevDistance = distance; - size_t nodeID = node[i].id; - if (node[i].id < id) { - try { - NGT::GraphNode &dn = *outGraph.getNode(nodeID); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - n.push_back(NGT::ObjectDistance(nodeID, distance), outGraph.repository.allocator); - dn.push_back(NGT::ObjectDistance(id, distance), outGraph.repository.allocator); -#else - n.push_back(NGT::ObjectDistance(nodeID, distance)); - dn.push_back(NGT::ObjectDistance(id, distance)); -#endif - } catch(...) {} - edgeCount++; - } - if (edgeCount >= edgeSize) { - break; - } - } - } catch(NGT::Exception &err) { - } - } - - for (size_t id = 1; id < outGraph.repository.size(); id++) { - try { - NGT::GraphNode &n = *outGraph.getNode(id); - std::sort(n.begin(), n.end()); - NGT::ObjectID prev = 0; - for (auto it = n.begin(); it != n.end();) { - if (prev == (*it).id) { - it = n.erase(it); - continue; - } - prev = (*it).id; - it++; - } - NGT::GraphNode tmp = n; - n.swap(tmp); - } catch (...) { - } - } -#endif - } - - static void refineANNG(NGT::Index &index, bool unlog, float epsilon = 0.1, float accuracy = 0.0, int noOfEdges = 0, int exploreEdgeSize = INT_MIN, size_t batchSize = 10000) { - NGT::StdOstreamRedirector redirector(unlog); - redirector.begin(); - try { - refineANNG(index, epsilon, accuracy, noOfEdges, exploreEdgeSize, batchSize); - } catch (NGT::Exception &err) { - redirector.end(); - throw(err); - } - } - - static void refineANNG(NGT::Index &index, float epsilon = 0.1, float accuracy = 0.0, int noOfEdges = 0, int exploreEdgeSize = INT_MIN, size_t batchSize = 10000) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGTThrowException("GraphReconstructor::refineANNG: Not implemented for the shared memory option."); -#else - auto prop = static_cast(index.getIndex()).getGraphProperty(); - NGT::ObjectRepository &objectRepository = index.getObjectSpace().getRepository(); - NGT::GraphIndex &graphIndex = static_cast(index.getIndex()); - size_t nOfObjects = objectRepository.size(); - bool error = false; - std::string errorMessage; - for (size_t bid = 1; bid < nOfObjects; bid += batchSize) { - NGT::ObjectDistances results[batchSize]; - // search -#pragma omp parallel for - for (size_t idx = 0; idx < batchSize; idx++) { - size_t id = bid + idx; - if (id % 100000 == 0) { -// std::cerr << "# of processed objects=" << id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("# of processed objects=" + std::to_string(id)); - } - if (objectRepository.isEmpty(id)) { - continue; - } - NGT::SearchContainer searchContainer(*objectRepository.get(id)); - searchContainer.setResults(&results[idx]); - assert(prop.edgeSizeForCreation > 0); - searchContainer.setSize(noOfEdges > prop.edgeSizeForCreation ? noOfEdges : prop.edgeSizeForCreation); - if (accuracy > 0.0) { - searchContainer.setExpectedAccuracy(accuracy); - } else { - searchContainer.setEpsilon(epsilon); - } - if (exploreEdgeSize != INT_MIN) { - searchContainer.setEdgeSize(exploreEdgeSize); - } - if (!error) { - try { - index.search(searchContainer); - } catch (NGT::Exception &err) { -#pragma omp critical - { - error = true; - errorMessage = err.what(); - } - } - } - } - if (error) { - std::stringstream msg; - msg << "GraphReconstructor::refineANNG: " << errorMessage; - NGTThrowException(msg); - } - // outgoing edges -#pragma omp parallel for - for (size_t idx = 0; idx < batchSize; idx++) { - size_t id = bid + idx; - if (objectRepository.isEmpty(id)) { - continue; - } - NGT::GraphNode &node = *graphIndex.getNode(id); - for (auto i = results[idx].begin(); i != results[idx].end(); ++i) { - if ((*i).id != id) { - node.push_back(*i); - } - } - std::sort(node.begin(), node.end()); - // dedupe - ObjectID prev = 0; - for (GraphNode::iterator ni = node.begin(); ni != node.end();) { - if (prev == (*ni).id) { - ni = node.erase(ni); - continue; - } - prev = (*ni).id; - ni++; - } - } - // incomming edges - if (noOfEdges != 0) { - continue; - } - for (size_t idx = 0; idx < batchSize; idx++) { - size_t id = bid + idx; - if (id % 10000 == 0) { -// std::cerr << "# of processed objects=" << id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("# of processed objects=" + std::to_string(id)); - } - for (auto i = results[idx].begin(); i != results[idx].end(); ++i) { - if ((*i).id != id) { - NGT::GraphNode &node = *graphIndex.getNode((*i).id); - graphIndex.addEdge(node, id, (*i).distance, false); - } - } - } - } - if (noOfEdges != 0) { - // prune to build knng - size_t nedges = noOfEdges < 0 ? -noOfEdges : noOfEdges; -#pragma omp parallel for - for (ObjectID id = 1; id < nOfObjects; ++id) { - if (objectRepository.isEmpty(id)) { - continue; - } - NGT::GraphNode &node = *graphIndex.getNode(id); - if (node.size() > nedges) { - node.resize(nedges); - } - } - } -#endif // defined(NGT_SHARED_MEMORY_ALLOCATOR) - } -}; - -}; // NGT diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/HashBasedBooleanSet.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/HashBasedBooleanSet.h deleted file mode 100644 index 024b001937..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/HashBasedBooleanSet.h +++ /dev/null @@ -1,113 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include "defines.h" -#include -#include -#include -#include -#include - -class HashBasedBooleanSet{ - private: - uint32_t *_table; - uint32_t _tableSize; - uint32_t _mask; - - std::unordered_set _stlHash; - - - inline uint32_t _hash1(const uint32_t value){ - return value & _mask; - } - - public: - HashBasedBooleanSet():_table(NULL), _tableSize(0), _mask(0) {} - - HashBasedBooleanSet(const uint64_t size):_table(NULL), _tableSize(0), _mask(0) { - size_t bitSize = 0; - size_t bit = size; - while (bit != 0) { - bitSize++; - bit >>= 1; - } - size_t bucketSize = 0x1 << ((bitSize + 4) / 2 + 3); - initialize(bucketSize); - } - void initialize(const uint32_t tableSize) { - _tableSize = tableSize; - _mask = _tableSize - 1; - const uint32_t checkValue = _hash1(tableSize); - if(checkValue != 0){ - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("[WARN] table size is not 2^N : " + std::to_string(tableSize)); -// std::cerr << "[WARN] table size is not 2^N : " << tableSize << std::endl; - } - - _table = new uint32_t[tableSize]; - memset(_table, 0, tableSize * sizeof(uint32_t)); - } - - ~HashBasedBooleanSet(){ - delete[] _table; - _stlHash.clear(); - } - - inline bool operator[](const uint32_t num){ - const uint32_t hashValue = _hash1(num); - - auto v = _table[hashValue]; - if (v == num){ - return true; - } - if (v == 0){ - return false; - } - if (_stlHash.count(num) <= 0) { - return false; - } - return true; - } - - inline void set(const uint32_t num){ - uint32_t &value = _table[_hash1(num)]; - if(value == 0){ - value = num; - }else{ - if(value != num){ - _stlHash.insert(num); - } - } - } - - inline void insert(const uint32_t num){ - set(num); - } - - inline void reset(const uint32_t num){ - const uint32_t hashValue = _hash1(num); - if(_table[hashValue] != 0){ - if(_table[hashValue] != num){ - _stlHash.erase(num); - }else{ - _table[hashValue] = UINT_MAX; - } - } - } -}; - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Index.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/Index.cpp deleted file mode 100644 index 3a91932fab..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Index.cpp +++ /dev/null @@ -1,1943 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include -#include "NGT/defines.h" -#include "NGT/Common.h" -#include "NGT/ObjectSpaceRepository.h" -#include "NGT/Index.h" -#include "NGT/Thread.h" -#include "NGT/GraphReconstructor.h" -#include "NGT/Version.h" - -using namespace std; -using namespace NGT; - - -void -Index::version(ostream &os) -{ - os << "libngt:" << endl; - Version::get(os); -} - -string -Index::getVersion() -{ - return Version::getVersion(); -} - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR -NGT::Index::Index(NGT::Property &prop, const string &database) { - if (prop.dimension == 0) { - NGTThrowException("Index::Index. Dimension is not specified."); - } - Index* idx = 0; - mkdir(database); - if (prop.indexType == NGT::Index::Property::GraphAndTree) { - idx = new NGT::GraphAndTreeIndex(database, prop); - } else if (prop.indexType == NGT::Index::Property::Graph) { - idx = new NGT::GraphIndex(database, prop); - } else { - NGTThrowException("Index::Index: Not found IndexType in property file."); - } - if (idx == 0) { - stringstream msg; - msg << "Index::Index: Cannot construct. "; - NGTThrowException(msg); - } - index = idx; - path = ""; -} -#else -NGT::Index::Index(NGT::Property &prop) { - if (prop.dimension == 0) { - NGTThrowException("Index::Index. Dimension is not specified."); - } - Index* idx = 0; - if (prop.indexType == NGT::Index::Property::GraphAndTree) { - idx = new NGT::GraphAndTreeIndex(prop); - } else if (prop.indexType == NGT::Index::Property::Graph) { - idx = new NGT::GraphIndex(prop); - } else { - NGTThrowException("Index::Index: Not found IndexType in property file."); - } - if (idx == 0) { - stringstream msg; - msg << "Index::Index: Cannot construct. "; - NGTThrowException(msg); - } - index = idx; - path = ""; -} -#endif - -float -NGT::Index::getEpsilonFromExpectedAccuracy(double accuracy) { - return static_cast(getIndex()).getEpsilonFromExpectedAccuracy(accuracy); - } - -void -NGT::Index::open(const string &database, bool rdOnly) { - NGT::Property prop; - prop.load(database); - Index* idx = 0; - if (prop.indexType == NGT::Index::Property::GraphAndTree) { - idx = new NGT::GraphAndTreeIndex(database, rdOnly); - } else if (prop.indexType == NGT::Index::Property::Graph) { - idx = new NGT::GraphIndex(database, rdOnly); - } else { - NGTThrowException("Index::Open: Not found IndexType in property file."); - } - if (idx == 0) { - stringstream msg; - msg << "Index::open: Cannot open. " << database; - NGTThrowException(msg); - } - index = idx; - path = database; -} - -// for milvus -NGT::Index * NGT::Index::loadIndex(std::stringstream & obj, std::stringstream & grp, std::stringstream & prf, std::stringstream & tre) -{ - NGT::Property prop; - prop.load(prf); - if (prop.databaseType != NGT::Index::Property::DatabaseType::Memory) - { - NGTThrowException("GraphIndex: Cannot open. Not memory type."); - } - assert(prop.dimension != 0); - NGT::Index * idx = new NGT::Index(); - if (prop.indexType == NGT::Index::Property::GraphAndTree) - { - auto iidx = new NGT::GraphAndTreeIndex(prop); - idx->index = iidx; - } - else if (prop.indexType == NGT::Index::Property::Graph) - { - auto iidx = new NGT::GraphIndex(prop); - idx->index = iidx; - } - else - { - NGTThrowException("Index::Open: Not found IndexType in property file."); - } - idx->index->loadIndexFromStream(obj, grp, tre); - return idx; -} - -//For milvus -NGT::Index * NGT::Index::createGraphAndTree(const float * row_data, NGT::Property & prop, size_t dataSize) -{ - //TODO - if (prop.dimension == 0) - { - NGTThrowException("Index::createGraphAndTree. Dimension is not specified."); - } - NGT::Index * res = new NGT::Index(); - prop.indexType = NGT::Index::Property::IndexType::GraphAndTree; - NGT::Index * idx = new NGT::GraphAndTreeIndex(prop); - assert(idx != 0); - try - { - loadRawDataAndCreateIndex(idx, row_data, prop.threadPoolSize, dataSize); - res->index = idx; - return res; - } - catch (Exception & err) - { - delete idx; - delete res; - throw err; - } -} - -void -NGT::Index::createGraphAndTree(const string &database, NGT::Property &prop, const string &dataFile, - size_t dataSize, bool redirect) { - if (prop.dimension == 0) { - NGTThrowException("Index::createGraphAndTree. Dimension is not specified."); - } - prop.indexType = NGT::Index::Property::IndexType::GraphAndTree; - Index *idx = 0; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - mkdir(database); - idx = new NGT::GraphAndTreeIndex(database, prop); -#else - idx = new NGT::GraphAndTreeIndex(prop); -#endif - assert(idx != 0); - StdOstreamRedirector redirector(redirect); - redirector.begin(); - try { - loadAndCreateIndex(*idx, database, dataFile, prop.threadPoolSize, dataSize); - } catch(Exception &err) { - delete idx; - redirector.end(); - throw err; - } - delete idx; - redirector.end(); -} - -// For milvus -NGT::Index * NGT::Index::createGraph(const float * row_data, NGT::Property & prop, size_t dataSize) -{ - //TODO - if (prop.dimension == 0) - { - NGTThrowException("Index::createGraphAndTree. Dimension is not specified."); - } - prop.indexType = NGT::Index::Property::IndexType::Graph; - NGT::Index * res = new NGT::Index(); - NGT::Index * idx = new NGT::GraphAndTreeIndex(prop); - assert(idx != 0); - try - { - loadRawDataAndCreateIndex(idx, row_data, prop.threadPoolSize, dataSize); - res->index = idx; - return res; - } - catch (Exception & err) - { - delete idx; - delete res; - throw err; - } -} - -void -NGT::Index::createGraph(const string &database, NGT::Property &prop, const string &dataFile, size_t dataSize, bool redirect) { - if (prop.dimension == 0) { - NGTThrowException("Index::createGraphAndTree. Dimension is not specified."); - } - prop.indexType = NGT::Index::Property::IndexType::Graph; - Index *idx = 0; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - mkdir(database); - idx = new NGT::GraphIndex(database, prop); -#else - idx = new NGT::GraphIndex(prop); -#endif - assert(idx != 0); - StdOstreamRedirector redirector(redirect); - redirector.begin(); - try { - loadAndCreateIndex(*idx, database, dataFile, prop.threadPoolSize, dataSize); - } catch(Exception &err) { - delete idx; - redirector.end(); - throw err; - } - delete idx; - redirector.end(); -} - -// For milvus -void NGT::Index::loadRawDataAndCreateIndex(NGT::Index * index_, const float * row_data, size_t threadSize, size_t dataSize) -{ - if (dataSize) - { - index_->loadRawData(row_data, dataSize); - } - else - { - return; - } - if (index_->getObjectRepositorySize() == 0) - { - NGTThrowException("Index::create: Data file is empty."); - } - NGT::Timer timer; - timer.start(); - index_->createIndex(threadSize); - timer.stop(); - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Index creation time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); -// cerr << "Index creation time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; -} - -void -NGT::Index::loadAndCreateIndex(Index &index, const string &database, const string &dataFile, size_t threadSize, size_t dataSize) { - NGT::Timer timer; - timer.start(); - if (dataFile.size() != 0) { - index.load(dataFile, dataSize); - } else { - index.saveIndex(database); - return; - } - timer.stop(); - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Data loading time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); -// cerr << "Data loading time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; - if (index.getObjectRepositorySize() == 0) { - NGTThrowException("Index::create: Data file is empty."); - } - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("# of objects=" + std::to_string(index.getObjectRepositorySize() - 1)); -// cerr << "# of objects=" << index.getObjectRepositorySize() - 1 << endl; - timer.reset(); - timer.start(); - index.createIndex(threadSize); - timer.stop(); - index.saveIndex(database); -// cerr << "Index creation time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Index creation time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); -} - -// For milvus -void NGT::Index::append(NGT::Index * index_, const float * data, size_t dataSize, size_t threadSize) -{ - NGT::Timer timer; - timer.start(); - if (data != 0 && dataSize != 0) - { - index_->append(data, dataSize); - } - else - { - NGTThrowException("Index::append: No data."); - } - timer.stop(); -// cerr << "Data loading time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; -// cerr << "# of objects=" << index_->getObjectRepositorySize() - 1 << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)( - "Data loading time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) - + " (msec)"); - (*NGT_LOG_DEBUG_)("# of objects=" + std::to_string(index_->getObjectRepositorySize() - 1)); - } - timer.reset(); - timer.start(); - index_->createIndex(threadSize); - timer.stop(); -// cerr << "Index creation time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Index creation time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); - return; -} - -void -NGT::Index::append(const string &database, const string &dataFile, size_t threadSize, size_t dataSize) { - NGT::Index index(database); - NGT::Timer timer; - timer.start(); - if (dataFile.size() != 0) { - index.append(dataFile, dataSize); - } - timer.stop(); - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("Data loading time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); - (*NGT_LOG_DEBUG_)("# of objects=" + std::to_string(index.getObjectRepositorySize() - 1)); - } -// cerr << "Data loading time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; -// cerr << "# of objects=" << index.getObjectRepositorySize() - 1 << endl; - timer.reset(); - timer.start(); - index.createIndex(threadSize); - timer.stop(); - index.saveIndex(database); -// cerr << "Index creation time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Index creation time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); - return; -} - -void -NGT::Index::append(const string &database, const float *data, size_t dataSize, size_t threadSize) { - NGT::Index index(database); - NGT::Timer timer; - timer.start(); - if (data != 0 && dataSize != 0) { - index.append(data, dataSize); - } else { - NGTThrowException("Index::append: No data."); - } - timer.stop(); -// cerr << "Data loading time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; -// cerr << "# of objects=" << index.getObjectRepositorySize() - 1 << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("Data loading time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); - (*NGT_LOG_DEBUG_)("# of objects=" + std::to_string(index.getObjectRepositorySize() - 1)); - } - timer.reset(); - timer.start(); - index.createIndex(threadSize); - timer.stop(); - index.saveIndex(database); -// cerr << "Index creation time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Index creation time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); - return; -} - -void -NGT::Index::remove(const string &database, vector &objects, bool force) { - NGT::Index index(database); - NGT::Timer timer; - timer.start(); - for (vector::iterator i = objects.begin(); i != objects.end(); i++) { - try { - index.remove(*i, force); - } catch (Exception &err) { -// cerr << "Warning: Cannot remove the node. ID=" << *i << " : " << err.what() << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Warning: Cannot remove the node. ID=" + std::to_string(*i) + " : " + err.what()); - continue; - } - } - timer.stop(); -// cerr << "Data removing time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; -// cerr << "# of objects=" << index.getObjectRepositorySize() - 1 << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("Data removing time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); - (*NGT_LOG_DEBUG_)("# of objects=" + std::to_string(index.getObjectRepositorySize() - 1)); - } - index.saveIndex(database); - return; -} - -void -NGT::Index::importIndex(const string &database, const string &file) { - Index *idx = 0; - NGT::Property property; - property.importProperty(file); - NGT::Timer timer; - timer.start(); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - property.databaseType = NGT::Index::Property::DatabaseType::MemoryMappedFile; - mkdir(database); -#else - property.databaseType = NGT::Index::Property::DatabaseType::Memory; -#endif - if (property.indexType == NGT::Index::Property::IndexType::GraphAndTree) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - idx = new NGT::GraphAndTreeIndex(database, property); -#else - idx = new NGT::GraphAndTreeIndex(property); -#endif - assert(idx != 0); - } else if (property.indexType == NGT::Index::Property::IndexType::Graph) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - idx = new NGT::GraphIndex(database, property); -#else - idx = new NGT::GraphIndex(property); -#endif - assert(idx != 0); - } else { - NGTThrowException("Index::Open: Not found IndexType in property file."); - } - idx->importIndex(file); - timer.stop(); -// cerr << "Data importing time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; -// cerr << "# of objects=" << idx->getObjectRepositorySize() - 1 << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("Data importing time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); - (*NGT_LOG_DEBUG_)("# of objects=" + std::to_string(idx->getObjectRepositorySize() - 1)); - } - idx->saveIndex(database); - delete idx; -} - -void -NGT::Index::exportIndex(const string &database, const string &file) { - NGT::Index idx(database); - NGT::Timer timer; - timer.start(); - idx.exportIndex(file); - timer.stop(); -// cerr << "Data exporting time=" << timer.time << " (sec) " << timer.time * 1000.0 << " (msec)" << endl; -// cerr << "# of objects=" << idx.getObjectRepositorySize() - 1 << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("Data exporting time=" + std::to_string(timer.time) + " (sec) " + std::to_string(timer.time * 1000.0) + " (msec)"); - (*NGT_LOG_DEBUG_)("# of objects=" + std::to_string(idx.getObjectRepositorySize() - 1)); - } -} - -std::vector -NGT::Index::makeSparseObject(std::vector &object) -{ - if (static_cast(getIndex()).getProperty().distanceType != NGT::ObjectSpace::DistanceType::DistanceTypeSparseJaccard) { - NGTThrowException("NGT::Index::makeSparseObject: Not sparse jaccard."); - } - size_t dimension = getObjectSpace().getDimension(); - if (object.size() + 1 > dimension) { - std::stringstream msg; - dimension = object.size() + 1; - } - std::vector obj(dimension, 0.0); - for (size_t i = 0; i < object.size(); i++) { - float fv = *reinterpret_cast(&object[i]); - obj[i] = fv; - } - return obj; -} - -void -NGT::Index::Property::set(NGT::Property &prop) { - if (prop.dimension != -1) dimension = prop.dimension; - if (prop.threadPoolSize != -1) threadPoolSize = prop.threadPoolSize; - if (prop.objectType != ObjectSpace::ObjectTypeNone) objectType = prop.objectType; - if (prop.distanceType != DistanceType::DistanceTypeNone) distanceType = prop.distanceType; - if (prop.indexType != IndexTypeNone) indexType = prop.indexType; - if (prop.databaseType != DatabaseTypeNone) databaseType = prop.databaseType; - if (prop.objectAlignment != ObjectAlignmentNone) objectAlignment = prop.objectAlignment; - if (prop.pathAdjustmentInterval != -1) pathAdjustmentInterval = prop.pathAdjustmentInterval; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - if (prop.graphSharedMemorySize != -1) graphSharedMemorySize = prop.graphSharedMemorySize; - if (prop.treeSharedMemorySize != -1) treeSharedMemorySize = prop.treeSharedMemorySize; - if (prop.objectSharedMemorySize != -1) objectSharedMemorySize = prop.objectSharedMemorySize; -#endif - if (prop.prefetchOffset != -1) prefetchOffset = prop.prefetchOffset; - if (prop.prefetchSize != -1) prefetchSize = prop.prefetchSize; - if (prop.accuracyTable != "") accuracyTable = prop.accuracyTable; -} - -void -NGT::Index::Property::get(NGT::Property &prop) { - prop.dimension = dimension; - prop.threadPoolSize = threadPoolSize; - prop.objectType = objectType; - prop.distanceType = distanceType; - prop.indexType = indexType; - prop.databaseType = databaseType; - prop.pathAdjustmentInterval = pathAdjustmentInterval; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - prop.graphSharedMemorySize = graphSharedMemorySize; - prop.treeSharedMemorySize = treeSharedMemorySize; - prop.objectSharedMemorySize = objectSharedMemorySize; -#endif - prop.prefetchOffset = prefetchOffset; - prop.prefetchSize = prefetchSize; - prop.accuracyTable = accuracyTable; -} - -class CreateIndexJob { -public: - CreateIndexJob() {} - CreateIndexJob &operator=(const CreateIndexJob &d) { - id = d.id; - results = d.results; - object = d.object; - batchIdx = d.batchIdx; - return *this; - } - friend bool operator<(const CreateIndexJob &ja, const CreateIndexJob &jb) { return ja.batchIdx < jb.batchIdx; } - NGT::ObjectID id; - NGT::Object *object; // this will be a node of the graph later. - NGT::ObjectDistances *results; - size_t batchIdx; -}; - -class CreateIndexSharedData { -public: - CreateIndexSharedData(NGT::GraphIndex &nngt) : graphIndex(nngt) {} - NGT::GraphIndex &graphIndex; -}; - -class CreateIndexThread : public NGT::Thread { -public: - CreateIndexThread() {} - virtual ~CreateIndexThread() {} - virtual int run(); - -}; - - typedef NGT::ThreadPool CreateIndexThreadPool; - -int -CreateIndexThread::run() { - - NGT::ThreadPool::Thread &poolThread = - (NGT::ThreadPool::Thread&)*this; - - CreateIndexSharedData &sd = *poolThread.getSharedData(); - NGT::GraphIndex &graphIndex = sd.graphIndex; - - for(;;) { - CreateIndexJob job; - try { - poolThread.getInputJobQueue().popFront(job); - } catch(NGT::ThreadTerminationException &err) { - break; - } catch(NGT::Exception &err) { -// cerr << "CreateIndex::search:Error! popFront " << err.what() << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("CreateIndex::search:Error! popFront " + std::string(err.what())); - break; - } - ObjectDistances *rs = new ObjectDistances; - Object &obj = *job.object; - try { - if (graphIndex.NeighborhoodGraph::property.graphType == NeighborhoodGraph::GraphTypeKNNG) { - graphIndex.searchForKNNGInsertion(obj, job.id, *rs); // linear search - } else { - graphIndex.searchForNNGInsertion(obj, *rs); - } - } catch(NGT::Exception &err) { -// cerr << "CreateIndex::search:Fatal error! ID=" << job.id << " " << err.what() << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("CreateIndex::search:Fatal error! ID=" + std::to_string(job.id) + " " + err.what()); - abort(); - } - job.results = rs; - poolThread.getOutputJobQueue().pushBack(job); - } - - return 0; - -} - -class BuildTimeController { -public: - BuildTimeController(GraphIndex &graph, NeighborhoodGraph::Property &prop):property(prop) { - noOfInsertedObjects = graph.objectSpace->getRepository().size() - graph.repository.size(); - interval = 10000; - count = interval; - edgeSizeSave = property.edgeSizeForCreation; - insertionRadiusCoefficientSave = property.insertionRadiusCoefficient; - buildTimeLimit = property.buildTimeLimit; - time = 0.0; - timer.start(); - } - ~BuildTimeController() { - property.edgeSizeForCreation = edgeSizeSave; - property.insertionRadiusCoefficient = insertionRadiusCoefficientSave; - } - void adjustEdgeSize(size_t c) { - if (buildTimeLimit > 0.0 && count <= c) { - timer.stop(); - double estimatedTime = time + timer.time / interval * (noOfInsertedObjects - count); - estimatedTime /= 60 * 60; // hour - const size_t edgeInterval = 5; - const int minimumEdge = 5; - const float radiusInterval = 0.02; - if (estimatedTime > buildTimeLimit) { - if (property.insertionRadiusCoefficient - radiusInterval >= 1.0) { - property.insertionRadiusCoefficient -= radiusInterval; - } else { - property.edgeSizeForCreation -= edgeInterval; - if (property.edgeSizeForCreation < minimumEdge) { - property.edgeSizeForCreation = minimumEdge; - } - } - } - time += timer.time; - count += interval; - timer.start(); - } - } - - size_t noOfInsertedObjects; - size_t interval; - size_t count ; - size_t edgeSizeSave; - double insertionRadiusCoefficientSave; - Timer timer; - double time; - double buildTimeLimit; - NeighborhoodGraph::Property &property; -}; - -void -NGT::GraphIndex::constructObjectSpace(NGT::Property &prop) { - assert(prop.dimension != 0); - size_t dimension = prop.dimension; - if (prop.distanceType == NGT::ObjectSpace::DistanceType::DistanceTypeSparseJaccard) { - dimension++; - } - - switch (prop.objectType) { - case NGT::ObjectSpace::ObjectType::Float : - objectSpace = new ObjectSpaceRepository(dimension, typeid(float), prop.distanceType); - break; - case NGT::ObjectSpace::ObjectType::Uint8 : - objectSpace = new ObjectSpaceRepository(dimension, typeid(uint8_t), prop.distanceType); - break; - default: - stringstream msg; - msg << "Invalid Object Type in the property. " << prop.objectType; - NGTThrowException(msg); - } -} - -void -NGT::GraphIndex::loadIndex(const string &ifile, bool readOnly) { - objectSpace->deserialize(ifile + "/obj"); -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - if (readOnly && property.indexType == NGT::Index::Property::IndexType::Graph) { - GraphIndex::NeighborhoodGraph::loadSearchGraph(ifile); - } else { - ifstream isg(ifile + "/grp"); - repository.deserialize(isg); - } -#else - ifstream isg(ifile + "/grp"); - repository.deserialize(isg); -#endif -} - -// for milvus -void NGT::GraphIndex::saveProperty(std::stringstream & prf) { NGT::Property::save(*this, prf); } - -void -NGT::GraphIndex::saveProperty(const std::string &file) { - NGT::Property::save(*this, file); -} - -void -NGT::GraphIndex::exportProperty(const std::string &file) { - NGT::Property::exportProperty(*this, file); -} - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR -NGT::GraphIndex::GraphIndex(const string &allocator, bool rdonly):readOnly(rdonly) { - NGT::Property prop; - prop.load(allocator); - if (prop.databaseType != NGT::Index::Property::DatabaseType::MemoryMappedFile) { - NGTThrowException("GraphIndex: Cannot open. Not memory mapped file type."); - } - initialize(allocator, prop); -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - searchUnupdatableGraph = NeighborhoodGraph::Search::getMethod(prop.distanceType, prop.objectType, - objectSpace->getRepository().size()); -#endif -} - -NGT::GraphAndTreeIndex::GraphAndTreeIndex(const string &allocator, NGT::Property &prop):GraphIndex(allocator, prop) { - initialize(allocator, prop.treeSharedMemorySize); -} - -void -GraphAndTreeIndex::createTreeIndex() -{ - ObjectRepository &fr = GraphIndex::objectSpace->getRepository(); - for (size_t id = 0; id < fr.size(); id++){ - if (id % 100000 == 0) { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)(" Processed id=" + std::to_string(id)); -// cerr << " Processed id=" << id << endl; - } - if (fr.isEmpty(id)) { - continue; - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Object *f = GraphIndex::objectSpace->allocateObject(*fr[id]); - DVPTree::InsertContainer tiobj(*f, id); -#else - DVPTree::InsertContainer tiobj(*fr[id], id); -#endif - try { - DVPTree::insert(tiobj); - } catch (Exception &err) { -// cerr << "GraphAndTreeIndex::createTreeIndex: Warning. ID=" << id << ":"; -// cerr << err.what() << " continue.." << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("GraphAndTreeIndex::createTreeIndex: Warning. ID=" + std::to_string(id) + ":"); - (*NGT_LOG_DEBUG_)(std::string(err.what()) + " continue.."); - } - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(f); -#endif - } -} - -void -NGT::GraphIndex::initialize(const string &allocator, NGT::Property &prop) { - constructObjectSpace(prop); - repository.open(allocator + "/grp", prop.graphSharedMemorySize); - objectSpace->open(allocator + "/obj", prop.objectSharedMemorySize); - setProperty(prop); -} -#else // NGT_SHARED_MEMORY_ALLOCATOR -NGT::GraphIndex::GraphIndex(const string &database, bool rdOnly):readOnly(rdOnly) { - NGT::Property prop; - prop.load(database); - if (prop.databaseType != NGT::Index::Property::DatabaseType::Memory) { - NGTThrowException("GraphIndex: Cannot open. Not memory type."); - } - assert(prop.dimension != 0); - initialize(prop); - loadIndex(database, readOnly); -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - if (prop.searchType == "Large") { - searchUnupdatableGraph = NeighborhoodGraph::Search::getMethod(prop.distanceType, prop.objectType, 10000000); - } else if (prop.searchType == "Small") { - searchUnupdatableGraph = NeighborhoodGraph::Search::getMethod(prop.distanceType, prop.objectType, 0); - } else { - searchUnupdatableGraph = NeighborhoodGraph::Search::getMethod(prop.distanceType, prop.objectType, - objectSpace->getRepository().size()); - } -#endif -} -#endif - -void -GraphIndex::createIndex() -{ - GraphRepository &anngRepo = repository; - ObjectRepository &fr = objectSpace->getRepository(); - size_t pathAdjustCount = property.pathAdjustmentInterval; - NGT::ObjectID id = 1; - size_t count = 0; - BuildTimeController buildTimeController(*this, NeighborhoodGraph::property); - for (; id < fr.size(); id++) { - if (id < anngRepo.size() && anngRepo[id] != 0) { - continue; - } - insert(id); - buildTimeController.adjustEdgeSize(++count); - if (pathAdjustCount > 0 && pathAdjustCount <= id) { - GraphReconstructor::adjustPathsEffectively(static_cast(*this)); - pathAdjustCount += property.pathAdjustmentInterval; - } - } -} - -static size_t -searchMultipleQueryForCreation(GraphIndex &neighborhoodGraph, - NGT::ObjectID &id, - CreateIndexJob &job, - CreateIndexThreadPool &threads, - size_t sizeOfRepository) -{ - ObjectRepository &repo = neighborhoodGraph.objectSpace->getRepository(); - GraphRepository &anngRepo = neighborhoodGraph.repository; - size_t cnt = 0; - for (; id < repo.size(); id++) { - if (sizeOfRepository > 0 && id >= sizeOfRepository) { - break; - } - if (repo[id] == 0) { - continue; - } - if (neighborhoodGraph.NeighborhoodGraph::property.graphType != NeighborhoodGraph::GraphTypeBKNNG) { - if (id < anngRepo.size() && anngRepo[id] != 0) { - continue; - } - } - job.id = id; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - job.object = neighborhoodGraph.objectSpace->allocateObject(*repo[id]); -#else - job.object = repo[id]; -#endif - job.batchIdx = cnt; - threads.pushInputQueue(job); - cnt++; - if (cnt >= (size_t)neighborhoodGraph.NeighborhoodGraph::property.batchSizeForCreation) { - id++; - break; - } - } // for - return cnt; -} - -static void -insertMultipleSearchResults(GraphIndex &neighborhoodGraph, - CreateIndexThreadPool::OutputJobQueue &output, - size_t dataSize) -{ - // compute distances among all of the resultant objects - if (neighborhoodGraph.NeighborhoodGraph::property.graphType == NeighborhoodGraph::GraphTypeANNG || - neighborhoodGraph.NeighborhoodGraph::property.graphType == NeighborhoodGraph::GraphTypeIANNG || - neighborhoodGraph.NeighborhoodGraph::property.graphType == NeighborhoodGraph::GraphTypeONNG || - neighborhoodGraph.NeighborhoodGraph::property.graphType == NeighborhoodGraph::GraphTypeDNNG) { - // This processing occupies about 30% of total indexing time when batch size is 200. - // Only initial batch objects should be connected for each other. - // The number of nodes in the graph is checked to know whether the batch is initial. - //size_t size = NeighborhoodGraph::property.edgeSizeForCreation; - size_t size = neighborhoodGraph.NeighborhoodGraph::property.edgeSizeForCreation; - // add distances from a current object to subsequence objects to imitate of sequential insertion. - - sort(output.begin(), output.end()); // sort by batchIdx - - for (size_t idxi = 0; idxi < dataSize; idxi++) { - // add distances - ObjectDistances &objs = *output[idxi].results; - for (size_t idxj = 0; idxj < idxi; idxj++) { - ObjectDistance r; - r.distance = neighborhoodGraph.objectSpace->getComparator()(*output[idxi].object, *output[idxj].object); - r.id = output[idxj].id; - objs.push_back(r); - } - // sort and cut excess edges - std::sort(objs.begin(), objs.end()); - if (objs.size() > size) { - objs.resize(size); - } - } // for (size_t idxi .... - } // if (neighborhoodGraph.graphType == NeighborhoodGraph::GraphTypeUDNNG) - // insert resultant objects into the graph as edges - for (size_t i = 0; i < dataSize; i++) { - CreateIndexJob &gr = output[i]; - if ((*gr.results).size() == 0) { - } - if (static_cast(gr.id) > neighborhoodGraph.NeighborhoodGraph::property.edgeSizeForCreation && - static_cast(gr.results->size()) < neighborhoodGraph.NeighborhoodGraph::property.edgeSizeForCreation) { -// cerr << "createIndex: Warning. The specified number of edges could not be acquired, because the pruned parameter [-S] might be set." << endl; -// cerr << " The node id=" << gr.id << endl; -// cerr << " The number of edges for the node=" << gr.results->size() << endl; -// cerr << " The pruned parameter (edgeSizeForSearch [-S])=" << neighborhoodGraph.NeighborhoodGraph::property.edgeSizeForSearch << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("createIndex: Warning. The specified number of edges could not be acquired, because the pruned parameter [-S] might be set."); - (*NGT_LOG_DEBUG_)(" The node id=" + std::to_string(gr.id)); - (*NGT_LOG_DEBUG_)(" The number of edges for the node=" + std::to_string(gr.results->size())); - (*NGT_LOG_DEBUG_)(" The pruned parameter (edgeSizeForSearch [-S])=" + std::to_string(neighborhoodGraph.NeighborhoodGraph::property.edgeSizeForSearch)); - } - } - neighborhoodGraph.insertNode(gr.id, *gr.results); - } -} - -void -GraphIndex::createIndex(size_t threadPoolSize, size_t sizeOfRepository) -{ - if (NeighborhoodGraph::property.edgeSizeForCreation == 0) { - return; - } - if (threadPoolSize <= 1) { - createIndex(); - } else { - Timer timer; - size_t timerInterval = 100000; - size_t timerCount = timerInterval; - size_t count = 0; - timer.start(); - - size_t pathAdjustCount = property.pathAdjustmentInterval; - CreateIndexThreadPool threads(threadPoolSize); - CreateIndexSharedData sd(*this); - - threads.setSharedData(&sd); - threads.create(); - CreateIndexThreadPool::OutputJobQueue &output = threads.getOutputJobQueue(); - - BuildTimeController buildTimeController(*this, NeighborhoodGraph::property); - - try { - CreateIndexJob job; - NGT::ObjectID id = 1; - for (;;) { - // search for the nearest neighbors - size_t cnt = searchMultipleQueryForCreation(*this, id, job, threads, sizeOfRepository); - if (cnt == 0) { - break; - } - // wait for the completion of the search - threads.waitForFinish(); - if (output.size() != cnt) { -// cerr << "NNTGIndex::insertGraphIndexByThread: Warning!! Thread response size is wrong." << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("NNTGIndex::insertGraphIndexByThread: Warning!! Thread response size is wrong."); - cnt = output.size(); - } - // insertion - insertMultipleSearchResults(*this, output, cnt); - - while (!output.empty()) { - delete output.front().results; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(output.front().object); -#endif - output.pop_front(); - } - - count += cnt; - if (timerCount <= count) { - timer.stop(); -// cerr << "Processed " << timerCount << " time= " << timer << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Processed " + std::to_string(timerCount )+ " time= " + std::to_string(timer.time)); - timerCount += timerInterval; - timer.start(); - } - buildTimeController.adjustEdgeSize(count); - if (pathAdjustCount > 0 && pathAdjustCount <= count) { - GraphReconstructor::adjustPathsEffectively(static_cast(*this)); - pathAdjustCount += property.pathAdjustmentInterval; - } - } - } catch(Exception &err) { - threads.terminate(); - throw err; - } - threads.terminate(); - } - -} - -void GraphIndex::setupPrefetch(NGT::Property &prop) { - assert(GraphIndex::objectSpace != 0); - prop.prefetchOffset = GraphIndex::objectSpace->setPrefetchOffset(prop.prefetchOffset); - prop.prefetchSize = GraphIndex::objectSpace->setPrefetchSize(prop.prefetchSize); -} - -bool -NGT::GraphIndex::showStatisticsOfGraph(NGT::GraphIndex &outGraph, char mode, size_t edgeSize) -{ - long double distance = 0.0; - size_t numberOfNodes = 0; - size_t numberOfOutdegree = 0; - size_t numberOfNodesWithoutEdges = 0; - size_t maxNumberOfOutdegree = 0; - size_t minNumberOfOutdegree = SIZE_MAX; - std::vector indegreeCount; - std::vector outdegreeHistogram; - std::vector indegreeHistogram; - std::vector > indegree; - NGT::GraphRepository &graph = outGraph.repository; - NGT::ObjectRepository &repo = outGraph.objectSpace->getRepository(); - indegreeCount.resize(graph.size(), 0); - indegree.resize(graph.size()); - size_t removedObjectCount = 0; - bool valid = true; - for (size_t id = 1; id < graph.size(); id++) { - if (repo[id] == 0) { - removedObjectCount++; - continue; - } - NGT::GraphNode *node = 0; - try { - node = outGraph.getNode(id); - } catch(NGT::Exception &err) { -// std::cerr << "ngt info: Error. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ngt info: Error. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - valid = false; - continue; - } - numberOfNodes++; - if (numberOfNodes % 1000000 == 0) { -// std::cerr << "Processed " << numberOfNodes << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Processed " + std::to_string(numberOfNodes)); - } - size_t esize = node->size() > edgeSize ? edgeSize : node->size(); - if (esize == 0) { - numberOfNodesWithoutEdges++; - } - if (esize > maxNumberOfOutdegree) { - maxNumberOfOutdegree = esize; - } - if (esize < minNumberOfOutdegree) { - minNumberOfOutdegree = esize; - } - if (outdegreeHistogram.size() <= esize) { - outdegreeHistogram.resize(esize + 1); - } - outdegreeHistogram[esize]++; - if (mode == 'e') { - std::cout << id << "," << esize << ": "; - } - for (size_t i = 0; i < esize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::ObjectDistance &n = (*node).at(i, graph.allocator); -#else - NGT::ObjectDistance &n = (*node)[i]; -#endif - if (n.id == 0) { -// std::cerr << "ngt info: Warning. id is zero." << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ngt info: Warning. id is zero."); - valid = false; - } - indegreeCount[n.id]++; - indegree[n.id].push_back(n.distance); - numberOfOutdegree++; - double d = n.distance; - if (mode == 'e') { - std::cout << n.id << ":" << d << " "; - } - distance += d; - } - if (mode == 'e') { - std::cout << std::endl; - } - } - - if (mode == 'a') { - size_t count = 0; - for (size_t id = 1; id < graph.size(); id++) { - if (repo[id] == 0) { - continue; - } - NGT::GraphNode *n = 0; - try { - n = outGraph.getNode(id); - } catch(NGT::Exception &err) { - continue; - } - NGT::GraphNode &node = *n; - for (size_t i = 0; i < node.size(); i++) { - NGT::GraphNode *nn = 0; - try { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - nn = outGraph.getNode(node.at(i, graph.allocator).id); -#else - nn = outGraph.getNode(node[i].id); -#endif - } catch(NGT::Exception &err) { - count++; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) -// std::cerr << "Directed edge! " << id << "->" << node.at(i, graph.allocator).id << " no object. " -// << node.at(i, graph.allocator).id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Directed edge! " + std::to_string(id) + "->" + std::to_string(node.at(i, graph.allocator).id) + " no object. " - + std::to_string(node.at(i, graph.allocator).id)); -#else -// std::cerr << "Directed edge! " << id << "->" << node[i].id << " no object. " << node[i].id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Directed edge! " + std::to_string(id) + "->" + std::to_string(node[i].id) + " no object. " + std::to_string(node[i].id)); -#endif - continue; - } - NGT::GraphNode &nnode = *nn; - bool found = false; - for (size_t i = 0; i < nnode.size(); i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (nnode.at(i, graph.allocator).id == id) { -#else - if (nnode[i].id == id) { -#endif - found = true; - break; - } - } - if (!found) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) -// std::cerr << "Directed edge! " << id << "->" << node.at(i, graph.allocator).id << " no edge. " -// << node.at(i, graph.allocator).id << "->" << id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Directed edge! " + std::to_string(id) + "->" + std::to_string(node.at(i, graph.allocator).id) + " no edge. " - + std::to_string(node.at(i, graph.allocator).id) + "->" + std::to_string(id)); -#else - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Directed edge! " + std::to_string(id) + "->" + std::to_string(node[i].id) + " no edge. " + std::to_string(node[i].id) + "->" + std::to_string(id)); -// std::cerr << "Directed edge! " << id << "->" << node[i].id << " no edge. " << node[i].id << "->" << id << std::endl; -#endif - count++; - } - } - } -// std::cerr << "The number of directed edges=" << count << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("The number of directed edges=" + std::to_string(count)); - } - - // calculate outdegree distance 10 - size_t d10count = 0; - long double distance10 = 0.0; - size_t d10SkipCount = 0; - const size_t dcsize = 10; - for (size_t id = 1; id < graph.size(); id++) { - if (repo[id] == 0) { - continue; - } - NGT::GraphNode *n = 0; - try { - n = outGraph.getNode(id); - } catch(NGT::Exception &err) { -// std::cerr << "ngt info: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ngt info: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - continue; - } - NGT::GraphNode &node = *n; - if (node.size() < dcsize - 1) { - d10SkipCount++; - continue; - } - for (size_t i = 0; i < node.size(); i++) { - if (i >= dcsize) { - break; - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - distance10 += node.at(i, graph.allocator).distance; -#else - distance10 += node[i].distance; -#endif - d10count++; - } - } - distance10 /= (long double)d10count; - - // calculate indegree distance 10 - size_t ind10count = 0; - long double indegreeDistance10 = 0.0; - size_t ind10SkipCount = 0; - for (size_t id = 1; id < indegree.size(); id++) { - std::vector &node = indegree[id]; - if (node.size() < dcsize - 1) { - ind10SkipCount++; - continue; - } - std::sort(node.begin(), node.end()); - for (size_t i = 0; i < node.size(); i++) { - assert(i == 0 || node[i - 1] <= node[i]); - if (i >= dcsize) { - break; - } - indegreeDistance10 += node[i]; - ind10count++; - } - } - indegreeDistance10 /= (long double)ind10count; - - // calculate variance - double averageNumberOfOutdegree = (double)numberOfOutdegree / (double)numberOfNodes; - double sumOfSquareOfOutdegree = 0; - double sumOfSquareOfIndegree = 0; - for (size_t id = 1; id < graph.size(); id++) { - if (repo[id] == 0) { - continue; - } - NGT::GraphNode *node = 0; - try { - node = outGraph.getNode(id); - } catch(NGT::Exception &err) { -// std::cerr << "ngt info: Warning. Cannot get the node. ID=" << id << ":" << err.what() << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ngt info: Warning. Cannot get the node. ID=" + std::to_string(id) + ":" + err.what()); - continue; - } - size_t esize = node->size(); - sumOfSquareOfOutdegree += ((double)esize - averageNumberOfOutdegree) * ((double)esize - averageNumberOfOutdegree); - sumOfSquareOfIndegree += ((double)indegreeCount[id] - averageNumberOfOutdegree) * ((double)indegreeCount[id] - averageNumberOfOutdegree); - } - - size_t numberOfNodesWithoutIndegree = 0; - size_t maxNumberOfIndegree = 0; - size_t minNumberOfIndegree = INT64_MAX; - for (size_t id = 1; id < graph.size(); id++) { - if (graph[id] == 0) { - continue; - } - if (indegreeCount[id] == 0) { - numberOfNodesWithoutIndegree++; -// std::cerr << "Error! The node without incoming edges. " << id << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Error! The node without incoming edges. " + std::to_string(id)); - valid = false; - } - if (indegreeCount[id] > static_cast(maxNumberOfIndegree)) { - maxNumberOfIndegree = indegreeCount[id]; - } - if (indegreeCount[id] < static_cast(minNumberOfIndegree)) { - minNumberOfIndegree = indegreeCount[id]; - } - if (static_cast(indegreeHistogram.size()) <= indegreeCount[id]) { - indegreeHistogram.resize(indegreeCount[id] + 1); - } - indegreeHistogram[indegreeCount[id]]++; - } - - size_t count = 0; - int medianOutdegree = -1; - size_t modeOutdegree = 0; - size_t max = 0; - double c95 = 0.0; - double c99 = 0.0; - for (size_t i = 0; i < outdegreeHistogram.size(); i++) { - count += outdegreeHistogram[i]; - if (medianOutdegree == -1 && count >= numberOfNodes / 2) { - medianOutdegree = i; - } - if (max < outdegreeHistogram[i]) { - max = outdegreeHistogram[i]; - modeOutdegree = i; - } - if (count > numberOfNodes * 0.95) { - if (c95 == 0.0) { - c95 += i * (count - numberOfNodes * 0.95); - } else { - c95 += i * outdegreeHistogram[i]; - } - } - if (count > numberOfNodes * 0.99) { - if (c99 == 0.0) { - c99 += i * (count - numberOfNodes * 0.99); - } else { - c99 += i * outdegreeHistogram[i]; - } - } - } - c95 /= (double)numberOfNodes * 0.05; - c99 /= (double)numberOfNodes * 0.01; - - count = 0; - int medianIndegree = -1; - size_t modeIndegree = 0; - max = 0; - double c5 = 0.0; - double c1 = 0.0; - for (size_t i = 0; i < indegreeHistogram.size(); i++) { - if (count < numberOfNodes * 0.05) { - if (count + indegreeHistogram[i] >= numberOfNodes * 0.05) { - c5 += i * (numberOfNodes * 0.05 - count); - } else { - c5 += i * indegreeHistogram[i]; - } - } - if (count < numberOfNodes * 0.01) { - if (count + indegreeHistogram[i] >= numberOfNodes * 0.01) { - c1 += i * (numberOfNodes * 0.01 - count); - } else { - c1 += i * indegreeHistogram[i]; - } - } - count += indegreeHistogram[i]; - if (medianIndegree == -1 && count >= numberOfNodes / 2) { - medianIndegree = i; - } - if (max < indegreeHistogram[i]) { - max = indegreeHistogram[i]; - modeIndegree = i; - } - } - c5 /= (double)numberOfNodes * 0.05; - c1 /= (double)numberOfNodes * 0.01; - - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("The size of the object repository (not the number of the objects):\t" + std::to_string(repo.size() - 1)); - (*NGT_LOG_DEBUG_)("The number of the removed objects:\t" + std::to_string(removedObjectCount) + "/" + std::to_string(repo.size() - 1)); - (*NGT_LOG_DEBUG_)("The number of the nodes:\t" + std::to_string(numberOfNodes)); - (*NGT_LOG_DEBUG_)("The number of the edges:\t" + std::to_string(numberOfOutdegree)); - (*NGT_LOG_DEBUG_)("The mean of the edge lengths:\t" + std::to_string(distance / (double)numberOfOutdegree)); - (*NGT_LOG_DEBUG_)("The mean of the number of the edges per node:\t" + std::to_string((double)numberOfOutdegree / (double)numberOfNodes)); - (*NGT_LOG_DEBUG_)("The number of the nodes without edges:\t" + std::to_string(numberOfNodesWithoutEdges)); - (*NGT_LOG_DEBUG_)("The maximum of the outdegrees:\t" + std::to_string(maxNumberOfOutdegree)); - if (minNumberOfOutdegree == SIZE_MAX) { - (*NGT_LOG_DEBUG_)("The minimum of the outdegrees:\t-NA-"); - } else { - (*NGT_LOG_DEBUG_)("The minimum of the outdegrees:\t" + std::to_string(minNumberOfOutdegree)); - } - (*NGT_LOG_DEBUG_)("The number of the nodes where indegree is 0:\t" + std::to_string(numberOfNodesWithoutIndegree)); - (*NGT_LOG_DEBUG_)("The maximum of the indegrees:\t" + std::to_string(maxNumberOfIndegree)); - if (minNumberOfIndegree == INT64_MAX) { - (*NGT_LOG_DEBUG_)("The minimum of the indegrees:\t-NA-"); - } else { - (*NGT_LOG_DEBUG_)("The minimum of the indegrees:\t" + std::to_string(minNumberOfIndegree)); - } - (*NGT_LOG_DEBUG_)("#-nodes,#-edges,#-no-indegree,avg-edges,avg-dist,max-out,min-out,v-out,max-in,min-in,v-in,med-out," - "med-in,mode-out,mode-in,c95,c5,o-distance(10),o-skip,i-distance(10),i-skip:" - + std::to_string(numberOfNodes) + ":" + std::to_string(numberOfOutdegree) + ":" + std::to_string(numberOfNodesWithoutIndegree) + ":" - + std::to_string((double)numberOfOutdegree / (double)numberOfNodes) + ":" - + std::to_string(distance / (double)numberOfOutdegree) + ":" - + std::to_string(maxNumberOfOutdegree) + ":" + std::to_string(minNumberOfOutdegree) + ":" + std::to_string(sumOfSquareOfOutdegree / (double)numberOfOutdegree) + ":" - + std::to_string(maxNumberOfIndegree) + ":" + std::to_string(minNumberOfIndegree) + ":" + std::to_string(sumOfSquareOfIndegree / (double)numberOfOutdegree) + ":" - + std::to_string(medianOutdegree) + ":" + std::to_string(medianIndegree) + ":" + std::to_string(modeOutdegree) + ":" + std::to_string(modeIndegree) - + ":" + std::to_string(c95) + ":" + std::to_string(c5) + ":" + std::to_string(c99) + ":" + std::to_string(c1) + ":" + std::to_string(distance10) + ":" + std::to_string(d10SkipCount) + ":" - + std::to_string(indegreeDistance10) + ":" + std::to_string(ind10SkipCount)); - if (mode == 'h') { - (*NGT_LOG_DEBUG_)("#\tout\tin"); - for (size_t i = 0; i < outdegreeHistogram.size() || i < indegreeHistogram.size(); i++) { - size_t out = outdegreeHistogram.size() <= i ? 0 : outdegreeHistogram[i]; - size_t in = indegreeHistogram.size() <= i ? 0 : indegreeHistogram[i]; - (*NGT_LOG_DEBUG_)(std::to_string(i) + "\t" + std::to_string(out) + "\t" + std::to_string(in)); - } - } - } - /* - std::cerr << "The size of the object repository (not the number of the objects):\t" << repo.size() - 1 << std::endl; - std::cerr << "The number of the removed objects:\t" << removedObjectCount << "/" << repo.size() - 1 << std::endl; - std::cerr << "The number of the nodes:\t" << numberOfNodes << std::endl; - std::cerr << "The number of the edges:\t" << numberOfOutdegree << std::endl; - std::cerr << "The mean of the edge lengths:\t" << std::setprecision(10) << distance / (double)numberOfOutdegree << std::endl; - std::cerr << "The mean of the number of the edges per node:\t" << (double)numberOfOutdegree / (double)numberOfNodes << std::endl; - std::cerr << "The number of the nodes without edges:\t" << numberOfNodesWithoutEdges << std::endl; - std::cerr << "The maximum of the outdegrees:\t" << maxNumberOfOutdegree << std::endl; - if (minNumberOfOutdegree == SIZE_MAX) { - std::cerr << "The minimum of the outdegrees:\t-NA-" << std::endl; - } else { - std::cerr << "The minimum of the outdegrees:\t" << minNumberOfOutdegree << std::endl; - } - std::cerr << "The number of the nodes where indegree is 0:\t" << numberOfNodesWithoutIndegree << std::endl; - std::cerr << "The maximum of the indegrees:\t" << maxNumberOfIndegree << std::endl; - if (minNumberOfIndegree == INT64_MAX) { - std::cerr << "The minimum of the indegrees:\t-NA-" << std::endl; - } else { - std::cerr << "The minimum of the indegrees:\t" << minNumberOfIndegree << std::endl; - } - std::cerr << "#-nodes,#-edges,#-no-indegree,avg-edges,avg-dist,max-out,min-out,v-out,max-in,min-in,v-in,med-out," - "med-in,mode-out,mode-in,c95,c5,o-distance(10),o-skip,i-distance(10),i-skip:" - << numberOfNodes << ":" << numberOfOutdegree << ":" << numberOfNodesWithoutIndegree << ":" - << std::setprecision(10) << (double)numberOfOutdegree / (double)numberOfNodes << ":" - << distance / (double)numberOfOutdegree << ":" - << maxNumberOfOutdegree << ":" << minNumberOfOutdegree << ":" << sumOfSquareOfOutdegree / (double)numberOfOutdegree<< ":" - << maxNumberOfIndegree << ":" << minNumberOfIndegree << ":" << sumOfSquareOfIndegree / (double)numberOfOutdegree << ":" - << medianOutdegree << ":" << medianIndegree << ":" << modeOutdegree << ":" << modeIndegree - << ":" << c95 << ":" << c5 << ":" << c99 << ":" << c1 << ":" << distance10 << ":" << d10SkipCount << ":" - << indegreeDistance10 << ":" << ind10SkipCount << std::endl; - if (mode == 'h') { - std::cerr << "#\tout\tin" << std::endl; - for (size_t i = 0; i < outdegreeHistogram.size() || i < indegreeHistogram.size(); i++) { - size_t out = outdegreeHistogram.size() <= i ? 0 : outdegreeHistogram[i]; - size_t in = indegreeHistogram.size() <= i ? 0 : indegreeHistogram[i]; - std::cerr << i << "\t" << out << "\t" << in << std::endl; - } - } - */ - return valid; -} - - -void -GraphAndTreeIndex::createIndex(size_t threadPoolSize, size_t sizeOfRepository) -{ - assert(threadPoolSize > 0); - - if (NeighborhoodGraph::property.edgeSizeForCreation == 0) { - return; - } - - Timer timer; - size_t timerInterval = 100000; - size_t timerCount = timerInterval; - size_t count = 0; - timer.start(); - - size_t pathAdjustCount = property.pathAdjustmentInterval; - CreateIndexThreadPool threads(threadPoolSize); - - CreateIndexSharedData sd(*this); - - threads.setSharedData(&sd); - threads.create(); - CreateIndexThreadPool::OutputJobQueue &output = threads.getOutputJobQueue(); - - BuildTimeController buildTimeController(*this, NeighborhoodGraph::property); - - try { - CreateIndexJob job; - NGT::ObjectID id = 1; - for (;;) { - size_t cnt = searchMultipleQueryForCreation(*this, id, job, threads, sizeOfRepository); - - if (cnt == 0) { - break; - } - threads.waitForFinish(); - - if (output.size() != cnt) { -// cerr << "NNTGIndex::insertGraphIndexByThread: Warning!! Thread response size is wrong." << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("NNTGIndex::insertGraphIndexByThread: Warning!! Thread response size is wrong."); - cnt = output.size(); - } - - insertMultipleSearchResults(*this, output, cnt); - - for (size_t i = 0; i < cnt; i++) { - CreateIndexJob &job = output[i]; - if (((job.results->size() > 0) && ((*job.results)[0].distance != 0.0)) || - (job.results->size() == 0)) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Object *f = GraphIndex::objectSpace->allocateObject(*job.object); - DVPTree::InsertContainer tiobj(*f, job.id); -#else - DVPTree::InsertContainer tiobj(*job.object, job.id); -#endif - try { - DVPTree::insert(tiobj); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(f); -#endif - } catch (Exception &err) { -// cerr << "NGT::createIndex: Fatal error. ID=" << job.id << ":"; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("NGT::createIndex: Fatal error. ID=" + std::to_string(job.id) + ":"); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(f); -#endif - if (NeighborhoodGraph::property.graphType == NeighborhoodGraph::GraphTypeKNNG) { -// cerr << err.what() << " continue.." << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)(std::string(err.what()) + " continue.."); - } else { - throw err; - } - } - } - } // for - - while (!output.empty()) { - delete output.front().results; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(output.front().object); -#endif - output.pop_front(); - } - - count += cnt; - if (timerCount <= count) { - timer.stop(); -// cerr << "Processed " << timerCount << " objects. time= " << timer << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)( - "Processed " + std::to_string(timerCount) + " objects. time= " + std::to_string(timer.time)); - } - timerCount += timerInterval; - timer.start(); - } - buildTimeController.adjustEdgeSize(count); - if (pathAdjustCount > 0 && pathAdjustCount <= count) { - GraphReconstructor::adjustPathsEffectively(static_cast(*this)); - pathAdjustCount += property.pathAdjustmentInterval; - } - } - } catch(Exception &err) { - threads.terminate(); - throw err; - } - threads.terminate(); -} - - -void -GraphAndTreeIndex::createIndex(const vector > &objects, - vector &ids, - double range, size_t threadPoolSize) -{ - Timer timer; - size_t timerInterval = 100000; - size_t timerCount = timerInterval; - size_t count = 0; - timer.start(); - if (threadPoolSize <= 0) { -// cerr << "Not implemented!!" << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Not implemented!!"); - abort(); - } else { - CreateIndexThreadPool threads(threadPoolSize); - CreateIndexSharedData sd(*this); - threads.setSharedData(&sd); - threads.create(); - CreateIndexThreadPool::OutputJobQueue &output = threads.getOutputJobQueue(); - try { - CreateIndexJob job; - size_t idx = 0; - for (;;) { - size_t cnt = 0; - { - for (; idx < objects.size(); idx++) { - if (objects[idx].first == 0) { - ids.push_back(InsertionResult()); - continue; - } - job.id = 0; - job.results = 0; - job.object = objects[idx].first; - job.batchIdx = ids.size(); - // insert an empty entry to prepare. - ids.push_back(InsertionResult(job.id, false, 0.0)); - threads.pushInputQueue(job); - cnt++; - if (cnt >= (size_t)NeighborhoodGraph::property.batchSizeForCreation) { - idx++; - break; - } - } - } - if (cnt == 0) { - break; - } - threads.waitForFinish(); - if (output.size() != cnt) { -// cerr << "NNTGIndex::insertGraphIndexByThread: Warning!! Thread response size is wrong." << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("NNTGIndex::insertGraphIndexByThread: Warning!! Thread response size is wrong."); - cnt = output.size(); - } - { - // This processing occupies about 30% of total indexing time when batch size is 200. - // Only initial batch objects should be connected for each other. - // The number of nodes in the graph is checked to know whether the batch is initial. - size_t size = NeighborhoodGraph::property.edgeSizeForCreation; - // add distances from a current object to subsequence objects to imitate of sequential insertion. - - sort(output.begin(), output.end()); - for (size_t idxi = 0; idxi < cnt; idxi++) { - // add distances - ObjectDistances &objs = *output[idxi].results; - for (size_t idxj = 0; idxj < idxi; idxj++) { - if (output[idxj].id == 0) { - // unregistered object - continue; - } - ObjectDistance r; - r.distance = GraphIndex::objectSpace->getComparator()(*output[idxi].object, *output[idxj].object); - r.id = output[idxj].id; - objs.push_back(r); - } - // sort and cut excess edges - std::sort(objs.begin(), objs.end()); - if (objs.size() > size) { - objs.resize(size); - } - if ((objs.size() > 0) && (range < 0.0 || ((double)objs[0].distance <= range + FLT_EPSILON))) { - // The line below was replaced by the line above to consider EPSILON for float comparison. 170702 - // if ((objs.size() > 0) && (range < 0.0 || (objs[0].distance <= range))) { - // An identical or similar object already exits - ids[output[idxi].batchIdx].identical = true; - ids[output[idxi].batchIdx].id = objs[0].id; - ids[output[idxi].batchIdx].distance = objs[0].distance; - output[idxi].id = 0; - } else { - assert(output[idxi].id == 0); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - PersistentObject *obj = GraphIndex::objectSpace->allocatePersistentObject(*output[idxi].object); - output[idxi].id = GraphIndex::objectSpace->insert(obj); -#else - output[idxi].id = GraphIndex::objectSpace->insert(output[idxi].object); -#endif - ids[output[idxi].batchIdx].id = output[idxi].id; - } - } - } - // insert resultant objects into the graph as edges - for (size_t i = 0; i < cnt; i++) { - CreateIndexJob &job = output.front(); - if (job.id != 0) { - if (property.indexType == NGT::Property::GraphAndTree) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Object *f = GraphIndex::objectSpace->allocateObject(*job.object); - DVPTree::InsertContainer tiobj(*f, job.id); -#else - DVPTree::InsertContainer tiobj(*job.object, job.id); -#endif - try { - DVPTree::insert(tiobj); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(f); -#endif - } catch (Exception &err) { -// cerr << "NGT::createIndex: Fatal error. ID=" << job.id << ":" << err.what(); - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("NGT::createIndex: Fatal error. ID=" + std::to_string(job.id) + ":" + err.what()); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(f); -#endif - if (NeighborhoodGraph::property.graphType == NeighborhoodGraph::GraphTypeKNNG) { -// cerr << err.what() << " continue.." << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)(std::string(err.what()) + " continue.."); - } else { - throw err; - } - } - } - if (((*job.results).size() == 0) && (job.id != 1)) { -// cerr << "insert warning!! No searched nodes!. If the first time, no problem. " << job.id << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("insert warning!! No searched nodes!. If the first time, no problem. " + std::to_string(job.id)); - } - GraphIndex::insertNode(job.id, *job.results); - } - if (job.results != 0) { - delete job.results; - } - output.pop_front(); - } - - count += cnt; - if (timerCount <= count) { - timer.stop(); -// cerr << "Processed " << timerCount << " time= " << timer << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Processed " + std::to_string(timerCount) + " time= " + std::to_string(timer.time)); - timerCount += timerInterval; - timer.start(); - } - } - } catch(Exception &err) { -// cerr << "thread terminate!" << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("thread terminate!"); - threads.terminate(); - throw err; - } - threads.terminate(); - } -} - -static bool -findPathAmongIdenticalObjects(GraphAndTreeIndex &graph, size_t srcid, size_t dstid) { - stack nodes; - unordered_set done; - nodes.push(srcid); - while (!nodes.empty()) { - auto tid = nodes.top(); - nodes.pop(); - done.insert(tid); - GraphNode &node = *graph.GraphIndex::getNode(tid); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - for (auto i = node.begin(graph.repository.allocator); i != node.end(graph.GraphIndex::repository.allocator); ++i) { -#else - for (auto i = node.begin(); i != node.end(); ++i) { -#endif - if ((*i).distance != 0.0) { - break; - } - if ((*i).id == dstid) { - return true; - } - if (done.count((*i).id) == 0) { - nodes.push((*i).id); - } - } - } - return false; -} - -bool -GraphAndTreeIndex::verify(vector &status, bool info, char mode) { - bool valid = GraphIndex::verify(status, info); - if (!valid) { -// cerr << "The graph or object is invalid!" << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("The graph or object is invalid!"); - } - bool treeValid = DVPTree::verify(GraphIndex::objectSpace->getRepository().size(), status); - if (!treeValid) { -// cerr << "The tree is invalid" << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("The tree is invalid"); - } - valid = valid && treeValid; - // status: tree|graph|object -// cerr << "Started checking consistency..." << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Started checking consistency..."); - for (size_t id = 1; id < status.size(); id++) { - if (id % 100000 == 0) { -// cerr << "The number of processed objects=" << id << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("The number of processed objects=" + std::to_string(id)); - } - if (status[id] != 0x00 && status[id] != 0x07) { - if (status[id] == 0x03) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - NGT::Object *po = GraphIndex::objectSpace->allocateObject(*GraphIndex::getObjectRepository().get(id)); - NGT::SearchContainer sc(*po); -#else - NGT::SearchContainer sc(*GraphIndex::getObjectRepository().get(id)); -#endif - NGT::ObjectDistances objects; - sc.setResults(&objects); - sc.id = 0; - sc.radius = 0.0; - sc.explorationCoefficient = 1.1; - sc.edgeSize = 0; - ObjectDistances seeds; - seeds.push_back(ObjectDistance(id, 0.0)); - objects.clear(); - try { - GraphIndex::search(sc, seeds); - } catch(Exception &err) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(po); -#endif -// cerr << "Fatal Error!: Cannot search! " << err.what() << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Fatal Error!: Cannot search! " + std::string(err.what())); - objects.clear(); - } - size_t n = 0; - bool registeredIdenticalObject = false; - for (; n < objects.size(); n++) { - if (objects[n].id != id && status[objects[n].id] == 0x07) { - registeredIdenticalObject = true; - break; - } - } - if (!registeredIdenticalObject) { - if (info) { -// cerr << "info: not found the registered same objects. id=" << id << " size=" << objects.size() << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("info: not found the registered same objects. id=" + std::to_string(id) + " size=" + std::to_string(objects.size())); - } - sc.id = 0; - sc.radius = FLT_MAX; - sc.explorationCoefficient = 1.2; - sc.edgeSize = 0; - sc.size = objects.size() < 100 ? 100 : objects.size() * 2; - ObjectDistances seeds; - seeds.push_back(ObjectDistance(id, 0.0)); - objects.clear(); - try { - GraphIndex::search(sc, seeds); - } catch(Exception &err) { -// cerr << "Fatal Error!: Cannot search! " << err.what() << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("Fatal Error!: Cannot search! " + std::string(err.what())); - } - objects.clear(); - } - registeredIdenticalObject = false; - for (n = 0; n < objects.size(); n++) { - if (objects[n].distance != 0.0) break; - if (objects[n].id != id && status[objects[n].id] == 0x07) { - registeredIdenticalObject = true; - if (info) { -// cerr << "info: found by using mode accurate search. " << objects[n].id << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("info: found by using mode accurate search. " + std::to_string(objects[n].id)); - } - break; - } - } - } - if (!registeredIdenticalObject && mode != 's') { - if (info) { -// cerr << "info: not found by using more accurate search." << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("info: not found by using more accurate search."); - } - sc.id = 0; - sc.radius = 0.0; - sc.explorationCoefficient = 1.1; - sc.edgeSize = 0; - sc.size = SIZE_MAX; - objects.clear(); - linearSearch(sc); - n = 0; - registeredIdenticalObject = false; - for (; n < objects.size(); n++) { - if (objects[n].distance != 0.0) break; - if (objects[n].id != id && status[objects[n].id] == 0x07) { - registeredIdenticalObject = true; - if (info) { -// cerr << "info: found by using linear search. " << objects[n].id << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("info: found by using linear search. " + std::to_string(objects[n].id)); - } - break; - } - } - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(po); -#endif - if (registeredIdenticalObject) { - if (info) { -// cerr << "Info ID=" << id << ":" << static_cast(status[id]) << endl; -// cerr << " found the valid same objects. " << objects[n].id << endl; - if (NGT_LOG_DEBUG_){ - (*NGT_LOG_DEBUG_)("Info ID=" + std::to_string(id) + ":" + std::to_string(static_cast(status[id]))); - (*NGT_LOG_DEBUG_)(" found the valid same objects. " + std::to_string(objects[n].id)); - } - } - GraphNode &fromNode = *GraphIndex::getNode(id); - bool fromFound = false; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - for (auto i = fromNode.begin(GraphIndex::repository.allocator); i != fromNode.end(GraphIndex::repository.allocator); ++i) { -#else - for (auto i = fromNode.begin(); i != fromNode.end(); ++i) { -#endif - if ((*i).id == objects[n].id) { - fromFound = true; - } - } - GraphNode &toNode = *GraphIndex::getNode(objects[n].id); - bool toFound = false; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - for (auto i = toNode.begin(GraphIndex::repository.allocator); i != toNode.end(GraphIndex::repository.allocator); ++i) { -#else - for (auto i = toNode.begin(); i != toNode.end(); ++i) { -#endif - if ((*i).id == id) { - toFound = true; - } - } - if (!fromFound || !toFound) { - if (info) { - if (!fromFound && !toFound) { -// cerr << "Warning no undirected edge between " << id << "(" << fromNode.size() << ") and " -// << objects[n].id << "(" << toNode.size() << ")." << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Warning no undirected edge between " + std::to_string(id) + "(" + std::to_string(fromNode.size()) + ") and " - + std::to_string(objects[n].id) + "(" + std::to_string(toNode.size()) + ")."); - } else if (!fromFound) { -// cerr << "Warning no directed edge from " << id << "(" << fromNode.size() << ") to " -// << objects[n].id << "(" << toNode.size() << ")." << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Warning no directed edge from " + std::to_string(id) + "(" + std::to_string(fromNode.size()) + ") to " - + std::to_string(objects[n].id) + "(" + std::to_string(toNode.size()) + ")."); - } else if (!toFound) { -// cerr << "Warning no reverse directed edge from " << id << "(" << fromNode.size() << ") to " -// << objects[n].id << "(" << toNode.size() << ")." << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Warning no reverse directed edge from " + std::to_string(id) + "(" + std::to_string(fromNode.size()) + ") to " - + std::to_string(objects[n].id) + "(" + std::to_string(toNode.size()) + ")."); - } - } - if (!findPathAmongIdenticalObjects(*this, id, objects[n].id)) { -// cerr << "Warning no path from " << id << " to " << objects[n].id << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Warning no path from " + std::to_string(id) + " to " + std::to_string(objects[n].id)); - } - if (!findPathAmongIdenticalObjects(*this, objects[n].id, id)) { -// cerr << "Warning no reverse path from " << id << " to " << objects[n].id << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Warning no reverse path from " + std::to_string(id) + " to " + std::to_string(objects[n].id)); - } - } - } else { - if (mode == 's') { -// cerr << "Warning: not found the valid same object, but not try to use linear search." << endl; -// cerr << "Error! ID=" << id << ":" << static_cast(status[id]) << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("Warning: not found the valid same object, but not try to use linear search."); - (*NGT_LOG_DEBUG_)("Error! ID=" + std::to_string(id) + ":" + std::to_string(static_cast(status[id]))); - } - } else { -// cerr << "Warning: not found the valid same object even by using linear search." << endl; -// cerr << "Error! ID=" << id << ":" << static_cast(status[id]) << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("Warning: not found the valid same object even by using linear search."); - (*NGT_LOG_DEBUG_)("Error! ID=" + std::to_string(id) + ":" + std::to_string(static_cast(status[id]))); - } - valid = false; - } - } - } else if (status[id] == 0x01) { - if (info) { -// cerr << "Warning! ID=" << id << ":" << static_cast(status[id]) << endl; -// cerr << " not inserted into the indexes" << endl; - if (NGT_LOG_DEBUG_) { - (*NGT_LOG_DEBUG_)("Warning! ID=" + std::to_string(id) + ":" + std::to_string(static_cast(status[id]))); - (*NGT_LOG_DEBUG_)(" not inserted into the indexes"); - } - } - } else { -// cerr << "Error! ID=" << id << ":" << static_cast(status[id]) << endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Error! ID=" + std::to_string(id) + ":" + std::to_string(static_cast(status[id]))); - valid = false; - } - } - } - return valid; -} - - - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Index.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Index.h deleted file mode 100644 index 12556632fb..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Index.h +++ /dev/null @@ -1,2358 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "NGT/Common.h" -#include "NGT/Graph.h" -#include "NGT/Thread.h" -#include "NGT/Tree.h" -#include "NGT/defines.h" - -#include "NGT/GetCoreNumber.h" - - -namespace NGT -{ -class Property; - -class Index -{ -public: - class Property - { - public: - typedef ObjectSpace::ObjectType ObjectType; - typedef ObjectSpace::DistanceType DistanceType; - typedef NeighborhoodGraph::SeedType SeedType; - typedef NeighborhoodGraph::GraphType GraphType; - enum ObjectAlignment - { - ObjectAlignmentNone = 0, - ObjectAlignmentTrue = 1, - ObjectAlignmentFalse = 2 - }; - enum IndexType - { - IndexTypeNone = 0, - GraphAndTree = 1, - Graph = 2 - }; - enum DatabaseType - { - DatabaseTypeNone = 0, - Memory = 1, - MemoryMappedFile = 2 - }; - Property() { setDefault(); } - void setDefault() - { - dimension = 0; - threadPoolSize = 32; - objectType = ObjectSpace::ObjectType::Float; - distanceType = DistanceType::DistanceTypeL2; - indexType = IndexType::GraphAndTree; - objectAlignment = ObjectAlignment::ObjectAlignmentFalse; - pathAdjustmentInterval = 0; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - databaseType = DatabaseType::MemoryMappedFile; - graphSharedMemorySize = 512; // MB - treeSharedMemorySize = 512; // MB - objectSharedMemorySize = 512; // MB 512 is up to 50M objects. -#else - databaseType = DatabaseType::Memory; -#endif - prefetchOffset = 0; - prefetchSize = 0; - } - void clear() - { - dimension = -1; - threadPoolSize = -1; - objectType = ObjectSpace::ObjectTypeNone; - distanceType = DistanceType::DistanceTypeNone; - indexType = IndexTypeNone; - databaseType = DatabaseTypeNone; - objectAlignment = ObjectAlignment::ObjectAlignmentNone; - pathAdjustmentInterval = -1; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - graphSharedMemorySize = -1; - treeSharedMemorySize = -1; - objectSharedMemorySize = -1; -#endif - prefetchOffset = -1; - prefetchSize = -1; - accuracyTable = ""; - } - - void exportProperty(NGT::PropertySet & p) - { - p.set("Dimension", dimension); - p.set("ThreadPoolSize", threadPoolSize); - switch (objectType) - { - case ObjectSpace::ObjectType::Uint8: - p.set("ObjectType", "Integer-1"); - break; - case ObjectSpace::ObjectType::Float: - p.set("ObjectType", "Float-4"); - break; - default: - std::cerr << "Fatal error. Invalid object type. " << objectType << std::endl; - abort(); - } - switch (distanceType) - { - case DistanceType::DistanceTypeNone: - p.set("DistanceType", "None"); - break; - case DistanceType::DistanceTypeL1: - p.set("DistanceType", "L1"); - break; - case DistanceType::DistanceTypeL2: - p.set("DistanceType", "L2"); - break; - case DistanceType::DistanceTypeIP: - p.set("DistanceType", "IP"); - break; - case DistanceType::DistanceTypeHamming: - p.set("DistanceType", "Hamming"); - break; - case DistanceType::DistanceTypeJaccard: - p.set("DistanceType", "Jaccard"); - break; - case DistanceType::DistanceTypeSparseJaccard: - p.set("DistanceType", "SparseJaccard"); - break; - case DistanceType::DistanceTypeAngle: - p.set("DistanceType", "Angle"); - break; - case DistanceType::DistanceTypeCosine: - p.set("DistanceType", "Cosine"); - break; - case DistanceType::DistanceTypeNormalizedAngle: - p.set("DistanceType", "NormalizedAngle"); - break; - case DistanceType::DistanceTypeNormalizedCosine: - p.set("DistanceType", "NormalizedCosine"); - break; - default: - std::cerr << "Fatal error. Invalid distance type. " << distanceType << std::endl; - abort(); - } - switch (indexType) - { - case IndexType::GraphAndTree: - p.set("IndexType", "GraphAndTree"); - break; - case IndexType::Graph: - p.set("IndexType", "Graph"); - break; - default: - std::cerr << "Fatal error. Invalid index type. " << indexType << std::endl; - abort(); - } - switch (databaseType) - { - case DatabaseType::Memory: - p.set("DatabaseType", "Memory"); - break; - case DatabaseType::MemoryMappedFile: - p.set("DatabaseType", "MemoryMappedFile"); - break; - default: - std::cerr << "Fatal error. Invalid database type. " << databaseType << std::endl; - abort(); - } - switch (objectAlignment) - { - case ObjectAlignment::ObjectAlignmentNone: - p.set("ObjectAlignment", "None"); - break; - case ObjectAlignment::ObjectAlignmentTrue: - p.set("ObjectAlignment", "True"); - break; - case ObjectAlignment::ObjectAlignmentFalse: - p.set("ObjectAlignment", "False"); - break; - default: - std::cerr << "Fatal error. Invalid objectAlignment. " << objectAlignment << std::endl; - abort(); - } - p.set("PathAdjustmentInterval", pathAdjustmentInterval); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - p.set("GraphSharedMemorySize", graphSharedMemorySize); - p.set("TreeSharedMemorySize", treeSharedMemorySize); - p.set("ObjectSharedMemorySize", objectSharedMemorySize); -#endif - p.set("PrefetchOffset", prefetchOffset); - p.set("PrefetchSize", prefetchSize); - p.set("AccuracyTable", accuracyTable); - } - - void importProperty(NGT::PropertySet & p) - { - setDefault(); - dimension = p.getl("Dimension", dimension); - threadPoolSize = p.getl("ThreadPoolSize", threadPoolSize); - PropertySet::iterator it = p.find("ObjectType"); - if (it != p.end()) - { - if (it->second == "Float-4") - { - objectType = ObjectSpace::ObjectType::Float; - } - else if (it->second == "Integer-1") - { - objectType = ObjectSpace::ObjectType::Uint8; - } - else - { - std::cerr << "Invalid Object Type in the property. " << it->first << ":" << it->second << std::endl; - } - } - else - { - std::cerr << "Not found \"ObjectType\"" << std::endl; - } - it = p.find("DistanceType"); - if (it != p.end()) - { - if (it->second == "None") - { - distanceType = DistanceType::DistanceTypeNone; - } - else if (it->second == "L1") - { - distanceType = DistanceType::DistanceTypeL1; - } - else if (it->second == "L2") - { - distanceType = DistanceType::DistanceTypeL2; - } - else if (it->second == "IP") - { - distanceType = DistanceType::DistanceTypeIP; - } - else if (it->second == "Hamming") - { - distanceType = DistanceType::DistanceTypeHamming; - } - else if (it->second == "Jaccard") - { - distanceType = DistanceType::DistanceTypeJaccard; - } - else if (it->second == "SparseJaccard") - { - distanceType = DistanceType::DistanceTypeSparseJaccard; - } - else if (it->second == "Angle") - { - distanceType = DistanceType::DistanceTypeAngle; - } - else if (it->second == "Cosine") - { - distanceType = DistanceType::DistanceTypeCosine; - } - else if (it->second == "NormalizedAngle") - { - distanceType = DistanceType::DistanceTypeNormalizedAngle; - } - else if (it->second == "NormalizedCosine") - { - distanceType = DistanceType::DistanceTypeNormalizedCosine; - } - else - { - std::cerr << "Invalid Distance Type in the property. " << it->first << ":" << it->second << std::endl; - } - } - else - { - std::cerr << "Not found \"DistanceType\"" << std::endl; - } - it = p.find("IndexType"); - if (it != p.end()) - { - if (it->second == "GraphAndTree") - { - indexType = IndexType::GraphAndTree; - } - else if (it->second == "Graph") - { - indexType = IndexType::Graph; - } - else - { - std::cerr << "Invalid Index Type in the property. " << it->first << ":" << it->second << std::endl; - } - } - else - { - std::cerr << "Not found \"IndexType\"" << std::endl; - } - it = p.find("DatabaseType"); - if (it != p.end()) - { - if (it->second == "Memory") - { - databaseType = DatabaseType::Memory; - } - else if (it->second == "MemoryMappedFile") - { - databaseType = DatabaseType::MemoryMappedFile; - } - else - { - std::cerr << "Invalid Database Type in the property. " << it->first << ":" << it->second << std::endl; - } - } - else - { - std::cerr << "Not found \"DatabaseType\"" << std::endl; - } - it = p.find("ObjectAlignment"); - if (it != p.end()) - { - if (it->second == "None") - { - objectAlignment = ObjectAlignment::ObjectAlignmentNone; - } - else if (it->second == "True") - { - objectAlignment = ObjectAlignment::ObjectAlignmentTrue; - } - else if (it->second == "False") - { - objectAlignment = ObjectAlignment::ObjectAlignmentFalse; - } - else - { - std::cerr << "Invalid Object Alignment in the property. " << it->first << ":" << it->second << std::endl; - } - } - else - { - std::cerr << "Not found \"ObjectAlignment\"" << std::endl; - objectAlignment = ObjectAlignment::ObjectAlignmentFalse; - } - pathAdjustmentInterval = p.getl("PathAdjustmentInterval", pathAdjustmentInterval); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - graphSharedMemorySize = p.getl("GraphSharedMemorySize", graphSharedMemorySize); - treeSharedMemorySize = p.getl("TreeSharedMemorySize", treeSharedMemorySize); - objectSharedMemorySize = p.getl("ObjectSharedMemorySize", objectSharedMemorySize); -#endif - prefetchOffset = p.getl("PrefetchOffset", prefetchOffset); - prefetchSize = p.getl("PrefetchSize", prefetchSize); - it = p.find("AccuracyTable"); - if (it != p.end()) - { - accuracyTable = it->second; - } - it = p.find("SearchType"); - if (it != p.end()) - { - searchType = it->second; - } - } - - void set(NGT::Property & prop); - void get(NGT::Property & prop); - int64_t memSize() { return sizeof(*this); } - int dimension; - int threadPoolSize; - ObjectSpace::ObjectType objectType; - DistanceType distanceType; - IndexType indexType; - DatabaseType databaseType; - ObjectAlignment objectAlignment; - int pathAdjustmentInterval; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - int graphSharedMemorySize; - int treeSharedMemorySize; - int objectSharedMemorySize; -#endif - int prefetchOffset; - int prefetchSize; - std::string accuracyTable; - std::string searchType; // test - }; - - class InsertionResult - { - public: - InsertionResult() : id(0), identical(false), distance(0.0) {} - InsertionResult(size_t i, bool tf, Distance d) : id(i), identical(tf), distance(d) {} - int64_t memSize() { return sizeof(*this); } - size_t id; - bool identical; - Distance distance; // the distance between the centroid and the inserted object. - }; - - class AccuracyTable - { - public: - AccuracyTable(){}; - AccuracyTable(std::vector> & t) { set(t); } - AccuracyTable(std::string str) { set(str); } - void set(std::vector> & t) { table = t; } - int64_t memSize() { return sizeof(*this) + table.capacity() * sizeof(table[0]); } - void set(std::string str) - { - std::vector tokens; - Common::tokenize(str, tokens, ","); - if (tokens.size() < 2) - { - return; - } - for (auto i = tokens.begin(); i != tokens.end(); ++i) - { - std::vector ts; - Common::tokenize(*i, ts, ":"); - if (ts.size() != 2) - { - std::stringstream msg; - msg << "AccuracyTable: Invalid accuracy table string " << *i << ":" << str; - NGTThrowException(msg); - } - table.push_back(std::make_pair(Common::strtod(ts[0]), Common::strtod(ts[1]))); - } - } - - float getEpsilon(double accuracy) - { - if (table.size() <= 2) - { - std::stringstream msg; - msg << "AccuracyTable: The accuracy table is not set yet. The table size=" << table.size(); - NGTThrowException(msg); - } - if (accuracy > 1.0) - { - accuracy = 1.0; - } - std::pair lower, upper; - { - auto i = table.begin(); - for (; i != table.end(); ++i) - { - if ((*i).second >= accuracy) - { - break; - } - } - if (table.end() == i) - { - i -= 2; - } - else if (table.begin() != i) - { - i--; - } - lower = *i++; - upper = *i; - } - float e = lower.first + (upper.first - lower.first) * (accuracy - lower.second) / (upper.second - lower.second); - if (e < -0.9) - { - e = -0.9; - } - return e; - } - - std::string getString() - { - std::stringstream str; - for (auto i = table.begin(); i != table.end(); ++i) - { - str << (*i).first << ":" << (*i).second; - if (i + 1 != table.end()) - { - str << ","; - } - } - return str.str(); - } - std::vector> table; - }; - - Index() : index(0) {} -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Index(NGT::Property & prop, const std::string & database); -#else - Index(NGT::Property & prop); -#endif - Index(const std::string & database, bool rdOnly = false) : index(0) { open(database, rdOnly); } - Index(const std::string & database, NGT::Property & prop) : index(0) { open(database, prop); } - virtual ~Index() { close(); } - - void open(const std::string & database, NGT::Property & prop) - { - open(database); - setProperty(prop); - } - void open(const std::string & database, bool rdOnly = false); - - void close() - { - if (index != 0) - { - delete index; - index = 0; - } - path.clear(); - } - void save() - { - if (path.empty()) - { - NGTThrowException("NGT::Index::saveIndex: path is empty"); - } - saveIndex(path); - } -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - void save(std::string indexPath) { saveIndex(indexPath); } -#endif - static void mkdir(const std::string & dir) - { - if (::mkdir(dir.c_str(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) != 0) - { - std::stringstream msg; - msg << "NGT::Index::mkdir: Cannot make the specified directory. " << dir; - NGTThrowException(msg); - } - } - static void create(const std::string & database, NGT::Property & prop, bool redirect = false) - { - createGraphAndTree(database, prop, redirect); - } - // For milvus - static NGT::Index * createGraphAndTree(const float * row_data, NGT::Property & prop, size_t dataSize); - static NGT::Index * loadIndex(std::stringstream & obj, std::stringstream & grp, std::stringstream & prf, std::stringstream & tre); - static void createGraphAndTree( - const std::string & database, NGT::Property & prop, const std::string & dataFile, size_t dataSize = 0, bool redirect = false); - static void createGraphAndTree(const std::string & database, NGT::Property & prop, bool redirect = false) - { - createGraphAndTree(database, prop, "", redirect); - } - // For milvus - static NGT::Index * createGraph(const float * row_data, NGT::Property & prop, size_t dataSize); - static void createGraph( - const std::string & database, NGT::Property & prop, const std::string & dataFile, size_t dataSize = 0, bool redirect = false); - template - size_t insert(const std::vector & object); - template - size_t append(const std::vector & object); - // For milvus - static void append(Index * index_, const float * data, size_t dataSize, size_t threadSize); - static void append(const std::string & database, const std::string & dataFile, size_t threadSize, size_t dataSize); - static void append(const std::string & database, const float * data, size_t dataSize, size_t threadSize); - static void remove(const std::string & database, std::vector & objects, bool force = false); - static void exportIndex(const std::string & database, const std::string & file); - static void importIndex(const std::string & database, const std::string & file); - // For milvus - virtual void loadRawData(const float * raw_data, size_t dataSize) { getIndex().loadRawData(raw_data, dataSize); } - // For milvus - virtual size_t getDimension() { return getIndex().getDimension(); } - virtual void load(const std::string & ifile, size_t dataSize) { getIndex().load(ifile, dataSize); } - virtual void append(const std::string & ifile, size_t dataSize) { getIndex().append(ifile, dataSize); } - virtual void append(const float * data, size_t dataSize) - { - redirector.begin(); - try - { - getIndex().append(data, dataSize); - } - catch (Exception & err) - { - redirector.end(); - throw err; - } - redirector.end(); - } - virtual void append(const double * data, size_t dataSize) - { - redirector.begin(); - try - { - getIndex().append(data, dataSize); - } - catch (Exception & err) - { - redirector.end(); - throw err; - } - redirector.end(); - } - virtual size_t getObjectRepositorySize() { return getIndex().getObjectRepositorySize(); } - // For milvus - virtual size_t getNumberOfVectors() { return getIndex().getObjectRepositorySize() - 1; } - virtual void createIndex(size_t threadNumber, size_t sizeOfRepository = 0) - { - redirector.begin(); - try - { - getIndex().createIndex(threadNumber, sizeOfRepository); - } - catch (Exception & err) - { - redirector.end(); - throw err; - } - redirector.end(); - } - // for milvus - virtual void saveIndex(std::stringstream & obj, std::stringstream & grp, std::stringstream & prf, std::stringstream & tre) - { - getIndex().saveIndex(obj, grp, prf, tre); - } - virtual void saveIndex(const std::string & ofile) { getIndex().saveIndex(ofile); } - virtual void loadIndex(const std::string & ofile) { getIndex().loadIndex(ofile); } - virtual void loadIndexFromStream(std::stringstream & obj, std::stringstream & grp, std::stringstream & tre) - { - getIndex().loadIndexFromStream(obj, grp, tre); - } - virtual Object * allocateObject(const std::string & textLine, const std::string & sep) - { - return getIndex().allocateObject(textLine, sep); - } - virtual Object * allocateObject(const std::vector & obj) { return getIndex().allocateObject(obj); } - virtual Object * allocateObject(const std::vector & obj) { return getIndex().allocateObject(obj); } - virtual Object * allocateObject(const std::vector & obj) { return getIndex().allocateObject(obj); } - virtual Object * allocateObject(const float * obj, size_t size) { return getIndex().allocateObject(obj, size); } - virtual size_t getSizeOfElement() { return getIndex().getSizeOfElement(); } - virtual void setProperty(NGT::Property & prop) { getIndex().setProperty(prop); } - virtual void getProperty(NGT::Property & prop) { getIndex().getProperty(prop); } - virtual void deleteObject(Object * po) { getIndex().deleteObject(po); } - virtual void linearSearch(NGT::SearchContainer & sc) { getIndex().linearSearch(sc); } - virtual void linearSearch(NGT::SearchQuery & sc) { getIndex().linearSearch(sc); } - // for milvus - virtual void search(NGT::SearchContainer & sc, const faiss::BitsetView bitset) { getIndex().search(sc, bitset); } - virtual void search(NGT::SearchContainer & sc) { getIndex().search(sc); } - virtual void search(NGT::SearchQuery & sc) { getIndex().search(sc); } - virtual void search(NGT::SearchContainer & sc, ObjectDistances & seeds) { getIndex().search(sc, seeds); } - virtual void remove(ObjectID id, bool force = false) { getIndex().remove(id, force); } - virtual void exportIndex(const std::string & file) { getIndex().exportIndex(file); } - virtual void importIndex(const std::string & file) { getIndex().importIndex(file); } - virtual bool verify(std::vector & status, bool info = false, char mode = '-') { return getIndex().verify(status, info, mode); } - virtual ObjectSpace & getObjectSpace() { return getIndex().getObjectSpace(); } - virtual size_t - getSharedMemorySize(std::ostream & os, SharedMemoryAllocator::GetMemorySizeType t = SharedMemoryAllocator::GetTotalMemorySize) - { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - size_t osize = getObjectSpace().getRepository().getAllocator().getMemorySize(t); -#else - size_t osize = 0; -#endif - os << "object=" << osize << std::endl; - size_t isize = getIndex().getSharedMemorySize(os, t); - return osize + isize; - } - float getEpsilonFromExpectedAccuracy(double accuracy); - void searchUsingOnlyGraph(NGT::SearchContainer & sc) - { - sc.distanceComputationCount = 0; - sc.visitCount = 0; - ObjectDistances seeds; - getIndex().search(sc, seeds); - } - std::vector makeSparseObject(std::vector & object); - Index & getIndex() - { - if (index == 0) - { - assert(index != 0); - NGTThrowException("NGT::Index::getIndex: Index is unavailable."); - } - return *index; - } - void enableLog() { redirector.disable(); } - void disableLog() { redirector.enable(); } - - static void destroy(const std::string & path) - { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - std::remove(std::string(path + "/grp").c_str()); - std::remove(std::string(path + "/grpc").c_str()); - std::remove(std::string(path + "/trei").c_str()); - std::remove(std::string(path + "/treic").c_str()); - std::remove(std::string(path + "/trel").c_str()); - std::remove(std::string(path + "/trelc").c_str()); - std::remove(std::string(path + "/objpo").c_str()); - std::remove(std::string(path + "/objpoc").c_str()); -#else - std::remove(std::string(path + "/grp").c_str()); - std::remove(std::string(path + "/tre").c_str()); - std::remove(std::string(path + "/obj").c_str()); -#endif - std::remove(std::string(path + "/prf").c_str()); - std::remove(path.c_str()); - } - - static void version(std::ostream & os); - static std::string getVersion(); - std::string getPath() { return path; } - - virtual int64_t memSize() { return redirector.memSize() + sizeof(path) + (index ? getIndex().memSize() : 0); } - -protected: - Object * allocateObject(void * vec, const std::type_info & objectType) - { - if (vec == 0) - { - std::stringstream msg; - msg << "NGT::Index::allocateObject: Object is not set. "; - NGTThrowException(msg); - } - Object * object = 0; - if (objectType == typeid(float)) - { - object = allocateObject(*static_cast *>(vec)); - } - else if (objectType == typeid(double)) - { - object = allocateObject(*static_cast *>(vec)); - } - else if (objectType == typeid(uint8_t)) - { - object = allocateObject(*static_cast *>(vec)); - } - else - { - std::stringstream msg; - msg << "NGT::Index::allocateObject: Unavailable object type."; - NGTThrowException(msg); - } - return object; - } - - // For milvus - static void loadRawDataAndCreateIndex(Index * index_, const float * raw_data, size_t threadSize, size_t dataSize); - static void - loadAndCreateIndex(Index & index, const std::string & database, const std::string & dataFile, size_t threadSize, size_t dataSize); - - Index * index; - std::string path; - StdOstreamRedirector redirector; -}; - -class GraphIndex : public Index, public NeighborhoodGraph -{ -public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex(const std::string & allocator, bool rdOnly = false); - GraphIndex(const std::string & allocator, NGT::Property & prop) : readOnly(false) { initialize(allocator, prop); } - void initialize(const std::string & allocator, NGT::Property & prop); -#else // NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex(const std::string & database, bool rdOnly = false); - GraphIndex(NGT::Property & prop) : readOnly(false) { initialize(prop); } - - void initialize(NGT::Property & prop) - { - constructObjectSpace(prop); - setProperty(prop); - } - -#endif // NGT_SHARED_MEMORY_ALLOCATOR - - virtual ~GraphIndex() { destructObjectSpace(); } - void constructObjectSpace(NGT::Property & prop); - - void destructObjectSpace() - { - if (objectSpace == 0) - { - return; - } - if (property.objectType == NGT::ObjectSpace::ObjectType::Float) - { - ObjectSpaceRepository * os = (ObjectSpaceRepository *)objectSpace; -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - os->deleteAll(); -#endif - delete os; - } - else if (property.objectType == NGT::ObjectSpace::ObjectType::Uint8) - { - ObjectSpaceRepository * os = (ObjectSpaceRepository *)objectSpace; -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - os->deleteAll(); -#endif - delete os; - } - else - { - std::cerr << "Cannot find Object Type in the property. " << property.objectType << std::endl; - return; - } - objectSpace = 0; - } - - // For milvus - virtual void loadRawData(const float * row_data, size_t dataSize) - { - if (!dataSize) - return; - try - { - objectSpace->readRawData(row_data, dataSize); - } - catch (Exception & err) - { - throw(err); - } - } - - virtual void load(const std::string & ifile, size_t dataSize = 0) - { - if (ifile.empty()) - { - return; - } - std::istream * is; - std::ifstream * ifs = 0; - if (ifile == "-") - { - is = &std::cin; - } - else - { - ifs = new std::ifstream; - ifs->std::ifstream::open(ifile); - if (!(*ifs)) - { - std::stringstream msg; - msg << "Index::load: Cannot open the specified file. " << ifile; - NGTThrowException(msg); - } - is = ifs; - } - try - { - objectSpace->readText(*is, dataSize); - } - catch (Exception & err) - { - if (ifile != "-") - { - delete ifs; - } - throw(err); - } - if (ifile != "-") - { - delete ifs; - } - } - - virtual void append(const std::string & ifile, size_t dataSize = 0) - { - if (ifile.empty()) - { - return; - } - std::istream * is; - std::ifstream * ifs = 0; - if (ifile == "-") - { - is = &std::cin; - } - else - { - ifs = new std::ifstream; - ifs->std::ifstream::open(ifile); - if (!(*ifs)) - { - std::stringstream msg; - msg << "Index::load: Cannot open the specified file. " << ifile; - NGTThrowException(msg); - } - is = ifs; - } - try - { - objectSpace->appendText(*is, dataSize); - } - catch (Exception & err) - { - if (ifile != "-") - { - delete ifs; - } - throw(err); - } - if (ifile != "-") - { - delete ifs; - } - } - - virtual void append(const float * data, size_t dataSize) { objectSpace->append(data, dataSize); } - virtual void append(const double * data, size_t dataSize) { objectSpace->append(data, dataSize); } - - void saveObjectRepository(const std::string & ofile) - { -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - try - { - mkdir(ofile); - } - catch (...) - { - } - if (objectSpace != 0) - { - objectSpace->serialize(ofile + "/obj"); - } - else - { - std::cerr << "saveIndex::Warning! ObjectSpace is null. continue saving..." << std::endl; - } -#endif - } - - // for milvus - void saveObjectRepository(std::stringstream & obj) { objectSpace->serialize(obj); } - - void saveGraph(const std::string & ofile) - { -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - std::string fname = ofile + "/grp"; - std::ofstream osg(fname); - if (!osg.is_open()) - { - std::stringstream msg; - msg << "saveIndex:: Cannot open. " << fname; - NGTThrowException(msg); - } - repository.serialize(osg); -#endif - } - - // for milvus - void saveGraph(std::stringstream & grp) { repository.serialize(grp); } - - //for milvus - virtual void - saveIndex(std::stringstream & obj, std::stringstream & grp, std::stringstream & prf, [[maybe_unused]] std::stringstream & tre) - { - saveObjectRepository(obj); - saveGraph(grp); - saveProperty(prf); - } - virtual void saveIndex(const std::string & ofile) - { - saveObjectRepository(ofile); - saveGraph(ofile); - saveProperty(ofile); - } - - void saveProperty(std::stringstream & prf); - void saveProperty(const std::string & file); - - void exportProperty(const std::string & file); - - virtual void loadIndex(const std::string & ifile, bool readOnly); - - // for milvus - virtual void loadIndexFromStream(std::stringstream & obj, std::stringstream & grp, [[maybe_unused]] std::stringstream & tre) - { - objectSpace->deserialize(obj); - repository.deserialize(grp); - } - - virtual void exportIndex(const std::string & ofile) - { - try - { - mkdir(ofile); - } - catch (...) - { - std::stringstream msg; - msg << "exportIndex:: Cannot make the directory. " << ofile; - NGTThrowException(msg); - } - objectSpace->serializeAsText(ofile + "/obj"); - std::ofstream osg(ofile + "/grp"); - repository.serializeAsText(osg); - exportProperty(ofile); - } - - virtual void importIndex(const std::string & ifile) - { - objectSpace->deserializeAsText(ifile + "/obj"); - std::string fname = ifile + "/grp"; - std::ifstream isg(fname); - if (!isg.is_open()) - { - std::stringstream msg; - msg << "importIndex:: Cannot open. " << fname; - NGTThrowException(msg); - } - repository.deserializeAsText(isg); - } - - void linearSearch(NGT::SearchContainer & sc) - { - ObjectSpace::ResultSet results; - objectSpace->linearSearch(sc.object, sc.radius, sc.size, results); - ObjectDistances & qresults = sc.getResult(); - qresults.moveFrom(results); - } - - void linearSearch(NGT::SearchQuery & searchQuery) - { - Object * query = Index::allocateObject(searchQuery.getQuery(), searchQuery.getQueryType()); - try - { - NGT::SearchContainer sc(searchQuery, *query); - ObjectSpace::ResultSet results; - objectSpace->linearSearch(sc.object, sc.radius, sc.size, results); - ObjectDistances & qresults = sc.getResult(); - qresults.moveFrom(results); - } - catch (Exception & err) - { - deleteObject(query); - throw err; - } - deleteObject(query); - } - - // GraphIndex - virtual void search(NGT::SearchContainer & sc) - { - sc.distanceComputationCount = 0; - sc.visitCount = 0; - ObjectDistances seeds; - search(sc, seeds); - } - - // for milvus - virtual void search(NGT::SearchContainer & sc, const faiss::BitsetView bitset) - { - sc.distanceComputationCount = 0; - sc.visitCount = 0; - ObjectDistances seeds; - search(sc, seeds, bitset); - } - - void search(NGT::SearchQuery & searchQuery) - { - Object * query = Index::allocateObject(searchQuery.getQuery(), searchQuery.getQueryType()); - try - { - NGT::SearchContainer sc(searchQuery, *query); - sc.distanceComputationCount = 0; - sc.visitCount = 0; - ObjectDistances seeds; - search(sc, seeds); - } - catch (Exception & err) - { - deleteObject(query); - throw err; - } - deleteObject(query); - } - - // get randomly nodes as seeds. - template - void getRandomSeeds(REPOSITORY & repo, ObjectDistances & seeds, size_t seedSize) - { - // clear all distances to find the same object as a randomized object. - for (ObjectDistances::iterator i = seeds.begin(); i != seeds.end(); i++) - { - (*i).distance = 0.0; - } - size_t repositorySize = repo.size(); - repositorySize = repositorySize == 0 ? 0 : repositorySize - 1; // Because the head of repository is a dummy. - seedSize = seedSize > repositorySize ? repositorySize : seedSize; - std::vector deteted; - size_t emptyCount = 0; - while (seedSize > seeds.size()) - { - double random = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0); - size_t idx = floor(repositorySize * random) + 1; - if (repo.isEmpty(idx)) - { - emptyCount++; - if (emptyCount > repositorySize) - { - break; - } - continue; - } - ObjectDistance obj(idx, 0.0); - if (find(seeds.begin(), seeds.end(), obj) != seeds.end()) - { - continue; - } - seeds.push_back(obj); - } - } - - void remove(const ObjectID id, bool force) - { - if (!NeighborhoodGraph::repository.isEmpty(id)) - { - removeEdgesReliably(id); - } - try - { - getObjectRepository().remove(id); - } - catch (Exception & err) - { - std::cerr << "NGT::GraphIndex::remove:: cannot remove from feature. id=" << id << " " << err.what() << std::endl; - throw err; - } - } - - virtual void searchForNNGInsertion(Object & po, ObjectDistances & result) - { - NGT::SearchContainer sc(po); - sc.setResults(&result); - sc.size = NeighborhoodGraph::property.edgeSizeForCreation; - sc.radius = FLT_MAX; - sc.explorationCoefficient = NeighborhoodGraph::property.insertionRadiusCoefficient; - try - { - GraphIndex::search(sc); - } - catch (Exception & err) - { - throw err; - } - if (static_cast(result.size()) < NeighborhoodGraph::property.edgeSizeForCreation && result.size() < repository.size()) - { - if (sc.edgeSize != 0) - { - sc.edgeSize = 0; // not prune edges. - try - { - GraphIndex::search(sc); - } - catch (Exception & err) - { - throw err; - } - } - } - } - - void searchForKNNGInsertion(Object & po, ObjectID id, ObjectDistances & result) - { - double radius = FLT_MAX; - size_t size = NeighborhoodGraph::property.edgeSizeForCreation; - if (id > 0) - { - size = NeighborhoodGraph::property.edgeSizeForCreation + 1; - } - ObjectSpace::ResultSet rs; - objectSpace->linearSearch(po, radius, size, rs); - result.moveFrom(rs, id); - if ((size_t)NeighborhoodGraph::property.edgeSizeForCreation != result.size()) - { - std::cerr << "searchForKNNGInsert::Warning! inconsistency of the sizes. ID=" << id << " " - << NeighborhoodGraph::property.edgeSizeForCreation << ":" << result.size() << std::endl; - for (size_t i = 0; i < result.size(); i++) - { - std::cerr << result[i].id << ":" << result[i].distance << " "; - } - std::cerr << std::endl; - } - } - - virtual void insert(ObjectID id) - { - ObjectRepository & fr = objectSpace->getRepository(); - if (fr[id] == 0) - { - std::cerr << "NGTIndex::insert empty " << id << std::endl; - return; - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Object & po = *objectSpace->allocateObject(*fr[id]); -#else - Object & po = *fr[id]; -#endif - ObjectDistances rs; - if (NeighborhoodGraph::property.graphType == NeighborhoodGraph::GraphTypeANNG) - { - searchForNNGInsertion(po, rs); - } - else - { - searchForKNNGInsertion(po, id, rs); - } - insertNode(id, rs); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - objectSpace->deleteObject(&po); -#endif - } - - virtual void createIndex(); - virtual void createIndex(size_t threadNumber, size_t sizeOfRepository = 0); - - void checkGraph() - { - GraphRepository & repo = repository; - ObjectRepository & fr = objectSpace->getRepository(); - for (size_t id = 0; id < fr.size(); id++) - { - if (repo[id] == 0) - { - std::cerr << id << " empty" << std::endl; - continue; - } - if ((id % 10000) == 0) - { - std::cerr << "checkGraph: Processed size=" << id << std::endl; - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Object & po = *objectSpace->allocateObject(*fr[id]); -#else - Object & po = *fr[id]; -#endif - GraphNode * objects = getNode(id); - - ObjectDistances rs; - NeighborhoodGraph::property.edgeSizeForCreation = objects->size() + 1; - searchForNNGInsertion(po, rs); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - objectSpace->deleteObject(&po); -#endif - - if (rs.size() != objects->size()) - { - std::cerr << "Cannot get the specified number of the results. " << rs.size() << ":" << objects->size() << std::endl; - } - size_t count = 0; - ObjectDistances::iterator rsi = rs.begin(); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - for (GraphNode::iterator ri = objects->begin(repo.allocator); ri != objects->end(repo.allocator) && rsi != rs.end();) - { -#else - for (GraphNode::iterator ri = objects->begin(); ri != objects->end() && rsi != rs.end();) - { -#endif - if ((*ri).distance == (*rsi).distance && (*ri).id == (*rsi).id) - { - count++; - ri++; - rsi++; - } - else if ((*ri).distance < (*rsi).distance) - { - ri++; - } - else - { - rsi++; - } - } - if (count != objects->size()) - { - std::cerr << "id=" << id << " identities=" << count << " " << objects->size() << " " << rs.size() << std::endl; - } - } - } - - virtual bool verify(std::vector & status, bool info) - { - bool valid = true; - std::cerr << "Started verifying graph and objects" << std::endl; - GraphRepository & repo = repository; - ObjectRepository & fr = objectSpace->getRepository(); - if (repo.size() != fr.size()) - { - if (info) - { - std::cerr << "Warning! # of nodes is different from # of objects. " << repo.size() << ":" << fr.size() << std::endl; - } - } - status.clear(); - status.resize(fr.size(), 0); - for (size_t id = 1; id < fr.size(); id++) - { - status[id] |= repo[id] != 0 ? 0x02 : 0x00; - status[id] |= fr[id] != 0 ? 0x01 : 0x00; - } - for (size_t id = 1; id < fr.size(); id++) - { - if (fr[id] == 0) - { - if (id < repo.size() && repo[id] != 0) - { - std::cerr << "Error! The node exists in the graph, but the object does not exist. " << id << std::endl; - valid = false; - } - } - if (fr[id] != 0 && repo[id] == 0) - { - std::cerr << "Error. No." << id << " is not registerd in the graph." << std::endl; - valid = false; - } - if ((id % 1000000) == 0) - { - std::cerr << " verified " << id << " entries." << std::endl; - } - if (fr[id] != 0) - { - try - { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Object * po = objectSpace->allocateObject(*fr[id]); -#else - Object * po = fr[id]; -#endif - if (po == 0) - { - std::cerr << "Error! Cannot get the object. " << id << std::endl; - valid = false; - continue; - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - objectSpace->deleteObject(po); -#endif - } - catch (Exception & err) - { - std::cerr << "Error! Cannot get the object. " << id << " " << err.what() << std::endl; - valid = false; - continue; - } - } - if (id >= repo.size()) - { - std::cerr << "Error. No." << id << " is not registerd in the object repository. " << repo.size() << std::endl; - valid = false; - } - if (id < repo.size() && repo[id] != 0) - { - try - { - GraphNode * objects = getNode(id); - if (objects == 0) - { - std::cerr << "Error! Cannot get the node. " << id << std::endl; - valid = false; - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - for (GraphNode::iterator ri = objects->begin(repo.allocator); ri != objects->end(repo.allocator); ++ri) - { -#else - for (GraphNode::iterator ri = objects->begin(); ri != objects->end(); ++ri) - { -#endif -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - for (GraphNode::iterator rj = objects->begin(repo.allocator) + std::distance(objects->begin(repo.allocator), ri); - rj != objects->end(repo.allocator); - ++rj) - { - if ((*ri).id == (*rj).id - && std::distance(objects->begin(repo.allocator), ri) != std::distance(objects->begin(repo.allocator), rj)) - { - std::cerr << "Error! More than two identical objects! ID=" << (*rj).id - << " idx=" << std::distance(objects->begin(repo.allocator), ri) << ":" - << std::distance(objects->begin(repo.allocator), rj) << " disntace=" << (*ri).distance << ":" - << (*rj).distance << std::endl; -#else - for (GraphNode::iterator rj = objects->begin() + std::distance(objects->begin(), ri); rj != objects->end(); ++rj) - { - if ((*ri).id == (*rj).id && std::distance(objects->begin(), ri) != std::distance(objects->begin(), rj)) - { - std::cerr << "Error! More than two identical objects! ID=" << (*rj).id - << " idx=" << std::distance(objects->begin(), ri) << ":" << std::distance(objects->begin(), rj) - << " disntace=" << (*ri).distance << ":" << (*rj).distance << std::endl; -#endif - valid = false; - } - } - - if ((*ri).id == 0 || (*ri).id >= repo.size()) - { - std::cerr << "Error! Neighbor's ID of the node is out of range. ID=" << id << std::endl; - valid = false; - } - else if (repo[(*ri).id] == 0) - { - std::cerr << "Error! The neighbor ID of the node is invalid. ID=" << id << " Invalid ID=" << (*ri).id - << std::endl; - if (fr[(*ri).id] == 0) - { - std::cerr << "The neighbor doesn't exist in the object repository as well. ID=" << (*ri).id << std::endl; - } - else - { - std::cerr << "The neighbor exists in the object repository. ID=" << (*ri).id << std::endl; - } - valid = false; - } - if ((*ri).distance < 0.0) - { - std::cerr << "Error! Neighbor's distance is munus. ID=" << id << std::endl; - valid = false; - } - } - } - catch (Exception & err) - { - std::cerr << "Error! Cannot get the node. " << id << " " << err.what() << std::endl; - valid = false; - } - } - } - return valid; - } - - static bool showStatisticsOfGraph(NGT::GraphIndex & outGraph, char mode = '-', size_t edgeSize = UINT_MAX); - - size_t getObjectRepositorySize() { return objectSpace->getRepository().size(); } - // For milvus - virtual size_t getNumberOfVector() { return getObjectRepositorySize() - 1; } - - size_t getSizeOfElement() { return objectSpace->getSizeOfElement(); } - - // For milvus - virtual size_t getDimension() { return objectSpace->getDimension(); } - - Object * allocateObject(const std::string & textLine, const std::string & sep) - { - return objectSpace->allocateNormalizedObject(textLine, sep); - } - Object * allocateObject(const std::vector & obj) { return objectSpace->allocateNormalizedObject(obj); } - Object * allocateObject(const std::vector & obj) { return objectSpace->allocateNormalizedObject(obj); } - Object * allocateObject(const std::vector & obj) { return objectSpace->allocateNormalizedObject(obj); } - Object * allocateObject(const float * obj, size_t size) { return objectSpace->allocateNormalizedObject(obj, size); } - - void deleteObject(Object * po) { return objectSpace->deleteObject(po); } - - ObjectSpace & getObjectSpace() { return *objectSpace; } - - void setupPrefetch(NGT::Property & prop); - - void setProperty(NGT::Property & prop) - { - setupPrefetch(prop); - GraphIndex::property.set(prop); - NeighborhoodGraph::property.set(prop); - assert(property.dimension != 0); - accuracyTable.set(property.accuracyTable); - } - - void getProperty(NGT::Property & prop) - { - GraphIndex::property.get(prop); - NeighborhoodGraph::property.get(prop); - } - - NeighborhoodGraph::Property & getGraphProperty() { return NeighborhoodGraph::property; } - Index::Property & getGraphIndexProperty() { return GraphIndex::property; } - - virtual size_t getSharedMemorySize(std::ostream & os, SharedMemoryAllocator::GetMemorySizeType t) - { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - size_t size = repository.getAllocator().getMemorySize(t); -#else - size_t size = 0; -#endif - os << "graph=" << size << std::endl; - return size; - } - - float getEpsilonFromExpectedAccuracy(double accuracy) { return accuracyTable.getEpsilon(accuracy); } - - Index::Property & getProperty() { return property; } - -protected: - template - void getSeedsFromGraph(REPOSITORY & repo, ObjectDistances & seeds) - { - if (repo.size() != 0) - { - size_t seedSize = repo.size() - 1 < (size_t)NeighborhoodGraph::property.seedSize ? repo.size() - 1 - : (size_t)NeighborhoodGraph::property.seedSize; - if (NeighborhoodGraph::property.seedType == NeighborhoodGraph::SeedTypeRandomNodes - || NeighborhoodGraph::property.seedType == NeighborhoodGraph::SeedTypeNone) - { - getRandomSeeds(repo, seeds, seedSize); - } - else if (NeighborhoodGraph::property.seedType == NeighborhoodGraph::SeedTypeFixedNodes) - { - // To check speed using fixed seeds. - for (size_t i = 1; i <= seedSize; i++) - { - ObjectDistance obj(i, 0.0); - seeds.push_back(obj); - } - } - else if (NeighborhoodGraph::property.seedType == NeighborhoodGraph::SeedTypeFirstNode) - { - ObjectDistance obj(1, 0.0); - seeds.push_back(obj); - } - else - { - getRandomSeeds(repo, seeds, seedSize); - } - } - } - - // GraphIndex - virtual void search(NGT::SearchContainer & sc, ObjectDistances & seeds) - { - if (sc.size == 0) - { - while (!sc.workingResult.empty()) - sc.workingResult.pop(); - return; - } - if (seeds.size() == 0) - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) || !defined(NGT_GRAPH_READ_ONLY_GRAPH) - getSeedsFromGraph(repository, seeds); -#else - if (readOnly) - { - getSeedsFromGraph(searchRepository, seeds); - } - else - { - getSeedsFromGraph(repository, seeds); - } -#endif - } - if (sc.expectedAccuracy > 0.0) - { - sc.setEpsilon(getEpsilonFromExpectedAccuracy(sc.expectedAccuracy)); - } - - NGT::SearchContainer so(sc); - try - { - if (readOnly) - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) || !defined(NGT_GRAPH_READ_ONLY_GRAPH) - NeighborhoodGraph::search(so, seeds); -#else - (*searchUnupdatableGraph)(*this, so, seeds); -#endif - } - else - { - NeighborhoodGraph::search(so, seeds); - } - sc.workingResult = std::move(so.workingResult); - sc.distanceComputationCount = so.distanceComputationCount; - sc.visitCount = so.visitCount; - } - catch (Exception & err) - { - std::cerr << err.what() << std::endl; - Exception e(err); - throw e; - } - } - - // for milvus - virtual void search(NGT::SearchContainer & sc, ObjectDistances & seeds, const faiss::BitsetView bitset) - { - if (sc.size == 0) - { - while (!sc.workingResult.empty()) - sc.workingResult.pop(); - return; - } - if (seeds.size() == 0) - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) || !defined(NGT_GRAPH_READ_ONLY_GRAPH) - getSeedsFromGraph(repository, seeds); -#else - if (readOnly) - { - getSeedsFromGraph(searchRepository, seeds); - } - else - { - getSeedsFromGraph(repository, seeds); - } -#endif - } - if (sc.expectedAccuracy > 0.0) - { - sc.setEpsilon(getEpsilonFromExpectedAccuracy(sc.expectedAccuracy)); - } - - NGT::SearchContainer so(sc); - try - { - if (readOnly) - { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) || !defined(NGT_GRAPH_READ_ONLY_GRAPH) - NeighborhoodGraph::search(so, seeds); -#else - (*searchUnupdatableGraph)(*this, so, seeds); -#endif - } - else - { - NeighborhoodGraph::search(so, seeds, bitset); - } - sc.workingResult = std::move(so.workingResult); - sc.distanceComputationCount = so.distanceComputationCount; - sc.visitCount = so.visitCount; - } - catch (Exception & err) - { - std::cerr << err.what() << std::endl; - Exception e(err); - throw e; - } - } - - virtual int64_t memSize() { return property.memSize() + sizeof(readOnly) + accuracyTable.memSize() + Index::memSize() + NeighborhoodGraph::memSize(); } - Index::Property property; - - bool readOnly; -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - void (*searchUnupdatableGraph)(NGT::NeighborhoodGraph &, NGT::SearchContainer &, NGT::ObjectDistances &); -#endif - - Index::AccuracyTable accuracyTable; -}; - -class GraphAndTreeIndex : public GraphIndex, public DVPTree -{ -public: - - virtual int64_t memSize() { return GraphIndex::memSize() + DVPTree::memSize(); } - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphAndTreeIndex(const std::string & allocator, bool rdOnly = false) : GraphIndex(allocator, false) { initialize(allocator, 0); } - GraphAndTreeIndex(const std::string & allocator, NGT::Property & prop); - void initialize(const std::string & allocator, size_t sharedMemorySize) - { - DVPTree::objectSpace = GraphIndex::objectSpace; - DVPTree::open(allocator + "/tre", sharedMemorySize); - } -#else - GraphAndTreeIndex(const std::string & database, bool rdOnly = false) : GraphIndex(database, rdOnly) - { - GraphAndTreeIndex::loadIndex(database, rdOnly); - } - - GraphAndTreeIndex(NGT::Property & prop) : GraphIndex(prop) { DVPTree::objectSpace = GraphIndex::objectSpace; } -#endif - virtual ~GraphAndTreeIndex() {} - - void create() {} - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - void alignObjects() {} -#else - void alignObjects() - { - NGT::ObjectSpace & space = getObjectSpace(); - NGT::ObjectRepository & repo = space.getRepository(); - Object ** object = repo.getPtr(); - std::vector exist(repo.size(), false); - std::vector leafNodeIDs; - DVPTree::getAllLeafNodeIDs(leafNodeIDs); - size_t objectCount = 0; - for (size_t i = 0; i < leafNodeIDs.size(); i++) - { - ObjectDistances objects; - DVPTree::getObjectIDsFromLeaf(leafNodeIDs[i], objects); - for (size_t j = 0; j < objects.size(); j++) - { - exist[objects[j].id] = true; - objectCount++; - } - } - std::multimap notexist; - if (objectCount != repo.size()) - { - for (size_t id = 1; id < exist.size(); id++) - { - if (!exist[id]) - { - DVPTree::SearchContainer tso(*object[id]); - tso.mode = DVPTree::SearchContainer::SearchLeaf; - tso.radius = 0.0; - tso.size = 1; - try - { - DVPTree::search(tso); - } - catch (Exception & err) - { - std::stringstream msg; - msg << "GraphAndTreeIndex::getSeeds: Cannot search for tree.:" << err.what(); - NGTThrowException(msg); - } - notexist.insert(std::pair(tso.nodeID.getID(), id)); - objectCount++; - } - } - } - assert(objectCount == repo.size() - 1); - - objectCount = 1; - std::vector> order; - for (size_t i = 0; i < leafNodeIDs.size(); i++) - { - ObjectDistances objects; - DVPTree::getObjectIDsFromLeaf(leafNodeIDs[i], objects); - for (size_t j = 0; j < objects.size(); j++) - { - order.push_back(std::pair(objects[j].id, objectCount)); - objectCount++; - } - auto nei = notexist.equal_range(leafNodeIDs[i].getID()); - for (auto ii = nei.first; ii != nei.second; ++ii) - { - order.push_back(std::pair((*ii).second, objectCount)); - objectCount++; - } - } - assert(objectCount == repo.size()); - Object * tmp = space.allocateObject(); - std::unordered_set uncopiedObjects; - for (size_t i = 1; i < repo.size(); i++) - { - uncopiedObjects.insert(i); - } - size_t copycount = 0; - while (!uncopiedObjects.empty()) - { - size_t startID = *uncopiedObjects.begin(); - if (startID == order[startID - 1].first) - { - uncopiedObjects.erase(startID); - copycount++; - continue; - } - size_t id = startID; - space.copy(*tmp, *object[id]); - uncopiedObjects.erase(id); - do - { - space.copy(*object[id], *object[order[id - 1].first]); - copycount++; - id = order[id - 1].first; - uncopiedObjects.erase(id); - } while (order[id - 1].first != startID); - space.copy(*object[id], *tmp); - copycount++; - } - space.deleteObject(tmp); - - assert(copycount == repo.size() - 1); - - sort(order.begin(), order.end()); - uncopiedObjects.clear(); - for (size_t i = 1; i < repo.size(); i++) - { - uncopiedObjects.insert(i); - } - copycount = 0; - Object * tmpPtr; - while (!uncopiedObjects.empty()) - { - size_t startID = *uncopiedObjects.begin(); - if (startID == order[startID - 1].second) - { - uncopiedObjects.erase(startID); - copycount++; - continue; - } - size_t id = startID; - tmpPtr = object[id]; - uncopiedObjects.erase(id); - do - { - object[id] = object[order[id - 1].second]; - copycount++; - id = order[id - 1].second; - uncopiedObjects.erase(id); - } while (order[id - 1].second != startID); - object[id] = tmpPtr; - copycount++; - } - assert(copycount == repo.size() - 1); - } -#endif // NGT_SHARED_MEMORY_ALLOCATOR - - void load(const std::string & ifile) - { - GraphIndex::load(ifile); - DVPTree::objectSpace = GraphIndex::objectSpace; - } - - // for milvus - void saveIndex(std::stringstream & obj, std::stringstream & grp, std::stringstream & prf, [[maybe_unused]] std::stringstream & tre) - { - GraphIndex::saveIndex(obj, grp, prf, tre); - DVPTree::serialize(tre); - } - - void saveIndex(const std::string & ofile) - { - GraphIndex::saveIndex(ofile); -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - std::string fname = ofile + "/tre"; - std::ofstream ost(fname); - if (!ost.is_open()) - { - std::stringstream msg; - msg << "saveIndex:: Cannot open. " << fname; - NGTThrowException(msg); - } - DVPTree::serialize(ost); -#endif - } - // for milvus - void loadIndexFromStream(std::stringstream & obj, std::stringstream & grp, [[maybe_unused]] std::stringstream & tre) - { - GraphIndex::objectSpace->deserialize(obj); - repository.deserialize(grp); - DVPTree::objectSpace = GraphIndex::objectSpace; - DVPTree::deserialize(tre); - } - - void loadIndex(const std::string & ifile, bool readOnly) - { - DVPTree::objectSpace = GraphIndex::objectSpace; - std::ifstream ist(ifile + "/tre"); - DVPTree::deserialize(ist); -#ifdef NGT_GRAPH_READ_ONLY_GRAPH - if (readOnly) - { - if (property.objectAlignment == NGT::Index::Property::ObjectAlignmentTrue) - { - alignObjects(); - } - GraphIndex::NeighborhoodGraph::loadSearchGraph(ifile); - } -#endif - } - - void exportIndex(const std::string & ofile) - { - GraphIndex::exportIndex(ofile); - std::ofstream ost(ofile + "/tre"); - DVPTree::serializeAsText(ost); - } - - void importIndex(const std::string & ifile) - { - std::string fname = ifile + "/tre"; - std::ifstream ist(fname); - if (!ist.is_open()) - { - std::stringstream msg; - msg << "importIndex:: Cannot open. " << fname; - NGTThrowException(msg); - } - DVPTree::deserializeAsText(ist); - GraphIndex::importIndex(ifile); - } - - void remove(const ObjectID id, bool force = false) - { - Object * obj = 0; - try - { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - obj = GraphIndex::objectSpace->allocateObject(*GraphIndex::objectSpace->getRepository().get(id)); -#else - obj = GraphIndex::objectSpace->getRepository().get(id); -#endif - } - catch (Exception & err) - { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(obj); -#endif - if (force) - { - try - { - DVPTree::removeNaively(id); - } - catch (...) - { - } - try - { - GraphIndex::remove(id, force); - } - catch (...) - { - } - std::stringstream msg; - msg << err.what() - << " Even though the object could not be found, the object could be removed from the tree and graph if it existed in " - "them."; - NGTThrowException(msg); - } - throw err; - } - if (NeighborhoodGraph::repository.isEmpty(id)) - { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(obj); -#endif - if (force) - { - try - { - DVPTree::removeNaively(id); - } - catch (...) - { - } - } - GraphIndex::remove(id, force); - return; - } - NGT::SearchContainer so(*obj); - ObjectDistances results; - so.setResults(&results); - so.id = 0; - so.size = 2; - so.radius = 0.0; - so.explorationCoefficient = 1.1; - ObjectDistances seeds; - seeds.push_back(ObjectDistance(id, 0.0)); - GraphIndex::search(so, seeds); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(obj); -#endif - if (results.size() == 0) - { - NGTThrowException("Not found the specified id"); - } - if (results.size() == 1) - { - try - { - DVPTree::remove(id); - } - catch (Exception & err) - { - std::stringstream msg; - msg << "remove:: cannot remove from tree. id=" << id << " " << err.what(); - NGTThrowException(msg); - } - } - else - { - ObjectID replaceID = id == results[0].id ? results[1].id : results[0].id; - try - { - DVPTree::replace(id, replaceID); - } - catch (Exception & err) - { - } - } - GraphIndex::remove(id, force); - } - - void searchForNNGInsertion(Object & po, ObjectDistances & result) - { - NGT::SearchContainer sc(po); - sc.setResults(&result); - sc.size = NeighborhoodGraph::property.edgeSizeForCreation; - sc.radius = FLT_MAX; - sc.explorationCoefficient = NeighborhoodGraph::property.insertionRadiusCoefficient; - sc.useAllNodesInLeaf = true; - try - { - GraphAndTreeIndex::search(sc); - } - catch (Exception & err) - { - throw err; - } - if (static_cast(result.size()) < NeighborhoodGraph::property.edgeSizeForCreation && result.size() < repository.size()) - { - if (sc.edgeSize != 0) - { - try - { - GraphAndTreeIndex::search(sc); - } - catch (Exception & err) - { - throw err; - } - } - } - } - - void insert(ObjectID id) - { - ObjectRepository & fr = GraphIndex::objectSpace->getRepository(); - if (fr[id] == 0) - { - std::cerr << "GraphAndTreeIndex::insert empty " << id << std::endl; - return; - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Object & po = *GraphIndex::objectSpace->allocateObject(*fr[id]); -#else - Object & po = *fr[id]; -#endif - ObjectDistances rs; - if (NeighborhoodGraph::property.graphType == NeighborhoodGraph::GraphTypeANNG) - { - searchForNNGInsertion(po, rs); - } - else - { - searchForKNNGInsertion(po, id, rs); - } - - GraphIndex::insertNode(id, rs); - - if (((rs.size() > 0) && (rs[0].distance != 0.0)) || rs.size() == 0) - { - DVPTree::InsertContainer tiobj(po, id); - try - { - DVPTree::insert(tiobj); - } - catch (Exception & err) - { - std::cerr << "GraphAndTreeIndex::insert: Fatal error" << std::endl; - std::cerr << err.what() << std::endl; - return; - } - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - GraphIndex::objectSpace->deleteObject(&po); -#endif - } - - void createIndex(size_t threadNumber, size_t sizeOfRepository = 0); - - void createIndex( - const std::vector> & objects, - std::vector & ids, - double range, - size_t threadNumber); - - void createTreeIndex(); - - // GraphAndTreeIndex - void getSeedsFromTree(NGT::SearchContainer & sc, ObjectDistances & seeds) - { - DVPTree::SearchContainer tso(sc.object); - tso.mode = DVPTree::SearchContainer::SearchLeaf; - tso.radius = 0.0; - tso.size = 1; - tso.distanceComputationCount = 0; - tso.visitCount = 0; - try - { - DVPTree::search(tso); - } - catch (Exception & err) - { - std::stringstream msg; - msg << "GraphAndTreeIndex::getSeeds: Cannot search for tree.:" << err.what(); - NGTThrowException(msg); - } - - try - { - DVPTree::getObjectIDsFromLeaf(tso.nodeID, seeds); - } - catch (Exception & err) - { - std::stringstream msg; - msg << "GraphAndTreeIndex::getSeeds: Cannot get a leaf.:" << err.what(); - NGTThrowException(msg); - } - sc.distanceComputationCount += tso.distanceComputationCount; - sc.visitCount += tso.visitCount; - if (sc.useAllNodesInLeaf || NeighborhoodGraph::property.seedType == NeighborhoodGraph::SeedTypeAllLeafNodes) - { - return; - } - // if seedSize is zero, the result size of the query is used as seedSize. - size_t seedSize = NeighborhoodGraph::property.seedSize == 0 ? sc.size : NeighborhoodGraph::property.seedSize; - seedSize = seedSize > sc.size ? sc.size : seedSize; - if (seeds.size() > seedSize) - { - srand(tso.nodeID.getID()); - // to accelerate thinning data. - for (size_t i = seeds.size(); i > seedSize; i--) - { - double random = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0); - size_t idx = floor(i * random); - seeds[idx] = seeds[i - 1]; - } - seeds.resize(seedSize); - } - else if (seeds.size() < seedSize) - { - // A lack of the seeds is compansated by random seeds. - //getRandomSeeds(seeds, seedSize); - } - } - - // GraphAndTreeIndex - void search(NGT::SearchContainer & sc) - { - sc.distanceComputationCount = 0; - sc.visitCount = 0; - ObjectDistances seeds; - getSeedsFromTree(sc, seeds); - GraphIndex::search(sc, seeds); - } - - // for milvus - void - getSeedsFromTree(NGT::SearchContainer& sc, ObjectDistances& seeds, const faiss::BitsetView bitset) { - DVPTree::SearchContainer tso(sc.object); - tso.mode = DVPTree::SearchContainer::SearchLeaf; - tso.radius = 0.0; - tso.size = 1; - tso.distanceComputationCount = 0; - tso.visitCount = 0; - try - { - DVPTree::search(tso); - } - catch (Exception & err) - { - std::stringstream msg; - msg << "GraphAndTreeIndex::getSeeds: Cannot search for tree.:" << err.what(); - NGTThrowException(msg); - } - - try - { - DVPTree::getObjectIDsFromLeaf(tso.nodeID, seeds, bitset); - } - catch (Exception & err) - { - std::stringstream msg; - msg << "GraphAndTreeIndex::getSeeds: Cannot get a leaf.:" << err.what(); - NGTThrowException(msg); - } - sc.distanceComputationCount += tso.distanceComputationCount; - sc.visitCount += tso.visitCount; - if (sc.useAllNodesInLeaf || NeighborhoodGraph::property.seedType == NeighborhoodGraph::SeedTypeAllLeafNodes) - { - return; - } - // if seedSize is zero, the result size of the query is used as seedSize. - size_t seedSize = NeighborhoodGraph::property.seedSize == 0 ? sc.size : NeighborhoodGraph::property.seedSize; - seedSize = seedSize > sc.size ? sc.size : seedSize; - if (seeds.size() > seedSize) - { - srand(tso.nodeID.getID()); - // to accelerate thinning data. - for (size_t i = seeds.size(); i > seedSize; i--) - { - double random = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0); - size_t idx = floor(i * random); - seeds[idx] = seeds[i - 1]; - } - seeds.resize(seedSize); - } - else if (seeds.size() < seedSize) - { - // A lack of the seeds is compansated by random seeds. - //getRandomSeeds(seeds, seedSize); - } - } - - // for milvus - void search(NGT::SearchContainer & sc, const faiss::BitsetView bitset) - { - sc.distanceComputationCount = 0; - sc.visitCount = 0; - ObjectDistances seeds; - getSeedsFromTree(sc, seeds, bitset); - GraphIndex::search(sc, seeds, bitset); - } - - void search(NGT::SearchQuery & searchQuery) - { - Object * query = Index::allocateObject(searchQuery.getQuery(), searchQuery.getQueryType()); - try - { - NGT::SearchContainer sc(searchQuery, *query); - sc.distanceComputationCount = 0; - sc.visitCount = 0; - ObjectDistances seeds; - getSeedsFromTree(sc, seeds); - GraphIndex::search(sc, seeds); - } - catch (Exception & err) - { - deleteObject(query); - throw err; - } - deleteObject(query); - } - - size_t getSharedMemorySize(std::ostream & os, SharedMemoryAllocator::GetMemorySizeType t) - { - return GraphIndex::getSharedMemorySize(os, t) + DVPTree::getSharedMemorySize(os, t); - } - - bool verify(std::vector & status, bool info, char mode); -}; - -class Property : public Index::Property, public NeighborhoodGraph::Property -{ -public: - void setDefault() - { - Index::Property::setDefault(); - NeighborhoodGraph::Property::setDefault(); - } - - void setDefaultForCreateIndex() - { - setDefault(); - edgeSizeForSearch = 40; - threadPoolSize = NGT::getCoreNumber(); - } - - void clear() - { - Index::Property::clear(); - NeighborhoodGraph::Property::clear(); - } - void set(NGT::Property & p) - { - Index::Property::set(p); - NeighborhoodGraph::Property::set(p); - } - - // for milvus - void load(std::stringstream & prf) - { - NGT::PropertySet prop; - prop.load(prf); - Index::Property::importProperty(prop); - NeighborhoodGraph::Property::importProperty(prop); - } - - void load(const std::string & file) - { - NGT::PropertySet prop; - prop.load(file + "/prf"); - Index::Property::importProperty(prop); - NeighborhoodGraph::Property::importProperty(prop); - } - - void save(const std::string & file) - { - NGT::PropertySet prop; - Index::Property::exportProperty(prop); - NeighborhoodGraph::Property::exportProperty(prop); - prop.save(file + "/prf"); - } - - // for milvus - static void save(GraphIndex & graphIndex, std::stringstream & prf) - { - NGT::PropertySet prop; - graphIndex.getGraphIndexProperty().exportProperty(prop); - graphIndex.getGraphProperty().exportProperty(prop); - prop.save(prf); - } - - static void save(GraphIndex & graphIndex, const std::string & file) - { - NGT::PropertySet prop; - graphIndex.getGraphIndexProperty().exportProperty(prop); - graphIndex.getGraphProperty().exportProperty(prop); - prop.save(file + "/prf"); - } - - void importProperty(const std::string & file) - { - NGT::PropertySet prop; - prop.load(file + "/prf"); - Index::Property::importProperty(prop); - NeighborhoodGraph::Property::importProperty(prop); - } - - static void exportProperty(GraphIndex & graphIndex, const std::string & file) - { - NGT::PropertySet prop; - graphIndex.getGraphIndexProperty().exportProperty(prop); - graphIndex.getGraphProperty().exportProperty(prop); - prop.save(file + "/prf"); - } -}; - -} // namespace NGT - -template -size_t NGT::Index::append(const std::vector & object) -{ - if (getObjectSpace().getRepository().size() == 0) - { - getObjectSpace().getRepository().initialize(); - } - - auto * o = getObjectSpace().getRepository().allocateNormalizedPersistentObject(object); - getObjectSpace().getRepository().push_back(dynamic_cast(o)); - size_t oid = getObjectSpace().getRepository().size() - 1; - return oid; -} - -template -size_t NGT::Index::insert(const std::vector & object) -{ - if (getObjectSpace().getRepository().size() == 0) - { - getObjectSpace().getRepository().initialize(); - } - - auto * o = getObjectSpace().getRepository().allocateNormalizedPersistentObject(object); - size_t oid = getObjectSpace().getRepository().insert(dynamic_cast(o)); - return oid; -} - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManager.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManager.cpp deleted file mode 100644 index 85d04f00f4..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManager.cpp +++ /dev/null @@ -1,457 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "MmapManagerImpl.hpp" - -namespace MemoryManager{ - // static method --- - void MmapManager::setDefaultOptionValue(init_option_st &optionst) - { - optionst.use_expand = MMAP_DEFAULT_ALLOW_EXPAND; - optionst.reuse_type = REUSE_DATA_CLASSIFY; - } - - size_t MmapManager::getAlignSize(size_t size){ - if((size % MMAP_MEMORY_ALIGN) == 0){ - return size; - }else{ - return ( (size >> MMAP_MEMORY_ALIGN_EXP ) + 1 ) * MMAP_MEMORY_ALIGN; - } - } - // static method --- - - - MmapManager::MmapManager():_impl(new MmapManager::Impl(*this)) - { - for(uint64_t i = 0; i < MMAP_MAX_UNIT_NUM; ++i){ - _impl->mmapDataAddr[i] = NULL; - } - } - - MmapManager::~MmapManager() = default; - - void MmapManager::dumpHeap() const - { - _impl->dumpHeap(); - } - - bool MmapManager::isOpen() const - { - return _impl->isOpen; - } - - void *MmapManager::getEntryHook() const { - return getAbsAddr(_impl->mmapCntlHead->entry_p); - } - - void MmapManager::setEntryHook(const void *entry_p){ - _impl->mmapCntlHead->entry_p = getRelAddr(entry_p); - } - - - bool MmapManager::init(const std::string &filePath, size_t size, const init_option_st *optionst) const - { - try{ - const std::string controlFile = filePath + MMAP_CNTL_FILE_SUFFIX; - - struct stat st; - if(stat(controlFile.c_str(), &st) == 0){ - return false; - } - if(filePath.length() > MMAP_MAX_FILE_NAME_LENGTH){ - std::cerr << "too long filepath" << std::endl; - return false; - } - if((size % sysconf(_SC_PAGESIZE) != 0) || ( size < MMAP_LOWER_SIZE )){ - std::cerr << "input size error" << std::endl; - return false; - } - - int32_t fd = _impl->formatFile(controlFile, MMAP_CNTL_FILE_SIZE); - assert(fd >= 0); - - errno = 0; - char *cntl_p = (char *)mmap(NULL, MMAP_CNTL_FILE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if(cntl_p == MAP_FAILED){ - const std::string err_str = getErrorStr(errno); - if(close(fd) == -1) std::cerr << controlFile << "[WARN] : filedescript cannot close" << std::endl; - throw MmapManagerException(controlFile + " " + err_str); - } - if(close(fd) == -1) std::cerr << controlFile << "[WARN] : filedescript cannot close" << std::endl; - - try { - fd = _impl->formatFile(filePath, size); - } catch (MmapManagerException &err) { - if(munmap(cntl_p, MMAP_CNTL_FILE_SIZE) == -1) { - throw MmapManagerException("[ERR] : munmap error : " + getErrorStr(errno) + - " : Through the exception : " + err.what()); - } - throw err; - } - if(close(fd) == -1) std::cerr << controlFile << "[WARN] : filedescript cannot close" << std::endl; - - boot_st bootStruct = {0}; - control_st controlStruct = {0}; - _impl->initBootStruct(bootStruct, size); - _impl->initControlStruct(controlStruct, size); - - char *cntl_head = cntl_p; - cntl_head += sizeof(boot_st); - - if(optionst != NULL){ - controlStruct.use_expand = optionst->use_expand; - controlStruct.reuse_type = optionst->reuse_type; - } - - memcpy(cntl_p, (char *)&bootStruct, sizeof(boot_st)); - memcpy(cntl_head, (char *)&controlStruct, sizeof(control_st)); - - errno = 0; - if(munmap(cntl_p, MMAP_CNTL_FILE_SIZE) == -1) throw MmapManagerException("munmap error : " + getErrorStr(errno)); - - return true; - }catch(MmapManagerException &e){ - std::cerr << "init error. " << e.what() << std::endl; - throw e; - } - } - - bool MmapManager::openMemory(const std::string &filePath) - { - try{ - if(_impl->isOpen == true){ - std::string err_str = "[ERROR] : openMemory error (double open)."; - throw MmapManagerException(err_str); - } - - const std::string controlFile = filePath + MMAP_CNTL_FILE_SUFFIX; - _impl->filePath = filePath; - - int32_t fd; - - errno = 0; - if((fd = open(controlFile.c_str(), O_RDWR, 0666)) == -1){ - const std::string err_str = getErrorStr(errno); - throw MmapManagerException("file open error" + err_str); - } - - errno = 0; - boot_st *boot_p = (boot_st*)mmap(NULL, MMAP_CNTL_FILE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if(boot_p == MAP_FAILED){ - const std::string err_str = getErrorStr(errno); - if(close(fd) == -1) std::cerr << controlFile << "[WARN] : filedescript cannot close" << std::endl; - throw MmapManagerException(controlFile + " " + err_str); - } - if(close(fd) == -1) std::cerr << controlFile << "[WARN] : filedescript cannot close" << std::endl; - - if(boot_p->version != MMAP_MANAGER_VERSION){ - std::cerr << "[WARN] : version error" << std::endl; - errno = 0; - if(munmap(boot_p, MMAP_CNTL_FILE_SIZE) == -1) throw MmapManagerException("munmap error : " + getErrorStr(errno)); - throw MmapManagerException("MemoryManager version error"); - } - - errno = 0; - if((fd = open(filePath.c_str(), O_RDWR, 0666)) == -1){ - const std::string err_str = getErrorStr(errno); - errno = 0; - if(munmap(boot_p, MMAP_CNTL_FILE_SIZE) == -1) throw MmapManagerException("munmap error : " + getErrorStr(errno)); - throw MmapManagerException("file open error = " + std::string(filePath.c_str()) + err_str); - } - - _impl->mmapCntlHead = (control_st*)( (char *)boot_p + sizeof(boot_st)); - _impl->mmapCntlAddr = (void *)boot_p; - - for(uint64_t i = 0; i < _impl->mmapCntlHead->unit_num; i++){ - off_t offset = _impl->mmapCntlHead->base_size * i; - errno = 0; - _impl->mmapDataAddr[i] = mmap(NULL, _impl->mmapCntlHead->base_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); - if(_impl->mmapDataAddr[i] == MAP_FAILED){ - if (errno == EINVAL) { - std::cerr << "MmapManager::openMemory: Fatal error. EINVAL" << std::endl - << " If you use valgrind, this error might occur when the DB is created." << std::endl - << " In the case of that, reduce bsize in SharedMemoryAllocator." << std::endl; - assert(errno != EINVAL); - } - const std::string err_str = getErrorStr(errno); - if(close(fd) == -1) std::cerr << controlFile << "[WARN] : filedescript cannot close" << std::endl; - closeMemory(true); - throw MmapManagerException(err_str); - } - } - if(close(fd) == -1) std::cerr << controlFile << "[WARN] : filedescript cannot close" << std::endl; - - _impl->isOpen = true; - return true; - }catch(MmapManagerException &e){ - std::cerr << "open error" << std::endl; - throw e; - } - } - - void MmapManager::closeMemory(const bool force) - { - try{ - if(force || _impl->isOpen){ - uint16_t count = 0; - void *error_ids[MMAP_MAX_UNIT_NUM] = {0}; - for(uint16_t i = 0; i < _impl->mmapCntlHead->unit_num; i++){ - if(_impl->mmapDataAddr[i] != NULL){ - if(munmap(_impl->mmapDataAddr[i], _impl->mmapCntlHead->base_size) == -1){ - error_ids[i] = _impl->mmapDataAddr[i];; - count++; - } - _impl->mmapDataAddr[i] = NULL; - } - } - - if(count > 0){ - std::string msg = ""; - - for(uint16_t i = 0; i < count; i++){ - std::stringstream ss; - ss << error_ids[i]; - msg += ss.str() + ", "; - } - throw MmapManagerException("unmap error : ids = " + msg); - } - - if(_impl->mmapCntlAddr != NULL){ - if(munmap(_impl->mmapCntlAddr, MMAP_CNTL_FILE_SIZE) == -1) throw MmapManagerException("munmap error : " + getErrorStr(errno)); - _impl->mmapCntlAddr = NULL; - } - _impl->isOpen = false; - } - }catch(MmapManagerException &e){ - std::cerr << "close error" << std::endl; - throw e; - } - } - - off_t MmapManager::alloc(const size_t size, const bool not_reuse_flag) - { - try{ - if(!_impl->isOpen){ - std::cerr << "not open this file" << std::endl; - return -1; - } - - size_t alloc_size = getAlignSize(size); - - if( (alloc_size + sizeof(chunk_head_st)) >= _impl->mmapCntlHead->base_size ){ - std::cerr << "alloc size over. size=" << size << "." << std::endl; - return -1; - } - - if(!not_reuse_flag){ - if( _impl->mmapCntlHead->reuse_type == REUSE_DATA_CLASSIFY - || _impl->mmapCntlHead->reuse_type == REUSE_DATA_QUEUE - || _impl->mmapCntlHead->reuse_type == REUSE_DATA_QUEUE_PLUS){ - off_t ret_offset; - reuse_state_t reuse_state = REUSE_STATE_OK; - ret_offset = reuse(alloc_size, reuse_state); - if(reuse_state != REUSE_STATE_ALLOC){ - return ret_offset; - } - } - } - - head_st *unit_header = &_impl->mmapCntlHead->data_headers[_impl->mmapCntlHead->active_unit]; - if((unit_header->break_p + sizeof(chunk_head_st) + alloc_size) >= _impl->mmapCntlHead->base_size){ - if(_impl->mmapCntlHead->use_expand == true){ - if(_impl->expandMemory() == false){ - std::cerr << __func__ << ": cannot expand" << std::endl; - return -1; - } - unit_header = &_impl->mmapCntlHead->data_headers[_impl->mmapCntlHead->active_unit]; - }else{ - std::cerr << __func__ << ": total size over" << std::endl; - return -1; - } - } - - const off_t file_offset = _impl->mmapCntlHead->active_unit * _impl->mmapCntlHead->base_size; - const off_t ret_p = file_offset + ( unit_header->break_p + sizeof(chunk_head_st) ); - - chunk_head_st *chunk_head = (chunk_head_st*)(unit_header->break_p + (char *)_impl->mmapDataAddr[_impl->mmapCntlHead->active_unit]); - _impl->setupChunkHead(chunk_head, false, _impl->mmapCntlHead->active_unit, -1, alloc_size); - unit_header->break_p += alloc_size + sizeof(chunk_head_st); - unit_header->chunk_num++; - - return ret_p; - }catch(MmapManagerException &e){ - std::cerr << "allocation error" << std::endl; - throw e; - } - } - - void MmapManager::free(const off_t p) - { - switch(_impl->mmapCntlHead->reuse_type){ - case REUSE_DATA_CLASSIFY: - _impl->free_data_classify(p); - break; - case REUSE_DATA_QUEUE: - _impl->free_data_queue(p); - break; - case REUSE_DATA_QUEUE_PLUS: - _impl->free_data_queue_plus(p); - break; - default: - _impl->free_data_classify(p); - break; - } - } - - off_t MmapManager::reuse(const size_t size, reuse_state_t &reuse_state) - { - off_t ret_off; - - switch(_impl->mmapCntlHead->reuse_type){ - case REUSE_DATA_CLASSIFY: - ret_off = _impl->reuse_data_classify(size, reuse_state); - break; - case REUSE_DATA_QUEUE: - ret_off = _impl->reuse_data_queue(size, reuse_state); - break; - case REUSE_DATA_QUEUE_PLUS: - ret_off = _impl->reuse_data_queue_plus(size, reuse_state); - break; - default: - ret_off = _impl->reuse_data_classify(size, reuse_state); - break; - } - - return ret_off; - } - - void *MmapManager::getAbsAddr(off_t p) const - { - if(p < 0){ - return NULL; - } - const uint16_t unit_id = p / _impl->mmapCntlHead->base_size; - const off_t file_offset = unit_id * _impl->mmapCntlHead->base_size; - const off_t ret_p = p - file_offset; - - return ABS_ADDR(ret_p, _impl->mmapDataAddr[unit_id]); - } - - off_t MmapManager::getRelAddr(const void *p) const - { - const chunk_head_st *chunk_head = (chunk_head_st *)((char *)p - sizeof(chunk_head_st)); - const uint16_t unit_id = chunk_head->unit_id; - - const off_t file_offset = unit_id * _impl->mmapCntlHead->base_size; - off_t ret_p = (off_t)((char *)p - (char *)_impl->mmapDataAddr[unit_id]); - ret_p += file_offset; - - return ret_p; - } - - std::string getErrorStr(int32_t err_num){ - char err_msg[256]; -#ifdef _GNU_SOURCE - char *msg = strerror_r(err_num, err_msg, 256); - return std::string(msg); -#else - strerror_r(err_num, err_msg, 256); - return std::string(err_msg); -#endif - } - - size_t MmapManager::getTotalSize() const - { - const uint16_t active_unit = _impl->mmapCntlHead->active_unit; - const size_t ret_size = ((_impl->mmapCntlHead->unit_num - 1) * _impl->mmapCntlHead->base_size) + _impl->mmapCntlHead->data_headers[active_unit].break_p; - - return ret_size; - } - - size_t MmapManager::getUseSize() const - { - size_t total_size = 0; - void *ref_addr = (void *)&total_size; - _impl->scanAllData(ref_addr, CHECK_STATS_USE_SIZE); - - return total_size; - } - - uint64_t MmapManager::getUseNum() const - { - uint64_t total_chunk_num = 0; - void *ref_addr = (void *)&total_chunk_num; - _impl->scanAllData(ref_addr, CHECK_STATS_USE_NUM); - - return total_chunk_num; - } - - size_t MmapManager::getFreeSize() const - { - size_t total_size = 0; - void *ref_addr = (void *)&total_size; - _impl->scanAllData(ref_addr, CHECK_STATS_FREE_SIZE); - - return total_size; - } - - uint64_t MmapManager::getFreeNum() const - { - uint64_t total_chunk_num = 0; - void *ref_addr = (void *)&total_chunk_num; - _impl->scanAllData(ref_addr, CHECK_STATS_FREE_NUM); - - return total_chunk_num; - } - - uint16_t MmapManager::getUnitNum() const - { - return _impl->mmapCntlHead->unit_num; - } - - size_t MmapManager::getQueueCapacity() const - { - free_queue_st *free_queue = &_impl->mmapCntlHead->free_queue; - return free_queue->capacity; - } - - uint64_t MmapManager::getQueueNum() const - { - free_queue_st *free_queue = &_impl->mmapCntlHead->free_queue; - return free_queue->tail; - } - - uint64_t MmapManager::getLargeListNum() const - { - uint64_t count = 0; - free_list_st *free_list = &_impl->mmapCntlHead->free_data.large_list; - - if(free_list->free_p == -1){ - return count; - } - - off_t current_off = free_list->free_p; - chunk_head_st *current_chunk_head = (chunk_head_st *)getAbsAddr(current_off); - - while(current_chunk_head != NULL){ - count++; - current_off = current_chunk_head->free_next; - current_chunk_head = (chunk_head_st *)getAbsAddr(current_off); - } - - return count; - } -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManager.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManager.h deleted file mode 100644 index 971666596c..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManager.h +++ /dev/null @@ -1,95 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include -#include -#include -#include - -#define ABS_ADDR(x, y) (void *)(x + (char *)y); - -#define USE_MMAP_MANAGER - -namespace MemoryManager{ - - typedef enum _option_reuse_t{ - REUSE_DATA_CLASSIFY, - REUSE_DATA_QUEUE, - REUSE_DATA_QUEUE_PLUS, - }option_reuse_t; - - typedef enum _reuse_state_t{ - REUSE_STATE_OK, - REUSE_STATE_FALSE, - REUSE_STATE_ALLOC, - }reuse_state_t; - - typedef enum _check_statistics_t{ - CHECK_STATS_USE_SIZE, - CHECK_STATS_USE_NUM, - CHECK_STATS_FREE_SIZE, - CHECK_STATS_FREE_NUM, - }check_statistics_t; - - typedef struct _init_option_st{ - bool use_expand; - option_reuse_t reuse_type; - }init_option_st; - - - class MmapManager{ - public: - MmapManager(); - ~MmapManager(); - - bool init(const std::string &filePath, size_t size, const init_option_st *optionst = NULL) const; - bool openMemory(const std::string &filePath); - void closeMemory(const bool force = false); - off_t alloc(const size_t size, const bool not_reuse_flag = false); - void free(const off_t p); - off_t reuse(const size_t size, reuse_state_t &reuse_state); - void *getAbsAddr(off_t p) const; - off_t getRelAddr(const void *p) const; - - size_t getTotalSize() const; - size_t getUseSize() const; - uint64_t getUseNum() const; - size_t getFreeSize() const; - uint64_t getFreeNum() const; - uint16_t getUnitNum() const; - size_t getQueueCapacity() const; - uint64_t getQueueNum() const; - uint64_t getLargeListNum() const; - - void dumpHeap() const; - - bool isOpen() const; - void *getEntryHook() const; - void setEntryHook(const void *entry_p); - - // static method --- - static void setDefaultOptionValue(init_option_st &optionst); - static size_t getAlignSize(size_t size); - - private: - class Impl; - std::unique_ptr _impl; - }; - - std::string getErrorStr(int32_t err_num); -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManagerDefs.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManagerDefs.h deleted file mode 100644 index 0a708dff75..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManagerDefs.h +++ /dev/null @@ -1,98 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include "MmapManager.h" - -#include - -namespace MemoryManager{ - const uint64_t MMAP_MANAGER_VERSION = 5; - - const bool MMAP_DEFAULT_ALLOW_EXPAND = false; - const uint64_t MMAP_CNTL_FILE_RANGE = 16; - const size_t MMAP_CNTL_FILE_SIZE = MMAP_CNTL_FILE_RANGE * sysconf(_SC_PAGESIZE); - const uint64_t MMAP_MAX_FILE_NAME_LENGTH = 1024; - const std::string MMAP_CNTL_FILE_SUFFIX = "c"; - - const size_t MMAP_LOWER_SIZE = 1; - const size_t MMAP_MEMORY_ALIGN = 8; - const size_t MMAP_MEMORY_ALIGN_EXP = 3; - -#ifndef MMANAGER_TEST_MODE - const uint64_t MMAP_MAX_UNIT_NUM = 1024; -#else - const uint64_t MMAP_MAX_UNIT_NUM = 8; -#endif - - const uint64_t MMAP_FREE_QUEUE_SIZE = 1024; - - const uint64_t MMAP_FREE_LIST_NUM = 64; - - typedef struct _boot_st{ - uint32_t version; - uint64_t reserve; - size_t size; - }boot_st; - - typedef struct _head_st{ - off_t break_p; - uint64_t chunk_num; - uint64_t reserve; - }head_st; - - - typedef struct _free_list_st{ - off_t free_p; - off_t free_last_p; - }free_list_st; - - - typedef struct _free_st{ - free_list_st large_list; - free_list_st free_lists[MMAP_FREE_LIST_NUM]; - }free_st; - - - typedef struct _free_queue_st{ - off_t data; - size_t capacity; - uint64_t tail; - }free_queue_st; - - - - typedef struct _control_st{ - bool use_expand; - uint16_t unit_num; - uint16_t active_unit; - uint64_t reserve; - size_t base_size; - off_t entry_p; - option_reuse_t reuse_type; - free_st free_data; - free_queue_st free_queue; - head_st data_headers[MMAP_MAX_UNIT_NUM]; - }control_st; - - typedef struct _chunk_head_st{ - bool delete_flg; - uint16_t unit_id; - off_t free_next; - size_t size; - }chunk_head_st; -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManagerException.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManagerException.h deleted file mode 100644 index 3720430c72..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManagerException.h +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include -#include -#include - -namespace MemoryManager{ - class MmapManagerException : public std::domain_error{ - public: - MmapManagerException(const std::string &msg) : std::domain_error(msg){} - }; -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManagerImpl.hpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManagerImpl.hpp deleted file mode 100644 index e7fa0ccaec..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/MmapManagerImpl.hpp +++ /dev/null @@ -1,655 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include "defines.h" -#include "MmapManagerDefs.h" -#include "MmapManagerException.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace MemoryManager{ - - class MmapManager::Impl{ - public: - Impl() = delete; - Impl(MmapManager &ommanager); - virtual ~Impl(){} - - MmapManager &mmanager; - bool isOpen; - void *mmapCntlAddr; - control_st *mmapCntlHead; - std::string filePath; - void *mmapDataAddr[MMAP_MAX_UNIT_NUM]; - - void initBootStruct(boot_st &bst, size_t size) const; - void initFreeStruct(free_st &fst) const; - void initFreeQueue(free_queue_st &fqst) const; - void initControlStruct(control_st &cntlst, size_t size) const; - - void setupChunkHead(chunk_head_st *chunk_head, const bool delete_flg, const uint16_t unit_id, const off_t free_next, const size_t size) const; - bool expandMemory(); - int32_t formatFile(const std::string &targetFile, size_t size) const; - void clearChunk(const off_t chunk_off) const; - - void free_data_classify(const off_t p, const bool force_large_list = false) const; - off_t reuse_data_classify(const size_t size, reuse_state_t &reuse_state, const bool force_large_list = false) const; - void free_data_queue(const off_t p); - off_t reuse_data_queue(const size_t size, reuse_state_t &reuse_state); - void free_data_queue_plus(const off_t p); - off_t reuse_data_queue_plus(const size_t size, reuse_state_t &reuse_state); - - bool scanAllData(void *target, const check_statistics_t stats_type) const; - - void upHeap(free_queue_st *free_queue, uint64_t index) const; - void downHeap(free_queue_st *free_queue)const; - bool insertHeap(free_queue_st *free_queue, const off_t p) const; - bool getHeap(free_queue_st *free_queue, off_t *p) const; - size_t getMaxHeapValue(free_queue_st *free_queue) const; - void dumpHeap() const; - - void divChunk(const off_t chunk_offset, const size_t size); - }; - - - MmapManager::Impl::Impl(MmapManager &ommanager):mmanager(ommanager), isOpen(false), mmapCntlAddr(NULL), mmapCntlHead(NULL){} - - - void MmapManager::Impl::initBootStruct(boot_st &bst, size_t size) const - { - bst.version = MMAP_MANAGER_VERSION; - bst.reserve = 0; - bst.size = size; - } - - void MmapManager::Impl::initFreeStruct(free_st &fst) const - { - fst.large_list.free_p = -1; - fst.large_list.free_last_p = -1; - for(uint32_t i = 0; i < MMAP_FREE_LIST_NUM; ++i){ - fst.free_lists[i].free_p = -1; - fst.free_lists[i].free_last_p = -1; - } - } - - void MmapManager::Impl::initFreeQueue(free_queue_st &fqst) const - { - fqst.data = -1; - fqst.capacity = MMAP_FREE_QUEUE_SIZE; - fqst.tail = 1; - } - - void MmapManager::Impl::initControlStruct(control_st &cntlst, size_t size) const - { - cntlst.use_expand = MMAP_DEFAULT_ALLOW_EXPAND; - cntlst.unit_num = 1; - cntlst.active_unit = 0; - cntlst.reserve = 0; - cntlst.base_size = size; - cntlst.entry_p = 0; - cntlst.reuse_type = REUSE_DATA_CLASSIFY; - initFreeStruct(cntlst.free_data); - initFreeQueue(cntlst.free_queue); - memset(cntlst.data_headers, 0, sizeof(head_st) * MMAP_MAX_UNIT_NUM); - } - - void MmapManager::Impl::setupChunkHead(chunk_head_st *chunk_head, const bool delete_flg, const uint16_t unit_id, const off_t free_next, const size_t size) const - { - chunk_head_st chunk_buffer; - chunk_buffer.delete_flg = delete_flg; - chunk_buffer.unit_id = unit_id; - chunk_buffer.free_next = free_next; - chunk_buffer.size = size; - - memcpy(chunk_head, &chunk_buffer, sizeof(chunk_head_st)); - } - - bool MmapManager::Impl::expandMemory() - { - const uint16_t new_unit_num = mmapCntlHead->unit_num + 1; - const size_t new_file_size = mmapCntlHead->base_size * new_unit_num; - const off_t old_file_size = mmapCntlHead->base_size * mmapCntlHead->unit_num; - - if(new_unit_num >= MMAP_MAX_UNIT_NUM){ - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("over max unit num"); -// std::cerr << "over max unit num" << std::endl; - return false; - } - - int32_t fd = formatFile(filePath, new_file_size); - assert(fd >= 0); - - const off_t offset = mmapCntlHead->base_size * mmapCntlHead->unit_num; - errno = 0; - void *new_area = mmap(NULL, mmapCntlHead->base_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); - if(new_area == MAP_FAILED){ - const std::string err_str = getErrorStr(errno); - - errno = 0; - if(ftruncate(fd, old_file_size) == -1){ - const std::string err_str = getErrorStr(errno); - throw MmapManagerException("truncate error" + err_str); - } - - if(close(fd) == -1 && NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)(filePath + "[WARN] : filedescript cannot close"); -// std::cerr << filePath << "[WARN] : filedescript cannot close" << std::endl; - throw MmapManagerException("mmap error" + err_str); - } - if(close(fd) == -1 && NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)(filePath + "[WARN] : filedescript cannot close"); -// std::cerr << filePath << "[WARN] : filedescript cannot close" << std::endl; - - mmapDataAddr[mmapCntlHead->unit_num] = new_area; - - mmapCntlHead->unit_num = new_unit_num; - mmapCntlHead->active_unit++; - - return true; - } - - int32_t MmapManager::Impl::formatFile(const std::string &targetFile, size_t size) const - { - const char *c = ""; - int32_t fd; - - errno = 0; - if((fd = open(targetFile.c_str(), O_RDWR|O_CREAT, 0666)) == -1){ - std::stringstream ss; - ss << "[ERR] Cannot open the file. " << targetFile << " " << getErrorStr(errno); - throw MmapManagerException(ss.str()); - } - errno = 0; - if(lseek(fd, (off_t)size-1, SEEK_SET) < 0){ - std::stringstream ss; - ss << "[ERR] Cannot seek the file. " << targetFile << " " << getErrorStr(errno); - if(close(fd) == -1 && NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)(targetFile + "[WARN] : filedescript cannot close"); -// std::cerr << targetFile << "[WARN] : filedescript cannot close" << std::endl; - throw MmapManagerException(ss.str()); - } - errno = 0; - if(write(fd, &c, sizeof(char)) == -1){ - std::stringstream ss; - ss << "[ERR] Cannot write the file. Check the disk space. " << targetFile << " " << getErrorStr(errno); - if(close(fd) == -1 && NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)(targetFile + "[WARN] : filedescript cannot close"); -// std::cerr << targetFile << "[WARN] : filedescript cannot close" << std::endl; - throw MmapManagerException(ss.str()); - } - - return fd; - } - - void MmapManager::Impl::clearChunk(const off_t chunk_off) const - { - chunk_head_st *chunk_head = (chunk_head_st *)mmanager.getAbsAddr(chunk_off); - const off_t payload_off = chunk_off + sizeof(chunk_head_st); - - chunk_head->delete_flg = false; - chunk_head->free_next = -1; - char *payload_addr = (char *)mmanager.getAbsAddr(payload_off); - memset(payload_addr, 0, chunk_head->size); - } - - void MmapManager::Impl::free_data_classify(const off_t p, const bool force_large_list) const - { - const off_t chunk_offset = p - sizeof(chunk_head_st); - chunk_head_st *chunk_head = (chunk_head_st *)mmanager.getAbsAddr(chunk_offset); - const size_t p_size = chunk_head->size; - - - - const size_t border_size = MMAP_MEMORY_ALIGN * MMAP_FREE_LIST_NUM; - - free_list_st *free_list; - if(p_size <= border_size && force_large_list == false){ - uint32_t index = (p_size / MMAP_MEMORY_ALIGN) - 1; - free_list = &mmapCntlHead->free_data.free_lists[index]; - }else{ - free_list = &mmapCntlHead->free_data.large_list; - } - - if(free_list->free_p == -1){ - free_list->free_p = free_list->free_last_p = chunk_offset; - }else{ - off_t last_off = free_list->free_last_p; - chunk_head_st *tmp_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(last_off); - free_list->free_last_p = tmp_chunk_head->free_next = chunk_offset; - } - chunk_head->delete_flg = true; - } - - off_t MmapManager::Impl::reuse_data_classify(const size_t size, reuse_state_t &reuse_state, const bool force_large_list) const - { - - - const size_t border_size = MMAP_MEMORY_ALIGN * MMAP_FREE_LIST_NUM; - - free_list_st *free_list; - if(size <= border_size && force_large_list == false){ - uint32_t index = (size / MMAP_MEMORY_ALIGN) - 1; - free_list = &mmapCntlHead->free_data.free_lists[index]; - }else{ - free_list = &mmapCntlHead->free_data.large_list; - } - - if(free_list->free_p == -1){ - reuse_state = REUSE_STATE_ALLOC; - return -1; - } - - off_t current_off = free_list->free_p; - off_t ret_off = 0; - chunk_head_st *current_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(current_off); - chunk_head_st *ret_chunk_head = NULL; - - if( (size <= border_size) && (free_list->free_last_p == free_list->free_p) ){ - ret_off = current_off; - ret_chunk_head = current_chunk_head; - free_list->free_p = free_list->free_last_p = -1; - }else{ - off_t ret_before_off = -1, before_off = -1; - bool found_candidate_flag = false; - - - while(current_chunk_head != NULL){ - if( current_chunk_head->size >= size ) found_candidate_flag = true; - - if(found_candidate_flag){ - ret_off = current_off; - ret_chunk_head = current_chunk_head; - ret_before_off = before_off; - break; - } - before_off = current_off; - current_off = current_chunk_head->free_next; - current_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(current_off); - } - - if(!found_candidate_flag){ - reuse_state = REUSE_STATE_ALLOC; - return -1; - } - - const off_t free_next = ret_chunk_head->free_next; - if(free_list->free_p == ret_off){ - free_list->free_p = free_next; - }else{ - chunk_head_st *before_chunk = (chunk_head_st *)mmanager.getAbsAddr(ret_before_off); - before_chunk->free_next = free_next; - } - - if(free_list->free_last_p == ret_off){ - free_list->free_last_p = ret_before_off; - } - } - - clearChunk(ret_off); - - ret_off = ret_off + sizeof(chunk_head_st); - return ret_off; - } - - void MmapManager::Impl::free_data_queue(const off_t p) - { - free_queue_st *free_queue = &mmapCntlHead->free_queue; - if(free_queue->data == -1){ - - const size_t queue_size = sizeof(off_t) * free_queue->capacity; - const off_t alloc_offset = mmanager.alloc(queue_size); - if(alloc_offset == -1){ - - return free_data_classify(p, true); - } - free_queue->data = alloc_offset; - }else if(free_queue->tail >= free_queue->capacity){ - - const off_t tmp_old_queue = free_queue->data; - const size_t old_size = sizeof(off_t) * free_queue->capacity; - const size_t new_capacity = free_queue->capacity * 2; - const size_t new_size = sizeof(off_t) * new_capacity; - - if(new_size > mmapCntlHead->base_size){ - - - return free_data_classify(p, true); - }else{ - const off_t alloc_offset = mmanager.alloc(new_size); - if(alloc_offset == -1){ - - return free_data_classify(p, true); - } - free_queue->data = alloc_offset; - const off_t *old_data = (off_t *)mmanager.getAbsAddr(tmp_old_queue); - off_t *new_data = (off_t *)mmanager.getAbsAddr(free_queue->data); - memcpy(new_data, old_data, old_size); - - free_queue->capacity = new_capacity; - mmanager.free(tmp_old_queue); - } - } - - const off_t chunk_offset = p - sizeof(chunk_head_st); - if(!insertHeap(free_queue, chunk_offset)){ - - return; - } - - chunk_head_st *chunk_head = (chunk_head_st*)mmanager.getAbsAddr(chunk_offset); - chunk_head->delete_flg = 1; - - return; - } - - off_t MmapManager::Impl::reuse_data_queue(const size_t size, reuse_state_t &reuse_state) - { - free_queue_st *free_queue = &mmapCntlHead->free_queue; - if(free_queue->data == -1){ - - reuse_state = REUSE_STATE_ALLOC; - return -1; - } - - if(getMaxHeapValue(free_queue) < size){ - reuse_state = REUSE_STATE_ALLOC; - return -1; - } - - off_t ret_off; - if(!getHeap(free_queue, &ret_off)){ - - reuse_state = REUSE_STATE_ALLOC; - return -1; - } - - - reuse_state_t list_state = REUSE_STATE_OK; - - off_t candidate_off = reuse_data_classify(MMAP_MEMORY_ALIGN, list_state, true); - if(list_state == REUSE_STATE_OK){ - - mmanager.free(candidate_off); - } - - const off_t c_ret_off = ret_off; - divChunk(c_ret_off, size); - - clearChunk(ret_off); - - ret_off = ret_off + sizeof(chunk_head_st); - - return ret_off; - } - - void MmapManager::Impl::free_data_queue_plus(const off_t p) - { - const off_t chunk_offset = p - sizeof(chunk_head_st); - chunk_head_st *chunk_head = (chunk_head_st *)mmanager.getAbsAddr(chunk_offset); - const size_t p_size = chunk_head->size; - - - - const size_t border_size = MMAP_MEMORY_ALIGN * MMAP_FREE_LIST_NUM; - - if(p_size <= border_size){ - free_data_classify(p); - }else{ - free_data_queue(p); - } - } - - off_t MmapManager::Impl::reuse_data_queue_plus(const size_t size, reuse_state_t &reuse_state) - { - - - const size_t border_size = MMAP_MEMORY_ALIGN * MMAP_FREE_LIST_NUM; - - off_t ret_off; - if(size <= border_size){ - ret_off = reuse_data_classify(size, reuse_state); - if(reuse_state == REUSE_STATE_ALLOC){ - - reuse_state = REUSE_STATE_OK; - ret_off = reuse_data_queue(size, reuse_state); - } - }else{ - ret_off = reuse_data_queue(size, reuse_state); - } - - return ret_off; - } - - - bool MmapManager::Impl::scanAllData(void *target, const check_statistics_t stats_type) const - { - const uint16_t unit_num = mmapCntlHead->unit_num; - size_t total_size = 0; - uint64_t total_chunk_num = 0; - - for(int i = 0; i < unit_num; i++){ - const head_st *target_unit_head = &mmapCntlHead->data_headers[i]; - const uint64_t chunk_num = target_unit_head->chunk_num; - const off_t base_offset = i * mmapCntlHead->base_size; - off_t target_offset = base_offset; - chunk_head_st *target_chunk; - - for(uint64_t j = 0; j < chunk_num; j++){ - target_chunk = (chunk_head_st*)mmanager.getAbsAddr(target_offset); - - if(stats_type == CHECK_STATS_USE_SIZE){ - if(target_chunk->delete_flg == false){ - total_size += target_chunk->size; - } - }else if(stats_type == CHECK_STATS_USE_NUM){ - if(target_chunk->delete_flg == false){ - total_chunk_num++; - } - }else if(stats_type == CHECK_STATS_FREE_SIZE){ - if(target_chunk->delete_flg == true){ - total_size += target_chunk->size; - } - }else if(stats_type == CHECK_STATS_FREE_NUM){ - if(target_chunk->delete_flg == true){ - total_chunk_num++; - } - } - - const size_t chunk_size = sizeof(chunk_head_st) + target_chunk->size; - target_offset += chunk_size; - } - } - - if(stats_type == CHECK_STATS_USE_SIZE || stats_type == CHECK_STATS_FREE_SIZE){ - size_t *tmp_size = (size_t *)target; - *tmp_size = total_size; - }else if(stats_type == CHECK_STATS_USE_NUM || stats_type == CHECK_STATS_FREE_NUM){ - uint64_t *tmp_chunk_num = (uint64_t *)target; - *tmp_chunk_num = total_chunk_num; - } - - return true; - } - - void MmapManager::Impl::upHeap(free_queue_st *free_queue, uint64_t index) const - { - off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); - - while(index > 1){ - uint64_t parent = index / 2; - - const off_t parent_chunk_offset = queue[parent]; - const off_t index_chunk_offset = queue[index]; - const chunk_head_st *parent_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(parent_chunk_offset); - const chunk_head_st *index_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(index_chunk_offset); - - if(parent_chunk_head->size < index_chunk_head->size){ - - const off_t tmp = queue[parent]; - queue[parent] = queue[index]; - queue[index] = tmp; - } - index = parent; - } - } - - void MmapManager::Impl::downHeap(free_queue_st *free_queue)const - { - off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); - uint64_t index = 1; - - while(index * 2 <= free_queue->tail){ - uint64_t child = index * 2; - - const off_t index_chunk_offset = queue[index]; - const chunk_head_st *index_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(index_chunk_offset); - - if(child + 1 < free_queue->tail){ - const off_t left_chunk_offset = queue[child]; - const off_t right_chunk_offset = queue[child+1]; - const chunk_head_st *left_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(left_chunk_offset); - const chunk_head_st *right_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(right_chunk_offset); - - - if(left_chunk_head->size < right_chunk_head->size){ - child = child + 1; - } - } - - - const off_t child_chunk_offset = queue[child]; - const chunk_head_st *child_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(child_chunk_offset); - - if(child_chunk_head->size > index_chunk_head->size){ - - const off_t tmp = queue[child]; - queue[child] = queue[index]; - queue[index] = tmp; - index = child; - }else{ - break; - } - } - } - - bool MmapManager::Impl::insertHeap(free_queue_st *free_queue, const off_t p) const - { - off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); - uint64_t index; - if(free_queue->capacity < free_queue->tail){ - return false; - } - - index = free_queue->tail; - queue[index] = p; - free_queue->tail += 1; - - upHeap(free_queue, index); - - return true; - } - - bool MmapManager::Impl::getHeap(free_queue_st *free_queue, off_t *p) const - { - - if( (free_queue->tail - 1) <= 0){ - return false; - } - - off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); - *p = queue[1]; - free_queue->tail -= 1; - queue[1] = queue[free_queue->tail]; - downHeap(free_queue); - - return true; - } - - size_t MmapManager::Impl::getMaxHeapValue(free_queue_st *free_queue) const - { - if(free_queue->data == -1){ - return 0; - } - const off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); - const chunk_head_st *chunk_head = (chunk_head_st *)mmanager.getAbsAddr(queue[1]); - - return chunk_head->size; - } - - void MmapManager::Impl::dumpHeap() const - { - free_queue_st *free_queue = &mmapCntlHead->free_queue; - if(free_queue->data == -1){ - std::cout << "heap unused" << std::endl; - return; - } - - off_t *queue = (off_t *)mmanager.getAbsAddr(free_queue->data); - for(uint32_t i = 1; i < free_queue->tail; ++i){ - const off_t chunk_offset = queue[i]; - const off_t payload_offset = chunk_offset + sizeof(chunk_head_st); - const chunk_head_st *chunk_head = (chunk_head_st *)mmanager.getAbsAddr(chunk_offset); - const size_t size = chunk_head->size; - std::cout << "[" << chunk_offset << "(" << payload_offset << "), " << size << "] "; - } - std::cout << std::endl; - } - - void MmapManager::Impl::divChunk(const off_t chunk_offset, const size_t size) - { - if((mmapCntlHead->reuse_type != REUSE_DATA_QUEUE) - && (mmapCntlHead->reuse_type != REUSE_DATA_QUEUE_PLUS)){ - return; - } - - chunk_head_st *chunk_head = (chunk_head_st *)mmanager.getAbsAddr(chunk_offset); - const size_t border_size = sizeof(chunk_head_st) + MMAP_MEMORY_ALIGN; - const size_t align_size = getAlignSize(size); - const size_t rest_size = chunk_head->size - align_size; - - if(rest_size < border_size){ - return; - } - - - chunk_head->size = align_size; - - const off_t new_chunk_offset = chunk_offset + sizeof(chunk_head_st) + align_size; - chunk_head_st *new_chunk_head = (chunk_head_st *)mmanager.getAbsAddr(new_chunk_offset); - const size_t new_size = rest_size - sizeof(chunk_head_st); - setupChunkHead(new_chunk_head, true, chunk_head->unit_id, -1, new_size); - - - head_st *unit_header = &mmapCntlHead->data_headers[mmapCntlHead->active_unit]; - unit_header->chunk_num++; - - - const off_t payload_offset = new_chunk_offset + sizeof(chunk_head_st); - mmanager.free(payload_offset); - - return; - } -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/NGTQ/Command.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/NGTQ/Command.h deleted file mode 100644 index 809c10014b..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/NGTQ/Command.h +++ /dev/null @@ -1,602 +0,0 @@ -// -// Copyright (C) 2016-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "NGT/NGTQ/Quantizer.h" - -#define NGTQ_SEARCH_CODEBOOK_SIZE_FLUCTUATION - -namespace NGTQ { - -class Command { -public: - Command():debugLevel(0) {} - - void - create(NGT::Args &args) - { - const string usage = "Usage: ngtq create " - "[-o object-type (f:float|c:unsigned char)] [-D distance-function] [-n data-size] " - "[-p #-of-thread] [-d dimension] [-R global-codebook-range] [-r local-codebook-range] " - "[-C global-codebook-size-limit] [-c local-codebook-size-limit] [-N local-division-no] " - "[-T single-local-centroid (t|f)] [-e epsilon] [-i index-type (t:Tree|g:Graph)] " - "[-M global-centroid-creation-mode (d|s)] [-L global-centroid-creation-mode (d|k|s)] " - "[-S local-sample-coefficient] " - "index(output) data.tsv(input)"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "DB is not specified." << endl; - cerr << usage << endl; - return; - } - string data; - try { - data = args.get("#2"); - } catch (...) { - cerr << "Data is not specified." << endl; - } - - char objectType = args.getChar("o", 'f'); - char distanceType = args.getChar("D", '2'); - size_t dataSize = args.getl("n", 0); - - NGTQ::Property property; - property.threadSize = args.getl("p", 24); - property.dimension = args.getl("d", 0); - property.globalRange = args.getf("R", 0); - property.localRange = args.getf("r", 0); - property.globalCentroidLimit = args.getl("C", 1000000); - property.localCentroidLimit = args.getl("c", 65000); - property.localDivisionNo = args.getl("N", 8); - property.batchSize = args.getl("b", 1000); - property.localClusteringSampleCoefficient = args.getl("S", 10); - { - char localCentroidType = args.getChar("T", 'f'); - property.singleLocalCodebook = localCentroidType == 't' ? true : false; - } - { - char centroidCreationMode = args.getChar("M", 'd'); - switch(centroidCreationMode) { - case 'd': property.centroidCreationMode = NGTQ::CentroidCreationModeDynamic; break; - case 's': property.centroidCreationMode = NGTQ::CentroidCreationModeStatic; break; - default: - cerr << "ngt: Invalid centroid creation mode. " << centroidCreationMode << endl; - cerr << usage << endl; - return; - } - } - { - char localCentroidCreationMode = args.getChar("L", 'd'); - switch(localCentroidCreationMode) { - case 'd': property.localCentroidCreationMode = NGTQ::CentroidCreationModeDynamic; break; - case 's': property.localCentroidCreationMode = NGTQ::CentroidCreationModeStatic; break; - case 'k': property.localCentroidCreationMode = NGTQ::CentroidCreationModeDynamicKmeans; break; - default: - cerr << "ngt: Invalid centroid creation mode. " << localCentroidCreationMode << endl; - cerr << usage << endl; - return; - } - } - - NGT::Property globalProperty; - NGT::Property localProperty; - - { - char indexType = args.getChar("i", 't'); - globalProperty.indexType = indexType == 't' ? NGT::Property::GraphAndTree : NGT::Property::Graph; - localProperty.indexType = globalProperty.indexType; - } - globalProperty.insertionRadiusCoefficient = args.getf("e", 0.1) + 1.0; - localProperty.insertionRadiusCoefficient = globalProperty.insertionRadiusCoefficient; - - if (debugLevel >= 1) { - cerr << "epsilon=" << globalProperty.insertionRadiusCoefficient << endl; - cerr << "data size=" << dataSize << endl; - cerr << "dimension=" << property.dimension << endl; - cerr << "thread size=" << property.threadSize << endl; - cerr << "batch size=" << localProperty.batchSizeForCreation << endl;; - cerr << "index type=" << globalProperty.indexType << endl; - } - - - switch (objectType) { - case 'f': property.dataType = NGTQ::DataTypeFloat; break; - case 'c': property.dataType = NGTQ::DataTypeUint8; break; - default: - cerr << "ngt: Invalid object type. " << objectType << endl; - cerr << usage << endl; - return; - } - - switch (distanceType) { - case '2': property.distanceType = NGTQ::DistanceTypeL2; break; - case '1': property.distanceType = NGTQ::DistanceTypeL1; break; - case 'a': property.distanceType = NGTQ::DistanceTypeAngle; break; - default: - cerr << "ngt: Invalid distance type. " << distanceType << endl; - cerr << usage << endl; - return; - } - - cerr << "ngtq: Create" << endl; - NGTQ::Index::create(database, property, globalProperty, localProperty); - - cerr << "ngtq: Append" << endl; - NGTQ::Index::append(database, data, dataSize); - } - - void - rebuild(NGT::Args &args) - { - const string usage = "Usage: ngtq rebuild " - "[-o object-type (f:float|c:unsigned char)] [-D distance-function] [-n data-size] " - "[-p #-of-thread] [-d dimension] [-R global-codebook-range] [-r local-codebook-range] " - "[-C global-codebook-size-limit] [-c local-codebook-size-limit] [-N local-division-no] " - "[-T single-local-centroid (t|f)] [-e epsilon] [-i index-type (t:Tree|g:Graph)] " - "[-M centroid-creation_mode (d|s)] " - "index(output) data.tsv(input)"; - string srcIndex; - try { - srcIndex = args.get("#1"); - } catch (...) { - cerr << "DB is not specified." << endl; - cerr << usage << endl; - return; - } - string rebuiltIndex = srcIndex + ".tmp"; - - - NGTQ::Property property; - NGT::Property globalProperty; - NGT::Property localProperty; - - { - NGTQ::Index index(srcIndex); - property = index.getQuantizer().property; - index.getQuantizer().globalCodebook.getProperty(globalProperty); - index.getQuantizer().getLocalCodebook(0).getProperty(localProperty); - } - - property.globalRange = args.getf("R", property.globalRange); - property.localRange = args.getf("r", property.localRange); - property.globalCentroidLimit = args.getl("C", property.globalCentroidLimit); - property.localCentroidLimit = args.getl("c", property.localCentroidLimit); - property.localDivisionNo = args.getl("N", property.localDivisionNo); - { - char localCentroidType = args.getChar("T", '-'); - if (localCentroidType != '-') { - property.singleLocalCodebook = localCentroidType == 't' ? true : false; - } - } - { - char centroidCreationMode = args.getChar("M", '-'); - if (centroidCreationMode != '-') { - property.centroidCreationMode = centroidCreationMode == 'd' ? - NGTQ::CentroidCreationModeDynamic : NGTQ::CentroidCreationModeStatic; - } - } - - cerr << "global range=" << property.globalRange << endl; - cerr << "local range=" << property.localRange << endl; - cerr << "global centroid limit=" << property.globalCentroidLimit << endl; - cerr << "local centroid limit=" << property.localCentroidLimit << endl; - cerr << "local division no=" << property.localDivisionNo << endl; - - NGTQ::Index::create(rebuiltIndex, property, globalProperty, localProperty); - cerr << "created a new db" << endl; - cerr << "start rebuilding..." << endl; - NGTQ::Index::rebuild(srcIndex, rebuiltIndex); - { - string src = srcIndex; - string dst = srcIndex + ".org"; - if (std::rename(src.c_str(), dst.c_str()) != 0) { - stringstream msg; - msg << "ngtq::rebuild: Cannot rename. " << src << "=>" << dst ; - NGTThrowException(msg); - } - } - { - string src = rebuiltIndex; - string dst = srcIndex; - if (std::rename(src.c_str(), dst.c_str()) != 0) { - stringstream msg; - msg << "ngtq::rebuild: Cannot rename. " << src << "=>" << dst ; - NGTThrowException(msg); - } - } - } - - - void - append(NGT::Args &args) - { - const string usage = "Usage: ngtq append [-n data-size] " - "index(output) data.tsv(input)"; - string index; - try { - index = args.get("#1"); - } catch (...) { - cerr << "DB is not specified." << endl; - cerr << usage << endl; - return; - } - string data; - try { - data = args.get("#2"); - } catch (...) { - cerr << "Data is not specified." << endl; - } - - size_t dataSize = args.getl("n", 0); - - if (debugLevel >= 1) { - cerr << "data size=" << dataSize << endl; - } - - NGTQ::Index::append(index, data, dataSize); - - } - - void - search(NGT::Args &args) - { - const string usage = "Usage: ngtq search [-i g|t|s] [-n result-size] [-e epsilon] [-m mode(r|l|c|a)] " - "[-E edge-size] [-o output-mode] [-b result expansion(begin:end:[x]step)] " - "index(input) query.tsv(input)"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "DB is not specified" << endl; - cerr << usage << endl; - return; - } - - string query; - try { - query = args.get("#2"); - } catch (...) { - cerr << "Query is not specified" << endl; - cerr << usage << endl; - return; - } - - int size = args.getl("n", 20); - char outputMode = args.getChar("o", '-'); - float epsilon = 0.1; - - char mode = args.getChar("m", '-'); - NGTQ::AggregationMode aggregationMode; - switch (mode) { - case 'r': aggregationMode = NGTQ::AggregationModeExactDistanceThroughApproximateDistance; break; // refine - case 'e': aggregationMode = NGTQ::AggregationModeExactDistance; break; // refine - case 'l': aggregationMode = NGTQ::AggregationModeApproximateDistanceWithLookupTable; break; // lookup - case 'c': aggregationMode = NGTQ::AggregationModeApproximateDistanceWithCache; break; // cache - case '-': - case 'a': aggregationMode = NGTQ::AggregationModeApproximateDistance; break; // cache - default: - cerr << "Invalid aggregation mode. " << mode << endl; - cerr << usage << endl; - return; - } - - if (args.getString("e", "none") == "-") { - // linear search - epsilon = FLT_MAX; - } else { - epsilon = args.getf("e", 0.1); - } - - size_t beginOfResultExpansion, endOfResultExpansion, stepOfResultExpansion; - bool mulStep = false; - { - beginOfResultExpansion = stepOfResultExpansion = 1; - endOfResultExpansion = 0; - string str = args.getString("b", "16"); - vector tokens; - NGT::Common::tokenize(str, tokens, ":"); - if (tokens.size() >= 1) { beginOfResultExpansion = NGT::Common::strtod(tokens[0]); } - if (tokens.size() >= 2) { endOfResultExpansion = NGT::Common::strtod(tokens[1]); } - if (tokens.size() >= 3) { - if (tokens[2][0] == 'x') { - mulStep = true; - stepOfResultExpansion = NGT::Common::strtod(tokens[2].substr(1)); - } else { - stepOfResultExpansion = NGT::Common::strtod(tokens[2]); - } - } - } - if (debugLevel >= 1) { - cerr << "size=" << size << endl; - cerr << "result expansion=" << beginOfResultExpansion << "->" << endOfResultExpansion << "," << stepOfResultExpansion << endl; - } - - NGTQ::Index index(database); - try { - ifstream is(query); - if (!is) { - cerr << "Cannot open the specified file. " << query << endl; - return; - } - if (outputMode == 's') { cout << "# Beginning of Evaluation" << endl; } - string line; - double totalTime = 0; - int queryCount = 0; - while(getline(is, line)) { - NGT::Object *query = index.allocateObject(line, " \t", 0); - queryCount++; - size_t resultExpansion = 0; - for (size_t base = beginOfResultExpansion; - resultExpansion <= endOfResultExpansion; - base = mulStep ? base * stepOfResultExpansion : base + stepOfResultExpansion) { - resultExpansion = base; - NGT::ObjectDistances objects; - - if (outputMode == 'e') { - index.search(query, objects, size, resultExpansion, aggregationMode, epsilon); - objects.clear(); - } - - NGT::Timer timer; - timer.start(); - // size : # of final resultant objects - // resultExpansion : # of resultant objects by using codebook search - index.search(query, objects, size, resultExpansion, aggregationMode, epsilon); - timer.stop(); - - totalTime += timer.time; - if (outputMode == 'e') { - cout << "# Query No.=" << queryCount << endl; - cout << "# Query=" << line.substr(0, 20) + " ..." << endl; - cout << "# Index Type=" << "----" << endl; - cout << "# Size=" << size << endl; - cout << "# Epsilon=" << epsilon << endl; - cout << "# Result expansion=" << resultExpansion << endl; - cout << "# Distance Computation=" << index.getQuantizer().distanceComputationCount << endl; - cout << "# Query Time (msec)=" << timer.time * 1000.0 << endl; - } else { - cout << "Query No." << queryCount << endl; - cout << "Rank\tIN-ID\tID\tDistance" << endl; - } - - for (size_t i = 0; i < objects.size(); i++) { - cout << i + 1 << "\t" << objects[i].id << "\t"; - cout << objects[i].distance << endl; - } - - if (outputMode == 'e') { - cout << "# End of Search" << endl; - } else { - cout << "Query Time= " << timer.time << " (sec), " << timer.time * 1000.0 << " (msec)" << endl; - } - } - if (outputMode == 'e') { - cout << "# End of Query" << endl; - } - index.deleteObject(query); - } - if (outputMode == 'e') { - cout << "# Average Query Time (msec)=" << totalTime * 1000.0 / (double)queryCount << endl; - cout << "# Number of queries=" << queryCount << endl; - cout << "# End of Evaluation" << endl; - } else { - cout << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " - << totalTime * 1000.0 / (double)queryCount << " (msec), (" - << totalTime << "/" << queryCount << ")" << endl; - } - } catch (NGT::Exception &err) { - cerr << "Error " << err.what() << endl; - cerr << usage << endl; - } catch (...) { - cerr << "Error" << endl; - cerr << usage << endl; - } - index.close(); - } - - void - remove(NGT::Args &args) - { - const string usage = "Usage: ngtq remove [-d object-ID-type(f|d)] index(input) object-ID(input)"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "DB is not specified" << endl; - cerr << usage << endl; - return; - } - try { - args.get("#2"); - } catch (...) { - cerr << "ID is not specified" << endl; - cerr << usage << endl; - return; - } - char dataType = args.getChar("d", 'f'); - if (debugLevel >= 1) { - cerr << "dataType=" << dataType << endl; - } - - try { - vector objects; - if (dataType == 'f') { - string ids; - try { - ids = args.get("#2"); - } catch (...) { - cerr << "Data file is not specified" << endl; - cerr << usage << endl; - return; - } - ifstream is(ids); - if (!is) { - cerr << "Cannot open the specified file. " << ids << endl; - return; - } - string line; - int count = 0; - while(getline(is, line)) { - count++; - vector tokens; - NGT::Common::tokenize(line, tokens, "\t "); - if (tokens.size() == 0 || tokens[0].size() == 0) { - continue; - } - char *e; - size_t id; - try { - id = strtol(tokens[0].c_str(), &e, 10); - objects.push_back(id); - } catch (...) { - cerr << "Illegal data. " << tokens[0] << endl; - } - if (*e != 0) { - cerr << "Illegal data. " << e << endl; - } - cerr << "removed ID=" << id << endl; - } - } else { - size_t id = args.getl("#2", 0); - cerr << "removed ID=" << id << endl; - objects.push_back(id); - } - NGT::Index::remove(database, objects); - } catch (NGT::Exception &err) { - cerr << "Error " << err.what() << endl; - cerr << usage << endl; - } catch (...) { - cerr << "Error" << endl; - cerr << usage << endl; - } - } - - - void - info(NGT::Args &args) - { - const string usage = "Usage: ngtq info index"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "DB is not specified" << endl; - cerr << usage << endl; - return; - } - NGTQ::Index index(database); - index.info(cout); - - } - - void - validate(NGT::Args &args) - { - const string usage = "parameter"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "DB is not specified" << endl; - cerr << usage << endl; - return; - } - NGTQ::Index index(database); - - index.getQuantizer().validate(); - - } - - - -#ifdef NGTQ_SHARED_INVERTED_INDEX - void - compress(NGT::Args &args) - { - const string usage = "Usage: ngtq compress index)"; - string database; - try { - database = args.get("#1"); - } catch (...) { - cerr << "DB is not specified" << endl; - cerr << usage << endl; - return; - } - try { - NGTQ::Index::compress(database); - } catch (NGT::Exception &err) { - cerr << "Error " << err.what() << endl; - cerr << usage << endl; - } catch (...) { - cerr << "Error" << endl; - cerr << usage << endl; - } - } -#endif - - void help() { - cerr << "Usage : ngtq command database data" << endl; - cerr << " command : create search remove append export import" << endl; - } - - void execute(NGT::Args args) { - string command; - try { - command = args.get("#0"); - } catch(...) { - help(); - return; - } - - debugLevel = args.getl("X", 0); - - try { - if (debugLevel >= 1) { - cerr << "ngt::command=" << command << endl; - } - if (command == "search") { - search(args); - } else if (command == "create") { - create(args); - } else if (command == "append") { - append(args); - } else if (command == "remove") { - remove(args); - } else if (command == "info") { - info(args); - } else if (command == "validate") { - validate(args); - } else if (command == "rebuild") { - rebuild(args); -#ifdef NGTQ_SHARED_INVERTED_INDEX - } else if (command == "compress") { - compress(args); -#endif - } else { - cerr << "Illegal command. " << command << endl; - } - } catch(NGT::Exception &err) { - cerr << "ngt: Fatal error: " << err.what() << endl; - } - } - - int debugLevel; - -}; - -}; - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/NGTQ/Quantizer.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/NGTQ/Quantizer.h deleted file mode 100644 index ccbba403a8..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/NGTQ/Quantizer.h +++ /dev/null @@ -1,2375 +0,0 @@ -// -// Copyright (C) 2016-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include "NGT/Index.h" -#include "NGT/ArrayFile.h" -#include "NGT/Clustering.h" -#include "NGT/defines.h" - - - -//#define NGTQ_DISTANCE_ANGLE - - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR -#define NGTQ_SHARED_INVERTED_INDEX -#endif - -namespace NGTQ { - -#ifdef NGTQ_INVERTED_INDEX_UINT16 -typedef uint16_t InvertedIndexEntrySizeType; -#else -typedef uint32_t InvertedIndexEntrySizeType; -#endif - -template -class InvertedIndexObject { -public: - InvertedIndexObject() { id = 0; clear(); } - InvertedIndexObject(uint32_t i) { set(i); } - void set(uint32_t i) { id = i; clear(); } - void clear() { - for (size_t i = 0; i < SIZE; i++) { - localID[i] = 0; - } - } - uint32_t id; - T localID[SIZE]; -}; - - template -#ifdef NGTQ_SHARED_INVERTED_INDEX -class InvertedIndexEntry : public NGT::Vector > { - typedef NGT::Vector > PARENT; -#else -class InvertedIndexEntry : public vector > { - typedef vector > PARENT; -#endif -public: -#ifdef NGTQ_SHARED_INVERTED_INDEX - InvertedIndexEntry(SharedMemoryAllocator &allocator, NGT::ObjectSpace *os = 0) {} - void pushBack(SharedMemoryAllocator &allocator) { - PARENT::push_back(InvertedIndexObject(), allocator); - } - void pushBack(size_t id, SharedMemoryAllocator &allocator) { - PARENT::push_back(InvertedIndexObject(id), allocator); - } -#else - InvertedIndexEntry(NGT::ObjectSpace *os = 0) {} - void pushBack() { PARENT::push_back(InvertedIndexObject()); } - void pushBack(size_t id) { PARENT::push_back(InvertedIndexObject(id)); } -#endif - - void serialize(ofstream &os, NGT::ObjectSpace *objspace = 0) { - assert(PARENT::size() <= numeric_limits::max()); - NGT::Serializer::write(os, static_cast(PARENT::size())); - - os.write((const char*)&PARENT::at(0), PARENT::size() * sizeof(InvertedIndexObject)); - } - - void deserialize(ifstream &is, NGT::ObjectSpace *objectspace = 0) { - PARENT::clear(); - InvertedIndexEntrySizeType sz; - try { - NGT::Serializer::read(is, sz); - } catch(NGT::Exception &err) { - stringstream msg; - msg << "InvertedIndexEntry::deserialize: It might be caused by inconsistency of the valuable type of the inverted index size. " << err.what(); - NGTThrowException(msg); - } - PARENT::resize(sz); - is.read((char*)&PARENT::at(0), PARENT::size() * sizeof(InvertedIndexObject)); - } - -}; - -class LocalDatam { -public: - LocalDatam(){}; - LocalDatam(size_t iii, size_t iil) - : iiIdx(iii), iiLocalIdx(iil) {} - size_t iiIdx; - size_t iiLocalIdx; -}; - -template -class SerializableObject : public NGT::Object { -public: - static size_t getSerializedDataSize() { return SIZE; } -}; - - enum DataType { - DataTypeUint8 = 0, - DataTypeFloat = 1 - }; - - enum DistanceType { - DistanceTypeNone = 0, - DistanceTypeL1 = 1, - DistanceTypeL2 = 2, - DistanceTypeHamming = 3, - DistanceTypeAngle = 4 - }; - - enum CentroidCreationMode { - CentroidCreationModeDynamic = 0, - CentroidCreationModeStatic = 1, - CentroidCreationModeDynamicKmeans = 2, - }; - - enum AggregationMode { - AggregationModeApproximateDistance = 0, - AggregationModeApproximateDistanceWithLookupTable = 1, - AggregationModeApproximateDistanceWithCache = 2, - AggregationModeExactDistanceThroughApproximateDistance = 3, - AggregationModeExactDistance = 4 - }; - - class Property { - public: - Property() { - // default values - threadSize = 32; - globalRange = 200; - localRange = 50; - globalCentroidLimit = 10000000; - localCentroidLimit = 1000000; - dimension = 0; - dataSize = 0; - dataType = DataTypeFloat; - distanceType = DistanceTypeNone; - singleLocalCodebook = false; - localDivisionNo = 8; - batchSize = 1000; - centroidCreationMode = CentroidCreationModeDynamic; - localCentroidCreationMode = CentroidCreationModeDynamic; - localIDByteSize = 0; // finally decided by localCentroidLimit - localCodebookState = false; // not completed - localClusteringSampleCoefficient = 10; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - invertedIndexSharedMemorySize = 512; // MB -#endif - } - - void save(const string &path) { - NGT::PropertySet prop; - prop.set("ThreadSize", (long)threadSize); - prop.set("GlobalRange", globalRange); - prop.set("LocalRange", localRange); - prop.set("GlobalCentroidLimit", (long)globalCentroidLimit); - prop.set("LocalCentroidLimit", (long)localCentroidLimit); - prop.set("Dimension", (long)dimension); - prop.set("DataSize", (long)dataSize); - prop.set("DataType", (long)dataType); - prop.set("DistanceType", (long)distanceType); - prop.set("SingleLocalCodebook", (long)singleLocalCodebook); - prop.set("LocalDivisionNo", (long)localDivisionNo); - prop.set("BatchSize", (long)batchSize); - prop.set("CentroidCreationMode", (long)centroidCreationMode); - prop.set("LocalCentroidCreationMode", (long)localCentroidCreationMode); - prop.set("LocalIDByteSize", (long)localIDByteSize); - prop.set("LocalCodebookState", (long)localCodebookState); - prop.set("LocalSampleCoefficient", (long)localClusteringSampleCoefficient); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - prop.set("InvertedIndexSharedMemorySize", (long)invertedIndexSharedMemorySize); -#endif - prop.save(path + "/prf"); - } - - void setupLocalIDByteSize() { - if (localCentroidLimit > 0xffff - 1) { - if (localIDByteSize == 2) { - NGTThrowException("NGTQ::Property: The localIDByteSize is illegal for the localCentroidLimit."); - } - localIDByteSize = 4; - } else { - if (localIDByteSize == INT_MAX) { - localIDByteSize = 4; - } else if (localIDByteSize == 0) { - localIDByteSize = 2; - } else { - } - } - if (localIDByteSize != 2 && localIDByteSize != 4) { - NGTThrowException("NGTQ::Property: Fatal internal error! localIDByteSize should be 2 or 4."); - } - } - - void load(const string &path) { - NGT::PropertySet prop; - prop.load(path + "/prf"); - threadSize = prop.getl("ThreadSize", threadSize); - globalRange = prop.getf("GlobalRange", globalRange); - localRange = prop.getf("LocalRange", localRange); - globalCentroidLimit = prop.getl("GlobalCentroidLimit", globalCentroidLimit); - localCentroidLimit = prop.getl("LocalCentroidLimit", localCentroidLimit); - dimension = prop.getl("Dimension", dimension); - dataSize = prop.getl("DataSize", dataSize); - dataType = (DataType)prop.getl("DataType", dataType); - distanceType = (DistanceType)prop.getl("DistanceType", distanceType); - singleLocalCodebook = prop.getl("SingleLocalCodebook", singleLocalCodebook); - localDivisionNo = prop.getl("LocalDivisionNo", localDivisionNo); - batchSize = prop.getl("BatchSize", batchSize); - centroidCreationMode= (CentroidCreationMode)prop.getl("CentroidCreationMode", centroidCreationMode); - localCentroidCreationMode = (CentroidCreationMode)prop.getl("LocalCentroidCreationMode", localCentroidCreationMode); - localIDByteSize = prop.getl("LocalIDByteSize", INT_MAX); - localCodebookState = prop.getl("LocalCodebookState", localCodebookState); - localClusteringSampleCoefficient = prop.getl("LocalSampleCoefficient", localClusteringSampleCoefficient); - setupLocalIDByteSize(); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - invertedIndexSharedMemorySize - = prop.getl("InvertedIndexSharedMemorySize", invertedIndexSharedMemorySize); -#endif - } - - void setup(const Property &p) { - threadSize = p.threadSize; - globalRange = p.globalRange; - localRange = p.localRange; - globalCentroidLimit = p.globalCentroidLimit; - localCentroidLimit = p.localCentroidLimit; - distanceType = p.distanceType; - singleLocalCodebook = p.singleLocalCodebook; - localDivisionNo = p.localDivisionNo; - batchSize = p.batchSize; - centroidCreationMode = p.centroidCreationMode; - localCentroidCreationMode = p.localCentroidCreationMode; - localIDByteSize = p.localIDByteSize; - localCodebookState = p.localCodebookState; - localClusteringSampleCoefficient = p.localClusteringSampleCoefficient; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - invertedIndexSharedMemorySize = p.invertedIndexSharedMemorySize; -#endif - } - - inline size_t getLocalCodebookNo() { return singleLocalCodebook ? 1 : localDivisionNo; } - - size_t threadSize; - double globalRange; - double localRange; - size_t globalCentroidLimit; - size_t localCentroidLimit; - size_t dimension; - size_t dataSize; - DataType dataType; - DistanceType distanceType; - bool singleLocalCodebook; - size_t localDivisionNo; - size_t batchSize; - CentroidCreationMode centroidCreationMode; - CentroidCreationMode localCentroidCreationMode; - size_t localIDByteSize; - bool localCodebookState; - size_t localClusteringSampleCoefficient; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - size_t invertedIndexSharedMemorySize; -#endif -}; - -class Quantizer { -public: - typedef ArrayFile ObjectList; - - Quantizer(DataType dt, size_t dim) { - property.dimension = dim; - property.dataType = dt; - switch (property.dataType) { - case DataTypeUint8: - property.dataSize = sizeof(uint8_t) * property.dimension; - break; - case DataTypeFloat: - property.dataSize = sizeof(float) * property.dimension; - break; - default: - cerr << "Quantizer constructor: Inner error. Invalid data type." << endl; - break; - } - } - - virtual ~Quantizer() { } - - virtual void create(const string &index, - NGT::Property &globalPropertySet, - NGT::Property &localPropertySet) = 0; - virtual void insert(vector > &objects) = 0; - virtual void insert(const string &line, vector > &objects, size_t id) = 0; - virtual void rebuildIndex() = 0; - virtual void save() = 0; - virtual void open(const string &index, NGT::Property &globalProperty) = 0; - virtual void open(const string &index) = 0; - virtual void close() = 0; -#ifdef NGTQ_SHARED_INVERTED_INDEX - virtual void reconstructInvertedIndex(const string &indexFile) = 0; -#endif - - virtual void validate() = 0; - - virtual void search(NGT::Object *object, NGT::ObjectDistances &objs, size_t size, - size_t approximateSearchSize, - size_t codebookSearchSize, bool resultRefinement, bool lookUpTable, - double epsilon) = 0; - - virtual void search(NGT::Object *object, NGT::ObjectDistances &objs, size_t size, - size_t approximateSearchSize, - size_t codebookSearchSize, AggregationMode aggregationMode, - double epsilon) = 0; - - virtual void search(NGT::Object *object, NGT::ObjectDistances &objs, size_t size, - float expansion, - AggregationMode aggregationMode, - double epsilon) = 0; - - virtual void info(ostream &os) = 0; - - virtual NGT::Index & getLocalCodebook(size_t size) = 0; - - virtual void verify() = 0; - - virtual size_t getLocalCodebookSize(size_t size) = 0; - - virtual size_t getInstanceSharedMemorySize(ostream &os, SharedMemoryAllocator::GetMemorySizeType t = SharedMemoryAllocator::GetTotalMemorySize) = 0; - - NGT::Object *allocateObject(string &line, const string &sep) { - return globalCodebook.allocateObject(line, " \t"); - } - NGT::Object *allocateObject(vector &obj) { - return globalCodebook.allocateObject(obj); - } - void deleteObject(NGT::Object *object) { globalCodebook.deleteObject(object); } - - void setThreadSize(size_t size) { property.threadSize = size; } - void setGlobalRange(double r) { property.globalRange = r; } - void setLocalRange(double r) { property.localRange = r; } - void setGlobalCentroidLimit(size_t s) { property.globalCentroidLimit = s; } - void setLocalCentroidLimit(size_t s) { property.localCentroidLimit = s; } - void setDimension(size_t s) { property.dimension = s; } - void setDistanceType(DistanceType t) { property.distanceType = t; } - - string getRootDirectory() { return rootDirectory; } - - size_t getSharedMemorySize(ostream &os, SharedMemoryAllocator::GetMemorySizeType t = SharedMemoryAllocator::GetTotalMemorySize) { - os << "Global centroid:" << endl; - return globalCodebook.getSharedMemorySize(os, t) + getInstanceSharedMemorySize(os, t); - } - - ObjectList objectList; - string rootDirectory; - - Property property; - - NGT::Index globalCodebook; - - size_t distanceComputationCount; - -}; - -#ifdef NGTQ_DISTANCE_ANGLE - class LocalDistanceLookup { - public: - LocalDistanceLookup():a(0.0), b(0.0), sum(0.0){}; - void set(double pa, double pb, double psum) {a = pa; b = pb; sum= psum;} - double a; - double b; - double sum; - }; -#endif - -class QuantizedObjectDistance { -public: - class Cache { - public: - Cache():localDistanceLookup(0) {} - ~Cache() { - if (localDistanceLookup != 0) { - delete[] localDistanceLookup; - localDistanceLookup = 0; - } - } - bool isValid(size_t idx) { return flag[idx]; } -#ifndef NGTQ_DISTANCE_ANGLE - void set(size_t idx, double d) { flag[idx] = true; localDistanceLookup[idx] = d; } - double getDistance(size_t idx) { return localDistanceLookup[idx]; } -#endif - void initialize(size_t s) { - size = s; -#ifdef NGTQ_DISTANCE_ANGLE - localDistanceLookup = new LocalDistanceLookup[size]; -#else - localDistanceLookup = new double[size]; -#endif - flag.resize(size, false); - } -#ifdef NGTQ_DISTANCE_ANGLE - LocalDistanceLookup *localDistanceLookup; -#else - double *localDistanceLookup; -#endif - size_t size; - vector flag; - }; - - QuantizedObjectDistance(){} - virtual ~QuantizedObjectDistance() {} - - virtual double operator()(NGT::Object &object, size_t objectID, void *localID) = 0; - - virtual double operator()(void *localID, Cache &cache) = 0; - - virtual double cache(NGT::Object &object, size_t objectID, void *localID, Cache &cache) = 0; - - template - inline double getAngleDistanceUint8(NGT::Object &object, size_t objectID, T localID[]) { - assert(globalCodebook != 0); - NGT::PersistentObject &gcentroid = *globalCodebook->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - unsigned char *gcptr = &gcentroid.at(0, globalCodebook->getObjectSpace().getRepository().allocator); -#else - unsigned char *gcptr = &gcentroid[0]; -#endif - unsigned char *optr = &((NGT::Object&)object)[0]; - double normA = 0.0F; - double normB = 0.0F; - double sum = 0.0F; - for (size_t li = 0; li < localDivisionNo; li++) { - size_t idx = localCodebookNo == 1 ? 0 : li; - NGT::PersistentObject &lcentroid = *localCodebook[idx].getObjectSpace().getRepository().get(localID[li]); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, localCodebook[idx].getObjectSpace().getRepository().allocator); -#else - float *lcptr = (float*)&lcentroid[0]; -#endif - float *lcendptr = lcptr + localDataSize; - while (lcptr != lcendptr) { - double a = *optr++; - double b = *gcptr++ + *lcptr++; - normA += a * a; - normB += b * b; - sum += a * b; - } - } - double cosine = sum / (sqrt(normA) * sqrt(normB)); - if (cosine >= 1.0F) { - // nothing to do - return 0.0F; - } else if (cosine <= -1.0F) { - return acos(-1.0F); - } - return acos(cosine); - } - -#if defined(NGT_AVX_DISABLED) || !defined(__AVX__) - template - inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T localID[]) { - assert(globalCodebook != 0); - NGT::PersistentObject &gcentroid = *globalCodebook->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - unsigned char *gcptr = &gcentroid.at(0, globalCodebook->getObjectSpace().getRepository().allocator); -#else - unsigned char *gcptr = &gcentroid[0]; -#endif - unsigned char *optr = &((NGT::Object&)object)[0]; - double distance = 0.0; - for (size_t li = 0; li < localDivisionNo; li++) { - size_t idx = localCodebookNo == 1 ? 0 : li; - NGT::PersistentObject &lcentroid = *localCodebook[idx].getObjectSpace().getRepository().get(localID[li]); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, localCodebook[idx].getObjectSpace().getRepository().allocator); -#else - float *lcptr = (float*)&lcentroid[0]; -#endif - double d = 0.0; - float *lcendptr = lcptr + localDataSize; - while (lcptr != lcendptr) { - double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++; - d += sub * sub; - } - distance += d; - } - return sqrt(distance); - } -#else - // AVX - template - inline double getL2DistanceUint8(NGT::Object &object, size_t objectID, T localID[]) { - assert(globalCodebook != 0); - NGT::PersistentObject &gcentroid = *globalCodebook->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - unsigned char *gcptr = &gcentroid.at(0, globalCodebook->getObjectSpace().getRepository().allocator); -#else - unsigned char *gcptr = &gcentroid[0]; -#endif - unsigned char *optr = &((NGT::Object&)object)[0]; - double distance = 0.0; - for (size_t li = 0; li < localDivisionNo; li++) { - size_t idx = localCodebookNo == 1 ? 0 : li; - NGT::PersistentObject &lcentroid = *localCodebook[idx].getObjectSpace().getRepository().get(localID[li]); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, localCodebook[idx].getObjectSpace().getRepository().allocator); -#else - float *lcptr = (float*)&lcentroid[0]; -#endif - - float *lcendptr = lcptr + localDataSize - 3; - __m128 sum = _mm_setzero_ps(); - while (lcptr < lcendptr) { - __m128i x1 = _mm_cvtepu8_epi32(*(__m128i const*)optr); - __m128i x2 = _mm_cvtepu8_epi32(*(__m128i const*)gcptr); - x1 = _mm_sub_epi32(x1, x2); - __m128 sub = _mm_sub_ps(_mm_cvtepi32_ps(x1), *(__m128 const*)lcptr); - sum = _mm_add_ps(sum, _mm_mul_ps(sub, sub)); - optr += 4; - gcptr += 4; - lcptr += 4; - } - __attribute__((aligned(32))) float f[4]; - _mm_store_ps(f, sum); - double d = f[0] + f[1] + f[2] + f[3]; - while (lcptr < lcendptr) { - double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++; - d += sub * sub; - } - distance += d; - } - distance = sqrt(distance); - return distance; - } -#endif - - template - inline double getAngleDistanceFloat(NGT::Object &object, size_t objectID, T localID[]) { - assert(globalCodebook != 0); - NGT::PersistentObject &gcentroid = *globalCodebook->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(float); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *gcptr = (float*)&gcentroid.at(0, globalCodebook->getObjectSpace().getRepository().allocator); -#else - float *gcptr = (float*)&gcentroid[0]; -#endif - float *optr = (float*)&((NGT::Object&)object)[0]; - double normA = 0.0F; - double normB = 0.0F; - double sum = 0.0F; - for (size_t li = 0; li < localDivisionNo; li++) { - size_t idx = localCodebookNo == 1 ? 0 : li; - NGT::PersistentObject &lcentroid = *localCodebook[idx].getObjectSpace().getRepository().get(localID[li]); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, localCodebook[idx].getObjectSpace().getRepository().allocator); -#else - float *lcptr = (float*)&lcentroid[0]; -#endif - float *lcendptr = lcptr + localDataSize; - while (lcptr != lcendptr) { - double a = *optr++; - double b = *gcptr++ + *lcptr++; - normA += a * a; - normB += b * b; - sum += a * b; - } - } - double cosine = sum / (sqrt(normA) * sqrt(normB)); - if (cosine >= 1.0F) { - // nothing to do - return 0.0F; - } else if (cosine <= -1.0F) { - return acos(-1.0F); - } - return acos(cosine); - } - - template - inline double getL2DistanceFloat(NGT::Object &object, size_t objectID, T localID[]) { - assert(globalCodebook != 0); - NGT::PersistentObject &gcentroid = *globalCodebook->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(float); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *gcptr = (float*)&gcentroid.at(0, globalCodebook->getObjectSpace().getRepository().allocator); -#else - float *gcptr = (float*)&gcentroid[0]; -#endif - float *optr = (float*)&((NGT::Object&)object)[0]; - double distance = 0.0; - for (size_t li = 0; li < localDivisionNo; li++) { - size_t idx = localCodebookNo == 1 ? 0 : li; - NGT::PersistentObject &lcentroid = *localCodebook[idx].getObjectSpace().getRepository().get(localID[li]); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, localCodebook[idx].getObjectSpace().getRepository().allocator); -#else - float *lcptr = (float*)&lcentroid[0]; -#endif - float *lcendptr = lcptr + localDataSize; - double d = 0.0; - while (lcptr != lcendptr) { - double sub = (*optr++ - *gcptr++) - *lcptr++; - d += sub * sub; - } - distance += d; - } - distance = sqrt(distance); - return distance; - } - -#ifdef NGTQ_DISTANCE_ANGLE - inline void createDistanceLookup(NGT::Object &object, size_t objectID, Cache &cache) { - assert(globalCodebook != 0); - NGT::Object &gcentroid = (NGT::Object &)*globalCodebook->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(float); - float *optr = (float*)&((NGT::Object&)object)[0]; - float *gcptr = (float*)&gcentroid[0]; - LocalDistanceLookup *dlu = cache.localDistanceLookup; - size_t oft = 0; - for (size_t li = 0; li < localCodebookNo; li++, oft += localDataSize) { - dlu++; - for (size_t k = 1; k < localCodebookCentroidNo; k++) { - NGT::Object &lcentroid = (NGT::Object&)*localCodebook[li].getObjectSpace().getRepository().get(k); - float *lcptr = (float*)&lcentroid[0]; - float *lcendptr = lcptr + localDataSize; - float *toptr = optr + oft; - float *tgcptr = gcptr + oft; - double normA = 0.0F; - double normB = 0.0F; - double sum = 0.0F; - while (lcptr != lcendptr) { - double a = *toptr++; - double b = *tgcptr++ + *lcptr++; - normA += a * a; - normB += b * b; - sum += a * b; - } - dlu->set(normA, normB, sum); - dlu++; - } - } - } -#else - inline void createDistanceLookup(NGT::Object &object, size_t objectID, Cache &cache) { - assert(globalCodebook != 0); - NGT::Object &gcentroid = (NGT::Object &)*globalCodebook->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(float); - float residualVector[sizeOfObject]; - { - float *resptr = residualVector; - float *gcptr = (float*)&gcentroid[0]; - float *optr = (float*)&((NGT::Object&)object)[0]; - float *optrend = optr + sizeOfObject; - while (optr != optrend) { - *resptr++ = *optr++ - *gcptr++; - } - } - double *dlu = cache.localDistanceLookup; - size_t oft = 0; - for (size_t li = 0; li < localCodebookNo; li++, oft += localDataSize) { - dlu++; - for (size_t k = 1; k < localCodebookCentroidNo; k++) { - NGT::Object &lcentroid = dynamic_cast(*localCodebook[li].getObjectSpace().getRepository().get(k)); - float *lcptr = (float*)&lcentroid[0]; - float *lcendptr = lcptr + localDataSize; - float *resptr = residualVector + oft; - double d = 0.0; - while (lcptr != lcendptr) { - double sub = *resptr++ - *lcptr++; - d += sub * sub; - } - *dlu++ = d; - } - } - } -#endif - - void set(NGT::Index *gcb, NGT::Index lcb[], size_t dn, size_t lcn) { - globalCodebook = gcb; - localCodebook = lcb; - localDivisionNo = dn; - set(lcb, lcn); - } - - void set(NGT::Index lcb[], size_t lcn) { - localCodebookNo = lcn; - localCodebookCentroidNo = lcb[0].getObjectRepositorySize(); - } - - void initialize(Cache &c) { - c.initialize(localCodebookNo * localCodebookCentroidNo); - } - - NGT::Index *globalCodebook; - NGT::Index *localCodebook; - size_t localDivisionNo; - size_t localCodebookNo; - size_t localCodebookCentroidNo; -}; - -template -class QuantizedObjectDistanceUint8 : public QuantizedObjectDistance { -public: - -#ifdef NGTQ_DISTANCE_ANGLE - inline double operator()(void *l, Cache &cache) { - T *localID = static_cast(l); - double normA = 0.0F; - double normB = 0.0F; - double sum = 0.0F; - for (size_t li = 0; li < localDivisionNo; li++) { - LocalDistanceLookup &ldl = *(cache.localDistanceLookup + li * localCodebookCentroidNo + localID[li]); - normA += ldl.a; - normB += ldl.b; - sum += ldl.sum; - } - double cosine = sum / (sqrt(normA) * sqrt(normB)); - if (cosine >= 1.0F) { - // nothing to do - return 0.0F; - } else if (cosine <= -1.0F) { - return acos(-1.0F); - } - return acos(cosine); - } - inline double operator()(NGT::Object &object, size_t objectID, void *l) { - return getAngleDistanceUint8(object, objectID, static_cast(l)); - } - inline double cache(NGT::Object &object, size_t objectID, void *l, Cache &cache) { - cerr << "cache is not implemented" << endl; - abort(); - return 0.0; - } -#else - inline double operator()(void *l, Cache &cache) { - T *localID = static_cast(l); - double distance = 0.0; - for (size_t li = 0; li < localDivisionNo; li++) { - distance += cache.getDistance(li * localCodebookCentroidNo + localID[li]); - } - return sqrt(distance); - } - inline double operator()(NGT::Object &object, size_t objectID, void *l) { - return getL2DistanceUint8(object, objectID, static_cast(l)); - } - inline double cache(NGT::Object &object, size_t objectID, void *l, Cache &cache) { - T *localID = static_cast(l); - NGT::PersistentObject &gcentroid = *globalCodebook->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(uint8_t); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - unsigned char *gcptr = &gcentroid.at(0, globalCodebook->getObjectSpace().getRepository().allocator); -#else - unsigned char *gcptr = &gcentroid[0]; -#endif - unsigned char *optr = &((NGT::Object&)object)[0]; - double distance = 0.0; - for (size_t li = 0; li < localDivisionNo; li++) { - if (cache.isValid(li * localCodebookCentroidNo + localID[li])) { - distance += cache.getDistance(li * localCodebookCentroidNo + localID[li]); - optr += localDataSize; - gcptr += localDataSize; - } else { - size_t idx = localCodebookNo == 1 ? 0 : li; - NGT::PersistentObject &lcentroid = *localCodebook[idx].getObjectSpace().getRepository().get(localID[li]); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, localCodebook[idx].getObjectSpace().getRepository().allocator); -#else - float *lcptr = (float*)&lcentroid[0]; -#endif - double d = 0.0; - float *lcendptr = lcptr + localDataSize; - while (lcptr != lcendptr) { - double sub = ((int)*optr++ - (int)*gcptr++) - *lcptr++; - d += sub * sub; - } - distance += d; - cache.set(li * localCodebookCentroidNo + localID[li], d); - } - } - return sqrt(distance); - } -#endif - -}; - -template -class QuantizedObjectDistanceFloat : public QuantizedObjectDistance { -public: - -#ifdef NGTQ_DISTANCE_ANGLE - inline double operator()(void *l, Cache &cache) { - T *localID = static_cast(l); - double normA = 0.0F; - double normB = 0.0F; - double sum = 0.0F; - for (size_t li = 0; li < localDivisionNo; li++) { - LocalDistanceLookup &ldl = *(cache.localDistanceLookup + li * localCodebookCentroidNo + localID[li]); - normA += ldl.a; - normB += ldl.b; - sum += ldl.sum; - } - double cosine = sum / (sqrt(normA) * sqrt(normB)); - if (cosine >= 1.0F) { - // nothing to do - return 0.0F; - } else if (cosine <= -1.0F) { - return acos(-1.0F); - } - return acos(cosine); - } - inline double operator()(NGT::Object &object, size_t objectID, void *l) { - return getAngleDistanceFloat(object, objectID, static_cast(l)); - } - inline double cache(NGT::Object &object, size_t objectID, void *l, Cache &cache) { - cerr << "cache is not implemented." << endl; - abort(); - return 0.0; - } -#else - inline double operator()(void *l, Cache &cache) { - T *localID = static_cast(l); - double distance = 0.0; - for (size_t li = 0; li < localDivisionNo; li++) { - distance += cache.getDistance(li * localCodebookCentroidNo + localID[li]); - } - return sqrt(distance); - } - inline double operator()(NGT::Object &object, size_t objectID, void *l) { - return getL2DistanceFloat(object, objectID, static_cast(l)); - } - inline double cache(NGT::Object &object, size_t objectID, void *l, Cache &cache) { - T *localID = static_cast(l); - NGT::PersistentObject &gcentroid = *globalCodebook->getObjectSpace().getRepository().get(objectID); - size_t sizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t localDataSize = sizeOfObject / localDivisionNo / sizeof(float); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *gcptr = (float*)&gcentroid.at(0, globalCodebook->getObjectSpace().getRepository().allocator); -#else - float *gcptr = (float*)&gcentroid[0]; -#endif - float *optr = (float*)&((NGT::Object&)object)[0]; - double distance = 0.0; - for (size_t li = 0; li < localDivisionNo; li++) { - if (cache.isValid(li * localCodebookCentroidNo + localID[li])) { - distance += cache.getDistance(li * localCodebookCentroidNo + localID[li]); - optr += localDataSize; - gcptr += localDataSize; - } else { - size_t idx = localCodebookNo == 1 ? 0 : li; - NGT::PersistentObject &lcentroid = *localCodebook[idx].getObjectSpace().getRepository().get(localID[li]); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *lcptr = (float*)&lcentroid.at(0, localCodebook[idx].getObjectSpace().getRepository().allocator); -#else - float *lcptr = (float*)&lcentroid[0]; -#endif - float *lcendptr = lcptr + localDataSize; - double d = 0.0; - while (lcptr != lcendptr) { - double sub = (*optr++ - *gcptr++) - *lcptr++; - d += sub * sub; - } - distance += d; - cache.set(li * localCodebookCentroidNo + localID[li], d); - } - } - return sqrt(distance); - } -#endif - -}; - -class GenerateResidualObject { -public: - virtual ~GenerateResidualObject() {} - virtual void operator()(size_t objectID, size_t centroidID, - vector > > &localObjs) = 0; - - void set(NGT::Index &gc, NGT::Index lc[], size_t dn, size_t lcn, - Quantizer::ObjectList *ol) { - globalCodebook = &(NGT::GraphAndTreeIndex&)gc.getIndex();; - divisionNo = dn; - objectList = ol; - set(lc, lcn); - } - void set(NGT::Index lc[], size_t lcn) { - localCodebook.clear(); - localCodebookNo = lcn; - for (size_t i = 0; i < localCodebookNo; ++i) { - localCodebook.push_back(&(NGT::GraphAndTreeIndex&)lc[i].getIndex()); - } - } - - NGT::GraphAndTreeIndex *globalCodebook; - vector localCodebook; - size_t divisionNo; - size_t localCodebookNo; - Quantizer::ObjectList *objectList; -}; - -class GenerateResidualObjectUint8 : public GenerateResidualObject { -public: - void operator()(size_t objectID, size_t centroidID, - vector > > &localObjs) { - NGT::PersistentObject &globalCentroid = *globalCodebook->getObjectSpace().getRepository().get(centroidID); - NGT::Object object(&globalCodebook->getObjectSpace()); - objectList->get(objectID, object, &globalCodebook->getObjectSpace()); - // compute residual objects - size_t sizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t lsize = sizeOfObject / divisionNo; - for (size_t di = 0; di < divisionNo; di++) { - vector subObject; - subObject.resize(lsize); - for (size_t d = 0; d < lsize; d++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - subObject[d] = (double)object[di * lsize + d] - - (double)globalCentroid.at(di * lsize + d, globalCodebook->getObjectSpace().getRepository().allocator); -#else - subObject[d] = (double)object[di * lsize + d] - (double)globalCentroid[di * lsize + d]; -#endif - } - size_t idx = localCodebookNo == 1 ? 0 : di; - NGT::Object *localObj = localCodebook[idx]->allocateObject(subObject); - localObjs[idx].push_back(pair(localObj, 0)); - } - } -}; - -class GenerateResidualObjectFloat : public GenerateResidualObject { -public: - void operator()(size_t objectID, size_t centroidID, - vector > > &localObjs) { - NGT::PersistentObject &globalCentroid = *globalCodebook->getObjectSpace().getRepository().get(centroidID); - NGT::Object object(&globalCodebook->getObjectSpace()); - objectList->get(objectID, object, &globalCodebook->getObjectSpace()); - // compute residual objects - size_t byteSizeOfObject = globalCodebook->getObjectSpace().getByteSizeOfObject(); - size_t localByteSize = byteSizeOfObject / divisionNo; - size_t localDimension = localByteSize / sizeof(float); - for (size_t di = 0; di < divisionNo; di++) { - vector subObject; - subObject.resize(localDimension); - float *subVector = static_cast(object.getPointer(di * localByteSize)); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - float *globalCentroidSubVector = static_cast(globalCentroid.getPointer(di * localByteSize, - globalCodebook->getObjectSpace().getRepository().allocator)); -#else - float *globalCentroidSubVector = static_cast(globalCentroid.getPointer(di * localByteSize)); -#endif - for (size_t d = 0; d < localDimension; d++) { - subObject[d] = (double)subVector[d] - (double)globalCentroidSubVector[d]; - } - size_t idx = localCodebookNo == 1 ? 0 : di; - NGT::Object *localObj = localCodebook[idx]->allocateObject(subObject); - localObjs[idx].push_back(pair(localObj, 0)); - } - } -}; - -template -class QuantizerInstance : public Quantizer { -public: - - typedef void (QuantizerInstance::*AggregateObjectsFunction)(NGT::ObjectDistance &, NGT::Object *, size_t size, NGT::ObjectSpace::ResultSet &, size_t); - typedef InvertedIndexEntry IIEntry; - - QuantizerInstance(DataType dataType, size_t dimension):Quantizer(dataType, dimension) { - property.localDivisionNo = DIVISION_NO; - if (property.localDivisionNo < 1 || property.localDivisionNo > 64) { - stringstream msg; - msg << "Quantizer::Error. Invalid divion no. " << DIVISION_NO; - NGTThrowException(msg); - } - quantizedObjectDistance = 0; - generateResidualObject = 0; - } - - virtual ~QuantizerInstance() { close(); } - - void createEmptyIndex(const string &index, - NGT::Property &globalProperty, - NGT::Property &localProperty) - { - rootDirectory = index; - NGT::Index::mkdir(rootDirectory); - string global = rootDirectory + "/global"; - NGT::Index::mkdir(global); - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - NGT::GraphAndTreeIndex globalCodebook(global, globalProperty); - globalCodebook.saveIndex(global); - globalCodebook.close(); -#else - NGT::GraphAndTreeIndex globalCodebook(globalProperty); - globalCodebook.saveIndex(global); - globalCodebook.close(); -#endif - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - size_t localCodebookNo = property.getLocalCodebookNo(); - for (size_t i = 0; i < localCodebookNo; ++i) { - stringstream local; - local << rootDirectory << "/local-" << i; - NGT::Index::mkdir(local.str()); - NGT::GraphAndTreeIndex localCodebook(local.str(), localProperty); - localCodebook.saveIndex(local.str()); - } -#else - NGT::GraphAndTreeIndex localCodebook(localProperty); - size_t localCodebookNo = property.getLocalCodebookNo(); - for (size_t i = 0; i < localCodebookNo; ++i) { - stringstream local; - local << rootDirectory << "/local-" << i; - NGT::Index::mkdir(local.str()); - localCodebook.saveIndex(local.str()); - } - localCodebook.close(); -#endif -#ifdef NGTQ_SHARED_INVERTED_INDEX - invertedIndex.open(index + "/ivt", property.invertedIndexSharedMemorySize); -#else - ofstream of(rootDirectory + "/ivt"); - invertedIndex.serialize(of); -#endif - string fname = rootDirectory + "/obj"; - if (property.dataSize == 0) { - NGTThrowException("Quantizer: data size of the object list is 0."); - } - objectList.create(fname, property.dataSize); - objectList.open(fname); - objectList.close(); - - property.save(rootDirectory); - } - - void open(const string &index, NGT::Property &globalProperty) { - open(index); - globalCodebook.setProperty(globalProperty); - } - - void open(const string &index) { - rootDirectory = index; - property.load(rootDirectory); - string globalIndex = index + "/global"; - globalCodebook.open(globalIndex); - size_t localCodebookNo = property.getLocalCodebookNo(); - - for (size_t i = 0; i < localCodebookNo; ++i) { - stringstream localIndex; - localIndex << index << "/local-" << i; - localCodebook[i].open(localIndex.str()); - } -#ifdef NGTQ_SHARED_INVERTED_INDEX - invertedIndex.open(index + "/ivt", 0); -#else - ifstream ifs(index + "/ivt"); - if (!ifs) { - cerr << "Cannot open " << index + "/ivt" << "." << endl; - return; - } - invertedIndex.deserialize(ifs); -#endif - objectList.open(index + "/obj"); - - NGT::Property globalProperty; - globalCodebook.getProperty(globalProperty); - if (globalProperty.objectType == NGT::Property::ObjectType::Float) { - if (property.localIDByteSize == 4) { - quantizedObjectDistance = new QuantizedObjectDistanceFloat; - } else if (property.localIDByteSize == 2) { - quantizedObjectDistance = new QuantizedObjectDistanceFloat; - } else { - abort(); - } - generateResidualObject = new GenerateResidualObjectFloat; - } else if (globalProperty.objectType == NGT::Property::ObjectType::Uint8) { - if (property.localIDByteSize == 4) { - quantizedObjectDistance = new QuantizedObjectDistanceUint8; - } else if (property.localIDByteSize == 2) { - quantizedObjectDistance = new QuantizedObjectDistanceUint8; - } else { - abort(); - } - generateResidualObject = new GenerateResidualObjectUint8; - } else { - cerr << "NGTQ::open: Fatal Inner Error: invalid object type. " << globalProperty.objectType << endl; - cerr << " check NGT version consistency between the caller and the library." << endl; - assert(0); - } - assert(quantizedObjectDistance != 0); - - quantizedObjectDistance->set(&globalCodebook, localCodebook, DIVISION_NO, property.getLocalCodebookNo()); - generateResidualObject->set(globalCodebook, localCodebook, DIVISION_NO, property.getLocalCodebookNo(), &objectList); - } - - void save() { -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - string global = rootDirectory + "/global"; - globalCodebook.saveIndex(global); - size_t localCodebookNo = property.getLocalCodebookNo(); - for (size_t i = 0; i < localCodebookNo; ++i) { - stringstream local; - local << rootDirectory << "/local-" << i; - try { - NGT::Index::mkdir(local.str()); - } catch (...) {} - localCodebook[i].saveIndex(local.str()); - } -#endif // NGT_SHARED_MEMORY_ALLOCATOR -#ifndef NGTQ_SHARED_INVERTED_INDEX - ofstream of(rootDirectory + "/ivt"); - invertedIndex.serialize(of); -#endif - property.save(rootDirectory); - } - - void close() { - objectList.close(); - globalCodebook.close(); - size_t localCodebookNo = property.getLocalCodebookNo(); - for (size_t i = 0; i < localCodebookNo; ++i) { - localCodebook[i].close(); - } - if (quantizedObjectDistance != 0) { - delete quantizedObjectDistance; - quantizedObjectDistance = 0; - } - if (generateResidualObject != 0) { - delete generateResidualObject; - generateResidualObject = 0; - } -#ifndef NGTQ_SHARED_INVERTED_INDEX - invertedIndex.deleteAll(); -#endif - } - -#ifdef NGTQ_SHARED_INVERTED_INDEX - void reconstructInvertedIndex(const string &invertedFile) { - // reduce memory usage of shared memory - size_t size = invertedIndex.size(); -#ifdef NGTQ_RECONSTRUCTION_DISABLE - cerr << "Reconstruction is disabled!!!!!" << endl; - return; -#endif - cerr << "reconstructing to reduce shared memory..." << endl; - NGT::PersistentRepository tmpInvertedIndex; - tmpInvertedIndex.open(invertedFile, 0); - tmpInvertedIndex.reserve(size); - for (size_t id = 0; id < size; ++id) { - if (invertedIndex.isEmpty(id)) { - continue; - } - if (id % 100000 == 0) { - cerr << "Processed " << id << endl; - } - IIEntry *entry = new(tmpInvertedIndex.getAllocator()) InvertedIndexEntry(tmpInvertedIndex.getAllocator()); - size_t esize = (*invertedIndex.at(id)).size(); - (*entry).reserve(esize, tmpInvertedIndex.getAllocator()); - for (size_t i = 0; i < esize; ++i) { - (*entry).pushBack(tmpInvertedIndex.getAllocator()); - (*entry).at(i, tmpInvertedIndex.getAllocator()) = - (*invertedIndex.at(id)).at(i, invertedIndex.getAllocator()); - } - tmpInvertedIndex.put(id, entry); - } - cerr << "verifying..." << endl; - for (size_t id = 0; id < size; ++id) { - if (invertedIndex.isEmpty(id)) { - continue; - } - if (id % 100000 == 0) { - cerr << "Processed " << id << endl; - } - IIEntry &sentry = *invertedIndex.at(id); - IIEntry &dentry = *tmpInvertedIndex.at(id); - size_t esize = sentry.size(); - if (esize != dentry.size()) { - cerr << id << " : size is inconsistency" << endl; - } - for (size_t i = 0; i < esize; ++i) { - InvertedIndexObject &sobject = (*invertedIndex.at(id)).at(i, invertedIndex.getAllocator()); - InvertedIndexObject &dobject = (*tmpInvertedIndex.at(id)).at(i, tmpInvertedIndex.getAllocator()); - if (sobject.id != dobject.id) { - cerr << id << "," << i << " : id is inconsistency" << endl; - } - for (size_t d = 0; d < DIVISION_NO; ++d) { - if (sobject.localID[d] != dobject.localID[d]) { - cerr << id << "," << i << "," << d << " : local id is inconsistency" << endl; - } - } - } - } - - tmpInvertedIndex.close(); - } -#endif - - void createIndex(NGT::GraphAndTreeIndex &codebook, - size_t centroidLimit, - const vector > &objects, - vector &ids, - double &range) - { - if (centroidLimit > 0) { - if (getNumberOfObjects(codebook) >= centroidLimit) { - range = -1.0; - codebook.createIndex(objects, ids, range, property.threadSize); - } else if (getNumberOfObjects(codebook) + objects.size() > centroidLimit) { - auto start = objects.begin(); - do { - size_t s = centroidLimit - getNumberOfObjects(codebook); - auto end = start; - if (std::distance(objects.begin(), start) + s >= objects.size()) { - end = objects.end(); - } else { - end += s; - } - vector idstmp; - vector > objtmp; - std::copy(start, end, std::back_inserter(objtmp)); - codebook.createIndex(objtmp, idstmp, range, property.threadSize); - assert(idstmp.size() == objtmp.size()); - std::copy(idstmp.begin(), idstmp.end(), std::back_inserter(ids)); - start = end; - } while (start != objects.end() && centroidLimit - getNumberOfObjects(codebook) > 0); - range = -1.0; - vector idstmp; - vector > objtmp; - std::copy(start, objects.end(), std::back_inserter(objtmp)); - codebook.createIndex(objtmp, idstmp, range, property.threadSize); - std::copy(idstmp.begin(), idstmp.end(), std::back_inserter(ids)); - assert(ids.size() == objects.size()); - } else { - codebook.createIndex(objects, ids, range, property.threadSize); - } - } else { - codebook.createIndex(objects, ids, range, property.threadSize); - } - } - - void setGlobalCodeToInvertedEntry(NGT::Index::InsertionResult &id, pair &object, vector &localData) { - size_t globalCentroidID = id.id; - if (invertedIndex.isEmpty(globalCentroidID)) { -#ifdef NGTQ_SHARED_INVERTED_INDEX - invertedIndex.put(globalCentroidID, new(invertedIndex.allocator) InvertedIndexEntry(invertedIndex.allocator)); -#else - invertedIndex.put(globalCentroidID, new InvertedIndexEntry); -#endif - } - assert(!invertedIndex.isEmpty(globalCentroidID)); - IIEntry &invertedIndexEntry = *invertedIndex.at(globalCentroidID); - if (id.identical) { - if (property.centroidCreationMode == CentroidCreationModeDynamic) { - assert(invertedIndexEntry.size() != 0); - } - // objects[].second=Record No.=Object No. - // object No. is just set to the index entry. not local centroid ids. - // local centroid id will be set later. -#ifdef NGTQ_SHARED_INVERTED_INDEX - invertedIndexEntry.pushBack(object.second, invertedIndex.allocator); -#else - invertedIndexEntry.pushBack(object.second); -#endif - if (id.distance != 0.0) { - localData.push_back(LocalDatam(globalCentroidID, - invertedIndexEntry.size() - 1)); - } - } else { - // There is no identical and similar object in the DB - // This object should be a centroid. - if (property.centroidCreationMode != CentroidCreationModeDynamic) { - cerr << "Quantizer: Error! Although it is an original quantizer, object has been added to the global." << endl; - cerr << " Specify the size limitation of the global." << endl; - assert(id.identical); - } - if (invertedIndexEntry.size() == 0) { -#ifdef NGTQ_SHARED_INVERTED_INDEX - invertedIndexEntry.pushBack(object.second, invertedIndex.allocator); -#else - invertedIndexEntry.pushBack(object.second); -#endif - } else { -#ifdef NGTQ_SHARED_INVERTED_INDEX - invertedIndexEntry.at(0, invertedIndex.allocator).set(object.second); -#else - invertedIndexEntry[0].set(object.second); -#endif - } - } - } - - void setSingleLocalCodeToInvertedIndexEntry(vector &lcodebook, vector &localData, vector > > &localObjs) { - double lr = property.localRange; - size_t localCentroidLimit = property.localCentroidLimit; - if (property.localCodebookState) { - lr = -1.0; - localCentroidLimit = 0; - } - vector lids; - createIndex(*lcodebook[0], localCentroidLimit, localObjs[0], lids, lr); - for (size_t i = 0; i < localData.size(); i++) { - for (size_t di = 0; di < DIVISION_NO; di++) { - size_t id = lids[i * DIVISION_NO + di].id; - assert(!property.localCodebookState || id <= ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)); -#ifdef NGTQ_SHARED_INVERTED_INDEX - (*invertedIndex.at(localData[i].iiIdx)).at(localData[i].iiLocalIdx, invertedIndex.allocator).localID[di] = id; -#else - (*invertedIndex.at(localData[i].iiIdx))[localData[i].iiLocalIdx].localID[di] = id; -#endif - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - localCodebook[0].deleteObject(localObjs[0][i].first); -#else - if (lids[i].identical) { - localCodebook[0].deleteObject(localObjs[0][i].first); - } -#endif - } - } - - bool setMultipleLocalCodeToInvertedIndexEntry(vector &lcodebook, vector &localData, vector > > &localObjs) { - size_t localCodebookNo = property.getLocalCodebookNo(); - bool localCodebookFull = true; - for (size_t li = 0; li < localCodebookNo; ++li) { - double lr = property.localRange; - size_t localCentroidLimit = property.localCentroidLimit; - if (property.localCentroidCreationMode == CentroidCreationModeDynamicKmeans) { - localCentroidLimit *= property.localClusteringSampleCoefficient; - } - if (property.localCodebookState) { - lr = -1.0; - localCentroidLimit = 0; - } - vector lids; - createIndex(*lcodebook[li], localCentroidLimit, localObjs[li], lids, lr); - if (lr >= 0.0) { - localCodebookFull = false; - } - assert(localData.size() == lids.size()); - for (size_t i = 0; i < localData.size(); i++) { - size_t id = lids[i].id; - assert(!property.localCodebookState || id <= ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)); -#ifdef NGTQ_SHARED_INVERTED_INDEX - (*invertedIndex.at(localData[i].iiIdx)).at(localData[i].iiLocalIdx, invertedIndex.allocator).localID[li] = id; -#else - (*invertedIndex.at(localData[i].iiIdx))[localData[i].iiLocalIdx].localID[li] = id; -#endif -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - localCodebook[li].deleteObject(localObjs[li][i].first); -#else - if (lids[i].identical) { - localCodebook[li].deleteObject(localObjs[li][i].first); - } -#endif - } - } - return localCodebookFull; - } - - void buildMultipleLocalCodebooks(NGT::Index *localCodebook, size_t localCodebookNo, size_t numberOfCentroids) { - NGT::Clustering clustering; - clustering.epsilonFrom = 0.10; - clustering.epsilonTo = 0.50; - clustering.epsilonStep = 0.05; - clustering.maximumIteration = 10; - for (size_t li = 0; li < localCodebookNo; ++li) { - cerr << "Beginning of clustering " << localCodebook[li].getPath() << endl; - double diff = clustering.kmeansWithNGT(localCodebook[li], numberOfCentroids); - if (diff > 0.0) { - cerr << "Not converge" << endl; - } - cerr << "End of clustering " << localCodebook[li].getPath() << endl; - } - } - - void replaceInvertedIndexEntry(size_t localCodebookNo) { - vector localData; - for (size_t gidx = 1; gidx < invertedIndex.size(); gidx++) { - IIEntry &invertedIndexEntry = *invertedIndex.at(gidx); - for (size_t oi = 1; oi < invertedIndexEntry.size(); oi++) { - localData.push_back(LocalDatam(gidx, oi)); - } - } - vector > > localObjs; - localObjs.resize(localCodebookNo); - for (size_t i = 0; i < localData.size(); i++) { - IIEntry &invertedIndexEntry = *invertedIndex.at(localData[i].iiIdx); -#ifdef NGTQ_SHARED_INVERTED_INDEX - (*generateResidualObject)(invertedIndexEntry.at(localData[i].iiLocalIdx, invertedIndex.allocator).id, - localData[i].iiIdx, // centroid:ID of global codebook - localObjs); -#else - (*generateResidualObject)(invertedIndexEntry[localData[i].iiLocalIdx].id, - localData[i].iiIdx, // centroid:ID of global codebook - localObjs); -#endif - } - vector lcodebook; - for (size_t i = 0; i < localCodebookNo; i++) { - lcodebook.push_back(&(NGT::GraphAndTreeIndex &)localCodebook[i].getIndex()); - } - setMultipleLocalCodeToInvertedIndexEntry(lcodebook, localData, localObjs); - } - - void insert(vector > &objects) { - NGT::GraphAndTreeIndex &gcodebook = (NGT::GraphAndTreeIndex &)globalCodebook.getIndex(); - vector lcodebook; - size_t localCodebookNo = property.getLocalCodebookNo(); - for (size_t i = 0; i < localCodebookNo; i++) { - lcodebook.push_back(&(NGT::GraphAndTreeIndex &)localCodebook[i].getIndex()); - } - double gr = property.globalRange; - vector ids; - createIndex(gcodebook, property.globalCentroidLimit, objects, ids, gr); -#ifdef NGTQ_SHARED_INVERTED_INDEX - if (invertedIndex.getAllocatedSize() <= invertedIndex.size() + objects.size()) { - invertedIndex.reserve(invertedIndex.getAllocatedSize() * 2); - } -#else - invertedIndex.reserve(invertedIndex.size() + objects.size()); -#endif - vector localData; - for (size_t i = 0; i < ids.size(); i++) { - setGlobalCodeToInvertedEntry(ids[i], objects[i], localData); - } - vector > > localObjs; - localObjs.resize(property.getLocalCodebookNo()); - for (size_t i = 0; i < localData.size(); i++) { - IIEntry &invertedIndexEntry = *invertedIndex.at(localData[i].iiIdx); -#ifdef NGTQ_SHARED_INVERTED_INDEX - (*generateResidualObject)(invertedIndexEntry.at(localData[i].iiLocalIdx, invertedIndex.allocator).id, - localData[i].iiIdx, // centroid:ID of global codebook - localObjs); -#else - (*generateResidualObject)(invertedIndexEntry[localData[i].iiLocalIdx].id, - localData[i].iiIdx, // centroid:ID of global codebook - localObjs); -#endif - } - if (property.singleLocalCodebook) { - // single local codebook - setSingleLocalCodeToInvertedIndexEntry(lcodebook, localData, localObjs); - } else { - // multiple local codebooks - bool localCodebookFull = setMultipleLocalCodeToInvertedIndexEntry(lcodebook, localData, localObjs); - if ((!property.localCodebookState) && localCodebookFull) { - if (property.localCentroidCreationMode == CentroidCreationModeDynamicKmeans) { - buildMultipleLocalCodebooks(localCodebook, localCodebookNo, property.localCentroidLimit); - (*generateResidualObject).set(localCodebook, localCodebookNo); - property.localCodebookState = true; - localCodebookFull = false; - replaceInvertedIndexEntry(localCodebookNo); - } else { - property.localCodebookState = true; - localCodebookFull = false; - } - } - } - for (size_t i = 0; i < objects.size(); i++) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - globalCodebook.deleteObject(objects[i].first); -#else - if (ids[i].identical == true) { - globalCodebook.deleteObject(objects[i].first); - } -#endif - } - objects.clear(); - } - - void insert(const string &line, vector > &objects, size_t count) { - size_t id = count; - if (count == 0) { - id = objectList.size(); - id = id == 0 ? 1 : id; - } - NGT::Object *object = globalCodebook.allocateObject(line, " \t"); - objectList.put(id, *object, &globalCodebook.getObjectSpace()); - objects.push_back(pair(object, id)); - if (objects.size() >= property.batchSize) { - // batch insert - insert(objects); - } - } - - void rebuildIndex() { - vector > objects; - size_t objectCount = objectList.size(); - size_t count = 0; - for (size_t idx = 1; idx < objectCount; idx++) { - count++; - if (count % 100000 == 0) { - cerr << "Processed " << count; - cerr << endl; - } - NGT::Object *object = globalCodebook.getObjectSpace().allocateObject(); - objectList.get(idx, *object, &globalCodebook.getObjectSpace()); - objects.push_back(pair(object, idx)); - if (objects.size() >= property.batchSize) { - insert(objects); - } - } - if (objects.size() >= 0) { - insert(objects); - } - } - - void create(const string &index, - NGT::Property &globalProperty, - NGT::Property &localProperty - ) { - - if (property.localCentroidLimit > ((1UL << (sizeof(LOCAL_ID_TYPE) * 8)) - 1)) { - stringstream msg; - msg << "Quantizer::Error. Local centroid limit is too large. " << property.localCentroidLimit << " It must be less than " << (1UL << (sizeof(LOCAL_ID_TYPE) * 8)); - NGTThrowException(msg); - } - - NGT::Property gp; - NGT::Property lp; - - gp.setDefault(); - lp.setDefault(); - - gp.batchSizeForCreation = 500; - gp.edgeSizeLimitForCreation = 0; - gp.edgeSizeForCreation = 10; - gp.graphType = NGT::Index::Property::GraphType::GraphTypeANNG; - gp.insertionRadiusCoefficient = 1.1; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - gp.graphSharedMemorySize = 512; // MB - gp.treeSharedMemorySize = 512; // MB - gp.objectSharedMemorySize = 512; // MB 512 is for up to 20M objects. -#endif - - lp.batchSizeForCreation = 500; - lp.edgeSizeLimitForCreation = 0; - lp.edgeSizeForCreation = 10; - lp.graphType = NGT::Index::Property::GraphType::GraphTypeANNG; - lp.insertionRadiusCoefficient = 1.1; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - lp.graphSharedMemorySize = 128; // MB - lp.treeSharedMemorySize = 128; // MB - lp.objectSharedMemorySize = 128; // MB 128 is for up to 5M objects? -#endif - - gp.set(globalProperty); - lp.set(localProperty); - - gp.edgeSizeForSearch = 40; - lp.edgeSizeForSearch = 40; - - lp.objectType = NGT::Index::Property::ObjectType::Float; - - gp.dimension = property.dimension; - if (gp.dimension == 0) { - stringstream msg; - msg << "NGTQ::Quantizer::create: specified dimension is zero!"; - NGTThrowException(msg); - } - if (property.localDivisionNo != 1 && property.dimension % property.localDivisionNo != 0) { - stringstream msg; - msg << "NGTQ::Quantizer::create: dimension and localDivisionNo are not proper. " - << property.dimension << ":" << property.localDivisionNo; - NGTThrowException(msg); - } - lp.dimension = property.dimension / property.localDivisionNo; - - switch (property.dataType) { - case DataTypeFloat: - gp.objectType = NGT::Index::Property::ObjectType::Float; - break; - case DataTypeUint8: - gp.objectType = NGT::Index::Property::ObjectType::Uint8; - break; - default: - { - stringstream msg; - msg << "NGTQ::Quantizer::create: Inner error! Invalid data type."; - NGTThrowException(msg); - } - } - - switch (property.distanceType) { - case DistanceTypeL1: - gp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL1; - lp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL1; - break; - case DistanceTypeL2: -#ifdef NGTQ_DISTANCE_ANGLE - { - stringstream msg; - msg << "NGTQ::Quantizer::create: L2 is unavailable!!! you have to rebuild."; - NGTThrowException(msg); - } -#endif - gp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; - lp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; - break; - case DistanceTypeHamming: - gp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeHamming; - lp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeHamming; - break; - case DistanceTypeAngle: -#ifndef NGTQ_DISTANCE_ANGLE - { - stringstream msg; - msg << "NGTQ::Quantizer::create: Angle is unavailable!!! you have to rebuild."; - NGTThrowException(msg); - } -#endif - gp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeAngle; - lp.distanceType = NGT::Index::Property::DistanceType::DistanceTypeAngle; - break; - default: - { - stringstream msg; - msg << "NGTQ::Quantizer::create Inner error! Invalid distance type."; - NGTThrowException(msg); - } - } - - createEmptyIndex(index, gp, lp); - } - - void validate() { - size_t gcbSize = globalCodebook.getObjectRepositorySize(); - cerr << "global codebook size=" << gcbSize << endl; - for (size_t gidx = 1; gidx < 4 && gidx < gcbSize; gidx++) { - if (invertedIndex[gidx] == 0) { - cerr << "something wrong" << endl; - exit(1); - } - cerr << gidx << " inverted index size=" << (*invertedIndex[gidx]).size() << endl; - if ((*invertedIndex[gidx]).size() == 0) { - cerr << "something wrong" << endl; - continue; - } - - NGT::PersistentObject &gcentroid = *globalCodebook.getObjectSpace().getRepository().get(gidx); - vector gco; - globalCodebook.getObjectSpace().getRepository().extractObject(&gcentroid, gco); - cerr << "global centroid object(" << gco.size() << ")="; - for (size_t i = 0; i < gco.size(); i++) { - cerr << gco[i] << " "; - } - cerr << endl; - - { -#ifdef NGTQ_SHARED_INVERTED_INDEX - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[gidx]).at(0, invertedIndex.allocator); -#else - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[gidx])[0]; -#endif - if (invertedIndexEntry.id != gidx) { - cerr << "a global centroid id is wrong in the inverted index." << gidx << ":" << invertedIndexEntry.id << endl; - exit(1); - } - } - NGT::Object *gcentroidFromList = globalCodebook.getObjectSpace().getRepository().allocateObject(); - objectList.get(gidx, *gcentroidFromList, &globalCodebook.getObjectSpace()); - vector gcolist; - globalCodebook.getObjectSpace().getRepository().extractObject(gcentroidFromList, gcolist); - if (gco != gcolist) { - cerr << "Fatal error! centroid in NGT is different from object list in NGTQ" << endl; - exit(1); - } - vector elements; - for (size_t iidx = 0; iidx < (*invertedIndex[gidx]).size(); iidx++) { -#ifdef NGTQ_SHARED_INVERTED_INDEX - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[gidx]).at(iidx, invertedIndex.allocator); -#else - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[gidx])[iidx]; -#endif - elements.push_back(invertedIndexEntry.id); - cerr << " object ID=" << invertedIndexEntry.id; - { - NGT::Object *o = globalCodebook.getObjectSpace().getRepository().allocateObject(); - objectList.get(invertedIndexEntry.id, *o, &globalCodebook.getObjectSpace()); - NGT::Distance distance = globalCodebook.getObjectSpace().getComparator()(*gcentroidFromList, *o); - cerr << ":distance=" << distance; - } - cerr << ":local codebook IDs="; - for (size_t li = 0; li < property.localDivisionNo; li++) { - cerr << invertedIndexEntry.localID[li] << " "; - } - cerr << endl; - for (size_t li = 0; li < property.localDivisionNo; li++) { - if (invertedIndexEntry.localID[li] == 0) { - if (property.centroidCreationMode != CentroidCreationModeStatic) { - if (iidx == 0) { - break; - } - } - cerr << "local ID is unexpected zero." << endl; - } - } - } - vector ngid; - { - size_t resultSize = 30; - size_t approximateSearchSize = 1000; - size_t codebookSearchSize = 50; - bool refine = true; - bool lookuptable = false; - double epsilon = 0.1; - NGT::ObjectDistances objects; - search(gcentroidFromList, objects, resultSize, approximateSearchSize, codebookSearchSize, - refine, lookuptable, epsilon); - for (size_t resulti = 0; resulti < objects.size(); resulti++) { - if (std::find(elements.begin(), elements.end(), objects[resulti].id) != elements.end()) { - cerr << " "; - } else { - cerr << "x "; - ngid.push_back(objects[resulti].id); - NGT::ObjectDistances result; - NGT::Object *o = globalCodebook.getObjectSpace().getRepository().allocateObject(); - objectList.get(ngid.back(), *o, &globalCodebook.getObjectSpace()); - NGT::GraphAndTreeIndex &graphIndex = (NGT::GraphAndTreeIndex &)globalCodebook.getIndex(); - graphIndex.searchForNNGInsertion(*o, result); - if (result[0].distance > objects[resulti].distance) { - cerr << " Strange! "; - cerr << result[0].distance << ":" << objects[resulti].distance << " "; - } - globalCodebook.getObjectSpace().getRepository().deleteObject(o); - } - cerr << " search object " << resulti << " ID=" << objects[resulti].id << " distance=" << objects[resulti].distance << endl; - } - } - globalCodebook.getObjectSpace().getRepository().deleteObject(gcentroidFromList); - } - } - - void searchGlobalCodebook(NGT::Object *query, size_t size, NGT::ObjectDistances &objects, - size_t &approximateSearchSize, - size_t codebookSearchSize, - double epsilon) { - - NGT::SearchContainer sc(*query); - sc.setResults(&objects); - sc.size = codebookSearchSize; - sc.radius = FLT_MAX; - sc.explorationCoefficient = epsilon + 1.0; - if (epsilon >= FLT_MAX) { - globalCodebook.linearSearch(sc); - } else { - globalCodebook.search(sc); - } - - } - - inline void aggregateObjectsWithExactDistance(NGT::ObjectDistance &globalCentroid, NGT::Object *query, size_t size, NGT::ObjectSpace::ResultSet &results, size_t approximateSearchSize) { - NGT::ObjectSpace &objectSpace = globalCodebook.getObjectSpace(); - for (size_t j = 0; j < invertedIndex[globalCentroid.id]->size() && results.size() < approximateSearchSize; j++) { -#ifdef NGTQ_SHARED_INVERTED_INDEX - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[globalCentroid.id]).at(j, invertedIndex.allocator); -#else - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[globalCentroid.id])[j]; -#endif - double distance; - if (invertedIndexEntry.localID[0] == 0) { - distance = globalCentroid.distance; - } else { - NGT::Object o(&objectSpace); - objectList.get(invertedIndexEntry.id, (NGT::Object&)o, &objectSpace); - distance = objectSpace.getComparator()(*query, (NGT::Object&)o); - } - - NGT::ObjectDistance obj; - obj.id = invertedIndexEntry.id; - obj.distance = distance; - assert(obj.id > 0); - results.push(obj); - - } - } - - inline void aggregateObjectsWithLookupTable(NGT::ObjectDistance &globalCentroid, NGT::Object *query, size_t size, NGT::ObjectSpace::ResultSet &results, size_t approximateSearchSize) { - QuantizedObjectDistance::Cache cache; - (*quantizedObjectDistance).createDistanceLookup(*query, globalCentroid.id, cache); - - for (size_t j = 0; j < invertedIndex[globalCentroid.id]->size() && results.size() < approximateSearchSize; j++) { -#ifdef NGTQ_SHARED_INVERTED_INDEX - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[globalCentroid.id]).at(j, invertedIndex.allocator); -#else - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[globalCentroid.id])[j]; -#endif - double distance; - if (invertedIndexEntry.localID[0] == 0) { - distance = globalCentroid.distance; - } else { - distance = (*quantizedObjectDistance)(invertedIndexEntry.localID, cache); - } - - - NGT::ObjectDistance obj; - obj.id = invertedIndexEntry.id; - obj.distance = distance; - assert(obj.id > 0); - results.push(obj); - - } - } - - - inline void aggregateObjectsWithCache(NGT::ObjectDistance &globalCentroid, NGT::Object *query, size_t size, NGT::ObjectSpace::ResultSet &results, size_t approximateSearchSize) { - - QuantizedObjectDistance::Cache cache; - (*quantizedObjectDistance).initialize(cache); - - for (size_t j = 0; j < invertedIndex[globalCentroid.id]->size() && results.size() < approximateSearchSize; j++) { -#ifdef NGTQ_SHARED_INVERTED_INDEX - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[globalCentroid.id]).at(j, invertedIndex.allocator); -#else - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[globalCentroid.id])[j]; -#endif - double distance; - if (invertedIndexEntry.localID[0] == 0) { - distance = globalCentroid.distance; - } else { - distance = (*quantizedObjectDistance).cache(*query, globalCentroid.id, invertedIndexEntry.localID, cache); - } - - - NGT::ObjectDistance obj; - obj.id = invertedIndexEntry.id; - obj.distance = distance; - assert(obj.id > 0); - results.push(obj); - - } - } - - - inline void aggregateObjects(NGT::ObjectDistance &globalCentroid, NGT::Object *query, size_t size, NGT::ObjectSpace::ResultSet &results, size_t approximateSearchSize) { - for (size_t j = 0; j < invertedIndex[globalCentroid.id]->size() && results.size() < approximateSearchSize; j++) { -#ifdef NGTQ_SHARED_INVERTED_INDEX - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[globalCentroid.id]).at(j, invertedIndex.allocator); -#else - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[globalCentroid.id])[j]; -#endif - double distance; - if (invertedIndexEntry.localID[0] == 0) { - distance = globalCentroid.distance; - } else { - distance = (*quantizedObjectDistance)(*query, globalCentroid.id, invertedIndexEntry.localID); - } - - - NGT::ObjectDistance obj; - obj.id = invertedIndexEntry.id; - obj.distance = distance; - assert(obj.id > 0); - results.push(obj); - if (results.size() >= approximateSearchSize) { - return; - } - - } - } - - - inline void aggregateObjects(NGT::Object *query, size_t size, NGT::ObjectDistances &objects, NGT::ObjectSpace::ResultSet &results, size_t approximateSearchSize, AggregateObjectsFunction aggregateObjectsFunction) { - for (size_t i = 0; i < objects.size(); i++) { - if (invertedIndex[objects[i].id] == 0) { - if (property.centroidCreationMode == CentroidCreationModeDynamic) { - cerr << "Inverted index is empty. " << objects[i].id << endl; - } - continue; - } - ((*this).*aggregateObjectsFunction)(objects[i], query, size, results, approximateSearchSize); - if (results.size() >= approximateSearchSize) { - return; - } - } - } - - void refineDistance(NGT::Object *query, NGT::ObjectDistances &results) { - NGT::ObjectSpace &objectSpace = globalCodebook.getObjectSpace(); - for (auto i = results.begin(); i != results.end(); ++i) { - NGT::ObjectDistance &result = *i; - NGT::Object o(&objectSpace); - objectList.get(result.id, (NGT::Object&)o, &objectSpace); - double distance = objectSpace.getComparator()(*query, (NGT::Object&)o); - result.distance = distance; - } - std::sort(results.begin(), results.end()); - } - - void search(NGT::Object *query, NGT::ObjectDistances &objs, - size_t size, - float expansion, - AggregationMode aggregationMode, - double epsilon = FLT_MAX) { - size_t approximateSearchSize = size * expansion; - size_t codebookSearchSize = approximateSearchSize / (objectList.size() / globalCodebook.getObjectRepositorySize()) + 1; - search(query, objs, size, approximateSearchSize, codebookSearchSize, aggregationMode, epsilon); - } - - void search(NGT::Object *query, NGT::ObjectDistances &objs, - size_t size, size_t approximateSearchSize, - size_t codebookSearchSize, bool resultRefinement, - bool lookUpTable = false, - double epsilon = FLT_MAX) { - AggregationMode aggregationMode; - if (resultRefinement) { - aggregationMode = AggregationModeExactDistance; - } else { - if (lookUpTable) { - aggregationMode = AggregationModeApproximateDistanceWithLookupTable; - } else { - aggregationMode = AggregationModeApproximateDistanceWithCache; - } - } - search(query, objs, size, approximateSearchSize, codebookSearchSize, aggregationMode, epsilon); - } - - void search(NGT::Object *query, NGT::ObjectDistances &objs, - size_t size, size_t approximateSearchSize, - size_t codebookSearchSize, - AggregationMode aggregationMode, - double epsilon = FLT_MAX) { - if (aggregationMode == AggregationModeApproximateDistanceWithLookupTable) { - if (property.dataType != DataTypeFloat) { - NGTThrowException("NGTQ: Fatal inner error. the lookup table is only for dataType float!"); - } - } - NGT::ObjectDistances objects; - searchGlobalCodebook(query, size, objects, approximateSearchSize, codebookSearchSize, epsilon); - - objs.clear(); - NGT::ObjectSpace::ResultSet results; - distanceComputationCount = 0; - - AggregateObjectsFunction aggregateObjectsFunction = &QuantizerInstance::aggregateObjectsWithCache; - switch(aggregationMode) { - case AggregationModeExactDistance : - aggregateObjectsFunction = &QuantizerInstance::aggregateObjectsWithExactDistance; - break; - case AggregationModeApproximateDistanceWithLookupTable : - aggregateObjectsFunction = &QuantizerInstance::aggregateObjectsWithLookupTable; - break; - case AggregationModeExactDistanceThroughApproximateDistance : - case AggregationModeApproximateDistanceWithCache : - aggregateObjectsFunction = &QuantizerInstance::aggregateObjectsWithCache; - break; - case AggregationModeApproximateDistance : - aggregateObjectsFunction = &QuantizerInstance::aggregateObjects; - break; - default: - cerr << "NGTQ::Fatal Error. invalid aggregation mode. " << aggregationMode << endl; - abort(); - } - - aggregateObjects(query, size, objects, results, approximateSearchSize, aggregateObjectsFunction); - - objs.resize(results.size()); - while (!results.empty()) { - objs[results.size() - 1] = results.top(); - results.pop(); - } - if (objs.size() > size) { - objs.resize(size); - } - if (aggregationMode == AggregationModeExactDistanceThroughApproximateDistance) { - refineDistance(query, objs); - } - } - - void info(ostream &os) { - cerr << "info" << endl; - os << "Inverted index size=" << invertedIndex.size() << endl; - for (size_t i = 0; i < invertedIndex.size(); i++) { - if (invertedIndex[i] != 0) { - os << i << " " << invertedIndex[i]->size() << endl; - } - } - } - - NGT::Index &getLocalCodebook(size_t idx) { return localCodebook[idx]; } - - void verify() { - cerr << "sizeof(LOCAL_ID_TYPE)=" << sizeof(LOCAL_ID_TYPE) << endl; - size_t objcount = objectList.size(); - cerr << "Object count=" << objcount << endl; - size_t gcount = globalCodebook.getObjectRepositorySize(); - cerr << "Global codebook size=" << gcount << endl; - size_t lcount = localCodebook[0].getObjectRepositorySize(); - cerr << "Local codebook size=" << lcount << endl; - lcount *= 1.1; - cerr << "Inverted index size=" << invertedIndex.size() << endl; - - cerr << "Started verifying global codebook..." << endl; - vector status; - globalCodebook.verify(status); - - cerr << "Started verifing the inverted index." << endl; - size_t errorCount = 0; - for (size_t i = 1; i < invertedIndex.size(); i++) { - if (i % 1000000 == 0) { - cerr << " verified " << i << " entries" << endl; - } - if (errorCount > 100) { - cerr << "Too many errors. Stop..." << endl; - return; - } - if (invertedIndex[i] == 0) { - cerr << "Warning inverted index is zero. " << i << endl; - continue; - } - for (size_t j = 1; j < invertedIndex[i]->size(); j++) { -#ifdef NGTQ_SHARED_INVERTED_INDEX - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[i]).at(j, invertedIndex.allocator); -#else - InvertedIndexObject &invertedIndexEntry = (*invertedIndex[i])[j]; -#endif - if (invertedIndexEntry.id >= objcount) { - cerr << "The object ID of the inverted index entry is too big! " << invertedIndexEntry.id << ":" << objcount << endl; - cerr << " No. of the wrong entry in the inverted index is " << i << endl; - errorCount++; - } - if (invertedIndexEntry.id == 0) { - cerr << "The object ID of the inverted index entry is zero! " << invertedIndexEntry.id << ":" << objcount << endl; - cerr << " No. of the wrong entry in the inverted index is " << i << endl; - errorCount++; - } - for (size_t li = 0; li < property.localDivisionNo; li++) { - if (lcount != 0 && invertedIndexEntry.localID[li] >= lcount) { - cerr << "The local centroid ID of the inverted index entry is wrong. " << invertedIndexEntry.localID[li] << ":" << lcount << endl; - cerr << " No. of the wrong entry in the inverted index is " << i << ". No. of local centroid db is " << li << endl; - errorCount++; - } - if (invertedIndexEntry.localID[li] == 0) { - } - } - } - } - } - - size_t getLocalCodebookSize(size_t size) { return localCodebook[size].getObjectRepositorySize(); } - - size_t getInstanceSharedMemorySize(ostream &os, SharedMemoryAllocator::GetMemorySizeType t = SharedMemoryAllocator::GetTotalMemorySize) { -#ifdef NGTQ_SHARED_INVERTED_INDEX - size_t size = invertedIndex.getAllocator().getMemorySize(t); -#else - size_t size = 0; -#endif - os << "inverted=" << size << endl; - os << "Local centroid:" << endl; - for (size_t di = 0; di < DIVISION_NO; di++) { - size += localCodebook[di].getSharedMemorySize(os, t); - } - return size; - } - - size_t getNumberOfObjects(NGT::GraphAndTreeIndex &index) { - return index.getObjectRepositorySize() == 0 ? 0 : static_cast(index.getObjectRepositorySize()) - 1; - } - -#ifdef NGTQ_SHARED_INVERTED_INDEX - NGT::PersistentRepository invertedIndex; -#else - NGT::Repository invertedIndex; -#endif - QuantizedObjectDistance *quantizedObjectDistance; - GenerateResidualObject *generateResidualObject; - NGT::Index localCodebook[DIVISION_NO]; - -}; - -class Quantization { -public: - static Quantizer *generate(DataType dataType, size_t dimension, size_t divisionNo, size_t localIDByteSize) { - Quantizer *quantizer; - if (localIDByteSize == 4) { - switch (divisionNo) { - case 1: - quantizer = new QuantizerInstance(dataType, dimension); - break; - case 2: - quantizer = new QuantizerInstance(dataType, dimension); - break; - case 4: - quantizer = new QuantizerInstance(dataType, dimension); - break; - case 8: - quantizer = new QuantizerInstance(dataType, dimension); - break; - case 10: - quantizer = new QuantizerInstance(dataType, dimension); - break; - case 15: - quantizer = new QuantizerInstance(dataType, dimension); - break; - case 16: - quantizer = new QuantizerInstance(dataType, dimension); - break; - default: - cerr << "Not support the specified number of divisions. " << divisionNo << ":" << localIDByteSize << endl; - abort(); - } - } else if (localIDByteSize == 2) { - switch (divisionNo) { - case 1: - quantizer = new QuantizerInstance(dataType, dimension); - break; - case 4: - quantizer = new QuantizerInstance(dataType, dimension); - break; - case 8: - quantizer = new QuantizerInstance(dataType, dimension); - break; - case 16: - quantizer = new QuantizerInstance(dataType, dimension); - break; - default: - cerr << "Not support the specified number of divisions. " << divisionNo << ":" << localIDByteSize << endl; - abort(); - } - } else { - cerr << "Not support the specified size of local ID. " << localIDByteSize << endl; - abort(); - } - - return quantizer; - } -}; - - class Index { - public: - Index():quantizer(0) {} - Index(const string& index):quantizer(0) { open(index); } - ~Index() { close(); } - - - - static void create(const string &index, Property &property, - NGT::Property &globalProperty, - NGT::Property &localProperty) { - if (property.dimension == 0) { - NGTThrowException("NGTQ::create: Error. The dimension is zero."); - } - property.setupLocalIDByteSize(); - NGTQ::Quantizer *quantizer = - NGTQ::Quantization::generate(property.dataType, property.dimension, property.getLocalCodebookNo(), property.localIDByteSize); - try { - quantizer->property.setup(property); - quantizer->create(index, globalProperty, localProperty); - } catch(NGT::Exception &err) { - delete quantizer; - throw err; - } - delete quantizer; - } -#ifdef NGTQ_SHARED_INVERTED_INDEX - static void compress(const string &indexFile) { - Index index; - index.open(indexFile); - string tmpivt = indexFile + "/ivt-tmp"; - index.getQuantizer().reconstructInvertedIndex(tmpivt); - index.close(); - string ivt = indexFile + "/ivt"; - unlink(ivt.c_str()); - rename(tmpivt.c_str(), ivt.c_str()); - string ivtc = ivt + "c"; - unlink(ivtc.c_str()); - string tmpivtc = tmpivt + "c"; - rename(tmpivtc.c_str(), ivtc.c_str()); - } -#endif - - static void append(const string &indexName, // index file - const string &data, // data file - size_t dataSize = 0 // data size - ) { - NGTQ::Index index(indexName); - istream *is; - if (data == "-") { - is = &cin; - } else { - ifstream *ifs = new ifstream; - ifs->ifstream::open(data); - if (!(*ifs)) { - cerr << "Cannot open the specified file. " << data << endl; - return; - } - is = ifs; - } - string line; - vector > objects; - size_t count = 0; - // extract objects from the file and insert them to the object list. - while(getline(*is, line)) { - count++; - index.insert(line, objects, 0); - if (count % 10000 == 0) { - cerr << "Processed " << count; - cerr << endl; - } - } - if (objects.size() > 0) { - index.insert(objects); - } - cerr << "end of insertion. " << count << endl; - if (data != "-") { - delete is; - } - - index.save(); - index.close(); - } - - static void rebuild(const string &indexName, - const string &rebuiltIndexName - ) { - - const string srcObjectList = indexName + "/obj"; - const string dstObjectList = rebuiltIndexName + "/obj"; - - if (std::rename(srcObjectList.c_str(), dstObjectList.c_str()) != 0) { - stringstream msg; - msg << "Quantizer::rebuild: Cannot rename an object file. " << srcObjectList << "=>" << dstObjectList ; - NGTThrowException(msg); - } - - try { - NGTQ::Index index(rebuiltIndexName); - - index.rebuildIndex(); - - index.save(); - index.close(); - } catch(NGT::Exception &err) { - std::rename(dstObjectList.c_str(), srcObjectList.c_str()); - throw err; - } - - } - - void open(const string &index) { - close(); - NGT::Property globalProperty; - globalProperty.clear(); - globalProperty.edgeSizeForSearch = 40; - quantizer = getQuantizer(index, globalProperty); - } - - void save() { - getQuantizer().save(); - } - - void close() { - if (quantizer != 0) { - delete quantizer; - quantizer = 0; - } - } - void insert(string &line, vector > &objects, size_t id) { - getQuantizer().insert(line, objects, id); - } - - void insert(vector > &objects) { - getQuantizer().insert(objects); - } - - void rebuildIndex() { - getQuantizer().rebuildIndex(); - } - - NGT::Object *allocateObject(string &line, const string &sep, size_t dimension) { - return getQuantizer().allocateObject(line, sep); - } - - NGT::Object *allocateObject(vector &obj) { - return getQuantizer().allocateObject(obj); - } - - void deleteObject(NGT::Object *object) { getQuantizer().deleteObject(object); } - - void search(NGT::Object *object, NGT::ObjectDistances &objs, - size_t size, size_t approximateSearchSize, - size_t codebookSearchSize, bool resultRefinement, - bool lookUpTable, double epsilon) { - getQuantizer().search(object, objs, size, approximateSearchSize, codebookSearchSize, - resultRefinement, lookUpTable, epsilon); - } - - void search(NGT::Object *object, NGT::ObjectDistances &objs, - size_t size, float expansion, - AggregationMode aggregationMode, - double epsilon) { - getQuantizer().search(object, objs, size, expansion, - aggregationMode, epsilon); - } - - void info(ostream &os) { getQuantizer().info(os); } - - void verify() { getQuantizer().verify(); } - - NGTQ::Quantizer &getQuantizer() { - if (quantizer == 0) { - NGTThrowException("NGTQ::Index: Not open."); - } - return *quantizer; - } - - size_t getGlobalCodebookSize() { return quantizer->globalCodebook.getObjectRepositorySize(); } - size_t getLocalCodebookSize(size_t idx) { return quantizer->getLocalCodebookSize(idx); } - - size_t getSharedMemorySize(ostream &os, SharedMemoryAllocator::GetMemorySizeType t = SharedMemoryAllocator::GetTotalMemorySize) { - return quantizer->getSharedMemorySize(os, t); - } - - - protected: - - static NGTQ::Quantizer *getQuantizer(const string &index) { - NGT::Property globalProperty; - globalProperty.clear(); - return getQuantizer(index, globalProperty); - } - - static NGTQ::Quantizer *getQuantizer(const string &index, NGT::Property &globalProperty) { - NGTQ::Property property; - try { - property.load(index); - } catch (NGT::Exception &err) { - stringstream msg; - msg << "Quantizer::getQuantizer: Cannot load the property. " << index << " : " << err.what(); - NGTThrowException(msg); - } - NGTQ::Quantizer *quantizer = - NGTQ::Quantization::generate(property.dataType, property.dimension, property.localDivisionNo, property.localIDByteSize); - if (quantizer == 0) { - NGTThrowException("NGTQ::Index: Cannot get quantizer."); - } - try { - quantizer->open(index, globalProperty); - } catch(NGT::Exception &err) { - delete quantizer; - throw err; - } - return quantizer; - } - - NGTQ::Quantizer *quantizer; - }; - -} // namespace NGTQ diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Node.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/Node.cpp deleted file mode 100644 index b7cbc9142d..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Node.cpp +++ /dev/null @@ -1,338 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "NGT/defines.h" - -#include "NGT/Node.h" -#include "NGT/Tree.h" - -#include - -using namespace std; - -const double NGT::Node::Object::Pivot = -1.0; - -using namespace NGT; - -void -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) -InternalNode::updateChild(DVPTree &dvptree, Node::ID src, Node::ID dst, - SharedMemoryAllocator &allocator) { -#else -InternalNode::updateChild(DVPTree &dvptree, Node::ID src, Node::ID dst) { -#endif - int cs = dvptree.internalChildrenSize; - for (int i = 0; i < cs; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (getChildren(allocator)[i] == src) { - getChildren(allocator)[i] = dst; -#else - if (getChildren()[i] == src) { - getChildren()[i] = dst; -#endif - return; - } - } -} - -int -LeafNode::selectPivotByMaxDistance(Container &c, Node::Objects &fs) -{ - DVPTree::InsertContainer &iobj = (DVPTree::InsertContainer&)c; - int fsize = fs.size(); - Distance maxd = 0.0; - int maxid = 0; - for (int i = 1; i < fsize; i++) { - Distance d = iobj.vptree->objectSpace->getComparator()(*fs[0].object, *fs[i].object); - if (d >= maxd) { - maxd = d; - maxid = i; - } - } - - int aid = maxid; - maxd = 0.0; - maxid = 0; - for (int i = 0; i < fsize; i++) { - Distance d = iobj.vptree->objectSpace->getComparator()(*fs[aid].object, *fs[i].object); - if (i == aid) { - continue; - } - if (d >= maxd) { - maxd = d; - maxid = i; - } - } - - int bid = maxid; - maxd = 0.0; - maxid = 0; - for (int i = 0; i < fsize; i++) { - Distance d = iobj.vptree->objectSpace->getComparator()(*fs[bid].object, *fs[i].object); - if (i == bid) { - continue; - } - if (d >= maxd) { - maxd = d; - maxid = i; - } - } - return maxid; -} - -int -LeafNode::selectPivotByMaxVariance(Container &c, Node::Objects &fs) -{ - DVPTree::InsertContainer &iobj = (DVPTree::InsertContainer&)c; - - int fsize = fs.size(); - Distance *distance = new Distance[fsize * fsize]; - - for (int i = 0; i < fsize; i++) { - distance[i * fsize + i] = 0; - } - - for (int i = 0; i < fsize; i++) { - for (int j = i + 1; j < fsize; j++) { - Distance d = iobj.vptree->objectSpace->getComparator()(*fs[i].object, *fs[j].object); - distance[i * fsize + j] = d; - distance[j * fsize + i] = d; - } - } - - double *variance = new double[fsize]; - for (int i = 0; i < fsize; i++) { - double avg = 0.0; - for (int j = 0; j < fsize; j++) { - avg += distance[i * fsize + j]; - } - avg /= (double)fsize; - - double v = 0.0; - for (int j = 0; j < fsize; j++) { - v += pow(distance[i * fsize + j] - avg, 2.0); - } - variance[i] = v / (double)fsize; - } - - double maxv = variance[0]; - int maxid = 0; - for (int i = 0; i < fsize; i++) { - if (variance[i] > maxv) { - maxv = variance[i]; - maxid = i; - } - } - delete [] variance; - delete [] distance; - - return maxid; -} - -void -LeafNode::splitObjects(Container &c, Objects &fs, int pv) -{ - DVPTree::InsertContainer &iobj = (DVPTree::InsertContainer&)c; - - // sort the objects by distance - int fsize = fs.size(); - for (int i = 0; i < fsize; i++) { - if (i == pv) { - fs[i].distance = 0; - } else { - Distance d = iobj.vptree->objectSpace->getComparator()(*fs[pv].object, *fs[i].object); - fs[i].distance = d; - } - } - - sort(fs.begin(), fs.end()); - - int childrenSize = iobj.vptree->internalChildrenSize; - int cid = childrenSize - 1; - int cms = (fsize * cid) / childrenSize; - - // divide the objects into child clusters. - fs[fsize - 1].clusterID = cid; - for (int i = fsize - 2; i >= 0; i--) { - if (i < cms && cid > 0) { - if (fs[i].distance != fs[i + 1].distance) { - cid--; - cms = (fsize * cid) / childrenSize; - } - } - fs[i].clusterID = cid; - } - - if (cid != 0) { - // the required number of child nodes could not be acquired - stringstream msg; - msg << "LeafNode::splitObjects: Too many same distances. Reduce internal children size for the tree index or not use the tree index." << endl; - msg << " internalChildrenSize=" << childrenSize << endl; - msg << " # of the children=" << (childrenSize - cid) << endl; - msg << " Size=" << fsize << endl; - msg << " pivot=" << pv << endl; - msg << " cluster id=" << cid << endl; - msg << " Show distances for debug." << endl; - for (int i = 0; i < fsize; i++) { - msg << " " << fs[i].id << ":" << fs[i].distance << endl; - msg << " "; - PersistentObject &po = *fs[i].object; - iobj.vptree->objectSpace->show(msg, po); - msg << endl; - } - if (fs[fsize - 1].clusterID == cid) { - msg << "LeafNode::splitObjects: All of the object distances are the same!" << endl;; - NGTThrowException(msg.str()); - } else { - cerr << msg.str() << endl; - cerr << "LeafNode::splitObjects: Anyway, continue..." << endl; - // sift the cluster IDs to start from 0 to continue. - for (int i = 0; i < fsize; i++) { - fs[i].clusterID -= cid; - } - } - } - - long long *pivots = new long long[childrenSize]; - for (int i = 0; i < childrenSize; i++) { - pivots[i] = -1; - } - - // find the boundaries for the subspaces - for (int i = 0; i < fsize; i++) { - if (pivots[fs[i].clusterID] == -1) { - pivots[fs[i].clusterID] = i; - fs[i].leafDistance = Object::Pivot; - } else { - Distance d = iobj.vptree->objectSpace->getComparator()(*fs[pivots[fs[i].clusterID]].object, *fs[i].object); - fs[i].leafDistance = d; - } - } - delete[] pivots; - - return; -} - -void -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) -LeafNode::removeObject(size_t id, size_t replaceId, SharedMemoryAllocator &allocator) { -#else -LeafNode::removeObject(size_t id, size_t replaceId) { -#endif - - size_t fsize = getObjectSize(); - size_t idx; - for (idx = 0; idx < fsize; idx++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (getObjectIDs(allocator)[idx].id == id) { - if (replaceId != 0) { - getObjectIDs(allocator)[idx].id = replaceId; -#else - if (getObjectIDs()[idx].id == id) { - if (replaceId != 0) { - getObjectIDs()[idx].id = replaceId; -#endif - return; - } else { - break; - } - } - } - if (idx == fsize) { - if (pivot == 0) { - NGTThrowException("LeafNode::removeObject: Internal error!. the pivot is illegal."); - } - stringstream msg; - msg << "VpTree::Leaf::remove: Warning. Cannot find the specified object. ID=" << id << "," << replaceId << " idx=" << idx << " If the same objects were inserted into the index, ignore this message."; - NGTThrowException(msg.str()); - } - -#ifdef NGT_NODE_USE_VECTOR - for (; idx < objectIDs.size() - 1; idx++) { - getObjectIDs()[idx] = getObjectIDs()[idx + 1]; - } - objectIDs.pop_back(); -#else - objectSize--; - for (; idx < objectSize; idx++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getObjectIDs(allocator)[idx] = getObjectIDs(allocator)[idx + 1]; -#else - getObjectIDs()[idx] = getObjectIDs()[idx + 1]; -#endif - } -#endif - - return; -} - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) -bool InternalNode::verify(PersistentRepository &internalNodes, PersistentRepository &leafNodes, - SharedMemoryAllocator &allocator) { -#else -bool InternalNode::verify(Repository &internalNodes, Repository &leafNodes) { -#endif - size_t isize = internalNodes.size(); - size_t lsize = leafNodes.size(); - bool valid = true; - for (size_t i = 0; i < childrenSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - size_t nid = getChildren(allocator)[i].getID(); - ID::Type type = getChildren(allocator)[i].getType(); -#else - size_t nid = getChildren()[i].getID(); - ID::Type type = getChildren()[i].getType(); -#endif - size_t size = type == ID::Leaf ? lsize : isize; - if (nid >= size) { - cerr << "Error! Internal children node id is too big." << nid << ":" << size << endl; - valid = false; - } - try { - if (type == ID::Leaf) { - leafNodes.get(nid); - } else { - internalNodes.get(nid); - } - } catch (...) { - cerr << "Error! Cannot get the node. " << ((type == ID::Leaf) ? "Leaf" : "Internal") << endl; - valid = false; - } - } - return valid; -} - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) -bool LeafNode::verify(size_t nobjs, vector &status, SharedMemoryAllocator &allocator) { -#else -bool LeafNode::verify(size_t nobjs, vector &status) { -#endif - bool valid = true; - for (size_t i = 0; i < objectSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - size_t nid = getObjectIDs(allocator)[i].id; -#else - size_t nid = getObjectIDs()[i].id; -#endif - if (nid > nobjs) { - cerr << "Error! Object id is too big. " << nid << ":" << nobjs << endl; - valid =false; - continue; - } - status[nid] |= 0x04; - } - return valid; -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Node.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Node.h deleted file mode 100644 index cb542fe7b6..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Node.h +++ /dev/null @@ -1,779 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include -#include -#include "NGT/Common.h" -#include "NGT/ObjectSpaceRepository.h" -#include "NGT/defines.h" - -namespace NGT { - class DVPTree; - class InternalNode; - class LeafNode; - class Node { - public: - typedef unsigned int NodeID; - class ID { - public: - enum Type { - Leaf = 1, - Internal = 0 - }; - ID():id(0) {} - ID &operator=(const ID &n) { - id = n.id; - return *this; - } - ID &operator=(int i) { - setID(i); - return *this; - } - bool operator==(ID &n) { return id == n.id; } - bool operator<(ID &n) { return id < n.id; } - Type getType() { return (Type)((0x80000000 & id) >> 31); } - NodeID getID() { return 0x7fffffff & id; } - NodeID get() { return id; } - void setID(NodeID i) { id = (0x80000000 & id) | i; } - void setType(Type t) { id = (t << 31) | getID(); } - void setRaw(NodeID i) { id = i; } - void setNull() { id = 0; } - // for milvus - void serialize(std::stringstream & os) { NGT::Serializer::write(os, id); } - void serialize(std::ofstream &os) { NGT::Serializer::write(os, id); } - void deserialize(std::ifstream &is) { NGT::Serializer::read(is, id); } - // for milvus - void deserialize(std::stringstream & is) { NGT::Serializer::read(is, id); } - void serializeAsText(std::ofstream &os) { NGT::Serializer::writeAsText(os, id); } - void deserializeAsText(std::ifstream &is) { NGT::Serializer::readAsText(is, id); } - virtual int64_t memSize() { return sizeof(id); } - protected: - NodeID id; - }; - - class Object { - public: - Object():object(0) {} - bool operator<(const Object &o) const { return distance < o.distance; } - virtual int64_t memSize() { return sizeof(*this) + object->memSize(); } // size of object cannot be decided accurately - static const double Pivot; - ObjectID id; - PersistentObject *object; - Distance distance; - Distance leafDistance; - int clusterID; - }; - - typedef std::vector Objects; - - Node() { - parent.setNull(); - id.setNull(); - } - - virtual ~Node() {} - - Node &operator=(const Node &n) { - id = n.id; - parent = n.parent; - return *this; - } - - // for milvus - void serialize(std::stringstream & os) - { - id.serialize(os); - parent.serialize(os); - } - - void serialize(std::ofstream &os) { - id.serialize(os); - parent.serialize(os); - } - - void deserialize(std::ifstream &is) { - id.deserialize(is); - parent.deserialize(is); - } - - void deserialize(std::stringstream & is) - { - id.deserialize(is); - parent.deserialize(is); - } - - void serializeAsText(std::ofstream &os) { - id.serializeAsText(os); - os << " "; - parent.serializeAsText(os); - } - - void deserializeAsText(std::ifstream &is) { - id.deserializeAsText(is); - parent.deserializeAsText(is); - } - - virtual int64_t memSize() { return id.memSize() * 2 + pivot->memSize(); } - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - void setPivot(PersistentObject &f, ObjectSpace &os, SharedMemoryAllocator &allocator) { - if (pivot == 0) { - pivot = NGT::PersistentObject::allocate(os); - } - getPivot(os).set(f, os); - } - PersistentObject &getPivot(ObjectSpace &os) { - return *(PersistentObject*)os.getRepository().getAllocator().getAddr(pivot); - } - void deletePivot(ObjectSpace &os, SharedMemoryAllocator &allocator) { - os.deleteObject(&getPivot(os)); - } -#else // NGT_SHARED_MEMORY_ALLOCATOR - void setPivot(NGT::Object &f, ObjectSpace &os) { - if (pivot == 0) { - pivot = NGT::Object::allocate(os); - } - os.copy(getPivot(), f); - } - NGT::Object &getPivot() { return *pivot; } - void deletePivot(ObjectSpace &os) { - os.deleteObject(pivot); - } -#endif // NGT_SHARED_MEMORY_ALLOCATOR - - bool pivotIsEmpty() { - return pivot == 0; - } - - ID id; - ID parent; - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - off_t pivot; -#else - NGT::Object *pivot; -#endif - - }; - - - class InternalNode : public Node { - public: -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - InternalNode(size_t csize, SharedMemoryAllocator &allocator) : childrenSize(csize) { initialize(allocator); } - InternalNode(SharedMemoryAllocator &allocator, NGT::ObjectSpace *os = 0) : childrenSize(5) { initialize(allocator); } -#else - InternalNode(size_t csize) : childrenSize(csize) { initialize(); } - InternalNode(NGT::ObjectSpace *os = 0) : childrenSize(5) { initialize(); } -#endif - - ~InternalNode() { -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - if (children != 0) { - delete[] children; - } - if (borders != 0) { - delete[] borders; - } -#endif - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - void initialize(SharedMemoryAllocator &allocator) { -#else - void initialize() { -#endif - id = 0; - id.setType(ID::Internal); - pivot = 0; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - children = allocator.getOffset(new(allocator) ID[childrenSize]); -#else - children = new ID[childrenSize]; -#endif - for (size_t i = 0; i < childrenSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getChildren(allocator)[i] = 0; -#else - getChildren()[i] = 0; -#endif - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - borders = allocator.getOffset(new(allocator) Distance[childrenSize - 1]); -#else - borders = new Distance[childrenSize - 1]; -#endif - for (size_t i = 0; i < childrenSize - 1; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getBorders(allocator)[i] = 0; -#else - getBorders()[i] = 0; -#endif - } - } - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - void updateChild(DVPTree &dvptree, ID src, ID dst, SharedMemoryAllocator &allocator); -#else - void updateChild(DVPTree &dvptree, ID src, ID dst); -#endif - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ID *getChildren(SharedMemoryAllocator &allocator) { return (ID*)allocator.getAddr(children); } - Distance *getBorders(SharedMemoryAllocator &allocator) { return (Distance*)allocator.getAddr(borders); } -#else // NGT_SHARED_MEMORY_ALLOCATOR - ID *getChildren() { return children; } - Distance *getBorders() { return borders; } -#endif // NGT_SHARED_MEMORY_ALLOCATOR - - // for milvus - void serialize(std::stringstream & os, ObjectSpace * objectspace = 0) - { - Node::serialize(os); - if (pivot == 0) - { - NGTThrowException("Node::write: pivot is null!"); - } - assert(objectspace != 0); - getPivot().serialize(os, objectspace); - NGT::Serializer::write(os, childrenSize); - for (size_t i = 0; i < childrenSize; i++) - { - getChildren()[i].serialize(os); - } - for (size_t i = 0; i < childrenSize - 1; i++) - { - NGT::Serializer::write(os, getBorders()[i]); - } - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - void serialize(std::ofstream &os, SharedMemoryAllocator &allocator, ObjectSpace *objectspace = 0) { -#else - void serialize(std::ofstream &os, ObjectSpace *objectspace = 0) { -#endif - Node::serialize(os); - if (pivot == 0) { - NGTThrowException("Node::write: pivot is null!"); - } - assert(objectspace != 0); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getPivot(*objectspace).serialize(os, allocator, objectspace); -#else - getPivot().serialize(os, objectspace); -#endif - NGT::Serializer::write(os, childrenSize); - for (size_t i = 0; i < childrenSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getChildren(allocator)[i].serialize(os); -#else - getChildren()[i].serialize(os); -#endif - } - for (size_t i = 0; i < childrenSize - 1; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::Serializer::write(os, getBorders(allocator)[i]); -#else - NGT::Serializer::write(os, getBorders()[i]); -#endif - } - } - void deserialize(std::ifstream &is, ObjectSpace *objectspace = 0) { - Node::deserialize(is); - if (pivot == 0) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - pivot = PersistentObject::allocate(*objectspace); -#else - pivot = PersistentObject::allocate(*objectspace); -#endif - } - assert(objectspace != 0); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "not implemented" << std::endl; - assert(0); -#else - getPivot().deserialize(is, objectspace); -#endif - NGT::Serializer::read(is, childrenSize); - assert(children != 0); - for (size_t i = 0; i < childrenSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - assert(0); -#else - getChildren()[i].deserialize(is); -#endif - } - assert(borders != 0); - for (size_t i = 0; i < childrenSize - 1; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - assert(0); -#else - NGT::Serializer::read(is, getBorders()[i]); -#endif - } - } - // for milvus - void deserialize(std::stringstream & is, ObjectSpace * objectspace = 0) - { - Node::deserialize(is); - if (pivot == 0) - { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - pivot = PersistentObject::allocate(*objectspace); -#else - pivot = PersistentObject::allocate(*objectspace); -#endif - } - assert(objectspace != 0); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "not implemented" << std::endl; - assert(0); -#else - getPivot().deserialize(is, objectspace); -#endif - NGT::Serializer::read(is, childrenSize); - assert(children != 0); - for (size_t i = 0; i < childrenSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - assert(0); -#else - getChildren()[i].deserialize(is); -#endif - } - assert(borders != 0); - for (size_t i = 0; i < childrenSize - 1; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - assert(0); -#else - NGT::Serializer::read(is, getBorders()[i]); -#endif - } - } - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - void serializeAsText(std::ofstream &os, SharedMemoryAllocator &allocator, ObjectSpace *objectspace = 0) { -#else - void serializeAsText(std::ofstream &os, ObjectSpace *objectspace = 0) { -#endif - Node::serializeAsText(os); - if (pivot == 0) { - NGTThrowException("Node::write: pivot is null!"); - } - os << " "; - assert(objectspace != 0); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getPivot(*objectspace).serializeAsText(os, objectspace); -#else - getPivot().serializeAsText(os, objectspace); -#endif - os << " "; - NGT::Serializer::writeAsText(os, childrenSize); - os << " "; - for (size_t i = 0; i < childrenSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getChildren(allocator)[i].serializeAsText(os); -#else - getChildren()[i].serializeAsText(os); -#endif - os << " "; - } - for (size_t i = 0; i < childrenSize - 1; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::Serializer::writeAsText(os, getBorders(allocator)[i]); -#else - NGT::Serializer::writeAsText(os, getBorders()[i]); -#endif - os << " "; - } - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - void deserializeAsText(std::ifstream &is, SharedMemoryAllocator &allocator, ObjectSpace *objectspace = 0) { -#else - void deserializeAsText(std::ifstream &is, ObjectSpace *objectspace = 0) { -#endif - Node::deserializeAsText(is); - if (pivot == 0) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - pivot = PersistentObject::allocate(*objectspace); -#else - pivot = PersistentObject::allocate(*objectspace); -#endif - } - assert(objectspace != 0); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getPivot(*objectspace).deserializeAsText(is, objectspace); -#else - getPivot().deserializeAsText(is, objectspace); -#endif - size_t csize; - NGT::Serializer::readAsText(is, csize); - assert(children != 0); - assert(childrenSize == csize); - for (size_t i = 0; i < childrenSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getChildren(allocator)[i].deserializeAsText(is); -#else - getChildren()[i].deserializeAsText(is); -#endif - } - assert(borders != 0); - for (size_t i = 0; i < childrenSize - 1; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::Serializer::readAsText(is, getBorders(allocator)[i]); -#else - NGT::Serializer::readAsText(is, getBorders()[i]); -#endif - } - } - - virtual int64_t memSize() { return sizeof(childrenSize) + children->memSize() + childrenSize * sizeof(Distance) + Node::memSize(); } - - void show() { - std::cout << "Show internal node " << childrenSize << ":"; - for (size_t i = 0; i < childrenSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - assert(0); -#else - std::cout << getChildren()[i].getID() << " "; -#endif - } - std::cout << std::endl; - } - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - bool verify(PersistentRepository &internalNodes, PersistentRepository &leafNodes, - SharedMemoryAllocator &allocator); -#else - bool verify(Repository &internalNodes, Repository &leafNodes); -#endif - - static const int InternalChildrenSizeMax = 5; - const size_t childrenSize; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - off_t children; - off_t borders; -#else - ID *children; - Distance *borders; -#endif - }; - - - class LeafNode : public Node { - public: -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - LeafNode(SharedMemoryAllocator &allocator, NGT::ObjectSpace *os = 0) { -#else - LeafNode(NGT::ObjectSpace *os = 0) { -#endif - id = 0; - id.setType(ID::Leaf); - pivot = 0; -#ifdef NGT_NODE_USE_VECTOR - objectIDs.reserve(LeafObjectsSizeMax); -#else - objectSize = 0; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - objectIDs = allocator.getOffset(new(allocator) Object[LeafObjectsSizeMax]); -#else - objectIDs = new NGT::ObjectDistance[LeafObjectsSizeMax]; -#endif -#endif - } - - ~LeafNode() { -#ifndef NGT_SHARED_MEMORY_ALLOCATOR -#ifndef NGT_NODE_USE_VECTOR - if (objectIDs != 0) { - delete[] objectIDs; - } -#endif -#endif - } - - static int - selectPivotByMaxDistance(Container &iobj, Node::Objects &fs); - - static int - selectPivotByMaxVariance(Container &iobj, Node::Objects &fs); - - static void - splitObjects(Container &insertedObject, Objects &splitObjectSet, int pivot); - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - void removeObject(size_t id, size_t replaceId, SharedMemoryAllocator &allocator); -#else - void removeObject(size_t id, size_t replaceId); -#endif - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR -#ifndef NGT_NODE_USE_VECTOR - NGT::ObjectDistance *getObjectIDs(SharedMemoryAllocator &allocator) { - return (NGT::ObjectDistance *)allocator.getAddr(objectIDs); - } -#endif -#else // NGT_SHARED_MEMORY_ALLOCATOR - NGT::ObjectDistance *getObjectIDs() { return objectIDs; } -#endif // NGT_SHARED_MEMORY_ALLOCATOR - - // for milvus - void serialize(std::stringstream & os, ObjectSpace * objectspace = 0) - { - Node::serialize(os); - NGT::Serializer::write(os, objectSize); - for (int i = 0; i < objectSize; i++) - { - objectIDs[i].serialize(os); - } - if (pivot == 0) - { - // Before insertion, parent ID == 0 and object size == 0, that indicates an empty index - if (parent.getID() != 0 || objectSize != 0) - { - NGTThrowException("Node::write: pivot is null!"); - } - } - else - { - assert(objectspace != 0); - pivot->serialize(os, objectspace); - } - } - void serialize(std::ofstream &os, ObjectSpace *objectspace = 0) { - Node::serialize(os); -#ifdef NGT_NODE_USE_VECTOR - NGT::Serializer::write(os, objectIDs); -#else - NGT::Serializer::write(os, objectSize); - for (int i = 0; i < objectSize; i++) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - std::cerr << "not implemented" << std::endl; - assert(0); -#else - objectIDs[i].serialize(os); -#endif - } -#endif // NGT_NODE_USE_VECTOR - if (pivot == 0) { - // Before insertion, parent ID == 0 and object size == 0, that indicates an empty index - if (parent.getID() != 0 || objectSize != 0) { - NGTThrowException("Node::write: pivot is null!"); - } - } else { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - std::cerr << "not implemented" << std::endl; - assert(0); -#else - assert(objectspace != 0); - pivot->serialize(os, objectspace); -#endif - } - } - void deserialize(std::ifstream &is, ObjectSpace *objectspace = 0) { - Node::deserialize(is); - -#ifdef NGT_NODE_USE_VECTOR - objectIDs.clear(); - NGT::Serializer::read(is, objectIDs); -#else - assert(objectIDs != 0); - NGT::Serializer::read(is, objectSize); - for (int i = 0; i < objectSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "not implemented" << std::endl; - assert(0); -#else - getObjectIDs()[i].deserialize(is); -#endif - } -#endif - if (parent.getID() == 0 && objectSize == 0) { - // The index is empty - return; - } - if (pivot == 0) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - pivot = PersistentObject::allocate(*objectspace); -#else - pivot = PersistentObject::allocate(*objectspace); - assert(pivot != 0); -#endif - } - assert(objectspace != 0); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "not implemented" << std::endl; - assert(0); -#else - getPivot().deserialize(is, objectspace); -#endif - } - - // for milvus - void deserialize(std::stringstream & is, ObjectSpace * objectspace = 0) - { - Node::deserialize(is); - -#ifdef NGT_NODE_USE_VECTOR - objectIDs.clear(); - NGT::Serializer::read(is, objectIDs); -#else - assert(objectIDs != 0); - NGT::Serializer::read(is, objectSize); - for (int i = 0; i < objectSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "not implemented" << std::endl; - assert(0); -#else - getObjectIDs()[i].deserialize(is); -#endif - } -#endif - if (parent.getID() == 0 && objectSize == 0) { - // The index is empty - return; - } - if (pivot == 0) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - pivot = PersistentObject::allocate(*objectspace); -#else - pivot = PersistentObject::allocate(*objectspace); - assert(pivot != 0); -#endif - } - assert(objectspace != 0); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "not implemented" << std::endl; - assert(0); -#else - getPivot().deserialize(is, objectspace); -#endif - } - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - void serializeAsText(std::ofstream &os, SharedMemoryAllocator &allocator, ObjectSpace *objectspace = 0) { -#else - void serializeAsText(std::ofstream &os, ObjectSpace *objectspace = 0) { -#endif - Node::serializeAsText(os); - os << " "; - if (pivot == 0) { - NGTThrowException("Node::write: pivot is null!"); - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - getPivot(*objectspace).serializeAsText(os, objectspace); -#else - assert(pivot != 0); - assert(objectspace != 0); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - pivot->serializeAsText(os, allocator, objectspace); -#else - pivot->serializeAsText(os, objectspace); -#endif -#endif - os << " "; -#ifdef NGT_NODE_USE_VECTOR - NGT::Serializer::writeAsText(os, objectIDs); -#else - NGT::Serializer::writeAsText(os, objectSize); - for (int i = 0; i < objectSize; i++) { - os << " "; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - getObjectIDs(allocator)[i].serializeAsText(os); -#else - objectIDs[i].serializeAsText(os); -#endif - } -#endif - } - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - void deserializeAsText(std::ifstream &is, SharedMemoryAllocator &allocator, ObjectSpace *objectspace = 0) { -#else - void deserializeAsText(std::ifstream &is, ObjectSpace *objectspace = 0) { -#endif - Node::deserializeAsText(is); - if (pivot == 0) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - pivot = PersistentObject::allocate(*objectspace); -#else - pivot = PersistentObject::allocate(*objectspace); -#endif - } - assert(objectspace != 0); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getPivot(*objectspace).deserializeAsText(is, objectspace); -#else - getPivot().deserializeAsText(is, objectspace); -#endif -#ifdef NGT_NODE_USE_VECTOR - objectIDs.clear(); - NGT::Serializer::readAsText(is, objectIDs); -#else - assert(objectIDs != 0); - NGT::Serializer::readAsText(is, objectSize); - for (int i = 0; i < objectSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - getObjectIDs(allocator)[i].deserializeAsText(is); -#else - getObjectIDs()[i].deserializeAsText(is); -#endif - } -#endif - } - - void show() { - std::cout << "Show leaf node " << objectSize << ":"; - for (int i = 0; i < objectSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - std::cerr << "not implemented" << std::endl; - assert(0); -#else - std::cout << getObjectIDs()[i].id << "," << getObjectIDs()[i].distance << " "; -#endif - } - std::cout << std::endl; - } - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - bool verify(size_t nobjs, std::vector &status, SharedMemoryAllocator &allocator); -#else - bool verify(size_t nobjs, std::vector &status); -#endif - - virtual int64_t memSize() { return sizeof(objectSize) + objectIDs->memSize() * objectSize + Node::memSize(); } - -#ifdef NGT_NODE_USE_VECTOR - size_t getObjectSize() { return objectIDs.size(); } -#else - size_t getObjectSize() { return objectSize; } -#endif - - static const size_t LeafObjectsSizeMax = 100; - -#ifdef NGT_NODE_USE_VECTOR - std::vector objectIDs; -#else - unsigned short objectSize; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - off_t objectIDs; -#else - ObjectDistance *objectIDs; -#endif -#endif - }; - - -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/ObjectRepository.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/ObjectRepository.h deleted file mode 100644 index 2c152b98cc..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/ObjectRepository.h +++ /dev/null @@ -1,428 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once -#include -#include "defines.h" - -namespace NGT { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - class ObjectRepository : - public PersistentRepository { - public: - typedef PersistentRepository Parent; - void open(const std::string &smfile, size_t sharedMemorySize) { - std::string file = smfile; - file.append("po"); - Parent::open(file, sharedMemorySize); - } -#else - class ObjectRepository : public Repository { - public: - typedef Repository Parent; -#endif - ObjectRepository(size_t dim, const std::type_info &ot):dimension(dim), type(ot), sparse(false) { } - - void initialize() { - deleteAll(); - Parent::push_back((PersistentObject*)0); - } - - // for milvus - void serialize(std::stringstream & obj, ObjectSpace * ospace) { Parent::serialize(obj, ospace); } - - void serialize(const std::string &ofile, ObjectSpace *ospace) { - std::ofstream objs(ofile); - if (!objs.is_open()) { - std::stringstream msg; - msg << "NGT::ObjectSpace: Cannot open the specified file " << ofile << "."; - NGTThrowException(msg); - } - Parent::serialize(objs, ospace); - } - - void deserialize(std::stringstream & obj, ObjectSpace * ospace) - { - assert(ospace != 0); - Parent::deserialize(obj, ospace); - } - - void deserialize(const std::string &ifile, ObjectSpace *ospace) { - assert(ospace != 0); - std::ifstream objs(ifile); - if (!objs.is_open()) { - std::stringstream msg; - msg << "NGT::ObjectSpace: Cannot open the specified file " << ifile << "."; - NGTThrowException(msg); - } - Parent::deserialize(objs, ospace); - } - - void serializeAsText(const std::string &ofile, ObjectSpace *ospace) { - std::ofstream objs(ofile); - if (!objs.is_open()) { - std::stringstream msg; - msg << "NGT::ObjectSpace: Cannot open the specified file " << ofile << "."; - NGTThrowException(msg); - } - Parent::serializeAsText(objs, ospace); - } - - void deserializeAsText(const std::string &ifile, ObjectSpace *ospace) { - std::ifstream objs(ifile); - if (!objs.is_open()) { - std::stringstream msg; - msg << "NGT::ObjectSpace: Cannot open the specified file " << ifile << "."; - NGTThrowException(msg); - } - Parent::deserializeAsText(objs, ospace); - } - - void readText(std::istream &is, size_t dataSize = 0) { - initialize(); - appendText(is, dataSize); - } - - // For milvus - template - void readRawData(const T * raw_data, size_t dataSize) - { - initialize(); - append(raw_data, dataSize); - } - - virtual PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) { -// std::cerr << "ObjectRepository::allocateNormalizedPersistentObject(double): Fatal error! Something wrong!" << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectRepository::allocateNormalizedPersistentObject(double): Fatal error! Something wrong!"); - abort(); - } - - virtual PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) { -// std::cerr << "ObjectRepository::allocateNormalizedPersistentObject(float): Fatal error! Something wrong!" << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectRepository::allocateNormalizedPersistentObject(float): Fatal error! Something wrong!"); - abort(); - } - - virtual PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) { -// std::cerr << "ObjectRepository::allocateNormalizedPersistentObject(uint8_t): Fatal error! Something wrong!" << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectRepository::allocateNormalizedPersistentObject(uint8_t): Fatal error! Something wrong!"); - abort(); - } - - virtual PersistentObject *allocateNormalizedPersistentObject(const float *obj, size_t size) { -// std::cerr << "ObjectRepository::allocateNormalizedPersistentObject: Fatal error! Something wrong!" << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectRepository::allocateNormalizedPersistentObject: Fatal error! Something wrong!"); - abort(); - } - - void appendText(std::istream &is, size_t dataSize = 0) { - if (dimension == 0) { - NGTThrowException("ObjectSpace::readText: Dimension is not specified."); - } - if (size() == 0) { - // First entry should be always a dummy entry. - // If it is empty, the dummy entry should be inserted. - push_back((PersistentObject*)0); - } - size_t prevDataSize = size(); - if (dataSize > 0) { - reserve(size() + dataSize); - } - std::string line; - size_t lineNo = 0; - while (getline(is, line)) { - lineNo++; - if (dataSize > 0 && (dataSize <= size() - prevDataSize)) { -// std::cerr << "The size of data reached the specified size. The remaining data in the file are not inserted. " -// << dataSize << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("The size of data reached the specified size. The remaining data in the file are not inserted. " - + std::to_string(dataSize)); - break; - } - std::vector object; - try { - extractObjectFromText(line, "\t ", object); - PersistentObject *obj = 0; - try { - obj = allocateNormalizedPersistentObject(object); - } catch (Exception &err) { -// std::cerr << err.what() << " continue..." << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)(std::string(err.what()) + " continue..."); - obj = allocatePersistentObject(object); - } - push_back(obj); - } catch (Exception &err) { -// std::cerr << "ObjectSpace::readText: Warning! Invalid line. [" << line << "] Skip the line " << lineNo << " and continue." << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectSpace::readText: Warning! Invalid line. [" + line + "] Skip the line " + std::to_string(lineNo) + " and continue."); - } - } - } - - template - void append(T *data, size_t objectCount) { - if (dimension == 0) { - NGTThrowException("ObjectSpace::readText: Dimension is not specified."); - } - if (size() == 0) { - // First entry should be always a dummy entry. - // If it is empty, the dummy entry should be inserted. - push_back((PersistentObject*)0); - } - if (objectCount > 0) { - reserve(size() + objectCount); - } - for (size_t idx = 0; idx < objectCount; idx++, data += dimension) { - std::vector object; - object.reserve(dimension); - for (size_t dataidx = 0; dataidx < dimension; dataidx++) { - object.push_back(data[dataidx]); - } - try { - PersistentObject *obj = 0; - try { - obj = allocateNormalizedPersistentObject(object); - } catch (Exception &err) { -// std::cerr << err.what() << " continue..." << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)(std::string(err.what()) + " continue..."); - obj = allocatePersistentObject(object); - } - push_back(obj); - - } catch (Exception &err) { -// std::cerr << "ObjectSpace::readText: Warning! Invalid data. Skip the data no. " << idx << " and continue." << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectSpace::readText: Warning! Invalid data. Skip the data no. " + std::to_string(idx) + " and continue."); - } - } - } - - Object *allocateObject() { - return (Object*) new Object(paddedByteSize); - } - - // This method is called during search to generate query. - // Therefore the object is not persistent. - Object *allocateObject(const std::string &textLine, const std::string &sep) { - std::vector object; - extractObjectFromText(textLine, sep, object); - Object *po = (Object*)allocateObject(object); - return (Object*)po; - } - - void extractObjectFromText(const std::string &textLine, const std::string &sep, std::vector &object) { - object.resize(dimension); - std::vector tokens; - NGT::Common::tokenize(textLine, tokens, sep); - if (dimension > tokens.size()) { - std::stringstream msg; - msg << "ObjectSpace::allocate: too few dimension. " << tokens.size() << ":" << dimension << ". " - << textLine; - NGTThrowException(msg); - } - size_t idx; - for (idx = 0; idx < dimension; idx++) { - if (tokens[idx].size() == 0) { - std::stringstream msg; - msg << "ObjectSpace::allocate: too few dimension. " << tokens.size() << ":" - << dimension << ". " << textLine; - NGTThrowException(msg); - } - char *e; - object[idx] = strtod(tokens[idx].c_str(), &e); - if (*e != 0) { -// std::cerr << "ObjectSpace::readText: Warning! Not numerical value. [" << e << "]" << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectSpace::readText: Warning! Not numerical value. [" + std::string(e) + "]"); - break; - } - } - } - - template - Object *allocateObject(T *o, size_t size) { - size_t osize = paddedByteSize; - if (sparse) { - size_t vsize = size * (type == typeid(float) ? 4 : 1); - osize = osize < vsize ? vsize : osize; - } else { - if (dimension != size) { -// std::cerr << "ObjectSpace::allocateObject: Fatal error! dimension is invalid. The indexed objects=" -// << dimension << " The specified object=" << size << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectSpace::allocateObject: Fatal error! dimension is invalid. The indexed objects=" - + std::to_string(dimension) + " The specified object=" + std::to_string(size)); - assert(dimension == size); - } - } - Object *po = new Object(osize); - void *object = static_cast(&(*po)[0]); - if (type == typeid(uint8_t)) { - uint8_t *obj = static_cast(object); - for (size_t i = 0; i < size; i++) { - obj[i] = static_cast(o[i]); - } - } else if (type == typeid(float)) { - float *obj = static_cast(object); - for (size_t i = 0; i < size; i++) { - obj[i] = static_cast(o[i]); - } - } else { -// std::cerr << "ObjectSpace::allocate: Fatal error: unsupported type!" << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectSpace::allocate: Fatal error: unsupported type!"); - abort(); - } - return po; - } - - template - Object *allocateObject(const std::vector &o) { - return allocateObject(o.data(), o.size()); - } - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - PersistentObject *allocatePersistentObject(Object &o) { - SharedMemoryAllocator &objectAllocator = getAllocator(); - size_t cpsize = dimension; - if (type == typeid(uint8_t)) { - cpsize *= sizeof(uint8_t); - } else if (type == typeid(float)) { - cpsize *= sizeof(float); - } else { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectSpace::allocate: Fatal error: unsupported type!"); -// std::cerr << "ObjectSpace::allocate: Fatal error: unsupported type!" << std::endl; - abort(); - } - PersistentObject *po = new (objectAllocator) PersistentObject(objectAllocator, paddedByteSize); - void *dsto = &(*po).at(0, allocator); - void *srco = &o[0]; - memcpy(dsto, srco, cpsize); - return po; - } - - template - PersistentObject *allocatePersistentObject(T *o, size_t size) { - SharedMemoryAllocator &objectAllocator = getAllocator(); - PersistentObject *po = new (objectAllocator) PersistentObject(objectAllocator, paddedByteSize); - if (size != 0 && dimension != size) { - std::stringstream msg; - msg << "ObjectSpace::allocatePersistentObject: Fatal error! The dimensionality is invalid. The specified dimensionality=" - << (sparse ? dimension - 1 : dimension) << ". The specified object=" << (sparse ? size - 1 : size) << "."; - NGTThrowException(msg); - } - void *object = static_cast(&(*po).at(0, allocator)); - if (type == typeid(uint8_t)) { - uint8_t *obj = static_cast(object); - for (size_t i = 0; i < dimension; i++) { - obj[i] = static_cast(o[i]); - } - } else if (type == typeid(float)) { - float *obj = static_cast(object); - for (size_t i = 0; i < dimension; i++) { - obj[i] = static_cast(o[i]); - } - } else { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectSpace::allocate: Fatal error: unsupported type!"); -// std::cerr << "ObjectSpace::allocate: Fatal error: unsupported type!" << std::endl; - abort(); - } - return po; - } - - template - PersistentObject *allocatePersistentObject(const std::vector &o) { - return allocatePersistentObject(o.data(), o.size()); - } - -#else - template - PersistentObject *allocatePersistentObject(T *o, size_t size) { - if (size != 0 && dimension != size) { - std::stringstream msg; - msg << "ObjectSpace::allocatePersistentObject: Fatal error! The dimensionality is invalid. The specified dimensionality=" - << (sparse ? dimension - 1 : dimension) << ". The specified object=" << (sparse ? size - 1 : size) << "."; - NGTThrowException(msg); - } - return allocateObject(o, size); - } - - template - PersistentObject *allocatePersistentObject(const std::vector &o) { - return allocatePersistentObject(o.data(), o.size()); - } -#endif - - void deleteObject(Object *po) { - delete po; - } - - private: - void extractObject(void *object, std::vector &d) { - if (type == typeid(uint8_t)) { - uint8_t *obj = (uint8_t*)object; - for (size_t i = 0; i < dimension; i++) { - d.push_back(obj[i]); - } - } else if (type == typeid(float)) { - float *obj = (float*)object; - for (size_t i = 0; i < dimension; i++) { - d.push_back(obj[i]); - } - } else { -// std::cerr << "ObjectSpace::allocate: Fatal error: unsupported type!" << std::endl; - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectSpace::allocate: Fatal error: unsupported type!"); - abort(); - } - } - public: - void extractObject(Object *o, std::vector &d) { - void *object = (void*)(&(*o)[0]); - extractObject(object, d); - } - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - void extractObject(PersistentObject *o, std::vector &d) { - SharedMemoryAllocator &objectAllocator = getAllocator(); - void *object = (void*)(&(*o).at(0, objectAllocator)); - extractObject(object, d); - } -#endif - - void setLength(size_t l) { byteSize = l; } - void setPaddedLength(size_t l) { paddedByteSize = l; } - void setSparse() { sparse = true; } - size_t getByteSize() { return byteSize; } - size_t insert(PersistentObject *obj) { return Parent::insert(obj); } - const size_t dimension; - const std::type_info &type; - protected: - size_t byteSize; // the length of all of elements. - size_t paddedByteSize; - bool sparse; // sparse data format - }; - -} // namespace NGT diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/ObjectSpace.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/ObjectSpace.h deleted file mode 100644 index 6e74e1d9ee..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/ObjectSpace.h +++ /dev/null @@ -1,496 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include -#include "PrimitiveComparator.h" - -class ObjectSpace; - -namespace NGT { - - class PersistentObjectDistances; - class ObjectDistances : public std::vector { - public: - ObjectDistances(NGT::ObjectSpace *os = 0) {} - // for milvus - void serialize(std::stringstream & os, ObjectSpace * objspace = 0) { NGT::Serializer::write(os, (std::vector &)*this); } - void serialize(std::ofstream &os, ObjectSpace *objspace = 0) { NGT::Serializer::write(os, (std::vector&)*this);} - // for milvus - void deserialize(std::stringstream & is, ObjectSpace * objspace = 0) - { - NGT::Serializer::read(is, (std::vector &)*this); - } - void deserialize(std::ifstream &is, ObjectSpace *objspace = 0) { NGT::Serializer::read(is, (std::vector&)*this);} - - void serializeAsText(std::ofstream &os, ObjectSpace *objspace = 0) { - NGT::Serializer::writeAsText(os, size()); - os << " "; - for (size_t i = 0; i < size(); i++) { - (*this)[i].serializeAsText(os); - os << " "; - } - } - void deserializeAsText(std::ifstream &is, ObjectSpace *objspace = 0) { - size_t s; - NGT::Serializer::readAsText(is, s); - resize(s); - for (size_t i = 0; i < size(); i++) { - (*this)[i].deserializeAsText(is); - } - } - - void moveFrom(std::priority_queue, std::less > &pq) { - this->clear(); - this->resize(pq.size()); - for (int i = pq.size() - 1; i >= 0; i--) { - (*this)[i] = pq.top(); - pq.pop(); - } - assert(pq.size() == 0); - } - - void moveFrom(std::priority_queue, std::less > &pq, double (&f)(double)) { - this->clear(); - this->resize(pq.size()); - for (int i = pq.size() - 1; i >= 0; i--) { - (*this)[i] = pq.top(); - (*this)[i].distance = f((*this)[i].distance); - pq.pop(); - } - assert(pq.size() == 0); - } - - void moveFrom(std::priority_queue, std::less > &pq, unsigned int id) { - this->clear(); - if (pq.size() == 0) { - return; - } - this->resize(id == 0 ? pq.size() : pq.size() - 1); - int i = this->size() - 1; - while (pq.size() != 0 && i >= 0) { - if (pq.top().id != id) { - (*this)[i] = pq.top(); - i--; - } - pq.pop(); - } - if (pq.size() != 0 && pq.top().id != id) { - std::cerr << "moveFrom: Fatal error: somethig wrong! " << pq.size() << ":" << this->size() << ":" << id << ":" << pq.top().id << std::endl; - assert(pq.size() == 0 || pq.top().id == id); - } - } - - int64_t memSize() const { -// auto obj = (std::vector)(*this); - if (this->size() == 0) - return 0; - else { - return (*this)[0].memSize() * this->size(); - } -// return this->size() == 0 ? 0 : (*this)[0].memSize() * (this->size()); - } - ObjectDistances &operator=(PersistentObjectDistances &objs); - }; - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - class PersistentObjectDistances : public Vector { - public: - PersistentObjectDistances(SharedMemoryAllocator &allocator, NGT::ObjectSpace *os = 0) {} - void serialize(std::ofstream &os, ObjectSpace *objectspace = 0) { NGT::Serializer::write(os, (Vector&)*this); } - void deserialize(std::ifstream &is, ObjectSpace *objectspace = 0) { NGT::Serializer::read(is, (Vector&)*this); } - void serializeAsText(std::ofstream &os, SharedMemoryAllocator &allocator, ObjectSpace *objspace = 0) { - NGT::Serializer::writeAsText(os, size()); - os << " "; - for (size_t i = 0; i < size(); i++) { - (*this).at(i, allocator).serializeAsText(os); - os << " "; - } - } - void deserializeAsText(std::ifstream &is, SharedMemoryAllocator &allocator, ObjectSpace *objspace = 0) { - size_t s; - is >> s; - resize(s, allocator); - for (size_t i = 0; i < size(); i++) { - (*this).at(i, allocator).deserializeAsText(is); - } - } - PersistentObjectDistances ©(ObjectDistances &objs, SharedMemoryAllocator &allocator) { - clear(allocator); - reserve(objs.size(), allocator); - for (ObjectDistances::iterator i = objs.begin(); i != objs.end(); i++) { - push_back(*i, allocator); - } - return *this; - } - }; - typedef PersistentObjectDistances GraphNode; - - inline ObjectDistances &ObjectDistances::operator=(PersistentObjectDistances &objs) - { - clear(); - reserve(objs.size()); - std::cerr << "not implemented" << std::endl; - assert(0); - return *this; - } -#else // NGT_SHARED_MEMORY_ALLOCATOR - typedef ObjectDistances GraphNode; -#endif // NGT_SHARED_MEMORY_ALLOCATOR - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - class PersistentObject; -#else - typedef Object PersistentObject; -#endif - - class ObjectRepository; - - class ObjectSpace { - public: - class Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Comparator(size_t d, SharedMemoryAllocator &a) : dimension(d), allocator(a) {} -#else - Comparator(size_t d) : dimension(d) {} -#endif - virtual double operator()(Object &objecta, Object &objectb) = 0; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - virtual double operator()(Object &objecta, PersistentObject &objectb) = 0; - virtual double operator()(PersistentObject &objecta, PersistentObject &objectb) = 0; -#endif - size_t dimension; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - SharedMemoryAllocator &allocator; -#endif - virtual ~Comparator(){} - int64_t memSize() { return sizeof(size_t); } - }; - enum DistanceType { - DistanceTypeNone = -1, - DistanceTypeL1 = 0, - DistanceTypeL2 = 1, - DistanceTypeHamming = 2, - DistanceTypeAngle = 3, - DistanceTypeCosine = 4, - DistanceTypeNormalizedAngle = 5, - DistanceTypeNormalizedCosine = 6, - DistanceTypeJaccard = 7, - DistanceTypeSparseJaccard = 8, - DistanceTypeIP = 9 - }; - - enum ObjectType { - ObjectTypeNone = 0, - Uint8 = 1, - Float = 2 - }; - - - typedef std::priority_queue, std::less > ResultSet; - ObjectSpace(size_t d):dimension(d), distanceType(DistanceTypeNone), comparator(0), normalization(false) {} - virtual ~ObjectSpace() { if (comparator != 0) { delete comparator; } } - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - virtual void open(const std::string &f, size_t shareMemorySize) = 0; - virtual Object *allocateObject(Object &o) = 0; - virtual Object *allocateObject(PersistentObject &o) = 0; - virtual PersistentObject *allocatePersistentObject(Object &obj) = 0; - virtual void deleteObject(PersistentObject *) = 0; - virtual void copy(PersistentObject &objecta, PersistentObject &objectb) = 0; - virtual void show(std::ostream &os, PersistentObject &object) = 0; - virtual size_t insert(PersistentObject *obj) = 0; -#else - virtual size_t insert(Object *obj) = 0; -#endif - - Comparator &getComparator() { return *comparator; } - - virtual void serialize(const std::string &of) = 0; - // for milvus - virtual void serialize(std::stringstream & obj) = 0; - // for milvus - virtual void deserialize(std::stringstream & obj) = 0; - virtual void deserialize(const std::string &ifile) = 0; - virtual void serializeAsText(const std::string &of) = 0; - virtual void deserializeAsText(const std::string &of) = 0; - //for milvus - virtual void readRawData(const float * raw_data, size_t dataSize) = 0; - virtual void readText(std::istream &is, size_t dataSize) = 0; - virtual void appendText(std::istream &is, size_t dataSize) = 0; - virtual void append(const float *data, size_t dataSize) = 0; - virtual void append(const double *data, size_t dataSize) = 0; - - virtual void copy(Object &objecta, Object &objectb) = 0; - - virtual void linearSearch(Object &query, double radius, size_t size, - ObjectSpace::ResultSet &results) = 0; - - virtual const std::type_info &getObjectType() = 0; - virtual void show(std::ostream &os, Object &object) = 0; - virtual size_t getSize() = 0; - virtual size_t getSizeOfElement() = 0; - virtual size_t getByteSizeOfObject() = 0; - virtual Object *allocateNormalizedObject(const std::string &textLine, const std::string &sep) = 0; - virtual Object *allocateNormalizedObject(const std::vector &obj) = 0; - virtual Object *allocateNormalizedObject(const std::vector &obj) = 0; - virtual Object *allocateNormalizedObject(const std::vector &obj) = 0; - virtual Object *allocateNormalizedObject(const float *obj, size_t size) = 0; - virtual PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) = 0; - virtual PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) = 0; - virtual void deleteObject(Object *po) = 0; - virtual Object *allocateObject() = 0; - virtual void remove(size_t id) = 0; - - virtual ObjectRepository &getRepository() = 0; - - virtual void setDistanceType(DistanceType t) = 0; - virtual DistanceType getDistanceType() = 0; - - virtual void *getObject(size_t idx) = 0; - virtual void getObject(size_t idx, std::vector &v) = 0; - virtual void getObjects(const std::vector &idxs, std::vector> &vs) = 0; - - size_t getDimension() { return dimension; } - size_t getPaddedDimension() { return ((dimension - 1) / 16 + 1) * 16; } - virtual int64_t memSize() { return sizeof(dimension) + sizeof(distanceType) + sizeof(prefetchOffset) * 2 + sizeof(normalization) + comparator->memSize(); }; - - template - void normalize(T *data, size_t dim) { - double sum = 0.0; - for (size_t i = 0; i < dim; i++) { - sum += (double)data[i] * (double)data[i]; - } - if (sum == 0.0) { - std::stringstream msg; - msg << "ObjectSpace::normalize: Error! the object is an invalid zero vector for the cosine similarity or angle distance."; - NGTThrowException(msg); - } - sum = sqrt(sum); - for (size_t i = 0; i < dim; i++) { - data[i] = (double)data[i] / sum; - } - } - uint32_t getPrefetchOffset() { return prefetchOffset; } - uint32_t setPrefetchOffset(size_t offset) { - if (offset == 0) { - prefetchOffset = floor(300.0 / (static_cast(getPaddedDimension()) + 30.0) + 1.0); - } else { - prefetchOffset = offset; - } - return prefetchOffset; - } - uint32_t getPrefetchSize() { return prefetchSize; } - uint32_t setPrefetchSize(size_t size) { - if (size == 0) { - prefetchSize = getByteSizeOfObject(); - } else { - prefetchSize = size; - } - return prefetchSize; - } - protected: - const size_t dimension; - DistanceType distanceType; - Comparator *comparator; - bool normalization; - uint32_t prefetchOffset; - uint32_t prefetchSize; - }; - - class BaseObject { - public: - virtual uint8_t &operator[](size_t idx) const = 0; - void serialize(std::ostream &os, ObjectSpace *objectspace = 0) { - assert(objectspace != 0); - if(objectspace == 0) return; // make compiler happy; - size_t byteSize = objectspace->getByteSizeOfObject(); - NGT::Serializer::write(os, (uint8_t*)&(*this)[0], byteSize); - } - void deserialize(std::istream &is, ObjectSpace *objectspace = 0) { - assert(objectspace != 0); - if(objectspace == 0) return; // make compiler happy; - size_t byteSize = objectspace->getByteSizeOfObject(); - assert(&(*this)[0] != 0); - NGT::Serializer::read(is, (uint8_t*)&(*this)[0], byteSize); - } - void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0) { - assert(objectspace != 0); - if(objectspace == 0) return; // make compiler happy; - const std::type_info &t = objectspace->getObjectType(); - size_t dimension = objectspace->getDimension(); - void *ref = (void*)&(*this)[0]; - if (t == typeid(uint8_t)) { - NGT::Serializer::writeAsText(os, (uint8_t*)ref, dimension); - } else if (t == typeid(float)) { - NGT::Serializer::writeAsText(os, (float*)ref, dimension); - } else if (t == typeid(double)) { - NGT::Serializer::writeAsText(os, (double*)ref, dimension); - } else if (t == typeid(uint16_t)) { - NGT::Serializer::writeAsText(os, (uint16_t*)ref, dimension); - } else if (t == typeid(uint32_t)) { - NGT::Serializer::writeAsText(os, (uint32_t*)ref, dimension); - } else { - std::cerr << "Object::serializeAsText: not supported data type. [" << t.name() << "]" << std::endl; - assert(0); - } - } - void deserializeAsText(std::ifstream &is, ObjectSpace *objectspace = 0) { - assert(objectspace != 0); - if(objectspace == 0) return; - const std::type_info &t = objectspace->getObjectType(); - size_t dimension = objectspace->getDimension(); - void *ref = (void*)&(*this)[0]; - assert(ref != 0); - if (t == typeid(uint8_t)) { - NGT::Serializer::readAsText(is, (uint8_t*)ref, dimension); - } else if (t == typeid(float)) { - NGT::Serializer::readAsText(is, (float*)ref, dimension); - } else if (t == typeid(double)) { - NGT::Serializer::readAsText(is, (double*)ref, dimension); - } else if (t == typeid(uint16_t)) { - NGT::Serializer::readAsText(is, (uint16_t*)ref, dimension); - } else if (t == typeid(uint32_t)) { - NGT::Serializer::readAsText(is, (uint32_t*)ref, dimension); - } else { - std::cerr << "Object::deserializeAsText: not supported data type. [" << t.name() << "]" << std::endl; - assert(0); - } - } - - }; - - class Object : public BaseObject { - public: - Object(NGT::ObjectSpace *os = 0):vector(0) { - assert(os != 0); - if(os == 0) return; - size_t s = os->getByteSizeOfObject(); - construct(s); - } - - Object(size_t s):vector(0) { - assert(s != 0); - construct(s); - } - - void copy(Object &o, size_t s) { - assert(vector != 0); - for (size_t i = 0; i < s; i++) { - vector[i] = o[i]; - } - } - - virtual ~Object() { clear(); } - - uint8_t &operator[](size_t idx) const { return vector[idx]; } - - void *getPointer(size_t idx = 0) const { return vector + idx; } - - static Object *allocate(ObjectSpace &objectspace) { return new Object(&objectspace); } - - virtual int64_t memSize() { return std::strlen((char*)vector); } - private: - void clear() { - if (vector != 0) { - MemoryCache::alignedFree(vector); - } - vector = 0; - } - - void construct(size_t s) { - assert(vector == 0); - size_t allocsize = ((s - 1) / 64 + 1) * 64; - vector = static_cast(MemoryCache::alignedAlloc(allocsize)); - memset(vector, 0, allocsize); - } - - uint8_t* vector; - }; - - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - class PersistentObject : public BaseObject { - public: - PersistentObject(SharedMemoryAllocator &allocator, NGT::ObjectSpace *os = 0):array(0) { - assert(os != 0); - size_t s = os->getByteSizeOfObject(); - construct(s, allocator); - } - PersistentObject(SharedMemoryAllocator &allocator, size_t s):array(0) { - assert(s != 0); - construct(s, allocator); - } - - ~PersistentObject() {} - - uint8_t &at(size_t idx, SharedMemoryAllocator &allocator) const { - uint8_t *a = (uint8_t *)allocator.getAddr(array); - return a[idx]; - } - uint8_t &operator[](size_t idx) const { - std::cerr << "not implemented" << std::endl; - assert(0); - uint8_t *a = 0; - return a[idx]; - } - - void *getPointer(size_t idx, SharedMemoryAllocator &allocator) { - uint8_t *a = (uint8_t *)allocator.getAddr(array); - return a + idx; - } - - // set v in objectspace to this object using allocator. - void set(PersistentObject &po, ObjectSpace &objectspace); - - static off_t allocate(ObjectSpace &objectspace); - - void serializeAsText(std::ostream &os, SharedMemoryAllocator &allocator, - ObjectSpace *objectspace = 0) { - serializeAsText(os, objectspace); - } - - void serializeAsText(std::ostream &os, ObjectSpace *objectspace = 0); - - void deserializeAsText(std::ifstream &is, SharedMemoryAllocator &allocator, - ObjectSpace *objectspace = 0) { - deserializeAsText(is, objectspace); - } - - void deserializeAsText(std::ifstream &is, ObjectSpace *objectspace = 0); - - void serialize(std::ostream &os, SharedMemoryAllocator &allocator, - ObjectSpace *objectspace = 0) { - std::cerr << "serialize is not implemented" << std::endl; - assert(0); - } - - private: - void construct(size_t s, SharedMemoryAllocator &allocator) { - assert(array == 0); - assert(s != 0); - size_t allocsize = ((s - 1) / 64 + 1) * 64; - array = allocator.getOffset(new(allocator) uint8_t[allocsize]); - memset(getPointer(0, allocator), 0, allocsize); - } - off_t array; - }; -#endif // NGT_SHARED_MEMORY_ALLOCATOR - -} - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/ObjectSpaceRepository.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/ObjectSpaceRepository.h deleted file mode 100644 index 6ffb387562..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/ObjectSpaceRepository.h +++ /dev/null @@ -1,652 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - - -#include -#include "Common.h" -#include "ObjectSpace.h" -#include "ObjectRepository.h" -#include "PrimitiveComparator.h" - -class ObjectSpace; - -namespace NGT { - - template - class ObjectSpaceRepository : public ObjectSpace, public ObjectRepository { - public: - - class ComparatorL1 : public Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ComparatorL1(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareL1((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } - double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareL1((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } - double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareL1((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } -#else - ComparatorL1(size_t d) : Comparator(d) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareL1((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } -#endif - }; - - class ComparatorL2 : public Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ComparatorL2(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareL2((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } - double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareL2((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } - double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareL2((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } -#else - ComparatorL2(size_t d) : Comparator(d) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareL2((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } -#endif - }; - - class ComparatorIP : public Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ComparatorIP(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareIP((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } - double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareIP((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } - double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareIP((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } -#else - ComparatorIP(size_t d) : Comparator(d) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareInnerProduct((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } -#endif - }; - - class ComparatorHammingDistance : public Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ComparatorHammingDistance(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareHammingDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } - double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareHammingDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } - double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareHammingDistance((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } -#else - ComparatorHammingDistance(size_t d) : Comparator(d) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareHammingDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } -#endif - }; - - class ComparatorJaccardDistance : public Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ComparatorJaccardDistance(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareJaccardDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } - double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareJaccardDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } - double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareJaccardDistance((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } -#else - ComparatorJaccardDistance(size_t d) : Comparator(d) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareJaccardDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } -#endif - }; - - class ComparatorSparseJaccardDistance : public Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ComparatorSparseJaccardDistance(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareSparseJaccardDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } - double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareSparseJaccardDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } - double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareSparseJaccardDistance((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } -#else - ComparatorSparseJaccardDistance(size_t d) : Comparator(d) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareSparseJaccardDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } -#endif - }; - - class ComparatorAngleDistance : public Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ComparatorAngleDistance(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareAngleDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } - double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareAngleDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } - double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareAngleDistance((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } -#else - ComparatorAngleDistance(size_t d) : Comparator(d) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareAngleDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } -#endif - }; - - class ComparatorNormalizedAngleDistance : public Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ComparatorNormalizedAngleDistance(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareNormalizedAngleDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } - double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareNormalizedAngleDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } - double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareNormalizedAngleDistance((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } -#else - ComparatorNormalizedAngleDistance(size_t d) : Comparator(d) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareNormalizedAngleDistance((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } -#endif - }; - - class ComparatorCosineSimilarity : public Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ComparatorCosineSimilarity(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareCosineSimilarity((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } - double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareCosineSimilarity((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } - double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareCosineSimilarity((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } -#else - ComparatorCosineSimilarity(size_t d) : Comparator(d) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareCosineSimilarity((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } -#endif - }; - - class ComparatorNormalizedCosineSimilarity : public Comparator { - public: -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - ComparatorNormalizedCosineSimilarity(size_t d, SharedMemoryAllocator &a) : Comparator(d, a) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareNormalizedCosineSimilarity((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } - double operator()(Object &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareNormalizedCosineSimilarity((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } - double operator()(PersistentObject &objecta, PersistentObject &objectb) { - return PrimitiveComparator::compareNormalizedCosineSimilarity((OBJECT_TYPE*)&objecta.at(0, allocator), (OBJECT_TYPE*)&objectb.at(0, allocator), dimension); - } -#else - ComparatorNormalizedCosineSimilarity(size_t d) : Comparator(d) {} - double operator()(Object &objecta, Object &objectb) { - return PrimitiveComparator::compareNormalizedCosineSimilarity((OBJECT_TYPE*)&objecta[0], (OBJECT_TYPE*)&objectb[0], dimension); - } -#endif - }; - - ObjectSpaceRepository(size_t d, const std::type_info &ot, DistanceType t) : ObjectSpace(d), ObjectRepository(d, ot) { - size_t objectSize = 0; - if (ot == typeid(uint8_t)) { - objectSize = sizeof(uint8_t); - } else if (ot == typeid(float)) { - objectSize = sizeof(float); - } else { - std::stringstream msg; - msg << "ObjectSpace::constructor: Not supported type. " << ot.name(); - NGTThrowException(msg); - } - setLength(objectSize * d); - setPaddedLength(objectSize * ObjectSpace::getPaddedDimension()); - setDistanceType(t); - } - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - void open(const std::string &f, size_t sharedMemorySize) { ObjectRepository::open(f, sharedMemorySize); } - void copy(PersistentObject &objecta, PersistentObject &objectb) { objecta = objectb; } - - void show(std::ostream &os, PersistentObject &object) { - const std::type_info &t = getObjectType(); - if (t == typeid(uint8_t)) { - unsigned char *optr = static_cast(&object.at(0,allocator)); - for (size_t i = 0; i < getDimension(); i++) { - os << (int)optr[i] << " "; - } - } else if (t == typeid(float)) { - float *optr = reinterpret_cast(&object.at(0,allocator)); - for (size_t i = 0; i < getDimension(); i++) { - os << optr[i] << " "; - } - } else { - os << " not implement for the type."; - } - } - - Object *allocateObject(Object &o) { - Object *po = new Object(getByteSizeOfObject()); - for (size_t i = 0; i < getByteSizeOfObject(); i++) { - (*po)[i] = o[i]; - } - return po; - } - Object *allocateObject(PersistentObject &o) { - PersistentObject &spo = (PersistentObject &)o; - Object *po = new Object(getByteSizeOfObject()); - for (size_t i = 0; i < getByteSizeOfObject(); i++) { - (*po)[i] = spo.at(i,ObjectRepository::allocator); - } - return (Object*)po; - } - void deleteObject(PersistentObject *po) { - delete po; - } -#endif // NGT_SHARED_MEMORY_ALLOCATOR - - void copy(Object &objecta, Object &objectb) { - objecta.copy(objectb, getByteSizeOfObject()); - } - - DistanceType getDistanceType() { return distanceType; } - - void setDistanceType(DistanceType t) { - if (comparator != 0) { - delete comparator; - } - assert(ObjectSpace::dimension != 0); - distanceType = t; - switch (distanceType) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - case DistanceTypeL1: - comparator = new ObjectSpaceRepository::ComparatorL1(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); - break; - case DistanceTypeL2: - comparator = new ObjectSpaceRepository::ComparatorL2(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); - break; - case DistanceTypeHamming: - comparator = new ObjectSpaceRepository::ComparatorHammingDistance(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); - break; - case DistanceTypeJaccard: - comparator = new ObjectSpaceRepository::ComparatorJaccardDistance(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); - break; - case DistanceTypeSparseJaccard: - comparator = new ObjectSpaceRepository::ComparatorSparseJaccardDistance(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); - setSparse(); - break; - case DistanceTypeAngle: - comparator = new ObjectSpaceRepository::ComparatorAngleDistance(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); - break; - case DistanceTypeCosine: - comparator = new ObjectSpaceRepository::ComparatorCosineSimilarity(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); - break; - case DistanceTypeNormalizedAngle: - comparator = new ObjectSpaceRepository::ComparatorNormalizedAngleDistance(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); - normalization = true; - break; - case DistanceTypeNormalizedCosine: - comparator = new ObjectSpaceRepository::ComparatorNormalizedCosineSimilarity(ObjectSpace::getPaddedDimension(), ObjectRepository::allocator); - normalization = true; - break; -#else - case DistanceTypeL1: - comparator = new ObjectSpaceRepository::ComparatorL1(ObjectSpace::getPaddedDimension()); - break; - case DistanceTypeL2: - comparator = new ObjectSpaceRepository::ComparatorL2(ObjectSpace::getPaddedDimension()); - break; - case DistanceTypeIP: - comparator = new ObjectSpaceRepository::ComparatorIP(ObjectSpace::getPaddedDimension()); - break; - case DistanceTypeHamming: - comparator = new ObjectSpaceRepository::ComparatorHammingDistance(ObjectSpace::getPaddedDimension()); - break; - case DistanceTypeJaccard: - comparator = new ObjectSpaceRepository::ComparatorJaccardDistance(ObjectSpace::getPaddedDimension()); - break; - case DistanceTypeSparseJaccard: - comparator = new ObjectSpaceRepository::ComparatorSparseJaccardDistance(ObjectSpace::getPaddedDimension()); - setSparse(); - break; - case DistanceTypeAngle: - comparator = new ObjectSpaceRepository::ComparatorAngleDistance(ObjectSpace::getPaddedDimension()); - break; - case DistanceTypeCosine: - comparator = new ObjectSpaceRepository::ComparatorCosineSimilarity(ObjectSpace::getPaddedDimension()); - break; - case DistanceTypeNormalizedAngle: - comparator = new ObjectSpaceRepository::ComparatorNormalizedAngleDistance(ObjectSpace::getPaddedDimension()); - normalization = true; - break; - case DistanceTypeNormalizedCosine: - comparator = new ObjectSpaceRepository::ComparatorNormalizedCosineSimilarity(ObjectSpace::getPaddedDimension()); - normalization = true; - break; -#endif - default: - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Distance type is not specified"); -// std::cerr << "Distance type is not specified" << std::endl; - assert(distanceType != DistanceTypeNone); - abort(); - } - } - - - void serialize(const std::string & ofile) { ObjectRepository::serialize(ofile, this); } - // for milvus - void serialize(std::stringstream & obj) { ObjectRepository::serialize(obj, this); } - // for milvus - void deserialize(std::stringstream & obj) { ObjectRepository::deserialize(obj, this); } - void deserialize(const std::string &ifile) { ObjectRepository::deserialize(ifile, this); } - void serializeAsText(const std::string &ofile) { ObjectRepository::serializeAsText(ofile, this); } - void deserializeAsText(const std::string &ifile) { ObjectRepository::deserializeAsText(ifile, this); } - // For milvus - void readRawData(const float * raw_data, size_t dataSize) { ObjectRepository::readRawData(raw_data, dataSize); } - void readText(std::istream &is, size_t dataSize) { ObjectRepository::readText(is, dataSize); } - void appendText(std::istream &is, size_t dataSize) { ObjectRepository::appendText(is, dataSize); } - - void append(const float *data, size_t dataSize) { ObjectRepository::append(data, dataSize); } - void append(const double *data, size_t dataSize) { ObjectRepository::append(data, dataSize); } - - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - PersistentObject *allocatePersistentObject(Object &obj) { - return ObjectRepository::allocatePersistentObject(obj); - } - size_t insert(PersistentObject *obj) { return ObjectRepository::insert(obj); } -#else - size_t insert(Object *obj) { return ObjectRepository::insert(obj); } -#endif - - void remove(size_t id) { ObjectRepository::remove(id); } - - void linearSearch(Object &query, double radius, size_t size, ObjectSpace::ResultSet &results) { - if (!results.empty()) { - NGTThrowException("lenearSearch: results is not empty"); - } -#ifndef NGT_PREFETCH_DISABLED - size_t byteSizeOfObject = getByteSizeOfObject(); - const size_t prefetchOffset = getPrefetchOffset(); -#endif - ObjectRepository &rep = *this; - for (size_t idx = 0; idx < rep.size(); idx++) { -#ifndef NGT_PREFETCH_DISABLED - if (idx + prefetchOffset < rep.size() && rep[idx + prefetchOffset] != 0) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - MemoryCache::prefetch((unsigned char*)&(*static_cast(ObjectRepository::get(idx + prefetchOffset))), byteSizeOfObject); -#else - MemoryCache::prefetch((unsigned char*)&(*static_cast(rep[idx + prefetchOffset]))[0], byteSizeOfObject); -#endif - } -#endif - if (rep[idx] == 0) { - continue; - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Distance d = (*comparator)((Object&)query, (PersistentObject&)*rep[idx]); -#else - Distance d = (*comparator)((Object&)query, (Object&)*rep[idx]); -#endif - if (radius < 0.0 || d <= radius) { - NGT::ObjectDistance obj(idx, d); - results.push(obj); - if (results.size() > size) { - results.pop(); - } - } - } - return; - } - - void *getObject(size_t idx) { - if (isEmpty(idx)) { - std::stringstream msg; - msg << "NGT::ObjectSpaceRepository: The specified ID is out of the range. The object ID should be greater than zero. " << idx << ":" << ObjectRepository::size() << "."; - NGTThrowException(msg); - } - PersistentObject &obj = *(*this)[idx]; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - return reinterpret_cast(&obj.at(0, allocator)); -#else - return reinterpret_cast(&obj[0]); -#endif - } - - void getObject(size_t idx, std::vector &v) { - OBJECT_TYPE *obj = static_cast(getObject(idx)); - size_t dim = getDimension(); - v.resize(dim); - for (size_t i = 0; i < dim; i++) { - v[i] = static_cast(obj[i]); - } - } - - void getObjects(const std::vector &idxs, std::vector> &vs) { - vs.resize(idxs.size()); - auto v = vs.begin(); - for (auto idx = idxs.begin(); idx != idxs.end(); idx++, v++) { - getObject(*idx, *v); - } - } - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - void normalize(PersistentObject &object) { - OBJECT_TYPE *obj = (OBJECT_TYPE*)&object.at(0, getRepository().getAllocator()); - ObjectSpace::normalize(obj, ObjectSpace::dimension); - } -#endif - void normalize(Object &object) { - OBJECT_TYPE *obj = (OBJECT_TYPE*)&object[0]; - ObjectSpace::normalize(obj, ObjectSpace::dimension); - } - - Object *allocateObject() { return ObjectRepository::allocateObject(); } - void deleteObject(Object *po) { ObjectRepository::deleteObject(po); } - - Object *allocateNormalizedObject(const std::string &textLine, const std::string &sep) { - Object *allocatedObject = ObjectRepository::allocateObject(textLine, sep); - if (normalization) { - normalize(*allocatedObject); - } - return allocatedObject; - } - Object *allocateNormalizedObject(const std::vector &obj) { - Object *allocatedObject = ObjectRepository::allocateObject(obj); - if (normalization) { - normalize(*allocatedObject); - } - return allocatedObject; - } - Object *allocateNormalizedObject(const std::vector &obj) { - Object *allocatedObject = ObjectRepository::allocateObject(obj); - if (normalization) { - normalize(*allocatedObject); - } - return allocatedObject; - } - Object *allocateNormalizedObject(const std::vector &obj) { - Object *allocatedObject = ObjectRepository::allocateObject(obj); - if (normalization) { - normalize(*allocatedObject); - } - return allocatedObject; - } - Object *allocateNormalizedObject(const float *obj, size_t size) { - Object *allocatedObject = ObjectRepository::allocateObject(obj, size); - if (normalization) { - normalize(*allocatedObject); - } - return allocatedObject; - } - - PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) { - PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj); - if (normalization) { - normalize(*allocatedObject); - } - return allocatedObject; - } - - PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) { - PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj); - if (normalization) { - normalize(*allocatedObject); - } - return allocatedObject; - } - - PersistentObject *allocateNormalizedPersistentObject(const std::vector &obj) { - PersistentObject *allocatedObject = ObjectRepository::allocatePersistentObject(obj); - if (normalization) { - normalize(*allocatedObject); - } - return allocatedObject; - } - - size_t getSize() { return ObjectRepository::size(); } - size_t getSizeOfElement() { return sizeof(OBJECT_TYPE); } - const std::type_info &getObjectType() { return typeid(OBJECT_TYPE); }; - size_t getByteSizeOfObject() { return getByteSize(); } - - ObjectRepository &getRepository() { return *this; }; - - void show(std::ostream &os, Object &object) { - const std::type_info &t = getObjectType(); - if (t == typeid(uint8_t)) { - unsigned char *optr = static_cast(&object[0]); - for (size_t i = 0; i < getDimension(); i++) { - os << (int)optr[i] << " "; - } - } else if (t == typeid(float)) { - float *optr = reinterpret_cast(&object[0]); - for (size_t i = 0; i < getDimension(); i++) { - os << optr[i] << " "; - } - } else { - os << " not implement for the type."; - } - } - - }; - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - // set v in objectspace to this object using allocator. - inline void PersistentObject::set(PersistentObject &po, ObjectSpace &objectspace) { - SharedMemoryAllocator &allocator = objectspace.getRepository().getAllocator(); - uint8_t *src = (uint8_t *)&po.at(0, allocator); - uint8_t *dst = (uint8_t *)&(*this).at(0, allocator); - memcpy(dst, src, objectspace.getByteSizeOfObject()); - } - - inline off_t PersistentObject::allocate(ObjectSpace &objectspace) { - SharedMemoryAllocator &allocator = objectspace.getRepository().getAllocator(); - return allocator.getOffset(new(allocator) PersistentObject(allocator, &objectspace)); - } - - inline void PersistentObject::serializeAsText(std::ostream &os, ObjectSpace *objectspace) { - assert(objectspace != 0); - SharedMemoryAllocator &allocator = objectspace->getRepository().getAllocator(); - const std::type_info &t = objectspace->getObjectType(); - void *ref = &(*this).at(0, allocator); - size_t dimension = objectspace->getDimension(); - if (t == typeid(uint8_t)) { - NGT::Serializer::writeAsText(os, (uint8_t*)ref, dimension); - } else if (t == typeid(float)) { - NGT::Serializer::writeAsText(os, (float*)ref, dimension); - } else if (t == typeid(double)) { - NGT::Serializer::writeAsText(os, (double*)ref, dimension); - } else if (t == typeid(uint16_t)) { - NGT::Serializer::writeAsText(os, (uint16_t*)ref, dimension); - } else if (t == typeid(uint32_t)) { - NGT::Serializer::writeAsText(os, (uint32_t*)ref, dimension); - } else { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("ObjectT::serializeAsText: not supported data type. [" + std::to_string(t.name()) + "]"); -// std::cerr << "ObjectT::serializeAsText: not supported data type. [" << t.name() << "]" << std::endl; - assert(0); - } - } - - inline void PersistentObject::deserializeAsText(std::ifstream &is, ObjectSpace *objectspace) { - assert(objectspace != 0); - SharedMemoryAllocator &allocator = objectspace->getRepository().getAllocator(); - const std::type_info &t = objectspace->getObjectType(); - size_t dimension = objectspace->getDimension(); - void *ref = &(*this).at(0, allocator); - assert(ref != 0); - if (t == typeid(uint8_t)) { - NGT::Serializer::readAsText(is, (uint8_t*)ref, dimension); - } else if (t == typeid(float)) { - NGT::Serializer::readAsText(is, (float*)ref, dimension); - } else if (t == typeid(double)) { - NGT::Serializer::readAsText(is, (double*)ref, dimension); - } else if (t == typeid(uint16_t)) { - NGT::Serializer::readAsText(is, (uint16_t*)ref, dimension); - } else if (t == typeid(uint32_t)) { - NGT::Serializer::readAsText(is, (uint32_t*)ref, dimension); - } else { - if (NGT_LOG_DEBUG_) - (*NGT_LOG_DEBUG_)("Object::deserializeAsText: not supported data type. [" + std::to_string(t.name()) + "]"); -// std::cerr << "Object::deserializeAsText: not supported data type. [" << t.name() << "]" << std::endl; - assert(0); - } - } - -#endif -} // namespace NGT - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Optimizer.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Optimizer.h deleted file mode 100644 index 6bd51a6af5..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Optimizer.h +++ /dev/null @@ -1,1568 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include "Command.h" - - -#define NGT_LOG_BASED_OPTIMIZATION - -namespace NGT { - class Optimizer { - public: - - - Optimizer(NGT::Index &i, size_t n = 10):index(i), nOfResults(n) { - } - ~Optimizer() {} - - class MeasuredValue { - public: - MeasuredValue():keyValue(0.0), totalCount(0), meanAccuracy(0.0), meanTime(0.0), meanDistanceCount(0.0), meanVisitCount(0.0) {} - double keyValue; - size_t totalCount; - float meanAccuracy; - double meanTime; - double meanDistanceCount; - double meanVisitCount; - }; - - class SumupValues { - public: - class Result { - public: - size_t queryNo; - double key; - double accuracy; - double time; - double distanceCount; - double visitCount; - double meanDistance; - std::vector searchedIDs; - std::vector unsearchedIDs; - }; - - SumupValues(bool res = false):resultIsAvailable(res){} - - void clear() { - totalAccuracy.clear(); - totalTime.clear(); - totalDistanceCount.clear(); - totalVisitCount.clear(); - totalCount.clear(); - } - - std::vector sumup() { - std::vector accuracies; - for (auto it = totalAccuracy.begin(); it != totalAccuracy.end(); ++it) { - MeasuredValue a; - a.keyValue = (*it).first; - a.totalCount = totalCount[a.keyValue]; - a.meanAccuracy = totalAccuracy[a.keyValue] / (double)totalCount[a.keyValue]; - a.meanTime = totalTime[a.keyValue] / (double)totalCount[a.keyValue]; - a.meanDistanceCount = (double)totalDistanceCount[a.keyValue] / (double)totalCount[a.keyValue]; - a.meanVisitCount = (double)totalVisitCount[a.keyValue] / (double)totalCount[a.keyValue]; - accuracies.push_back(a); - } - return accuracies; - } - - std::map totalAccuracy; - std::map totalTime; - std::map totalDistanceCount; - std::map totalVisitCount; - std::map totalCount; - - bool resultIsAvailable; - std::vector results; - - }; - - void enableLog() { redirector.disable(); } - void disableLog() { redirector.enable(); } - - static void search(NGT::Index &index, std::istream >Stream, Command::SearchParameter &sp, std::vector &acc) { - std::ifstream is(sp.query); - if (!is) { - std::stringstream msg; - msg << "Cannot open the specified file. " << sp.query; - NGTThrowException(msg); - } - - search(index, gtStream, sp, acc); - } - - static void search(NGT::Index &index, std::istream &queries, std::istream >Stream, Command::SearchParameter &sp, std::vector &acc) { - sp.stepOfEpsilon = 1.0; - std::stringstream resultStream; - NGT::Command::search(index, sp, queries, resultStream); - resultStream.clear(); - resultStream.seekg(0, std::ios_base::beg); - std::string type; - size_t actualResultSize = 0; - gtStream.seekg(0, std::ios_base::end); - auto pos = gtStream.tellg(); - if (pos == 0) { - acc = evaluate(resultStream, type, actualResultSize); - } else { - SumupValues sumupValues(true); - gtStream.clear(); - gtStream.seekg(0, std::ios_base::beg); - acc = evaluate(gtStream, resultStream, sumupValues, type, actualResultSize); - - } - - assert(acc.size() == 1); - } - - static std::vector - evaluate(std::istream &resultStream, std::string &type, - size_t &resultDataSize, size_t specifiedResultSize = 0, size_t groundTruthSize = 0, bool recall = false) - { - - resultDataSize = 0; - - if (recall) { - if (specifiedResultSize == 0) { - std::stringstream msg; - msg << "For calculating recalls, the result size should be specified."; - NGTThrowException(msg); - } - resultDataSize = specifiedResultSize; - } else { - checkAndGetSize(resultStream, resultDataSize); - } - - std::string line; - size_t queryNo = 1; - - SumupValues sumupValues; - - resultStream.clear(); - resultStream.seekg(0, std::ios_base::beg); - - do { - std::unordered_set gt; - double farthestDistance = 0.0; - sumup(resultStream, queryNo, sumupValues, - gt, resultDataSize, type, recall, farthestDistance); - queryNo++; - } while (!resultStream.eof()); - - return sumupValues.sumup(); - } - - static std::vector - evaluate(std::istream >Stream, std::istream &resultStream, std::string &type, - size_t &resultDataSize, size_t specifiedResultSize = 0, size_t groundTruthSize = 0, bool recall = false) - { - SumupValues sumupValues; - return evaluate(gtStream, resultStream, sumupValues, type, resultDataSize, specifiedResultSize, groundTruthSize, recall); - } - - static std::vector - evaluate(std::istream >Stream, std::istream &resultStream, SumupValues &sumupValues, std::string &type, - size_t &resultDataSize, size_t specifiedResultSize = 0, size_t groundTruthSize = 0, bool recall = false) - { - resultDataSize = 0; - - if (recall) { - if (specifiedResultSize == 0) { - std::stringstream msg; - msg << "For calculating recalls, the result size should be specified."; - NGTThrowException(msg); - } - resultDataSize = specifiedResultSize; - } else { - checkAndGetSize(resultStream, resultDataSize); - } - - std::string line; - size_t queryNo = 1; - sumupValues.clear(); - - resultStream.clear(); - resultStream.seekg(0, std::ios_base::beg); - - while (getline(gtStream, line)) { - std::vector tokens; - NGT::Common::tokenize(line, tokens, "="); - if (tokens.size() == 0) { - continue; - } - if (tokens[0] == "# Query No.") { - if (tokens.size() > 1 && (size_t)NGT::Common::strtol(tokens[1]) == queryNo) { - std::unordered_set gt; - double farthestDistance; - if (groundTruthSize == 0) { - loadGroundTruth(gtStream, gt, resultDataSize, farthestDistance); - } else { - loadGroundTruth(gtStream, gt, groundTruthSize, farthestDistance); - } - sumup(resultStream, queryNo, sumupValues, - gt, resultDataSize, type, recall, farthestDistance); - - queryNo++; - } - } - } - - return sumupValues.sumup(); - } - - static void - loadGroundTruth(std::istream & gtf, std::unordered_set & gt, size_t resultDataSize, double &distance) { - std::string line; - size_t dataCount = 0; - size_t searchCount = 0; - while (getline(gtf, line)) { - if (line.size() != 0 && line.at(0) == '#') { - std::vector gtf; - NGT::Common::tokenize(line, gtf, "="); - if (gtf.size() >= 1) { - if (gtf[0] == "# End of Search") { - searchCount++; - } - if (gtf[0] == "# End of Query") { - if (searchCount != 1) { - std::stringstream msg; - msg << "Error: gt has not just one search result."; - NGTThrowException(msg); - } - if (dataCount < resultDataSize) { - std::stringstream msg; - msg << "Error: gt data is less than result size! " << dataCount << ":" << resultDataSize; - NGTThrowException(msg); - } - return; - } - continue; - } - } - dataCount++; - if (dataCount > resultDataSize) { - continue; - } - std::vector result; - NGT::Common::tokenize(line, result, " \t"); - if (result.size() < 3) { - std::stringstream msg; - msg << "result format is wrong. "; - NGTThrowException(msg); - } - size_t id = NGT::Common::strtol(result[1]); - distance = NGT::Common::strtod(result[2]); - try { - gt.insert(id); - } catch(...) { - std::stringstream msg; - msg << "Cannot insert id into the gt. " << id; - NGTThrowException(msg); - } - } - } - - static void checkAndGetSize(std::istream &resultStream, size_t &resultDataSize) - { - size_t lineCount = 0; - size_t prevDataCount = 0; - std::string line; - bool warn = false; - - while (getline(resultStream, line)) { - lineCount++; - if (line.size() != 0 && line.at(0) == '#') { - std::vector tf; - NGT::Common::tokenize(line, tf, "="); - if (tf.size() >= 1 && tf[0] == "# Query No.") { - size_t dataCount = 0; - std::string lastDataLine; - while (getline(resultStream, line)) { - lineCount++; - if (line.size() != 0 && line.at(0) == '#') { - std::vector gtf; - NGT::Common::tokenize(line, gtf, "="); - if (gtf.size() >= 1 && gtf[0] == "# End of Search") { - if (prevDataCount == 0) { - prevDataCount = dataCount; - } else { - if (prevDataCount != dataCount) { - warn = true; - std::cerr << "Warning!: Result sizes are inconsistent! $prevDataCount:$dataCount" << std::endl; - std::cerr << " Line No." << lineCount << ":" << lastDataLine << std::endl; - if (prevDataCount < dataCount) { - prevDataCount = dataCount; - } - } - } - dataCount = 0; - break; - } - continue; - } - lastDataLine = line; - std::vector result; - NGT::Common::tokenize(line, result, " \t"); - if (result.size() < 3) { - std::stringstream msg; - msg << "result format is wrong. "; - NGTThrowException(msg); - } - size_t rank = NGT::Common::strtol(result[0]); - dataCount++; - if (rank != dataCount) { - std::stringstream msg; - msg << "check: inner error! " << rank << ":" << dataCount; - NGTThrowException(msg); - } - } - } - } - } - resultDataSize = prevDataCount; - if (warn) { - std::cerr << "Warning! ****************************************************************************" << std::endl; - std::cerr << " Check if the result number $$resultDataSize is correct." << std::endl; - std::cerr << "Warning! ****************************************************************************" << std::endl; - } - } - - static void sumup(std::istream &resultStream, - size_t queryNo, - SumupValues &sumupValues, - std::unordered_set >, - const size_t resultDataSize, - std::string &keyValue, - bool recall, - double farthestDistance) - { - std::string line; - size_t lineNo = 0; - while (getline(resultStream, line)) { - lineNo++; - size_t resultNo = 0; - if (line.size() != 0 && line.at(0) == '#') { - std::vector tf; - NGT::Common::tokenize(line, tf, "="); - if (tf.size() >= 1 && tf[0] == "# Query No.") { - if (tf.size() >= 2 && (size_t)NGT::Common::strtol(tf[1]) == queryNo) { - size_t relevantCount = 0; - size_t dataCount = 0; - std::string epsilon; - std::string expansion; - double queryTime = 0.0; - size_t distanceCount = 0; - size_t visitCount = 0; - double totalDistance = 0.0; - std::unordered_set searchedIDs; - while (getline(resultStream, line)) { - lineNo++; - if (line.size() != 0 && line.at(0) == '#') { - std::vector gtf; - NGT::Common::tokenize(line, gtf, "="); - if (gtf.size() >= 2 && (gtf[0] == "# Epsilon" || gtf[0] == "# Factor")) { - epsilon = gtf[1]; - } else if (gtf.size() >= 2 && gtf[0] == "# Result expansion") { - expansion = gtf[1]; - } else if (gtf.size() >= 2 && gtf[0] == "# Query Time (msec)") { - queryTime = NGT::Common::strtod(gtf[1]); - } else if (gtf.size() >= 2 && gtf[0] == "# Distance Computation") { - distanceCount = NGT::Common::strtol(gtf[1]); - } else if (gtf.size() >= 2 && gtf[0] == "# Visit Count") { - visitCount = NGT::Common::strtol(gtf[1]); - } else if (gtf.size() >= 1 && gtf[0] == "# End of Query") { - return; - } else if (gtf.size() >= 1 && gtf[0] == "# End of Search") { - resultNo++; - if (recall == false && resultDataSize != dataCount) { - std::cerr << "Warning! ****************************************************************************" << std::endl; - std::cerr << " Use $resultDataSize instead of $dataCount as the result size to compute accuracy. " << std::endl; - std::cerr << " # of the actual search resultant objects=" << dataCount << std::endl; - std::cerr << " the specified # of search objects or # of the ground truth data=" << resultDataSize << std::endl; - std::cerr << " Line No.=" << lineNo << " Query No.=" << queryNo << " Result No.=" << resultNo << std::endl; - std::cerr << "Warning! ****************************************************************************" << std::endl; - } - double accuracy = (double)relevantCount / (double)resultDataSize; - double key; - if (epsilon != "") { - key = NGT::Common::strtod(epsilon); - keyValue = "Factor (Epsilon)"; - } else if (expansion != "") { - key = NGT::Common::strtod(expansion); - keyValue = "Expansion"; - } else { - std::stringstream msg; - msg << "check: inner error! " << epsilon; - std::cerr << "Cannot find epsilon."; - NGTThrowException(msg); - } - { - auto di = sumupValues.totalAccuracy.find(key); - if (di != sumupValues.totalAccuracy.end()) { - (*di).second += accuracy; - } else { - sumupValues.totalAccuracy.insert(std::make_pair(key, accuracy)); - } - } - { - auto di = sumupValues.totalTime.find(key); - if (di != sumupValues.totalTime.end()) { - (*di).second += queryTime; - } else { - sumupValues.totalTime.insert(std::make_pair(key, queryTime)); - } - } - { - auto di = sumupValues.totalDistanceCount.find(key); - if (di != sumupValues.totalDistanceCount.end()) { - (*di).second += distanceCount; - } else { - sumupValues.totalDistanceCount.insert(std::make_pair(key, distanceCount)); - } - } - { - auto di = sumupValues.totalVisitCount.find(key); - if (di != sumupValues.totalVisitCount.end()) { - (*di).second += visitCount; - } else { - sumupValues.totalVisitCount.insert(std::make_pair(key, visitCount)); - } - } - { - auto di = sumupValues.totalCount.find(key); - if (di != sumupValues.totalCount.end()) { - (*di).second ++; - } else { - sumupValues.totalCount.insert(std::make_pair(key, 1)); - } - } - if (sumupValues.resultIsAvailable) { - SumupValues::Result result; - result.queryNo = queryNo; - result.key = key; - result.accuracy = accuracy; - result.time = queryTime; - result.distanceCount = distanceCount; - result.visitCount = visitCount; - result.meanDistance = totalDistance / (double)resultDataSize; - for (auto i = gt.begin(); i != gt.end(); ++i) { - if (searchedIDs.find(*i) == searchedIDs.end()) { - result.unsearchedIDs.push_back(*i); - } else { - result.searchedIDs.push_back(*i); - } - } - sumupValues.results.push_back(result); - searchedIDs.clear(); - } - totalDistance = 0.0; - relevantCount = 0; - dataCount = 0; - } - continue; - } - std::vector result; - NGT::Common::tokenize(line, result, " \t"); - if (result.size() < 3) { - std::cerr << "result format is wrong. " << std::endl; - abort(); - } - size_t rank = NGT::Common::strtol(result[0]); - size_t id = NGT::Common::strtol(result[1]); - double distance = NGT::Common::strtod(result[2]); - totalDistance += distance; - if (gt.count(id) != 0) { - relevantCount++; - if (sumupValues.resultIsAvailable) { - searchedIDs.insert(id); - } - } else { - if (farthestDistance > 0.0 && distance <= farthestDistance) { - relevantCount++; - if (distance < farthestDistance) { - } - } - } - dataCount++; - if (rank != dataCount) { - std::cerr << "inner error! $rank $dataCount !!" << std::endl;; - abort(); - } - } - } else { - std::cerr << "Fatal error! : Cannot find query No. " << queryNo << std::endl; - abort(); - } - } - } - } - } - - static void exploreEpsilonForAccuracy(NGT::Index &index, std::istream &queries, std::istream >Stream, - Command::SearchParameter &sp, std::pair accuracyRange, double margin) - { - double fromUnder = 0.0; - double fromOver = 1.0; - double toUnder = 0.0; - double toOver = 1.0; - float fromUnderEpsilon = -0.9; - float fromOverEpsilon = -0.9; - float toUnderEpsilon = -0.9; - float toOverEpsilon = -0.9; - - float accuracyRangeFrom = accuracyRange.first; - float accuracyRangeTo = accuracyRange.second; - - double range = accuracyRangeTo - accuracyRangeFrom; - - std::vector acc; - - { - float startEpsilon = -0.6; - float epsilonStep = 0.02; - size_t count; - for (count = 0;; count++) { - float epsilon = round((startEpsilon + epsilonStep * count) * 100.0F) / 100.0F; - if (epsilon > 0.25F) { - std::stringstream msg; - msg << "exploreEpsilonForAccuracy:" << std::endl; - msg << "Error!! Epsilon (lower bound) is too large. " << epsilon << "," << startEpsilon << "," << epsilonStep << "," << count; - NGTThrowException(msg); - } - acc.clear(); - sp.beginOfEpsilon = sp.endOfEpsilon = fromOverEpsilon = epsilon; - queries.clear(); - queries.seekg(0, std::ios_base::beg); - search(index, queries, gtStream, sp, acc); - if (acc[0].meanAccuracy >= accuracyRangeFrom) { - break; - } - } - if (fromOverEpsilon == startEpsilon) { - std::stringstream msg; - msg << "exploreEpsilonForAccuracy:" << std::endl; - msg << "Error! startEpsilon should be reduced for the specified range."; - NGTThrowException(msg); - } - fromOver = acc[0].meanAccuracy; - - if (fromOver < accuracyRangeTo) { - startEpsilon = fromOverEpsilon; - for (count = 0;; count++) { - float epsilon = round((startEpsilon + epsilonStep * count) * 100.0F) / 100.0F; - sp.beginOfEpsilon = sp.endOfEpsilon = toOverEpsilon = epsilon; - if (epsilon > 0.25F) { - std::stringstream msg; - msg << "exploreEpsilonForAccuracy:" << std::endl; - msg << "Error!! Epsilon (upper bound) is too large. " << epsilon << "," << startEpsilon << "," << epsilonStep << "," << count; - NGTThrowException(msg); - } - acc.clear(); - queries.clear(); - queries.seekg(0, std::ios_base::beg); - search(index, queries, gtStream, sp, acc); - epsilon += epsilonStep; - if (acc[0].meanAccuracy >= accuracyRangeTo) { - break; - } - } - toOver = acc[0].meanAccuracy; - } else { - toOver = fromOver; - toOverEpsilon = fromOverEpsilon; - } - fromUnderEpsilon = fromOverEpsilon - epsilonStep; - } - sp.beginOfEpsilon = sp.endOfEpsilon = fromUnderEpsilon; - while (true) { - acc.clear(); - queries.clear(); - queries.seekg(0, std::ios_base::beg); - search(index, queries, gtStream, sp, acc); - if (acc[0].meanAccuracy >= fromUnder && acc[0].meanAccuracy <= accuracyRangeFrom) { - fromUnder = acc[0].meanAccuracy; - fromUnderEpsilon = acc[0].keyValue; - } - if (acc[0].meanAccuracy <= fromOver && acc[0].meanAccuracy > accuracyRangeFrom) { - fromOver = acc[0].meanAccuracy; - fromOverEpsilon = acc[0].keyValue; - } - if (acc[0].meanAccuracy <= toOver && acc[0].meanAccuracy > accuracyRangeTo) { - toOver = acc[0].meanAccuracy; - toOverEpsilon = acc[0].keyValue; - } - if (acc[0].meanAccuracy >= toUnder && acc[0].meanAccuracy <= accuracyRangeTo) { - toUnder = acc[0].meanAccuracy; - toUnderEpsilon = acc[0].keyValue; - } - - if (fromUnder < accuracyRangeFrom - range * margin) { - if ((fromUnderEpsilon + fromOverEpsilon) / 2.0 == sp.beginOfEpsilon) { - std::stringstream msg; - msg << "exploreEpsilonForAccuracy:" << std::endl; - msg << "Error!! Not found proper under epsilon for margin=" << margin << " and the number of queries." << std::endl; - msg << " Should increase margin or the number of queries to get the proper epsilon. "; - NGTThrowException(msg); - } else { - sp.beginOfEpsilon = sp.endOfEpsilon = (fromUnderEpsilon + fromOverEpsilon) / 2.0; - } - } else if (toOver > accuracyRangeTo + range * margin) { - if ((toUnderEpsilon + toOverEpsilon) / 2.0 == sp.beginOfEpsilon) { - std::stringstream msg; - msg << "exploreEpsilonForAccuracy:" << std::endl; - msg << "Error!! Not found proper over epsilon for margin=" << margin << " and the number of queries." << std::endl; - msg << " Should increase margin or the number of queries to get the proper epsilon. "; - NGTThrowException(msg); - } else { - sp.beginOfEpsilon = sp.endOfEpsilon = (toUnderEpsilon + toOverEpsilon) / 2.0; - } - } else { - if (fromUnderEpsilon == toOverEpsilon) { - std::stringstream msg; - msg << "exploreEpsilonForAccuracy:" << std::endl; - msg << "Error!! From and to epsilons are the same. Cannot continue."; - NGTThrowException(msg); - } - sp.beginOfEpsilon = fromUnderEpsilon; - sp.endOfEpsilon = toOverEpsilon; - return; - } - } - std::stringstream msg; - msg << "Something wrong!"; - NGTThrowException(msg); - } - - MeasuredValue measure(std::istream &queries, std::istream >Stream, Command::SearchParameter &searchParameter, std::pair accuracyRange, double margin) { - - exploreEpsilonForAccuracy(index, queries, gtStream, searchParameter, accuracyRange, margin); - - std::stringstream resultStream; - queries.clear(); - queries.seekg(0, std::ios_base::beg); - NGT::Command::search(index, searchParameter, queries, resultStream); - gtStream.clear(); - gtStream.seekg(0, std::ios_base::beg); - resultStream.clear(); - resultStream.seekg(0, std::ios_base::beg); - std::string type; - size_t actualResultSize = 0; - std::vector accuracies = evaluate(gtStream, resultStream, type, actualResultSize); - size_t size; - double distanceCount = 0, visitCount = 0, time = 0; - calculateMeanValues(accuracies, accuracyRange.first, accuracyRange.second, size, distanceCount, visitCount, time); - if (distanceCount == 0) { - std::stringstream msg; - msg << "measureDistance: Error! Distance count is zero."; - NGTThrowException(msg); - } - MeasuredValue v; - v.meanVisitCount = visitCount; - v.meanDistanceCount = distanceCount; - v.meanTime = time; - return v; - } - - std::pair adjustBaseSearchEdgeSize(std::stringstream &queries, Command::SearchParameter &searchParameter, std::stringstream >Stream, std::pair accuracyRange, float marginInit = 0.2, size_t prevBase = 0) { - searchParameter.edgeSize = -2; - size_t minimumBase = 4; - size_t minimumStep = 2; - size_t baseStartInit = 1; - while (prevBase != 0) { - prevBase >>= 1; - baseStartInit <<= 1; - } - baseStartInit >>= 2; - baseStartInit = baseStartInit < minimumBase ? minimumBase : baseStartInit; - while(true) { - try { - float margin = marginInit; - size_t baseStart = baseStartInit; - double minTime = DBL_MAX; - size_t minBase = 0; - std::map times; - std::cerr << "adjustBaseSearchEdgeSize: explore for the margin " << margin << ", " << baseStart << "..." << std::endl; - for (size_t baseStep = 16; baseStep != 1; baseStep /= 2) { - double prevTime = DBL_MAX; - for (size_t base = baseStart; ; base += baseStep) { - if (base > 1000) { - std::stringstream msg; - msg << "base is too large! " << base; - NGTThrowException(msg); - } - searchParameter.step = 10; - NGT::GraphIndex &graphIndex = static_cast(index.getIndex()); - NeighborhoodGraph::Property &prop = graphIndex.getGraphProperty(); - prop.dynamicEdgeSizeBase = base; - double time; - if (times.count(base) == 0) { - for (;;) { - try { - auto values = measure(queries, gtStream, searchParameter, accuracyRange, margin); - time = values.meanTime; - break; - } catch(NGT::Exception &err) { - if (err.getMessage().find("Error!! Epsilon") != std::string::npos && - err.getMessage().find("is too large") != std::string::npos) { - std::cerr << "Warning: Cannot adjust the base edge size." << err.what() << std::endl; - std::cerr << "Try again with the next base" << std::endl; - NGTThrowException("**Retry**"); - } - if (margin > 0.4) { - std::cerr << "Warning: Cannot adjust the base even for the widest margin " << margin << ". " << err.what(); - NGTThrowException("**Retry**"); - } else { - std::cerr << "Warning: Cannot adjust the base edge size for margin " << margin << ". " << err.what() << std::endl; - std::cerr << "Try again for the next margin." << std::endl; - margin += 0.05; - } - } - } - times.insert(std::make_pair(base, time)); - } else { - time = times.at(base); - } - if (prevTime <= time) { - if (baseStep == minimumStep) { - return std::make_pair(minBase, minTime); - } else { - baseStart = static_cast(minBase) - static_cast(baseStep) < static_cast(baseStart) ? baseStart : minBase - baseStep; - break; - } - } - prevTime = time; - if (time < minTime) { - minTime = time; - minBase = base; - } - } - } - } catch(NGT::Exception &err) { - if (err.getMessage().find("**Retry**") != std::string::npos) { - baseStartInit += minimumStep; - } else { - throw err; - } - } - } - } - - size_t adjustBaseSearchEdgeSize(std::pair accuracyRange, size_t querySize, double epsilon, float margin = 0.2) { - std::cerr << "adjustBaseSearchEdgeSize: Extract queries for GT..." << std::endl; - std::stringstream queries; - extractQueries(querySize, queries); - - std::cerr << "adjustBaseSearchEdgeSize: create GT..." << std::endl; - Command::SearchParameter searchParameter; - searchParameter.edgeSize = -1; - std::stringstream gtStream; - createGroundTruth(index, epsilon, searchParameter, queries, gtStream); - - auto base = adjustBaseSearchEdgeSize(queries, searchParameter, gtStream, accuracyRange, margin); - return base.first; - } - - - std::pair adjustRateSearchEdgeSize(std::stringstream &queries, Command::SearchParameter &searchParameter, std::stringstream >Stream, std::pair accuracyRange, float marginInit = 0.2, size_t prevRate = 0) { - searchParameter.edgeSize = -2; - size_t minimumRate = 2; - size_t minimumStep = 4; - size_t rateStartInit = 1; - while (prevRate != 0) { - prevRate >>= 1; - rateStartInit <<= 1; - } - rateStartInit >>= 2; - rateStartInit = rateStartInit < minimumRate ? minimumRate : rateStartInit; - while (true) { - try { - float margin = marginInit; - size_t rateStart = rateStartInit; - double minTime = DBL_MAX; - size_t minRate = 0; - std::map times; - std::cerr << "adjustRateSearchEdgeSize: explore for the margin " << margin << ", " << rateStart << "..." << std::endl; - for (size_t rateStep = 16; rateStep != 1; rateStep /= 2) { - double prevTime = DBL_MAX; - for (size_t rate = rateStart; rate < 2000; rate += rateStep) { - if (rate > 1000) { - std::stringstream msg; - msg << "rate is too large! " << rate; - NGTThrowException(msg); - } - searchParameter.step = 10; - NGT::GraphIndex &graphIndex = static_cast(index.getIndex()); - NeighborhoodGraph::Property &prop = graphIndex.getGraphProperty(); - prop.dynamicEdgeSizeRate = rate; - double time; - if (times.count(rate) == 0) { - for (;;) { - try { - auto values = measure(queries, gtStream, searchParameter, accuracyRange, margin); - time = values.meanTime; - break; - } catch(NGT::Exception &err) { - if (err.getMessage().find("Error!! Epsilon") != std::string::npos && - err.getMessage().find("is too large") != std::string::npos) { - std::cerr << "Warning: Cannot adjust the rate of edge size." << err.what() << std::endl; - std::cerr << "Try again with the next rate" << std::endl; - NGTThrowException("**Retry**"); - } - if (margin > 0.4) { - std::cerr << "Error: Cannot adjust the rate even for the widest margin " << margin << ". " << err.what(); - NGTThrowException("**Retry**"); - } else { - std::cerr << "Warning: Cannot adjust the rate of edge size for margin " << margin << ". " << err.what() << std::endl; - std::cerr << "Try again for the next margin." << std::endl; - margin += 0.05; - } - } - } - times.insert(std::make_pair(rate, time)); - } else { - time = times.at(rate); - } - if (prevTime <= time) { - if (rateStep == minimumStep) { - return std::make_pair(minRate, minTime); - } else { - rateStart = static_cast(minRate) - static_cast(rateStep) < static_cast(rateStart) ? rateStart : minRate - rateStep; - break; - } - } - prevTime = time; - if (time < minTime) { - minTime = time; - minRate = rate; - } - } - } - } catch(NGT::Exception &err) { - if (err.getMessage().find("**Retry**") != std::string::npos) { - rateStartInit += minimumStep; - } else { - throw err; - } - } - } - } - - - - std::pair adjustSearchEdgeSize(std::pair baseAccuracyRange, std::pair rateAccuracyRange, size_t querySize, double epsilon, float margin = 0.2) { - - - std::stringstream queries; - std::stringstream gtStream; - - Command::SearchParameter searchParameter; - searchParameter.edgeSize = -1; - NGT::GraphIndex &graphIndex = static_cast(index.getIndex()); - NeighborhoodGraph::Property &prop = graphIndex.getGraphProperty(); - searchParameter.size = nOfResults; - redirector.begin(); - try { - std::cerr << "adjustSearchEdgeSize: Extract queries for GT..." << std::endl; - extractQueries(querySize, queries); - std::cerr << "adjustSearchEdgeSize: create GT..." << std::endl; - createGroundTruth(index, epsilon, searchParameter, queries, gtStream); - } catch (NGT::Exception &err) { - std::cerr << "adjustSearchEdgeSize: Error!! Cannot adjust. " << err.what() << std::endl; - redirector.end(); - return std::pair(0, 0); - } - redirector.end(); - - auto prevBase = std::pair(0, 0); - auto prevRate = std::pair(0, 0); - auto base = std::pair(0, 0); - auto rate = std::pair(20, 0); - - std::map, double> history; - redirector.begin(); - for(;;) { - try { - prop.dynamicEdgeSizeRate = rate.first; - prevBase = base; - base = adjustBaseSearchEdgeSize(queries, searchParameter, gtStream, baseAccuracyRange, margin, prevBase.first); - std::cerr << "adjustRateSearchEdgeSize: Base: base=" << prevBase.first << "->" << base.first << ",rate=" << prevRate.first << "->" << rate.first << std::endl; - if (prevBase.first == base.first) { - break; - } - prop.dynamicEdgeSizeBase = base.first; - prevRate = rate; - rate = adjustRateSearchEdgeSize(queries, searchParameter, gtStream, rateAccuracyRange, margin, prevRate.first); - std::cerr << "adjustRateSearchEdgeSize: Rate base=" << prevBase.first << "->" << base.first << ",rate=" << prevRate.first << "->" << rate.first << std::endl; - if (prevRate.first == rate.first) { - break; - } - if (history.count(std::make_pair(base.first, rate.first)) != 0) { - std::cerr << "adjustRateSearchEdgeSize: Warning! Found an infinite loop." << std::endl; - double minTime = rate.second; - std::pair min = std::make_pair(base.first, rate.first); - for (auto i = history.begin(); i != history.end(); ++i) { - double dc = (*i).second; - if (dc < minTime) { - minTime = dc; - min = (*i).first; - } - } - return min; - } - // store parameters here to prioritize high accuracy - history.insert(std::make_pair(std::make_pair(base.first, rate.first), rate.second)); - } catch (NGT::Exception &err) { - std::cerr << "adjustRateSearchEdgeSize: Error!! Cannot adjust. " << err.what() << std::endl; - redirector.end(); - return std::pair(0, 0); - } - } - redirector.end(); - return std::make_pair(base.first, rate.first); - } - - static void adjustSearchEdgeSize(Args &args) - { - const std::string usage = "Usage: ngt adjust-edge-size [-m margin] [-e epsilon-for-ground-truth] [-q #-of-queries] [-n #-of-results] index"; - - std::string indexName; - try { - indexName = args.get("#1"); - } catch (...) { - std::cerr << "ngt: Error: DB is not specified" << std::endl; - std::cerr << usage << std::endl; - return; - } - - std::pair baseAccuracyRange = std::pair(0.30, 0.50); - std::pair rateAccuracyRange = std::pair(0.80, 0.90); - - std::string opt = args.getString("A", ""); - if (opt.size() != 0) { - std::vector tokens; - NGT::Common::tokenize(opt, tokens, ":"); - if (tokens.size() >= 1) { baseAccuracyRange.first = NGT::Common::strtod(tokens[0]); } - if (tokens.size() >= 2) { baseAccuracyRange.second = NGT::Common::strtod(tokens[1]); } - if (tokens.size() >= 3) { rateAccuracyRange.first = NGT::Common::strtod(tokens[2]); } - if (tokens.size() >= 4) { rateAccuracyRange.second = NGT::Common::strtod(tokens[3]); } - } - - double margin = args.getf("m", 0.2); - double epsilon = args.getf("e", 0.1); - size_t querySize = args.getl("q", 100); - size_t nOfResults = args.getl("n", 10); - - std::cerr << "adjustRateSearchEdgeSize: range= " << baseAccuracyRange.first << "-" << baseAccuracyRange.second - << "," << rateAccuracyRange.first << "-" << rateAccuracyRange.second << std::endl; - std::cerr << "adjustRateSearchEdgeSize: # of queries=" << querySize << std::endl; - - NGT::Index index(indexName); - - Optimizer optimizer(index, nOfResults); - try { - auto v = optimizer.adjustSearchEdgeSize(baseAccuracyRange, rateAccuracyRange, querySize, epsilon, margin); - NGT::GraphIndex &graphIndex = static_cast(index.getIndex()); - NeighborhoodGraph::Property &prop = graphIndex.getGraphProperty(); - if (v.first > 0) { - prop.dynamicEdgeSizeBase = v.first; - } - if (v.second > 0) { - prop.dynamicEdgeSizeRate = v.second; - } - if (prop.dynamicEdgeSizeRate > 0 && prop.dynamicEdgeSizeBase > 0) { - graphIndex.saveProperty(indexName); - } - } catch (NGT::Exception &err) { - std::cerr << "adjustRateSearchEdgeSize: Error!! Cannot adjust. " << err.what() << std::endl; - return; - } - } - - - void outputObject(std::ostream &os, std::vector &v, NGT::Property &prop) { - switch (prop.objectType) { - case NGT::ObjectSpace::ObjectType::Uint8: - { - for (auto i = v.begin(); i != v.end(); ++i) { - int d = *i; - os << d; - if (i + 1 != v.end()) { - os << "\t"; - } - } - os << std::endl; - } - break; - default: - case NGT::ObjectSpace::ObjectType::Float: - { - for (auto i = v.begin(); i != v.end(); ++i) { - os << *i; - if (i + 1 != v.end()) { - os << "\t"; - } - } - os << std::endl; - } - break; - } - } - - void outputObjects(std::vector> &vs, std::ostream &os) { - NGT::Property prop; - index.getProperty(prop); - - for (auto i = vs.begin(); i != vs.end(); ++i) { - outputObject(os, *i, prop); - } - } - - std::vector extractObject(size_t id, NGT::Property &prop) { - std::vector v; - switch (prop.objectType) { - case NGT::ObjectSpace::ObjectType::Uint8: - { - auto *obj = static_cast(index.getObjectSpace().getObject(id)); - for (int i = 0; i < prop.dimension; i++) { - int d = *obj++; - v.push_back(d); - } - } - break; - default: - case NGT::ObjectSpace::ObjectType::Float: - { - auto *obj = static_cast(index.getObjectSpace().getObject(id)); - for (int i = 0; i < prop.dimension; i++) { - float d = *obj++; - v.push_back(d); - } - } - break; - } - return v; - } - - std::vector meanObject(size_t id1, size_t id2, NGT::Property &prop) { - std::vector v; - switch (prop.objectType) { - case NGT::ObjectSpace::ObjectType::Uint8: - { - auto *obj1 = static_cast(index.getObjectSpace().getObject(id1)); - auto *obj2 = static_cast(index.getObjectSpace().getObject(id2)); - for (int i = 0; i < prop.dimension; i++) { - int d = (*obj1++ + *obj2++) / 2; - v.push_back(d); - } - } - break; - default: - case NGT::ObjectSpace::ObjectType::Float: - { - auto *obj1 = static_cast(index.getObjectSpace().getObject(id1)); - auto *obj2 = static_cast(index.getObjectSpace().getObject(id2)); - for (int i = 0; i < prop.dimension; i++) { - float d = (*obj1++ + *obj2++) / 2.0F; - v.push_back(d); - } - } - break; - } - return v; - } - - void extractQueries(std::vector> &queries, std::ostream &os) { - NGT::Property prop; - index.getProperty(prop); - - for (auto i = queries.begin(); i != queries.end(); ++i) { - outputObject(os, *i, prop); - } - } - - void extractQueries(size_t nqueries, std::ostream &os, bool similarObject = false) { - - std::vector> queries; - extractQueries(nqueries, queries, similarObject); - - extractQueries(queries, os); - } - - void extractAndRemoveRandomQueries(size_t nqueries, std::vector> &queries) { - NGT::Property prop; - index.getProperty(prop); - size_t repositorySize = index.getObjectRepositorySize(); - NGT::ObjectRepository &objectRepository = index.getObjectSpace().getRepository(); - - queries.clear(); - - size_t emptyCount = 0; - while (nqueries > queries.size()) { - double random = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0); - size_t idx = floor(repositorySize * random) + 1; - if (objectRepository.isEmpty(idx)) { - emptyCount++; - if (emptyCount >= 1000) { - std::stringstream msg; - msg << "Too small amount of objects. " << repositorySize << ":" << nqueries; - NGTThrowException(msg); - } - continue; - } - queries.push_back(extractObject(idx, prop)); - objectRepository.erase(idx); - } - } - - void extractQueries(size_t nqueries, std::vector> &queries, bool similarObject = false) { - - NGT::Property prop; - index.getProperty(prop); - - size_t osize = index.getObjectRepositorySize(); - size_t interval = osize / nqueries; - size_t count = 0; - for (size_t id1 = 1; id1 < osize && count < nqueries; id1 += interval, count++) { - size_t oft = 0; - while (index.getObjectSpace().getRepository().isEmpty(id1 + oft)) { - oft++; - if (id1 + oft >= osize) { - std::stringstream msg; - msg << "Too many empty entries to extract. Object repository size=" << osize << " " << id1 << ":" << oft; - NGTThrowException(msg); - } - } - if (similarObject) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - NGT::Object *query = index.getObjectSpace().allocateObject(*index.getObjectSpace().getRepository().get(id1 + oft)); -#else - NGT::Object *query = index.getObjectSpace().getRepository().get(id1 + oft); -#endif - NGT::SearchContainer sc(*query); - NGT::ObjectDistances results; - sc.setResults(&results); - sc.setSize(nOfResults); - index.search(sc); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - index.getObjectSpace().deleteObject(query); -#endif - if (results.size() < 2) { - std::stringstream msg; - msg << "Cannot get even two results for queries."; - NGTThrowException(msg); - } - size_t id2 = 1; - for (size_t i = 1; i < results.size(); i++) { - if (results[i].distance > 0.0) { - id2 = results[i].id; - break; - } - } - queries.push_back(meanObject(id1 + oft, id2, prop)); - } else { - size_t id2 = id1 + oft + 1; - while (index.getObjectSpace().getRepository().isEmpty(id2)) { - id2++; - if (id2 >= osize) { - std::stringstream msg; - msg << "Too many empty entries to extract."; - NGTThrowException(msg); - } - } - queries.push_back(meanObject(id1 + oft, id2, prop)); - } - } - assert(count == nqueries); - - } - - - static void - extractQueries(Args &args) - { - const std::string usage = "Usage: ngt eval-query -n #-of-queries index"; - - std::string indexName; - try { - indexName = args.get("#1"); - } catch (...) { - std::cerr << "ngt: Error: DB is not specified" << std::endl; - std::cerr << usage << std::endl; - return; - } - size_t nqueries = args.getl("n", 1000); - - NGT::Index index(indexName); - NGT::Optimizer optimizer(index); - optimizer.extractQueries(nqueries, std::cout); - - } - - static void createGroundTruth(NGT::Index &index, double epsilon, Command::SearchParameter &searchParameter, std::stringstream &queries, std::stringstream >Stream){ - queries.clear(); - queries.seekg(0, std::ios_base::beg); - searchParameter.outputMode = 'e'; - searchParameter.beginOfEpsilon = searchParameter.endOfEpsilon = epsilon; - NGT::Command::search(index, searchParameter, queries, gtStream); - } - - static int - calculateMeanValues(std::vector &accuracies, double accuracyRangeFrom, double accuracyRangeTo, - size_t &size, double &meanDistanceCount, double &meanVisitCount, double &meanTime) { - int stat = 0; - size = 0; - if (accuracies.front().meanAccuracy > accuracyRangeFrom) { - stat = 0x1; - } - if (accuracies.back().meanAccuracy < accuracyRangeTo) { - stat |= 0x2; - } - if (stat != 0) { - return stat; - } - std::vector acc; - acc = accuracies; - for (auto start = acc.rbegin(); start != acc.rend(); ++start) { - if ((*start).meanAccuracy <= accuracyRangeFrom) { - ++start; - acc.erase(acc.begin(), start.base()); - break; - } - } - for (auto end = acc.begin(); end != acc.end(); ++end) { - if ((*end).meanAccuracy >= accuracyRangeTo) { - end++; - acc.erase(end, acc.end()); - break; - } - } - std::vector> distance; - std::vector> visit; - std::vector> time; - for (auto i = acc.begin(); i != acc.end(); ++i) { -#ifdef NGT_LOG_BASED_OPTIMIZATION - if ((*i).meanDistanceCount > 0.0) { - (*i).meanDistanceCount = log10((*i).meanDistanceCount); - } - if ((*i).meanVisitCount > 0.0) { - (*i).meanVisitCount = log10((*i).meanVisitCount); - } -#endif - distance.push_back(std::make_pair((*i).meanDistanceCount, (*i).meanAccuracy)); - visit.push_back(std::make_pair((*i).meanVisitCount, (*i).meanAccuracy)); - time.push_back(std::make_pair((*i).meanTime, (*i).meanAccuracy)); - } - { - size_t last = distance.size() - 1; - double xfrom = (distance[1].second * distance[0].first - distance[0].second * distance[1].first + - accuracyRangeFrom * (distance[1].first - distance[0].first)) / - (distance[1].second - distance[0].second); - double xto = (distance[last].second * distance[last - 1].first - distance[last - 1].second * distance[last].first + - accuracyRangeTo * (distance[last].first - distance[last - 1].first)) / - (distance[last].second - distance[last - 1].second); - distance[0].first = xfrom; - distance[0].second = accuracyRangeFrom; - distance[last].first = xto; - distance[last].second = accuracyRangeTo; - double area = 0.0; - for (size_t i = 0; i < distance.size() - 1; ++i) { - area += ((distance[i].first + distance[i + 1].first) * (distance[i + 1].second - distance[i].second)) / 2.0; - } - meanDistanceCount = area / (distance[last].second - distance[0].second); - } - { - size_t last = visit.size() - 1; - double xfrom = (visit[1].second * visit[0].first - visit[0].second * visit[1].first + - accuracyRangeFrom * (visit[1].first - visit[0].first)) / - (visit[1].second - visit[0].second); - double xto = (visit[last].second * visit[last - 1].first - visit[last - 1].second * visit[last].first + - accuracyRangeTo * (visit[last].first - visit[last - 1].first)) / - (visit[last].second - visit[last - 1].second); - visit[0].first = xfrom; - visit[0].second = accuracyRangeFrom; - visit[last].first = xto; - visit[last].second = accuracyRangeTo; - double area = 0.0; - for (size_t i = 0; i < visit.size() - 1; ++i) { - area += ((visit[i].first + visit[i + 1].first) * (visit[i + 1].second - visit[i].second)) / 2.0; - } - meanVisitCount = area / (visit[last].second - visit[0].second); - } - { - size_t last = time.size() - 1; - double xfrom = (time[1].second * time[0].first - time[0].second * time[1].first + - accuracyRangeFrom * (time[1].first - time[0].first)) / - (time[1].second - time[0].second); - double xto = (time[last].second * time[last - 1].first - time[last - 1].second * time[last].first + - accuracyRangeTo * (time[last].first - time[last - 1].first)) / - (time[last].second - time[last - 1].second); - time[0].first = xfrom; - time[0].second = accuracyRangeFrom; - time[last].first = xto; - time[last].second = accuracyRangeTo; - double area = 0.0; - for (size_t i = 0; i < time.size() - 1; ++i) { - area += ((time[i].first + time[i + 1].first) * (time[i + 1].second - time[i].second)) / 2.0; - } - meanTime = area / (time[last].second - time[0].second); - } - assert(distance.size() == time.size()); - size = distance.size(); - return 0; - } - - static void evaluate(Args &args) - { - const std::string usage = "Usage: ngt eval [-n number-of-results] [-m mode(r=recall)] [-g ground-truth-size] [-o output-mode] ground-truth search-result\n" - " Make a ground truth list (linear search): \n" - " ngt search -i s -n 20 -o e index query.list > ground-truth.list"; - - std::string gtFile, resultFile; - try { - gtFile = args.get("#1"); - } catch (...) { - std::cerr << "ground truth is not specified." << std::endl; - std::cerr << usage << std::endl; - return; - } - try { - resultFile = args.get("#2"); - } catch (...) { - std::cerr << "result file is not specified." << std::endl; - std::cerr << usage << std::endl; - return; - } - - size_t resultSize = args.getl("n", 0); - if (resultSize != 0) { - std::cerr << "The specified number of results=" << resultSize << std::endl; - } - - size_t groundTruthSize = args.getl("g", 0); - - bool recall = false; - if (args.getChar("m", '-') == 'r') { - std::cerr << "Recall" << std::endl; - recall = true; - } - char omode = args.getChar("o", '-'); - - std::ifstream resultStream(resultFile); - if (!resultStream) { - std::cerr << "Cannot open the specified target file. " << resultFile << std::endl; - std::cerr << usage << std::endl; - return; - } - - std::ifstream gtStream(gtFile); - if (!gtStream) { - std::cerr << "Cannot open the specified GT file. " << gtFile << std::endl; - std::cerr << usage << std::endl; - return; - } - - std::string type; - size_t actualResultSize = 0; - std::vector accuracies = - evaluate(gtStream, resultStream, type, actualResultSize, resultSize, groundTruthSize, recall); - - std::cout << "# # of evaluated resultant objects per query=" << actualResultSize << std::endl; - if (recall) { - std::cout << "# " << type << "\t# of Queries\tRecall\t"; - } else { - std::cout << "# " << type << "\t# of Queries\tPrecision\t"; - } - if (omode == 'd') { - std::cout << "# of computations\t# of visted nodes" << std::endl; - for (auto it = accuracies.begin(); it != accuracies.end(); ++it) { - std::cout << (*it).keyValue << "\t" << (*it).totalCount << "\t" << (*it).meanAccuracy << "\t" - << (*it).meanDistanceCount << "\t" << (*it).meanVisitCount << std::endl; - } - } else { - std::cout << "Time(msec)\t# of computations\t# of visted nodes" << std::endl; - for (auto it = accuracies.begin(); it != accuracies.end(); ++it) { - std::cout << (*it).keyValue << "\t" << (*it).totalCount << "\t" << (*it).meanAccuracy << "\t" << (*it).meanTime << "\t" - << (*it).meanDistanceCount << "\t" << (*it).meanVisitCount << std::endl; - } - } - - } - - void generatePseudoGroundTruth(size_t nOfQueries, float &maxEpsilon, std::stringstream &queryStream, std::stringstream >Stream) - { - std::vector> queries; - extractQueries(nOfQueries, queries); - generatePseudoGroundTruth(queries, maxEpsilon, queryStream, gtStream); - } - - void generatePseudoGroundTruth(std::vector> &queries, float &maxEpsilon, std::stringstream &queryStream, std::stringstream >Stream) - { - size_t nOfQueries = queries.size(); - maxEpsilon = 0.0; - { - std::vector queryObjects; - for (auto i = queries.begin(); i != queries.end(); ++i) { - queryObjects.push_back(index.allocateObject(*i)); - } - - int identityCount = 0; - std::vector lastDistances(nOfQueries); - double time = 0.0; - double step = 0.02; - for (float e = 0.0; e < 10.0; e += step) { - size_t idx; - bool identity = true; - NGT::Timer timer; - for (idx = 0; idx < queryObjects.size(); idx++) { - NGT::SearchContainer sc(*queryObjects[idx]); - NGT::ObjectDistances results; - sc.setResults(&results); - sc.setSize(nOfResults); - sc.setEpsilon(e); - timer.restart(); - index.search(sc); - timer.stop(); - NGT::Distance d = results.back().distance; - if (d != lastDistances[idx]) { - identity = false; - } - lastDistances[idx] = d; - } - if (e == 0.0) { - time = timer.time; - } - if (timer.time > time * 40.0) { - maxEpsilon = e; - break; - } - if (identity) { - identityCount++; - step *= 1.2; - if (identityCount > 5) { - maxEpsilon = e; - break; - } - } else { - identityCount = 0; - } - } - - for (auto i = queryObjects.begin(); i != queryObjects.end(); ++i) { - index.deleteObject(*i); - } - - } - - { - // generate (pseudo) ground truth data - NGT::Command::SearchParameter searchParameter; - searchParameter.size = nOfResults; - searchParameter.outputMode = 'e'; - searchParameter.edgeSize = 0; // get the best accuracy by using all edges - //searchParameter.indexType = 's'; // linear search - extractQueries(queries, queryStream); - NGT::Optimizer::createGroundTruth(index, maxEpsilon, searchParameter, queryStream, gtStream); - } - } - - static std::vector> - generateAccuracyTable(NGT::Index &index, size_t nOfResults = 50, size_t querySize = 100) { - - NGT::Property prop; - index.getProperty(prop); - if (prop.edgeSizeForSearch != 0 && prop.edgeSizeForSearch != -2) { - std::stringstream msg; - msg << "Optimizer::generateAccuracyTable: edgeSizeForSearch is invalid to call generateAccuracyTable, because accuracy 1.0 cannot be achieved with the setting. edgeSizeForSearch=" << prop.edgeSizeForSearch << "."; - NGTThrowException(msg); - } - - NGT::Optimizer optimizer(index, nOfResults); - - float maxEpsilon = 0.0; - std::stringstream queryStream; - std::stringstream gtStream; - - optimizer.generatePseudoGroundTruth(querySize, maxEpsilon, queryStream, gtStream); - - std::map map; - { - float interval = 0.05; - float prev = 0.0; - std::vector acc; - float epsilon = -0.6; - double accuracy; - do { - auto pair = map.find(epsilon); - if (pair == map.end()) { - NGT::Command::SearchParameter searchParameter; - searchParameter.outputMode = 'e'; - searchParameter.beginOfEpsilon = searchParameter.endOfEpsilon = epsilon; - queryStream.clear(); - queryStream.seekg(0, std::ios_base::beg); - NGT::Optimizer::search(index, queryStream, gtStream, searchParameter, acc); - if (acc.size() == 0) { - NGTThrowException("Fatal error! Cannot get any accuracy value."); - } - accuracy = acc[0].meanAccuracy; - map.insert(std::make_pair(epsilon, accuracy)); - } else { - accuracy = (*pair).second; - } - if (prev != 0.0) { - if (accuracy - prev < 0.02) { - interval *= 2.0; - } else if (accuracy - prev > 0.05 && interval > 0.0001) { - - epsilon -= interval; - interval /= 2.0; - accuracy = prev; - } - } - prev = accuracy; - epsilon += interval; - if (accuracy > 0.98 && epsilon > maxEpsilon) { - break; - } - } while (accuracy < 1.0); - } - - std::vector> epsilonAccuracyMap; - std::pair prev(0.0, -1.0); - for (auto i = map.begin(); i != map.end(); ++i) { - if (fabs((*i).first - prev.first) <= FLT_EPSILON) { - continue; - } - if ((*i).second - prev.second < DBL_EPSILON) { - continue; - } - epsilonAccuracyMap.push_back(*i); - if ((*i).second >= 1.0) { - break; - } - prev = *i; - } - - return epsilonAccuracyMap; - } - - NGT::Index &index; - size_t nOfResults; - StdOstreamRedirector redirector; - }; -}; // NGT - - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/PrimitiveComparator.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/PrimitiveComparator.h deleted file mode 100644 index 1bde606dab..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/PrimitiveComparator.h +++ /dev/null @@ -1,840 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include "NGT/defines.h" - -#if defined(NGT_NO_AVX) -// #warning "*** SIMD is *NOT* available! ***" -#else -#include -#endif - -namespace NGT { - -class MemoryCache { - public: - inline static void - prefetch(unsigned char* ptr, const size_t byteSizeOfObject) { -#if !defined(NGT_NO_AVX) - switch ((byteSizeOfObject - 1) >> 6) { - default: - case 28: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 27: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 26: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 25: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 24: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 23: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 22: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 21: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 20: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 19: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 18: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 17: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 16: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 15: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 14: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 13: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 12: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 11: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 10: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 9: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 8: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 7: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 6: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 5: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 4: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 3: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 2: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 1: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - case 0: - _mm_prefetch(ptr, _MM_HINT_T0); - ptr += 64; - break; - } -#endif - } - inline static void* - alignedAlloc(const size_t allocSize) { -#ifdef NGT_NO_AVX - return new uint8_t[allocSize]; -#else -#if defined(NGT_AVX512) - size_t alignment = 64; - uint64_t mask = 0xFFFFFFFFFFFFFFC0; -#elif defined(NGT_AVX2) - size_t alignment = 32; - uint64_t mask = 0xFFFFFFFFFFFFFFE0; -#else - size_t alignment = 16; - uint64_t mask = 0xFFFFFFFFFFFFFFF0; -#endif - uint8_t* p = new uint8_t[allocSize + alignment]; - uint8_t* ptr = p + alignment; - ptr = reinterpret_cast((reinterpret_cast(ptr) & mask)); - *p++ = 0xAB; - while (p != ptr) *p++ = 0xCD; - return ptr; -#endif - } - inline static void - alignedFree(void* ptr) { -#ifdef NGT_NO_AVX - delete[] static_cast(ptr); -#else - uint8_t* p = static_cast(ptr); - p--; - while (*p == 0xCD) p--; - if (*p != 0xAB) { - NGTThrowException("MemoryCache::alignedFree: Fatal Error! Cannot find allocated address."); - } - delete[] p; -#endif - } -}; - -class PrimitiveComparator { - public: - static double - absolute(double v) { - return fabs(v); - } - static int - absolute(int v) { - return abs(v); - } - -#if defined(NGT_NO_AVX) - template - inline static double - compareL2(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - const OBJECT_TYPE* last = a + size; - const OBJECT_TYPE* lastgroup = last - 3; - COMPARE_TYPE diff0, diff1, diff2, diff3; - double d = 0.0; - while (a < lastgroup) { - diff0 = (COMPARE_TYPE)(a[0] - b[0]); - diff1 = (COMPARE_TYPE)(a[1] - b[1]); - diff2 = (COMPARE_TYPE)(a[2] - b[2]); - diff3 = (COMPARE_TYPE)(a[3] - b[3]); - d += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3; - a += 4; - b += 4; - } - while (a < last) { - diff0 = (COMPARE_TYPE)(*a++ - *b++); - d += diff0 * diff0; - } -// return sqrt((double)d); - return d; - } - - inline static double - compareL2(const uint8_t* a, const uint8_t* b, size_t size) { - return compareL2(a, b, size); - } - - inline static double - compareL2(const float* a, const float* b, size_t size) { - return compareL2(a, b, size); - } - -#else - inline static double - compareL2(const float* a, const float* b, size_t size) { - const float* last = a + size; -#if defined(NGT_AVX512) - __m512 sum512 = _mm512_setzero_ps(); - while (a < last) { - __m512 v = _mm512_sub_ps(_mm512_loadu_ps(a), _mm512_loadu_ps(b)); - sum512 = _mm512_add_ps(sum512, _mm512_mul_ps(v, v)); - a += 16; - b += 16; - } - - __m256 sum256 = _mm256_add_ps(_mm512_extractf32x8_ps(sum512, 0), _mm512_extractf32x8_ps(sum512, 1)); - __m128 sum128 = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); -#elif defined(NGT_AVX2) - __m256 sum256 = _mm256_setzero_ps(); - __m256 v; - while (a < last) { - v = _mm256_sub_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b)); - sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v, v)); - a += 8; - b += 8; - v = _mm256_sub_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b)); - sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(v, v)); - a += 8; - b += 8; - } - __m128 sum128 = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); -#else - __m128 sum128 = _mm_setzero_ps(); - __m128 v; - while (a < last) { - v = _mm_sub_ps(_mm_loadu_ps(a), _mm_loadu_ps(b)); - sum128 = _mm_add_ps(sum128, _mm_mul_ps(v, v)); - a += 4; - b += 4; - v = _mm_sub_ps(_mm_loadu_ps(a), _mm_loadu_ps(b)); - sum128 = _mm_add_ps(sum128, _mm_mul_ps(v, v)); - a += 4; - b += 4; - v = _mm_sub_ps(_mm_loadu_ps(a), _mm_loadu_ps(b)); - sum128 = _mm_add_ps(sum128, _mm_mul_ps(v, v)); - a += 4; - b += 4; - v = _mm_sub_ps(_mm_loadu_ps(a), _mm_loadu_ps(b)); - sum128 = _mm_add_ps(sum128, _mm_mul_ps(v, v)); - a += 4; - b += 4; - } -#endif - - __attribute__((aligned(32))) float f[4]; - _mm_store_ps(f, sum128); - - double s = f[0] + f[1] + f[2] + f[3]; -// return sqrt(s); - return s; - } - - inline static double - compareL2(const unsigned char* a, const unsigned char* b, size_t size) { - __m128 sum = _mm_setzero_ps(); - const unsigned char* last = a + size; - const unsigned char* lastgroup = last - 7; - const __m128i zero = _mm_setzero_si128(); - while (a < lastgroup) { - __m128i x1 = _mm_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)a)); - __m128i x2 = _mm_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)b)); - x1 = _mm_subs_epi16(x1, x2); - __m128i v = _mm_mullo_epi16(x1, x1); - sum = _mm_add_ps(sum, _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, zero))); - sum = _mm_add_ps(sum, _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, zero))); - a += 8; - b += 8; - } - __attribute__((aligned(32))) float f[4]; - _mm_store_ps(f, sum); - double s = f[0] + f[1] + f[2] + f[3]; - while (a < last) { - int d = (int)*a++ - (int)*b++; - s += d * d; - } -// return sqrt(s); - return s; - } -#endif -#if defined(NGT_NO_AVX) - template - static double - compareL1(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - const OBJECT_TYPE* last = a + size; - const OBJECT_TYPE* lastgroup = last - 3; - COMPARE_TYPE diff0, diff1, diff2, diff3; - double d = 0.0; - while (a < lastgroup) { - diff0 = (COMPARE_TYPE)(a[0] - b[0]); - diff1 = (COMPARE_TYPE)(a[1] - b[1]); - diff2 = (COMPARE_TYPE)(a[2] - b[2]); - diff3 = (COMPARE_TYPE)(a[3] - b[3]); - d += absolute(diff0) + absolute(diff1) + absolute(diff2) + absolute(diff3); - a += 4; - b += 4; - } - while (a < last) { - diff0 = (COMPARE_TYPE)*a++ - (COMPARE_TYPE)*b++; - d += absolute(diff0); - } - return d; - } - - inline static double - compareL1(const uint8_t* a, const uint8_t* b, size_t size) { - return compareL1(a, b, size); - } - - inline static double - compareL1(const float* a, const float* b, size_t size) { - return compareL1(a, b, size); - } - -#else - inline static double - compareL1(const float* a, const float* b, size_t size) { - __m256 sum = _mm256_setzero_ps(); - const float* last = a + size; - const float* lastgroup = last - 7; - while (a < lastgroup) { - __m256 x1 = _mm256_sub_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b)); - const __m256 mask = _mm256_set1_ps(-0.0f); - __m256 v = _mm256_andnot_ps(mask, x1); - sum = _mm256_add_ps(sum, v); - a += 8; - b += 8; - } - __attribute__((aligned(32))) float f[8]; - _mm256_store_ps(f, sum); - double s = f[0] + f[1] + f[2] + f[3] + f[4] + f[5] + f[6] + f[7]; - while (a < last) { - double d = fabs(*a++ - *b++); - s += d; - } - return s; - } - inline static double - compareL1(const unsigned char* a, const unsigned char* b, size_t size) { - __m128 sum = _mm_setzero_ps(); - const unsigned char* last = a + size; - const unsigned char* lastgroup = last - 7; - const __m128i zero = _mm_setzero_si128(); - while (a < lastgroup) { - __m128i x1 = _mm_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)a)); - __m128i x2 = _mm_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)b)); - x1 = _mm_subs_epi16(x1, x2); - x1 = _mm_sign_epi16(x1, x1); - sum = _mm_add_ps(sum, _mm_cvtepi32_ps(_mm_unpacklo_epi16(x1, zero))); - sum = _mm_add_ps(sum, _mm_cvtepi32_ps(_mm_unpackhi_epi16(x1, zero))); - a += 8; - b += 8; - } - __attribute__((aligned(32))) float f[4]; - _mm_store_ps(f, sum); - double s = f[0] + f[1] + f[2] + f[3]; - while (a < last) { - double d = fabs((double)*a++ - (double)*b++); - s += d; - } - return s; - } -#endif - -#if defined(NGT_NO_AVX) || !defined(__POPCNT__) - inline static double - popCount(uint32_t x) { - x = (x & 0x55555555) + (x >> 1 & 0x55555555); - x = (x & 0x33333333) + (x >> 2 & 0x33333333); - x = (x & 0x0F0F0F0F) + (x >> 4 & 0x0F0F0F0F); - x = (x & 0x00FF00FF) + (x >> 8 & 0x00FF00FF); - x = (x & 0x0000FFFF) + (x >> 16 & 0x0000FFFF); - return x; - } - - template - inline static double - compareHammingDistance(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - const uint32_t* last = reinterpret_cast(a + size); - - const uint32_t* uinta = reinterpret_cast(a); - const uint32_t* uintb = reinterpret_cast(b); - size_t count = 0; - while (uinta < last) { - count += popCount(*uinta++ ^ *uintb++); - } - - return static_cast(count); - } -#else - template - inline static double - compareHammingDistance(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - const uint64_t* last = reinterpret_cast(a + size); - - const uint64_t* uinta = reinterpret_cast(a); - const uint64_t* uintb = reinterpret_cast(b); - size_t count = 0; - while (uinta < last) { - count += _mm_popcnt_u64(*uinta++ ^ *uintb++); - count += _mm_popcnt_u64(*uinta++ ^ *uintb++); - } - - return static_cast(count); - } -#endif - -#if defined(NGT_NO_AVX) || !defined(__POPCNT__) - template - inline static double - compareJaccardDistance(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - const uint32_t* last = reinterpret_cast(a + size); - - const uint32_t* uinta = reinterpret_cast(a); - const uint32_t* uintb = reinterpret_cast(b); - size_t count = 0; - size_t countDe = 0; - while (uinta < last) { - count += popCount(*uinta & *uintb); - countDe += popCount(*uinta++ | *uintb++); - count += popCount(*uinta & *uintb); - countDe += popCount(*uinta++ | *uintb++); - } - - return 1.0 - static_cast(count) / static_cast(countDe); - } -#else - template - inline static double - compareJaccardDistance(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - const uint64_t* last = reinterpret_cast(a + size); - - const uint64_t* uinta = reinterpret_cast(a); - const uint64_t* uintb = reinterpret_cast(b); - size_t count = 0; - size_t countDe = 0; - while (uinta < last) { - count += _mm_popcnt_u64(*uinta & *uintb); - countDe += _mm_popcnt_u64(*uinta++ | *uintb++); - count += _mm_popcnt_u64(*uinta & *uintb); - countDe += _mm_popcnt_u64(*uinta++ | *uintb++); - } - - return 1.0 - static_cast(count) / static_cast(countDe); - } -#endif - - inline static double - compareSparseJaccardDistance(const unsigned char* a, unsigned char* b, size_t size) { - abort(); - } - - inline static double - compareSparseJaccardDistance(const float* a, const float* b, size_t size) { - size_t loca = 0; - size_t locb = 0; - const uint32_t* ai = reinterpret_cast(a); - const uint32_t* bi = reinterpret_cast(b); - size_t count = 0; - while (locb < size && ai[loca] != 0 && bi[loca] != 0) { - int64_t sub = static_cast(ai[loca]) - static_cast(bi[locb]); - count += sub == 0; - loca += sub <= 0; - locb += sub >= 0; - } - while (ai[loca] != 0) { - loca++; - } - while (locb < size && bi[locb] != 0) { - locb++; - } - return 1.0 - static_cast(count) / static_cast(loca + locb - count); - } - -#if defined(NGT_NO_AVX) - template - inline static double - compareDotProduct(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - double sum = 0.0; - for (size_t loc = 0; loc < size; loc++) { - sum += (double)a[loc] * (double)b[loc]; - } - return sum; - } - - template - inline static double - compareInnerProduct(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - double sum = 0.0; - for (size_t loc = 0; loc < size; loc++) { - sum += (double)a[loc] * (double)b[loc]; -// sum += a[loc] * b[loc]; - } - return -sum; - } - - template - inline static double - compareCosine(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - double normA = 0.0; - double normB = 0.0; - double sum = 0.0; - for (size_t loc = 0; loc < size; loc++) { - normA += (double)a[loc] * (double)a[loc]; - normB += (double)b[loc] * (double)b[loc]; - sum += (double)a[loc] * (double)b[loc]; - } - - double cosine = sum / sqrt(normA * normB); - - return cosine; - } -#else - inline static double - compareDotProduct(const float* a, const float* b, size_t size) { - const float* last = a + size; -#if defined(NGT_AVX512) - __m512 sum512 = _mm512_setzero_ps(); - while (a < last) { - sum512 = _mm512_add_ps(sum512, _mm512_mul_ps(_mm512_loadu_ps(a), _mm512_loadu_ps(b))); - a += 16; - b += 16; - } - - __m256 sum256 = _mm256_add_ps(_mm512_extractf32x8_ps(sum512, 0), _mm512_extractf32x8_ps(sum512, 1)); - __m128 sum128 = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); -#elif defined(NGT_AVX2) - __m256 sum256 = _mm256_setzero_ps(); - while (a < last) { - sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b))); - a += 8; - b += 8; - } - __m128 sum128 = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); -#else - __m128 sum128 = _mm_setzero_ps(); - while (a < last) { - sum128 = _mm_add_ps(sum128, _mm_mul_ps(_mm_loadu_ps(a), _mm_loadu_ps(b))); - a += 4; - b += 4; - } -#endif - __attribute__((aligned(32))) float f[4]; - _mm_store_ps(f, sum128); - - double s = f[0] + f[1] + f[2] + f[3]; - return s; - } - - inline static double - compareInnerProduct(const float* a, const float* b, size_t size) { - const float* last = a + size; -#if defined(NGT_AVX512) - __m512 sum512 = _mm512_setzero_ps(); - while (a < last) { - sum512 = _mm512_add_ps(sum512, _mm512_mul_ps(_mm512_loadu_ps(a), _mm512_loadu_ps(b))); - a += 16; - b += 16; - } - - __m256 sum256 = _mm256_add_ps(_mm512_extractf32x8_ps(sum512, 0), _mm512_extractf32x8_ps(sum512, 1)); - __m128 sum128 = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); -#elif defined(NGT_AVX2) - __m256 sum256 = _mm256_setzero_ps(); - while (a < last) { - sum256 = _mm256_add_ps(sum256, _mm256_mul_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b))); - a += 8; - b += 8; - } - __m128 sum128 = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); -#else - __m128 sum128 = _mm_setzero_ps(); - while (a < last) { - sum128 = _mm_add_ps(sum128, _mm_mul_ps(_mm_loadu_ps(a), _mm_loadu_ps(b))); - a += 4; - b += 4; - } -#endif - __attribute__((aligned(32))) float f[4]; - _mm_store_ps(f, sum128); - - double s = f[0] + f[1] + f[2] + f[3]; - return -s; - } - - inline static double - compareDotProduct(const unsigned char* a, const unsigned char* b, size_t size) { - double sum = 0.0; - for (size_t loc = 0; loc < size; loc++) { - sum += (double)a[loc] * (double)b[loc]; - } - return sum; - } - - inline static double - compareInnerProduct(const unsigned char* a, const unsigned char* b, size_t size) { - double sum = 0.0; - for (size_t loc = 0; loc < size; loc++) { - sum += (double)a[loc] * (double)b[loc]; - } - return -sum; - } - - inline static double - compareCosine(const float* a, const float* b, size_t size) { - const float* last = a + size; -#if defined(NGT_AVX512) - __m512 normA = _mm512_setzero_ps(); - __m512 normB = _mm512_setzero_ps(); - __m512 sum = _mm512_setzero_ps(); - while (a < last) { - __m512 am = _mm512_loadu_ps(a); - __m512 bm = _mm512_loadu_ps(b); - normA = _mm512_add_ps(normA, _mm512_mul_ps(am, am)); - normB = _mm512_add_ps(normB, _mm512_mul_ps(bm, bm)); - sum = _mm512_add_ps(sum, _mm512_mul_ps(am, bm)); - a += 16; - b += 16; - } - __m256 am256 = _mm256_add_ps(_mm512_extractf32x8_ps(normA, 0), _mm512_extractf32x8_ps(normA, 1)); - __m256 bm256 = _mm256_add_ps(_mm512_extractf32x8_ps(normB, 0), _mm512_extractf32x8_ps(normB, 1)); - __m256 s256 = _mm256_add_ps(_mm512_extractf32x8_ps(sum, 0), _mm512_extractf32x8_ps(sum, 1)); - __m128 am128 = _mm_add_ps(_mm256_extractf128_ps(am256, 0), _mm256_extractf128_ps(am256, 1)); - __m128 bm128 = _mm_add_ps(_mm256_extractf128_ps(bm256, 0), _mm256_extractf128_ps(bm256, 1)); - __m128 s128 = _mm_add_ps(_mm256_extractf128_ps(s256, 0), _mm256_extractf128_ps(s256, 1)); -#elif defined(NGT_AVX2) - __m256 normA = _mm256_setzero_ps(); - __m256 normB = _mm256_setzero_ps(); - __m256 sum = _mm256_setzero_ps(); - __m256 am, bm; - while (a < last) { - am = _mm256_loadu_ps(a); - bm = _mm256_loadu_ps(b); - normA = _mm256_add_ps(normA, _mm256_mul_ps(am, am)); - normB = _mm256_add_ps(normB, _mm256_mul_ps(bm, bm)); - sum = _mm256_add_ps(sum, _mm256_mul_ps(am, bm)); - a += 8; - b += 8; - } - __m128 am128 = _mm_add_ps(_mm256_extractf128_ps(normA, 0), _mm256_extractf128_ps(normA, 1)); - __m128 bm128 = _mm_add_ps(_mm256_extractf128_ps(normB, 0), _mm256_extractf128_ps(normB, 1)); - __m128 s128 = _mm_add_ps(_mm256_extractf128_ps(sum, 0), _mm256_extractf128_ps(sum, 1)); -#else - __m128 am128 = _mm_setzero_ps(); - __m128 bm128 = _mm_setzero_ps(); - __m128 s128 = _mm_setzero_ps(); - __m128 am, bm; - while (a < last) { - am = _mm_loadu_ps(a); - bm = _mm_loadu_ps(b); - am128 = _mm_add_ps(am128, _mm_mul_ps(am, am)); - bm128 = _mm_add_ps(bm128, _mm_mul_ps(bm, bm)); - s128 = _mm_add_ps(s128, _mm_mul_ps(am, bm)); - a += 4; - b += 4; - } - -#endif - - __attribute__((aligned(32))) float f[4]; - _mm_store_ps(f, am128); - double na = f[0] + f[1] + f[2] + f[3]; - _mm_store_ps(f, bm128); - double nb = f[0] + f[1] + f[2] + f[3]; - _mm_store_ps(f, s128); - double s = f[0] + f[1] + f[2] + f[3]; - - double cosine = s / sqrt(na * nb); - return cosine; - } - - inline static double - compareCosine(const unsigned char* a, const unsigned char* b, size_t size) { - double normA = 0.0; - double normB = 0.0; - double sum = 0.0; - for (size_t loc = 0; loc < size; loc++) { - normA += (double)a[loc] * (double)a[loc]; - normB += (double)b[loc] * (double)b[loc]; - sum += (double)a[loc] * (double)b[loc]; - } - - double cosine = sum / sqrt(normA * normB); - - return cosine; - } -#endif // #if defined(NGT_NO_AVX) - - template - inline static double - compareAngleDistance(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - double cosine = compareCosine(a, b, size); - if (cosine >= 1.0) { - return 0.0; - } else if (cosine <= -1.0) { - return acos(-1.0); - } else { - return acos(cosine); - } - } - - template - inline static double - compareNormalizedAngleDistance(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - double cosine = compareDotProduct(a, b, size); - if (cosine >= 1.0) { - return 0.0; - } else if (cosine <= -1.0) { - return acos(-1.0); - } else { - return acos(cosine); - } - } - - template - inline static double - compareCosineSimilarity(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - return 1.0 - compareCosine(a, b, size); - } - - template - inline static double - compareNormalizedCosineSimilarity(const OBJECT_TYPE* a, const OBJECT_TYPE* b, size_t size) { - double v = 1.0 - compareDotProduct(a, b, size); - return v < 0.0 ? 0.0 : v; - } - - class L1Uint8 { - public: - inline static double - compare(const void* a, const void* b, size_t size) { - return PrimitiveComparator::compareL1((const uint8_t*)a, (const uint8_t*)b, size); - } - }; - - class L2Uint8 { - public: - inline static double - compare(const void* a, const void* b, size_t size) { - return PrimitiveComparator::compareL2((const uint8_t*)a, (const uint8_t*)b, size); - } - }; - - class HammingUint8 { - public: - inline static double - compare(const void* a, const void* b, size_t size) { - return PrimitiveComparator::compareHammingDistance((const uint8_t*)a, (const uint8_t*)b, size); - } - }; - - class JaccardUint8 { - public: - inline static double - compare(const void* a, const void* b, size_t size) { - return PrimitiveComparator::compareJaccardDistance((const uint8_t*)a, (const uint8_t*)b, size); - } - }; - - class SparseJaccardFloat { - public: - inline static double - compare(const void* a, const void* b, size_t size) { - return PrimitiveComparator::compareSparseJaccardDistance((const float*)a, (const float*)b, size); - } - }; - - class L2Float { - public: - inline static double - compare(const void* a, const void* b, size_t size) { -#if defined(NGT_NO_AVX) - return PrimitiveComparator::compareL2((const float*)a, (const float*)b, size); -#else - return PrimitiveComparator::compareL2((const float*)a, (const float*)b, size); -#endif - } - }; - - class L1Float { - public: - inline static double - compare(const void* a, const void* b, size_t size) { - return PrimitiveComparator::compareL1((const float*)a, (const float*)b, size); - } - }; - - class CosineSimilarityFloat { - public: - inline static double - compare(const void* a, const void* b, size_t size) { - return PrimitiveComparator::compareCosineSimilarity((const float*)a, (const float*)b, size); - } - }; - - class AngleFloat { - public: - inline static double - compare(const void* a, const void* b, size_t size) { - return PrimitiveComparator::compareAngleDistance((const float*)a, (const float*)b, size); - } - }; - - class NormalizedCosineSimilarityFloat { - public: - inline static double - compare(const void* a, const void* b, size_t size) { - return PrimitiveComparator::compareNormalizedCosineSimilarity((const float*)a, (const float*)b, size); - } - }; - - class NormalizedAngleFloat { - public: - inline static double - compare(const void* a, const void* b, size_t size) { - return PrimitiveComparator::compareNormalizedAngleDistance((const float*)a, (const float*)b, size); - } - }; -}; - -} // namespace NGT - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/SharedMemoryAllocator.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/SharedMemoryAllocator.cpp deleted file mode 100644 index af5d0efdee..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/SharedMemoryAllocator.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "NGT/SharedMemoryAllocator.h" - - - -void* operator -new(size_t size, SharedMemoryAllocator &allocator) -{ - void *addr = allocator.allocate(size); -#ifdef MEMORY_ALLOCATOR_INFO - std::cerr << "new:" << size << " " << addr << " " << allocator.getTotalSize() << std::endl; -#endif - return addr; -} - -void* operator -new[](size_t size, SharedMemoryAllocator &allocator) -{ - - void *addr = allocator.allocate(size); -#ifdef MEMORY_ALLOCATOR_INFO - std::cerr << "new[]:" << size << " " << addr << " " << allocator.getTotalSize() << std::endl; -#endif - return addr; -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/SharedMemoryAllocator.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/SharedMemoryAllocator.h deleted file mode 100644 index 907cf3868c..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/SharedMemoryAllocator.h +++ /dev/null @@ -1,209 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include "NGT/defines.h" -#include "NGT/MmapManager.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#define MMAP_MANAGER - - - -/////////////////////////////////////////////////////////////////////// -class SharedMemoryAllocator { - public: - enum GetMemorySizeType { - GetTotalMemorySize = 0, - GetAllocatedMemorySize = 1, - GetFreedMemorySize = 2 - }; - - SharedMemoryAllocator():isValid(false) { -#ifdef SMA_TRACE - std::cerr << "SharedMemoryAllocatorSiglton::constructor" << std::endl; -#endif - } - SharedMemoryAllocator(const SharedMemoryAllocator& a){} - SharedMemoryAllocator& operator=(const SharedMemoryAllocator& a){ return *this; } - public: - void* allocate(size_t size) { - if (isValid == false) { - std::cerr << "SharedMemoryAllocator::allocate: Fatal error! " << std::endl; - assert(isValid); - } -#ifdef SMA_TRACE - std::cerr << "SharedMemoryAllocator::allocate: size=" << size << std::endl; - std::cerr << "SharedMemoryAllocator::allocate: before " << getTotalSize() << ":" << getAllocatedSize() << ":" << getFreedSize() << std::endl; -#endif -#if defined(MMAP_MANAGER) && !defined(NOT_USE_MMAP_ALLOCATOR) - if(!isValid){ - return NULL; - } - off_t file_offset = mmanager->alloc(size, true); - if (file_offset == -1) { - std::cerr << "Fatal Error: Allocating memory size is too big for this settings." << std::endl; - std::cerr << " Max allocation size should be enlarged." << std::endl; - abort(); - } - void *p = mmanager->getAbsAddr(file_offset); - std::memset(p, 0, size); -#ifdef SMA_TRACE - std::cerr << "SharedMemoryAllocator::allocate: end" <init(filePath, size, &option)){ -#ifdef SMA_TRACE - std::cerr << "SMA: info. already existed." << std::endl; -#endif - create = false; - } else { -#ifdef SMA_TRACE - std::cerr << "SMA::construct: msize=" << msize << ":" << memorysize << std::endl; -#endif - } - if(!mmanager->openMemory(filePath)){ - std::cerr << "SMA: open error" << std::endl; - return 0; - } - if (!create) { -#ifdef SMA_TRACE - std::cerr << "SMA: get hook to initialize data structure" << std::endl; -#endif - hook = mmanager->getEntryHook(); - assert(hook != 0); - } -#endif - isValid = true; -#ifdef SMA_TRACE - std::cerr << "SharedMemoryAllocator::construct: " << filePath << " total=" - << getTotalSize() << " allocated=" << getAllocatedSize() << " freed=" - << getFreedSize() << " (" << (double)getFreedSize() / (double)getTotalSize() << ") " << std::endl; -#endif - return hook; - } - void destruct() { - if (!isValid) { - return; - } - isValid = false; -#ifdef MMAP_MANAGER - mmanager->closeMemory(); - delete mmanager; -#endif - }; - void setEntry(void *entry) { -#ifdef MMAP_MANAGER - mmanager->setEntryHook(entry); -#endif - } - void *getAddr(off_t oft) { - if (oft == 0) { - return 0; - } - assert(oft > 0); -#if defined(MMAP_MANAGER) && !defined(NOT_USE_MMAP_ALLOCATOR) - return mmanager->getAbsAddr(oft); -#else - return (void*)oft; -#endif - } - off_t getOffset(void *adr) { - if (adr == 0) { - return 0; - } -#if defined(MMAP_MANAGER) && !defined(NOT_USE_MMAP_ALLOCATOR) - return mmanager->getRelAddr(adr); -#else - return (off_t)adr; -#endif - } - size_t getMemorySize(GetMemorySizeType t) { - switch (t) { - case GetTotalMemorySize : return getTotalSize(); - case GetAllocatedMemorySize : return getAllocatedSize(); - case GetFreedMemorySize : return getFreedSize(); - } - return getTotalSize(); - } - size_t getTotalSize() { return mmanager->getTotalSize(); } - size_t getAllocatedSize() { return mmanager->getUseSize(); } - size_t getFreedSize() { return mmanager->getFreeSize(); } - - bool isValid; - std::string file; -#ifdef MMAP_MANAGER - MemoryManager::MmapManager *mmanager; -#endif -}; - -///////////////////////////////////////////////////////////////////////// - -void* operator new(size_t size, SharedMemoryAllocator &allocator); -void* operator new[](size_t size, SharedMemoryAllocator &allocator); diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Thread.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/Thread.cpp deleted file mode 100644 index 93b23733ff..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Thread.cpp +++ /dev/null @@ -1,128 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include - -#include "Thread.h" - -using namespace std; -using namespace NGT; - -namespace NGT { -class ThreadInfo { - public: - pthread_t threadid; - pthread_attr_t threadAttr; -}; - -class ThreadMutex { - public: - pthread_mutex_t mutex; - pthread_cond_t condition; -}; -} - -Thread::Thread() { - threadInfo = new ThreadInfo; - threadInfo->threadid = 0; - threadNo = -1; - isTerminate = false; -} - -Thread::~Thread() { - if (threadInfo != 0) { - delete threadInfo; - } -} - -ThreadMutex * -Thread::constructThreadMutex() -{ - return new ThreadMutex; -} - -void -Thread::destructThreadMutex(ThreadMutex *t) -{ - if (t != 0) { - pthread_mutex_destroy(&(t->mutex)); - pthread_cond_destroy(&(t->condition)); - delete t; - } -} - -int -Thread::start() -{ - pthread_attr_init(&(threadInfo->threadAttr)); - size_t stackSize = 0; - pthread_attr_getstacksize(&(threadInfo->threadAttr), &stackSize); - if (stackSize < 0xa00000) { // 64bit stack size - stackSize *= 4; - } - pthread_attr_setstacksize(&(threadInfo->threadAttr), stackSize); - pthread_attr_getstacksize(&(threadInfo->threadAttr), &stackSize); - return pthread_create(&(threadInfo->threadid), &(threadInfo->threadAttr), Thread::startThread, this); - -} - -int -Thread::join() -{ - return pthread_join(threadInfo->threadid, 0); -} - -void -Thread::lock(ThreadMutex &m) -{ - pthread_mutex_lock(&m.mutex); -} -void -Thread::unlock(ThreadMutex &m) -{ - pthread_mutex_unlock(&m.mutex); -} -void -Thread::signal(ThreadMutex &m) -{ - pthread_cond_signal(&m.condition); -} - -void -Thread::wait(ThreadMutex &m) -{ - if (pthread_cond_wait(&m.condition, &m.mutex) != 0) { - cerr << "waitForSignalFromThread: internal error" << endl; - NGTThrowException("waitForSignalFromThread: internal error"); - } -} - -void -Thread::broadcast(ThreadMutex &m) -{ - pthread_cond_broadcast(&m.condition); -} - -void -Thread::mutexInit(ThreadMutex &m) -{ - if (pthread_mutex_init(&m.mutex, NULL) != 0) { - NGTThrowException("Thread::mutexInit: Cannot initialize mutex"); - } - if (pthread_cond_init(&m.condition, NULL) != 0) { - NGTThrowException("Thread::mutexInit: Cannot initialize condition"); - } -} diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Thread.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Thread.h deleted file mode 100644 index 45208a98c3..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Thread.h +++ /dev/null @@ -1,291 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include "NGT/Common.h" - -#include -#include -#include -#include - -#include -#include - -namespace NGT { -void * evaluate_responce(void *); - -class ThreadTerminationException : public Exception { - public: - ThreadTerminationException(const std::string &file, size_t line, std::stringstream &m) { set(file, line, m.str()); } - ThreadTerminationException(const std::string &file, size_t line, const std::string &m) { set(file, line, m); } -}; - -class ThreadInfo; -class ThreadMutex; - -class Thread -{ - public: - Thread(); - - virtual ~Thread(); - virtual int start(); - - virtual int join(); - - static ThreadMutex *constructThreadMutex(); - static void destructThreadMutex(ThreadMutex *t); - - static void mutexInit(ThreadMutex &m); - - static void lock(ThreadMutex &m); - static void unlock(ThreadMutex &m); - static void signal(ThreadMutex &m); - static void wait(ThreadMutex &m); - static void broadcast(ThreadMutex &m); - - protected: - virtual int run() { - return 0; - } - - private: - static void* startThread(void *thread) { - if (thread == 0) { - return 0; - } - Thread* p = (Thread*)thread; - p->run(); - return thread; - } - - public: - int threadNo; - bool isTerminate; - - protected: - ThreadInfo *threadInfo; -}; - -template -class ThreadPool { - public: - class JobQueue : public std::deque { - public: - JobQueue() { - threadMutex = Thread::constructThreadMutex(); - Thread::mutexInit(*threadMutex); - } - ~JobQueue() { - Thread::destructThreadMutex(threadMutex); - } - bool isDeficient() { return std::deque::size() <= requestSize; } - bool isEmpty() { return std::deque::size() == 0; } - bool isFull() { return std::deque::size() >= maxSize; } - void setRequestSize(int s) { requestSize = s; } - void setMaxSize(int s) { maxSize = s; } - void lock() { Thread::lock(*threadMutex); } - void unlock() { Thread::unlock(*threadMutex); } - void signal() { Thread::signal(*threadMutex); } - void wait() { Thread::wait(*threadMutex); } - void wait(JobQueue &q) { wait(*q.threadMutex); } - void broadcast() { Thread::broadcast(*threadMutex); } - unsigned int requestSize; - unsigned int maxSize; - ThreadMutex *threadMutex; - }; - class InputJobQueue : public JobQueue { - public: - InputJobQueue() { - isTerminate = false; - underPushing = false; - pushedSize = 0; - } - - void popFront(JOB &d) { - JobQueue::lock(); - while (JobQueue::isEmpty()) { - if (isTerminate) { - JobQueue::unlock(); - NGTThrowSpecificException("Thread::termination", ThreadTerminationException); - } - JobQueue::wait(); - } - d = std::deque::front(); - std::deque::pop_front(); - JobQueue::unlock(); - return; - } - - void popFront(std::deque &d, size_t s) { - JobQueue::lock(); - while (JobQueue::isEmpty()) { - if (isTerminate) { - JobQueue::unlock(); - NGTThrowSpecificException("Thread::termination", ThreadTerminationException); - } - JobQueue::wait(); - } - for (size_t i = 0; i < s; i++) { - d.push_back(std::deque::front()); - std::deque::pop_front(); - if (JobQueue::isEmpty()) { - break; - } - } - JobQueue::unlock(); - return; - } - - void pushBack(JOB &data) { - JobQueue::lock(); - if (!underPushing) { - underPushing = true; - pushedSize = 0; - } - pushedSize++; - std::deque::push_back(data); - JobQueue::unlock(); - JobQueue::signal(); - } - - void pushBackEnd() { - underPushing = false; - } - - void terminate() { - JobQueue::lock(); - if (underPushing || !JobQueue::isEmpty()) { - JobQueue::unlock(); - NGTThrowException("Thread::teminate:Under pushing!"); - } - isTerminate = true; - JobQueue::unlock(); - JobQueue::broadcast(); - } - - bool isTerminate; - bool underPushing; - size_t pushedSize; - - }; - - class OutputJobQueue : public JobQueue { - public: - void waitForFull() { - JobQueue::wait(); - JobQueue::unlock(); - } - - void pushBack(JOB &data) { - JobQueue::lock(); - std::deque::push_back(data); - if (!JobQueue::isFull()) { - JobQueue::unlock(); - return; - } - JobQueue::unlock(); - JobQueue::signal(); - } - - }; - - class SharedData { - public: - SharedData():isAvailable(false) { - inputJobs.requestSize = 5; - inputJobs.maxSize = 50; - } - SHARED_DATA sharedData; - InputJobQueue inputJobs; - OutputJobQueue outputJobs; - bool isAvailable; - }; - - class Thread : public THREAD { - public: - SHARED_DATA &getSharedData() { - if (threadPool->sharedData.isAvailable) { - return threadPool->sharedData.sharedData; - } else { - NGTThrowException("Thread::getSharedData: Shared data is unavailable. No set yet."); - } - } - InputJobQueue &getInputJobQueue() { - return threadPool->sharedData.inputJobs; - } - OutputJobQueue &getOutputJobQueue() { - return threadPool->sharedData.outputJobs; - } - ThreadPool *threadPool; - }; - - ThreadPool(int s) { - size = s; - threads = new Thread[s]; - } - - ~ThreadPool() { - delete[] threads; - } - - void setSharedData(SHARED_DATA d) { - sharedData.sharedData = d; - sharedData.isAvailable = true; - } - - void create() { - for (unsigned int i = 0; i < size; i++) { - threads[i].threadPool = this; - threads[i].threadNo = i; - threads[i].start(); - } - } - - void pushInputQueue(JOB &data) { - if (!sharedData.inputJobs.underPushing) { - sharedData.outputJobs.lock(); - } - sharedData.inputJobs.pushBack(data); - } - - void waitForFinish() { - sharedData.inputJobs.pushBackEnd(); - sharedData.outputJobs.setMaxSize(sharedData.inputJobs.pushedSize); - sharedData.inputJobs.pushedSize = 0; - sharedData.outputJobs.waitForFull(); - } - - void terminate() { - sharedData.inputJobs.terminate(); - for (unsigned int i = 0; i < size; i++) { - threads[i].join(); - } - } - - InputJobQueue &getInputJobQueue() { return sharedData.inputJobs; } - OutputJobQueue &getOutputJobQueue() { return sharedData.outputJobs; } - - SharedData sharedData; // shared data - Thread *threads; // thread set - unsigned int size; // thread size - -}; - -} - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Tree.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/Tree.cpp deleted file mode 100644 index 89547fe03d..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Tree.cpp +++ /dev/null @@ -1,564 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "NGT/defines.h" - -#include "NGT/Tree.h" -#include "NGT/Node.h" - -#include - -using namespace std; -using namespace NGT; - -void -DVPTree::insert(InsertContainer &iobj) { - SearchContainer q(iobj.object); - q.mode = SearchContainer::SearchLeaf; - q.vptree = this; - q.radius = 0.0; - - search(q); - - iobj.vptree = this; - - assert(q.nodeID.getType() == Node::ID::Leaf); - LeafNode *ln = (LeafNode*)getNode(q.nodeID); - insert(iobj, ln); - - return; -} - -void -DVPTree::insert(InsertContainer &iobj, LeafNode *leafNode) -{ - LeafNode &leaf = *leafNode; - size_t fsize = leaf.getObjectSize(); - if (fsize != 0) { - NGT::ObjectSpace::Comparator &comparator = objectSpace->getComparator(); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - Distance d = comparator(iobj.object, leaf.getPivot(*objectSpace)); -#else - Distance d = comparator(iobj.object, leaf.getPivot()); -#endif - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::ObjectDistance *objects = leaf.getObjectIDs(leafNodes.allocator); -#else - NGT::ObjectDistance *objects = leaf.getObjectIDs(); -#endif - - for (size_t i = 0; i < fsize; i++) { - if (objects[i].distance == d) { - Distance idd = 0.0; - ObjectID loid; - try { - loid = objects[i].id; - idd = comparator(iobj.object, *getObjectRepository().get(loid)); - } catch (Exception &e) { - stringstream msg; - msg << "LeafNode::insert: Cannot find object which belongs to a leaf node. id=" - << objects[i].id << ":" << e.what() << endl; - NGTThrowException(msg.str()); - } - if (idd == 0.0) { - if (loid == iobj.id) { - stringstream msg; - msg << "DVPTree::insert:already existed. " << iobj.id; - NGTThrowException(msg); - } - return; - } - } - } - } - - if (leaf.getObjectSize() >= leafObjectsSize) { - split(iobj, leaf); - } else { - insertObject(iobj, leaf); - } - - return; -} -Node::ID -DVPTree::split(InsertContainer &iobj, LeafNode &leaf) -{ - Node::Objects *fs = getObjects(leaf, iobj); - int pv = DVPTree::MaxVariance; - switch (splitMode) { - case DVPTree::MaxVariance: - pv = LeafNode::selectPivotByMaxVariance(iobj, *fs); - break; - case DVPTree::MaxDistance: - pv = LeafNode::selectPivotByMaxDistance(iobj, *fs); - break; - } - - LeafNode::splitObjects(iobj, *fs, pv); - - Node::ID nid = recombineNodes(iobj, *fs, leaf); - delete fs; - - return nid; -} - -Node::ID -DVPTree::recombineNodes(InsertContainer &ic, Node::Objects &fs, LeafNode &leaf) -{ - LeafNode *ln[internalChildrenSize]; - Node::ID targetParent = leaf.parent; - Node::ID targetId = leaf.id; - ln[0] = &leaf; - ln[0]->objectSize = 0; -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - for (size_t i = 1; i < internalChildrenSize; i++) { - ln[i] = new(leafNodes.allocator) LeafNode(leafNodes.allocator); - } -#else - for (size_t i = 1; i < internalChildrenSize; i++) { - ln[i] = new LeafNode; - } -#endif - InternalNode *in = createInternalNode(); - Node::ID inid = in->id; - try { - if (targetParent.getID() != 0) { - InternalNode &pnode = *(InternalNode*)getNode(targetParent); - for (size_t i = 0; i < internalChildrenSize; i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (pnode.getChildren(internalNodes.allocator)[i] == targetId) { - pnode.getChildren(internalNodes.allocator)[i] = inid; -#else - if (pnode.getChildren()[i] == targetId) { - pnode.getChildren()[i] = inid; -#endif - break; - } - } - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - in->setPivot(*getObjectRepository().get(fs[0].id), *objectSpace, internalNodes.allocator); -#else - in->setPivot(*getObjectRepository().get(fs[0].id), *objectSpace); -#endif - - in->parent = targetParent; - - int fsize = fs.size(); - int cid = fs[0].clusterID; -#ifdef NGT_NODE_USE_VECTOR - LeafNode::ObjectIDs fid; - fid.id = fs[0].id; - fid.distance = 0.0; - ln[cid]->objectIDs.push_back(fid); -#else -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - ln[cid]->getObjectIDs(leafNodes.allocator)[ln[cid]->objectSize].id = fs[0].id; - ln[cid]->getObjectIDs(leafNodes.allocator)[ln[cid]->objectSize++].distance = 0.0; -#else - ln[cid]->getObjectIDs()[ln[cid]->objectSize].id = fs[0].id; - ln[cid]->getObjectIDs()[ln[cid]->objectSize++].distance = 0.0; -#endif -#endif - if (fs[0].leafDistance == Node::Object::Pivot) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - ln[cid]->setPivot(*getObjectRepository().get(fs[0].id), *objectSpace, leafNodes.allocator); -#else - ln[cid]->setPivot(*getObjectRepository().get(fs[0].id), *objectSpace); -#endif - } else { - NGTThrowException("recombineNodes: internal error : illegal pivot."); - } - ln[cid]->parent = inid; - int maxClusterID = cid; - for (int i = 1; i < fsize; i++) { - int clusterID = fs[i].clusterID; - if (clusterID > maxClusterID) { - maxClusterID = clusterID; - } - Distance ld; - if (fs[i].leafDistance == Node::Object::Pivot) { - // pivot -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - ln[clusterID]->setPivot(*getObjectRepository().get(fs[i].id), *objectSpace, leafNodes.allocator); -#else - ln[clusterID]->setPivot(*getObjectRepository().get(fs[i].id), *objectSpace); -#endif - ld = 0.0; - } else { - ld = fs[i].leafDistance; - } - -#ifdef NGT_NODE_USE_VECTOR - fid.id = fs[i].id; - fid.distance = ld; - ln[clusterID]->objectIDs.push_back(fid); -#else -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - ln[clusterID]->getObjectIDs(leafNodes.allocator)[ln[clusterID]->objectSize].id = fs[i].id; - ln[clusterID]->getObjectIDs(leafNodes.allocator)[ln[clusterID]->objectSize++].distance = ld; -#else - ln[clusterID]->getObjectIDs()[ln[clusterID]->objectSize].id = fs[i].id; - ln[clusterID]->getObjectIDs()[ln[clusterID]->objectSize++].distance = ld; -#endif -#endif - ln[clusterID]->parent = inid; - if (clusterID != cid) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - in->getBorders(internalNodes.allocator)[cid] = fs[i].distance; -#else - in->getBorders()[cid] = fs[i].distance; -#endif - cid = fs[i].clusterID; - } - } - // When the number of the children is less than the expected, - // proper values are set to the empty children. - for (size_t i = maxClusterID + 1; i < internalChildrenSize; i++) { - ln[i]->parent = inid; - // dummy -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - ln[i]->setPivot(*getObjectRepository().get(fs[0].id), *objectSpace, leafNodes.allocator); -#else - ln[i]->setPivot(*getObjectRepository().get(fs[0].id), *objectSpace); -#endif - if (i < (internalChildrenSize - 1)) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - in->getBorders(internalNodes.allocator)[i] = FLT_MAX; -#else - in->getBorders()[i] = FLT_MAX; -#endif - } - } - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - in->getChildren(internalNodes.allocator)[0] = targetId; -#else - in->getChildren()[0] = targetId; -#endif - for (size_t i = 1; i < internalChildrenSize; i++) { - insertNode(ln[i]); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - in->getChildren(internalNodes.allocator)[i] = ln[i]->id; -#else - in->getChildren()[i] = ln[i]->id; -#endif - } - } catch(Exception &e) { - throw e; - } - return inid; -} - -void -DVPTree::insertObject(InsertContainer &ic, LeafNode &leaf) { - if (leaf.getObjectSize() == 0) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - leaf.setPivot(*getObjectRepository().get(ic.id), *objectSpace, leafNodes.allocator); -#else - leaf.setPivot(*getObjectRepository().get(ic.id), *objectSpace); -#endif -#ifdef NGT_NODE_USE_VECTOR - LeafNode::ObjectIDs fid; - fid.id = ic.id; - fid.distance = 0; - leaf.objectIDs.push_back(fid); -#else -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - leaf.getObjectIDs(leafNodes.allocator)[leaf.objectSize].id = ic.id; - leaf.getObjectIDs(leafNodes.allocator)[leaf.objectSize++].distance = 0; -#else - leaf.getObjectIDs()[leaf.objectSize].id = ic.id; - leaf.getObjectIDs()[leaf.objectSize++].distance = 0; -#endif -#endif - } else { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - Distance d = objectSpace->getComparator()(ic.object, leaf.getPivot(*objectSpace)); -#else - Distance d = objectSpace->getComparator()(ic.object, leaf.getPivot()); -#endif - -#ifdef NGT_NODE_USE_VECTOR - LeafNode::ObjectIDs fid; - fid.id = ic.id; - fid.distance = d; - leaf.objectIDs.push_back(fid); - std::sort(leaf.objectIDs.begin(), leaf.objectIDs.end(), LeafNode::ObjectIDs()); -#else -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - leaf.getObjectIDs(leafNodes.allocator)[leaf.objectSize].id = ic.id; - leaf.getObjectIDs(leafNodes.allocator)[leaf.objectSize++].distance = d; -#else - leaf.getObjectIDs()[leaf.objectSize].id = ic.id; - leaf.getObjectIDs()[leaf.objectSize++].distance = d; -#endif -#endif - } -} - -Node::Objects * -DVPTree::getObjects(LeafNode &n, Container &iobj) -{ - int size = n.getObjectSize() + 1; - - Node::Objects *fs = new Node::Objects(size); - for (size_t i = 0; i < n.getObjectSize(); i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - (*fs)[i].object = getObjectRepository().get(n.getObjectIDs(leafNodes.allocator)[i].id); - (*fs)[i].id = n.getObjectIDs(leafNodes.allocator)[i].id; -#else - (*fs)[i].object = getObjectRepository().get(n.getObjectIDs()[i].id); - (*fs)[i].id = n.getObjectIDs()[i].id; -#endif - } -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - (*fs)[n.getObjectSize()].object = getObjectRepository().get(iobj.id); -#else - (*fs)[n.getObjectSize()].object = &iobj.object; -#endif - (*fs)[n.getObjectSize()].id = iobj.id; - return fs; -} - -void -DVPTree::removeEmptyNodes(InternalNode &inode) { - - int csize = internalChildrenSize; - - - InternalNode *target = &inode; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - Node::ID *children = target->getChildren(internalNodes.allocator); -#else - Node::ID *children = target->getChildren(); -#endif - for(;;) { - for (int i = 0; i < csize; i++) { - if (children[i].getType() == Node::ID::Internal) { - return; - } - LeafNode &ln = *static_cast(getNode(children[i])); - if (ln.getObjectSize() != 0) { - return; - } - } - - for (int i = 0; i < csize; i++) { - removeNode(children[i]); - } - if (target->parent.getID() == 0) { - removeNode(target->id); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - LeafNode *root = new(leafNodes.allocator) LeafNode(leafNodes.allocator); -#else - LeafNode *root = new LeafNode; -#endif - insertNode(root); - if (root->id.getID() != 1) { - NGTThrowException("Root id Error"); - } - return; - } - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - LeafNode *ln = new(leafNodes.allocator) LeafNode(leafNodes.allocator); -#else - LeafNode *ln = new LeafNode; -#endif - ln->parent = target->parent; - insertNode(ln); - - InternalNode &in = *(InternalNode*)getNode(ln->parent); -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - in.updateChild(*this, target->id, ln->id, internalNodes.allocator); -#else - in.updateChild(*this, target->id, ln->id); -#endif - removeNode(target->id); - target = ∈ - } - - return; -} - - -void -DVPTree::search(SearchContainer &sc, InternalNode &node, UncheckedNode &uncheckedNode) -{ -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - Distance d = objectSpace->getComparator()(sc.object, node.getPivot(*objectSpace)); -#else - Distance d = objectSpace->getComparator()(sc.object, node.getPivot()); -#endif -#ifdef NGT_DISTANCE_COMPUTATION_COUNT - sc.distanceComputationCount++; -#endif - - int bsize = internalChildrenSize - 1; - - vector regions; - regions.reserve(internalChildrenSize); - - ObjectDistance child; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - Distance *borders = node.getBorders(internalNodes.allocator); -#else - Distance *borders = node.getBorders(); -#endif - int mid; - for (mid = 0; mid < bsize; mid++) { - if (d < borders[mid]) { - child.id = mid; - child.distance = 0.0; - regions.push_back(child); - if (d + sc.radius < borders[mid]) { - break; - } else { - continue; - } - } else { - if (d < borders[mid] + sc.radius) { - child.id = mid; - child.distance = d - borders[mid]; - regions.push_back(child); - continue; - } else { - continue; - } - } - } - - if (mid == bsize) { - if (d >= borders[mid - 1]) { - child.id = mid; - child.distance = 0.0; - regions.push_back(child); - } else { - child.id = mid; - child.distance = borders[mid - 1] - d; - regions.push_back(child); - } - } - - sort(regions.begin(), regions.end()); - -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - Node::ID *children = node.getChildren(internalNodes.allocator); -#else - Node::ID *children = node.getChildren(); -#endif - - vector::iterator i; - if (sc.mode == DVPTree::SearchContainer::SearchLeaf) { - if (children[regions.front().id].getType() == Node::ID::Leaf) { - sc.nodeID.setRaw(children[regions.front().id].get()); - assert(uncheckedNode.empty()); - } else { - uncheckedNode.push(children[regions.front().id]); - } - } else { - for (i = regions.begin(); i != regions.end(); i++) { - uncheckedNode.push(children[i->id]); - } - } - -} - -void -DVPTree::search(SearchContainer &so, LeafNode &node, UncheckedNode &uncheckedNode) -{ - DVPTree::SearchContainer &q = (DVPTree::SearchContainer&)so; - - if (node.getObjectSize() == 0) { - return; - } -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - Distance pq = objectSpace->getComparator()(q.object, node.getPivot(*objectSpace)); -#else - Distance pq = objectSpace->getComparator()(q.object, node.getPivot()); -#endif -#ifdef NGT_DISTANCE_COMPUTATION_COUNT - so.distanceComputationCount++; -#endif - - ObjectDistance r; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - NGT::ObjectDistance *objects = node.getObjectIDs(leafNodes.allocator); -#else - NGT::ObjectDistance *objects = node.getObjectIDs(); -#endif - - for (size_t i = 0; i < node.getObjectSize(); i++) { - if ((objects[i].distance <= pq + q.radius) && - (objects[i].distance >= pq - q.radius)) { - Distance d = 0; - try { - d = objectSpace->getComparator()(q.object, *q.vptree->getObjectRepository().get(objects[i].id)); -#ifdef NGT_DISTANCE_COMPUTATION_COUNT - so.distanceComputationCount++; -#endif - } catch(...) { - NGTThrowException("VpTree::LeafNode::search: Internal fatal error : Cannot get object"); - } - if (d <= q.radius) { - r.id = objects[i].id; - r.distance = d; - so.getResult().push_back(r); - std::sort(so.getResult().begin(), so.getResult().end()); - if (so.getResult().size() > q.size) { - so.getResult().resize(q.size); - } - } - } - } -} - -void -DVPTree::search(SearchContainer &sc) { - ((SearchContainer&)sc).vptree = this; - Node *root = getRootNode(); - assert(root != 0); - if (sc.mode == DVPTree::SearchContainer::SearchLeaf) { - if (root->id.getType() == Node::ID::Leaf) { - sc.nodeID.setRaw(root->id.get()); - return; - } - } - - UncheckedNode uncheckedNode; - uncheckedNode.push(root->id); - - while (!uncheckedNode.empty()) { - Node::ID nodeid = uncheckedNode.top(); - uncheckedNode.pop(); - Node *cnode = getNode(nodeid); - if (cnode == 0) { - cerr << "Error! child node is null. but continue." << endl; - continue; - } - if (cnode->id.getType() == Node::ID::Internal) { - search(sc, (InternalNode&)*cnode, uncheckedNode); - } else if (cnode->id.getType() == Node::ID::Leaf) { - search(sc, (LeafNode&)*cnode, uncheckedNode); - } else { - cerr << "Tree: Inner fatal error!: Node type error!" << endl; - abort(); - } - } -} - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Tree.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Tree.h deleted file mode 100644 index 4c4f0b0976..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Tree.h +++ /dev/null @@ -1,513 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include "NGT/Common.h" -#include "NGT/Node.h" -#include "NGT/defines.h" -#include "faiss/utils/BitsetView.h" - -#include -#include -#include -#include -#include - -namespace NGT { - - class DVPTree { - - public: - enum SplitMode { - MaxDistance = 0, - MaxVariance = 1 - }; - - typedef std::vector IDVector; - - class Container : public NGT::Container { - public: - Container(Object &f, ObjectID i):NGT::Container(f, i) {} - DVPTree *vptree; - }; - - class SearchContainer : public NGT::SearchContainer { - public: - enum Mode { - SearchLeaf = 0, - SearchObject = 1 - }; - - SearchContainer(Object &f, ObjectID i):NGT::SearchContainer(f, i) {} - SearchContainer(Object &f):NGT::SearchContainer(f, 0) {} - - DVPTree *vptree; - - Mode mode; - Node::ID nodeID; - }; - class InsertContainer : public Container { - public: - InsertContainer(Object &f, ObjectID i):Container(f, i) {} - }; - - class RemoveContainer : public Container { - public: - RemoveContainer(Object &f, ObjectID i):Container(f, i) {} - }; - - DVPTree() { - leafObjectsSize = LeafNode::LeafObjectsSizeMax; - internalChildrenSize = InternalNode::InternalChildrenSizeMax; - splitMode = MaxVariance; -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - insertNode(new LeafNode); -#endif - } - - virtual ~DVPTree() { -#ifndef NGT_SHARED_MEMORY_ALLOCATOR - deleteAll(); -#endif - } - - void deleteAll() { - for (size_t i = 0; i < leafNodes.size(); i++) { - if (leafNodes[i] != 0) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - leafNodes[i]->deletePivot(*objectSpace, leafNodes.allocator); -#else - leafNodes[i]->deletePivot(*objectSpace); -#endif - delete leafNodes[i]; - } - } - leafNodes.clear(); - for (size_t i = 0; i < internalNodes.size(); i++) { - if (internalNodes[i] != 0) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - internalNodes[i]->deletePivot(*objectSpace, internalNodes.allocator); -#else - internalNodes[i]->deletePivot(*objectSpace); -#endif - delete internalNodes[i]; - } - } - internalNodes.clear(); - } - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - void open(const std::string &f, size_t sharedMemorySize) { - // If no file, then create a new file. - leafNodes.open(f + "l", sharedMemorySize); - internalNodes.open(f + "i", sharedMemorySize); - if (leafNodes.size() == 0) { - if (internalNodes.size() != 0) { - NGTThrowException("Tree::Open: Internal error. Internal and leaf are inconsistent."); - } - LeafNode *ln = leafNodes.allocate(); - insertNode(ln); - } - } -#endif // NGT_SHARED_MEMORY_ALLOCATOR - - void insert(InsertContainer &iobj); - - void insert(InsertContainer &iobj, LeafNode *n); - - Node::ID split(InsertContainer &iobj, LeafNode &leaf); - - Node::ID recombineNodes(InsertContainer &ic, Node::Objects &fs, LeafNode &leaf); - - void insertObject(InsertContainer &obj, LeafNode &leaf); - - typedef std::stack UncheckedNode; - - void search(SearchContainer &so); - void search(SearchContainer &so, InternalNode &node, UncheckedNode &uncheckedNode); - void search(SearchContainer &so, LeafNode &node, UncheckedNode &uncheckedNode); - - bool searchObject(ObjectID id) { - LeafNode &ln = getLeaf(id); - for (size_t i = 0; i < ln.getObjectSize(); i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - if (ln.getObjectIDs(leafNodes.allocator)[i].id == id) { -#else - if (ln.getObjectIDs()[i].id == id) { -#endif - return true; - } - } - return false; - } - - LeafNode &getLeaf(ObjectID id) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - Object *qobject = objectSpace->allocateObject(*getObjectRepository().get(id)); - SearchContainer q(*qobject); -#else - SearchContainer q(*getObjectRepository().get(id)); -#endif - q.mode = SearchContainer::SearchLeaf; - q.vptree = this; - q.radius = 0.0; - q.size = 1; - - search(q); - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - objectSpace->deleteObject(qobject); -#endif - - return *(LeafNode*)getNode(q.nodeID); - - } - - void replace(ObjectID id, ObjectID replacedId) { remove(id, replacedId); } - - // remove the specified object. - void remove(ObjectID id, ObjectID replaceId = 0) { - LeafNode &ln = getLeaf(id); - try { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - ln.removeObject(id, replaceId, leafNodes.allocator); -#else - ln.removeObject(id, replaceId); -#endif - } catch(Exception &err) { - std::stringstream msg; - msg << "VpTree::remove: Inner error. Cannot remove object. leafNode=" << ln.id.getID() << ":" << err.what(); - NGTThrowException(msg); - } - if (ln.getObjectSize() == 0) { - if (ln.parent.getID() != 0) { - InternalNode &inode = *(InternalNode*)getNode(ln.parent); - removeEmptyNodes(inode); - } - } - - return; - } - - void removeNaively(ObjectID id, ObjectID replaceId = 0) { - for (size_t i = 0; i < leafNodes.size(); i++) { - if (leafNodes[i] != 0) { - try { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - leafNodes[i]->removeObject(id, replaceId, leafNodes.allocator); -#else - leafNodes[i]->removeObject(id, replaceId); -#endif - break; - } catch(...) {} - } - } - } - - Node *getRootNode() { - size_t nid = 1; - Node *root; - try { - root = internalNodes.get(nid); - } catch(Exception &err) { - try { - root = leafNodes.get(nid); - } catch(Exception &e) { - std::stringstream msg; - msg << "VpTree::getRootNode: Inner error. Cannot get a leaf root node. " << nid << ":" << e.what(); - NGTThrowException(msg); - } - } - - return root; - } - - InternalNode *createInternalNode() { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - InternalNode *n = new(internalNodes.allocator) InternalNode(internalChildrenSize, internalNodes.allocator); -#else - InternalNode *n = new InternalNode(internalChildrenSize); -#endif - insertNode(n); - return n; - } - - void - removeNode(Node::ID id) { - size_t idx = id.getID(); - if (id.getType() == Node::ID::Leaf) { - leafNodes.remove(idx); - } else { - internalNodes.remove(idx); - } - } - - void removeEmptyNodes(InternalNode &node); - - Node::Objects * getObjects(LeafNode &n, Container &iobj); - - // for milvus - void - getObjectIDsFromLeaf(Node::ID nid, ObjectDistances& rl, const faiss::BitsetView bitset) { - LeafNode& ln = *(LeafNode*)getNode(nid); - rl.clear(); - ObjectDistance r; - for (size_t i = 0; i < ln.getObjectSize(); i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - r.id = ln.getObjectIDs(leafNodes.allocator)[i].id; - r.distance = ln.getObjectIDs(leafNodes.allocator)[i].distance; -#else - r.id = ln.getObjectIDs()[i].id; - r.distance = ln.getObjectIDs()[i].distance; -#endif - if (!bitset.empty() && bitset.test(r.id - 1)) { - continue; - } - rl.push_back(r); - } - return; - } - void getObjectIDsFromLeaf(Node::ID nid, ObjectDistances &rl) { - LeafNode &ln = *(LeafNode*)getNode(nid); - rl.clear(); - ObjectDistance r; - for (size_t i = 0; i < ln.getObjectSize(); i++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - r.id = ln.getObjectIDs(leafNodes.allocator)[i].id; - r.distance = ln.getObjectIDs(leafNodes.allocator)[i].distance; -#else - r.id = ln.getObjectIDs()[i].id; - r.distance = ln.getObjectIDs()[i].distance; -#endif - rl.push_back(r); - } - return; - } - void - insertNode(LeafNode *n) { - size_t id = leafNodes.insert(n); - n->id.setID(id); - n->id.setType(Node::ID::Leaf); - } - - // replace - void replaceNode(LeafNode *n) { - int id = n->id.getID(); -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - leafNodes.set(id, n); -#else - leafNodes[id] = n; -#endif - } - - void - insertNode(InternalNode *n) - { - size_t id = internalNodes.insert(n); - n->id.setID(id); - n->id.setType(Node::ID::Internal); - } - - Node *getNode(Node::ID &id) { - Node *n = 0; - Node::NodeID idx = id.getID(); - if (id.getType() == Node::ID::Leaf) { - n = leafNodes.get(idx); - } else { - n = internalNodes.get(idx); - } - return n; - } - - void getAllLeafNodeIDs(std::vector &leafIDs) { - leafIDs.clear(); - Node *root = getRootNode(); - if (root->id.getType() == Node::ID::Leaf) { - leafIDs.push_back(root->id); - return; - } - UncheckedNode uncheckedNode; - uncheckedNode.push(root->id); - while (!uncheckedNode.empty()) { - Node::ID nodeid = uncheckedNode.top(); - uncheckedNode.pop(); - Node *cnode = getNode(nodeid); - if (cnode->id.getType() == Node::ID::Internal) { - InternalNode &inode = static_cast(*cnode); - for (size_t ci = 0; ci < internalChildrenSize; ci++) { -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - uncheckedNode.push(inode.getChildren(internalNodes.allocator)[ci]); -#else - uncheckedNode.push(inode.getChildren()[ci]); -#endif - } - } else if (cnode->id.getType() == Node::ID::Leaf) { - leafIDs.push_back(static_cast(*cnode).id); - } else { - std::cerr << "Tree: Inner fatal error!: Node type error!" << std::endl; - abort(); - } - } - } - - // for milvus - void serialize(std::stringstream & os) - { - leafNodes.serialize(os, objectSpace); - internalNodes.serialize(os, objectSpace); - } - - void serialize(std::ofstream &os) { - leafNodes.serialize(os, objectSpace); - internalNodes.serialize(os, objectSpace); - } - - void deserialize(std::ifstream &is) { - leafNodes.deserialize(is, objectSpace); - internalNodes.deserialize(is, objectSpace); - } - - void deserialize(std::stringstream & is) - { - leafNodes.deserialize(is, objectSpace); - internalNodes.deserialize(is, objectSpace); - } - - void serializeAsText(std::ofstream &os) { - leafNodes.serializeAsText(os, objectSpace); - internalNodes.serializeAsText(os, objectSpace); - } - - void deserializeAsText(std::ifstream &is) { - leafNodes.deserializeAsText(is, objectSpace); - internalNodes.deserializeAsText(is, objectSpace); - } - - void show() { - std::cout << "Show tree " << std::endl; - for (size_t i = 0; i < leafNodes.size(); i++) { - if (leafNodes[i] != 0) { - std::cout << i << ":"; - (*leafNodes[i]).show(); - } - } - for (size_t i = 0; i < internalNodes.size(); i++) { - if (internalNodes[i] != 0) { - std::cout << i << ":"; - (*internalNodes[i]).show(); - } - } - } - - bool verify(size_t objCount, std::vector &status) { - std::cerr << "Started verifying internal nodes. size=" << internalNodes.size() << "..." << std::endl; - bool valid = true; - for (size_t i = 0; i < internalNodes.size(); i++) { - if (internalNodes[i] != 0) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - valid = valid && (*internalNodes[i]).verify(internalNodes, leafNodes, internalNodes.allocator); -#else - valid = valid && (*internalNodes[i]).verify(internalNodes, leafNodes); -#endif - } - } - std::cerr << "Started verifying leaf nodes. size=" << leafNodes.size() << " ..." << std::endl; - for (size_t i = 0; i < leafNodes.size(); i++) { - if (leafNodes[i] != 0) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - valid = valid && (*leafNodes[i]).verify(objCount, status, leafNodes.allocator); -#else - valid = valid && (*leafNodes[i]).verify(objCount, status); -#endif - } - } - return valid; - } - - void deleteInMemory() { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - assert(0); -#else - for (std::vector::iterator i = leafNodes.begin(); i != leafNodes.end(); i++) { - if ((*i) != 0) { - delete (*i); - } - } - leafNodes.clear(); - for (std::vector::iterator i = internalNodes.begin(); i != internalNodes.end(); i++) { - if ((*i) != 0) { - delete (*i); - } - } - internalNodes.clear(); -#endif - } - - ObjectRepository &getObjectRepository() { return objectSpace->getRepository(); } - - size_t getSharedMemorySize(std::ostream &os, SharedMemoryAllocator::GetMemorySizeType t) { -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - size_t isize = internalNodes.getAllocator().getMemorySize(t); - os << "internal=" << isize << std::endl; - size_t lsize = leafNodes.getAllocator().getMemorySize(t); - os << "leaf=" << lsize << std::endl; - return isize + lsize; -#else - return 0; -#endif - } - - void getAllObjectIDs(std::set &ids) { - for (size_t i = 0; i < leafNodes.size(); i++) { - if (leafNodes[i] != 0) { - LeafNode &ln = *leafNodes[i]; -#if defined(NGT_SHARED_MEMORY_ALLOCATOR) - auto objs = ln.getObjectIDs(leafNodes.allocator); -#else - auto objs = ln.getObjectIDs(); -#endif - for (size_t idx = 0; idx < ln.objectSize; ++idx) { - ids.insert(objs[idx].id); - } - } - } - } - - virtual int64_t memSize() { return sizeof(size_t) * 2 + sizeof(splitMode) + name.size() + leafNodes.memSize() + internalNodes.memSize(); } - - public: - size_t internalChildrenSize; - size_t leafObjectsSize; - - SplitMode splitMode; - - std::string name; - -#ifdef NGT_SHARED_MEMORY_ALLOCATOR - PersistentRepository leafNodes; - PersistentRepository internalNodes; -#else - Repository leafNodes; - Repository internalNodes; -#endif - - ObjectSpace *objectSpace; - - }; -} // namespace DVPTree - - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Version.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/Version.cpp deleted file mode 100644 index 16ac6ec036..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Version.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include "NGT/Version.h" - -void -NGT::Version::get(std::ostream &os) -{ - os << " Version:" << NGT::Version::getVersion() << std::endl; - os << " Built date:" << NGT::Version::getBuildDate() << std::endl; - os << " The last git tag:" << Version::getGitTag() << std::endl; - os << " The last git commit hash:" << Version::getGitHash() << std::endl; - os << " The last git commit date:" << Version::getGitDate() << std::endl; -} - -const std::string -NGT::Version::getVersion() -{ - return NGT_VERSION; -} - -const std::string -NGT::Version::getBuildDate() -{ - return NGT_BUILD_DATE; -} - -const std::string -NGT::Version::getGitHash() -{ - return NGT_GIT_HASH; -} - -const std::string -NGT::Version::getGitDate() -{ - return NGT_GIT_DATE; -} - -const std::string -NGT::Version::getGitTag() -{ - return NGT_GIT_TAG; -} - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/Version.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/Version.h deleted file mode 100644 index 15b585a96e..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/Version.h +++ /dev/null @@ -1,61 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -#include -#include - -#ifndef NGT_VERSION -#define NGT_VERSION "-" -#endif -#ifndef NGT_BUILD_DATE -#define NGT_BUILD_DATE "-" -#endif -#ifndef NGT_GIT_HASH -#define NGT_GIT_HASH "-" -#endif -#ifndef NGT_GIT_DATE -#define NGT_GIT_DATE "-" -#endif -#ifndef NGT_GIT_TAG -#define NGT_GIT_TAG "-" -#endif - -namespace NGT { -class Version { - public: - static void - get(std::ostream& os); - static const std::string - getVersion(); - static const std::string - getBuildDate(); - static const std::string - getGitHash(); - static const std::string - getGitDate(); - static const std::string - getGitTag(); - static const std::string - get(); -}; - -}; // namespace NGT - -#ifdef NGT_VERSION_FOR_HEADER -#include "Version.cpp" -#endif diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/defines.cpp b/internal/core/src/index/thirdparty/NGT/lib/NGT/defines.cpp deleted file mode 100644 index 699e5c05b2..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/defines.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include - -void (*NGT_LOG_TRACE_)(const std::string&); - -void (*NGT_LOG_DEBUG_)(const std::string&); - -void (*NGT_LOG_INFO_)(const std::string&); - -void (*NGT_LOG_WARNING_)(const std::string&); - -void (*NGT_LOG_FATAL_)(const std::string&); - -void (*NGT_LOG_ERROR_)(const std::string&); diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/defines.h b/internal/core/src/index/thirdparty/NGT/lib/NGT/defines.h deleted file mode 100644 index a1bca93492..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/defines.h +++ /dev/null @@ -1,73 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once -#include - -// Begin of cmake defines -#if 0 -#cmakedefine NGT_SHARED_MEMORY_ALLOCATOR // use shared memory for indexes -#cmakedefine NGT_GRAPH_CHECK_VECTOR // use vector to check whether accessed -#cmakedefine NGT_AVX_DISABLED // not use avx to compare -#cmakedefine NGT_LARGE_DATASET // more than 10M objects -#cmakedefine NGT_DISTANCE_COMPUTATION_COUNT // count # of distance computations -#endif -// End of cmake defines - -////////////////////////////////////////////////////////////////////////// -// Release Definitions for OSS - -//#define NGT_DISTANCE_COMPUTATION_COUNT - -#define NGT_CREATION_EDGE_SIZE 10 -#define NGT_EXPLORATION_COEFFICIENT 1.1 -#define NGT_INSERTION_EXPLORATION_COEFFICIENT 1.1 -#define NGT_SHARED_MEMORY_MAX_SIZE 1024 // MB -#define NGT_FORCED_REMOVE // When errors occur due to the index inconsistency, ignore them. - -#define NGT_COMPACT_VECTOR -#define NGT_GRAPH_READ_ONLY_GRAPH - -#ifdef NGT_LARGE_DATASET -#define NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET -#else -#define NGT_GRAPH_CHECK_VECTOR -#endif - -#if defined(NGT_AVX_DISABLED) -#define NGT_NO_AVX -#else -#if defined(__AVX512F__) && defined(__AVX512DQ__) -#define NGT_AVX512 -#elif defined(__AVX2__) -#define NGT_AVX2 -#else -#define NGT_NO_AVX -#endif -#endif - -extern void (*NGT_LOG_TRACE_)(const std::string&); - -extern void (*NGT_LOG_DEBUG_)(const std::string&); - -extern void (*NGT_LOG_INFO_)(const std::string&); - -extern void (*NGT_LOG_WARNING_)(const std::string&); - -extern void (*NGT_LOG_FATAL_)(const std::string&); - -extern void (*NGT_LOG_ERROR_)(const std::string&); - diff --git a/internal/core/src/index/thirdparty/NGT/lib/NGT/defines.h.in b/internal/core/src/index/thirdparty/NGT/lib/NGT/defines.h.in deleted file mode 100644 index 738c8369ea..0000000000 --- a/internal/core/src/index/thirdparty/NGT/lib/NGT/defines.h.in +++ /dev/null @@ -1,58 +0,0 @@ -// -// Copyright (C) 2015-2020 Yahoo Japan Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#pragma once - -// Begin of cmake defines -#cmakedefine NGT_SHARED_MEMORY_ALLOCATOR // use shared memory for indexes -#cmakedefine NGT_GRAPH_CHECK_VECTOR // use vector to check whether accessed -#cmakedefine NGT_AVX_DISABLED // not use avx to compare -#cmakedefine NGT_LARGE_DATASET // more than 10M objects -#cmakedefine NGT_DISTANCE_COMPUTATION_COUNT // count # of distance computations -// End of cmake defines - -////////////////////////////////////////////////////////////////////////// -// Release Definitions for OSS - -//#define NGT_DISTANCE_COMPUTATION_COUNT - -#define NGT_CREATION_EDGE_SIZE 10 -#define NGT_EXPLORATION_COEFFICIENT 1.1 -#define NGT_INSERTION_EXPLORATION_COEFFICIENT 1.1 -#define NGT_SHARED_MEMORY_MAX_SIZE 1024 // MB -#define NGT_FORCED_REMOVE // When errors occur due to the index inconsistency, ignore them. - -#define NGT_COMPACT_VECTOR -#define NGT_GRAPH_READ_ONLY_GRAPH - -#ifdef NGT_LARGE_DATASET - #define NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET -#else - #define NGT_GRAPH_CHECK_VECTOR -#endif - -#if defined(NGT_AVX_DISABLED) -#define NGT_NO_AVX -#else -#if defined(__AVX512F__) && defined(__AVX512DQ__) -#define NGT_AVX512 -#elif defined(__AVX2__) -#define NGT_AVX2 -#else -#define NGT_NO_AVX -#endif -#endif - diff --git a/internal/core/src/index/thirdparty/annoy/examples/s_compile_cpp.sh b/internal/core/src/index/thirdparty/annoy/examples/s_compile_cpp.sh old mode 100755 new mode 100644 diff --git a/internal/core/src/index/thirdparty/build.sh b/internal/core/src/index/thirdparty/build.sh old mode 100755 new mode 100644 diff --git a/internal/core/src/index/thirdparty/faiss/Clustering.cpp b/internal/core/src/index/thirdparty/faiss/Clustering.cpp old mode 100755 new mode 100644 diff --git a/internal/core/src/index/thirdparty/faiss/Clustering.h b/internal/core/src/index/thirdparty/faiss/Clustering.h old mode 100755 new mode 100644 diff --git a/internal/core/src/index/thirdparty/faiss/build-aux/config.guess b/internal/core/src/index/thirdparty/faiss/build-aux/config.guess old mode 100755 new mode 100644 diff --git a/internal/core/src/index/thirdparty/faiss/build-aux/config.sub b/internal/core/src/index/thirdparty/faiss/build-aux/config.sub old mode 100755 new mode 100644 diff --git a/internal/core/src/index/thirdparty/faiss/build-aux/install-sh b/internal/core/src/index/thirdparty/faiss/build-aux/install-sh old mode 100755 new mode 100644 diff --git a/internal/core/src/index/thirdparty/faiss/build.sh b/internal/core/src/index/thirdparty/faiss/build.sh old mode 100755 new mode 100644 diff --git a/internal/core/src/index/thirdparty/faiss/configure b/internal/core/src/index/thirdparty/faiss/configure old mode 100755 new mode 100644 diff --git a/internal/core/src/index/thirdparty/faiss/faiss b/internal/core/src/index/thirdparty/faiss/faiss deleted file mode 120000 index 945c9b46d6..0000000000 --- a/internal/core/src/index/thirdparty/faiss/faiss +++ /dev/null @@ -1 +0,0 @@ -. \ No newline at end of file diff --git a/internal/core/src/index/thirdparty/faiss/faiss b/internal/core/src/index/thirdparty/faiss/faiss new file mode 100644 index 0000000000..945c9b46d6 --- /dev/null +++ b/internal/core/src/index/thirdparty/faiss/faiss @@ -0,0 +1 @@ +. \ No newline at end of file diff --git a/internal/core/src/indexbuilder/CMakeLists.txt b/internal/core/src/indexbuilder/CMakeLists.txt index 1449d2c23e..f1ed3eca7e 100644 --- a/internal/core/src/indexbuilder/CMakeLists.txt +++ b/internal/core/src/indexbuilder/CMakeLists.txt @@ -19,6 +19,7 @@ add_library(milvus_indexbuilder SHARED ${INDEXBUILDER_FILES} ) +find_library(TBB NAMES tbb) set(PLATFORM_LIBS dl) if (MSYS) set(PLATFORM_LIBS ) @@ -26,12 +27,12 @@ endif () # link order matters target_link_libraries(milvus_indexbuilder + knowhere milvus_config milvus_common milvus_utils milvus_proto - knowhere - tbb + ${TBB} log ${PLATFORM_LIBS} pthread diff --git a/internal/core/src/indexbuilder/index_c.cpp b/internal/core/src/indexbuilder/index_c.cpp index 1846554fa5..65f85bf470 100644 --- a/internal/core/src/indexbuilder/index_c.cpp +++ b/internal/core/src/indexbuilder/index_c.cpp @@ -10,8 +10,10 @@ // or implied. See the License for the specific language governing permissions and limitations under the License #include +#ifndef __APPLE__ #include -#include "index/knowhere/knowhere/index/vector_index/adapter/VectorAdapter.h" +#endif +#include "knowhere/index/vector_index/adapter/VectorAdapter.h" #include "indexbuilder/IndexWrapper.h" #include "indexbuilder/index_c.h" @@ -47,7 +49,9 @@ void DeleteIndex(CIndex index) { auto cIndex = (milvus::indexbuilder::IndexWrapper*)index; delete cIndex; +#ifndef __APPLE__ malloc_trim(0); +#endif } CStatus diff --git a/internal/core/src/indexbuilder/utils.h b/internal/core/src/indexbuilder/utils.h index d9bbc5a5f2..ea15b035dc 100644 --- a/internal/core/src/indexbuilder/utils.h +++ b/internal/core/src/indexbuilder/utils.h @@ -16,7 +16,7 @@ #include #include -#include "index/knowhere/knowhere/index/IndexType.h" +#include "knowhere/index/IndexType.h" namespace milvus::indexbuilder { diff --git a/internal/core/src/log/CMakeLists.txt b/internal/core/src/log/CMakeLists.txt index 5e490a575f..1f122073bb 100644 --- a/internal/core/src/log/CMakeLists.txt +++ b/internal/core/src/log/CMakeLists.txt @@ -21,9 +21,3 @@ set(LOG_FILES ${MILVUS_ENGINE_SRC}/log/Log.cpp add_library(log STATIC ${LOG_FILES}) set_target_properties(log PROPERTIES RULE_LAUNCH_COMPILE "") set_target_properties(log PROPERTIES RULE_LAUNCH_LINK "") - -if(MSYS) -target_link_libraries( log PRIVATE ) -else() -target_link_libraries( log PRIVATE fiu ) -endif() diff --git a/internal/core/src/log/Log.cpp b/internal/core/src/log/Log.cpp index 7e4f122017..373f3744cd 100644 --- a/internal/core/src/log/Log.cpp +++ b/internal/core/src/log/Log.cpp @@ -46,7 +46,13 @@ LogOut(const char* pattern, ...) { void SetThreadName(const std::string& name) { // Note: the name cannot exceed 16 bytes +#ifdef __APPLE__ + pthread_setname_np(name.c_str()); +#elif __linux__ pthread_setname_np(pthread_self(), name.c_str()); +#else +#error "Unsupported SetThreadName"; +#endif } std::string @@ -84,17 +90,26 @@ get_system_boottime() { int64_t get_thread_starttime() { +#ifdef __APPLE__ + uint64_t tid; + pthread_threadid_np(NULL, &tid); +#elif __linux__ int64_t tid = gettid(); +#else +#error "Unsupported SetThreadName"; +#endif + int64_t pid = getpid(); char filename[256]; - snprintf(filename, sizeof(filename), "/proc/%ld/task/%ld/stat", pid, tid); + snprintf(filename, sizeof(filename), "/proc/%lld/task/%lld/stat", (long long)pid, (long long)tid); // NOLINT int64_t val = 0; char comm[16], state; FILE* thread_stat = fopen(filename, "r"); - auto ret = fscanf(thread_stat, "%ld %s %s ", &val, comm, &state); + auto ret = fscanf(thread_stat, "%lld %s %s ", (long long*)&val, comm, &state); // NOLINT + for (auto i = 4; i < 23; i++) { - ret = fscanf(thread_stat, "%ld ", &val); + ret = fscanf(thread_stat, "%lld ", (long long*)&val); // NOLINT if (i == 22) { break; } diff --git a/internal/core/src/log/LogMgr.cpp b/internal/core/src/log/LogMgr.cpp index b238b8234c..d5d97db67d 100644 --- a/internal/core/src/log/LogMgr.cpp +++ b/internal/core/src/log/LogMgr.cpp @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include #include @@ -152,22 +151,16 @@ LogMgr::Filename(const std::string& filename) { LogMgr& LogMgr::Level(std::unordered_map& enables) { - fiu_do_on("LogMgr.Level.trace_enable_to_false", enables["trace"] = false); enable(el_config_, el::Level::Trace, enables["trace"]); - fiu_do_on("LogMgr.Level.info_enable_to_false", enables["info"] = false); enable(el_config_, el::Level::Info, enables["info"]); - fiu_do_on("LogMgr.Level.debug_enable_to_false", enables["debug"] = false); enable(el_config_, el::Level::Debug, enables["debug"]); - fiu_do_on("LogMgr.Level.warning_enable_to_false", enables["warning"] = false); enable(el_config_, el::Level::Warning, enables["warning"]); - fiu_do_on("LogMgr.Level.error_enable_to_false", enables["error"] = false); enable(el_config_, el::Level::Error, enables["error"]); - fiu_do_on("LogMgr.Level.fatal_enable_to_false", enables["fatal"] = false); enable(el_config_, el::Level::Fatal, enables["fatal"]); return *this; @@ -183,7 +176,6 @@ LogMgr::To(bool log_to_stdout, bool log_to_file) { LogMgr& LogMgr::Rotate(int64_t max_log_file_size, int64_t log_rotate_num) { - fiu_do_on("LogMgr.Rotate.set_max_log_size_small_than_min", max_log_file_size = MAX_LOG_FILE_SIZE_MIN - 1); if (max_log_file_size < MAX_LOG_FILE_SIZE_MIN || max_log_file_size > MAX_LOG_FILE_SIZE_MAX) { std::string msg = "max_log_file_size must in range[" + std::to_string(MAX_LOG_FILE_SIZE_MIN) + ", " + std::to_string(MAX_LOG_FILE_SIZE_MAX) + "], now is " + std::to_string(max_log_file_size); @@ -197,7 +189,6 @@ LogMgr::Rotate(int64_t max_log_file_size, int64_t log_rotate_num) { // set delete_exceeds = 0 means disable throw away log file even they reach certain limit. if (log_rotate_num != 0) { - fiu_do_on("LogMgr.Rotate.delete_exceeds_small_than_min", log_rotate_num = LOG_ROTATE_NUM_MIN - 1); if (log_rotate_num < LOG_ROTATE_NUM_MIN || log_rotate_num > LOG_ROTATE_NUM_MAX) { std::string msg = "log_rotate_num must in range[" + std::to_string(LOG_ROTATE_NUM_MIN) + ", " + std::to_string(LOG_ROTATE_NUM_MAX) + "], now is " + std::to_string(log_rotate_num); diff --git a/internal/core/src/query/SearchBruteForce.cpp b/internal/core/src/query/SearchBruteForce.cpp index 18587ba53d..f9113b372d 100644 --- a/internal/core/src/query/SearchBruteForce.cpp +++ b/internal/core/src/query/SearchBruteForce.cpp @@ -12,8 +12,15 @@ #include #include -#include +#ifdef __APPLE__ +#include "knowhere/index/vector_index/impl/bruteforce/BruteForce.h" +#include "knowhere/common/Heap.h" +#elif __linux__ #include +#include +#else +#error "Unsupported OS environment."; +#endif #include "SearchBruteForce.h" #include "SubSearchResult.h" @@ -35,6 +42,7 @@ raw_search(MetricType metric_type, float* D, idx_t* labels, const BitsetView bitset) { +#ifdef __linux__ using namespace faiss; // NOLINT if (metric_type == METRIC_Jaccard || metric_type == METRIC_Tanimoto) { float_maxheap_array_t res = {size_t(n), size_t(k), labels, D}; @@ -62,6 +70,9 @@ raw_search(MetricType metric_type, std::string("binary search not support metric type: ") + segcore::MetricTypeToString(metric_type); PanicInfo(msg); } +#else + PanicInfo("Unsupported brute force for binary search on current OS environment!"); +#endif } SubSearchResult @@ -100,18 +111,29 @@ FloatSearchBruteForce(const dataset::SearchDataset& dataset, SubSearchResult sub_qr(num_queries, topk, metric_type, round_decimal); auto query_data = reinterpret_cast(dataset.query_data); auto chunk_data = reinterpret_cast(chunk_data_raw); - +#ifdef __APPLE__ + if (metric_type == MetricType::METRIC_L2) { + knowhere::float_maxheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_ids(), + sub_qr.get_distances()}; + knowhere::knn_L2sqr_sse(query_data, chunk_data, dim, num_queries, size_per_chunk, &buf, bitset); + } else { + knowhere::float_minheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_ids(), + sub_qr.get_distances()}; + knowhere::knn_inner_product_sse(query_data, chunk_data, dim, num_queries, size_per_chunk, &buf, bitset); + } +#elif __linux__ if (metric_type == MetricType::METRIC_L2) { faiss::float_maxheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_ids(), sub_qr.get_distances()}; faiss::knn_L2sqr(query_data, chunk_data, dim, num_queries, size_per_chunk, &buf, bitset); - sub_qr.round_values(); - return sub_qr; } else { faiss::float_minheap_array_t buf{(size_t)num_queries, (size_t)topk, sub_qr.get_ids(), sub_qr.get_distances()}; faiss::knn_inner_product(query_data, chunk_data, dim, num_queries, size_per_chunk, &buf, bitset); - sub_qr.round_values(); - return sub_qr; } +#else +#error "Unsupported OS environment!"; +#endif + sub_qr.round_values(); + return sub_qr; } SubSearchResult diff --git a/internal/core/src/query/SearchBruteForce.h b/internal/core/src/query/SearchBruteForce.h index 8cb60c583a..046493dc7c 100644 --- a/internal/core/src/query/SearchBruteForce.h +++ b/internal/core/src/query/SearchBruteForce.h @@ -11,8 +11,6 @@ #pragma once -#include - #include "common/Schema.h" #include "query/SubSearchResult.h" #include "query/helper.h" diff --git a/internal/core/src/query/generated/ExecPlanNodeVisitor.h b/internal/core/src/query/generated/ExecPlanNodeVisitor.h index b055330d64..786147e80e 100644 --- a/internal/core/src/query/generated/ExecPlanNodeVisitor.h +++ b/internal/core/src/query/generated/ExecPlanNodeVisitor.h @@ -28,7 +28,7 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor { visit(BinaryVectorANNS& node) override; void - visit(RetrievePlanNode& node); + visit(RetrievePlanNode& node) override; public: using RetType = SearchResult; diff --git a/internal/core/src/query/generated/ExtractInfoPlanNodeVisitor.h b/internal/core/src/query/generated/ExtractInfoPlanNodeVisitor.h index 8183e3dcbc..fae207ed24 100644 --- a/internal/core/src/query/generated/ExtractInfoPlanNodeVisitor.h +++ b/internal/core/src/query/generated/ExtractInfoPlanNodeVisitor.h @@ -25,7 +25,7 @@ class ExtractInfoPlanNodeVisitor : public PlanNodeVisitor { visit(BinaryVectorANNS& node) override; void - visit(RetrievePlanNode& node); + visit(RetrievePlanNode& node) override; public: explicit ExtractInfoPlanNodeVisitor(ExtractedPlanInfo& plan_info) : plan_info_(plan_info) { diff --git a/internal/core/src/segcore/CMakeLists.txt b/internal/core/src/segcore/CMakeLists.txt index 95281d051a..9d2fcc415d 100644 --- a/internal/core/src/segcore/CMakeLists.txt +++ b/internal/core/src/segcore/CMakeLists.txt @@ -32,6 +32,7 @@ add_library(milvus_segcore SHARED ${SEGCORE_FILES} ) +find_library(TBB NAMES tbb) set(PLATFORM_LIBS dl) if (MSYS) set(PLATFORM_LIBS ) @@ -41,7 +42,7 @@ target_link_libraries(milvus_segcore ${PLATFORM_LIBS} log pthread - tbb + ${TBB} ${OpenMP_CXX_FLAGS} knowhere milvus_common diff --git a/internal/core/src/segcore/DeletedRecord.h b/internal/core/src/segcore/DeletedRecord.h index daf4ba5d53..50965e622a 100644 --- a/internal/core/src/segcore/DeletedRecord.h +++ b/internal/core/src/segcore/DeletedRecord.h @@ -16,7 +16,6 @@ #include "AckResponder.h" #include "common/Schema.h" -#include "knowhere/index/vector_index/IndexIVF.h" #include "segcore/Record.h" namespace milvus::segcore { diff --git a/internal/core/src/segcore/FieldIndexing.cpp b/internal/core/src/segcore/FieldIndexing.cpp index 48a92a4705..d704807ed2 100644 --- a/internal/core/src/segcore/FieldIndexing.cpp +++ b/internal/core/src/segcore/FieldIndexing.cpp @@ -13,7 +13,9 @@ #include #include "common/SystemProperty.h" +#ifdef __linux__ #include "knowhere/index/vector_index/IndexIVF.h" +#endif #include "knowhere/index/vector_index/adapter/VectorAdapter.h" #include "segcore/FieldIndexing.h" @@ -21,6 +23,7 @@ namespace milvus::segcore { void VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) { +#ifdef __linux__ AssertInfo(field_meta_.get_data_type() == DataType::VECTOR_FLOAT, "Data type of vector field is not VECTOR_FLOAT"); auto dim = field_meta_.get_dim(); @@ -39,6 +42,9 @@ VectorFieldIndexing::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vec indexing->AddWithoutIds(dataset, conf); data_[chunk_id] = std::move(indexing); } +#else + throw std::runtime_error("Unsupported BuildIndexRange on current platform!"); +#endif } knowhere::Config diff --git a/internal/core/src/segcore/FieldIndexing.h b/internal/core/src/segcore/FieldIndexing.h index 334d0c26c4..8677ca4367 100644 --- a/internal/core/src/segcore/FieldIndexing.h +++ b/internal/core/src/segcore/FieldIndexing.h @@ -21,7 +21,7 @@ #include "InsertRecord.h" #include "common/Schema.h" #include "knowhere/index/structured_index_simple/StructuredIndexSort.h" -#include "knowhere/index/vector_index/IndexIVF.h" +#include "knowhere/index/vector_index/VecIndex.h" #include "segcore/SegcoreConfig.h" namespace milvus::segcore { @@ -36,6 +36,7 @@ class FieldIndexing { FieldIndexing(const FieldIndexing&) = delete; FieldIndexing& operator=(const FieldIndexing&) = delete; + virtual ~FieldIndexing() = default; // Do this in parallel virtual void diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index 93d0fa9fb9..d17ee52d86 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -119,7 +119,7 @@ class SegmentGrowingImpl : public SegmentGrowing { enable_small_index_ = false; } - ssize_t + int64_t get_row_count() const override { return record_.ack_responder_.GetAck(); } diff --git a/internal/core/src/segcore/SimilarityCorelation.h b/internal/core/src/segcore/SimilarityCorelation.h index 2c9bccd76d..050a08e79b 100644 --- a/internal/core/src/segcore/SimilarityCorelation.h +++ b/internal/core/src/segcore/SimilarityCorelation.h @@ -11,7 +11,7 @@ #pragma once -#include "index/thirdparty/faiss/MetricType.h" +#include namespace milvus::segcore { static inline bool diff --git a/internal/core/src/segcore/Utils.h b/internal/core/src/segcore/Utils.h index f2e03ca8c3..e86da64f02 100644 --- a/internal/core/src/segcore/Utils.h +++ b/internal/core/src/segcore/Utils.h @@ -13,8 +13,7 @@ #include #include #include - -#include "index/thirdparty/faiss/MetricType.h" +#include namespace milvus::segcore { diff --git a/internal/core/src/segcore/collection_c.cpp b/internal/core/src/segcore/collection_c.cpp index bff17d8761..019e72daf6 100644 --- a/internal/core/src/segcore/collection_c.cpp +++ b/internal/core/src/segcore/collection_c.cpp @@ -9,8 +9,11 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License -#include +#ifndef __APPLE__ #include +#endif + +#include #include "segcore/collection_c.h" #include "segcore/Collection.h" @@ -25,7 +28,9 @@ void DeleteCollection(CCollection collection) { auto col = (milvus::segcore::Collection*)collection; delete col; +#ifndef __APPLE__ malloc_trim(0); +#endif } const char* diff --git a/internal/core/src/segcore/load_index_c.cpp b/internal/core/src/segcore/load_index_c.cpp index 8555b43900..c9a9195ad8 100644 --- a/internal/core/src/segcore/load_index_c.cpp +++ b/internal/core/src/segcore/load_index_c.cpp @@ -11,8 +11,8 @@ #include "common/LoadInfo.h" #include "exceptions/EasyAssert.h" -#include "index/knowhere/knowhere/common/BinarySet.h" -#include "index/knowhere/knowhere/index/vector_index/VecIndexFactory.h" +#include "knowhere/common/BinarySet.h" +#include "knowhere/index/vector_index/VecIndexFactory.h" #include "segcore/load_index_c.h" CStatus diff --git a/internal/core/src/utils/CMakeLists.txt b/internal/core/src/utils/CMakeLists.txt index 44592d6a39..08a7ba6716 100644 --- a/internal/core/src/utils/CMakeLists.txt +++ b/internal/core/src/utils/CMakeLists.txt @@ -15,8 +15,4 @@ aux_source_directory( ${MILVUS_ENGINE_SRC}/utils UTILS_FILES ) add_library( milvus_utils STATIC ${UTILS_FILES} ) -if(MSYS) target_link_libraries( milvus_utils PRIVATE milvus_exceptions) -else() -target_link_libraries( milvus_utils PRIVATE fiu milvus_exceptions ) -endif() diff --git a/internal/core/src/value/CMakeLists.txt b/internal/core/src/value/CMakeLists.txt index d7f36f631d..9537d401a6 100644 --- a/internal/core/src/value/CMakeLists.txt +++ b/internal/core/src/value/CMakeLists.txt @@ -29,9 +29,7 @@ set( VALUE_SRCS config/ConfigInit.cpp ValueType.cpp ) -set( VALUE_LIBS yaml-cpp - fiu - ) +set( VALUE_LIBS yaml-cpp) create_library( TARGET value @@ -41,20 +39,17 @@ create_library( if ( BUILD_UNIT_TEST ) create_library( - TARGET value-fiu + TARGET value-test SRCS ${VALUE_SRCS} - LIBS ${VALUE_LIBS} - DEFS FIU_ENABLE - ) - target_compile_definitions(value-fiu PRIVATE FIU_ENABLE) + LIBS ${VALUE_LIBS}) set(GTEST_LIBS gtest gtest_main gmock gmock_main) create_executable( TARGET ConfigMgrTest SRCS config/ConfigMgrTest - LIBS value-fiu ${GTEST_LIBS} - DEFS FIU_ENABLE + LIBS value-test ${GTEST_LIBS} + DEFS "" ) add_test ( NAME ConfigMgrTest @@ -65,7 +60,7 @@ if ( BUILD_UNIT_TEST ) TARGET ServerConfigTest SRCS config/ServerConfigTest LIBS value-fiu ${GTEST_LIBS} - DEFS FIU_ENABLE + DEFS "" ) add_test ( NAME ServerConfigTest @@ -75,8 +70,8 @@ if ( BUILD_UNIT_TEST ) create_executable( TARGET ValueTypeTest SRCS ValueTypeTest1 ValueTypeTest2 - LIBS value-fiu ${GTEST_LIBS} - DEFS FIU_ENABLE + LIBS value-test ${GTEST_LIBS} + DEFS "" ) add_test ( NAME ValueTypeTest diff --git a/internal/core/src/value/config/ConfigMgrTest.cpp b/internal/core/src/value/config/ConfigMgrTest.cpp deleted file mode 100644 index 6ad4c5e7a9..0000000000 --- a/internal/core/src/value/config/ConfigMgrTest.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (C) 2019-2020 Zilliz. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software distributed under the License -// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -// or implied. See the License for the specific language governing permissions and limitations under the License. - -#include -#include -#include -#include "ConfigMgr.h" - -#include "value/config/ServerConfig.h" - -namespace milvus { - -// TODO: need a safe directory for testing -// TEST(ConfigMgrTest, set_version) { -// ConfigMgr::GetInstance().Init(); -// ConfigMgr::GetInstance().LoadMemory(R"( -// version: 0.1 -//)"); -// ConfigMgr::GetInstance().FilePath() = "/tmp/milvus_unittest_configmgr.yaml"; -// -// ASSERT_EQ(ConfigMgr::GetInstance().Get("version"), "0.1"); -// ConfigMgr::GetInstance().Set("version", "100.0"); -// ASSERT_EQ(ConfigMgr::GetInstance().Get("version"), "100.0"); -//} - -} // namespace milvus diff --git a/internal/core/src/value/config/ServerConfig.cpp b/internal/core/src/value/config/ServerConfig.cpp index 07b2339381..f0070fc8bb 100644 --- a/internal/core/src/value/config/ServerConfig.cpp +++ b/internal/core/src/value/config/ServerConfig.cpp @@ -9,8 +9,6 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License. -#include - #include #include #include @@ -45,7 +43,6 @@ ParseGPUDevices(const std::string& str) { std::string device; while (std::getline(ss, device, ',')) { - fiu_do_on("ParseGPUDevices.invalid_format", device = ""); if (device.length() < 4) { /* Invalid format string */ return {}; diff --git a/internal/core/src/value/config/ServerConfigTest.cpp b/internal/core/src/value/config/ServerConfigTest.cpp index a7430d2821..98a1060c84 100644 --- a/internal/core/src/value/config/ServerConfigTest.cpp +++ b/internal/core/src/value/config/ServerConfigTest.cpp @@ -9,15 +9,11 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License. -#include -#include #include #include "value/config/ServerConfig.h" TEST(ServerConfigTest, parse_invalid_devices) { - fiu_init(0); - fiu_enable("ParseGPUDevices.invalid_format", 1, nullptr, 0); auto collections = milvus::ParseGPUDevices("gpu0,gpu1"); ASSERT_EQ(collections.size(), 0); } diff --git a/internal/core/thirdparty/CMakeLists.txt b/internal/core/thirdparty/CMakeLists.txt index c58eb6d7f8..09f010565d 100644 --- a/internal/core/thirdparty/CMakeLists.txt +++ b/internal/core/thirdparty/CMakeLists.txt @@ -38,10 +38,15 @@ message( STATUS "Thirdparty downloaded file path: ${THIRDPARTY_DOWNLOAD_PATH}" ) set( THREADS_PREFER_PTHREAD_FLAG ON ) find_package( Threads REQUIRED ) +add_subdirectory( knowhere ) + # ****************************** Thirdparty googletest *************************************** -if ( MILVUS_BUILD_TESTS ) +if ( MILVUS_BUILD_TESTS) add_subdirectory( gtest ) add_subdirectory( google_benchmark ) +endif() + +if ( MILVUS_BUILD_TESTS AND LINUX ) add_subdirectory( profiler ) endif() @@ -55,5 +60,4 @@ if ( MILVUS_WITH_OPENTRACING ) endif() add_subdirectory( protobuf ) -add_subdirectory( boost_ext ) -add_subdirectory( fiu ) +add_subdirectory( boost_ext ) \ No newline at end of file diff --git a/internal/core/thirdparty/fiu/CMakeLists.txt b/internal/core/thirdparty/fiu/CMakeLists.txt deleted file mode 100644 index 60f3c83516..0000000000 --- a/internal/core/thirdparty/fiu/CMakeLists.txt +++ /dev/null @@ -1,66 +0,0 @@ -#------------------------------------------------------------------------------- -# Copyright (C) 2019-2020 Zilliz. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under the License. -#------------------------------------------------------------------------------- -if(NOT DEFINED FIU_VERSION) - set(FIU_VERSION 1.00) -endif() - -if ( DEFINED ENV{KNOWHERE_FIU_URL} ) - set( FIU_SOURCE_URL "$ENV{MILVUS_FIU_URL}" ) -else () - set( FIU_SOURCE_URL "https://github.com/albertito/libfiu/archive/${FIU_VERSION}.tar.gz" ) -endif () - -macro( build_fiu ) - message( STATUS "Building FIU-${FIU_VERSION} from source" ) - ExternalProject_Add( - fiu_ep - PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/fiu-subbuild - DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} - INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR} - URL ${FIU_SOURCE_URL} - URL_MD5 "75f9d076daf964c9410611701f07c61b" - CONFIGURE_COMMAND "" - BUILD_IN_SOURCE 1 - BUILD_COMMAND ${MAKE} - INSTALL_COMMAND ${MAKE} "PREFIX=" install - ${EP_LOG_OPTIONS} - ) - - ExternalProject_Get_Property( fiu_ep INSTALL_DIR ) - if( NOT IS_DIRECTORY ${INSTALL_DIR}/include ) - file( MAKE_DIRECTORY "${INSTALL_DIR}/include" ) - endif() - add_library( fiu SHARED IMPORTED ) - set_target_properties( fiu - PROPERTIES - IMPORTED_GLOBAL TRUE - IMPORTED_LOCATION ${INSTALL_DIR}/lib/libfiu.so - INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include ) - add_dependencies(fiu fiu_ep) -endmacro() - -if (WIN32) - # nothing - message("skip building fiu on windows") -else () - build_fiu() - install( FILES ${INSTALL_DIR}/lib/libfiu.so - ${INSTALL_DIR}/lib/libfiu.so.0 - ${INSTALL_DIR}/lib/libfiu.so.1.00 - DESTINATION lib ) - get_target_property( var fiu INTERFACE_INCLUDE_DIRECTORIES ) - message( STATUS ${var} ) - set_directory_properties( PROPERTY INCLUDE_DIRECTORIES ${var} ) -endif () - - diff --git a/internal/core/thirdparty/fiu/fiu-local.h b/internal/core/thirdparty/fiu/fiu-local.h deleted file mode 100644 index b68327bffd..0000000000 --- a/internal/core/thirdparty/fiu/fiu-local.h +++ /dev/null @@ -1,37 +0,0 @@ - -/* libfiu - Fault Injection in Userspace - * - * This header, part of libfiu, is meant to be included in your project to - * avoid having libfiu as a mandatory build-time dependency. - * - * You can add it to your project, and #include it instead of fiu.h. - * The real fiu.h will be used only when FIU_ENABLE is defined. - * - * This header, as the rest of libfiu, is in the public domain. - * - * You can find more information about libfiu at - * http://blitiri.com.ar/p/libfiu. - */ - -#ifndef _FIU_LOCAL_H -#define _FIU_LOCAL_H - -/* Only define the stubs when fiu is disabled, otherwise use the real fiu.h - * header */ -#ifndef FIU_ENABLE - -#define fiu_init(flags) 0 -#define fiu_fail(name) 0 -#define fiu_failinfo() NULL -#define fiu_do_on(name, action) -#define fiu_exit_on(name) -#define fiu_return_on(name, retval) - -#else - -#include - -#endif /* FIU_ENABLE */ - -#endif /* _FIU_LOCAL_H */ - diff --git a/internal/core/thirdparty/knowhere/CMakeLists.txt b/internal/core/thirdparty/knowhere/CMakeLists.txt new file mode 100644 index 0000000000..f1d9055884 --- /dev/null +++ b/internal/core/thirdparty/knowhere/CMakeLists.txt @@ -0,0 +1,102 @@ +#------------------------------------------------------------------------------- +# Copyright (C) 2019-2020 Zilliz. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under the License. +#------------------------------------------------------------------------------- + +set( KNOWHERE_VERSION v1.0.0 ) +set( KNOWHERE_SOURCE_MD5 "96f7f97aea75a2bd640e3aa82e8c7499" ) + +if ( DEFINED ENV{MILVUS_KNOWHERE_URL} ) + set( KNOWHERE_SOURCE_URL "$ENV{MILVUS_KNOWHERE_URL}" ) +else () + set( KNOWHERE_SOURCE_URL + "https://github.com/milvus-io/knowhere/archive/refs/tags/${KNOWHERE_VERSION}.tar.gz" ) +endif () + +#message( STATUS "Building knowhere-${KNOWHERE_VERSION} from source" ) +#FetchContent_Declare( +# knowhere +# URL ${KNOWHERE_SOURCE_URL} +# URL_MD5 ${KNOWHERE_SOURCE_MD5} +# DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} +# SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/knowhere-src +# BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/knowhere-build +# ) +# +#FetchContent_GetProperties(knowhere) +#if ( NOT knowhere_POPULATED ) +# FetchContent_Populate( knowhere ) +# add_subdirectory( ${knowhere_SOURCE_DIR} +# ${knowhere_BINARY_DIR} +# EXCLUDE_FROM_ALL ) +#endif() +# +#get_target_property( KNOWHERE_INCLUDE_DIR knowhere INTERFACE_INCLUDE_DIRECTORIES ) +#message( STATUS ${KNOWHERE_INCLUDE_DIR} ) +# +#get_property( var DIRECTORY "${knowhere_SOURCE_DIR}" PROPERTY COMPILE_OPTIONS ) +#message( STATUS "knowhere compile options: ${var}" ) +# + +macro(build_knowhere) + message(STATUS "Building knowhere-${KNOWHERE_VERSION} from source") + set (KNOWHERE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) + + set(KNOWHERE_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + -DCMAKE_BUILD_TYPE=Release + "-DCMAKE_INSTALL_PREFIX=${KNOWHERE_INSTALL_PREFIX}" + ) + + externalproject_add(knowhere_ep + URL ${KNOWHERE_SOURCE_URL} + URL_MD5 ${KNOWHERE_SOURCE_MD5} + CMAKE_ARGS ${KNOWHERE_CMAKE_ARGS} + BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS} + PREFIX ${CMAKE_BINARY_DIR}/3rdparty_download/knowhere-subbuild + BINARY_DIR knowhere-bin + INSTALL_DIR ${KNOWHERE_INSTALL_PREFIX} + ) + + ExternalProject_Get_Property(knowhere_ep INSTALL_DIR) + message(STATUS ${INSTALL_DIR}) + + if( NOT IS_DIRECTORY ${INSTALL_DIR}/include ) + file( MAKE_DIRECTORY "${INSTALL_DIR}/include" ) + endif() + + add_library(knowhere SHARED IMPORTED) + set_target_properties( knowhere + PROPERTIES + IMPORTED_GLOBAL TRUE + IMPORTED_LOCATION ${INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}knowhere${CMAKE_SHARED_LIBRARY_SUFFIX} + INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/${CMAKE_INSTALL_INCLUDEDIR}) + + add_dependencies(knowhere knowhere_ep) +endmacro() + +set(KNOWHERE_SOURCE "AUTO") +if (KNOWHERE_SOURCE STREQUAL "AUTO") + find_package(knowhere) + message(STATUS "knowhere libraries: ${KNOWHERE_LIBRARIES}") + message(STATUS "knowhere found: ${KNOWHERE_FOUND}") + + if (KNOWHERE_FOUND) + add_library(knowhere) + else() + build_knowhere() + endif() +elseif (KNOWHERE_SOURCE STREQUAL "BUNDLED") + build_knowhere() +elseif (KNOWHERE_SOURCE STREQUAL "SYSTEM") + find_package(knowhere REQUIRED) + add_library(knowhere) +endif () diff --git a/internal/core/thirdparty/yaml-cpp/CMakeLists.txt b/internal/core/thirdparty/yaml-cpp/CMakeLists.txt index bf54d80a51..dcd83e63c1 100644 --- a/internal/core/thirdparty/yaml-cpp/CMakeLists.txt +++ b/internal/core/thirdparty/yaml-cpp/CMakeLists.txt @@ -47,4 +47,4 @@ get_target_property( YAML_CPP_INCLUDE_DIR yaml-cpp INTERFACE_INCLUDE_DIRECTORIES message( STATUS ${YAML_CPP_INCLUDE_DIR} ) get_property( var DIRECTORY "${yaml-cpp_SOURCE_DIR}" PROPERTY COMPILE_OPTIONS ) -message( STATUS "yaml compile options: ${var}" ) +message( STATUS "yaml compile options: ${var}" ) \ No newline at end of file diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index f14aea76e3..88fa7c0302 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -10,58 +10,75 @@ # or implied. See the License for the specific language governing permissions and limitations under the License include_directories(${CMAKE_HOME_DIRECTORY}/src) -include_directories(${CMAKE_HOME_DIRECTORY}/src/index/knowhere) include_directories(${CMAKE_HOME_DIRECTORY}/src/thirdparty) add_definitions(-DMILVUS_TEST_SEGCORE_YAML_PATH="${CMAKE_SOURCE_DIR}/unittest/test_utils/test_segcore.yaml") -set(MILVUS_TEST_FILES - init_gtest.cpp - test_binary.cpp - test_bitmap.cpp - test_common.cpp - test_concurrent_vector.cpp - test_c_api.cpp - test_expr.cpp - test_get_entity_by_ids.cpp - test_indexing.cpp - test_index_wrapper.cpp - test_init.cpp - test_plan_proto.cpp - test_query.cpp - test_reduce.cpp - test_sealed.cpp - test_segcore.cpp - test_span.cpp - test_timestamp_index.cpp - test_reduce_c.cpp - test_conf_adapter_mgr.cpp - test_similarity_corelation.cpp - test_utils.cpp - ) +if (LINUX) + set(MILVUS_TEST_FILES + init_gtest.cpp + test_binary.cpp + test_bitmap.cpp + test_common.cpp + test_concurrent_vector.cpp + test_c_api.cpp + test_expr.cpp + test_get_entity_by_ids.cpp + test_indexing.cpp + test_index_wrapper.cpp + test_init.cpp + test_plan_proto.cpp + test_query.cpp + test_reduce.cpp + test_sealed.cpp + test_segcore.cpp + test_span.cpp + test_timestamp_index.cpp + test_reduce_c.cpp + test_conf_adapter_mgr.cpp + test_similarity_corelation.cpp + test_utils.cpp + ) + # check if memory leak exists in index builder + set(INDEX_BUILDER_TEST_FILES + test_index_wrapper.cpp + ) + + add_executable(index_builder_test + ${INDEX_BUILDER_TEST_FILES} + ) + + target_link_libraries(index_builder_test + gtest + gtest_main + milvus_segcore + milvus_indexbuilder + log + pthread + ) + install(TARGETS index_builder_test DESTINATION unittest) +endif() + +if ( APPLE ) + set(MILVUS_TEST_FILES + init_gtest.cpp + test_common.cpp + test_concurrent_vector.cpp + test_conf_adapter_mgr.cpp + test_plan_proto.cpp + test_reduce.cpp + test_reduce_c.cpp + test_segcore.cpp + test_timestamp_index.cpp + test_similarity_corelation.cpp + test_utils.cpp + ) +endif () add_executable(all_tests ${MILVUS_TEST_FILES} ) -# check if memory leak exists in index builder -set(INDEX_BUILDER_TEST_FILES - test_index_wrapper.cpp - ) - -add_executable(index_builder_test - ${INDEX_BUILDER_TEST_FILES} - ) - -target_link_libraries(index_builder_test - gtest - gtest_main - milvus_segcore - milvus_indexbuilder - log - pthread - ) - target_link_libraries(all_tests gtest milvus_segcore @@ -71,5 +88,4 @@ target_link_libraries(all_tests ) install(TARGETS all_tests DESTINATION unittest) -install(TARGETS index_builder_test DESTINATION unittest) diff --git a/internal/core/unittest/test_c_api.cpp b/internal/core/unittest/test_c_api.cpp index ee2c83228c..892a8fb206 100644 --- a/internal/core/unittest/test_c_api.cpp +++ b/internal/core/unittest/test_c_api.cpp @@ -16,12 +16,12 @@ #include #include #include +#include +#include +#include +#include #include "common/LoadInfo.h" -#include "index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h" -#include "index/knowhere/knowhere/index/vector_index/adapter/VectorAdapter.h" -#include "index/knowhere/knowhere/index/vector_index/VecIndexFactory.h" -#include "index/knowhere/knowhere/index/vector_index/IndexIVFPQ.h" #include "pb/milvus.pb.h" #include "pb/plan.pb.h" #include "query/ExprImpl.h" diff --git a/internal/core/unittest/test_conf_adapter_mgr.cpp b/internal/core/unittest/test_conf_adapter_mgr.cpp index 0a9d1d4d61..c840c8e903 100644 --- a/internal/core/unittest/test_conf_adapter_mgr.cpp +++ b/internal/core/unittest/test_conf_adapter_mgr.cpp @@ -11,10 +11,8 @@ #include #include - #include - -#include "index/knowhere/knowhere/index/vector_index/ConfAdapterMgr.h" +#include TEST(AdapterMgr, MultiThread) { auto run_case = [&]() { diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp index 70fcace043..e0ce33dcbc 100644 --- a/internal/core/unittest/test_index_wrapper.cpp +++ b/internal/core/unittest/test_index_wrapper.cpp @@ -13,13 +13,14 @@ #include #include #include +#include +#include +#include +#include #include "indexbuilder/IndexWrapper.h" #include "indexbuilder/index_c.h" #include "indexbuilder/utils.h" -#include "index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h" -#include "index/knowhere/knowhere/index/vector_index/adapter/VectorAdapter.h" -#include "index/knowhere/knowhere/archive/KnowhereConfig.h" #include "pb/index_cgo_msg.pb.h" #include "test_utils/DataGen.h" #include "test_utils/indexbuilder_test_utils.h" diff --git a/internal/core/unittest/test_utils.cpp b/internal/core/unittest/test_utils.cpp index e3f3365592..4a3547a68e 100644 --- a/internal/core/unittest/test_utils.cpp +++ b/internal/core/unittest/test_utils.cpp @@ -11,8 +11,8 @@ #include #include +#include -#include "index/thirdparty/faiss/MetricType.h" #include "segcore/Utils.h" TEST(Util, FaissMetricTypeToString) { diff --git a/internal/core/unittest/test_utils/DataGen.h b/internal/core/unittest/test_utils/DataGen.h index ef3fef2000..62ad27fce1 100644 --- a/internal/core/unittest/test_utils/DataGen.h +++ b/internal/core/unittest/test_utils/DataGen.h @@ -15,13 +15,13 @@ #include #include #include +#include +#include +#include +#include #include "Constants.h" #include "common/Schema.h" -#include "knowhere/index/vector_index/VecIndex.h" -#include "knowhere/index/vector_index/adapter/VectorAdapter.h" -#include "knowhere/index/vector_index/VecIndexFactory.h" -#include "knowhere/index/vector_index/IndexIVF.h" #include "query/SearchOnIndex.h" #include "segcore/SegmentGrowingImpl.h" #include "segcore/SegmentSealedImpl.h" diff --git a/internal/core/unittest/test_utils/indexbuilder_test_utils.h b/internal/core/unittest/test_utils/indexbuilder_test_utils.h index b0d1c79c81..ee218b353b 100644 --- a/internal/core/unittest/test_utils/indexbuilder_test_utils.h +++ b/internal/core/unittest/test_utils/indexbuilder_test_utils.h @@ -16,15 +16,15 @@ #include #include #include +#include +#include +#include +#include #include "pb/index_cgo_msg.pb.h" -#include "index/knowhere/knowhere/index/vector_index/helpers/IndexParameter.h" -#include "index/knowhere/knowhere/index/vector_index/adapter/VectorAdapter.h" #include "indexbuilder/IndexWrapper.h" #include "indexbuilder/index_c.h" #include "DataGen.h" -#include "faiss/MetricType.h" -#include "index/knowhere/knowhere/index/vector_index/VecIndexFactory.h" #include "indexbuilder/utils.h" constexpr int64_t DIM = 128; diff --git a/internal/indexnode/index.go b/internal/indexnode/index.go index e2827cbb84..070f3be63a 100644 --- a/internal/indexnode/index.go +++ b/internal/indexnode/index.go @@ -20,7 +20,8 @@ package indexnode #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_indexbuilder -Wl,-rpath=${SRCDIR}/../core/output/lib +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_indexbuilder -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_indexbuilder -Wl,-rpath=${SRCDIR}/../core/output/lib #include // free #include "segcore/collection_c.h" diff --git a/internal/indexnode/index_test.go b/internal/indexnode/index_test.go index b5a125a067..e518108c6a 100644 --- a/internal/indexnode/index_test.go +++ b/internal/indexnode/index_test.go @@ -14,6 +14,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux +// +build linux + package indexnode import ( diff --git a/internal/indexnode/indexnode.go b/internal/indexnode/indexnode.go index 3db52ab06b..d60b3337cb 100644 --- a/internal/indexnode/indexnode.go +++ b/internal/indexnode/indexnode.go @@ -20,7 +20,8 @@ package indexnode #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_indexbuilder -Wl,-rpath=${SRCDIR}/../core/output/lib +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_indexbuilder -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_indexbuilder -Wl,-rpath=${SRCDIR}/../core/output/lib #include #include "indexbuilder/init_c.h" diff --git a/internal/indexnode/indexnode_test.go b/internal/indexnode/indexnode_test.go index 39875d9004..18f1c64555 100644 --- a/internal/indexnode/indexnode_test.go +++ b/internal/indexnode/indexnode_test.go @@ -14,6 +14,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build linux +// +build linux + package indexnode import ( diff --git a/internal/kv/rocksdb/cwrapper/CMakeLists.txt b/internal/kv/rocksdb/cwrapper/CMakeLists.txt index 1d7c740213..56f49f05f3 100644 --- a/internal/kv/rocksdb/cwrapper/CMakeLists.txt +++ b/internal/kv/rocksdb/cwrapper/CMakeLists.txt @@ -21,9 +21,15 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) include( ExternalProject ) -set( ROCKSDB_VERSION "6.15.2" ) -set( ROCKSDB_SOURCE_URL - "https://github.com/facebook/rocksdb/archive/v${ROCKSDB_VERSION}.tar.gz") +if ( APPLE ) + set( ROCKSDB_VERSION "6.27.3" ) + set( ROCKSDB_SOURCE_URL + "https://github.com/facebook/rocksdb/archive/v${ROCKSDB_VERSION}.tar.gz") +else () + set( ROCKSDB_VERSION "6.15.2" ) + set( ROCKSDB_SOURCE_URL + "https://github.com/facebook/rocksdb/archive/v${ROCKSDB_VERSION}.tar.gz") +endif () if( CUSTOM_THIRDPARTY_DOWNLOAD_PATH ) set( THIRDPARTY_DOWNLOAD_PATH ${CUSTOM_THIRDPARTY_DOWNLOAD_PATH} ) @@ -44,10 +50,17 @@ endif (CCACHE_FOUND) macro( build_rocksdb ) message( STATUS "Building ROCKSDB-${ROCKSDB_VERSION} from source" ) + if ( APPLE ) + set ( ROCKSDB_URL_MD5 "e4a0625f0cec82060e62c81b787a1124" ) + else () + set ( ROCKSDB_URL_MD5 "67f9e04fda62af551dd039c37b1808ac" ) + endif () + set( ROCKSDB_CMAKE_ARGS "-DWITH_GFLAGS=OFF" "-DROCKSDB_BUILD_SHARED=OFF" "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}" + "-DFAIL_ON_WARNINGS=OFF" #This is used to solve 'illegal instruction' problem in some machine "-DPORTABLE=ON" ) @@ -59,7 +72,7 @@ macro( build_rocksdb ) DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} INSTALL_DIR ${CMAKE_INSTALL_PREFIX} URL ${ROCKSDB_SOURCE_URL} - URL_MD5 "67f9e04fda62af551dd039c37b1808ac" + URL_MD5 ${ROCKSDB_URL_MD5} CMAKE_ARGS ${ROCKSDB_CMAKE_ARGS} INSTALL_COMMAND make install ) diff --git a/internal/kv/rocksdb/cwrapper/build.sh b/internal/kv/rocksdb/cwrapper/build.sh index 9db6aefccd..f806f3d8a3 100755 --- a/internal/kv/rocksdb/cwrapper/build.sh +++ b/internal/kv/rocksdb/cwrapper/build.sh @@ -63,6 +63,24 @@ while getopts "t:h:" arg; do done echo "BUILD_TYPE: " $BUILD_TYPE +unameOut="$(uname -s)" +if [[ ! ${jobs+1} ]]; then + case "${unameOut}" in + Linux*) jobs=$(nproc);; + Darwin*) + llvm_prefix="$(brew --prefix llvm)" + export CLANG_TOOLS_PATH="${llvm_prefix}/bin" + export CC="${llvm_prefix}/bin/clang" + export CXX="${llvm_prefix}/bin/clang++" + export LDFLAGS="-L${llvm_prefix}/lib -L/usr/local/opt/libomp/lib" + export CXXFLAGS="-I${llvm_prefix}/include -I/usr/local/include -I/usr/local/opt/libomp/include" + jobs=$(sysctl -n hw.physicalcpu);; + *) + echo "Exit 0, System:${unameOut}"; + exit 0; + esac +fi + pushd ${OUTPUT_LIB} CMAKE_CMD="cmake \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} .." @@ -70,7 +88,4 @@ CMAKE_CMD="cmake \ ${CMAKE_CMD} echo ${CMAKE_CMD} -if [[ ! ${jobs+1} ]]; then - jobs=$(nproc) -fi make -j ${jobs} VERBOSE=0 diff --git a/internal/querynode/cgo_helper.go b/internal/querynode/cgo_helper.go index c20239a116..7cd484b709 100644 --- a/internal/querynode/cgo_helper.go +++ b/internal/querynode/cgo_helper.go @@ -18,7 +18,8 @@ package querynode /* #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib #include "segcore/collection_c.h" #include "common/type_c.h" diff --git a/internal/querynode/collection.go b/internal/querynode/collection.go index 585f0e6b3e..c0d743542b 100644 --- a/internal/querynode/collection.go +++ b/internal/querynode/collection.go @@ -20,7 +20,8 @@ package querynode #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib #include "segcore/collection_c.h" #include "segcore/segment_c.h" diff --git a/internal/querynode/collection_replica.go b/internal/querynode/collection_replica.go index 237882abb5..1f54022d81 100644 --- a/internal/querynode/collection_replica.go +++ b/internal/querynode/collection_replica.go @@ -20,7 +20,8 @@ package querynode #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib #include "segcore/collection_c.h" #include "segcore/segment_c.h" diff --git a/internal/querynode/load_index_info.go b/internal/querynode/load_index_info.go index e4fa2a8207..b413c94cac 100644 --- a/internal/querynode/load_index_info.go +++ b/internal/querynode/load_index_info.go @@ -18,7 +18,8 @@ package querynode /* #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib #include "segcore/load_index_c.h" diff --git a/internal/querynode/partition.go b/internal/querynode/partition.go index 98b70ec316..e1cf96f2ee 100644 --- a/internal/querynode/partition.go +++ b/internal/querynode/partition.go @@ -20,7 +20,8 @@ package querynode #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib #include "segcore/collection_c.h" #include "segcore/segment_c.h" diff --git a/internal/querynode/plan.go b/internal/querynode/plan.go index f608a505c5..6aceea7624 100644 --- a/internal/querynode/plan.go +++ b/internal/querynode/plan.go @@ -18,7 +18,8 @@ package querynode /* #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib #include "segcore/collection_c.h" #include "segcore/segment_c.h" diff --git a/internal/querynode/query_node.go b/internal/querynode/query_node.go index f30db2d588..c6c3522902 100644 --- a/internal/querynode/query_node.go +++ b/internal/querynode/query_node.go @@ -20,7 +20,8 @@ package querynode #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib #include "segcore/collection_c.h" #include "segcore/segment_c.h" diff --git a/internal/querynode/reduce.go b/internal/querynode/reduce.go index f720ebb256..86b4d3d6e2 100644 --- a/internal/querynode/reduce.go +++ b/internal/querynode/reduce.go @@ -18,7 +18,8 @@ package querynode /* #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib #include "segcore/plan_c.h" #include "segcore/reduce_c.h" diff --git a/internal/querynode/segment.go b/internal/querynode/segment.go index daa889f369..d9025f5898 100644 --- a/internal/querynode/segment.go +++ b/internal/querynode/segment.go @@ -18,7 +18,9 @@ package querynode /* #cgo CFLAGS: -I${SRCDIR}/../core/output/include -#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib + +#cgo darwin LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath,"${SRCDIR}/../core/output/lib" +#cgo linux LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib #include "segcore/collection_c.h" #include "segcore/plan_c.h" diff --git a/internal/storage/payload.go b/internal/storage/payload.go index 0bb55e0b86..5bce8e1ae3 100644 --- a/internal/storage/payload.go +++ b/internal/storage/payload.go @@ -19,7 +19,7 @@ package storage /* #cgo CFLAGS: -I${SRCDIR}/cwrapper -#cgo LDFLAGS: -L${SRCDIR}/cwrapper/output/lib -L${SRCDIR}/cwrapper/output/lib64 -lwrapper -lparquet -larrow -larrow_bundled_dependencies -lstdc++ -lm +#cgo LDFLAGS: -L${SRCDIR}/cwrapper/output/lib -lwrapper -lparquet -larrow -larrow_bundled_dependencies -lstdc++ -lm #include #include "ParquetWrapper.h" */ diff --git a/internal/util/metricsinfo/container_darwin.go b/internal/util/metricsinfo/container_darwin.go new file mode 100644 index 0000000000..18c32304e6 --- /dev/null +++ b/internal/util/metricsinfo/container_darwin.go @@ -0,0 +1,32 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License. + +package metricsinfo + +import ( + "errors" +) + +// inContainer checks if the service is running inside a container +// It should be always false while under windows. +func inContainer() (bool, error) { + return false, nil +} + +// getContainerMemLimit returns memory limit and error +func getContainerMemLimit() (uint64, error) { + return 0, errors.New("Not supported") +} + +// getContainerMemUsed returns memory usage and error +func getContainerMemUsed() (uint64, error) { + return 0, errors.New("Not supported") +} diff --git a/scripts/core_build.sh b/scripts/core_build.sh index c84f3d4c78..cdf5087291 100755 --- a/scripts/core_build.sh +++ b/scripts/core_build.sh @@ -144,6 +144,20 @@ if [[ ${MAKE_CLEAN} == "ON" ]]; then exit 0 fi +unameOut="$(uname -s)" +case "${unameOut}" in + Darwin*) + llvm_prefix="$(brew --prefix llvm)" + export CLANG_TOOLS_PATH="${llvm_prefix}/bin" + export CC="${llvm_prefix}/bin/clang" + export CXX="${llvm_prefix}/bin/clang++" + export LDFLAGS="-L${llvm_prefix}/lib -L/usr/local/opt/libomp/lib" + export CXXFLAGS="-I${llvm_prefix}/include -I/usr/local/include -I/usr/local/opt/libomp/include" + ;; + *) echo "==System:${unameOut}"; +esac + + CMAKE_CMD="cmake \ -DBUILD_UNIT_TEST=${BUILD_UNITTEST} \ -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} diff --git a/scripts/cwrapper_build.sh b/scripts/cwrapper_build.sh index b437cbb0f9..d5bcfc7b56 100755 --- a/scripts/cwrapper_build.sh +++ b/scripts/cwrapper_build.sh @@ -80,6 +80,11 @@ ${CMAKE_CMD} echo ${CMAKE_CMD} if [[ ! ${jobs+1} ]]; then - jobs=$(nproc) + unameOut="$(uname -s)" + case "${unameOut}" in + Linux*) jobs=$(nproc);; + Darwin*) jobs=$(sysctl -n hw.physicalcpu);; + *) echo "UNKNOWN:${unameOut}" ; exit 0; + esac fi make -j ${jobs} && make install diff --git a/scripts/cwrapper_rocksdb_build.sh b/scripts/cwrapper_rocksdb_build.sh index 96812c0071..b613f4fc57 100755 --- a/scripts/cwrapper_rocksdb_build.sh +++ b/scripts/cwrapper_rocksdb_build.sh @@ -23,7 +23,6 @@ while [ -h $SOURCE ]; do # resolve $SOURCE until the file is no longer a symlink [[ $SOURCE != /* ]] && SOURCE=$DIR/$SOURCE # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located done DIR=$( cd -P $( dirname $SOURCE ) && pwd ) -# echo $DIR CMAKE_BUILD=${DIR}/../cwrapper_rocksdb_build @@ -67,23 +66,43 @@ echo "CUSTOM_THIRDPARTY_PATH: " $CUSTOM_THIRDPARTY_PATH pushd ${CMAKE_BUILD} CMAKE_CMD="cmake \ --DCMAKE_BUILD_TYPE=${BUILD_TYPE} +-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DCMAKE_INSTALL_PREFIX=${OUTPUT_LIB} \ -DCUSTOM_THIRDPARTY_DOWNLOAD_PATH=${CUSTOM_THIRDPARTY_PATH} ${SRC_DIR}" ${CMAKE_CMD} echo ${CMAKE_CMD} +unameOut="$(uname -s)" if [[ ! ${jobs+1} ]]; then - jobs=$(nproc) + case "${unameOut}" in + Linux*) jobs=$(nproc);; + Darwin*) jobs=$(sysctl -n hw.physicalcpu);; + *) echo "UNKNOWN:${unameOut}"; exit 0; + esac fi + make -j ${jobs} go env -w CGO_CFLAGS="-I${OUTPUT_LIB}/include" +ldflags="" if [ -f "${OUTPUT_LIB}/lib/librocksdb.a" ]; then - go env -w CGO_LDFLAGS="-L${OUTPUT_LIB}/lib -l:librocksdb.a -lstdc++ -lm -lz" + case "${unameOut}" in + Linux*) ldflags="-L${OUTPUT_LIB}/lib -l:librocksdb.a -lstdc++ -lm -lz";; + Darwin*) ldflags="-L${OUTPUT_LIB}/lib -lrocksdb -stdlib=libc++ -lm -lz -lbz2 -ldl";; + *) echo "UNKNOWN:${unameOut}"; exit 0; + esac else - go env -w CGO_LDFLAGS="-L${OUTPUT_LIB}/lib64 -l:librocksdb.a -lstdc++ -lm -lz" + case "${unameOut}" in + Linux*) ldflags="-L${OUTPUT_LIB}/lib64 -l:librocksdb.a -lstdc++ -lm -lz";; + Darwin*) ldflags="-L${OUTPUT_LIB}/lib64 -lrocksdb -stdlib=libc++ -lm -lz -lbz2 -ldl";; + *) echo "UNKNOWN:${unameOut}" ; exit 0; + esac fi +if [[ $(arch) == 'arm64' ]]; then + go env -w GOARCH=arm64 +fi + +go env -w CGO_LDFLAGS="$ldflags" && GO111MODULE=on go get github.com/tecbot/gorocksdb diff --git a/scripts/install_deps.sh b/scripts/install_deps.sh index b3910b4517..2fb4f9ca5c 100755 --- a/scripts/install_deps.sh +++ b/scripts/install_deps.sh @@ -16,38 +16,80 @@ # See the License for the specific language governing permissions and # limitations under the License. -if [[ -x "$(command -v apt)" ]]; then - # for Ubuntu 18.04 - sudo apt install -y g++ gcc make ccache libssl-dev zlib1g-dev libboost-regex-dev \ - libboost-program-options-dev libboost-system-dev libboost-filesystem-dev \ - libboost-serialization-dev python3-dev libboost-python-dev libcurl4-openssl-dev gfortran libtbb-dev -elif [[ -x "$(command -v yum)" ]]; then - # for CentOS 7 - sudo yum install -y epel-release centos-release-scl-rh && \ - sudo yum install -y git make automake ccache openssl-devel zlib-devel \ - libcurl-devel python3-devel \ - devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran \ - llvm-toolset-7.0-clang llvm-toolset-7.0-clang-tools-extra +function install_linux_deps() { + if [[ -x "$(command -v apt)" ]]; then + # for Ubuntu 18.04 + sudo apt install -y g++ gcc make ccache libssl-dev zlib1g-dev libboost-regex-dev \ + libboost-program-options-dev libboost-system-dev libboost-filesystem-dev \ + libboost-serialization-dev python3-dev libboost-python-dev libcurl4-openssl-dev gfortran libtbb-dev + elif [[ -x "$(command -v yum)" ]]; then + # for CentOS 7 + sudo yum install -y epel-release centos-release-scl-rh && \ + sudo yum install -y git make automake ccache openssl-devel zlib-devel \ + libcurl-devel python3-devel \ + devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran \ + llvm-toolset-7.0-clang llvm-toolset-7.0-clang-tools-extra - echo "source scl_source enable devtoolset-7" | sudo tee -a /etc/profile.d/devtoolset-7.sh - echo "source scl_source enable llvm-toolset-7.0" | sudo tee -a /etc/profile.d/llvm-toolset-7.sh - echo "export CLANG_TOOLS_PATH=/opt/rh/llvm-toolset-7.0/root/usr/bin" | sudo tee -a /etc/profile.d/llvm-toolset-7.sh - source "/etc/profile.d/llvm-toolset-7.sh" + echo "source scl_source enable devtoolset-7" | sudo tee -a /etc/profile.d/devtoolset-7.sh + echo "source scl_source enable llvm-toolset-7.0" | sudo tee -a /etc/profile.d/llvm-toolset-7.sh + echo "export CLANG_TOOLS_PATH=/opt/rh/llvm-toolset-7.0/root/usr/bin" | sudo tee -a /etc/profile.d/llvm-toolset-7.sh + source "/etc/profile.d/llvm-toolset-7.sh" - # Install tbb - git clone https://github.com/wjakob/tbb.git && \ - cd tbb/build && \ - cmake .. && make -j && \ - sudo make install && \ - cd ../../ && rm -rf tbb/ + # Install tbb + git clone https://github.com/wjakob/tbb.git && \ + cd tbb/build && \ + cmake .. && make -j && \ + sudo make install && \ + cd ../../ && rm -rf tbb/ - # Install boost - wget -q https://boostorg.jfrog.io/artifactory/main/release/1.65.1/source/boost_1_65_1.tar.gz && \ - tar zxf boost_1_65_1.tar.gz && cd boost_1_65_1 && \ - ./bootstrap.sh --prefix=/usr/local --with-toolset=gcc --without-libraries=python && \ - sudo ./b2 -j2 --prefix=/usr/local --without-python toolset=gcc install && \ - cd ../ && rm -rf ./boost_1_65_1* -else - echo "Error Install Dependencies ..." - exit 1 + # Install boost + wget -q https://boostorg.jfrog.io/artifactory/main/release/1.65.1/source/boost_1_65_1.tar.gz && \ + tar zxf boost_1_65_1.tar.gz && cd boost_1_65_1 && \ + ./bootstrap.sh --prefix=/usr/local --with-toolset=gcc --without-libraries=python && \ + sudo ./b2 -j2 --prefix=/usr/local --without-python toolset=gcc install && \ + cd ../ && rm -rf ./boost_1_65_1* + else + echo "Error Install Dependencies ..." + exit 1 + fi +} + +function install_mac_deps() { + sudo xcode-select --install > /dev/null 2>&1 + brew install boost libomp ninja tbb cmake llvm + brew uninstall grep + brew install grep + export PATH="/usr/local/opt/grep/libexec/gnubin:$PATH" + brew update && brew upgrade && brew cleanup + + if [[ $(arch) == 'arm64' ]]; then + sudo mkdir /usr/local/include + sudo mkdir /usr/local/opt + sudo ln -s "$(brew --prefix llvm)" "/usr/local/opt/llvm" + sudo ln -s "$(brew --prefix libomp)/include/omp.h" "/usr/local/include/omp.h" + sudo ln -s "$(brew --prefix libomp)" "/usr/local/opt/libomp" + sudo ln -s "$(brew --prefix boost)/include/boost" "/usr/local/include/boost" + sudo ln -s "$(brew --prefix tbb)/include/tbb" "/usr/local/include/tbb" + sudo ln -s "$(brew --prefix tbb)/include/oneapi" "/usr/local/include/oneapi" + fi +} + +if ! command -v go &> /dev/null +then + echo "go could not be found, please install it" + exit fi + +if ! command -v cmake &> /dev/null +then + echo "cmake could not be found, please install it" + exit +fi + +unameOut="$(uname -s)" +case "${unameOut}" in + Linux*) install_linux_deps;; + Darwin*) install_mac_deps;; + *) echo "Unsupported OS:${unameOut}" ; exit 0; +esac + diff --git a/scripts/run_go_codecov.sh b/scripts/run_go_codecov.sh index dfa2bc8102..7850d863a8 100755 --- a/scripts/run_go_codecov.sh +++ b/scripts/run_go_codecov.sh @@ -23,14 +23,26 @@ set -e echo "mode: atomic" > ${FILE_COVERAGE_INFO} # run unittest +# TODO: "-race" is temporarily disabled for Mac Silicon. Add back when available. echo "Running unittest under ./internal" -for d in $(go list ./internal... | grep -v vendor); do - go test -race -v -coverpkg=./... -coverprofile=profile.out -covermode=atomic "$d" - if [ -f profile.out ]; then - sed '1d' profile.out >> ${FILE_COVERAGE_INFO} - rm profile.out - fi -done +if [[ "$(uname -s)" == "Darwin" ]]; then + export MallocNanoZone=0 + for d in $(go list ./internal... | grep -v -e vendor -e internal/querycoord -e /metricsinfo -e internal/proxy -e internal/querynode); do + go test -race -v -coverpkg=./... -coverprofile=profile.out -covermode=atomic "$d" + if [ -f profile.out ]; then + sed '1d' profile.out >> ${FILE_COVERAGE_INFO} + rm profile.out + fi + done +else + for d in $(go list ./internal... | grep -v vendor); do + go test -race -v -coverpkg=./... -coverprofile=profile.out -covermode=atomic "$d" + if [ -f profile.out ]; then + sed '1d' profile.out >> ${FILE_COVERAGE_INFO} + rm profile.out + fi + done +fi # generate html report go tool cover -html=./${FILE_COVERAGE_INFO} -o ./${FILE_COVERAGE_HTML} diff --git a/scripts/run_go_unittest.sh b/scripts/run_go_unittest.sh index 77b14ffcb1..97fa154c89 100755 --- a/scripts/run_go_unittest.sh +++ b/scripts/run_go_unittest.sh @@ -27,6 +27,11 @@ while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symli done ROOT_DIR="$( cd -P "$( dirname "$SOURCE" )/.." && pwd )" +unameOut="$(uname -s)" +if [[ "$unameOut" == "Darwin" ]]; then + export MallocNanoZone=0 +fi + # ignore MinIO,S3 unittes MILVUS_DIR="${ROOT_DIR}/internal/" echo "Running go unittest under $MILVUS_DIR" @@ -49,11 +54,16 @@ go test -race -cover "${MILVUS_DIR}/util/typeutil/..." -failfast #go test -race -cover "${MILVUS_DIR}/proxy/..." -failfast go test -race -cover "${MILVUS_DIR}/datanode/..." -failfast go test -race -cover "${MILVUS_DIR}/indexnode/..." -failfast -go test -race -cover "${MILVUS_DIR}/querynode/..." -failfast -go test -race -cover -v "${MILVUS_DIR}/distributed/rootcoord" -failfast -go test -race -cover -v "${MILVUS_DIR}/rootcoord" -failfast -go test -race -cover -v "${MILVUS_DIR}/datacoord/..." -failfast -go test -race -cover -v "${MILVUS_DIR}/indexcoord/..." -failfast +# TODO: enable ut on mac os +case "${unameOut}" in + Linux*) go test -race -cover "${MILVUS_DIR}/querynode/..." -failfast;; + *) echo "Skip querynode unit tests, unsupported os:${unameOut}"; +esac + +go test -race -cover "${MILVUS_DIR}/distributed/rootcoord" -failfast +go test -race -cover "${MILVUS_DIR}/rootcoord" -failfast +go test -race -cover "${MILVUS_DIR}/datacoord/..." -failfast +go test -race -cover "${MILVUS_DIR}/indexcoord/..." -failfast echo " Go unittest finished"