enhance: [StorageV2] enable build with azure (#44177)

related: #43869

---------

Signed-off-by: shaoting-huang <shaoting.huang@zilliz.com>
This commit is contained in:
sthuang 2025-09-14 08:05:58 +08:00 committed by GitHub
parent 9228ed7b8f
commit b38013352d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 22 additions and 284 deletions

View File

@ -71,6 +71,7 @@ class MilvusConan(ConanFile):
"arrow:with_jemalloc": True,
"arrow:with_openssl": True,
"arrow:shared": False,
"arrow:with_azure": True,
"arrow:with_s3": True,
"arrow:encryption": True,
"aws-sdk-cpp:config": True,

View File

@ -85,7 +85,8 @@ if(USE_OPENDAL)
endif()
if(DEFINED AZURE_BUILD_DIR)
set(LINK_TARGETS ${LINK_TARGETS} azure_blob_chunk_manager)
# Arrow already includes Azure SDK when built with_azure=True
# No need to link additional azure_blob_chunk_manager
endif()
if (ENABLE_GCP_NATIVE)

View File

@ -23,19 +23,7 @@ endif()
if (DEFINED AZURE_BUILD_DIR)
add_definitions(-DAZURE_BUILD_DIR)
include_directories(azure-blob-storage)
include_directories("${AZURE_BUILD_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include")
set(SOURCE_FILES ${SOURCE_FILES} azure/AzureChunkManager.cpp)
add_library(azure_blob_chunk_manager SHARED IMPORTED)
set_target_properties(azure_blob_chunk_manager
PROPERTIES
IMPORTED_GLOBAL TRUE
IMPORTED_LOCATION "${AZURE_BUILD_DIR}/libblob-chunk-manager${CMAKE_SHARED_LIBRARY_SUFFIX}"
)
get_target_property(AZURE_IMPORTED_LOCATION azure_blob_chunk_manager IMPORTED_LOCATION)
get_target_property(AZURE_INTERFACE_INCLUDE_DIRECTORIES azure_blob_chunk_manager INTERFACE_INCLUDE_DIRECTORIES)
message("AZURE_IMPORTED_LOCATION: ${AZURE_IMPORTED_LOCATION}")
message("AZURE_INTERFACE_INCLUDE_DIRECTORIES: ${AZURE_INTERFACE_INCLUDE_DIRECTORIES}")
add_subdirectory(azure)
endif()
if(USE_OPENDAL)

View File

@ -1,43 +0,0 @@
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License
# Copyright (c) Microsoft Corporation. All rights reserved.
# SPDX-License-Identifier: MIT
# Standalone build of the Azure blob chunk manager shared library
# (blob-chunk-manager), with its Azure SDK dependencies resolved through vcpkg.
cmake_minimum_required (VERSION 3.12)
set(CMAKE_CXX_STANDARD 17)

# AzureVcpkg.cmake lives in ./cmake-modules; it must be included and
# az_vcpkg_integrate() must run BEFORE project() so that the toolchain file it
# may set is picked up.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake-modules")
message("${CMAKE_CURRENT_SOURCE_DIR}")
include(AzureVcpkg)
az_vcpkg_integrate()

project(azure-blob-storage)

# Best-effort install of the Microsoft.Attestation.Client NuGet package.
# The original used `message(FATAL ...)`; "FATAL" is not a message mode, so the
# text was merely printed and configuration continued. Nothing in this file
# consumes the package, so the step stays non-fatal, but now uses a real mode.
find_program(NUGET_EXE NAMES nuget)
if(NOT NUGET_EXE)
  message(WARNING "CMake could not find the nuget command line tool. Please install it from https://www.nuget.org/downloads!")
else()
  # execute_process() replaces the long-deprecated exec_program().
  execute_process(
    COMMAND "${NUGET_EXE}" install "Microsoft.Attestation.Client"
            -Version 0.1.181
            -ExcludeVersion
            -OutputDirectory "${CMAKE_BINARY_DIR}/packages"
    RESULT_VARIABLE nuget_install_result
  )
  if(NOT nuget_install_result EQUAL 0)
    message(WARNING "nuget install of Microsoft.Attestation.Client failed: ${nuget_install_result}")
  endif()
  unset(nuget_install_result)
endif()

find_package(azure-storage-blobs-cpp CONFIG REQUIRED)
find_package(azure-identity-cpp CONFIG REQUIRED)

# Directory-wide flags: these also apply to the test subdirectory added below.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-return-type -Wno-pedantic -fPIC")

add_library(blob-chunk-manager SHARED AzureBlobChunkManager.cpp)
target_link_libraries(blob-chunk-manager PUBLIC Azure::azure-identity Azure::azure-storage-blobs)

# should be link directly into libmilvus_core in future.
install(TARGETS blob-chunk-manager DESTINATION "${CMAKE_INSTALL_LIBDIR}")

# Unit tests are built only when the caller passes -DBUILD_UNIT_TEST=ON.
if ( BUILD_UNIT_TEST STREQUAL "ON" )
  add_subdirectory(test)
endif ()

View File

@ -1,171 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Path to the repository root, taken as the parent of the directory containing this
# file. Needed for things like referencing the ./LICENSE.txt file.
set(AZ_ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}/..")
# az_vcpkg_integrate()
#
# Ensures a vcpkg toolchain is configured. Takes no arguments.
#   * If CMAKE_TOOLCHAIN_FILE is already defined, it is left untouched.
#   * Otherwise, unless the AZURE_SDK_DISABLE_AUTO_VCPKG environment variable is
#     defined, vcpkg is fetched with FetchContent at a pinned commit
#     (overridable through the AZURE_SDK_VCPKG_COMMIT environment variable) and
#     CMAKE_TOOLCHAIN_FILE is cached to point at the fetched vcpkg.cmake.
#   * VCPKG_TARGET_TRIPLET is cached from the VCPKG_DEFAULT_TRIPLET environment
#     variable when the former is not already defined.
# This is a macro, so the variables it sets land in the caller's scope.
macro(az_vcpkg_integrate)
  message("Vcpkg integrate step.")

  # An explicit `cmake -DCMAKE_TOOLCHAIN_FILE=...` from the user always wins.
  if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
    message("CMAKE_TOOLCHAIN_FILE is not defined. Define it for the user.")

    # Defining AZURE_SDK_DISABLE_AUTO_VCPKG opts out of the automatic clone and
    # leaves dependency installation to the user.
    if(NOT DEFINED ENV{AZURE_SDK_DISABLE_AUTO_VCPKG})
      message("AZURE_SDK_DISABLE_AUTO_VCPKG is not defined. Fetch a local copy of vcpkg.")

      # Choose the vcpkg commit: the environment override if present, otherwise
      # the default commit the SDK was tested against.
      if(DEFINED ENV{AZURE_SDK_VCPKG_COMMIT})
        message("AZURE_SDK_VCPKG_COMMIT is defined. Using that instead of the default.")
        set(VCPKG_COMMIT_STRING "$ENV{AZURE_SDK_VCPKG_COMMIT}")
      else()
        set(VCPKG_COMMIT_STRING 8150939b69720adc475461978e07c2d2bf5fb76e)
      endif()
      message("Vcpkg commit string used: ${VCPKG_COMMIT_STRING}")

      include(FetchContent)
      FetchContent_Declare(
        vcpkg
        GIT_REPOSITORY https://github.com/microsoft/vcpkg.git
        GIT_TAG ${VCPKG_COMMIT_STRING}
      )

      # Populate only once, even if this macro is invoked repeatedly.
      FetchContent_GetProperties(vcpkg)
      if(NOT vcpkg_POPULATED)
        FetchContent_Populate(vcpkg)
      endif()

      # Point the toolchain at the freshly fetched vcpkg sources.
      set(CMAKE_TOOLCHAIN_FILE "${vcpkg_SOURCE_DIR}/scripts/buildsystems/vcpkg.cmake" CACHE STRING "")
    endif()
  endif()

  # Allow the triplet to be customized through the environment.
  if(NOT DEFINED VCPKG_TARGET_TRIPLET AND DEFINED ENV{VCPKG_DEFAULT_TRIPLET})
    set(VCPKG_TARGET_TRIPLET "$ENV{VCPKG_DEFAULT_TRIPLET}" CACHE STRING "")
  endif()

  message("Vcpkg integrate step - DONE.")
endmacro()
# az_vcpkg_portfile_prep(targetName fileName contentToRemove)
#
# Prepares one vcpkg port file for release.
#   targetName      - library name; output goes under ports/<targetName>-cpp.
#   fileName        - file to read from ${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/.
#   contentToRemove - text (using "\n" line endings) stripped from the file,
#                     typically the license comment header.
# The stripped file is written to ${CMAKE_BINARY_DIR}/vcpkg_prep/<targetName>/
# and then run through configure_file(@ONLY) into
# ${CMAKE_BINARY_DIR}/vcpkg/ports/<targetName>-cpp/.
macro(az_vcpkg_portfile_prep targetName fileName contentToRemove)
  # with sdk/<lib>/vcpkg/<fileName>
  file(READ "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/${fileName}" fileContents)

  # Windows -> Unix line endings.
  # string(FIND) takes the string itself, not a variable name: the original
  # searched the literal text "fileContents", so CRLF was never detected.
  string(FIND "${fileContents}" "\r\n" crLfPos)
  if(crLfPos GREATER -1)
    # Quote the expansion: unquoted, the content is split on ';' and the
    # pieces are concatenated, silently dropping every semicolon.
    string(REPLACE "\r\n" "\n" fileContents "${fileContents}")
  endif()

  # remove comment header
  string(REPLACE "${contentToRemove}" "" fileContents "${fileContents}")

  # undo Windows -> Unix line endings (if applicable)
  if(crLfPos GREATER -1)
    string(REPLACE "\n" "\r\n" fileContents "${fileContents}")
  endif()
  unset(crLfPos)

  # output to an intermediate location
  file(WRITE "${CMAKE_BINARY_DIR}/vcpkg_prep/${targetName}/${fileName}" "${fileContents}")
  # Macros share the caller's scope, so clean up the temporaries.
  unset(fileContents)

  # Produce the files to help with the vcpkg release.
  # Go to the /out/build/<cfg>/vcpkg directory, and copy (merge) "ports" folder to the vcpkg repo.
  # Then, update the portfile.cmake file SHA512 from "1" to the actual hash (a good way to do it is to uninstall a package,
  # clean vcpkg/downloads, vcpkg/buildtrees, run "vcpkg install <pkg>", and get the SHA from the error message).
  configure_file(
    "${CMAKE_BINARY_DIR}/vcpkg_prep/${targetName}/${fileName}"
    "${CMAKE_BINARY_DIR}/vcpkg/ports/${targetName}-cpp/${fileName}"
    @ONLY
  )
endmacro()
# az_vcpkg_export(targetName macroNamePart dllImportExportHeaderPath)
#
# Generates the vcpkg port files for a library target and sets up its
# install/export rules.
#   targetName                - CMake target to install; also names the
#                               "<targetName>-cpp" package.
#   macroNamePart             - middle part of the AZ_<part>_DLL /
#                               AZ_<part>_BEING_BUILT definitions.
#   dllImportExportHeaderPath - header, relative to ./inc, that is re-configured
#                               for Windows shared-library builds.
macro(az_vcpkg_export targetName macroNamePart dllImportExportHeaderPath)
  # GNUInstallDirs supplies the standard "bin", "lib", "include" folder names;
  # CMakePackageConfigHelpers supplies configure_package_config_file() and
  # write_basic_package_version_file().
  include(GNUInstallDirs)
  include(CMakePackageConfigHelpers)

  # Strip the license header from each vcpkg port file and stage it.
  foreach(portFile IN ITEMS "vcpkg.json" "portfile.cmake")
    az_vcpkg_portfile_prep(
      "${targetName}"
      "${portFile}"
      "# Copyright (c) Microsoft Corporation.\n# Licensed under the MIT License.\n\n"
    )
  endforeach()

  # Headers: copy our "inc" directory to "include" at the install location.
  install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/inc/azure/" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/azure")

  # License is installed as "copyright" (vcpkg dictates naming and location).
  install(FILES "${AZ_ROOT_DIR}/LICENSE.txt" DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" RENAME "copyright")

  # Where the target's artifacts go. Mirrors what other ports do.
  install(
    TARGETS "${targetName}"
    EXPORT "${targetName}-cppTargets"
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} # executables and DLLs go to "/bin"
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} # shared libraries on non-DLL platforms
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} # static libraries and DLL import libraries
    INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} # headers
  )

  # Windows DLL builds only: patch the dll_import_export.hpp header.
  if(WIN32 AND BUILD_SHARED_LIBS)
    add_compile_definitions(AZ_${macroNamePart}_BEING_BUILT)
    target_compile_definitions(${targetName} PUBLIC AZ_${macroNamePart}_DLL)

    # Set only for the duration of configure_file(); substituted via @ONLY.
    set(AZ_${macroNamePart}_DLL_INSTALLED_AS_PACKAGE "*/ + 1 /*")
    configure_file(
      "${CMAKE_CURRENT_SOURCE_DIR}/inc/${dllImportExportHeaderPath}"
      "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderPath}"
      @ONLY
    )
    unset(AZ_${macroNamePart}_DLL_INSTALLED_AS_PACKAGE)

    # Install the patched header next to the other installed headers.
    get_filename_component(dllImportExportHeaderDir ${dllImportExportHeaderPath} DIRECTORY)
    install(
      FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderPath}"
      DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${dllImportExportHeaderDir}"
    )
    unset(dllImportExportHeaderDir)
  endif()

  # The exported targets file itself.
  install(
    EXPORT "${targetName}-cppTargets"
    DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp"
    NAMESPACE Azure:: # Not the C++ namespace, but a namespace in terms of cmake.
    FILE "${targetName}-cppTargets.cmake"
  )

  # Package config file.
  configure_package_config_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg/Config.cmake.in"
    "${targetName}-cppConfig.cmake"
    INSTALL_DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp"
    PATH_VARS
      CMAKE_INSTALL_LIBDIR)

  # Package version file.
  write_basic_package_version_file(
    "${targetName}-cppConfigVersion.cmake"
    VERSION ${AZ_LIBRARY_VERSION} # the version that we extracted from package_version.hpp
    COMPATIBILITY SameMajorVersion
  )

  # Install both generated package files.
  install(
    FILES
      "${CMAKE_CURRENT_BINARY_DIR}/${targetName}-cppConfig.cmake"
      "${CMAKE_CURRENT_BINARY_DIR}/${targetName}-cppConfigVersion.cmake"
    DESTINATION
      "${CMAKE_INSTALL_DATAROOTDIR}/${targetName}-cpp" # to shares/<our_pkg>
  )

  # Export all the installs above as package.
  export(PACKAGE "${targetName}-cpp")
endmacro()

View File

@ -1,9 +0,0 @@
{
"name": "azure-blob-storage",
"version-string": "1.0.0",
"dependencies": [
"azure-identity-cpp",
"azure-storage-blobs-cpp",
"gtest"
]
}

View File

@ -9,7 +9,7 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include "../AzureBlobChunkManager.h"
#include "AzureBlobChunkManager.h"
#include <azure/identity/workload_identity_credential.hpp>
#include <gtest/gtest.h>

View File

@ -21,7 +21,7 @@
#include "common/EasyAssert.h"
#include "log/Log.h"
#include "monitor/Monitor.h"
#include "storage/azure/AzureChunkManager.h"
#include "AzureChunkManager.h"
namespace milvus {
namespace storage {

View File

@ -20,7 +20,7 @@
#include <stdlib.h>
#include <string>
#include <vector>
#include "storage/azure-blob-storage/AzureBlobChunkManager.h"
#include "AzureBlobChunkManager.h"
#include "storage/ChunkManager.h"
#include "storage/Types.h"
#include "log/Log.h"

View File

@ -14,7 +14,7 @@
#include <vector>
#include "common/EasyAssert.h"
#include "storage/azure/AzureChunkManager.h"
#include "AzureChunkManager.h"
#include "storage/Util.h"
using namespace std;

View File

@ -9,11 +9,14 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License
# Copyright (c) Microsoft Corporation. All rights reserved.
# SPDX-License-Identifier: MIT
# Use Arrow's Azure support instead of vcpkg
set(AZURE_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/AzureChunkManager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/AzureBlobChunkManager.cpp
)
project(azure-blob-test)
# Add Azure source files to parent's SOURCE_FILES
set(SOURCE_FILES ${SOURCE_FILES} ${AZURE_SRCS} PARENT_SCOPE)
add_executable(azure-blob-test test_azure_blob_chunk_manager.cpp ../AzureBlobChunkManager.cpp)
find_package(GTest CONFIG REQUIRED)
target_link_libraries(azure-blob-test PRIVATE Azure::azure-identity Azure::azure-storage-blobs GTest::gtest blob-chunk-manager)
# Include directories for Azure SDK from Arrow's dependencies
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -75,9 +75,10 @@ if (NOT (LINUX OR APPLE))
endif()
if (DEFINED AZURE_BUILD_DIR)
include_directories("${AZURE_BUILD_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include")
add_definitions(-DAZURE_BUILD_DIR)
else()
list(FILTER MILVUS_TEST_FILES EXCLUDE REGEX "AzureChunkManagerTest\\.cpp$")
list(FILTER MILVUS_TEST_FILES EXCLUDE REGEX "AzureBlobChunkManagerTest\\.cpp$")
endif()
# aws-sdk-cpp needs to be updated first; see https://github.com/aws/aws-sdk-cpp/issues/2119
# Once that is done, move this line to the else branch of `if (DEFINED AZURE_BUILD_DIR)`.

View File

@ -102,7 +102,7 @@ USE_DYNAMIC_SIMD="ON"
USE_OPENDAL="OFF"
TANTIVY_FEATURES=""
INDEX_ENGINE="KNOWHERE"
ENABLE_AZURE_FS="OFF"
ENABLE_AZURE_FS="ON"
: "${ENABLE_GCP_NATIVE:="OFF"}"
while getopts "p:t:s:n:a:y:x:o:f:ulcgbZh" arg; do
@ -189,37 +189,7 @@ usage:
esac
done
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
AZURE_BUILD_DIR="${ROOT_DIR}/cmake_build/azure"
if [ ! -d ${AZURE_BUILD_DIR} ]; then
mkdir -p ${AZURE_BUILD_DIR}
fi
pushd ${AZURE_BUILD_DIR}
env bash ${ROOT_DIR}/scripts/azure_build.sh -p ${INSTALL_PREFIX} -s ${ROOT_DIR}/internal/core/src/storage/azure-blob-storage -t ${BUILD_UNITTEST}
if [ ! -e libblob-chunk-manager* ]; then
echo "build blob-chunk-manager fail..."
cat vcpkg-bootstrap.log
exit 1
fi
if [ ! -e ${INSTALL_PREFIX}/lib/libblob-chunk-manager* ]; then
echo "install blob-chunk-manager fail..."
exit 1
fi
popd
SYSTEM_NAME=$(uname -s)
if [[ ${SYSTEM_NAME} == "Darwin" ]]; then
SYSTEM_NAME="osx"
elif [[ ${SYSTEM_NAME} == "Linux" ]]; then
SYSTEM_NAME="linux"
fi
ARCHITECTURE=$(uname -m)
if [[ ${ARCHITECTURE} == "x86_64" ]]; then
ARCHITECTURE="x64"
elif [[ ${ARCHITECTURE} == "aarch64" ]]; then
ARCHITECTURE="arm64"
fi
VCPKG_TARGET_TRIPLET=${ARCHITECTURE}-${SYSTEM_NAME}
fi
# Azure SDK build has been removed as we now use Arrow with Azure support directly
if [[ ! -d ${BUILD_OUTPUT_DIR} ]]; then
mkdir ${BUILD_OUTPUT_DIR}
@ -266,10 +236,7 @@ ${CMAKE_EXTRA_ARGS} \
-DTANTIVY_FEATURES_LIST=${TANTIVY_FEATURES} \
-DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} \
-DENABLE_AZURE_FS=${ENABLE_AZURE_FS} "
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} "
fi
# Azure build variables removed as we now use Arrow with Azure support directly
CMAKE_CMD=${CMAKE_CMD}"${CPP_SRC_DIR}"
echo "CC $CC"