enhance: change opendal as compile configurable (#30384)

#30373

Signed-off-by: luzhang <luzhang@zilliz.com>
Co-authored-by: luzhang <luzhang@zilliz.com>
This commit is contained in:
zhagnlu 2024-02-20 19:16:52 +08:00 committed by GitHub
parent f4559cbe54
commit 976b6fc0e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 87 additions and 85 deletions

View File

@ -34,6 +34,11 @@ use_dynamic_simd = ON
ifdef USE_DYNAMIC_SIMD
use_dynamic_simd = ${USE_DYNAMIC_SIMD}
endif
# OpenDAL build toggle: defaults to OFF and takes the value of USE_OPENDAL
# when that variable is defined (non-empty). The resulting value is handed
# to the build scripts through their -o option.
use_opendal = OFF
ifdef USE_OPENDAL
use_opendal = ${USE_OPENDAL}
endif
# golangci-lint
GOLANGCI_LINT_VERSION := 1.55.2
GOLANGCI_LINT_OUTPUT := $(shell $(INSTALL_PATH)/golangci-lint --version 2>/dev/null)
@ -197,7 +202,7 @@ download-milvus-proto:
build-3rdparty:
@echo "Build 3rdparty ..."
@(env bash $(PWD)/scripts/3rdparty_build.sh)
@(env bash $(PWD)/scripts/3rdparty_build.sh -o ${use_opendal})
generated-proto-without-cpp: download-milvus-proto
@echo "Generate proto ..."
@ -213,19 +218,19 @@ generated-proto: download-milvus-proto build-3rdparty
build-cpp: generated-proto
@echo "Building Milvus cpp library ..."
@(env bash $(PWD)/scripts/core_build.sh -t ${mode} -n ${use_disk_index} -y ${use_dynamic_simd} ${AZURE_OPTION} -x ${index_engine})
@(env bash $(PWD)/scripts/core_build.sh -t ${mode} -n ${use_disk_index} -y ${use_dynamic_simd} ${AZURE_OPTION} -x ${index_engine} -o ${use_opendal})
build-cpp-gpu: generated-proto
@echo "Building Milvus cpp gpu library ... "
@(env bash $(PWD)/scripts/core_build.sh -t ${mode} -g -n ${use_disk_index} -y ${use_dynamic_simd} ${AZURE_OPTION} -x ${index_engine})
@(env bash $(PWD)/scripts/core_build.sh -t ${mode} -g -n ${use_disk_index} -y ${use_dynamic_simd} ${AZURE_OPTION} -x ${index_engine} -o ${use_opendal})
build-cpp-with-unittest: generated-proto
@echo "Building Milvus cpp library with unittest ... "
@(env bash $(PWD)/scripts/core_build.sh -t ${mode} -u -n ${use_disk_index} -y ${use_dynamic_simd} ${AZURE_OPTION} -x ${index_engine})
@(env bash $(PWD)/scripts/core_build.sh -t ${mode} -u -n ${use_disk_index} -y ${use_dynamic_simd} ${AZURE_OPTION} -x ${index_engine} -o ${use_opendal})
build-cpp-with-coverage: generated-proto
@echo "Building Milvus cpp library with coverage and unittest ..."
@(env bash $(PWD)/scripts/core_build.sh -t ${mode} -a ${use_asan} -u -c -n ${use_disk_index} -y ${use_dynamic_simd} ${AZURE_OPTION} -x ${index_engine})
@(env bash $(PWD)/scripts/core_build.sh -t ${mode} -a ${use_asan} -u -c -n ${use_disk_index} -y ${use_dynamic_simd} ${AZURE_OPTION} -x ${index_engine} -o ${use_opendal})
check-proto-product: generated-proto
@(env bash $(PWD)/scripts/check_proto_product.sh)

View File

@ -33,6 +33,10 @@ if ( USE_DYNAMIC_SIMD )
add_definitions(-DUSE_DYNAMIC_SIMD)
endif()
# When the USE_OPENDAL CMake variable is enabled, define the USE_OPENDAL
# preprocessor macro so sources can guard OpenDAL-specific code with #ifdef.
if (USE_OPENDAL)
add_definitions(-DUSE_OPENDAL)
endif()
project(core)
include(CheckCXXCompilerFlag)
if ( APPLE )

View File

@ -35,7 +35,6 @@ target_link_libraries(milvus_common
yaml-cpp
boost_bitset_ext
simdjson
opendal
${CONAN_LIBS}
re2
)

View File

@ -197,11 +197,7 @@ template <typename T>
void
InvertedIndexTantivy<T>::BuildV2(const Config& config) {
auto field_name = mem_file_manager_->GetIndexMeta().field_name;
auto res = space_->ScanData();
if (!res.ok()) {
PanicInfo(S3Error, "failed to create scan iterator");
}
auto reader = res.value();
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {

View File

@ -63,11 +63,7 @@ ScalarIndexSort<T>::BuildV2(const Config& config) {
return;
}
auto field_name = file_manager_->GetIndexMeta().field_name;
auto res = space_->ScanData();
if (!res.ok()) {
PanicInfo(S3Error, "failed to create scan iterator");
}
auto reader = res.value();
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {

View File

@ -74,11 +74,7 @@ StringIndexMarisa::BuildV2(const Config& config) {
throw std::runtime_error("index has been built");
}
auto field_name = file_manager_->GetIndexMeta().field_name;
auto res = space_->ScanData();
if (!res.ok()) {
PanicInfo(S3Error, "failed to create scan iterator");
}
auto reader = res.value();
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {

View File

@ -422,14 +422,7 @@ VectorMemIndex<T>::BuildV2(const Config& config) {
auto field_name = create_index_info_.field_name;
auto field_type = create_index_info_.field_type;
auto dim = create_index_info_.dim;
auto res = space_->ScanData();
if (!res.ok()) {
PanicInfo(IndexBuildError,
"failed to create scan iterator: {}",
res.status().ToString());
}
auto reader = res.value();
auto reader = space_->ScanData();
std::vector<FieldDataPtr> field_datas;
for (auto rec : *reader) {
if (!rec.ok()) {

View File

@ -719,12 +719,8 @@ void
LoadFieldDatasFromRemote2(std::shared_ptr<milvus_storage::Space> space,
SchemaPtr schema,
FieldDataInfo& field_data_info) {
auto res = space->ScanData();
auto reader = space->ScanData();
if (!res.ok()) {
PanicInfo(S3Error, "failed to create scan iterator");
}
auto reader = res.value();
for (auto rec = reader->Next(); rec != nullptr; rec = reader->Next()) {
if (!rec.ok()) {
PanicInfo(DataFormatBroken, "failed to read data");

View File

@ -49,7 +49,6 @@ set(STORAGE_FILES
storage_c.cpp
ChunkManager.cpp
MinioChunkManager.cpp
OpenDALChunkManager.cpp
AliyunSTSClient.cpp
AliyunCredentialsProvider.cpp
MemFileManagerImpl.cpp
@ -60,6 +59,10 @@ set(STORAGE_FILES
TencentCloudCredentialsProvider.cpp
TencentCloudSTSClient.cpp)
# OpenDALChunkManager.cpp is added to the storage source list only when
# USE_OPENDAL is enabled.
if(USE_OPENDAL)
list(APPEND STORAGE_FILES OpenDALChunkManager.cpp)
endif()
add_library(milvus_storage SHARED ${STORAGE_FILES})
if (DEFINED AZURE_BUILD_DIR)

View File

@ -394,13 +394,7 @@ DiskFileManagerImpl::CacheRawDataToDisk(
uint32_t num_rows = 0;
uint32_t dim = 0;
int64_t write_offset = sizeof(num_rows) + sizeof(dim);
auto res = space->ScanData();
if (!res.ok()) {
PanicInfo(IndexBuildError,
fmt::format("failed to create scan iterator: {}",
res.status().ToString()));
}
auto reader = res.value();
auto reader = space->ScanData();
for (auto rec : *reader) {
if (!rec.ok()) {
PanicInfo(IndexBuildError,
@ -682,13 +676,7 @@ DiskFileManagerImpl::CacheOptFieldToDisk(
WriteOptFieldsIvfMeta(
local_chunk_manager, local_data_path, num_of_fields, write_offset);
auto res = space->ScanData();
if (!res.ok()) {
PanicInfo(IndexBuildError,
fmt::format("failed to create scan iterator: {}",
res.status().ToString()));
}
auto reader = res.value();
auto reader = space->ScanData();
for (auto& [field_id, tup] : fields_map) {
const auto& field_name = std::get<0>(tup);
const auto& field_type = std::get<1>(tup);

View File

@ -20,7 +20,6 @@
#include <shared_mutex>
#include "storage/Util.h"
#include "opendal.h"
namespace milvus::storage {

View File

@ -35,7 +35,9 @@
#include "storage/LocalChunkManager.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/MinioChunkManager.h"
#ifdef USE_OPENDAL
#include "storage/OpenDALChunkManager.h"
#endif
#include "storage/Types.h"
#include "storage/ThreadPools.h"
#include "storage/Util.h"
@ -686,10 +688,11 @@ CreateChunkManager(const StorageConfig& storage_config) {
}
}
}
#ifdef USE_OPENDAL
case ChunkManagerType::OpenDAL: {
return std::make_shared<OpenDALChunkManager>(storage_config);
}
#endif
default: {
PanicInfo(ConfigInvalid,
"unsupported storage_config.storage_type {}",

View File

@ -36,7 +36,9 @@ add_subdirectory(boost_ext)
add_subdirectory(rocksdb)
add_subdirectory(rdkafka)
add_subdirectory(simdjson)
# The opendal third-party subdirectory is included only when USE_OPENDAL is enabled.
if (USE_OPENDAL)
add_subdirectory(opendal)
endif()
add_subdirectory(tantivy)
add_subdirectory(milvus-storage)

View File

@ -11,7 +11,7 @@
# or implied. See the License for the specific language governing permissions and limitations under the License.
#-------------------------------------------------------------------------------
set( MILVUS_STORAGE_VERSION 4a9a35e)
set( MILVUS_STORAGE_VERSION 9d1ad9c)
message(STATUS "Building milvus-storage-${MILVUS_STORAGE_VERSION} from source")
message(STATUS ${CMAKE_BUILD_TYPE})

View File

@ -4,6 +4,11 @@ project(milvus-storage VERSION 0.1.0)
option(WITH_UT "Build the testing tree." ON)
option(WITH_ASAN "Build with address sanitizer." OFF)
# USE_OPENDAL (default OFF) toggles OpenDAL support for this project; when it
# is ON, the MILVUS_OPENDAL compile definition is added.
option(USE_OPENDAL "Build with opendal." OFF)
if (USE_OPENDAL)
add_compile_definitions(MILVUS_OPENDAL)
endif()
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@ -18,7 +23,10 @@ file(GLOB_RECURSE SRC_FILES src/*.cpp src/*.cc)
message(STATUS "SRC_FILES: ${SRC_FILES}")
add_library(milvus-storage ${SRC_FILES})
target_include_directories(milvus-storage PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/milvus-storage ${CMAKE_CURRENT_SOURCE_DIR}/src)
target_link_libraries(milvus-storage PUBLIC arrow::arrow arrow::parquet Boost::boost protobuf::protobuf AWS::aws-sdk-cpp-core glog::glog opendal)
target_link_libraries(milvus-storage PUBLIC arrow::arrow arrow::parquet Boost::boost protobuf::protobuf AWS::aws-sdk-cpp-core glog::glog)
# Link milvus-storage against opendal only when USE_OPENDAL is enabled.
if (USE_OPENDAL)
target_link_libraries(milvus-storage PUBLIC opendal)
endif()
if (WITH_UT)
enable_testing()

View File

@ -271,10 +271,8 @@ PrepareInsertDataSpace()
arrow::field(kOptFieldName, arrow::int64()),
arrow::field("vec", arrow::fixed_size_binary(1))};
auto arrow_schema = std::make_shared<arrow::Schema>(arrow_fields);
auto schema_options = std::make_shared<milvus_storage::SchemaOptions>();
schema_options->primary_column = "pk";
schema_options->version_column = "ts";
schema_options->vector_column = "vec";
milvus_storage::SchemaOptions schema_options = {
.primary_column = "pk", .version_column = "ts", .vector_column = "vec"};
auto schema =
std::make_shared<milvus_storage::Schema>(arrow_schema, schema_options);
boost::filesystem::remove_all(path);
@ -311,11 +309,11 @@ PrepareInsertDataSpace()
arrow::RecordBatch::Make(arrow_schema,
kEntityCnt,
{pk_array, ts_array, scalar_array, vec_array});
auto write_opt = milvus_storage::WriteOption{kEntityCnt};
space->Write(arrow::RecordBatchReader::Make({batch}, arrow_schema)
milvus_storage::WriteOption write_opt = {kEntityCnt};
space->Write(*arrow::RecordBatchReader::Make({batch}, arrow_schema)
.ValueOrDie()
.get(),
&write_opt);
write_opt);
return {path, std::move(space)};
}

View File

@ -296,10 +296,8 @@ TestSpace(boost::filesystem::path& temp_path,
GeneratedData& dataset,
std::vector<T>& scalars) {
auto arrow_schema = TestSchema<T>(vec_size);
auto schema_options = std::make_shared<milvus_storage::SchemaOptions>();
schema_options->primary_column = "pk";
schema_options->version_column = "ts";
schema_options->vector_column = "vec";
milvus_storage::SchemaOptions schema_options{
.primary_column = "pk", .version_column = "ts", .vector_column = "vec"};
auto schema =
std::make_shared<milvus_storage::Schema>(arrow_schema, schema_options);
EXPECT_TRUE(schema->Validate().ok());
@ -312,7 +310,7 @@ TestSpace(boost::filesystem::path& temp_path,
auto space = std::move(space_res.value());
auto rec = TestRecords<T>(vec_size, dataset, scalars);
auto write_opt = milvus_storage::WriteOption{nb};
space->Write(rec.get(), &write_opt);
space->Write(*rec, write_opt);
return std::move(space);
}

View File

@ -403,10 +403,9 @@ class StringIndexMarisaTestV2 : public StringIndexBaseTest {
GeneratedData& dataset,
std::vector<std::string>& scalars) {
auto arrow_schema = TestSchema(vec_size);
auto schema_options = std::make_shared<milvus_storage::SchemaOptions>();
schema_options->primary_column = "pk";
schema_options->version_column = "ts";
schema_options->vector_column = "vec";
milvus_storage::SchemaOptions schema_options{.primary_column = "pk",
.version_column = "ts",
.vector_column = "vec"};
auto schema = std::make_shared<milvus_storage::Schema>(arrow_schema,
schema_options);
EXPECT_TRUE(schema->Validate().ok());
@ -419,7 +418,7 @@ class StringIndexMarisaTestV2 : public StringIndexBaseTest {
auto space = std::move(space_res.value());
auto rec = TestRecords(vec_size, dataset, scalars);
auto write_opt = milvus_storage::WriteOption{nb};
space->Write(rec.get(), &write_opt);
space->Write(*rec, write_opt);
return std::move(space);
}
void

View File

@ -22,6 +22,16 @@ while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symli
SOURCE="$(readlink "$SOURCE")"
[[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located
done
# Parse command-line options. The only recognised option is -o <value>, which
# overrides BUILD_OPENDAL (default "OFF"); the value is stored as given.
BUILD_OPENDAL="OFF"
while getopts "o:" arg; do
case $arg in
o)
BUILD_OPENDAL=$OPTARG
;;
esac
done
ROOT_DIR="$( cd -P "$( dirname "$SOURCE" )/.." && pwd )"
CPP_SRC_DIR="${ROOT_DIR}/internal/core"
BUILD_OUTPUT_DIR="${ROOT_DIR}/cmake_build"
@ -61,10 +71,10 @@ esac
popd
pushd ${ROOT_DIR}/cmake_build/thirdparty
mkdir -p ${ROOT_DIR}/internal/core/output/lib
mkdir -p ${ROOT_DIR}/internal/core/output/include
git clone --depth=1 --branch v0.43.0-rc.2 https://github.com/apache/opendal.git opendal
cd opendal
pushd ${ROOT_DIR}/cmake_build/thirdparty
if command -v cargo >/dev/null 2>&1; then
echo "cargo exists"
unameOut="$(uname -s)"
@ -82,12 +92,15 @@ else
bash -c "curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain=1.73 -y" || { echo 'rustup install failed'; exit 1;}
source $HOME/.cargo/env
fi
echo "BUILD_OPENDAL: ${BUILD_OPENDAL}"
if [ "${BUILD_OPENDAL}" = "ON" ]; then
git clone --depth=1 --branch v0.43.0-rc.2 https://github.com/apache/opendal.git opendal
cd opendal
pushd bindings/c
cargo +1.73 build --release --verbose || { echo 'opendal_c build failed'; exit 1; }
popd
mkdir -p ${ROOT_DIR}/internal/core/output/lib
mkdir -p ${ROOT_DIR}/internal/core/output/include
cp target/release/libopendal_c.a ${ROOT_DIR}/internal/core/output/lib/libopendal_c.a
cp bindings/c/include/opendal.h ${ROOT_DIR}/internal/core/output/include/opendal.h
fi
popd

View File

@ -99,9 +99,10 @@ EMBEDDED_MILVUS="OFF"
BUILD_DISK_ANN="OFF"
USE_ASAN="OFF"
USE_DYNAMIC_SIMD="ON"
USE_OPENDAL="OFF"
INDEX_ENGINE="KNOWHERE"
while getopts "p:d:t:s:f:n:i:y:a:x:ulrcghzmebZ" arg; do
while getopts "p:d:t:s:f:n:i:y:a:x:o:ulrcghzmebZ" arg; do
case $arg in
p)
INSTALL_PREFIX=$OPTARG
@ -148,6 +149,9 @@ while getopts "p:d:t:s:f:n:i:y:a:x:ulrcghzmebZ" arg; do
x)
INDEX_ENGINE=$OPTARG
;;
o)
USE_OPENDAL=$OPTARG
;;
h) # help
echo "
@ -164,10 +168,11 @@ parameter:
-b: build embedded milvus(default: OFF)
-a: build milvus with AddressSanitizer(default: false)
-Z: build milvus without azure-sdk-for-cpp, so cannot use azure blob
-o: build milvus with opendal(default: false)
-h: help
usage:
./core_build.sh -p \${INSTALL_PREFIX} -t \${BUILD_TYPE} -s \${CUDA_ARCH} [-u] [-l] [-r] [-c] [-z] [-g] [-m] [-e] [-h] [-b]
./core_build.sh -p \${INSTALL_PREFIX} -t \${BUILD_TYPE} -s \${CUDA_ARCH} [-u] [-l] [-r] [-c] [-z] [-g] [-m] [-e] [-h] [-b] [-o]
"
exit 0
;;
@ -246,6 +251,7 @@ ${CMAKE_EXTRA_ARGS} \
-DUSE_ASAN=${USE_ASAN} \
-DUSE_DYNAMIC_SIMD=${USE_DYNAMIC_SIMD} \
-DCPU_ARCH=${CPU_ARCH} \
-DUSE_OPENDAL=${USE_OPENDAL} \
-DINDEX_ENGINE=${INDEX_ENGINE} "
if [ -z "$BUILD_WITHOUT_AZURE" ]; then
CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \