From bb486c0db34e6e4d800db6e11514c5a425c7a0a6 Mon Sep 17 00:00:00 2001 From: "cai.zhang" Date: Wed, 10 Dec 2025 13:53:13 +0800 Subject: [PATCH] fix: Fix path concatenation error when rootPath = "." in minio (#46220) issue: #46219 --------- Signed-off-by: Cai Zhang --- internal/core/src/storage/FileManager.h | 20 ++++++- internal/core/src/storage/Util.cpp | 12 ++-- internal/core/unittest/test_storage.cpp | 73 +++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/internal/core/src/storage/FileManager.h b/internal/core/src/storage/FileManager.h index b9f82529db..2dbf58776f 100644 --- a/internal/core/src/storage/FileManager.h +++ b/internal/core/src/storage/FileManager.h @@ -31,6 +31,20 @@ namespace milvus::storage { +// Normalize path to be consistent with Go's path.Join behavior. +// This handles two issues: +// 1. Removes leading "./" when root_path is "." +// 2. Removes trailing "/." that lexically_normal() may produce +inline std::string +NormalizePath(const boost::filesystem::path& path) { + auto result = path.lexically_normal().string(); + // Remove trailing "/." if present + if (result.size() >= 2 && result.substr(result.size() - 2) == "/.") { + result = result.substr(0, result.size() - 1); + } + return result; +} + struct FileManagerContext { FileManagerContext() : chunkManagerPtr(nullptr) { } @@ -179,7 +193,7 @@ class FileManagerImpl : public milvus::FileManager { std::to_string(index_meta_.index_version) + "/" + std::to_string(field_meta_.partition_id) + "/" + std::to_string(field_meta_.segment_id); - return (prefix / path / path1).string(); + return NormalizePath(prefix / path / path1); } virtual std::string @@ -198,7 +212,7 @@ class FileManagerImpl : public milvus::FileManager { if (bucket.empty()) { return v1_prefix; } else { - return (bucket / v1_prefix).string(); + return NormalizePath(bucket / v1_prefix); } } @@ -213,7 +227,7 @@ class FileManagerImpl : public milvus::FileManager { std::to_string(field_meta_.partition_id) + "/" + std::to_string(field_meta_.segment_id) + "/" + std::to_string(field_meta_.field_id); - return (prefix / path / path1).string(); + return NormalizePath(prefix / path / path1); } protected: diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index cc7066d343..b8eb4ff587 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -713,7 +713,7 @@ GenIndexPathPrefixByType(ChunkManagerPtr cm, boost::filesystem::path path = std::string(index_type); boost::filesystem::path path1 = GenIndexPathIdentifier(build_id, index_version, segment_id, field_id); - return (prefix / path / path1).string(); + return NormalizePath(prefix / path / path1); } std::string @@ -749,7 +749,7 @@ GenJsonStatsPathPrefix(ChunkManagerPtr cm, boost::filesystem::path path1 = GenIndexPathIdentifier(build_id, index_version, segment_id, field_id); - return (prefix / path / path1).string(); + return NormalizePath(prefix / path / path1); } std::string @@ -764,7 +764,7 @@ GenJsonStatsPathIdentifier(int64_t build_id, std::to_string(index_version) / std::to_string(collection_id) / std::to_string(partition_id) / std::to_string(segment_id) / std::to_string(field_id); - return p.string() + "/"; + return NormalizePath(p); } std::string @@ -784,7 +784,7 @@ GenRemoteJsonStatsPathPrefix(ChunkManagerPtr cm, partition_id, segment_id, field_id); - return p.string(); + return NormalizePath(p); } std::string @@ -811,7 +811,7 @@ GenFieldRawDataPathPrefix(ChunkManagerPtr cm, boost::filesystem::path path = std::string(RAWDATA_ROOT_PATH); boost::filesystem::path path1 = std::to_string(segment_id) + "/" + std::to_string(field_id) + "/"; - return (prefix / path / path1).string(); + return NormalizePath(prefix / path / path1); } std::string @@ -819,7 +819,7 @@ GetSegmentRawDataPathPrefix(ChunkManagerPtr cm, int64_t segment_id) { boost::filesystem::path prefix = cm->GetRootPath(); boost::filesystem::path path = std::string(RAWDATA_ROOT_PATH); boost::filesystem::path path1 = std::to_string(segment_id); - return (prefix / path / path1).string(); + return NormalizePath(prefix / path / path1); } std::pair diff --git a/internal/core/unittest/test_storage.cpp b/internal/core/unittest/test_storage.cpp index 958eb655d5..247fb6cb89 100644 --- a/internal/core/unittest/test_storage.cpp +++ b/internal/core/unittest/test_storage.cpp @@ -16,6 +16,7 @@ #include #include #include "common/EasyAssert.h" +#include "storage/FileManager.h" #include "storage/LocalChunkManagerSingleton.h" #include "storage/RemoteChunkManagerSingleton.h" #include "storage/Util.h" @@ -241,4 +242,76 @@ TEST_F(StorageUtilTest, TestInitArrowFileSystem) { // auto fs = InitArrowFileSystem(remote_config); // ASSERT_NE(fs, nullptr); // } +} + +// Test cases for NormalizePath function +// NormalizePath uses boost::filesystem::path::lexically_normal() and then +// removes trailing "/." (only the dot, keeping the slash) +TEST_F(StorageUtilTest, NormalizePath) { + // === Basic paths === + EXPECT_EQ(NormalizePath(boost::filesystem::path("a/b/c")), "a/b/c"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("")), ""); + EXPECT_EQ(NormalizePath(boost::filesystem::path("file")), "file"); + + // === Dot handling === + EXPECT_EQ(NormalizePath(boost::filesystem::path(".")), "."); + EXPECT_EQ(NormalizePath(boost::filesystem::path("./a/b")), "a/b"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("a/./b")), "a/b"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("a/b/.")), "a/b/"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("./a/./b/.")), "a/b/"); + + // === Double dot (..) handling === + EXPECT_EQ(NormalizePath(boost::filesystem::path("a/b/../c")), "a/c"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("a/b/c/../../d")), "a/d"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("../a/b")), "../a/b"); + + // === Trailing slash === + EXPECT_EQ(NormalizePath(boost::filesystem::path("a/b/")), "a/b/"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("files/")), "files/"); + + // === Absolute paths === + EXPECT_EQ(NormalizePath(boost::filesystem::path("/a/b/c")), "/a/b/c"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("/a/./b")), "/a/b"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("/a/b/.")), "/a/b/"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("/")), "/"); + + // === Multiple slashes === + EXPECT_EQ(NormalizePath(boost::filesystem::path("a//b//c")), "a/b/c"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("a/./b//c")), "a/b/c"); + + // === Real-world scenarios (S3/MinIO) === + EXPECT_EQ(NormalizePath(boost::filesystem::path("bucket/index_files/123")), + "bucket/index_files/123"); + // Key fix for 403 error + EXPECT_EQ( + NormalizePath(boost::filesystem::path("./index_files/segment_123")), + "index_files/segment_123"); + + // Path construction with root_path = "." + boost::filesystem::path prefix = "."; + boost::filesystem::path path = "index_files"; + boost::filesystem::path path1 = "segment_123"; + EXPECT_EQ(NormalizePath(prefix / path / path1), "index_files/segment_123"); + + // Non-empty root path + boost::filesystem::path prefix2 = "files"; + EXPECT_EQ(NormalizePath(prefix2 / path / path1), + "files/index_files/segment_123"); + + // Root path with trailing slash + boost::filesystem::path prefix3 = "files/"; + EXPECT_EQ(NormalizePath(prefix3 / path / path1), + "files/index_files/segment_123"); + + // Empty root path + boost::filesystem::path prefix4 = ""; + EXPECT_EQ(NormalizePath(prefix4 / path / path1), "index_files/segment_123"); + + // === Edge cases === + EXPECT_EQ(NormalizePath(boost::filesystem::path("a/b/..")), "a"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("./a/../b/./c/../d")), + "b/d"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("a/b c/d")), "a/b c/d"); + EXPECT_EQ(NormalizePath(boost::filesystem::path("./.")), "."); + EXPECT_EQ(NormalizePath(boost::filesystem::path("./..")), ".."); } \ No newline at end of file