// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include "storage/IndexData.h" #include "storage/FileManager.h" #include "storage/LocalChunkManager.h" #include "common/Consts.h" #include "storage/Types.h" #include "storage/ThreadPools.h" namespace milvus::storage { class DiskFileManagerImpl : public FileManagerImpl { public: explicit DiskFileManagerImpl(const FileManagerContext& fileManagerContext); ~DiskFileManagerImpl() override; bool LoadFile(const std::string& filename) noexcept override; bool AddFile(const std::string& filename) noexcept override; bool AddFileMeta(const FileMeta& file_meta) override; std::optional IsExisted(const std::string& filename) noexcept override; bool RemoveFile(const std::string& filename) noexcept override; std::shared_ptr OpenInputStream(const std::string& filename) override; std::shared_ptr OpenOutputStream(const std::string& filename) override; public: bool AddTextLog(const std::string& filename) noexcept; bool AddJsonKeyIndexLog(const std::string& filename) noexcept; public: std::string GetName() const override { return "DiskFileManagerImpl"; } std::string GetIndexIdentifier(); std::string GetLocalIndexObjectPrefix(); std::string GetLocalTempIndexObjectPrefix(); // Similar to GetTextIndexIdentifier, segment_id and field_id is also required. std::string GetLocalTextIndexPrefix(); std::string GetLocalTempTextIndexPrefix(); // Used for loading index, using this index prefix dir to store index. std::string GetLocalJsonKeyIndexPrefix(); std::string GetLocalTempJsonKeyIndexPrefix(); // Used for upload index to remote storage, using this index prefix dir as remote storage directory std::string GetRemoteJsonKeyLogPrefix(); // Used for upload index to remote storage, using this index prefix dir as remote storage directory std::string GetLocalNgramIndexPrefix(); // Used for loading index, using this index prefix dir to store index. std::string GetLocalTempNgramIndexPrefix(); std::string GetLocalRawDataObjectPrefix(); std::map GetRemotePathsToFileSize() const { return remote_paths_to_size_; } std::vector GetLocalFilePaths() const { return local_paths_; } void CacheIndexToDisk(const std::vector& remote_files, milvus::proto::common::LoadPriority priority); void CacheTextLogToDisk(const std::vector& remote_files, milvus::proto::common::LoadPriority priority); void CacheJsonKeyIndexToDisk(const std::vector& remote_files, milvus::proto::common::LoadPriority priority); void CacheNgramIndexToDisk(const std::vector& remote_files, milvus::proto::common::LoadPriority priority); void RemoveIndexFiles(); void RemoveTextLogFiles(); void RemoveJsonKeyIndexFiles(); void RemoveNgramIndexFiles(); void AddBatchIndexFiles(const std::string& local_file_name, const std::vector& local_file_offsets, const std::vector& remote_files, const std::vector& remote_file_sizes); template std::string CacheRawDataToDisk(const Config& config); std::string CacheOptFieldToDisk(const Config& config); std::string GetRemoteIndexPrefix() const { return GetRemoteIndexObjectPrefix(); } size_t GetAddedTotalFileSize() const { return added_total_file_size_; } std::string GetFileName(const std::string& localfile); std::string GetRemoteIndexFilePrefixV2() const override; private: int64_t GetIndexBuildId() { return index_meta_.build_id; } std::string GetRemoteIndexPath(const std::string& file_name, int64_t slice_num) const; /** * @brief Get the Remote Index Path V2 * @param file_name; v2 will not split the file with slice_num * @return std::string */ std::string GetRemoteIndexPathV2(const std::string& file_name) const; std::string GetRemoteTextLogPath(const std::string& file_name, int64_t slice_num) const; std::string GetRemoteJsonKeyIndexPath(const std::string& file_name, int64_t slice_num); bool AddFileInternal(const std::string& file_name, const std::function& get_remote_path) noexcept; void CacheIndexToDiskInternal(const std::vector& remote_files, const std::string& local_index_prefix, milvus::proto::common::LoadPriority priority = milvus::proto::common::LoadPriority::HIGH); template std::string cache_raw_data_to_disk_internal(const Config& config); template std::string cache_raw_data_to_disk_storage_v2(const Config& config); template void cache_raw_data_to_disk_common( const FieldDataPtr& field_data, const std::shared_ptr& local_chunk_manager, std::string& local_data_path, bool& file_created, uint32_t& dim, int64_t& write_offset); private: // local file path (abs path) std::vector local_paths_; // remote file path std::map remote_paths_to_size_; size_t added_total_file_size_ = 0; }; using DiskANNFileManagerImplPtr = std::shared_ptr; } // namespace milvus::storage