From 3fc309bdfc14d2dcf0d82c9385a37382a4193f51 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Wed, 3 Dec 2025 10:47:09 +0800 Subject: [PATCH] fix: add more logs related to tantivy upload/cache (#46019) issue: https://github.com/milvus-io/milvus/issues/45590 Signed-off-by: SpadeA --- .../core/src/index/InvertedIndexTantivy.cpp | 16 ++++++ .../core/src/storage/DiskFileManagerImpl.cpp | 7 +++ .../src/index_writer_v7/index_writer.rs | 6 +++ .../tantivy/tantivy-binding/src/util.rs | 53 ++++++++++++++++++- 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index 81ada3a4d2..46d50cdb10 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -132,6 +132,15 @@ InvertedIndexTantivy::Upload(const Config& config) { boost::filesystem::path p(path_); boost::filesystem::directory_iterator end_iter; + // TODO: remove this log when #45590 is solved + auto segment_id = disk_file_manager_->GetFieldDataMeta().segment_id; + auto field_id = disk_file_manager_->GetFieldDataMeta().field_id; + LOG_INFO( + "InvertedIndexTantivy::Upload: segment_id={}, field_id={}, path={}", + segment_id, + field_id, + path_); + for (boost::filesystem::directory_iterator iter(p); iter != end_iter; iter++) { if (boost::filesystem::is_directory(*iter)) { @@ -184,6 +193,13 @@ InvertedIndexTantivy::Load(milvus::tracer::TraceContext ctx, "index file paths is empty when load disk ann index data"); auto inverted_index_files = index_files.value(); + // TODO: remove this log when #45590 is solved + auto segment_id = disk_file_manager_->GetFieldDataMeta().segment_id; + auto field_id = disk_file_manager_->GetFieldDataMeta().field_id; + LOG_INFO("InvertedIndexTantivy::Load: segment_id={}, field_id={}", + segment_id, + field_id); + LoadIndexMetas(inverted_index_files, config); RetainTantivyIndexFiles(inverted_index_files); auto load_priority = diff --git a/internal/core/src/storage/DiskFileManagerImpl.cpp b/internal/core/src/storage/DiskFileManagerImpl.cpp index a4317f9815..8dad667ecf 100644 --- a/internal/core/src/storage/DiskFileManagerImpl.cpp +++ b/internal/core/src/storage/DiskFileManagerImpl.cpp @@ -317,6 +317,11 @@ DiskFileManagerImpl::CacheIndexToDiskInternal( std::sort(slices.second.begin(), slices.second.end()); } + // TODO: remove this log when #45590 is solved + LOG_INFO("CacheIndexToDisk: caching {} files to {}", + index_slices.size(), + local_index_prefix); + for (auto& slices : index_slices) { auto prefix = slices.first; auto local_index_file_name = @@ -362,6 +367,8 @@ DiskFileManagerImpl::CacheIndexToDiskInternal( } local_paths_.emplace_back(local_index_file_name); + // TODO: remove this log when #45590 is solved + LOG_INFO("CacheIndexToDisk: cached file {}", local_index_file_name); } } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v7/index_writer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v7/index_writer.rs index 7289d302cc..da12a408d0 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v7/index_writer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer_v7/index_writer.rs @@ -254,6 +254,12 @@ impl IndexWriterWrapperImpl { // self.manual_merge(); block_on(self.index_writer.garbage_collect_files())?; self.index_writer.wait_merging_threads()?; + + // TODO: remove this log when #45590 is solved + let metas = self.index.searchable_segment_metas()?; + let segment_ids: Vec<_> = metas.iter().map(|m| m.id().uuid_string()).collect(); + info!("tantivy index_writer finish, segments: {:?}", segment_ids); + Ok(()) } diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs index 681ef66e86..ee606a9652 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/util.rs @@ -1,5 +1,7 @@ use crate::error::Result; +use crate::log::init_log; use core::slice; +use log::error; use std::collections::HashSet; use std::ffi::CStr; use std::ffi::{c_char, c_void}; @@ -13,9 +15,27 @@ pub fn c_ptr_to_str(ptr: *const c_char) -> Result<&'static str> { pub fn index_exist(path: &str) -> bool { let Ok(dir) = MmapDirectory::open(path) else { + init_log(); + error!("tantivy index_exist: failed to open directory: {}", path); return false; }; - Index::exists(&dir).unwrap() + let exists = Index::exists(&dir).unwrap(); + if !exists { + init_log(); + let files: Vec<_> = std::fs::read_dir(path) + .map(|entries| { + entries + .filter_map(|e| e.ok()) + .map(|e| e.file_name().to_string_lossy().to_string()) + .collect() + }) + .unwrap_or_default(); + error!( + "tantivy index_exist: meta.json not found at {}, files: {:?}", + path, files + ); + } + exists } pub fn make_bounds(bound: T, inclusive: bool) -> Bound { @@ -47,3 +67,34 @@ pub extern "C" fn set_bitset(bitset: *mut c_void, doc_id: *const u32, len: usize bitset.insert(*doc); } } + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::tempdir; + + #[test] + fn test_index_exist_directory_not_exist() { + let result = index_exist("/nonexistent/path/to/index"); + assert!(!result); + } + + #[test] + fn test_index_exist_empty_directory() { + let dir = tempdir().unwrap(); + let path = dir.path().to_str().unwrap(); + let result = index_exist(path); + assert!(!result); + } + + #[test] + fn test_index_exist_directory_without_meta_json() { + let dir = tempdir().unwrap(); + let path = dir.path(); + // Create some dummy files but no meta.json + fs::write(path.join("dummy.txt"), "test").unwrap(); + let result = index_exist(path.to_str().unwrap()); + assert!(!result); + } +}