fix: add more logs related to tantivy upload/cache (#46019)

issue: https://github.com/milvus-io/milvus/issues/45590

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
This commit is contained in:
Spade A 2025-12-03 10:47:09 +08:00 committed by GitHub
parent 5d0c8b1b40
commit 3fc309bdfc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 81 additions and 1 deletions

View File

@ -132,6 +132,15 @@ InvertedIndexTantivy<T>::Upload(const Config& config) {
boost::filesystem::path p(path_); boost::filesystem::path p(path_);
boost::filesystem::directory_iterator end_iter; boost::filesystem::directory_iterator end_iter;
// TODO: remove this log when #45590 is solved
auto segment_id = disk_file_manager_->GetFieldDataMeta().segment_id;
auto field_id = disk_file_manager_->GetFieldDataMeta().field_id;
LOG_INFO(
"InvertedIndexTantivy::Upload: segment_id={}, field_id={}, path={}",
segment_id,
field_id,
path_);
for (boost::filesystem::directory_iterator iter(p); iter != end_iter; for (boost::filesystem::directory_iterator iter(p); iter != end_iter;
iter++) { iter++) {
if (boost::filesystem::is_directory(*iter)) { if (boost::filesystem::is_directory(*iter)) {
@ -184,6 +193,13 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
"index file paths is empty when load disk ann index data"); "index file paths is empty when load disk ann index data");
auto inverted_index_files = index_files.value(); auto inverted_index_files = index_files.value();
// TODO: remove this log when #45590 is solved
auto segment_id = disk_file_manager_->GetFieldDataMeta().segment_id;
auto field_id = disk_file_manager_->GetFieldDataMeta().field_id;
LOG_INFO("InvertedIndexTantivy::Load: segment_id={}, field_id={}",
segment_id,
field_id);
LoadIndexMetas(inverted_index_files, config); LoadIndexMetas(inverted_index_files, config);
RetainTantivyIndexFiles(inverted_index_files); RetainTantivyIndexFiles(inverted_index_files);
auto load_priority = auto load_priority =

View File

@ -317,6 +317,11 @@ DiskFileManagerImpl::CacheIndexToDiskInternal(
std::sort(slices.second.begin(), slices.second.end()); std::sort(slices.second.begin(), slices.second.end());
} }
// TODO: remove this log when #45590 is solved
LOG_INFO("CacheIndexToDisk: caching {} files to {}",
index_slices.size(),
local_index_prefix);
for (auto& slices : index_slices) { for (auto& slices : index_slices) {
auto prefix = slices.first; auto prefix = slices.first;
auto local_index_file_name = auto local_index_file_name =
@ -362,6 +367,8 @@ DiskFileManagerImpl::CacheIndexToDiskInternal(
} }
local_paths_.emplace_back(local_index_file_name); local_paths_.emplace_back(local_index_file_name);
// TODO: remove this log when #45590 is solved
LOG_INFO("CacheIndexToDisk: cached file {}", local_index_file_name);
} }
} }

View File

@ -254,6 +254,12 @@ impl IndexWriterWrapperImpl {
// self.manual_merge(); // self.manual_merge();
block_on(self.index_writer.garbage_collect_files())?; block_on(self.index_writer.garbage_collect_files())?;
self.index_writer.wait_merging_threads()?; self.index_writer.wait_merging_threads()?;
// TODO: remove this log when #45590 is solved
let metas = self.index.searchable_segment_metas()?;
let segment_ids: Vec<_> = metas.iter().map(|m| m.id().uuid_string()).collect();
info!("tantivy index_writer finish, segments: {:?}", segment_ids);
Ok(()) Ok(())
} }

View File

@ -1,5 +1,7 @@
use crate::error::Result; use crate::error::Result;
use crate::log::init_log;
use core::slice; use core::slice;
use log::error;
use std::collections::HashSet; use std::collections::HashSet;
use std::ffi::CStr; use std::ffi::CStr;
use std::ffi::{c_char, c_void}; use std::ffi::{c_char, c_void};
@ -13,9 +15,27 @@ pub fn c_ptr_to_str(ptr: *const c_char) -> Result<&'static str> {
pub fn index_exist(path: &str) -> bool { pub fn index_exist(path: &str) -> bool {
let Ok(dir) = MmapDirectory::open(path) else { let Ok(dir) = MmapDirectory::open(path) else {
init_log();
error!("tantivy index_exist: failed to open directory: {}", path);
return false; return false;
}; };
Index::exists(&dir).unwrap() let exists = Index::exists(&dir).unwrap();
if !exists {
init_log();
let files: Vec<_> = std::fs::read_dir(path)
.map(|entries| {
entries
.filter_map(|e| e.ok())
.map(|e| e.file_name().to_string_lossy().to_string())
.collect()
})
.unwrap_or_default();
error!(
"tantivy index_exist: meta.json not found at {}, files: {:?}",
path, files
);
}
exists
} }
pub fn make_bounds<T>(bound: T, inclusive: bool) -> Bound<T> { pub fn make_bounds<T>(bound: T, inclusive: bool) -> Bound<T> {
@ -47,3 +67,34 @@ pub extern "C" fn set_bitset(bitset: *mut c_void, doc_id: *const u32, len: usize
bitset.insert(*doc); bitset.insert(*doc);
} }
} }
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// A path that is not expected to exist on disk must be reported as having no index.
    #[test]
    fn test_index_exist_directory_not_exist() {
        assert!(!index_exist("/nonexistent/path/to/index"));
    }

    /// A freshly created, empty temporary directory contains no index.
    #[test]
    fn test_index_exist_empty_directory() {
        let tmp = tempdir().unwrap();
        let exists = index_exist(tmp.path().to_str().unwrap());
        assert!(!exists);
    }

    /// A directory holding unrelated files (and no meta.json) contains no index.
    #[test]
    fn test_index_exist_directory_without_meta_json() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        // Populate the directory with a file that is not part of an index.
        let dummy_file = root.join("dummy.txt");
        fs::write(dummy_file, "test").unwrap();
        assert!(!index_exist(root.to_str().unwrap()));
    }
}