mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 17:18:35 +08:00
fix: add more logs related to tantivy upload/cache (#46019)
issue: https://github.com/milvus-io/milvus/issues/45590
Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
This commit is contained in:
parent
5d0c8b1b40
commit
3fc309bdfc
@ -132,6 +132,15 @@ InvertedIndexTantivy<T>::Upload(const Config& config) {
|
||||
boost::filesystem::path p(path_);
|
||||
boost::filesystem::directory_iterator end_iter;
|
||||
|
||||
// TODO: remove this log when #45590 is solved
|
||||
auto segment_id = disk_file_manager_->GetFieldDataMeta().segment_id;
|
||||
auto field_id = disk_file_manager_->GetFieldDataMeta().field_id;
|
||||
LOG_INFO(
|
||||
"InvertedIndexTantivy::Upload: segment_id={}, field_id={}, path={}",
|
||||
segment_id,
|
||||
field_id,
|
||||
path_);
|
||||
|
||||
for (boost::filesystem::directory_iterator iter(p); iter != end_iter;
|
||||
iter++) {
|
||||
if (boost::filesystem::is_directory(*iter)) {
|
||||
@ -184,6 +193,13 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
|
||||
"index file paths is empty when load disk ann index data");
|
||||
auto inverted_index_files = index_files.value();
|
||||
|
||||
// TODO: remove this log when #45590 is solved
|
||||
auto segment_id = disk_file_manager_->GetFieldDataMeta().segment_id;
|
||||
auto field_id = disk_file_manager_->GetFieldDataMeta().field_id;
|
||||
LOG_INFO("InvertedIndexTantivy::Load: segment_id={}, field_id={}",
|
||||
segment_id,
|
||||
field_id);
|
||||
|
||||
LoadIndexMetas(inverted_index_files, config);
|
||||
RetainTantivyIndexFiles(inverted_index_files);
|
||||
auto load_priority =
|
||||
|
||||
@ -317,6 +317,11 @@ DiskFileManagerImpl::CacheIndexToDiskInternal(
|
||||
std::sort(slices.second.begin(), slices.second.end());
|
||||
}
|
||||
|
||||
// TODO: remove this log when #45590 is solved
|
||||
LOG_INFO("CacheIndexToDisk: caching {} files to {}",
|
||||
index_slices.size(),
|
||||
local_index_prefix);
|
||||
|
||||
for (auto& slices : index_slices) {
|
||||
auto prefix = slices.first;
|
||||
auto local_index_file_name =
|
||||
@ -362,6 +367,8 @@ DiskFileManagerImpl::CacheIndexToDiskInternal(
|
||||
}
|
||||
|
||||
local_paths_.emplace_back(local_index_file_name);
|
||||
// TODO: remove this log when #45590 is solved
|
||||
LOG_INFO("CacheIndexToDisk: cached file {}", local_index_file_name);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -254,6 +254,12 @@ impl IndexWriterWrapperImpl {
|
||||
// self.manual_merge();
|
||||
block_on(self.index_writer.garbage_collect_files())?;
|
||||
self.index_writer.wait_merging_threads()?;
|
||||
|
||||
// TODO: remove this log when #45590 is solved
|
||||
let metas = self.index.searchable_segment_metas()?;
|
||||
let segment_ids: Vec<_> = metas.iter().map(|m| m.id().uuid_string()).collect();
|
||||
info!("tantivy index_writer finish, segments: {:?}", segment_ids);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
use crate::error::Result;
|
||||
use crate::log::init_log;
|
||||
use core::slice;
|
||||
use log::error;
|
||||
use std::collections::HashSet;
|
||||
use std::ffi::CStr;
|
||||
use std::ffi::{c_char, c_void};
|
||||
@ -13,9 +15,27 @@ pub fn c_ptr_to_str(ptr: *const c_char) -> Result<&'static str> {
|
||||
|
||||
pub fn index_exist(path: &str) -> bool {
|
||||
let Ok(dir) = MmapDirectory::open(path) else {
|
||||
init_log();
|
||||
error!("tantivy index_exist: failed to open directory: {}", path);
|
||||
return false;
|
||||
};
|
||||
Index::exists(&dir).unwrap()
|
||||
let exists = Index::exists(&dir).unwrap();
|
||||
if !exists {
|
||||
init_log();
|
||||
let files: Vec<_> = std::fs::read_dir(path)
|
||||
.map(|entries| {
|
||||
entries
|
||||
.filter_map(|e| e.ok())
|
||||
.map(|e| e.file_name().to_string_lossy().to_string())
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
error!(
|
||||
"tantivy index_exist: meta.json not found at {}, files: {:?}",
|
||||
path, files
|
||||
);
|
||||
}
|
||||
exists
|
||||
}
|
||||
|
||||
pub fn make_bounds<T>(bound: T, inclusive: bool) -> Bound<T> {
|
||||
@ -47,3 +67,34 @@ pub extern "C" fn set_bitset(bitset: *mut c_void, doc_id: *const u32, len: usize
|
||||
bitset.insert(*doc);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// A path that does not exist must be reported as "no index".
    #[test]
    fn test_index_exist_directory_not_exist() {
        assert!(!index_exist("/nonexistent/path/to/index"));
    }

    /// A freshly created, empty temporary directory must be reported as "no index".
    #[test]
    fn test_index_exist_empty_directory() {
        // Keep the guard alive for the duration of the check so the directory exists.
        let tmp = tempdir().unwrap();
        let dir_str = tmp.path().to_str().unwrap();
        assert!(!index_exist(dir_str));
    }

    /// A directory that holds unrelated files but no meta.json must be reported as
    /// "no index".
    #[test]
    fn test_index_exist_directory_without_meta_json() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        // Create some dummy files but no meta.json
        fs::write(root.join("dummy.txt"), "test").unwrap();
        assert!(!index_exist(root.to_str().unwrap()));
    }
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user