mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-06 09:08:43 +08:00
fix: add more logs related to tantivy upload/cache (#46019)
issue: https://github.com/milvus-io/milvus/issues/45590
Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
This commit is contained in:
parent
5d0c8b1b40
commit
3fc309bdfc
@@ -132,6 +132,15 @@ InvertedIndexTantivy<T>::Upload(const Config& config) {
|
|||||||
boost::filesystem::path p(path_);
|
boost::filesystem::path p(path_);
|
||||||
boost::filesystem::directory_iterator end_iter;
|
boost::filesystem::directory_iterator end_iter;
|
||||||
|
|
||||||
|
// TODO: remove this log when #45590 is solved
|
||||||
|
auto segment_id = disk_file_manager_->GetFieldDataMeta().segment_id;
|
||||||
|
auto field_id = disk_file_manager_->GetFieldDataMeta().field_id;
|
||||||
|
LOG_INFO(
|
||||||
|
"InvertedIndexTantivy::Upload: segment_id={}, field_id={}, path={}",
|
||||||
|
segment_id,
|
||||||
|
field_id,
|
||||||
|
path_);
|
||||||
|
|
||||||
for (boost::filesystem::directory_iterator iter(p); iter != end_iter;
|
for (boost::filesystem::directory_iterator iter(p); iter != end_iter;
|
||||||
iter++) {
|
iter++) {
|
||||||
if (boost::filesystem::is_directory(*iter)) {
|
if (boost::filesystem::is_directory(*iter)) {
|
||||||
@@ -184,6 +193,13 @@ InvertedIndexTantivy<T>::Load(milvus::tracer::TraceContext ctx,
|
|||||||
"index file paths is empty when load disk ann index data");
|
"index file paths is empty when load disk ann index data");
|
||||||
auto inverted_index_files = index_files.value();
|
auto inverted_index_files = index_files.value();
|
||||||
|
|
||||||
|
// TODO: remove this log when #45590 is solved
|
||||||
|
auto segment_id = disk_file_manager_->GetFieldDataMeta().segment_id;
|
||||||
|
auto field_id = disk_file_manager_->GetFieldDataMeta().field_id;
|
||||||
|
LOG_INFO("InvertedIndexTantivy::Load: segment_id={}, field_id={}",
|
||||||
|
segment_id,
|
||||||
|
field_id);
|
||||||
|
|
||||||
LoadIndexMetas(inverted_index_files, config);
|
LoadIndexMetas(inverted_index_files, config);
|
||||||
RetainTantivyIndexFiles(inverted_index_files);
|
RetainTantivyIndexFiles(inverted_index_files);
|
||||||
auto load_priority =
|
auto load_priority =
|
||||||
|
|||||||
@@ -317,6 +317,11 @@ DiskFileManagerImpl::CacheIndexToDiskInternal(
|
|||||||
std::sort(slices.second.begin(), slices.second.end());
|
std::sort(slices.second.begin(), slices.second.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: remove this log when #45590 is solved
|
||||||
|
LOG_INFO("CacheIndexToDisk: caching {} files to {}",
|
||||||
|
index_slices.size(),
|
||||||
|
local_index_prefix);
|
||||||
|
|
||||||
for (auto& slices : index_slices) {
|
for (auto& slices : index_slices) {
|
||||||
auto prefix = slices.first;
|
auto prefix = slices.first;
|
||||||
auto local_index_file_name =
|
auto local_index_file_name =
|
||||||
@@ -362,6 +367,8 @@ DiskFileManagerImpl::CacheIndexToDiskInternal(
|
|||||||
}
|
}
|
||||||
|
|
||||||
local_paths_.emplace_back(local_index_file_name);
|
local_paths_.emplace_back(local_index_file_name);
|
||||||
|
// TODO: remove this log when #45590 is solved
|
||||||
|
LOG_INFO("CacheIndexToDisk: cached file {}", local_index_file_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -254,6 +254,12 @@ impl IndexWriterWrapperImpl {
|
|||||||
// self.manual_merge();
|
// self.manual_merge();
|
||||||
block_on(self.index_writer.garbage_collect_files())?;
|
block_on(self.index_writer.garbage_collect_files())?;
|
||||||
self.index_writer.wait_merging_threads()?;
|
self.index_writer.wait_merging_threads()?;
|
||||||
|
|
||||||
|
// TODO: remove this log when #45590 is solved
|
||||||
|
let metas = self.index.searchable_segment_metas()?;
|
||||||
|
let segment_ids: Vec<_> = metas.iter().map(|m| m.id().uuid_string()).collect();
|
||||||
|
info!("tantivy index_writer finish, segments: {:?}", segment_ids);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
|
use crate::log::init_log;
|
||||||
use core::slice;
|
use core::slice;
|
||||||
|
use log::error;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::ffi::CStr;
|
use std::ffi::CStr;
|
||||||
use std::ffi::{c_char, c_void};
|
use std::ffi::{c_char, c_void};
|
||||||
@@ -13,9 +15,27 @@ pub fn c_ptr_to_str(ptr: *const c_char) -> Result<&'static str> {
|
|||||||
|
|
||||||
pub fn index_exist(path: &str) -> bool {
|
pub fn index_exist(path: &str) -> bool {
|
||||||
let Ok(dir) = MmapDirectory::open(path) else {
|
let Ok(dir) = MmapDirectory::open(path) else {
|
||||||
|
init_log();
|
||||||
|
error!("tantivy index_exist: failed to open directory: {}", path);
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
Index::exists(&dir).unwrap()
|
let exists = Index::exists(&dir).unwrap();
|
||||||
|
if !exists {
|
||||||
|
init_log();
|
||||||
|
let files: Vec<_> = std::fs::read_dir(path)
|
||||||
|
.map(|entries| {
|
||||||
|
entries
|
||||||
|
.filter_map(|e| e.ok())
|
||||||
|
.map(|e| e.file_name().to_string_lossy().to_string())
|
||||||
|
.collect()
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
error!(
|
||||||
|
"tantivy index_exist: meta.json not found at {}, files: {:?}",
|
||||||
|
path, files
|
||||||
|
);
|
||||||
|
}
|
||||||
|
exists
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn make_bounds<T>(bound: T, inclusive: bool) -> Bound<T> {
|
pub fn make_bounds<T>(bound: T, inclusive: bool) -> Bound<T> {
|
||||||
@@ -47,3 +67,34 @@ pub extern "C" fn set_bitset(bitset: *mut c_void, doc_id: *const u32, len: usize
|
|||||||
bitset.insert(*doc);
|
bitset.insert(*doc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use std::fs;
|
||||||
|
use tempfile::tempdir;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_index_exist_directory_not_exist() {
|
||||||
|
let result = index_exist("/nonexistent/path/to/index");
|
||||||
|
assert!(!result);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_index_exist_empty_directory() {
|
||||||
|
let dir = tempdir().unwrap();
|
||||||
|
let path = dir.path().to_str().unwrap();
|
||||||
|
let result = index_exist(path);
|
||||||
|
assert!(!result);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_index_exist_directory_without_meta_json() {
|
||||||
|
let dir = tempdir().unwrap();
|
||||||
|
let path = dir.path();
|
||||||
|
// Create some dummy files but no meta.json
|
||||||
|
fs::write(path.join("dummy.txt"), "test").unwrap();
|
||||||
|
let result = index_exist(path.to_str().unwrap());
|
||||||
|
assert!(!result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user