mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-02-04 11:18:44 +08:00
fix: revert batch add (#41374)
issue: #41375 todo: to fix the problems fixed in the issue. --------- Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
This commit is contained in:
parent
4552dd4b23
commit
62293cb582
@ -266,14 +266,9 @@ RustResult tantivy_index_add_bools_by_single_segment_writer(void *ptr,
|
||||
const bool *array,
|
||||
uintptr_t len);
|
||||
|
||||
RustResult tantivy_index_add_strings(void *ptr,
|
||||
const char *const *array,
|
||||
uintptr_t len,
|
||||
int64_t offset);
|
||||
RustResult tantivy_index_add_string(void *ptr, const char *s, int64_t offset);
|
||||
|
||||
RustResult tantivy_index_add_strings_by_single_segment_writer(void *ptr,
|
||||
const char *const *array,
|
||||
uintptr_t len);
|
||||
RustResult tantivy_index_add_string_by_single_segment_writer(void *ptr, const char *s);
|
||||
|
||||
RustResult tantivy_index_add_json_key_stats_data_by_batch(void *ptr,
|
||||
const char *const *keys,
|
||||
|
||||
@ -84,10 +84,8 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
writer.add_data_by_batch(&["网球和滑雪"], Some(0)).unwrap();
|
||||
writer
|
||||
.add_data_by_batch(&["网球以及滑雪"], Some(1))
|
||||
.unwrap();
|
||||
writer.add("网球和滑雪", Some(0)).unwrap();
|
||||
writer.add("网球以及滑雪", Some(1)).unwrap();
|
||||
|
||||
writer.commit().unwrap();
|
||||
|
||||
@ -118,7 +116,7 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
for i in 0..10000 {
|
||||
writer.add_data_by_batch(&["hello world"], Some(i)).unwrap();
|
||||
writer.add("hello world", Some(i)).unwrap();
|
||||
}
|
||||
writer.commit().unwrap();
|
||||
|
||||
|
||||
@ -79,15 +79,13 @@ impl IndexWriterWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_data_by_batch<T>(&mut self, data: &[T], offset: Option<i64>) -> Result<()>
|
||||
pub fn add<T>(&mut self, data: T, offset: Option<i64>) -> Result<()>
|
||||
where
|
||||
T: TantivyValue<TantivyDocumentV5> + TantivyValue<TantivyDocumentV7>,
|
||||
{
|
||||
match self {
|
||||
IndexWriterWrapper::V5(writer) => writer.add_data_by_batch(data, offset),
|
||||
IndexWriterWrapper::V7(writer) => {
|
||||
writer.add_data_by_batch(data, offset.unwrap() as u32)
|
||||
}
|
||||
IndexWriterWrapper::V5(writer) => writer.add(data, offset),
|
||||
IndexWriterWrapper::V7(writer) => writer.add(data, offset.unwrap() as u32),
|
||||
}
|
||||
}
|
||||
|
||||
@ -102,19 +100,6 @@ impl IndexWriterWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_string_by_batch(
|
||||
&mut self,
|
||||
data: &[*const c_char],
|
||||
offset: Option<i64>,
|
||||
) -> Result<()> {
|
||||
match self {
|
||||
IndexWriterWrapper::V5(writer) => writer.add_string_by_batch(data, offset),
|
||||
IndexWriterWrapper::V7(writer) => {
|
||||
writer.add_string_by_batch(data, offset.unwrap() as u32)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_array_keywords(
|
||||
&mut self,
|
||||
datas: &[*const c_char],
|
||||
@ -173,9 +158,9 @@ impl IndexWriterWrapper {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use rand::Rng;
|
||||
use std::{ffi::CString, ops::Bound};
|
||||
use tantivy_5::{query, Index, ReloadPolicy};
|
||||
|
||||
use rand::Rng;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
|
||||
use crate::{data_type::TantivyDataType, TantivyIndexVersion};
|
||||
@ -200,13 +185,12 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
for i in 0..10 {
|
||||
index_wrapper
|
||||
.add_data_by_batch::<i64>(&[i], Some(i as i64))
|
||||
.unwrap();
|
||||
index_wrapper.add::<i64>(i, Some(i as i64)).unwrap();
|
||||
}
|
||||
index_wrapper.commit().unwrap();
|
||||
}
|
||||
|
||||
use tantivy_5::{query, Index, ReloadPolicy};
|
||||
let index = Index::open_in_dir(dir.path()).unwrap();
|
||||
let reader = index
|
||||
.reader_builder()
|
||||
@ -240,7 +224,7 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
for i in 0..10 {
|
||||
index_wrapper.add_data_by_batch::<i64>(&[i], None).unwrap();
|
||||
index_wrapper.add::<i64>(i, None).unwrap();
|
||||
}
|
||||
index_wrapper.finish().unwrap();
|
||||
}
|
||||
@ -283,9 +267,7 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
for i in 0..10 {
|
||||
index_wrapper
|
||||
.add_data_by_batch(&["hello"], Some(i as i64))
|
||||
.unwrap();
|
||||
index_wrapper.add("hello", Some(i as i64)).unwrap();
|
||||
}
|
||||
index_wrapper.commit().unwrap();
|
||||
}
|
||||
@ -352,73 +334,4 @@ mod tests {
|
||||
let count = index_writer.create_reader().unwrap().count().unwrap();
|
||||
assert_eq!(count, total_count);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_add_strings_by_batch() {
|
||||
use crate::data_type::TantivyDataType;
|
||||
use crate::index_writer::IndexWriterWrapper;
|
||||
|
||||
let temp_dir = tempdir().unwrap();
|
||||
let mut index_writer = IndexWriterWrapper::new(
|
||||
"test",
|
||||
TantivyDataType::Keyword,
|
||||
temp_dir.path().to_str().unwrap().to_string(),
|
||||
1,
|
||||
15 * 1024 * 1024,
|
||||
TantivyIndexVersion::V7,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let keys = (0..10000)
|
||||
.map(|i| format!("key{:05}", i))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let c_keys: Vec<CString> = keys.into_iter().map(|k| CString::new(k).unwrap()).collect();
|
||||
let key_ptrs: Vec<*const libc::c_char> = c_keys.iter().map(|cs| cs.as_ptr()).collect();
|
||||
|
||||
index_writer
|
||||
.add_string_by_batch(&key_ptrs, Some(0))
|
||||
.unwrap();
|
||||
index_writer.commit().unwrap();
|
||||
let reader = index_writer.create_reader().unwrap();
|
||||
let count: u32 = reader.count().unwrap();
|
||||
assert_eq!(count, 10000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_add_data_by_batch() {
|
||||
use crate::data_type::TantivyDataType;
|
||||
use crate::index_writer::IndexWriterWrapper;
|
||||
|
||||
let temp_dir = tempdir().unwrap();
|
||||
let mut index_writer = IndexWriterWrapper::new(
|
||||
"test",
|
||||
TantivyDataType::I64,
|
||||
temp_dir.path().to_str().unwrap().to_string(),
|
||||
1,
|
||||
15 * 1024 * 1024,
|
||||
TantivyIndexVersion::V7,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let keys = (0..10000).collect::<Vec<_>>();
|
||||
|
||||
let mut count = 0;
|
||||
for i in keys {
|
||||
index_writer
|
||||
.add_data_by_batch::<i64>(&[i], Some(i as i64))
|
||||
.unwrap();
|
||||
|
||||
count += 1;
|
||||
|
||||
if count % 1000 == 0 {
|
||||
index_writer.commit().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
index_writer.commit().unwrap();
|
||||
let reader = index_writer.create_reader().unwrap();
|
||||
let count: u32 = reader.count().unwrap();
|
||||
assert_eq!(count, 10000);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5,6 +5,7 @@ use crate::{
|
||||
array::RustResult,
|
||||
cstr_to_str,
|
||||
data_type::TantivyDataType,
|
||||
error::Result,
|
||||
index_writer::IndexWriterWrapper,
|
||||
util::{create_binding, free_binding},
|
||||
TantivyIndexVersion,
|
||||
@ -99,6 +100,35 @@ pub extern "C" fn tantivy_create_reader_from_writer(ptr: *mut c_void) -> RustRes
|
||||
}
|
||||
|
||||
// -------------------------build--------------------
|
||||
fn execute<T: Copy, I>(
|
||||
arr: I,
|
||||
offset: i64,
|
||||
e: fn(&mut IndexWriterWrapper, T, Option<i64>) -> Result<()>,
|
||||
w: &mut IndexWriterWrapper,
|
||||
) -> Result<()>
|
||||
where
|
||||
I: IntoIterator<Item = T>,
|
||||
{
|
||||
for (index, data) in arr.into_iter().enumerate() {
|
||||
e(w, data, Some(offset + (index as i64)))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn execute_by_single_segment_writer<T: Copy, I>(
|
||||
arr: I,
|
||||
e: fn(&mut IndexWriterWrapper, T, Option<i64>) -> Result<()>,
|
||||
w: &mut IndexWriterWrapper,
|
||||
) -> Result<()>
|
||||
where
|
||||
I: IntoIterator<Item = T>,
|
||||
{
|
||||
for data in arr.into_iter() {
|
||||
e(w, data, None)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_index_add_int8s(
|
||||
ptr: *mut c_void,
|
||||
@ -109,9 +139,13 @@ pub extern "C" fn tantivy_index_add_int8s(
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
(*real)
|
||||
.add_data_by_batch::<i8>(arr, Some(offset_begin))
|
||||
.into()
|
||||
execute(
|
||||
arr.into_iter().map(|num| *num as i64),
|
||||
offset_begin,
|
||||
IndexWriterWrapper::add::<i64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
@ -123,7 +157,14 @@ pub extern "C" fn tantivy_index_add_int8s_by_single_segment_writer(
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe { (*real).add_data_by_batch::<i8>(arr, None).into() }
|
||||
unsafe {
|
||||
execute_by_single_segment_writer(
|
||||
arr.into_iter().map(|num| *num as i64),
|
||||
IndexWriterWrapper::add::<i64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
@ -136,9 +177,13 @@ pub extern "C" fn tantivy_index_add_int16s(
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
(*real)
|
||||
.add_data_by_batch::<i16>(arr, Some(offset_begin))
|
||||
.into()
|
||||
execute(
|
||||
arr.into_iter().map(|num| *num as i64),
|
||||
offset_begin,
|
||||
IndexWriterWrapper::add::<i64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
@ -150,7 +195,14 @@ pub extern "C" fn tantivy_index_add_int16s_by_single_segment_writer(
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe { (*real).add_data_by_batch::<i16>(arr, None).into() }
|
||||
unsafe {
|
||||
execute_by_single_segment_writer(
|
||||
arr.into_iter().map(|num| *num as i64),
|
||||
IndexWriterWrapper::add::<i64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
@ -163,9 +215,13 @@ pub extern "C" fn tantivy_index_add_int32s(
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
(*real)
|
||||
.add_data_by_batch::<i32>(arr, Some(offset_begin))
|
||||
.into()
|
||||
execute(
|
||||
arr.into_iter().map(|num| *num as i64),
|
||||
offset_begin,
|
||||
IndexWriterWrapper::add::<i64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
@ -177,7 +233,14 @@ pub extern "C" fn tantivy_index_add_int32s_by_single_segment_writer(
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe { (*real).add_data_by_batch::<i32>(arr, None).into() }
|
||||
unsafe {
|
||||
execute_by_single_segment_writer(
|
||||
arr.into_iter().map(|num| *num as i64),
|
||||
IndexWriterWrapper::add::<i64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
@ -189,10 +252,15 @@ pub extern "C" fn tantivy_index_add_int64s(
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
|
||||
unsafe {
|
||||
(*real)
|
||||
.add_data_by_batch::<i64>(arr, Some(offset_begin))
|
||||
.into()
|
||||
execute(
|
||||
arr.iter().copied(),
|
||||
offset_begin,
|
||||
IndexWriterWrapper::add::<i64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
@ -204,7 +272,15 @@ pub extern "C" fn tantivy_index_add_int64s_by_single_segment_writer(
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe { (*real).add_data_by_batch::<i64>(arr, None).into() }
|
||||
|
||||
unsafe {
|
||||
execute_by_single_segment_writer(
|
||||
arr.iter().copied(),
|
||||
IndexWriterWrapper::add::<i64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
@ -217,9 +293,13 @@ pub extern "C" fn tantivy_index_add_f32s(
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
(*real)
|
||||
.add_data_by_batch::<f32>(arr, Some(offset_begin))
|
||||
.into()
|
||||
execute(
|
||||
arr.into_iter().map(|num| *num as f64),
|
||||
offset_begin,
|
||||
IndexWriterWrapper::add::<f64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
@ -231,7 +311,14 @@ pub extern "C" fn tantivy_index_add_f32s_by_single_segment_writer(
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe { (*real).add_data_by_batch::<f32>(arr, None).into() }
|
||||
unsafe {
|
||||
execute_by_single_segment_writer(
|
||||
arr.into_iter().map(|num| *num as f64),
|
||||
IndexWriterWrapper::add::<f64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
@ -244,9 +331,13 @@ pub extern "C" fn tantivy_index_add_f64s(
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
(*real)
|
||||
.add_data_by_batch::<f64>(arr, Some(offset_begin))
|
||||
.into()
|
||||
execute(
|
||||
arr.iter().copied(),
|
||||
offset_begin,
|
||||
IndexWriterWrapper::add::<f64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
@ -258,7 +349,14 @@ pub extern "C" fn tantivy_index_add_f64s_by_single_segment_writer(
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe { (*real).add_data_by_batch::<f64>(arr, None).into() }
|
||||
unsafe {
|
||||
execute_by_single_segment_writer(
|
||||
arr.into_iter().map(|num| *num as f64),
|
||||
IndexWriterWrapper::add::<f64>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
@ -271,9 +369,13 @@ pub extern "C" fn tantivy_index_add_bools(
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe {
|
||||
(*real)
|
||||
.add_data_by_batch::<bool>(arr, Some(offset_begin))
|
||||
.into()
|
||||
execute(
|
||||
arr.iter().copied(),
|
||||
offset_begin,
|
||||
IndexWriterWrapper::add::<bool>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
@ -285,34 +387,37 @@ pub extern "C" fn tantivy_index_add_bools_by_single_segment_writer(
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe { (*real).add_data_by_batch::<bool>(arr, None).into() }
|
||||
unsafe {
|
||||
execute_by_single_segment_writer(
|
||||
arr.iter().copied(),
|
||||
IndexWriterWrapper::add::<bool>,
|
||||
&mut (*real),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this is not a very efficient way, since we must call this function many times, which
|
||||
// will bring a lot of overhead caused by the rust binding.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_index_add_strings(
|
||||
pub extern "C" fn tantivy_index_add_string(
|
||||
ptr: *mut c_void,
|
||||
array: *const *const c_char,
|
||||
len: usize,
|
||||
s: *const c_char,
|
||||
offset: i64,
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe { &mut (*real) }
|
||||
.add_string_by_batch(arr, Some(offset))
|
||||
.into()
|
||||
let s = cstr_to_str!(s);
|
||||
unsafe { (*real).add::<&str>(s, Some(offset)).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn tantivy_index_add_strings_by_single_segment_writer(
|
||||
pub extern "C" fn tantivy_index_add_string_by_single_segment_writer(
|
||||
ptr: *mut c_void,
|
||||
array: *const *const c_char,
|
||||
len: usize,
|
||||
s: *const c_char,
|
||||
) -> RustResult {
|
||||
let real = ptr as *mut IndexWriterWrapper;
|
||||
let arr = unsafe { slice::from_raw_parts(array, len) };
|
||||
unsafe { &mut (*real) }
|
||||
.add_string_by_batch(arr, None)
|
||||
.into()
|
||||
let s = cstr_to_str!(s);
|
||||
unsafe { (*real).add::<&str>(s, None).into() }
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
||||
@ -8,9 +8,7 @@ use log::info;
|
||||
use tantivy_5::schema::{
|
||||
Field, IndexRecordOption, Schema, SchemaBuilder, TextFieldIndexing, TextOptions, FAST, INDEXED,
|
||||
};
|
||||
use tantivy_5::{
|
||||
doc, Document as TantivyDocument, Index, IndexWriter, SingleSegmentIndexWriter, UserOperation,
|
||||
};
|
||||
use tantivy_5::{doc, Document as TantivyDocument, Index, IndexWriter, SingleSegmentIndexWriter, UserOperation};
|
||||
|
||||
use crate::data_type::TantivyDataType;
|
||||
|
||||
@ -49,27 +47,6 @@ pub(crate) fn schema_builder_add_field(
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivyValue<TantivyDocument> for i8 {
|
||||
#[inline]
|
||||
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
|
||||
document.add_i64(Field::from_field_id(field), *self as i64);
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivyValue<TantivyDocument> for i16 {
|
||||
#[inline]
|
||||
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
|
||||
document.add_i64(Field::from_field_id(field), *self as i64);
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivyValue<TantivyDocument> for i32 {
|
||||
#[inline]
|
||||
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
|
||||
document.add_i64(Field::from_field_id(field), *self as i64);
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivyValue<TantivyDocument> for i64 {
|
||||
#[inline]
|
||||
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
|
||||
@ -77,10 +54,9 @@ impl TantivyValue<TantivyDocument> for i64 {
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivyValue<TantivyDocument> for f32 {
|
||||
#[inline]
|
||||
impl TantivyValue<TantivyDocument> for u64 {
|
||||
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
|
||||
document.add_f64(Field::from_field_id(field), *self as f64);
|
||||
document.add_u64(Field::from_field_id(field), *self);
|
||||
}
|
||||
}
|
||||
|
||||
@ -172,58 +148,15 @@ impl IndexWriterWrapperImpl {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_data_by_batch<T: TantivyValue<TantivyDocument>>(
|
||||
pub fn add<T: TantivyValue<TantivyDocument>>(
|
||||
&mut self,
|
||||
batch_data: &[T],
|
||||
data: T,
|
||||
offset: Option<i64>,
|
||||
) -> Result<()> {
|
||||
match &self.index_writer {
|
||||
Either::Left(_) => self.add_datas(batch_data, offset.unwrap()),
|
||||
Either::Right(_) => self.add_datas_by_single_segment(batch_data),
|
||||
}
|
||||
}
|
||||
let mut document = TantivyDocument::default();
|
||||
data.add_to_document(self.field.field_id(), &mut document);
|
||||
|
||||
fn add_datas<T: TantivyValue<TantivyDocument>>(
|
||||
&mut self,
|
||||
batch_data: &[T],
|
||||
offset_begin: i64,
|
||||
) -> Result<()> {
|
||||
let writer = self.index_writer.as_ref().left().unwrap();
|
||||
let id_field = self.id_field.unwrap();
|
||||
let mut batch = Vec::with_capacity(BATCH_SIZE);
|
||||
for (idx, data) in batch_data.into_iter().enumerate() {
|
||||
let offset = offset_begin + idx as i64;
|
||||
|
||||
let mut doc = TantivyDocument::default();
|
||||
data.add_to_document(self.field.field_id(), &mut doc);
|
||||
doc.add_i64(id_field, offset);
|
||||
|
||||
batch.push(UserOperation::Add(doc));
|
||||
if batch.len() == BATCH_SIZE {
|
||||
writer.run(std::mem::replace(
|
||||
&mut batch,
|
||||
Vec::with_capacity(BATCH_SIZE),
|
||||
))?;
|
||||
}
|
||||
}
|
||||
|
||||
if !batch.is_empty() {
|
||||
writer.run(batch)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn add_datas_by_single_segment<T: TantivyValue<TantivyDocument>>(
|
||||
&mut self,
|
||||
batch_data: &[T],
|
||||
) -> Result<()> {
|
||||
for d in batch_data {
|
||||
let mut document = TantivyDocument::default();
|
||||
d.add_to_document(self.field.field_id(), &mut document);
|
||||
self.add_document(document, None)?;
|
||||
}
|
||||
Ok(())
|
||||
self.add_document(document, offset)
|
||||
}
|
||||
|
||||
pub fn add_array<T: TantivyValue<TantivyDocument>, I>(
|
||||
@ -255,56 +188,6 @@ impl IndexWriterWrapperImpl {
|
||||
self.add_document(document, offset)
|
||||
}
|
||||
|
||||
pub fn add_string_by_batch(
|
||||
&mut self,
|
||||
data: &[*const c_char],
|
||||
offset: Option<i64>,
|
||||
) -> Result<()> {
|
||||
match &self.index_writer {
|
||||
Either::Left(_) => self.add_strings(data, offset.unwrap()),
|
||||
Either::Right(_) => self.add_strings_by_single_segment(data),
|
||||
}
|
||||
}
|
||||
|
||||
fn add_strings(&mut self, data: &[*const c_char], offset: i64) -> Result<()> {
|
||||
let writer = self.index_writer.as_ref().left().unwrap();
|
||||
let id_field = self.id_field.unwrap();
|
||||
let mut batch = Vec::with_capacity(BATCH_SIZE);
|
||||
for (idx, key) in data.into_iter().enumerate() {
|
||||
let key = unsafe { CStr::from_ptr(*key) }
|
||||
.to_str()
|
||||
.map_err(|e| TantivyBindingError::InternalError(e.to_string()))?;
|
||||
let key_offset = offset + idx as i64;
|
||||
batch.push(UserOperation::Add(doc!(
|
||||
id_field => key_offset,
|
||||
self.field => key,
|
||||
)));
|
||||
if batch.len() >= BATCH_SIZE {
|
||||
writer.run(std::mem::replace(
|
||||
&mut batch,
|
||||
Vec::with_capacity(BATCH_SIZE),
|
||||
))?;
|
||||
}
|
||||
}
|
||||
|
||||
if !batch.is_empty() {
|
||||
writer.run(batch)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn add_strings_by_single_segment(&mut self, data: &[*const c_char]) -> Result<()> {
|
||||
let writer = self.index_writer.as_mut().right().unwrap();
|
||||
for key in data {
|
||||
let key = unsafe { CStr::from_ptr(*key) }
|
||||
.to_str()
|
||||
.map_err(|e| TantivyBindingError::InternalError(e.to_string()))?;
|
||||
writer.add_document(doc!(self.field => key))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_json_key_stats(
|
||||
&mut self,
|
||||
keys: &[*const i8],
|
||||
|
||||
@ -48,27 +48,6 @@ pub(crate) fn schema_builder_add_field(
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivyValue<TantivyDocument> for i8 {
|
||||
#[inline]
|
||||
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
|
||||
document.add_i64(Field::from_field_id(field), *self as i64);
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivyValue<TantivyDocument> for i16 {
|
||||
#[inline]
|
||||
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
|
||||
document.add_i64(Field::from_field_id(field), *self as i64);
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivyValue<TantivyDocument> for i32 {
|
||||
#[inline]
|
||||
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
|
||||
document.add_i64(Field::from_field_id(field), *self as i64);
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivyValue<TantivyDocument> for i64 {
|
||||
#[inline]
|
||||
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
|
||||
@ -76,10 +55,9 @@ impl TantivyValue<TantivyDocument> for i64 {
|
||||
}
|
||||
}
|
||||
|
||||
impl TantivyValue<TantivyDocument> for f32 {
|
||||
#[inline]
|
||||
impl TantivyValue<TantivyDocument> for u64 {
|
||||
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
|
||||
document.add_f64(Field::from_field_id(field), *self as f64);
|
||||
document.add_u64(Field::from_field_id(field), *self);
|
||||
}
|
||||
}
|
||||
|
||||
@ -147,31 +125,11 @@ impl IndexWriterWrapperImpl {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_data_by_batch<T: TantivyValue<TantivyDocument>>(
|
||||
&mut self,
|
||||
batch_data: &[T],
|
||||
mut offset: u32,
|
||||
) -> Result<()> {
|
||||
let mut batch = Vec::with_capacity(BATCH_SIZE);
|
||||
for data in batch_data.into_iter() {
|
||||
let mut doc = TantivyDocument::default();
|
||||
data.add_to_document(self.field.field_id(), &mut doc);
|
||||
pub fn add<T: TantivyValue<TantivyDocument>>(&mut self, data: T, offset: u32) -> Result<()> {
|
||||
let mut document = TantivyDocument::default();
|
||||
data.add_to_document(self.field.field_id(), &mut document);
|
||||
|
||||
batch.push(doc);
|
||||
if batch.len() == BATCH_SIZE {
|
||||
self.index_writer.add_documents_with_doc_id(
|
||||
offset,
|
||||
std::mem::replace(&mut batch, Vec::with_capacity(BATCH_SIZE)),
|
||||
)?;
|
||||
offset += BATCH_SIZE as u32;
|
||||
}
|
||||
}
|
||||
|
||||
if !batch.is_empty() {
|
||||
self.index_writer.add_documents_with_doc_id(offset, batch)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
self.add_document(document, offset)
|
||||
}
|
||||
|
||||
pub fn add_array<T: TantivyValue<TantivyDocument>, I>(
|
||||
@ -199,31 +157,6 @@ impl IndexWriterWrapperImpl {
|
||||
self.add_document(document, offset)
|
||||
}
|
||||
|
||||
pub fn add_string_by_batch(&mut self, data: &[*const c_char], mut offset: u32) -> Result<()> {
|
||||
let mut batch = Vec::with_capacity(BATCH_SIZE);
|
||||
for key in data.into_iter() {
|
||||
let key = unsafe { CStr::from_ptr(*key) }
|
||||
.to_str()
|
||||
.map_err(|e| TantivyBindingError::InternalError(e.to_string()))?;
|
||||
batch.push(doc!(
|
||||
self.field => key,
|
||||
));
|
||||
if batch.len() == BATCH_SIZE {
|
||||
self.index_writer.add_documents_with_doc_id(
|
||||
offset,
|
||||
std::mem::replace(&mut batch, Vec::with_capacity(BATCH_SIZE)),
|
||||
)?;
|
||||
offset += BATCH_SIZE as u32;
|
||||
}
|
||||
}
|
||||
|
||||
if !batch.is_empty() {
|
||||
self.index_writer.add_documents_with_doc_id(offset, batch)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_json_key_stats(
|
||||
&mut self,
|
||||
keys: &[*const i8],
|
||||
|
||||
@ -274,17 +274,16 @@ struct TantivyIndexWrapper {
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
std::vector<const char*> views;
|
||||
views.reserve(len);
|
||||
// TODO: not very efficient, a lot of overhead due to rust-ffi call.
|
||||
for (uintptr_t i = 0; i < len; i++) {
|
||||
views.push_back(
|
||||
static_cast<const std::string*>(array)[i].c_str());
|
||||
auto res = RustResultWrapper(tantivy_index_add_string(
|
||||
writer_,
|
||||
static_cast<const std::string*>(array)[i].c_str(),
|
||||
offset_begin + i));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add string: {}",
|
||||
res.result_->error);
|
||||
}
|
||||
auto res = RustResultWrapper(tantivy_index_add_strings(
|
||||
writer_, views.data(), len, offset_begin));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add string: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -468,18 +467,16 @@ struct TantivyIndexWrapper {
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
std::vector<const char*> views;
|
||||
views.reserve(len);
|
||||
// TODO: not very efficient, a lot of overhead due to rust-ffi call.
|
||||
for (uintptr_t i = 0; i < len; i++) {
|
||||
views.push_back(
|
||||
static_cast<const std::string*>(array)[i].c_str());
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_string_by_single_segment_writer(
|
||||
writer_,
|
||||
static_cast<const std::string*>(array)[i].c_str()));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add string: {}",
|
||||
res.result_->error);
|
||||
}
|
||||
auto res = RustResultWrapper(
|
||||
tantivy_index_add_strings_by_single_segment_writer(
|
||||
writer_, views.data(), len));
|
||||
AssertInfo(res.result_->success,
|
||||
"failed to add string: {}",
|
||||
res.result_->error);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -1069,4 +1069,4 @@ TEST(TextMatch, ConcurrentReadWriteWithNull) {
|
||||
|
||||
writer.join();
|
||||
reader.join();
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user