fix: revert batch add (#41374)

issue: #41375

todo: fix the problems described in the issue.

---------

Signed-off-by: SpadeA <tangchenjie1210@gmail.com>
This commit is contained in:
Spade A 2025-04-17 22:32:38 +08:00 committed by GitHub
parent 4552dd4b23
commit 62293cb582
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 192 additions and 368 deletions

View File

@ -266,14 +266,9 @@ RustResult tantivy_index_add_bools_by_single_segment_writer(void *ptr,
const bool *array,
uintptr_t len);
RustResult tantivy_index_add_strings(void *ptr,
const char *const *array,
uintptr_t len,
int64_t offset);
RustResult tantivy_index_add_string(void *ptr, const char *s, int64_t offset);
RustResult tantivy_index_add_strings_by_single_segment_writer(void *ptr,
const char *const *array,
uintptr_t len);
RustResult tantivy_index_add_string_by_single_segment_writer(void *ptr, const char *s);
RustResult tantivy_index_add_json_key_stats_data_by_batch(void *ptr,
const char *const *keys,

View File

@ -84,10 +84,8 @@ mod tests {
)
.unwrap();
writer.add_data_by_batch(&["网球和滑雪"], Some(0)).unwrap();
writer
.add_data_by_batch(&["网球以及滑雪"], Some(1))
.unwrap();
writer.add("网球和滑雪", Some(0)).unwrap();
writer.add("网球以及滑雪", Some(1)).unwrap();
writer.commit().unwrap();
@ -118,7 +116,7 @@ mod tests {
.unwrap();
for i in 0..10000 {
writer.add_data_by_batch(&["hello world"], Some(i)).unwrap();
writer.add("hello world", Some(i)).unwrap();
}
writer.commit().unwrap();

View File

@ -79,15 +79,13 @@ impl IndexWriterWrapper {
}
}
pub fn add_data_by_batch<T>(&mut self, data: &[T], offset: Option<i64>) -> Result<()>
pub fn add<T>(&mut self, data: T, offset: Option<i64>) -> Result<()>
where
T: TantivyValue<TantivyDocumentV5> + TantivyValue<TantivyDocumentV7>,
{
match self {
IndexWriterWrapper::V5(writer) => writer.add_data_by_batch(data, offset),
IndexWriterWrapper::V7(writer) => {
writer.add_data_by_batch(data, offset.unwrap() as u32)
}
IndexWriterWrapper::V5(writer) => writer.add(data, offset),
IndexWriterWrapper::V7(writer) => writer.add(data, offset.unwrap() as u32),
}
}
@ -102,19 +100,6 @@ impl IndexWriterWrapper {
}
}
/// Forwards a batch of C string pointers to the version-specific writer.
///
/// For the V5 writer `offset` is passed through unchanged. For the V7 writer it is
/// unwrapped and cast to `u32`, so passing `None` panics and a value outside the
/// `u32` range is truncated by the `as` cast.
pub fn add_string_by_batch(
&mut self,
data: &[*const c_char],
offset: Option<i64>,
) -> Result<()> {
match self {
IndexWriterWrapper::V5(writer) => writer.add_string_by_batch(data, offset),
IndexWriterWrapper::V7(writer) => {
writer.add_string_by_batch(data, offset.unwrap() as u32)
}
}
}
pub fn add_array_keywords(
&mut self,
datas: &[*const c_char],
@ -173,9 +158,9 @@ impl IndexWriterWrapper {
#[cfg(test)]
mod tests {
use rand::Rng;
use std::{ffi::CString, ops::Bound};
use tantivy_5::{query, Index, ReloadPolicy};
use rand::Rng;
use tempfile::{tempdir, TempDir};
use crate::{data_type::TantivyDataType, TantivyIndexVersion};
@ -200,13 +185,12 @@ mod tests {
.unwrap();
for i in 0..10 {
index_wrapper
.add_data_by_batch::<i64>(&[i], Some(i as i64))
.unwrap();
index_wrapper.add::<i64>(i, Some(i as i64)).unwrap();
}
index_wrapper.commit().unwrap();
}
use tantivy_5::{query, Index, ReloadPolicy};
let index = Index::open_in_dir(dir.path()).unwrap();
let reader = index
.reader_builder()
@ -240,7 +224,7 @@ mod tests {
.unwrap();
for i in 0..10 {
index_wrapper.add_data_by_batch::<i64>(&[i], None).unwrap();
index_wrapper.add::<i64>(i, None).unwrap();
}
index_wrapper.finish().unwrap();
}
@ -283,9 +267,7 @@ mod tests {
.unwrap();
for i in 0..10 {
index_wrapper
.add_data_by_batch(&["hello"], Some(i as i64))
.unwrap();
index_wrapper.add("hello", Some(i as i64)).unwrap();
}
index_wrapper.commit().unwrap();
}
@ -352,73 +334,4 @@ mod tests {
let count = index_writer.create_reader().unwrap().count().unwrap();
assert_eq!(count, total_count);
}
// Adds 10000 keyword strings with a single `add_string_by_batch` call on a V7 writer,
// commits, and checks that the reader created from the writer counts 10000 entries.
#[test]
pub fn test_add_strings_by_batch() {
use crate::data_type::TantivyDataType;
use crate::index_writer::IndexWriterWrapper;
let temp_dir = tempdir().unwrap();
let mut index_writer = IndexWriterWrapper::new(
"test",
TantivyDataType::Keyword,
temp_dir.path().to_str().unwrap().to_string(),
1,
15 * 1024 * 1024,
TantivyIndexVersion::V7,
)
.unwrap();
// Zero-padded keys: "key00000" .. "key09999".
let keys = (0..10000)
.map(|i| format!("key{:05}", i))
.collect::<Vec<_>>();
// `c_keys` owns the NUL-terminated buffers; it has to outlive `key_ptrs`, which only
// borrows raw pointers into them.
let c_keys: Vec<CString> = keys.into_iter().map(|k| CString::new(k).unwrap()).collect();
let key_ptrs: Vec<*const libc::c_char> = c_keys.iter().map(|cs| cs.as_ptr()).collect();
// The whole batch is submitted at once, starting at offset 0.
index_writer
.add_string_by_batch(&key_ptrs, Some(0))
.unwrap();
index_writer.commit().unwrap();
let reader = index_writer.create_reader().unwrap();
let count: u32 = reader.count().unwrap();
assert_eq!(count, 10000);
}
// Adds 10000 i64 values to a V7 writer one element per `add_data_by_batch` call,
// committing after every 1000 additions, and checks that the reader created from the
// writer counts 10000 entries.
#[test]
pub fn test_add_data_by_batch() {
use crate::data_type::TantivyDataType;
use crate::index_writer::IndexWriterWrapper;
let temp_dir = tempdir().unwrap();
let mut index_writer = IndexWriterWrapper::new(
"test",
TantivyDataType::I64,
temp_dir.path().to_str().unwrap().to_string(),
1,
15 * 1024 * 1024,
TantivyIndexVersion::V7,
)
.unwrap();
let keys = (0..10000).collect::<Vec<_>>();
// Number of values added so far; used only to decide when to commit.
let mut count = 0;
for i in keys {
// Each value is stored at an offset equal to the value itself.
index_writer
.add_data_by_batch::<i64>(&[i], Some(i as i64))
.unwrap();
count += 1;
if count % 1000 == 0 {
index_writer.commit().unwrap();
}
}
// Final commit after the loop, before the reader is created.
index_writer.commit().unwrap();
let reader = index_writer.create_reader().unwrap();
// Shadows the loop counter above with the reader's count.
let count: u32 = reader.count().unwrap();
assert_eq!(count, 10000);
}
}

View File

@ -5,6 +5,7 @@ use crate::{
array::RustResult,
cstr_to_str,
data_type::TantivyDataType,
error::Result,
index_writer::IndexWriterWrapper,
util::{create_binding, free_binding},
TantivyIndexVersion,
@ -99,6 +100,35 @@ pub extern "C" fn tantivy_create_reader_from_writer(ptr: *mut c_void) -> RustRes
}
// -------------------------build--------------------
/// Applies `e` to every element produced by `arr`, pairing each element with a row offset.
///
/// The first element is passed `Some(offset)`, the next `Some(offset + 1)`, and so on.
/// Processing stops at the first error returned by `e`, and that error is returned.
fn execute<T: Copy, I>(
    arr: I,
    offset: i64,
    e: fn(&mut IndexWriterWrapper, T, Option<i64>) -> Result<()>,
    w: &mut IndexWriterWrapper,
) -> Result<()>
where
    I: IntoIterator<Item = T>,
{
    arr.into_iter()
        .enumerate()
        .try_for_each(|(position, item)| e(&mut *w, item, Some(offset + position as i64)))
}
/// Applies `e` to every element produced by `arr`, always passing `None` as the offset.
///
/// Processing stops at the first error returned by `e`, and that error is returned.
fn execute_by_single_segment_writer<T: Copy, I>(
    arr: I,
    e: fn(&mut IndexWriterWrapper, T, Option<i64>) -> Result<()>,
    w: &mut IndexWriterWrapper,
) -> Result<()>
where
    I: IntoIterator<Item = T>,
{
    arr.into_iter().try_for_each(|item| e(&mut *w, item, None))
}
#[no_mangle]
pub extern "C" fn tantivy_index_add_int8s(
ptr: *mut c_void,
@ -109,9 +139,13 @@ pub extern "C" fn tantivy_index_add_int8s(
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
(*real)
.add_data_by_batch::<i8>(arr, Some(offset_begin))
.into()
execute(
arr.into_iter().map(|num| *num as i64),
offset_begin,
IndexWriterWrapper::add::<i64>,
&mut (*real),
)
.into()
}
}
@ -123,7 +157,14 @@ pub extern "C" fn tantivy_index_add_int8s_by_single_segment_writer(
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe { (*real).add_data_by_batch::<i8>(arr, None).into() }
unsafe {
execute_by_single_segment_writer(
arr.into_iter().map(|num| *num as i64),
IndexWriterWrapper::add::<i64>,
&mut (*real),
)
.into()
}
}
#[no_mangle]
@ -136,9 +177,13 @@ pub extern "C" fn tantivy_index_add_int16s(
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
(*real)
.add_data_by_batch::<i16>(arr, Some(offset_begin))
.into()
execute(
arr.into_iter().map(|num| *num as i64),
offset_begin,
IndexWriterWrapper::add::<i64>,
&mut (*real),
)
.into()
}
}
@ -150,7 +195,14 @@ pub extern "C" fn tantivy_index_add_int16s_by_single_segment_writer(
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe { (*real).add_data_by_batch::<i16>(arr, None).into() }
unsafe {
execute_by_single_segment_writer(
arr.into_iter().map(|num| *num as i64),
IndexWriterWrapper::add::<i64>,
&mut (*real),
)
.into()
}
}
#[no_mangle]
@ -163,9 +215,13 @@ pub extern "C" fn tantivy_index_add_int32s(
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
(*real)
.add_data_by_batch::<i32>(arr, Some(offset_begin))
.into()
execute(
arr.into_iter().map(|num| *num as i64),
offset_begin,
IndexWriterWrapper::add::<i64>,
&mut (*real),
)
.into()
}
}
@ -177,7 +233,14 @@ pub extern "C" fn tantivy_index_add_int32s_by_single_segment_writer(
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe { (*real).add_data_by_batch::<i32>(arr, None).into() }
unsafe {
execute_by_single_segment_writer(
arr.into_iter().map(|num| *num as i64),
IndexWriterWrapper::add::<i64>,
&mut (*real),
)
.into()
}
}
#[no_mangle]
@ -189,10 +252,15 @@ pub extern "C" fn tantivy_index_add_int64s(
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
(*real)
.add_data_by_batch::<i64>(arr, Some(offset_begin))
.into()
execute(
arr.iter().copied(),
offset_begin,
IndexWriterWrapper::add::<i64>,
&mut (*real),
)
.into()
}
}
@ -204,7 +272,15 @@ pub extern "C" fn tantivy_index_add_int64s_by_single_segment_writer(
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe { (*real).add_data_by_batch::<i64>(arr, None).into() }
unsafe {
execute_by_single_segment_writer(
arr.iter().copied(),
IndexWriterWrapper::add::<i64>,
&mut (*real),
)
.into()
}
}
#[no_mangle]
@ -217,9 +293,13 @@ pub extern "C" fn tantivy_index_add_f32s(
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
(*real)
.add_data_by_batch::<f32>(arr, Some(offset_begin))
.into()
execute(
arr.into_iter().map(|num| *num as f64),
offset_begin,
IndexWriterWrapper::add::<f64>,
&mut (*real),
)
.into()
}
}
@ -231,7 +311,14 @@ pub extern "C" fn tantivy_index_add_f32s_by_single_segment_writer(
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe { (*real).add_data_by_batch::<f32>(arr, None).into() }
unsafe {
execute_by_single_segment_writer(
arr.into_iter().map(|num| *num as f64),
IndexWriterWrapper::add::<f64>,
&mut (*real),
)
.into()
}
}
#[no_mangle]
@ -244,9 +331,13 @@ pub extern "C" fn tantivy_index_add_f64s(
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
(*real)
.add_data_by_batch::<f64>(arr, Some(offset_begin))
.into()
execute(
arr.iter().copied(),
offset_begin,
IndexWriterWrapper::add::<f64>,
&mut (*real),
)
.into()
}
}
@ -258,7 +349,14 @@ pub extern "C" fn tantivy_index_add_f64s_by_single_segment_writer(
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe { (*real).add_data_by_batch::<f64>(arr, None).into() }
unsafe {
execute_by_single_segment_writer(
arr.into_iter().map(|num| *num as f64),
IndexWriterWrapper::add::<f64>,
&mut (*real),
)
.into()
}
}
#[no_mangle]
@ -271,9 +369,13 @@ pub extern "C" fn tantivy_index_add_bools(
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe {
(*real)
.add_data_by_batch::<bool>(arr, Some(offset_begin))
.into()
execute(
arr.iter().copied(),
offset_begin,
IndexWriterWrapper::add::<bool>,
&mut (*real),
)
.into()
}
}
@ -285,34 +387,37 @@ pub extern "C" fn tantivy_index_add_bools_by_single_segment_writer(
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe { (*real).add_data_by_batch::<bool>(arr, None).into() }
unsafe {
execute_by_single_segment_writer(
arr.iter().copied(),
IndexWriterWrapper::add::<bool>,
&mut (*real),
)
.into()
}
}
// TODO: this is not a very efficient way, since we must call this function many times, which
// will bring a lot of overhead caused by the rust binding.
#[no_mangle]
pub extern "C" fn tantivy_index_add_strings(
pub extern "C" fn tantivy_index_add_string(
ptr: *mut c_void,
array: *const *const c_char,
len: usize,
s: *const c_char,
offset: i64,
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe { &mut (*real) }
.add_string_by_batch(arr, Some(offset))
.into()
let s = cstr_to_str!(s);
unsafe { (*real).add::<&str>(s, Some(offset)).into() }
}
#[no_mangle]
pub extern "C" fn tantivy_index_add_strings_by_single_segment_writer(
pub extern "C" fn tantivy_index_add_string_by_single_segment_writer(
ptr: *mut c_void,
array: *const *const c_char,
len: usize,
s: *const c_char,
) -> RustResult {
let real = ptr as *mut IndexWriterWrapper;
let arr = unsafe { slice::from_raw_parts(array, len) };
unsafe { &mut (*real) }
.add_string_by_batch(arr, None)
.into()
let s = cstr_to_str!(s);
unsafe { (*real).add::<&str>(s, None).into() }
}
#[no_mangle]

View File

@ -8,9 +8,7 @@ use log::info;
use tantivy_5::schema::{
Field, IndexRecordOption, Schema, SchemaBuilder, TextFieldIndexing, TextOptions, FAST, INDEXED,
};
use tantivy_5::{
doc, Document as TantivyDocument, Index, IndexWriter, SingleSegmentIndexWriter, UserOperation,
};
use tantivy_5::{doc, Document as TantivyDocument, Index, IndexWriter, SingleSegmentIndexWriter, UserOperation};
use crate::data_type::TantivyDataType;
@ -49,27 +47,6 @@ pub(crate) fn schema_builder_add_field(
}
}
impl TantivyValue<TantivyDocument> for i8 {
#[inline]
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
document.add_i64(Field::from_field_id(field), *self as i64);
}
}
impl TantivyValue<TantivyDocument> for i16 {
#[inline]
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
document.add_i64(Field::from_field_id(field), *self as i64);
}
}
impl TantivyValue<TantivyDocument> for i32 {
#[inline]
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
document.add_i64(Field::from_field_id(field), *self as i64);
}
}
impl TantivyValue<TantivyDocument> for i64 {
#[inline]
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
@ -77,10 +54,9 @@ impl TantivyValue<TantivyDocument> for i64 {
}
}
impl TantivyValue<TantivyDocument> for f32 {
#[inline]
impl TantivyValue<TantivyDocument> for u64 {
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
document.add_f64(Field::from_field_id(field), *self as f64);
document.add_u64(Field::from_field_id(field), *self);
}
}
@ -172,58 +148,15 @@ impl IndexWriterWrapperImpl {
Ok(())
}
pub fn add_data_by_batch<T: TantivyValue<TantivyDocument>>(
pub fn add<T: TantivyValue<TantivyDocument>>(
&mut self,
batch_data: &[T],
data: T,
offset: Option<i64>,
) -> Result<()> {
match &self.index_writer {
Either::Left(_) => self.add_datas(batch_data, offset.unwrap()),
Either::Right(_) => self.add_datas_by_single_segment(batch_data),
}
}
let mut document = TantivyDocument::default();
data.add_to_document(self.field.field_id(), &mut document);
fn add_datas<T: TantivyValue<TantivyDocument>>(
&mut self,
batch_data: &[T],
offset_begin: i64,
) -> Result<()> {
let writer = self.index_writer.as_ref().left().unwrap();
let id_field = self.id_field.unwrap();
let mut batch = Vec::with_capacity(BATCH_SIZE);
for (idx, data) in batch_data.into_iter().enumerate() {
let offset = offset_begin + idx as i64;
let mut doc = TantivyDocument::default();
data.add_to_document(self.field.field_id(), &mut doc);
doc.add_i64(id_field, offset);
batch.push(UserOperation::Add(doc));
if batch.len() == BATCH_SIZE {
writer.run(std::mem::replace(
&mut batch,
Vec::with_capacity(BATCH_SIZE),
))?;
}
}
if !batch.is_empty() {
writer.run(batch)?;
}
Ok(())
}
fn add_datas_by_single_segment<T: TantivyValue<TantivyDocument>>(
&mut self,
batch_data: &[T],
) -> Result<()> {
for d in batch_data {
let mut document = TantivyDocument::default();
d.add_to_document(self.field.field_id(), &mut document);
self.add_document(document, None)?;
}
Ok(())
self.add_document(document, offset)
}
pub fn add_array<T: TantivyValue<TantivyDocument>, I>(
@ -255,56 +188,6 @@ impl IndexWriterWrapperImpl {
self.add_document(document, offset)
}
/// Adds a batch of C strings, dispatching on which writer variant is held.
///
/// With the `Either::Left` writer the batch goes through `add_strings` and `offset`
/// must be `Some` (it is unwrapped, so `None` panics). With the `Either::Right` writer
/// the batch goes through `add_strings_by_single_segment` and `offset` is ignored.
pub fn add_string_by_batch(
&mut self,
data: &[*const c_char],
offset: Option<i64>,
) -> Result<()> {
match &self.index_writer {
Either::Left(_) => self.add_strings(data, offset.unwrap()),
Either::Right(_) => self.add_strings_by_single_segment(data),
}
}
/// Adds each C string in `data` as one document and submits the documents to the
/// `Either::Left` writer in groups of at most `BATCH_SIZE` operations.
///
/// The document for `data[idx]` stores `offset + idx` in `id_field` and the string in
/// `self.field`. A pointer whose bytes are not valid UTF-8 aborts the call with
/// `TantivyBindingError::InternalError`; groups already submitted are not undone here.
///
/// Panics if the writer is not the `Either::Left` variant or if `id_field` is `None`.
fn add_strings(&mut self, data: &[*const c_char], offset: i64) -> Result<()> {
let writer = self.index_writer.as_ref().left().unwrap();
let id_field = self.id_field.unwrap();
let mut batch = Vec::with_capacity(BATCH_SIZE);
for (idx, key) in data.into_iter().enumerate() {
// NOTE(review): relies on every pointer being a valid NUL-terminated string;
// nothing in this function checks that.
let key = unsafe { CStr::from_ptr(*key) }
.to_str()
.map_err(|e| TantivyBindingError::InternalError(e.to_string()))?;
let key_offset = offset + idx as i64;
batch.push(UserOperation::Add(doc!(
id_field => key_offset,
self.field => key,
)));
// Hand over a full group and start a fresh, pre-sized buffer.
if batch.len() >= BATCH_SIZE {
writer.run(std::mem::replace(
&mut batch,
Vec::with_capacity(BATCH_SIZE),
))?;
}
}
// Submit whatever is left over after the last full group.
if !batch.is_empty() {
writer.run(batch)?;
}
Ok(())
}
/// Adds each C string in `data` as its own document through the `Either::Right` writer.
///
/// Documents contain only `self.field`; no offset or id field is written. A pointer
/// whose bytes are not valid UTF-8 aborts the call with
/// `TantivyBindingError::InternalError`; documents added before it stay added.
///
/// Panics if the writer is not the `Either::Right` variant.
fn add_strings_by_single_segment(&mut self, data: &[*const c_char]) -> Result<()> {
let writer = self.index_writer.as_mut().right().unwrap();
for key in data {
// NOTE(review): relies on every pointer being a valid NUL-terminated string;
// nothing in this function checks that.
let key = unsafe { CStr::from_ptr(*key) }
.to_str()
.map_err(|e| TantivyBindingError::InternalError(e.to_string()))?;
writer.add_document(doc!(self.field => key))?;
}
Ok(())
}
pub fn add_json_key_stats(
&mut self,
keys: &[*const i8],

View File

@ -48,27 +48,6 @@ pub(crate) fn schema_builder_add_field(
}
}
impl TantivyValue<TantivyDocument> for i8 {
#[inline]
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
document.add_i64(Field::from_field_id(field), *self as i64);
}
}
impl TantivyValue<TantivyDocument> for i16 {
#[inline]
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
document.add_i64(Field::from_field_id(field), *self as i64);
}
}
impl TantivyValue<TantivyDocument> for i32 {
#[inline]
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
document.add_i64(Field::from_field_id(field), *self as i64);
}
}
impl TantivyValue<TantivyDocument> for i64 {
#[inline]
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
@ -76,10 +55,9 @@ impl TantivyValue<TantivyDocument> for i64 {
}
}
impl TantivyValue<TantivyDocument> for f32 {
#[inline]
impl TantivyValue<TantivyDocument> for u64 {
fn add_to_document(&self, field: u32, document: &mut TantivyDocument) {
document.add_f64(Field::from_field_id(field), *self as f64);
document.add_u64(Field::from_field_id(field), *self);
}
}
@ -147,31 +125,11 @@ impl IndexWriterWrapperImpl {
Ok(())
}
pub fn add_data_by_batch<T: TantivyValue<TantivyDocument>>(
&mut self,
batch_data: &[T],
mut offset: u32,
) -> Result<()> {
let mut batch = Vec::with_capacity(BATCH_SIZE);
for data in batch_data.into_iter() {
let mut doc = TantivyDocument::default();
data.add_to_document(self.field.field_id(), &mut doc);
pub fn add<T: TantivyValue<TantivyDocument>>(&mut self, data: T, offset: u32) -> Result<()> {
let mut document = TantivyDocument::default();
data.add_to_document(self.field.field_id(), &mut document);
batch.push(doc);
if batch.len() == BATCH_SIZE {
self.index_writer.add_documents_with_doc_id(
offset,
std::mem::replace(&mut batch, Vec::with_capacity(BATCH_SIZE)),
)?;
offset += BATCH_SIZE as u32;
}
}
if !batch.is_empty() {
self.index_writer.add_documents_with_doc_id(offset, batch)?;
}
Ok(())
self.add_document(document, offset)
}
pub fn add_array<T: TantivyValue<TantivyDocument>, I>(
@ -199,31 +157,6 @@ impl IndexWriterWrapperImpl {
self.add_document(document, offset)
}
/// Adds each C string in `data` as one document and submits the documents in groups of
/// at most `BATCH_SIZE` via `add_documents_with_doc_id`.
///
/// `offset` is the value passed with the first group; it is advanced by `BATCH_SIZE`
/// after every full group so each later group is submitted with the next offset.
/// Documents contain only `self.field`. A pointer whose bytes are not valid UTF-8 aborts
/// the call with `TantivyBindingError::InternalError`; groups already submitted are not
/// undone here.
pub fn add_string_by_batch(&mut self, data: &[*const c_char], mut offset: u32) -> Result<()> {
let mut batch = Vec::with_capacity(BATCH_SIZE);
for key in data.into_iter() {
// NOTE(review): relies on every pointer being a valid NUL-terminated string;
// nothing in this function checks that.
let key = unsafe { CStr::from_ptr(*key) }
.to_str()
.map_err(|e| TantivyBindingError::InternalError(e.to_string()))?;
batch.push(doc!(
self.field => key,
));
// Hand over a full group, start a fresh buffer, and move the offset past it.
if batch.len() == BATCH_SIZE {
self.index_writer.add_documents_with_doc_id(
offset,
std::mem::replace(&mut batch, Vec::with_capacity(BATCH_SIZE)),
)?;
offset += BATCH_SIZE as u32;
}
}
// Submit whatever is left over after the last full group.
if !batch.is_empty() {
self.index_writer.add_documents_with_doc_id(offset, batch)?;
}
Ok(())
}
pub fn add_json_key_stats(
&mut self,
keys: &[*const i8],

View File

@ -274,17 +274,16 @@ struct TantivyIndexWrapper {
}
if constexpr (std::is_same_v<T, std::string>) {
std::vector<const char*> views;
views.reserve(len);
// TODO: not very efficient, a lot of overhead due to rust-ffi call.
for (uintptr_t i = 0; i < len; i++) {
views.push_back(
static_cast<const std::string*>(array)[i].c_str());
auto res = RustResultWrapper(tantivy_index_add_string(
writer_,
static_cast<const std::string*>(array)[i].c_str(),
offset_begin + i));
AssertInfo(res.result_->success,
"failed to add string: {}",
res.result_->error);
}
auto res = RustResultWrapper(tantivy_index_add_strings(
writer_, views.data(), len, offset_begin));
AssertInfo(res.result_->success,
"failed to add string: {}",
res.result_->error);
return;
}
@ -468,18 +467,16 @@ struct TantivyIndexWrapper {
}
if constexpr (std::is_same_v<T, std::string>) {
std::vector<const char*> views;
views.reserve(len);
// TODO: not very efficient, a lot of overhead due to rust-ffi call.
for (uintptr_t i = 0; i < len; i++) {
views.push_back(
static_cast<const std::string*>(array)[i].c_str());
auto res = RustResultWrapper(
tantivy_index_add_string_by_single_segment_writer(
writer_,
static_cast<const std::string*>(array)[i].c_str()));
AssertInfo(res.result_->success,
"failed to add string: {}",
res.result_->error);
}
auto res = RustResultWrapper(
tantivy_index_add_strings_by_single_segment_writer(
writer_, views.data(), len));
AssertInfo(res.result_->success,
"failed to add string: {}",
res.result_->error);
return;
}

View File

@ -1069,4 +1069,4 @@ TEST(TextMatch, ConcurrentReadWriteWithNull) {
writer.join();
reader.join();
}
}