mirror of
https://gitee.com/milvus-io/milvus.git
synced 2026-01-07 19:31:51 +08:00
enhance: update tantivy version (#39253)
https://github.com/milvus-io/milvus/issues/39254 --------- Signed-off-by: sunby <sunbingyi1992@gmail.com>
This commit is contained in:
parent
cd56d64ec4
commit
c13fc8cd19
665
internal/core/thirdparty/tantivy/tantivy-binding/Cargo.lock
generated
vendored
665
internal/core/thirdparty/tantivy/tantivy-binding/Cargo.lock
generated
vendored
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
tantivy = { git = "https://github.com/milvus-io/tantivy", tag = "0.21.1-fix3" } # we have made a private fix for milvus; it should be removed in the future once milvus fixes the bug.
|
||||
tantivy = { git = "https://github.com/milvus-io/tantivy", tag = "v0.1.0" } # we have made a private fix for milvus; it should be removed in the future once milvus fixes the bug.
|
||||
futures = "0.3.21"
|
||||
libc = "0.2"
|
||||
scopeguard = "1.2"
|
||||
|
||||
@ -40,7 +40,7 @@ impl IndexReaderWrapper {
|
||||
|
||||
let reader = index
|
||||
.reader_builder()
|
||||
.reload_policy(ReloadPolicy::OnCommit) // OnCommit is used for growing segments.
|
||||
.reload_policy(ReloadPolicy::OnCommitWithDelay) // OnCommitWithDelay is used for growing segments.
|
||||
.try_into()?;
|
||||
reader.reload()?;
|
||||
|
||||
@ -98,11 +98,9 @@ impl IndexReaderWrapper {
|
||||
lower_bound: i64,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_i64_bounds(
|
||||
self.field_name.to_string(),
|
||||
make_bounds(lower_bound, inclusive),
|
||||
Bound::Unbounded,
|
||||
);
|
||||
let term = Term::from_field_i64(self.field, lower_bound);
|
||||
|
||||
let q = RangeQuery::new(make_bounds(term, inclusive), Bound::Unbounded);
|
||||
self.search(&q)
|
||||
}
|
||||
|
||||
@ -111,11 +109,8 @@ impl IndexReaderWrapper {
|
||||
upper_bound: i64,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_i64_bounds(
|
||||
self.field_name.to_string(),
|
||||
Bound::Unbounded,
|
||||
make_bounds(upper_bound, inclusive),
|
||||
);
|
||||
let term = Term::from_field_i64(self.field, upper_bound);
|
||||
let q = RangeQuery::new(Bound::Unbounded, make_bounds(term, inclusive));
|
||||
self.search(&q)
|
||||
}
|
||||
|
||||
@ -126,9 +121,9 @@ impl IndexReaderWrapper {
|
||||
lb_inclusive: bool,
|
||||
ub_inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let lb = make_bounds(lower_bound, lb_inclusive);
|
||||
let ub = make_bounds(upper_bound, ub_inclusive);
|
||||
let q = RangeQuery::new_i64_bounds(self.field_name.to_string(), lb, ub);
|
||||
let lb = make_bounds(Term::from_field_i64(self.field, lower_bound), lb_inclusive);
|
||||
let ub = make_bounds(Term::from_field_i64(self.field, upper_bound), ub_inclusive);
|
||||
let q = RangeQuery::new(lb, ub);
|
||||
self.search(&q)
|
||||
}
|
||||
|
||||
@ -145,9 +140,8 @@ impl IndexReaderWrapper {
|
||||
lower_bound: f64,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_f64_bounds(
|
||||
self.field_name.to_string(),
|
||||
make_bounds(lower_bound, inclusive),
|
||||
let q = RangeQuery::new(
|
||||
make_bounds(Term::from_field_f64(self.field, lower_bound), inclusive),
|
||||
Bound::Unbounded,
|
||||
);
|
||||
self.search(&q)
|
||||
@ -158,10 +152,9 @@ impl IndexReaderWrapper {
|
||||
upper_bound: f64,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_f64_bounds(
|
||||
self.field_name.to_string(),
|
||||
let q = RangeQuery::new(
|
||||
Bound::Unbounded,
|
||||
make_bounds(upper_bound, inclusive),
|
||||
make_bounds(Term::from_field_f64(self.field, upper_bound), inclusive),
|
||||
);
|
||||
self.search(&q)
|
||||
}
|
||||
@ -173,9 +166,9 @@ impl IndexReaderWrapper {
|
||||
lb_inclusive: bool,
|
||||
ub_inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let lb = make_bounds(lower_bound, lb_inclusive);
|
||||
let ub = make_bounds(upper_bound, ub_inclusive);
|
||||
let q = RangeQuery::new_f64_bounds(self.field_name.to_string(), lb, ub);
|
||||
let lb = make_bounds(Term::from_field_f64(self.field, lower_bound), lb_inclusive);
|
||||
let ub = make_bounds(Term::from_field_f64(self.field, upper_bound), ub_inclusive);
|
||||
let q = RangeQuery::new(lb, ub);
|
||||
self.search(&q)
|
||||
}
|
||||
|
||||
@ -200,9 +193,8 @@ impl IndexReaderWrapper {
|
||||
lower_bound: &str,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_str_bounds(
|
||||
self.field_name.to_string(),
|
||||
make_bounds(lower_bound, inclusive),
|
||||
let q = RangeQuery::new(
|
||||
make_bounds(Term::from_field_text(self.field, lower_bound), inclusive),
|
||||
Bound::Unbounded,
|
||||
);
|
||||
self.search(&q)
|
||||
@ -213,10 +205,9 @@ impl IndexReaderWrapper {
|
||||
upper_bound: &str,
|
||||
inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let q = RangeQuery::new_str_bounds(
|
||||
self.field_name.to_string(),
|
||||
let q = RangeQuery::new(
|
||||
Bound::Unbounded,
|
||||
make_bounds(upper_bound, inclusive),
|
||||
make_bounds(Term::from_field_text(self.field, upper_bound), inclusive),
|
||||
);
|
||||
self.search(&q)
|
||||
}
|
||||
@ -228,9 +219,9 @@ impl IndexReaderWrapper {
|
||||
lb_inclusive: bool,
|
||||
ub_inclusive: bool,
|
||||
) -> Result<Vec<u32>> {
|
||||
let lb = make_bounds(lower_bound, lb_inclusive);
|
||||
let ub = make_bounds(upper_bound, ub_inclusive);
|
||||
let q = RangeQuery::new_str_bounds(self.field_name.to_string(), lb, ub);
|
||||
let lb = make_bounds(Term::from_field_text(self.field, lower_bound), lb_inclusive);
|
||||
let ub = make_bounds(Term::from_field_text(self.field, upper_bound), ub_inclusive);
|
||||
let q = RangeQuery::new(lb, ub);
|
||||
self.search(&q)
|
||||
}
|
||||
|
||||
|
||||
@ -6,9 +6,12 @@ use futures::executor::block_on;
|
||||
use libc::c_char;
|
||||
use log::info;
|
||||
use tantivy::schema::{
|
||||
Field, IndexRecordOption, Schema, SchemaBuilder, TextFieldIndexing, TextOptions, FAST, INDEXED,
|
||||
Field, IndexRecordOption, OwnedValue, Schema, SchemaBuilder, TextFieldIndexing, TextOptions,
|
||||
FAST, INDEXED,
|
||||
};
|
||||
use tantivy::{
|
||||
doc, tokenizer, Document, Index, IndexWriter, SingleSegmentIndexWriter, TantivyDocument,
|
||||
};
|
||||
use tantivy::{doc, Document, Index, IndexWriter, SingleSegmentIndexWriter};
|
||||
|
||||
use crate::data_type::TantivyDataType;
|
||||
|
||||
@ -55,7 +58,10 @@ impl IndexWriterWrapper {
|
||||
overall_memory_budget_in_bytes: usize,
|
||||
) -> Result<IndexWriterWrapper> {
|
||||
init_log();
|
||||
info!("create index writer, field_name: {}, data_type: {:?}", field_name, data_type);
|
||||
info!(
|
||||
"create index writer, field_name: {}, data_type: {:?}",
|
||||
field_name, data_type
|
||||
);
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder_add_field(&mut schema_builder, &field_name, data_type);
|
||||
// We cannot build a direct connection from rows in multiple segments to milvus row data, so we have this doc_id field.
|
||||
@ -78,7 +84,10 @@ impl IndexWriterWrapper {
|
||||
path: String,
|
||||
) -> Result<IndexWriterWrapper> {
|
||||
init_log();
|
||||
info!("create single segment index writer, field_name: {}, data_type: {:?}", field_name, data_type);
|
||||
info!(
|
||||
"create single segment index writer, field_name: {}, data_type: {:?}",
|
||||
field_name, data_type
|
||||
);
|
||||
let mut schema_builder = Schema::builder();
|
||||
let field = schema_builder_add_field(&mut schema_builder, &field_name, data_type);
|
||||
let schema = schema_builder.build();
|
||||
@ -96,7 +105,7 @@ impl IndexWriterWrapper {
|
||||
IndexReaderWrapper::from_index(self.index.clone())
|
||||
}
|
||||
|
||||
fn index_writer_add_document(&self, document: Document) -> Result<()> {
|
||||
fn index_writer_add_document(&self, document: TantivyDocument) -> Result<()> {
|
||||
match self.index_writer {
|
||||
Either::Left(ref writer) => {
|
||||
let _ = writer.add_document(document)?;
|
||||
@ -108,7 +117,10 @@ impl IndexWriterWrapper {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn single_segment_index_writer_add_document(&mut self, document: Document) -> Result<()> {
|
||||
fn single_segment_index_writer_add_document(
|
||||
&mut self,
|
||||
document: TantivyDocument,
|
||||
) -> Result<()> {
|
||||
match self.index_writer {
|
||||
Either::Left(_) => {
|
||||
panic!("unexpected writer");
|
||||
@ -165,70 +177,70 @@ impl IndexWriterWrapper {
|
||||
}
|
||||
|
||||
pub fn add_multi_i8s(&mut self, datas: &[i8], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as i64);
|
||||
document.add_field_value(self.field, &(*data as i64));
|
||||
}
|
||||
document.add_i64(self.id_field.unwrap(), offset);
|
||||
self.index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_i16s(&mut self, datas: &[i16], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as i64);
|
||||
document.add_field_value(self.field, &(*data as i64));
|
||||
}
|
||||
document.add_i64(self.id_field.unwrap(), offset);
|
||||
self.index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_i32s(&mut self, datas: &[i32], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as i64);
|
||||
document.add_field_value(self.field, &(*data as i64));
|
||||
}
|
||||
document.add_i64(self.id_field.unwrap(), offset);
|
||||
self.index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_i64s(&mut self, datas: &[i64], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data);
|
||||
document.add_field_value(self.field, data);
|
||||
}
|
||||
document.add_i64(self.id_field.unwrap(), offset);
|
||||
self.index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_f32s(&mut self, datas: &[f32], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as f64);
|
||||
document.add_field_value(self.field, &(*data as f64));
|
||||
}
|
||||
document.add_i64(self.id_field.unwrap(), offset);
|
||||
self.index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_f64s(&mut self, datas: &[f64], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data);
|
||||
document.add_field_value(self.field, data);
|
||||
}
|
||||
document.add_i64(self.id_field.unwrap(), offset);
|
||||
self.index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_bools(&mut self, datas: &[bool], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data);
|
||||
document.add_field_value(self.field, data);
|
||||
}
|
||||
document.add_i64(self.id_field.unwrap(), offset);
|
||||
self.index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_keywords(&mut self, datas: &[*const c_char], offset: i64) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for element in datas {
|
||||
let data = unsafe { CStr::from_ptr(*element) };
|
||||
document.add_field_value(self.field, data.to_str()?);
|
||||
@ -278,57 +290,57 @@ impl IndexWriterWrapper {
|
||||
}
|
||||
|
||||
pub fn add_multi_i8s_by_single_segment_writer(&mut self, datas: &[i8]) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as i64);
|
||||
document.add_field_value(self.field, &(*data as i64));
|
||||
}
|
||||
self.single_segment_index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_i16s_by_single_segment_writer(&mut self, datas: &[i16]) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as i64);
|
||||
document.add_field_value(self.field, &(*data as i64));
|
||||
}
|
||||
self.single_segment_index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_i32s_by_single_segment_writer(&mut self, datas: &[i32]) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as i64);
|
||||
document.add_field_value(self.field, &(*data as i64));
|
||||
}
|
||||
self.single_segment_index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_i64s_by_single_segment_writer(&mut self, datas: &[i64]) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data);
|
||||
document.add_field_value(self.field, data);
|
||||
}
|
||||
self.single_segment_index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_f32s_by_single_segment_writer(&mut self, datas: &[f32]) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data as f64);
|
||||
document.add_field_value(self.field, &(*data as f64));
|
||||
}
|
||||
self.single_segment_index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_f64s_by_single_segment_writer(&mut self, datas: &[f64]) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data);
|
||||
document.add_field_value(self.field, data);
|
||||
}
|
||||
self.single_segment_index_writer_add_document(document)
|
||||
}
|
||||
|
||||
pub fn add_multi_bools_by_single_segment_writer(&mut self, datas: &[bool]) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for data in datas {
|
||||
document.add_field_value(self.field, *data);
|
||||
document.add_field_value(self.field, data);
|
||||
}
|
||||
self.single_segment_index_writer_add_document(document)
|
||||
}
|
||||
@ -337,7 +349,7 @@ impl IndexWriterWrapper {
|
||||
&mut self,
|
||||
datas: &[*const c_char],
|
||||
) -> Result<()> {
|
||||
let mut document = Document::default();
|
||||
let mut document = TantivyDocument::default();
|
||||
for element in datas {
|
||||
let data = unsafe { CStr::from_ptr(*element) };
|
||||
document.add_field_value(self.field, data.to_str()?);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user