From 06d73cf2e206c72947fc794d4ebcc5c281cb0962 Mon Sep 17 00:00:00 2001 From: Bingyi Sun Date: Fri, 22 Nov 2024 12:02:32 +0800 Subject: [PATCH] enhance: Remove raw tokenizer register. (#37886) tantivy already registers the raw tokenizer by default Signed-off-by: sunby --- .../tantivy/tantivy-binding/src/index_writer.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs index c466d1ee83..a1a27da05c 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/index_writer.rs @@ -32,7 +32,6 @@ impl IndexWriterWrapper { let field: Field; let mut schema_builder = Schema::builder(); - let mut use_raw_tokenizer = false; match data_type { TantivyDataType::I64 => { field = schema_builder.add_i64_field(&field_name, INDEXED); @@ -45,11 +44,10 @@ impl IndexWriterWrapper { } TantivyDataType::Keyword => { let text_field_indexing = TextFieldIndexing::default() - .set_tokenizer("raw_tokenizer") + .set_tokenizer("raw") .set_index_option(IndexRecordOption::Basic); let text_options = TextOptions::default().set_indexing_options(text_field_indexing); field = schema_builder.add_text_field(&field_name, text_options); - use_raw_tokenizer = true; } TantivyDataType::Text => { panic!("text should be indexed with analyzer"); @@ -58,11 +56,6 @@ impl IndexWriterWrapper { let id_field = schema_builder.add_i64_field("doc_id", FAST); let schema = schema_builder.build(); let index = Index::create_in_dir(path.clone(), schema).unwrap(); - if use_raw_tokenizer { - index - .tokenizers() - .register("raw_tokenizer", tokenizer::RawTokenizer::default()); - } let index_writer = index .writer_with_num_threads(num_threads, overall_memory_budget_in_bytes) .unwrap();