From 87aa9a0f2d9951f95950e5ba296323f33d6238da Mon Sep 17 00:00:00 2001 From: aoiasd <45024769+aoiasd@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:58:39 +0800 Subject: [PATCH] fix: empty analyzer params not use standard tokenizer (#38148) relate: https://github.com/milvus-io/milvus/issues/35853 Signed-off-by: aoiasd --- .../tantivy/tantivy-binding/src/tokenizer.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs index 1630381511..b910774a2e 100644 --- a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs +++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer.rs @@ -87,10 +87,12 @@ impl AnalyzerBuilder<'_> { } } - fn get_tokenizer_name(&self) -> Result { - let tokenizer = self.params.get("tokenizer"); - if tokenizer.is_none() { - return Ok("standard".to_string()); + fn get_tokenizer_name(&self) -> Result{ + let tokenizer=self.params.get("tokenizer"); + if tokenizer.is_none(){ + return Err(TantivyBindingError::InternalError(format!( + "tokenizer name or type must be set" + ))); } if !tokenizer.unwrap().is_string() { return Err(TantivyBindingError::InternalError(format!( @@ -257,8 +259,14 @@ pub(crate) fn create_tokenizer_with_filter(params: &String) -> Result Result { #[cfg(test)] mod tests { use crate::tokenizer::create_tokenizer; - use regex; #[test] fn test_standard_analyzer() {