From ad9a0cae4869063409d619551822df974e5696ae Mon Sep 17 00:00:00 2001
From: aoiasd <45024769+aoiasd@users.noreply.github.com>
Date: Tue, 28 Oct 2025 14:52:10 +0800
Subject: [PATCH] enhance: add global analyzer options (#44684)

relate: https://github.com/milvus-io/milvus/issues/43687

Add global analyzer options so that Milvus system params no longer have to
be merged into the user's analyzer params.

Signed-off-by: aoiasd
---
 internal/core/src/segcore/tokenizer_c.cpp         |  12 ++
 internal/core/src/segcore/tokenizer_c.h           |   4 +
 .../tantivy-binding/include/tantivy-binding.h     |   2 +
 .../tantivy-binding/src/analyzer/mod.rs           |   2 +
 .../src/analyzer/runtime_option.rs                | 144 ++++++++++++++++++
 .../analyzer/tokenizers/lindera_tokenizer.rs      |  41 +++--
 .../tantivy/tantivy-binding/src/array.rs          |   8 +
 .../tantivy-binding/src/tokenizer_c.rs            |  18 ++-
 internal/core/thirdparty/tantivy/tokenizer.h      |  11 ++
 internal/querynodev2/server.go                    |   3 +
 internal/util/analyzer/analyzer.go                |   4 +
 .../analyzer/canalyzer/c_analyzer_factory.go      | 136 +++++------------
 .../analyzer/canalyzer/c_analyzer_test.go         |  73 +--------
 pkg/util/paramtable/function_param.go             |   2 +-
 tests/go_client/testcases/query_test.go           |   2 +-
 15 files changed, 266 insertions(+), 196 deletions(-)
 create mode 100644 internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/runtime_option.rs

diff --git a/internal/core/src/segcore/tokenizer_c.cpp b/internal/core/src/segcore/tokenizer_c.cpp
index 8403328785..775e019648 100644
--- a/internal/core/src/segcore/tokenizer_c.cpp
+++ b/internal/core/src/segcore/tokenizer_c.cpp
@@ -20,6 +20,18 @@
 
 using Map = std::map<std::string, std::string>;
 
+CStatus
+set_tokenizer_option(const char* params) {
+    SCOPE_CGO_CALL_METRIC();
+
+    try {
+        milvus::tantivy::set_tokenizer_options(params);
+        return milvus::SuccessCStatus();
+    } catch (std::exception& e) {
+        return milvus::FailureCStatus(&e);
+    }
+}
+
 CStatus
 create_tokenizer(const char* params, CTokenizer* tokenizer) {
     SCOPE_CGO_CALL_METRIC();
diff --git a/internal/core/src/segcore/tokenizer_c.h b/internal/core/src/segcore/tokenizer_c.h
index d803c8e533..f6d9735b59 100644
--- a/internal/core/src/segcore/tokenizer_c.h
+++ b/internal/core/src/segcore/tokenizer_c.h
@@ -13,6 +13,7 @@
 
 #include <stdint.h>
 
+#include "common/common_type_c.h"
 #include "segcore/token_stream_c.h"
 #include "common/type_c.h"
 
@@ -22,6 +23,9 @@ extern "C" {
 
 typedef void* CTokenizer;
 
+CStatus
+set_tokenizer_option(const char* params);
+
 CStatus
 create_tokenizer(const char* params, CTokenizer* tokenizer);
 
diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h
index 3567c803b2..bce05d8ea1 100644
--- a/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/include/tantivy-binding.h
@@ -500,6 +500,8 @@ void *tantivy_clone_analyzer(void *ptr);
 
 void tantivy_free_analyzer(void *tokenizer);
 
+RustResult tantivy_set_analyzer_options(const char *params);
+
 bool tantivy_index_exist(const char *path);
 
 } // extern "C"
diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/mod.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/mod.rs
index 4a8f41625f..93e8afec07 100644
--- a/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/mod.rs
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/mod.rs
@@ -2,8 +2,10 @@ mod analyzer;
 mod build_in_analyzer;
 mod dict;
 mod filter;
+mod runtime_option;
 pub mod tokenizers;
 
 pub use self::analyzer::{create_analyzer, create_analyzer_by_json};
+pub use self::runtime_option::set_options;
 pub(crate) use self::build_in_analyzer::standard_analyzer;
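For orientation: the payload accepted by set_tokenizer_option is a single JSON
map. The keys are the ones recognized by the new runtime_option.rs below; the
concrete values here (url, resource name and id, path) are illustrative only:

    {
        "lindera_download_urls": {
            "ipadic": ["https://example.com/mecab-ipadic.tar.gz"]
        },
        "resource_map": {"user_dict": 1},
        "local_dict_path": "/var/lib/milvus/resource"
    }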
diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/runtime_option.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/runtime_option.rs
new file mode 100644
index 0000000000..2848373fb6
--- /dev/null
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/runtime_option.rs
@@ -0,0 +1,144 @@
+use crate::error::{Result, TantivyBindingError};
+use once_cell::sync::Lazy;
+use serde_json as json;
+use std::collections::HashMap;
+use std::sync::{Arc, RwLock};
+
+static GLOBAL_OPTIONS: Lazy<Arc<RuntimeOption>> = Lazy::new(|| Arc::new(RuntimeOption::new()));
+
+// cache key
+static LINDERA_DOWNLOAD_KEY: &str = "lindera_download_urls";
+static RESOURCE_MAP_KEY: &str = "resource_map";
+
+// normal key
+pub static DEFAULT_DICT_PATH_KEY: &str = "local_dict_path";
+pub static RESOURCE_PATH_KEY: &str = "resource_path";
+
+pub fn set_options(params: &String) -> Result<()> {
+    GLOBAL_OPTIONS.set_json(params)
+}
+
+pub fn get_options(key: &str) -> Option<json::Value> {
+    GLOBAL_OPTIONS.get(key)
+}
+
+pub fn get_lindera_download_url(kind: &str) -> Option<Vec<String>> {
+    GLOBAL_OPTIONS.get_lindera_download_urls(kind)
+}
+
+pub fn get_resource_id(name: &str) -> Option<i64> {
+    GLOBAL_OPTIONS.get_resource_id(name)
+}
+
+// analyzer options
+struct RuntimeOption {
+    inner: RwLock<RuntimeOptionInner>,
+}
+
+impl RuntimeOption {
+    fn new() -> Self {
+        RuntimeOption {
+            inner: RwLock::new(RuntimeOptionInner::new()),
+        }
+    }
+
+    fn set_json(&self, json_params: &String) -> Result<()> {
+        let mut w = self.inner.write().unwrap();
+        w.set_json(json_params)
+    }
+
+    fn get(&self, key: &str) -> Option<json::Value> {
+        let r = self.inner.read().unwrap();
+        r.params.get(key).cloned()
+    }
+
+    fn get_lindera_download_urls(&self, kind: &str) -> Option<Vec<String>> {
+        let r = self.inner.read().unwrap();
+        r.lindera_download_urls.get(kind).cloned()
+    }
+
+    fn get_resource_id(&self, name: &str) -> Option<i64> {
+        let r = self.inner.read().unwrap();
+        r.resource_map.get(name).cloned()
+    }
+}
+
+struct RuntimeOptionInner {
+    params: HashMap<String, json::Value>,
+    resource_map: HashMap<String, i64>, // resource name -> resource id
+    lindera_download_urls: HashMap<String, Vec<String>>, // dict name -> urls
+}
+
+impl RuntimeOptionInner {
+    fn new() -> Self {
+        RuntimeOptionInner {
+            params: HashMap::new(),
+            resource_map: HashMap::new(),
+            lindera_download_urls: HashMap::new(),
+        }
+    }
+
+    fn set_json(&mut self, json_params: &String) -> Result<()> {
+        let v = json::from_str::<json::Value>(json_params)
+            .map_err(|e| TantivyBindingError::JsonError(e))?;
+
+        let m = v.as_object().ok_or(TantivyBindingError::InternalError(
+            "analyzer params should be json map".to_string(),
+        ))?;
+
+        for (key, value) in m.to_owned() {
+            self.set(key, value)?;
+        }
+
+        Ok(())
+    }
+
+    fn set(&mut self, key: String, value: json::Value) -> Result<()> {
+        // cache the lindera download url map
+        if key == LINDERA_DOWNLOAD_KEY {
+            self.lindera_download_urls = HashMap::new();
+
+            let m = value.as_object().ok_or(TantivyBindingError::InternalError(
+                "lindera download urls should be a json map".to_string(),
+            ))?;
+
+            for (key, value) in m {
+                let array = value.as_array().ok_or(TantivyBindingError::InternalError(
+                    "lindera download urls should be a list".to_string(),
+                ))?;
+
+                if !array.iter().all(|v| v.is_string()) {
+                    return Err(TantivyBindingError::InternalError(
+                        "all elements in lindera download urls must be string".to_string(),
+                    ));
+                }
+
+                let urls = array
+                    .iter()
+                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
+                    .collect();
+                self.lindera_download_urls.insert(key.to_string(), urls);
+            }
+            return Ok(());
+        }
+
+        // cache the resource name -> resource id map
+        if key == RESOURCE_MAP_KEY {
+            self.resource_map = HashMap::new();
+
+            let m = value.as_object().ok_or(TantivyBindingError::InternalError(
+                "resource map should be a json map".to_string(),
+            ))?;
+
+            for (key, value) in m {
+                let id = value.as_i64().ok_or(TantivyBindingError::InternalError(
+                    "resource id should be an integer".to_string(),
+                ))?;
+                self.resource_map.insert(key.to_string(), id);
+            }
+            return Ok(());
+        }
+
+        self.params.insert(key, value);
+        Ok(())
+    }
+}
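A short unit-test sketch of how the option store behaves, as it might be
appended to runtime_option.rs (the url, resource name, id, and path are
placeholders):

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn set_and_get_options() {
            let params = r#"{
                "lindera_download_urls": {"ipadic": ["https://example.com/ipadic.tar.gz"]},
                "resource_map": {"user_dict": 1},
                "local_dict_path": "/var/lib/milvus/resource"
            }"#
            .to_string();
            set_options(&params).unwrap();

            // cache keys are split into dedicated maps on set
            assert_eq!(
                get_lindera_download_url("ipadic"),
                Some(vec!["https://example.com/ipadic.tar.gz".to_string()])
            );
            assert_eq!(get_resource_id("user_dict"), Some(1));

            // everything else stays in the generic params map
            assert_eq!(
                get_options("local_dict_path").and_then(|v| v.as_str().map(String::from)),
                Some("/var/lib/milvus/resource".to_string())
            );
        }
    }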
diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/tokenizers/lindera_tokenizer.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/tokenizers/lindera_tokenizer.rs
index 8deadd2564..4f4f72233a 100644
--- a/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/tokenizers/lindera_tokenizer.rs
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/analyzer/tokenizers/lindera_tokenizer.rs
@@ -6,6 +6,7 @@ use lindera::mode::Mode;
 use lindera::segmenter::Segmenter;
 use lindera::token::Token as LToken;
 use lindera::tokenizer::Tokenizer as LTokenizer;
+use log::warn;
 use tantivy::tokenizer::{Token, TokenStream, Tokenizer};
 
 use lindera::token_filter::japanese_compound_word::JapaneseCompoundWordTokenFilter;
@@ -16,7 +17,9 @@ use lindera::token_filter::korean_stop_tags::KoreanStopTagsTokenFilter;
 use lindera::token_filter::BoxTokenFilter as LTokenFilter;
 
 use crate::analyzer::dict::lindera::load_dictionary_from_kind;
-use crate::analyzer::filter::get_string_list;
+use crate::analyzer::runtime_option::{
+    get_lindera_download_url, get_options, DEFAULT_DICT_PATH_KEY,
+};
 use crate::error::{Result, TantivyBindingError};
 use serde_json as json;
 
@@ -25,10 +28,8 @@ pub struct LinderaTokenStream<'a> {
     pub token: &'a mut Token,
 }
 
-const DICTKINDKEY: &str = "dict_kind";
-const DICTBUILDDIRKEY: &str = "dict_build_dir";
-const DICTDOWNLOADURLKEY: &str = "download_urls";
-const FILTERKEY: &str = "filter";
+const DICT_KIND_KEY: &str = "dict_kind";
+const FILTER_KEY: &str = "filter";
 
 impl<'a> TokenStream for LinderaTokenStream<'a> {
     fn advance(&mut self) -> bool {
@@ -67,8 +68,8 @@ impl LinderaTokenizer {
         let kind: DictionaryKind = fetch_lindera_kind(params)?;
 
         // for download dict online
-        let build_dir = fetch_dict_build_dir(params)?;
-        let download_urls = fetch_dict_download_urls(params)?;
+        let build_dir = fetch_dict_build_dir()?;
+        let download_urls = get_lindera_download_url(kind.as_str()).unwrap_or(vec![]);
 
         let dictionary = load_dictionary_from_kind(&kind, build_dir, download_urls)?;
 
@@ -132,7 +133,7 @@ impl DictionaryKindParser for &str {
 
 fn fetch_lindera_kind(params: &json::Map<String, json::Value>) -> Result<DictionaryKind> {
     params
-        .get(DICTKINDKEY)
+        .get(DICT_KIND_KEY)
         .ok_or(TantivyBindingError::InvalidArgument(format!(
             "lindera tokenizer dict_kind must be set"
         )))?
@@ -143,21 +144,13 @@ fn fetch_lindera_kind(params: &json::Map<String, json::Value>) -> Result<DictionaryKind> {
 }
 
-fn fetch_dict_build_dir(params: &json::Map<String, json::Value>) -> Result<String> {
-    params
-        .get(DICTBUILDDIRKEY)
-        .map_or(Ok("/var/lib/milvus/dict/lindera".to_string()), |v| {
-            v.as_str()
-                .ok_or(TantivyBindingError::InvalidArgument(format!(
-                    "dict build dir must be string"
-                )))
-                .map(|s| s.to_string())
-        })
-}
-
-fn fetch_dict_download_urls(params: &json::Map<String, json::Value>) -> Result<Vec<String>> {
-    params.get(DICTDOWNLOADURLKEY).map_or(Ok(vec![]), |v| {
-        get_string_list(v, "lindera dict download urls")
+fn fetch_dict_build_dir() -> Result<String> {
+    get_options(DEFAULT_DICT_PATH_KEY).map_or(Ok("/var/lib/milvus/dict/lindera".to_string()), |v| {
+        v.as_str()
+            .ok_or(TantivyBindingError::InvalidArgument(format!(
+                "dict build dir must be string"
+            )))
+            .map(|s| format!("{}/{}", s, "lindera").to_string())
     })
 }
 
@@ -328,7 +321,7 @@ fn fetch_lindera_token_filters(
 ) -> Result<Vec<LTokenFilter>> {
     let mut result: Vec<LTokenFilter> = vec![];
 
-    match params.get(FILTERKEY) {
+    match params.get(FILTER_KEY) {
         Some(v) => {
             let filter_list = v.as_array().ok_or_else(|| {
                 TantivyBindingError::InvalidArgument(format!("lindera filters should be array"))
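With fetch_dict_build_dir and fetch_dict_download_urls no longer reading user
params, the user-facing lindera config shrinks to the dict kind plus optional
filters; the build dir and download urls are resolved from the global options
instead. An illustrative params document (dict kind chosen as an example):

    {
        "tokenizer": {
            "type": "lindera",
            "dict_kind": "ipadic"
        }
    }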
diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs
index 9712615c19..3cb91c83b1 100644
--- a/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/array.rs
@@ -131,6 +131,14 @@ pub struct RustResult {
 }
 
 impl RustResult {
+    pub fn from_success() -> Self {
+        RustResult {
+            success: true,
+            value: Value::None(()),
+            error: std::ptr::null(),
+        }
+    }
+
     pub fn from_ptr(value: *mut c_void) -> Self {
         RustResult {
             success: true,
diff --git a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_c.rs b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_c.rs
index 7ded786642..18fe18ab3b 100644
--- a/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_c.rs
+++ b/internal/core/thirdparty/tantivy/tantivy-binding/src/tokenizer_c.rs
@@ -1,8 +1,8 @@
 use libc::{c_char, c_void};
 use tantivy::tokenizer::TextAnalyzer;
 
+use crate::analyzer::{create_analyzer, set_options};
 use crate::{
-    analyzer::create_analyzer,
     array::RustResult,
     log::init_log,
     string_c::c_str_to_str,
@@ -34,3 +34,19 @@ pub extern "C" fn tantivy_clone_analyzer(ptr: *mut c_void) -> *mut c_void {
 pub extern "C" fn tantivy_free_analyzer(tokenizer: *mut c_void) {
     free_binding::<TextAnalyzer>(tokenizer);
 }
+
+#[no_mangle]
+pub extern "C" fn tantivy_set_analyzer_options(params: *const c_char) -> RustResult {
+    init_log();
+    let json_str = unsafe { c_str_to_str(params).to_string() };
+
+    set_options(&json_str).map_or_else(
+        |e| {
+            RustResult::from_error(format!(
+                "set analyzer option failed: {}, params: {}",
+                e, json_str
+            ))
+        },
+        |_| RustResult::from_success(),
+    )
+}
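A minimal sketch of exercising the new FFI entry point from a Rust test inside
the binding crate, assuming the success field is visible as declared in
array.rs (the path value is a placeholder):

    use std::ffi::CString;

    #[test]
    fn set_analyzer_options_via_ffi() {
        let params = CString::new(r#"{"local_dict_path": "/tmp/milvus/resource"}"#).unwrap();
        // parses the json and stores it in the process-wide option store
        let res = tantivy_set_analyzer_options(params.as_ptr());
        assert!(res.success);
    }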
diff --git a/internal/core/thirdparty/tantivy/tokenizer.h b/internal/core/thirdparty/tantivy/tokenizer.h
index 2870b01e2e..9b671d59e5 100644
--- a/internal/core/thirdparty/tantivy/tokenizer.h
+++ b/internal/core/thirdparty/tantivy/tokenizer.h
@@ -5,6 +5,7 @@
 #include "rust-hashmap.h"
 #include "tantivy/rust-array.h"
 #include "token-stream.h"
+#include "log/Log.h"
 
 namespace milvus::tantivy {
 
@@ -58,4 +59,14 @@ struct Tokenizer {
     void* ptr_;
 };
 
+inline void
+set_tokenizer_options(std::string&& params) {
+    auto shared_params = std::make_shared<std::string>(std::move(params));
+    auto res =
+        RustResultWrapper(tantivy_set_analyzer_options(shared_params->c_str()));
+    AssertInfo(res.result_->success,
+               "Set analyzer option failed: {}",
+               res.result_->error);
+}
+
 } // namespace milvus::tantivy
diff --git a/internal/querynodev2/server.go b/internal/querynodev2/server.go
index 0612d0e3e0..f1a986d508 100644
--- a/internal/querynodev2/server.go
+++ b/internal/querynodev2/server.go
@@ -55,6 +55,7 @@ import (
 	"github.com/milvus-io/milvus/internal/registry"
 	"github.com/milvus-io/milvus/internal/storage"
 	"github.com/milvus-io/milvus/internal/types"
+	"github.com/milvus-io/milvus/internal/util/analyzer"
 	"github.com/milvus-io/milvus/internal/util/dependency"
 	"github.com/milvus-io/milvus/internal/util/hookutil"
 	"github.com/milvus-io/milvus/internal/util/initcore"
@@ -300,6 +301,8 @@ func (node *QueryNode) Init() error {
 		}
 
 		node.factory.Init(paramtable.Get())
+		// init analyzer options
+		analyzer.InitOptions()
 
 		localRootPath := paramtable.Get().LocalStorageCfg.Path.GetValue()
 		localUsedSize, err := segcore.GetLocalUsedSize(localRootPath)
diff --git a/internal/util/analyzer/analyzer.go b/internal/util/analyzer/analyzer.go
index f5a071a2b8..1704e8f9c9 100644
--- a/internal/util/analyzer/analyzer.go
+++ b/internal/util/analyzer/analyzer.go
@@ -17,3 +17,7 @@ func NewAnalyzer(param string) (Analyzer, error) {
 func ValidateAnalyzer(param string) error {
 	return canalyzer.ValidateAnalyzer(param)
 }
+
+func InitOptions() {
+	canalyzer.InitOptions()
+}
diff --git a/internal/util/analyzer/canalyzer/c_analyzer_factory.go b/internal/util/analyzer/canalyzer/c_analyzer_factory.go
index dde6f1d2e9..f8c9a42e45 100644
--- a/internal/util/analyzer/canalyzer/c_analyzer_factory.go
+++ b/internal/util/analyzer/canalyzer/c_analyzer_factory.go
@@ -10,21 +10,52 @@ import "C"
 
 import (
 	"encoding/json"
-	"fmt"
-	"path"
+	"sync"
 	"unsafe"
 
+	"go.uber.org/zap"
+
 	"github.com/milvus-io/milvus/internal/util/analyzer/interfaces"
-	"github.com/milvus-io/milvus/pkg/v2/util/merr"
+	"github.com/milvus-io/milvus/pkg/v2/log"
 	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
 )
 
-func NewAnalyzer(param string) (interfaces.Analyzer, error) {
-	param, err := CheckAndFillParams(param)
+const (
+	LinderaDictURLKey = "lindera_download_urls"
+	ResourceMapKey    = "resource_map"
+	DictPathKey       = "local_dict_path"
+	ResourcePathKey   = "resource_path"
+)
+
+var initOnce sync.Once
+
+func InitOptions() {
+	initOnce.Do(func() {
+		UpdateParams()
+	})
+}
+
+func UpdateParams() {
+	cfg := paramtable.Get()
+
+	// the rust side expects each dict kind to map onto a list of urls,
+	// so expand the configured values with ParseAsStings before marshaling
+	downloadUrls := make(map[string][]string)
+	for kind, urls := range cfg.FunctionCfg.LinderaDownloadUrls.GetValue() {
+		downloadUrls[kind] = paramtable.ParseAsStings(urls)
+	}
+
+	params := map[string]any{}
+	params[LinderaDictURLKey] = downloadUrls
+	params[DictPathKey] = cfg.FunctionCfg.LocalResourcePath.GetValue()
+
+	bytes, err := json.Marshal(params)
 	if err != nil {
-		return nil, err
+		log.Panic("init analyzer option failed", zap.Error(err))
 	}
 
+	paramPtr := C.CString(string(bytes))
+	defer C.free(unsafe.Pointer(paramPtr))
+
+	status := C.set_tokenizer_option(paramPtr)
+	if err := HandleCStatus(&status, "failed to init segcore analyzer option"); err != nil {
+		log.Panic("init analyzer option failed", zap.Error(err))
+	}
+}
+
+func NewAnalyzer(param string) (interfaces.Analyzer, error) {
 	paramPtr := C.CString(param)
 	defer C.free(unsafe.Pointer(paramPtr))
 
@@ -38,11 +69,6 @@ func NewAnalyzer(param string) (interfaces.Analyzer, error) {
 }
 
 func ValidateAnalyzer(param string) error {
-	param, err := CheckAndFillParams(param)
-	if err != nil {
-		return err
-	}
-
	paramPtr := C.CString(param)
 	defer C.free(unsafe.Pointer(paramPtr))
 
@@ -52,91 +78,3 @@ func ValidateAnalyzer(param string) error {
 	}
 	return nil
 }
-
-func CheckAndFillParams(params string) (string, error) {
-	if len(params) == 0 {
-		return "", nil
-	}
-
-	var paramMaps map[string]any
-	flag := false
-	err := json.Unmarshal([]byte(params), &paramMaps)
-	if err != nil {
-		return "", merr.WrapErrAsInputError(fmt.Errorf("unmarshal analyzer params failed with json error: %s", err.Error()))
-	}
-
-	tokenizer, ok := paramMaps["tokenizer"]
-	if !ok {
-		// skip check if no tokenizer params
-		return params, nil
-	}
-
-	switch value := tokenizer.(type) {
-	case string:
-		// return if use build-in tokenizer
-		return params, nil
-	case map[string]any:
-		flag, err = CheckAndFillTokenizerParams(value)
-		if err != nil {
-			return "", err
-		}
-	default:
-		return "", merr.WrapErrAsInputError(fmt.Errorf("analyzer params set tokenizer with unknown type"))
-	}
-
-	// remarshal json params if params map was changed.
-	if flag {
-		bytes, err := json.Marshal(paramMaps)
-		if err != nil {
-			return "", merr.WrapErrAsInputError(fmt.Errorf("marshal analyzer params failed with json error: %s", err.Error()))
-		}
-		return string(bytes), nil
-	}
-	return params, nil
-}
-
-// fill some milvus params to tokenizer params
-func CheckAndFillTokenizerParams(params map[string]any) (bool, error) {
-	v, ok := params["type"]
-	if !ok {
-		return false, merr.WrapErrAsInputError(fmt.Errorf("costom tokenizer must set type"))
-	}
-
-	tokenizerType, ok := v.(string)
-	if !ok {
-		return false, merr.WrapErrAsInputError(fmt.Errorf("costom tokenizer type must be string"))
-	}
-
-	switch tokenizerType {
-	case "lindera":
-		cfg := paramtable.Get()
-
-		if _, ok := params["dict_build_dir"]; ok {
-			return false, merr.WrapErrAsInputError(fmt.Errorf("costom tokenizer dict_build_dir was system params, should not be set"))
-		}
-		// build lindera to LocalResourcePath/lindera/dict_kind
-		params["dict_build_dir"] = path.Join(cfg.FunctionCfg.LocalResourcePath.GetValue(), "lindera")
-
-		v, ok := params["dict_kind"]
-		if !ok {
-			return false, merr.WrapErrAsInputError(fmt.Errorf("lindera tokenizer must set dict_kind"))
-		}
-		dictKind, ok := v.(string)
-		if !ok {
-			return false, merr.WrapErrAsInputError(fmt.Errorf("lindera tokenizer dict kind must be string"))
-		}
-		dictUrlsMap := cfg.FunctionCfg.LinderaDownloadUrls.GetValue()
-
-		if _, ok := params["download_urls"]; ok {
-			return false, merr.WrapErrAsInputError(fmt.Errorf("costom tokenizer download_urls was system params, should not be set"))
-		}
-
-		if value, ok := dictUrlsMap["."+dictKind]; ok {
-			// use download urls set in milvus yaml
-			params["download_urls"] = paramtable.ParseAsStings(value)
-		}
-		return true, nil
-	default:
-		return false, nil
-	}
-}
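For reference, the download urls consumed by UpdateParams above come from
milvus.yaml through the LinderaDownloadUrls param group (see the
function_param.go hunk below). Assuming paramtable maps nested yaml keys onto
the dotted KeyPrefix, a configuration would look roughly like this (the url is
a placeholder):

    function:
      analyzer:
        lindera:
          download_urls:
            ipadic: "https://example.com/mecab-ipadic.tar.gz"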
diff --git a/internal/util/analyzer/canalyzer/c_analyzer_test.go b/internal/util/analyzer/canalyzer/c_analyzer_test.go
index 0adb786f2f..ee77c3ee3d 100644
--- a/internal/util/analyzer/canalyzer/c_analyzer_test.go
+++ b/internal/util/analyzer/canalyzer/c_analyzer_test.go
@@ -12,7 +12,6 @@ import (
 	"google.golang.org/grpc"
 
 	pb "github.com/milvus-io/milvus-proto/go-api/v2/tokenizerpb"
-	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
 )
 
 type mockServer struct {
@@ -90,7 +89,7 @@ func TestAnalyzer(t *testing.T) {
 		tokenStream := analyzer.NewTokenStream("张华考上了北京大学;李萍进了中等技术学校;我在百货公司当售货员:我们都有光明的前途")
 		defer tokenStream.Destroy()
 		for tokenStream.Advance() {
-			fmt.Println(tokenStream.Token())
+			assert.NotEmpty(t, tokenStream.Token())
 		}
 	}
 
@@ -152,6 +151,8 @@ func TestAnalyzer(t *testing.T) {
 }
 
 func TestValidateAnalyzer(t *testing.T) {
+	InitOptions()
+
 	// valid analyzer
 	{
 		m := "{\"tokenizer\": \"standard\"}"
@@ -172,71 +173,3 @@ func TestValidateAnalyzer(t *testing.T) {
 		assert.Error(t, err)
 	}
 }
-
-func TestCheckAndFillParams(t *testing.T) {
-	paramtable.Init()
-	paramtable.Get().SaveGroup(map[string]string{"function.analyzer.lindera.download_urls.ipadic": "/test/url"})
-
-	// normal case
-	{
-		m := "{\"tokenizer\": {\"type\":\"jieba\"}}"
-		_, err := CheckAndFillParams(m)
-		assert.NoError(t, err)
-	}
-
-	// fill lindera tokenizer download urls and dict local path
-	{
-		m := "{\"tokenizer\": {\"type\":\"lindera\", \"dict_kind\": \"ipadic\"}}"
-		_, err := CheckAndFillParams(m)
-		assert.NoError(t, err)
-	}
-
-	// error with wrong json
-	{
-		m := "{invalid json"
-		_, err := CheckAndFillParams(m)
-		assert.Error(t, err)
-	}
-
-	// skip if use default analyzer
-	{
-		m := "{}"
-		_, err := CheckAndFillParams(m)
-		assert.NoError(t, err)
-	}
-
-	// error tokenizer without type
-	{
-		m := "{\"tokenizer\": {\"dict_kind\": \"ipadic\"}}"
-		_, err := CheckAndFillParams(m)
-		assert.Error(t, err)
-	}
-
-	// error tokenizer type not string
-	{
-		m := "{\"tokenizer\": {\"type\": 1, \"dict_kind\": \"ipadic\"}}"
-		_, err := CheckAndFillParams(m)
-		assert.Error(t, err)
-	}
-
-	// error tokenizer params type
-	{
-		m := "{\"tokenizer\": 1}"
-		_, err := CheckAndFillParams(m)
-		assert.Error(t, err)
-	}
-
-	// error set dict_build_dir by user
-	{
-		m := "{\"tokenizer\": {\"type\": \"lindera\", \"dict_kind\": \"ipadic\", \"dict_build_dir\": \"/tmp/milvus\"}}"
-		_, err := CheckAndFillParams(m)
-		assert.Error(t, err)
-	}
-
-	// error lindera kind not set
-	{
-		m := "{\"tokenizer\": {\"type\": \"lindera\"}}"
-		_, err := CheckAndFillParams(m)
-		assert.Error(t, err)
-	}
-}
diff --git a/pkg/util/paramtable/function_param.go b/pkg/util/paramtable/function_param.go
index 5ad0893a39..716add8087 100644
--- a/pkg/util/paramtable/function_param.go
+++ b/pkg/util/paramtable/function_param.go
@@ -141,7 +141,7 @@ func (p *functionConfig) init(base *BaseTable) {
 	p.LocalResourcePath.Init(base.mgr)
 
 	p.LinderaDownloadUrls = ParamGroup{
-		KeyPrefix: "function.analyzer.lindera.download_urls",
+		KeyPrefix: "function.analyzer.lindera.download_urls.",
 		Version:   "2.5.16",
 	}
 	p.LinderaDownloadUrls.Init(base.mgr)
diff --git a/tests/go_client/testcases/query_test.go b/tests/go_client/testcases/query_test.go
index ea703b69b6..9ab824acac 100644
--- a/tests/go_client/testcases/query_test.go
+++ b/tests/go_client/testcases/query_test.go
@@ -1218,7 +1218,7 @@ func TestRunAnalyzer(t *testing.T) {
 
 	// run analyzer with invalid params
 	_, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("text doc").WithAnalyzerParamsStr("invalid params}"))
-	common.CheckErr(t, err, false, "json error")
+	common.CheckErr(t, err, false, "JsonError")
 
 	// run analyzer with custom analyzer
 	tokens, err = mc.RunAnalyzer(ctx, client.NewRunAnalyzerOption("test doc").