mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-28 22:45:26 +08:00
fix: char_group tokenizer only supports single-byte chars as delimiters (#46193)
relate: https://github.com/milvus-io/milvus/issues/46192 Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
This commit is contained in:
parent
6e2872c982
commit
c84b6d56f8
@ -85,7 +85,7 @@ impl CharGroupTokenizer {
|
||||
.to_string(),
|
||||
)),
|
||||
|v| {
|
||||
if v.len() == 1 {
|
||||
if v.chars().count() == 1 {
|
||||
delimiters.insert(v.chars().next().unwrap());
|
||||
return Ok(());
|
||||
}
|
||||
@ -204,7 +204,7 @@ mod tests {
|
||||
fn test_char_group_tokenizer() {
|
||||
let params = r#"{
|
||||
"type": "chargroup",
|
||||
"delimiters": ["o", "punctuation","digit"]
|
||||
"delimiters": ["o", "punctuation","digit", ","]
|
||||
}"#;
|
||||
let json_param = json::from_str::<json::Map<String, json::Value>>(&params);
|
||||
assert!(json_param.is_ok());
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user