feat: set related resource ids in collection schema (#46423)

Support creating an analyzer with file resource info, and return the used
file resource IDs when validating an analyzer.
Save the related resource IDs in the collection schema.
related: https://github.com/milvus-io/milvus/issues/43687

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: analyzer file-resource resolution is deterministic and
traceable by threading a FileResourcePathHelper (collecting used
resource IDs in a HashSet) through all tokenizer/analyzer construction
and validation paths; validate_analyzer(params, extra_info) returns the
collected Vec<i64> which is propagated through C/Rust/Go layers to
callers (CValidateResult → RustResult::from_vec_i64 → Go []int64 →
querypb.ValidateAnalyzerResponse.ResourceIds →
CollectionSchema.FileResourceIds). A sketch of this flow follows these
notes.

- Logic removed/simplified: ad‑hoc, scattered resource-path lookups and
per-filter file helpers (e.g., read_synonyms_file and other inline
file-reading logic) were consolidated into ResourceInfo +
FileResourcePathHelper and a centralized get_resource_path(helper, ...)
API; filter/tokenizer builder APIs now accept &mut
FileResourcePathHelper so all file path resolution and ID collection use
the same path and bookkeeping logic (redundant duplicated lookups
removed). The consolidated lookup shape is sketched after these notes.

- Why no data loss or behavior regression: changes are additive and
default-preserving — existing call sites pass extra_info = "" so
analyzer creation/validation behavior and error paths remain unchanged;
new Collection.FileResourceIds is populated from resp.ResourceIds in
validateSchema and round‑tripped through marshal/unmarshal
(model.Collection ↔ schemapb.CollectionSchema) so schema persistence
uses the new list without overwriting other schema fields; proto change
adds a repeated field (resource_ids) which is wire‑compatible (older
clients ignore the extra field). Concrete code paths: analyzer creation
still uses create_analyzer (now with extra_info ""), tokenizer
validation still returns errors as before but now also returns IDs via
CValidateResult/RustResult, and rootcoord.validateSchema assigns
resp.ResourceIds → schema.FileResourceIds. The fallback behind this
default-preserving behavior is sketched after these notes.

- New capability added: end‑to‑end discovery, return, and persistence of
file resource IDs used by analyzers — validate flows now return resource
IDs and the system stores them in collection schema (affects tantivy
analyzer binding, canalyzer C bindings, internal/util analyzer APIs,
querynode ValidateAnalyzer response, and rootcoord/create_collection
flow).
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
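
A few hedged sketches of the pieces described above, all in Rust. First, the end-to-end validate flow from the first note, assuming the crate path `tantivy_binding::analyzer` for the re-exported `validate_analyzer`; the resource ID, file layout, and `resource_map` payload are illustrative and mirror the Go test further down:

```rust
use tantivy_binding::analyzer::validate_analyzer; // assumed crate/module path

fn main() {
    // Analyzer params that pull a stop-word list from a named file resource.
    let params = r#"{
        "tokenizer": "standard",
        "filter": [{
            "type": "stop",
            "stop_words_file": {
                "type": "remote",
                "resource_name": "jieba_dict",
                "file_name": "jieba.txt"
            }
        }]
    }"#;
    // extra_info maps resource names to IDs; the file must already exist
    // under <resource_path>/default/100/jieba.txt for validation to pass.
    let extra_info = r#"{"resource_map": {"jieba_dict": 100}, "storage_name": "default"}"#;

    // Builds the analyzer once and returns every resource ID it touched.
    let ids = validate_analyzer(params, extra_info).expect("validation failed");
    assert_eq!(ids, vec![100]);
}
```

Second, the consolidated lookup shape from the second note; `read_line_file` and `FileResourcePathHelper` appear in the diffs below, while `load_dict` is a hypothetical caller using the crate's `Result` alias:

```rust
use serde_json as json;

// Hypothetical filter-side loader: every file-backed option resolves its
// path through the shared helper, which records the resource ID as a
// side effect of the lookup.
fn load_dict(
    helper: &mut FileResourcePathHelper,
    file_params: &json::Value,
) -> Result<Vec<String>> {
    let mut dict = Vec::new();
    read_line_file(helper, &mut dict, file_params, "example dict file")?;
    Ok(dict)
}
```

Third, the fallback behind the default-preserving claim, condensed from `create_helper` in the analyzer diff below: an empty `extra_info` reuses the process-global resource info configured via `set_options`, so legacy call sites that pass `""` behave exactly as before.

```rust
use serde_json as json;

pub fn create_helper(extra_info: &str) -> Result<FileResourcePathHelper> {
    if extra_info.is_empty() {
        // Legacy path: the global resource map set through set_options.
        Ok(get_global_file_resource_helper())
    } else {
        // New path: a per-request resource map plus an optional storage name.
        let value = json::from_str::<json::Value>(extra_info)
            .map_err(TantivyBindingError::JsonError)?;
        FileResourcePathHelper::from_json(&value)
    }
}
```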

Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
aoiasd 2025-12-26 22:49:19 +08:00 committed by GitHub
parent 512884524b
commit 55feb7ded8
45 changed files with 1295 additions and 803 deletions

go.mod

@ -21,7 +21,7 @@ require (
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
github.com/klauspost/compress v1.18.0
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251215075310-deda9c0dcece
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251218031911-f415d420437f
github.com/minio/minio-go/v7 v7.0.73
github.com/panjf2000/ants/v2 v2.11.3 // indirect
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 // indirect

go.sum

@ -799,8 +799,8 @@ github.com/milvus-io/cgosymbolizer v0.0.0-20250318084424-114f4050c3a6 h1:YHMFI6L
github.com/milvus-io/cgosymbolizer v0.0.0-20250318084424-114f4050c3a6/go.mod h1:DvXTE/K/RtHehxU8/GtDs4vFtfw64jJ3PaCnFri8CRg=
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8=
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251215075310-deda9c0dcece h1:s0TFMZBxADKSzIr7LW/TE3L/WgCuo7QOfzkYX92Xog0=
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251215075310-deda9c0dcece/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251218031911-f415d420437f h1:YQ61KOySWPEXv8ePkr0Cu5q5iVHN11IIUSTWIiALCE8=
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251218031911-f415d420437f/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY=
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=


@ -45,7 +45,7 @@ TEST(CTokenizer, Default) {
auto analyzer_params = R"({"tokenizer": "standard"})";
CTokenizer tokenizer;
{
auto status = create_tokenizer(analyzer_params, &tokenizer);
auto status = create_tokenizer(analyzer_params, "", &tokenizer);
ASSERT_EQ(milvus::ErrorCode::Success, status.error_code);
}


@ -30,9 +30,12 @@ set_tokenizer_option(const char* params) {
}
CStatus
create_tokenizer(const char* params, CTokenizer* tokenizer) {
create_tokenizer(const char* params,
const char* extra_info,
CTokenizer* tokenizer) {
try {
auto impl = std::make_unique<milvus::tantivy::Tokenizer>(params);
auto impl =
std::make_unique<milvus::tantivy::Tokenizer>(params, extra_info);
*tokenizer = impl.release();
return milvus::SuccessCStatus();
} catch (std::exception& e) {
@ -63,13 +66,14 @@ create_token_stream(CTokenizer tokenizer, const char* text, uint32_t text_len) {
return impl->CreateTokenStream(std::string(text, text_len)).release();
}
CStatus
validate_tokenizer(const char* params) {
CValidateResult
validate_tokenizer(const char* params, const char* extra_info) {
try {
auto impl = std::make_unique<milvus::tantivy::Tokenizer>(params);
return milvus::SuccessCStatus();
auto [ids, count] =
milvus::tantivy::validate_analyzer(params, extra_info);
return CValidateResult{ids, count, milvus::SuccessCStatus()};
} catch (std::exception& e) {
return milvus::FailureCStatus(&e);
return CValidateResult{nullptr, 0, milvus::FailureCStatus(&e)};
}
}


@ -27,7 +27,9 @@ CStatus
set_tokenizer_option(const char* params);
CStatus
create_tokenizer(const char* params, CTokenizer* tokenizer);
create_tokenizer(const char* params,
const char* extra_info,
CTokenizer* tokenizer);
CStatus
clone_tokenizer(CTokenizer* tokenizer, CTokenizer* rst);
@ -35,8 +37,14 @@ clone_tokenizer(CTokenizer* tokenizer, CTokenizer* rst);
void
free_tokenizer(CTokenizer tokenizer);
CStatus
validate_tokenizer(const char* params);
typedef struct CValidateResult {
int64_t* resource_ids;
uint64_t resource_ids_count;
CStatus status;
} CValidateResult;
CValidateResult
validate_tokenizer(const char* params, const char* extra_info);
CTokenStream
create_token_stream(CTokenizer tokenizer, const char* text, uint32_t text_len);


@ -32,7 +32,7 @@ fn bench_lindua_language_identifier_tokenizer(c: &mut Criterion) {
}
}
"#;
let mut analyzer = create_analyzer(params);
let mut analyzer = create_analyzer(params, "");
assert!(analyzer.is_ok(), "error: {}", analyzer.err().unwrap());
c.bench_function("test", |b| {
@ -64,7 +64,7 @@ fn bench_whatlang_language_identifier_tokenizer(c: &mut Criterion) {
}
}
"#;
let mut analyzer = create_analyzer(params);
let mut analyzer = create_analyzer(params, "");
assert!(analyzer.is_ok(), "error: {}", analyzer.err().unwrap());
c.bench_function("test", |b| {


@ -497,7 +497,9 @@ const char *tantivy_token_stream_get_token(void *token_stream);
TantivyToken tantivy_token_stream_get_detailed_token(void *token_stream);
RustResult tantivy_create_analyzer(const char *analyzer_params);
RustResult tantivy_create_analyzer(const char *analyzer_params, const char *extra_info);
RustResult tantivy_validate_analyzer(const char *analyzer_params, const char *extra_info);
void *tantivy_clone_analyzer(void *ptr);


@ -1,40 +1,30 @@
use log::warn;
use serde_json as json;
use std::collections::HashMap;
use tantivy::tokenizer::*;
use super::options::{get_global_file_resource_helper, FileResourcePathHelper};
use super::{build_in_analyzer::*, filter::*, tokenizers::get_builder_with_tokenizer};
use crate::analyzer::filter::{get_stop_words_list, get_string_list};
use crate::analyzer::filter::{create_filter, get_stop_words_list, get_string_list};
use crate::error::Result;
use crate::error::TantivyBindingError;
struct AnalyzerBuilder<'a> {
filters: HashMap<String, SystemFilter>,
helper: &'a mut FileResourcePathHelper,
params: &'a json::Map<String, json::Value>,
}
impl AnalyzerBuilder<'_> {
fn new(params: &json::Map<String, json::Value>) -> AnalyzerBuilder {
AnalyzerBuilder {
impl<'a> AnalyzerBuilder<'a> {
fn new(
params: &'a json::Map<String, json::Value>,
helper: &'a mut FileResourcePathHelper,
) -> Result<AnalyzerBuilder<'a>> {
Ok(AnalyzerBuilder {
filters: HashMap::new(),
params: params,
}
}
fn get_tokenizer_params(&self) -> Result<&json::Value> {
let tokenizer = self.params.get("tokenizer");
if tokenizer.is_none() {
return Err(TantivyBindingError::InternalError(format!(
"tokenizer name or type must be set"
)));
}
let value = tokenizer.unwrap();
if value.is_object() || value.is_string() {
return Ok(tokenizer.unwrap());
}
Err(TantivyBindingError::InternalError(format!(
"tokenizer name should be string or dict"
)))
helper: helper,
})
}
fn build_filter(
@ -73,7 +63,7 @@ impl AnalyzerBuilder<'_> {
}
}
} else if filter.is_object() {
let filter = SystemFilter::try_from(filter.as_object().unwrap())?;
let filter = create_filter(filter.as_object().unwrap(), &mut self.helper)?;
builder = filter.transform(builder);
}
}
@ -110,10 +100,13 @@ impl AnalyzerBuilder<'_> {
}
}
fn build_template(self, type_: &str) -> Result<TextAnalyzer> {
fn build_template(mut self, type_: &str) -> Result<TextAnalyzer> {
match type_ {
"standard" => Ok(standard_analyzer(self.get_stop_words_option()?)),
"chinese" => Ok(chinese_analyzer(self.get_stop_words_option()?)),
"chinese" => Ok(chinese_analyzer(
self.get_stop_words_option()?,
&mut self.helper,
)),
"english" => Ok(english_analyzer(self.get_stop_words_option()?)),
other_ => Err(TantivyBindingError::InternalError(format!(
"unknown build-in analyzer type: {}",
@ -128,7 +121,7 @@ impl AnalyzerBuilder<'_> {
Some(type_) => {
if !type_.is_string() {
return Err(TantivyBindingError::InternalError(format!(
"analyzer type shoud be string"
"analyzer type should be string"
)));
}
return self.build_template(type_.as_str().unwrap());
@ -137,8 +130,25 @@ impl AnalyzerBuilder<'_> {
};
//build custom analyzer
let tokenizer_params = self.get_tokenizer_params()?;
let mut builder = get_builder_with_tokenizer(&tokenizer_params, create_analyzer_by_json)?;
let tokenizer_params = self.params.get("tokenizer");
if tokenizer_params.is_none() {
return Err(TantivyBindingError::InternalError(format!(
"tokenizer name or type must be set"
)));
}
let value = tokenizer_params.unwrap();
if !value.is_object() && !value.is_string() {
return Err(TantivyBindingError::InternalError(format!(
"tokenizer name should be string or dict"
)));
}
let mut builder = get_builder_with_tokenizer(
tokenizer_params.unwrap(),
&mut self.helper,
create_analyzer_by_json,
)?;
// build and check other options
builder = self.build_option(builder)?;
@ -148,30 +158,50 @@ impl AnalyzerBuilder<'_> {
pub fn create_analyzer_by_json(
analyzer_params: &json::Map<String, json::Value>,
helper: &mut FileResourcePathHelper,
) -> Result<TextAnalyzer> {
if analyzer_params.is_empty() {
return Ok(standard_analyzer(vec![]));
}
let builder = AnalyzerBuilder::new(analyzer_params);
let builder = AnalyzerBuilder::new(analyzer_params, helper)?;
builder.build()
}
pub fn create_analyzer(params: &str) -> Result<TextAnalyzer> {
pub fn create_helper(extra_info: &str) -> Result<FileResourcePathHelper> {
if extra_info.is_empty() {
Ok(get_global_file_resource_helper())
} else {
Ok(FileResourcePathHelper::from_json(
&json::from_str::<json::Value>(&extra_info)
.map_err(|e| TantivyBindingError::JsonError(e))?,
)?)
}
}
pub fn create_analyzer(params: &str, extra_info: &str) -> Result<TextAnalyzer> {
if params.len() == 0 {
return Ok(standard_analyzer(vec![]));
}
let json_params =
json::from_str::<json::Value>(&params).map_err(|e| TantivyBindingError::JsonError(e))?;
let json_params = &json::from_str::<json::Map<String, json::Value>>(&params)
.map_err(|e| TantivyBindingError::JsonError(e))?;
create_analyzer_by_json(
json_params
.as_object()
.ok_or(TantivyBindingError::InternalError(
"params should be a json map".to_string(),
))?,
)
let mut helper = create_helper(extra_info)?;
create_analyzer_by_json(json_params, &mut helper)
}
pub fn validate_analyzer(params: &str, extra_info: &str) -> Result<Vec<i64>> {
if params.len() == 0 {
return Ok(vec![]);
}
let json_params = &json::from_str::<json::Map<String, json::Value>>(&params)
.map_err(|e| TantivyBindingError::JsonError(e))?;
let mut helper = create_helper(extra_info)?;
create_analyzer_by_json(json_params, &mut helper)?;
Ok(helper.get_resource_ids())
}
#[cfg(test)]
@ -185,7 +215,7 @@ mod tests {
"stop_words": ["_english_"]
}"#;
let tokenizer = create_analyzer(&params.to_string());
let tokenizer = create_analyzer(&params.to_string(), "");
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
}
@ -195,7 +225,7 @@ mod tests {
"type": "chinese"
}"#;
let tokenizer = create_analyzer(&params.to_string());
let tokenizer = create_analyzer(&params.to_string(), "");
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
let mut bining = tokenizer.unwrap();
let mut stream = bining.token_stream("系统安全;,'';lxyz密码");
@ -219,7 +249,7 @@ mod tests {
}
}"#;
let tokenizer = create_analyzer(&params.to_string());
let tokenizer = create_analyzer(&params.to_string(), "");
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
let mut bining = tokenizer.unwrap();


@ -2,6 +2,7 @@ use tantivy::tokenizer::*;
use super::filter::stop_words;
use super::filter::*;
use super::options::FileResourcePathHelper;
use super::tokenizers::*;
// default build-in analyzer
@ -15,8 +16,13 @@ pub(crate) fn standard_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
builder.build()
}
pub fn chinese_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
let builder = jieba_builder(None).unwrap().filter(CnAlphaNumOnlyFilter);
pub fn chinese_analyzer(
stop_words: Vec<String>,
helper: &mut FileResourcePathHelper,
) -> TextAnalyzer {
let builder = jieba_builder(None, helper)
.unwrap()
.filter(CnAlphaNumOnlyFilter);
if stop_words.len() > 0 {
return builder.filter(StopWordFilter::remove(stop_words)).build();
}


@ -1,5 +1,6 @@
use super::filter::FilterBuilder;
use super::util::read_line_file;
use crate::analyzer::options::FileResourcePathHelper;
use crate::error::{Result, TantivyBindingError};
use serde_json as json;
use tantivy::tokenizer::SplitCompoundWords;
@ -8,7 +9,10 @@ const WORD_LIST_KEY: &str = "word_list";
const WORD_LIST_FILE_KEY: &str = "word_list_file";
impl FilterBuilder for SplitCompoundWords {
fn from_json(params: &json::Map<String, json::Value>) -> Result<Self> {
fn from_json(
params: &json::Map<String, json::Value>,
helper: &mut FileResourcePathHelper,
) -> Result<Self> {
let mut dict = Vec::<String>::new();
if let Some(value) = params.get(WORD_LIST_KEY) {
if !value.is_array() {
@ -29,7 +33,12 @@ impl FilterBuilder for SplitCompoundWords {
}
if let Some(file_params) = params.get(WORD_LIST_FILE_KEY) {
read_line_file(&mut dict, file_params, "decompounder word list file")?;
read_line_file(
helper,
&mut dict,
file_params,
"decompounder word list file",
)?;
}
if dict.is_empty() {
@ -49,13 +58,17 @@ impl FilterBuilder for SplitCompoundWords {
#[cfg(test)]
mod tests {
use super::SplitCompoundWords;
use crate::analyzer::filter::FilterBuilder;
use crate::analyzer::tokenizers::standard_builder;
use crate::log::init_log;
use serde_json as json;
use std::collections::HashSet;
use std::path::Path;
use std::sync::Arc;
use serde_json as json;
use super::SplitCompoundWords;
use crate::analyzer::filter::FilterBuilder;
use crate::analyzer::options::{FileResourcePathHelper, ResourceInfo};
use crate::analyzer::tokenizers::standard_builder;
use crate::log::init_log;
#[test]
fn test_decompounder_filter_with_file() {
@ -74,7 +87,8 @@ mod tests {
);
let json_params = json::from_str::<json::Value>(&params).unwrap();
// let filter = SplitCompoundWords::from_dictionary(vec!["bank", "note"]);
let filter = SplitCompoundWords::from_json(json_params.as_object().unwrap());
let mut helper = FileResourcePathHelper::new(Arc::new(ResourceInfo::new()));
let filter = SplitCompoundWords::from_json(json_params.as_object().unwrap(), &mut helper);
assert!(filter.is_ok(), "error: {}", filter.err().unwrap());
let builder = standard_builder().filter(filter.unwrap());
let mut analyzer = builder.build();


@ -4,6 +4,7 @@ use tantivy::tokenizer::*;
use super::{
CnAlphaNumOnlyFilter, CnCharOnlyFilter, RegexFilter, RemovePunctFilter, SynonymFilter,
};
use crate::analyzer::options::FileResourcePathHelper;
use crate::error::{Result, TantivyBindingError};
pub(crate) enum SystemFilter {
@ -23,7 +24,10 @@ pub(crate) enum SystemFilter {
}
pub(crate) trait FilterBuilder {
fn from_json(params: &json::Map<String, json::Value>) -> Result<Self>
fn from_json(
params: &json::Map<String, json::Value>,
helper: &mut FileResourcePathHelper,
) -> Result<Self>
where
Self: Sized;
}
@ -109,10 +113,10 @@ impl From<&str> for SystemFilter {
}
}
impl TryFrom<&json::Map<String, json::Value>> for SystemFilter {
type Error = TantivyBindingError;
fn try_from(params: &json::Map<String, json::Value>) -> Result<Self> {
pub fn create_filter(
params: &json::Map<String, json::Value>,
helper: &mut FileResourcePathHelper,
) -> Result<SystemFilter> {
match params.get(&"type".to_string()) {
Some(value) => {
if !value.is_string() {
@ -123,13 +127,14 @@ impl TryFrom<&json::Map<String, json::Value>> for SystemFilter {
match value.as_str().unwrap() {
"length" => get_length_filter(params),
"stop" => StopWordFilter::from_json(params).map(|f| SystemFilter::Stop(f)),
"decompounder" => {
SplitCompoundWords::from_json(params).map(|f| SystemFilter::Decompounder(f))
}
"stemmer" => Stemmer::from_json(params).map(|f| SystemFilter::Stemmer(f)),
"stop" => StopWordFilter::from_json(params, helper).map(|f| SystemFilter::Stop(f)),
"decompounder" => SplitCompoundWords::from_json(params, helper)
.map(|f| SystemFilter::Decompounder(f)),
"stemmer" => Stemmer::from_json(params, helper).map(|f| SystemFilter::Stemmer(f)),
"regex" => RegexFilter::from_json(params).map(|f| SystemFilter::Regex(f)),
"synonym" => SynonymFilter::from_json(params).map(|f| SystemFilter::Synonym(f)),
"synonym" => {
SynonymFilter::from_json(params, helper).map(|f| SystemFilter::Synonym(f))
}
other => Err(TantivyBindingError::InternalError(format!(
"unsupport filter type: {}",
other
@ -141,4 +146,3 @@ impl TryFrom<&json::Map<String, json::Value>> for SystemFilter {
)),
}
}
}


@ -116,7 +116,7 @@ mod tests {
}]
}"#;
let tokenizer = create_analyzer(&params.to_string());
let tokenizer = create_analyzer(&params.to_string(), "");
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
let mut bining = tokenizer.unwrap();


@ -59,7 +59,7 @@ mod tests {
"filter": ["removepunct"]
}"#;
let tokenizer = create_analyzer(&params.to_string());
let tokenizer = create_analyzer(&params.to_string(), "");
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
let mut bining = tokenizer.unwrap();


@ -1,10 +1,14 @@
use super::filter::FilterBuilder;
use crate::analyzer::options::FileResourcePathHelper;
use crate::error::{Result, TantivyBindingError};
use serde_json as json;
use tantivy::tokenizer::{Language, Stemmer};
impl FilterBuilder for Stemmer {
fn from_json(params: &json::Map<String, json::Value>) -> Result<Self> {
fn from_json(
params: &json::Map<String, json::Value>,
_: &mut FileResourcePathHelper,
) -> Result<Self> {
let value = params.get("language");
if value.is_none() || !value.unwrap().is_string() {
return Err(TantivyBindingError::InternalError(


@ -1,6 +1,7 @@
use super::filter::FilterBuilder;
use super::stop_words::fetch_language_stop_words;
use super::util::*;
use crate::analyzer::options::FileResourcePathHelper;
use crate::error::{Result, TantivyBindingError};
use serde_json as json;
use tantivy::tokenizer::StopWordFilter;
@ -28,14 +29,17 @@ pub(crate) fn get_stop_words_list(str_list: Vec<String>) -> Vec<String> {
}
impl FilterBuilder for StopWordFilter {
fn from_json(params: &json::Map<String, json::Value>) -> Result<Self> {
fn from_json(
params: &json::Map<String, json::Value>,
helper: &mut FileResourcePathHelper,
) -> Result<Self> {
let mut dict = Vec::<String>::new();
if let Some(value) = params.get(STOP_WORDS_LIST_KEY) {
dict = get_stop_words_list(get_string_list(value, "stop_words")?);
}
if let Some(file_params) = params.get(STOP_WORDS_FILE_KEY) {
read_line_file(&mut dict, file_params, "stop words dict file")?;
read_line_file(helper, &mut dict, file_params, "stop words dict file")?;
}
Ok(StopWordFilter::remove(dict))
@ -46,11 +50,13 @@ impl FilterBuilder for StopWordFilter {
mod tests {
use super::StopWordFilter;
use crate::analyzer::filter::FilterBuilder;
use crate::analyzer::options::{FileResourcePathHelper, ResourceInfo};
use crate::analyzer::tokenizers::standard_builder;
use crate::log::init_log;
use serde_json as json;
use std::collections::HashSet;
use std::path::Path;
use std::sync::Arc;
#[test]
fn test_stop_words_filter_with_file() {
@ -69,7 +75,8 @@ mod tests {
);
let json_params = json::from_str::<json::Value>(&params).unwrap();
let filter = StopWordFilter::from_json(json_params.as_object().unwrap());
let mut helper = FileResourcePathHelper::new(Arc::new(ResourceInfo::new()));
let filter = StopWordFilter::from_json(json_params.as_object().unwrap(), &mut helper);
assert!(filter.is_ok(), "error: {}", filter.err().unwrap());
let builder = standard_builder().filter(filter.unwrap());


@ -1,4 +1,4 @@
use crate::analyzer::options::get_resource_path;
use crate::analyzer::options::{get_resource_path, FileResourcePathHelper};
use crate::error::{Result, TantivyBindingError};
use serde_json as json;
use std::collections::{HashMap, HashSet};
@ -199,30 +199,16 @@ impl SynonymDict {
}
}
fn read_synonyms_file(builder: &mut SynonymDictBuilder, params: &json::Value) -> Result<()> {
let path = get_resource_path(params, "synonyms dict file")?;
let file = std::fs::File::open(path)?;
let reader = std::io::BufReader::new(file);
for line in reader.lines() {
if let Ok(row_data) = line {
builder.add_row(&row_data)?;
} else {
return Err(TantivyBindingError::InternalError(format!(
"read synonyms dict file failed, error: {}",
line.unwrap_err().to_string()
)));
}
}
Ok(())
}
#[derive(Clone)]
pub struct SynonymFilter {
dict: Arc<SynonymDict>,
}
impl SynonymFilter {
pub fn from_json(params: &json::Map<String, json::Value>) -> Result<SynonymFilter> {
pub fn from_json(
params: &json::Map<String, json::Value>,
helper: &mut FileResourcePathHelper,
) -> Result<SynonymFilter> {
let expand = params.get("expand").map_or(Ok(true), |v| {
v.as_bool().ok_or(TantivyBindingError::InvalidArgument(
"create synonym filter failed, `expand` must be bool".to_string(),
@ -246,7 +232,19 @@ impl SynonymFilter {
}
if let Some(file_params) = params.get("synonyms_file") {
read_synonyms_file(&mut builder, file_params)?;
let path = get_resource_path(helper, file_params, "synonyms dict file")?;
let file = std::fs::File::open(path)?;
let reader = std::io::BufReader::new(file);
for line in reader.lines() {
if let Ok(row_data) = line {
builder.add_row(&row_data)?;
} else {
return Err(TantivyBindingError::InternalError(format!(
"read synonyms dict file failed, error: {}",
line.unwrap_err().to_string()
)));
}
}
}
Ok(SynonymFilter {
@ -350,11 +348,14 @@ impl<T: TokenStream> TokenStream for SynonymFilterStream<T> {
#[cfg(test)]
mod tests {
use super::SynonymFilter;
use crate::analyzer::options::{FileResourcePathHelper, ResourceInfo};
use crate::analyzer::tokenizers::standard_builder;
use crate::log::init_log;
use serde_json as json;
use std::collections::HashSet;
use std::path::Path;
use std::sync::Arc;
#[test]
fn test_synonym_filter() {
@ -365,7 +366,8 @@ mod tests {
"synonyms": ["trans => translate, \\=>", "\\\\test, test, tests"]
}"#;
let json_params = json::from_str::<json::Value>(&params).unwrap();
let filter = SynonymFilter::from_json(json_params.as_object().unwrap());
let mut helper = FileResourcePathHelper::new(Arc::new(ResourceInfo::new()));
let filter = SynonymFilter::from_json(json_params.as_object().unwrap(), &mut helper);
assert!(filter.is_ok(), "error: {}", filter.err().unwrap());
let builder = standard_builder().filter(filter.unwrap());
let mut analyzer = builder.build();
@ -402,7 +404,8 @@ mod tests {
}}"#
);
let json_params = json::from_str::<json::Value>(&params).unwrap();
let filter = SynonymFilter::from_json(json_params.as_object().unwrap());
let mut helper = FileResourcePathHelper::new(Arc::new(ResourceInfo::new()));
let filter = SynonymFilter::from_json(json_params.as_object().unwrap(), &mut helper);
assert!(filter.is_ok(), "error: {}", filter.err().unwrap());
let builder = standard_builder().filter(filter.unwrap());
let mut analyzer = builder.build();


@ -1,4 +1,5 @@
use crate::analyzer::options::get_resource_path;
use crate::analyzer::options::FileResourcePathHelper;
use crate::error::{Result, TantivyBindingError};
use serde_json as json;
use std::io::BufRead;
@ -26,11 +27,12 @@ pub fn get_string_list(value: &json::Value, label: &str) -> Result<Vec<String>>
}
pub(crate) fn read_line_file(
helper: &mut FileResourcePathHelper,
dict: &mut Vec<String>,
params: &json::Value,
key: &str,
) -> Result<()> {
let path = get_resource_path(params, key)?;
let path = get_resource_path(helper, params, key)?;
let file = std::fs::File::open(path)?;
let reader = std::io::BufReader::new(file);
for line in reader.lines() {


@ -2,10 +2,10 @@ mod analyzer;
mod build_in_analyzer;
mod dict;
mod filter;
mod options;
pub mod options;
pub mod tokenizers;
pub use self::analyzer::{create_analyzer, create_analyzer_by_json};
pub use self::analyzer::{create_analyzer, create_analyzer_by_json, validate_analyzer};
pub use self::options::set_options;
pub(crate) use self::build_in_analyzer::standard_analyzer;


@ -0,0 +1,8 @@
// cache key
pub(crate) static LINDERA_DOWNLOAD_KEY: &str = "lindera_download_urls";
pub(crate) static RESOURCE_MAP_KEY: &str = "resource_map";
// normal key
pub static DEFAULT_DICT_PATH_KEY: &str = "default_dict_path";
pub static RESOURCE_PATH_KEY: &str = "resource_path";
pub static RESOURCE_STORAGE_NAME_KEY: &str = "storage_name";


@ -1,8 +1,13 @@
mod common;
mod resource_info;
mod runtime_option;
mod util;
pub use self::runtime_option::{get_lindera_download_url, get_options, set_options};
pub use self::resource_info::{FileResourcePathHelper, ResourceInfo};
pub use self::runtime_option::{
get_global_file_resource_helper, get_lindera_download_url, get_options, set_options,
};
pub use self::util::get_resource_path;
pub use self::runtime_option::DEFAULT_DICT_PATH_KEY;
pub use self::common::{DEFAULT_DICT_PATH_KEY, RESOURCE_PATH_KEY};


@ -0,0 +1,190 @@
// resource options
use super::common::*;
use super::runtime_option::get_options;
use crate::error::{Result, TantivyBindingError};
use serde_json as json;
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use std::sync::Arc;
pub struct ResourceInfo {
storage_name: Option<String>,
resource_map: HashMap<String, i64>,
}
impl ResourceInfo {
pub fn new() -> Self {
Self {
storage_name: None,
resource_map: HashMap::new(),
}
}
pub fn debug(&self) -> String {
format!(
"storage_name: {:?}, resource_map: {:?}",
self.storage_name, self.resource_map
)
}
pub fn from_global_json(value: &json::Value) -> Result<Self> {
let mut resource_map = HashMap::new();
let kv = value
.as_object()
.ok_or(TantivyBindingError::InternalError(format!(
"file resource map should be a json map, but got: {}",
json::to_string(value).unwrap()
)))?;
for (key, value) in kv {
let url = value
.as_i64()
.ok_or(TantivyBindingError::InternalError(format!(
"file resource id should be integer, but got: {}",
json::to_string(value).unwrap()
)))?;
resource_map.insert(key.to_string(), url);
}
Ok(Self {
storage_name: None,
resource_map,
})
}
pub fn from_json(value: &json::Value) -> Result<Self> {
let mut resource_map = HashMap::new();
let m = value
.as_object()
.ok_or(TantivyBindingError::InternalError(format!(
"extra info should be a json map, but got: {}",
json::to_string(value).unwrap()
)))?;
if let Some(v) = m.get(RESOURCE_MAP_KEY) {
let kv = v
.as_object()
.ok_or(TantivyBindingError::InternalError(format!(
"file resource map should be a json map, but got: {}",
json::to_string(v).unwrap()
)))?;
for (key, value) in kv {
let url = value
.as_i64()
.ok_or(TantivyBindingError::InternalError(format!(
"file resource id should be integer, but got: {}",
json::to_string(value).unwrap()
)))?;
resource_map.insert(key.to_string(), url);
}
}
let mut storage_name = None;
if let Some(v) = m.get(RESOURCE_STORAGE_NAME_KEY) {
let name = v
.as_str()
.ok_or(TantivyBindingError::InternalError(format!(
"storage_name must set as string, but got: {}",
json::to_string(v).unwrap()
)))?
.to_string();
storage_name = Some(name)
}
Ok(Self {
storage_name,
resource_map,
})
}
}
impl FileResourcePathBuilder for ResourceInfo {
fn get_resource_file_path(
&self,
resource_name: &str,
file_name: &str,
) -> Result<(i64, PathBuf)> {
let resource_id =
self.resource_map
.get(resource_name)
.ok_or(TantivyBindingError::InternalError(format!(
"file resource: {} not found in local resource list",
resource_name
)))?;
let base_value =
get_options(RESOURCE_PATH_KEY).ok_or(TantivyBindingError::InternalError(
"local_resource_path config not init success".to_string(),
))?;
let base = base_value
.as_str()
.ok_or(TantivyBindingError::InternalError(
"local_resource_path must set as string".to_string(),
))?;
if let Some(storage_name) = &self.storage_name {
return Ok((
resource_id.clone(),
PathBuf::new()
.join(base)
.join(storage_name)
.join(resource_id.to_string())
.join(file_name),
));
} else {
return Ok((
resource_id.clone(),
PathBuf::new()
.join(base)
.join(resource_id.to_string())
.join(file_name),
));
}
}
}
pub trait FileResourcePathBuilder {
fn get_resource_file_path(
&self,
resource_name: &str,
file_name: &str,
) -> Result<(i64, PathBuf)>;
}
pub struct FileResourcePathHelper {
builder: Arc<dyn FileResourcePathBuilder>,
ids: HashSet<i64>,
}
impl FileResourcePathHelper {
pub fn new(builder: Arc<dyn FileResourcePathBuilder>) -> Self {
Self {
builder,
ids: HashSet::new(),
}
}
pub fn from_json(value: &json::Value) -> Result<Self> {
let info = ResourceInfo::from_json(value)?;
let builder: Arc<dyn FileResourcePathBuilder> = Arc::new(info);
Ok(Self {
builder,
ids: HashSet::new(),
})
}
pub fn get_resource_file_path(
&mut self,
resource_name: &str,
file_name: &str,
) -> Result<PathBuf> {
let (resource_id, path) = self
.builder
.get_resource_file_path(resource_name, file_name)?;
self.ids.insert(resource_id);
Ok(path)
}
pub fn get_resource_ids(self) -> Vec<i64> {
self.ids.into_iter().collect()
}
}
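
Because `FileResourcePathHelper` depends only on the `FileResourcePathBuilder` trait, tests (or, hypothetically, alternative storage layouts) can supply their own builder. A minimal sketch with a hypothetical in-memory builder, using the crate's `Result` alias:

```rust
use std::path::PathBuf;
use std::sync::Arc;

// Hypothetical builder for tests: every resource name maps to a fixed
// ID and a path under /tmp, with no global options involved.
struct FixedBuilder;

impl FileResourcePathBuilder for FixedBuilder {
    fn get_resource_file_path(
        &self,
        resource_name: &str,
        file_name: &str,
    ) -> Result<(i64, PathBuf)> {
        let id = 42; // illustrative fixed resource ID
        Ok((id, PathBuf::from("/tmp").join(resource_name).join(file_name)))
    }
}

fn demo() -> Result<()> {
    let mut helper = FileResourcePathHelper::new(Arc::new(FixedBuilder));
    let _path = helper.get_resource_file_path("dict", "stop.txt")?;
    // get_resource_ids consumes the helper and returns the IDs it saw.
    assert_eq!(helper.get_resource_ids(), vec![42]);
    Ok(())
}
```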


@ -1,3 +1,5 @@
use super::common::*;
use super::resource_info::{FileResourcePathBuilder, FileResourcePathHelper, ResourceInfo};
use crate::error::{Result, TantivyBindingError};
use once_cell::sync::Lazy;
use serde_json as json;
@ -7,14 +9,6 @@ use std::sync::{Arc, RwLock};
static GLOBAL_OPTIONS: Lazy<Arc<RuntimeOption>> = Lazy::new(|| Arc::new(RuntimeOption::new()));
// cache key
static LINDERA_DOWNLOAD_KEY: &str = "lindera_download_urls";
static RESOURCE_MAP_KEY: &str = "resource_map";
// normal key
pub static DEFAULT_DICT_PATH_KEY: &str = "default_dict_path";
pub static RESOURCE_PATH_KEY: &str = "resource_path";
pub fn set_options(params: &String) -> Result<()> {
GLOBAL_OPTIONS.set_json(params)
}
@ -27,8 +21,8 @@ pub fn get_lindera_download_url(kind: &str) -> Option<Vec<String>> {
GLOBAL_OPTIONS.get_lindera_download_urls(kind)
}
pub fn get_resource_file_path(resource_name: &str, file_name: &str) -> Result<PathBuf> {
GLOBAL_OPTIONS.get_resource_file_path(resource_name, file_name)
pub fn get_global_file_resource_helper() -> FileResourcePathHelper {
FileResourcePathHelper::new(GLOBAL_OPTIONS.clone())
}
// analyzer options
@ -57,35 +51,25 @@ impl RuntimeOption {
let r = self.inner.read().unwrap();
r.lindera_download_urls.get(kind).map(|v| v.clone())
}
}
fn get_resource_file_path(&self, resource_name: &str, file_name: &str) -> Result<PathBuf> {
// file resource
impl FileResourcePathBuilder for RuntimeOption {
fn get_resource_file_path(
&self,
resource_name: &str,
file_name: &str,
) -> Result<(i64, PathBuf)> {
let r = self.inner.read().unwrap();
let resource_id =
r.resource_map
.get(resource_name)
.ok_or(TantivyBindingError::InternalError(format!(
"file resource: {} not found in local resource list",
resource_name
)))?;
let base = r
.params
.get(RESOURCE_PATH_KEY)
.ok_or(TantivyBindingError::InternalError(
"local_resource_path config not init success".to_string(),
))?
.as_str()
.ok_or("local_resource_path must set as string")?;
return Ok(PathBuf::new()
.join(base)
.join(resource_id.to_string())
.join(file_name));
return r
.resource_info
.get_resource_file_path(resource_name, file_name);
}
}
struct RuntimeOptionInner {
params: HashMap<String, json::Value>,
resource_map: HashMap<String, i64>, // resource name -> resource id
resource_info: ResourceInfo, // resource name -> resource id
lindera_download_urls: HashMap<String, Vec<String>>, // dict name -> url
}
@ -93,7 +77,7 @@ impl RuntimeOptionInner {
fn new() -> Self {
RuntimeOptionInner {
params: HashMap::new(),
resource_map: HashMap::new(),
resource_info: ResourceInfo::new(),
lindera_download_urls: HashMap::new(),
}
}
@ -124,7 +108,7 @@ impl RuntimeOptionInner {
for (key, value) in m {
let array = value.as_array().ok_or(TantivyBindingError::InternalError(
"lindera download urls shoud be list".to_string(),
"lindera download urls should be list".to_string(),
))?;
if !array.iter().all(|v| v.is_string()) {
@ -143,18 +127,7 @@ impl RuntimeOptionInner {
}
if key == RESOURCE_MAP_KEY {
self.resource_map = HashMap::new();
let m = value.as_object().ok_or(TantivyBindingError::InternalError(
"lindera download urls should be a json map".to_string(),
))?;
for (key, value) in m {
let url = value.as_i64().ok_or(TantivyBindingError::InternalError(
"lindera download url shoud be string".to_string(),
))?;
self.resource_map.insert(key.to_string(), url);
}
self.resource_info = ResourceInfo::from_global_json(&value)?;
return Ok(());
}


@ -1,10 +1,14 @@
use serde_json as json;
use std::path::{Path, PathBuf};
use super::runtime_option::get_resource_file_path;
use super::resource_info::FileResourcePathHelper;
use crate::error::{Result, TantivyBindingError};
pub fn get_resource_path(v: &json::Value, resource_key: &str) -> Result<PathBuf> {
pub fn get_resource_path(
helper: &mut FileResourcePathHelper,
v: &json::Value,
resource_key: &str,
) -> Result<PathBuf> {
if !v.is_object() {
return Err(TantivyBindingError::InvalidArgument(format!(
"file config of {} must be object",
@ -73,7 +77,7 @@ pub fn get_resource_path(v: &json::Value, resource_key: &str) -> Result<PathBuf>
resource_key
)))?;
self::get_resource_file_path(resource_name, file_name)
helper.get_resource_file_path(resource_name, file_name)
}
other => Err(TantivyBindingError::InvalidArgument(format!(
"unsupported file type {} of {}",


@ -1,13 +1,14 @@
use core::{option::Option::Some, result::Result::Ok};
use jieba_rs;
use lazy_static::lazy_static;
use log::warn;
use serde_json as json;
use std::fs;
use std::io::BufReader;
use std::{borrow::Cow, path::PathBuf};
use tantivy::tokenizer::{Token, TokenStream, Tokenizer};
use crate::analyzer::options;
use crate::analyzer::options::{get_resource_path, FileResourcePathHelper};
use crate::error::{Result, TantivyBindingError};
lazy_static! {
@ -56,6 +57,7 @@ impl TokenStream for JiebaTokenStream {
fn get_jieba_dict(
params: &json::Map<String, json::Value>,
helper: &mut FileResourcePathHelper,
) -> Result<(Vec<String>, Option<String>, Option<PathBuf>)> {
let mut words = Vec::<String>::new();
let mut user_dict = None;
@ -101,7 +103,7 @@ fn get_jieba_dict(
match params.get("extra_dict_file") {
Some(v) => {
let path = options::get_resource_path(v, "jieba extra dict file")?;
let path = get_resource_path(helper, v, "jieba extra dict file")?;
user_dict = Some(path)
}
_ => {}
@ -156,8 +158,11 @@ impl<'a> JiebaTokenizer<'a> {
}
}
pub fn from_json(params: &json::Map<String, json::Value>) -> Result<JiebaTokenizer<'a>> {
let (words, system_dict, user_dict) = get_jieba_dict(params)?;
pub fn from_json(
params: &json::Map<String, json::Value>,
helper: &mut FileResourcePathHelper,
) -> Result<JiebaTokenizer<'a>> {
let (words, system_dict, user_dict) = get_jieba_dict(params, helper)?;
let mut tokenizer =
system_dict.map_or(Ok(jieba_rs::Jieba::empty()), |name| match name.as_str() {
@ -242,8 +247,11 @@ impl Tokenizer for JiebaTokenizer<'static> {
#[cfg(test)]
mod tests {
use serde_json as json;
use std::sync::Arc;
use super::JiebaTokenizer;
use crate::analyzer::options::{FileResourcePathHelper, ResourceInfo};
use tantivy::tokenizer::TokenStream;
use tantivy::tokenizer::Tokenizer;
@ -255,7 +263,8 @@ mod tests {
let json_param = json::from_str::<json::Map<String, json::Value>>(&params);
assert!(json_param.is_ok());
let tokenizer = JiebaTokenizer::from_json(&json_param.unwrap());
let mut helper = FileResourcePathHelper::new(Arc::new(ResourceInfo::new()));
let tokenizer = JiebaTokenizer::from_json(&json_param.unwrap(), &mut helper);
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
let mut bining = tokenizer.unwrap();
let mut stream = bining.token_stream("结巴分词器");
@ -280,7 +289,8 @@ mod tests {
let json_param = json::from_str::<json::Map<String, json::Value>>(&params);
assert!(json_param.is_ok());
let tokenizer = JiebaTokenizer::from_json(&json_param.unwrap());
let mut helper = FileResourcePathHelper::new(Arc::new(ResourceInfo::new()));
let tokenizer = JiebaTokenizer::from_json(&json_param.unwrap(), &mut helper);
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
let mut bining = tokenizer.unwrap();
let mut stream = bining.token_stream("milvus结巴分词器中文测试");
@ -303,7 +313,8 @@ mod tests {
let json_param = json::from_str::<json::Map<String, json::Value>>(&params);
assert!(json_param.is_ok());
let tokenizer = JiebaTokenizer::from_json(&json_param.unwrap());
let mut helper = FileResourcePathHelper::new(Arc::new(ResourceInfo::new()));
let tokenizer = JiebaTokenizer::from_json(&json_param.unwrap(), &mut helper);
assert!(tokenizer.is_ok(), "error: {}", tokenizer.err().unwrap());
let mut bining = tokenizer.unwrap();
let mut stream = bining.token_stream("milvus結巴分詞器中文測試");


@ -1,3 +1,4 @@
use crate::analyzer::options::FileResourcePathHelper;
use crate::error::{Result, TantivyBindingError};
use lingua::{LanguageDetector, LanguageDetectorBuilder};
use serde_json as json;
@ -164,7 +165,11 @@ impl<'a> LangIdentTokenizer<'a> {
pub fn from_json<'b>(
params: &'b json::Map<String, json::Value>,
fc: fn(&json::Map<String, json::Value>) -> Result<TextAnalyzer>,
helper: &mut FileResourcePathHelper,
fc: fn(
&json::Map<String, json::Value>,
helper: &mut FileResourcePathHelper,
) -> Result<TextAnalyzer>,
) -> Result<LangIdentTokenizer<'a>> {
// init identfier for tokenizer
let identifier = params
@ -188,12 +193,15 @@ impl<'a> LangIdentTokenizer<'a> {
for (name, params) in sub_analyzers {
analyzer.add(
name,
fc(params.as_object().ok_or_else(|| {
fc(
params.as_object().ok_or_else(|| {
TantivyBindingError::InvalidArgument(format!(
"sub analyzer \"{}\" params must be dict",
name
))
})?)?,
})?,
helper,
)?,
);
}
@ -257,9 +265,11 @@ impl Tokenizer for LangIdentTokenizer<'static> {
#[cfg(test)]
mod tests {
use serde_json as json;
use std::sync::Arc;
use tantivy::tokenizer::Tokenizer;
use super::LangIdentTokenizer;
use crate::analyzer::options::{FileResourcePathHelper, ResourceInfo};
use crate::analyzer::tokenizers::lang_ident_tokenizer::BoxIdentifier;
use crate::analyzer::{create_analyzer, create_analyzer_by_json};
use crate::error::Result;
@ -276,8 +286,8 @@ mod tests {
let mut analyzer = LangIdentTokenizer::new(BoxIdentifier::default());
let result = || -> Result<()> {
analyzer.add("default", create_analyzer(standard_params)?);
analyzer.add("cmn", create_analyzer(jieba_params)?);
analyzer.add("default", create_analyzer(standard_params, "")?);
analyzer.add("cmn", create_analyzer(jieba_params, "")?);
Ok(())
}();
@ -304,6 +314,7 @@ mod tests {
let builder: std::result::Result<LangIdentTokenizer, crate::error::TantivyBindingError> =
LangIdentTokenizer::from_json(
json_params.as_object().unwrap(),
&mut FileResourcePathHelper::new(Arc::new(ResourceInfo::new())),
create_analyzer_by_json,
);
assert!(builder.is_ok(), "error: {}", builder.err().unwrap());
@ -337,6 +348,7 @@ mod tests {
let builder: std::result::Result<LangIdentTokenizer, crate::error::TantivyBindingError> =
LangIdentTokenizer::from_json(
json_params.as_object().unwrap(),
&mut FileResourcePathHelper::new(Arc::new(ResourceInfo::new())),
create_analyzer_by_json,
);
assert!(builder.is_ok(), "error: {}", builder.err().unwrap());
@ -372,6 +384,7 @@ mod tests {
let builder: std::result::Result<LangIdentTokenizer, crate::error::TantivyBindingError> =
LangIdentTokenizer::from_json(
json_params.as_object().unwrap(),
&mut FileResourcePathHelper::new(Arc::new(ResourceInfo::new())),
create_analyzer_by_json,
);
assert!(builder.is_ok(), "error: {}", builder.err().unwrap());


@ -1,3 +1,4 @@
use crate::analyzer::options::FileResourcePathHelper;
use log::warn;
use serde_json as json;
use tantivy::tokenizer::*;
@ -24,24 +25,29 @@ pub fn icu_builder() -> TextAnalyzerBuilder {
pub fn lang_ident_builder(
params: Option<&json::Map<String, json::Value>>,
fc: fn(&json::Map<String, json::Value>) -> Result<TextAnalyzer>,
helper: &mut FileResourcePathHelper,
fc: fn(
&json::Map<String, json::Value>,
helper: &mut FileResourcePathHelper,
) -> Result<TextAnalyzer>,
) -> Result<TextAnalyzerBuilder> {
if params.is_none() {
return Err(TantivyBindingError::InvalidArgument(format!(
"lang ident tokenizer must be customized"
)));
}
let tokenizer = LangIdentTokenizer::from_json(params.unwrap(), fc)?;
let tokenizer = LangIdentTokenizer::from_json(params.unwrap(), helper, fc)?;
Ok(TextAnalyzer::builder(tokenizer).dynamic())
}
pub fn jieba_builder(
params: Option<&json::Map<String, json::Value>>,
helper: &mut FileResourcePathHelper,
) -> Result<TextAnalyzerBuilder> {
if params.is_none() {
return Ok(TextAnalyzer::builder(JiebaTokenizer::new()).dynamic());
}
let tokenizer = JiebaTokenizer::from_json(params.unwrap())?;
let tokenizer = JiebaTokenizer::from_json(params.unwrap(), helper)?;
Ok(TextAnalyzer::builder(tokenizer).dynamic())
}
@ -83,7 +89,8 @@ pub fn char_group_builder(
pub fn get_builder_with_tokenizer(
params: &json::Value,
fc: fn(&json::Map<String, json::Value>) -> Result<TextAnalyzer>,
helper: &mut FileResourcePathHelper,
fc: fn(&json::Map<String, json::Value>, &mut FileResourcePathHelper) -> Result<TextAnalyzer>,
) -> Result<TextAnalyzerBuilder> {
let name;
let params_map;
@ -113,11 +120,11 @@ pub fn get_builder_with_tokenizer(
match name {
"standard" => Ok(standard_builder()),
"whitespace" => Ok(whitespace_builder()),
"jieba" => jieba_builder(params_map),
"jieba" => jieba_builder(params_map, helper),
"lindera" => lindera_builder(params_map),
"char_group" => char_group_builder(params_map),
"icu" => Ok(icu_builder()),
"language_identifier" => lang_ident_builder(params_map, fc),
"language_identifier" => lang_ident_builder(params_map, helper, fc),
"grpc" => grpc_builder(params_map),
other => {
warn!("unsupported tokenizer: {}", other);


@ -147,6 +147,14 @@ impl RustResult {
}
}
pub fn from_vec_i64(value: Vec<i64>) -> Self {
RustResult {
success: true,
value: Value::RustArrayI64(RustArrayI64::from_vec(value)),
error: std::ptr::null(),
}
}
pub fn from_error(error: String) -> Self {
RustResult {
success: false,
@ -184,6 +192,11 @@ pub extern "C" fn free_rust_result(result: RustResult) {
free_rust_array(array);
}
}
Value::RustArrayI64(array) => {
if !array.array.is_null() {
free_rust_array_i64(array);
}
}
_ => {}
}
if !result.error.is_null() {
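
The i64 array handoff follows the crate's existing array pattern; an illustrative sketch of the ownership contract (not the crate's exact code), assuming `RustArrayI64` is the `{ array, len }` pair that the C++ wrapper reads:

```rust
#[repr(C)]
pub struct RustArrayI64 {
    pub array: *mut i64,
    pub len: usize,
}

impl RustArrayI64 {
    pub fn from_vec(vec: Vec<i64>) -> Self {
        // Leak the buffer so the C side can read it; free_rust_array_i64
        // (called from free_rust_result above) reclaims it later.
        let mut boxed = vec.into_boxed_slice();
        let array = boxed.as_mut_ptr();
        let len = boxed.len();
        std::mem::forget(boxed);
        RustArrayI64 { array, len }
    }
}

pub unsafe fn free_rust_array_i64(a: RustArrayI64) {
    if !a.array.is_null() {
        // Rebuild the boxed slice so Rust's allocator frees it.
        drop(Box::from_raw(std::ptr::slice_from_raw_parts_mut(a.array, a.len)));
    }
}
```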


@ -49,7 +49,7 @@ pub extern "C" fn tantivy_register_tokenizer(
let real = ptr as *mut IndexReaderWrapper;
let tokenizer_name = cstr_to_str!(tokenizer_name);
let params = cstr_to_str!(analyzer_params);
let analyzer = create_analyzer(params);
let analyzer = create_analyzer(params, "");
match analyzer {
Ok(text_analyzer) => unsafe {
(*real).register_tokenizer(String::from(tokenizer_name), text_analyzer);


@ -157,7 +157,7 @@ impl AnalyzerBuilder<'_> {
Some(type_) => {
if !type_.is_string() {
return Err(TantivyBindingError::InternalError(format!(
"analyzer type shoud be string"
"analyzer type should be string"
)));
}
return self.build_template(type_.as_str().unwrap());


@ -37,7 +37,7 @@ impl IndexWriterWrapperImpl {
field_name
);
let tokenizer = create_analyzer(tokenizer_params)?;
let tokenizer = create_analyzer(tokenizer_params, "")?;
let (schema, field) = build_text_schema(field_name, tokenizer_name);
let index = if in_ram {


@ -1,4 +1,5 @@
use crate::analyzer::create_analyzer_by_json;
use crate::analyzer::options::get_global_file_resource_helper;
use serde_json::{self, Value};
use std::cmp::Ordering;
use std::collections::{BinaryHeap, HashMap};
@ -38,7 +39,9 @@ pub fn compute_phrase_match_slop(
.ok_or("Tokenizer params must be a JSON object")?;
// 2. Create Analyzer
let mut analyzer = create_analyzer_by_json(params_obj)
// TODO: support build helper from extra_info
let mut helper = get_global_file_resource_helper();
let mut analyzer = create_analyzer_by_json(params_obj, &mut helper)
.map_err(|e| format!("Failed to create analyzer: {:?}", e))?;
// 3. Tokenize Query


@ -1,7 +1,7 @@
use libc::{c_char, c_void};
use tantivy::tokenizer::TextAnalyzer;
use crate::analyzer::{create_analyzer, set_options};
use crate::analyzer::{create_analyzer, set_options, validate_analyzer};
use crate::{
array::RustResult,
log::init_log,
@ -10,10 +10,14 @@ use crate::{
};
#[no_mangle]
pub extern "C" fn tantivy_create_analyzer(analyzer_params: *const c_char) -> RustResult {
pub extern "C" fn tantivy_create_analyzer(
analyzer_params: *const c_char,
extra_info: *const c_char,
) -> RustResult {
init_log();
let params = unsafe { c_str_to_str(analyzer_params).to_string() };
let analyzer = create_analyzer(&params);
let extra_info_str = unsafe { c_str_to_str(extra_info).to_string() };
let analyzer = create_analyzer(&params, &extra_info_str);
match analyzer {
Ok(text_analyzer) => RustResult::from_ptr(create_binding(text_analyzer)),
Err(err) => RustResult::from_error(format!(
@ -23,6 +27,24 @@ pub extern "C" fn tantivy_create_analyzer(analyzer_params: *const c_char) -> Rus
}
}
#[no_mangle]
pub extern "C" fn tantivy_validate_analyzer(
analyzer_params: *const c_char,
extra_info: *const c_char,
) -> RustResult {
init_log();
let params = unsafe { c_str_to_str(analyzer_params).to_string() };
let extra_info_str = unsafe { c_str_to_str(extra_info).to_string() };
let result = validate_analyzer(&params, &extra_info_str);
match result {
Ok(ids) => RustResult::from_vec_i64(ids),
Err(err) => RustResult::from_error(format!(
"validate tokenizer failed with error: {} param: {}",
err, params,
)),
}
}
#[no_mangle]
pub extern "C" fn tantivy_clone_analyzer(ptr: *mut c_void) -> *mut c_void {
let analyzer = ptr as *mut TextAnalyzer;


@ -14,9 +14,20 @@ struct Tokenizer {
NO_COPY_OR_ASSIGN(Tokenizer);
explicit Tokenizer(std::string&& params) {
auto shared_params = std::make_shared<std::string>(std::move(params));
auto res =
RustResultWrapper(tantivy_create_analyzer(shared_params->c_str()));
auto shared_params = std::make_shared<std::string>(params);
auto res = RustResultWrapper(
tantivy_create_analyzer(shared_params->c_str(), ""));
AssertInfo(res.result_->success,
"Tokenizer creation failed: {}",
res.result_->error);
ptr_ = res.result_->value.ptr._0;
}
explicit Tokenizer(std::string&& params, std::string&& extra_info) {
auto shared_params = std::make_shared<std::string>(params);
auto shared_extra_info = std::make_shared<std::string>(extra_info);
auto res = RustResultWrapper(tantivy_create_analyzer(
shared_params->c_str(), shared_extra_info->c_str()));
AssertInfo(res.result_->success,
"Tokenizer creation failed: {}",
res.result_->error);
@ -69,4 +80,31 @@ set_tokenizer_options(std::string&& params) {
res.result_->error);
}
inline std::pair<int64_t*, size_t>
validate_analyzer(std::string&& params, std::string&& extra_info) {
auto shared_params = std::make_shared<std::string>(params);
auto shared_extra_info = std::make_shared<std::string>(extra_info);
auto res = RustResultWrapper(tantivy_validate_analyzer(
shared_params->c_str(), shared_extra_info->c_str()));
AssertInfo(res.result_->success,
"Validate analyzer params failed: {}",
res.result_->error);
auto array_wrapper =
RustArrayI64Wrapper(std::move(res.result_->value.rust_array_i64._0));
auto* array = array_wrapper.array_.array;
auto len = array_wrapper.array_.len;
int64_t* result = nullptr;
if (len > 0) {
result = static_cast<int64_t*>(malloc(len * sizeof(int64_t)));
if (result == nullptr) {
throw std::bad_alloc();
}
std::memcpy(result,
array,
len * sizeof(int64_t)); // Copy the array to the result
}
return {result, len};
}
} // namespace milvus::tantivy


@ -17,6 +17,8 @@
package model
import (
"slices"
"github.com/samber/lo"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
@ -52,6 +54,7 @@ type Collection struct {
UpdateTimestamp uint64
SchemaVersion int32
ShardInfos map[string]*ShardInfo
FileResourceIds []int64
}
type ShardInfo struct {
@ -90,6 +93,7 @@ func (c *Collection) ShallowClone() *Collection {
UpdateTimestamp: c.UpdateTimestamp,
SchemaVersion: c.SchemaVersion,
ShardInfos: c.ShardInfos,
FileResourceIds: c.FileResourceIds,
}
}
@ -127,6 +131,7 @@ func (c *Collection) Clone() *Collection {
UpdateTimestamp: c.UpdateTimestamp,
SchemaVersion: c.SchemaVersion,
ShardInfos: shardInfos,
FileResourceIds: slices.Clone(c.FileResourceIds),
}
}
@ -232,6 +237,7 @@ func UnmarshalCollectionModel(coll *pb.CollectionInfo) *Collection {
UpdateTimestamp: coll.UpdateTimestamp,
SchemaVersion: coll.Schema.Version,
ShardInfos: shardInfos,
FileResourceIds: coll.Schema.GetFileResourceIds(),
}
}
@ -283,6 +289,7 @@ func marshalCollectionModelWithConfig(coll *Collection, c *config) *pb.Collectio
EnableDynamicField: coll.EnableDynamicField,
DbName: coll.DBName,
Version: coll.SchemaVersion,
FileResourceIds: coll.FileResourceIds,
}
if c.withFields {


@ -1667,17 +1667,20 @@ func (node *QueryNode) ValidateAnalyzer(ctx context.Context, req *querypb.Valida
}
defer node.lifetime.Done()
resourceSet := typeutil.NewSet[int64]()
for _, info := range req.AnalyzerInfos {
err := analyzer.ValidateAnalyzer(info.GetParams())
ids, err := analyzer.ValidateAnalyzer(info.GetParams())
if err != nil {
if info.GetName() != "" {
return &querypb.ValidateAnalyzerResponse{Status: merr.Status(merr.WrapErrParameterInvalidMsg("validate analyzer failed for field: %s, name: %s, error: %v", info.GetField(), info.GetName(), err))}, nil
}
return &querypb.ValidateAnalyzerResponse{Status: merr.Status(merr.WrapErrParameterInvalidMsg("validate analyzer failed for field: %s, error: %v", info.GetField(), err))}, nil
}
resourceSet.Insert(ids...)
}
return &querypb.ValidateAnalyzerResponse{Status: merr.Status(nil)}, nil
return &querypb.ValidateAnalyzerResponse{Status: merr.Status(nil), ResourceIds: resourceSet.Collect()}, nil
}
type deleteRequestStringer struct {


@ -200,6 +200,7 @@ func (t *createCollectionTask) validateSchema(ctx context.Context, schema *schem
}
// validate analyzer params at any streaming node
// and set file resource ids to schema
if len(analyzerInfos) > 0 {
resp, err := t.mixCoord.ValidateAnalyzer(t.ctx, &querypb.ValidateAnalyzerRequest{
AnalyzerInfos: analyzerInfos,
@ -211,6 +212,7 @@ func (t *createCollectionTask) validateSchema(ctx context.Context, schema *schem
if err := merr.Error(resp.GetStatus()); err != nil {
return err
}
schema.FileResourceIds = resp.GetResourceIds()
}
return validateFieldDataType(schema.GetFields())


@ -204,6 +204,7 @@ func newCollectionModel(header *message.CreateCollectionMessageHeader, body *mes
UpdateTimestamp: ts,
SchemaVersion: 0,
ShardInfos: shardInfos,
FileResourceIds: body.CollectionSchema.GetFileResourceIds(),
}
}


@ -1080,6 +1080,7 @@ func convertModelToDesc(collInfo *model.Collection, aliases []string, dbName str
Functions: model.MarshalFunctionModels(collInfo.Functions),
EnableDynamicField: collInfo.EnableDynamicField,
Properties: collInfo.Properties,
FileResourceIds: collInfo.FileResourceIds,
}
resp.CollectionID = collInfo.CollectionID
resp.VirtualChannelNames = collInfo.VirtualChannelNames


@ -11,11 +11,15 @@ type (
)
func NewAnalyzer(param string) (Analyzer, error) {
return canalyzer.NewAnalyzer(param)
return canalyzer.NewAnalyzer(param, "")
}
func ValidateAnalyzer(param string) error {
return canalyzer.ValidateAnalyzer(param)
func ValidateAnalyzer(param string) ([]int64, error) {
return canalyzer.ValidateAnalyzer(param, "")
}
func UpdateGlobalResourceInfo(resourceMap map[string]int64) error {
return canalyzer.UpdateGlobalResourceInfo(resourceMap)
}
func InitOptions() {


@ -13,9 +13,11 @@ import (
"sync"
"unsafe"
"github.com/cockroachdb/errors"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/util/analyzer/interfaces"
"github.com/milvus-io/milvus/internal/util/pathutil"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)
@ -23,8 +25,8 @@ import (
const (
LinderaDictURLKey = "lindera_download_urls"
ResourceMapKey = "resource_map"
DictPathKey = "local_dict_path"
ResourcePathKey = "resource_path"
StorageNameKey = "storage_name"
)
var initOnce sync.Once
@ -39,7 +41,7 @@ func UpdateParams() {
cfg := paramtable.Get()
params := map[string]any{}
params[LinderaDictURLKey] = cfg.FunctionCfg.LinderaDownloadUrls.GetValue()
params[DictPathKey] = cfg.FunctionCfg.LocalResourcePath.GetValue()
params[ResourcePathKey] = pathutil.GetPath(pathutil.FileResourcePath, paramtable.GetNodeID())
bytes, err := json.Marshal(params)
if err != nil {
@ -55,12 +57,31 @@ func UpdateParams() {
}
}
func NewAnalyzer(param string) (interfaces.Analyzer, error) {
func UpdateGlobalResourceInfo(resourceMap map[string]int64) error {
bytes, err := json.Marshal(map[string]any{"resource_map": resourceMap})
if err != nil {
return errors.Wrap(err, "marshal global resource info failed")
}
paramPtr := C.CString(string(bytes))
defer C.free(unsafe.Pointer(paramPtr))
status := C.set_tokenizer_option(paramPtr)
if err := HandleCStatus(&status, "failed to update global resource info"); err != nil {
return errors.Wrap(err, "update global resource info failed")
}
return nil
}
func NewAnalyzer(param string, extraInfo string) (interfaces.Analyzer, error) {
paramPtr := C.CString(param)
defer C.free(unsafe.Pointer(paramPtr))
extraInfoPtr := C.CString(extraInfo)
defer C.free(unsafe.Pointer(extraInfoPtr))
var ptr C.CTokenizer
status := C.create_tokenizer(paramPtr, &ptr)
status := C.create_tokenizer(paramPtr, extraInfoPtr, &ptr)
if err := HandleCStatus(&status, "failed to create analyzer"); err != nil {
return nil, err
}
@ -68,13 +89,21 @@ func NewAnalyzer(param string) (interfaces.Analyzer, error) {
return NewCAnalyzer(ptr), nil
}
func ValidateAnalyzer(param string) error {
func ValidateAnalyzer(param string, extraInfo string) ([]int64, error) {
paramPtr := C.CString(param)
defer C.free(unsafe.Pointer(paramPtr))
status := C.validate_tokenizer(paramPtr)
if err := HandleCStatus(&status, "failed to create tokenizer"); err != nil {
return err
extraInfoPtr := C.CString(extraInfo)
defer C.free(unsafe.Pointer(extraInfoPtr))
result := C.validate_tokenizer(paramPtr, extraInfoPtr)
if err := HandleCStatus(&result.status, "failed to validate tokenizer"); err != nil {
return nil, err
}
return nil
cIds := unsafe.Slice((*int64)(unsafe.Pointer(result.resource_ids)), result.resource_ids_count)
goIds := make([]int64, len(cIds))
copy(goIds, cIds)
C.free(unsafe.Pointer(result.resource_ids))
return goIds, nil
}


@ -4,6 +4,8 @@ import (
"context"
"fmt"
"net"
"os"
"path/filepath"
"strings"
"testing"
@ -12,6 +14,8 @@ import (
"google.golang.org/grpc"
pb "github.com/milvus-io/milvus-proto/go-api/v2/tokenizerpb"
"github.com/milvus-io/milvus/internal/util/pathutil"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
)
type mockServer struct {
@ -32,7 +36,7 @@ func TestAnalyzer(t *testing.T) {
// use default analyzer.
{
m := "{}"
analyzer, err := NewAnalyzer(m)
analyzer, err := NewAnalyzer(m, "")
assert.NoError(t, err)
defer analyzer.Destroy()
@ -48,7 +52,7 @@ func TestAnalyzer(t *testing.T) {
{
m := ""
analyzer, err := NewAnalyzer(m)
analyzer, err := NewAnalyzer(m, "")
assert.NoError(t, err)
defer analyzer.Destroy()
@ -65,7 +69,7 @@ func TestAnalyzer(t *testing.T) {
// use default tokenizer.
{
m := "{\"tokenizer\": \"standard\"}"
analyzer, err := NewAnalyzer(m)
analyzer, err := NewAnalyzer(m, "")
assert.NoError(t, err)
defer analyzer.Destroy()
@ -82,7 +86,7 @@ func TestAnalyzer(t *testing.T) {
// jieba tokenizer.
{
m := "{\"tokenizer\": \"jieba\"}"
analyzer, err := NewAnalyzer(m)
analyzer, err := NewAnalyzer(m, "")
assert.NoError(t, err)
defer analyzer.Destroy()
@ -124,7 +128,7 @@ func TestAnalyzer(t *testing.T) {
defer stop()
m := "{\"tokenizer\": {\"type\":\"grpc\", \"endpoint\":\"http://" + addr + "\"}}"
analyzer, err := NewAnalyzer(m)
analyzer, err := NewAnalyzer(m, "")
assert.NoError(t, err)
defer analyzer.Destroy()
@ -138,7 +142,7 @@ func TestAnalyzer(t *testing.T) {
// lindera tokenizer.
{
m := "{\"tokenizer\": {\"type\":\"lindera\", \"dict_kind\": \"ipadic\"}}"
tokenizer, err := NewAnalyzer(m)
tokenizer, err := NewAnalyzer(m, "")
require.NoError(t, err)
defer tokenizer.Destroy()
@ -156,20 +160,78 @@ func TestValidateAnalyzer(t *testing.T) {
// valid analyzer
{
m := "{\"tokenizer\": \"standard\"}"
err := ValidateAnalyzer(m)
ids, err := ValidateAnalyzer(m, "")
assert.NoError(t, err)
assert.Equal(t, len(ids), 0)
}
{
m := ""
err := ValidateAnalyzer(m)
_, err := ValidateAnalyzer(m, "")
assert.NoError(t, err)
}
// invalid tokenizer
{
m := "{\"tokenizer\": \"invalid\"}"
err := ValidateAnalyzer(m)
_, err := ValidateAnalyzer(m, "")
assert.Error(t, err)
}
// with user resource
{
resourcePath := pathutil.GetPath(pathutil.FileResourcePath, paramtable.GetNodeID())
defer os.RemoveAll(resourcePath)
UpdateParams()
resourceID := int64(100)
// mock remote resource file
dir := filepath.Join(resourcePath, "default", fmt.Sprintf("%d", resourceID))
err := os.MkdirAll(dir, os.ModePerm)
require.NoError(t, err)
f, err := os.Create(filepath.Join(dir, "jieba.txt"))
require.NoError(t, err)
f.WriteString("stop")
f.Close()
m := "{\"tokenizer\": \"standard\", \"filter\": [{\"type\": \"stop\", \"stop_words_file\": {\"type\": \"remote\",\"resource_name\": \"jieba_dict\", \"file_name\": \"jieba.txt\"}}]}"
ids, err := ValidateAnalyzer(m, "{\"resource_map\": {\"jieba_dict\": 100}, \"storage_name\": \"default\"}")
require.NoError(t, err)
assert.Equal(t, len(ids), 1)
assert.Equal(t, ids[0], resourceID)
}
// with user resource and update global resource info
{
resourcePath := pathutil.GetPath(pathutil.FileResourcePath, paramtable.GetNodeID())
defer os.RemoveAll(resourcePath)
UpdateParams()
resourceID := int64(100)
// mock remote resource file
dir := filepath.Join(resourcePath, fmt.Sprintf("%d", resourceID))
err := os.MkdirAll(dir, os.ModePerm)
require.NoError(t, err)
f, err := os.Create(filepath.Join(dir, "jieba.txt"))
require.NoError(t, err)
f.WriteString("stop")
f.Close()
m := "{\"tokenizer\": \"standard\", \"filter\": [{\"type\": \"stop\", \"stop_words_file\": {\"type\": \"remote\",\"resource_name\": \"jieba_dict\", \"file_name\": \"jieba.txt\"}}]}"
// update global resource info
err = UpdateGlobalResourceInfo(map[string]int64{"jieba_dict": resourceID})
require.NoError(t, err)
ids, err := ValidateAnalyzer(m, "")
require.NoError(t, err)
assert.Equal(t, len(ids), 1)
assert.Equal(t, ids[0], resourceID)
}
}


@ -22,7 +22,7 @@ require (
github.com/jolestar/go-commons-pool/v2 v2.1.2
github.com/json-iterator/go v1.1.13-0.20220915233716-71ac16282d12
github.com/klauspost/compress v1.18.0
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251215075310-deda9c0dcece
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251218031911-f415d420437f
github.com/minio/minio-go/v7 v7.0.73
github.com/panjf2000/ants/v2 v2.11.3
github.com/prometheus/client_golang v1.20.5


@ -482,8 +482,8 @@ github.com/milvus-io/cgosymbolizer v0.0.0-20250318084424-114f4050c3a6 h1:YHMFI6L
github.com/milvus-io/cgosymbolizer v0.0.0-20250318084424-114f4050c3a6/go.mod h1:DvXTE/K/RtHehxU8/GtDs4vFtfw64jJ3PaCnFri8CRg=
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8=
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251215075310-deda9c0dcece h1:s0TFMZBxADKSzIr7LW/TE3L/WgCuo7QOfzkYX92Xog0=
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251215075310-deda9c0dcece/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251218031911-f415d420437f h1:YQ61KOySWPEXv8ePkr0Cu5q5iVHN11IIUSTWIiALCE8=
github.com/milvus-io/milvus-proto/go-api/v2 v2.6.6-0.20251218031911-f415d420437f/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
github.com/minio/minio-go/v7 v7.0.73 h1:qr2vi96Qm7kZ4v7LLebjte+MQh621fFWnv93p12htEo=


@ -1037,6 +1037,7 @@ message ValidateAnalyzerRequest{
message ValidateAnalyzerResponse{
common.Status status = 1;
repeated int64 resource_ids = 2;
}
message HighlightOptions{

File diff suppressed because it is too large Load Diff