fix: resolve CVE-2020-25576, WS-2023-0223 (#44163)

fix: issue https://github.com/milvus-io/milvus/issues/44160

WS-2023-0223 was reported for
[atty-0.2.14.crate](https://ibmets.whitesourcesoftware.com/Wss/WSS.html#!libraryDetails;uuid=9c622063-376a-446b-bece-d7f6fd096758;project=7300448;orgToken=79623fcf-07fe-42b8-90bf-513fafeb41be)
and CVE-2020-25576 was reported for
[rand_core-0.3.1.crate](https://ibmets.whitesourcesoftware.com/Wss/WSS.html#!libraryDetails;uuid=20e2ad1b-c84c-4f18-98a9-4f27643b29ff;project=7300448;orgToken=79623fcf-07fe-42b8-90bf-513fafeb41be).

[atty-0.2.14.crate](https://ibmets.whitesourcesoftware.com/Wss/WSS.html#!libraryDetails;uuid=9c622063-376a-446b-bece-d7f6fd096758;project=7300448;orgToken=79623fcf-07fe-42b8-90bf-513fafeb41be)
is a transitive dependency pulled in by the root libraries
'cbindgen-0.26.0.crate' and 'criterion-0.4.0.crate'.

[rand_core-0.3.1.crate](https://ibmets.whitesourcesoftware.com/Wss/WSS.html#!libraryDetails;uuid=20e2ad1b-c84c-4f18-98a9-4f27643b29ff;project=7300448;orgToken=79623fcf-07fe-42b8-90bf-513fafeb41be)
is likewise a transitive dependency, pulled in by the 'rand-0.3.23.crate' library.

Path to dependency file:
/workspace/app/milvus/internal/core/thirdparty/tantivy/tantivy-binding/Cargo.toml

For remediation, since these vulnerabilities are transitive, the root
libraries should be updated to the latest non-vulnerable versions.
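
For context, the key bump for CVE-2020-25576 is moving the `rand` dev-dependency off the 0.3 line, which (per the report above) is what pulls in the vulnerable rand_core 0.3.1. Below is a minimal sketch of rand 0.7-style usage that any test code touching the crate would need, assuming default features; the `doc_id` example is hypothetical and not taken from this repository:

```rust
use rand::Rng;

fn main() {
    // Thread-local RNG; available in both rand 0.3 and 0.7.
    let mut rng = rand::thread_rng();
    // Hypothetical use: pick a random id for a test fixture.
    // rand 0.7 still accepts gen_range(low, high); rand 0.8+ switches to a range argument.
    let doc_id: u64 = rng.gen_range(0, 1_000);
    println!("random doc id: {}", doc_id);
}
```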

---------

Co-authored-by: Agnes-George1 <agnes.george1@ibm.com>
Co-authored-by: Abita Ann Augustine <abitaaugustine@gmail.com>
Co-authored-by: gifi-siby <gifi.s@ibm.com>
Agnes George 2025-09-30 13:55:53 +05:30 committed by GitHub
parent bd6eb43617
commit aea0418713
5 changed files with 748 additions and 754 deletions

File diff suppressed because it is too large.

View File

@@ -39,7 +39,7 @@ serde_json = "1.0.128"
jieba-rs = "0.6.8"
regex = "1.11.1"
either = "1.13.0"
icu_segmenter = "2.0.0-beta2"
icu_segmenter = "2.0.0"
whatlang = "0.16.4"
lingua = "1.7.1"
fancy-regex = "0.14.0"
@@ -66,12 +66,12 @@ flate2 = "1.1.0"
tar = "0.4.44"
[dev-dependencies]
rand = "0.3"
criterion = "0.4"
rand = "0.7"
criterion = "0.5"
tempfile = "3.0"
[build-dependencies]
cbindgen = "0.26.0"
cbindgen = "0.27.0"
tonic-build = "0.13.0"
[[bench]]
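
cbindgen sits under [build-dependencies], so its bump only affects header generation at build time; the `extern "C"` header touched in the next file is the kind of output it produces. A hypothetical build.rs sketch of the usual cbindgen Builder flow follows; the actual Milvus build script, output path, and header name are not shown in this diff and are assumptions here:

```rust
// Hypothetical build.rs; the crate path and output file name are illustrative only.
fn main() {
    let crate_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
    cbindgen::Builder::new()
        .with_crate(crate_dir)
        // Cxx output wraps declarations in extern "C" blocks, like the header below.
        .with_language(cbindgen::Language::Cxx)
        .generate()
        .expect("failed to generate bindings")
        .write_to_file("tantivy-binding.h");
}
```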

View File

@@ -494,4 +494,4 @@ void tantivy_free_analyzer(void *tokenizer);
bool tantivy_index_exist(const char *path);
} // extern "C"
} // extern "C"

View File

@@ -1,3 +1,4 @@
use icu_segmenter::options::WordBreakOptions;
use icu_segmenter::WordSegmenter;
use tantivy::tokenizer::{Token, TokenStream, Tokenizer};
@@ -8,7 +9,7 @@ pub struct IcuTokenizer {
impl Clone for IcuTokenizer {
fn clone(&self) -> Self {
IcuTokenizer {
segmenter: WordSegmenter::new_auto(Default::default()),
segmenter: WordSegmenter::try_new_auto(WordBreakOptions::default()).unwrap(),
}
}
}
@@ -41,30 +42,33 @@ impl TokenStream for IcuTokenStream {
impl IcuTokenizer {
pub fn new() -> IcuTokenizer {
IcuTokenizer {
segmenter: WordSegmenter::new_auto(Default::default()),
segmenter: WordSegmenter::try_new_auto(WordBreakOptions::default()).unwrap(),
}
}
fn tokenize(&self, text: &str) -> Vec<Token> {
let breakpoints: Vec<usize> = self.segmenter.segment_str(text).collect();
let mut tokens = vec![];
// Borrow the segmenter for segmentation
let borrowed_segmenter = self.segmenter.as_borrowed();
let breakpoints: Vec<usize> = borrowed_segmenter.segment_str(text).collect();
let mut tokens = Vec::with_capacity(breakpoints.len());
let mut offset = 0;
let mut position = 0;
for breakpoint in breakpoints.iter() {
if *breakpoint == offset {
for breakpoint in breakpoints {
if breakpoint == offset {
continue;
}
let token_str: &str = &text[offset..*breakpoint];
let token_str = &text[offset..breakpoint];
let token = Token {
text: token_str.to_string(),
offset_from: offset,
offset_to: *breakpoint,
position: position,
offset_to: breakpoint,
position,
position_length: token_str.chars().count(),
};
tokens.push(token);
offset = *breakpoint;
offset = breakpoint;
position += token_str.chars().count();
}
@@ -77,10 +81,7 @@ impl Tokenizer for IcuTokenizer {
fn token_stream(&mut self, text: &str) -> IcuTokenStream {
let tokens = self.tokenize(text);
IcuTokenStream {
tokens: tokens,
index: 0,
}
IcuTokenStream { tokens, index: 0 }
}
}
@@ -101,7 +102,7 @@ mod tests {
results.push(token.text.clone());
}
print!("test tokens :{:?}\n", results);
println!("test tokens: {:?}", results);
assert_eq!(results.len(), 24);
}
}
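
For reference, here is a minimal standalone sketch of the icu_segmenter 2.0 calling pattern the updated tokenizer relies on, mirroring the `try_new_auto(WordBreakOptions::default())` constructor and the `as_borrowed()`/`segment_str` calls from the diff above (error handling via `unwrap`, as in the tokenizer itself):

```rust
use icu_segmenter::options::WordBreakOptions;
use icu_segmenter::WordSegmenter;

fn main() {
    // Same constructor as the updated IcuTokenizer.
    let segmenter = WordSegmenter::try_new_auto(WordBreakOptions::default()).unwrap();
    let text = "hello world";
    // The updated tokenizer routes segmentation through the borrowed segmenter;
    // segment_str yields byte offsets of word boundaries (the tokenizer above
    // skips the leading 0).
    let borrowed = segmenter.as_borrowed();
    let breakpoints: Vec<usize> = borrowed.segment_str(text).collect();
    println!("breakpoints: {:?}", breakpoints);
}
```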

View File

@@ -38,7 +38,7 @@ service RootCoord {
*
* @return Status
*/
rpc AddCollectionField(milvus.AddCollectionFieldRequest) returns (common.Status) {}
rpc AddCollectionField(.milvus.proto.milvus.AddCollectionFieldRequest) returns (common.Status) {}
/**
* @brief This method is used to test collection existence.