enhance: Add and run rust format command in makefile (#42807)

relate: https://github.com/milvus-io/milvus/issues/42806

Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
aoiasd 2025-06-20 10:22:39 +08:00 committed by GitHub
parent 6798fdc3b3
commit 43a9f7a79e
8 changed files with 47 additions and 35 deletions
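For quick reference, the core change is a new rustcheck Makefile target that runs cargo fmt over the tantivy-binding crate and is added to the verifiers prerequisites; the remaining files are the formatting churn produced by the first run. A minimal usage sketch from the repository root (the target name and script path come from the diff below; the direct script invocation is an inferred equivalent, not something the commit documents):

    # run the new formatting target
    make rustcheck

    # roughly equivalent direct call, per the Makefile hunk below
    bash scripts/run_cargo_format.sh "$(pwd)/internal/core/thirdparty/tantivy/tantivy-binding/"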


@@ -145,6 +145,10 @@ cppcheck:
@#(env bash ${PWD}/scripts/core_build.sh -l)
@(env bash ${PWD}/scripts/check_cpp_fmt.sh)
rustcheck:
@echo "Running cargo format"
@env bash ${PWD}/scripts/run_cargo_format.sh ${PWD}/internal/core/thirdparty/tantivy/tantivy-binding/
fmt:
ifdef GO_DIFF_FILES
@@ -201,7 +205,7 @@ static-check: getdeps
@echo "Start check go_client e2e package"
@source $(PWD)/scripts/setenv.sh && cd tests/go_client && GO111MODULE=on GOFLAGS=-buildvcs=false $(INSTALL_PATH)/golangci-lint run --build-tags L0,L1,L2,test --timeout=30m --config $(PWD)/tests/go_client/.golangci.yml
verifiers: build-cpp getdeps cppcheck fmt static-check
verifiers: build-cpp getdeps cppcheck rustcheck fmt static-check
# Build various components locally.
binlog:


@@ -1,8 +1,8 @@
use tantivy::tokenizer::*;
use super::tokenizers::*;
use super::filter::*;
use super::filter::stop_words;
use super::filter::*;
use super::tokenizers::*;
// default build-in analyzer
pub(crate) fn standard_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
@@ -37,4 +37,4 @@ pub fn english_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
}
builder.build()
}
}


@@ -1,6 +1,6 @@
mod filter;
mod remove_punct_filter;
mod regex_filter;
mod remove_punct_filter;
pub(crate) mod stop_words;
mod util;


@@ -1,9 +1,9 @@
use core::{option::Option::Some, result::Result::Ok};
use jieba_rs;
use std::io::BufReader;
use lazy_static::lazy_static;
use serde_json as json;
use std::borrow::Cow;
use std::io::BufReader;
use tantivy::tokenizer::{Token, TokenStream, Tokenizer};
use crate::error::{Result, TantivyBindingError};
@@ -52,7 +52,9 @@ impl TokenStream for JiebaTokenStream {
}
}
fn get_jieba_dict(params: &json::Map<String, json::Value>) -> Result<(Vec<String>, Option<String>)> {
fn get_jieba_dict(
params: &json::Map<String, json::Value>,
) -> Result<(Vec<String>, Option<String>)> {
match params.get("dict") {
Some(value) => {
if !value.is_array() {
@@ -77,15 +79,13 @@ fn get_jieba_dict(params: &json::Map<String, json::Value>) -> Result<(Vec<String
)));
}
system_dict = Some(text)
} else{
} else {
dict.push(text);
}
}
Ok((dict, system_dict))
}
_ => {
Ok((vec![], Some("_default_".to_string())))
}
_ => Ok((vec![], Some("_default_".to_string()))),
}
}
@@ -138,21 +138,23 @@ impl<'a> JiebaTokenizer<'a> {
pub fn from_json(params: &json::Map<String, json::Value>) -> Result<JiebaTokenizer<'a>> {
let (dict, system_dict) = get_jieba_dict(params)?;
let mut tokenizer = system_dict.map_or(Ok(jieba_rs::Jieba::empty()), |name| {
match name.as_str() {
let mut tokenizer =
system_dict.map_or(Ok(jieba_rs::Jieba::empty()), |name| match name.as_str() {
"_default_" => Ok(jieba_rs::Jieba::new()),
"_extend_default_" => {
let mut buf = BufReader::new(EXTEND_DEFAULT_DICT.as_bytes());
jieba_rs::Jieba::with_dict(&mut buf).map_err(|e|
TantivyBindingError::InternalError(format!("failed to load extend default system dict: {}", e))
)
},
jieba_rs::Jieba::with_dict(&mut buf).map_err(|e| {
TantivyBindingError::InternalError(format!(
"failed to load extend default system dict: {}",
e
))
})
}
_ => Err(TantivyBindingError::InternalError(format!(
"invalid system dict name: {}",
name
)))
}
})?;
))),
})?;
for word in dict {
tokenizer.add_word(word.as_str(), None, None);


@@ -5,7 +5,7 @@ use std::collections::HashMap;
use tantivy::tokenizer::{BoxTokenStream, TextAnalyzer, Tokenizer};
use whatlang::detect;
pub trait Identifier : Sync + Send{
pub trait Identifier: Sync + Send {
fn detect(&self, text: &str) -> String;
fn box_clone(&self) -> Box<dyn Identifier>;
}
@@ -246,7 +246,6 @@ impl<'a> LangIdentTokenizer<'a> {
}
}
impl Tokenizer for LangIdentTokenizer<'static> {
type TokenStream<'a> = BoxTokenStream<'a>;


@@ -1,5 +1,5 @@
pub const ENGLISH: &[&str] = &[
"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in",
"into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the",
"their", "then", "there", "these", "they", "this", "to", "was", "will", "with",
"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it",
"no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these",
"they", "this", "to", "was", "will", "with",
];


@@ -10,17 +10,17 @@ use crate::{
};
#[repr(C)]
pub struct TantivyToken{
pub struct TantivyToken {
pub token: *const c_char,
pub start_offset: i64,
pub end_offset:i64,
pub position:i64,
pub position_length:i64,
pub end_offset: i64,
pub position: i64,
pub position_length: i64,
}
impl TantivyToken{
pub fn from_token(token: &Token) -> Self{
TantivyToken{
impl TantivyToken {
pub fn from_token(token: &Token) -> Self {
TantivyToken {
token: create_string(&token.text),
start_offset: token.offset_from as i64,
end_offset: token.offset_to as i64,
@@ -30,7 +30,6 @@ impl TantivyToken{
}
}
// Note: the tokenizer and text must be released after the token_stream.
#[no_mangle]
pub extern "C" fn tantivy_create_token_stream(
@@ -62,7 +61,9 @@ pub extern "C" fn tantivy_token_stream_get_token(token_stream: *mut c_void) -> *
}
#[no_mangle]
pub extern "C" fn tantivy_token_stream_get_detailed_token(token_stream: *mut c_void) -> TantivyToken {
pub extern "C" fn tantivy_token_stream_get_detailed_token(
token_stream: *mut c_void,
) -> TantivyToken {
let real = token_stream as *mut BoxTokenStream<'_>;
TantivyToken::from_token(unsafe { (*real).token()})
TantivyToken::from_token(unsafe { (*real).token() })
}


@@ -0,0 +1,6 @@
#!/bin/bash
(
cd $1
cargo fmt
)
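The new script formats in place, which suits a local make target. A hedged sketch of a check-only variant (my assumption, not part of this commit) that a CI job could use to fail when formatting drifts, with the path argument quoted:

    #!/bin/bash
    # hypothetical check-only variant: exit non-zero if any file needs reformatting
    (
        cd "$1" || exit 1
        cargo fmt -- --check
    )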