enhance: Add and run rust format command in makefile (#42807)
relate: https://github.com/milvus-io/milvus/issues/42806

Signed-off-by: aoiasd <zhicheng.yue@zilliz.com>
parent 6798fdc3b3
commit 43a9f7a79e
Makefile (6 changed lines)
@@ -145,6 +145,10 @@ cppcheck:
 	@#(env bash ${PWD}/scripts/core_build.sh -l)
 	@(env bash ${PWD}/scripts/check_cpp_fmt.sh)
 
+rustcheck:
+	@echo "Running cargo format"
+	@env bash ${PWD}/scripts/run_cargo_format.sh ${PWD}/internal/core/thirdparty/tantivy/tantivy-binding/
+
 
 fmt:
 ifdef GO_DIFF_FILES
@@ -201,7 +205,7 @@ static-check: getdeps
 	@echo "Start check go_client e2e package"
 	@source $(PWD)/scripts/setenv.sh && cd tests/go_client && GO111MODULE=on GOFLAGS=-buildvcs=false $(INSTALL_PATH)/golangci-lint run --build-tags L0,L1,L2,test --timeout=30m --config $(PWD)/tests/go_client/.golangci.yml
 
-verifiers: build-cpp getdeps cppcheck fmt static-check
+verifiers: build-cpp getdeps cppcheck rustcheck fmt static-check
 
 # Build various components locally.
 binlog:
@@ -1,8 +1,8 @@
 use tantivy::tokenizer::*;
 
-use super::tokenizers::*;
-use super::filter::*;
 use super::filter::stop_words;
+use super::filter::*;
+use super::tokenizers::*;
 
 // default build-in analyzer
 pub(crate) fn standard_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
@@ -37,4 +37,4 @@ pub fn english_analyzer(stop_words: Vec<String>) -> TextAnalyzer {
     }
 
     builder.build()
-}
+}
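
Note: the import shuffle in the first hunk above is rustfmt's default behavior (reorder_imports is on by default): use declarations in a contiguous group are sorted segment by segment, and, as the hunk itself shows, a glob sorts after a named item from the same parent, so use super::filter::stop_words; ends up before use super::filter::*;. A minimal, self-contained sketch of that ordering rule, with placeholder module names rather than the binding's real ones:

    #![allow(unused_imports)]

    mod filter {
        pub mod stop_words {}
    }
    mod tokenizers {}

    // already in the order rustfmt produces: named item first, then
    // the glob from the same parent, then the later parent path
    use filter::stop_words;
    use filter::*;
    use tokenizers::*;

    fn main() {}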
@@ -1,6 +1,6 @@
 mod filter;
-mod remove_punct_filter;
 mod regex_filter;
+mod remove_punct_filter;
 pub(crate) mod stop_words;
 mod util;
 
@@ -1,9 +1,9 @@
 use core::{option::Option::Some, result::Result::Ok};
 use jieba_rs;
-use std::io::BufReader;
 use lazy_static::lazy_static;
 use serde_json as json;
 use std::borrow::Cow;
+use std::io::BufReader;
 use tantivy::tokenizer::{Token, TokenStream, Tokenizer};
 
 use crate::error::{Result, TantivyBindingError};
@@ -52,7 +52,9 @@ impl TokenStream for JiebaTokenStream {
     }
 }
 
-fn get_jieba_dict(params: &json::Map<String, json::Value>) -> Result<(Vec<String>, Option<String>)> {
+fn get_jieba_dict(
+    params: &json::Map<String, json::Value>,
+) -> Result<(Vec<String>, Option<String>)> {
     match params.get("dict") {
         Some(value) => {
             if !value.is_array() {
@@ -77,15 +79,13 @@ fn get_jieba_dict(params: &json::Map<String, json::Value>) -> Result<(Vec<Strin
                     )));
                 }
                 system_dict = Some(text)
-                } else{
+                } else {
                     dict.push(text);
                 }
             }
             Ok((dict, system_dict))
         }
-        _ => {
-            Ok((vec![], Some("_default_".to_string())))
-        }
+        _ => Ok((vec![], Some("_default_".to_string()))),
     }
 }
 
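
Note: the two hunks above only reformat get_jieba_dict; its behavior is unchanged. For readers skimming the diff, here is a self-contained sketch of the contract it implements, simplified (error handling elided) and assuming only the serde_json crate, which the binding imports as json:

    use serde_json::{json, Map, Value};

    // "dict" is an array of strings: "_default_" / "_extend_default_"
    // pick the system dictionary, anything else is a user word, and a
    // missing "dict" key falls back to the default system dictionary.
    fn split_dict(params: &Map<String, Value>) -> (Vec<String>, Option<String>) {
        let mut words = Vec::new();
        let mut system = None;
        match params.get("dict") {
            Some(Value::Array(items)) => {
                for item in items {
                    if let Value::String(text) = item {
                        if text == "_default_" || text == "_extend_default_" {
                            system = Some(text.clone());
                        } else {
                            words.push(text.clone());
                        }
                    }
                }
                (words, system)
            }
            _ => (Vec::new(), Some("_default_".to_string())),
        }
    }

    fn main() {
        let params = json!({ "dict": ["_extend_default_", "milvus"] });
        let map = params.as_object().unwrap();
        println!("{:?}", split_dict(map)); // (["milvus"], Some("_extend_default_"))
    }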
@@ -138,21 +138,23 @@ impl<'a> JiebaTokenizer<'a> {
     pub fn from_json(params: &json::Map<String, json::Value>) -> Result<JiebaTokenizer<'a>> {
         let (dict, system_dict) = get_jieba_dict(params)?;
 
-        let mut tokenizer = system_dict.map_or(Ok(jieba_rs::Jieba::empty()), |name| {
-            match name.as_str() {
+        let mut tokenizer =
+            system_dict.map_or(Ok(jieba_rs::Jieba::empty()), |name| match name.as_str() {
                 "_default_" => Ok(jieba_rs::Jieba::new()),
                 "_extend_default_" => {
                     let mut buf = BufReader::new(EXTEND_DEFAULT_DICT.as_bytes());
-                    jieba_rs::Jieba::with_dict(&mut buf).map_err(|e|
-                        TantivyBindingError::InternalError(format!("failed to load extend default system dict: {}", e))
-                    )
-                },
+                    jieba_rs::Jieba::with_dict(&mut buf).map_err(|e| {
+                        TantivyBindingError::InternalError(format!(
+                            "failed to load extend default system dict: {}",
+                            e
+                        ))
+                    })
+                }
                 _ => Err(TantivyBindingError::InternalError(format!(
                     "invalid system dict name: {}",
                     name
-                )))
-            }
-        })?;
+                ))),
+            })?;
 
         for word in dict {
             tokenizer.add_word(word.as_str(), None, None);
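
Note: the reformatted from_json keeps the Option::map_or idiom: when system_dict is None the closure is skipped and the Ok fallback (an empty jieba dict) is used, when it is Some the fallible closure runs, and the trailing ? unwraps either way. A tiny sketch of the same shape, with illustrative names and a plain String error rather than TantivyBindingError:

    fn load(name: Option<String>) -> Result<String, String> {
        // None -> Ok fallback; Some -> fallible match on the name
        name.map_or(Ok("empty dict".to_string()), |n| match n.as_str() {
            "_default_" => Ok("default dict".to_string()),
            _ => Err(format!("invalid system dict name: {}", n)),
        })
    }

    fn main() {
        assert_eq!(load(None), Ok("empty dict".to_string()));
        assert!(load(Some("bogus".to_string())).is_err());
    }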
@@ -5,7 +5,7 @@ use std::collections::HashMap;
 use tantivy::tokenizer::{BoxTokenStream, TextAnalyzer, Tokenizer};
 use whatlang::detect;
 
-pub trait Identifier : Sync + Send{
+pub trait Identifier: Sync + Send {
     fn detect(&self, text: &str) -> String;
     fn box_clone(&self) -> Box<dyn Identifier>;
 }
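
Note: box_clone in the trait above is the standard workaround for clonable trait objects: Clone cannot be a supertrait of an object-safe trait (its clone returns Self), so the trait exposes an explicit boxed clone instead. A minimal sketch with a hypothetical Fixed identifier, not one of the binding's implementations:

    trait Identifier: Sync + Send {
        fn detect(&self, text: &str) -> String;
        fn box_clone(&self) -> Box<dyn Identifier>;
    }

    #[derive(Clone)]
    struct Fixed(String);

    impl Identifier for Fixed {
        fn detect(&self, _text: &str) -> String {
            self.0.clone()
        }
        fn box_clone(&self) -> Box<dyn Identifier> {
            Box::new(self.clone()) // clone the concrete type, re-box it
        }
    }

    fn main() {
        let a: Box<dyn Identifier> = Box::new(Fixed("en".to_string()));
        let b = a.box_clone(); // clone through the trait object
        assert_eq!(b.detect("hello"), "en");
    }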
@@ -246,7 +246,6 @@ impl<'a> LangIdentTokenizer<'a> {
     }
 }
 
-
 impl Tokenizer for LangIdentTokenizer<'static> {
     type TokenStream<'a> = BoxTokenStream<'a>;
 
@@ -1,5 +1,5 @@
 pub const ENGLISH: &[&str] = &[
-    "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in",
-    "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the",
-    "their", "then", "there", "these", "they", "this", "to", "was", "will", "with",
+    "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it",
+    "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these",
+    "they", "this", "to", "was", "will", "with",
 ];
@@ -10,17 +10,17 @@ use crate::{
 };
 
 #[repr(C)]
-pub struct TantivyToken{
+pub struct TantivyToken {
     pub token: *const c_char,
     pub start_offset: i64,
-    pub end_offset:i64,
-    pub position:i64,
-    pub position_length:i64,
+    pub end_offset: i64,
+    pub position: i64,
+    pub position_length: i64,
 }
 
-impl TantivyToken{
-    pub fn from_token(token: &Token) -> Self{
-        TantivyToken{
+impl TantivyToken {
+    pub fn from_token(token: &Token) -> Self {
+        TantivyToken {
             token: create_string(&token.text),
             start_offset: token.offset_from as i64,
             end_offset: token.offset_to as i64,
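
Note: the #[repr(C)] on TantivyToken (untouched by the reformat) is what makes returning the struct by value across the C boundary sound: it pins field order and gives a C-compatible layout, whereas the default Rust repr may reorder fields. A trimmed, hypothetical stand-in to illustrate:

    use std::mem::size_of;
    use std::os::raw::c_char;

    #[allow(dead_code)]
    #[repr(C)]
    struct TokenView {
        token: *const c_char, // pointer to a C string, as in TantivyToken
        start_offset: i64,
        end_offset: i64,
    }

    fn main() {
        // deterministic layout: pointer first, then the two i64 fields
        println!("size = {} bytes", size_of::<TokenView>()); // 24 on common 64-bit targets
    }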
@@ -30,7 +30,6 @@ impl TantivyToken{
     }
 }
 
-
 // Note: the tokenizer and text must be released after the token_stream.
 #[no_mangle]
 pub extern "C" fn tantivy_create_token_stream(
@@ -62,7 +61,9 @@ pub extern "C" fn tantivy_token_stream_get_token(token_stream: *mut c_void) -> *
 }
 
 #[no_mangle]
-pub extern "C" fn tantivy_token_stream_get_detailed_token(token_stream: *mut c_void) -> TantivyToken {
+pub extern "C" fn tantivy_token_stream_get_detailed_token(
+    token_stream: *mut c_void,
+) -> TantivyToken {
     let real = token_stream as *mut BoxTokenStream<'_>;
-    TantivyToken::from_token(unsafe { (*real).token()})
+    TantivyToken::from_token(unsafe { (*real).token() })
 }
scripts/run_cargo_format.sh (new file, 6 lines)
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+(
+    cd $1
+    cargo fmt
+)