fix:修复2.2.0无法使用内置分析仪的问题,并优化注解的使用

This commit is contained in:
xgc 2024-11-29 16:29:16 +08:00
parent 5dc94f3aaf
commit e92cc55ec1
9 changed files with 88 additions and 47 deletions

View File

@ -34,35 +34,55 @@ public class TextEntity {
dataType = DataType.VarChar, dataType = DataType.VarChar,
enableAnalyzer = true, enableAnalyzer = true,
analyzerParams = @AnalyzerParams( analyzerParams = @AnalyzerParams(
builtInFilters = { type= AnalyzerType.CHINESE
@BuiltInFilter
},
customFilters = {
@CustomFilter(type = "length", max = 40),
@CustomFilter(type = "stop", stopWords = {"of", "to"})
}
) )
// analyzerParams = @AnalyzerParams(
// tokenizer= "standard",
// filter=@Filter(
// builtInFilters={
// BuiltInFilterType.lowercase
// },
// customFilters = {
// @CustomFilter(
// type = "length",
// max = 40
// ),
// @CustomFilter(
// type = "stop",
// stopWords = {"of","to"}
// )
// }
// )
// )
) )
private String text; private String text;
} }
``` ```
非专业人员不要设置 analyzerParams,只需设置 enableAnalyzer = true 即可。 非专业人员不要设置 analyzerParams,只需设置 enableAnalyzer = true 即可。
## 分析仪的使用
- 使用type指定内置分析仪
- 自定义分析仪,需配置分词器和过滤器
### 默认分析仪(Type)
- **默认分析仪**:可以根据 AnalyzerType 接口中的常量来设置具体的分析仪类型。
### 分词器(Tokenizer) ### 分词器(Tokenizer)
- **默认分词器**:`standard` 分词器,基于语法规则将文本拆分为离散的单词单元。 - **默认分词器**:`standard` 分词器,基于语法规则将文本拆分为离散的单词单元。
- **注解属性**:`tokenizer`,其默认值为 `TokenizerType.standard`。 - **注解属性**:在 `AnalyzerParams` 注解中使用 `tokenizer` 属性来配置分词器。如果未指定,则默认为空字符串。
### 过滤器(Filter) ### 过滤器(Filter)
- **默认过滤器**:`lowercase` 过滤器,将所有标记转换为小写,以支持不区分大小写的搜索。 - **默认过滤器**:`lowercase` 过滤器,将所有标记转换为小写,以支持不区分大小写的搜索。
- **注解属性**:`builtInFilters` 和 `customFilters`,分别用于配置内置过滤器和自定义过滤器。 - **注解属性**:在 `Filter` 注解中使用 `builtInFilters` 和 `customFilters` 属性来配置内置过滤器和自定义过滤器。`builtInFilters` 属性接受 `BuiltInFilterType` 枚举值的数组,而 `customFilters` 属性接受 `CustomFilter` 注解的数组。
### 自定义停用词(StopWords) ### 自定义停用词(StopWords)
- **可选参数**:`stop_words`,用于指定要从分词结果中排除的停用词列表。 - **可选参数**:`stop_words`,用于指定要从分词结果中排除的停用词列表。
- **注解属性**:`customFilters` 中的 `stopWords` 属性,允许定义自定义停用词。 - **注解属性**:在 `CustomFilter` 注解中使用 `stopWords` 属性来定义自定义停用词。

View File

@ -1,7 +1,5 @@
package org.dromara.milvus.plus.annotation; package org.dromara.milvus.plus.annotation;
import org.dromara.milvus.plus.model.TokenizerType;
import java.lang.annotation.ElementType; import java.lang.annotation.ElementType;
import java.lang.annotation.Retention; import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy; import java.lang.annotation.RetentionPolicy;
@ -13,9 +11,8 @@ import java.lang.annotation.Target;
@Target(ElementType.ANNOTATION_TYPE) @Target(ElementType.ANNOTATION_TYPE)
@Retention(RetentionPolicy.RUNTIME) @Retention(RetentionPolicy.RUNTIME)
public @interface AnalyzerParams { public @interface AnalyzerParams {
String type() default ""; // 分析器类型
TokenizerType tokenizer() default TokenizerType.standard; // 分词器配置 String tokenizer() default ""; // 自定义分词器配置
BuiltInFilter[] builtInFilters() default {}; //内置过滤器 Filter filter() default @Filter; //过滤器
CustomFilter[] customFilters() default {}; //自定义过滤器
} }

View File

@ -12,6 +12,10 @@ import java.lang.annotation.Target;
*/ */
@Target(ElementType.ANNOTATION_TYPE) @Target(ElementType.ANNOTATION_TYPE)
@Retention(RetentionPolicy.RUNTIME) @Retention(RetentionPolicy.RUNTIME)
public @interface BuiltInFilter { public @interface Filter {
BuiltInFilterType name() default BuiltInFilterType.lowercase;
BuiltInFilterType[] builtInFilters() default {};; //内置过滤器
CustomFilter[] customFilters() default {}; //自定义过滤器
} }

View File

@ -1,6 +1,7 @@
package org.dromara.milvus.plus.annotation; package org.dromara.milvus.plus.annotation;
import io.milvus.v2.common.DataType; import io.milvus.v2.common.DataType;
import org.dromara.milvus.plus.model.AnalyzerType;
import java.lang.annotation.ElementType; import java.lang.annotation.ElementType;
import java.lang.annotation.Retention; import java.lang.annotation.Retention;
@ -82,6 +83,7 @@ public @interface MilvusField {
/** /**
* 分析器参数 * 分析器参数
*/ */
AnalyzerParams analyzerParams() default @AnalyzerParams; AnalyzerParams analyzerParams() default @AnalyzerParams(type =AnalyzerType.STANDARD);
} }

View File

@ -0,0 +1,8 @@
package org.dromara.milvus.plus.model;
/**
 * String constants naming analyzer types.
 *
 * <p>The values are plain strings so they can be used directly as the
 * {@code type} attribute of an {@code @AnalyzerParams} annotation.
 */
public interface AnalyzerType {

    /** Analyzer type name {@code "standard"}. */
    String STANDARD = "standard";

    /** Analyzer type name {@code "english"}. */
    String ENGLISH = "english";

    /** Analyzer type name {@code "chinese"}. */
    String CHINESE = "chinese";
}

View File

@ -1,8 +0,0 @@
package org.dromara.milvus.plus.model;
/**
 * Enumeration of the built-in tokenizer types.
 *
 * <p>Constant names are intentionally lower-case; declaration order is
 * {@code standard}, {@code whitespace}, {@code english}, {@code chinese}.
 */
public enum TokenizerType {
    standard,
    whitespace,
    english,
    chinese
}

View File

@ -1,9 +1,12 @@
package org.dromara.milvus.plus.util; package org.dromara.milvus.plus.util;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import org.apache.commons.lang3.StringUtils;
import org.dromara.milvus.plus.annotation.AnalyzerParams; import org.dromara.milvus.plus.annotation.AnalyzerParams;
import org.dromara.milvus.plus.annotation.BuiltInFilter;
import org.dromara.milvus.plus.annotation.CustomFilter; import org.dromara.milvus.plus.annotation.CustomFilter;
import org.dromara.milvus.plus.annotation.Filter;
import org.dromara.milvus.plus.model.BuiltInFilterType;
import org.springframework.util.CollectionUtils;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
@ -15,31 +18,46 @@ public class AnalyzerParamsUtils {
public static Map<String, Object> convertToMap(AnalyzerParams analyzerParams) { public static Map<String, Object> convertToMap(AnalyzerParams analyzerParams) {
Map<String, Object> paramsMap = new HashMap<>(); Map<String, Object> paramsMap = new HashMap<>();
if (analyzerParams != null) { if (analyzerParams != null) {
// 设置分词器 String type = analyzerParams.type();
paramsMap.put("tokenizer", analyzerParams.tokenizer().name().toLowerCase()); if(StringUtils.isNotEmpty(type)){
// 处理内置过滤器 //使用默认分析器
List<String> builtInFiltersList = new ArrayList<>(); paramsMap.put("type", type);
for (BuiltInFilter builtInFilter : analyzerParams.builtInFilters()) {
builtInFiltersList.add(builtInFilter.name().name());
} }
// 处理自定义过滤器 String tokenizer = analyzerParams.tokenizer();
if(StringUtils.isNotEmpty(tokenizer)){
// 设置分词器
paramsMap.put("tokenizer",tokenizer);
}
Filter filter = analyzerParams.filter();
List<String> builtInFiltersList = new ArrayList<>();
List<Map<String, Object>> customFiltersList = new ArrayList<>(); List<Map<String, Object>> customFiltersList = new ArrayList<>();
for (CustomFilter customFilter : analyzerParams.customFilters()) { if(filter!=null){
Map<String, Object> filterMap = new HashMap<>(); CustomFilter[] customFilters = filter.customFilters();
filterMap.put("type", customFilter.type()); BuiltInFilterType[] builtInFilterTypes = filter.builtInFilters();
if (customFilter.max() > 0) { // 处理内置过滤器
filterMap.put("max", customFilter.max()); for (BuiltInFilterType builtInFilterType : builtInFilterTypes) {
builtInFiltersList.add(builtInFilterType.name());
} }
if (customFilter.stopWords().length > 0) { //处理自定义过滤器
filterMap.put("stopWords", new ArrayList<>(Lists.newArrayList(customFilter.stopWords()))); for (CustomFilter customFilter : customFilters) {
Map<String, Object> filterMap = new HashMap<>();
filterMap.put("type", customFilter.type());
if (customFilter.max() > 0) {
filterMap.put("max", customFilter.max());
}
if (customFilter.stopWords().length > 0) {
filterMap.put("stop_words", new ArrayList<>(Lists.newArrayList(customFilter.stopWords())));
}
customFiltersList.add(filterMap);
} }
customFiltersList.add(filterMap);
} }
// 合并过滤器列表 // 合并过滤器列表
List<Object> filters = new ArrayList<>(); List<Object> filters = new ArrayList<>();
filters.addAll(builtInFiltersList); filters.addAll(builtInFiltersList);
filters.addAll(customFiltersList); filters.addAll(customFiltersList);
paramsMap.put("filter", filters); if(!CollectionUtils.isEmpty(filters)){
paramsMap.put("filter", filters);
}
} }
return paramsMap; return paramsMap;
} }

View File

@ -30,7 +30,7 @@
</developer> </developer>
</developers> </developers>
<properties> <properties>
<revision>2.2.0</revision> <revision>2.2.1</revision>
<maven.compiler.source>${java.version}</maven.compiler.source> <maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target> <maven.compiler.target>${java.version}</maven.compiler.target>
<maven-compiler.version>3.11.0</maven-compiler.version> <maven-compiler.version>3.11.0</maven-compiler.version>

View File

@ -21,7 +21,7 @@
<dependency> <dependency>
<groupId>org.dromara.milvus-plus</groupId> <groupId>org.dromara.milvus-plus</groupId>
<artifactId>milvus-plus-boot-starter</artifactId> <artifactId>milvus-plus-boot-starter</artifactId>
<version>2.2.0</version> <version>2.2.1</version>
</dependency> </dependency>
</dependencies> </dependencies>
<dependencyManagement> <dependencyManagement>