mirror of
https://gitee.com/dromara/MilvusPlus.git
synced 2025-12-06 08:58:26 +08:00
fix:修复2.2.0无法使用内置分析仪的问题,并优化注解的使用
This commit is contained in:
parent
5dc94f3aaf
commit
e92cc55ec1
@ -34,35 +34,55 @@ public class TextEntity {
|
|||||||
dataType = DataType.VarChar,
|
dataType = DataType.VarChar,
|
||||||
enableAnalyzer = true,
|
enableAnalyzer = true,
|
||||||
analyzerParams = @AnalyzerParams(
|
analyzerParams = @AnalyzerParams(
|
||||||
builtInFilters = {
|
type= AnalyzerType.CHINESE
|
||||||
@BuiltInFilter
|
|
||||||
},
|
|
||||||
customFilters = {
|
|
||||||
@CustomFilter(type = "length", max = 40),
|
|
||||||
@CustomFilter(type = "stop", stopWords = {"of", "to"})
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
// analyzerParams = @AnalyzerParams(
|
||||||
|
// tokenizer= "standard",
|
||||||
|
// filter=@Filter(
|
||||||
|
// builtInFilters={
|
||||||
|
// BuiltInFilterType.lowercase
|
||||||
|
// },
|
||||||
|
// customFilters = {
|
||||||
|
// @CustomFilter(
|
||||||
|
// type = "length",
|
||||||
|
// max = 40
|
||||||
|
// ),
|
||||||
|
// @CustomFilter(
|
||||||
|
// type = "stop",
|
||||||
|
// stopWords = {"of","to"}
|
||||||
|
// )
|
||||||
|
// }
|
||||||
|
// )
|
||||||
|
// )
|
||||||
)
|
)
|
||||||
private String text;
|
private String text;
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
非专业人员不要设置 analyzerParams,只需设置 enableAnalyzer = true即可。
|
非专业人员不要设置 analyzerParams,只需设置 enableAnalyzer = true即可。
|
||||||
|
|
||||||
|
## 分析仪的使用
|
||||||
|
|
||||||
|
- 使用 `type` 指定内置分析仪
|
||||||
|
- 自定义分析仪,需配置分词器和过滤器
|
||||||
|
|
||||||
|
### 默认分析仪(Type)
|
||||||
|
|
||||||
|
- **默认分析仪**:可以根据 `AnalyzerType` 接口中的常量(`STANDARD`、`ENGLISH`、`CHINESE`)来设置具体的内置分析仪类型。
|
||||||
|
|
||||||
### 分词器(Tokenizer)
|
### 分词器(Tokenizer)
|
||||||
|
|
||||||
- **默认分词器**:`standard` 分词器,基于语法规则将文本拆分为离散的单词单元。
|
- **默认分词器**:`standard` 分词器,基于语法规则将文本拆分为离散的单词单元。
|
||||||
- **注解属性**:`tokenizer`,其默认值为 `TokenizerType.standard`。
|
- **注解属性**:在 `AnalyzerParams` 注解中使用 `tokenizer` 属性来配置分词器。如果未指定,则默认为空字符串,此时不会向分析器参数中写入 `tokenizer`。
|
||||||
|
|
||||||
### 过滤器(Filter)
|
### 过滤器(Filter)
|
||||||
|
|
||||||
- **默认过滤器**:`lowercase` 过滤器,将所有标记转换为小写,以支持不区分大小写的搜索。
|
- **默认过滤器**:`lowercase` 过滤器,将所有标记转换为小写,以支持不区分大小写的搜索。
|
||||||
- **注解属性**:`builtInFilters` 和 `customFilters`,分别用于配置内置过滤器和自定义过滤器。
|
- **注解属性**:在 `Filter` 注解中使用 `builtInFilters` 和 `customFilters` 属性来配置内置过滤器和自定义过滤器。`builtInFilters` 属性接受 `BuiltInFilterType` 枚举值的数组,而 `customFilters` 属性接受 `CustomFilter` 注解的数组。
|
||||||
|
|
||||||
### 自定义停用词(StopWords)
|
### 自定义停用词(StopWords)
|
||||||
|
|
||||||
- **可选参数**:`stop_words`,用于指定要从分词结果中排除的停用词列表。
|
- **可选参数**:`stop_words`,用于指定要从分词结果中排除的停用词列表。
|
||||||
- **注解属性**:`customFilters` 中的 `stopWords` 属性,允许定义自定义停用词。
|
- **注解属性**:在 `CustomFilter` 注解中使用 `stopWords` 属性来定义自定义停用词。
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,7 +1,5 @@
|
|||||||
package org.dromara.milvus.plus.annotation;
|
package org.dromara.milvus.plus.annotation;
|
||||||
|
|
||||||
import org.dromara.milvus.plus.model.TokenizerType;
|
|
||||||
|
|
||||||
import java.lang.annotation.ElementType;
|
import java.lang.annotation.ElementType;
|
||||||
import java.lang.annotation.Retention;
|
import java.lang.annotation.Retention;
|
||||||
import java.lang.annotation.RetentionPolicy;
|
import java.lang.annotation.RetentionPolicy;
|
||||||
@ -13,9 +11,8 @@ import java.lang.annotation.Target;
|
|||||||
@Target(ElementType.ANNOTATION_TYPE)
|
@Target(ElementType.ANNOTATION_TYPE)
|
||||||
@Retention(RetentionPolicy.RUNTIME)
|
@Retention(RetentionPolicy.RUNTIME)
|
||||||
public @interface AnalyzerParams {
|
public @interface AnalyzerParams {
|
||||||
|
String type() default ""; // 分析器类型
|
||||||
TokenizerType tokenizer() default TokenizerType.standard; // 分词器配置
|
String tokenizer() default ""; // 自定义分词器配置
|
||||||
BuiltInFilter[] builtInFilters() default {}; //内置过滤器
|
Filter filter() default @Filter; //过滤器
|
||||||
CustomFilter[] customFilters() default {}; //自定义过滤器
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -12,6 +12,10 @@ import java.lang.annotation.Target;
|
|||||||
*/
|
*/
|
||||||
@Target(ElementType.ANNOTATION_TYPE)
|
@Target(ElementType.ANNOTATION_TYPE)
|
||||||
@Retention(RetentionPolicy.RUNTIME)
|
@Retention(RetentionPolicy.RUNTIME)
|
||||||
public @interface BuiltInFilter {
|
public @interface Filter {
|
||||||
BuiltInFilterType name() default BuiltInFilterType.lowercase;
|
|
||||||
|
BuiltInFilterType[] builtInFilters() default {};; //内置过滤器
|
||||||
|
|
||||||
|
CustomFilter[] customFilters() default {}; //自定义过滤器
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -1,6 +1,7 @@
|
|||||||
package org.dromara.milvus.plus.annotation;
|
package org.dromara.milvus.plus.annotation;
|
||||||
|
|
||||||
import io.milvus.v2.common.DataType;
|
import io.milvus.v2.common.DataType;
|
||||||
|
import org.dromara.milvus.plus.model.AnalyzerType;
|
||||||
|
|
||||||
import java.lang.annotation.ElementType;
|
import java.lang.annotation.ElementType;
|
||||||
import java.lang.annotation.Retention;
|
import java.lang.annotation.Retention;
|
||||||
@ -82,6 +83,7 @@ public @interface MilvusField {
|
|||||||
/**
|
/**
|
||||||
* 分析器参数。
|
* 分析器参数。
|
||||||
*/
|
*/
|
||||||
AnalyzerParams analyzerParams() default @AnalyzerParams;
|
AnalyzerParams analyzerParams() default @AnalyzerParams(type =AnalyzerType.STANDARD);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -0,0 +1,8 @@
|
|||||||
|
package org.dromara.milvus.plus.model;
|
||||||
|
|
||||||
|
public interface AnalyzerType {
|
||||||
|
String STANDARD="standard";
|
||||||
|
String ENGLISH="english";
|
||||||
|
String CHINESE="chinese";
|
||||||
|
|
||||||
|
}
|
||||||
@ -1,8 +0,0 @@
|
|||||||
package org.dromara.milvus.plus.model;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 枚举表示内置的分词器类型。
|
|
||||||
*/
|
|
||||||
public enum TokenizerType {
|
|
||||||
standard, whitespace, english, chinese
|
|
||||||
}
|
|
||||||
@ -1,9 +1,12 @@
|
|||||||
package org.dromara.milvus.plus.util;
|
package org.dromara.milvus.plus.util;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dromara.milvus.plus.annotation.AnalyzerParams;
|
import org.dromara.milvus.plus.annotation.AnalyzerParams;
|
||||||
import org.dromara.milvus.plus.annotation.BuiltInFilter;
|
|
||||||
import org.dromara.milvus.plus.annotation.CustomFilter;
|
import org.dromara.milvus.plus.annotation.CustomFilter;
|
||||||
|
import org.dromara.milvus.plus.annotation.Filter;
|
||||||
|
import org.dromara.milvus.plus.model.BuiltInFilterType;
|
||||||
|
import org.springframework.util.CollectionUtils;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -15,31 +18,46 @@ public class AnalyzerParamsUtils {
|
|||||||
public static Map<String, Object> convertToMap(AnalyzerParams analyzerParams) {
|
public static Map<String, Object> convertToMap(AnalyzerParams analyzerParams) {
|
||||||
Map<String, Object> paramsMap = new HashMap<>();
|
Map<String, Object> paramsMap = new HashMap<>();
|
||||||
if (analyzerParams != null) {
|
if (analyzerParams != null) {
|
||||||
// 设置分词器
|
String type = analyzerParams.type();
|
||||||
paramsMap.put("tokenizer", analyzerParams.tokenizer().name().toLowerCase());
|
if(StringUtils.isNotEmpty(type)){
|
||||||
// 处理内置过滤器
|
//使用默认分析器
|
||||||
List<String> builtInFiltersList = new ArrayList<>();
|
paramsMap.put("type", type);
|
||||||
for (BuiltInFilter builtInFilter : analyzerParams.builtInFilters()) {
|
|
||||||
builtInFiltersList.add(builtInFilter.name().name());
|
|
||||||
}
|
}
|
||||||
// 处理自定义过滤器
|
String tokenizer = analyzerParams.tokenizer();
|
||||||
|
if(StringUtils.isNotEmpty(tokenizer)){
|
||||||
|
// 设置分词器
|
||||||
|
paramsMap.put("tokenizer",tokenizer);
|
||||||
|
}
|
||||||
|
Filter filter = analyzerParams.filter();
|
||||||
|
List<String> builtInFiltersList = new ArrayList<>();
|
||||||
List<Map<String, Object>> customFiltersList = new ArrayList<>();
|
List<Map<String, Object>> customFiltersList = new ArrayList<>();
|
||||||
for (CustomFilter customFilter : analyzerParams.customFilters()) {
|
if(filter!=null){
|
||||||
Map<String, Object> filterMap = new HashMap<>();
|
CustomFilter[] customFilters = filter.customFilters();
|
||||||
filterMap.put("type", customFilter.type());
|
BuiltInFilterType[] builtInFilterTypes = filter.builtInFilters();
|
||||||
if (customFilter.max() > 0) {
|
// 处理内置过滤器
|
||||||
filterMap.put("max", customFilter.max());
|
for (BuiltInFilterType builtInFilterType : builtInFilterTypes) {
|
||||||
|
builtInFiltersList.add(builtInFilterType.name());
|
||||||
}
|
}
|
||||||
if (customFilter.stopWords().length > 0) {
|
//处理自定义过滤器
|
||||||
filterMap.put("stopWords", new ArrayList<>(Lists.newArrayList(customFilter.stopWords())));
|
for (CustomFilter customFilter : customFilters) {
|
||||||
|
Map<String, Object> filterMap = new HashMap<>();
|
||||||
|
filterMap.put("type", customFilter.type());
|
||||||
|
if (customFilter.max() > 0) {
|
||||||
|
filterMap.put("max", customFilter.max());
|
||||||
|
}
|
||||||
|
if (customFilter.stopWords().length > 0) {
|
||||||
|
filterMap.put("stop_words", new ArrayList<>(Lists.newArrayList(customFilter.stopWords())));
|
||||||
|
}
|
||||||
|
customFiltersList.add(filterMap);
|
||||||
}
|
}
|
||||||
customFiltersList.add(filterMap);
|
|
||||||
}
|
}
|
||||||
// 合并过滤器列表
|
// 合并过滤器列表
|
||||||
List<Object> filters = new ArrayList<>();
|
List<Object> filters = new ArrayList<>();
|
||||||
filters.addAll(builtInFiltersList);
|
filters.addAll(builtInFiltersList);
|
||||||
filters.addAll(customFiltersList);
|
filters.addAll(customFiltersList);
|
||||||
paramsMap.put("filter", filters);
|
if(!CollectionUtils.isEmpty(filters)){
|
||||||
|
paramsMap.put("filter", filters);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return paramsMap;
|
return paramsMap;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -30,7 +30,7 @@
|
|||||||
</developer>
|
</developer>
|
||||||
</developers>
|
</developers>
|
||||||
<properties>
|
<properties>
|
||||||
<revision>2.2.0</revision>
|
<revision>2.2.1</revision>
|
||||||
<maven.compiler.source>${java.version}</maven.compiler.source>
|
<maven.compiler.source>${java.version}</maven.compiler.source>
|
||||||
<maven.compiler.target>${java.version}</maven.compiler.target>
|
<maven.compiler.target>${java.version}</maven.compiler.target>
|
||||||
<maven-compiler.version>3.11.0</maven-compiler.version>
|
<maven-compiler.version>3.11.0</maven-compiler.version>
|
||||||
|
|||||||
@ -21,7 +21,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.dromara.milvus-plus</groupId>
|
<groupId>org.dromara.milvus-plus</groupId>
|
||||||
<artifactId>milvus-plus-boot-starter</artifactId>
|
<artifactId>milvus-plus-boot-starter</artifactId>
|
||||||
<version>2.2.0</version>
|
<version>2.2.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<dependencyManagement>
|
<dependencyManagement>
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user