feat: add Sparse-BM25 text search and Tantivy text matching

xgc 2024-11-29 11:25:11 +08:00
parent 6e66ab96ae
commit 996f91b539
25 changed files with 564 additions and 27 deletions

View File

@ -0,0 +1,89 @@
## Full-text search with automatically generated vectors
Combined with semantics-based dense vector search, this feature removes the need to generate vector data manually and thus simplifies text-based search. It operates through the following workflow:
1. **Text input**: you insert raw text documents or provide query text; no manual embedding is required.
2. **Text analysis**: Milvus uses an analyzer to tokenize the input text into individual searchable terms.
3. **Function processing**: a built-in function takes the tokenized terms and converts them into sparse vector representations.
4. **Collection storage**: Milvus stores these sparse embeddings in the collection for efficient retrieval.
5. **BM25 scoring**: during search, Milvus applies the BM25 algorithm to score the stored documents and ranks the matches by their relevance to the query text (see the formula below the diagram).
<div style="display: inline-block; border: 4px solid #ccc; border-radius: 10px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); margin: 10px; padding: 10px;">
<img src="./logo/text_embedding.png" alt="text_embedding" style="border-radius: 10px;" />
</div>
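For reference, the BM25 score mentioned in step 5 is the standard Okapi BM25 formula shown below; the parameter values Milvus applies are not part of this commit, so only the textbook form is given:

$$
\mathrm{score}(D,Q)=\sum_{q_i \in Q}\mathrm{IDF}(q_i)\cdot\frac{f(q_i,D)\,(k_1+1)}{f(q_i,D)+k_1\left(1-b+b\cdot\frac{|D|}{\mathrm{avgdl}}\right)}
$$

where $f(q_i,D)$ is the frequency of query term $q_i$ in document $D$, $|D|$ is the document length, $\mathrm{avgdl}$ is the average document length in the collection, and $k_1$ and $b$ are tunable parameters.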
### Example
Add the `AnalyzerParams` annotation to the entity class:
```java
import org.dromara.milvus.plus.annotation.*;
public class TextEntity {

    @MilvusField(
            name = "text",
            dataType = DataType.VarChar,
            enableAnalyzer = true,
            analyzerParams = @AnalyzerParams(
                    builtInFilters = {
                            @BuiltInFilter
                    },
                    customFilters = {
                            @CustomFilter(type = "length", max = 40),
                            @CustomFilter(type = "stop", stopWords = {"of", "to"})
                    }
            )
    )
    private String text;
}
```
If you are not an advanced user, do not set `analyzerParams`; setting `enableAnalyzer = true` is sufficient.
### Tokenizer
- **Default tokenizer**: the `standard` tokenizer, which splits text into discrete word units based on grammar rules.
- **Annotation attribute**: `tokenizer`, with a default value of `TokenizerType.standard`.
### Filter
- **Default filter**: the `lowercase` filter, which converts all tokens to lowercase to support case-insensitive search.
- **Annotation attributes**: `builtInFilters` and `customFilters`, used to configure built-in and custom filters respectively.
### Custom stop words (StopWords)
- **Optional parameter**: `stop_words`, which specifies the list of stop words to exclude from the tokenization result.
- **Annotation attribute**: the `stopWords` attribute inside `customFilters`, which allows custom stop words to be defined.
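As a minimal sketch (not a public API of this library), the example annotation above is converted by `AnalyzerParamsUtils.convertToMap` in this commit into a plain parameter map that is passed to the field builder; the map keys below come from that class, while the sketch's class and method names are made up for illustration:
```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class AnalyzerParamsSketch {
    // Rough shape of the analyzer parameters produced for the example annotation above.
    static Map<String, Object> exampleParams() {
        Map<String, Object> params = new HashMap<>();
        params.put("tokenizer", "standard");                 // TokenizerType name, lower-cased

        List<Object> filters = new ArrayList<>();
        filters.add("lowercase");                            // built-in filters are added by name

        Map<String, Object> lengthFilter = new HashMap<>();  // custom filters become small maps
        lengthFilter.put("type", "length");
        lengthFilter.put("max", 40);
        filters.add(lengthFilter);

        Map<String, Object> stopFilter = new HashMap<>();
        stopFilter.put("type", "stop");
        stopFilter.put("stopWords", Arrays.asList("of", "to"));
        filters.add(stopFilter);

        params.put("filter", filters);
        return params;
    }
}
```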
### Internal processing
Based on this annotation, MilvusPlus internally performs the following steps (a minimal sketch follows the list):
- Generates a field that stores the sparse embedding corresponding to the text.
- Defines a function that converts the text into a sparse vector.
- Creates an index on that field.
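A minimal sketch of these three steps in terms of the Milvus Java SDK, mirroring `MilvusConverter` in this commit (the `_sparse`, `_sparse_index`, and `_bm25_emb` naming follows that class; the wrapping class and method here are illustrative only):
```java
import com.google.common.collect.Lists;
import io.milvus.common.clientenum.FunctionType;
import io.milvus.v2.common.DataType;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;

class TextEmbeddingSchemaSketch {
    static void addSparseArtifacts(String fieldName) {
        // 1. Sparse vector field that stores the BM25 embedding of the text field
        AddFieldReq sparseField = AddFieldReq.builder()
                .fieldName(fieldName + "_sparse")
                .dataType(DataType.SparseFloatVector)
                .build();

        // 2. Index on the sparse field, scored with the BM25 metric
        IndexParam sparseIndex = IndexParam.builder()
                .indexName(fieldName + "_sparse_index")
                .fieldName(fieldName + "_sparse")
                .indexType(IndexParam.IndexType.AUTOINDEX)
                .metricType(IndexParam.MetricType.BM25)
                .build();

        // 3. Function that converts the raw text into the sparse vector
        CreateCollectionReq.Function bm25Function = CreateCollectionReq.Function.builder()
                .name(fieldName + "_bm25_emb")
                .functionType(FunctionType.BM25)
                .inputFieldNames(Lists.newArrayList(fieldName))
                .outputFieldNames(Lists.newArrayList(fieldName + "_sparse"))
                .build();

        // The field, index and function are then added to the collection schema
        // before the collection is created.
    }
}
```
The `textVector(...)` call in the usage example below then targets this `<field>_sparse` vector field; the query text is passed as an `EmbeddedText`, so Milvus computes its BM25 sparse embedding on the server side.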
### Usage
```java
MilvusResp<List<MilvusResult<Face>>> xx = mapper
.queryWrapper()
.textVector(Face::getText, "whats the focus of information retrieval?")
.topK(2)
.query();
```

Tantivy文本匹配.md Normal file
View File

@ -0,0 +1,46 @@
### Search with text match
Milvus integrates Tantivy to power its underlying inverted index and term-based text search. For each text entry, Milvus indexes it as follows:
- Analyzer: the analyzer tokenizes the input text into individual words, or tokens, and then applies filters as needed. This allows Milvus to build an index from these tokens.
- Indexing: after text analysis, Milvus creates an inverted index that maps each unique token to the documents containing it (a conceptual sketch follows the diagram below).
<div style="display: inline-block; border: 4px solid #ccc; border-radius: 10px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); margin: 10px; padding: 10px;">
<img src="./logo/text_match.png" alt="text_match" style="border-radius: 10px;" />
</div>
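As an illustration only (this is not Tantivy's actual implementation), an inverted index conceptually maps each token to the identifiers of the documents that contain it:
```java
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

class InvertedIndexSketch {
    // token -> ids of the documents that contain it
    private final Map<String, Set<Long>> postings = new HashMap<>();

    void index(long docId, List<String> tokens) {
        for (String token : tokens) {
            postings.computeIfAbsent(token, t -> new HashSet<>()).add(docId);
        }
    }

    // term lookup: ids of documents containing any of the given terms
    Set<Long> match(String... terms) {
        Set<Long> result = new HashSet<>();
        for (String term : terms) {
            result.addAll(postings.getOrDefault(term, Collections.emptySet()));
        }
        return result;
    }
}
```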
### Example
Set both the `enableAnalyzer` and `enableMatch` parameters to `true`.
This instructs Milvus to tokenize the text and create an inverted index for the specified field, enabling fast and efficient text matching.
```java
import org.dromara.milvus.plus.annotation.*;
public class TextEntity {

    @MilvusField(
            name = "text",
            dataType = DataType.VarChar,
            enableAnalyzer = true,
            enableMatch = true
    )
    private String text;
}
```
### Searching with text match
Text match can be combined with vector similarity search to narrow the search scope and improve performance. By filtering the collection with text match before the vector similarity search, you reduce the number of documents that need to be searched and thereby shorten query time.
```java
MilvusResp<List<MilvusResult<Face>>> xx = mapper
.queryWrapper()
.textVector(Face::getText, "whats the focus of information retrieval?")
.textMatch(Face::getText,"retrieval")
.textMatch(Face::getText,"information")
.topK(2)
.query();
```
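Under the hood, each `textMatch(...)` call adds one `TEXT_MATCH(...)` clause, the clauses are joined with `and`, and the result is merged with any other filter conditions using `&&` (per `ConditionBuilder#textMatch` and `buildFilters()` in this commit). A hypothetical sketch of the filter expression produced by the two calls above, assuming `wrapFieldName` returns the plain field name:
```java
class TextMatchFilterSketch {
    // Filter expression sent to Milvus for the two textMatch(...) calls above (illustrative).
    static final String FILTER =
            "TEXT_MATCH(text, 'retrieval') and TEXT_MATCH(text, 'information')";
}
```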

BIN
logo/text_embedding.png Normal file  |  Size: 72 KiB

BIN
logo/text_match.png Normal file  |  Size: 104 KiB

View File

@ -1,14 +1,14 @@
package org.dromara.milvus.demo.java;
import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;
import io.milvus.v2.client.MilvusClientV2;
import org.dromara.milvus.demo.model.Face;
import org.dromara.milvus.plus.core.mapper.BaseMilvusMapper;
import org.dromara.milvus.plus.model.MilvusProperties;
import org.dromara.milvus.plus.model.vo.MilvusResp;
import org.dromara.milvus.plus.model.vo.MilvusResult;
import org.dromara.milvus.plus.service.impl.MilvusClientBuild;
import io.milvus.v2.client.MilvusClientV2;
import org.dromara.milvus.plus.util.GsonUtil;
import java.util.List;
@ -34,7 +34,7 @@ public class JavaTest {
.partition("face_01")
.topK(3)
.query();
System.out.println("标量查询 query--queryWrapper---{}"+JSONObject.toJSONString(query2));
System.out.println("标量查询 query--queryWrapper---{}"+ GsonUtil.toJson(query2));
build.close();
}
}

View File

@ -28,7 +28,7 @@
<dependency>
<groupId>io.milvus</groupId>
<artifactId>milvus-sdk-java</artifactId>
<version>2.4.4</version>
<version>2.5.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>

View File

@ -0,0 +1,21 @@
package org.dromara.milvus.plus.annotation;
import org.dromara.milvus.plus.model.TokenizerType;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Annotation describing analyzer parameters, containing the tokenizer and the filter lists
*/
@Target(ElementType.ANNOTATION_TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface AnalyzerParams {
TokenizerType tokenizer() default TokenizerType.standard; // tokenizer configuration
BuiltInFilter[] builtInFilters() default {}; // built-in filters
CustomFilter[] customFilters() default {}; // custom filters
}

View File

@ -0,0 +1,17 @@
package org.dromara.milvus.plus.annotation;
import org.dromara.milvus.plus.model.BuiltInFilterType;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Annotation defining a built-in filter
*/
@Target(ElementType.ANNOTATION_TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface BuiltInFilter {
BuiltInFilterType name() default BuiltInFilterType.lowercase;
}

View File

@ -0,0 +1,17 @@
package org.dromara.milvus.plus.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Annotation defining a custom filter
*/
@Target(ElementType.ANNOTATION_TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface CustomFilter {
String type() default "";
int max() default 0;
String[] stopWords() default {};
}

View File

@ -67,4 +67,21 @@ public @interface MilvusField {
* Whether this field is a partition key
*/
boolean isPartitionKey() default false;
/**
* Enable the analyzer
*/
boolean enableAnalyzer() default false;
/**
*
* Enable text match
*/
boolean enableMatch() default false;
/**
* Analyzer parameters
*/
AnalyzerParams analyzerParams() default @AnalyzerParams;
}

View File

@ -7,6 +7,9 @@ import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.index.request.CreateIndexReq;
import java.util.List;
/**
* @author xgc
**/
@ -17,6 +20,8 @@ public class CollectionSchemaBuilder {
private final CreateCollectionReq.CollectionSchema schema;
private ConsistencyLevel consistencyLevel=ConsistencyLevel.BOUNDED;
private Boolean enableDynamicField=false;
private List<CreateCollectionReq.Function> functions;
public CollectionSchemaBuilder(Boolean enableDynamicField,String collectionName, MilvusClientV2 wrapper) {
this.collectionName = collectionName;
@ -40,6 +45,11 @@ public class CollectionSchemaBuilder {
}
return this;
}
public void addFun(List<CreateCollectionReq.Function> functions){
for (CreateCollectionReq.Function function : functions) {
schema.addFunction(function);
}
}
public void addConsistencyLevel(ConsistencyLevel level){
this.consistencyLevel=level;
}

View File

@ -2,10 +2,13 @@ package org.dromara.milvus.plus.converter;
import com.google.common.collect.Lists;
import io.milvus.common.clientenum.FunctionType;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.ConsistencyLevel;
import io.milvus.v2.common.DataType;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.collection.request.GetLoadStateReq;
import io.milvus.v2.service.collection.request.LoadCollectionReq;
import io.milvus.v2.service.partition.request.CreatePartitionReq;
@ -20,6 +23,8 @@ import org.dromara.milvus.plus.cache.ConversionCache;
import org.dromara.milvus.plus.cache.MilvusCache;
import org.dromara.milvus.plus.cache.PropertyCache;
import org.dromara.milvus.plus.model.MilvusEntity;
import org.dromara.milvus.plus.util.AnalyzerParamsUtils;
import org.dromara.milvus.plus.util.GsonUtil;
import org.springframework.util.CollectionUtils;
import java.lang.reflect.Field;
@ -84,6 +89,7 @@ public class MilvusConverter {
// Cache used to store the mapping between properties and functions
PropertyCache propertyCache = new PropertyCache();
List<Field> fields = getAllFieldsFromClass(entityClass);
List<CreateCollectionReq.Function> functions=new ArrayList<>();
// Iterate over all fields of the entity class and read the @MilvusField annotation info
for (Field field : fields) {
MilvusField fieldAnnotation = field.getAnnotation(MilvusField.class);
@ -106,8 +112,35 @@ public class MilvusConverter {
.isPrimaryKey(fieldAnnotation.isPrimaryKey())
.isPartitionKey(fieldAnnotation.isPartitionKey())
.elementType(fieldAnnotation.elementType())
.enableAnalyzer(fieldAnnotation.enableAnalyzer())
.enableMatch(fieldAnnotation.enableMatch())
.autoID(false);
autoID=autoID?autoID:fieldAnnotation.autoID();
if(fieldAnnotation.enableAnalyzer()&&fieldAnnotation.dataType()==DataType.VarChar){
Map<String, Object> analyzerParams = AnalyzerParamsUtils.convertToMap(fieldAnnotation.analyzerParams());
log.info("-----------analyzerParams--------- \n"+ GsonUtil.toJson(analyzerParams));
builder.analyzerParams(analyzerParams);
// Build the SPARSE_FLOAT_VECTOR field corresponding to this text field
AddFieldReq sparse = AddFieldReq.builder().fieldName(fieldName + "_sparse").dataType(DataType.SparseFloatVector).build();
milvusFields.add(sparse);
// Build the index
IndexParam sparseIndex = IndexParam.builder()
.indexName(fieldName + "_sparse_index")
.fieldName(fieldName + "_sparse")
.indexType(IndexParam.IndexType.AUTOINDEX)
.metricType(IndexParam.MetricType.BM25)
.build();
indexParams.add(sparseIndex);
// Define a function that converts the text into a sparse vector
String funName = fieldName+"_bm25_emb";
CreateCollectionReq.Function fun= CreateCollectionReq.Function.builder().
name(funName).
functionType(FunctionType.BM25).
inputFieldNames(Lists.newArrayList(fieldName)).
outputFieldNames(Lists.newArrayList(fieldName + "_sparse")).build();
functions.add(fun);
}
// Description
Optional.of(fieldAnnotation.description())
.filter(StringUtils::isNotEmpty).ifPresent(builder::description);
@ -134,6 +167,7 @@ public class MilvusConverter {
// Set the Milvus fields and index parameters
milvus.setMilvusFields(milvusFields);
milvus.setIndexParams(indexParams);
milvus.setFunctions(functions);
// Cache the conversion result and collection information
ConversionCache conversionCache = new ConversionCache();
conversionCache.setMilvusEntity(milvus);
@ -226,8 +260,10 @@ public class MilvusConverter {
);
schemaBuilder.addField(milvusEntity.getMilvusFields().toArray(new AddFieldReq[0]));
schemaBuilder.addConsistencyLevel(milvusEntity.getConsistencyLevel());
schemaBuilder.addFun(milvusEntity.getFunctions());
log.info("-------create schema---------");
schemaBuilder.createSchema();
log.info("-------create schema fun---------");
schemaBuilder.createIndex(indexParams);
log.info("-------create index---------");
// Create partitions

View File

@ -41,9 +41,11 @@ public class SearchRespConverter {
Map<String, Object> entityMap = new HashMap<>();
for (Map.Entry<String, Object> entry : searchResult.getEntity().entrySet()) {
String key = propertyCache.findKeyByValue(entry.getKey());
if(key!=null){
Object value = entry.getValue();
entityMap.put(key,value);
}
}
// Convert the mapped Map into the Java entity type T
T entity = GsonUtil.convertMapToType(entityMap, entityType);
MilvusResult<T> tMilvusResult = new MilvusResult<>();
@ -111,9 +113,10 @@ public class SearchRespConverter {
// Convert key names via the property cache to match the Java entity field names
for (Map.Entry<String, Object> entry : entityMap.entrySet()) {
String key = propertyCache.findKeyByValue(entry.getKey());
if(key!=null){
Object value = entry.getValue();
entityMap2.put(key,value);
}
}
// Use the conversion utility to convert the mapped Map into an entity of the specified type
T entity = GsonUtil.convertMapToType(entityMap2, entityType);

View File

@ -1,6 +1,7 @@
package org.dromara.milvus.plus.core.conditions;
import org.dromara.milvus.plus.core.FieldFunction;
import org.springframework.util.CollectionUtils;
import java.lang.reflect.Field;
import java.util.ArrayList;
@ -15,6 +16,7 @@ import java.util.stream.Collectors;
public abstract class ConditionBuilder<T> {
protected List<String> filters = new ArrayList<>();
protected List<String> textMatches =new ArrayList<>();
protected Map<String, Object> getPropertiesMap(T t) {
Map<String, Object> propertiesMap = new HashMap<>();
Class<?> clazz = t.getClass();
@ -35,6 +37,34 @@ public abstract class ConditionBuilder<T> {
return propertiesMap; // Return a Map of property names and property values
}
/**
* Add a TEXT_MATCH condition (supports multiple values)
*
* @param fieldName field name or field function
* @param values    list of values to match
* @return the current condition builder
*/
protected ConditionBuilder<T> textMatch(String fieldName, List<String> values) {
String joinedValues = String.join(" ", values);
String match = "TEXT_MATCH(" + wrapFieldName(fieldName) + ", '" + joinedValues + "')";
textMatches.add(match);
return this;
}
protected ConditionBuilder<T> textMatch(String fieldName, String value) {
String match = "TEXT_MATCH(" + wrapFieldName(fieldName) + ", '" + value + "')";
textMatches.add(match);
return this;
}
protected ConditionBuilder<T> textMatch(FieldFunction<T,?> fieldName, String value) {
textMatch(fieldName.getFieldName(fieldName),value);
return this;
}
protected ConditionBuilder<T> textMatch(FieldFunction<T,?> fieldName, List<String> values) {
textMatch(fieldName.getFieldName(fieldName),values);
return this;
}
/**
* Add an equality condition
*
@ -354,6 +384,10 @@ public abstract class ConditionBuilder<T> {
* @return the assembled filter expression string
*/
protected String buildFilters(){
if(!CollectionUtils.isEmpty(textMatches)){
String textMatchFilter = textMatches.stream().collect(Collectors.joining(" and "));
filters.add(textMatchFilter);
}
return filters.stream().collect(Collectors.joining(" && "));
}

View File

@ -578,6 +578,54 @@ public class LambdaDeleteWrapper<T> extends AbstractChainWrapper<T> implements
}
return this;
}
/**
* Add a TEXT_MATCH condition (supports multiple values)
*
* @param fieldName field name or field function
* @param values    list of values to match
* @return the current condition builder
*/
public LambdaDeleteWrapper<T> textMatch(String fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaDeleteWrapper<T> textMatch(String fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaDeleteWrapper<T> textMatch(FieldFunction<T,?> fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaDeleteWrapper<T> textMatch(FieldFunction<T,?> fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaDeleteWrapper<T> textMatch(boolean condition,String fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
public LambdaDeleteWrapper<T> textMatch(boolean condition,String fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaDeleteWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaDeleteWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
// Logic operations
public LambdaDeleteWrapper<T> and(ConditionBuilder<T> other) {

View File

@ -5,6 +5,7 @@ import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.ConsistencyLevel;
import io.milvus.v2.service.vector.request.*;
import io.milvus.v2.service.vector.request.data.BaseVector;
import io.milvus.v2.service.vector.request.data.EmbeddedText;
import io.milvus.v2.service.vector.request.data.FloatVec;
import io.milvus.v2.service.vector.request.ranker.BaseRanker;
import io.milvus.v2.service.vector.response.GetResp;
@ -677,6 +678,18 @@ public class LambdaQueryWrapper<T> extends AbstractChainWrapper<T> implements Wr
vectors.add(baseVector);
return this;
}
public LambdaQueryWrapper<T> textVector(FieldFunction<T,?> annsField, String vector) {
this.annsField=annsField.getFieldName(annsField)+"_sparse";
BaseVector baseVector = new EmbeddedText(vector);
vectors.add(baseVector);
return this;
}
public LambdaQueryWrapper<T> textVector(String annsField,String vector) {
this.annsField=annsField+"_sparse";
BaseVector baseVector = new EmbeddedText(vector);
vectors.add(baseVector);
return this;
}
public LambdaQueryWrapper<T> vector(BaseVector vector) {
vectors.add(vector);
@ -705,6 +718,55 @@ public class LambdaQueryWrapper<T> extends AbstractChainWrapper<T> implements Wr
this.setTopK(topK);
return this;
}
/**
* Add a TEXT_MATCH condition (supports multiple values)
*
* @param fieldName field name or field function
* @param values    list of values to match
* @return the current condition builder
*/
public LambdaQueryWrapper<T> textMatch(String fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaQueryWrapper<T> textMatch(String fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaQueryWrapper<T> textMatch(FieldFunction<T,?> fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaQueryWrapper<T> textMatch(FieldFunction<T,?> fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaQueryWrapper<T> textMatch(boolean condition,String fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
public LambdaQueryWrapper<T> textMatch(boolean condition,String fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaQueryWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaQueryWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
/**
* Build the complete search request
* @return the search request object

View File

@ -583,6 +583,54 @@ public class LambdaUpdateWrapper<T> extends AbstractChainWrapper<T> implements W
}
return this;
}
/**
* Add a TEXT_MATCH condition (supports multiple values)
*
* @param fieldName field name or field function
* @param values    list of values to match
* @return the current condition builder
*/
public LambdaUpdateWrapper<T> textMatch(String fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaUpdateWrapper<T> textMatch(String fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaUpdateWrapper<T> textMatch(FieldFunction<T,?> fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaUpdateWrapper<T> textMatch(FieldFunction<T,?> fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaUpdateWrapper<T> textMatch(boolean condition,String fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
public LambdaUpdateWrapper<T> textMatch(boolean condition,String fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaUpdateWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaUpdateWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
// Logic operations
public LambdaUpdateWrapper<T> and(ConditionBuilder<T> other) {

View File

@ -0,0 +1,8 @@
package org.dromara.milvus.plus.model;
/**
* Enum of the built-in filter types
*/
public enum BuiltInFilterType {
lowercase, asciifolding, alphanumonly, cnalphanumonly, cncharonly, stop, length, stemmer
}

View File

@ -3,6 +3,7 @@ package org.dromara.milvus.plus.model;
import io.milvus.v2.common.ConsistencyLevel;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import lombok.Data;
import java.util.List;
@ -19,4 +20,5 @@ public class MilvusEntity {
private List<String> partitionName;
private ConsistencyLevel consistencyLevel;
private Boolean enableDynamicField;
private List<CreateCollectionReq.Function> functions;
}

View File

@ -0,0 +1,8 @@
package org.dromara.milvus.plus.model;
/**
* Enum of the built-in tokenizer types
*/
public enum TokenizerType {
standard, whitespace, english, chinese
}

View File

@ -0,0 +1,47 @@
package org.dromara.milvus.plus.util;
import com.google.common.collect.Lists;
import org.dromara.milvus.plus.annotation.AnalyzerParams;
import org.dromara.milvus.plus.annotation.BuiltInFilter;
import org.dromara.milvus.plus.annotation.CustomFilter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class AnalyzerParamsUtils {
public static Map<String, Object> convertToMap(AnalyzerParams analyzerParams) {
Map<String, Object> paramsMap = new HashMap<>();
if (analyzerParams != null) {
// Set the tokenizer
paramsMap.put("tokenizer", analyzerParams.tokenizer().name().toLowerCase());
// Handle built-in filters
List<String> builtInFiltersList = new ArrayList<>();
for (BuiltInFilter builtInFilter : analyzerParams.builtInFilters()) {
builtInFiltersList.add(builtInFilter.name().name());
}
// Handle custom filters
List<Map<String, Object>> customFiltersList = new ArrayList<>();
for (CustomFilter customFilter : analyzerParams.customFilters()) {
Map<String, Object> filterMap = new HashMap<>();
filterMap.put("type", customFilter.type());
if (customFilter.max() > 0) {
filterMap.put("max", customFilter.max());
}
if (customFilter.stopWords().length > 0) {
filterMap.put("stopWords", new ArrayList<>(Lists.newArrayList(customFilter.stopWords())));
}
customFiltersList.add(filterMap);
}
// Merge the filter lists
List<Object> filters = new ArrayList<>();
filters.addAll(builtInFiltersList);
filters.addAll(customFiltersList);
paramsMap.put("filter", filters);
}
return paramsMap;
}
}

View File

@ -1,14 +1,14 @@
package org.dromara.solon;
import com.alibaba.fastjson.JSONObject;
import org.dromara.milvus.plus.model.vo.MilvusResp;
import org.dromara.milvus.plus.model.vo.MilvusResult;
import org.dromara.solon.test.model.Face;
import org.dromara.solon.test.test.FaceMilvusMapper;
import io.milvus.v2.service.vector.response.DeleteResp;
import io.milvus.v2.service.vector.response.InsertResp;
import io.milvus.v2.service.vector.response.UpsertResp;
import lombok.extern.slf4j.Slf4j;
import org.dromara.milvus.plus.model.vo.MilvusResp;
import org.dromara.milvus.plus.model.vo.MilvusResult;
import org.dromara.milvus.plus.util.GsonUtil;
import org.dromara.solon.test.model.Face;
import org.dromara.solon.test.test.FaceMilvusMapper;
import org.noear.solon.annotation.Controller;
import org.noear.solon.annotation.Get;
import org.noear.solon.annotation.Inject;
@ -49,35 +49,35 @@ public class DemoController {
face1.setFaceVector(vector1);
faces.add(face1);
}
MilvusResp<InsertResp> insert = mapper.insert(faces.toArray(faces.toArray(new Face[0]))); log.info("insert--{}", JSONObject.toJSONString(insert));
MilvusResp<InsertResp> insert = mapper.insert(faces.toArray(faces.toArray(new Face[0]))); log.info("insert--{}", GsonUtil.toJson(insert));
// query by id
MilvusResp<List<MilvusResult<Face>>> query = mapper.getById(9l);
log.info("query--getById---{}", JSONObject.toJSONString(query));
log.info("query--getById---{}", GsonUtil.toJson(query));
// vector query
MilvusResp<List<MilvusResult<Face>>> query1 = mapper.queryWrapper()
.vector(Face::getFaceVector, vector)
.ne(Face::getPersonId, 1L)
.topK(3)
.query();
log.info("向量查询 query--queryWrapper---{}", JSONObject.toJSONString(query1));
log.info("向量查询 query--queryWrapper---{}", GsonUtil.toJson(query1));
// scalar query
MilvusResp<List<MilvusResult<Face>>> query2 = mapper.queryWrapper()
.eq(Face::getPersonId, 2L)
.topK(3)
.query();
log.info("标量查询 query--queryWrapper---{}", JSONObject.toJSONString(query2));
log.info("标量查询 query--queryWrapper---{}", GsonUtil.toJson(query2));
// update
vector.clear();
for (int i = 0; i < 128; i++) {
vector.add((float) (Math.random() * 100)); // random values are used here only as an example
}
MilvusResp<UpsertResp> update = mapper.updateById(face);log.info("update--{}", JSONObject.toJSONString(update));
MilvusResp<UpsertResp> update = mapper.updateById(face);log.info("update--{}", GsonUtil.toJson(update));
// query by id
MilvusResp<List<MilvusResult<Face>>> query3 = mapper.getById(1L);log.info("query--getById---{}", JSONObject.toJSONString(query3));
MilvusResp<List<MilvusResult<Face>>> query3 = mapper.getById(1L);log.info("query--getById---{}", GsonUtil.toJson(query3));
// delete
MilvusResp<DeleteResp> remove = mapper.removeById(1L);log.info("remove--{}", JSONObject.toJSONString(remove));
MilvusResp<DeleteResp> remove = mapper.removeById(1L);log.info("remove--{}",GsonUtil.toJson(remove));
// query
MilvusResp<List<MilvusResult<Face>>> query4 = mapper.getById(1L);log.info("query--{}", JSONObject.toJSONString(query4));
MilvusResp<List<MilvusResult<Face>>> query4 = mapper.getById(1L);log.info("query--{}", GsonUtil.toJson(query4));
}
}

View File

@ -21,7 +21,7 @@
<dependency>
<groupId>org.dromara.milvus-plus</groupId>
<artifactId>milvus-plus-boot-starter</artifactId>
<version>2.1.7</version>
<version>2.1.8</version>
</dependency>
</dependencies>
<dependencyManagement>

View File

@ -37,10 +37,9 @@ public class ApplicationRunnerTest implements ApplicationRunner {
@Override
public void run(ApplicationArguments args) throws InterruptedException {
milvusService.dropCollection("face_collection");
// insertFace();
// selectFace(12);
//// selectFace(11);
// selectFace(11);
// delFace(11);
// Thread.sleep(10000);
// countFace(22);
@ -48,14 +47,30 @@ public class ApplicationRunnerTest implements ApplicationRunner {
// vectorQuery();
// scalarQuery();
// update();
selectTextEmbedding();
}
private void selectTextEmbedding(){
MilvusResp<List<MilvusResult<Face>>> xx = mapper
.queryWrapper()
.textVector(Face::getText, "whats the focus of information retrieval?")
.textMatch(Face::getText,"retrieval")
.topK(2)
.query();
System.out.println("===");
}
private void selectFace(Integer temp){
MilvusResp<List<MilvusResult<Face>>> query = mapper.
queryWrapper()
.eq(Face::getTemp, temp)
.query(Face::getPersonName,Face::getTemp);
log.info("query temp 11--{}", GsonUtil.toJson(query));
LambdaQueryWrapper<Face> mapper = milvusService.ofQuery(Face.class);
MilvusResp<List<MilvusResult<Face>>> test = mapper
.eq(Face::getPersonName, "test")
.topK(1)
.query();
}
private void countFace(Integer temp){
MilvusResp<Long> query = mapper.
@ -69,7 +84,7 @@ public class ApplicationRunnerTest implements ApplicationRunner {
log.info("del temp 11 --{}", GsonUtil.toJson(remove));
}
private void insertFace() {
List<Face> faces = LongStream.range(1, 10)
List<Face> faces = LongStream.range(1, 2)
.mapToObj(i -> {
Face faceTmp = new Face();
// faceTmp.setPersonId(i);
@ -84,6 +99,7 @@ public class ApplicationRunnerTest implements ApplicationRunner {
person.setImages(Lists.newArrayList("https://baidu.com"));
faceTmp.setPerson(person);
faceTmp.setTemp(i%2==0?11:22);
faceTmp.setText(i % 2 == 0 ? "Information retrieval is a field of study." : "information retrieval focuses on finding relevant information in large datasets.");
return faceTmp;
})
.collect(Collectors.toList());

View File

@ -38,6 +38,14 @@ public class Face {
)
private Integer temp;
@MilvusField(
name = "text",
dataType = DataType.VarChar,
enableAnalyzer = true,
enableMatch = true
)
private String text; // text content
@MilvusField(
name = "face_vector", // 字段名称
dataType = DataType.FloatVector, // 数据类型为浮点型向量