diff --git a/README.cn.md b/README.cn.md index b471ad5..f5f7c93 100644 --- a/README.cn.md +++ b/README.cn.md @@ -31,7 +31,7 @@ org.dromara.milvus-plus milvus-plus-core - 2.1.8 + 2.2.0 ``` @@ -41,7 +41,7 @@ Spring应用支持: org.dromara.milvus-plus milvus-plus-boot-starter - 2.1.8 + 2.2.0 ``` @@ -51,11 +51,12 @@ Solon应用支持: org.dromara.milvus-plus milvus-plus-solon-plugin - 2.1.8 + 2.2.0 ``` ## 需知 +- 2.2.0版本支持数据库版本2.5.x,增强文本搜索能力 - 2.1.7版本之后groupId修改为 org.dromara.milvus-plus,版本之前为 org.dromara - 2.0.0版本必须使用索引注解定义索引,不然启动报错后,再添加无效,需要先删除集合 - 2.0.0版本暂未发布 MilvusService 功能 diff --git a/README.md b/README.md index 1e9e687..3ed657f 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Custom extension support: org.dromara.milvus-plus milvus-plus-core - 2.1.8 + 2.2.0 ``` @@ -39,7 +39,7 @@ Spring application support: org.dromara.milvus-plus milvus-plus-boot-starter - 2.1.8 + 2.2.0 ``` @@ -49,7 +49,7 @@ Solon application support: org.dromara.milvus-plus milvus-plus-solon-plugin - 2.1.8 + 2.2.0 ``` diff --git a/Sparse-BM25文本搜索.md b/Sparse-BM25文本搜索.md new file mode 100644 index 0000000..6eb2a46 --- /dev/null +++ b/Sparse-BM25文本搜索.md @@ -0,0 +1,89 @@ + + + + + + +## 文本自动构建向量的搜索方式 + + + +与基于语义的密集向量搜索相结合,无需手动生成向量数据,从而简化了基于文本的搜索过程。此功能通过以下工作流程运行: + +1. **文本输入**:您插入原始文本文档或提供查询文本,无需手动Embedding +2. **文本分析**:Milvus 使用分析器将输入文本标记为单独的可搜索术语。 +3. **函数处理**:内置函数接收标记化术语并将其转换为稀疏向量表示。 +4. **集合存储**:Milvus 将这些稀疏嵌入存储在集合中,以便高效检索。 +5. **BM25 评分**:在搜索过程中,Milvus 应用 BM25 算法为存储的文档计算分数,并根据与查询文本的相关性对匹配结果进行排名。 + +
+ ![text_embedding](logo/text_embedding.png)
+ +### 示例 + +在实体类中添加 `AnalyzerParams` 注解: + +```java +import org.dromara.milvus.plus.annotation.*; + +public class TextEntity { + + @MilvusField( + name = "text", + dataType = DataType.VarChar, + enableAnalyzer = true, + analyzerParams = @AnalyzerParams( + builtInFilters = { + @BuiltInFilter + }, + customFilters = { + @CustomFilter(type = "length", max = 40), + @CustomFilter(type = "stop", stopWords = {"of", "to"}) + } + ) + ) + private String text; +} +``` + +非专业人员不要设置 analyzerParams,只需设置 enableAnalyzer = true即可。 + +### 分词器(Tokenizer) + +- **默认分词器**:`standard` 分词器,基于语法规则将文本拆分为离散的单词单元。 +- **注解属性**:`tokenizer`,其默认值为 `TokenizerType.standard`。 + +### 过滤器(Filter) + +- **默认过滤器**:`lowercase` 过滤器,将所有标记转换为小写,以支持不区分大小写的搜索。 +- **注解属性**:`builtInFilters` 和 `customFilters`,分别用于配置内置过滤器和自定义过滤器。 + +### 自定义停用词(StopWords) + +- **可选参数**:`stop_words`,用于指定要从分词结果中排除的停用词列表。 +- **注解属性**:`customFilters` 中的 `stopWords` 属性,允许定义自定义停用词。 + + + +### 内部处理 + +MilvusPlus内部会基于该注解,实现以下步骤 + +- 生成存储文本对应Embedding存储的字段 + +- 定义一个函数将文本转换为稀疏向量的函数 + +- 创建该字段的索引 + + + +### 使用 + +``` +MilvusResp>> xx = mapper + .queryWrapper() + .textVector(Face::getText, "whats the focus of information retrieval?") + .topK(2) + .query(); +``` \ No newline at end of file diff --git a/Tantivy文本匹配.md b/Tantivy文本匹配.md new file mode 100644 index 0000000..79c16b1 --- /dev/null +++ b/Tantivy文本匹配.md @@ -0,0 +1,46 @@ +### 文本匹配的搜索方式 + +Milvus 集成了Tantivy来支持其底层倒排索引和基于术语的文本搜索。对于每个文本条目,Milvus 按照以下步骤对其进行索引: + +- 分析器: 分析器将输入文本标记为单个单词或标记,然后根据需要应用过滤器。这允许 Milvus 根据这些标记构建索引。 +- 索引: 在文本分析之后,Milvus 会创建一个倒排索引,将每个唯一的标记映射到包含它的文档。 + + +
+ ![text_match](logo/text_match.png)
+ +### 示例 +请将`enableAnalyzer`和`enableMatch`参数都设置为True。 +这将指示 Milvus 对文本进行标记并为指定字段创建倒排索引,从而实现快速高效的文本匹配。 + +```java +import org.dromara.milvus.plus.annotation.*; + +public class TextEntity { + + @MilvusField( + name = "text", + dataType = DataType.VarChar, + enableAnalyzer = true, + enableMatch = true + ) + private String text; +} +``` +### 使用文本匹配进行搜索 + +文本匹配可以与向量相似性搜索结合使用,以缩小搜索范围并提高搜索性能。通过在向量相似性搜索之前使用文本匹配过滤集合,可以减少需要搜索的文档数量,从而缩短查询时间。 + +```java + +MilvusResp>> xx = mapper + .queryWrapper() + .textVector(Face::getText, "whats the focus of information retrieval?") + .textMatch(Face::getText,"retrieval") + .textMatch(Face::getText,"information") + .topK(2) + .query(); + +``` + diff --git a/logo/text_embedding.png b/logo/text_embedding.png new file mode 100644 index 0000000..e10ebfb Binary files /dev/null and b/logo/text_embedding.png differ diff --git a/logo/text_match.png b/logo/text_match.png new file mode 100644 index 0000000..08edabe Binary files /dev/null and b/logo/text_match.png differ diff --git a/milvus-core-demo/pom.xml b/milvus-core-demo/pom.xml index 944b236..44dc47e 100644 --- a/milvus-core-demo/pom.xml +++ b/milvus-core-demo/pom.xml @@ -17,7 +17,7 @@ org.dromara.milvus-plus milvus-plus-core - 2.1.7 + 2.2.0 diff --git a/milvus-core-demo/src/main/java/org/dromara/milvus/demo/java/JavaTest.java b/milvus-core-demo/src/main/java/org/dromara/milvus/demo/java/JavaTest.java index d543a04..586db5e 100644 --- a/milvus-core-demo/src/main/java/org/dromara/milvus/demo/java/JavaTest.java +++ b/milvus-core-demo/src/main/java/org/dromara/milvus/demo/java/JavaTest.java @@ -1,14 +1,14 @@ package org.dromara.milvus.demo.java; -import com.alibaba.fastjson.JSONObject; import com.google.common.collect.Lists; +import io.milvus.v2.client.MilvusClientV2; import org.dromara.milvus.demo.model.Face; import org.dromara.milvus.plus.core.mapper.BaseMilvusMapper; import org.dromara.milvus.plus.model.MilvusProperties; import org.dromara.milvus.plus.model.vo.MilvusResp; import 
org.dromara.milvus.plus.model.vo.MilvusResult; import org.dromara.milvus.plus.service.impl.MilvusClientBuild; -import io.milvus.v2.client.MilvusClientV2; +import org.dromara.milvus.plus.util.GsonUtil; import java.util.List; @@ -34,7 +34,7 @@ public class JavaTest { .partition("face_01") .topK(3) .query(); - System.out.println("标量查询 query--queryWrapper---{}"+JSONObject.toJSONString(query2)); + System.out.println("标量查询 query--queryWrapper---{}"+ GsonUtil.toJson(query2)); build.close(); } } diff --git a/milvus-plus-boot-starter/src/main/java/org/dromara/milvus/plus/service/MilvusInit.java b/milvus-plus-boot-starter/src/main/java/org/dromara/milvus/plus/service/MilvusInit.java index 40452b1..027383a 100644 --- a/milvus-plus-boot-starter/src/main/java/org/dromara/milvus/plus/service/MilvusInit.java +++ b/milvus-plus-boot-starter/src/main/java/org/dromara/milvus/plus/service/MilvusInit.java @@ -54,6 +54,7 @@ public class MilvusInit extends AbstractMilvusClientBuilder implements Initializ " | |\\/| | | \\ \\ / / | | / __| | |_) | | | | / __|\n" + " | | | | | |\\ V /| |_| \\__ \\ | __/| | |_| \\__ \\\n" + " |_| |_|_|_| \\_/ \\__,_|___/ |_| |_|\\__,_|___/\n\n"; + System.out.println(banner); } } \ No newline at end of file diff --git a/milvus-plus-core/pom.xml b/milvus-plus-core/pom.xml index c218f35..7b3eee0 100644 --- a/milvus-plus-core/pom.xml +++ b/milvus-plus-core/pom.xml @@ -28,7 +28,7 @@ io.milvus milvus-sdk-java - 2.4.4 + 2.5.0 org.apache.logging.log4j diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/AnalyzerParams.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/AnalyzerParams.java new file mode 100644 index 0000000..3350411 --- /dev/null +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/AnalyzerParams.java @@ -0,0 +1,21 @@ +package org.dromara.milvus.plus.annotation; + +import org.dromara.milvus.plus.model.TokenizerType; + +import java.lang.annotation.ElementType; +import 
java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * 表示分析器参数的注解,包含分词器和过滤器列表。 + */ +@Target(ElementType.ANNOTATION_TYPE) +@Retention(RetentionPolicy.RUNTIME) +public @interface AnalyzerParams { + + TokenizerType tokenizer() default TokenizerType.standard; // 分词器配置 + BuiltInFilter[] builtInFilters() default {}; //内置过滤器 + CustomFilter[] customFilters() default {}; //自定义过滤器 + +} \ No newline at end of file diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/BuiltInFilter.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/BuiltInFilter.java new file mode 100644 index 0000000..cdb2f60 --- /dev/null +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/BuiltInFilter.java @@ -0,0 +1,17 @@ +package org.dromara.milvus.plus.annotation; + +import org.dromara.milvus.plus.model.BuiltInFilterType; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * 定义内置过滤器的注解。 + */ +@Target(ElementType.ANNOTATION_TYPE) +@Retention(RetentionPolicy.RUNTIME) +public @interface BuiltInFilter { + BuiltInFilterType name() default BuiltInFilterType.lowercase; +} \ No newline at end of file diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/CustomFilter.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/CustomFilter.java new file mode 100644 index 0000000..6aaa63d --- /dev/null +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/CustomFilter.java @@ -0,0 +1,17 @@ +package org.dromara.milvus.plus.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * 定义自定义过滤器的注解。 + */ +@Target(ElementType.ANNOTATION_TYPE) +@Retention(RetentionPolicy.RUNTIME) 
+public @interface CustomFilter { + String type() default ""; + int max() default 0; + String[] stopWords() default {}; +} \ No newline at end of file diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/MilvusField.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/MilvusField.java index bbd4942..ca9dfb4 100644 --- a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/MilvusField.java +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/annotation/MilvusField.java @@ -67,4 +67,21 @@ public @interface MilvusField { * 是否为分区键 */ boolean isPartitionKey() default false; + + /** + * 启动分析器 + */ + boolean enableAnalyzer() default false; + + /** + * + * 启用文本匹配 + */ + boolean enableMatch() default false; + + /** + * 分析器参数。 + */ + AnalyzerParams analyzerParams() default @AnalyzerParams; + } \ No newline at end of file diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/builder/CollectionSchemaBuilder.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/builder/CollectionSchemaBuilder.java index 2c23551..59d7854 100644 --- a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/builder/CollectionSchemaBuilder.java +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/builder/CollectionSchemaBuilder.java @@ -7,6 +7,9 @@ import io.milvus.v2.common.IndexParam; import io.milvus.v2.service.collection.request.AddFieldReq; import io.milvus.v2.service.collection.request.CreateCollectionReq; import io.milvus.v2.service.index.request.CreateIndexReq; + +import java.util.List; + /** * @author xgc **/ @@ -17,6 +20,8 @@ public class CollectionSchemaBuilder { private final CreateCollectionReq.CollectionSchema schema; private ConsistencyLevel consistencyLevel=ConsistencyLevel.BOUNDED; private Boolean enableDynamicField=false; + private List functions; + public CollectionSchemaBuilder(Boolean enableDynamicField,String collectionName, MilvusClientV2 wrapper) { this.collectionName = 
collectionName; @@ -40,6 +45,11 @@ public class CollectionSchemaBuilder { } return this; } + public void addFun(List functions){ + for (CreateCollectionReq.Function function : functions) { + schema.addFunction(function); + } + } public void addConsistencyLevel(ConsistencyLevel level){ this.consistencyLevel=level; } diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/converter/MilvusConverter.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/converter/MilvusConverter.java index 7d6c027..c1daf04 100644 --- a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/converter/MilvusConverter.java +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/converter/MilvusConverter.java @@ -2,10 +2,13 @@ package org.dromara.milvus.plus.converter; import com.google.common.collect.Lists; +import io.milvus.common.clientenum.FunctionType; import io.milvus.v2.client.MilvusClientV2; import io.milvus.v2.common.ConsistencyLevel; +import io.milvus.v2.common.DataType; import io.milvus.v2.common.IndexParam; import io.milvus.v2.service.collection.request.AddFieldReq; +import io.milvus.v2.service.collection.request.CreateCollectionReq; import io.milvus.v2.service.collection.request.GetLoadStateReq; import io.milvus.v2.service.collection.request.LoadCollectionReq; import io.milvus.v2.service.partition.request.CreatePartitionReq; @@ -20,6 +23,8 @@ import org.dromara.milvus.plus.cache.ConversionCache; import org.dromara.milvus.plus.cache.MilvusCache; import org.dromara.milvus.plus.cache.PropertyCache; import org.dromara.milvus.plus.model.MilvusEntity; +import org.dromara.milvus.plus.util.AnalyzerParamsUtils; +import org.dromara.milvus.plus.util.GsonUtil; import org.springframework.util.CollectionUtils; import java.lang.reflect.Field; @@ -84,6 +89,7 @@ public class MilvusConverter { // 用于存储属性与函数映射的缓存 PropertyCache propertyCache = new PropertyCache(); List fields = getAllFieldsFromClass(entityClass); + List functions=new ArrayList<>(); // 
遍历实体类的所有字段,读取@MilvusField注解信息 for (Field field : fields) { MilvusField fieldAnnotation = field.getAnnotation(MilvusField.class); @@ -106,8 +112,35 @@ public class MilvusConverter { .isPrimaryKey(fieldAnnotation.isPrimaryKey()) .isPartitionKey(fieldAnnotation.isPartitionKey()) .elementType(fieldAnnotation.elementType()) + .enableAnalyzer(fieldAnnotation.enableAnalyzer()) + .enableMatch(fieldAnnotation.enableMatch()) .autoID(false); autoID=autoID?autoID:fieldAnnotation.autoID(); + + if(fieldAnnotation.enableAnalyzer()&&fieldAnnotation.dataType()==DataType.VarChar){ + Map analyzerParams = AnalyzerParamsUtils.convertToMap(fieldAnnotation.analyzerParams()); + log.info("-----------analyzerParams--------- \n"+ GsonUtil.toJson(analyzerParams)); + builder.analyzerParams(analyzerParams); + //构建该文本对应的SPARSE_FLOAT_VECTOR向量字段 + AddFieldReq sparse = AddFieldReq.builder().fieldName(fieldName + "_sparse").dataType(DataType.SparseFloatVector).build(); + milvusFields.add(sparse); + //构建索引 + IndexParam sparseIndex = IndexParam.builder() + .indexName(fieldName + "_sparse_index") + .fieldName(fieldName + "_sparse") + .indexType(IndexParam.IndexType.AUTOINDEX) + .metricType(IndexParam.MetricType.BM25) + .build(); + indexParams.add(sparseIndex); + //定义一个函数,将文本转换为稀疏向量 + String funName = fieldName+"_bm25_emb"; + CreateCollectionReq.Function fun= CreateCollectionReq.Function.builder(). + name(funName). + functionType(FunctionType.BM25). + inputFieldNames(Lists.newArrayList(fieldName)). 
+ outputFieldNames(Lists.newArrayList(fieldName + "_sparse")).build(); + functions.add(fun); + } // 描述 Optional.of(fieldAnnotation.description()) .filter(StringUtils::isNotEmpty).ifPresent(builder::description); @@ -134,6 +167,7 @@ public class MilvusConverter { // 设置Milvus字段和索引参数 milvus.setMilvusFields(milvusFields); milvus.setIndexParams(indexParams); + milvus.setFunctions(functions); // 缓存转换结果和集合信息 ConversionCache conversionCache = new ConversionCache(); conversionCache.setMilvusEntity(milvus); @@ -226,8 +260,10 @@ public class MilvusConverter { ); schemaBuilder.addField(milvusEntity.getMilvusFields().toArray(new AddFieldReq[0])); schemaBuilder.addConsistencyLevel(milvusEntity.getConsistencyLevel()); + schemaBuilder.addFun(milvusEntity.getFunctions()); log.info("-------create schema---------"); schemaBuilder.createSchema(); + log.info("-------create schema fun---------"); schemaBuilder.createIndex(indexParams); log.info("-------create index---------"); // 创建分区 diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/converter/SearchRespConverter.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/converter/SearchRespConverter.java index 9ffdd4d..dc4fcfc 100644 --- a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/converter/SearchRespConverter.java +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/converter/SearchRespConverter.java @@ -41,8 +41,10 @@ public class SearchRespConverter { Map entityMap = new HashMap<>(); for (Map.Entry entry : searchResult.getEntity().entrySet()) { String key = propertyCache.findKeyByValue(entry.getKey()); - Object value = entry.getValue(); - entityMap.put(key,value); + if(key!=null){ + Object value = entry.getValue(); + entityMap.put(key,value); + } } // 将转换后的Map转换为Java实体类T T entity = GsonUtil.convertMapToType(entityMap, entityType); @@ -111,9 +113,10 @@ public class SearchRespConverter { // 通过属性缓存转换键名,以适应Java实体的字段命名 for (Map.Entry entry : entityMap.entrySet()) { String key = 
propertyCache.findKeyByValue(entry.getKey()); - Object value = entry.getValue(); - entityMap2.put(key,value); - + if(key!=null){ + Object value = entry.getValue(); + entityMap2.put(key,value); + } } // 使用转换工具将映射后的Map转换为指定类型的实体 T entity = GsonUtil.convertMapToType(entityMap2, entityType); diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/ConditionBuilder.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/ConditionBuilder.java index 862d4f3..43532fe 100644 --- a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/ConditionBuilder.java +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/ConditionBuilder.java @@ -1,6 +1,7 @@ package org.dromara.milvus.plus.core.conditions; import org.dromara.milvus.plus.core.FieldFunction; +import org.springframework.util.CollectionUtils; import java.lang.reflect.Field; import java.util.ArrayList; @@ -15,6 +16,7 @@ import java.util.stream.Collectors; public abstract class ConditionBuilder { protected List filters = new ArrayList<>(); + protected List textMatches =new ArrayList<>(); protected Map getPropertiesMap(T t) { Map propertiesMap = new HashMap<>(); Class clazz = t.getClass(); @@ -35,6 +37,34 @@ public abstract class ConditionBuilder { return propertiesMap; // 返回包含属性名和属性值的Map } + + /** + * 添加 TEXT_MATCH 条件,使用 FieldFunction,支持多个值。 + * + * @param fieldName 字段函数 + * @param values 要匹配的值列表 + * @return 当前条件构建器对象 + */ + protected ConditionBuilder textMatch(String fieldName, List values) { + String joinedValues = String.join(" ", values); + String match = "TEXT_MATCH(" + wrapFieldName(fieldName) + ", '" + joinedValues + "')"; + textMatches.add(match); + return this; + } + protected ConditionBuilder textMatch(String fieldName, String value) { + String match = "TEXT_MATCH(" + wrapFieldName(fieldName) + ", '" + value + "')"; + textMatches.add(match); + return this; + } + protected ConditionBuilder textMatch(FieldFunction fieldName, 
String value) { + textMatch(fieldName.getFieldName(fieldName),value); + return this; + } + protected ConditionBuilder textMatch(FieldFunction fieldName, List values) { + textMatch(fieldName.getFieldName(fieldName),values); + return this; + } + /** * 添加等于条件。 * @@ -354,6 +384,10 @@ public abstract class ConditionBuilder { * @return 构建好的过滤条件字符串 */ protected String buildFilters(){ + if(!CollectionUtils.isEmpty(textMatches)){ + String textMatchFilter = textMatches.stream().collect(Collectors.joining(" and ")); + if(!filters.contains(textMatchFilter)){ filters.add(textMatchFilter); } // guard the append: a second buildFilters() call must not duplicate the TEXT_MATCH clause in the shared filters list + } return filters.stream().collect(Collectors.joining(" && ")); } diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaDeleteWrapper.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaDeleteWrapper.java index a38d771..034937b 100644 --- a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaDeleteWrapper.java +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaDeleteWrapper.java @@ -578,6 +578,54 @@ public class LambdaDeleteWrapper extends AbstractChainWrapper implements } return this; } + /** + * 添加 TEXT_MATCH 条件,使用 FieldFunction,支持多个值。 + * + * @param fieldName 字段函数 + * @param values 要匹配的值列表 + * @return 当前条件构建器对象 + */ + public LambdaDeleteWrapper textMatch(String fieldName, List values) { + super.textMatch(fieldName,values); + return this; + } + public LambdaDeleteWrapper textMatch(String fieldName, String value) { + super.textMatch(fieldName,value); + return this; + } + public LambdaDeleteWrapper textMatch(FieldFunction fieldName, String value) { + super.textMatch(fieldName,value); + return this; + } + public LambdaDeleteWrapper textMatch(FieldFunction fieldName, List values) { + super.textMatch(fieldName,values); + return this; + } + public LambdaDeleteWrapper textMatch(boolean condition,String fieldName, List values) { + if(condition){ + super.textMatch(fieldName,values); + } + return this; + } + public
LambdaDeleteWrapper textMatch(boolean condition,String fieldName, String value) { + if(condition){ + super.textMatch(fieldName,value); + } + return this; + } + public LambdaDeleteWrapper textMatch(boolean condition,FieldFunction fieldName, String value) { + if(condition){ + super.textMatch(fieldName,value); + } + return this; + } + public LambdaDeleteWrapper textMatch(boolean condition,FieldFunction fieldName, List values) { + if(condition){ + super.textMatch(fieldName,values); + } + return this; + } + // Logic operations public LambdaDeleteWrapper and(ConditionBuilder other) { diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaQueryWrapper.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaQueryWrapper.java index c4b2c01..a6fb126 100644 --- a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaQueryWrapper.java +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaQueryWrapper.java @@ -5,6 +5,7 @@ import io.milvus.v2.client.MilvusClientV2; import io.milvus.v2.common.ConsistencyLevel; import io.milvus.v2.service.vector.request.*; import io.milvus.v2.service.vector.request.data.BaseVector; +import io.milvus.v2.service.vector.request.data.EmbeddedText; import io.milvus.v2.service.vector.request.data.FloatVec; import io.milvus.v2.service.vector.request.ranker.BaseRanker; import io.milvus.v2.service.vector.response.GetResp; @@ -677,6 +678,18 @@ public class LambdaQueryWrapper extends AbstractChainWrapper implements Wr vectors.add(baseVector); return this; } + public LambdaQueryWrapper textVector(FieldFunction annsField, String vector) { + this.annsField=annsField.getFieldName(annsField)+"_sparse"; + BaseVector baseVector = new EmbeddedText(vector); + vectors.add(baseVector); + return this; + } + public LambdaQueryWrapper textVector(String annsField,String vector) { + this.annsField=annsField+"_sparse"; + BaseVector baseVector = new 
EmbeddedText(vector); + vectors.add(baseVector); + return this; + } public LambdaQueryWrapper vector(BaseVector vector) { vectors.add(vector); @@ -705,6 +718,55 @@ public class LambdaQueryWrapper extends AbstractChainWrapper implements Wr this.setTopK(topK); return this; } + + /** + * 添加 TEXT_MATCH 条件,使用 FieldFunction,支持多个值。 + * + * @param fieldName 字段函数 + * @param values 要匹配的值列表 + * @return 当前条件构建器对象 + */ + public LambdaQueryWrapper textMatch(String fieldName, List values) { + super.textMatch(fieldName,values); + return this; + } + public LambdaQueryWrapper textMatch(String fieldName, String value) { + super.textMatch(fieldName,value); + return this; + } + public LambdaQueryWrapper textMatch(FieldFunction fieldName, String value) { + super.textMatch(fieldName,value); + return this; + } + public LambdaQueryWrapper textMatch(FieldFunction fieldName, List values) { + super.textMatch(fieldName,values); + return this; + } + public LambdaQueryWrapper textMatch(boolean condition,String fieldName, List values) { + if(condition){ + super.textMatch(fieldName,values); + } + return this; + } + public LambdaQueryWrapper textMatch(boolean condition,String fieldName, String value) { + if(condition){ + super.textMatch(fieldName,value); + } + return this; + } + public LambdaQueryWrapper textMatch(boolean condition,FieldFunction fieldName, String value) { + if(condition){ + super.textMatch(fieldName,value); + } + return this; + } + public LambdaQueryWrapper textMatch(boolean condition,FieldFunction fieldName, List values) { + if(condition){ + super.textMatch(fieldName,values); + } + return this; + } + /** * 构建完整的搜索请求 * @return 搜索请求对象 diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaUpdateWrapper.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaUpdateWrapper.java index ae31757..e521a54 100644 --- a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaUpdateWrapper.java +++ 
b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/core/conditions/LambdaUpdateWrapper.java @@ -583,6 +583,54 @@ public class LambdaUpdateWrapper extends AbstractChainWrapper implements W } return this; } + /** + * 添加 TEXT_MATCH 条件,使用 FieldFunction,支持多个值。 + * + * @param fieldName 字段函数 + * @param values 要匹配的值列表 + * @return 当前条件构建器对象 + */ + public LambdaUpdateWrapper textMatch(String fieldName, List values) { + super.textMatch(fieldName,values); + return this; + } + public LambdaUpdateWrapper textMatch(String fieldName, String value) { + super.textMatch(fieldName,value); + return this; + } + public LambdaUpdateWrapper textMatch(FieldFunction fieldName, String value) { + super.textMatch(fieldName,value); + return this; + } + public LambdaUpdateWrapper textMatch(FieldFunction fieldName, List values) { + super.textMatch(fieldName,values); + return this; + } + public LambdaUpdateWrapper textMatch(boolean condition,String fieldName, List values) { + if(condition){ + super.textMatch(fieldName,values); + } + return this; + } + public LambdaUpdateWrapper textMatch(boolean condition,String fieldName, String value) { + if(condition){ + super.textMatch(fieldName,value); + } + return this; + } + public LambdaUpdateWrapper textMatch(boolean condition,FieldFunction fieldName, String value) { + if(condition){ + super.textMatch(fieldName,value); + } + return this; + } + public LambdaUpdateWrapper textMatch(boolean condition,FieldFunction fieldName, List values) { + if(condition){ + super.textMatch(fieldName,values); + } + return this; + } + // Logic operations public LambdaUpdateWrapper and(ConditionBuilder other) { diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/model/BuiltInFilterType.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/model/BuiltInFilterType.java new file mode 100644 index 0000000..edc545a --- /dev/null +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/model/BuiltInFilterType.java @@ -0,0 +1,8 @@ +package 
org.dromara.milvus.plus.model; + +/** + * 枚举表示内置的过滤器类型。 + */ +public enum BuiltInFilterType { + lowercase, asciifolding, alphanumonly, cnalphanumonly, cncharonly, stop, length, stemmer +} \ No newline at end of file diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/model/MilvusEntity.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/model/MilvusEntity.java index 2ee7ad1..e5cbe7f 100644 --- a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/model/MilvusEntity.java +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/model/MilvusEntity.java @@ -3,6 +3,7 @@ package org.dromara.milvus.plus.model; import io.milvus.v2.common.ConsistencyLevel; import io.milvus.v2.common.IndexParam; import io.milvus.v2.service.collection.request.AddFieldReq; +import io.milvus.v2.service.collection.request.CreateCollectionReq; import lombok.Data; import java.util.List; @@ -19,4 +20,5 @@ public class MilvusEntity { private List partitionName; private ConsistencyLevel consistencyLevel; private Boolean enableDynamicField; + private List functions; } diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/model/TokenizerType.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/model/TokenizerType.java new file mode 100644 index 0000000..61f2e40 --- /dev/null +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/model/TokenizerType.java @@ -0,0 +1,8 @@ +package org.dromara.milvus.plus.model; + +/** + * 枚举表示内置的分词器类型。 + */ +public enum TokenizerType { + standard, whitespace, english, chinese +} \ No newline at end of file diff --git a/milvus-plus-core/src/main/java/org/dromara/milvus/plus/util/AnalyzerParamsUtils.java b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/util/AnalyzerParamsUtils.java new file mode 100644 index 0000000..7d8bc08 --- /dev/null +++ b/milvus-plus-core/src/main/java/org/dromara/milvus/plus/util/AnalyzerParamsUtils.java @@ -0,0 +1,47 @@ +package org.dromara.milvus.plus.util; + +import 
com.google.common.collect.Lists; +import org.dromara.milvus.plus.annotation.AnalyzerParams; +import org.dromara.milvus.plus.annotation.BuiltInFilter; +import org.dromara.milvus.plus.annotation.CustomFilter; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class AnalyzerParamsUtils { + + public static Map<String, Object> convertToMap(AnalyzerParams analyzerParams) { + Map<String, Object> paramsMap = new HashMap<>(); + if (analyzerParams != null) { + // Tokenizer name, lower-cased + paramsMap.put("tokenizer", analyzerParams.tokenizer().name().toLowerCase()); + // Built-in filters are passed by name + List<String> builtInFiltersList = new ArrayList<>(); + for (BuiltInFilter builtInFilter : analyzerParams.builtInFilters()) { + builtInFiltersList.add(builtInFilter.name().name()); + } + // Custom filters are passed as parameter maps + List<Map<String, Object>> customFiltersList = new ArrayList<>(); + for (CustomFilter customFilter : analyzerParams.customFilters()) { + Map<String, Object> filterMap = new HashMap<>(); + filterMap.put("type", customFilter.type()); + if (customFilter.max() > 0) { + filterMap.put("max", customFilter.max()); + } + if (customFilter.stopWords().length > 0) { + filterMap.put("stop_words", new ArrayList<>(Lists.newArrayList(customFilter.stopWords()))); // Milvus stop filter reads the "stop_words" key, not "stopWords" + } + customFiltersList.add(filterMap); + } + // Built-in names first, then custom filter maps, in one "filter" list + List<Object> filters = new ArrayList<>(); + filters.addAll(builtInFiltersList); + filters.addAll(customFiltersList); + paramsMap.put("filter", filters); + } + return paramsMap; + } + +} \ No newline at end of file diff --git a/milvus-plus-parent/pom.xml b/milvus-plus-parent/pom.xml index 16d192f..0f9eb63 100644 --- a/milvus-plus-parent/pom.xml +++ b/milvus-plus-parent/pom.xml @@ -30,7 +30,7 @@ - 2.1.8 + 2.2.0 ${java.version} ${java.version} 3.11.0 diff --git a/milvus-plus-solon-plugin/src/main/java/org/dromara/solon/service/MilvusInit.java b/milvus-plus-solon-plugin/src/main/java/org/dromara/solon/service/MilvusInit.java index 2062722..9c0417a 100644 --- a/milvus-plus-solon-plugin/src/main/java/org/dromara/solon/service/MilvusInit.java +++
b/milvus-plus-solon-plugin/src/main/java/org/dromara/solon/service/MilvusInit.java @@ -43,6 +43,7 @@ public class MilvusInit extends AbstractMilvusClientBuilder implements Lifecycle " | |\\/| | | \\ \\ / / | | / __| | |_) | | | | / __|\n" + " | | | | | |\\ V /| |_| \\__ \\ | __/| | |_| \\__ \\\n" + " |_| |_|_|_| \\_/ \\__,_|___/ |_| |_|\\__,_|___/\n\n"; + System.out.println(banner); } } \ No newline at end of file diff --git a/milvus-solon-demo/src/main/java/org/dromara/solon/DemoController.java b/milvus-solon-demo/src/main/java/org/dromara/solon/DemoController.java index a16c1d8..9b8312c 100755 --- a/milvus-solon-demo/src/main/java/org/dromara/solon/DemoController.java +++ b/milvus-solon-demo/src/main/java/org/dromara/solon/DemoController.java @@ -1,14 +1,14 @@ package org.dromara.solon; -import com.alibaba.fastjson.JSONObject; -import org.dromara.milvus.plus.model.vo.MilvusResp; -import org.dromara.milvus.plus.model.vo.MilvusResult; -import org.dromara.solon.test.model.Face; -import org.dromara.solon.test.test.FaceMilvusMapper; import io.milvus.v2.service.vector.response.DeleteResp; import io.milvus.v2.service.vector.response.InsertResp; import io.milvus.v2.service.vector.response.UpsertResp; import lombok.extern.slf4j.Slf4j; +import org.dromara.milvus.plus.model.vo.MilvusResp; +import org.dromara.milvus.plus.model.vo.MilvusResult; +import org.dromara.milvus.plus.util.GsonUtil; +import org.dromara.solon.test.model.Face; +import org.dromara.solon.test.test.FaceMilvusMapper; import org.noear.solon.annotation.Controller; import org.noear.solon.annotation.Get; import org.noear.solon.annotation.Inject; @@ -49,35 +49,35 @@ public class DemoController { face1.setFaceVector(vector1); faces.add(face1); } - MilvusResp insert = mapper.insert(faces.toArray(faces.toArray(new Face[0]))); log.info("insert--{}", JSONObject.toJSONString(insert)); + MilvusResp insert = mapper.insert(faces.toArray(faces.toArray(new Face[0]))); log.info("insert--{}", GsonUtil.toJson(insert)); //id查询 
MilvusResp>> query = mapper.getById(9l); - log.info("query--getById---{}", JSONObject.toJSONString(query)); + log.info("query--getById---{}", GsonUtil.toJson(query)); //向量查询 MilvusResp>> query1 = mapper.queryWrapper() .vector(Face::getFaceVector, vector) .ne(Face::getPersonId, 1L) .topK(3) .query(); - log.info("向量查询 query--queryWrapper---{}", JSONObject.toJSONString(query1)); + log.info("向量查询 query--queryWrapper---{}", GsonUtil.toJson(query1)); //标量查询 MilvusResp>> query2 = mapper.queryWrapper() .eq(Face::getPersonId, 2L) .topK(3) .query(); - log.info("标量查询 query--queryWrapper---{}", JSONObject.toJSONString(query2)); + log.info("标量查询 query--queryWrapper---{}", GsonUtil.toJson(query2)); //更新 vector.clear(); for (int i = 0; i < 128; i++) { vector.add((float) (Math.random() * 100)); // 这里仅作为示例使用随机数 } - MilvusResp update = mapper.updateById(face);log.info("update--{}", JSONObject.toJSONString(update)); + MilvusResp update = mapper.updateById(face);log.info("update--{}", GsonUtil.toJson(update)); //id查询 - MilvusResp>> query3 = mapper.getById(1L);log.info("query--getById---{}", JSONObject.toJSONString(query3)); + MilvusResp>> query3 = mapper.getById(1L);log.info("query--getById---{}", GsonUtil.toJson(query3)); //删除 - MilvusResp remove = mapper.removeById(1L);log.info("remove--{}", JSONObject.toJSONString(remove)); + MilvusResp remove = mapper.removeById(1L);log.info("remove--{}",GsonUtil.toJson(remove)); //查询 - MilvusResp>> query4 = mapper.getById(1L);log.info("query--{}", JSONObject.toJSONString(query4)); + MilvusResp>> query4 = mapper.getById(1L);log.info("query--{}", GsonUtil.toJson(query4)); } } \ No newline at end of file diff --git a/milvus-spring-demo/pom.xml b/milvus-spring-demo/pom.xml index 9e6d104..202ee1f 100644 --- a/milvus-spring-demo/pom.xml +++ b/milvus-spring-demo/pom.xml @@ -21,7 +21,7 @@ org.dromara.milvus-plus milvus-plus-boot-starter - 2.1.7 + 2.2.0 diff --git a/milvus-spring-demo/src/main/java/org/dromara/milvus/demo/ApplicationRunnerTest.java 
b/milvus-spring-demo/src/main/java/org/dromara/milvus/demo/ApplicationRunnerTest.java index 3f0d249..427c46f 100644 --- a/milvus-spring-demo/src/main/java/org/dromara/milvus/demo/ApplicationRunnerTest.java +++ b/milvus-spring-demo/src/main/java/org/dromara/milvus/demo/ApplicationRunnerTest.java @@ -37,25 +37,40 @@ public class ApplicationRunnerTest implements ApplicationRunner { @Override public void run(ApplicationArguments args) throws InterruptedException { - milvusService.dropCollection("face_collection"); // insertFace(); // selectFace(12); -//// selectFace(11); +// selectFace(11); // delFace(11); // Thread.sleep(10000); // countFace(22); // getByIdTest(); // vectorQuery(); // scalarQuery(); - //update(); +// update(); + selectTextEmbedding(); } + private void selectTextEmbedding(){ + MilvusResp>> xx = mapper + .queryWrapper() + .textVector(Face::getText, "whats the focus of information retrieval?") + .textMatch(Face::getText,"retrieval") + .topK(2) + .query(); + System.out.println("==="); + } private void selectFace(Integer temp){ MilvusResp>> query = mapper. queryWrapper() .eq(Face::getTemp, temp) .query(Face::getPersonName,Face::getTemp); log.info("query temp 11--{}", GsonUtil.toJson(query)); + + LambdaQueryWrapper mapper = milvusService.ofQuery(Face.class); + MilvusResp>> test = mapper + .eq(Face::getPersonName, "test") + .topK(1) + .query(); } private void countFace(Integer temp){ MilvusResp query = mapper. 
@@ -69,7 +84,7 @@ public class ApplicationRunnerTest implements ApplicationRunner { log.info("del temp 11 --{}", GsonUtil.toJson(remove)); } private void insertFace() { - List faces = LongStream.range(1, 10) + List faces = LongStream.range(1, 2) .mapToObj(i -> { Face faceTmp = new Face(); // faceTmp.setPersonId(i); @@ -84,6 +99,7 @@ public class ApplicationRunnerTest implements ApplicationRunner { person.setImages(Lists.newArrayList("https://baidu.com")); faceTmp.setPerson(person); faceTmp.setTemp(i%2==0?11:22); + faceTmp.setText(i % 2 == 0 ?"information retrieval is a field of study.":"information retrieval focuses on finding relevant information in large datasets."); return faceTmp; }) .collect(Collectors.toList()); diff --git a/milvus-spring-demo/src/main/java/org/dromara/milvus/demo/model/Face.java b/milvus-spring-demo/src/main/java/org/dromara/milvus/demo/model/Face.java index 4d3f38b..f9fa7ab 100644 --- a/milvus-spring-demo/src/main/java/org/dromara/milvus/demo/model/Face.java +++ b/milvus-spring-demo/src/main/java/org/dromara/milvus/demo/model/Face.java @@ -38,6 +38,14 @@ public class Face { ) private Integer temp; + @MilvusField( + name = "text", + dataType = DataType.VarChar, + enableAnalyzer = true, + enableMatch = true + ) + private String text; // 文本 + @MilvusField( name = "face_vector", // 字段名称 dataType = DataType.FloatVector, // 数据类型为浮点型向量