feat: add Sparse-BM25 text search and Tantivy text matching

xgc 2024-11-29 11:25:11 +08:00
parent 6e66ab96ae
commit 996f91b539
25 changed files with 564 additions and 27 deletions

View File

@ -0,0 +1,89 @@
## Full-text search with automatically generated vectors
Combined with semantics-based dense vector search, this feature removes the need to generate vector data manually and thus simplifies text-based search. It operates through the following workflow:
1. **Text input**: you insert raw text documents or provide query text; no manual embedding is required.
2. **Text analysis**: Milvus uses an analyzer to tokenize the input text into individual searchable terms.
3. **Function processing**: a built-in function takes the tokenized terms and converts them into sparse vector representations.
4. **Collection storage**: Milvus stores these sparse embeddings in the collection for efficient retrieval.
5. **BM25 scoring**: during search, Milvus applies the BM25 algorithm to score the stored documents and ranks the matches by their relevance to the query text (see the formula below the diagram).
<div style="display: inline-block; border: 4px solid #ccc; border-radius: 10px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); margin: 10px; padding: 10px;">
<img src="./logo/text_embedding.png" alt="text_embedding" style="border-radius: 10px;" />
</div>
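For reference, the BM25 score mentioned in step 5 is the standard Okapi BM25 formula shown below; the parameter values Milvus applies are not part of this commit, so only the textbook form is given:

$$
\mathrm{score}(D,Q)=\sum_{q_i \in Q}\mathrm{IDF}(q_i)\cdot\frac{f(q_i,D)\,(k_1+1)}{f(q_i,D)+k_1\left(1-b+b\cdot\frac{|D|}{\mathrm{avgdl}}\right)}
$$

where $f(q_i,D)$ is the frequency of query term $q_i$ in document $D$, $|D|$ is the document length, $\mathrm{avgdl}$ is the average document length in the collection, and $k_1$ and $b$ are tunable parameters.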
### Example
Add the `AnalyzerParams` annotation to the entity class:
```java
import org.dromara.milvus.plus.annotation.*;
public class TextEntity {

    @MilvusField(
            name = "text",
            dataType = DataType.VarChar,
            enableAnalyzer = true,
            analyzerParams = @AnalyzerParams(
                    builtInFilters = {
                            @BuiltInFilter
                    },
                    customFilters = {
                            @CustomFilter(type = "length", max = 40),
                            @CustomFilter(type = "stop", stopWords = {"of", "to"})
                    }
            )
    )
    private String text;
}
```
If you are not an advanced user, do not set `analyzerParams`; setting `enableAnalyzer = true` is sufficient.
### Tokenizer
- **Default tokenizer**: the `standard` tokenizer, which splits text into discrete word units based on grammar rules.
- **Annotation attribute**: `tokenizer`, with a default value of `TokenizerType.standard`.
### Filter
- **Default filter**: the `lowercase` filter, which converts all tokens to lowercase to support case-insensitive search.
- **Annotation attributes**: `builtInFilters` and `customFilters`, used to configure built-in and custom filters respectively.
### Custom stop words (StopWords)
- **Optional parameter**: `stop_words`, which specifies the list of stop words to exclude from the tokenization result.
- **Annotation attribute**: the `stopWords` attribute inside `customFilters`, which allows custom stop words to be defined.
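As a minimal sketch (not a public API of this library), the example annotation above is converted by `AnalyzerParamsUtils.convertToMap` in this commit into a plain parameter map that is passed to the field builder; the map keys below come from that class, while the sketch's class and method names are made up for illustration:
```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class AnalyzerParamsSketch {
    // Rough shape of the analyzer parameters produced for the example annotation above.
    static Map<String, Object> exampleParams() {
        Map<String, Object> params = new HashMap<>();
        params.put("tokenizer", "standard");                 // TokenizerType name, lower-cased

        List<Object> filters = new ArrayList<>();
        filters.add("lowercase");                            // built-in filters are added by name

        Map<String, Object> lengthFilter = new HashMap<>();  // custom filters become small maps
        lengthFilter.put("type", "length");
        lengthFilter.put("max", 40);
        filters.add(lengthFilter);

        Map<String, Object> stopFilter = new HashMap<>();
        stopFilter.put("type", "stop");
        stopFilter.put("stopWords", Arrays.asList("of", "to"));
        filters.add(stopFilter);

        params.put("filter", filters);
        return params;
    }
}
```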
### Internal processing
Based on this annotation, MilvusPlus internally performs the following steps (a minimal sketch follows the list):
- Generates a field that stores the sparse embedding corresponding to the text.
- Defines a function that converts the text into a sparse vector.
- Creates an index on that field.
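A minimal sketch of these three steps in terms of the Milvus Java SDK, mirroring `MilvusConverter` in this commit (the `_sparse`, `_sparse_index`, and `_bm25_emb` naming follows that class; the wrapping class and method here are illustrative only):
```java
import com.google.common.collect.Lists;
import io.milvus.common.clientenum.FunctionType;
import io.milvus.v2.common.DataType;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;

class TextEmbeddingSchemaSketch {
    static void addSparseArtifacts(String fieldName) {
        // 1. Sparse vector field that stores the BM25 embedding of the text field
        AddFieldReq sparseField = AddFieldReq.builder()
                .fieldName(fieldName + "_sparse")
                .dataType(DataType.SparseFloatVector)
                .build();

        // 2. Index on the sparse field, scored with the BM25 metric
        IndexParam sparseIndex = IndexParam.builder()
                .indexName(fieldName + "_sparse_index")
                .fieldName(fieldName + "_sparse")
                .indexType(IndexParam.IndexType.AUTOINDEX)
                .metricType(IndexParam.MetricType.BM25)
                .build();

        // 3. Function that converts the raw text into the sparse vector
        CreateCollectionReq.Function bm25Function = CreateCollectionReq.Function.builder()
                .name(fieldName + "_bm25_emb")
                .functionType(FunctionType.BM25)
                .inputFieldNames(Lists.newArrayList(fieldName))
                .outputFieldNames(Lists.newArrayList(fieldName + "_sparse"))
                .build();

        // The field, index and function are then added to the collection schema
        // before the collection is created.
    }
}
```
The `textVector(...)` call in the usage example below then targets this `<field>_sparse` vector field; the query text is passed as an `EmbeddedText`, so Milvus computes its BM25 sparse embedding on the server side.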
### Usage
```java
MilvusResp<List<MilvusResult<Face>>> xx = mapper
.queryWrapper()
.textVector(Face::getText, "whats the focus of information retrieval?")
.topK(2)
.query();
```

Tantivy文本匹配.md Normal file
View File

@ -0,0 +1,46 @@
### Search with text match
Milvus integrates Tantivy to power its underlying inverted index and term-based text search. For each text entry, Milvus indexes it as follows:
- Analyzer: the analyzer tokenizes the input text into individual words, or tokens, and then applies filters as needed. This allows Milvus to build an index from these tokens.
- Indexing: after text analysis, Milvus creates an inverted index that maps each unique token to the documents containing it (a conceptual sketch follows the diagram below).
<div style="display: inline-block; border: 4px solid #ccc; border-radius: 10px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); margin: 10px; padding: 10px;">
<img src="./logo/text_match.png" alt="text_match" style="border-radius: 10px;" />
</div>
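As an illustration only (this is not Tantivy's actual implementation), an inverted index conceptually maps each token to the identifiers of the documents that contain it:
```java
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

class InvertedIndexSketch {
    // token -> ids of the documents that contain it
    private final Map<String, Set<Long>> postings = new HashMap<>();

    void index(long docId, List<String> tokens) {
        for (String token : tokens) {
            postings.computeIfAbsent(token, t -> new HashSet<>()).add(docId);
        }
    }

    // term lookup: ids of documents containing any of the given terms
    Set<Long> match(String... terms) {
        Set<Long> result = new HashSet<>();
        for (String term : terms) {
            result.addAll(postings.getOrDefault(term, Collections.emptySet()));
        }
        return result;
    }
}
```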
### Example
Set both the `enableAnalyzer` and `enableMatch` parameters to `true`.
This instructs Milvus to tokenize the text and create an inverted index for the specified field, enabling fast and efficient text matching.
```java
import org.dromara.milvus.plus.annotation.*;
public class TextEntity {

    @MilvusField(
            name = "text",
            dataType = DataType.VarChar,
            enableAnalyzer = true,
            enableMatch = true
    )
    private String text;
}
```
### Searching with text match
Text match can be combined with vector similarity search to narrow the search scope and improve performance. By filtering the collection with text match before the vector similarity search, you reduce the number of documents that need to be searched and thereby shorten query time.
```java
MilvusResp<List<MilvusResult<Face>>> xx = mapper
.queryWrapper()
.textVector(Face::getText, "whats the focus of information retrieval?")
.textMatch(Face::getText,"retrieval")
.textMatch(Face::getText,"information")
.topK(2)
.query();
```
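Under the hood, each `textMatch(...)` call adds one `TEXT_MATCH(...)` clause, the clauses are joined with `and`, and the result is merged with any other filter conditions using `&&` (per `ConditionBuilder#textMatch` and `buildFilters()` in this commit). A hypothetical sketch of the filter expression produced by the two calls above, assuming `wrapFieldName` returns the plain field name:
```java
class TextMatchFilterSketch {
    // Filter expression sent to Milvus for the two textMatch(...) calls above (illustrative).
    static final String FILTER =
            "TEXT_MATCH(text, 'retrieval') and TEXT_MATCH(text, 'information')";
}
```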

BIN
logo/text_embedding.png Normal file  |  Size: 72 KiB

BIN
logo/text_match.png Normal file  |  Size: 104 KiB

View File

@ -1,14 +1,14 @@
package org.dromara.milvus.demo.java;
import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;
import io.milvus.v2.client.MilvusClientV2;
import org.dromara.milvus.demo.model.Face;
import org.dromara.milvus.plus.core.mapper.BaseMilvusMapper;
import org.dromara.milvus.plus.model.MilvusProperties;
import org.dromara.milvus.plus.model.vo.MilvusResp;
import org.dromara.milvus.plus.model.vo.MilvusResult;
import org.dromara.milvus.plus.service.impl.MilvusClientBuild;
import io.milvus.v2.client.MilvusClientV2;
import org.dromara.milvus.plus.util.GsonUtil;
import java.util.List;
@ -34,7 +34,7 @@ public class JavaTest {
.partition("face_01")
.topK(3)
.query();
System.out.println("标量查询 query--queryWrapper---{}"+JSONObject.toJSONString(query2));
System.out.println("标量查询 query--queryWrapper---{}"+ GsonUtil.toJson(query2));
build.close();
}
}

View File

@ -28,7 +28,7 @@
<dependency>
<groupId>io.milvus</groupId>
<artifactId>milvus-sdk-java</artifactId>
<version>2.4.4</version>
<version>2.5.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>

View File

@ -0,0 +1,21 @@
package org.dromara.milvus.plus.annotation;
import org.dromara.milvus.plus.model.TokenizerType;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Annotation describing analyzer parameters, containing the tokenizer and the filter lists
*/
@Target(ElementType.ANNOTATION_TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface AnalyzerParams {
TokenizerType tokenizer() default TokenizerType.standard; // tokenizer configuration
BuiltInFilter[] builtInFilters() default {}; // built-in filters
CustomFilter[] customFilters() default {}; // custom filters
}

View File

@ -0,0 +1,17 @@
package org.dromara.milvus.plus.annotation;
import org.dromara.milvus.plus.model.BuiltInFilterType;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Annotation defining a built-in filter
*/
@Target(ElementType.ANNOTATION_TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface BuiltInFilter {
BuiltInFilterType name() default BuiltInFilterType.lowercase;
}

View File

@ -0,0 +1,17 @@
package org.dromara.milvus.plus.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* Annotation defining a custom filter
*/
@Target(ElementType.ANNOTATION_TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface CustomFilter {
String type() default "";
int max() default 0;
String[] stopWords() default {};
}

View File

@ -67,4 +67,21 @@ public @interface MilvusField {
* Whether this field is a partition key
*/
boolean isPartitionKey() default false;
/**
* Enable the analyzer
*/
boolean enableAnalyzer() default false;
/**
*
* Enable text match
*/
boolean enableMatch() default false;
/**
* Analyzer parameters
*/
AnalyzerParams analyzerParams() default @AnalyzerParams;
}

View File

@ -7,6 +7,9 @@ import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.index.request.CreateIndexReq;
import java.util.List;
/**
* @author xgc
**/
@ -17,6 +20,8 @@ public class CollectionSchemaBuilder {
private final CreateCollectionReq.CollectionSchema schema;
private ConsistencyLevel consistencyLevel=ConsistencyLevel.BOUNDED;
private Boolean enableDynamicField=false;
private List<CreateCollectionReq.Function> functions;
public CollectionSchemaBuilder(Boolean enableDynamicField,String collectionName, MilvusClientV2 wrapper) {
this.collectionName = collectionName;
@ -40,6 +45,11 @@ public class CollectionSchemaBuilder {
}
return this;
}
public void addFun(List<CreateCollectionReq.Function> functions){
for (CreateCollectionReq.Function function : functions) {
schema.addFunction(function);
}
}
public void addConsistencyLevel(ConsistencyLevel level){
this.consistencyLevel=level;
}

View File

@ -2,10 +2,13 @@ package org.dromara.milvus.plus.converter;
import com.google.common.collect.Lists;
import io.milvus.common.clientenum.FunctionType;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.ConsistencyLevel;
import io.milvus.v2.common.DataType;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.collection.request.GetLoadStateReq;
import io.milvus.v2.service.collection.request.LoadCollectionReq;
import io.milvus.v2.service.partition.request.CreatePartitionReq;
@ -20,6 +23,8 @@ import org.dromara.milvus.plus.cache.ConversionCache;
import org.dromara.milvus.plus.cache.MilvusCache;
import org.dromara.milvus.plus.cache.PropertyCache;
import org.dromara.milvus.plus.model.MilvusEntity;
import org.dromara.milvus.plus.util.AnalyzerParamsUtils;
import org.dromara.milvus.plus.util.GsonUtil;
import org.springframework.util.CollectionUtils;
import java.lang.reflect.Field;
@ -84,6 +89,7 @@ public class MilvusConverter {
// Cache used to store the mapping between properties and functions
PropertyCache propertyCache = new PropertyCache();
List<Field> fields = getAllFieldsFromClass(entityClass);
List<CreateCollectionReq.Function> functions=new ArrayList<>();
// Iterate over all fields of the entity class and read the @MilvusField annotation info
for (Field field : fields) {
MilvusField fieldAnnotation = field.getAnnotation(MilvusField.class);
@ -106,8 +112,35 @@ public class MilvusConverter {
.isPrimaryKey(fieldAnnotation.isPrimaryKey())
.isPartitionKey(fieldAnnotation.isPartitionKey())
.elementType(fieldAnnotation.elementType())
.enableAnalyzer(fieldAnnotation.enableAnalyzer())
.enableMatch(fieldAnnotation.enableMatch())
.autoID(false);
autoID=autoID?autoID:fieldAnnotation.autoID();
if(fieldAnnotation.enableAnalyzer()&&fieldAnnotation.dataType()==DataType.VarChar){
Map<String, Object> analyzerParams = AnalyzerParamsUtils.convertToMap(fieldAnnotation.analyzerParams());
log.info("-----------analyzerParams--------- \n"+ GsonUtil.toJson(analyzerParams));
builder.analyzerParams(analyzerParams);
// Build the SPARSE_FLOAT_VECTOR field corresponding to this text field
AddFieldReq sparse = AddFieldReq.builder().fieldName(fieldName + "_sparse").dataType(DataType.SparseFloatVector).build();
milvusFields.add(sparse);
// Build the index
IndexParam sparseIndex = IndexParam.builder()
.indexName(fieldName + "_sparse_index")
.fieldName(fieldName + "_sparse")
.indexType(IndexParam.IndexType.AUTOINDEX)
.metricType(IndexParam.MetricType.BM25)
.build();
indexParams.add(sparseIndex);
// Define a function that converts the text into a sparse vector
String funName = fieldName+"_bm25_emb";
CreateCollectionReq.Function fun= CreateCollectionReq.Function.builder().
name(funName).
functionType(FunctionType.BM25).
inputFieldNames(Lists.newArrayList(fieldName)).
outputFieldNames(Lists.newArrayList(fieldName + "_sparse")).build();
functions.add(fun);
}
// Description
Optional.of(fieldAnnotation.description())
.filter(StringUtils::isNotEmpty).ifPresent(builder::description);
@ -134,6 +167,7 @@ public class MilvusConverter {
// Set the Milvus fields and index parameters
milvus.setMilvusFields(milvusFields);
milvus.setIndexParams(indexParams);
milvus.setFunctions(functions);
// Cache the conversion result and collection information
ConversionCache conversionCache = new ConversionCache();
conversionCache.setMilvusEntity(milvus);
@ -226,8 +260,10 @@ public class MilvusConverter {
);
schemaBuilder.addField(milvusEntity.getMilvusFields().toArray(new AddFieldReq[0]));
schemaBuilder.addConsistencyLevel(milvusEntity.getConsistencyLevel());
schemaBuilder.addFun(milvusEntity.getFunctions());
log.info("-------create schema---------");
schemaBuilder.createSchema();
log.info("-------create schema fun---------");
schemaBuilder.createIndex(indexParams);
log.info("-------create index---------");
// Create partitions

View File

@ -41,9 +41,11 @@ public class SearchRespConverter {
Map<String, Object> entityMap = new HashMap<>();
for (Map.Entry<String, Object> entry : searchResult.getEntity().entrySet()) {
String key = propertyCache.findKeyByValue(entry.getKey());
if(key!=null){
Object value = entry.getValue();
entityMap.put(key,value);
}
}
// Convert the mapped Map into the Java entity type T
T entity = GsonUtil.convertMapToType(entityMap, entityType);
MilvusResult<T> tMilvusResult = new MilvusResult<>();
@ -111,9 +113,10 @@ public class SearchRespConverter {
// Convert key names via the property cache to match the Java entity field names
for (Map.Entry<String, Object> entry : entityMap.entrySet()) {
String key = propertyCache.findKeyByValue(entry.getKey());
if(key!=null){
Object value = entry.getValue();
entityMap2.put(key,value);
}
}
// Use the conversion utility to convert the mapped Map into an entity of the specified type
T entity = GsonUtil.convertMapToType(entityMap2, entityType);

View File

@ -1,6 +1,7 @@
package org.dromara.milvus.plus.core.conditions;
import org.dromara.milvus.plus.core.FieldFunction;
import org.springframework.util.CollectionUtils;
import java.lang.reflect.Field;
import java.util.ArrayList;
@ -15,6 +16,7 @@ import java.util.stream.Collectors;
public abstract class ConditionBuilder<T> {
protected List<String> filters = new ArrayList<>();
protected List<String> textMatches =new ArrayList<>();
protected Map<String, Object> getPropertiesMap(T t) {
Map<String, Object> propertiesMap = new HashMap<>();
Class<?> clazz = t.getClass();
@ -35,6 +37,34 @@ public abstract class ConditionBuilder<T> {
return propertiesMap; // Return a Map of property names and property values
}
/**
* Add a TEXT_MATCH condition (supports multiple values)
*
* @param fieldName field name or field function
* @param values    list of values to match
* @return the current condition builder
*/
protected ConditionBuilder<T> textMatch(String fieldName, List<String> values) {
String joinedValues = String.join(" ", values);
String match = "TEXT_MATCH(" + wrapFieldName(fieldName) + ", '" + joinedValues + "')";
textMatches.add(match);
return this;
}
protected ConditionBuilder<T> textMatch(String fieldName, String value) {
String match = "TEXT_MATCH(" + wrapFieldName(fieldName) + ", '" + value + "')";
textMatches.add(match);
return this;
}
protected ConditionBuilder<T> textMatch(FieldFunction<T,?> fieldName, String value) {
textMatch(fieldName.getFieldName(fieldName),value);
return this;
}
protected ConditionBuilder<T> textMatch(FieldFunction<T,?> fieldName, List<String> values) {
textMatch(fieldName.getFieldName(fieldName),values);
return this;
}
/**
* Add an equality condition
*
@ -354,6 +384,10 @@ public abstract class ConditionBuilder<T> {
* @return the assembled filter expression string
*/
protected String buildFilters(){
if(!CollectionUtils.isEmpty(textMatches)){
String textMatchFilter = textMatches.stream().collect(Collectors.joining(" and "));
filters.add(textMatchFilter);
}
return filters.stream().collect(Collectors.joining(" && "));
}

View File

@ -578,6 +578,54 @@ public class LambdaDeleteWrapper<T> extends AbstractChainWrapper<T> implements
}
return this;
}
/**
* Add a TEXT_MATCH condition (supports multiple values)
*
* @param fieldName field name or field function
* @param values    list of values to match
* @return the current condition builder
*/
public LambdaDeleteWrapper<T> textMatch(String fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaDeleteWrapper<T> textMatch(String fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaDeleteWrapper<T> textMatch(FieldFunction<T,?> fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaDeleteWrapper<T> textMatch(FieldFunction<T,?> fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaDeleteWrapper<T> textMatch(boolean condition,String fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
public LambdaDeleteWrapper<T> textMatch(boolean condition,String fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaDeleteWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaDeleteWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
// Logic operations
public LambdaDeleteWrapper<T> and(ConditionBuilder<T> other) {

View File

@ -5,6 +5,7 @@ import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.ConsistencyLevel;
import io.milvus.v2.service.vector.request.*;
import io.milvus.v2.service.vector.request.data.BaseVector;
import io.milvus.v2.service.vector.request.data.EmbeddedText;
import io.milvus.v2.service.vector.request.data.FloatVec;
import io.milvus.v2.service.vector.request.ranker.BaseRanker;
import io.milvus.v2.service.vector.response.GetResp;
@ -677,6 +678,18 @@ public class LambdaQueryWrapper<T> extends AbstractChainWrapper<T> implements Wr
vectors.add(baseVector);
return this;
}
public LambdaQueryWrapper<T> textVector(FieldFunction<T,?> annsField, String vector) {
this.annsField=annsField.getFieldName(annsField)+"_sparse";
BaseVector baseVector = new EmbeddedText(vector);
vectors.add(baseVector);
return this;
}
public LambdaQueryWrapper<T> textVector(String annsField,String vector) {
this.annsField=annsField+"_sparse";
BaseVector baseVector = new EmbeddedText(vector);
vectors.add(baseVector);
return this;
}
public LambdaQueryWrapper<T> vector(BaseVector vector) {
vectors.add(vector);
@ -705,6 +718,55 @@ public class LambdaQueryWrapper<T> extends AbstractChainWrapper<T> implements Wr
this.setTopK(topK);
return this;
}
/**
* Add a TEXT_MATCH condition (supports multiple values)
*
* @param fieldName field name or field function
* @param values    list of values to match
* @return the current condition builder
*/
public LambdaQueryWrapper<T> textMatch(String fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaQueryWrapper<T> textMatch(String fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaQueryWrapper<T> textMatch(FieldFunction<T,?> fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaQueryWrapper<T> textMatch(FieldFunction<T,?> fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaQueryWrapper<T> textMatch(boolean condition,String fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
public LambdaQueryWrapper<T> textMatch(boolean condition,String fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaQueryWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaQueryWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
/**
* Build the complete search request
* @return the search request object

View File

@ -583,6 +583,54 @@ public class LambdaUpdateWrapper<T> extends AbstractChainWrapper<T> implements W
}
return this;
}
/**
* Add a TEXT_MATCH condition (supports multiple values)
*
* @param fieldName field name or field function
* @param values    list of values to match
* @return the current condition builder
*/
public LambdaUpdateWrapper<T> textMatch(String fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaUpdateWrapper<T> textMatch(String fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaUpdateWrapper<T> textMatch(FieldFunction<T,?> fieldName, String value) {
super.textMatch(fieldName,value);
return this;
}
public LambdaUpdateWrapper<T> textMatch(FieldFunction<T,?> fieldName, List<String> values) {
super.textMatch(fieldName,values);
return this;
}
public LambdaUpdateWrapper<T> textMatch(boolean condition,String fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
public LambdaUpdateWrapper<T> textMatch(boolean condition,String fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaUpdateWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, String value) {
if(condition){
super.textMatch(fieldName,value);
}
return this;
}
public LambdaUpdateWrapper<T> textMatch(boolean condition,FieldFunction<T,?> fieldName, List<String> values) {
if(condition){
super.textMatch(fieldName,values);
}
return this;
}
// Logic operations
public LambdaUpdateWrapper<T> and(ConditionBuilder<T> other) {

View File

@ -0,0 +1,8 @@
package org.dromara.milvus.plus.model;
/**
* Enum of the built-in filter types
*/
public enum BuiltInFilterType {
lowercase, asciifolding, alphanumonly, cnalphanumonly, cncharonly, stop, length, stemmer
}

View File

@ -3,6 +3,7 @@ package org.dromara.milvus.plus.model;
import io.milvus.v2.common.ConsistencyLevel;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.AddFieldReq;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import lombok.Data;
import java.util.List;
@ -19,4 +20,5 @@ public class MilvusEntity {
private List<String> partitionName;
private ConsistencyLevel consistencyLevel;
private Boolean enableDynamicField;
private List<CreateCollectionReq.Function> functions;
}

View File

@ -0,0 +1,8 @@
package org.dromara.milvus.plus.model;
/**
* Enum of the built-in tokenizer types
*/
public enum TokenizerType {
standard, whitespace, english, chinese
}

View File

@ -0,0 +1,47 @@
package org.dromara.milvus.plus.util;
import com.google.common.collect.Lists;
import org.dromara.milvus.plus.annotation.AnalyzerParams;
import org.dromara.milvus.plus.annotation.BuiltInFilter;
import org.dromara.milvus.plus.annotation.CustomFilter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class AnalyzerParamsUtils {
public static Map<String, Object> convertToMap(AnalyzerParams analyzerParams) {
Map<String, Object> paramsMap = new HashMap<>();
if (analyzerParams != null) {
// Set the tokenizer
paramsMap.put("tokenizer", analyzerParams.tokenizer().name().toLowerCase());
// Handle built-in filters
List<String> builtInFiltersList = new ArrayList<>();
for (BuiltInFilter builtInFilter : analyzerParams.builtInFilters()) {
builtInFiltersList.add(builtInFilter.name().name());
}
// Handle custom filters
List<Map<String, Object>> customFiltersList = new ArrayList<>();
for (CustomFilter customFilter : analyzerParams.customFilters()) {
Map<String, Object> filterMap = new HashMap<>();
filterMap.put("type", customFilter.type());
if (customFilter.max() > 0) {
filterMap.put("max", customFilter.max());
}
if (customFilter.stopWords().length > 0) {
filterMap.put("stopWords", new ArrayList<>(Lists.newArrayList(customFilter.stopWords())));
}
customFiltersList.add(filterMap);
}
// Merge the filter lists
List<Object> filters = new ArrayList<>();
filters.addAll(builtInFiltersList);
filters.addAll(customFiltersList);
paramsMap.put("filter", filters);
}
return paramsMap;
}
}

View File

@ -1,14 +1,14 @@
package org.dromara.solon;
import com.alibaba.fastjson.JSONObject;
import org.dromara.milvus.plus.model.vo.MilvusResp;
import org.dromara.milvus.plus.model.vo.MilvusResult;
import org.dromara.solon.test.model.Face;
import org.dromara.solon.test.test.FaceMilvusMapper;
import io.milvus.v2.service.vector.response.DeleteResp;
import io.milvus.v2.service.vector.response.InsertResp;
import io.milvus.v2.service.vector.response.UpsertResp;
import lombok.extern.slf4j.Slf4j;
import org.dromara.milvus.plus.model.vo.MilvusResp;
import org.dromara.milvus.plus.model.vo.MilvusResult;
import org.dromara.milvus.plus.util.GsonUtil;
import org.dromara.solon.test.model.Face;
import org.dromara.solon.test.test.FaceMilvusMapper;
import org.noear.solon.annotation.Controller;
import org.noear.solon.annotation.Get;
import org.noear.solon.annotation.Inject;
@ -49,35 +49,35 @@ public class DemoController {
face1.setFaceVector(vector1);
faces.add(face1);
}
MilvusResp<InsertResp> insert = mapper.insert(faces.toArray(faces.toArray(new Face[0]))); log.info("insert--{}", JSONObject.toJSONString(insert));
MilvusResp<InsertResp> insert = mapper.insert(faces.toArray(faces.toArray(new Face[0]))); log.info("insert--{}", GsonUtil.toJson(insert));
// query by id
MilvusResp<List<MilvusResult<Face>>> query = mapper.getById(9l);
log.info("query--getById---{}", JSONObject.toJSONString(query));
log.info("query--getById---{}", GsonUtil.toJson(query));
// vector query
MilvusResp<List<MilvusResult<Face>>> query1 = mapper.queryWrapper()
.vector(Face::getFaceVector, vector)
.ne(Face::getPersonId, 1L)
.topK(3)
.query();
log.info("向量查询 query--queryWrapper---{}", JSONObject.toJSONString(query1));
log.info("向量查询 query--queryWrapper---{}", GsonUtil.toJson(query1));
// scalar query
MilvusResp<List<MilvusResult<Face>>> query2 = mapper.queryWrapper()
.eq(Face::getPersonId, 2L)
.topK(3)
.query();
log.info("标量查询 query--queryWrapper---{}", JSONObject.toJSONString(query2));
log.info("标量查询 query--queryWrapper---{}", GsonUtil.toJson(query2));
// update
vector.clear();
for (int i = 0; i < 128; i++) {
vector.add((float) (Math.random() * 100)); // random values are used here only as an example
}
MilvusResp<UpsertResp> update = mapper.updateById(face);log.info("update--{}", JSONObject.toJSONString(update));
MilvusResp<UpsertResp> update = mapper.updateById(face);log.info("update--{}", GsonUtil.toJson(update));
// query by id
MilvusResp<List<MilvusResult<Face>>> query3 = mapper.getById(1L);log.info("query--getById---{}", JSONObject.toJSONString(query3));
MilvusResp<List<MilvusResult<Face>>> query3 = mapper.getById(1L);log.info("query--getById---{}", GsonUtil.toJson(query3));
// delete
MilvusResp<DeleteResp> remove = mapper.removeById(1L);log.info("remove--{}", JSONObject.toJSONString(remove));
MilvusResp<DeleteResp> remove = mapper.removeById(1L);log.info("remove--{}",GsonUtil.toJson(remove));
// query
MilvusResp<List<MilvusResult<Face>>> query4 = mapper.getById(1L);log.info("query--{}", JSONObject.toJSONString(query4));
MilvusResp<List<MilvusResult<Face>>> query4 = mapper.getById(1L);log.info("query--{}", GsonUtil.toJson(query4));
}
}

View File

@ -21,7 +21,7 @@
<dependency>
<groupId>org.dromara.milvus-plus</groupId>
<artifactId>milvus-plus-boot-starter</artifactId>
<version>2.1.7</version>
<version>2.1.8</version>
</dependency>
</dependencies>
<dependencyManagement>

View File

@ -37,10 +37,9 @@ public class ApplicationRunnerTest implements ApplicationRunner {
@Override
public void run(ApplicationArguments args) throws InterruptedException {
milvusService.dropCollection("face_collection");
// insertFace();
// selectFace(12);
//// selectFace(11);
// selectFace(11);
// delFace(11);
// Thread.sleep(10000);
// countFace(22);
@ -48,14 +47,30 @@ public class ApplicationRunnerTest implements ApplicationRunner {
// vectorQuery();
// scalarQuery();
// update();
selectTextEmbedding();
}
private void selectTextEmbedding(){
MilvusResp<List<MilvusResult<Face>>> xx = mapper
.queryWrapper()
.textVector(Face::getText, "whats the focus of information retrieval?")
.textMatch(Face::getText,"retrieval")
.topK(2)
.query();
System.out.println("===");
}
private void selectFace(Integer temp){
MilvusResp<List<MilvusResult<Face>>> query = mapper.
queryWrapper()
.eq(Face::getTemp, temp)
.query(Face::getPersonName,Face::getTemp);
log.info("query temp 11--{}", GsonUtil.toJson(query));
LambdaQueryWrapper<Face> mapper = milvusService.ofQuery(Face.class);
MilvusResp<List<MilvusResult<Face>>> test = mapper
.eq(Face::getPersonName, "test")
.topK(1)
.query();
}
private void countFace(Integer temp){
MilvusResp<Long> query = mapper.
@ -69,7 +84,7 @@ public class ApplicationRunnerTest implements ApplicationRunner {
log.info("del temp 11 --{}", GsonUtil.toJson(remove));
}
private void insertFace() {
List<Face> faces = LongStream.range(1, 10)
List<Face> faces = LongStream.range(1, 2)
.mapToObj(i -> {
Face faceTmp = new Face();
// faceTmp.setPersonId(i);
@ -84,6 +99,7 @@ public class ApplicationRunnerTest implements ApplicationRunner {
person.setImages(Lists.newArrayList("https://baidu.com"));
faceTmp.setPerson(person);
faceTmp.setTemp(i%2==0?11:22);
faceTmp.setText(i % 2 == 0 ? "Information retrieval is a field of study." : "information retrieval focuses on finding relevant information in large datasets.");
return faceTmp;
})
.collect(Collectors.toList());

View File

@ -38,6 +38,14 @@ public class Face {
)
private Integer temp;
@MilvusField(
name = "text",
dataType = DataType.VarChar,
enableAnalyzer = true,
enableMatch = true
)
private String text; // text content
@MilvusField(
name = "face_vector", // 字段名称
dataType = DataType.FloatVector, // 数据类型为浮点型向量