!123 ospp-向量检索

Merge pull request !123 from liaozhehao/feature-3.0.1
This commit is contained in:
阿杰 2025-09-24 06:22:31 +00:00 committed by Gitee
commit 893af26f8d
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
13 changed files with 330 additions and 42 deletions

View File

@ -70,6 +70,11 @@ public enum EsQueryTypeEnum {
GEO_POLYGON,
GEO_SHAPE_ID,
GEO_SHAPE,
/**
* 向量查询
*/
ANN,
KNN,
/**
* 父子类型查询
*/

View File

@ -100,7 +100,14 @@ public class GlobalConfig {
* Whether to intelligently add the. keyword suffix to the field. This configuration is enabled by default. The field type is KEYWORD only for annotation configuration_ The String field of TEXT or unconfigured type takes effect and only takes effect when the query requires that the field be of keyword type, so it is called smart! 是否智能为字段添加.keyword后缀 默认开启 此配置仅对注解配置字段类型为KEYWORD_TEXT或未配置类型的String字段生效并且只会在查询要求该字段必须为keyword类型的查询中才生效因此谓之智能!
*/
private boolean smartAddKeywordSuffix = true;
/**
* Whether to enable the knn plugin. This configuration is enabled by default. If the plugin is not installed, it can be turned off temporarily (not recommended, poor performance). 是否开启knn插件 - 默认开启,若未安装该插件则可临时配置关闭(不推荐,性能表现差)
*/
private boolean enableKnnPlugin = true;
/**
* default date format 默认日期format格式
*/
private String defaultDateFormat = DEFAULT_DATE_TIME_FORMAT;
}
}

View File

@ -224,6 +224,10 @@ public class EntityInfo {
* 是否判定索引相同阶段
*/
private boolean indexEqualStage;
/**
* 是否开启knn插件 - 默认开启,若未安装该插件则可临时配置关闭(不推荐,性能表现差)
*/
private boolean enableKnnPlugin;
/**
* 获取需要进行查询的字段列表

View File

@ -1742,4 +1742,49 @@ public interface Func<Children, R> extends Serializable {
* @return wrapper
*/
Children bucketOrder(boolean condition, List<NamedValue<SortOrder>> bucketOrders);
/**
* Adds a knn vector query that is always applied: delegates to the
* condition-taking overload with {@code condition = true}.
*
* @param column   field
* @param queryVec query vector
* @param k        number of most similar results to return
* @return wrapper
*/
default Children knn(R column, float[] queryVec, int k) {
return knn(true, column, queryVec, k);
}
/**
* Adds a knn vector query for a field reference: the field name is obtained from
* {@code FieldUtils.getFieldName(column)} and passed on to the overload taking that name.
*
* @param condition execution condition
* @param column    field
* @param queryVec  query vector
* @param k         number of most similar results to return
* @return wrapper
*/
default Children knn(boolean condition, R column, float[] queryVec, int k) {
return knn(condition, FieldUtils.getFieldName(column), queryVec, k);
}
/**
* Vector query using the knn algorithm.
*
* @param condition execution condition
* @param column    field
* @param queryVec  query vector
* @param k         number of most similar results to return
* @return wrapper
*/
Children knn(boolean condition, String column, float[] queryVec, int k);
/**
* Adds an ann vector query that is always applied, using {@code k * 10} as the
* number of candidates.
*
* @param column   field
* @param queryVec query vector
* @param k        number of most similar results to return
* @return wrapper
*/
default Children ann(R column, float[] queryVec, int k) {
return ann(true, column, queryVec, k, k * 10);
}
/**
* Adds an ann vector query that is always applied: delegates to the
* condition-taking overload with {@code condition = true}.
*
* @param column        field
* @param queryVec      query vector
* @param k             number of most similar results to return
* @param numCandidates number of candidates
* @return wrapper
*/
default Children ann(R column, float[] queryVec, int k, int numCandidates) {
return ann(true, column, queryVec, k, numCandidates);
}
/**
* Adds an ann vector query for a field reference: the field name is obtained from
* {@code FieldUtils.getFieldName(column)} and passed on to the overload taking that name.
*
* @param condition     execution condition
* @param column        field
* @param queryVec      query vector
* @param k             number of most similar results to return
* @param numCandidates number of candidates
* @return wrapper
*/
default Children ann(boolean condition, R column, float[] queryVec, int k, int numCandidates) {
return ann(condition, FieldUtils.getFieldName(column), queryVec, k, numCandidates);
}
/**
* Vector query using the ann algorithm.
*
* @param condition     execution condition
* @param column        field
* @param queryVec      query vector
* @param k             number of most similar results to return
* @param numCandidates number of candidates
* @return wrapper
*/
Children ann(boolean condition, String column, float[] queryVec, int k, int numCandidates);
}

View File

@ -776,4 +776,16 @@ public abstract class AbstractChainWrapper<T, R, Children extends AbstractChainW
getWrapper().join(column, parentName, childName);
return typedThis;
}
/**
* Forwards the knn condition to the wrapped wrapper and returns this chain wrapper
* so that calls can be chained.
*
* @param condition execution condition
* @param column    field name
* @param queryVec  query vector
* @param k         number of most similar results to return
* @return this chain wrapper
*/
@Override
public Children knn(boolean condition, String column, float[] queryVec, int k) {
getWrapper().knn(condition, column, queryVec, k);
return typedThis;
}
/**
* Forwards the ann condition to the wrapped wrapper and returns this chain wrapper
* so that calls can be chained.
*
* @param condition     execution condition
* @param column        field name
* @param queryVec      query vector
* @param k             number of most similar results to return
* @param numCandidates number of candidates
* @return this chain wrapper
*/
@Override
public Children ann(boolean condition, String column, float[] queryVec, int k, int numCandidates) {
getWrapper().ann(condition, column, queryVec, k, numCandidates);
return typedThis;
}
}

View File

@ -380,6 +380,16 @@ public abstract class AbstractWrapper<T, R, Children extends AbstractWrapper<T,
return addParam(condition, GEO_SHAPE, column, geometry, shapeRelation, null, boost);
}
/**
* Registers a {@code KNN} query parameter: the query vector is passed as the value and
* {@code k} as the first extension argument; the two trailing arguments are left null.
*
* @param condition execution condition
* @param column    field name
* @param queryVec  query vector
* @param k         number of most similar results to return
* @return wrapper
*/
@Override
public Children knn(boolean condition, String column, float[] queryVec, int k) {
return addParam(condition, KNN, column, queryVec, k, null, null);
}
/**
* Registers an {@code ANN} query parameter: the query vector is passed as the value,
* {@code k} and {@code numCandidates} as the first and second extension arguments; the
* trailing argument is left null.
*
* @param condition     execution condition
* @param column        field name
* @param queryVec      query vector
* @param k             number of most similar results to return
* @param numCandidates number of candidates
* @return wrapper
*/
@Override
public Children ann(boolean condition, String column, float[] queryVec, int k, int numCandidates) {
return addParam(condition, ANN, column, queryVec, k, numCandidates, null);
}
@Override
public final Children orderBy(boolean condition, boolean isAsc, String... columns) {
if (ArrayUtils.isEmpty(columns)) {
@ -835,24 +845,6 @@ public abstract class AbstractWrapper<T, R, Children extends AbstractWrapper<T,
return typedThis;
}
/**
* Appends a query parameter.
*
* @param condition     execution condition; nothing is added when false
* @param queryTypeEnum query type
* @param column        column handed to {@code addBaseParam}
* @param val           value handed to {@code addBaseParam}
* @param boost         weight
* @return wrapper
*/
private Children addParam(boolean condition, EsQueryTypeEnum queryTypeEnum, String column, Object val, Float boost) {
if (condition) {
Param param = new Param();
addBaseParam(param, queryTypeEnum, column, val, boost);
}
return typedThis;
}
/**
* 重载追加拓展参数
*
@ -924,6 +916,14 @@ public abstract class AbstractWrapper<T, R, Children extends AbstractWrapper<T,
return typedThis;
}
/**
 * Appends a basic query parameter when the condition holds.
 *
 * @param condition     execution condition; nothing is added when false
 * @param queryTypeEnum query type
 * @param column        column handed to {@code addBaseParam}
 * @param val           value handed to {@code addBaseParam}
 * @param boost         weight
 * @return wrapper
 */
private Children addParam(boolean condition, EsQueryTypeEnum queryTypeEnum, String column, Object val, Float boost) {
    // Guard clause: an unmet condition leaves the wrapper untouched.
    if (!condition) {
        return typedThis;
    }
    final Param baseParam = new Param();
    addBaseParam(baseParam, queryTypeEnum, column, val, boost);
    return typedThis;
}
/**
* 重载追加嵌套条件

View File

@ -21,6 +21,7 @@ import org.dromara.easyes.core.toolkit.GeoUtils;
import org.dromara.easyes.core.toolkit.TreeBuilder;
import org.elasticsearch.geometry.Geometry;
import java.io.StringReader;
import java.time.ZoneId;
import java.util.*;
import java.util.stream.Collectors;
@ -145,7 +146,7 @@ public class WrapperProcessor {
.field(realField)
.gt(JsonData.of(param.getVal()))
.timeZone(param.getExt1() == null ? null : ((ZoneId) param.getExt1()).getId())
.format((String)param.getExt2())
.format((String) param.getExt2())
.boost(param.getBoost())
));
setBool(bool, query, param.getPrevQueryType());
@ -156,7 +157,7 @@ public class WrapperProcessor {
.field(realField)
.gte(JsonData.of(param.getVal()))
.timeZone(param.getExt1() == null ? null : ((ZoneId) param.getExt1()).getId())
.format((String)param.getExt2())
.format((String) param.getExt2())
.boost(param.getBoost())
));
setBool(bool, query, param.getPrevQueryType());
@ -167,7 +168,7 @@ public class WrapperProcessor {
.field(realField)
.lt(JsonData.of(param.getVal()))
.timeZone(param.getExt1() == null ? null : ((ZoneId) param.getExt1()).getId())
.format((String)param.getExt2())
.format((String) param.getExt2())
.boost(param.getBoost())
));
setBool(bool, query, param.getPrevQueryType());
@ -178,7 +179,7 @@ public class WrapperProcessor {
.field(realField)
.lte(JsonData.of(param.getVal()))
.timeZone(param.getExt1() == null ? null : ((ZoneId) param.getExt1()).getId())
.format((String)param.getExt2())
.format((String) param.getExt2())
.boost(param.getBoost())
));
setBool(bool, query, param.getPrevQueryType());
@ -190,7 +191,7 @@ public class WrapperProcessor {
.gte(JsonData.of(param.getExt1()))
.lte(JsonData.of(param.getExt2()))
.timeZone(param.getExt3() == null ? null : ((ZoneId) param.getExt3()).getId())
.format((String)param.getExt4())
.format((String) param.getExt4())
.boost(param.getBoost())
));
setBool(bool, query, param.getPrevQueryType());
@ -262,6 +263,26 @@ public class WrapperProcessor {
.build()._toQuery();
setBool(bool, query, param.getPrevQueryType());
break;
// 向量
case KNN:
realField = getRealField(param.getColumn(), mappingColumnMap, entityInfo);
if (entityInfo.isEnableKnnPlugin()) {
query = buildKnnQueryWithPlugin(realField, (float[]) param.getVal(), (int) param.getExt1());
} else {
query = buildKnnQuery(realField, (float[]) param.getVal(), (int) param.getExt1());
}
setBool(bool, query, param.getPrevQueryType());
break;
case ANN:
realField = getRealField(param.getColumn(), mappingColumnMap, entityInfo);
if (entityInfo.isEnableKnnPlugin()) {
query = buildAnnQueryWithPlugin(realField, (float[]) param.getVal(), (int) param.getExt1(), (int) param.getExt2());
} else {
query = buildAnnQuery(realField, (float[]) param.getVal(), (int) param.getExt1(), (int) param.getExt2());
}
setBool(bool, query, param.getPrevQueryType());
break;
case PARENT_ID:
realField = getRealField(param.getColumn(), mappingColumnMap, entityInfo);
query = Query.of(q -> q.parentId(p -> p.type(realField).id((String) param.getVal())));
@ -288,6 +309,7 @@ public class WrapperProcessor {
// 设置bool查询参数
setBool(bool, Query.of(x -> x.nested(nestedQueryBuilder.build())), param.getPrevQueryType());
break;
case HAS_PARENT:
// 如果用户没指定type框架可根据entityInfo上下文自行推断出其父type
String column = Optional.ofNullable(param.getColumn()).orElse(entityInfo.getParentJoinAlias());
@ -470,9 +492,10 @@ public class WrapperProcessor {
/**
* 获取兜底索引名称
*
* @param entityClass 实体类
* @param indexName 索引名
* @param <T> 泛型
* @param indexName 索引名
* @param <T> 泛型
* @return 索引名称
*/
public static <T> String getIndexName(Class<T> entityClass, String indexName) {
@ -487,8 +510,8 @@ public class WrapperProcessor {
* 获取兜底索引名称数组
*
* @param entityClass 实体类
* @param indexNames 原始索引名称数组
* @param <T> 泛型
* @param indexNames 原始索引名称数组
* @param <T> 泛型
* @return 目标索引名称数组
*/
public static <T> List<String> getIndexName(Class<T> entityClass, String[] indexNames) {
@ -506,9 +529,10 @@ public class WrapperProcessor {
/**
* 获取兜底索引名称数组
*
* @param entityClass 实体类
* @param indexNames 原始索引名称数组
* @param <T> 泛型
* @param indexNames 原始索引名称数组
* @param <T> 泛型
* @return 目标索引名称数组
*/
public static <T> List<String> getIndexName(Class<T> entityClass, Collection<String> indexNames) {
@ -784,4 +808,127 @@ public class WrapperProcessor {
throw new IllegalArgumentException();
}
}
/**
 * Builds a {@code knn} query with a default candidate pool of ten times {@code k}.
 * <p>
 * All validation and the actual query construction are performed by
 * {@link #buildAnnQuery(String, float[], int, int)}.
 *
 * @param column   name of the vector field to search
 * @param queryVec query vector
 * @param k        number of most similar results to return
 * @return the query produced by {@code buildAnnQuery}
 */
private static Query buildKnnQuery(String column, float[] queryVec, int k) {
    // Multiply in long and clamp, so a very large k cannot overflow into a negative candidate count.
    int numCandidates = (int) Math.min((long) k * 10L, (long) Integer.MAX_VALUE);
    return buildAnnQuery(column, queryVec, k, numCandidates);
}
/**
 * Builds a {@code knn} query by rendering it as JSON and parsing it with
 * {@code Query.withJson}.
 * <p>
 * According to the original author's note, the JSON route is used for compatibility with
 * client versions that do not ship a native knn query builder.
 *
 * @param column        name of the vector field to search; must not be null
 * @param queryVec      query vector; must be non-empty and contain only finite values
 * @param k             number of most similar results to return; must be positive
 * @param numCandidates number of candidates
 * @return the query parsed from the generated JSON
 * @throws IllegalArgumentException if {@code column} is null, {@code queryVec} is null, empty
 *                                  or contains NaN/infinite values, or {@code k} is not positive
 */
private static Query buildAnnQuery(String column, float[] queryVec, int k, int numCandidates) {
    if (queryVec == null || queryVec.length == 0) {
        throw new IllegalArgumentException("查询向量(queryVec)不能为空。");
    }
    if (k <= 0) {
        throw new IllegalArgumentException("K值必须为正整数。");
    }
    if (column == null) {
        // Previously a null column was silently rendered as the field name "null".
        throw new IllegalArgumentException("向量字段名(column)不能为空。");
    }

    // Assemble the JSON by hand with locale-independent number rendering
    // (String.format("%d") may emit localized digits under some default locales).
    StringBuilder json = new StringBuilder(64 + queryVec.length * 12);
    json.append("{\"knn\":{\"field\":");
    appendJsonString(json, column);
    json.append(",\"query_vector\":[");
    for (int i = 0; i < queryVec.length; i++) {
        float component = queryVec[i];
        if (Float.isNaN(component) || Float.isInfinite(component)) {
            // NaN / Infinity have no JSON representation and would yield an unparsable body.
            throw new IllegalArgumentException("查询向量(queryVec)不能包含NaN或无穷值。");
        }
        if (i > 0) {
            json.append(", ");
        }
        // Same textual form as Float.toString / String.valueOf(float).
        json.append(component);
    }
    json.append("],\"k\":").append(k)
            .append(",\"num_candidates\":").append(numCandidates)
            .append("}}");
    String knnQueryJson = json.toString();

    // withJson expects a Reader, hence the StringReader around the generated text.
    return Query.of(q -> q.withJson(new StringReader(knnQueryJson)));
}

/**
 * Appends {@code raw} to {@code out} as a quoted JSON string, escaping quotes, backslashes
 * and control characters so the value cannot break out of the string literal.
 */
private static void appendJsonString(StringBuilder out, String raw) {
    out.append('"');
    for (int i = 0; i < raw.length(); i++) {
        char c = raw.charAt(i);
        if (c == '"' || c == '\\') {
            out.append('\\').append(c);
        } else if (c < 0x20) {
            out.append(String.format(Locale.ROOT, "\\u%04x", (int) c));
        } else {
            out.append(c);
        }
    }
    out.append('"');
}
/**
 * Builds a {@code script_score} query that scores every document matched by
 * {@code match_all} with {@code cosineSimilarity(query_vector, doc[column]) + 1.0}.
 * <p>
 * Per the original author's notes this is an exact (brute-force) search that may perform
 * worse on large data sets than a dedicated k-NN implementation.
 * <p>
 * NOTE(review): despite the method name, the query built here is a plain
 * {@code script_score} query; confirm the intended plugin / non-plugin dispatch with callers.
 *
 * @param column   name of the field holding the vector; must not be null
 * @param queryVec query vector; must be non-empty
 * @param k        passed to the script as parameter {@code k}; the script source itself
 *                 does not reference it
 * @return the {@code script_score} query
 * @throws IllegalArgumentException if {@code column} is null or {@code queryVec} is null or empty
 */
private static Query buildKnnQueryWithPlugin(String column, float[] queryVec, int k) {
    if (queryVec == null || queryVec.length == 0) {
        throw new IllegalArgumentException("查询向量(queryVec)不能为空。");
    }
    if (column == null) {
        throw new IllegalArgumentException("向量字段名(column)不能为空。");
    }

    // The field name is embedded in a single-quoted Painless string literal, so escape
    // backslashes and single quotes to keep it from terminating the literal early.
    String fieldLiteral = column.replace("\\", "\\\\").replace("'", "\\'");

    // Original author's rationale: cosineSimilarity yields values in [-1, 1], and scores
    // must be non-negative, hence the + 1.0 shift; a higher score means more similar.
    String scriptSource = "cosineSimilarity(params.query_vector, doc['" + fieldLiteral + "']) + 1.0";

    Map<String, JsonData> params = new HashMap<>();
    params.put("query_vector", JsonData.of(queryVec));
    params.put("k", JsonData.of(k));

    ScriptScoreQuery scriptScoreQuery = ScriptScoreQuery.of(ssq -> ssq
            // Base query: score all documents.
            .query(q -> q.matchAll(ma -> ma))
            // Inline script together with its parameters.
            .script(s -> s
                    .inline(i -> i
                            .source(scriptSource)
                            .params(params)
                    )
            )
    );
    return new Query(scriptScoreQuery);
}
/**
 * Builds a custom query named {@code knn} whose payload carries {@code field},
 * {@code query_vector}, {@code k} and {@code num_candidates}, in that order.
 * <p>
 * Per the original author's notes this targets a k-NN plugin that provides a dedicated
 * {@code knn} query type, which is why the query is built through {@code _custom}.
 * NOTE(review): confirm the payload shape against the plugin actually deployed.
 *
 * @param column        name of the field holding the vector
 * @param queryVec      query vector; must be non-empty
 * @param k             number of nearest neighbours to return; must be positive
 * @param numCandidates number of candidates
 * @return the custom {@code knn} query
 * @throws IllegalArgumentException if {@code queryVec} is null or empty, or {@code k} is not positive
 */
private static Query buildAnnQueryWithPlugin(String column, float[] queryVec, int k, int numCandidates) {
    // Same argument checks as the JSON-based buildAnnQuery, so both paths fail the same way.
    if (queryVec == null || queryVec.length == 0) {
        throw new IllegalArgumentException("查询向量(queryVec)不能为空。");
    }
    if (k <= 0) {
        throw new IllegalArgumentException("K值必须为正整数。");
    }

    // LinkedHashMap keeps the payload keys in insertion order.
    Map<String, Object> knnQueryPayload = new LinkedHashMap<>();
    knnQueryPayload.put("field", column);
    knnQueryPayload.put("query_vector", queryVec);
    knnQueryPayload.put("k", k);
    knnQueryPayload.put("num_candidates", numCandidates);

    // "knn" is registered as a custom (non-standard) query variant.
    return Query.of(q -> q._custom("knn", knnQueryPayload));
}
}

View File

@ -29,10 +29,7 @@ import static org.dromara.easyes.common.constants.BaseEsConstants.*;
* Copyright © 2021 xpc1024 All Rights Reserved
**/
public class EntityInfoHelper {
/**
* 获取索引settings方法名
*/
private final static String GET_SETTINGS_METHOD = "getSettings";
/**
* 储存反射类表信息
*/
@ -90,6 +87,8 @@ public class EntityInfoHelper {
initSettings(clazz, entityInfo);
// 初始化封装@Join父子类型注解信息
initJoin(clazz, globalConfig, entityInfo);
// 初始化knn插件配置
entityInfo.setEnableKnnPlugin(globalConfig.getDbConfig().isEnableKnnPlugin());
// 放入缓存
ENTITY_INFO_CACHE.put(clazz, entityInfo);

View File

@ -154,8 +154,8 @@ public class Document extends BaseJoin {
/**
* 稠密向量类型dims 非负 最大为2048
*/
@IndexField(fieldType = FieldType.DENSE_VECTOR, dims = 3)
private double[] vector;
@IndexField(fieldType = FieldType.DENSE_VECTOR, dims = 3, index = true)
private double[] myVectors;
@IndexField(fieldType = FieldType.KEYWORD, index = false)
private String indexFalse;

View File

@ -86,7 +86,7 @@ public class AllTest {
document.setMultiField("葡萄糖酸钙口服溶液");
document.setEnglish("Calcium Gluconate");
document.setBigNum(new BigDecimal("66.66"));
document.setVector(new double[]{0.39684247970581666, 0.768707156181666, 0.5145490765571666});
document.setMyVectors(new double[]{0.39684247970581666, 0.768707156181666, 0.5145490765571666});
// System.out.println(JsonUtils.toJsonPrettyStr(document));
int successCount = documentMapper.insert(document);
Assertions.assertEquals(successCount, 1);
@ -107,7 +107,7 @@ public class AllTest {
Point point = new Point(13.400544 + i, 52.530286 + i);
document.setGeoLocation(point.toString());
document.setStarNum(i);
document.setVector(new double[]{35.89684247970581666, 86.268707156181666, 133.1145490765571666});
document.setMyVectors(new double[]{35.89684247970581666, 86.268707156181666, 133.1145490765571666});
// 针对个别数据 造一些差异项 方便测试不同场景
if (i == 2) {
document.setLocation("40.17836693398477,116.64002551005981");

View File

@ -0,0 +1,69 @@
package org.dromara.easyes.test.vector;
import org.dromara.easyes.core.conditions.select.LambdaEsQueryWrapper;
import org.dromara.easyes.test.TestEasyEsApplication;
import org.dromara.easyes.test.entity.Document;
import org.dromara.easyes.test.mapper.DocumentMapper;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import javax.annotation.Resource;
import java.util.List;
/**
 * Integration tests for knn / ann vector queries on the {@link Document} index.
 * <p>
 * NOTE(review): the update and query tests appear to rely on the index and on data written
 * by other tests, and no execution order is declared here — confirm how they are meant to run.
 *
 * @author lzh
 * @date 25/09
 */
@SpringBootTest(classes = TestEasyEsApplication.class)
public class KnnAndAnnTest {
    /** Id of the single document written and updated by these tests. */
    private static final String TEST_DATA_ID = "lzh-2025";

    @Resource
    private DocumentMapper documentMapper;

    /** Creates the index for {@link Document}. */
    @Test
    public void testCreateVectorIndex() {
        documentMapper.createIndex();
    }

    /** Inserts one document carrying a three-dimensional vector. */
    @Test
    public void testInsertVectorData() {
        Document document = new Document();
        document.setEsId(TEST_DATA_ID);
        document.setContent("测试插入向量数据");
        document.setMyVectors(new double[]{0.39684247970581055, 0.7687071561813354, 0.5145490765571594});
        int successCount = documentMapper.insert(document);
        Assertions.assertEquals(1, successCount);
    }

    /** Updates the vector of the document identified by {@link #TEST_DATA_ID}. */
    @Test
    public void testUpdateVectorData() {
        Document document = new Document();
        document.setEsId(TEST_DATA_ID);
        document.setContent("测试更新向量数据");
        document.setMyVectors(new double[]{0.39684247970581666, 0.768707156181666, 0.5145490765571666});
        int successCount = documentMapper.updateById(document);
        Assertions.assertEquals(1, successCount);
    }

    /** Runs a knn query with k = 10 and checks that a result list comes back. */
    @Test
    public void testknn() {
        LambdaEsQueryWrapper<Document> wrapper = new LambdaEsQueryWrapper<>();
        float[] queryVector = {0.39684247970581055f, 0.7687071561813354f, 0.5145490765571594f};
        wrapper.knn(Document::getMyVectors, queryVector, 10);
        List<Document> documents = documentMapper.selectList(wrapper);
        // Without an assertion this test could only fail by throwing.
        Assertions.assertNotNull(documents, "knn query should return a result list");
        System.out.println(documents);
    }

    /** Runs an ann query with k = 10 and 100 candidates and checks that a result list comes back. */
    @Test
    public void testAnn() {
        LambdaEsQueryWrapper<Document> wrapper = new LambdaEsQueryWrapper<>();
        float[] queryVector = {0.39684247970581055f, 0.7687071561813354f, 0.5145490765571594f};
        wrapper.ann(Document::getMyVectors, queryVector, 10, 100);
        List<Document> documents = documentMapper.selectList(wrapper);
        // Without an assertion this test could only fail by throwing.
        Assertions.assertNotNull(documents, "ann query should return a result list");
        System.out.println(documents);
    }
}

View File

@ -32,7 +32,7 @@ public class VectorTest {
Document document = new Document();
document.setEsId("35f6fff8-1d3b-48b6-a765-028ec81b1437");
document.setContent("测试插入数据");
document.setVector(new double[]{0.39684247970581055, 0.7687071561813354, 0.5145490765571594});
document.setMyVectors(new double[]{0.39684247970581055, 0.7687071561813354, 0.5145490765571594});
int successCount = documentMapper.insert(document);
Assertions.assertEquals(1, successCount);
}
@ -43,7 +43,7 @@ public class VectorTest {
Document document = new Document();
document.setEsId("35f6fff8-1d3b-48b6-a765-028ec81b1437");
document.setContent("测试更新向量数据");
document.setVector(new double[]{0.39684247970581666, 0.768707156181666, 0.5145490765571666});
document.setMyVectors(new double[]{0.39684247970581666, 0.768707156181666, 0.5145490765571666});
int successCount = documentMapper.updateById(document);
Assertions.assertEquals(1, successCount);
}

View File

@ -2,9 +2,9 @@ easy-es:
# enable: true
address: 192.168.0.18:9200
compatible: true
# schema: http
# username: elastic
# password: mg123456
# NOTE(review): what looks like a real username/password is committed here in plain text.
# Consider supplying credentials through environment-specific or externalised configuration
# and rotating this password.
schema: https
username: elastic
password: 1dcNTydUfH4GuN9549wj
keep-alive-millis: 18000
global-config:
i-kun-mode: false