perf: 使用Sunday算法优化子串查询

This commit is contained in:
LettuceLeaves 2025-11-19 01:03:42 +08:00
parent 1bba40a32f
commit 3f15fdd44c
2 changed files with 88 additions and 14 deletions

View File

@ -2432,14 +2432,11 @@ public class CharSequenceUtil extends StrValidator {
continue; continue;
} }
if (ignoreCase) { if (ignoreCase) {
final char u1 = Character.toUpperCase(c1); final char u1 = Character.toLowerCase(c1);
final char u2 = Character.toUpperCase(c2); final char u2 = Character.toLowerCase(c2);
if (u1 == u2) { if (u1 == u2) {
continue; continue;
} }
if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
continue;
}
} }
return false; return false;
} }

View File

@ -20,6 +20,10 @@ import cn.hutool.v7.core.lang.Assert;
import cn.hutool.v7.core.text.CharSequenceUtil; import cn.hutool.v7.core.text.CharSequenceUtil;
import java.io.Serial; import java.io.Serial;
import java.util.HashMap;
import java.util.Map;
import static cn.hutool.v7.core.text.CharSequenceUtil.isSubEquals;
/** /**
* 字符串查找器 * 字符串查找器
@ -44,6 +48,8 @@ public class StrFinder extends TextFinder {
private final CharSequence strToFind; private final CharSequence strToFind;
private final boolean caseInsensitive; private final boolean caseInsensitive;
private Map<Character, Integer> forwardOffsetMap;
private Map<Character, Integer> reverseOffsetMap;
/** /**
* 构造 * 构造
@ -61,23 +67,54 @@ public class StrFinder extends TextFinder {
public int start(int from) { public int start(int from) {
Assert.notNull(this.text, "Text to find must be not null!"); Assert.notNull(this.text, "Text to find must be not null!");
final int subLen = strToFind.length(); final int subLen = strToFind.length();
final int textLen = text.length();
if (from < 0) { // 基于Sunday算法实现高效子串查询
from = 0;
}
int endLimit = getValidEndIndex();
if (negative) { if (negative) {
for (int i = from; i > endLimit; i--) { if (this.reverseOffsetMap == null) {
if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) { this.reverseOffsetMap = buildReverseOffsetMap(strToFind, caseInsensitive);
}
int maxIndex = textLen - subLen;
if (from > maxIndex) {
from = maxIndex;
}
int i = from;
while (i >= 0) {
if (isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
return i; return i;
} }
if (i - 1 < 0) {
break;
}
char preChar = text.charAt(i - 1);
int jump = reverseOffsetMap.getOrDefault(
caseInsensitive ? Character.toLowerCase(preChar) : preChar,
subLen + 1
);
i -= jump;
} }
} else { } else {
endLimit = endLimit - subLen + 1; if (this.forwardOffsetMap == null) {
for (int i = from; i < endLimit; i++) { this.forwardOffsetMap = buildForwardOffsetMap(strToFind, caseInsensitive);
if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) { }
if (from < 0) {
from = 0;
}
int endLimit = textLen - subLen;
int i = from;
while (i <= endLimit) {
if (isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
return i; return i;
} }
if (i + subLen >= textLen) {
break;
}
char nextChar = text.charAt(i + subLen);
int jump = forwardOffsetMap.getOrDefault(
caseInsensitive ? Character.toLowerCase(nextChar) : nextChar,
subLen + 1
);
i += jump;
} }
} }
@ -91,4 +128,44 @@ public class StrFinder extends TextFinder {
} }
return start + strToFind.length(); return start + strToFind.length();
} }
/**
 * Builds the shift table consulted when searching forward (left to right).
 * <p>
 * Every character of {@code pattern} is mapped to {@code pattern.length() - index}. The pattern is
 * walked from its first character to its last, so when a character occurs more than once the entry
 * for its rightmost occurrence (the smallest shift) is the one that remains.
 *
 * @param pattern         the string being searched for
 * @param caseInsensitive when {@code true}, keys are stored via {@link Character#toLowerCase(char)}
 * @return a map from pattern character to the forward shift distance
 */
private static Map<Character, Integer> buildForwardOffsetMap(CharSequence pattern, boolean caseInsensitive) {
    final int length = pattern.length();
    final Map<Character, Integer> shifts = new HashMap<>(Math.min(length, 128));
    for (int index = 0; index < length; index++) {
        final char raw = pattern.charAt(index);
        // Fold the key only in case-insensitive mode so lookups can fold the same way.
        final char key = caseInsensitive ? Character.toLowerCase(raw) : raw;
        // A later (more to the right) occurrence overwrites an earlier one on purpose.
        shifts.put(key, length - index);
    }
    return shifts;
}
/**
 * Builds the shift table consulted when searching backward (right to left).
 * <p>
 * Every character of {@code pattern} is mapped to {@code index + 1}. The pattern is walked from its
 * last character to its first, so when a character occurs more than once the entry for its leftmost
 * occurrence (the smallest shift) is the one that remains.
 *
 * @param pattern         the string being searched for
 * @param caseInsensitive when {@code true}, keys are stored via {@link Character#toLowerCase(char)}
 * @return a map from pattern character to the backward shift distance
 */
private static Map<Character, Integer> buildReverseOffsetMap(CharSequence pattern, boolean caseInsensitive) {
    final int length = pattern.length();
    final Map<Character, Integer> shifts = new HashMap<>(Math.min(length, 128));
    // "shift" is the 1-based position of the character, i.e. index + 1.
    for (int shift = length; shift >= 1; shift--) {
        final char raw = pattern.charAt(shift - 1);
        // Fold the key only in case-insensitive mode so lookups can fold the same way.
        final char key = caseInsensitive ? Character.toLowerCase(raw) : raw;
        // An earlier (more to the left) occurrence overwrites a later one on purpose.
        shifts.put(key, shift);
    }
    return shifts;
}
} }