Merge pull request #4092 from ET-yzk/v5-dev

fix(dfa): 修复WordTree.addWord在关键词以停顿词结尾时词尾标记错误的问题
This commit is contained in:
Golden Looly 2025-10-10 22:45:11 +08:00 committed by GitHub
commit fa2e9ea54a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 36 additions and 9 deletions

View File

@ -102,23 +102,22 @@ public class WordTree extends HashMap<Character, WordTree> {
WordTree parent = null;
WordTree current = this;
WordTree child;
char currentChar = 0;
Character lastAcceptedChar = null;
final int length = word.length();
for (int i = 0; i < length; i++) {
currentChar = word.charAt(i);
char currentChar = word.charAt(i);
if (charFilter.accept(currentChar)) {//只处理合法字符
child = current.get(currentChar);
if (child == null) {
//无子类新建一个子节点后存放下一个字符
child = new WordTree();
current.put(currentChar, child);
}
child = current.computeIfAbsent(currentChar, c -> new WordTree());
parent = current;
current = child;
lastAcceptedChar = currentChar;
}
}
// 仅当存在父节点且存在非停顿词时才设置词尾标记
// null != parent 条件成立时lastAcceptedChar != null 必然成立故也可以省去
if (null != parent) {
parent.setEnd(currentChar);
parent.setEnd(lastAcceptedChar);
}
return this;
}

View File

@ -116,6 +116,34 @@ public class DfaTest {
assertEquals(all, CollUtil.newArrayList("t-io"));
}
/**
 * GitHub Issue #4091.
 * <p>
 * Checks that when a keyword ends with a stop character, its valid prefix
 * (the keyword without the trailing stop character) can still be matched.
 */
@Test
public void addWordWithTrailingFilteredCharTest() {
	// Arrange: register a keyword whose last character is the stop character '('
	final WordTree wordTree = new WordTree();
	wordTree.addWord("hello(");

	// Act: search a text that contains only the accepted characters
	final List<String> found = wordTree.matchAll("hello", -1);

	// Assert: exactly one hit, and it is the keyword without the stop character
	assertEquals(1, found.size());
	assertEquals("hello", found.get(0));
}
/**
 * GitHub Issue #4091.
 * <p>
 * Checks the case where a stop character appears in the middle of a keyword.
 */
@Test
public void addWordWithMiddleFilteredCharTest() {
	// Arrange: register a keyword with the stop character '(' in the middle
	final WordTree wordTree = new WordTree();
	wordTree.addWord("he(llo");

	// Act: search a text that spells the keyword without the stop character
	final List<String> found = wordTree.matchAll("hello", -1);

	// Assert: exactly one hit, equal to the keyword with '(' dropped
	assertEquals(1, found.size());
	assertEquals("hello", found.get(0));
}
@Test
public void aTest(){
WordTree tree = new WordTree();