mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-12-08 10:09:06 +08:00
Merge pull request #4092 from ET-yzk/v5-dev
fix(dfa): 修复WordTree.addWord在关键词以停顿词结尾时词尾标记错误的问题
This commit is contained in:
commit
fa2e9ea54a
@ -102,23 +102,22 @@ public class WordTree extends HashMap<Character, WordTree> {
|
||||
WordTree parent = null;
|
||||
WordTree current = this;
|
||||
WordTree child;
|
||||
char currentChar = 0;
|
||||
Character lastAcceptedChar = null;
|
||||
|
||||
final int length = word.length();
|
||||
for (int i = 0; i < length; i++) {
|
||||
currentChar = word.charAt(i);
|
||||
char currentChar = word.charAt(i);
|
||||
if (charFilter.accept(currentChar)) {//只处理合法字符
|
||||
child = current.get(currentChar);
|
||||
if (child == null) {
|
||||
//无子类,新建一个子节点后存放下一个字符
|
||||
child = new WordTree();
|
||||
current.put(currentChar, child);
|
||||
}
|
||||
child = current.computeIfAbsent(currentChar, c -> new WordTree());
|
||||
parent = current;
|
||||
current = child;
|
||||
lastAcceptedChar = currentChar;
|
||||
}
|
||||
}
|
||||
// 仅当存在父节点且存在非停顿词时,才设置词尾标记
|
||||
// 当 null != parent 条件成立时,lastAcceptedChar != null 必然成立,故也可以省去
|
||||
if (null != parent) {
|
||||
parent.setEnd(currentChar);
|
||||
parent.setEnd(lastAcceptedChar);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -116,6 +116,34 @@ public class DfaTest {
|
||||
assertEquals(all, CollUtil.newArrayList("t-io"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Github Issue #4091
|
||||
* Verifies that when a keyword ends with a stop character, its valid prefix can still be matched correctly.
* Adds "hello(" to a fresh tree and expects matching the text "hello" to yield exactly one hit, "hello".
|
||||
*/
|
||||
@Test
|
||||
public void addWordWithTrailingFilteredCharTest() {
|
||||
WordTree tree = new WordTree();
|
||||
tree.addWord("hello("); // ends with the stop character '('
|
||||
|
||||
List<String> matches = tree.matchAll("hello", -1);
|
||||
assertEquals(1, matches.size());
|
||||
assertEquals("hello", matches.get(0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Github Issue #4091
|
||||
* Verifies the case where a keyword contains a stop character in the middle.
* Adds "he(llo" to a fresh tree and expects matching the text "hello" to yield exactly one hit, "hello".
|
||||
*/
|
||||
@Test
|
||||
public void addWordWithMiddleFilteredCharTest() {
|
||||
WordTree tree = new WordTree();
|
||||
tree.addWord("he(llo"); // the '(' in the middle is filtered out
|
||||
|
||||
List<String> matches = tree.matchAll("hello", -1);
|
||||
assertEquals(1, matches.size());
|
||||
assertEquals("hello", matches.get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void aTest(){
|
||||
WordTree tree = new WordTree();
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user