mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-12-08 10:09:06 +08:00
Merge pull request #4092 from ET-yzk/v5-dev
fix(dfa): 修复WordTree.addWord在关键词以停顿词结尾时词尾标记错误的问题
This commit is contained in:
commit
fa2e9ea54a
@ -102,23 +102,22 @@ public class WordTree extends HashMap<Character, WordTree> {
|
|||||||
WordTree parent = null;
|
WordTree parent = null;
|
||||||
WordTree current = this;
|
WordTree current = this;
|
||||||
WordTree child;
|
WordTree child;
|
||||||
char currentChar = 0;
|
Character lastAcceptedChar = null;
|
||||||
|
|
||||||
final int length = word.length();
|
final int length = word.length();
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
currentChar = word.charAt(i);
|
char currentChar = word.charAt(i);
|
||||||
if (charFilter.accept(currentChar)) {//只处理合法字符
|
if (charFilter.accept(currentChar)) {//只处理合法字符
|
||||||
child = current.get(currentChar);
|
child = current.computeIfAbsent(currentChar, c -> new WordTree());
|
||||||
if (child == null) {
|
|
||||||
//无子类,新建一个子节点后存放下一个字符
|
|
||||||
child = new WordTree();
|
|
||||||
current.put(currentChar, child);
|
|
||||||
}
|
|
||||||
parent = current;
|
parent = current;
|
||||||
current = child;
|
current = child;
|
||||||
|
lastAcceptedChar = currentChar;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// 仅当存在父节点且存在非停顿词时,才设置词尾标记
|
||||||
|
// 当 null != parent 条件成立时,lastAcceptedChar != null 必然成立,故无需再单独判断 lastAcceptedChar 是否为 null
|
||||||
if (null != parent) {
|
if (null != parent) {
|
||||||
parent.setEnd(currentChar);
|
parent.setEnd(lastAcceptedChar);
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -116,6 +116,34 @@ public class DfaTest {
|
|||||||
assertEquals(all, CollUtil.newArrayList("t-io"));
|
assertEquals(all, CollUtil.newArrayList("t-io"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Github Issue #4091
|
||||||
|
* 测试当关键词以停顿词结尾时,其合法前缀是否能被正确匹配
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void addWordWithTrailingFilteredCharTest() {
|
||||||
|
WordTree tree = new WordTree();
|
||||||
|
tree.addWord("hello("); // 以停顿词 '(' 结尾
|
||||||
|
|
||||||
|
List<String> matches = tree.matchAll("hello", -1);
|
||||||
|
assertEquals(1, matches.size());
|
||||||
|
assertEquals("hello", matches.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Github Issue #4091
|
||||||
|
* 测试关键词中间包含停顿词的情况
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void addWordWithMiddleFilteredCharTest() {
|
||||||
|
WordTree tree = new WordTree();
|
||||||
|
tree.addWord("he(llo"); // 中间 '(' 被过滤
|
||||||
|
|
||||||
|
List<String> matches = tree.matchAll("hello", -1);
|
||||||
|
assertEquals(1, matches.size());
|
||||||
|
assertEquals("hello", matches.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void aTest(){
|
public void aTest(){
|
||||||
WordTree tree = new WordTree();
|
WordTree tree = new WordTree();
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user