From 3c067f18712c2f1a10fe9fae00393d1aa1291ae5 Mon Sep 17 00:00:00 2001 From: Looly Date: Tue, 25 Nov 2025 17:07:17 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96`EscapeUtil`=EF=BC=8C?= =?UTF-8?q?=E5=85=BC=E5=AE=B9=E4=B8=8D=E8=A7=84=E8=8C=83=E7=9A=84=E8=BD=AC?= =?UTF-8?q?=E4=B9=89=EF=BC=88pr#4150@Github=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 4 +- .../main/java/cn/hutool/core/map/BiMap.java | 10 +- .../java/cn/hutool/core/util/EscapeUtil.java | 71 +++++++--- .../cn/hutool/core/util/EscapeUtilTest.java | 122 ++++++++++++++++++ 4 files changed, 183 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2d898cb0..9e439f7bb 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # 🚀Changelog ------------------------------------------------------------------------------------------------------------- -# 5.8.42(2025-11-24) +# 5.8.42(2025-11-25) ### 🐣新特性 * 【core 】 `ListUtil`增加`zip`方法(pr#4052@Github) @@ -9,6 +9,7 @@ * 【ai 】 增加代理支持(pr#4107@Github) * 【core 】 `CharSequenceUtil`增加`builder`方法重载(pr#4107@Github) * 【core 】 `Combination`和`Arrangement `重构避免数组频繁拷贝,并避免溢出(pr#4144@Github) +* 【core 】 优化`EscapeUtil`,兼容不规范的转义(pr#4150@Github) ### 🐞Bug修复 * 【jwt 】 修复verify方法在定义alg为`none`时验证失效问题(issue#4105@Github) @@ -27,6 +28,7 @@ * 【core 】 修复`CreditCodeUtil.randomCreditCode`部分字母未使用问题(pr#4149@Github) * 【core 】 修复`CacheableAnnotationAttribute`可能并发问题(pr#4149@Github) * 【core 】 修复`URLUtil.url`未断开连接问题(pr#4149@Github) +* 【core 】 修复`Bimap.put`重复put问题(pr#4150@Github) ------------------------------------------------------------------------------------------------------------- # 5.8.41(2025-10-12) diff --git a/hutool-core/src/main/java/cn/hutool/core/map/BiMap.java b/hutool-core/src/main/java/cn/hutool/core/map/BiMap.java index a376d71f1..fca928cbd 100644 --- a/hutool-core/src/main/java/cn/hutool/core/map/BiMap.java +++ b/hutool-core/src/main/java/cn/hutool/core/map/BiMap.java @@ -39,7 +39,7 
@@ public class BiMap extends MapWrapper { } this.inverse.put(value, key); } - return super.put(key, value); + return oldValue; } @Override @@ -94,10 +94,12 @@ public class BiMap extends MapWrapper { @Override public V putIfAbsent(K key, V value) { - if (null != this.inverse) { - this.inverse.putIfAbsent(value, key); + final V oldValue = super.putIfAbsent(key, value); + // 只有当oldValue为null时(即key之前不存在),才更新反向Map + if (null == oldValue && null != this.inverse) { + this.inverse.put(value, key); } - return super.putIfAbsent(key, value); + return oldValue; } @Override diff --git a/hutool-core/src/main/java/cn/hutool/core/util/EscapeUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/EscapeUtil.java index f2b3dee92..1f362b964 100755 --- a/hutool-core/src/main/java/cn/hutool/core/util/EscapeUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/util/EscapeUtil.java @@ -10,7 +10,7 @@ import cn.hutool.core.text.escape.XmlUnescape; * 转义和反转义工具类Escape / Unescape
* escape采用ISO Latin字符集对指定的字符串进行编码。
* 所有的空格符、标点符号、特殊字符以及其他非ASCII字符都将被转化成%xx格式的字符编码(xx等于该字符在字符集表里面的编码的16进制数字)。 - * TODO 6.x迁移到core.text.escape包下 + * TODO 7.x迁移到core.text.escape包下 * * @author xiaoleilu */ @@ -20,11 +20,11 @@ public class EscapeUtil { * 不转义的符号编码 */ private static final String NOT_ESCAPE_CHARS = "*@-_+./"; - private static final Filter JS_ESCAPE_FILTER = c -> false == ( - Character.isDigit(c) - || Character.isLowerCase(c) - || Character.isUpperCase(c) - || StrUtil.contains(NOT_ESCAPE_CHARS, c) + private static final Filter JS_ESCAPE_FILTER = c -> !( + Character.isDigit(c) + || Character.isLowerCase(c) + || Character.isUpperCase(c) + || StrUtil.contains(NOT_ESCAPE_CHARS, c) ); /** @@ -122,7 +122,7 @@ public class EscapeUtil { char c; for (int i = 0; i < content.length(); i++) { c = content.charAt(i); - if (false == filter.accept(c)) { + if (!filter.accept(c)) { tmp.append(c); } else if (c < 256) { tmp.append("%"); @@ -143,36 +143,69 @@ public class EscapeUtil { } /** - * Escape解码 + * Escape解码支持两种转义格式的解码: + *
 <ul> + *
   <li>%XX - 两位十六进制数字,用于表示ASCII字符(0-255)</li> + *
   <li>%uXXXX - 四位十六进制数字,用于表示Unicode字符</li> + *
 </ul> + * <p>

+ * 对于不完整的转义序列,本方法会将其原样保留而不抛出异常: + * <ul> + *
   <li>字符串末尾的单独"%"字符会被原样保留</li> + *
   <li>"%u"后面不足4位十六进制数字时,整个不完整序列会被原样保留</li> + *
   <li>"%"后面不足2位十六进制数字时(非%u格式),整个不完整序列会被原样保留</li> + *
 </ul>
+ * 例如: + * <pre>
+	 * unescape("test%")      = "test%"     // 末尾的%被保留
+	 * unescape("test%u12")   = "test%u12"  // 不足4位,原样保留
+	 * unescape("test%2")     = "test%2"    // 不足2位,原样保留
+	 * unescape("test%20")    = "test "     // 正常解码空格
+	 * unescape("test%u4E2D") = "test中"    // 正常解码中文字符
+	 * </pre>
* * @param content 被转义的内容 * @return 解码后的字符串 */ - public static String unescape(String content) { + public static String unescape(final String content) { if (StrUtil.isBlank(content)) { return content; } - StringBuilder tmp = new StringBuilder(content.length()); + final int len = content.length(); + final StringBuilder tmp = new StringBuilder(len); int lastPos = 0; int pos; char ch; - while (lastPos < content.length()) { + while (lastPos < len) { pos = content.indexOf("%", lastPos); if (pos == lastPos) { - if (content.charAt(pos + 1) == 'u') { - ch = (char) Integer.parseInt(content.substring(pos + 2, pos + 6), 16); - tmp.append(ch); - lastPos = pos + 6; + if (pos + 1 < len && content.charAt(pos + 1) == 'u') { + if (pos + 6 <= len) { + ch = (char) Integer.parseInt(content.substring(pos + 2, pos + 6), 16); + tmp.append(ch); + lastPos = pos + 6; + } else { + // Not enough characters, append as-is + tmp.append(content.substring(pos)); + lastPos = len; + } } else { - ch = (char) Integer.parseInt(content.substring(pos + 1, pos + 3), 16); - tmp.append(ch); - lastPos = pos + 3; + // Check if there's enough characters for hex escape (%XX) + if (pos + 3 <= len) { + ch = (char) Integer.parseInt(content.substring(pos + 1, pos + 3), 16); + tmp.append(ch); + lastPos = pos + 3; + } else { + // Not enough characters, append as-is + tmp.append(content.substring(pos)); + lastPos = len; + } } } else { if (pos == -1) { tmp.append(content.substring(lastPos)); - lastPos = content.length(); + lastPos = len; } else { tmp.append(content, lastPos, pos); lastPos = pos; diff --git a/hutool-core/src/test/java/cn/hutool/core/util/EscapeUtilTest.java b/hutool-core/src/test/java/cn/hutool/core/util/EscapeUtilTest.java index 5f27e3cba..8fb9861cb 100755 --- a/hutool-core/src/test/java/cn/hutool/core/util/EscapeUtilTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/util/EscapeUtilTest.java @@ -66,4 +66,126 @@ public class EscapeUtilTest { final String s = EscapeUtil.unescapeHtml4(str); 
assertEquals("'some text with single quotes'", s); } + + @Test + public void escapeXmlTest(){ + final String a = "<>"; + final String escape = EscapeUtil.escapeXml(a); + assertEquals("<>", escape); + assertEquals("中文“双引号”", EscapeUtil.escapeXml("中文“双引号”")); + } + + @Test + void testUnescapeNull() { + assertNull(EscapeUtil.unescape(null)); + } + + @Test + void testUnescapeEmpty() { + assertEquals("", EscapeUtil.unescape("")); + } + + @Test + void testUnescapeBlank() { + assertEquals(" ", EscapeUtil.unescape(" ")); + } + + @Test + void testUnescapeAsciiCharacters() { + // 测试ASCII字符转义 + assertEquals("hello", EscapeUtil.unescape("hello")); + assertEquals("test space", EscapeUtil.unescape("test%20space")); + assertEquals("A", EscapeUtil.unescape("%41")); + assertEquals("a", EscapeUtil.unescape("%61")); + assertEquals("0", EscapeUtil.unescape("%30")); + assertEquals("!", EscapeUtil.unescape("%21")); + assertEquals("@", EscapeUtil.unescape("%40")); + assertEquals("#", EscapeUtil.unescape("%23")); + } + + @Test + void testUnescapeUnicodeCharacters() { + // 测试Unicode字符转义 + assertEquals("中", EscapeUtil.unescape("%u4E2D")); + assertEquals("文", EscapeUtil.unescape("%u6587")); + assertEquals("测", EscapeUtil.unescape("%u6D4B")); + assertEquals("试", EscapeUtil.unescape("%u8BD5")); + assertEquals("😊", EscapeUtil.unescape("%uD83D%uDE0A")); // 笑脸表情 + } + + @Test + void testUnescapeMixedContent() { + // 测试混合内容 + assertEquals("Hello 世界!", EscapeUtil.unescape("Hello%20%u4E16%u754C%21")); + assertEquals("测试: 100%", EscapeUtil.unescape("%u6D4B%u8BD5%3A%20100%25")); + assertEquals("a+b=c", EscapeUtil.unescape("a%2Bb%3Dc")); + } + + @Test + void testUnescapeIncompleteEscapeSequences() { + // 测试不完整的转义序列 + assertEquals("test%", EscapeUtil.unescape("test%")); + assertEquals("test%u", EscapeUtil.unescape("test%u")); + assertEquals("test%u1", EscapeUtil.unescape("test%u1")); + assertEquals("test%u12", EscapeUtil.unescape("test%u12")); + assertEquals("test%u123", 
EscapeUtil.unescape("test%u123")); + assertEquals("test%1", EscapeUtil.unescape("test%1")); + assertEquals("test%2", EscapeUtil.unescape("test%2")); + } + + @Test + void testUnescapeEdgeCases() { + // 测试边界情况 + assertEquals("%", EscapeUtil.unescape("%")); + assertEquals("%u", EscapeUtil.unescape("%u")); + assertEquals("%%", EscapeUtil.unescape("%%")); + assertEquals("%u%", EscapeUtil.unescape("%u%")); + assertEquals("100% complete", EscapeUtil.unescape("100%25%20complete")); + } + + @Test + void testUnescapeMultipleEscapeSequences() { + // 测试多个连续的转义序列 + assertEquals("ABC", EscapeUtil.unescape("%41%42%43")); + assertEquals("中文测试", EscapeUtil.unescape("%u4E2D%u6587%u6D4B%u8BD5")); + assertEquals("A 中 B", EscapeUtil.unescape("%41%20%u4E2D%20%42")); + } + + @Test + void testUnescapeSpecialCharacters() { + // 测试特殊字符 + assertEquals("\n", EscapeUtil.unescape("%0A")); + assertEquals("\r", EscapeUtil.unescape("%0D")); + assertEquals("\t", EscapeUtil.unescape("%09")); + assertEquals(" ", EscapeUtil.unescape("%20")); + assertEquals("<", EscapeUtil.unescape("%3C")); + assertEquals(">", EscapeUtil.unescape("%3E")); + assertEquals("&", EscapeUtil.unescape("%26")); + } + + @Test + void testUnescapeComplexScenario() { + // 测试复杂场景 + final String original = "Hello 世界! 
这是测试。Email: test@example.com"; + final String escaped = "Hello%20%u4E16%u754C%21%20%u8FD9%u662F%u6D4B%u8BD5%u3002Email%3A%20test%40example.com"; + assertEquals(original, EscapeUtil.unescape(escaped)); + } + + @Test + void testUnescapeWithIncompleteAtEnd() { + // 测试末尾有不完整转义序列 + assertEquals("normal%", EscapeUtil.unescape("normal%")); + assertEquals("normal%u", EscapeUtil.unescape("normal%u")); + assertEquals("normal%u1", EscapeUtil.unescape("normal%u1")); + assertEquals("normal%1", EscapeUtil.unescape("normal%1")); + } + + @Test + void testUnescapeUppercaseHex() { + // 测试大写十六进制 + assertEquals("A", EscapeUtil.unescape("%41")); + assertEquals("A", EscapeUtil.unescape("%41")); + assertEquals("中", EscapeUtil.unescape("%u4E2D")); + assertEquals("中", EscapeUtil.unescape("%u4E2D")); + } }