Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit fb1db93

Browse filesBrowse files
author
John J. Aylward
committed
Changes encoding to better match the XML spec section 2.2
1 parent adb0478 commit fb1db93
Copy full SHA for fb1db93

File tree

Expand file treeCollapse file tree

1 file changed

+27
-1
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

1 file changed

+27
-1
lines changed
Open diff view settings
Collapse file

‎XML.java‎

Copy file name to clipboardExpand all lines: XML.java
+27-1Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ public static String escape(String string) {
137137
sb.append("'");
138138
break;
139139
default:
140-
if (Character.isISOControl(cp)) {
140+
if (mustEscape(cp)) {
141141
sb.append("&#x");
142142
sb.append(Integer.toHexString(cp));
143143
sb.append(";");
@@ -149,6 +149,32 @@ public static String escape(String string) {
149149
return sb.toString();
150150
}
151151

152+
/** Decides whether a code point has to be written as a numeric character reference instead of literally.
153+
* @param cp code point to test
154+
* @return true if the code point is an ISO control character other than tab (0x9), line feed (0xA) or carriage return (0xD), or if it lies outside the XML character ranges
155+
*/
156+
private static boolean mustEscape(int cp) {
157+
/* Valid range from https://www.w3.org/TR/REC-xml/#charsets
158+
*
159+
* #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
160+
*
161+
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
162+
*/
163+
// isISOControl is true when (cp >= 0 && cp <= 0x1F) || (cp >= 0x7F && cp <= 0x9F)
164+
// every ISO control character is escaped except tab, line feed and carriage return; note that 0x7F-0x9F sit inside [#x20-#xD7FF] and are escaped only because of this test
165+
return (Character.isISOControl(cp)
166+
&& cp != 0x9
167+
&& cp != 0xA
168+
&& cp != 0xD
169+
) || !(
170+
// the ranges of acceptable characters that are not control characters; anything outside them must be escaped
171+
(cp >= 0x20 && cp <= 0xD7FF)
172+
|| (cp >= 0xE000 && cp <= 0xFFFD)
173+
|| (cp >= 0x10000 && cp <= 0x10FFFF)
174+
)
175+
;
176+
}
177+
152178
/**
153179
* Removes XML escapes from the string.
154180
*

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.