Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 32b72a6

Browse files
authored
Fix reading comments with UTF chars (fixes #238) (#240)
* Fix reading comments with UTF chars (fixes #238)
* Fix printable methods to account for UTF chars
1 parent d3d137c commit 32b72a6
Copy full SHA for 32b72a6

File tree

Expand file tree / Collapse file tree

2 files changed

+57
-8
lines changed
Filter options
Expand file tree / Collapse file tree

2 files changed

+57
-8
lines changed

‎src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java

Copy file name to clipboard | Expand all lines: src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
+31 -8 | Lines changed: 31 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2981,8 +2981,8 @@ private void parseComment()
29812981
// implements XML 1.0 Section 2.5 Comments
29822982

29832983
// ASSUMPTION: seen <!-
2984-
char ch = more();
2985-
if ( ch != '-' )
2984+
char cch = more();
2985+
if ( cch != '-' )
29862986
throw new XmlPullParserException( "expected <!-- for comment start", this, null );
29872987
if ( tokenize )
29882988
posStart = pos;
@@ -2999,7 +2999,19 @@ private void parseComment()
29992999
while ( true )
30003000
{
30013001
// scan until it hits -->
3002-
ch = more();
3002+
cch = more();
3003+
int ch;
3004+
char cch2;
3005+
if ( Character.isHighSurrogate( cch ) )
3006+
{
3007+
cch2 = more();
3008+
ch = Character.toCodePoint( cch, cch2 );
3009+
}
3010+
else
3011+
{
3012+
cch2 = 0;
3013+
ch = cch;
3014+
}
30033015
if ( seenDashDash && ch != '>' )
30043016
{
30053017
throw new XmlPullParserException( "in comment after two dashes (--) next character must be >"
@@ -3074,7 +3086,11 @@ else if ( ch == '\n' )
30743086
{
30753087
if ( pcEnd >= pc.length )
30763088
ensurePC( pcEnd );
3077-
pc[pcEnd++] = ch;
3089+
pc[pcEnd++] = cch;
3090+
if ( cch2 != 0 )
3091+
{
3092+
pc[pcEnd++] = cch2;
3093+
}
30783094
}
30793095
normalizedCR = false;
30803096
}
@@ -4153,7 +4169,7 @@ private static boolean isS( char ch )
41534169
// ch != '\u0000' ch < '\uFFFE'
41544170

41554171
// private char printable(char ch) { return ch; }
4156-
private static String printable( char ch )
4172+
private static String printable( int ch )
41574173
{
41584174
if ( ch == '\n' )
41594175
{
@@ -4175,18 +4191,25 @@ else if ( ch == '\'' )
41754191
{
41764192
return "\\u" + Integer.toHexString( ch );
41774193
}
4178-
return "" + ch;
4194+
if ( Character.isBmpCodePoint( ch ) )
4195+
{
4196+
return Character.toString( ( char ) ch );
4197+
}
4198+
else
4199+
{
4200+
return new String( new char[] { Character.highSurrogate( ch ), Character.lowSurrogate( ch ) } );
4201+
}
41794202
}
41804203

41814204
private static String printable( String s )
41824205
{
41834206
if ( s == null )
41844207
return null;
4185-
final int sLen = s.length();
4208+
final int sLen = s.codePointCount(0, s.length());
41864209
StringBuilder buf = new StringBuilder( sLen + 10 );
41874210
for ( int i = 0; i < sLen; ++i )
41884211
{
4189-
buf.append( printable( s.charAt( i ) ) );
4212+
buf.append( printable( s.codePointAt( i ) ) );
41904213
}
41914214
s = buf.toString();
41924215
return s;

‎src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java

Copy file name to clipboard | Expand all lines: src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
+26 | Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,4 +1511,30 @@ public void testReplacementInPCArrayWithShorterCharArray()
15111511
fail( "should not raise exception: " + e );
15121512
}
15131513
}
1514+
1515+
/**
1516+
* Ensures that a comment containing an emoji (a supplementary character encoded as a surrogate pair) is parsed correctly
1517+
*/
1518+
@Test
1519+
public void testUnicode() throws IOException {
1520+
String input = "<project><!--ALL TEH BOMS! \uD83D\uDCA3 --></project>";
1521+
1522+
try
1523+
{
1524+
MXParser parser = new MXParser();
1525+
parser.setInput( new StringReader( input ) );
1526+
1527+
assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
1528+
assertEquals( "project", parser.getName() );
1529+
assertEquals( XmlPullParser.COMMENT, parser.nextToken() );
1530+
assertEquals( "ALL TEH BOMS! \uD83D\uDCA3 ", parser.getText() );
1531+
assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
1532+
assertEquals( "project", parser.getName() );
1533+
}
1534+
catch ( XmlPullParserException e )
1535+
{
1536+
e.printStackTrace();
1537+
fail( "should not raise exception: " + e );
1538+
}
1539+
}
15141540
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.