Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit b99f7c0

Browse files
belingueres authored and michael-o committed
Fixed regressions:
* #163 - new case: Don't assume UTF8 as default, to allow parsing from String.
* #194 - Incorrect getText() after parsing the DOCDECL section.
* Added tests exercising other regressions exposed while fixing these issues.
1 parent 3896620 commit b99f7c0
Copy full SHA for b99f7c0

File tree

Expand file tree / Collapse file tree

6 files changed

+637
-45
lines changed
Filter options
Expand file tree / Collapse file tree

6 files changed

+637
-45
lines changed

‎src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java

Copy file name to clipboardExpand all lines: src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
+112-45Lines changed: 112 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ private String newStringIntern( char[] cbuf, int off, int len )
124124
// private String elValue[];
125125
private int elNamespaceCount[];
126126

127-
private String fileEncoding = "UTF8";
127+
private String fileEncoding = null;
128128

129129
/**
130130
* Make sure that we have enough space to keep element stack if passed size. It will always create one additional
@@ -587,8 +587,8 @@ else if ( FEATURE_XML_ROUNDTRIP.equals( name ) )
587587
}
588588
}
589589

590-
/**
591-
* Unknown properties are <strong>always</strong> returned as false
590+
/**
591+
* Unknown properties are <strong>always</strong> returned as false
592592
*/
593593
@Override
594594
public boolean getFeature( String name )
@@ -1596,11 +1596,11 @@ else if ( ch == '&' )
15961596
}
15971597
final int oldStart = posStart + bufAbsoluteStart;
15981598
final int oldEnd = posEnd + bufAbsoluteStart;
1599-
final char[] resolvedEntity = parseEntityRef();
1599+
parseEntityRef();
16001600
if ( tokenize )
16011601
return eventType = ENTITY_REF;
16021602
// check if replacement text can be resolved !!!
1603-
if ( resolvedEntity == null )
1603+
if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
16041604
{
16051605
if ( entityRefName == null )
16061606
{
@@ -1628,7 +1628,7 @@ else if ( ch == '&' )
16281628
}
16291629
// assert usePC == true;
16301630
// write into PC replacement text - do merge for replacement text!!!!
1631-
for ( char aResolvedEntity : resolvedEntity )
1631+
for ( char aResolvedEntity : resolvedEntityRefCharBuf )
16321632
{
16331633
if ( pcEnd >= pc.length )
16341634
{
@@ -2675,9 +2675,28 @@ else if ( ch == '\t' || ch == '\n' || ch == '\r' )
26752675
return ch;
26762676
}
26772677

2678-
private char[] charRefOneCharBuf = new char[1];
2678+
// state representing that no entity ref has been resolved
2679+
private static final char[] BUF_NOT_RESOLVED = new char[0];
2680+
2681+
// predefined entity refs
2682+
private static final char[] BUF_LT = new char[] { '<' };
2683+
private static final char[] BUF_AMP = new char[] { '&' };
2684+
private static final char[] BUF_GT = new char[] { '>' };
2685+
private static final char[] BUF_APO = new char[] { '\'' };
2686+
private static final char[] BUF_QUOT = new char[] { '"' };
26792687

2680-
private char[] parseEntityRef()
2688+
private char[] resolvedEntityRefCharBuf = BUF_NOT_RESOLVED;
2689+
2690+
/**
2691+
* parse Entity Ref, either a character entity or one of the predefined name entities.
2692+
*
2693+
* @return the length of the valid found character reference, which may be one of the predefined character reference
2694+
* names (resolvedEntityRefCharBuf contains the replaced chars). Returns the length of the not found entity
2695+
* name, otherwise.
2696+
* @throws XmlPullParserException if invalid XML is detected.
2697+
* @throws IOException if an I/O error is found.
2698+
*/
2699+
private int parseCharOrPredefinedEntityRef()
26812700
throws XmlPullParserException, IOException
26822701
{
26832702
// entity reference http://www.w3.org/TR/2000/REC-xml-20001006#NT-Reference
@@ -2686,6 +2705,8 @@ private char[] parseEntityRef()
26862705
// ASSUMPTION just after &
26872706
entityRefName = null;
26882707
posStart = pos;
2708+
int len = 0;
2709+
resolvedEntityRefCharBuf = BUF_NOT_RESOLVED;
26892710
char ch = more();
26902711
if ( ch == '#' )
26912712
{
@@ -2750,7 +2771,6 @@ else if ( ch >= 'A' && ch <= 'F' )
27502771
ch = more();
27512772
}
27522773
}
2753-
posEnd = pos - 1;
27542774

27552775
boolean isValidCodePoint = true;
27562776
try
@@ -2759,7 +2779,7 @@ else if ( ch >= 'A' && ch <= 'F' )
27592779
isValidCodePoint = isValidCodePoint( codePoint );
27602780
if ( isValidCodePoint )
27612781
{
2762-
charRefOneCharBuf = Character.toChars( codePoint );
2782+
resolvedEntityRefCharBuf = Character.toChars( codePoint );
27632783
}
27642784
}
27652785
catch ( IllegalArgumentException e )
@@ -2775,14 +2795,14 @@ else if ( ch >= 'A' && ch <= 'F' )
27752795

27762796
if ( tokenize )
27772797
{
2778-
text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length );
2798+
text = newString( resolvedEntityRefCharBuf, 0, resolvedEntityRefCharBuf.length );
27792799
}
2780-
return charRefOneCharBuf;
2800+
len = resolvedEntityRefCharBuf.length;
27812801
}
27822802
else
27832803
{
27842804
// [68] EntityRef ::= '&' Name ';'
2785-
// scan anem until ;
2805+
// scan name until ;
27862806
if ( !isNameStartChar( ch ) )
27872807
{
27882808
throw new XmlPullParserException( "entity reference names can not start with character '"
@@ -2801,17 +2821,15 @@ else if ( ch >= 'A' && ch <= 'F' )
28012821
+ printable( ch ) + "'", this, null );
28022822
}
28032823
}
2804-
posEnd = pos - 1;
28052824
// determine what name maps to
2806-
final int len = posEnd - posStart;
2825+
len = ( pos - 1 ) - posStart;
28072826
if ( len == 2 && buf[posStart] == 'l' && buf[posStart + 1] == 't' )
28082827
{
28092828
if ( tokenize )
28102829
{
28112830
text = "<";
28122831
}
2813-
charRefOneCharBuf[0] = '<';
2814-
return charRefOneCharBuf;
2832+
resolvedEntityRefCharBuf = BUF_LT;
28152833
// if(paramPC || isParserTokenizing) {
28162834
// if(pcEnd >= pc.length) ensurePC();
28172835
// pc[pcEnd++] = '<';
@@ -2823,17 +2841,15 @@ else if ( len == 3 && buf[posStart] == 'a' && buf[posStart + 1] == 'm' && buf[po
28232841
{
28242842
text = "&";
28252843
}
2826-
charRefOneCharBuf[0] = '&';
2827-
return charRefOneCharBuf;
2844+
resolvedEntityRefCharBuf = BUF_AMP;
28282845
}
28292846
else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' )
28302847
{
28312848
if ( tokenize )
28322849
{
28332850
text = ">";
28342851
}
2835-
charRefOneCharBuf[0] = '>';
2836-
return charRefOneCharBuf;
2852+
resolvedEntityRefCharBuf = BUF_GT;
28372853
}
28382854
else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[posStart + 2] == 'o'
28392855
&& buf[posStart + 3] == 's' )
@@ -2842,8 +2858,7 @@ else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[po
28422858
{
28432859
text = "'";
28442860
}
2845-
charRefOneCharBuf[0] = '\'';
2846-
return charRefOneCharBuf;
2861+
resolvedEntityRefCharBuf = BUF_APO;
28472862
}
28482863
else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[posStart + 2] == 'o'
28492864
&& buf[posStart + 3] == 't' )
@@ -2852,25 +2867,65 @@ else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[po
28522867
{
28532868
text = "\"";
28542869
}
2855-
charRefOneCharBuf[0] = '"';
2856-
return charRefOneCharBuf;
2857-
}
2858-
else
2859-
{
2860-
final char[] result = lookuEntityReplacement( len );
2861-
if ( result != null )
2862-
{
2863-
return result;
2864-
}
2870+
resolvedEntityRefCharBuf = BUF_QUOT;
28652871
}
2866-
if ( tokenize )
2867-
text = null;
2868-
return null;
28692872
}
2873+
2874+
posEnd = pos;
2875+
2876+
return len;
2877+
}
2878+
2879+
/**
2880+
* Parse an entity reference inside the DOCDECL section.
2881+
*
2882+
* @throws XmlPullParserException if invalid XML is detected.
2883+
* @throws IOException if an I/O error is found.
2884+
*/
2885+
private void parseEntityRefInDocDecl()
2886+
throws XmlPullParserException, IOException
2887+
{
2888+
parseCharOrPredefinedEntityRef();
2889+
if (usePC) {
2890+
posStart--; // include in PC the starting '&' of the entity
2891+
joinPC();
2892+
}
2893+
2894+
if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED )
2895+
return;
2896+
if ( tokenize )
2897+
text = null;
2898+
}
2899+
2900+
/**
2901+
* Parse an entity reference inside a tag or attribute.
2902+
*
2903+
* @throws XmlPullParserException if invalid XML is detected.
2904+
* @throws IOException if an I/O error is found.
2905+
*/
2906+
private void parseEntityRef()
2907+
throws XmlPullParserException, IOException
2908+
{
2909+
final int len = parseCharOrPredefinedEntityRef();
2910+
2911+
posEnd--; // don't involve the final ';' from the entity in the search
2912+
2913+
if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED ) {
2914+
return;
2915+
}
2916+
2917+
resolvedEntityRefCharBuf = lookuEntityReplacement( len );
2918+
if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED )
2919+
{
2920+
return;
2921+
}
2922+
if ( tokenize )
2923+
text = null;
28702924
}
28712925

28722926
/**
2873-
* Check if the provided parameter is a valid Char, according to: {@link https://www.w3.org/TR/REC-xml/#NT-Char}
2927+
* Check if the provided parameter is a valid Char. According to
2928+
* <a href="https://www.w3.org/TR/REC-xml/#NT-Char">https://www.w3.org/TR/REC-xml/#NT-Char</a>
28742929
*
28752930
* @param codePoint the numeric value to check
28762931
* @return true if it is a valid numeric character reference. False otherwise.
@@ -2883,8 +2938,6 @@ private static boolean isValidCodePoint( int codePoint )
28832938
}
28842939

28852940
private char[] lookuEntityReplacement( int entityNameLen )
2886-
throws XmlPullParserException, IOException
2887-
28882941
{
28892942
if ( !allStringsInterned )
28902943
{
@@ -2919,7 +2972,7 @@ private char[] lookuEntityReplacement( int entityNameLen )
29192972
}
29202973
}
29212974
}
2922-
return null;
2975+
return BUF_NOT_RESOLVED;
29232976
}
29242977

29252978
private void parseComment()
@@ -2977,7 +3030,7 @@ else if (isValidCodePoint( ch ))
29773030
}
29783031
else
29793032
{
2980-
throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(((int) ch)) + " found in comment", this, null );
3033+
throw new XmlPullParserException( "Illegal character 0x" + Integer.toHexString(ch) + " found in comment", this, null );
29813034
}
29823035
if ( normalizeIgnorableWS )
29833036
{
@@ -3484,7 +3537,8 @@ else if ( ch == '>' && bracketLevel == 0 )
34843537
break;
34853538
else if ( ch == '&' )
34863539
{
3487-
extractEntityRef();
3540+
extractEntityRefInDocDecl();
3541+
continue;
34883542
}
34893543
if ( normalizeIgnorableWS )
34903544
{
@@ -3536,6 +3590,19 @@ else if ( ch == '\n' )
35363590

35373591
}
35383592
posEnd = pos - 1;
3593+
text = null;
3594+
}
3595+
3596+
private void extractEntityRefInDocDecl()
3597+
throws XmlPullParserException, IOException
3598+
{
3599+
// extractEntityRef
3600+
posEnd = pos - 1;
3601+
3602+
int prevPosStart = posStart;
3603+
parseEntityRefInDocDecl();
3604+
3605+
posStart = prevPosStart;
35393606
}
35403607

35413608
private void extractEntityRef()
@@ -3559,9 +3626,9 @@ private void extractEntityRef()
35593626
}
35603627
// assert usePC == true;
35613628

3562-
final char[] resolvedEntity = parseEntityRef();
3629+
parseEntityRef();
35633630
// check if replacement text can be resolved !!!
3564-
if ( resolvedEntity == null )
3631+
if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
35653632
{
35663633
if ( entityRefName == null )
35673634
{
@@ -3571,7 +3638,7 @@ private void extractEntityRef()
35713638
+ "'", this, null );
35723639
}
35733640
// write into PC replacement text - do merge for replacement text!!!!
3574-
for ( char aResolvedEntity : resolvedEntity )
3641+
for ( char aResolvedEntity : resolvedEntityRefCharBuf )
35753642
{
35763643
if ( pcEnd >= pc.length )
35773644
{

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.