@@ -238,6 +238,17 @@ class Sanitizer extends Wire {
238
238
'‍ ' , // zero width join
239
239
);
240
240
241
+ /**
242
+ * Characters blacklisted from UTF-8 page names.
243
+ * Each is swapped for a hyphen via str_replace() in pageNameUTF8() and, when $version > 1, in punyEncodeName().
244
+ * @var string[]
245
+ *
246
+ */
247
+ protected $ pageNameBlacklist = array (
248
+ '/ ' , '\\' , '% ' , '" ' , "' " , '< ' , '> ' , '? ' , '! ' , '# ' , '@ ' , ': ' , '; ' , ', ' ,
249
+ '+ ' , '= ' , '* ' , '^ ' , '$ ' , '( ' , ') ' , '[ ' , '] ' , '{ ' , '} ' , '| ' , '& ' ,
250
+ );
251
+
241
252
/**
242
253
* Sanitizer method names (A-Z) and type(s) they return
243
254
*
@@ -903,6 +914,7 @@ public function pageNameUTF8($value, $maxLength = 128) {
903
914
if (!strlen ($ value )) return '' ;
904
915
905
916
$ config = $ this ->wire ()->config ;
917
+ $ keepGoing = true ;
906
918
907
919
// if UTF8 module is not enabled then delegate this call to regular pageName sanitizer
908
920
if ($ config ->pageNameCharset != 'UTF8 ' ) return $ this ->pageName ($ value , false , $ maxLength );
@@ -918,7 +930,8 @@ public function pageNameUTF8($value, $maxLength = 128) {
918
930
// whitelist of allowed characters and blacklist of disallowed characters
919
931
$ whitelist = $ config ->pageNameWhitelist ;
920
932
if (!strlen ($ whitelist )) $ whitelist = false ;
921
- $ blacklist = '/ \\%" \'<>?#@:;,+=*^$()[]{}|& ' ;
933
+
934
+ $ value = str_replace ($ this ->pageNameBlacklist , '- ' , $ value );
922
935
923
936
// we let regular pageName handle chars like these, if they appear without other UTF-8
924
937
$ extras = array ('. ' , '- ' , '_ ' , ', ' , '; ' , ': ' , '( ' , ') ' , '! ' , '? ' , '& ' , '% ' , '$ ' , '# ' , '@ ' );
@@ -933,43 +946,48 @@ public function pageNameUTF8($value, $maxLength = 128) {
933
946
if ($ this ->caches [$ k ] || $ tt ->strtolower ($ value ) === $ value ) {
934
947
// whitelist supports only lowercase OR value is all lowercase
935
948
// let regular pageName sanitizer handle this
936
- return $ this ->pageName ($ value , false , $ maxLength );
949
+ $ value = $ this ->pageName ($ value , false , $ maxLength );
950
+ // maintain old behavior for existing installations
951
+ if ($ this ->getPunycodeVersion () < 2 ) return $ value ;
952
+ $ keepGoing = false ;
937
953
}
938
954
}
939
955
940
- // validate that all characters are in our whitelist
941
- $ replacements = array ();
956
+ if ($ keepGoing ) {
957
+ // validate that all characters are in our whitelist
958
+ $ replacements = array ();
942
959
943
- for ($ n = 0 ; $ n < $ tt ->strlen ($ value ); $ n ++) {
944
- $ c = $ tt ->substr ($ value , $ n , 1 );
945
- $ inBlacklist = $ tt ->strpos ($ blacklist , $ c ) !== false || strpos ($ blacklist , $ c ) !== false ;
946
- $ inWhitelist = !$ inBlacklist && $ whitelist !== false && $ tt ->strpos ($ whitelist , $ c ) !== false ;
947
- if ($ inWhitelist && !$ inBlacklist ) {
948
- // in whitelist
949
- } else if ($ inBlacklist || !strlen (trim ($ c )) || ctype_cntrl ($ c )) {
950
- // character does not resolve to something visible or is in blacklist
951
- $ replacements [] = $ c ;
952
- } else if ($ whitelist === false ) {
953
- // whitelist disabled: allow everything that is not blacklisted
954
- } else {
955
- // character that is not in whitelist, double check case variants
956
- $ cLower = $ tt ->strtolower ($ c );
957
- $ cUpper = $ tt ->strtoupper ($ c );
958
- if ($ cLower !== $ c && $ tt ->strpos ($ whitelist , $ cLower ) !== false ) {
959
- // allow character and convert to lowercase variant
960
- $ value = $ tt ->substr ($ value , 0 , $ n ) . $ cLower . $ tt ->substr ($ value , $ n +1 );
961
- } else if ($ cUpper !== $ c && $ tt ->strpos ($ whitelist , $ cUpper ) !== false ) {
962
- // allow character and convert to uppercase varient
963
- $ value = $ tt ->substr ($ value , 0 , $ n ) . $ cUpper . $ tt ->substr ($ value , $ n +1 );
964
- } else {
965
- // queue character to be replaced
960
+ for ($ n = 0 ; $ n < $ tt ->strlen ($ value ); $ n ++) {
961
+ $ c = $ tt ->substr ($ value , $ n , 1 );
962
+ if ($ c === '- ' ) continue ;
963
+ $ inWhitelist = $ whitelist !== false && $ tt ->strpos ($ whitelist , $ c ) !== false ;
964
+ if ($ inWhitelist ) {
965
+ // in whitelist
966
+ } else if (!strlen (trim ($ c )) || ctype_cntrl ($ c )) {
967
+ // character does not resolve to something visible
966
968
$ replacements [] = $ c ;
969
+ } else if ($ whitelist === false ) {
970
+ // whitelist disabled: allow everything that is not blacklisted
971
+ } else {
972
+ // character that is not in whitelist, double check case variants
973
+ $ cLower = $ tt ->strtolower ($ c );
974
+ $ cUpper = $ tt ->strtoupper ($ c );
975
+ if ($ cLower !== $ c && $ tt ->strpos ($ whitelist , $ cLower ) !== false ) {
976
+ // allow character and convert to lowercase variant
977
+ $ value = $ tt ->substr ($ value , 0 , $ n ) . $ cLower . $ tt ->substr ($ value , $ n + 1 );
978
+ } else if ($ cUpper !== $ c && $ tt ->strpos ($ whitelist , $ cUpper ) !== false ) {
979
+ // allow character and convert to uppercase variant
980
+ $ value = $ tt ->substr ($ value , 0 , $ n ) . $ cUpper . $ tt ->substr ($ value , $ n + 1 );
981
+ } else {
982
+ // queue character to be replaced
983
+ $ replacements [] = $ c ;
984
+ }
967
985
}
968
986
}
969
- }
970
987
971
- // replace disallowed characters with "-"
972
- if (count ($ replacements )) $ value = str_replace ($ replacements , '- ' , $ value );
988
+ // replace disallowed characters with "-"
989
+ if (count ($ replacements )) $ value = str_replace ($ replacements , '- ' , $ value );
990
+ }
973
991
974
992
// replace doubled word separators
975
993
foreach ($ separators as $ c ) {
@@ -1059,6 +1077,7 @@ protected function punyEncodeName($value, $version = 0) {
1059
1077
1060
1078
if ($ version > 1 ) {
1061
1079
$ whitelist = $ this ->wire ()->config ->pageNameWhitelist ;
1080
+ $ value = str_replace ($ this ->pageNameBlacklist , '- ' , $ value );
1062
1081
$ v = '' ;
1063
1082
for ($ n = 0 ; $ n < $ tt ->strlen ($ value ); $ n ++) {
1064
1083
$ c = $ tt ->substr ($ value , $ n , 1 );
@@ -1083,7 +1102,15 @@ protected function punyEncodeName($value, $version = 0) {
1083
1102
$ value = str_replace ('__ ' , '_ ' , $ value );
1084
1103
}
1085
1104
1086
- if ($ version < 2 && strlen ($ value ) >= 50 ) {
1105
+ if ($ version > 1 ) {
1106
+ // version 2, 3
1107
+ while (strpos ($ value , '-- ' ) !== false ) {
1108
+ $ value = str_replace ('-- ' , '- ' , $ value );
1109
+ }
1110
+ $ value = trim ($ value , '- ' );
1111
+
1112
+ } else if (strlen ($ value ) >= 50 ) {
1113
+ // version 1
1087
1114
$ _value = $ value ;
1088
1115
$ parts = array ();
1089
1116
while (strlen ($ _value )) {
@@ -1145,7 +1172,7 @@ protected function punyEncodeName($value, $version = 0) {
1145
1172
* @since 3.0.244
1146
1173
*
1147
1174
*/
1148
- protected function getPunycodeVersion ($ version ) {
1175
+ protected function getPunycodeVersion ($ version = 0 ) {
1149
1176
$ config = $ this ->wire ()->config ;
1150
1177
if (!$ version && strpos ($ config ->pageNameWhitelist , 'v ' ) === 0 ) {
1151
1178
// i.e. "v3" specified at beginning of pageNameWhitelist
0 commit comments