Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 4604c09

Browse files
1 parent be3d17b commit 4604c09
Copy full SHA for 4604c09

File tree

1 file changed

+59
-32
lines changed
Filter options

1 file changed

+59
-32
lines changed

‎wire/core/Sanitizer.php

Copy file name to clipboard. Expand all lines: wire/core/Sanitizer.php
+59 -32 Lines changed: 59 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -238,6 +238,17 @@ class Sanitizer extends Wire {
238238
'‍', // zero width join
239239
);
240240

241+
/**
242+
* Characters blacklisted from UTF-8 page names
243+
*
244+
* @var string[]
245+
*
246+
*/
247+
protected $pageNameBlacklist = array(
248+
'/', '\\', '%', '"', "'", '<', '>', '?', '!', '#', '@', ':', ';', ',',
249+
'+', '=', '*', '^', '$', '(', ')', '[', ']', '{', '}', '|', '&',
250+
);
251+
241252
/**
242253
* Sanitizer method names (A-Z) and type(s) they return
243254
*
@@ -903,6 +914,7 @@ public function pageNameUTF8($value, $maxLength = 128) {
903914
if(!strlen($value)) return '';
904915

905916
$config = $this->wire()->config;
917+
$keepGoing = true;
906918

907919
// if UTF8 module is not enabled then delegate this call to regular pageName sanitizer
908920
if($config->pageNameCharset != 'UTF8') return $this->pageName($value, false, $maxLength);
@@ -918,7 +930,8 @@ public function pageNameUTF8($value, $maxLength = 128) {
918930
// whitelist of allowed characters and blacklist of disallowed characters
919931
$whitelist = $config->pageNameWhitelist;
920932
if(!strlen($whitelist)) $whitelist = false;
921-
$blacklist = '/\\%"\'<>?#@:;,+=*^$()[]{}|&';
933+
934+
$value = str_replace($this->pageNameBlacklist, '-', $value);
922935

923936
// we let regular pageName handle chars like these, if they appear without other UTF-8
924937
$extras = array('.', '-', '_', ',', ';', ':', '(', ')', '!', '?', '&', '%', '$', '#', '@');
@@ -933,43 +946,48 @@ public function pageNameUTF8($value, $maxLength = 128) {
933946
if($this->caches[$k] || $tt->strtolower($value) === $value) {
934947
// whitelist supports only lowercase OR value is all lowercase
935948
// let regular pageName sanitizer handle this
936-
return $this->pageName($value, false, $maxLength);
949+
$value = $this->pageName($value, false, $maxLength);
950+
// maintain old behavior for existing installations
951+
if($this->getPunycodeVersion() < 2) return $value;
952+
$keepGoing = false;
937953
}
938954
}
939955

940-
// validate that all characters are in our whitelist
941-
$replacements = array();
956+
if($keepGoing) {
957+
// validate that all characters are in our whitelist
958+
$replacements = array();
942959

943-
for($n = 0; $n < $tt->strlen($value); $n++) {
944-
$c = $tt->substr($value, $n, 1);
945-
$inBlacklist = $tt->strpos($blacklist, $c) !== false || strpos($blacklist, $c) !== false;
946-
$inWhitelist = !$inBlacklist && $whitelist !== false && $tt->strpos($whitelist, $c) !== false;
947-
if($inWhitelist && !$inBlacklist) {
948-
// in whitelist
949-
} else if($inBlacklist || !strlen(trim($c)) || ctype_cntrl($c)) {
950-
// character does not resolve to something visible or is in blacklist
951-
$replacements[] = $c;
952-
} else if($whitelist === false) {
953-
// whitelist disabled: allow everything that is not blacklisted
954-
} else {
955-
// character that is not in whitelist, double check case variants
956-
$cLower = $tt->strtolower($c);
957-
$cUpper = $tt->strtoupper($c);
958-
if($cLower !== $c && $tt->strpos($whitelist, $cLower) !== false) {
959-
// allow character and convert to lowercase variant
960-
$value = $tt->substr($value, 0, $n) . $cLower . $tt->substr($value, $n+1);
961-
} else if($cUpper !== $c && $tt->strpos($whitelist, $cUpper) !== false) {
962-
// allow character and convert to uppercase varient
963-
$value = $tt->substr($value, 0, $n) . $cUpper . $tt->substr($value, $n+1);
964-
} else {
965-
// queue character to be replaced
960+
for($n = 0; $n < $tt->strlen($value); $n++) {
961+
$c = $tt->substr($value, $n, 1);
962+
if($c === '-') continue;
963+
$inWhitelist = $whitelist !== false && $tt->strpos($whitelist, $c) !== false;
964+
if($inWhitelist) {
965+
// in whitelist
966+
} else if(!strlen(trim($c)) || ctype_cntrl($c)) {
967+
// character does not resolve to something visible
966968
$replacements[] = $c;
969+
} else if($whitelist === false) {
970+
// whitelist disabled: allow everything that is not blacklisted
971+
} else {
972+
// character that is not in whitelist, double check case variants
973+
$cLower = $tt->strtolower($c);
974+
$cUpper = $tt->strtoupper($c);
975+
if($cLower !== $c && $tt->strpos($whitelist, $cLower) !== false) {
976+
// allow character and convert to lowercase variant
977+
$value = $tt->substr($value, 0, $n) . $cLower . $tt->substr($value, $n + 1);
978+
} else if($cUpper !== $c && $tt->strpos($whitelist, $cUpper) !== false) {
979+
// allow character and convert to uppercase variant
980+
$value = $tt->substr($value, 0, $n) . $cUpper . $tt->substr($value, $n + 1);
981+
} else {
982+
// queue character to be replaced
983+
$replacements[] = $c;
984+
}
967985
}
968986
}
969-
}
970987

971-
// replace disallowed characters with "-"
972-
if(count($replacements)) $value = str_replace($replacements, '-', $value);
988+
// replace disallowed characters with "-"
989+
if(count($replacements)) $value = str_replace($replacements, '-', $value);
990+
}
973991

974992
// replace doubled word separators
975993
foreach($separators as $c) {
@@ -1059,6 +1077,7 @@ protected function punyEncodeName($value, $version = 0) {
10591077

10601078
if($version > 1) {
10611079
$whitelist = $this->wire()->config->pageNameWhitelist;
1080+
$value = str_replace($this->pageNameBlacklist, '-', $value);
10621081
$v = '';
10631082
for($n = 0; $n < $tt->strlen($value); $n++) {
10641083
$c = $tt->substr($value, $n, 1);
@@ -1083,7 +1102,15 @@ protected function punyEncodeName($value, $version = 0) {
10831102
$value = str_replace('__', '_', $value);
10841103
}
10851104

1086-
if($version < 2 && strlen($value) >= 50) {
1105+
if($version > 1) {
1106+
// version 2, 3
1107+
while(strpos($value, '--') !== false) {
1108+
$value = str_replace('--', '-', $value);
1109+
}
1110+
$value = trim($value, '-');
1111+
1112+
} else if(strlen($value) >= 50) {
1113+
// version 1
10871114
$_value = $value;
10881115
$parts = array();
10891116
while(strlen($_value)) {
@@ -1145,7 +1172,7 @@ protected function punyEncodeName($value, $version = 0) {
11451172
* @since 3.0.244
11461173
*
11471174
*/
1148-
protected function getPunycodeVersion($version) {
1175+
protected function getPunycodeVersion($version = 0) {
11491176
$config = $this->wire()->config;
11501177
if(!$version && strpos($config->pageNameWhitelist, 'v') === 0) {
11511178
// i.e. "v3" specified at beginning of pageNameWhitelist

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.