Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit bd8e0b3

Browse files
[String] Made AbstractString::width() follow POSIX.1-2001
Co-authored-by: Nicolas Grekas <nicolas.grekas@gmail.com>
1 parent af4035d commit bd8e0b3
Copy full SHA for bd8e0b3

12 files changed

+2698
-38
lines changed
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
/Resources/bin/update-data.php export-ignore
2+
/Resources/WcswidthDataGenerator.php export-ignore
13
/Tests export-ignore
24
/phpunit.xml.dist export-ignore
35
/.gitignore export-ignore

‎src/Symfony/Component/String/AbstractString.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/AbstractString.php
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,9 @@ public function truncate(int $length, string $ellipsis = ''): self
646646
*/
647647
abstract public function upper(): self;
648648

649+
/**
650+
* Returns the printable length on a terminal.
651+
*/
649652
abstract public function width(bool $ignoreAnsiDecoration = true): int;
650653

651654
/**

‎src/Symfony/Component/String/AbstractUnicodeString.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/AbstractUnicodeString.php
+78-12Lines changed: 78 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -342,9 +342,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
342342
return $str;
343343
}
344344

345-
/**
346-
* {@inheritdoc}
347-
*/
348345
public function reverse(): parent
349346
{
350347
$str = clone $this;
@@ -434,22 +431,20 @@ public function width(bool $ignoreAnsiDecoration = true): int
434431
$s = str_replace(["\r\n", "\r"], "\n", $s);
435432
}
436433

434+
if (!$ignoreAnsiDecoration) {
435+
$s = preg_replace('/\p{Cc}|\x7F/u', '', $s);
436+
}
437+
437438
foreach (explode("\n", $s) as $s) {
438439
if ($ignoreAnsiDecoration) {
439-
$s = preg_replace('/\x1B(?:
440+
$s = preg_replace(['/\x1B(?:
440441
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
441442
| [P\]X^_] .*? \x1B\\\\
442443
| [\x41-\x7E]
443-
)/x', '', $s);
444+
)/x', '/\p{Cc}|\x7F/u'], '', $s);
444445
}
445446

446-
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
447-
$s = preg_replace('/[\x00\x05\x07\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11FF}\x{200B}]+/u', '', $s);
448-
$s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);
449-
450-
if ($width < $w += mb_strlen($s, 'UTF-8') + ($wide << 1)) {
451-
$width = $w;
452-
}
447+
$width += $this->wcswidth($s);
453448
}
454449

455450
return $width;
@@ -493,4 +488,75 @@ private function pad(int $len, self $pad, int $type): parent
493488
throw new InvalidArgumentException('Invalid padding type.');
494489
}
495490
}
491+
492+
/**
 * Returns the number of terminal columns needed to display the given string.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 *
 * Contrary to the source implementation, non-printable characters (C0 and C1 control codes + DEL) do not
 * return -1. They must be removed from the string before calling this method.
 */
private function wcswidth(string $string): int
{
    // Range tables are loaded lazily and kept for the lifetime of the process.
    static $tableZero, $tableWide;

    $width = 0;

    foreach (preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) as $c) {
        $codePoint = mb_ord($c, 'UTF-8');

        if (0 === $codePoint // NULL
            || 0x034F === $codePoint // COMBINING GRAPHEME JOINER
            || (0x200B <= $codePoint && 0x200F >= $codePoint) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || 0x2028 === $codePoint // LINE SEPARATOR
            || 0x2029 === $codePoint // PARAGRAPH SEPARATOR
            || (0x202A <= $codePoint && 0x202E >= $codePoint) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
            || (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
        ) {
            continue;
        }

        if (null === $tableZero) {
            $tableZero = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
        }

        // Code points listed in the "zero" table take no column.
        if ($this->isCodePointInRanges($codePoint, $tableZero)) {
            continue;
        }

        // The "wide" table is only needed once a code point is known not to be zero-width.
        if (null === $tableWide) {
            $tableWide = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
        }

        // Code points listed in the "wide" table take two columns, everything else takes one.
        $width += $this->isCodePointInRanges($codePoint, $tableWide) ? 2 : 1;
    }

    return $width;
}

/**
 * Tells whether a code point belongs to one of the given ranges, using a binary search.
 *
 * @param int[][] $ranges Inclusive [first, last] code point pairs; the search assumes they are sorted
 *                        in ascending order and do not overlap
 */
private function isCodePointInRanges(int $codePoint, array $ranges): bool
{
    $lbound = 0;
    $ubound = \count($ranges) - 1;

    // Cheap rejection: empty table, or code point outside the span covered by the whole table.
    if ($ubound < 0 || $codePoint < $ranges[0][0] || $codePoint > $ranges[$ubound][1]) {
        return false;
    }

    while ($ubound >= $lbound) {
        // Integer division keeps the index an int (floor() would yield a float).
        $mid = intdiv($lbound + $ubound, 2);

        if ($codePoint > $ranges[$mid][1]) {
            $lbound = $mid + 1;
        } elseif ($codePoint < $ranges[$mid][0]) {
            $ubound = $mid - 1;
        } else {
            return true;
        }
    }

    return false;
}
496562
}

‎src/Symfony/Component/String/ByteString.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/ByteString.php
+2-26Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -303,9 +303,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
303303
return $str;
304304
}
305305

306-
/**
307-
* {@inheritdoc}
308-
*/
309306
public function reverse(): parent
310307
{
311308
$str = clone $this;
@@ -460,29 +457,8 @@ public function upper(): parent
460457

461458
public function width(bool $ignoreAnsiDecoration = true): int
462459
{
463-
$width = 0;
464-
$s = str_replace(["\x00", "\x05", "\x07"], '', $this->string);
460+
$string = preg_match('//u', $this->string) ? $this->string : preg_replace('/[\x80-\xFF]/', '?', $this->string);
465461

466-
if (false !== strpos($s, "\r")) {
467-
$s = str_replace(["\r\n", "\r"], "\n", $s);
468-
}
469-
470-
foreach (explode("\n", $s) as $s) {
471-
if ($ignoreAnsiDecoration) {
472-
$s = preg_replace('/\x1B(?:
473-
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
474-
| [P\]X^_] .*? \x1B\\\\
475-
| [\x41-\x7E]
476-
)/x', '', $s);
477-
}
478-
479-
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
480-
481-
if ($width < $w += \strlen($s)) {
482-
$width = $w;
483-
}
484-
}
485-
486-
return $width;
462+
return (new CodePointString($string))->width($ignoreAnsiDecoration);
487463
}
488464
}

‎src/Symfony/Component/String/CHANGELOG.md

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/CHANGELOG.md
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CHANGELOG
55
-----
66

77
* Added the `AbstractString::reverse()` method.
8+
* Made `AbstractString::width()` follow POSIX.1-2001.
89

910
5.0.0
1011
-----
+113Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\String\Resources;
13+
14+
use Symfony\Component\HttpClient\HttpClient;
15+
use Symfony\Component\String\Exception\RuntimeException;
16+
use Symfony\Component\VarExporter\VarExporter;
17+
18+
/**
 * Generates the "wcswidth_table_wide.php" and "wcswidth_table_zero.php" code point range tables
 * from the Unicode Character Database files published on unicode.org.
 *
 * @internal
 */
final class WcswidthDataGenerator
{
    /**
     * @var string Directory the generated PHP files are written to
     */
    private $outDir;

    /**
     * HTTP client whose base URI is the UNIDATA directory on unicode.org.
     */
    private $client;

    /**
     * @param string $outDir Directory the generated PHP files are written to
     */
    public function __construct(string $outDir)
    {
        $this->outDir = $outDir;

        $this->client = HttpClient::createForBaseUri('https://www.unicode.org/Public/UNIDATA/');
    }

    /**
     * Downloads the source data and (re)writes both tables.
     *
     * @throws RuntimeException When a source file cannot be parsed or a table cannot be written
     */
    public function generate(): void
    {
        $this->writeWideWidthData();

        $this->writeZeroWidthData();
    }

    /**
     * Builds the table of code points whose East Asian Width is W (wide) or F (fullwidth).
     */
    private function writeWideWidthData(): void
    {
        $content = $this->client->request('GET', 'EastAsianWidth.txt')->getContent();

        // The Unicode version is taken from the first line of the file.
        if (!preg_match('/^# EastAsianWidth-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        // One hexadecimal code point, or a "first..last" range, followed by ";" and the width class.
        // Optional spaces around ";" are accepted, as the zero width pattern already does before it.
        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; *[WF]/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The wide width pattern did not match anything.');
        }

        $this->write('wcswidth_table_wide.php', $version, $matches);
    }

    /**
     * Builds the table of code points whose general category is Me or Mn.
     */
    private function writeZeroWidthData(): void
    {
        $content = $this->client->request('GET', 'extracted/DerivedGeneralCategory.txt')->getContent();

        // The Unicode version is taken from the first line of the file.
        if (!preg_match('/^# DerivedGeneralCategory-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; (?:Me|Mn)/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The zero width pattern did not match anything.');
        }

        $this->write('wcswidth_table_zero.php', $version, $matches);
    }

    /**
     * Exports the formatted ranges as a PHP file returning an array.
     *
     * @param array $rawData Matches in PREG_SET_ORDER form, as produced by preg_match_all()
     *
     * @throws RuntimeException When the file cannot be written
     */
    private function write(string $fileName, string $version, array $rawData): void
    {
        $content = $this->getHeader($version).'return '.VarExporter::export($this->format($rawData)).";\n";

        // file_put_contents() returns false on failure; the content is never empty.
        if (false === file_put_contents($this->outDir.'/'.$fileName, $content)) {
            throw new RuntimeException(sprintf('The "%s" file could not be written.', $fileName));
        }
    }

    /**
     * Returns the opening of a generated file: PHP tag plus a comment with the Unicode version and the date.
     */
    private function getHeader(string $version): string
    {
        $date = (new \DateTimeImmutable())->format('c');

        return <<<EOT
<?php

/*
 * This file has been auto-generated by the Symfony String Component for internal use.
 *
 * Unicode version: $version
 * Date: $date
 */


EOT;
    }

    /**
     * Converts regex matches into [first, last] pairs of integers, sorted by first code point.
     *
     * @param array $rawData Matches in PREG_SET_ORDER form: index 1 is the first code point (hexadecimal),
     *                       index 2 the last one, absent when the line describes a single code point
     *
     * @return array[] Inclusive [first, last] code point pairs
     */
    private function format(array $rawData): array
    {
        $data = array_map(static function (array $row): array {
            $start = $row[1];
            // A single code point is stored as a range of one element.
            $end = $row[2] ?? $start;

            return [hexdec($start), hexdec($end)];
        }, $rawData);

        usort($data, static function (array $a, array $b): int {
            return $a[0] <=> $b[0];
        });

        return $data;
    }
}
+55Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\Component\String\Resources\WcswidthDataGenerator;
13+
14+
error_reporting(E_ALL);

// Turn every reported PHP error into an exception so that the script stops on the first problem.
set_error_handler(static function (int $type, string $msg, string $file, int $line): void {
    throw new \ErrorException($msg, 0, $type, $file, $line);
});

// Print an uncaught exception followed by the whole chain of its previous exceptions.
set_exception_handler(static function (\Throwable $exception): void {
    echo "\n";

    for ($cause = $exception, $root = true; null !== $cause; $cause = $cause->getPrevious(), $root = false) {
        if (!$root) {
            echo "Caused by\n";
        }

        echo get_class($cause).': '.$cause->getMessage()."\n\n";
        echo $cause->getFile().':'.$cause->getLine()."\n".$cause->getTraceAsString()."\n";
    }
});

$autoload = __DIR__.'/../../vendor/autoload.php';

// The generator relies on packages installed by Composer: give up early when they are missing.
if (!file_exists($autoload)) {
    echo wordwrap('You should run "composer install" in the component before running this script.', 75)." Aborting.\n";

    exit(1);
}

require_once $autoload;

echo "Generating wcswidth tables data...\n";

// Tables are written to the "data" directory next to the directory holding this script.
$generator = new WcswidthDataGenerator(dirname(__DIR__).'/data');
$generator->generate();

echo "Done.\n";

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.