Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 4967e13

Browse files
[String] Made AbstractString::width() follow POSIX.1-2001
Co-authored-by: Nicolas Grekas <nicolas.grekas@gmail.com>
1 parent 07818f2 commit 4967e13
Copy full SHA for 4967e13

12 files changed

+2698
-38
lines changed
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
/Resources/bin/update-data.php export-ignore
2+
/Resources/WcswidthDataGenerator.php export-ignore
13
/Tests export-ignore
24
/phpunit.xml.dist export-ignore
35
/.gitignore export-ignore

‎src/Symfony/Component/String/AbstractString.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/AbstractString.php
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,9 @@ public function truncate(int $length, string $ellipsis = ''): self
646646
*/
647647
abstract public function upper(): self;
648648

649+
/**
650+
* Returns the printable length on a terminal.
651+
*/
649652
abstract public function width(bool $ignoreAnsiDecoration = true): int;
650653

651654
/**

‎src/Symfony/Component/String/AbstractUnicodeString.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/AbstractUnicodeString.php
+78-12Lines changed: 78 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -352,9 +352,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
352352
return $str;
353353
}
354354

355-
/**
356-
* {@inheritdoc}
357-
*/
358355
public function reverse(): parent
359356
{
360357
$str = clone $this;
@@ -444,22 +441,20 @@ public function width(bool $ignoreAnsiDecoration = true): int
444441
$s = str_replace(["\r\n", "\r"], "\n", $s);
445442
}
446443

444+
if (!$ignoreAnsiDecoration) {
445+
$s = preg_replace('/[\p{Cc}\x7F]++/u', '', $s);
446+
}
447+
447448
foreach (explode("\n", $s) as $s) {
448449
if ($ignoreAnsiDecoration) {
449-
$s = preg_replace('/\x1B(?:
450+
$s = preg_replace('/(?:\x1B(?:
450451
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
451452
| [P\]X^_] .*? \x1B\\\\
452453
| [\x41-\x7E]
453-
)/x', '', $s);
454+
)|[\p{Cc}\x7F]++)/xu', '', $s);
454455
}
455456

456-
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
457-
$s = preg_replace('/[\x00\x05\x07\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11FF}\x{200B}]+/u', '', $s);
458-
$s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);
459-
460-
if ($width < $w += mb_strlen($s, 'UTF-8') + ($wide << 1)) {
461-
$width = $w;
462-
}
457+
$width += $this->wcswidth($s);
463458
}
464459

465460
return $width;
@@ -503,4 +498,75 @@ private function pad(int $len, self $pad, int $type): parent
503498
throw new InvalidArgumentException('Invalid padding type.');
504499
}
505500
}
501+
502+
/**
 * Computes the number of terminal columns needed to display the given string.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 *
 * Contrary to the source implementation, non-printable characters (C0 and C1 control codes + DEL) do not
 * return -1: they are not special-cased here and must be dropped by the caller beforehand.
 *
 * @param string $string A valid UTF-8 string
 *
 * @return int The sum of the widths (0, 1 or 2 columns) of every code point of the string
 */
private function wcswidth(string $string): int
{
    // Lookup tables are lists of [first, last] code point ranges, loaded lazily and kept between calls.
    static $tableZero, $tableWide;

    $width = 0;

    foreach (preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) as $c) {
        $codePoint = mb_ord($c, 'UTF-8');

        // Code points that never occupy a column and are handled without a table lookup.
        if (0 === $codePoint // NULL
            || 0x034F === $codePoint // COMBINING GRAPHEME JOINER
            || (0x200B <= $codePoint && 0x200F >= $codePoint) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || 0x2028 === $codePoint // LINE SEPARATOR
            || 0x2029 === $codePoint // PARAGRAPH SEPARATOR
            || (0x202A <= $codePoint && 0x202E >= $codePoint) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
            || (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
        ) {
            continue;
        }

        if (null === $tableZero) {
            $tableZero = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
        }

        // Zero-width code points do not add anything to the total.
        if (self::isCodePointInRanges($codePoint, $tableZero)) {
            continue;
        }

        if (null === $tableWide) {
            $tableWide = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
        }

        // Wide code points take two columns, everything else takes one.
        $width += self::isCodePointInRanges($codePoint, $tableWide) ? 2 : 1;
    }

    return $width;
}

/**
 * Tells whether a code point belongs to one of the given ranges, using a binary search.
 *
 * @param int[][] $ranges A list of [first, last] code point pairs, sorted in ascending order
 */
private static function isCodePointInRanges(int $codePoint, array $ranges): bool
{
    $upper = \count($ranges) - 1;

    // Cheap rejection: empty table, or code point outside of the span covered by the table.
    if ($upper < 0 || $codePoint < $ranges[0][0] || $codePoint > $ranges[$upper][1]) {
        return false;
    }

    $lower = 0;
    while ($lower <= $upper) {
        // Integer midpoint: avoids using the float returned by floor() as an array index.
        $mid = ($lower + $upper) >> 1;

        if ($codePoint > $ranges[$mid][1]) {
            $lower = $mid + 1;
        } elseif ($codePoint < $ranges[$mid][0]) {
            $upper = $mid - 1;
        } else {
            return true;
        }
    }

    return false;
}
506572
}

‎src/Symfony/Component/String/ByteString.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/ByteString.php
+2-26Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -303,9 +303,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
303303
return $str;
304304
}
305305

306-
/**
307-
* {@inheritdoc}
308-
*/
309306
public function reverse(): parent
310307
{
311308
$str = clone $this;
@@ -460,29 +457,8 @@ public function upper(): parent
460457

461458
public function width(bool $ignoreAnsiDecoration = true): int
462459
{
463-
$width = 0;
464-
$s = str_replace(["\x00", "\x05", "\x07"], '', $this->string);
460+
$string = preg_match('//u', $this->string) ? $this->string : preg_replace('/[\x80-\xFF]/', '?', $this->string);
465461

466-
if (false !== strpos($s, "\r")) {
467-
$s = str_replace(["\r\n", "\r"], "\n", $s);
468-
}
469-
470-
foreach (explode("\n", $s) as $s) {
471-
if ($ignoreAnsiDecoration) {
472-
$s = preg_replace('/\x1B(?:
473-
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
474-
| [P\]X^_] .*? \x1B\\\\
475-
| [\x41-\x7E]
476-
)/x', '', $s);
477-
}
478-
479-
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
480-
481-
if ($width < $w += \strlen($s)) {
482-
$width = $w;
483-
}
484-
}
485-
486-
return $width;
462+
return (new CodePointString($string))->width($ignoreAnsiDecoration);
487463
}
488464
}

‎src/Symfony/Component/String/CHANGELOG.md

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/CHANGELOG.md
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CHANGELOG
55
-----
66

77
* Added the `AbstractString::reverse()` method.
8+
* Made `AbstractString::width()` follow POSIX.1-2001.
89

910
5.0.0
1011
-----
+113Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\String\Resources;
13+
14+
use Symfony\Component\HttpClient\HttpClient;
15+
use Symfony\Component\String\Exception\RuntimeException;
16+
use Symfony\Component\VarExporter\VarExporter;
17+
18+
/**
19+
* @internal
20+
*/
21+
/**
 * Downloads Unicode data files from unicode.org and turns them into the PHP lookup
 * tables ("wcswidth_table_wide.php" and "wcswidth_table_zero.php") written to the output directory.
 */
final class WcswidthDataGenerator
{
    private $outDir;

    private $client;

    /**
     * @param string $outDir The directory where the generated PHP files are written
     */
    public function __construct(string $outDir)
    {
        $this->outDir = $outDir;

        $this->client = HttpClient::createForBaseUri('https://www.unicode.org/Public/UNIDATA/');
    }

    /**
     * Generates both the wide and the zero width tables.
     *
     * @throws RuntimeException When a data file cannot be parsed or a table cannot be written
     */
    public function generate(): void
    {
        $this->writeWideWidthData();

        $this->writeZeroWidthData();
    }

    /**
     * Extracts the ranges whose East Asian Width is W (wide) or F (fullwidth) from EastAsianWidth.txt.
     */
    private function writeWideWidthData(): void
    {
        $content = $this->client->request('GET', 'EastAsianWidth.txt')->getContent();

        if (!preg_match('/^# EastAsianWidth-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        // Rows look like "XXXX;W" or "XXXX..YYYY;F"; optional padding around ";" is tolerated.
        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))?[ \t]*;[ \t]*[WF]/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The wide width pattern did not match anything.');
        }

        $this->write('wcswidth_table_wide.php', $version, $matches);
    }

    /**
     * Extracts the ranges whose general category is Me or Mn from extracted/DerivedGeneralCategory.txt.
     */
    private function writeZeroWidthData(): void
    {
        $content = $this->client->request('GET', 'extracted/DerivedGeneralCategory.txt')->getContent();

        if (!preg_match('/^# DerivedGeneralCategory-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        // Rows look like "XXXX ; Mn" or "XXXX..YYYY ; Me"; optional padding around ";" is tolerated.
        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))?[ \t]*;[ \t]*(?:Me|Mn)/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The zero width pattern did not match anything.');
        }

        $this->write('wcswidth_table_zero.php', $version, $matches);
    }

    /**
     * Dumps the formatted ranges as a PHP file returning an array.
     *
     * @param array $rawData The match sets returned by preg_match_all() with PREG_SET_ORDER
     */
    private function write(string $fileName, string $version, array $rawData): void
    {
        $content = $this->getHeader($version).'return '.VarExporter::export($this->format($rawData)).";\n";

        // Strict check: only a real write failure (false) is an error.
        if (false === file_put_contents($this->outDir.'/'.$fileName, $content)) {
            throw new RuntimeException(sprintf('The "%s" file could not be written.', $fileName));
        }
    }

    /**
     * Builds the opening PHP tag and the comment placed at the top of every generated file.
     */
    private function getHeader(string $version): string
    {
        $date = (new \DateTimeImmutable())->format('c');

        return <<<EOT
<?php

/*
 * This file has been auto-generated by the Symfony String Component for internal use.
 *
 * Unicode version: $version
 * Date: $date
 */


EOT;
    }

    /**
     * Converts regexp match sets into a list of [first, last] integer code point pairs sorted by first code point.
     */
    private function format(array $rawData): array
    {
        $data = array_map(static function (array $row): array {
            $start = $row[1];
            // Single code point rows have no second capture group: the range then starts and ends on it.
            $end = $row[2] ?? $start;

            return [hexdec($start), hexdec($end)];
        }, $rawData);

        usort($data, static function (array $a, array $b): int {
            return $a[0] <=> $b[0];
        });

        return $data;
    }
}
+55Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\Component\String\Resources\WcswidthDataGenerator;
13+
14+
error_reporting(E_ALL);

// Turn every reported PHP error into an exception so that the generation stops on the first problem.
set_error_handler(static function (int $type, string $msg, string $file, int $line): void {
    throw new \ErrorException($msg, 0, $type, $file, $line);
});

// Print the uncaught exception followed by the chain of its previous exceptions.
set_exception_handler(static function (\Throwable $exception): void {
    echo "\n";

    for ($cause = $exception; null !== $cause; $cause = $cause->getPrevious()) {
        // Every exception but the first one of the chain is introduced as a cause.
        if ($cause !== $exception) {
            echo "Caused by\n";
        }

        echo get_class($cause).': '.$cause->getMessage()."\n\n";
        echo $cause->getFile().':'.$cause->getLine()."\n";
        echo $cause->getTraceAsString()."\n";
    }
});

// The autoloader is looked up two directories above this script.
$autoload = __DIR__.'/../../vendor/autoload.php';

if (!file_exists($autoload)) {
    echo wordwrap('You should run "composer install" in the component before running this script.', 75)." Aborting.\n";

    exit(1);
}

require_once $autoload;

echo "Generating wcswidth tables data...\n";

// Tables are written to the "data" directory next to the directory of this script.
$generator = new WcswidthDataGenerator(dirname(__DIR__).'/data');
$generator->generate();

echo "Done.\n";

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.