Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 75fc3fa

Browse files
committed
feature #35156 [String] Made AbstractString::width() follow POSIX.1-2001 (fancyweb)
This PR was merged into the 5.1-dev branch.

Discussion
----------

[String] Made AbstractString::width() follow POSIX.1-2001

| Q             | A
| ------------- | ---
| Branch?       | master
| Bug fix?      | no
| New feature?  | yes
| Deprecations? | no
| Tickets       | -
| License       | MIT
| Doc PR        | -

This PR ports the wcswidth() function (see http://man7.org/linux/man-pages/man3/wcwidth.3.html and https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c) into the String component. This new method will be useful in the Console component to determine how many columns a character takes.

I largely copied the Intl data import strategy.

Commits
-------

347d825 [String] Made AbstractString::width() follow POSIX.1-2001
2 parents e493752 + 347d825 commit 75fc3fa
Copy full SHA for 75fc3fa

12 files changed

+2704
-38
lines changed
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
/Resources/bin/update-data.php export-ignore
2+
/Resources/WcswidthDataGenerator.php export-ignore
13
/Tests export-ignore
24
/phpunit.xml.dist export-ignore
35
/.gitignore export-ignore

‎src/Symfony/Component/String/AbstractString.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/AbstractString.php
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,9 @@ public function truncate(int $length, string $ellipsis = ''): self
646646
*/
647647
abstract public function upper(): self;
648648

649+
/**
650+
* Returns the printable length on a terminal.
651+
*/
649652
abstract public function width(bool $ignoreAnsiDecoration = true): int;
650653

651654
/**

‎src/Symfony/Component/String/AbstractUnicodeString.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/AbstractUnicodeString.php
+84-12Lines changed: 84 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -352,9 +352,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
352352
return $str;
353353
}
354354

355-
/**
356-
* {@inheritdoc}
357-
*/
358355
public function reverse(): parent
359356
{
360357
$str = clone $this;
@@ -444,22 +441,21 @@ public function width(bool $ignoreAnsiDecoration = true): int
444441
$s = str_replace(["\r\n", "\r"], "\n", $s);
445442
}
446443

444+
if (!$ignoreAnsiDecoration) {
445+
$s = preg_replace('/[\p{Cc}\x7F]++/u', '', $s);
446+
}
447+
447448
foreach (explode("\n", $s) as $s) {
448449
if ($ignoreAnsiDecoration) {
449-
$s = preg_replace('/\x1B(?:
450+
$s = preg_replace('/(?:\x1B(?:
450451
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
451452
| [P\]X^_] .*? \x1B\\\\
452453
| [\x41-\x7E]
453-
)/x', '', $s);
454+
)|[\p{Cc}\x7F]++)/xu', '', $s);
454455
}
455456

456-
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
457-
$s = preg_replace('/[\x00\x05\x07\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11FF}\x{200B}]+/u', '', $s);
458-
$s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);
459-
460-
if ($width < $w += mb_strlen($s, 'UTF-8') + ($wide << 1)) {
461-
$width = $w;
462-
}
457+
// Non printable characters have been dropped, so wcswidth cannot logically return -1.
458+
$width += $this->wcswidth($s);
463459
}
464460

465461
return $width;
@@ -503,4 +499,80 @@ private function pad(int $len, self $pad, int $type): parent
503499
throw new InvalidArgumentException('Invalid padding type.');
504500
}
505501
}
502+
503+
/**
 * Returns the number of terminal columns needed to display the given string.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 *
 * Code points listed in the "zero" table take no column, code points listed
 * in the "wide" table take two columns, any other printable code point takes one.
 *
 * @return int The computed width, or -1 as soon as a C0/C1 control character or DEL is met
 */
private function wcswidth(string $string): int
{
    // Both tables are loaded on first use only and then kept in memory.
    static $tableZero, $tableWide;

    $width = 0;

    foreach (preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) as $c) {
        $codePoint = mb_ord($c, 'UTF-8');

        if (0 === $codePoint // NULL
            || 0x034F === $codePoint // COMBINING GRAPHEME JOINER
            || (0x200B <= $codePoint && 0x200F >= $codePoint) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || 0x2028 === $codePoint // LINE SEPARATOR
            || 0x2029 === $codePoint // PARAGRAPH SEPARATOR
            || (0x202A <= $codePoint && 0x202E >= $codePoint) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
            || (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
        ) {
            continue;
        }

        // Non printable characters
        if (32 > $codePoint // C0 control characters
            || (0x07F <= $codePoint && 0x0A0 > $codePoint) // C1 control characters and DEL
        ) {
            return -1;
        }

        if (null === $tableZero) {
            $tableZero = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
        }

        if ($this->isCodePointInRanges($codePoint, $tableZero)) {
            continue;
        }

        // The wide table is only needed once a code point is known not to be zero-width.
        if (null === $tableWide) {
            $tableWide = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
        }

        $width += $this->isCodePointInRanges($codePoint, $tableWide) ? 2 : 1;
    }

    return $width;
}

/**
 * Tells whether a code point belongs to one of the given inclusive ranges.
 *
 * The lookup is a binary search, so it relies on the ranges being ordered
 * by ascending start and not overlapping each other.
 *
 * @param int[][] $ranges A list of [first, last] code point pairs
 */
private function isCodePointInRanges(int $codePoint, array $ranges): bool
{
    $lbound = 0;
    $ubound = \count($ranges) - 1;

    // Cheap rejection of anything outside the span covered by the whole table.
    if (0 > $ubound || $codePoint < $ranges[0][0] || $codePoint > $ranges[$ubound][1]) {
        return false;
    }

    while ($ubound >= $lbound) {
        // Integer midpoint: keeps array offsets and bounds as ints (floor() would yield floats).
        $mid = ($lbound + $ubound) >> 1;

        if ($codePoint > $ranges[$mid][1]) {
            $lbound = $mid + 1;
        } elseif ($codePoint < $ranges[$mid][0]) {
            $ubound = $mid - 1;
        } else {
            return true;
        }
    }

    return false;
}
506578
}

‎src/Symfony/Component/String/ByteString.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/ByteString.php
+2-26Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -303,9 +303,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
303303
return $str;
304304
}
305305

306-
/**
307-
* {@inheritdoc}
308-
*/
309306
public function reverse(): parent
310307
{
311308
$str = clone $this;
@@ -460,29 +457,8 @@ public function upper(): parent
460457

461458
public function width(bool $ignoreAnsiDecoration = true): int
462459
{
463-
$width = 0;
464-
$s = str_replace(["\x00", "\x05", "\x07"], '', $this->string);
460+
$string = preg_match('//u', $this->string) ? $this->string : preg_replace('/[\x80-\xFF]/', '?', $this->string);
465461

466-
if (false !== strpos($s, "\r")) {
467-
$s = str_replace(["\r\n", "\r"], "\n", $s);
468-
}
469-
470-
foreach (explode("\n", $s) as $s) {
471-
if ($ignoreAnsiDecoration) {
472-
$s = preg_replace('/\x1B(?:
473-
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
474-
| [P\]X^_] .*? \x1B\\\\
475-
| [\x41-\x7E]
476-
)/x', '', $s);
477-
}
478-
479-
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
480-
481-
if ($width < $w += \strlen($s)) {
482-
$width = $w;
483-
}
484-
}
485-
486-
return $width;
462+
return (new CodePointString($string))->width($ignoreAnsiDecoration);
487463
}
488464
}

‎src/Symfony/Component/String/CHANGELOG.md

Copy file name to clipboardExpand all lines: src/Symfony/Component/String/CHANGELOG.md
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CHANGELOG
55
-----
66

77
* Added the `AbstractString::reverse()` method.
8+
* Made `AbstractString::width()` follow POSIX.1-2001.
89

910
5.0.0
1011
-----
+113Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\String\Resources;
13+
14+
use Symfony\Component\HttpClient\HttpClient;
15+
use Symfony\Component\String\Exception\RuntimeException;
16+
use Symfony\Component\VarExporter\VarExporter;
17+
18+
/**
 * Downloads Unicode data files and turns them into the PHP lookup tables
 * "wcswidth_table_wide.php" and "wcswidth_table_zero.php".
 *
 * Each generated file returns a list of [first, last] code point pairs
 * ordered by ascending first code point.
 *
 * @internal
 */
final class WcswidthDataGenerator
{
    /**
     * Matches "XXXX" or "XXXX..YYYY" entries whose East Asian Width property is W (wide) or F (fullwidth).
     *
     * Optional padding is accepted on both sides of the ";" field separator
     * so that compact ("1100..115F;W") and aligned ("1100..115F ; W") layouts both match.
     */
    private const WIDE_WIDTH_PATTERN = '/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; *[WF]/m';

    /**
     * Matches "XXXX" or "XXXX..YYYY" entries whose general category is Me or Mn.
     */
    private const ZERO_WIDTH_PATTERN = '/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; *(?:Me|Mn)/m';

    private $outDir;

    private $client;

    /**
     * @param string $outDir Directory the generated PHP files are written to
     */
    public function __construct(string $outDir)
    {
        $this->outDir = $outDir;

        $this->client = HttpClient::createForBaseUri('https://www.unicode.org/Public/UNIDATA/');
    }

    /**
     * Generates both the wide width and the zero width tables.
     *
     * @throws RuntimeException When a data file cannot be parsed or a table cannot be written
     */
    public function generate(): void
    {
        $this->writeWideWidthData();

        $this->writeZeroWidthData();
    }

    /**
     * Builds "wcswidth_table_wide.php" from EastAsianWidth.txt.
     */
    private function writeWideWidthData(): void
    {
        $content = $this->client->request('GET', 'EastAsianWidth.txt')->getContent();

        // The Unicode version is read from the file name repeated in the header comment.
        if (!preg_match('/^# EastAsianWidth-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        if (!preg_match_all(self::WIDE_WIDTH_PATTERN, $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The wide width pattern did not match anything.');
        }

        $this->write('wcswidth_table_wide.php', $version, $matches);
    }

    /**
     * Builds "wcswidth_table_zero.php" from extracted/DerivedGeneralCategory.txt.
     */
    private function writeZeroWidthData(): void
    {
        $content = $this->client->request('GET', 'extracted/DerivedGeneralCategory.txt')->getContent();

        if (!preg_match('/^# DerivedGeneralCategory-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        if (!preg_match_all(self::ZERO_WIDTH_PATTERN, $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The zero width pattern did not match anything.');
        }

        $this->write('wcswidth_table_zero.php', $version, $matches);
    }

    /**
     * Dumps the formatted ranges as a PHP file returning an array.
     *
     * @param array $rawData Matches as returned by preg_match_all() with PREG_SET_ORDER
     *
     * @throws RuntimeException When the file cannot be written
     */
    private function write(string $fileName, string $version, array $rawData): void
    {
        $content = $this->getHeader($version).'return '.VarExporter::export($this->format($rawData)).";\n";

        if (false === file_put_contents($this->outDir.'/'.$fileName, $content)) {
            throw new RuntimeException(sprintf('The "%s" file could not be written.', $fileName));
        }
    }

    /**
     * Returns the opening PHP tag followed by a comment stating the Unicode version and the generation date.
     */
    private function getHeader(string $version): string
    {
        $date = (new \DateTimeImmutable())->format('c');

        return <<<EOT
<?php

/*
 * This file has been auto-generated by the Symfony String Component for internal use.
 *
 * Unicode version: $version
 * Date: $date
 */


EOT;
    }

    /**
     * Converts regexp matches into [first, last] integer pairs sorted by first code point.
     *
     * @param array $rawData Rows holding the hexadecimal start at index 1 and, for ranges only, the hexadecimal end at index 2
     *
     * @return array[] List of [first, last] code point pairs
     */
    private function format(array $rawData): array
    {
        $data = array_map(static function (array $row): array {
            $start = $row[1];
            // A single code point has no second capture: it is a range of one.
            $end = $row[2] ?? $start;

            return [hexdec($start), hexdec($end)];
        }, $rawData);

        usort($data, static function (array $a, array $b): int {
            return $a[0] <=> $b[0];
        });

        return $data;
    }
}
+55Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\Component\String\Resources\WcswidthDataGenerator;
13+
14+
// Report everything, and promote any PHP error to an exception so that the script stops on the first problem.
error_reporting(E_ALL);

set_error_handler(static function (int $severity, string $message, string $file, int $line): void {
    throw new \ErrorException($message, 0, $severity, $file, $line);
});

// Print the uncaught exception followed by the whole chain of previous exceptions.
set_exception_handler(static function (\Throwable $exception): void {
    echo "\n";

    for ($current = $exception, $isFirst = true; null !== $current; $current = $current->getPrevious(), $isFirst = false) {
        if (!$isFirst) {
            echo "Caused by\n";
        }

        echo get_class($current).': '.$current->getMessage()."\n\n";
        echo $current->getFile().':'.$current->getLine()."\n";
        echo $current->getTraceAsString()."\n";
    }
});

// The autoloader is looked up two directories above this script.
$autoload = __DIR__.'/../../vendor/autoload.php';

if (!file_exists($autoload)) {
    $notice = wordwrap('You should run "composer install" in the component before running this script.', 75);

    echo $notice." Aborting.\n";

    exit(1);
}

require_once $autoload;

echo "Generating wcswidth tables data...\n";

// The tables are written into the "data" directory located next to this script's directory.
$generator = new WcswidthDataGenerator(dirname(__DIR__).'/data');
$generator->generate();

echo "Done.\n";

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.