Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 97ea933

Browse filesBrowse files
lyrixxnicolas-grekas
authored andcommitted
[Intl] Add EmojiTransliterator to translate emoji to many locales
1 parent 46f7f27 commit 97ea933
Copy full SHA for 97ea933

File tree

Expand file treeCollapse file tree

12 files changed

+354
-3
lines changed
Filter options
Expand file treeCollapse file tree

12 files changed

+354
-3
lines changed

‎.github/workflows/package-tests.yml

Copy file name to clipboardExpand all lines: .github/workflows/package-tests.yml
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818

1919
- name: Find packages
2020
id: find-packages
21-
run: echo "::set-output name=packages::$(php .github/get-modified-packages.php $(find src/Symfony -mindepth 2 -type f -name composer.json -printf '%h\n' | jq -R -s -c 'split("\n")[:-1]') $(git diff --name-only origin/${{ github.base_ref }} HEAD | grep src/ | jq -R -s -c 'split("\n")[:-1]'))"
21+
run: echo "::set-output name=packages::$(php .github/get-modified-packages.php $(find src/Symfony -mindepth 2 -type f -name composer.json -printf '%h\n' | grep -v src/Symfony/Component/Intl/Resources/emoji |jq -R -s -c 'split("\n")[:-1]') $(git diff --name-only origin/${{ github.base_ref }} HEAD | grep src/ | jq -R -s -c 'split("\n")[:-1]'))"
2222

2323
- name: Verify meta files are correct
2424
run: |

‎.github/workflows/unit-tests.yml

Copy file name to clipboardExpand all lines: .github/workflows/unit-tests.yml
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ jobs:
9595
echo SYMFONY_DEPRECATIONS_HELPER=weak >> $GITHUB_ENV
9696
cp composer.json composer.json.orig
9797
echo -e '{\n"require":{'"$(grep phpunit-bridge composer.json)"'"php":"*"},"minimum-stability":"dev"}' > composer.json
98-
php .github/build-packages.php HEAD^ $SYMFONY_VERSION $(find src/Symfony -mindepth 2 -type f -name composer.json -printf '%h\n')
98+
php .github/build-packages.php HEAD^ $SYMFONY_VERSION $(find src/Symfony -mindepth 2 -type f -name composer.json -printf '%h\n' | grep -v src/Symfony/Component/Intl/Resources/emoji)
9999
mv composer.json composer.json.phpunit
100100
mv composer.json.orig composer.json
101101
fi

‎psalm.xml

Copy file name to clipboardExpand all lines: psalm.xml
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
<directory name="src/Symfony/*/*/Tests" />
1515
<directory name="src/Symfony/*/*/*/Tests" />
1616
<directory name="src/Symfony/*/*/*/*/Tests" />
17+
<directory name="src/Symfony/Component/Intl/Resources/emoji/" />
1718
<directory name="vendor" />
1819
</ignoreFiles>
1920
</projectFiles>

‎src/Symfony/Component/Intl/.gitattributes

Copy file name to clipboardExpand all lines: src/Symfony/Component/Intl/.gitattributes
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
/phpunit.xml.dist export-ignore
33
/.gitattributes export-ignore
44
/.gitignore export-ignore
5+
/Resources/emoji export-ignore

‎src/Symfony/Component/Intl/CHANGELOG.md

Copy file name to clipboardExpand all lines: src/Symfony/Component/Intl/CHANGELOG.md
+5Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
CHANGELOG
22
=========
33

4+
6.2
5+
---
6+
7+
* Add `EmojiTransliterator` to translate emoji to many locales
8+
49
6.0
510
---
611

+11Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
.PHONY: help update build
2+
.DEFAULT_GOAL := help
3+
4+
update: ## Update sources
5+
@composer update
6+
7+
build: ## Build rules
8+
@./build.php
9+
10+
help:
11+
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-7s\033[0m %s\n", $$1, $$2}'
+19Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Emoji Transliterator Builder
2+
3+
This folder contains the tool to build all transliterator rules.
4+
5+
## Requirements
6+
7+
* composer
8+
* PHP
9+
10+
## Update the rules
11+
12+
To update the rules, you need to update the version of `unicode-org/cldr` in the
13+
`composer.json` file, then run `make update`.
14+
15+
Finally, run the following command:
16+
17+
```bash
18+
make build
19+
```
+135Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#!/usr/bin/env php
2+
<?php
3+
4+
/*
5+
* This file is part of the Symfony package.
6+
*
7+
* (c) Fabien Potencier <fabien@symfony.com>
8+
*
9+
* For the full copyright and license information, please view the LICENSE
10+
* file that was distributed with this source code.
11+
*/
12+
13+
require __DIR__.'/vendor/autoload.php';
14+
15+
use Symfony\Component\Filesystem\Filesystem;
16+
use Symfony\Component\Finder\Finder;
17+
18+
// Regenerate every rules file from scratch: empty the target directory first,
// then write one rules file per locale. buildRules() is a generator, so the
// CLDR annotation files are only read while saveRules() iterates over it.
Builder::cleanTarget();
Builder::saveRules(Builder::buildRules(Builder::getEmojisCodePoints()));
21+
22+
/**
 * Builds one ICU transliteration rules file per locale from the CLDR emoji
 * annotations installed under ./vendor/unicode-org/cldr.
 */
final class Builder
{
    private const TARGET_DIR = __DIR__.'/../data/transliterator/emoji/';
    private const EMOJI_TEST_FILE = __DIR__.'/vendor/unicode-org/cldr/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/emoji/emoji-test.txt';

    /**
     * Reads emoji-test.txt and indexes every emoji it lists.
     *
     * @return array<string, string> emoji keyed by their lower-case, space-separated
     *                               hexadecimal code points; each emoji is also registered
     *                               under the same key with the "200d" (Zero Width Joiner)
     *                               entries removed
     *
     * @throws \RuntimeException when the file cannot be read
     * @throws \DomainException  when a non-comment line does not have the expected format
     */
    public static function getEmojisCodePoints(): array
    {
        $lines = file(self::EMOJI_TEST_FILE);
        if (false === $lines) {
            // Without this guard the build would silently generate nothing.
            throw new \RuntimeException(sprintf('Could not read "%s".', self::EMOJI_TEST_FILE));
        }

        $emojisCodePoints = [];
        foreach ($lines as $line) {
            $line = trim($line);
            if (!$line || str_starts_with($line, '#')) {
                continue;
            }

            // 263A FE0F ; fully-qualified # ☺️ E0.6 smiling face
            if (!preg_match('{^(?<codePoints>[\w ]+) +; [\w-]+ +# (?<emoji>.+) E\d+\.\d+ ?(?<name>.+)$}Uu', $line, $matches)) {
                throw new \DomainException("Could not parse line: \"$line\".");
            }

            $codePoints = strtolower(trim($matches['codePoints']));
            $emojisCodePoints[$codePoints] = $matches['emoji'];
            // We also add a version without the "Zero Width Joiner"
            $codePoints = str_replace('200d ', '', $codePoints);
            $emojisCodePoints[$codePoints] = $matches['emoji'];
        }

        return $emojisCodePoints;
    }

    /**
     * Turns the CLDR "tts" annotations into transliteration rules.
     *
     * This is a generator: files are read, and exceptions are thrown, only
     * once the result is iterated.
     *
     * @param array<string, string> $emojisCodePoints the map returned by getEmojisCodePoints()
     *
     * @return Generator<string, string> rules text keyed by locale (the XML file base name)
     *
     * @throws \RuntimeException when an annotation file cannot be parsed or ICU rejects an emoji
     */
    public static function buildRules(array $emojisCodePoints): Generator
    {
        $files = (new Finder())
            ->files()
            ->in([
                __DIR__.'/vendor/unicode-org/cldr/common/annotationsDerived',
                __DIR__.'/vendor/unicode-org/cldr/common/annotations',
            ])
            ->name('*.xml')
        ;

        $mapsByLocale = [];

        foreach ($files as $file) {
            $locale = $file->getBasename('.xml');

            $document = new DOMDocument();
            if (!$document->loadXML(file_get_contents($file->getPathname()))) {
                throw new \RuntimeException(sprintf('Could not parse "%s".', $file->getPathname()));
            }
            $xpath = new DOMXPath($document);

            foreach ($xpath->query('.//annotation[@type="tts"]') as $annotation) {
                $emoji = $annotation->getAttribute('cp');
                $name = $annotation->textContent;
                $parts = preg_split('//u', $emoji, -1, \PREG_SPLIT_NO_EMPTY);
                $emojiCodePoints = implode(' ', array_map('dechex', array_map('mb_ord', $parts)));
                if (!array_key_exists($emojiCodePoints, $emojisCodePoints)) {
                    // Annotations whose code points are not listed in emoji-test.txt are skipped.
                    continue;
                }

                self::testEmoji($emoji, $locale);
                // Group by length so that createRules() can emit longer sequences first.
                $mapsByLocale[$locale][mb_strlen($emoji)][$emoji] = $name;
            }
        }

        foreach ($mapsByLocale as $locale => $maps) {
            yield $locale => self::createRules($maps);
        }
    }

    /**
     * Removes the target directory and recreates it empty.
     */
    public static function cleanTarget(): void
    {
        $fs = new Filesystem();
        $fs->remove(self::TARGET_DIR);
        $fs->mkdir(self::TARGET_DIR);
    }

    /**
     * Writes each rules string to "<locale>.txt" in the target directory.
     *
     * @param iterable<string, string> $rulesByLocale rules text keyed by locale
     *
     * @throws \RuntimeException when a rules file cannot be written
     */
    public static function saveRules(iterable $rulesByLocale): void
    {
        foreach ($rulesByLocale as $locale => $rules) {
            // TARGET_DIR already ends with a directory separator.
            $filename = self::TARGET_DIR.$locale.'.txt';
            if (false === file_put_contents($filename, $rules)) {
                throw new \RuntimeException(sprintf('Could not write "%s".', $filename));
            }
        }
    }

    /**
     * Ensures ICU accepts a rule whose source is the given emoji.
     *
     * @throws \RuntimeException when the transliterator cannot be created
     */
    private static function testEmoji(string $emoji, string $locale): void
    {
        if (!Transliterator::createFromRules("\\$emoji > test ;")) {
            throw new \RuntimeException(sprintf('Could not create transliterator for "%s" in "%s" locale. Error: "%s".', $emoji, $locale, intl_get_error_message()));
        }
    }

    /**
     * Serializes the maps of one locale into transliteration rules.
     *
     * @param array<int, array<string, string>> $maps emoji => name maps, keyed by the
     *                                                number of code points of the emoji
     *
     * @return string one "\<emoji> > <escaped name> ;" line per emoji
     */
    private static function createRules(array $maps): string
    {
        // We must sort the maps by the number of code points, because the order really matters:
        // 🫶🏼 must be before 🫶
        krsort($maps);

        $rules = '';
        // Iterate instead of array_merge(): array_merge() renumbers integer-like
        // keys, which would replace such a source string by an index.
        foreach ($maps as $map) {
            foreach ($map as $emoji => $name) {
                // Escape every non-alphanumeric character of the name.
                $name = preg_replace('{([^[:alnum:]])}u', '\\\\$1', $name);
                $rules .= "\\$emoji > $name ;\n";
            }
        }

        return $rules;
    }
}
+22Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"repositories": [
3+
{
4+
"type": "package",
5+
"package": {
6+
"name": "unicode-org/cldr",
7+
"version": "2022.06.29",
8+
"source": {
9+
"type": "git",
10+
"url": "https://github.com/unicode-org/cldr",
11+
"reference": "production/2022-06-29-1740z"
12+
}
13+
}
14+
}
15+
],
16+
"require": {
17+
"php": ">=7.2",
18+
"symfony/filesystem": "^6",
19+
"symfony/finder": "^6",
20+
"unicode-org/cldr": "*"
21+
}
22+
}
+116Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\Intl\Tests\Transliterator;
13+
14+
use PHPUnit\Framework\TestCase;
15+
use Symfony\Component\Finder\Finder;
16+
use Symfony\Component\Intl\Transliterator\EmojiTransliterator;
17+
18+
/**
 * Covers EmojiTransliterator: transliteration results, instance caching,
 * loading of every shipped rules file, and the error cases.
 *
 * @requires extension intl
 */
final class EmojiTransliteratorTest extends TestCase
{
    /**
     * @return iterable<array{string, string, string}> locale, input, expected output
     */
    public function provideTransliterateTests(): iterable
    {
        // The first arrow is particularly problematic!
        $arrows = '↔ - ↔️';

        return [
            [
                'fr',
                'un 😺, 🐈‍⬛, et a 🦁 vont au 🏞️',
                'un chat qui sourit, chat noir, et a tête de lion vont au parc national️',
            ],
            [
                'en',
                'a 😺, 🐈‍⬛, and a 🦁 go to 🏞️... 😍 🎉 💛',
                'a grinning cat, black cat, and a lion go to national park️... smiling face with heart-eyes party popper yellow heart',
            ],
            [
                'en',
                $arrows,
                'left-right arrow - left-right arrow️',
            ],
            [
                'fr',
                $arrows,
                'flèche gauche droite - flèche gauche droite️',
            ],
        ];
    }

    /** @dataProvider provideTransliterateTests */
    public function testTransliterate(string $locale, string $input, string $expected)
    {
        $this->assertSame($expected, EmojiTransliterator::getInstance($locale)->transliterate($input));
    }

    public function testTransliteratorCache()
    {
        // Two lookups for the same locale must return the very same object.
        $this->assertSame(EmojiTransliterator::getInstance('en'), EmojiTransliterator::getInstance('en'));
    }

    /**
     * Yields the base name (locale) of every rules file found in the resources directory.
     */
    public function provideLocaleTest(): iterable
    {
        $rulesFiles = (new Finder())
            ->files()
            ->name('*.txt')
            ->in(__DIR__.'/../../Resources/data/transliterator/emoji')
        ;

        foreach ($rulesFiles as $rulesFile) {
            yield [$rulesFile->getBasename('.txt')];
        }
    }

    /** @dataProvider provideLocaleTest */
    public function testAllTransliterator(string $locale)
    {
        $this->assertNotEmpty(EmojiTransliterator::getInstance($locale)->transliterate('😀'));
    }

    public function testTransliterateWithInvalidLocale()
    {
        $this->expectException(\InvalidArgumentException::class);
        $this->expectExceptionMessage('Invalid "../emoji/en" locale.');

        EmojiTransliterator::getInstance('../emoji/en');
    }

    public function testTransliterateWithMissingLocale()
    {
        $this->expectException(\RuntimeException::class);
        $this->expectExceptionMessage('The transliterator rules source does not exist for locale "invalid".');

        EmojiTransliterator::getInstance('invalid');
    }

    public function testTransliterateWithBrokenLocale()
    {
        // A temporary rules file with an unterminated quote, removed again below.
        $brokenRulesPath = __DIR__.'/../../Resources/data/transliterator/emoji/broken.txt';
        file_put_contents($brokenRulesPath, '😀 > oups\' ;');

        $this->expectException(\RuntimeException::class);
        $this->expectExceptionMessage('Unable to create EmojiTransliterator instance: "transliterator_create_from_rules: unable to create ICU transliterator from rules (parse error at offset 4, after "😀 >", before or at " oups\' ;"): U_UNTERMINATED_QUOTE".');

        try {
            EmojiTransliterator::getInstance('broken');
        } finally {
            unlink($brokenRulesPath);
        }
    }
}
+40Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\Intl\Transliterator;
13+
14+
/**
 * Gives access to transliterators built from the emoji rules files stored in
 * Resources/data/transliterator/emoji, one file per locale.
 */
final class EmojiTransliterator
{
    /** @var array<string, \Transliterator> instances already created, keyed by locale */
    private static array $transliteratorsByLocale = [];

    /**
     * Returns the transliterator for the given locale, creating it on first use.
     *
     * The same instance is returned for subsequent calls with the same locale.
     *
     * @throws \InvalidArgumentException when the locale contains unexpected characters
     * @throws \RuntimeException         when the rules file is missing, unreadable or rejected by ICU
     */
    public static function getInstance(string $locale): \Transliterator
    {
        return self::$transliteratorsByLocale[$locale] ??= self::createTransliterator($locale);
    }

    /**
     * Loads "<locale>.txt" and compiles it into a transliterator.
     *
     * @throws \InvalidArgumentException when the locale contains unexpected characters
     * @throws \RuntimeException         when the rules file is missing, unreadable or rejected by ICU
     */
    private static function createTransliterator(string $locale): \Transliterator
    {
        // The locale ends up in a file path, hence the strict character list.
        // The "D" modifier prevents "$" from matching before a trailing newline.
        if (!preg_match('/^[a-z0-9@_\\.\\-]*$/iD', $locale)) {
            throw new \InvalidArgumentException(sprintf('Invalid "%s" locale.', $locale));
        }

        $rulesFilename = __DIR__."/../Resources/data/transliterator/emoji/$locale.txt";
        if (!is_file($rulesFilename)) {
            throw new \RuntimeException(sprintf('The transliterator rules source does not exist for locale "%s".', $locale));
        }

        $rules = file_get_contents($rulesFilename);
        if (false === $rules) {
            // Do not hand "false" over to ICU as if it were an empty rule set.
            throw new \RuntimeException(sprintf('Unable to read the transliterator rules for locale "%s".', $locale));
        }

        if (!$transliterator = \Transliterator::createFromRules($rules)) {
            throw new \RuntimeException(sprintf('Unable to create EmojiTransliterator instance: "%s".', intl_get_error_message()));
        }

        return $transliterator;
    }
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.