Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Latest commit

 

History

History
History
348 lines (316 loc) · 10.2 KB

File metadata and controls

348 lines (316 loc) · 10.2 KB
Copy raw file
Download raw file
Open symbols panel
Edit and raw actions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
<?php
declare(strict_types=1);
namespace PhpMyAdmin\SqlParser;
use ArrayAccess;
use Exception;
use Stringable;
use function mb_check_encoding;
use function mb_strlen;
use function ord;
/**
* Implementation for UTF-8 strings.
*
* The subscript operator in PHP, when used with string will return a byte and not a character. Because in UTF-8
* strings a character may occupy more than one byte, the subscript operator may return an invalid character.
*
* Because the lexer relies on the subscript operator this class had to be implemented.
*
* Implements array-like access for UTF-8 strings.
*
* In this library, this class should be used to parse UTF-8 queries.
*
* @implements ArrayAccess<int, string>
*/
class UtfString implements ArrayAccess, Stringable
{
    /**
     * The wrapped string, exactly as it was passed to the constructor.
     *
     * @var string
     */
    public $str = '';

    /**
     * Byte position of the internal cursor inside {@see $str}.
     *
     * It is moved by {@see offsetGet()} and always belongs to the character
     * counted by {@see $charIdx}.
     *
     * @var int
     */
    public $byteIdx = 0;

    /**
     * Character position of the internal cursor.
     *
     * Whenever a multi-byte character precedes the cursor, this value is
     * smaller than {@see $byteIdx}.
     *
     * @var int
     */
    public $charIdx = 0;

    /**
     * Size of the string, counted in bytes.
     *
     * @var int
     */
    public $byteLen = 0;

    /**
     * Size of the string, counted in characters.
     *
     * Stays at 0 when the string is not valid UTF-8.
     *
     * @var int
     */
    public $charLen = 0;

    /**
     * Lookup table from a single byte to its ordinal value.
     *
     * It is pre-filled with the common ASCII characters so that the hot path
     * of {@see getCharLength()} does not need ord(); any other byte is added
     * the first time it is seen. The digit keys are normalized by PHP to
     * integer keys, hence the int|string key type.
     *
     * Source: https://www.freecodecamp.org/news/ascii-table-hex-to-ascii-value-character-code-chart-2/
     *
     * @var array<int|string,int>
     */
    protected static $asciiMap = [
        // Control characters: NUL, HT, LF, VT, FF, CR.
        "\0" => 0, "\t" => 9, "\n" => 10, "\v" => 11, "\f" => 12, "\r" => 13,
        // Space and punctuation (32-47).
        ' ' => 32, '!' => 33, '"' => 34, '#' => 35, '$' => 36, '%' => 37, '&' => 38, '\'' => 39,
        '(' => 40, ')' => 41, '*' => 42, '+' => 43, ',' => 44, '-' => 45, '.' => 46, '/' => 47,
        // Digits (48-57).
        '0' => 48, '1' => 49, '2' => 50, '3' => 51, '4' => 52,
        '5' => 53, '6' => 54, '7' => 55, '8' => 56, '9' => 57,
        // Punctuation (58-64).
        ':' => 58, ';' => 59, '<' => 60, '=' => 61, '>' => 62, '?' => 63, '@' => 64,
        // Uppercase letters (65-90).
        'A' => 65, 'B' => 66, 'C' => 67, 'D' => 68, 'E' => 69, 'F' => 70, 'G' => 71,
        'H' => 72, 'I' => 73, 'J' => 74, 'K' => 75, 'L' => 76, 'M' => 77, 'N' => 78,
        'O' => 79, 'P' => 80, 'Q' => 81, 'R' => 82, 'S' => 83, 'T' => 84, 'U' => 85,
        'V' => 86, 'W' => 87, 'X' => 88, 'Y' => 89, 'Z' => 90,
        // Brackets, backslash, caret, underscore, grave accent (91-96).
        '[' => 91, '\\' => 92, ']' => 93, '^' => 94, '_' => 95, '`' => 96,
        // Lowercase letters (97-122).
        'a' => 97, 'b' => 98, 'c' => 99, 'd' => 100, 'e' => 101, 'f' => 102, 'g' => 103,
        'h' => 104, 'i' => 105, 'j' => 106, 'k' => 107, 'l' => 108, 'm' => 109, 'n' => 110,
        'o' => 111, 'p' => 112, 'q' => 113, 'r' => 114, 's' => 115, 't' => 116, 'u' => 117,
        'v' => 118, 'w' => 119, 'x' => 120, 'y' => 121, 'z' => 122,
        // Braces, vertical bar, tilde and DEL (123-127).
        '{' => 123, '|' => 124, '}' => 125, '~' => 126, "\x7f" => 127,
    ];

    /**
     * Wraps a string and measures it in bytes and in characters.
     *
     * @param string $str the string
     */
    public function __construct($str)
    {
        $this->str = $str;
        $this->byteLen = mb_strlen($str, '8bit');
        // A string that is not valid UTF-8 is reported as having no characters.
        $this->charLen = mb_check_encoding($str, 'UTF-8') ? mb_strlen($str, 'UTF-8') : 0;
    }

    /**
     * Tells whether a character index lies inside the string.
     *
     * @param int $offset the character index to test
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset): bool
    {
        if ($offset < 0) {
            return false;
        }

        return $offset < $this->charLen;
    }

    /**
     * Returns the character stored at a character index.
     *
     * The internal cursor ({@see $byteIdx} / {@see $charIdx}) is walked from
     * its current position to the requested index and stays there afterwards.
     *
     * @param int $offset the character index to read
     *
     * @return string|null the character, or null when the index is out of range
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        $outOfRange = ($offset < 0) || ($offset >= $this->charLen);
        if ($outOfRange) {
            return null;
        }

        $distance = $offset - $this->charIdx;
        if ($distance > 0) {
            // Walk forward, skipping one whole character per step.
            for ($steps = $distance; $steps > 0; --$steps) {
                $this->byteIdx += static::getCharLength($this->str[$this->byteIdx]);
                ++$this->charIdx;
            }
        } elseif ($distance < 0) {
            // Walk backward; bytes of the form 10xxxxxx (128-191) are
            // continuation bytes, so keep stepping until a lead byte is hit.
            for ($steps = $distance; $steps < 0; ++$steps) {
                do {
                    --$this->byteIdx;
                    $code = ord($this->str[$this->byteIdx]);
                } while (($code & 0xC0) === 0x80);

                --$this->charIdx;
            }
        }

        // Collect every byte that belongs to the character under the cursor.
        $start = $this->byteIdx;
        $end = $start + static::getCharLength($this->str[$start]);
        $char = '';
        for ($pos = $start; $pos < $end; ++$pos) {
            $char .= $this->str[$pos];
        }

        return $char;
    }

    /**
     * Writing a character is not supported.
     *
     * @param int    $offset the character index to write
     * @param string $value  the new character
     *
     * @return void
     *
     * @throws Exception always, because the operation is not implemented.
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        throw new Exception('Not implemented.');
    }

    /**
     * Removing a character is not supported.
     *
     * @param int $offset the character index to remove
     *
     * @return void
     *
     * @throws Exception always, because the operation is not implemented.
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        throw new Exception('Not implemented.');
    }

    /**
     * Derives the byte length of a UTF-8 character from its first byte.
     *
     * RFC 3629 limits a UTF-8 character to 4 bytes; the legacy 5 and 6 byte
     * lead bytes are nevertheless recognized here.
     *
     * @see https://tools.ietf.org/html/rfc3629
     *
     * @param string $byte the first byte of the character
     */
    public static function getCharLength($byte): int
    {
        // Queries are mostly ASCII, so try the lookup table before paying
        // for ord(); unknown bytes are stored for the next call.
        if (isset(static::$asciiMap[$byte])) {
            $code = static::$asciiMap[$byte];
        } else {
            $code = ord($byte);
            static::$asciiMap[$byte] = $code;
        }

        if ($code < 0x80) {
            return 1;
        }

        if ($code < 0xE0) {
            return 2;
        }

        if ($code < 0xF0) {
            return 3;
        }

        if ($code < 0xF8) {
            return 4;
        }

        if ($code < 0xFC) {
            return 5; // unofficial
        }

        return 6; // unofficial
    }

    /**
     * Gives the number of characters in the string.
     *
     * @return int
     */
    public function length()
    {
        return $this->charLen;
    }

    /**
     * Gives back the wrapped string unchanged.
     *
     * @return string
     */
    public function __toString()
    {
        return $this->str;
    }
}
Morty Proxy This is a proxified and sanitized view of the page, visit original site.