4
4
5
5
// Expands to nothing. Used as a marker in parameter lists (e.g. `out int ch[2]`);
// presumably flags a parameter the callee only writes to - confirm against callers.
#define out
6
6
// Expands to nothing. Used as a marker in parameter lists (e.g. `inout int& index`);
// presumably flags a parameter the callee both reads and updates - confirm against callers.
#define inout
7
// Number of elements in the built-in array `a`, converted to int so it can be
// compared with int loop indices without a signed/unsigned mismatch.
// Only meaningful for real arrays: passing a pointer yields a wrong count.
// The argument and the whole expansion are parenthesized so the macro is safe
// inside larger expressions.
#define countof(a) (static_cast<int>(sizeof(a) / sizeof((a)[0])))
8
8
9
9
// True when ch is in the half-open range [0xD800, 0xE000), i.e. 0xD800..0xDFFF.
// Note: ch is evaluated twice, so avoid arguments with side effects.
#define is_surrogate(ch) ((ch) >= 0xD800 && (ch) < 0xE000)
10
10
@@ -133,14 +133,9 @@ decoder::result decoder::process_an_item(string& input, int& index, string& outp
133
133
// https://encoding.spec.whatwg.org/#bom-sniff
134
134
// Detects a byte order mark at the very start of str.
//   EF BB BF -> encoding::utf_8
//   FE FF    -> encoding::utf_16be
//   FF FE    -> encoding::utf_16le
// Returns encoding::null when no BOM is present, including when str is too
// short to contain one.
encoding bom_sniff(const string& str)
{
	// compare(pos, len, s) clamps len to the available characters, so an input
	// shorter than the BOM just compares unequal; no temporary strings are built.
	// Comparing against narrow literals also avoids testing a possibly signed
	// char against an int such as 0xFE, which could never be equal.
	if (str.compare(0, 3, "\xEF\xBB\xBF") == 0) return encoding::utf_8;
	if (str.compare(0, 2, "\xFE\xFF") == 0) return encoding::utf_16be;
	if (str.compare(0, 2, "\xFF\xFE") == 0) return encoding::utf_16le;
	return encoding::null;
}
146
141
@@ -183,7 +178,7 @@ struct utf_8_decoder : decoder
183
178
// https://encoding.spec.whatwg.org/#utf-8-decoder
184
179
decoder::result utf_8_decoder::handler(inout string& input, inout int& index, out int ch[2])
185
180
{
186
- int b = index == input.size() ? EOF : (byte)input[index++]; // read byte from input
181
+ int b = index == (int) input.size() ? EOF : (byte)input[index++]; // read byte from input
187
182
188
183
// 1. If byte is end-of-queue and UTF-8 bytes needed is not 0, set UTF-8 bytes needed to 0 and return error.
189
184
if (b == EOF && m_bytes_needed != 0)
@@ -385,7 +380,7 @@ int* single_byte_decoder::m_indexes[] =
385
380
// https://encoding.spec.whatwg.org/#single-byte-decoder
386
381
decoder::result single_byte_decoder::handler(string& input, int& index, int ch[2])
387
382
{
388
- int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
383
+ int b = index == (int) input.size() ? EOF : (byte)input[index++]; // read input byte
389
384
390
385
// 1. If byte is end-of-queue, return finished.
391
386
if (b == EOF) return result_finished;
@@ -459,7 +454,7 @@ int gb18030_decoder::ranges_code_point(int pointer)
459
454
// https://encoding.spec.whatwg.org/#gb18030-decoder
460
455
decoder::result gb18030_decoder::handler(string& input, int& index, int ch[2])
461
456
{
462
- int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
457
+ int b = index == (int) input.size() ? EOF : (byte)input[index++]; // read input byte
463
458
464
459
// 1. If byte is end-of-queue and gb18030 first, gb18030 second, and gb18030 third are 0x00, return finished.
465
460
if (b == EOF && m_first == 0 && m_second == 0 && m_third == 0)
@@ -611,7 +606,7 @@ int big5_decoder::m_index[] = {null,null,null,null,null,null,null,null,null,null
611
606
// https://encoding.spec.whatwg.org/#big5-decoder
612
607
decoder::result big5_decoder::handler(inout string& input, inout int& index, out int ch[2])
613
608
{
614
- int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
609
+ int b = index == (int) input.size() ? EOF : (byte)input[index++]; // read input byte
615
610
616
611
// 1. If byte is end-of-queue and Big5 lead is not 0x00, set Big5 lead to 0x00 and return error.
617
612
if (b == EOF && m_lead != 0)
@@ -703,7 +698,7 @@ struct euc_jp_decoder : jis_decoder
703
698
// https://encoding.spec.whatwg.org/#euc-jp-decoder
704
699
decoder::result euc_jp_decoder::handler(inout string& input, inout int& index, out int ch[2])
705
700
{
706
- int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
701
+ int b = index == (int) input.size() ? EOF : (byte)input[index++]; // read input byte
707
702
708
703
// 1. If byte is end-of-queue and EUC-JP lead is not 0x00, set EUC-JP lead to 0x00, and return error.
709
704
if (b == EOF && m_lead != 0)
@@ -812,7 +807,7 @@ struct iso_2022_jp_decoder : jis_decoder
812
807
// https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
813
808
decoder::result iso_2022_jp_decoder::handler(inout string& input, inout int& index, out int ch[2])
814
809
{
815
- int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
810
+ int b = index == (int) input.size() ? EOF : (byte)input[index++]; // read input byte
816
811
817
812
switch (m_state)
818
813
{
@@ -1013,7 +1008,7 @@ struct shift_jis_decoder : jis_decoder
1013
1008
// https://encoding.spec.whatwg.org/#shift_jis-decoder
1014
1009
decoder::result shift_jis_decoder::handler(inout string& input, inout int& index, out int ch[2])
1015
1010
{
1016
- int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
1011
+ int b = index == (int) input.size() ? EOF : (byte)input[index++]; // read input byte
1017
1012
1018
1013
// 1. If byte is end-of-queue and Shift_JIS lead is not 0x00, set Shift_JIS lead to 0x00 and return error.
1019
1014
if (b == EOF && m_lead != 0)
@@ -1107,7 +1102,7 @@ int euc_kr_decoder::m_index[] = {44034,44035,44037,44038,44043,44044,44045,44046
1107
1102
// https://encoding.spec.whatwg.org/#euc-kr-decoder
1108
1103
decoder::result euc_kr_decoder::handler(inout string& input, inout int& index, out int ch[2])
1109
1104
{
1110
- int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
1105
+ int b = index == (int) input.size() ? EOF : (byte)input[index++]; // read input byte
1111
1106
1112
1107
// 1. If byte is end-of-queue and EUC-KR lead is not 0x00, set EUC-KR lead to 0x00 and return error.
1113
1108
if (b == EOF && m_lead != 0)
@@ -1175,10 +1170,10 @@ struct replacement_decoder : decoder
1175
1170
};
1176
1171
1177
1172
// https://encoding.spec.whatwg.org/#replacement-decoder
1178
- decoder::result replacement_decoder::handler(inout string& input, inout int& index, out int ch [2])
1173
+ decoder::result replacement_decoder::handler(inout string& input, inout int& index, int[2])
1179
1174
{
1180
1175
// 1. If byte is end-of-queue, return finished.
1181
- if (index == input.size())
1176
+ if (index == (int) input.size())
1182
1177
return result_finished;
1183
1178
1184
1179
// 2. If replacement error returned is false, set replacement error returned to true and return error.
@@ -1208,7 +1203,7 @@ struct utf_16_decoder : decoder
1208
1203
// https://encoding.spec.whatwg.org/#shared-utf-16-decoder
1209
1204
decoder::result utf_16_decoder::handler(inout string& input, inout int& index, out int ch[2])
1210
1205
{
1211
- int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
1206
+ int b = index == (int) input.size() ? EOF : (byte)input[index++]; // read input byte
1212
1207
1213
1208
// 1. If byte is end-of-queue and either UTF-16 lead byte or UTF-16 lead surrogate is non-null, set UTF-16 lead byte and UTF-16 lead surrogate to null, and return error.
1214
1209
if (b == EOF && (m_lead_byte != 0 || m_lead_surrogate != 0))
@@ -1285,7 +1280,7 @@ struct x_user_defined_decoder : decoder
1285
1280
// https://encoding.spec.whatwg.org/#x-user-defined-decoder
1286
1281
decoder::result x_user_defined_decoder::handler(inout string& input, inout int& index, out int ch[2])
1287
1282
{
1288
- int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
1283
+ int b = index == (int) input.size() ? EOF : (byte)input[index++]; // read input byte
1289
1284
1290
1285
// 1. If byte is end-of-queue, return finished.
1291
1286
if (b == EOF)
@@ -1341,11 +1336,12 @@ decoder::ptr get_decoder(encoding _encoding)
1341
1336
1342
1337
case encoding::x_user_defined:
1343
1338
return make_shared<x_user_defined_decoder>();
1344
- }
1345
1339
1346
- // single-byte encoding
1347
- if (_encoding >= encoding::ibm866 && _encoding <= encoding::x_mac_cyrillic)
1348
- return make_shared<single_byte_decoder>(_encoding);
1340
+ default:
1341
+ // single-byte encoding
1342
+ if (_encoding >= encoding::ibm866 && _encoding <= encoding::x_mac_cyrillic)
1343
+ return make_shared<single_byte_decoder>(_encoding);
1344
+ }
1349
1345
1350
1346
return nullptr;
1351
1347
}
@@ -1357,7 +1353,7 @@ decoder::ptr get_decoder(encoding _encoding)
1357
1353
// https://encoding.spec.whatwg.org/encodings.json
1358
1354
struct {
1359
1355
const char* name;
1360
- encoding encoding ;
1356
+ encoding coding ;
1361
1357
} labels[] =
1362
1358
{
1363
1359
"unicode-1-1-utf-8", encoding::utf_8,
@@ -1636,7 +1632,7 @@ encoding get_encoding(string label)
1636
1632
for (int i = 0; i < countof(labels); i++)
1637
1633
{
1638
1634
if (label == labels[i].name)
1639
- return labels[i].encoding ;
1635
+ return labels[i].coding ;
1640
1636
}
1641
1637
return encoding::null;
1642
1638
}
0 commit comments