memecode
diff --git a/‎CMakeLists.txt
Copy file name to clipboardExpand all lines: CMakeLists.txt
+2Lines changed: 2 additions & 0 deletions b/‎CMakeLists.txt
Copy file name to clipboardExpand all lines: CMakeLists.txt
+2Lines changed: 2 additions & 0 deletions
diff --git a/‎src/encodings.cpp
Copy file name to clipboardExpand all lines: src/encodings.cpp
+23-27Lines changed: 23 additions & 27 deletions b/‎src/encodings.cpp
Copy file name to clipboardExpand all lines: src/encodings.cpp
+23-27Lines changed: 23 additions & 27 deletions
diff --git a/‎src/html.cpp
Copy file name to clipboardExpand all lines: src/html.cpp
+11-11Lines changed: 11 additions & 11 deletions b/‎src/html.cpp
Copy file name to clipboardExpand all lines: src/html.cpp
+11-11Lines changed: 11 additions & 11 deletions
@@ -49,6 +49,7 @@ set(SOURCE_LITEHTML
     src/el_text.cpp
     src/el_title.cpp
     src/el_tr.cpp
+    src/encodings.cpp
     src/html.cpp
     src/html_tag.cpp
     src/iterators.cpp
@@ -111,6 +112,7 @@ set(HEADER_LITEHTML
     include/litehtml/el_title.h
     include/litehtml/el_tr.h
     include/litehtml/element.h
+    include/litehtml/encodings.h
     include/litehtml/html.h
     include/litehtml/html_tag.h
     include/litehtml/iterators.h
 
@@ -4,7 +4,7 @@
 
 #define out
 #define inout
-#define countof(a) (sizeof(a)/sizeof(a[0]))
+#define countof(a) int(sizeof(a)/sizeof(a[0]))
 
 #define is_surrogate(ch) ((ch) >= 0xD800 && (ch) < 0xE000)
 
@@ -133,14 +133,9 @@ decoder::result decoder::process_an_item(string& input, int& index, string& outp
 // https://encoding.spec.whatwg.org/#bom-sniff
 encoding bom_sniff(const string& str)
 {
-	if (str.size() < 2) return encoding::null;
-
-	byte utf8_bom[] = {0xEF, 0xBB, 0xBF};
-	if (str.size() >= 3 && memcmp(str.data(), utf8_bom, 3) == 0) return encoding::utf_8;
-
-	if (str[0] == 0xFE && str[1] == 0xFF) return encoding::utf_16be;
-	if (str[0] == 0xFF && str[1] == 0xFE) return encoding::utf_16le;
-
+	if (str.substr(0, 3) == "\xEF\xBB\xBF") return encoding::utf_8;
+	if (str.substr(0, 2) == "\xFE\xFF") return encoding::utf_16be;
+	if (str.substr(0, 2) == "\xFF\xFE") return encoding::utf_16le;
 	return encoding::null;
 }
 
@@ -183,7 +178,7 @@ struct utf_8_decoder : decoder
 // https://encoding.spec.whatwg.org/#utf-8-decoder
 decoder::result utf_8_decoder::handler(inout string& input, inout int& index, out int ch[2])
 {
-	int b = index == input.size() ? EOF : (byte)input[index++]; // read byte from input
+	int b = index == (int)input.size() ? EOF : (byte)input[index++]; // read byte from input
 
 	// 1. If byte is end-of-queue and UTF-8 bytes needed is not 0, set UTF-8 bytes needed to 0 and return error.
 	if (b == EOF && m_bytes_needed != 0)
@@ -385,7 +380,7 @@ int* single_byte_decoder::m_indexes[] =
 // https://encoding.spec.whatwg.org/#single-byte-decoder
 decoder::result single_byte_decoder::handler(string& input, int& index, int ch[2])
 {
-	int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
+	int b = index == (int)input.size() ? EOF : (byte)input[index++]; // read input byte
 
 	// 1. If byte is end-of-queue, return finished.
 	if (b == EOF) return result_finished;
@@ -459,7 +454,7 @@ int gb18030_decoder::ranges_code_point(int pointer)
 // https://encoding.spec.whatwg.org/#gb18030-decoder
 decoder::result gb18030_decoder::handler(string& input, int& index, int ch[2])
 {
-	int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
+	int b = index == (int)input.size() ? EOF : (byte)input[index++]; // read input byte
 
 	// 1. If byte is end-of-queue and gb18030 first, gb18030 second, and gb18030 third are 0x00, return finished.
 	if (b == EOF && m_first == 0 && m_second == 0 && m_third == 0)
@@ -611,7 +606,7 @@ int big5_decoder::m_index[] = {null,null,null,null,null,null,null,null,null,null
 // https://encoding.spec.whatwg.org/#big5-decoder
 decoder::result big5_decoder::handler(inout string& input, inout int& index, out int ch[2])
 {
-	int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
+	int b = index == (int)input.size() ? EOF : (byte)input[index++]; // read input byte
 
 	// 1. If byte is end-of-queue and Big5 lead is not 0x00, set Big5 lead to 0x00 and return error.
 	if (b == EOF && m_lead != 0)
@@ -703,7 +698,7 @@ struct euc_jp_decoder : jis_decoder
 // https://encoding.spec.whatwg.org/#euc-jp-decoder
 decoder::result euc_jp_decoder::handler(inout string& input, inout int& index, out int ch[2])
 {
-	int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
+	int b = index == (int)input.size() ? EOF : (byte)input[index++]; // read input byte
 
 	// 1. If byte is end-of-queue and EUC-JP lead is not 0x00, set EUC-JP lead to 0x00, and return error.
 	if (b == EOF && m_lead != 0)
@@ -812,7 +807,7 @@ struct iso_2022_jp_decoder : jis_decoder
 // https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
 decoder::result iso_2022_jp_decoder::handler(inout string& input, inout int& index, out int ch[2])
 {
-	int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
+	int b = index == (int)input.size() ? EOF : (byte)input[index++]; // read input byte
 
 	switch (m_state)
 	{
@@ -1013,7 +1008,7 @@ struct shift_jis_decoder : jis_decoder
 // https://encoding.spec.whatwg.org/#shift_jis-decoder
 decoder::result shift_jis_decoder::handler(inout string& input, inout int& index, out int ch[2])
 {
-	int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
+	int b = index == (int)input.size() ? EOF : (byte)input[index++]; // read input byte
 
 	// 1. If byte is end-of-queue and Shift_JIS lead is not 0x00, set Shift_JIS lead to 0x00 and return error.
 	if (b == EOF && m_lead != 0)
@@ -1107,7 +1102,7 @@ int euc_kr_decoder::m_index[] = {44034,44035,44037,44038,44043,44044,44045,44046
 // https://encoding.spec.whatwg.org/#euc-kr-decoder
 decoder::result euc_kr_decoder::handler(inout string& input, inout int& index, out int ch[2])
 {
-	int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
+	int b = index == (int)input.size() ? EOF : (byte)input[index++]; // read input byte
 
 	// 1. If byte is end-of-queue and EUC-KR lead is not 0x00, set EUC-KR lead to 0x00 and return error.
 	if (b == EOF && m_lead != 0)
@@ -1175,10 +1170,10 @@ struct replacement_decoder : decoder
 };
 
 // https://encoding.spec.whatwg.org/#replacement-decoder
-decoder::result replacement_decoder::handler(inout string& input, inout int& index, out int ch[2])
+decoder::result replacement_decoder::handler(inout string& input, inout int& index, int[2])
 {
 	// 1. If byte is end-of-queue, return finished.
-	if (index == input.size())
+	if (index == (int)input.size())
 		return result_finished;
 
 	// 2. If replacement error returned is false, set replacement error returned to true and return error.
@@ -1208,7 +1203,7 @@ struct utf_16_decoder : decoder
 // https://encoding.spec.whatwg.org/#shared-utf-16-decoder
 decoder::result utf_16_decoder::handler(inout string& input, inout int& index, out int ch[2])
 {
-	int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
+	int b = index == (int)input.size() ? EOF : (byte)input[index++]; // read input byte
 
 	// 1. If byte is end-of-queue and either UTF-16 lead byte or UTF-16 lead surrogate is non-null, set UTF-16 lead byte and UTF-16 lead surrogate to null, and return error.
 	if (b == EOF && (m_lead_byte != 0 || m_lead_surrogate != 0))
@@ -1285,7 +1280,7 @@ struct x_user_defined_decoder : decoder
 // https://encoding.spec.whatwg.org/#x-user-defined-decoder
 decoder::result x_user_defined_decoder::handler(inout string& input, inout int& index, out int ch[2])
 {
-	int b = index == input.size() ? EOF : (byte)input[index++]; // read input byte
+	int b = index == (int)input.size() ? EOF : (byte)input[index++]; // read input byte
 
 	// 1. If byte is end-of-queue, return finished.
 	if (b == EOF)
@@ -1341,11 +1336,12 @@ decoder::ptr get_decoder(encoding _encoding)
 
 	case encoding::x_user_defined:
 		return make_shared<x_user_defined_decoder>();
-	}
 
-	// single-byte encoding
-	if (_encoding >= encoding::ibm866 && _encoding <= encoding::x_mac_cyrillic)
-		return make_shared<single_byte_decoder>(_encoding);
+	default:
+		// single-byte encoding
+		if (_encoding >= encoding::ibm866 && _encoding <= encoding::x_mac_cyrillic)
+			return make_shared<single_byte_decoder>(_encoding);
+	}
 
 	return nullptr;
 }
@@ -1357,7 +1353,7 @@ decoder::ptr get_decoder(encoding _encoding)
 // https://encoding.spec.whatwg.org/encodings.json
 struct {
 	const char* name;
-	encoding encoding;
+	encoding coding;
 } labels[] =
 {
 	"unicode-1-1-utf-8",	encoding::utf_8,
@@ -1636,7 +1632,7 @@ encoding get_encoding(string label)
 	for (int i = 0; i < countof(labels); i++)
 	{
 		if (label == labels[i].name)
-			return labels[i].encoding;
+			return labels[i].coding;
 	}
 	return encoding::null;
 }
 
@@ -24,15 +24,15 @@ string& trim(string &s, const string& chars_to_trim)
 	return s;
 }
 
-void litehtml::lcase(string &s) 
+void lcase(string &s) 
 {
 	for(char & i : s)
 	{
 		i = t_tolower(i);
 	}
 }
 
-litehtml::string::size_type litehtml::find_close_bracket(const string &s, string::size_type off, char open_b, char close_b)
+string::size_type find_close_bracket(const string &s, string::size_type off, char open_b, char close_b)
 {
 	int cnt = 0;
 	for(string::size_type i = off; i < s.length(); i++)
@@ -52,7 +52,7 @@ litehtml::string::size_type litehtml::find_close_bracket(const string &s, string
 	return string::npos;
 }
 
-litehtml::string litehtml::index_value(int index, const string& strings, char delim)
+string index_value(int index, const string& strings, char delim)
 {
 	std::vector<string> vals;
 	string delims;
@@ -65,7 +65,7 @@ litehtml::string litehtml::index_value(int index, const string& strings, char de
 	return std::to_string(index);
 }
 
-int litehtml::value_index( const string& val, const string& strings, int defValue, char delim )
+int value_index( const string& val, const string& strings, int defValue, char delim )
 {
 	if(val.empty() || strings.empty() || !delim)
 	{
@@ -102,7 +102,7 @@ int litehtml::value_index( const string& val, const string& strings, int defValu
 	return defValue;
 }
 
-bool litehtml::value_in_list( const string& val, const string& strings, char delim )
+bool value_in_list( const string& val, const string& strings, char delim )
 {
 	int idx = value_index(val, strings, -1, delim);
 	if(idx >= 0)
@@ -112,7 +112,7 @@ bool litehtml::value_in_list( const string& val, const string& strings, char del
 	return false;
 }
 
-void litehtml::split_string(const string& str, string_vector& tokens, const string& delims, const string& delims_preserve, const string& quote)
+void split_string(const string& str, string_vector& tokens, const string& delims, const string& delims_preserve, const string& quote)
 {
 	if(str.empty() || (delims.empty() && delims_preserve.empty()))
 	{
@@ -174,7 +174,7 @@ void litehtml::split_string(const string& str, string_vector& tokens, const stri
 	}
 }
 
-void litehtml::join_string(string& str, const string_vector& tokens, const string& delims)
+void join_string(string& str, const string_vector& tokens, const string& delims)
 {
 	str = "";
 	for (size_t i = 0; i < tokens.size(); i++)
@@ -187,7 +187,7 @@ void litehtml::join_string(string& str, const string_vector& tokens, const strin
 	}
 }
 
-int litehtml::t_strcasecmp(const char *s1, const char *s2)
+int t_strcasecmp(const char *s1, const char *s2)
 {
 	int i, d, c;
 
@@ -204,7 +204,7 @@ int litehtml::t_strcasecmp(const char *s1, const char *s2)
 	}
 }
 
-int litehtml::t_strncasecmp(const char *s1, const char *s2, size_t n)
+int t_strncasecmp(const char *s1, const char *s2, size_t n)
 {
 	int i, d, c;
 
@@ -223,7 +223,7 @@ int litehtml::t_strncasecmp(const char *s1, const char *s2, size_t n)
 	return 0;
 }
 
-litehtml::string litehtml::get_escaped_string(const string& in_str)
+string get_escaped_string(const string& in_str)
 {
 	string ret;
 	for (auto ch : in_str)
@@ -281,7 +281,7 @@ litehtml::string litehtml::get_escaped_string(const string& in_str)
 	return ret;
 }
 
-bool litehtml::is_number(const string& string, const bool allow_dot) {
+bool is_number(const string& string, const bool allow_dot) {
 	for (auto ch : string)
 	{
 		if (!(t_isdigit(ch) || (allow_dot && ch == '.')))
Original file line number	Diff line number	Diff line change
`@@ -24,15 +24,15 @@ string& trim(string &s, const string& chars_to_trim)`
`24`	`24`	`return s;`
`25`	`25`	`}`
`26`	`26`
`27`		`-void litehtml::lcase(string &s)`
	`27`	`+void lcase(string &s)`
`28`	`28`	`{`
`29`	`29`	`for(char & i : s)`
`30`	`30`	`{`
`31`	`31`	`i = t_tolower(i);`
`32`	`32`	`}`
`33`	`33`	`}`
`34`	`34`
`35`		`-litehtml::string::size_type litehtml::find_close_bracket(const string &s, string::size_type off, char open_b, char close_b)`
	`35`	`+string::size_type find_close_bracket(const string &s, string::size_type off, char open_b, char close_b)`
`36`	`36`	`{`
`37`	`37`	`int cnt = 0;`
`38`	`38`	`for(string::size_type i = off; i < s.length(); i++)`
`@@ -52,7 +52,7 @@ litehtml::string::size_type litehtml::find_close_bracket(const string &s, string`
`52`	`52`	`return string::npos;`
`53`	`53`	`}`
`54`	`54`
`55`		`-litehtml::string litehtml::index_value(int index, const string& strings, char delim)`
	`55`	`+string index_value(int index, const string& strings, char delim)`
`56`	`56`	`{`
`57`	`57`	`std::vector<string> vals;`
`58`	`58`	`string delims;`
`@@ -65,7 +65,7 @@ litehtml::string litehtml::index_value(int index, const string& strings, char de`
`65`	`65`	`return std::to_string(index);`
`66`	`66`	`}`
`67`	`67`
`68`		`-int litehtml::value_index( const string& val, const string& strings, int defValue, char delim )`
	`68`	`+int value_index( const string& val, const string& strings, int defValue, char delim )`
`69`	`69`	`{`
`70`	`70`	`if(val.empty() \|\| strings.empty() \|\| !delim)`
`71`	`71`	`{`
`@@ -102,7 +102,7 @@ int litehtml::value_index( const string& val, const string& strings, int defValu`
`102`	`102`	`return defValue;`
`103`	`103`	`}`
`104`	`104`
`105`		`-bool litehtml::value_in_list( const string& val, const string& strings, char delim )`
	`105`	`+bool value_in_list( const string& val, const string& strings, char delim )`
`106`	`106`	`{`
`107`	`107`	`int idx = value_index(val, strings, -1, delim);`
`108`	`108`	`if(idx >= 0)`
`@@ -112,7 +112,7 @@ bool litehtml::value_in_list( const string& val, const string& strings, char del`
`112`	`112`	`return false;`
`113`	`113`	`}`
`114`	`114`
`115`		`-void litehtml::split_string(const string& str, string_vector& tokens, const string& delims, const string& delims_preserve, const string& quote)`
	`115`	`+void split_string(const string& str, string_vector& tokens, const string& delims, const string& delims_preserve, const string& quote)`
`116`	`116`	`{`
`117`	`117`	`if(str.empty() \|\| (delims.empty() && delims_preserve.empty()))`
`118`	`118`	`{`
`@@ -174,7 +174,7 @@ void litehtml::split_string(const string& str, string_vector& tokens, const stri`
`174`	`174`	`}`
`175`	`175`	`}`
`176`	`176`
`177`		`-void litehtml::join_string(string& str, const string_vector& tokens, const string& delims)`
	`177`	`+void join_string(string& str, const string_vector& tokens, const string& delims)`
`178`	`178`	`{`
`179`	`179`	`str = "";`
`180`	`180`	`for (size_t i = 0; i < tokens.size(); i++)`
`@@ -187,7 +187,7 @@ void litehtml::join_string(string& str, const string_vector& tokens, const strin`
`187`	`187`	`}`
`188`	`188`	`}`
`189`	`189`
`190`		`-int litehtml::t_strcasecmp(const char *s1, const char *s2)`
	`190`	`+int t_strcasecmp(const char *s1, const char *s2)`
`191`	`191`	`{`
`192`	`192`	`int i, d, c;`
`193`	`193`
`@@ -204,7 +204,7 @@ int litehtml::t_strcasecmp(const char *s1, const char *s2)`
`204`	`204`	`}`
`205`	`205`	`}`
`206`	`206`
`207`		`-int litehtml::t_strncasecmp(const char *s1, const char *s2, size_t n)`
	`207`	`+int t_strncasecmp(const char *s1, const char *s2, size_t n)`
`208`	`208`	`{`
`209`	`209`	`int i, d, c;`
`210`	`210`
`@@ -223,7 +223,7 @@ int litehtml::t_strncasecmp(const char *s1, const char *s2, size_t n)`
`223`	`223`	`return 0;`
`224`	`224`	`}`
`225`	`225`
`226`		`-litehtml::string litehtml::get_escaped_string(const string& in_str)`
	`226`	`+string get_escaped_string(const string& in_str)`
`227`	`227`	`{`
`228`	`228`	`string ret;`
`229`	`229`	`for (auto ch : in_str)`
`@@ -281,7 +281,7 @@ litehtml::string litehtml::get_escaped_string(const string& in_str)`
`281`	`281`	`return ret;`
`282`	`282`	`}`
`283`	`283`
`284`		`-bool litehtml::is_number(const string& string, const bool allow_dot) {`
	`284`	`+bool is_number(const string& string, const bool allow_dot) {`
`285`	`285`	`for (auto ch : string)`
`286`	`286`	`{`
`287`	`287`	`if (!(t_isdigit(ch) \|\| (allow_dot && ch == '.')))`