Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 9acae56

Browse files
committed
Inline basic UTF-8 functions.
Shows a measurable speedup when processing UTF-8 data, such as with the new builtin collation provider.

Discussion: https://postgr.es/m/163f4e2190cdf67f67016044e503c5004547e5a9.camel@j-davis.com
Reviewed-by: Peter Eisentraut
1 parent 2b52086 commit 9acae56
Copy full SHA for 9acae56

File tree

Expand file tree / Collapse file tree

2 files changed

+61
-61
lines changed
Filter options
Expand file tree / Collapse file tree

2 files changed

+61
-61
lines changed

‎src/common/wchar.c

Copy file name to clipboard | Expand all lines: src/common/wchar.c
-61 | Lines changed: 0 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -476,39 +476,6 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
476476
}
477477

478478

479-
/*
480-
* Map a Unicode code point to UTF-8. utf8string must have at least
481-
* unicode_utf8len(c) bytes available.
482-
*/
483-
unsigned char *
484-
unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
485-
{
486-
if (c <= 0x7F)
487-
{
488-
utf8string[0] = c;
489-
}
490-
else if (c <= 0x7FF)
491-
{
492-
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
493-
utf8string[1] = 0x80 | (c & 0x3F);
494-
}
495-
else if (c <= 0xFFFF)
496-
{
497-
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
498-
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
499-
utf8string[2] = 0x80 | (c & 0x3F);
500-
}
501-
else
502-
{
503-
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
504-
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
505-
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
506-
utf8string[3] = 0x80 | (c & 0x3F);
507-
}
508-
509-
return utf8string;
510-
}
511-
512479
/*
513480
* Trivial conversion from pg_wchar to UTF-8.
514481
* caller should allocate enough space for "to"
@@ -670,34 +637,6 @@ ucs_wcwidth(pg_wchar ucs)
670637
return 1;
671638
}
672639

673-
/*
674-
* Convert a UTF-8 character to a Unicode code point.
675-
* This is a one-character version of pg_utf2wchar_with_len.
676-
*
677-
* No error checks here, c must point to a long-enough string.
678-
*/
679-
pg_wchar
680-
utf8_to_unicode(const unsigned char *c)
681-
{
682-
if ((*c & 0x80) == 0)
683-
return (pg_wchar) c[0];
684-
else if ((*c & 0xe0) == 0xc0)
685-
return (pg_wchar) (((c[0] & 0x1f) << 6) |
686-
(c[1] & 0x3f));
687-
else if ((*c & 0xf0) == 0xe0)
688-
return (pg_wchar) (((c[0] & 0x0f) << 12) |
689-
((c[1] & 0x3f) << 6) |
690-
(c[2] & 0x3f));
691-
else if ((*c & 0xf8) == 0xf0)
692-
return (pg_wchar) (((c[0] & 0x07) << 18) |
693-
((c[1] & 0x3f) << 12) |
694-
((c[2] & 0x3f) << 6) |
695-
(c[3] & 0x3f));
696-
else
697-
/* that is an invalid code on purpose */
698-
return 0xffffffff;
699-
}
700-
701640
static int
702641
pg_utf_dsplen(const unsigned char *s)
703642
{

‎src/include/mb/pg_wchar.h

Copy file name to clipboard | Expand all lines: src/include/mb/pg_wchar.h
+61 | Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -555,6 +555,67 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
555555
return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
556556
}
557557

558+
/*
559+
* Convert a UTF-8 character to a Unicode code point.
560+
* This is a one-character version of pg_utf2wchar_with_len.
561+
*
562+
* No error checks here, c must point to a long-enough string.
563+
*/
564+
static inline pg_wchar
565+
utf8_to_unicode(const unsigned char *c)
566+
{
567+
if ((*c & 0x80) == 0)
568+
return (pg_wchar) c[0];
569+
else if ((*c & 0xe0) == 0xc0)
570+
return (pg_wchar) (((c[0] & 0x1f) << 6) |
571+
(c[1] & 0x3f));
572+
else if ((*c & 0xf0) == 0xe0)
573+
return (pg_wchar) (((c[0] & 0x0f) << 12) |
574+
((c[1] & 0x3f) << 6) |
575+
(c[2] & 0x3f));
576+
else if ((*c & 0xf8) == 0xf0)
577+
return (pg_wchar) (((c[0] & 0x07) << 18) |
578+
((c[1] & 0x3f) << 12) |
579+
((c[2] & 0x3f) << 6) |
580+
(c[3] & 0x3f));
581+
else
582+
/* that is an invalid code on purpose */
583+
return 0xffffffff;
584+
}
585+
586+
/*
587+
* Map a Unicode code point to UTF-8. utf8string must have at least
588+
* unicode_utf8len(c) bytes available.
589+
*/
590+
static inline unsigned char *
591+
unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
592+
{
593+
if (c <= 0x7F)
594+
{
595+
utf8string[0] = c;
596+
}
597+
else if (c <= 0x7FF)
598+
{
599+
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
600+
utf8string[1] = 0x80 | (c & 0x3F);
601+
}
602+
else if (c <= 0xFFFF)
603+
{
604+
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
605+
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
606+
utf8string[2] = 0x80 | (c & 0x3F);
607+
}
608+
else
609+
{
610+
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
611+
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
612+
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
613+
utf8string[3] = 0x80 | (c & 0x3F);
614+
}
615+
616+
return utf8string;
617+
}
618+
558619
/*
559620
* Number of bytes needed to represent the given char in UTF8.
560621
*/

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.