graphql-python
diff --git a/src/graphql/language/lexer.py b/src/graphql/language/lexer.py
+93 -15 (93 additions & 15 deletions)
diff --git a/src/graphql/language/print_string.py b/src/graphql/language/print_string.py
+81 -0 (81 additions & 0 deletions)
diff --git a/src/graphql/language/printer.py b/src/graphql/language/printer.py
+3 -3 (3 additions & 3 deletions)
@@ -67,12 +67,16 @@ def print_code_point_at(self, location: int) -> str:
         if location >= len(body):
             return TokenKind.EOF.value
         char = body[location]
-        code = ord(char)
         # Printable ASCII
-        if 0x20 <= code <= 0x7E:
+        if "\x20" <= char <= "\x7E":
             return "'\"'" if char == '"' else f"'{char}'"
         # Unicode code point
-        return f"U+{code:04X}"
+        point = (
+            decode_surrogate_pair(ord(char), ord(body[location + 1]))
+            if is_supplementary_code_point(body, location)
+            else ord(char)
+        )
+        return f"U+{point:04X}"
 
     def create_token(
         self, kind: TokenKind, start: int, end: int, value: Optional[str] = None
@@ -141,7 +145,8 @@ def read_next_token(self, start: int) -> Token:
                 if char == "'"
                 else (
                     f"Unexpected character: {self.print_code_point_at(position)}."
-                    if is_source_character(char)
+                    if is_unicode_scalar_value(char)
+                    or is_supplementary_code_point(body, position)
                     else f"Invalid character: {self.print_code_point_at(position)}."
                 )
             )
@@ -158,10 +163,14 @@ def read_comment(self, start: int) -> Token:
         position = start + 1
         while position < body_length:
             char = body[position]
-
-            if char in "\r\n" or not is_source_character(char):
+            if char in "\r\n":
                 break
-            position += 1
+            if is_unicode_scalar_value(char):
+                position += 1
+            elif is_supplementary_code_point(body, position):
+                position += 2
+            else:
+                break  # pragma: no cover
 
         return self.create_token(
             TokenKind.COMMENT,
@@ -270,7 +279,11 @@ def read_string(self, start: int) -> Token:
             if char == "\\":
                 append(body[chunk_start:position])
                 escape = (
-                    self.read_escaped_unicode(position)
+                    (
+                        self.read_escaped_unicode_variable_width(position)
+                        if body[position + 2 : position + 3] == "{"
+                        else self.read_escaped_unicode_fixed_width(position)
+                    )
                     if body[position + 1 : position + 2] == "u"
                     else self.read_escaped_character(position)
                 )
@@ -282,8 +295,10 @@ def read_string(self, start: int) -> Token:
             if char in "\r\n":
                 break
 
-            if is_source_character(char):
+            if is_unicode_scalar_value(char):
                 position += 1
+            elif is_supplementary_code_point(body, position):
+                position += 2
             else:
                 raise GraphQLSyntaxError(
                     self.source,
@@ -294,11 +309,50 @@ def read_string(self, start: int) -> Token:
 
         raise GraphQLSyntaxError(self.source, position, "Unterminated string.")
 
-    def read_escaped_unicode(self, position: int) -> EscapeSequence:
+    def read_escaped_unicode_variable_width(self, position: int) -> EscapeSequence:
+        """Read a braced, variable-width Unicode escape starting at ``position``.
+
+        ``position`` is the index of the backslash; the escape consists of the
+        backslash, ``u``, an opening brace, hex digits and a closing brace.
+
+        Returns an EscapeSequence holding the decoded character and the number
+        of source characters the escape occupies.
+
+        Raises GraphQLSyntaxError if the escape is unterminated, empty, too
+        long, contains a non-hex character, or does not encode a Unicode scalar
+        value.
+        """
+        source_text = self.source.body
+        # The whole escape cannot be larger than 12 chars (\u{00000000}).
+        limit = min(12, len(source_text) - position)
+        code_point = 0
+        consumed = 3  # the backslash, the "u" and the opening brace
+        for offset in range(3, limit):
+            consumed = offset + 1
+            current = source_text[position + offset]
+            if current != "}":
+                # Shift in the next hex digit; a negative digit value (non-hex
+                # character) turns the whole accumulator negative.
+                code_point = (code_point << 4) | read_hex_digit(current)
+                if code_point < 0:
+                    break
+                continue
+            # Closing brace: the escape must be at least 5 chars (\u{0}) and
+            # must encode a Unicode scalar value (no surrogates, <= U+10FFFF).
+            is_scalar_value = (
+                0 <= code_point <= 0xD7FF or 0xE000 <= code_point <= 0x10FFFF
+            )
+            if consumed >= 5 and is_scalar_value:
+                return EscapeSequence(chr(code_point), consumed)
+            break
+
+        invalid_sequence = source_text[position : position + consumed]
+        raise GraphQLSyntaxError(
+            self.source,
+            position,
+            f"Invalid Unicode escape sequence: '{invalid_sequence}'.",
+        )
+
+    def read_escaped_unicode_fixed_width(self, position: int) -> EscapeSequence:
         body = self.source.body
         code = read_16_bit_hex_code(body, position + 2)
-        if code >= 0:
+
+        if 0 <= code <= 0xD7FF or 0xE000 <= code <= 0x10FFFF:
             return EscapeSequence(chr(code), 6)
+
+        # GraphQL allows JSON-style surrogate pair escape sequences, but only when
+        # a valid pair is formed.
+        if 0xD800 <= code <= 0xDBFF:
+            if body[position + 6 : position + 8] == "\\u":
+                trailing_code = read_16_bit_hex_code(body, position + 8)
+                if 0xDC00 <= trailing_code <= 0xDFFF:
+                    return EscapeSequence(
+                        chr(decode_surrogate_pair(code, trailing_code)), 12
+                    )
+
         raise GraphQLSyntaxError(
             self.source,
             position,
@@ -351,8 +405,10 @@ def read_block_string(self, start: int) -> Token:
                 self.line_start = position
                 continue
 
-            if is_source_character(char):
+            if is_unicode_scalar_value(char):
                 position += 1
+            elif is_supplementary_code_point(body, position):
+                position += 2
             else:
                 raise GraphQLSyntaxError(
                     self.source,
@@ -477,9 +533,31 @@ def read_hex_digit(char: str) -> int:
     return -1
 
 
-def is_source_character(char: str) -> bool:
-    """Check whether this is a SourceCharacter"""
-    return char >= " " or char in "\t\r\n"
+def is_unicode_scalar_value(char: str) -> bool:
+    """Tell whether the given character is a Unicode scalar value.
+
+    Unicode scalar values are all code points other than the surrogate code
+    points, i.e. the inclusive ranges 0x0000 to 0xD7FF and 0xE000 to 0x10FFFF.
+    """
+    if char < "\ue000":
+        # Below the upper range: only the range under the surrogates can match.
+        return "\x00" <= char <= "\ud7ff"
+    return char <= "\U0010ffff"
+
+
+def is_supplementary_code_point(body: str, location: int) -> bool:
+    """Check whether a surrogate pair starts at the given location.
+
+    The GraphQL specification defines source text as a sequence of unicode scalar
+    values (which Unicode defines to exclude surrogate code points). A leading
+    surrogate (0xD800-0xDBFF) immediately followed by a trailing surrogate
+    (0xDC00-0xDFFF) is treated as one supplementary code point.
+
+    Returns False, instead of raising IndexError, when the location or the
+    character after a leading surrogate lies beyond the end of the body. This
+    lets callers handle a lone leading surrogate at the very end of the source
+    like any other invalid character.
+    """
+    try:
+        return (
+            "\ud800" <= body[location] <= "\udbff"
+            and "\udc00" <= body[location + 1] <= "\udfff"
+        )
+    except IndexError:
+        # Ran off the end of the body: there is no complete pair here.
+        return False
+
+
+def decode_surrogate_pair(leading: int, trailing: int) -> int:
+    """Combine a leading and a trailing surrogate into the code point they encode.
+
+    Only the low 10 bits of each argument are used: the leading surrogate
+    supplies the high 10 bits and the trailing surrogate the low 10 bits of the
+    offset that is added to 0x10000.
+    """
+    high_bits = (leading & 0x03FF) << 10
+    low_bits = trailing & 0x03FF
+    # The two parts occupy disjoint bits, so adding them equals OR-ing them.
+    return 0x10000 + high_bits + low_bits
 
 
 def is_name_start(char: str) -> bool:
 
@@ -0,0 +1,81 @@
+__all__ = ["print_string"]
+
+
+def print_string(s: str) -> str:
+    """Print a string as a GraphQL StringValue literal.
+
+    Control characters and the excluded characters (" U+0022 and \\ U+005C)
+    are replaced with escape sequences, and the result is wrapped in double
+    quotes.
+    """
+    escaped = s.translate(escape_sequences)
+    return '"' + escaped + '"'
+
+
+# Translation table for ``str.translate``: code point -> escape sequence.
+# Code points 0x00-0x1F and 0x7F-0x9F default to a \uXXXX escape; the entries
+# listed explicitly afterwards override that default with short escapes and
+# add the double quote and the backslash.
+escape_sequences = {
+    **{
+        code: f"\\u{code:04X}"
+        for code in (*range(0x00, 0x20), *range(0x7F, 0xA0))
+    },
+    0x08: "\\b",
+    0x09: "\\t",
+    0x0A: "\\n",
+    0x0C: "\\f",
+    0x0D: "\\r",
+    0x22: '\\"',
+    0x5C: "\\\\",
+}
@@ -1,9 +1,9 @@
-from json import dumps
 from typing import Any, Collection, Optional
 
 from ..language.ast import Node, OperationType
-from .visitor import visit, Visitor
 from .block_string import print_block_string
+from .print_string import print_string
+from .visitor import visit, Visitor
 
 __all__ = ["print_ast"]
 
@@ -148,7 +148,7 @@ def leave_float_value(node: PrintedNode, *_args: Any) -> str:
     def leave_string_value(node: PrintedNode, *_args: Any) -> str:
         if node.block:
             return print_block_string(node.value)
-        return dumps(node.value)
+        return print_string(node.value)
 
     @staticmethod
     def leave_boolean_value(node: PrintedNode, *_args: Any) -> str: