Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 3cfdd8f

Browse files
committed
Clean up scan.l's handling of \r vs \n --- they are reliably treated as
equivalent now, which should make Windows and Mac clients happier. Also fix failure to handle SQL comments between segments of a multiline quoted literal.
1 parent 905404a commit 3cfdd8f
Copy full SHA for 3cfdd8f

File tree

1 file changed

+81
-36
lines changed
Filter options

1 file changed

+81
-36
lines changed

‎src/backend/parser/scan.l

Copy file name to clipboard
Expand all lines: src/backend/parser/scan.l
+81 -36 Lines changed: 81 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.63 2000/01/26 05:56:43 momjian Exp $
12+
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.64 2000/02/19 04:17:25 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -41,15 +41,19 @@ static char *parseCh;
4141

4242
/* set up my input handler --- need one flavor for flex, one for lex */
4343
#if defined(FLEX_SCANNER)
44+
4445
#define YY_NO_UNPUT
4546
static int myinput(char* buf, int max);
4647
#undef YY_INPUT
4748
#define YY_INPUT(buf,result,max) {result = myinput(buf,max);}
48-
#else
49+
50+
#else /* !FLEX_SCANNER */
51+
4952
#undef input
5053
int input();
5154
#undef unput
5255
void unput(char);
56+
5357
#endif /* FLEX_SCANNER */
5458

5559
extern YYSTYPE yylval;
@@ -68,27 +72,22 @@ static int literalalloc; /* current allocated buffer size */
6872
static void addlit(char *ytext, int yleng);
6973

7074
%}
71-
/* OK, here is a short description of lex/flex rules behavior.
75+
/*
76+
* OK, here is a short description of lex/flex rules behavior.
7277
* The longest pattern which matches an input string is always chosen.
7378
* For equal-length patterns, the first occurring in the rules list is chosen.
74-
* INITIAL is the starting condition, to which all non-conditional rules apply.
75-
* When in an exclusive condition, only those rules defined for that condition apply.
79+
* INITIAL is the starting state, to which all non-conditional rules apply.
80+
* Exclusive states change parsing rules while the state is active. When in
81+
* an exclusive state, only those rules defined for that state apply.
7682
*
77-
* Exclusive states change parsing rules while the state is active.
78-
* There are exclusive states for quoted strings, extended comments,
79-
* and to eliminate parsing troubles for numeric strings.
83+
* We use exclusive states for quoted strings, extended comments,
84+
* and to eliminate parsing troubles for numeric strings.
8085
* Exclusive states:
8186
* <xb> binary numeric string - thomas 1997-11-16
8287
* <xc> extended C-style comments - tgl 1997-07-12
8388
* <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
8489
* <xh> hexadecimal numeric string - thomas 1997-11-16
8590
* <xq> quoted strings - tgl 1997-07-30
86-
*
87-
* The "extended comment" syntax closely resembles allowable operator syntax.
88-
* So, when in condition <xc>, only strings which would terminate the
89-
* "extended comment" trigger any action other than "ignore".
90-
* Be sure to match _any_ candidate comment, including those with appended
91-
* operator-like symbols. - thomas 1997-07-14
9291
*/
9392

9493
%x xb
@@ -101,46 +100,58 @@ static void addlit(char *ytext, int yleng);
101100
*/
102101
xbstart [bB]{quote}
103102
xbstop {quote}
104-
xbinside [^']*
105-
xbcat {quote}{space}*\n{space}*{quote}
103+
xbinside [^']+
104+
xbcat {quote}{whitespace_with_newline}{quote}
106105

107106
/* Hexadecimal number
108107
*/
109108
xhstart [xX]{quote}
110109
xhstop {quote}
111-
xhinside [^']*
112-
xhcat {quote}{space}*\n{space}*{quote}
110+
xhinside [^']+
111+
xhcat {quote}{whitespace_with_newline}{quote}
113112

114113
/* Extended quote
115114
* xqdouble implements SQL92 embedded quote
116115
* xqcat allows strings to cross input lines
117116
* Note: reduction of '' and \ sequences to output text is done in scanstr(),
118-
* not by rules here.
117+
* not by rules here. But we do get rid of xqcat sequences here.
119118
*/
120119
quote '
121120
xqstart {quote}
122121
xqstop {quote}
123122
xqdouble {quote}{quote}
124-
xqinside [^\\']*
123+
xqinside [^\\']+
125124
xqliteral [\\](.|\n)
126-
xqcat {quote}{space}*\n{space}*{quote}
125+
xqcat {quote}{whitespace_with_newline}{quote}
127126

128127
/* Delimited quote
129128
* Allows embedded spaces and other special characters into identifiers.
130129
*/
131130
dquote \"
132131
xdstart {dquote}
133132
xdstop {dquote}
134-
xdinside [^"]*
133+
xdinside [^"]+
135134

136-
/* Comments
135+
/* C-style comments
137136
* Ignored by the scanner and parser.
137+
*
138+
* The "extended comment" syntax closely resembles allowable operator syntax.
139+
* The tricky part here is to get lex to recognize a string starting with
140+
* slash-star as a comment, when interpreting it as an operator would produce
141+
* a longer match --- remember lex will prefer a longer match! So, we have
142+
* to provide a special rule for xcline (a complete comment that could
143+
* otherwise look like an operator), as well as append {op_and_self}* to
144+
* xcstart so that it matches at least as much as {operator} would.
145+
* Then the tie-breaker (first matching rule of same length) wins.
146+
* There is still a problem if someone writes, eg, slash-star-star-slash-plus.
147+
* It'll be taken as an xcstart, rather than xcline and an operator as one
148+
* could wish. I don't see any way around that given lex's behavior;
149+
* that someone will just have to write a space after the comment.
138150
*/
139-
xcline [\/][\*].*[\*][\/]{space}*\n*
140-
xcstart [\/][\*]{op_and_self}*
141-
xcstop {op_and_self}*[\*][\/]({space}*|\n)
142-
xcinside [^*]*
143-
xcstar [^/]
151+
xcline \/\*{op_and_self}*\*\/
152+
xcstart \/\*{op_and_self}*
153+
xcstop \*+\/
154+
xcinside ([^*]+)|(\*+[^/])
144155

145156
digit [0-9]
146157
letter [\200-\377_A-Za-z]
@@ -161,13 +172,44 @@ operator {op_and_self}+
161172

162173
integer {digit}+
163174
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
164-
real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
175+
real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
165176

166177
param \${integer}
167178

168-
comment ("--"|"//").*
179+
/*
180+
* In order to make the world safe for Windows and Mac clients as well as
181+
* Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
182+
* sequence will be seen as two successive newlines, but that doesn't cause
183+
* any problems. SQL92-style comments, which start with -- and extend to the
184+
* next newline, are treated as equivalent to a single whitespace character.
185+
*
186+
* NOTE a fine point: if there is no newline following --, we will absorb
187+
* everything to the end of the input as a comment. This is correct. Older
188+
* versions of Postgres failed to recognize -- as a comment if the input
189+
* did not end with a newline.
190+
*
191+
* XXX perhaps \f (formfeed) should be treated as a newline as well?
192+
*/
169193

170194
space [ \t\n\r\f]
195+
horiz_space [ \t\f]
196+
newline [\n\r]
197+
non_newline [^\n\r]
198+
199+
comment (("--"|"//"){non_newline}*)
200+
201+
whitespace ({space}|{comment})
202+
203+
/*
204+
* SQL92 requires at least one newline in the whitespace separating
205+
* string literals that are to be concatenated. Silly, but who are we
206+
* to argue? Note that {whitespace_with_newline} should not have * after
207+
* it, whereas {whitespace} should generally have a * after it...
208+
*/
209+
210+
horiz_whitespace ({horiz_space}|{comment})
211+
whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
212+
171213
other .
172214

173215
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
@@ -181,14 +223,16 @@ other .
181223
* of escaped-quote "\'".
182224
* Other embedded escaped characters are matched explicitly and the leading
183225
* backslash is dropped from the string. - thomas 1997-09-24
226+
* Note that xcline must appear before xcstart, which must appear before
227+
* operator, as explained above! Also whitespace (comment) must appear
228+
* before operator.
184229
*/
185230

186231
%%
187-
{comment} { /* ignore */ }
232+
{whitespace} { /* ignore */ }
188233

189234
{xcline} { /* ignore */ }
190235

191-
<xc>{xcstar} |
192236
{xcstart} { BEGIN(xc); }
193237

194238
<xc>{xcstop} { BEGIN(INITIAL); }
@@ -216,6 +260,7 @@ other .
216260
}
217261
<xh>{xhcat} |
218262
<xb>{xbcat} {
263+
/* ignore */
219264
}
220265

221266
{xhstart} {
@@ -249,6 +294,7 @@ other .
249294
addlit(yytext, yyleng);
250295
}
251296
<xq>{xqcat} {
297+
/* ignore */
252298
}
253299

254300

@@ -270,18 +316,18 @@ other .
270316
{self} { return yytext[0]; }
271317

272318
{operator} {
273-
if (strcmp((char*)yytext,"!=") == 0)
274-
yylval.str = pstrdup("<>"); /* compatability */
319+
if (strcmp((char*)yytext, "!=") == 0)
320+
yylval.str = pstrdup("<>"); /* compatibility */
275321
else
276322
yylval.str = pstrdup((char*)yytext);
277323
return Op;
278324
}
325+
279326
{param} {
280327
yylval.ival = atoi((char*)&yytext[1]);
281328
return PARAM;
282329
}
283330

284-
285331
{integer} {
286332
char* endptr;
287333

@@ -354,7 +400,6 @@ other .
354400
return IDENT;
355401
}
356402
}
357-
{space} { /* ignore */ }
358403

359404
{other} { return yytext[0]; }
360405

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.