blob: abf41bc0e2040fa2c7cd6b9522c44b13c53da7ff [file] [log] [blame]
/*
** $Id: llex.c,v 2.63.1.2 2013/08/30 15:49:41 roberto Exp $
** Lexical Analyzer
** See Copyright Notice in lua.h
*/
6
7
8#ifndef SYSLINUX
9#include <locale.h>
10#else
11#define getlocaledecpoint() '.'
12#endif
13#include <string.h>
14
15#define llex_c
16#define LUA_CORE
17
18#include "lua.h"
19
20#include "lctype.h"
21#include "ldo.h"
22#include "llex.h"
23#include "lobject.h"
24#include "lparser.h"
25#include "lstate.h"
26#include "lstring.h"
27#include "ltable.h"
28#include "lzio.h"
29
30
31
/*
** Advance the lexer by one character: read the next character from the
** input stream 'ls->z' into 'ls->current'. The expression yields the
** character just read. Note that 'ls' is evaluated twice.
*/
#define next(ls)	((ls)->current = zgetc((ls)->z))
33
34
35
/*
** True when the current character is a line break ('\n' or '\r').
** The argument is parenthesized so that any pointer expression (for
** instance '&state') expands correctly; it is evaluated twice.
*/
#define currIsNewline(ls) \
	((ls)->current == '\n' || (ls)->current == '\r')
37
38
/* ORDER RESERVED */
/*
** Textual form of every multi-character token, indexed by
** (token - FIRST_RESERVED). The first NUM_RESERVED entries are the
** reserved words registered by 'luaX_init'.
*/
static const char *const luaX_tokens [] = {
  /* reserved words */
  "and", "break", "do", "else", "elseif", "end",
  "false", "for", "function", "goto", "if", "in",
  "local", "nil", "not", "or", "repeat", "return",
  "then", "true", "until", "while",
  /* other fixed-format symbols */
  "..", "...", "==", ">=", "<=", "~=", "::",
  /* end of stream and token classes */
  "<eof>", "<number>", "<name>", "<string>"
};
48
49
50#define save_and_next(ls) (save(ls, ls->current), next(ls))
51
52
53static l_noret lexerror (LexState *ls, const char *msg, int token);
54
55
56static void save (LexState *ls, int c) {
57 Mbuffer *b = ls->buff;
58 if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
59 size_t newsize;
60 if (luaZ_sizebuffer(b) >= MAX_SIZET/2)
61 lexerror(ls, "lexical element too long", 0);
62 newsize = luaZ_sizebuffer(b) * 2;
63 luaZ_resizebuffer(ls->L, b, newsize);
64 }
65 b->buffer[luaZ_bufflen(b)++] = cast(char, c);
66}
67
68
69void luaX_init (lua_State *L) {
70 int i;
71 for (i=0; i<NUM_RESERVED; i++) {
72 TString *ts = luaS_new(L, luaX_tokens[i]);
73 luaS_fix(ts); /* reserved words are never collected */
74 ts->tsv.extra = cast_byte(i+1); /* reserved word */
75 }
76}
77
78
79const char *luaX_token2str (LexState *ls, int token) {
80 if (token < FIRST_RESERVED) { /* single-byte symbols? */
81 lua_assert(token == cast(unsigned char, token));
82 return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) :
83 luaO_pushfstring(ls->L, "char(%d)", token);
84 }
85 else {
86 const char *s = luaX_tokens[token - FIRST_RESERVED];
87 if (token < TK_EOS) /* fixed format (symbols and reserved words)? */
88 return luaO_pushfstring(ls->L, LUA_QS, s);
89 else /* names, strings, and numerals */
90 return s;
91 }
92}
93
94
95static const char *txtToken (LexState *ls, int token) {
96 switch (token) {
97 case TK_NAME:
98 case TK_STRING:
99 case TK_NUMBER:
100 save(ls, '\0');
101 return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
102 default:
103 return luaX_token2str(ls, token);
104 }
105}
106
107
108static l_noret lexerror (LexState *ls, const char *msg, int token) {
109 char buff[LUA_IDSIZE];
110 luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE);
111 msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
112 if (token)
113 luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
114 luaD_throw(ls->L, LUA_ERRSYNTAX);
115}
116
117
118l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
119 lexerror(ls, msg, ls->t.token);
120}
121
122
123/*
124** creates a new string and anchors it in function's table so that
125** it will not be collected until the end of the function's compilation
126** (by that time it should be anchored in function's prototype)
127*/
128TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
129 lua_State *L = ls->L;
130 TValue *o; /* entry for `str' */
131 TString *ts = luaS_newlstr(L, str, l); /* create new string */
132 setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */
133 o = luaH_set(L, ls->fs->h, L->top - 1);
134 if (ttisnil(o)) { /* not in use yet? (see 'addK') */
135 /* boolean value does not need GC barrier;
136 table has no metatable, so it does not need to invalidate cache */
137 setbvalue(o, 1); /* t[string] = true */
138 luaC_checkGC(L);
139 }
140 else { /* string already present */
141 ts = rawtsvalue(keyfromval(o)); /* re-use value previously stored */
142 }
143 L->top--; /* remove string from stack */
144 return ts;
145}
146
147
148/*
149** increment line number and skips newline sequence (any of
150** \n, \r, \n\r, or \r\n)
151*/
152static void inclinenumber (LexState *ls) {
153 int old = ls->current;
154 lua_assert(currIsNewline(ls));
155 next(ls); /* skip `\n' or `\r' */
156 if (currIsNewline(ls) && ls->current != old)
157 next(ls); /* skip `\n\r' or `\r\n' */
158 if (++ls->linenumber >= MAX_INT)
159 luaX_syntaxerror(ls, "chunk has too many lines");
160}
161
162
163void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
164 int firstchar) {
165 ls->decpoint = '.';
166 ls->L = L;
167 ls->current = firstchar;
168 ls->lookahead.token = TK_EOS; /* no look-ahead token */
169 ls->z = z;
170 ls->fs = NULL;
171 ls->linenumber = 1;
172 ls->lastline = 1;
173 ls->source = source;
174 ls->envn = luaS_new(L, LUA_ENV); /* create env name */
175 luaS_fix(ls->envn); /* never collect this name */
176 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */
177}
178
179
180
181/*
182** =======================================================
183** LEXICAL ANALYZER
184** =======================================================
185*/
186
187
188
189static int check_next (LexState *ls, const char *set) {
190 if (ls->current == '\0' || !strchr(set, ls->current))
191 return 0;
192 save_and_next(ls);
193 return 1;
194}
195
196
197/*
198** change all characters 'from' in buffer to 'to'
199*/
200static void buffreplace (LexState *ls, char from, char to) {
201 size_t n = luaZ_bufflen(ls->buff);
202 char *p = luaZ_buffer(ls->buff);
203 while (n--)
204 if (p[n] == from) p[n] = to;
205}
206
207
/*
** Unless a definition was already provided, the decimal point is the
** first character of the 'decimal_point' string reported by
** 'localeconv'.
*/
#ifndef getlocaledecpoint
#define getlocaledecpoint()	(localeconv()->decimal_point[0])
#endif
211
212
/*
** Convert the contents of buffer 'b', except its last character, to a
** number stored through 'e' (callers save a terminating '\0' first).
** The callers treat a zero result as a format error. The expansion and
** the pointer argument are parenthesized so the macro is safe inside
** larger expressions.
*/
#define buff2d(b,e) \
	(luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, (e)))
214
215/*
216** in case of format error, try to change decimal point separator to
217** the one defined in the current locale and check again
218*/
219static void trydecpoint (LexState *ls, SemInfo *seminfo) {
220 char old = ls->decpoint;
221 ls->decpoint = getlocaledecpoint();
222 buffreplace(ls, old, ls->decpoint); /* try new decimal separator */
223 if (!buff2d(ls->buff, &seminfo->r)) {
224 /* format error with correct decimal point: no more options */
225 buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */
226 lexerror(ls, "malformed number", TK_NUMBER);
227 }
228}
229
230
231/* LUA_NUMBER */
232/*
233** this function is quite liberal in what it accepts, as 'luaO_str2d'
234** will reject ill-formed numerals.
235*/
236static void read_numeral (LexState *ls, SemInfo *seminfo) {
237 const char *expo = "Ee";
238 int first = ls->current;
239 lua_assert(lisdigit(ls->current));
240 save_and_next(ls);
241 if (first == '0' && check_next(ls, "Xx")) /* hexadecimal? */
242 expo = "Pp";
243 for (;;) {
244 if (check_next(ls, expo)) /* exponent part? */
245 check_next(ls, "+-"); /* optional exponent sign */
246 if (lisxdigit(ls->current) || ls->current == '.')
247 save_and_next(ls);
248 else break;
249 }
250 save(ls, '\0');
251 buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */
252 if (!buff2d(ls->buff, &seminfo->r)) /* format error? */
253 trydecpoint(ls, seminfo); /* try to update decimal point separator */
254}
255
256
257/*
258** skip a sequence '[=*[' or ']=*]' and return its number of '='s or
259** -1 if sequence is malformed
260*/
261static int skip_sep (LexState *ls) {
262 int count = 0;
263 int s = ls->current;
264 lua_assert(s == '[' || s == ']');
265 save_and_next(ls);
266 while (ls->current == '=') {
267 save_and_next(ls);
268 count++;
269 }
270 return (ls->current == s) ? count : (-count) - 1;
271}
272
273
274static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
275 save_and_next(ls); /* skip 2nd `[' */
276 if (currIsNewline(ls)) /* string starts with a newline? */
277 inclinenumber(ls); /* skip it */
278 for (;;) {
279 switch (ls->current) {
280 case EOZ:
281 lexerror(ls, (seminfo) ? "unfinished long string" :
282 "unfinished long comment", TK_EOS);
283 break; /* to avoid warnings */
284 case ']': {
285 if (skip_sep(ls) == sep) {
286 save_and_next(ls); /* skip 2nd `]' */
287 goto endloop;
288 }
289 break;
290 }
291 case '\n': case '\r': {
292 save(ls, '\n');
293 inclinenumber(ls);
294 if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */
295 break;
296 }
297 default: {
298 if (seminfo) save_and_next(ls);
299 else next(ls);
300 }
301 }
302 } endloop:
303 if (seminfo)
304 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
305 luaZ_bufflen(ls->buff) - 2*(2 + sep));
306}
307
308
309static void escerror (LexState *ls, int *c, int n, const char *msg) {
310 int i;
311 luaZ_resetbuffer(ls->buff); /* prepare error message */
312 save(ls, '\\');
313 for (i = 0; i < n && c[i] != EOZ; i++)
314 save(ls, c[i]);
315 lexerror(ls, msg, TK_STRING);
316}
317
318
319static int readhexaesc (LexState *ls) {
320 int c[3], i; /* keep input for error message */
321 int r = 0; /* result accumulator */
322 c[0] = 'x'; /* for error message */
323 for (i = 1; i < 3; i++) { /* read two hexadecimal digits */
324 c[i] = next(ls);
325 if (!lisxdigit(c[i]))
326 escerror(ls, c, i + 1, "hexadecimal digit expected");
327 r = (r << 4) + luaO_hexavalue(c[i]);
328 }
329 return r;
330}
331
332
333static int readdecesc (LexState *ls) {
334 int c[3], i;
335 int r = 0; /* result accumulator */
336 for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */
337 c[i] = ls->current;
338 r = 10*r + c[i] - '0';
339 next(ls);
340 }
341 if (r > UCHAR_MAX)
342 escerror(ls, c, i, "decimal escape too large");
343 return r;
344}
345
346
347static void read_string (LexState *ls, int del, SemInfo *seminfo) {
348 save_and_next(ls); /* keep delimiter (for error messages) */
349 while (ls->current != del) {
350 switch (ls->current) {
351 case EOZ:
352 lexerror(ls, "unfinished string", TK_EOS);
353 break; /* to avoid warnings */
354 case '\n':
355 case '\r':
356 lexerror(ls, "unfinished string", TK_STRING);
357 break; /* to avoid warnings */
358 case '\\': { /* escape sequences */
359 int c; /* final character to be saved */
360 next(ls); /* do not save the `\' */
361 switch (ls->current) {
362 case 'a': c = '\a'; goto read_save;
363 case 'b': c = '\b'; goto read_save;
364 case 'f': c = '\f'; goto read_save;
365 case 'n': c = '\n'; goto read_save;
366 case 'r': c = '\r'; goto read_save;
367 case 't': c = '\t'; goto read_save;
368 case 'v': c = '\v'; goto read_save;
369 case 'x': c = readhexaesc(ls); goto read_save;
370 case '\n': case '\r':
371 inclinenumber(ls); c = '\n'; goto only_save;
372 case '\\': case '\"': case '\'':
373 c = ls->current; goto read_save;
374 case EOZ: goto no_save; /* will raise an error next loop */
375 case 'z': { /* zap following span of spaces */
376 next(ls); /* skip the 'z' */
377 while (lisspace(ls->current)) {
378 if (currIsNewline(ls)) inclinenumber(ls);
379 else next(ls);
380 }
381 goto no_save;
382 }
383 default: {
384 if (!lisdigit(ls->current))
385 escerror(ls, &ls->current, 1, "invalid escape sequence");
386 /* digital escape \ddd */
387 c = readdecesc(ls);
388 goto only_save;
389 }
390 }
391 read_save: next(ls); /* read next character */
392 only_save: save(ls, c); /* save 'c' */
393 no_save: break;
394 }
395 default:
396 save_and_next(ls);
397 }
398 }
399 save_and_next(ls); /* skip delimiter */
400 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
401 luaZ_bufflen(ls->buff) - 2);
402}
403
404
405static int llex (LexState *ls, SemInfo *seminfo) {
406 luaZ_resetbuffer(ls->buff);
407 for (;;) {
408 switch (ls->current) {
409 case '\n': case '\r': { /* line breaks */
410 inclinenumber(ls);
411 break;
412 }
413 case ' ': case '\f': case '\t': case '\v': { /* spaces */
414 next(ls);
415 break;
416 }
417 case '-': { /* '-' or '--' (comment) */
418 next(ls);
419 if (ls->current != '-') return '-';
420 /* else is a comment */
421 next(ls);
422 if (ls->current == '[') { /* long comment? */
423 int sep = skip_sep(ls);
424 luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */
425 if (sep >= 0) {
426 read_long_string(ls, NULL, sep); /* skip long comment */
427 luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */
428 break;
429 }
430 }
431 /* else short comment */
432 while (!currIsNewline(ls) && ls->current != EOZ)
433 next(ls); /* skip until end of line (or end of file) */
434 break;
435 }
436 case '[': { /* long string or simply '[' */
437 int sep = skip_sep(ls);
438 if (sep >= 0) {
439 read_long_string(ls, seminfo, sep);
440 return TK_STRING;
441 }
442 else if (sep == -1) return '[';
443 else lexerror(ls, "invalid long string delimiter", TK_STRING);
444 }
445 case '=': {
446 next(ls);
447 if (ls->current != '=') return '=';
448 else { next(ls); return TK_EQ; }
449 }
450 case '<': {
451 next(ls);
452 if (ls->current != '=') return '<';
453 else { next(ls); return TK_LE; }
454 }
455 case '>': {
456 next(ls);
457 if (ls->current != '=') return '>';
458 else { next(ls); return TK_GE; }
459 }
460 case '~': {
461 next(ls);
462 if (ls->current != '=') return '~';
463 else { next(ls); return TK_NE; }
464 }
465 case ':': {
466 next(ls);
467 if (ls->current != ':') return ':';
468 else { next(ls); return TK_DBCOLON; }
469 }
470 case '"': case '\'': { /* short literal strings */
471 read_string(ls, ls->current, seminfo);
472 return TK_STRING;
473 }
474 case '.': { /* '.', '..', '...', or number */
475 save_and_next(ls);
476 if (check_next(ls, ".")) {
477 if (check_next(ls, "."))
478 return TK_DOTS; /* '...' */
479 else return TK_CONCAT; /* '..' */
480 }
481 else if (!lisdigit(ls->current)) return '.';
482 /* else go through */
483 }
484 case '0': case '1': case '2': case '3': case '4':
485 case '5': case '6': case '7': case '8': case '9': {
486 read_numeral(ls, seminfo);
487 return TK_NUMBER;
488 }
489 case EOZ: {
490 return TK_EOS;
491 }
492 default: {
493 if (lislalpha(ls->current)) { /* identifier or reserved word? */
494 TString *ts;
495 do {
496 save_and_next(ls);
497 } while (lislalnum(ls->current));
498 ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
499 luaZ_bufflen(ls->buff));
500 seminfo->ts = ts;
501 if (isreserved(ts)) /* reserved word? */
502 return ts->tsv.extra - 1 + FIRST_RESERVED;
503 else {
504 return TK_NAME;
505 }
506 }
507 else { /* single-char tokens (+ - / ...) */
508 int c = ls->current;
509 next(ls);
510 return c;
511 }
512 }
513 }
514 }
515}
516
517
518void luaX_next (LexState *ls) {
519 ls->lastline = ls->linenumber;
520 if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */
521 ls->t = ls->lookahead; /* use this one */
522 ls->lookahead.token = TK_EOS; /* and discharge it */
523 }
524 else
525 ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */
526}
527
528
529int luaX_lookahead (LexState *ls) {
530 lua_assert(ls->lookahead.token == TK_EOS);
531 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
532 return ls->lookahead.token;
533}
534