blob: 40d0411121b7c9d1c9dced7f1282621796e0dd16 [file] [log] [blame]
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package parse
6
7import (
8 "fmt"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12)
13
14// item represents a token or text string returned from the scanner.
15type item struct {
Dan Willemsenebae3022017-01-13 23:01:08 -080016 typ itemType // The type of this item.
17 pos Pos // The starting position, in bytes, of this item in the input string.
18 val string // The value of this item.
19 line int // The line number at the start of this item.
Brent Austinba3052e2015-04-21 16:08:23 -070020}
21
22func (i item) String() string {
23 switch {
24 case i.typ == itemEOF:
25 return "EOF"
26 case i.typ == itemError:
27 return i.val
28 case i.typ > itemKeyword:
29 return fmt.Sprintf("<%s>", i.val)
30 case len(i.val) > 10:
31 return fmt.Sprintf("%.10q...", i.val)
32 }
33 return fmt.Sprintf("%q", i.val)
34}
35
// itemType identifies the type of lex items.
type itemType int

// The item types. Every keyword type sorts after itemKeyword, so a keyword
// can be recognized with a single comparison (typ > itemKeyword).
const (
	itemError        itemType = iota // error occurred; value is text of error
	itemBool                         // boolean constant
	itemChar                         // printable ASCII character; grab bag for comma etc.
	itemCharConstant                 // character constant
	itemComment                      // comment text
	itemComplex                      // complex constant (1+2i); imaginary is just a number
	itemAssign                       // equals ('=') introducing an assignment
	itemDeclare                      // colon-equals (':=') introducing a declaration
	itemEOF                          // end of input
	itemField                        // alphanumeric identifier starting with '.'
	itemIdentifier                   // alphanumeric identifier not starting with '.'
	itemLeftDelim                    // left action delimiter
	itemLeftParen                    // '(' inside action
	itemNumber                       // simple number, including imaginary
	itemPipe                         // pipe symbol
	itemRawString                    // raw quoted string (includes quotes)
	itemRightDelim                   // right action delimiter
	itemRightParen                   // ')' inside action
	itemSpace                        // run of spaces separating arguments
	itemString                       // quoted string (includes quotes)
	itemText                         // plain text
	itemVariable                     // variable starting with '$', such as '$' or '$1' or '$hello'
	// Keywords appear after all the rest.
	itemKeyword  // used only to delimit the keywords
	itemBlock    // block keyword
	itemBreak    // break keyword
	itemContinue // continue keyword
	itemDot      // the cursor, spelled '.'
	itemDefine   // define keyword
	itemElse     // else keyword
	itemEnd      // end keyword
	itemIf       // if keyword
	itemNil      // the untyped nil constant, easiest to treat as a keyword
	itemRange    // range keyword
	itemTemplate // template keyword
	itemWith     // with keyword
)
77
78var key = map[string]itemType{
79 ".": itemDot,
Dan Willemsen38f2dba2016-07-08 14:54:35 -070080 "block": itemBlock,
Dan Willemsenbc60c3c2021-12-15 01:09:00 -080081 "break": itemBreak,
82 "continue": itemContinue,
Brent Austinba3052e2015-04-21 16:08:23 -070083 "define": itemDefine,
84 "else": itemElse,
85 "end": itemEnd,
86 "if": itemIf,
87 "range": itemRange,
88 "nil": itemNil,
89 "template": itemTemplate,
90 "with": itemWith,
91}
92
// eof is the sentinel returned by next when the input is exhausted.
const eof = -1
94
Dan Willemsen38f2dba2016-07-08 14:54:35 -070095// Trimming spaces.
96// If the action begins "{{- " rather than "{{", then all space/tab/newlines
97// preceding the action are trimmed; conversely if it ends " -}}" the
98// leading spaces are trimmed. This is done entirely in the lexer; the
Colin Cross1f805522021-05-14 11:10:59 -070099// parser never sees it happen. We require an ASCII space (' ', \t, \r, \n)
100// to be present to avoid ambiguity with things like "{{-3}}". It reads
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700101// better with the space present anyway. For simplicity, only ASCII
Colin Cross1f805522021-05-14 11:10:59 -0700102// does the job.
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700103const (
Colin Cross1f805522021-05-14 11:10:59 -0700104 spaceChars = " \t\r\n" // These are the space characters defined by Go itself.
105 trimMarker = '-' // Attached to left/right delimiter, trims trailing spaces from preceding/following text.
106 trimMarkerLen = Pos(1 + 1) // marker plus space before or after
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700107)
108
Brent Austinba3052e2015-04-21 16:08:23 -0700109// stateFn represents the state of the scanner as a function that returns the next state.
110type stateFn func(*lexer) stateFn
111
112// lexer holds the state of the scanner.
113type lexer struct {
Colin Cross1f805522021-05-14 11:10:59 -0700114 name string // the name of the input; used only for error reports
115 input string // the string being scanned
116 leftDelim string // start of action
117 rightDelim string // end of action
118 emitComment bool // emit itemComment tokens.
119 pos Pos // current position in the input
120 start Pos // start position of this item
121 width Pos // width of last rune read from input
122 items chan item // channel of scanned items
123 parenDepth int // nesting depth of ( ) exprs
124 line int // 1+number of newlines seen
125 startLine int // start line of this item
Dan Willemsenbc60c3c2021-12-15 01:09:00 -0800126 breakOK bool // break keyword allowed
127 continueOK bool // continue keyword allowed
Brent Austinba3052e2015-04-21 16:08:23 -0700128}
129
130// next returns the next rune in the input.
131func (l *lexer) next() rune {
132 if int(l.pos) >= len(l.input) {
133 l.width = 0
134 return eof
135 }
136 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
137 l.width = Pos(w)
138 l.pos += l.width
Dan Willemsenebae3022017-01-13 23:01:08 -0800139 if r == '\n' {
140 l.line++
141 }
Brent Austinba3052e2015-04-21 16:08:23 -0700142 return r
143}
144
145// peek returns but does not consume the next rune in the input.
146func (l *lexer) peek() rune {
147 r := l.next()
148 l.backup()
149 return r
150}
151
152// backup steps back one rune. Can only be called once per call of next.
153func (l *lexer) backup() {
154 l.pos -= l.width
Dan Willemsenebae3022017-01-13 23:01:08 -0800155 // Correct newline count.
156 if l.width == 1 && l.input[l.pos] == '\n' {
157 l.line--
158 }
Brent Austinba3052e2015-04-21 16:08:23 -0700159}
160
161// emit passes an item back to the client.
162func (l *lexer) emit(t itemType) {
Colin Crossd9c6b802019-03-19 21:10:31 -0700163 l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
Brent Austinba3052e2015-04-21 16:08:23 -0700164 l.start = l.pos
Colin Crossd9c6b802019-03-19 21:10:31 -0700165 l.startLine = l.line
Brent Austinba3052e2015-04-21 16:08:23 -0700166}
167
168// ignore skips over the pending input before this point.
169func (l *lexer) ignore() {
Dan Willemsena3223282018-02-27 19:41:43 -0800170 l.line += strings.Count(l.input[l.start:l.pos], "\n")
Brent Austinba3052e2015-04-21 16:08:23 -0700171 l.start = l.pos
Colin Crossd9c6b802019-03-19 21:10:31 -0700172 l.startLine = l.line
Brent Austinba3052e2015-04-21 16:08:23 -0700173}
174
175// accept consumes the next rune if it's from the valid set.
176func (l *lexer) accept(valid string) bool {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700177 if strings.ContainsRune(valid, l.next()) {
Brent Austinba3052e2015-04-21 16:08:23 -0700178 return true
179 }
180 l.backup()
181 return false
182}
183
184// acceptRun consumes a run of runes from the valid set.
185func (l *lexer) acceptRun(valid string) {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700186 for strings.ContainsRune(valid, l.next()) {
Brent Austinba3052e2015-04-21 16:08:23 -0700187 }
188 l.backup()
189}
190
Brent Austinba3052e2015-04-21 16:08:23 -0700191// errorf returns an error token and terminates the scan by passing
192// back a nil pointer that will be the next state, terminating l.nextItem.
Dan Willemsenbc60c3c2021-12-15 01:09:00 -0800193func (l *lexer) errorf(format string, args ...any) stateFn {
Colin Crossd9c6b802019-03-19 21:10:31 -0700194 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
Brent Austinba3052e2015-04-21 16:08:23 -0700195 return nil
196}
197
198// nextItem returns the next item from the input.
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700199// Called by the parser, not in the lexing goroutine.
Brent Austinba3052e2015-04-21 16:08:23 -0700200func (l *lexer) nextItem() item {
Dan Willemsena3223282018-02-27 19:41:43 -0800201 return <-l.items
Brent Austinba3052e2015-04-21 16:08:23 -0700202}
203
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700204// drain drains the output so the lexing goroutine will exit.
205// Called by the parser, not in the lexing goroutine.
206func (l *lexer) drain() {
207 for range l.items {
208 }
209}
210
Brent Austinba3052e2015-04-21 16:08:23 -0700211// lex creates a new scanner for the input string.
Colin Cross1f805522021-05-14 11:10:59 -0700212func lex(name, input, left, right string, emitComment bool) *lexer {
Brent Austinba3052e2015-04-21 16:08:23 -0700213 if left == "" {
214 left = leftDelim
215 }
216 if right == "" {
217 right = rightDelim
218 }
219 l := &lexer{
Colin Cross1f805522021-05-14 11:10:59 -0700220 name: name,
221 input: input,
222 leftDelim: left,
223 rightDelim: right,
224 emitComment: emitComment,
225 items: make(chan item),
226 line: 1,
227 startLine: 1,
Brent Austinba3052e2015-04-21 16:08:23 -0700228 }
229 go l.run()
230 return l
231}
232
233// run runs the state machine for the lexer.
234func (l *lexer) run() {
Dan Willemsena3223282018-02-27 19:41:43 -0800235 for state := lexText; state != nil; {
236 state = state(l)
Brent Austinba3052e2015-04-21 16:08:23 -0700237 }
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700238 close(l.items)
Brent Austinba3052e2015-04-21 16:08:23 -0700239}
240
// state functions

// Default action delimiters and the comment markers.
const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)
249
250// lexText scans until an opening action delimiter, "{{".
251func lexText(l *lexer) stateFn {
Dan Willemsenebae3022017-01-13 23:01:08 -0800252 l.width = 0
253 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
254 ldn := Pos(len(l.leftDelim))
255 l.pos += Pos(x)
256 trimLength := Pos(0)
Colin Cross1f805522021-05-14 11:10:59 -0700257 if hasLeftTrimMarker(l.input[l.pos+ldn:]) {
Dan Willemsenebae3022017-01-13 23:01:08 -0800258 trimLength = rightTrimLength(l.input[l.start:l.pos])
Brent Austinba3052e2015-04-21 16:08:23 -0700259 }
Dan Willemsenebae3022017-01-13 23:01:08 -0800260 l.pos -= trimLength
261 if l.pos > l.start {
Colin Crossd9c6b802019-03-19 21:10:31 -0700262 l.line += strings.Count(l.input[l.start:l.pos], "\n")
Dan Willemsenebae3022017-01-13 23:01:08 -0800263 l.emit(itemText)
Brent Austinba3052e2015-04-21 16:08:23 -0700264 }
Dan Willemsenebae3022017-01-13 23:01:08 -0800265 l.pos += trimLength
266 l.ignore()
267 return lexLeftDelim
Brent Austinba3052e2015-04-21 16:08:23 -0700268 }
Colin Crossd9c6b802019-03-19 21:10:31 -0700269 l.pos = Pos(len(l.input))
Brent Austinba3052e2015-04-21 16:08:23 -0700270 // Correctly reached EOF.
271 if l.pos > l.start {
Colin Crossd9c6b802019-03-19 21:10:31 -0700272 l.line += strings.Count(l.input[l.start:l.pos], "\n")
Brent Austinba3052e2015-04-21 16:08:23 -0700273 l.emit(itemText)
274 }
275 l.emit(itemEOF)
276 return nil
277}
278
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700279// rightTrimLength returns the length of the spaces at the end of the string.
280func rightTrimLength(s string) Pos {
281 return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
282}
283
284// atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker.
285func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
Colin Cross1f805522021-05-14 11:10:59 -0700286 if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) { // With trim marker.
Dan Willemsena3223282018-02-27 19:41:43 -0800287 return true, true
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700288 }
Colin Cross430342c2019-09-07 08:36:04 -0700289 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) { // Without trim marker.
290 return true, false
291 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700292 return false, false
293}
294
295// leftTrimLength returns the length of the spaces at the beginning of the string.
296func leftTrimLength(s string) Pos {
297 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
298}
299
300// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
Brent Austinba3052e2015-04-21 16:08:23 -0700301func lexLeftDelim(l *lexer) stateFn {
302 l.pos += Pos(len(l.leftDelim))
Colin Cross1f805522021-05-14 11:10:59 -0700303 trimSpace := hasLeftTrimMarker(l.input[l.pos:])
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700304 afterMarker := Pos(0)
305 if trimSpace {
306 afterMarker = trimMarkerLen
307 }
308 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
309 l.pos += afterMarker
310 l.ignore()
Brent Austinba3052e2015-04-21 16:08:23 -0700311 return lexComment
312 }
313 l.emit(itemLeftDelim)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700314 l.pos += afterMarker
315 l.ignore()
Brent Austinba3052e2015-04-21 16:08:23 -0700316 l.parenDepth = 0
317 return lexInsideAction
318}
319
320// lexComment scans a comment. The left comment marker is known to be present.
321func lexComment(l *lexer) stateFn {
322 l.pos += Pos(len(leftComment))
323 i := strings.Index(l.input[l.pos:], rightComment)
324 if i < 0 {
325 return l.errorf("unclosed comment")
326 }
327 l.pos += Pos(i + len(rightComment))
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700328 delim, trimSpace := l.atRightDelim()
329 if !delim {
Brent Austinba3052e2015-04-21 16:08:23 -0700330 return l.errorf("comment ends before closing delimiter")
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700331 }
Colin Cross1f805522021-05-14 11:10:59 -0700332 if l.emitComment {
333 l.emit(itemComment)
334 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700335 if trimSpace {
336 l.pos += trimMarkerLen
Brent Austinba3052e2015-04-21 16:08:23 -0700337 }
338 l.pos += Pos(len(l.rightDelim))
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700339 if trimSpace {
340 l.pos += leftTrimLength(l.input[l.pos:])
341 }
Brent Austinba3052e2015-04-21 16:08:23 -0700342 l.ignore()
343 return lexText
344}
345
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700346// lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker.
Brent Austinba3052e2015-04-21 16:08:23 -0700347func lexRightDelim(l *lexer) stateFn {
Colin Cross1f805522021-05-14 11:10:59 -0700348 trimSpace := hasRightTrimMarker(l.input[l.pos:])
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700349 if trimSpace {
350 l.pos += trimMarkerLen
351 l.ignore()
352 }
Brent Austinba3052e2015-04-21 16:08:23 -0700353 l.pos += Pos(len(l.rightDelim))
354 l.emit(itemRightDelim)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700355 if trimSpace {
356 l.pos += leftTrimLength(l.input[l.pos:])
357 l.ignore()
358 }
Brent Austinba3052e2015-04-21 16:08:23 -0700359 return lexText
360}
361
362// lexInsideAction scans the elements inside action delimiters.
363func lexInsideAction(l *lexer) stateFn {
364 // Either number, quoted string, or identifier.
365 // Spaces separate arguments; runs of spaces turn into itemSpace.
366 // Pipe symbols separate and are emitted.
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700367 delim, _ := l.atRightDelim()
368 if delim {
Brent Austinba3052e2015-04-21 16:08:23 -0700369 if l.parenDepth == 0 {
370 return lexRightDelim
371 }
372 return l.errorf("unclosed left paren")
373 }
374 switch r := l.next(); {
Colin Cross1f805522021-05-14 11:10:59 -0700375 case r == eof:
Brent Austinba3052e2015-04-21 16:08:23 -0700376 return l.errorf("unclosed action")
377 case isSpace(r):
Colin Cross430342c2019-09-07 08:36:04 -0700378 l.backup() // Put space back in case we have " -}}".
Brent Austinba3052e2015-04-21 16:08:23 -0700379 return lexSpace
Dan Willemsenc7413322018-08-27 23:21:26 -0700380 case r == '=':
381 l.emit(itemAssign)
Brent Austinba3052e2015-04-21 16:08:23 -0700382 case r == ':':
383 if l.next() != '=' {
384 return l.errorf("expected :=")
385 }
Dan Willemsenc7413322018-08-27 23:21:26 -0700386 l.emit(itemDeclare)
Brent Austinba3052e2015-04-21 16:08:23 -0700387 case r == '|':
388 l.emit(itemPipe)
389 case r == '"':
390 return lexQuote
391 case r == '`':
392 return lexRawQuote
393 case r == '$':
394 return lexVariable
395 case r == '\'':
396 return lexChar
397 case r == '.':
398 // special look-ahead for ".field" so we don't break l.backup().
399 if l.pos < Pos(len(l.input)) {
400 r := l.input[l.pos]
401 if r < '0' || '9' < r {
402 return lexField
403 }
404 }
405 fallthrough // '.' can start a number.
406 case r == '+' || r == '-' || ('0' <= r && r <= '9'):
407 l.backup()
408 return lexNumber
409 case isAlphaNumeric(r):
410 l.backup()
411 return lexIdentifier
412 case r == '(':
413 l.emit(itemLeftParen)
414 l.parenDepth++
Brent Austinba3052e2015-04-21 16:08:23 -0700415 case r == ')':
416 l.emit(itemRightParen)
417 l.parenDepth--
418 if l.parenDepth < 0 {
419 return l.errorf("unexpected right paren %#U", r)
420 }
Brent Austinba3052e2015-04-21 16:08:23 -0700421 case r <= unicode.MaxASCII && unicode.IsPrint(r):
422 l.emit(itemChar)
Brent Austinba3052e2015-04-21 16:08:23 -0700423 default:
424 return l.errorf("unrecognized character in action: %#U", r)
425 }
426 return lexInsideAction
427}
428
429// lexSpace scans a run of space characters.
Colin Cross430342c2019-09-07 08:36:04 -0700430// We have not consumed the first space, which is known to be present.
431// Take care if there is a trim-marked right delimiter, which starts with a space.
Brent Austinba3052e2015-04-21 16:08:23 -0700432func lexSpace(l *lexer) stateFn {
Colin Cross430342c2019-09-07 08:36:04 -0700433 var r rune
434 var numSpaces int
435 for {
436 r = l.peek()
437 if !isSpace(r) {
438 break
439 }
Brent Austinba3052e2015-04-21 16:08:23 -0700440 l.next()
Colin Cross430342c2019-09-07 08:36:04 -0700441 numSpaces++
442 }
443 // Be careful about a trim-marked closing delimiter, which has a minus
444 // after a space. We know there is a space, so check for the '-' that might follow.
Colin Cross1f805522021-05-14 11:10:59 -0700445 if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) {
Colin Cross430342c2019-09-07 08:36:04 -0700446 l.backup() // Before the space.
447 if numSpaces == 1 {
448 return lexRightDelim // On the delim, so go right to that.
449 }
Brent Austinba3052e2015-04-21 16:08:23 -0700450 }
451 l.emit(itemSpace)
452 return lexInsideAction
453}
454
455// lexIdentifier scans an alphanumeric.
456func lexIdentifier(l *lexer) stateFn {
457Loop:
458 for {
459 switch r := l.next(); {
460 case isAlphaNumeric(r):
461 // absorb.
462 default:
463 l.backup()
464 word := l.input[l.start:l.pos]
465 if !l.atTerminator() {
466 return l.errorf("bad character %#U", r)
467 }
468 switch {
469 case key[word] > itemKeyword:
Dan Willemsenbc60c3c2021-12-15 01:09:00 -0800470 item := key[word]
471 if item == itemBreak && !l.breakOK || item == itemContinue && !l.continueOK {
472 l.emit(itemIdentifier)
473 } else {
474 l.emit(item)
475 }
Brent Austinba3052e2015-04-21 16:08:23 -0700476 case word[0] == '.':
477 l.emit(itemField)
478 case word == "true", word == "false":
479 l.emit(itemBool)
480 default:
481 l.emit(itemIdentifier)
482 }
483 break Loop
484 }
485 }
486 return lexInsideAction
487}
488
489// lexField scans a field: .Alphanumeric.
490// The . has been scanned.
491func lexField(l *lexer) stateFn {
492 return lexFieldOrVariable(l, itemField)
493}
494
495// lexVariable scans a Variable: $Alphanumeric.
496// The $ has been scanned.
497func lexVariable(l *lexer) stateFn {
498 if l.atTerminator() { // Nothing interesting follows -> "$".
499 l.emit(itemVariable)
500 return lexInsideAction
501 }
502 return lexFieldOrVariable(l, itemVariable)
503}
504
505// lexVariable scans a field or variable: [.$]Alphanumeric.
506// The . or $ has been scanned.
507func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
508 if l.atTerminator() { // Nothing interesting follows -> "." or "$".
509 if typ == itemVariable {
510 l.emit(itemVariable)
511 } else {
512 l.emit(itemDot)
513 }
514 return lexInsideAction
515 }
516 var r rune
517 for {
518 r = l.next()
519 if !isAlphaNumeric(r) {
520 l.backup()
521 break
522 }
523 }
524 if !l.atTerminator() {
525 return l.errorf("bad character %#U", r)
526 }
527 l.emit(typ)
528 return lexInsideAction
529}
530
531// atTerminator reports whether the input is at valid termination character to
532// appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
533// like "$x+2" not being acceptable without a space, in case we decide one
534// day to implement arithmetic.
535func (l *lexer) atTerminator() bool {
536 r := l.peek()
Colin Cross1f805522021-05-14 11:10:59 -0700537 if isSpace(r) {
Brent Austinba3052e2015-04-21 16:08:23 -0700538 return true
539 }
540 switch r {
541 case eof, '.', ',', '|', ':', ')', '(':
542 return true
543 }
544 // Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
545 // succeed but should fail) but only in extremely rare cases caused by willfully
546 // bad choice of delimiter.
547 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
548 return true
549 }
550 return false
551}
552
553// lexChar scans a character constant. The initial quote is already
554// scanned. Syntax checking is done by the parser.
555func lexChar(l *lexer) stateFn {
556Loop:
557 for {
558 switch l.next() {
559 case '\\':
560 if r := l.next(); r != eof && r != '\n' {
561 break
562 }
563 fallthrough
564 case eof, '\n':
565 return l.errorf("unterminated character constant")
566 case '\'':
567 break Loop
568 }
569 }
570 l.emit(itemCharConstant)
571 return lexInsideAction
572}
573
574// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
575// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
576// and "089" - but when it's wrong the input is invalid and the parser (via
577// strconv) will notice.
578func lexNumber(l *lexer) stateFn {
579 if !l.scanNumber() {
580 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
581 }
582 if sign := l.peek(); sign == '+' || sign == '-' {
583 // Complex: 1+2i. No spaces, must end in 'i'.
584 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
585 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
586 }
587 l.emit(itemComplex)
588 } else {
589 l.emit(itemNumber)
590 }
591 return lexInsideAction
592}
593
594func (l *lexer) scanNumber() bool {
595 // Optional leading sign.
596 l.accept("+-")
597 // Is it hex?
Colin Cross430342c2019-09-07 08:36:04 -0700598 digits := "0123456789_"
599 if l.accept("0") {
600 // Note: Leading 0 does not mean octal in floats.
601 if l.accept("xX") {
602 digits = "0123456789abcdefABCDEF_"
603 } else if l.accept("oO") {
604 digits = "01234567_"
605 } else if l.accept("bB") {
606 digits = "01_"
607 }
Brent Austinba3052e2015-04-21 16:08:23 -0700608 }
609 l.acceptRun(digits)
610 if l.accept(".") {
611 l.acceptRun(digits)
612 }
Colin Cross430342c2019-09-07 08:36:04 -0700613 if len(digits) == 10+1 && l.accept("eE") {
Brent Austinba3052e2015-04-21 16:08:23 -0700614 l.accept("+-")
Colin Cross430342c2019-09-07 08:36:04 -0700615 l.acceptRun("0123456789_")
616 }
617 if len(digits) == 16+6+1 && l.accept("pP") {
618 l.accept("+-")
619 l.acceptRun("0123456789_")
Brent Austinba3052e2015-04-21 16:08:23 -0700620 }
621 // Is it imaginary?
622 l.accept("i")
623 // Next thing mustn't be alphanumeric.
624 if isAlphaNumeric(l.peek()) {
625 l.next()
626 return false
627 }
628 return true
629}
630
631// lexQuote scans a quoted string.
632func lexQuote(l *lexer) stateFn {
633Loop:
634 for {
635 switch l.next() {
636 case '\\':
637 if r := l.next(); r != eof && r != '\n' {
638 break
639 }
640 fallthrough
641 case eof, '\n':
642 return l.errorf("unterminated quoted string")
643 case '"':
644 break Loop
645 }
646 }
647 l.emit(itemString)
648 return lexInsideAction
649}
650
651// lexRawQuote scans a raw quoted string.
652func lexRawQuote(l *lexer) stateFn {
653Loop:
654 for {
655 switch l.next() {
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700656 case eof:
Brent Austinba3052e2015-04-21 16:08:23 -0700657 return l.errorf("unterminated raw quoted string")
658 case '`':
659 break Loop
660 }
661 }
662 l.emit(itemRawString)
663 return lexInsideAction
664}
665
// isSpace reports whether r is a space character.
// Only the ASCII characters space, tab, carriage return, and newline count.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\r' || r == '\n'
}
670
// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
// Letters and digits are the Unicode categories, not just ASCII.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}
Colin Cross1f805522021-05-14 11:10:59 -0700675
676func hasLeftTrimMarker(s string) bool {
677 return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1]))
678}
679
680func hasRightTrimMarker(s string) bool {
681 return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker
682}