Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 1 | // Copyright 2011 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package parse |
| 6 | |
| 7 | import ( |
| 8 | "fmt" |
| 9 | "strings" |
| 10 | "unicode" |
| 11 | "unicode/utf8" |
| 12 | ) |
| 13 | |
| 14 | // item represents a token or text string returned from the scanner. |
| 15 | type item struct { |
Dan Willemsen | ebae302 | 2017-01-13 23:01:08 -0800 | [diff] [blame] | 16 | typ itemType // The type of this item. |
| 17 | pos Pos // The starting position, in bytes, of this item in the input string. |
| 18 | val string // The value of this item. |
| 19 | line int // The line number at the start of this item. |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 20 | } |
| 21 | |
| 22 | func (i item) String() string { |
| 23 | switch { |
| 24 | case i.typ == itemEOF: |
| 25 | return "EOF" |
| 26 | case i.typ == itemError: |
| 27 | return i.val |
| 28 | case i.typ > itemKeyword: |
| 29 | return fmt.Sprintf("<%s>", i.val) |
| 30 | case len(i.val) > 10: |
| 31 | return fmt.Sprintf("%.10q...", i.val) |
| 32 | } |
| 33 | return fmt.Sprintf("%q", i.val) |
| 34 | } |
| 35 | |
// itemType is the kind of a lexical item.
type itemType int
| 38 | |
| 39 | const ( |
| 40 | itemError itemType = iota // error occurred; value is text of error |
| 41 | itemBool // boolean constant |
| 42 | itemChar // printable ASCII character; grab bag for comma etc. |
| 43 | itemCharConstant // character constant |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 44 | itemComment // comment text |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 45 | itemComplex // complex constant (1+2i); imaginary is just a number |
Dan Willemsen | c741332 | 2018-08-27 23:21:26 -0700 | [diff] [blame] | 46 | itemAssign // equals ('=') introducing an assignment |
| 47 | itemDeclare // colon-equals (':=') introducing a declaration |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 48 | itemEOF |
| 49 | itemField // alphanumeric identifier starting with '.' |
| 50 | itemIdentifier // alphanumeric identifier not starting with '.' |
| 51 | itemLeftDelim // left action delimiter |
| 52 | itemLeftParen // '(' inside action |
| 53 | itemNumber // simple number, including imaginary |
| 54 | itemPipe // pipe symbol |
| 55 | itemRawString // raw quoted string (includes quotes) |
| 56 | itemRightDelim // right action delimiter |
| 57 | itemRightParen // ')' inside action |
| 58 | itemSpace // run of spaces separating arguments |
| 59 | itemString // quoted string (includes quotes) |
| 60 | itemText // plain text |
| 61 | itemVariable // variable starting with '$', such as '$' or '$1' or '$hello' |
| 62 | // Keywords appear after all the rest. |
| 63 | itemKeyword // used only to delimit the keywords |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 64 | itemBlock // block keyword |
Dan Willemsen | bc60c3c | 2021-12-15 01:09:00 -0800 | [diff] [blame] | 65 | itemBreak // break keyword |
| 66 | itemContinue // continue keyword |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 67 | itemDot // the cursor, spelled '.' |
| 68 | itemDefine // define keyword |
| 69 | itemElse // else keyword |
| 70 | itemEnd // end keyword |
| 71 | itemIf // if keyword |
| 72 | itemNil // the untyped nil constant, easiest to treat as a keyword |
| 73 | itemRange // range keyword |
| 74 | itemTemplate // template keyword |
| 75 | itemWith // with keyword |
| 76 | ) |
| 77 | |
| 78 | var key = map[string]itemType{ |
| 79 | ".": itemDot, |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 80 | "block": itemBlock, |
Dan Willemsen | bc60c3c | 2021-12-15 01:09:00 -0800 | [diff] [blame] | 81 | "break": itemBreak, |
| 82 | "continue": itemContinue, |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 83 | "define": itemDefine, |
| 84 | "else": itemElse, |
| 85 | "end": itemEnd, |
| 86 | "if": itemIf, |
| 87 | "range": itemRange, |
| 88 | "nil": itemNil, |
| 89 | "template": itemTemplate, |
| 90 | "with": itemWith, |
| 91 | } |
| 92 | |
// eof is the value the lexer's next method returns in place of a rune once
// the input is exhausted.
const eof = -1
| 94 | |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 95 | // Trimming spaces. |
| 96 | // If the action begins "{{- " rather than "{{", then all space/tab/newlines |
| 97 | // preceding the action are trimmed; conversely if it ends " -}}" the |
| 98 | // leading spaces are trimmed. This is done entirely in the lexer; the |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 99 | // parser never sees it happen. We require an ASCII space (' ', \t, \r, \n) |
| 100 | // to be present to avoid ambiguity with things like "{{-3}}". It reads |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 101 | // better with the space present anyway. For simplicity, only ASCII |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 102 | // does the job. |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 103 | const ( |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 104 | spaceChars = " \t\r\n" // These are the space characters defined by Go itself. |
| 105 | trimMarker = '-' // Attached to left/right delimiter, trims trailing spaces from preceding/following text. |
| 106 | trimMarkerLen = Pos(1 + 1) // marker plus space before or after |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 107 | ) |
| 108 | |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 109 | // stateFn represents the state of the scanner as a function that returns the next state. |
| 110 | type stateFn func(*lexer) stateFn |
| 111 | |
| 112 | // lexer holds the state of the scanner. |
| 113 | type lexer struct { |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 114 | name string // the name of the input; used only for error reports |
| 115 | input string // the string being scanned |
| 116 | leftDelim string // start of action |
| 117 | rightDelim string // end of action |
| 118 | emitComment bool // emit itemComment tokens. |
| 119 | pos Pos // current position in the input |
| 120 | start Pos // start position of this item |
| 121 | width Pos // width of last rune read from input |
| 122 | items chan item // channel of scanned items |
| 123 | parenDepth int // nesting depth of ( ) exprs |
| 124 | line int // 1+number of newlines seen |
| 125 | startLine int // start line of this item |
Dan Willemsen | bc60c3c | 2021-12-15 01:09:00 -0800 | [diff] [blame] | 126 | breakOK bool // break keyword allowed |
| 127 | continueOK bool // continue keyword allowed |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 128 | } |
| 129 | |
| 130 | // next returns the next rune in the input. |
| 131 | func (l *lexer) next() rune { |
| 132 | if int(l.pos) >= len(l.input) { |
| 133 | l.width = 0 |
| 134 | return eof |
| 135 | } |
| 136 | r, w := utf8.DecodeRuneInString(l.input[l.pos:]) |
| 137 | l.width = Pos(w) |
| 138 | l.pos += l.width |
Dan Willemsen | ebae302 | 2017-01-13 23:01:08 -0800 | [diff] [blame] | 139 | if r == '\n' { |
| 140 | l.line++ |
| 141 | } |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 142 | return r |
| 143 | } |
| 144 | |
| 145 | // peek returns but does not consume the next rune in the input. |
| 146 | func (l *lexer) peek() rune { |
| 147 | r := l.next() |
| 148 | l.backup() |
| 149 | return r |
| 150 | } |
| 151 | |
| 152 | // backup steps back one rune. Can only be called once per call of next. |
| 153 | func (l *lexer) backup() { |
| 154 | l.pos -= l.width |
Dan Willemsen | ebae302 | 2017-01-13 23:01:08 -0800 | [diff] [blame] | 155 | // Correct newline count. |
| 156 | if l.width == 1 && l.input[l.pos] == '\n' { |
| 157 | l.line-- |
| 158 | } |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 159 | } |
| 160 | |
| 161 | // emit passes an item back to the client. |
| 162 | func (l *lexer) emit(t itemType) { |
Colin Cross | d9c6b80 | 2019-03-19 21:10:31 -0700 | [diff] [blame] | 163 | l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine} |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 164 | l.start = l.pos |
Colin Cross | d9c6b80 | 2019-03-19 21:10:31 -0700 | [diff] [blame] | 165 | l.startLine = l.line |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 166 | } |
| 167 | |
| 168 | // ignore skips over the pending input before this point. |
| 169 | func (l *lexer) ignore() { |
Dan Willemsen | a322328 | 2018-02-27 19:41:43 -0800 | [diff] [blame] | 170 | l.line += strings.Count(l.input[l.start:l.pos], "\n") |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 171 | l.start = l.pos |
Colin Cross | d9c6b80 | 2019-03-19 21:10:31 -0700 | [diff] [blame] | 172 | l.startLine = l.line |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 173 | } |
| 174 | |
| 175 | // accept consumes the next rune if it's from the valid set. |
| 176 | func (l *lexer) accept(valid string) bool { |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 177 | if strings.ContainsRune(valid, l.next()) { |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 178 | return true |
| 179 | } |
| 180 | l.backup() |
| 181 | return false |
| 182 | } |
| 183 | |
| 184 | // acceptRun consumes a run of runes from the valid set. |
| 185 | func (l *lexer) acceptRun(valid string) { |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 186 | for strings.ContainsRune(valid, l.next()) { |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 187 | } |
| 188 | l.backup() |
| 189 | } |
| 190 | |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 191 | // errorf returns an error token and terminates the scan by passing |
| 192 | // back a nil pointer that will be the next state, terminating l.nextItem. |
Dan Willemsen | bc60c3c | 2021-12-15 01:09:00 -0800 | [diff] [blame] | 193 | func (l *lexer) errorf(format string, args ...any) stateFn { |
Colin Cross | d9c6b80 | 2019-03-19 21:10:31 -0700 | [diff] [blame] | 194 | l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine} |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 195 | return nil |
| 196 | } |
| 197 | |
| 198 | // nextItem returns the next item from the input. |
Dan Willemsen | 09eb3b1 | 2015-09-16 14:34:17 -0700 | [diff] [blame] | 199 | // Called by the parser, not in the lexing goroutine. |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 200 | func (l *lexer) nextItem() item { |
Dan Willemsen | a322328 | 2018-02-27 19:41:43 -0800 | [diff] [blame] | 201 | return <-l.items |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 202 | } |
| 203 | |
Dan Willemsen | 09eb3b1 | 2015-09-16 14:34:17 -0700 | [diff] [blame] | 204 | // drain drains the output so the lexing goroutine will exit. |
| 205 | // Called by the parser, not in the lexing goroutine. |
| 206 | func (l *lexer) drain() { |
| 207 | for range l.items { |
| 208 | } |
| 209 | } |
| 210 | |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 211 | // lex creates a new scanner for the input string. |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 212 | func lex(name, input, left, right string, emitComment bool) *lexer { |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 213 | if left == "" { |
| 214 | left = leftDelim |
| 215 | } |
| 216 | if right == "" { |
| 217 | right = rightDelim |
| 218 | } |
| 219 | l := &lexer{ |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 220 | name: name, |
| 221 | input: input, |
| 222 | leftDelim: left, |
| 223 | rightDelim: right, |
| 224 | emitComment: emitComment, |
| 225 | items: make(chan item), |
| 226 | line: 1, |
| 227 | startLine: 1, |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 228 | } |
| 229 | go l.run() |
| 230 | return l |
| 231 | } |
| 232 | |
| 233 | // run runs the state machine for the lexer. |
| 234 | func (l *lexer) run() { |
Dan Willemsen | a322328 | 2018-02-27 19:41:43 -0800 | [diff] [blame] | 235 | for state := lexText; state != nil; { |
| 236 | state = state(l) |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 237 | } |
Dan Willemsen | 09eb3b1 | 2015-09-16 14:34:17 -0700 | [diff] [blame] | 238 | close(l.items) |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 239 | } |
| 240 | |
| 241 | // state functions |
| 242 | |
// Default action delimiters and the comment markers recognised inside an action.
const (
	leftDelim    = "{{" // default start of an action
	rightDelim   = "}}" // default end of an action
	leftComment  = "/*" // opens a comment
	rightComment = "*/" // closes a comment
)
| 249 | |
| 250 | // lexText scans until an opening action delimiter, "{{". |
| 251 | func lexText(l *lexer) stateFn { |
Dan Willemsen | ebae302 | 2017-01-13 23:01:08 -0800 | [diff] [blame] | 252 | l.width = 0 |
| 253 | if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 { |
| 254 | ldn := Pos(len(l.leftDelim)) |
| 255 | l.pos += Pos(x) |
| 256 | trimLength := Pos(0) |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 257 | if hasLeftTrimMarker(l.input[l.pos+ldn:]) { |
Dan Willemsen | ebae302 | 2017-01-13 23:01:08 -0800 | [diff] [blame] | 258 | trimLength = rightTrimLength(l.input[l.start:l.pos]) |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 259 | } |
Dan Willemsen | ebae302 | 2017-01-13 23:01:08 -0800 | [diff] [blame] | 260 | l.pos -= trimLength |
| 261 | if l.pos > l.start { |
Colin Cross | d9c6b80 | 2019-03-19 21:10:31 -0700 | [diff] [blame] | 262 | l.line += strings.Count(l.input[l.start:l.pos], "\n") |
Dan Willemsen | ebae302 | 2017-01-13 23:01:08 -0800 | [diff] [blame] | 263 | l.emit(itemText) |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 264 | } |
Dan Willemsen | ebae302 | 2017-01-13 23:01:08 -0800 | [diff] [blame] | 265 | l.pos += trimLength |
| 266 | l.ignore() |
| 267 | return lexLeftDelim |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 268 | } |
Colin Cross | d9c6b80 | 2019-03-19 21:10:31 -0700 | [diff] [blame] | 269 | l.pos = Pos(len(l.input)) |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 270 | // Correctly reached EOF. |
| 271 | if l.pos > l.start { |
Colin Cross | d9c6b80 | 2019-03-19 21:10:31 -0700 | [diff] [blame] | 272 | l.line += strings.Count(l.input[l.start:l.pos], "\n") |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 273 | l.emit(itemText) |
| 274 | } |
| 275 | l.emit(itemEOF) |
| 276 | return nil |
| 277 | } |
| 278 | |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 279 | // rightTrimLength returns the length of the spaces at the end of the string. |
| 280 | func rightTrimLength(s string) Pos { |
| 281 | return Pos(len(s) - len(strings.TrimRight(s, spaceChars))) |
| 282 | } |
| 283 | |
| 284 | // atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker. |
| 285 | func (l *lexer) atRightDelim() (delim, trimSpaces bool) { |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 286 | if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) { // With trim marker. |
Dan Willemsen | a322328 | 2018-02-27 19:41:43 -0800 | [diff] [blame] | 287 | return true, true |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 288 | } |
Colin Cross | 430342c | 2019-09-07 08:36:04 -0700 | [diff] [blame] | 289 | if strings.HasPrefix(l.input[l.pos:], l.rightDelim) { // Without trim marker. |
| 290 | return true, false |
| 291 | } |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 292 | return false, false |
| 293 | } |
| 294 | |
| 295 | // leftTrimLength returns the length of the spaces at the beginning of the string. |
| 296 | func leftTrimLength(s string) Pos { |
| 297 | return Pos(len(s) - len(strings.TrimLeft(s, spaceChars))) |
| 298 | } |
| 299 | |
| 300 | // lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker. |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 301 | func lexLeftDelim(l *lexer) stateFn { |
| 302 | l.pos += Pos(len(l.leftDelim)) |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 303 | trimSpace := hasLeftTrimMarker(l.input[l.pos:]) |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 304 | afterMarker := Pos(0) |
| 305 | if trimSpace { |
| 306 | afterMarker = trimMarkerLen |
| 307 | } |
| 308 | if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) { |
| 309 | l.pos += afterMarker |
| 310 | l.ignore() |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 311 | return lexComment |
| 312 | } |
| 313 | l.emit(itemLeftDelim) |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 314 | l.pos += afterMarker |
| 315 | l.ignore() |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 316 | l.parenDepth = 0 |
| 317 | return lexInsideAction |
| 318 | } |
| 319 | |
| 320 | // lexComment scans a comment. The left comment marker is known to be present. |
| 321 | func lexComment(l *lexer) stateFn { |
| 322 | l.pos += Pos(len(leftComment)) |
| 323 | i := strings.Index(l.input[l.pos:], rightComment) |
| 324 | if i < 0 { |
| 325 | return l.errorf("unclosed comment") |
| 326 | } |
| 327 | l.pos += Pos(i + len(rightComment)) |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 328 | delim, trimSpace := l.atRightDelim() |
| 329 | if !delim { |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 330 | return l.errorf("comment ends before closing delimiter") |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 331 | } |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 332 | if l.emitComment { |
| 333 | l.emit(itemComment) |
| 334 | } |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 335 | if trimSpace { |
| 336 | l.pos += trimMarkerLen |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 337 | } |
| 338 | l.pos += Pos(len(l.rightDelim)) |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 339 | if trimSpace { |
| 340 | l.pos += leftTrimLength(l.input[l.pos:]) |
| 341 | } |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 342 | l.ignore() |
| 343 | return lexText |
| 344 | } |
| 345 | |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 346 | // lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker. |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 347 | func lexRightDelim(l *lexer) stateFn { |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 348 | trimSpace := hasRightTrimMarker(l.input[l.pos:]) |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 349 | if trimSpace { |
| 350 | l.pos += trimMarkerLen |
| 351 | l.ignore() |
| 352 | } |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 353 | l.pos += Pos(len(l.rightDelim)) |
| 354 | l.emit(itemRightDelim) |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 355 | if trimSpace { |
| 356 | l.pos += leftTrimLength(l.input[l.pos:]) |
| 357 | l.ignore() |
| 358 | } |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 359 | return lexText |
| 360 | } |
| 361 | |
| 362 | // lexInsideAction scans the elements inside action delimiters. |
| 363 | func lexInsideAction(l *lexer) stateFn { |
| 364 | // Either number, quoted string, or identifier. |
| 365 | // Spaces separate arguments; runs of spaces turn into itemSpace. |
| 366 | // Pipe symbols separate and are emitted. |
Dan Willemsen | 38f2dba | 2016-07-08 14:54:35 -0700 | [diff] [blame] | 367 | delim, _ := l.atRightDelim() |
| 368 | if delim { |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 369 | if l.parenDepth == 0 { |
| 370 | return lexRightDelim |
| 371 | } |
| 372 | return l.errorf("unclosed left paren") |
| 373 | } |
| 374 | switch r := l.next(); { |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 375 | case r == eof: |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 376 | return l.errorf("unclosed action") |
| 377 | case isSpace(r): |
Colin Cross | 430342c | 2019-09-07 08:36:04 -0700 | [diff] [blame] | 378 | l.backup() // Put space back in case we have " -}}". |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 379 | return lexSpace |
Dan Willemsen | c741332 | 2018-08-27 23:21:26 -0700 | [diff] [blame] | 380 | case r == '=': |
| 381 | l.emit(itemAssign) |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 382 | case r == ':': |
| 383 | if l.next() != '=' { |
| 384 | return l.errorf("expected :=") |
| 385 | } |
Dan Willemsen | c741332 | 2018-08-27 23:21:26 -0700 | [diff] [blame] | 386 | l.emit(itemDeclare) |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 387 | case r == '|': |
| 388 | l.emit(itemPipe) |
| 389 | case r == '"': |
| 390 | return lexQuote |
| 391 | case r == '`': |
| 392 | return lexRawQuote |
| 393 | case r == '$': |
| 394 | return lexVariable |
| 395 | case r == '\'': |
| 396 | return lexChar |
| 397 | case r == '.': |
| 398 | // special look-ahead for ".field" so we don't break l.backup(). |
| 399 | if l.pos < Pos(len(l.input)) { |
| 400 | r := l.input[l.pos] |
| 401 | if r < '0' || '9' < r { |
| 402 | return lexField |
| 403 | } |
| 404 | } |
| 405 | fallthrough // '.' can start a number. |
| 406 | case r == '+' || r == '-' || ('0' <= r && r <= '9'): |
| 407 | l.backup() |
| 408 | return lexNumber |
| 409 | case isAlphaNumeric(r): |
| 410 | l.backup() |
| 411 | return lexIdentifier |
| 412 | case r == '(': |
| 413 | l.emit(itemLeftParen) |
| 414 | l.parenDepth++ |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 415 | case r == ')': |
| 416 | l.emit(itemRightParen) |
| 417 | l.parenDepth-- |
| 418 | if l.parenDepth < 0 { |
| 419 | return l.errorf("unexpected right paren %#U", r) |
| 420 | } |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 421 | case r <= unicode.MaxASCII && unicode.IsPrint(r): |
| 422 | l.emit(itemChar) |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 423 | default: |
| 424 | return l.errorf("unrecognized character in action: %#U", r) |
| 425 | } |
| 426 | return lexInsideAction |
| 427 | } |
| 428 | |
| 429 | // lexSpace scans a run of space characters. |
Colin Cross | 430342c | 2019-09-07 08:36:04 -0700 | [diff] [blame] | 430 | // We have not consumed the first space, which is known to be present. |
| 431 | // Take care if there is a trim-marked right delimiter, which starts with a space. |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 432 | func lexSpace(l *lexer) stateFn { |
Colin Cross | 430342c | 2019-09-07 08:36:04 -0700 | [diff] [blame] | 433 | var r rune |
| 434 | var numSpaces int |
| 435 | for { |
| 436 | r = l.peek() |
| 437 | if !isSpace(r) { |
| 438 | break |
| 439 | } |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 440 | l.next() |
Colin Cross | 430342c | 2019-09-07 08:36:04 -0700 | [diff] [blame] | 441 | numSpaces++ |
| 442 | } |
| 443 | // Be careful about a trim-marked closing delimiter, which has a minus |
| 444 | // after a space. We know there is a space, so check for the '-' that might follow. |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 445 | if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) { |
Colin Cross | 430342c | 2019-09-07 08:36:04 -0700 | [diff] [blame] | 446 | l.backup() // Before the space. |
| 447 | if numSpaces == 1 { |
| 448 | return lexRightDelim // On the delim, so go right to that. |
| 449 | } |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 450 | } |
| 451 | l.emit(itemSpace) |
| 452 | return lexInsideAction |
| 453 | } |
| 454 | |
| 455 | // lexIdentifier scans an alphanumeric. |
| 456 | func lexIdentifier(l *lexer) stateFn { |
| 457 | Loop: |
| 458 | for { |
| 459 | switch r := l.next(); { |
| 460 | case isAlphaNumeric(r): |
| 461 | // absorb. |
| 462 | default: |
| 463 | l.backup() |
| 464 | word := l.input[l.start:l.pos] |
| 465 | if !l.atTerminator() { |
| 466 | return l.errorf("bad character %#U", r) |
| 467 | } |
| 468 | switch { |
| 469 | case key[word] > itemKeyword: |
Dan Willemsen | bc60c3c | 2021-12-15 01:09:00 -0800 | [diff] [blame] | 470 | item := key[word] |
| 471 | if item == itemBreak && !l.breakOK || item == itemContinue && !l.continueOK { |
| 472 | l.emit(itemIdentifier) |
| 473 | } else { |
| 474 | l.emit(item) |
| 475 | } |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 476 | case word[0] == '.': |
| 477 | l.emit(itemField) |
| 478 | case word == "true", word == "false": |
| 479 | l.emit(itemBool) |
| 480 | default: |
| 481 | l.emit(itemIdentifier) |
| 482 | } |
| 483 | break Loop |
| 484 | } |
| 485 | } |
| 486 | return lexInsideAction |
| 487 | } |
| 488 | |
| 489 | // lexField scans a field: .Alphanumeric. |
| 490 | // The . has been scanned. |
| 491 | func lexField(l *lexer) stateFn { |
| 492 | return lexFieldOrVariable(l, itemField) |
| 493 | } |
| 494 | |
| 495 | // lexVariable scans a Variable: $Alphanumeric. |
| 496 | // The $ has been scanned. |
| 497 | func lexVariable(l *lexer) stateFn { |
| 498 | if l.atTerminator() { // Nothing interesting follows -> "$". |
| 499 | l.emit(itemVariable) |
| 500 | return lexInsideAction |
| 501 | } |
| 502 | return lexFieldOrVariable(l, itemVariable) |
| 503 | } |
| 504 | |
| 505 | // lexVariable scans a field or variable: [.$]Alphanumeric. |
| 506 | // The . or $ has been scanned. |
| 507 | func lexFieldOrVariable(l *lexer, typ itemType) stateFn { |
| 508 | if l.atTerminator() { // Nothing interesting follows -> "." or "$". |
| 509 | if typ == itemVariable { |
| 510 | l.emit(itemVariable) |
| 511 | } else { |
| 512 | l.emit(itemDot) |
| 513 | } |
| 514 | return lexInsideAction |
| 515 | } |
| 516 | var r rune |
| 517 | for { |
| 518 | r = l.next() |
| 519 | if !isAlphaNumeric(r) { |
| 520 | l.backup() |
| 521 | break |
| 522 | } |
| 523 | } |
| 524 | if !l.atTerminator() { |
| 525 | return l.errorf("bad character %#U", r) |
| 526 | } |
| 527 | l.emit(typ) |
| 528 | return lexInsideAction |
| 529 | } |
| 530 | |
| 531 | // atTerminator reports whether the input is at valid termination character to |
| 532 | // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases |
| 533 | // like "$x+2" not being acceptable without a space, in case we decide one |
| 534 | // day to implement arithmetic. |
| 535 | func (l *lexer) atTerminator() bool { |
| 536 | r := l.peek() |
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 537 | if isSpace(r) { |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 538 | return true |
| 539 | } |
| 540 | switch r { |
| 541 | case eof, '.', ',', '|', ':', ')', '(': |
| 542 | return true |
| 543 | } |
| 544 | // Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will |
| 545 | // succeed but should fail) but only in extremely rare cases caused by willfully |
| 546 | // bad choice of delimiter. |
| 547 | if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r { |
| 548 | return true |
| 549 | } |
| 550 | return false |
| 551 | } |
| 552 | |
| 553 | // lexChar scans a character constant. The initial quote is already |
| 554 | // scanned. Syntax checking is done by the parser. |
| 555 | func lexChar(l *lexer) stateFn { |
| 556 | Loop: |
| 557 | for { |
| 558 | switch l.next() { |
| 559 | case '\\': |
| 560 | if r := l.next(); r != eof && r != '\n' { |
| 561 | break |
| 562 | } |
| 563 | fallthrough |
| 564 | case eof, '\n': |
| 565 | return l.errorf("unterminated character constant") |
| 566 | case '\'': |
| 567 | break Loop |
| 568 | } |
| 569 | } |
| 570 | l.emit(itemCharConstant) |
| 571 | return lexInsideAction |
| 572 | } |
| 573 | |
| 574 | // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This |
| 575 | // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" |
| 576 | // and "089" - but when it's wrong the input is invalid and the parser (via |
| 577 | // strconv) will notice. |
| 578 | func lexNumber(l *lexer) stateFn { |
| 579 | if !l.scanNumber() { |
| 580 | return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) |
| 581 | } |
| 582 | if sign := l.peek(); sign == '+' || sign == '-' { |
| 583 | // Complex: 1+2i. No spaces, must end in 'i'. |
| 584 | if !l.scanNumber() || l.input[l.pos-1] != 'i' { |
| 585 | return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) |
| 586 | } |
| 587 | l.emit(itemComplex) |
| 588 | } else { |
| 589 | l.emit(itemNumber) |
| 590 | } |
| 591 | return lexInsideAction |
| 592 | } |
| 593 | |
| 594 | func (l *lexer) scanNumber() bool { |
| 595 | // Optional leading sign. |
| 596 | l.accept("+-") |
| 597 | // Is it hex? |
Colin Cross | 430342c | 2019-09-07 08:36:04 -0700 | [diff] [blame] | 598 | digits := "0123456789_" |
| 599 | if l.accept("0") { |
| 600 | // Note: Leading 0 does not mean octal in floats. |
| 601 | if l.accept("xX") { |
| 602 | digits = "0123456789abcdefABCDEF_" |
| 603 | } else if l.accept("oO") { |
| 604 | digits = "01234567_" |
| 605 | } else if l.accept("bB") { |
| 606 | digits = "01_" |
| 607 | } |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 608 | } |
| 609 | l.acceptRun(digits) |
| 610 | if l.accept(".") { |
| 611 | l.acceptRun(digits) |
| 612 | } |
Colin Cross | 430342c | 2019-09-07 08:36:04 -0700 | [diff] [blame] | 613 | if len(digits) == 10+1 && l.accept("eE") { |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 614 | l.accept("+-") |
Colin Cross | 430342c | 2019-09-07 08:36:04 -0700 | [diff] [blame] | 615 | l.acceptRun("0123456789_") |
| 616 | } |
| 617 | if len(digits) == 16+6+1 && l.accept("pP") { |
| 618 | l.accept("+-") |
| 619 | l.acceptRun("0123456789_") |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 620 | } |
| 621 | // Is it imaginary? |
| 622 | l.accept("i") |
| 623 | // Next thing mustn't be alphanumeric. |
| 624 | if isAlphaNumeric(l.peek()) { |
| 625 | l.next() |
| 626 | return false |
| 627 | } |
| 628 | return true |
| 629 | } |
| 630 | |
| 631 | // lexQuote scans a quoted string. |
| 632 | func lexQuote(l *lexer) stateFn { |
| 633 | Loop: |
| 634 | for { |
| 635 | switch l.next() { |
| 636 | case '\\': |
| 637 | if r := l.next(); r != eof && r != '\n' { |
| 638 | break |
| 639 | } |
| 640 | fallthrough |
| 641 | case eof, '\n': |
| 642 | return l.errorf("unterminated quoted string") |
| 643 | case '"': |
| 644 | break Loop |
| 645 | } |
| 646 | } |
| 647 | l.emit(itemString) |
| 648 | return lexInsideAction |
| 649 | } |
| 650 | |
| 651 | // lexRawQuote scans a raw quoted string. |
| 652 | func lexRawQuote(l *lexer) stateFn { |
| 653 | Loop: |
| 654 | for { |
| 655 | switch l.next() { |
Dan Willemsen | 09eb3b1 | 2015-09-16 14:34:17 -0700 | [diff] [blame] | 656 | case eof: |
Brent Austin | ba3052e | 2015-04-21 16:08:23 -0700 | [diff] [blame] | 657 | return l.errorf("unterminated raw quoted string") |
| 658 | case '`': |
| 659 | break Loop |
| 660 | } |
| 661 | } |
| 662 | l.emit(itemRawString) |
| 663 | return lexInsideAction |
| 664 | } |
| 665 | |
// isSpace reports whether r is one of the ASCII space characters: space,
// tab, carriage return or newline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
| 670 | |
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
Colin Cross | 1f80552 | 2021-05-14 11:10:59 -0700 | [diff] [blame] | 675 | |
| 676 | func hasLeftTrimMarker(s string) bool { |
| 677 | return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1])) |
| 678 | } |
| 679 | |
| 680 | func hasRightTrimMarker(s string) bool { |
| 681 | return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker |
| 682 | } |