blob: b0b5f5baa540cc3cb20def96c2e3454168c61e4d [file] [log] [blame]
Zack Williamse940c7a2019-08-21 14:25:39 -07001package protoparse
2
3import (
4 "bufio"
5 "bytes"
6 "errors"
7 "fmt"
8 "io"
9 "strconv"
10 "strings"
11 "unicode/utf8"
12)
13
// runeReader wraps a bufio.Reader, adding an unbounded push-back stack of
// runes and a sticky error: once the underlying reader returns an error, that
// same error is reported by every later read from the underlying reader.
type runeReader struct {
	rr     *bufio.Reader
	unread []rune // push-back stack; the most recently unread rune is returned first
	err    error  // first error returned by rr, if any
}

// readRune returns the next rune, its size in bytes, and any error.
//
// Pushed-back runes are always delivered before the sticky error is
// consulted. The lexer routinely looks ahead, hits EOF, and then unreads the
// runes it had already consumed; checking the error first would silently drop
// those runes from the input.
//
// For a pushed-back rune the reported size is utf8.RuneLen(r), since the size
// originally returned by the underlying reader is not recorded.
func (rr *runeReader) readRune() (r rune, size int, err error) {
	if n := len(rr.unread); n > 0 {
		r = rr.unread[n-1]
		rr.unread = rr.unread[:n-1]
		return r, utf8.RuneLen(r), nil
	}
	if rr.err != nil {
		return 0, 0, rr.err
	}
	r, size, err = rr.rr.ReadRune()
	if err != nil {
		// remember the failure so it is reported consistently from now on
		rr.err = err
	}
	return r, size, err
}

// unreadRune pushes r back so that the next call to readRune returns it.
// Multiple runes may be pushed back; they are returned in LIFO order.
func (rr *runeReader) unreadRune(r rune) {
	rr.unread = append(rr.unread, r)
}
39
40func lexError(l protoLexer, pos *SourcePos, err string) {
41 pl := l.(*protoLex)
Scott Baker4a35a702019-11-26 08:17:33 -080042 _ = pl.errs.handleError(ErrorWithSourcePos{Underlying: errors.New(err), Pos: pos})
Zack Williamse940c7a2019-08-21 14:25:39 -070043}
44
// protoLex is the lexer state: the input being scanned, the error handler,
// the current position, and bookkeeping needed to attach comments to tokens.
type protoLex struct {
	// filename is reported in every SourcePos produced by the lexer.
	filename string
	// input is the rune source, with push-back support.
	input *runeReader
	// errs receives every error the lexer reports; Lex stops producing
	// tokens once errs.err is non-nil.
	errs *errorHandler
	// res is not touched by the lexer code visible here; presumably the
	// parser stores its result in it.
	res *fileNode

	// lineNo and colNo are the zero-based current line and column (cur adds
	// one to each when building a SourcePos); offset is the current offset.
	lineNo int
	colNo  int
	offset int

	// prevSym is the most recent terminal handed to setPrev, or nil if none.
	prevSym terminalNode

	// prevLineNo, prevColNo and prevOffset snapshot the position just before
	// the rune most recently read at the top of Lex's loop, i.e. the start of
	// the token or comment currently being scanned (see posRange).
	prevLineNo int
	prevColNo  int
	prevOffset int
	// comments collects the comments seen since the start of the current Lex
	// call; they become the leading comments of the next node created.
	comments []comment
}
62
Scott Baker4a35a702019-11-26 08:17:33 -080063func newTestLexer(in io.Reader) *protoLex {
64 return newLexer(in, "test.proto", newErrorHandler(nil))
65}
66
67func newLexer(in io.Reader, filename string, errs *errorHandler) *protoLex {
68 return &protoLex{
69 input: &runeReader{rr: bufio.NewReader(in)},
70 filename: filename,
71 errs: errs,
72 }
Zack Williamse940c7a2019-08-21 14:25:39 -070073}
74
// keywords maps each reserved word to its parser token constant. Lex looks up
// every identifier it scans here: on a hit it returns the mapped token,
// otherwise it returns _NAME. In both cases the identifier text is stored in
// the token value.
var keywords = map[string]int{
	"syntax":     _SYNTAX,
	"import":     _IMPORT,
	"weak":       _WEAK,
	"public":     _PUBLIC,
	"package":    _PACKAGE,
	"option":     _OPTION,
	"true":       _TRUE,
	"false":      _FALSE,
	"inf":        _INF,
	"nan":        _NAN,
	"repeated":   _REPEATED,
	"optional":   _OPTIONAL,
	"required":   _REQUIRED,
	"double":     _DOUBLE,
	"float":      _FLOAT,
	"int32":      _INT32,
	"int64":      _INT64,
	"uint32":     _UINT32,
	"uint64":     _UINT64,
	"sint32":     _SINT32,
	"sint64":     _SINT64,
	"fixed32":    _FIXED32,
	"fixed64":    _FIXED64,
	"sfixed32":   _SFIXED32,
	"sfixed64":   _SFIXED64,
	"bool":       _BOOL,
	"string":     _STRING,
	"bytes":      _BYTES,
	"group":      _GROUP,
	"oneof":      _ONEOF,
	"map":        _MAP,
	"extensions": _EXTENSIONS,
	"to":         _TO,
	"max":        _MAX,
	"reserved":   _RESERVED,
	"enum":       _ENUM,
	"message":    _MESSAGE,
	"extend":     _EXTEND,
	"service":    _SERVICE,
	"rpc":        _RPC,
	"stream":     _STREAM,
	"returns":    _RETURNS,
}
119
Scott Baker4a35a702019-11-26 08:17:33 -0800120func (l *protoLex) cur() SourcePos {
121 return SourcePos{
Zack Williamse940c7a2019-08-21 14:25:39 -0700122 Filename: l.filename,
123 Offset: l.offset,
124 Line: l.lineNo + 1,
125 Col: l.colNo + 1,
126 }
127}
128
Scott Baker4a35a702019-11-26 08:17:33 -0800129func (l *protoLex) adjustPos(consumedChars ...rune) {
130 for _, c := range consumedChars {
131 switch c {
132 case '\n':
133 // new line, back to first column
134 l.colNo = 0
135 l.lineNo++
136 case '\r':
137 // no adjustment
138 case '\t':
139 // advance to next tab stop
140 mod := l.colNo % 8
141 l.colNo += 8 - mod
142 default:
143 l.colNo++
144 }
145 }
146}
147
Zack Williamse940c7a2019-08-21 14:25:39 -0700148func (l *protoLex) prev() *SourcePos {
149 if l.prevSym == nil {
150 return &SourcePos{
151 Filename: l.filename,
152 Offset: 0,
153 Line: 1,
154 Col: 1,
155 }
156 }
157 return l.prevSym.start()
158}
159
// Lex scans the next token from the input, stores its value (and any comments
// collected on the way) in lval, and returns the token kind:
//
//   - 0 at end of input, or immediately if the error handler already holds
//     an error;
//   - _ERROR if a literal or comment is malformed or the reader fails;
//   - a keyword token from the keywords table, or _NAME for other identifiers;
//   - _INT_LIT, _FLOAT_LIT or _STRING_LIT for literals;
//   - the rune itself (as an int) for any other character.
//
// Whitespace is skipped and comments are accumulated in l.comments until a
// token is produced.
//
// NOTE(review): l.offset is advanced only in this function, for the first rune
// of each token, whitespace character or comment. The helper readers call
// adjustPos, which updates line and column but not the offset, so byte offsets
// appear to under-count multi-rune tokens — confirm whether that is intended.
func (l *protoLex) Lex(lval *protoSymType) int {
	if l.errs.err != nil {
		// if error reporter already returned non-nil error,
		// we can skip the rest of the input
		return 0
	}

	// Start-of-token snapshot; refreshed below before every rune read by the
	// main loop so that it ends up pointing at the first rune of the token.
	l.prevLineNo = l.lineNo
	l.prevColNo = l.colNo
	l.prevOffset = l.offset
	l.comments = nil

	for {
		c, n, err := l.input.readRune()
		if err == io.EOF {
			// we're not actually returning a rune, but this will associate
			// accumulated comments as a trailing comment on last symbol
			// (if appropriate)
			l.setRune(lval)
			return 0
		} else if err != nil {
			// we don't call setError because we don't want it wrapped
			// with a source position because it's I/O, not syntax
			lval.err = err
			_ = l.errs.handleError(err)
			return _ERROR
		}

		l.prevLineNo = l.lineNo
		l.prevColNo = l.colNo
		l.prevOffset = l.offset

		l.offset += n
		l.adjustPos(c)
		if strings.ContainsRune("\n\r\t ", c) {
			// whitespace separates tokens but is otherwise ignored
			continue
		}

		if c == '.' {
			// decimal literals could start with a dot
			cn, _, err := l.input.readRune()
			if err != nil {
				// nothing follows the dot: it is a plain '.' token
				l.setRune(lval)
				return int(c)
			}
			if cn >= '0' && cn <= '9' {
				l.adjustPos(cn)
				token := []rune{c, cn}
				// the dot has been seen already, so no further dot is allowed
				token = l.readNumber(token, false, true)
				f, err := strconv.ParseFloat(string(token), 64)
				if err != nil {
					l.setError(lval, err)
					return _ERROR
				}
				l.setFloat(lval, f)
				return _FLOAT_LIT
			}
			// not a number: put the look-ahead back and emit the dot
			l.input.unreadRune(cn)
			l.setRune(lval)
			return int(c)
		}

		if c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') {
			// identifier
			token := []rune{c}
			token = l.readIdentifier(token)
			str := string(token)
			// keywords carry their text as an identifier value too
			if t, ok := keywords[str]; ok {
				l.setIdent(lval, str)
				return t
			}
			l.setIdent(lval, str)
			return _NAME
		}

		if c >= '0' && c <= '9' {
			// integer or float literal
			if c == '0' {
				// a leading zero may introduce a hexadecimal literal ("0x...")
				cn, _, err := l.input.readRune()
				if err != nil {
					// lone "0" at end of input
					l.setInt(lval, 0)
					return _INT_LIT
				}
				if cn == 'x' || cn == 'X' {
					cnn, _, err := l.input.readRune()
					if err != nil {
						l.input.unreadRune(cn)
						l.setInt(lval, 0)
						return _INT_LIT
					}
					if (cnn >= '0' && cnn <= '9') || (cnn >= 'a' && cnn <= 'f') || (cnn >= 'A' && cnn <= 'F') {
						// hexadecimal!
						l.adjustPos(cn, cnn)
						// the token holds only the hex digits, without "0x"
						token := []rune{cnn}
						token = l.readHexNumber(token)
						ui, err := strconv.ParseUint(string(token), 16, 64)
						if err != nil {
							l.setError(lval, err)
							return _ERROR
						}
						l.setInt(lval, ui)
						return _INT_LIT
					}
					// "0x" not followed by a hex digit: emit the integer 0
					// and leave the 'x' and what follows for the next call
					l.input.unreadRune(cnn)
					l.input.unreadRune(cn)
					l.setInt(lval, 0)
					return _INT_LIT
				} else {
					l.input.unreadRune(cn)
				}
			}
			token := []rune{c}
			token = l.readNumber(token, true, true)
			numstr := string(token)
			if strings.Contains(numstr, ".") || strings.Contains(numstr, "e") || strings.Contains(numstr, "E") {
				// floating point!
				f, err := strconv.ParseFloat(numstr, 64)
				if err != nil {
					l.setError(lval, err)
					return _ERROR
				}
				l.setFloat(lval, f)
				return _FLOAT_LIT
			}
			// integer! (decimal or octal)
			// base 0 lets strconv treat a leading "0" as an octal prefix
			ui, err := strconv.ParseUint(numstr, 0, 64)
			if err != nil {
				l.setError(lval, err)
				return _ERROR
			}
			l.setInt(lval, ui)
			return _INT_LIT
		}

		if c == '\'' || c == '"' {
			// string literal
			str, err := l.readStringLiteral(c)
			if err != nil {
				l.setError(lval, err)
				return _ERROR
			}
			l.setString(lval, str)
			return _STRING_LIT
		}

		if c == '/' {
			// comment
			cn, _, err := l.input.readRune()
			if err != nil {
				// a '/' at the very end of input is just a rune token
				l.setRune(lval)
				return int(c)
			}
			if cn == '/' {
				l.adjustPos(cn)
				hitNewline, txt := l.skipToEndOfLineComment()
				commentPos := l.posRange()
				// NOTE(review): the end column is extended by one,
				// presumably to cover the newline that has not been passed
				// to adjustPos yet; it is also applied when the comment
				// ended at end of input rather than at a newline.
				commentPos.end.Col++
				if hitNewline {
					// we don't do this inside of skipToEndOfLineComment
					// because we want to know the length of previous
					// line for calculation above
					l.adjustPos('\n')
				}
				l.comments = append(l.comments, comment{posRange: commentPos, text: txt})
				continue
			}
			if cn == '*' {
				l.adjustPos(cn)
				if txt, ok := l.skipToEndOfBlockComment(); !ok {
					l.setError(lval, errors.New("block comment never terminates, unexpected EOF"))
					return _ERROR
				} else {
					l.comments = append(l.comments, comment{posRange: l.posRange(), text: txt})
				}
				continue
			}
			// not a comment: fall through and emit '/' as a rune token
			l.input.unreadRune(cn)
		}

		// any other character is returned as a token of its own
		l.setRune(lval)
		return int(c)
	}
}
343
Scott Baker4a35a702019-11-26 08:17:33 -0800344func (l *protoLex) posRange() posRange {
345 return posRange{
346 start: SourcePos{
347 Filename: l.filename,
348 Offset: l.prevOffset,
349 Line: l.prevLineNo + 1,
350 Col: l.prevColNo + 1,
351 },
352 end: l.cur(),
353 }
354}
355
356func (l *protoLex) newBasicNode() basicNode {
357 return basicNode{
358 posRange: l.posRange(),
359 leading: l.comments,
360 }
361}
362
// setPrev records n as the most recent symbol. Before doing so it may move
// some of n's leading comments onto the previous symbol as trailing comments:
//
//   - a first comment that starts on the line where the previous symbol ends
//     is moved to the previous symbol;
//   - if the first comment starts on the line right after the previous
//     symbol, the first group of comments is moved when it is followed by
//     another group, or when it is the only group and is separated from n by
//     at least one line.
//
// Nodes that are plain *basicNode values (simple runes) are treated as if
// they started two lines later, so comments before them look detached.
func (l *protoLex) setPrev(n terminalNode) {
	nStart := n.start().Line
	if _, ok := n.(*basicNode); ok {
		// if the node is a simple rune, don't attribute comments to it
		// HACK: adjusting the start line makes leading comments appear
		// detached so logic below will naturally associated trailing
		// comment to previous symbol
		nStart += 2
	}
	if l.prevSym != nil && len(n.leadingComments()) > 0 && l.prevSym.end().Line < nStart {
		// we may need to re-attribute the first comment to
		// instead be previous node's trailing comment
		prevEnd := l.prevSym.end().Line
		comments := n.leadingComments()
		c := comments[0]
		commentStart := c.start.Line
		if commentStart == prevEnd {
			// comment is on same line as previous symbol
			n.popLeadingComment()
			l.prevSym.pushTrailingComment(c)
		} else if commentStart == prevEnd+1 {
			// comment is right after previous symbol; see if it is detached
			// and if so re-attribute
			singleLineStyle := strings.HasPrefix(c.text, "//")
			line := c.end.Line
			// groupEnd is the index of the first comment that belongs to a
			// second group, or -1 if all comments form a single group
			groupEnd := -1
			for i := 1; i < len(comments); i++ {
				c := comments[i]
				newGroup := false
				if !singleLineStyle || c.start.Line > line+1 {
					// we've found a gap between comments, which means the
					// previous comments were detached
					newGroup = true
				} else {
					line = c.end.Line
					singleLineStyle = strings.HasPrefix(comments[i].text, "//")
					if !singleLineStyle {
						// we've found a switch from // comments to /*
						// consider that a new group which means the
						// previous comments were detached
						newGroup = true
					}
				}
				if newGroup {
					groupEnd = i
					break
				}
			}

			if groupEnd == -1 {
				// just one group of comments; we'll mark it as a trailing
				// comment if it immediately follows previous symbol and is
				// detached from current symbol
				c1 := comments[0]
				c2 := comments[len(comments)-1]
				if c1.start.Line <= prevEnd+1 && c2.end.Line < nStart-1 {
					groupEnd = len(comments)
				}
			}

			// move the first groupEnd comments to the previous symbol; this
			// loop does nothing when groupEnd is still -1
			for i := 0; i < groupEnd; i++ {
				l.prevSym.pushTrailingComment(n.popLeadingComment())
			}
		}
	}

	l.prevSym = n
}
431
432func (l *protoLex) setString(lval *protoSymType, val string) {
433 lval.s = &stringLiteralNode{basicNode: l.newBasicNode(), val: val}
434 l.setPrev(lval.s)
435}
436
437func (l *protoLex) setIdent(lval *protoSymType, val string) {
438 lval.id = &identNode{basicNode: l.newBasicNode(), val: val}
439 l.setPrev(lval.id)
440}
441
442func (l *protoLex) setInt(lval *protoSymType, val uint64) {
443 lval.i = &intLiteralNode{basicNode: l.newBasicNode(), val: val}
444 l.setPrev(lval.i)
445}
446
447func (l *protoLex) setFloat(lval *protoSymType, val float64) {
448 lval.f = &floatLiteralNode{basicNode: l.newBasicNode(), val: val}
449 l.setPrev(lval.f)
450}
451
452func (l *protoLex) setRune(lval *protoSymType) {
453 b := l.newBasicNode()
454 lval.b = &b
455 l.setPrev(lval.b)
456}
457
458func (l *protoLex) setError(lval *protoSymType, err error) {
459 lval.err = l.addSourceError(err)
460}
461
// readNumber extends sofar with the rest of a decimal numeric literal and
// returns the resulting runes. It accepts digits, at most one '.' (only if
// allowDot is true) and at most one exponent (only if allowExp is true). An
// exponent is 'e' or 'E', an optional sign, and at least one digit; if that
// shape is not matched the runes read for it are pushed back and the number
// ends before the 'e'. The first rune that cannot continue the number is
// pushed back as well. Line/column tracking is advanced for every rune kept.
func (l *protoLex) readNumber(sofar []rune, allowDot bool, allowExp bool) []rune {
	token := sofar
	for {
		c, _, err := l.input.readRune()
		if err != nil {
			break
		}
		if c == '.' {
			if !allowDot {
				l.input.unreadRune(c)
				break
			}
			// only one dot per number
			allowDot = false
		} else if c == 'e' || c == 'E' {
			if !allowExp {
				l.input.unreadRune(c)
				break
			}
			// only one exponent per number
			allowExp = false
			cn, _, err := l.input.readRune()
			if err != nil {
				l.input.unreadRune(c)
				break
			}
			if cn == '-' || cn == '+' {
				cnn, _, err := l.input.readRune()
				if err != nil {
					// push back in reverse order so runes are re-read in
					// their original order
					l.input.unreadRune(cn)
					l.input.unreadRune(c)
					break
				}
				if cnn < '0' || cnn > '9' {
					l.input.unreadRune(cnn)
					l.input.unreadRune(cn)
					l.input.unreadRune(c)
					break
				}
				// keep the 'e', then shift so that c is the sign and cn the
				// first exponent digit; both are appended below
				l.adjustPos(c)
				token = append(token, c)
				c, cn = cn, cnn
			} else if cn < '0' || cn > '9' {
				l.input.unreadRune(cn)
				l.input.unreadRune(c)
				break
			}
			// keep c (the 'e', or the sign if one was present); the digit in
			// cn is appended by the common code at the end of the loop body
			l.adjustPos(c)
			token = append(token, c)
			c = cn
		} else if c < '0' || c > '9' {
			l.input.unreadRune(c)
			break
		}
		l.adjustPos(c)
		token = append(token, c)
	}
	return token
}
519
520func (l *protoLex) readHexNumber(sofar []rune) []rune {
521 token := sofar
522 for {
523 c, _, err := l.input.readRune()
524 if err != nil {
525 break
526 }
527 if (c < 'a' || c > 'f') && (c < 'A' || c > 'F') && (c < '0' || c > '9') {
528 l.input.unreadRune(c)
529 break
530 }
Scott Baker4a35a702019-11-26 08:17:33 -0800531 l.adjustPos(c)
Zack Williamse940c7a2019-08-21 14:25:39 -0700532 token = append(token, c)
533 }
534 return token
535}
536
537func (l *protoLex) readIdentifier(sofar []rune) []rune {
538 token := sofar
539 for {
540 c, _, err := l.input.readRune()
541 if err != nil {
542 break
543 }
Scott Baker4a35a702019-11-26 08:17:33 -0800544 if c != '_' && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && (c < '0' || c > '9') {
Zack Williamse940c7a2019-08-21 14:25:39 -0700545 l.input.unreadRune(c)
546 break
547 }
Scott Baker4a35a702019-11-26 08:17:33 -0800548 l.adjustPos(c)
Zack Williamse940c7a2019-08-21 14:25:39 -0700549 token = append(token, c)
550 }
551 return token
552}
553
554func (l *protoLex) readStringLiteral(quote rune) (string, error) {
555 var buf bytes.Buffer
556 for {
557 c, _, err := l.input.readRune()
558 if err != nil {
559 if err == io.EOF {
560 err = io.ErrUnexpectedEOF
561 }
562 return "", err
563 }
564 if c == '\n' {
Zack Williamse940c7a2019-08-21 14:25:39 -0700565 return "", errors.New("encountered end-of-line before end of string literal")
566 }
Scott Baker4a35a702019-11-26 08:17:33 -0800567 l.adjustPos(c)
Zack Williamse940c7a2019-08-21 14:25:39 -0700568 if c == quote {
569 break
570 }
571 if c == 0 {
572 return "", errors.New("null character ('\\0') not allowed in string literal")
573 }
574 if c == '\\' {
575 // escape sequence
576 c, _, err = l.input.readRune()
577 if err != nil {
578 return "", err
579 }
Scott Baker4a35a702019-11-26 08:17:33 -0800580 l.adjustPos(c)
Zack Williamse940c7a2019-08-21 14:25:39 -0700581 if c == 'x' || c == 'X' {
582 // hex escape
583 c, _, err := l.input.readRune()
584 if err != nil {
585 return "", err
586 }
Scott Baker4a35a702019-11-26 08:17:33 -0800587 l.adjustPos(c)
Zack Williamse940c7a2019-08-21 14:25:39 -0700588 c2, _, err := l.input.readRune()
589 if err != nil {
590 return "", err
591 }
592 var hex string
593 if (c2 < '0' || c2 > '9') && (c2 < 'a' || c2 > 'f') && (c2 < 'A' || c2 > 'F') {
594 l.input.unreadRune(c2)
595 hex = string(c)
596 } else {
Scott Baker4a35a702019-11-26 08:17:33 -0800597 l.adjustPos(c2)
Zack Williamse940c7a2019-08-21 14:25:39 -0700598 hex = string([]rune{c, c2})
599 }
600 i, err := strconv.ParseInt(hex, 16, 32)
601 if err != nil {
602 return "", fmt.Errorf("invalid hex escape: \\x%q", hex)
603 }
604 buf.WriteByte(byte(i))
605
606 } else if c >= '0' && c <= '7' {
607 // octal escape
608 c2, _, err := l.input.readRune()
609 if err != nil {
610 return "", err
611 }
612 var octal string
613 if c2 < '0' || c2 > '7' {
614 l.input.unreadRune(c2)
615 octal = string(c)
616 } else {
Scott Baker4a35a702019-11-26 08:17:33 -0800617 l.adjustPos(c2)
Zack Williamse940c7a2019-08-21 14:25:39 -0700618 c3, _, err := l.input.readRune()
619 if err != nil {
620 return "", err
621 }
622 if c3 < '0' || c3 > '7' {
623 l.input.unreadRune(c3)
624 octal = string([]rune{c, c2})
625 } else {
Scott Baker4a35a702019-11-26 08:17:33 -0800626 l.adjustPos(c3)
Zack Williamse940c7a2019-08-21 14:25:39 -0700627 octal = string([]rune{c, c2, c3})
628 }
629 }
630 i, err := strconv.ParseInt(octal, 8, 32)
631 if err != nil {
632 return "", fmt.Errorf("invalid octal escape: \\%q", octal)
633 }
634 if i > 0xff {
635 return "", fmt.Errorf("octal escape is out range, must be between 0 and 377: \\%q", octal)
636 }
637 buf.WriteByte(byte(i))
638
639 } else if c == 'u' {
640 // short unicode escape
641 u := make([]rune, 4)
642 for i := range u {
643 c, _, err := l.input.readRune()
644 if err != nil {
645 return "", err
646 }
Scott Baker4a35a702019-11-26 08:17:33 -0800647 l.adjustPos(c)
Zack Williamse940c7a2019-08-21 14:25:39 -0700648 u[i] = c
649 }
650 i, err := strconv.ParseInt(string(u), 16, 32)
651 if err != nil {
652 return "", fmt.Errorf("invalid unicode escape: \\u%q", string(u))
653 }
654 buf.WriteRune(rune(i))
655
656 } else if c == 'U' {
657 // long unicode escape
658 u := make([]rune, 8)
659 for i := range u {
660 c, _, err := l.input.readRune()
661 if err != nil {
662 return "", err
663 }
Scott Baker4a35a702019-11-26 08:17:33 -0800664 l.adjustPos(c)
Zack Williamse940c7a2019-08-21 14:25:39 -0700665 u[i] = c
666 }
667 i, err := strconv.ParseInt(string(u), 16, 32)
668 if err != nil {
669 return "", fmt.Errorf("invalid unicode escape: \\U%q", string(u))
670 }
671 if i > 0x10ffff || i < 0 {
672 return "", fmt.Errorf("unicode escape is out of range, must be between 0 and 0x10ffff: \\U%q", string(u))
673 }
674 buf.WriteRune(rune(i))
675
676 } else if c == 'a' {
677 buf.WriteByte('\a')
678 } else if c == 'b' {
679 buf.WriteByte('\b')
680 } else if c == 'f' {
681 buf.WriteByte('\f')
682 } else if c == 'n' {
683 buf.WriteByte('\n')
684 } else if c == 'r' {
685 buf.WriteByte('\r')
686 } else if c == 't' {
687 buf.WriteByte('\t')
688 } else if c == 'v' {
689 buf.WriteByte('\v')
690 } else if c == '\\' {
691 buf.WriteByte('\\')
692 } else if c == '\'' {
693 buf.WriteByte('\'')
694 } else if c == '"' {
695 buf.WriteByte('"')
696 } else if c == '?' {
697 buf.WriteByte('?')
698 } else {
699 return "", fmt.Errorf("invalid escape sequence: %q", "\\"+string(c))
700 }
701 } else {
702 buf.WriteRune(c)
703 }
704 }
705 return buf.String(), nil
706}
707
708func (l *protoLex) skipToEndOfLineComment() (bool, string) {
709 txt := []rune{'/', '/'}
710 for {
711 c, _, err := l.input.readRune()
712 if err != nil {
713 return false, string(txt)
714 }
715 if c == '\n' {
Scott Baker4a35a702019-11-26 08:17:33 -0800716 return true, string(append(txt, '\n'))
Zack Williamse940c7a2019-08-21 14:25:39 -0700717 }
Scott Baker4a35a702019-11-26 08:17:33 -0800718 l.adjustPos(c)
Zack Williamse940c7a2019-08-21 14:25:39 -0700719 txt = append(txt, c)
720 }
721}
722
723func (l *protoLex) skipToEndOfBlockComment() (string, bool) {
724 txt := []rune{'/', '*'}
725 for {
726 c, _, err := l.input.readRune()
727 if err != nil {
728 return "", false
729 }
Scott Baker4a35a702019-11-26 08:17:33 -0800730 l.adjustPos(c)
Zack Williamse940c7a2019-08-21 14:25:39 -0700731 txt = append(txt, c)
732 if c == '*' {
733 c, _, err := l.input.readRune()
734 if err != nil {
735 return "", false
736 }
737 if c == '/' {
Scott Baker4a35a702019-11-26 08:17:33 -0800738 l.adjustPos(c)
Zack Williamse940c7a2019-08-21 14:25:39 -0700739 txt = append(txt, c)
740 return string(txt), true
741 }
742 l.input.unreadRune(c)
743 }
744 }
745}
746
Scott Baker4a35a702019-11-26 08:17:33 -0800747func (l *protoLex) addSourceError(err error) ErrorWithPos {
748 ewp, ok := err.(ErrorWithPos)
749 if !ok {
750 ewp = ErrorWithSourcePos{Pos: l.prev(), Underlying: err}
Zack Williamse940c7a2019-08-21 14:25:39 -0700751 }
Scott Baker4a35a702019-11-26 08:17:33 -0800752 _ = l.errs.handleError(ewp)
753 return ewp
754}
755
756func (l *protoLex) Error(s string) {
757 _ = l.addSourceError(errors.New(s))
Zack Williamse940c7a2019-08-21 14:25:39 -0700758}