blob: 6eb1accb1b22689c382a3110a91e27c6021f612f [file] [log] [blame]
Zack Williamse940c7a2019-08-21 14:25:39 -07001package protoparse
2
3import (
4 "bytes"
5 "errors"
6 "fmt"
7 "io"
8 "io/ioutil"
9 "math"
10 "os"
11 "path/filepath"
12 "sort"
13 "strings"
14
15 "github.com/golang/protobuf/proto"
16 dpb "github.com/golang/protobuf/protoc-gen-go/descriptor"
17
18 "github.com/jhump/protoreflect/desc"
19 "github.com/jhump/protoreflect/desc/internal"
20)
21
22//go:generate goyacc -o proto.y.go -p proto proto.y
23
Zack Williamse940c7a2019-08-21 14:25:39 -070024func init() {
25 protoErrorVerbose = true
26
27 // fix up the generated "token name" array so that error messages are nicer
28 setTokenName(_STRING_LIT, "string literal")
29 setTokenName(_INT_LIT, "int literal")
30 setTokenName(_FLOAT_LIT, "float literal")
31 setTokenName(_NAME, "identifier")
Zack Williamse940c7a2019-08-21 14:25:39 -070032 setTokenName(_ERROR, "error")
33 // for keywords, just show the keyword itself wrapped in quotes
34 for str, i := range keywords {
35 setTokenName(i, fmt.Sprintf(`"%s"`, str))
36 }
37}
38
39func setTokenName(token int, text string) {
40 // NB: this is based on logic in generated parse code that translates the
41 // int returned from the lexer into an internal token number.
42 var intern int
43 if token < len(protoTok1) {
44 intern = protoTok1[token]
45 } else {
46 if token >= protoPrivate {
47 if token < protoPrivate+len(protoTok2) {
48 intern = protoTok2[token-protoPrivate]
49 }
50 }
51 if intern == 0 {
52 for i := 0; i+1 < len(protoTok3); i += 2 {
53 if protoTok3[i] == token {
54 intern = protoTok3[i+1]
55 break
56 }
57 }
58 }
59 }
60
61 if intern >= 1 && intern-1 < len(protoToknames) {
62 protoToknames[intern-1] = text
63 return
64 }
65
66 panic(fmt.Sprintf("Unknown token value: %d", token))
67}
68
// FileAccessor abstracts how proto source files are opened. Given the name of
// a file, it returns a reader for that file's contents or an error if the
// file cannot be opened.
type FileAccessor func(filename string) (io.ReadCloser, error)
72
73// FileContentsFromMap returns a FileAccessor that uses the given map of file
74// contents. This allows proto source files to be constructed in memory and
75// easily supplied to a parser. The map keys are the paths to the proto source
76// files, and the values are the actual proto source contents.
77func FileContentsFromMap(files map[string]string) FileAccessor {
78 return func(filename string) (io.ReadCloser, error) {
79 contents, ok := files[filename]
80 if !ok {
81 return nil, os.ErrNotExist
82 }
83 return ioutil.NopCloser(strings.NewReader(contents)), nil
84 }
85}
86
Zack Williamse940c7a2019-08-21 14:25:39 -070087// Parser parses proto source into descriptors.
88type Parser struct {
89 // The paths used to search for dependencies that are referenced in import
90 // statements in proto source files. If no import paths are provided then
91 // "." (current directory) is assumed to be the only import path.
92 //
93 // This setting is only used during ParseFiles operations. Since calls to
94 // ParseFilesButDoNotLink do not link, there is no need to load and parse
95 // dependencies.
96 ImportPaths []string
97
98 // If true, the supplied file names/paths need not necessarily match how the
99 // files are referenced in import statements. The parser will attempt to
100 // match import statements to supplied paths, "guessing" the import paths
101 // for the files. Note that this inference is not perfect and link errors
102 // could result. It works best when all proto files are organized such that
103 // a single import path can be inferred (e.g. all files under a single tree
104 // with import statements all being relative to the root of this tree).
105 InferImportPaths bool
106
107 // Used to create a reader for a given filename, when loading proto source
108 // file contents. If unset, os.Open is used. If ImportPaths is also empty
109 // then relative paths are will be relative to the process's current working
110 // directory.
111 Accessor FileAccessor
112
113 // If true, the resulting file descriptors will retain source code info,
114 // that maps elements to their location in the source files as well as
115 // includes comments found during parsing (and attributed to elements of
116 // the source file).
117 IncludeSourceCodeInfo bool
118
119 // If true, the results from ParseFilesButDoNotLink will be passed through
120 // some additional validations. But only constraints that do not require
121 // linking can be checked. These include proto2 vs. proto3 language features,
122 // looking for incorrect usage of reserved names or tags, and ensuring that
123 // fields have unique tags and that enum values have unique numbers (unless
124 // the enum allows aliases).
125 ValidateUnlinkedFiles bool
126
127 // If true, the results from ParseFilesButDoNotLink will have options
128 // interpreted. Any uninterpretable options (including any custom options or
129 // options that refer to message and enum types, which can only be
130 // interpreted after linking) will be left in uninterpreted_options. Also,
131 // the "default" pseudo-option for fields can only be interpreted for scalar
132 // fields, excluding enums. (Interpreting default values for enum fields
133 // requires resolving enum names, which requires linking.)
134 InterpretOptionsInUnlinkedFiles bool
Scott Baker4a35a702019-11-26 08:17:33 -0800135
136 // A custom reporter of syntax and link errors. If not specified, the
137 // default reporter just returns the reported error, which causes parsing
138 // to abort after encountering a single error.
139 //
140 // The reporter is not invoked for system or I/O errors, only for syntax and
141 // link errors.
142 ErrorReporter ErrorReporter
Zack Williamse940c7a2019-08-21 14:25:39 -0700143}
144
145// ParseFiles parses the named files into descriptors. The returned slice has
146// the same number of entries as the give filenames, in the same order. So the
147// first returned descriptor corresponds to the first given name, and so on.
148//
149// All dependencies for all specified files (including transitive dependencies)
150// must be accessible via the parser's Accessor or a link error will occur. The
151// exception to this rule is that files can import standard Google-provided
152// files -- e.g. google/protobuf/*.proto -- without needing to supply sources
153// for these files. Like protoc, this parser has a built-in version of these
154// files it can use if they aren't explicitly supplied.
Scott Baker4a35a702019-11-26 08:17:33 -0800155//
156// If the Parser has no ErrorReporter set and a syntax or link error occurs,
157// parsing will abort with the first such error encountered. If there is an
158// ErrorReporter configured and it returns non-nil, parsing will abort with the
159// error it returns. If syntax or link errors are encountered but the configured
160// ErrorReporter always returns nil, the parse fails with ErrInvalidSource.
Zack Williamse940c7a2019-08-21 14:25:39 -0700161func (p Parser) ParseFiles(filenames ...string) ([]*desc.FileDescriptor, error) {
162 accessor := p.Accessor
163 if accessor == nil {
164 accessor = func(name string) (io.ReadCloser, error) {
165 return os.Open(name)
166 }
167 }
168 paths := p.ImportPaths
169 if len(paths) > 0 {
170 acc := accessor
171 accessor = func(name string) (io.ReadCloser, error) {
172 var ret error
173 for _, path := range paths {
174 f, err := acc(filepath.Join(path, name))
175 if err != nil {
176 if ret == nil {
177 ret = err
178 }
179 continue
180 }
181 return f, nil
182 }
183 return nil, ret
184 }
185 }
186
187 protos := map[string]*parseResult{}
Scott Baker4a35a702019-11-26 08:17:33 -0800188 results := &parseResults{resultsByFilename: protos}
189 errs := newErrorHandler(p.ErrorReporter)
190 parseProtoFiles(accessor, filenames, errs, true, true, results)
191 if err := errs.getError(); err != nil {
Zack Williamse940c7a2019-08-21 14:25:39 -0700192 return nil, err
193 }
194 if p.InferImportPaths {
Scott Baker4a35a702019-11-26 08:17:33 -0800195 // TODO: if this re-writes one of the names in filenames, lookups below will break
Zack Williamse940c7a2019-08-21 14:25:39 -0700196 protos = fixupFilenames(protos)
197 }
Scott Baker4a35a702019-11-26 08:17:33 -0800198 linkedProtos, err := newLinker(results, errs).linkFiles()
Zack Williamse940c7a2019-08-21 14:25:39 -0700199 if err != nil {
200 return nil, err
201 }
202 if p.IncludeSourceCodeInfo {
203 for name, fd := range linkedProtos {
204 pr := protos[name]
205 fd.AsFileDescriptorProto().SourceCodeInfo = pr.generateSourceCodeInfo()
206 internal.RecomputeSourceInfo(fd)
207 }
208 }
209 fds := make([]*desc.FileDescriptor, len(filenames))
210 for i, name := range filenames {
211 fd := linkedProtos[name]
212 fds[i] = fd
213 }
214 return fds, nil
215}
216
217// ParseFilesButDoNotLink parses the named files into descriptor protos. The
218// results are just protos, not fully-linked descriptors. It is possible that
219// descriptors are invalid and still be returned in parsed form without error
220// due to the fact that the linking step is skipped (and thus many validation
221// steps omitted).
222//
223// There are a few side effects to not linking the descriptors:
224// 1. No options will be interpreted. Options can refer to extensions or have
225// message and enum types. Without linking, these extension and type
226// references are not resolved, so the options may not be interpretable.
227// So all options will appear in UninterpretedOption fields of the various
228// descriptor options messages.
229// 2. Type references will not be resolved. This means that the actual type
230// names in the descriptors may be unqualified and even relative to the
231// scope in which the type reference appears. This goes for fields that
232// have message and enum types. It also applies to methods and their
233// references to request and response message types.
234// 3. Enum fields are not known. Until a field's type reference is resolved
235// (during linking), it is not known whether the type refers to a message
236// or an enum. So all fields with such type references have their Type set
237// to TYPE_MESSAGE.
238//
239// This method will still validate the syntax of parsed files. If the parser's
240// ValidateUnlinkedFiles field is true, additional checks, beyond syntax will
241// also be performed.
Scott Baker4a35a702019-11-26 08:17:33 -0800242//
243// If the Parser has no ErrorReporter set and a syntax or link error occurs,
244// parsing will abort with the first such error encountered. If there is an
245// ErrorReporter configured and it returns non-nil, parsing will abort with the
246// error it returns. If syntax or link errors are encountered but the configured
247// ErrorReporter always returns nil, the parse fails with ErrInvalidSource.
Zack Williamse940c7a2019-08-21 14:25:39 -0700248func (p Parser) ParseFilesButDoNotLink(filenames ...string) ([]*dpb.FileDescriptorProto, error) {
249 accessor := p.Accessor
250 if accessor == nil {
251 accessor = func(name string) (io.ReadCloser, error) {
252 return os.Open(name)
253 }
254 }
255
256 protos := map[string]*parseResult{}
Scott Baker4a35a702019-11-26 08:17:33 -0800257 errs := newErrorHandler(p.ErrorReporter)
258 parseProtoFiles(accessor, filenames, errs, false, p.ValidateUnlinkedFiles, &parseResults{resultsByFilename: protos})
259 if err := errs.getError(); err != nil {
Zack Williamse940c7a2019-08-21 14:25:39 -0700260 return nil, err
261 }
262 if p.InferImportPaths {
Scott Baker4a35a702019-11-26 08:17:33 -0800263 // TODO: if this re-writes one of the names in filenames, lookups below will break
Zack Williamse940c7a2019-08-21 14:25:39 -0700264 protos = fixupFilenames(protos)
265 }
266 fds := make([]*dpb.FileDescriptorProto, len(filenames))
267 for i, name := range filenames {
268 pr := protos[name]
269 fd := pr.fd
270 if p.InterpretOptionsInUnlinkedFiles {
271 pr.lenient = true
Scott Baker4a35a702019-11-26 08:17:33 -0800272 _ = interpretFileOptions(pr, poorFileDescriptorish{FileDescriptorProto: fd})
Zack Williamse940c7a2019-08-21 14:25:39 -0700273 }
274 if p.IncludeSourceCodeInfo {
275 fd.SourceCodeInfo = pr.generateSourceCodeInfo()
276 }
277 fds[i] = fd
278 }
279 return fds, nil
280}
281
Zack Williamse940c7a2019-08-21 14:25:39 -0700282func fixupFilenames(protos map[string]*parseResult) map[string]*parseResult {
283 // In the event that the given filenames (keys in the supplied map) do not
284 // match the actual paths used in 'import' statements in the files, we try
285 // to revise names in the protos so that they will match and be linkable.
286 revisedProtos := map[string]*parseResult{}
287
288 protoPaths := map[string]struct{}{}
289 // TODO: this is O(n^2) but could likely be O(n) with a clever data structure (prefix tree that is indexed backwards?)
290 importCandidates := map[string]map[string]struct{}{}
291 candidatesAvailable := map[string]struct{}{}
292 for name := range protos {
293 candidatesAvailable[name] = struct{}{}
294 for _, f := range protos {
295 for _, imp := range f.fd.Dependency {
296 if strings.HasSuffix(name, imp) {
297 candidates := importCandidates[imp]
298 if candidates == nil {
299 candidates = map[string]struct{}{}
300 importCandidates[imp] = candidates
301 }
302 candidates[name] = struct{}{}
303 }
304 }
305 }
306 }
307 for imp, candidates := range importCandidates {
308 // if we found multiple possible candidates, use the one that is an exact match
309 // if it exists, and otherwise, guess that it's the shortest path (fewest elements)
310 var best string
311 for c := range candidates {
312 if _, ok := candidatesAvailable[c]; !ok {
313 // already used this candidate and re-written its filename accordingly
314 continue
315 }
316 if c == imp {
317 // exact match!
318 best = c
319 break
320 }
321 if best == "" {
322 best = c
323 } else {
324 // HACK: we can't actually tell which files is supposed to match
325 // this import, so arbitrarily pick the "shorter" one (fewest
326 // path elements) or, on a tie, the lexically earlier one
327 minLen := strings.Count(best, string(filepath.Separator))
328 cLen := strings.Count(c, string(filepath.Separator))
329 if cLen < minLen || (cLen == minLen && c < best) {
330 best = c
331 }
332 }
333 }
334 if best != "" {
335 prefix := best[:len(best)-len(imp)]
336 if len(prefix) > 0 {
337 protoPaths[prefix] = struct{}{}
338 }
339 f := protos[best]
340 f.fd.Name = proto.String(imp)
341 revisedProtos[imp] = f
342 delete(candidatesAvailable, best)
343 }
344 }
345
346 if len(candidatesAvailable) == 0 {
347 return revisedProtos
348 }
349
350 if len(protoPaths) == 0 {
351 for c := range candidatesAvailable {
352 revisedProtos[c] = protos[c]
353 }
354 return revisedProtos
355 }
356
357 // Any remaining candidates are entry-points (not imported by others), so
358 // the best bet to "fixing" their file name is to see if they're in one of
359 // the proto paths we found, and if so strip that prefix.
360 protoPathStrs := make([]string, len(protoPaths))
361 i := 0
362 for p := range protoPaths {
363 protoPathStrs[i] = p
364 i++
365 }
366 sort.Strings(protoPathStrs)
367 // we look at paths in reverse order, so we'll use a longer proto path if
368 // there is more than one match
369 for c := range candidatesAvailable {
370 var imp string
371 for i := len(protoPathStrs) - 1; i >= 0; i-- {
372 p := protoPathStrs[i]
373 if strings.HasPrefix(c, p) {
374 imp = c[len(p):]
375 break
376 }
377 }
378 if imp != "" {
379 f := protos[c]
380 f.fd.Name = proto.String(imp)
381 revisedProtos[imp] = f
382 } else {
383 revisedProtos[c] = protos[c]
384 }
385 }
386
387 return revisedProtos
388}
389
Scott Baker4a35a702019-11-26 08:17:33 -0800390func parseProtoFiles(acc FileAccessor, filenames []string, errs *errorHandler, recursive, validate bool, parsed *parseResults) {
Zack Williamse940c7a2019-08-21 14:25:39 -0700391 for _, name := range filenames {
Scott Baker4a35a702019-11-26 08:17:33 -0800392 parseProtoFile(acc, name, nil, errs, recursive, validate, parsed)
393 if errs.err != nil {
394 return
Zack Williamse940c7a2019-08-21 14:25:39 -0700395 }
Scott Baker4a35a702019-11-26 08:17:33 -0800396 }
397}
398
399func parseProtoFile(acc FileAccessor, filename string, importLoc *SourcePos, errs *errorHandler, recursive, validate bool, parsed *parseResults) {
400 if parsed.has(filename) {
401 return
402 }
403 in, err := acc(filename)
404 var result *parseResult
405 if err == nil {
406 // try to parse the bytes accessed
Zack Williamse940c7a2019-08-21 14:25:39 -0700407 func() {
Scott Baker4a35a702019-11-26 08:17:33 -0800408 defer func() {
409 // if we've already parsed contents, an error
410 // closing need not fail this operation
411 _ = in.Close()
412 }()
413 result = parseProto(filename, in, errs, validate)
Zack Williamse940c7a2019-08-21 14:25:39 -0700414 }()
Scott Baker4a35a702019-11-26 08:17:33 -0800415 } else if d, ok := standardImports[filename]; ok {
416 // it's a well-known import
417 // (we clone it to make sure we're not sharing state with other
418 // parsers, which could result in unsafe races if multiple
419 // parsers are trying to access it concurrently)
420 result = &parseResult{fd: proto.Clone(d).(*dpb.FileDescriptorProto)}
421 } else {
422 if !strings.Contains(err.Error(), filename) {
423 // an error message that doesn't indicate the file is awful!
424 err = fmt.Errorf("%s: %v", filename, err)
Zack Williamse940c7a2019-08-21 14:25:39 -0700425 }
Scott Baker4a35a702019-11-26 08:17:33 -0800426 if _, ok := err.(ErrorWithPos); !ok && importLoc != nil {
427 // error has no source position? report it as the import line
428 err = ErrorWithSourcePos{
429 Pos: importLoc,
430 Underlying: err,
431 }
432 }
433 _ = errs.handleError(err)
434 return
435 }
436
437 parsed.add(filename, result)
438
439 if errs.getError() != nil {
440 return // abort
441 }
442
443 if recursive {
444 fd := result.fd
445 decl := result.getFileNode(fd)
446 fnode, ok := decl.(*fileNode)
447 if !ok {
448 // no AST for this file? use imports in descriptor
449 for _, dep := range fd.Dependency {
450 parseProtoFile(acc, dep, decl.start(), errs, true, validate, parsed)
451 if errs.getError() != nil {
452 return // abort
453 }
454 }
455 return
456 }
457 // we have an AST; use it so we can report import location in errors
458 for _, dep := range fnode.imports {
459 parseProtoFile(acc, dep.name.val, dep.name.start(), errs, true, validate, parsed)
460 if errs.getError() != nil {
461 return // abort
Zack Williamse940c7a2019-08-21 14:25:39 -0700462 }
463 }
464 }
Scott Baker4a35a702019-11-26 08:17:33 -0800465}
466
467type parseResults struct {
468 resultsByFilename map[string]*parseResult
469 filenames []string
470}
471
472func (r *parseResults) has(filename string) bool {
473 _, ok := r.resultsByFilename[filename]
474 return ok
475}
476
477func (r *parseResults) add(filename string, result *parseResult) {
478 r.resultsByFilename[filename] = result
479 r.filenames = append(r.filenames, filename)
Zack Williamse940c7a2019-08-21 14:25:39 -0700480}
481
482type parseResult struct {
Scott Baker4a35a702019-11-26 08:17:33 -0800483 // handles any errors encountered during parsing, construction of file descriptor,
484 // or validation
485 errs *errorHandler
486
Zack Williamse940c7a2019-08-21 14:25:39 -0700487 // the parsed file descriptor
488 fd *dpb.FileDescriptorProto
489
490 // if set to true, enables lenient interpretation of options, where
491 // unrecognized options will be left uninterpreted instead of resulting in a
492 // link error
493 lenient bool
494
495 // a map of elements in the descriptor to nodes in the AST
496 // (for extracting position information when validating the descriptor)
497 nodes map[proto.Message]node
498
499 // a map of uninterpreted option AST nodes to their relative path
500 // in the resulting options message
501 interpretedOptions map[*optionNode][]int32
502}
503
504func (r *parseResult) getFileNode(f *dpb.FileDescriptorProto) fileDecl {
505 if r.nodes == nil {
506 return noSourceNode{pos: unknownPos(f.GetName())}
507 }
508 return r.nodes[f].(fileDecl)
509}
510
511func (r *parseResult) getOptionNode(o *dpb.UninterpretedOption) optionDecl {
512 if r.nodes == nil {
513 return noSourceNode{pos: unknownPos(r.fd.GetName())}
514 }
515 return r.nodes[o].(optionDecl)
516}
517
518func (r *parseResult) getOptionNamePartNode(o *dpb.UninterpretedOption_NamePart) node {
519 if r.nodes == nil {
520 return noSourceNode{pos: unknownPos(r.fd.GetName())}
521 }
522 return r.nodes[o]
523}
524
Zack Williamse940c7a2019-08-21 14:25:39 -0700525func (r *parseResult) getFieldNode(f *dpb.FieldDescriptorProto) fieldDecl {
526 if r.nodes == nil {
527 return noSourceNode{pos: unknownPos(r.fd.GetName())}
528 }
529 return r.nodes[f].(fieldDecl)
530}
531
Zack Williamse940c7a2019-08-21 14:25:39 -0700532func (r *parseResult) getExtensionRangeNode(e *dpb.DescriptorProto_ExtensionRange) rangeDecl {
533 if r.nodes == nil {
534 return noSourceNode{pos: unknownPos(r.fd.GetName())}
535 }
536 return r.nodes[e].(rangeDecl)
537}
538
539func (r *parseResult) getMessageReservedRangeNode(rr *dpb.DescriptorProto_ReservedRange) rangeDecl {
540 if r.nodes == nil {
541 return noSourceNode{pos: unknownPos(r.fd.GetName())}
542 }
543 return r.nodes[rr].(rangeDecl)
544}
545
Zack Williamse940c7a2019-08-21 14:25:39 -0700546func (r *parseResult) getEnumValueNode(e *dpb.EnumValueDescriptorProto) enumValueDecl {
547 if r.nodes == nil {
548 return noSourceNode{pos: unknownPos(r.fd.GetName())}
549 }
550 return r.nodes[e].(enumValueDecl)
551}
552
553func (r *parseResult) getEnumReservedRangeNode(rr *dpb.EnumDescriptorProto_EnumReservedRange) rangeDecl {
554 if r.nodes == nil {
555 return noSourceNode{pos: unknownPos(r.fd.GetName())}
556 }
557 return r.nodes[rr].(rangeDecl)
558}
559
Zack Williamse940c7a2019-08-21 14:25:39 -0700560func (r *parseResult) getMethodNode(m *dpb.MethodDescriptorProto) methodDecl {
561 if r.nodes == nil {
562 return noSourceNode{pos: unknownPos(r.fd.GetName())}
563 }
564 return r.nodes[m].(methodDecl)
565}
566
567func (r *parseResult) putFileNode(f *dpb.FileDescriptorProto, n *fileNode) {
568 r.nodes[f] = n
569}
570
571func (r *parseResult) putOptionNode(o *dpb.UninterpretedOption, n *optionNode) {
572 r.nodes[o] = n
573}
574
575func (r *parseResult) putOptionNamePartNode(o *dpb.UninterpretedOption_NamePart, n *optionNamePartNode) {
576 r.nodes[o] = n
577}
578
579func (r *parseResult) putMessageNode(m *dpb.DescriptorProto, n msgDecl) {
580 r.nodes[m] = n
581}
582
583func (r *parseResult) putFieldNode(f *dpb.FieldDescriptorProto, n fieldDecl) {
584 r.nodes[f] = n
585}
586
587func (r *parseResult) putOneOfNode(o *dpb.OneofDescriptorProto, n *oneOfNode) {
588 r.nodes[o] = n
589}
590
591func (r *parseResult) putExtensionRangeNode(e *dpb.DescriptorProto_ExtensionRange, n *rangeNode) {
592 r.nodes[e] = n
593}
594
595func (r *parseResult) putMessageReservedRangeNode(rr *dpb.DescriptorProto_ReservedRange, n *rangeNode) {
596 r.nodes[rr] = n
597}
598
599func (r *parseResult) putEnumNode(e *dpb.EnumDescriptorProto, n *enumNode) {
600 r.nodes[e] = n
601}
602
603func (r *parseResult) putEnumValueNode(e *dpb.EnumValueDescriptorProto, n *enumValueNode) {
604 r.nodes[e] = n
605}
606
607func (r *parseResult) putEnumReservedRangeNode(rr *dpb.EnumDescriptorProto_EnumReservedRange, n *rangeNode) {
608 r.nodes[rr] = n
609}
610
611func (r *parseResult) putServiceNode(s *dpb.ServiceDescriptorProto, n *serviceNode) {
612 r.nodes[s] = n
613}
614
615func (r *parseResult) putMethodNode(m *dpb.MethodDescriptorProto, n *methodNode) {
616 r.nodes[m] = n
617}
618
Scott Baker4a35a702019-11-26 08:17:33 -0800619func parseProto(filename string, r io.Reader, errs *errorHandler, validate bool) *parseResult {
620 lx := newLexer(r, filename, errs)
Zack Williamse940c7a2019-08-21 14:25:39 -0700621 protoParse(lx)
Scott Baker4a35a702019-11-26 08:17:33 -0800622
623 res := createParseResult(filename, lx.res, errs)
624 if validate {
625 basicValidate(res)
Zack Williamse940c7a2019-08-21 14:25:39 -0700626 }
627
Scott Baker4a35a702019-11-26 08:17:33 -0800628 return res
Zack Williamse940c7a2019-08-21 14:25:39 -0700629}
630
Scott Baker4a35a702019-11-26 08:17:33 -0800631func createParseResult(filename string, file *fileNode, errs *errorHandler) *parseResult {
Zack Williamse940c7a2019-08-21 14:25:39 -0700632 res := &parseResult{
Scott Baker4a35a702019-11-26 08:17:33 -0800633 errs: errs,
Zack Williamse940c7a2019-08-21 14:25:39 -0700634 nodes: map[proto.Message]node{},
635 interpretedOptions: map[*optionNode][]int32{},
636 }
Scott Baker4a35a702019-11-26 08:17:33 -0800637 if file == nil {
638 // nil AST means there was an error that prevented any parsing
639 // or the file was empty; synthesize empty non-nil AST
640 file = &fileNode{}
641 n := noSourceNode{pos: unknownPos(filename)}
642 file.setRange(&n, &n)
643 }
644 res.createFileDescriptor(filename, file)
645 return res
Zack Williamse940c7a2019-08-21 14:25:39 -0700646}
647
Scott Baker4a35a702019-11-26 08:17:33 -0800648func (r *parseResult) createFileDescriptor(filename string, file *fileNode) {
Zack Williamse940c7a2019-08-21 14:25:39 -0700649 fd := &dpb.FileDescriptorProto{Name: proto.String(filename)}
Scott Baker4a35a702019-11-26 08:17:33 -0800650 r.fd = fd
Zack Williamse940c7a2019-08-21 14:25:39 -0700651 r.putFileNode(fd, file)
652
653 isProto3 := false
654 if file.syntax != nil {
655 isProto3 = file.syntax.syntax.val == "proto3"
656 // proto2 is the default, so no need to set unless proto3
657 if isProto3 {
658 fd.Syntax = proto.String(file.syntax.syntax.val)
659 }
660 }
661
662 for _, decl := range file.decls {
663 if decl.enum != nil {
664 fd.EnumType = append(fd.EnumType, r.asEnumDescriptor(decl.enum))
665 } else if decl.extend != nil {
666 r.addExtensions(decl.extend, &fd.Extension, &fd.MessageType, isProto3)
667 } else if decl.imp != nil {
668 file.imports = append(file.imports, decl.imp)
669 index := len(fd.Dependency)
670 fd.Dependency = append(fd.Dependency, decl.imp.name.val)
671 if decl.imp.public {
672 fd.PublicDependency = append(fd.PublicDependency, int32(index))
673 } else if decl.imp.weak {
674 fd.WeakDependency = append(fd.WeakDependency, int32(index))
675 }
676 } else if decl.message != nil {
677 fd.MessageType = append(fd.MessageType, r.asMessageDescriptor(decl.message, isProto3))
678 } else if decl.option != nil {
679 if fd.Options == nil {
680 fd.Options = &dpb.FileOptions{}
681 }
682 fd.Options.UninterpretedOption = append(fd.Options.UninterpretedOption, r.asUninterpretedOption(decl.option))
683 } else if decl.service != nil {
684 fd.Service = append(fd.Service, r.asServiceDescriptor(decl.service))
685 } else if decl.pkg != nil {
686 if fd.Package != nil {
Scott Baker4a35a702019-11-26 08:17:33 -0800687 if r.errs.handleError(ErrorWithSourcePos{Pos: decl.pkg.start(), Underlying: errors.New("files should have only one package declaration")}) != nil {
688 return
689 }
Zack Williamse940c7a2019-08-21 14:25:39 -0700690 }
Zack Williamse940c7a2019-08-21 14:25:39 -0700691 fd.Package = proto.String(decl.pkg.name.val)
692 }
693 }
Zack Williamse940c7a2019-08-21 14:25:39 -0700694}
695
696func (r *parseResult) asUninterpretedOptions(nodes []*optionNode) []*dpb.UninterpretedOption {
Scott Baker4a35a702019-11-26 08:17:33 -0800697 if len(nodes) == 0 {
698 return nil
699 }
Zack Williamse940c7a2019-08-21 14:25:39 -0700700 opts := make([]*dpb.UninterpretedOption, len(nodes))
701 for i, n := range nodes {
702 opts[i] = r.asUninterpretedOption(n)
703 }
704 return opts
705}
706
707func (r *parseResult) asUninterpretedOption(node *optionNode) *dpb.UninterpretedOption {
708 opt := &dpb.UninterpretedOption{Name: r.asUninterpretedOptionName(node.name.parts)}
709 r.putOptionNode(opt, node)
710
711 switch val := node.val.value().(type) {
712 case bool:
713 if val {
714 opt.IdentifierValue = proto.String("true")
715 } else {
716 opt.IdentifierValue = proto.String("false")
717 }
718 case int64:
719 opt.NegativeIntValue = proto.Int64(val)
720 case uint64:
721 opt.PositiveIntValue = proto.Uint64(val)
722 case float64:
723 opt.DoubleValue = proto.Float64(val)
724 case string:
725 opt.StringValue = []byte(val)
726 case identifier:
727 opt.IdentifierValue = proto.String(string(val))
728 case []*aggregateEntryNode:
729 var buf bytes.Buffer
730 aggToString(val, &buf)
731 aggStr := buf.String()
732 opt.AggregateValue = proto.String(aggStr)
733 }
734 return opt
735}
736
737func (r *parseResult) asUninterpretedOptionName(parts []*optionNamePartNode) []*dpb.UninterpretedOption_NamePart {
738 ret := make([]*dpb.UninterpretedOption_NamePart, len(parts))
739 for i, part := range parts {
740 txt := part.text.val
741 if !part.isExtension {
742 txt = part.text.val[part.offset : part.offset+part.length]
743 }
744 np := &dpb.UninterpretedOption_NamePart{
745 NamePart: proto.String(txt),
746 IsExtension: proto.Bool(part.isExtension),
747 }
748 r.putOptionNamePartNode(np, part)
749 ret[i] = np
750 }
751 return ret
752}
753
754func (r *parseResult) addExtensions(ext *extendNode, flds *[]*dpb.FieldDescriptorProto, msgs *[]*dpb.DescriptorProto, isProto3 bool) {
755 extendee := ext.extendee.val
756 for _, decl := range ext.decls {
757 if decl.field != nil {
758 decl.field.extendee = ext
759 fd := r.asFieldDescriptor(decl.field)
760 fd.Extendee = proto.String(extendee)
761 *flds = append(*flds, fd)
762 } else if decl.group != nil {
763 decl.group.extendee = ext
764 fd, md := r.asGroupDescriptors(decl.group, isProto3)
765 fd.Extendee = proto.String(extendee)
766 *flds = append(*flds, fd)
767 *msgs = append(*msgs, md)
768 }
769 }
770}
771
Scott Baker4a35a702019-11-26 08:17:33 -0800772func asLabel(lbl *fieldLabel) *dpb.FieldDescriptorProto_Label {
773 if lbl.identNode == nil {
Zack Williamse940c7a2019-08-21 14:25:39 -0700774 return nil
775 }
776 switch {
777 case lbl.repeated:
778 return dpb.FieldDescriptorProto_LABEL_REPEATED.Enum()
779 case lbl.required:
780 return dpb.FieldDescriptorProto_LABEL_REQUIRED.Enum()
781 default:
782 return dpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum()
783 }
784}
785
786func (r *parseResult) asFieldDescriptor(node *fieldNode) *dpb.FieldDescriptorProto {
Scott Baker4a35a702019-11-26 08:17:33 -0800787 fd := newFieldDescriptor(node.name.val, node.fldType.val, int32(node.tag.val), asLabel(&node.label))
Zack Williamse940c7a2019-08-21 14:25:39 -0700788 r.putFieldNode(fd, node)
Scott Baker4a35a702019-11-26 08:17:33 -0800789 if opts := node.options.Elements(); len(opts) > 0 {
790 fd.Options = &dpb.FieldOptions{UninterpretedOption: r.asUninterpretedOptions(opts)}
Zack Williamse940c7a2019-08-21 14:25:39 -0700791 }
792 return fd
793}
794
Scott Baker4a35a702019-11-26 08:17:33 -0800795var fieldTypes = map[string]dpb.FieldDescriptorProto_Type{
796 "double": dpb.FieldDescriptorProto_TYPE_DOUBLE,
797 "float": dpb.FieldDescriptorProto_TYPE_FLOAT,
798 "int32": dpb.FieldDescriptorProto_TYPE_INT32,
799 "int64": dpb.FieldDescriptorProto_TYPE_INT64,
800 "uint32": dpb.FieldDescriptorProto_TYPE_UINT32,
801 "uint64": dpb.FieldDescriptorProto_TYPE_UINT64,
802 "sint32": dpb.FieldDescriptorProto_TYPE_SINT32,
803 "sint64": dpb.FieldDescriptorProto_TYPE_SINT64,
804 "fixed32": dpb.FieldDescriptorProto_TYPE_FIXED32,
805 "fixed64": dpb.FieldDescriptorProto_TYPE_FIXED64,
806 "sfixed32": dpb.FieldDescriptorProto_TYPE_SFIXED32,
807 "sfixed64": dpb.FieldDescriptorProto_TYPE_SFIXED64,
808 "bool": dpb.FieldDescriptorProto_TYPE_BOOL,
809 "string": dpb.FieldDescriptorProto_TYPE_STRING,
810 "bytes": dpb.FieldDescriptorProto_TYPE_BYTES,
811}
812
Zack Williamse940c7a2019-08-21 14:25:39 -0700813func newFieldDescriptor(name string, fieldType string, tag int32, lbl *dpb.FieldDescriptorProto_Label) *dpb.FieldDescriptorProto {
814 fd := &dpb.FieldDescriptorProto{
815 Name: proto.String(name),
816 JsonName: proto.String(internal.JsonName(name)),
817 Number: proto.Int32(tag),
818 Label: lbl,
819 }
Scott Baker4a35a702019-11-26 08:17:33 -0800820 t, ok := fieldTypes[fieldType]
821 if ok {
822 fd.Type = t.Enum()
823 } else {
824 // NB: we don't have enough info to determine whether this is an enum
825 // or a message type, so we'll leave Type nil and set it later
826 // (during linking)
Zack Williamse940c7a2019-08-21 14:25:39 -0700827 fd.TypeName = proto.String(fieldType)
828 }
829 return fd
830}
831
832func (r *parseResult) asGroupDescriptors(group *groupNode, isProto3 bool) (*dpb.FieldDescriptorProto, *dpb.DescriptorProto) {
833 fieldName := strings.ToLower(group.name.val)
834 fd := &dpb.FieldDescriptorProto{
835 Name: proto.String(fieldName),
836 JsonName: proto.String(internal.JsonName(fieldName)),
837 Number: proto.Int32(int32(group.tag.val)),
Scott Baker4a35a702019-11-26 08:17:33 -0800838 Label: asLabel(&group.label),
Zack Williamse940c7a2019-08-21 14:25:39 -0700839 Type: dpb.FieldDescriptorProto_TYPE_GROUP.Enum(),
840 TypeName: proto.String(group.name.val),
841 }
842 r.putFieldNode(fd, group)
843 md := &dpb.DescriptorProto{Name: proto.String(group.name.val)}
844 r.putMessageNode(md, group)
Scott Baker4a35a702019-11-26 08:17:33 -0800845 r.addMessageDecls(md, group.decls, isProto3)
Zack Williamse940c7a2019-08-21 14:25:39 -0700846 return fd, md
847}
848
849func (r *parseResult) asMapDescriptors(mapField *mapFieldNode, isProto3 bool) (*dpb.FieldDescriptorProto, *dpb.DescriptorProto) {
850 var lbl *dpb.FieldDescriptorProto_Label
851 if !isProto3 {
852 lbl = dpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum()
853 }
Scott Baker4a35a702019-11-26 08:17:33 -0800854 keyFd := newFieldDescriptor("key", mapField.mapType.keyType.val, 1, lbl)
Zack Williamse940c7a2019-08-21 14:25:39 -0700855 r.putFieldNode(keyFd, mapField.keyField())
Scott Baker4a35a702019-11-26 08:17:33 -0800856 valFd := newFieldDescriptor("value", mapField.mapType.valueType.val, 2, lbl)
Zack Williamse940c7a2019-08-21 14:25:39 -0700857 r.putFieldNode(valFd, mapField.valueField())
858 entryName := internal.InitCap(internal.JsonName(mapField.name.val)) + "Entry"
859 fd := newFieldDescriptor(mapField.name.val, entryName, int32(mapField.tag.val), dpb.FieldDescriptorProto_LABEL_REPEATED.Enum())
Scott Baker4a35a702019-11-26 08:17:33 -0800860 if opts := mapField.options.Elements(); len(opts) > 0 {
861 fd.Options = &dpb.FieldOptions{UninterpretedOption: r.asUninterpretedOptions(opts)}
Zack Williamse940c7a2019-08-21 14:25:39 -0700862 }
863 r.putFieldNode(fd, mapField)
864 md := &dpb.DescriptorProto{
865 Name: proto.String(entryName),
866 Options: &dpb.MessageOptions{MapEntry: proto.Bool(true)},
867 Field: []*dpb.FieldDescriptorProto{keyFd, valFd},
868 }
869 r.putMessageNode(md, mapField)
870 return fd, md
871}
872
873func (r *parseResult) asExtensionRanges(node *extensionRangeNode) []*dpb.DescriptorProto_ExtensionRange {
Scott Baker4a35a702019-11-26 08:17:33 -0800874 opts := r.asUninterpretedOptions(node.options.Elements())
Zack Williamse940c7a2019-08-21 14:25:39 -0700875 ers := make([]*dpb.DescriptorProto_ExtensionRange, len(node.ranges))
876 for i, rng := range node.ranges {
877 er := &dpb.DescriptorProto_ExtensionRange{
878 Start: proto.Int32(rng.st),
879 End: proto.Int32(rng.en + 1),
880 }
881 if len(opts) > 0 {
882 er.Options = &dpb.ExtensionRangeOptions{UninterpretedOption: opts}
883 }
884 r.putExtensionRangeNode(er, rng)
885 ers[i] = er
886 }
887 return ers
888}
889
890func (r *parseResult) asEnumValue(ev *enumValueNode) *dpb.EnumValueDescriptorProto {
Scott Baker4a35a702019-11-26 08:17:33 -0800891 num := int32(ev.number.val)
Zack Williamse940c7a2019-08-21 14:25:39 -0700892 evd := &dpb.EnumValueDescriptorProto{Name: proto.String(ev.name.val), Number: proto.Int32(num)}
893 r.putEnumValueNode(evd, ev)
Scott Baker4a35a702019-11-26 08:17:33 -0800894 if opts := ev.options.Elements(); len(opts) > 0 {
895 evd.Options = &dpb.EnumValueOptions{UninterpretedOption: r.asUninterpretedOptions(opts)}
Zack Williamse940c7a2019-08-21 14:25:39 -0700896 }
897 return evd
898}
899
900func (r *parseResult) asMethodDescriptor(node *methodNode) *dpb.MethodDescriptorProto {
901 md := &dpb.MethodDescriptorProto{
902 Name: proto.String(node.name.val),
903 InputType: proto.String(node.input.msgType.val),
904 OutputType: proto.String(node.output.msgType.val),
905 }
906 r.putMethodNode(md, node)
907 if node.input.streamKeyword != nil {
908 md.ClientStreaming = proto.Bool(true)
909 }
910 if node.output.streamKeyword != nil {
911 md.ServerStreaming = proto.Bool(true)
912 }
913 // protoc always adds a MethodOptions if there are brackets
914 // We have a non-nil node.options if there are brackets
915 // We do the same to match protoc as closely as possible
916 // https://github.com/protocolbuffers/protobuf/blob/0c3f43a6190b77f1f68b7425d1b7e1a8257a8d0c/src/google/protobuf/compiler/parser.cc#L2152
917 if node.options != nil {
918 md.Options = &dpb.MethodOptions{UninterpretedOption: r.asUninterpretedOptions(node.options)}
919 }
920 return md
921}
922
923func (r *parseResult) asEnumDescriptor(en *enumNode) *dpb.EnumDescriptorProto {
924 ed := &dpb.EnumDescriptorProto{Name: proto.String(en.name.val)}
925 r.putEnumNode(ed, en)
926 for _, decl := range en.decls {
927 if decl.option != nil {
928 if ed.Options == nil {
929 ed.Options = &dpb.EnumOptions{}
930 }
931 ed.Options.UninterpretedOption = append(ed.Options.UninterpretedOption, r.asUninterpretedOption(decl.option))
932 } else if decl.value != nil {
933 ed.Value = append(ed.Value, r.asEnumValue(decl.value))
934 } else if decl.reserved != nil {
935 for _, n := range decl.reserved.names {
Zack Williamse940c7a2019-08-21 14:25:39 -0700936 ed.ReservedName = append(ed.ReservedName, n.val)
937 }
938 for _, rng := range decl.reserved.ranges {
939 ed.ReservedRange = append(ed.ReservedRange, r.asEnumReservedRange(rng))
940 }
941 }
942 }
943 return ed
944}
945
946func (r *parseResult) asEnumReservedRange(rng *rangeNode) *dpb.EnumDescriptorProto_EnumReservedRange {
947 rr := &dpb.EnumDescriptorProto_EnumReservedRange{
948 Start: proto.Int32(rng.st),
949 End: proto.Int32(rng.en),
950 }
951 r.putEnumReservedRangeNode(rr, rng)
952 return rr
953}
954
955func (r *parseResult) asMessageDescriptor(node *messageNode, isProto3 bool) *dpb.DescriptorProto {
956 msgd := &dpb.DescriptorProto{Name: proto.String(node.name.val)}
957 r.putMessageNode(msgd, node)
Scott Baker4a35a702019-11-26 08:17:33 -0800958 r.addMessageDecls(msgd, node.decls, isProto3)
Zack Williamse940c7a2019-08-21 14:25:39 -0700959 return msgd
960}
961
Scott Baker4a35a702019-11-26 08:17:33 -0800962func (r *parseResult) addMessageDecls(msgd *dpb.DescriptorProto, decls []*messageElement, isProto3 bool) {
Zack Williamse940c7a2019-08-21 14:25:39 -0700963 for _, decl := range decls {
964 if decl.enum != nil {
965 msgd.EnumType = append(msgd.EnumType, r.asEnumDescriptor(decl.enum))
966 } else if decl.extend != nil {
967 r.addExtensions(decl.extend, &msgd.Extension, &msgd.NestedType, isProto3)
968 } else if decl.extensionRange != nil {
969 msgd.ExtensionRange = append(msgd.ExtensionRange, r.asExtensionRanges(decl.extensionRange)...)
970 } else if decl.field != nil {
971 msgd.Field = append(msgd.Field, r.asFieldDescriptor(decl.field))
972 } else if decl.mapField != nil {
973 fd, md := r.asMapDescriptors(decl.mapField, isProto3)
974 msgd.Field = append(msgd.Field, fd)
975 msgd.NestedType = append(msgd.NestedType, md)
976 } else if decl.group != nil {
977 fd, md := r.asGroupDescriptors(decl.group, isProto3)
978 msgd.Field = append(msgd.Field, fd)
979 msgd.NestedType = append(msgd.NestedType, md)
980 } else if decl.oneOf != nil {
981 oodIndex := len(msgd.OneofDecl)
982 ood := &dpb.OneofDescriptorProto{Name: proto.String(decl.oneOf.name.val)}
983 r.putOneOfNode(ood, decl.oneOf)
984 msgd.OneofDecl = append(msgd.OneofDecl, ood)
985 for _, oodecl := range decl.oneOf.decls {
986 if oodecl.option != nil {
987 if ood.Options == nil {
988 ood.Options = &dpb.OneofOptions{}
989 }
990 ood.Options.UninterpretedOption = append(ood.Options.UninterpretedOption, r.asUninterpretedOption(oodecl.option))
991 } else if oodecl.field != nil {
992 fd := r.asFieldDescriptor(oodecl.field)
993 fd.OneofIndex = proto.Int32(int32(oodIndex))
994 msgd.Field = append(msgd.Field, fd)
Scott Baker4a35a702019-11-26 08:17:33 -0800995 } else if oodecl.group != nil {
996 fd, md := r.asGroupDescriptors(oodecl.group, isProto3)
997 fd.OneofIndex = proto.Int32(int32(oodIndex))
998 msgd.Field = append(msgd.Field, fd)
999 msgd.NestedType = append(msgd.NestedType, md)
Zack Williamse940c7a2019-08-21 14:25:39 -07001000 }
1001 }
1002 } else if decl.option != nil {
1003 if msgd.Options == nil {
1004 msgd.Options = &dpb.MessageOptions{}
1005 }
1006 msgd.Options.UninterpretedOption = append(msgd.Options.UninterpretedOption, r.asUninterpretedOption(decl.option))
1007 } else if decl.nested != nil {
1008 msgd.NestedType = append(msgd.NestedType, r.asMessageDescriptor(decl.nested, isProto3))
1009 } else if decl.reserved != nil {
1010 for _, n := range decl.reserved.names {
Zack Williamse940c7a2019-08-21 14:25:39 -07001011 msgd.ReservedName = append(msgd.ReservedName, n.val)
1012 }
1013 for _, rng := range decl.reserved.ranges {
1014 msgd.ReservedRange = append(msgd.ReservedRange, r.asMessageReservedRange(rng))
1015 }
1016 }
1017 }
1018}
1019
1020func (r *parseResult) asMessageReservedRange(rng *rangeNode) *dpb.DescriptorProto_ReservedRange {
1021 rr := &dpb.DescriptorProto_ReservedRange{
1022 Start: proto.Int32(rng.st),
1023 End: proto.Int32(rng.en + 1),
1024 }
1025 r.putMessageReservedRangeNode(rr, rng)
1026 return rr
1027}
1028
1029func (r *parseResult) asServiceDescriptor(svc *serviceNode) *dpb.ServiceDescriptorProto {
1030 sd := &dpb.ServiceDescriptorProto{Name: proto.String(svc.name.val)}
1031 r.putServiceNode(sd, svc)
1032 for _, decl := range svc.decls {
1033 if decl.option != nil {
1034 if sd.Options == nil {
1035 sd.Options = &dpb.ServiceOptions{}
1036 }
1037 sd.Options.UninterpretedOption = append(sd.Options.UninterpretedOption, r.asUninterpretedOption(decl.option))
1038 } else if decl.rpc != nil {
1039 sd.Method = append(sd.Method, r.asMethodDescriptor(decl.rpc))
1040 }
1041 }
1042 return sd
1043}
1044
Scott Baker4a35a702019-11-26 08:17:33 -08001045func toNameParts(ident *compoundIdentNode, offset int) []*optionNamePartNode {
Zack Williamse940c7a2019-08-21 14:25:39 -07001046 parts := strings.Split(ident.val[offset:], ".")
1047 ret := make([]*optionNamePartNode, len(parts))
1048 for i, p := range parts {
1049 ret[i] = &optionNamePartNode{text: ident, offset: offset, length: len(p)}
1050 ret[i].setRange(ident, ident)
1051 offset += len(p) + 1
1052 }
1053 return ret
1054}
1055
1056func checkUint64InInt32Range(lex protoLexer, pos *SourcePos, v uint64) {
1057 if v > math.MaxInt32 {
1058 lexError(lex, pos, fmt.Sprintf("constant %d is out of range for int32 (%d to %d)", v, math.MinInt32, math.MaxInt32))
1059 }
1060}
1061
1062func checkInt64InInt32Range(lex protoLexer, pos *SourcePos, v int64) {
1063 if v > math.MaxInt32 || v < math.MinInt32 {
1064 lexError(lex, pos, fmt.Sprintf("constant %d is out of range for int32 (%d to %d)", v, math.MinInt32, math.MaxInt32))
1065 }
1066}
1067
1068func checkTag(lex protoLexer, pos *SourcePos, v uint64) {
Scott Baker4a35a702019-11-26 08:17:33 -08001069 if v < 1 {
1070 lexError(lex, pos, fmt.Sprintf("tag number %d must be greater than zero", v))
1071 } else if v > internal.MaxTag {
Zack Williamse940c7a2019-08-21 14:25:39 -07001072 lexError(lex, pos, fmt.Sprintf("tag number %d is higher than max allowed tag number (%d)", v, internal.MaxTag))
1073 } else if v >= internal.SpecialReservedStart && v <= internal.SpecialReservedEnd {
1074 lexError(lex, pos, fmt.Sprintf("tag number %d is in disallowed reserved range %d-%d", v, internal.SpecialReservedStart, internal.SpecialReservedEnd))
1075 }
1076}
1077
1078func aggToString(agg []*aggregateEntryNode, buf *bytes.Buffer) {
1079 buf.WriteString("{")
1080 for _, a := range agg {
1081 buf.WriteString(" ")
1082 buf.WriteString(a.name.value())
1083 if v, ok := a.val.(*aggregateLiteralNode); ok {
1084 aggToString(v.elements, buf)
1085 } else {
1086 buf.WriteString(": ")
1087 elementToString(a.val.value(), buf)
1088 }
1089 }
1090 buf.WriteString(" }")
1091}
1092
1093func elementToString(v interface{}, buf *bytes.Buffer) {
1094 switch v := v.(type) {
1095 case bool, int64, uint64, identifier:
1096 fmt.Fprintf(buf, "%v", v)
1097 case float64:
1098 if math.IsInf(v, 1) {
1099 buf.WriteString(": inf")
1100 } else if math.IsInf(v, -1) {
1101 buf.WriteString(": -inf")
1102 } else if math.IsNaN(v) {
1103 buf.WriteString(": nan")
1104 } else {
1105 fmt.Fprintf(buf, ": %v", v)
1106 }
1107 case string:
1108 buf.WriteRune('"')
1109 writeEscapedBytes(buf, []byte(v))
1110 buf.WriteRune('"')
1111 case []valueNode:
1112 buf.WriteString(": [")
1113 first := true
1114 for _, e := range v {
1115 if first {
1116 first = false
1117 } else {
1118 buf.WriteString(", ")
1119 }
1120 elementToString(e.value(), buf)
1121 }
1122 buf.WriteString("]")
1123 case []*aggregateEntryNode:
1124 aggToString(v, buf)
1125 }
1126}
1127
// writeEscapedBytes appends b to buf using C-style escaping so that the result
// can be placed inside a double-quoted string literal.
//
// Newline, carriage return, tab, both quote characters and backslash are
// written as two-character backslash escapes. Other bytes in the printable
// ASCII range 0x20 through 0x7e are written unchanged. Every remaining byte
// (control characters, DEL and anything >= 0x80) is written as a three-digit
// octal escape.
func writeEscapedBytes(buf *bytes.Buffer, b []byte) {
	for _, c := range b {
		switch c {
		case '\n':
			buf.WriteString("\\n")
		case '\r':
			buf.WriteString("\\r")
		case '\t':
			buf.WriteString("\\t")
		case '"':
			buf.WriteString("\\\"")
		case '\'':
			buf.WriteString("\\'")
		case '\\':
			buf.WriteString("\\\\")
		default:
			// 0x7f (DEL) is a control character, not a printable one, so the
			// upper bound is exclusive. Quote and backslash never reach this
			// branch because they are handled by the cases above.
			if c >= 0x20 && c < 0x7f {
				// simple printable characters
				buf.WriteByte(c)
			} else {
				// use octal escape for all other values
				buf.WriteRune('\\')
				buf.WriteByte('0' + ((c >> 6) & 0x7))
				buf.WriteByte('0' + ((c >> 3) & 0x7))
				buf.WriteByte('0' + (c & 0x7))
			}
		}
	}
}
1157
Scott Baker4a35a702019-11-26 08:17:33 -08001158func basicValidate(res *parseResult) {
Zack Williamse940c7a2019-08-21 14:25:39 -07001159 fd := res.fd
1160 isProto3 := fd.GetSyntax() == "proto3"
1161
1162 for _, md := range fd.MessageType {
Scott Baker4a35a702019-11-26 08:17:33 -08001163 if validateMessage(res, isProto3, "", md) != nil {
1164 return
Zack Williamse940c7a2019-08-21 14:25:39 -07001165 }
1166 }
1167
1168 for _, ed := range fd.EnumType {
Scott Baker4a35a702019-11-26 08:17:33 -08001169 if validateEnum(res, isProto3, "", ed) != nil {
1170 return
Zack Williamse940c7a2019-08-21 14:25:39 -07001171 }
1172 }
1173
1174 for _, fld := range fd.Extension {
Scott Baker4a35a702019-11-26 08:17:33 -08001175 if validateField(res, isProto3, "", fld) != nil {
1176 return
Zack Williamse940c7a2019-08-21 14:25:39 -07001177 }
1178 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001179}
1180
1181func validateMessage(res *parseResult, isProto3 bool, prefix string, md *dpb.DescriptorProto) error {
1182 nextPrefix := md.GetName() + "."
1183
1184 for _, fld := range md.Field {
1185 if err := validateField(res, isProto3, nextPrefix, fld); err != nil {
1186 return err
1187 }
1188 }
1189 for _, fld := range md.Extension {
1190 if err := validateField(res, isProto3, nextPrefix, fld); err != nil {
1191 return err
1192 }
1193 }
1194 for _, ed := range md.EnumType {
1195 if err := validateEnum(res, isProto3, nextPrefix, ed); err != nil {
1196 return err
1197 }
1198 }
1199 for _, nmd := range md.NestedType {
1200 if err := validateMessage(res, isProto3, nextPrefix, nmd); err != nil {
1201 return err
1202 }
1203 }
1204
1205 scope := fmt.Sprintf("message %s%s", prefix, md.GetName())
1206
1207 if isProto3 && len(md.ExtensionRange) > 0 {
1208 n := res.getExtensionRangeNode(md.ExtensionRange[0])
Scott Baker4a35a702019-11-26 08:17:33 -08001209 if err := res.errs.handleError(ErrorWithSourcePos{Pos: n.start(), Underlying: fmt.Errorf("%s: extension ranges are not allowed in proto3", scope)}); err != nil {
1210 return err
1211 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001212 }
1213
1214 if index, err := findOption(res, scope, md.Options.GetUninterpretedOption(), "map_entry"); err != nil {
Scott Baker4a35a702019-11-26 08:17:33 -08001215 if err := res.errs.handleError(err); err != nil {
1216 return err
1217 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001218 } else if index >= 0 {
1219 opt := md.Options.UninterpretedOption[index]
1220 optn := res.getOptionNode(opt)
1221 md.Options.UninterpretedOption = removeOption(md.Options.UninterpretedOption, index)
1222 valid := false
1223 if opt.IdentifierValue != nil {
1224 if opt.GetIdentifierValue() == "true" {
Zack Williamse940c7a2019-08-21 14:25:39 -07001225 valid = true
Scott Baker4a35a702019-11-26 08:17:33 -08001226 if err := res.errs.handleError(ErrorWithSourcePos{Pos: optn.getValue().start(), Underlying: fmt.Errorf("%s: map_entry option should not be set explicitly; use map type instead", scope)}); err != nil {
1227 return err
1228 }
1229 } else if opt.GetIdentifierValue() == "false" {
1230 valid = true
1231 md.Options.MapEntry = proto.Bool(false)
Zack Williamse940c7a2019-08-21 14:25:39 -07001232 }
1233 }
1234 if !valid {
Scott Baker4a35a702019-11-26 08:17:33 -08001235 if err := res.errs.handleError(ErrorWithSourcePos{Pos: optn.getValue().start(), Underlying: fmt.Errorf("%s: expecting bool value for map_entry option", scope)}); err != nil {
1236 return err
1237 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001238 }
1239 }
1240
1241 // reserved ranges should not overlap
1242 rsvd := make(tagRanges, len(md.ReservedRange))
1243 for i, r := range md.ReservedRange {
1244 n := res.getMessageReservedRangeNode(r)
1245 rsvd[i] = tagRange{start: r.GetStart(), end: r.GetEnd(), node: n}
1246
1247 }
1248 sort.Sort(rsvd)
1249 for i := 1; i < len(rsvd); i++ {
1250 if rsvd[i].start < rsvd[i-1].end {
Scott Baker4a35a702019-11-26 08:17:33 -08001251 if err := res.errs.handleError(ErrorWithSourcePos{Pos: rsvd[i].node.start(), Underlying: fmt.Errorf("%s: reserved ranges overlap: %d to %d and %d to %d", scope, rsvd[i-1].start, rsvd[i-1].end-1, rsvd[i].start, rsvd[i].end-1)}); err != nil {
1252 return err
1253 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001254 }
1255 }
1256
1257 // extensions ranges should not overlap
1258 exts := make(tagRanges, len(md.ExtensionRange))
1259 for i, r := range md.ExtensionRange {
1260 n := res.getExtensionRangeNode(r)
1261 exts[i] = tagRange{start: r.GetStart(), end: r.GetEnd(), node: n}
1262 }
1263 sort.Sort(exts)
1264 for i := 1; i < len(exts); i++ {
1265 if exts[i].start < exts[i-1].end {
Scott Baker4a35a702019-11-26 08:17:33 -08001266 if err := res.errs.handleError(ErrorWithSourcePos{Pos: exts[i].node.start(), Underlying: fmt.Errorf("%s: extension ranges overlap: %d to %d and %d to %d", scope, exts[i-1].start, exts[i-1].end-1, exts[i].start, exts[i].end-1)}); err != nil {
1267 return err
1268 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001269 }
1270 }
1271
1272 // see if any extension range overlaps any reserved range
1273 var i, j int // i indexes rsvd; j indexes exts
1274 for i < len(rsvd) && j < len(exts) {
1275 if rsvd[i].start >= exts[j].start && rsvd[i].start < exts[j].end ||
1276 exts[j].start >= rsvd[i].start && exts[j].start < rsvd[i].end {
1277
1278 var pos *SourcePos
1279 if rsvd[i].start >= exts[j].start && rsvd[i].start < exts[j].end {
1280 pos = rsvd[i].node.start()
1281 } else {
1282 pos = exts[j].node.start()
1283 }
1284 // ranges overlap
Scott Baker4a35a702019-11-26 08:17:33 -08001285 if err := res.errs.handleError(ErrorWithSourcePos{Pos: pos, Underlying: fmt.Errorf("%s: extension range %d to %d overlaps reserved range %d to %d", scope, exts[j].start, exts[j].end-1, rsvd[i].start, rsvd[i].end-1)}); err != nil {
1286 return err
1287 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001288 }
1289 if rsvd[i].start < exts[j].start {
1290 i++
1291 } else {
1292 j++
1293 }
1294 }
1295
1296 // now, check that fields don't re-use tags and don't try to use extension
1297 // or reserved ranges or reserved names
1298 rsvdNames := map[string]struct{}{}
1299 for _, n := range md.ReservedName {
1300 rsvdNames[n] = struct{}{}
1301 }
1302 fieldTags := map[int32]string{}
1303 for _, fld := range md.Field {
1304 fn := res.getFieldNode(fld)
1305 if _, ok := rsvdNames[fld.GetName()]; ok {
Scott Baker4a35a702019-11-26 08:17:33 -08001306 if err := res.errs.handleError(ErrorWithSourcePos{Pos: fn.fieldName().start(), Underlying: fmt.Errorf("%s: field %s is using a reserved name", scope, fld.GetName())}); err != nil {
1307 return err
1308 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001309 }
1310 if existing := fieldTags[fld.GetNumber()]; existing != "" {
Scott Baker4a35a702019-11-26 08:17:33 -08001311 if err := res.errs.handleError(ErrorWithSourcePos{Pos: fn.fieldTag().start(), Underlying: fmt.Errorf("%s: fields %s and %s both have the same tag %d", scope, existing, fld.GetName(), fld.GetNumber())}); err != nil {
1312 return err
1313 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001314 }
1315 fieldTags[fld.GetNumber()] = fld.GetName()
1316 // check reserved ranges
1317 r := sort.Search(len(rsvd), func(index int) bool { return rsvd[index].end > fld.GetNumber() })
1318 if r < len(rsvd) && rsvd[r].start <= fld.GetNumber() {
Scott Baker4a35a702019-11-26 08:17:33 -08001319 if err := res.errs.handleError(ErrorWithSourcePos{Pos: fn.fieldTag().start(), Underlying: fmt.Errorf("%s: field %s is using tag %d which is in reserved range %d to %d", scope, fld.GetName(), fld.GetNumber(), rsvd[r].start, rsvd[r].end-1)}); err != nil {
1320 return err
1321 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001322 }
1323 // and check extension ranges
1324 e := sort.Search(len(exts), func(index int) bool { return exts[index].end > fld.GetNumber() })
1325 if e < len(exts) && exts[e].start <= fld.GetNumber() {
Scott Baker4a35a702019-11-26 08:17:33 -08001326 if err := res.errs.handleError(ErrorWithSourcePos{Pos: fn.fieldTag().start(), Underlying: fmt.Errorf("%s: field %s is using tag %d which is in extension range %d to %d", scope, fld.GetName(), fld.GetNumber(), exts[e].start, exts[e].end-1)}); err != nil {
1327 return err
1328 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001329 }
1330 }
1331
1332 return nil
1333}
1334
1335func validateEnum(res *parseResult, isProto3 bool, prefix string, ed *dpb.EnumDescriptorProto) error {
1336 scope := fmt.Sprintf("enum %s%s", prefix, ed.GetName())
1337
Scott Baker4a35a702019-11-26 08:17:33 -08001338 allowAlias := false
Zack Williamse940c7a2019-08-21 14:25:39 -07001339 if index, err := findOption(res, scope, ed.Options.GetUninterpretedOption(), "allow_alias"); err != nil {
Scott Baker4a35a702019-11-26 08:17:33 -08001340 if err := res.errs.handleError(err); err != nil {
1341 return err
1342 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001343 } else if index >= 0 {
1344 opt := ed.Options.UninterpretedOption[index]
Zack Williamse940c7a2019-08-21 14:25:39 -07001345 valid := false
1346 if opt.IdentifierValue != nil {
1347 if opt.GetIdentifierValue() == "true" {
Scott Baker4a35a702019-11-26 08:17:33 -08001348 allowAlias = true
Zack Williamse940c7a2019-08-21 14:25:39 -07001349 valid = true
1350 } else if opt.GetIdentifierValue() == "false" {
Zack Williamse940c7a2019-08-21 14:25:39 -07001351 valid = true
1352 }
1353 }
1354 if !valid {
1355 optNode := res.getOptionNode(opt)
Scott Baker4a35a702019-11-26 08:17:33 -08001356 if err := res.errs.handleError(ErrorWithSourcePos{Pos: optNode.getValue().start(), Underlying: fmt.Errorf("%s: expecting bool value for allow_alias option", scope)}); err != nil {
1357 return err
1358 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001359 }
1360 }
1361
1362 if isProto3 && ed.Value[0].GetNumber() != 0 {
1363 evNode := res.getEnumValueNode(ed.Value[0])
Scott Baker4a35a702019-11-26 08:17:33 -08001364 if err := res.errs.handleError(ErrorWithSourcePos{Pos: evNode.getNumber().start(), Underlying: fmt.Errorf("%s: proto3 requires that first value in enum have numeric value of 0", scope)}); err != nil {
1365 return err
1366 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001367 }
1368
Scott Baker4a35a702019-11-26 08:17:33 -08001369 if !allowAlias {
Zack Williamse940c7a2019-08-21 14:25:39 -07001370 // make sure all value numbers are distinct
1371 vals := map[int32]string{}
1372 for _, evd := range ed.Value {
1373 if existing := vals[evd.GetNumber()]; existing != "" {
1374 evNode := res.getEnumValueNode(evd)
Scott Baker4a35a702019-11-26 08:17:33 -08001375 if err := res.errs.handleError(ErrorWithSourcePos{Pos: evNode.getNumber().start(), Underlying: fmt.Errorf("%s: values %s and %s both have the same numeric value %d; use allow_alias option if intentional", scope, existing, evd.GetName(), evd.GetNumber())}); err != nil {
1376 return err
1377 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001378 }
1379 vals[evd.GetNumber()] = evd.GetName()
1380 }
1381 }
1382
1383 // reserved ranges should not overlap
1384 rsvd := make(tagRanges, len(ed.ReservedRange))
1385 for i, r := range ed.ReservedRange {
1386 n := res.getEnumReservedRangeNode(r)
1387 rsvd[i] = tagRange{start: r.GetStart(), end: r.GetEnd(), node: n}
1388 }
1389 sort.Sort(rsvd)
1390 for i := 1; i < len(rsvd); i++ {
1391 if rsvd[i].start <= rsvd[i-1].end {
Scott Baker4a35a702019-11-26 08:17:33 -08001392 if err := res.errs.handleError(ErrorWithSourcePos{Pos: rsvd[i].node.start(), Underlying: fmt.Errorf("%s: reserved ranges overlap: %d to %d and %d to %d", scope, rsvd[i-1].start, rsvd[i-1].end, rsvd[i].start, rsvd[i].end)}); err != nil {
1393 return err
1394 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001395 }
1396 }
1397
1398 // now, check that fields don't re-use tags and don't try to use extension
1399 // or reserved ranges or reserved names
1400 rsvdNames := map[string]struct{}{}
1401 for _, n := range ed.ReservedName {
1402 rsvdNames[n] = struct{}{}
1403 }
1404 for _, ev := range ed.Value {
1405 evn := res.getEnumValueNode(ev)
1406 if _, ok := rsvdNames[ev.GetName()]; ok {
Scott Baker4a35a702019-11-26 08:17:33 -08001407 if err := res.errs.handleError(ErrorWithSourcePos{Pos: evn.getName().start(), Underlying: fmt.Errorf("%s: value %s is using a reserved name", scope, ev.GetName())}); err != nil {
1408 return err
1409 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001410 }
1411 // check reserved ranges
1412 r := sort.Search(len(rsvd), func(index int) bool { return rsvd[index].end >= ev.GetNumber() })
1413 if r < len(rsvd) && rsvd[r].start <= ev.GetNumber() {
Scott Baker4a35a702019-11-26 08:17:33 -08001414 if err := res.errs.handleError(ErrorWithSourcePos{Pos: evn.getNumber().start(), Underlying: fmt.Errorf("%s: value %s is using number %d which is in reserved range %d to %d", scope, ev.GetName(), ev.GetNumber(), rsvd[r].start, rsvd[r].end)}); err != nil {
1415 return err
1416 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001417 }
1418 }
1419
1420 return nil
1421}
1422
1423func validateField(res *parseResult, isProto3 bool, prefix string, fld *dpb.FieldDescriptorProto) error {
1424 scope := fmt.Sprintf("field %s%s", prefix, fld.GetName())
1425
1426 node := res.getFieldNode(fld)
1427 if isProto3 {
1428 if fld.GetType() == dpb.FieldDescriptorProto_TYPE_GROUP {
1429 n := node.(*groupNode)
Scott Baker4a35a702019-11-26 08:17:33 -08001430 if err := res.errs.handleError(ErrorWithSourcePos{Pos: n.groupKeyword.start(), Underlying: fmt.Errorf("%s: groups are not allowed in proto3", scope)}); err != nil {
1431 return err
1432 }
1433 } else if fld.Label != nil && fld.GetLabel() != dpb.FieldDescriptorProto_LABEL_REPEATED {
1434 if err := res.errs.handleError(ErrorWithSourcePos{Pos: node.fieldLabel().start(), Underlying: fmt.Errorf("%s: field has label %v, but proto3 must omit labels other than 'repeated'", scope, fld.GetLabel())}); err != nil {
1435 return err
1436 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001437 }
1438 if index, err := findOption(res, scope, fld.Options.GetUninterpretedOption(), "default"); err != nil {
Scott Baker4a35a702019-11-26 08:17:33 -08001439 if err := res.errs.handleError(err); err != nil {
1440 return err
1441 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001442 } else if index >= 0 {
1443 optNode := res.getOptionNode(fld.Options.GetUninterpretedOption()[index])
Scott Baker4a35a702019-11-26 08:17:33 -08001444 if err := res.errs.handleError(ErrorWithSourcePos{Pos: optNode.getName().start(), Underlying: fmt.Errorf("%s: default values are not allowed in proto3", scope)}); err != nil {
1445 return err
1446 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001447 }
1448 } else {
1449 if fld.Label == nil && fld.OneofIndex == nil {
Scott Baker4a35a702019-11-26 08:17:33 -08001450 if err := res.errs.handleError(ErrorWithSourcePos{Pos: node.fieldName().start(), Underlying: fmt.Errorf("%s: field has no label, but proto2 must indicate 'optional' or 'required'", scope)}); err != nil {
1451 return err
1452 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001453 }
1454 if fld.GetExtendee() != "" && fld.Label != nil && fld.GetLabel() == dpb.FieldDescriptorProto_LABEL_REQUIRED {
Scott Baker4a35a702019-11-26 08:17:33 -08001455 if err := res.errs.handleError(ErrorWithSourcePos{Pos: node.fieldLabel().start(), Underlying: fmt.Errorf("%s: extension fields cannot be 'required'", scope)}); err != nil {
1456 return err
1457 }
Zack Williamse940c7a2019-08-21 14:25:39 -07001458 }
1459 }
1460
1461 // finally, set any missing label to optional
1462 if fld.Label == nil {
1463 fld.Label = dpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum()
1464 }
Scott Baker4a35a702019-11-26 08:17:33 -08001465
Zack Williamse940c7a2019-08-21 14:25:39 -07001466 return nil
1467}
1468
1469func findOption(res *parseResult, scope string, opts []*dpb.UninterpretedOption, name string) (int, error) {
1470 found := -1
1471 for i, opt := range opts {
1472 if len(opt.Name) != 1 {
1473 continue
1474 }
1475 if opt.Name[0].GetIsExtension() || opt.Name[0].GetNamePart() != name {
1476 continue
1477 }
1478 if found >= 0 {
1479 optNode := res.getOptionNode(opt)
1480 return -1, ErrorWithSourcePos{Pos: optNode.getName().start(), Underlying: fmt.Errorf("%s: option %s cannot be defined more than once", scope, name)}
1481 }
1482 found = i
1483 }
1484 return found, nil
1485}
1486
1487func removeOption(uo []*dpb.UninterpretedOption, indexToRemove int) []*dpb.UninterpretedOption {
1488 if indexToRemove == 0 {
1489 return uo[1:]
1490 } else if int(indexToRemove) == len(uo)-1 {
1491 return uo[:len(uo)-1]
1492 } else {
1493 return append(uo[:indexToRemove], uo[indexToRemove+1:]...)
1494 }
1495}
1496
1497type tagRange struct {
1498 start int32
1499 end int32
1500 node rangeDecl
1501}
1502
1503type tagRanges []tagRange
1504
1505func (r tagRanges) Len() int {
1506 return len(r)
1507}
1508
1509func (r tagRanges) Less(i, j int) bool {
1510 return r[i].start < r[j].start ||
1511 (r[i].start == r[j].start && r[i].end < r[j].end)
1512}
1513
1514func (r tagRanges) Swap(i, j int) {
1515 r[i], r[j] = r[j], r[i]
1516}