blob: f933cd85ef23406593675baee609a440dd735dc2 [file] [log] [blame]
Matteo Scandoloa6a3aee2019-11-26 13:30:14 -07001package httprule
2
3import (
4 "fmt"
5 "strings"
6
7 "github.com/golang/glog"
8)
9
10// InvalidTemplateError indicates that the path template is not valid.
11type InvalidTemplateError struct {
12 tmpl string
13 msg string
14}
15
16func (e InvalidTemplateError) Error() string {
17 return fmt.Sprintf("%s: %s", e.msg, e.tmpl)
18}
19
20// Parse parses the string representation of path template
21func Parse(tmpl string) (Compiler, error) {
22 if !strings.HasPrefix(tmpl, "/") {
23 return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"}
24 }
25 tokens, verb := tokenize(tmpl[1:])
26
27 p := parser{tokens: tokens}
28 segs, err := p.topLevelSegments()
29 if err != nil {
30 return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()}
31 }
32
33 return template{
34 segments: segs,
35 verb: verb,
36 template: tmpl,
37 }, nil
38}
39
40func tokenize(path string) (tokens []string, verb string) {
41 if path == "" {
42 return []string{eof}, ""
43 }
44
45 const (
46 init = iota
47 field
48 nested
49 )
50 var (
51 st = init
52 )
53 for path != "" {
54 var idx int
55 switch st {
56 case init:
57 idx = strings.IndexAny(path, "/{")
58 case field:
59 idx = strings.IndexAny(path, ".=}")
60 case nested:
61 idx = strings.IndexAny(path, "/}")
62 }
63 if idx < 0 {
64 tokens = append(tokens, path)
65 break
66 }
67 switch r := path[idx]; r {
68 case '/', '.':
69 case '{':
70 st = field
71 case '=':
72 st = nested
73 case '}':
74 st = init
75 }
76 if idx == 0 {
77 tokens = append(tokens, path[idx:idx+1])
78 } else {
79 tokens = append(tokens, path[:idx], path[idx:idx+1])
80 }
81 path = path[idx+1:]
82 }
83
84 l := len(tokens)
85 t := tokens[l-1]
86 if idx := strings.LastIndex(t, ":"); idx == 0 {
87 tokens, verb = tokens[:l-1], t[1:]
88 } else if idx > 0 {
89 tokens[l-1], verb = t[:idx], t[idx+1:]
90 }
91 tokens = append(tokens, eof)
92 return tokens, verb
93}
94
95// parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto.
96type parser struct {
97 tokens []string
98 accepted []string
99}
100
101// topLevelSegments is the target of this parser.
102func (p *parser) topLevelSegments() ([]segment, error) {
103 glog.V(1).Infof("Parsing %q", p.tokens)
104 segs, err := p.segments()
105 if err != nil {
106 return nil, err
107 }
108 glog.V(2).Infof("accept segments: %q; %q", p.accepted, p.tokens)
109 if _, err := p.accept(typeEOF); err != nil {
110 return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, ""))
111 }
112 glog.V(2).Infof("accept eof: %q; %q", p.accepted, p.tokens)
113 return segs, nil
114}
115
116func (p *parser) segments() ([]segment, error) {
117 s, err := p.segment()
118 if err != nil {
119 return nil, err
120 }
121 glog.V(2).Infof("accept segment: %q; %q", p.accepted, p.tokens)
122
123 segs := []segment{s}
124 for {
125 if _, err := p.accept("/"); err != nil {
126 return segs, nil
127 }
128 s, err := p.segment()
129 if err != nil {
130 return segs, err
131 }
132 segs = append(segs, s)
133 glog.V(2).Infof("accept segment: %q; %q", p.accepted, p.tokens)
134 }
135}
136
137func (p *parser) segment() (segment, error) {
138 if _, err := p.accept("*"); err == nil {
139 return wildcard{}, nil
140 }
141 if _, err := p.accept("**"); err == nil {
142 return deepWildcard{}, nil
143 }
144 if l, err := p.literal(); err == nil {
145 return l, nil
146 }
147
148 v, err := p.variable()
149 if err != nil {
150 return nil, fmt.Errorf("segment neither wildcards, literal or variable: %v", err)
151 }
152 return v, err
153}
154
155func (p *parser) literal() (segment, error) {
156 lit, err := p.accept(typeLiteral)
157 if err != nil {
158 return nil, err
159 }
160 return literal(lit), nil
161}
162
163func (p *parser) variable() (segment, error) {
164 if _, err := p.accept("{"); err != nil {
165 return nil, err
166 }
167
168 path, err := p.fieldPath()
169 if err != nil {
170 return nil, err
171 }
172
173 var segs []segment
174 if _, err := p.accept("="); err == nil {
175 segs, err = p.segments()
176 if err != nil {
177 return nil, fmt.Errorf("invalid segment in variable %q: %v", path, err)
178 }
179 } else {
180 segs = []segment{wildcard{}}
181 }
182
183 if _, err := p.accept("}"); err != nil {
184 return nil, fmt.Errorf("unterminated variable segment: %s", path)
185 }
186 return variable{
187 path: path,
188 segments: segs,
189 }, nil
190}
191
192func (p *parser) fieldPath() (string, error) {
193 c, err := p.accept(typeIdent)
194 if err != nil {
195 return "", err
196 }
197 components := []string{c}
198 for {
199 if _, err = p.accept("."); err != nil {
200 return strings.Join(components, "."), nil
201 }
202 c, err := p.accept(typeIdent)
203 if err != nil {
204 return "", fmt.Errorf("invalid field path component: %v", err)
205 }
206 components = append(components, c)
207 }
208}
209
210// A termType is a type of terminal symbols.
211type termType string
212
213// These constants define some of valid values of termType.
214// They improve readability of parse functions.
215//
216// You can also use "/", "*", "**", "." or "=" as valid values.
217const (
218 typeIdent = termType("ident")
219 typeLiteral = termType("literal")
220 typeEOF = termType("$")
221)
222
223const (
224 // eof is the terminal symbol which always appears at the end of token sequence.
225 eof = "\u0000"
226)
227
228// accept tries to accept a token in "p".
229// This function consumes a token and returns it if it matches to the specified "term".
230// If it doesn't match, the function does not consume any tokens and return an error.
231func (p *parser) accept(term termType) (string, error) {
232 t := p.tokens[0]
233 switch term {
234 case "/", "*", "**", ".", "=", "{", "}":
235 if t != string(term) && t != "/" {
236 return "", fmt.Errorf("expected %q but got %q", term, t)
237 }
238 case typeEOF:
239 if t != eof {
240 return "", fmt.Errorf("expected EOF but got %q", t)
241 }
242 case typeIdent:
243 if err := expectIdent(t); err != nil {
244 return "", err
245 }
246 case typeLiteral:
247 if err := expectPChars(t); err != nil {
248 return "", err
249 }
250 default:
251 return "", fmt.Errorf("unknown termType %q", term)
252 }
253 p.tokens = p.tokens[1:]
254 p.accepted = append(p.accepted, t)
255 return t, nil
256}
257
258// expectPChars determines if "t" consists of only pchars defined in RFC3986.
259//
260// https://www.ietf.org/rfc/rfc3986.txt, P.49
261// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
262// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
263// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
264// / "*" / "+" / "," / ";" / "="
265// pct-encoded = "%" HEXDIG HEXDIG
266func expectPChars(t string) error {
267 const (
268 init = iota
269 pct1
270 pct2
271 )
272 st := init
273 for _, r := range t {
274 if st != init {
275 if !isHexDigit(r) {
276 return fmt.Errorf("invalid hexdigit: %c(%U)", r, r)
277 }
278 switch st {
279 case pct1:
280 st = pct2
281 case pct2:
282 st = init
283 }
284 continue
285 }
286
287 // unreserved
288 switch {
289 case 'A' <= r && r <= 'Z':
290 continue
291 case 'a' <= r && r <= 'z':
292 continue
293 case '0' <= r && r <= '9':
294 continue
295 }
296 switch r {
297 case '-', '.', '_', '~':
298 // unreserved
299 case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
300 // sub-delims
301 case ':', '@':
302 // rest of pchar
303 case '%':
304 // pct-encoded
305 st = pct1
306 default:
307 return fmt.Errorf("invalid character in path segment: %q(%U)", r, r)
308 }
309 }
310 if st != init {
311 return fmt.Errorf("invalid percent-encoding in %q", t)
312 }
313 return nil
314}
315
316// expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*).
317func expectIdent(ident string) error {
318 if ident == "" {
319 return fmt.Errorf("empty identifier")
320 }
321 for pos, r := range ident {
322 switch {
323 case '0' <= r && r <= '9':
324 if pos == 0 {
325 return fmt.Errorf("identifier starting with digit: %s", ident)
326 }
327 continue
328 case 'A' <= r && r <= 'Z':
329 continue
330 case 'a' <= r && r <= 'z':
331 continue
332 case r == '_':
333 continue
334 default:
335 return fmt.Errorf("invalid character %q(%U) in identifier: %s", r, r, ident)
336 }
337 }
338 return nil
339}
340
341func isHexDigit(r rune) bool {
342 switch {
343 case '0' <= r && r <= '9':
344 return true
345 case 'A' <= r && r <= 'F':
346 return true
347 case 'a' <= r && r <= 'f':
348 return true
349 }
350 return false
351}