Matteo Scandolo | a6a3aee | 2019-11-26 13:30:14 -0700 | [diff] [blame] | 1 | package httprule |
| 2 | |
| 3 | import ( |
| 4 | "fmt" |
| 5 | "strings" |
| 6 | |
| 7 | "github.com/golang/glog" |
| 8 | ) |
| 9 | |
| 10 | // InvalidTemplateError indicates that the path template is not valid. |
| 11 | type InvalidTemplateError struct { |
| 12 | tmpl string |
| 13 | msg string |
| 14 | } |
| 15 | |
| 16 | func (e InvalidTemplateError) Error() string { |
| 17 | return fmt.Sprintf("%s: %s", e.msg, e.tmpl) |
| 18 | } |
| 19 | |
| 20 | // Parse parses the string representation of path template |
| 21 | func Parse(tmpl string) (Compiler, error) { |
| 22 | if !strings.HasPrefix(tmpl, "/") { |
| 23 | return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"} |
| 24 | } |
| 25 | tokens, verb := tokenize(tmpl[1:]) |
| 26 | |
| 27 | p := parser{tokens: tokens} |
| 28 | segs, err := p.topLevelSegments() |
| 29 | if err != nil { |
| 30 | return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()} |
| 31 | } |
| 32 | |
| 33 | return template{ |
| 34 | segments: segs, |
| 35 | verb: verb, |
| 36 | template: tmpl, |
| 37 | }, nil |
| 38 | } |
| 39 | |
| 40 | func tokenize(path string) (tokens []string, verb string) { |
| 41 | if path == "" { |
| 42 | return []string{eof}, "" |
| 43 | } |
| 44 | |
| 45 | const ( |
| 46 | init = iota |
| 47 | field |
| 48 | nested |
| 49 | ) |
| 50 | var ( |
| 51 | st = init |
| 52 | ) |
| 53 | for path != "" { |
| 54 | var idx int |
| 55 | switch st { |
| 56 | case init: |
| 57 | idx = strings.IndexAny(path, "/{") |
| 58 | case field: |
| 59 | idx = strings.IndexAny(path, ".=}") |
| 60 | case nested: |
| 61 | idx = strings.IndexAny(path, "/}") |
| 62 | } |
| 63 | if idx < 0 { |
| 64 | tokens = append(tokens, path) |
| 65 | break |
| 66 | } |
| 67 | switch r := path[idx]; r { |
| 68 | case '/', '.': |
| 69 | case '{': |
| 70 | st = field |
| 71 | case '=': |
| 72 | st = nested |
| 73 | case '}': |
| 74 | st = init |
| 75 | } |
| 76 | if idx == 0 { |
| 77 | tokens = append(tokens, path[idx:idx+1]) |
| 78 | } else { |
| 79 | tokens = append(tokens, path[:idx], path[idx:idx+1]) |
| 80 | } |
| 81 | path = path[idx+1:] |
| 82 | } |
| 83 | |
| 84 | l := len(tokens) |
| 85 | t := tokens[l-1] |
| 86 | if idx := strings.LastIndex(t, ":"); idx == 0 { |
| 87 | tokens, verb = tokens[:l-1], t[1:] |
| 88 | } else if idx > 0 { |
| 89 | tokens[l-1], verb = t[:idx], t[idx+1:] |
| 90 | } |
| 91 | tokens = append(tokens, eof) |
| 92 | return tokens, verb |
| 93 | } |
| 94 | |
| 95 | // parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto. |
| 96 | type parser struct { |
| 97 | tokens []string |
| 98 | accepted []string |
| 99 | } |
| 100 | |
| 101 | // topLevelSegments is the target of this parser. |
| 102 | func (p *parser) topLevelSegments() ([]segment, error) { |
| 103 | glog.V(1).Infof("Parsing %q", p.tokens) |
| 104 | segs, err := p.segments() |
| 105 | if err != nil { |
| 106 | return nil, err |
| 107 | } |
| 108 | glog.V(2).Infof("accept segments: %q; %q", p.accepted, p.tokens) |
| 109 | if _, err := p.accept(typeEOF); err != nil { |
| 110 | return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, "")) |
| 111 | } |
| 112 | glog.V(2).Infof("accept eof: %q; %q", p.accepted, p.tokens) |
| 113 | return segs, nil |
| 114 | } |
| 115 | |
| 116 | func (p *parser) segments() ([]segment, error) { |
| 117 | s, err := p.segment() |
| 118 | if err != nil { |
| 119 | return nil, err |
| 120 | } |
| 121 | glog.V(2).Infof("accept segment: %q; %q", p.accepted, p.tokens) |
| 122 | |
| 123 | segs := []segment{s} |
| 124 | for { |
| 125 | if _, err := p.accept("/"); err != nil { |
| 126 | return segs, nil |
| 127 | } |
| 128 | s, err := p.segment() |
| 129 | if err != nil { |
| 130 | return segs, err |
| 131 | } |
| 132 | segs = append(segs, s) |
| 133 | glog.V(2).Infof("accept segment: %q; %q", p.accepted, p.tokens) |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | func (p *parser) segment() (segment, error) { |
| 138 | if _, err := p.accept("*"); err == nil { |
| 139 | return wildcard{}, nil |
| 140 | } |
| 141 | if _, err := p.accept("**"); err == nil { |
| 142 | return deepWildcard{}, nil |
| 143 | } |
| 144 | if l, err := p.literal(); err == nil { |
| 145 | return l, nil |
| 146 | } |
| 147 | |
| 148 | v, err := p.variable() |
| 149 | if err != nil { |
| 150 | return nil, fmt.Errorf("segment neither wildcards, literal or variable: %v", err) |
| 151 | } |
| 152 | return v, err |
| 153 | } |
| 154 | |
| 155 | func (p *parser) literal() (segment, error) { |
| 156 | lit, err := p.accept(typeLiteral) |
| 157 | if err != nil { |
| 158 | return nil, err |
| 159 | } |
| 160 | return literal(lit), nil |
| 161 | } |
| 162 | |
| 163 | func (p *parser) variable() (segment, error) { |
| 164 | if _, err := p.accept("{"); err != nil { |
| 165 | return nil, err |
| 166 | } |
| 167 | |
| 168 | path, err := p.fieldPath() |
| 169 | if err != nil { |
| 170 | return nil, err |
| 171 | } |
| 172 | |
| 173 | var segs []segment |
| 174 | if _, err := p.accept("="); err == nil { |
| 175 | segs, err = p.segments() |
| 176 | if err != nil { |
| 177 | return nil, fmt.Errorf("invalid segment in variable %q: %v", path, err) |
| 178 | } |
| 179 | } else { |
| 180 | segs = []segment{wildcard{}} |
| 181 | } |
| 182 | |
| 183 | if _, err := p.accept("}"); err != nil { |
| 184 | return nil, fmt.Errorf("unterminated variable segment: %s", path) |
| 185 | } |
| 186 | return variable{ |
| 187 | path: path, |
| 188 | segments: segs, |
| 189 | }, nil |
| 190 | } |
| 191 | |
| 192 | func (p *parser) fieldPath() (string, error) { |
| 193 | c, err := p.accept(typeIdent) |
| 194 | if err != nil { |
| 195 | return "", err |
| 196 | } |
| 197 | components := []string{c} |
| 198 | for { |
| 199 | if _, err = p.accept("."); err != nil { |
| 200 | return strings.Join(components, "."), nil |
| 201 | } |
| 202 | c, err := p.accept(typeIdent) |
| 203 | if err != nil { |
| 204 | return "", fmt.Errorf("invalid field path component: %v", err) |
| 205 | } |
| 206 | components = append(components, c) |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | // A termType is a type of terminal symbols. |
| 211 | type termType string |
| 212 | |
| 213 | // These constants define some of valid values of termType. |
| 214 | // They improve readability of parse functions. |
| 215 | // |
| 216 | // You can also use "/", "*", "**", "." or "=" as valid values. |
| 217 | const ( |
| 218 | typeIdent = termType("ident") |
| 219 | typeLiteral = termType("literal") |
| 220 | typeEOF = termType("$") |
| 221 | ) |
| 222 | |
| 223 | const ( |
| 224 | // eof is the terminal symbol which always appears at the end of token sequence. |
| 225 | eof = "\u0000" |
| 226 | ) |
| 227 | |
| 228 | // accept tries to accept a token in "p". |
| 229 | // This function consumes a token and returns it if it matches to the specified "term". |
| 230 | // If it doesn't match, the function does not consume any tokens and return an error. |
| 231 | func (p *parser) accept(term termType) (string, error) { |
| 232 | t := p.tokens[0] |
| 233 | switch term { |
| 234 | case "/", "*", "**", ".", "=", "{", "}": |
| 235 | if t != string(term) && t != "/" { |
| 236 | return "", fmt.Errorf("expected %q but got %q", term, t) |
| 237 | } |
| 238 | case typeEOF: |
| 239 | if t != eof { |
| 240 | return "", fmt.Errorf("expected EOF but got %q", t) |
| 241 | } |
| 242 | case typeIdent: |
| 243 | if err := expectIdent(t); err != nil { |
| 244 | return "", err |
| 245 | } |
| 246 | case typeLiteral: |
| 247 | if err := expectPChars(t); err != nil { |
| 248 | return "", err |
| 249 | } |
| 250 | default: |
| 251 | return "", fmt.Errorf("unknown termType %q", term) |
| 252 | } |
| 253 | p.tokens = p.tokens[1:] |
| 254 | p.accepted = append(p.accepted, t) |
| 255 | return t, nil |
| 256 | } |
| 257 | |
| 258 | // expectPChars determines if "t" consists of only pchars defined in RFC3986. |
| 259 | // |
| 260 | // https://www.ietf.org/rfc/rfc3986.txt, P.49 |
| 261 | // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" |
| 262 | // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" |
| 263 | // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
| 264 | // / "*" / "+" / "," / ";" / "=" |
| 265 | // pct-encoded = "%" HEXDIG HEXDIG |
| 266 | func expectPChars(t string) error { |
| 267 | const ( |
| 268 | init = iota |
| 269 | pct1 |
| 270 | pct2 |
| 271 | ) |
| 272 | st := init |
| 273 | for _, r := range t { |
| 274 | if st != init { |
| 275 | if !isHexDigit(r) { |
| 276 | return fmt.Errorf("invalid hexdigit: %c(%U)", r, r) |
| 277 | } |
| 278 | switch st { |
| 279 | case pct1: |
| 280 | st = pct2 |
| 281 | case pct2: |
| 282 | st = init |
| 283 | } |
| 284 | continue |
| 285 | } |
| 286 | |
| 287 | // unreserved |
| 288 | switch { |
| 289 | case 'A' <= r && r <= 'Z': |
| 290 | continue |
| 291 | case 'a' <= r && r <= 'z': |
| 292 | continue |
| 293 | case '0' <= r && r <= '9': |
| 294 | continue |
| 295 | } |
| 296 | switch r { |
| 297 | case '-', '.', '_', '~': |
| 298 | // unreserved |
| 299 | case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': |
| 300 | // sub-delims |
| 301 | case ':', '@': |
| 302 | // rest of pchar |
| 303 | case '%': |
| 304 | // pct-encoded |
| 305 | st = pct1 |
| 306 | default: |
| 307 | return fmt.Errorf("invalid character in path segment: %q(%U)", r, r) |
| 308 | } |
| 309 | } |
| 310 | if st != init { |
| 311 | return fmt.Errorf("invalid percent-encoding in %q", t) |
| 312 | } |
| 313 | return nil |
| 314 | } |
| 315 | |
| 316 | // expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*). |
| 317 | func expectIdent(ident string) error { |
| 318 | if ident == "" { |
| 319 | return fmt.Errorf("empty identifier") |
| 320 | } |
| 321 | for pos, r := range ident { |
| 322 | switch { |
| 323 | case '0' <= r && r <= '9': |
| 324 | if pos == 0 { |
| 325 | return fmt.Errorf("identifier starting with digit: %s", ident) |
| 326 | } |
| 327 | continue |
| 328 | case 'A' <= r && r <= 'Z': |
| 329 | continue |
| 330 | case 'a' <= r && r <= 'z': |
| 331 | continue |
| 332 | case r == '_': |
| 333 | continue |
| 334 | default: |
| 335 | return fmt.Errorf("invalid character %q(%U) in identifier: %s", r, r, ident) |
| 336 | } |
| 337 | } |
| 338 | return nil |
| 339 | } |
| 340 | |
| 341 | func isHexDigit(r rune) bool { |
| 342 | switch { |
| 343 | case '0' <= r && r <= '9': |
| 344 | return true |
| 345 | case 'A' <= r && r <= 'F': |
| 346 | return true |
| 347 | case 'a' <= r && r <= 'f': |
| 348 | return true |
| 349 | } |
| 350 | return false |
| 351 | } |