David K. Bainbridge | 215e024 | 2017-09-05 23:18:24 -0700 | [diff] [blame] | 1 | package reference |
| 2 | |
| 3 | import "regexp" |
| 4 | |
| 5 | var ( |
| 6 | // alphaNumericRegexp defines the alpha numeric atom, typically a |
| 7 | // component of names. This only allows lower case characters and digits. |
| 8 | alphaNumericRegexp = match(`[a-z0-9]+`) |
| 9 | |
| 10 | // separatorRegexp defines the separators allowed to be embedded in name |
| 11 | // components. This allow one period, one or two underscore and multiple |
| 12 | // dashes. |
| 13 | separatorRegexp = match(`(?:[._]|__|[-]*)`) |
| 14 | |
| 15 | // nameComponentRegexp restricts registry path component names to start |
| 16 | // with at least one letter or number, with following parts able to be |
| 17 | // separated by one period, one or two underscore and multiple dashes. |
| 18 | nameComponentRegexp = expression( |
| 19 | alphaNumericRegexp, |
| 20 | optional(repeated(separatorRegexp, alphaNumericRegexp))) |
| 21 | |
| 22 | // domainComponentRegexp restricts the registry domain component of a |
| 23 | // repository name to start with a component as defined by DomainRegexp |
| 24 | // and followed by an optional port. |
| 25 | domainComponentRegexp = match(`(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`) |
| 26 | |
| 27 | // DomainRegexp defines the structure of potential domain components |
| 28 | // that may be part of image names. This is purposely a subset of what is |
| 29 | // allowed by DNS to ensure backwards compatibility with Docker image |
| 30 | // names. |
| 31 | DomainRegexp = expression( |
| 32 | domainComponentRegexp, |
| 33 | optional(repeated(literal(`.`), domainComponentRegexp)), |
| 34 | optional(literal(`:`), match(`[0-9]+`))) |
| 35 | |
| 36 | // TagRegexp matches valid tag names. From docker/docker:graph/tags.go. |
| 37 | TagRegexp = match(`[\w][\w.-]{0,127}`) |
| 38 | |
| 39 | // anchoredTagRegexp matches valid tag names, anchored at the start and |
| 40 | // end of the matched string. |
| 41 | anchoredTagRegexp = anchored(TagRegexp) |
| 42 | |
| 43 | // DigestRegexp matches valid digests. |
| 44 | DigestRegexp = match(`[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`) |
| 45 | |
| 46 | // anchoredDigestRegexp matches valid digests, anchored at the start and |
| 47 | // end of the matched string. |
| 48 | anchoredDigestRegexp = anchored(DigestRegexp) |
| 49 | |
| 50 | // NameRegexp is the format for the name component of references. The |
| 51 | // regexp has capturing groups for the domain and name part omitting |
| 52 | // the separating forward slash from either. |
| 53 | NameRegexp = expression( |
| 54 | optional(DomainRegexp, literal(`/`)), |
| 55 | nameComponentRegexp, |
| 56 | optional(repeated(literal(`/`), nameComponentRegexp))) |
| 57 | |
| 58 | // anchoredNameRegexp is used to parse a name value, capturing the |
| 59 | // domain and trailing components. |
| 60 | anchoredNameRegexp = anchored( |
| 61 | optional(capture(DomainRegexp), literal(`/`)), |
| 62 | capture(nameComponentRegexp, |
| 63 | optional(repeated(literal(`/`), nameComponentRegexp)))) |
| 64 | |
| 65 | // ReferenceRegexp is the full supported format of a reference. The regexp |
| 66 | // is anchored and has capturing groups for name, tag, and digest |
| 67 | // components. |
| 68 | ReferenceRegexp = anchored(capture(NameRegexp), |
| 69 | optional(literal(":"), capture(TagRegexp)), |
| 70 | optional(literal("@"), capture(DigestRegexp))) |
| 71 | |
| 72 | // IdentifierRegexp is the format for string identifier used as a |
| 73 | // content addressable identifier using sha256. These identifiers |
| 74 | // are like digests without the algorithm, since sha256 is used. |
| 75 | IdentifierRegexp = match(`([a-f0-9]{64})`) |
| 76 | |
| 77 | // ShortIdentifierRegexp is the format used to represent a prefix |
| 78 | // of an identifier. A prefix may be used to match a sha256 identifier |
| 79 | // within a list of trusted identifiers. |
| 80 | ShortIdentifierRegexp = match(`([a-f0-9]{6,64})`) |
| 81 | |
| 82 | // anchoredIdentifierRegexp is used to check or match an |
| 83 | // identifier value, anchored at start and end of string. |
| 84 | anchoredIdentifierRegexp = anchored(IdentifierRegexp) |
| 85 | |
| 86 | // anchoredShortIdentifierRegexp is used to check if a value |
| 87 | // is a possible identifier prefix, anchored at start and end |
| 88 | // of string. |
| 89 | anchoredShortIdentifierRegexp = anchored(ShortIdentifierRegexp) |
| 90 | ) |
| 91 | |
// match compiles pattern into a regular expression. It delegates to
// regexp.MustCompile and therefore panics when pattern is not valid.
func match(pattern string) *regexp.Regexp {
	return regexp.MustCompile(pattern)
}
| 94 | |
| 95 | // literal compiles s into a literal regular expression, escaping any regexp |
| 96 | // reserved characters. |
| 97 | func literal(s string) *regexp.Regexp { |
| 98 | re := match(regexp.QuoteMeta(s)) |
| 99 | |
| 100 | if _, complete := re.LiteralPrefix(); !complete { |
| 101 | panic("must be a literal") |
| 102 | } |
| 103 | |
| 104 | return re |
| 105 | } |
| 106 | |
| 107 | // expression defines a full expression, where each regular expression must |
| 108 | // follow the previous. |
| 109 | func expression(res ...*regexp.Regexp) *regexp.Regexp { |
| 110 | var s string |
| 111 | for _, re := range res { |
| 112 | s += re.String() |
| 113 | } |
| 114 | |
| 115 | return match(s) |
| 116 | } |
| 117 | |
| 118 | // optional wraps the expression in a non-capturing group and makes the |
| 119 | // production optional. |
| 120 | func optional(res ...*regexp.Regexp) *regexp.Regexp { |
| 121 | return match(group(expression(res...)).String() + `?`) |
| 122 | } |
| 123 | |
| 124 | // repeated wraps the regexp in a non-capturing group to get one or more |
| 125 | // matches. |
| 126 | func repeated(res ...*regexp.Regexp) *regexp.Regexp { |
| 127 | return match(group(expression(res...)).String() + `+`) |
| 128 | } |
| 129 | |
| 130 | // group wraps the regexp in a non-capturing group. |
| 131 | func group(res ...*regexp.Regexp) *regexp.Regexp { |
| 132 | return match(`(?:` + expression(res...).String() + `)`) |
| 133 | } |
| 134 | |
| 135 | // capture wraps the expression in a capturing group. |
| 136 | func capture(res ...*regexp.Regexp) *regexp.Regexp { |
| 137 | return match(`(` + expression(res...).String() + `)`) |
| 138 | } |
| 139 | |
| 140 | // anchored anchors the regular expression by adding start and end delimiters. |
| 141 | func anchored(res ...*regexp.Regexp) *regexp.Regexp { |
| 142 | return match(`^` + expression(res...).String() + `$`) |
| 143 | } |