//
// Copyright (c) 2011-2019 Canonical Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package yaml

import (
	"encoding/base64"
	"math"
	"regexp"
	"strconv"
	"strings"
	"time"
)

// resolveMapItem is the value type of resolveMap: the decoded Go value and
// the short tag that one fixed plain-scalar spelling resolves to.
//
// The field order is relied upon by the unkeyed composite literal in init.
type resolveMapItem struct {
	value interface{} // decoded value, e.g. true, nil or math.Inf(+1)
	tag   string      // short tag, e.g. boolTag or nullTag
}

| 32 | var resolveTable = make([]byte, 256) |
| 33 | var resolveMap = make(map[string]resolveMapItem) |
| 34 | |
| 35 | func init() { |
| 36 | t := resolveTable |
| 37 | t[int('+')] = 'S' // Sign |
| 38 | t[int('-')] = 'S' |
| 39 | for _, c := range "0123456789" { |
| 40 | t[int(c)] = 'D' // Digit |
| 41 | } |
| 42 | for _, c := range "yYnNtTfFoO~" { |
| 43 | t[int(c)] = 'M' // In map |
| 44 | } |
| 45 | t[int('.')] = '.' // Float (potentially in map) |
| 46 | |
| 47 | var resolveMapList = []struct { |
| 48 | v interface{} |
| 49 | tag string |
| 50 | l []string |
| 51 | }{ |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 52 | {true, boolTag, []string{"true", "True", "TRUE"}}, |
| 53 | {false, boolTag, []string{"false", "False", "FALSE"}}, |
| 54 | {nil, nullTag, []string{"", "~", "null", "Null", "NULL"}}, |
| 55 | {math.NaN(), floatTag, []string{".nan", ".NaN", ".NAN"}}, |
| 56 | {math.Inf(+1), floatTag, []string{".inf", ".Inf", ".INF"}}, |
| 57 | {math.Inf(+1), floatTag, []string{"+.inf", "+.Inf", "+.INF"}}, |
| 58 | {math.Inf(-1), floatTag, []string{"-.inf", "-.Inf", "-.INF"}}, |
| 59 | {"<<", mergeTag, []string{"<<"}}, |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 60 | } |
| 61 | |
| 62 | m := resolveMap |
| 63 | for _, item := range resolveMapList { |
| 64 | for _, s := range item.l { |
| 65 | m[s] = resolveMapItem{item.v, item.tag} |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | |
// Short ("!!"-prefixed) forms of the tags handled by this package.
const (
	nullTag      = "!!null"
	boolTag      = "!!bool"
	strTag       = "!!str"
	intTag       = "!!int"
	floatTag     = "!!float"
	timestampTag = "!!timestamp"
	seqTag       = "!!seq"
	mapTag       = "!!map"
	binaryTag    = "!!binary"
	mergeTag     = "!!merge"
)

// longTags maps each well-known short tag to its long form, and shortTags
// is the inverse mapping. Both are filled in by init.
var longTags = make(map[string]string)
var shortTags = make(map[string]string)

| 86 | func init() { |
| 87 | for _, stag := range []string{nullTag, boolTag, strTag, intTag, floatTag, timestampTag, seqTag, mapTag, binaryTag, mergeTag} { |
| 88 | ltag := longTag(stag) |
| 89 | longTags[stag] = ltag |
| 90 | shortTags[ltag] = stag |
| 91 | } |
| 92 | } |
| 93 | |
// longTagPrefix is the prefix that the short form abbreviates as "!!".
const longTagPrefix = "tag:yaml.org,2002:"

| 96 | func shortTag(tag string) string { |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 97 | if strings.HasPrefix(tag, longTagPrefix) { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 98 | if stag, ok := shortTags[tag]; ok { |
| 99 | return stag |
| 100 | } |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 101 | return "!!" + tag[len(longTagPrefix):] |
| 102 | } |
| 103 | return tag |
| 104 | } |
| 105 | |
| 106 | func longTag(tag string) string { |
| 107 | if strings.HasPrefix(tag, "!!") { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 108 | if ltag, ok := longTags[tag]; ok { |
| 109 | return ltag |
| 110 | } |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 111 | return longTagPrefix + tag[2:] |
| 112 | } |
| 113 | return tag |
| 114 | } |
| 115 | |
| 116 | func resolvableTag(tag string) bool { |
| 117 | switch tag { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 118 | case "", strTag, boolTag, intTag, floatTag, nullTag, timestampTag: |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 119 | return true |
| 120 | } |
| 121 | return false |
| 122 | } |
| 123 | |
// yamlStyleFloat matches decimal floats: an optional sign, then either
// ".digits" or "digits" with an optional ".digits" part, then an optional
// exponent. resolve checks it before calling strconv.ParseFloat.
var yamlStyleFloat = regexp.MustCompile(`^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$`)

| 126 | func resolve(tag string, in string) (rtag string, out interface{}) { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 127 | tag = shortTag(tag) |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 128 | if !resolvableTag(tag) { |
| 129 | return tag, in |
| 130 | } |
| 131 | |
| 132 | defer func() { |
| 133 | switch tag { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 134 | case "", rtag, strTag, binaryTag: |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 135 | return |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 136 | case floatTag: |
| 137 | if rtag == intTag { |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 138 | switch v := out.(type) { |
| 139 | case int64: |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 140 | rtag = floatTag |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 141 | out = float64(v) |
| 142 | return |
| 143 | case int: |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 144 | rtag = floatTag |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 145 | out = float64(v) |
| 146 | return |
| 147 | } |
| 148 | } |
| 149 | } |
| 150 | failf("cannot decode %s `%s` as a %s", shortTag(rtag), in, shortTag(tag)) |
| 151 | }() |
| 152 | |
| 153 | // Any data is accepted as a !!str or !!binary. |
| 154 | // Otherwise, the prefix is enough of a hint about what it might be. |
| 155 | hint := byte('N') |
| 156 | if in != "" { |
| 157 | hint = resolveTable[in[0]] |
| 158 | } |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 159 | if hint != 0 && tag != strTag && tag != binaryTag { |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 160 | // Handle things we can lookup in a map. |
| 161 | if item, ok := resolveMap[in]; ok { |
| 162 | return item.tag, item.value |
| 163 | } |
| 164 | |
| 165 | // Base 60 floats are a bad idea, were dropped in YAML 1.2, and |
| 166 | // are purposefully unsupported here. They're still quoted on |
| 167 | // the way out for compatibility with other parser, though. |
| 168 | |
| 169 | switch hint { |
| 170 | case 'M': |
| 171 | // We've already checked the map above. |
| 172 | |
| 173 | case '.': |
| 174 | // Not in the map, so maybe a normal float. |
| 175 | floatv, err := strconv.ParseFloat(in, 64) |
| 176 | if err == nil { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 177 | return floatTag, floatv |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 178 | } |
| 179 | |
| 180 | case 'D', 'S': |
| 181 | // Int, float, or timestamp. |
| 182 | // Only try values as a timestamp if the value is unquoted or there's an explicit |
| 183 | // !!timestamp tag. |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 184 | if tag == "" || tag == timestampTag { |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 185 | t, ok := parseTimestamp(in) |
| 186 | if ok { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 187 | return timestampTag, t |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 188 | } |
| 189 | } |
| 190 | |
| 191 | plain := strings.Replace(in, "_", "", -1) |
| 192 | intv, err := strconv.ParseInt(plain, 0, 64) |
| 193 | if err == nil { |
| 194 | if intv == int64(int(intv)) { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 195 | return intTag, int(intv) |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 196 | } else { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 197 | return intTag, intv |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 198 | } |
| 199 | } |
| 200 | uintv, err := strconv.ParseUint(plain, 0, 64) |
| 201 | if err == nil { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 202 | return intTag, uintv |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 203 | } |
| 204 | if yamlStyleFloat.MatchString(plain) { |
| 205 | floatv, err := strconv.ParseFloat(plain, 64) |
| 206 | if err == nil { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 207 | return floatTag, floatv |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 208 | } |
| 209 | } |
| 210 | if strings.HasPrefix(plain, "0b") { |
| 211 | intv, err := strconv.ParseInt(plain[2:], 2, 64) |
| 212 | if err == nil { |
| 213 | if intv == int64(int(intv)) { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 214 | return intTag, int(intv) |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 215 | } else { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 216 | return intTag, intv |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 217 | } |
| 218 | } |
| 219 | uintv, err := strconv.ParseUint(plain[2:], 2, 64) |
| 220 | if err == nil { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 221 | return intTag, uintv |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 222 | } |
| 223 | } else if strings.HasPrefix(plain, "-0b") { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 224 | intv, err := strconv.ParseInt("-"+plain[3:], 2, 64) |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 225 | if err == nil { |
| 226 | if true || intv == int64(int(intv)) { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 227 | return intTag, int(intv) |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 228 | } else { |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 229 | return intTag, intv |
| 230 | } |
| 231 | } |
| 232 | } |
| 233 | // Octals as introduced in version 1.2 of the spec. |
| 234 | // Octals from the 1.1 spec, spelled as 0777, are still |
| 235 | // decoded by default in v3 as well for compatibility. |
| 236 | // May be dropped in v4 depending on how usage evolves. |
| 237 | if strings.HasPrefix(plain, "0o") { |
| 238 | intv, err := strconv.ParseInt(plain[2:], 8, 64) |
| 239 | if err == nil { |
| 240 | if intv == int64(int(intv)) { |
| 241 | return intTag, int(intv) |
| 242 | } else { |
| 243 | return intTag, intv |
| 244 | } |
| 245 | } |
| 246 | uintv, err := strconv.ParseUint(plain[2:], 8, 64) |
| 247 | if err == nil { |
| 248 | return intTag, uintv |
| 249 | } |
| 250 | } else if strings.HasPrefix(plain, "-0o") { |
| 251 | intv, err := strconv.ParseInt("-"+plain[3:], 8, 64) |
| 252 | if err == nil { |
| 253 | if true || intv == int64(int(intv)) { |
| 254 | return intTag, int(intv) |
| 255 | } else { |
| 256 | return intTag, intv |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 257 | } |
| 258 | } |
| 259 | } |
| 260 | default: |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 261 | panic("internal error: missing handler for resolver table: " + string(rune(hint)) + " (with " + in + ")") |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 262 | } |
| 263 | } |
David K. Bainbridge | e05cf0c | 2021-08-19 03:16:50 +0000 | [diff] [blame] | 264 | return strTag, in |
Jonathan Hart | 4b110f6 | 2020-03-13 17:36:19 -0700 | [diff] [blame] | 265 | } |
| 266 | |
// encodeBase64 encodes s as standard base64, broken up into lines of at
// most 70 characters.
//
// When the encoding is shorter than 70 characters it is returned as is,
// with no newline. Otherwise every line, including the last one, is
// terminated by '\n'.
func encodeBase64(s string) string {
	const lineLen = 70
	encLen := base64.StdEncoding.EncodedLen(len(s))
	lines := encLen/lineLen + 1
	// A single allocation holds the raw encoding (first encLen bytes)
	// followed by room for the line-broken copy: encLen bytes plus at most
	// one newline per line.
	buf := make([]byte, encLen*2+lines)
	in := buf[0:encLen]
	out := buf[encLen:]
	base64.StdEncoding.Encode(in, []byte(s))
	k := 0 // number of bytes written to out
	for i := 0; i < len(in); i += lineLen {
		j := i + lineLen
		if j > len(in) {
			j = len(in)
		}
		k += copy(out[k:], in[i:j])
		if lines > 1 {
			out[k] = '\n'
			k++
		}
	}
	return string(out[:k])
}

// allowedTimestampFormats lists the time.Parse layouts that parseTimestamp
// tries, in order.
//
// This is a subset of the formats allowed by the regular expression
// defined at http://yaml.org/type/timestamp.html.
var allowedTimestampFormats = []string{
	"2006-1-2T15:4:5.999999999Z07:00", // RFC3339Nano with short date fields.
	"2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t".
	"2006-1-2 15:4:5.999999999",       // space separated with no time zone
	"2006-1-2",                        // date only
	// Notable exception: time.Parse cannot handle: "2001-12-14 21:59:43.10 -5"
	// from the set of examples.
}

| 303 | // parseTimestamp parses s as a timestamp string and |
| 304 | // returns the timestamp and reports whether it succeeded. |
| 305 | // Timestamp formats are defined at http://yaml.org/type/timestamp.html |
| 306 | func parseTimestamp(s string) (time.Time, bool) { |
| 307 | // TODO write code to check all the formats supported by |
| 308 | // http://yaml.org/type/timestamp.html instead of using time.Parse. |
| 309 | |
| 310 | // Quick check: all date formats start with YYYY-. |
| 311 | i := 0 |
| 312 | for ; i < len(s); i++ { |
| 313 | if c := s[i]; c < '0' || c > '9' { |
| 314 | break |
| 315 | } |
| 316 | } |
| 317 | if i != 4 || i == len(s) || s[i] != '-' { |
| 318 | return time.Time{}, false |
| 319 | } |
| 320 | for _, format := range allowedTimestampFormats { |
| 321 | if t, err := time.Parse(format, s); err == nil { |
| 322 | return t, true |
| 323 | } |
| 324 | } |
| 325 | return time.Time{}, false |
| 326 | } |