William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 1 | // Copyright 2013 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package language |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "fmt" |
| 10 | "sort" |
| 11 | "strconv" |
| 12 | |
| 13 | "golang.org/x/text/internal/tag" |
| 14 | ) |
| 15 | |
| 16 | // findIndex tries to find the given tag in idx and returns a standardized error |
| 17 | // if it could not be found. |
| 18 | func findIndex(idx tag.Index, key []byte, form string) (index int, err error) { |
| 19 | if !tag.FixCase(form, key) { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 20 | return 0, ErrSyntax |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 21 | } |
| 22 | i := idx.Index(key) |
| 23 | if i == -1 { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 24 | return 0, NewValueError(key) |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 25 | } |
| 26 | return i, nil |
| 27 | } |
| 28 | |
// searchUint returns the smallest index i in imap for which imap[i] >= key,
// or len(imap) if there is no such element. imap must be sorted in ascending
// order for the binary search to be meaningful.
func searchUint(imap []uint16, key uint16) int {
	atLeastKey := func(pos int) bool {
		return key <= imap[pos]
	}
	return sort.Search(len(imap), atLeastKey)
}
| 34 | |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 35 | type Language uint16 |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 36 | |
| 37 | // getLangID returns the langID of s if s is a canonical subtag |
| 38 | // or langUnknown if s is not a canonical subtag. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 39 | func getLangID(s []byte) (Language, error) { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 40 | if len(s) == 2 { |
| 41 | return getLangISO2(s) |
| 42 | } |
| 43 | return getLangISO3(s) |
| 44 | } |
| 45 | |
// TODO: language normalization as well as the AliasMaps could be moved to the
// higher level package, but it is a bit tricky to separate the generation.
| 48 | |
| 49 | func (id Language) Canonicalize() (Language, AliasType) { |
| 50 | return normLang(id) |
| 51 | } |
| 52 | |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 53 | // mapLang returns the mapped langID of id according to mapping m. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 54 | func normLang(id Language) (Language, AliasType) { |
| 55 | k := sort.Search(len(AliasMap), func(i int) bool { |
| 56 | return AliasMap[i].From >= uint16(id) |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 57 | }) |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 58 | if k < len(AliasMap) && AliasMap[k].From == uint16(id) { |
| 59 | return Language(AliasMap[k].To), AliasTypes[k] |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 60 | } |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 61 | return id, AliasTypeUnknown |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 62 | } |
| 63 | |
| 64 | // getLangISO2 returns the langID for the given 2-letter ISO language code |
| 65 | // or unknownLang if this does not exist. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 66 | func getLangISO2(s []byte) (Language, error) { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 67 | if !tag.FixCase("zz", s) { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 68 | return 0, ErrSyntax |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 69 | } |
| 70 | if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 71 | return Language(i), nil |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 72 | } |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 73 | return 0, NewValueError(s) |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 74 | } |
| 75 | |
// base is the radix used by strToInt and intToStr: one digit per letter
// 'a'..'z'.
const base = 'z' - 'a' + 1

// strToInt interprets s as a big-endian number in radix base, where each byte
// contributes the digit s[i]-'a'. An empty s yields 0.
func strToInt(s []byte) uint {
	var acc uint
	for _, c := range s {
		acc = acc*base + uint(c-'a')
	}
	return acc
}

// intToStr converts the given integer to the original ASCII string passed to
// strToInt, filling s from its last byte to its first.
// len(s) must match the number of characters obtained.
func intToStr(v uint, s []byte) {
	for pos := len(s); pos > 0; pos-- {
		digit := byte(v % base)
		s[pos-1] = 'a' + digit
		v /= base
	}
}
| 95 | |
| 96 | // getLangISO3 returns the langID for the given 3-letter ISO language code |
| 97 | // or unknownLang if this does not exist. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 98 | func getLangISO3(s []byte) (Language, error) { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 99 | if tag.FixCase("und", s) { |
| 100 | // first try to match canonical 3-letter entries |
| 101 | for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) { |
| 102 | if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] { |
| 103 | // We treat "und" as special and always translate it to "unspecified". |
| 104 | // Note that ZZ and Zzzz are private use and are not treated as |
| 105 | // unspecified by default. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 106 | id := Language(i) |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 107 | if id == nonCanonicalUnd { |
| 108 | return 0, nil |
| 109 | } |
| 110 | return id, nil |
| 111 | } |
| 112 | } |
| 113 | if i := altLangISO3.Index(s); i != -1 { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 114 | return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 115 | } |
| 116 | n := strToInt(s) |
| 117 | if langNoIndex[n/8]&(1<<(n%8)) != 0 { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 118 | return Language(n) + langNoIndexOffset, nil |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 119 | } |
| 120 | // Check for non-canonical uses of ISO3. |
| 121 | for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) { |
| 122 | if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 123 | return Language(i), nil |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 124 | } |
| 125 | } |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 126 | return 0, NewValueError(s) |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 127 | } |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 128 | return 0, ErrSyntax |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 129 | } |
| 130 | |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 131 | // StringToBuf writes the string to b and returns the number of bytes |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 132 | // written. cap(b) must be >= 3. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 133 | func (id Language) StringToBuf(b []byte) int { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 134 | if id >= langNoIndexOffset { |
| 135 | intToStr(uint(id)-langNoIndexOffset, b[:3]) |
| 136 | return 3 |
| 137 | } else if id == 0 { |
| 138 | return copy(b, "und") |
| 139 | } |
| 140 | l := lang[id<<2:] |
| 141 | if l[3] == 0 { |
| 142 | return copy(b, l[:3]) |
| 143 | } |
| 144 | return copy(b, l[:2]) |
| 145 | } |
| 146 | |
| 147 | // String returns the BCP 47 representation of the langID. |
| 148 | // Use b as variable name, instead of id, to ensure the variable |
| 149 | // used is consistent with that of Base in which this type is embedded. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 150 | func (b Language) String() string { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 151 | if b == 0 { |
| 152 | return "und" |
| 153 | } else if b >= langNoIndexOffset { |
| 154 | b -= langNoIndexOffset |
| 155 | buf := [3]byte{} |
| 156 | intToStr(uint(b), buf[:]) |
| 157 | return string(buf[:]) |
| 158 | } |
| 159 | l := lang.Elem(int(b)) |
| 160 | if l[3] == 0 { |
| 161 | return l[:3] |
| 162 | } |
| 163 | return l[:2] |
| 164 | } |
| 165 | |
| 166 | // ISO3 returns the ISO 639-3 language code. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 167 | func (b Language) ISO3() string { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 168 | if b == 0 || b >= langNoIndexOffset { |
| 169 | return b.String() |
| 170 | } |
| 171 | l := lang.Elem(int(b)) |
| 172 | if l[3] == 0 { |
| 173 | return l[:3] |
| 174 | } else if l[2] == 0 { |
| 175 | return altLangISO3.Elem(int(l[3]))[:3] |
| 176 | } |
| 177 | // This allocation will only happen for 3-letter ISO codes |
| 178 | // that are non-canonical BCP 47 language identifiers. |
| 179 | return l[0:1] + l[2:4] |
| 180 | } |
| 181 | |
| 182 | // IsPrivateUse reports whether this language code is reserved for private use. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 183 | func (b Language) IsPrivateUse() bool { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 184 | return langPrivateStart <= b && b <= langPrivateEnd |
| 185 | } |
| 186 | |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 187 | // SuppressScript returns the script marked as SuppressScript in the IANA |
| 188 | // language tag repository, or 0 if there is no such script. |
| 189 | func (b Language) SuppressScript() Script { |
| 190 | if b < langNoIndexOffset { |
| 191 | return Script(suppressScript[b]) |
| 192 | } |
| 193 | return 0 |
| 194 | } |
| 195 | |
| 196 | type Region uint16 |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 197 | |
| 198 | // getRegionID returns the region id for s if s is a valid 2-letter region code |
| 199 | // or unknownRegion. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 200 | func getRegionID(s []byte) (Region, error) { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 201 | if len(s) == 3 { |
| 202 | if isAlpha(s[0]) { |
| 203 | return getRegionISO3(s) |
| 204 | } |
| 205 | if i, err := strconv.ParseUint(string(s), 10, 10); err == nil { |
| 206 | return getRegionM49(int(i)) |
| 207 | } |
| 208 | } |
| 209 | return getRegionISO2(s) |
| 210 | } |
| 211 | |
| 212 | // getRegionISO2 returns the regionID for the given 2-letter ISO country code |
| 213 | // or unknownRegion if this does not exist. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 214 | func getRegionISO2(s []byte) (Region, error) { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 215 | i, err := findIndex(regionISO, s, "ZZ") |
| 216 | if err != nil { |
| 217 | return 0, err |
| 218 | } |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 219 | return Region(i) + isoRegionOffset, nil |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 220 | } |
| 221 | |
| 222 | // getRegionISO3 returns the regionID for the given 3-letter ISO country code |
| 223 | // or unknownRegion if this does not exist. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 224 | func getRegionISO3(s []byte) (Region, error) { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 225 | if tag.FixCase("ZZZ", s) { |
| 226 | for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) { |
| 227 | if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 228 | return Region(i) + isoRegionOffset, nil |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 229 | } |
| 230 | } |
| 231 | for i := 0; i < len(altRegionISO3); i += 3 { |
| 232 | if tag.Compare(altRegionISO3[i:i+3], s) == 0 { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 233 | return Region(altRegionIDs[i/3]), nil |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 234 | } |
| 235 | } |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 236 | return 0, NewValueError(s) |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 237 | } |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 238 | return 0, ErrSyntax |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 239 | } |
| 240 | |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 241 | func getRegionM49(n int) (Region, error) { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 242 | if 0 < n && n <= 999 { |
| 243 | const ( |
| 244 | searchBits = 7 |
| 245 | regionBits = 9 |
| 246 | regionMask = 1<<regionBits - 1 |
| 247 | ) |
| 248 | idx := n >> searchBits |
| 249 | buf := fromM49[m49Index[idx]:m49Index[idx+1]] |
| 250 | val := uint16(n) << regionBits // we rely on bits shifting out |
| 251 | i := sort.Search(len(buf), func(i int) bool { |
| 252 | return buf[i] >= val |
| 253 | }) |
| 254 | if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 255 | return Region(r & regionMask), nil |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 256 | } |
| 257 | } |
| 258 | var e ValueError |
| 259 | fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n) |
| 260 | return 0, e |
| 261 | } |
| 262 | |
| 263 | // normRegion returns a region if r is deprecated or 0 otherwise. |
| 264 | // TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ). |
| 265 | // TODO: consider mapping split up regions to new most populous one (like CLDR). |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 266 | func normRegion(r Region) Region { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 267 | m := regionOldMap |
| 268 | k := sort.Search(len(m), func(i int) bool { |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 269 | return m[i].From >= uint16(r) |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 270 | }) |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 271 | if k < len(m) && m[k].From == uint16(r) { |
| 272 | return Region(m[k].To) |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 273 | } |
| 274 | return 0 |
| 275 | } |
| 276 | |
| 277 | const ( |
| 278 | iso3166UserAssigned = 1 << iota |
| 279 | ccTLD |
| 280 | bcp47Region |
| 281 | ) |
| 282 | |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 283 | func (r Region) typ() byte { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 284 | return regionTypes[r] |
| 285 | } |
| 286 | |
| 287 | // String returns the BCP 47 representation for the region. |
| 288 | // It returns "ZZ" for an unspecified region. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 289 | func (r Region) String() string { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 290 | if r < isoRegionOffset { |
| 291 | if r == 0 { |
| 292 | return "ZZ" |
| 293 | } |
| 294 | return fmt.Sprintf("%03d", r.M49()) |
| 295 | } |
| 296 | r -= isoRegionOffset |
| 297 | return regionISO.Elem(int(r))[:2] |
| 298 | } |
| 299 | |
| 300 | // ISO3 returns the 3-letter ISO code of r. |
| 301 | // Note that not all regions have a 3-letter ISO code. |
| 302 | // In such cases this method returns "ZZZ". |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 303 | func (r Region) ISO3() string { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 304 | if r < isoRegionOffset { |
| 305 | return "ZZZ" |
| 306 | } |
| 307 | r -= isoRegionOffset |
| 308 | reg := regionISO.Elem(int(r)) |
| 309 | switch reg[2] { |
| 310 | case 0: |
| 311 | return altRegionISO3[reg[3]:][:3] |
| 312 | case ' ': |
| 313 | return "ZZZ" |
| 314 | } |
| 315 | return reg[0:1] + reg[2:4] |
| 316 | } |
| 317 | |
| 318 | // M49 returns the UN M.49 encoding of r, or 0 if this encoding |
| 319 | // is not defined for r. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 320 | func (r Region) M49() int { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 321 | return int(m49[r]) |
| 322 | } |
| 323 | |
| 324 | // IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This |
| 325 | // may include private-use tags that are assigned by CLDR and used in this |
| 326 | // implementation. So IsPrivateUse and IsCountry can be simultaneously true. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 327 | func (r Region) IsPrivateUse() bool { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 328 | return r.typ()&iso3166UserAssigned != 0 |
| 329 | } |
| 330 | |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 331 | type Script uint8 |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 332 | |
| 333 | // getScriptID returns the script id for string s. It assumes that s |
| 334 | // is of the format [A-Z][a-z]{3}. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 335 | func getScriptID(idx tag.Index, s []byte) (Script, error) { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 336 | i, err := findIndex(idx, s, "Zzzz") |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 337 | return Script(i), err |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 338 | } |
| 339 | |
| 340 | // String returns the script code in title case. |
| 341 | // It returns "Zzzz" for an unspecified script. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 342 | func (s Script) String() string { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 343 | if s == 0 { |
| 344 | return "Zzzz" |
| 345 | } |
| 346 | return script.Elem(int(s)) |
| 347 | } |
| 348 | |
| 349 | // IsPrivateUse reports whether this script code is reserved for private use. |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 350 | func (s Script) IsPrivateUse() bool { |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 351 | return _Qaaa <= s && s <= _Qabx |
| 352 | } |
| 353 | |
| 354 | const ( |
| 355 | maxAltTaglen = len("en-US-POSIX") |
| 356 | maxLen = maxAltTaglen |
| 357 | ) |
| 358 | |
| 359 | var ( |
| 360 | // grandfatheredMap holds a mapping from legacy and grandfathered tags to |
| 361 | // their base language or index to more elaborate tag. |
| 362 | grandfatheredMap = map[[maxLen]byte]int16{ |
| 363 | [maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban |
| 364 | [maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami |
| 365 | [maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn |
| 366 | [maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak |
| 367 | [maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon |
| 368 | [maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux |
| 369 | [maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo |
| 370 | [maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn |
| 371 | [maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao |
| 372 | [maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay |
| 373 | [maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu |
| 374 | [maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok |
| 375 | [maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn |
| 376 | [maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR |
| 377 | [maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL |
| 378 | [maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE |
| 379 | [maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu |
| 380 | [maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka |
| 381 | [maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan |
| 382 | [maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang |
| 383 | |
| 384 | // Grandfathered tags with no modern replacement will be converted as |
| 385 | // follows: |
| 386 | [maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish |
| 387 | [maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed |
| 388 | [maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default |
| 389 | [maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian |
| 390 | [maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo |
| 391 | [maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min |
| 392 | |
| 393 | // CLDR-specific tag. |
| 394 | [maxLen]byte{'r', 'o', 'o', 't'}: 0, // root |
| 395 | [maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX" |
| 396 | } |
| 397 | |
| 398 | altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102} |
| 399 | |
| 400 | altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix" |
| 401 | ) |
| 402 | |
| 403 | func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) { |
| 404 | if v, ok := grandfatheredMap[s]; ok { |
| 405 | if v < 0 { |
| 406 | return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true |
| 407 | } |
Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame] | 408 | t.LangID = Language(v) |
William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 409 | return t, true |
| 410 | } |
| 411 | return t, false |
| 412 | } |