blob: 83816a72a8a0653a886857659aaf2dbcab5f1aa9 [file] [log] [blame]
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
//go:generate go run gen.go gen_index.go -output tables.go
//go:generate go run gen_parents.go
7
8package compact
9
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
12
13import (
14 "strings"
15
16 "golang.org/x/text/internal/language"
17)
18
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
	// NOTE: exported tags will become part of the public API.

	// language is the compact ID computed by Make for the tag with any "rg"
	// regional override removed.
	language ID
	// locale is the compact ID computed by Make after applying the "rg"
	// regional override; it equals language when there is no override.
	locale ID
	// full keeps the original tag when Make could not represent it exactly
	// with the two IDs above. It is nil for compact tags.
	full fullTag // always a language.Tag for now.
}
28
// _und is the compact ID that IsRoot compares against to detect the root
// language "und".
const _und = 0
30
// fullTag lists the methods that Tag calls on its non-compact representation
// without a type assertion. Tag.IsRoot and Tag.Parent delegate to it whenever
// the full field is set.
type fullTag interface {
	// IsRoot is what Tag.IsRoot returns for non-compact tags.
	IsRoot() bool
	// Parent yields the tag that Tag.Parent re-compacts for non-compact tags.
	Parent() language.Tag
}
35
36// Make a compact Tag from a fully specified internal language Tag.
37func Make(t language.Tag) (tag Tag) {
38 if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
39 if r, err := language.ParseRegion(region[:2]); err == nil {
40 tFull := t
41 t, _ = t.SetTypeForKey("rg", "")
42 // TODO: should we not consider "va" for the language tag?
43 var exact1, exact2 bool
44 tag.language, exact1 = FromTag(t)
45 t.RegionID = r
46 tag.locale, exact2 = FromTag(t)
47 if !exact1 || !exact2 {
48 tag.full = tFull
49 }
50 return tag
51 }
52 }
53 lang, ok := FromTag(t)
54 tag.language = lang
55 tag.locale = lang
56 if !ok {
57 tag.full = t
58 }
59 return tag
60}
61
62// Tag returns an internal language Tag version of this tag.
63func (t Tag) Tag() language.Tag {
64 if t.full != nil {
65 return t.full.(language.Tag)
66 }
67 tag := t.language.Tag()
68 if t.language != t.locale {
69 loc := t.locale.Tag()
70 tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
71 }
72 return tag
73}
74
75// IsCompact reports whether this tag is fully defined in terms of ID.
76func (t *Tag) IsCompact() bool {
77 return t.full == nil
78}
79
80// MayHaveVariants reports whether a tag may have variants. If it returns false
81// it is guaranteed the tag does not have variants.
82func (t Tag) MayHaveVariants() bool {
83 return t.full != nil || int(t.language) >= len(coreTags)
84}
85
86// MayHaveExtensions reports whether a tag may have extensions. If it returns
87// false it is guaranteed the tag does not have them.
88func (t Tag) MayHaveExtensions() bool {
89 return t.full != nil ||
90 int(t.language) >= len(coreTags) ||
91 t.language != t.locale
92}
93
94// IsRoot returns true if t is equal to language "und".
95func (t Tag) IsRoot() bool {
96 if t.full != nil {
97 return t.full.IsRoot()
98 }
99 return t.language == _und
100}
101
102// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
103// specific language are substituted with fields from the parent language.
104// The parent for a language may change for newer versions of CLDR.
105func (t Tag) Parent() Tag {
106 if t.full != nil {
107 return Make(t.full.Parent())
108 }
109 if t.language != t.locale {
110 // Simulate stripping -u-rg-xxxxxx
111 return Tag{language: t.language, locale: t.language}
112 }
113 // TODO: use parent lookup table once cycle from internal package is
114 // removed. Probably by internalizing the table and declaring this fast
115 // enough.
116 // lang := compactID(internal.Parent(uint16(t.language)))
117 lang, _ := FromTag(t.language.Tag().Parent())
118 return Tag{language: lang, locale: lang}
119}
120
// nextToken returns the first token t of s and the rest of the string.
//
// The first byte of s is skipped; callers are expected to pass a string that
// starts with a separator such as '-'. The token runs up to, but does not
// include, the next '-'. tail begins at that '-' so it can be fed straight
// back into nextToken, and is empty when no further '-' exists. An empty s
// yields two empty strings instead of panicking.
func nextToken(s string) (t, tail string) {
	if s == "" {
		return "", ""
	}
	rest := s[1:]
	p := strings.IndexByte(rest, '-')
	if p < 0 {
		return rest, ""
	}
	return rest[:p], rest[p:]
}
130
131// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
132// for which data exists in the text repository.The index will change over time
133// and should not be stored in persistent storage. If t does not match a compact
134// index, exact will be false and the compact index will be returned for the
135// first match after repeatedly taking the Parent of t.
136func LanguageID(t Tag) (id ID, exact bool) {
137 return t.language, t.full == nil
138}
139
140// RegionalID returns the ID for the regional variant of this tag. This index is
141// used to indicate region-specific overrides, such as default currency, default
142// calendar and week data, default time cycle, and default measurement system
143// and unit preferences.
144//
145// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
146// settings for currency, number formatting, etc. The CompactIndex for this tag
147// will be that for en-GB, while the RegionalID will be the one corresponding to
148// en-US.
149func RegionalID(t Tag) (id ID, exact bool) {
150 return t.locale, t.full == nil
151}
152
153// LanguageTag returns t stripped of regional variant indicators.
154//
155// At the moment this means it is stripped of a regional and variant subtag "rg"
156// and "va" in the "u" extension.
157func (t Tag) LanguageTag() Tag {
158 if t.full == nil {
159 return Tag{language: t.language, locale: t.language}
160 }
161 tt := t.Tag()
162 tt.SetTypeForKey("rg", "")
163 tt.SetTypeForKey("va", "")
164 return Make(tt)
165}
166
167// RegionalTag returns the regional variant of the tag.
168//
169// At the moment this means that the region is set from the regional subtag
170// "rg" in the "u" extension.
171func (t Tag) RegionalTag() Tag {
172 rt := Tag{language: t.locale, locale: t.locale}
173 if t.full == nil {
174 return rt
175 }
176 b := language.Builder{}
177 tag := t.Tag()
178 // tag, _ = tag.SetTypeForKey("rg", "")
179 b.SetTag(t.locale.Tag())
180 if v := tag.Variants(); v != "" {
181 for _, v := range strings.Split(v, "-") {
182 b.AddVariant(v)
183 }
184 }
185 for _, e := range tag.Extensions() {
186 b.AddExt(e)
187 }
188 return t
189}
190
191// FromTag reports closest matching ID for an internal language Tag.
192func FromTag(t language.Tag) (id ID, exact bool) {
193 // TODO: perhaps give more frequent tags a lower index.
194 // TODO: we could make the indexes stable. This will excluded some
195 // possibilities for optimization, so don't do this quite yet.
196 exact = true
197
198 b, s, r := t.Raw()
199 if t.HasString() {
200 if t.IsPrivateUse() {
201 // We have no entries for user-defined tags.
202 return 0, false
203 }
204 hasExtra := false
205 if t.HasVariants() {
206 if t.HasExtensions() {
207 build := language.Builder{}
208 build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
209 build.AddVariant(t.Variants())
210 exact = false
211 t = build.Make()
212 }
213 hasExtra = true
214 } else if _, ok := t.Extension('u'); ok {
215 // TODO: va may mean something else. Consider not considering it.
216 // Strip all but the 'va' entry.
217 old := t
218 variant := t.TypeForKey("va")
219 t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
220 if variant != "" {
221 t, _ = t.SetTypeForKey("va", variant)
222 hasExtra = true
223 }
224 exact = old == t
225 } else {
226 exact = false
227 }
228 if hasExtra {
229 // We have some variants.
230 for i, s := range specialTags {
231 if s == t {
232 return ID(i + len(coreTags)), exact
233 }
234 }
235 exact = false
236 }
237 }
238 if x, ok := getCoreIndex(t); ok {
239 return x, exact
240 }
241 exact = false
242 if r != 0 && s == 0 {
243 // Deal with cases where an extra script is inserted for the region.
244 t, _ := t.Maximize()
245 if x, ok := getCoreIndex(t); ok {
246 return x, exact
247 }
248 }
249 for t = t.Parent(); t != root; t = t.Parent() {
250 // No variants specified: just compare core components.
251 // The key has the form lllssrrr, where l, s, and r are nibbles for
252 // respectively the langID, scriptID, and regionID.
253 if x, ok := getCoreIndex(t); ok {
254 return x, exact
255 }
256 }
257 return 0, exact
258}
259
// root is the zero language.Tag. FromTag stops walking up the parent chain
// once it reaches this value.
var root = language.Tag{}