blob: 75a2dbca76434aa39d322b6049afa867440c003d [file] [log] [blame]
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package language
6
7import "errors"
8
// scriptRegionFlags is an 8-bit set of the flag bits declared below.
type scriptRegionFlags uint8

// Flag bits tested against the flags field of likely-subtag table entries.
// The constants are untyped, so they can be combined with an integer flags
// field of any type.
const (
	// isList (1) marks a table entry that does not hold a single result but
	// refers to a run of entries in the matching list table
	// (likelyLangList or likelyRegionList); see addTags.
	isList = 1 << iota
	// scriptInFrom (2) is set on entries whose script was defined in the
	// tag the entry was derived from; addTags uses it to tell lang-script
	// entries apart from lang-region entries.
	scriptInFrom
	// regionInFrom (4) is not referenced in this file.
	// NOTE(review): presumably the region counterpart of scriptInFrom;
	// confirm against the table generator.
	regionInFrom
)
16
17func (t *Tag) setUndefinedLang(id Language) {
18 if t.LangID == 0 {
19 t.LangID = id
20 }
21}
22
23func (t *Tag) setUndefinedScript(id Script) {
24 if t.ScriptID == 0 {
25 t.ScriptID = id
26 }
27}
28
29func (t *Tag) setUndefinedRegion(id Region) {
30 if t.RegionID == 0 || t.RegionID.Contains(id) {
31 t.RegionID = id
32 }
33}
34
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags. It is the error addTags
// returns when none of its table lookups apply to the tag.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
38
39// addLikelySubtags sets subtags to their most likely value, given the locale.
40// In most cases this means setting fields for unknown values, but in some
41// cases it may alter a value. It returns an ErrMissingLikelyTagsData error
42// if the given locale cannot be expanded.
43func (t Tag) addLikelySubtags() (Tag, error) {
44 id, err := addTags(t)
45 if err != nil {
46 return t, err
47 } else if id.equalTags(t) {
48 return t, nil
49 }
50 id.RemakeString()
51 return id, nil
52}
53
54// specializeRegion attempts to specialize a group region.
55func specializeRegion(t *Tag) bool {
56 if i := regionInclusion[t.RegionID]; i < nRegionGroups {
57 x := likelyRegionGroup[i]
58 if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
59 t.RegionID = Region(x.region)
60 }
61 return true
62 }
63 return false
64}
65
66// Maximize returns a new tag with missing tags filled in.
67func (t Tag) Maximize() (Tag, error) {
68 return addTags(t)
69}
70
71func addTags(t Tag) (Tag, error) {
72 // We leave private use identifiers alone.
73 if t.IsPrivateUse() {
74 return t, nil
75 }
76 if t.ScriptID != 0 && t.RegionID != 0 {
77 if t.LangID != 0 {
78 // already fully specified
79 specializeRegion(&t)
80 return t, nil
81 }
82 // Search matches for und-script-region. Note that for these cases
83 // region will never be a group so there is no need to check for this.
84 list := likelyRegion[t.RegionID : t.RegionID+1]
85 if x := list[0]; x.flags&isList != 0 {
86 list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
87 }
88 for _, x := range list {
89 // Deviating from the spec. See match_test.go for details.
90 if Script(x.script) == t.ScriptID {
91 t.setUndefinedLang(Language(x.lang))
92 return t, nil
93 }
94 }
95 }
96 if t.LangID != 0 {
97 // Search matches for lang-script and lang-region, where lang != und.
98 if t.LangID < langNoIndexOffset {
99 x := likelyLang[t.LangID]
100 if x.flags&isList != 0 {
101 list := likelyLangList[x.region : x.region+uint16(x.script)]
102 if t.ScriptID != 0 {
103 for _, x := range list {
104 if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
105 t.setUndefinedRegion(Region(x.region))
106 return t, nil
107 }
108 }
109 } else if t.RegionID != 0 {
110 count := 0
111 goodScript := true
112 tt := t
113 for _, x := range list {
114 // We visit all entries for which the script was not
115 // defined, including the ones where the region was not
116 // defined. This allows for proper disambiguation within
117 // regions.
118 if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
119 tt.RegionID = Region(x.region)
120 tt.setUndefinedScript(Script(x.script))
121 goodScript = goodScript && tt.ScriptID == Script(x.script)
122 count++
123 }
124 }
125 if count == 1 {
126 return tt, nil
127 }
128 // Even if we fail to find a unique Region, we might have
129 // an unambiguous script.
130 if goodScript {
131 t.ScriptID = tt.ScriptID
132 }
133 }
134 }
135 }
136 } else {
137 // Search matches for und-script.
138 if t.ScriptID != 0 {
139 x := likelyScript[t.ScriptID]
140 if x.region != 0 {
141 t.setUndefinedRegion(Region(x.region))
142 t.setUndefinedLang(Language(x.lang))
143 return t, nil
144 }
145 }
146 // Search matches for und-region. If und-script-region exists, it would
147 // have been found earlier.
148 if t.RegionID != 0 {
149 if i := regionInclusion[t.RegionID]; i < nRegionGroups {
150 x := likelyRegionGroup[i]
151 if x.region != 0 {
152 t.setUndefinedLang(Language(x.lang))
153 t.setUndefinedScript(Script(x.script))
154 t.RegionID = Region(x.region)
155 }
156 } else {
157 x := likelyRegion[t.RegionID]
158 if x.flags&isList != 0 {
159 x = likelyRegionList[x.lang]
160 }
161 if x.script != 0 && x.flags != scriptInFrom {
162 t.setUndefinedLang(Language(x.lang))
163 t.setUndefinedScript(Script(x.script))
164 return t, nil
165 }
166 }
167 }
168 }
169
170 // Search matches for lang.
171 if t.LangID < langNoIndexOffset {
172 x := likelyLang[t.LangID]
173 if x.flags&isList != 0 {
174 x = likelyLangList[x.region]
175 }
176 if x.region != 0 {
177 t.setUndefinedScript(Script(x.script))
178 t.setUndefinedRegion(Region(x.region))
179 }
180 specializeRegion(&t)
181 if t.LangID == 0 {
182 t.LangID = _en // default language
183 }
184 return t, nil
185 }
186 return t, ErrMissingLikelyTagsData
187}
188
189func (t *Tag) setTagsFrom(id Tag) {
190 t.LangID = id.LangID
191 t.ScriptID = id.ScriptID
192 t.RegionID = id.RegionID
193}
194
195// minimize removes the region or script subtags from t such that
196// t.addLikelySubtags() == t.minimize().addLikelySubtags().
197func (t Tag) minimize() (Tag, error) {
198 t, err := minimizeTags(t)
199 if err != nil {
200 return t, err
201 }
202 t.RemakeString()
203 return t, nil
204}
205
206// minimizeTags mimics the behavior of the ICU 51 C implementation.
207func minimizeTags(t Tag) (Tag, error) {
208 if t.equalTags(Und) {
209 return t, nil
210 }
211 max, err := addTags(t)
212 if err != nil {
213 return t, err
214 }
215 for _, id := range [...]Tag{
216 {LangID: t.LangID},
217 {LangID: t.LangID, RegionID: t.RegionID},
218 {LangID: t.LangID, ScriptID: t.ScriptID},
219 } {
220 if x, err := addTags(id); err == nil && max.equalTags(x) {
221 t.setTagsFrom(id)
222 break
223 }
224 }
225 return t, nil
226}