blob: 11acfd885627957280da6eb58f986b6542dd9192 [file] [log] [blame]
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package language
6
7import (
William Kurkianea869482019-04-09 15:16:11 -04008 "errors"
William Kurkianea869482019-04-09 15:16:11 -04009 "strconv"
10 "strings"
11
Abhilash S.L3b494632019-07-16 15:51:09 +053012 "golang.org/x/text/internal/language"
William Kurkianea869482019-04-09 15:16:11 -040013)
14
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError interface {
	error

	// Subtag returns the subtag for which the error occurred.
	Subtag() string
}
24
25// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
26// failed it returns an error and any part of the tag that could be parsed.
27// If parsing succeeded but an unknown value was found, it returns
28// ValueError. The Tag returned in this case is just stripped of the unknown
29// value. All other values are preserved. It accepts tags in the BCP 47 format
30// and extensions to this standard defined in
Abhilash S.L3b494632019-07-16 15:51:09 +053031// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
William Kurkianea869482019-04-09 15:16:11 -040032// The resulting tag is canonicalized using the default canonicalization type.
33func Parse(s string) (t Tag, err error) {
34 return Default.Parse(s)
35}
36
37// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
38// failed it returns an error and any part of the tag that could be parsed.
39// If parsing succeeded but an unknown value was found, it returns
40// ValueError. The Tag returned in this case is just stripped of the unknown
41// value. All other values are preserved. It accepts tags in the BCP 47 format
42// and extensions to this standard defined in
Abhilash S.L3b494632019-07-16 15:51:09 +053043// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
44// The resulting tag is canonicalized using the canonicalization type c.
William Kurkianea869482019-04-09 15:16:11 -040045func (c CanonType) Parse(s string) (t Tag, err error) {
Abhilash S.L3b494632019-07-16 15:51:09 +053046 tt, err := language.Parse(s)
47 if err != nil {
48 return makeTag(tt), err
William Kurkianea869482019-04-09 15:16:11 -040049 }
Abhilash S.L3b494632019-07-16 15:51:09 +053050 tt, changed := canonicalize(c, tt)
William Kurkianea869482019-04-09 15:16:11 -040051 if changed {
Abhilash S.L3b494632019-07-16 15:51:09 +053052 tt.RemakeString()
William Kurkianea869482019-04-09 15:16:11 -040053 }
Abhilash S.L3b494632019-07-16 15:51:09 +053054 return makeTag(tt), err
William Kurkianea869482019-04-09 15:16:11 -040055}
56
57// Compose creates a Tag from individual parts, which may be of type Tag, Base,
58// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
59// Base, Script or Region or slice of type Variant or Extension is passed more
60// than once, the latter will overwrite the former. Variants and Extensions are
61// accumulated, but if two extensions of the same type are passed, the latter
Abhilash S.L3b494632019-07-16 15:51:09 +053062// will replace the former. For -u extensions, though, the key-type pairs are
63// added, where later values overwrite older ones. A Tag overwrites all former
64// values and typically only makes sense as the first argument. The resulting
65// tag is returned after canonicalizing using the Default CanonType. If one or
66// more errors are encountered, one of the errors is returned.
William Kurkianea869482019-04-09 15:16:11 -040067func Compose(part ...interface{}) (t Tag, err error) {
68 return Default.Compose(part...)
69}
70
71// Compose creates a Tag from individual parts, which may be of type Tag, Base,
72// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
73// Base, Script or Region or slice of type Variant or Extension is passed more
74// than once, the latter will overwrite the former. Variants and Extensions are
75// accumulated, but if two extensions of the same type are passed, the latter
Abhilash S.L3b494632019-07-16 15:51:09 +053076// will replace the former. For -u extensions, though, the key-type pairs are
77// added, where later values overwrite older ones. A Tag overwrites all former
78// values and typically only makes sense as the first argument. The resulting
79// tag is returned after canonicalizing using CanonType c. If one or more errors
80// are encountered, one of the errors is returned.
William Kurkianea869482019-04-09 15:16:11 -040081func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
Abhilash S.L3b494632019-07-16 15:51:09 +053082 var b language.Builder
83 if err = update(&b, part...); err != nil {
William Kurkianea869482019-04-09 15:16:11 -040084 return und, err
85 }
Abhilash S.L3b494632019-07-16 15:51:09 +053086 b.Tag, _ = canonicalize(c, b.Tag)
87 return makeTag(b.Make()), err
William Kurkianea869482019-04-09 15:16:11 -040088}
89
90var errInvalidArgument = errors.New("invalid Extension or Variant")
91
Abhilash S.L3b494632019-07-16 15:51:09 +053092func update(b *language.Builder, part ...interface{}) (err error) {
William Kurkianea869482019-04-09 15:16:11 -040093 for _, x := range part {
94 switch v := x.(type) {
95 case Tag:
Abhilash S.L3b494632019-07-16 15:51:09 +053096 b.SetTag(v.tag())
William Kurkianea869482019-04-09 15:16:11 -040097 case Base:
Abhilash S.L3b494632019-07-16 15:51:09 +053098 b.Tag.LangID = v.langID
William Kurkianea869482019-04-09 15:16:11 -040099 case Script:
Abhilash S.L3b494632019-07-16 15:51:09 +0530100 b.Tag.ScriptID = v.scriptID
William Kurkianea869482019-04-09 15:16:11 -0400101 case Region:
Abhilash S.L3b494632019-07-16 15:51:09 +0530102 b.Tag.RegionID = v.regionID
William Kurkianea869482019-04-09 15:16:11 -0400103 case Variant:
Abhilash S.L3b494632019-07-16 15:51:09 +0530104 if v.variant == "" {
105 err = errInvalidArgument
106 break
William Kurkianea869482019-04-09 15:16:11 -0400107 }
Abhilash S.L3b494632019-07-16 15:51:09 +0530108 b.AddVariant(v.variant)
William Kurkianea869482019-04-09 15:16:11 -0400109 case Extension:
Abhilash S.L3b494632019-07-16 15:51:09 +0530110 if v.s == "" {
111 err = errInvalidArgument
112 break
William Kurkianea869482019-04-09 15:16:11 -0400113 }
Abhilash S.L3b494632019-07-16 15:51:09 +0530114 b.SetExt(v.s)
William Kurkianea869482019-04-09 15:16:11 -0400115 case []Variant:
Abhilash S.L3b494632019-07-16 15:51:09 +0530116 b.ClearVariants()
117 for _, v := range v {
118 b.AddVariant(v.variant)
William Kurkianea869482019-04-09 15:16:11 -0400119 }
120 case []Extension:
Abhilash S.L3b494632019-07-16 15:51:09 +0530121 b.ClearExtensions()
William Kurkianea869482019-04-09 15:16:11 -0400122 for _, e := range v {
Abhilash S.L3b494632019-07-16 15:51:09 +0530123 b.SetExt(e.s)
William Kurkianea869482019-04-09 15:16:11 -0400124 }
125 // TODO: support parsing of raw strings based on morphology or just extensions?
126 case error:
Abhilash S.L3b494632019-07-16 15:51:09 +0530127 if v != nil {
128 err = v
William Kurkianea869482019-04-09 15:16:11 -0400129 }
William Kurkianea869482019-04-09 15:16:11 -0400130 }
131 }
Abhilash S.L3b494632019-07-16 15:51:09 +0530132 return
William Kurkianea869482019-04-09 15:16:11 -0400133}
134
135var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
136
137// ParseAcceptLanguage parses the contents of an Accept-Language header as
138// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
139// a list of corresponding quality weights. It is more permissive than RFC 2616
140// and may return non-nil slices even if the input is not valid.
141// The Tags will be sorted by highest weight first and then by first occurrence.
142// Tags with a weight of zero will be dropped. An error will be returned if the
143// input could not be parsed.
144func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
145 var entry string
146 for s != "" {
147 if entry, s = split(s, ','); entry == "" {
148 continue
149 }
150
151 entry, weight := split(entry, ';')
152
153 // Scan the language.
154 t, err := Parse(entry)
155 if err != nil {
156 id, ok := acceptFallback[entry]
157 if !ok {
158 return nil, nil, err
159 }
Abhilash S.L3b494632019-07-16 15:51:09 +0530160 t = makeTag(language.Tag{LangID: id})
William Kurkianea869482019-04-09 15:16:11 -0400161 }
162
163 // Scan the optional weight.
164 w := 1.0
165 if weight != "" {
166 weight = consume(weight, 'q')
167 weight = consume(weight, '=')
168 // consume returns the empty string when a token could not be
169 // consumed, resulting in an error for ParseFloat.
170 if w, err = strconv.ParseFloat(weight, 32); err != nil {
171 return nil, nil, errInvalidWeight
172 }
173 // Drop tags with a quality weight of 0.
174 if w <= 0 {
175 continue
176 }
177 }
178
179 tag = append(tag, t)
180 q = append(q, float32(w))
181 }
182 sortStable(&tagSort{tag, q})
183 return tag, q, nil
184}
185
// consume removes a leading token c from s and returns the remainder with
// leading and trailing white space trimmed. It returns the empty string if s
// is empty or does not start with c; note that the result is also empty when
// nothing but white space follows the token.
func consume(s string, c byte) string {
	if s == "" || s[0] != c {
		return ""
	}
	return strings.TrimSpace(s[1:])
}
194
// split cuts s at the first occurrence of the byte c and returns the text
// before it as head and the text after it as tail, each with leading and
// trailing white space trimmed. If c does not occur in s, head is the trimmed
// s and tail is the empty string.
func split(s string, c byte) (head, tail string) {
	if i := strings.IndexByte(s, c); i >= 0 {
		return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
	}
	return strings.TrimSpace(s), ""
}
201
Abhilash S.L3b494632019-07-16 15:51:09 +0530202// Add hack mapping to deal with a small number of cases that occur
William Kurkianea869482019-04-09 15:16:11 -0400203// in Accept-Language (with reasonable frequency).
Abhilash S.L3b494632019-07-16 15:51:09 +0530204var acceptFallback = map[string]language.Language{
William Kurkianea869482019-04-09 15:16:11 -0400205 "english": _en,
206 "deutsch": _de,
207 "italian": _it,
208 "french": _fr,
209 "*": _mul, // defined in the spec to match all languages.
210}
211
212type tagSort struct {
213 tag []Tag
214 q []float32
215}
216
217func (s *tagSort) Len() int {
218 return len(s.q)
219}
220
221func (s *tagSort) Less(i, j int) bool {
222 return s.q[i] > s.q[j]
223}
224
225func (s *tagSort) Swap(i, j int) {
226 s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
227 s.q[i], s.q[j] = s.q[j], s.q[i]
228}