| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package language |
| |
| import ( |
| "errors" |
| "strconv" |
| "strings" |
| |
| "golang.org/x/text/internal/language" |
| ) |
| |
| // ValueError is returned by any of the parsing functions when the |
| // input is well-formed but the respective subtag is not recognized |
| // as a valid value. |
| type ValueError interface { |
| error |
| |
| // Subtag returns the subtag for which the error occurred. |
| Subtag() string |
| } |
| |
| // Parse parses the given BCP 47 string and returns a valid Tag. If parsing |
| // failed it returns an error and any part of the tag that could be parsed. |
| // If parsing succeeded but an unknown value was found, it returns |
| // ValueError. The Tag returned in this case is just stripped of the unknown |
| // value. All other values are preserved. It accepts tags in the BCP 47 format |
| // and extensions to this standard defined in |
| // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. |
| // The resulting tag is canonicalized using the default canonicalization type. |
| func Parse(s string) (t Tag, err error) { |
| return Default.Parse(s) |
| } |
| |
| // Parse parses the given BCP 47 string and returns a valid Tag. If parsing |
| // failed it returns an error and any part of the tag that could be parsed. |
| // If parsing succeeded but an unknown value was found, it returns |
| // ValueError. The Tag returned in this case is just stripped of the unknown |
| // value. All other values are preserved. It accepts tags in the BCP 47 format |
| // and extensions to this standard defined in |
| // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. |
| // The resulting tag is canonicalized using the canonicalization type c. |
| func (c CanonType) Parse(s string) (t Tag, err error) { |
| tt, err := language.Parse(s) |
| if err != nil { |
| return makeTag(tt), err |
| } |
| tt, changed := canonicalize(c, tt) |
| if changed { |
| tt.RemakeString() |
| } |
| return makeTag(tt), err |
| } |
| |
| // Compose creates a Tag from individual parts, which may be of type Tag, Base, |
| // Script, Region, Variant, []Variant, Extension, []Extension or error. If a |
| // Base, Script or Region or slice of type Variant or Extension is passed more |
| // than once, the latter will overwrite the former. Variants and Extensions are |
| // accumulated, but if two extensions of the same type are passed, the latter |
| // will replace the former. For -u extensions, though, the key-type pairs are |
| // added, where later values overwrite older ones. A Tag overwrites all former |
| // values and typically only makes sense as the first argument. The resulting |
| // tag is returned after canonicalizing using the Default CanonType. If one or |
| // more errors are encountered, one of the errors is returned. |
| func Compose(part ...interface{}) (t Tag, err error) { |
| return Default.Compose(part...) |
| } |
| |
| // Compose creates a Tag from individual parts, which may be of type Tag, Base, |
| // Script, Region, Variant, []Variant, Extension, []Extension or error. If a |
| // Base, Script or Region or slice of type Variant or Extension is passed more |
| // than once, the latter will overwrite the former. Variants and Extensions are |
| // accumulated, but if two extensions of the same type are passed, the latter |
| // will replace the former. For -u extensions, though, the key-type pairs are |
| // added, where later values overwrite older ones. A Tag overwrites all former |
| // values and typically only makes sense as the first argument. The resulting |
| // tag is returned after canonicalizing using CanonType c. If one or more errors |
| // are encountered, one of the errors is returned. |
| func (c CanonType) Compose(part ...interface{}) (t Tag, err error) { |
| var b language.Builder |
| if err = update(&b, part...); err != nil { |
| return und, err |
| } |
| b.Tag, _ = canonicalize(c, b.Tag) |
| return makeTag(b.Make()), err |
| } |
| |
| var errInvalidArgument = errors.New("invalid Extension or Variant") |
| |
| func update(b *language.Builder, part ...interface{}) (err error) { |
| for _, x := range part { |
| switch v := x.(type) { |
| case Tag: |
| b.SetTag(v.tag()) |
| case Base: |
| b.Tag.LangID = v.langID |
| case Script: |
| b.Tag.ScriptID = v.scriptID |
| case Region: |
| b.Tag.RegionID = v.regionID |
| case Variant: |
| if v.variant == "" { |
| err = errInvalidArgument |
| break |
| } |
| b.AddVariant(v.variant) |
| case Extension: |
| if v.s == "" { |
| err = errInvalidArgument |
| break |
| } |
| b.SetExt(v.s) |
| case []Variant: |
| b.ClearVariants() |
| for _, v := range v { |
| b.AddVariant(v.variant) |
| } |
| case []Extension: |
| b.ClearExtensions() |
| for _, e := range v { |
| b.SetExt(e.s) |
| } |
| // TODO: support parsing of raw strings based on morphology or just extensions? |
| case error: |
| if v != nil { |
| err = v |
| } |
| } |
| } |
| return |
| } |
| |
| var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") |
| |
| // ParseAcceptLanguage parses the contents of an Accept-Language header as |
| // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and |
| // a list of corresponding quality weights. It is more permissive than RFC 2616 |
| // and may return non-nil slices even if the input is not valid. |
| // The Tags will be sorted by highest weight first and then by first occurrence. |
| // Tags with a weight of zero will be dropped. An error will be returned if the |
| // input could not be parsed. |
| func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) { |
| var entry string |
| for s != "" { |
| if entry, s = split(s, ','); entry == "" { |
| continue |
| } |
| |
| entry, weight := split(entry, ';') |
| |
| // Scan the language. |
| t, err := Parse(entry) |
| if err != nil { |
| id, ok := acceptFallback[entry] |
| if !ok { |
| return nil, nil, err |
| } |
| t = makeTag(language.Tag{LangID: id}) |
| } |
| |
| // Scan the optional weight. |
| w := 1.0 |
| if weight != "" { |
| weight = consume(weight, 'q') |
| weight = consume(weight, '=') |
| // consume returns the empty string when a token could not be |
| // consumed, resulting in an error for ParseFloat. |
| if w, err = strconv.ParseFloat(weight, 32); err != nil { |
| return nil, nil, errInvalidWeight |
| } |
| // Drop tags with a quality weight of 0. |
| if w <= 0 { |
| continue |
| } |
| } |
| |
| tag = append(tag, t) |
| q = append(q, float32(w)) |
| } |
| sortStable(&tagSort{tag, q}) |
| return tag, q, nil |
| } |
| |
| // consume removes a leading token c from s and returns the result or the empty |
| // string if there is no such token. |
| func consume(s string, c byte) string { |
| if s == "" || s[0] != c { |
| return "" |
| } |
| return strings.TrimSpace(s[1:]) |
| } |
| |
| func split(s string, c byte) (head, tail string) { |
| if i := strings.IndexByte(s, c); i >= 0 { |
| return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:]) |
| } |
| return strings.TrimSpace(s), "" |
| } |
| |
| // Add hack mapping to deal with a small number of cases that occur |
| // in Accept-Language (with reasonable frequency). |
| var acceptFallback = map[string]language.Language{ |
| "english": _en, |
| "deutsch": _de, |
| "italian": _it, |
| "french": _fr, |
| "*": _mul, // defined in the spec to match all languages. |
| } |
| |
| type tagSort struct { |
| tag []Tag |
| q []float32 |
| } |
| |
| func (s *tagSort) Len() int { |
| return len(s.q) |
| } |
| |
| func (s *tagSort) Less(i, j int) bool { |
| return s.q[i] > s.q[j] |
| } |
| |
| func (s *tagSort) Swap(i, j int) { |
| s.tag[i], s.tag[j] = s.tag[j], s.tag[i] |
| s.q[i], s.q[j] = s.q[j], s.q[i] |
| } |