| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package language |
| |
| import ( |
| "bytes" |
| "errors" |
| "fmt" |
| "sort" |
| "strconv" |
| "strings" |
| |
| "golang.org/x/text/internal/tag" |
| ) |
| |
| // isAlpha returns true if the byte is not a digit. |
| // b must be an ASCII letter or digit. |
| func isAlpha(b byte) bool { |
| return b > '9' |
| } |
| |
| // isAlphaNum returns true if the string contains only ASCII letters or digits. |
| func isAlphaNum(s []byte) bool { |
| for _, c := range s { |
| if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') { |
| return false |
| } |
| } |
| return true |
| } |
| |
| // errSyntax is returned by any of the parsing functions when the |
| // input is not well-formed, according to BCP 47. |
| // TODO: return the position at which the syntax error occurred? |
| var errSyntax = errors.New("language: tag is not well-formed") |
| |
| // ValueError is returned by any of the parsing functions when the |
| // input is well-formed but the respective subtag is not recognized |
| // as a valid value. |
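| // |
| // Callers can distinguish an unknown-value error from a syntax error with a |
| // type assertion; a minimal sketch, assuming input holds the tag to parse: |
| // |
| //	if _, err := Parse(input); err != nil { |
| //		if verr, ok := err.(ValueError); ok { |
| //			// The tag was well-formed; verr.Subtag() names the |
| //			// unrecognized subtag. |
| //		} |
| //	} |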
| type ValueError struct { |
| v [8]byte |
| } |
| |
| func mkErrInvalid(s []byte) error { |
| var e ValueError |
| copy(e.v[:], s) |
| return e |
| } |
| |
| func (e ValueError) tag() []byte { |
| n := bytes.IndexByte(e.v[:], 0) |
| if n == -1 { |
| n = 8 |
| } |
| return e.v[:n] |
| } |
| |
| // Error implements the error interface. |
| func (e ValueError) Error() string { |
| return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag()) |
| } |
| |
| // Subtag returns the subtag for which the error occurred. |
| func (e ValueError) Subtag() string { |
| return string(e.tag()) |
| } |
| |
| // scanner is used to scan BCP 47 tokens, which are separated by _ or -. |
| type scanner struct { |
| b []byte |
| bytes [max99thPercentileSize]byte |
| token []byte |
| start int // start position of the current token |
| end int // end position of the current token |
| next int // next point for scan |
| err error |
| done bool |
| } |
| |
| func makeScannerString(s string) scanner { |
| scan := scanner{} |
| if len(s) <= len(scan.bytes) { |
| scan.b = scan.bytes[:copy(scan.bytes[:], s)] |
| } else { |
| scan.b = []byte(s) |
| } |
| scan.init() |
| return scan |
| } |
| |
| // makeScanner returns a scanner using b as the input buffer. |
| // b is not copied and may be modified by the scanner routines. |
| func makeScanner(b []byte) scanner { |
| scan := scanner{b: b} |
| scan.init() |
| return scan |
| } |
| |
| func (s *scanner) init() { |
| for i, c := range s.b { |
| if c == '_' { |
| s.b[i] = '-' |
| } |
| } |
| s.scan() |
| } |
| |
| // toLower converts the string between start and end to lower case. |
| func (s *scanner) toLower(start, end int) { |
| for i := start; i < end; i++ { |
| c := s.b[i] |
| if 'A' <= c && c <= 'Z' { |
| s.b[i] += 'a' - 'A' |
| } |
| } |
| } |
| |
| func (s *scanner) setError(e error) { |
| if s.err == nil || (e == errSyntax && s.err != errSyntax) { |
| s.err = e |
| } |
| } |
| |
| // resizeRange shrinks or grows the array at position oldStart such that |
| // a new string of size newSize can fit between oldStart and oldEnd. |
| // Sets the scan point to after the resized range. |
| func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) { |
| s.start = oldStart |
| if end := oldStart + newSize; end != oldEnd { |
| diff := end - oldEnd |
| var b []byte |
| if n := len(s.b) + diff; n > cap(s.b) { |
| // The resized content does not fit in the current buffer: allocate a |
| // new one and copy the part before the resized range. |
| b = make([]byte, n) |
| copy(b, s.b[:oldStart]) |
| } else { |
| b = s.b[:n] |
| } |
| // Move the tail after the resized range into place (copy handles overlap). |
| copy(b[end:], s.b[oldEnd:]) |
| s.b = b |
| s.next = end + (s.next - s.end) |
| s.end = end |
| } |
| } |
| |
| // replace replaces the current token with repl. |
| func (s *scanner) replace(repl string) { |
| s.resizeRange(s.start, s.end, len(repl)) |
| copy(s.b[s.start:], repl) |
| } |
| |
| // gobble removes the current token from the input. |
| // Caller must call scan after calling gobble. |
| func (s *scanner) gobble(e error) { |
| s.setError(e) |
| if s.start == 0 { |
| s.b = s.b[:copy(s.b, s.b[s.next:])] |
| s.end = 0 |
| } else { |
| s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])] |
| s.end = s.start - 1 |
| } |
| s.next = s.start |
| } |
| |
| // deleteRange removes the given range from s.b before the current token. |
| func (s *scanner) deleteRange(start, end int) { |
| s.setError(errSyntax) |
| s.b = s.b[:start+copy(s.b[start:], s.b[end:])] |
| diff := end - start |
| s.next -= diff |
| s.start -= diff |
| s.end -= diff |
| } |
| |
| // scan parses the next token of a BCP 47 string. Tokens that are larger |
| // than 8 characters or include non-alphanumeric characters result in an error |
| // and are gobbled and removed from the output. |
| // It returns the end position of the last token consumed. |
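| // For example, once init has normalized '_' to '-', scanning "en-Latn-US" |
| // yields the successive tokens "en", "Latn", and "US". |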
| func (s *scanner) scan() (end int) { |
| end = s.end |
| s.token = nil |
| for s.start = s.next; s.next < len(s.b); { |
| i := bytes.IndexByte(s.b[s.next:], '-') |
| if i == -1 { |
| s.end = len(s.b) |
| s.next = len(s.b) |
| i = s.end - s.start |
| } else { |
| s.end = s.next + i |
| s.next = s.end + 1 |
| } |
| token := s.b[s.start:s.end] |
| if i < 1 || i > 8 || !isAlphaNum(token) { |
| s.gobble(errSyntax) |
| continue |
| } |
| s.token = token |
| return end |
| } |
| if n := len(s.b); n > 0 && s.b[n-1] == '-' { |
| s.setError(errSyntax) |
| s.b = s.b[:len(s.b)-1] |
| } |
| s.done = true |
| return end |
| } |
| |
| // acceptMinSize parses multiple tokens of the given size or greater. |
| // It returns the end position of the last token consumed. |
| func (s *scanner) acceptMinSize(min int) (end int) { |
| end = s.end |
| s.scan() |
| for ; len(s.token) >= min; s.scan() { |
| end = s.end |
| } |
| return end |
| } |
| |
| // Parse parses the given BCP 47 string and returns a valid Tag. If parsing |
| // fails, it returns an error and any part of the tag that could be parsed. |
| // If parsing succeeded but an unknown value was found, it returns |
| // ValueError. The Tag returned in this case is just stripped of the unknown |
| // value. All other values are preserved. It accepts tags in the BCP 47 format |
| // and extensions to this standard defined in |
| // http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. |
| // The resulting tag is canonicalized using the default canonicalization type. |
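| // |
| // A minimal usage sketch (the input tag is illustrative): |
| // |
| //	t, err := Parse("en_US") // '_' is accepted and normalized to '-' |
| //	if err != nil { |
| //		// handle the error; t still holds whatever could be parsed |
| //	} |
| //	_ = t // on success t represents the tag "en-US" |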
| func Parse(s string) (t Tag, err error) { |
| return Default.Parse(s) |
| } |
| |
| // Parse parses the given BCP 47 string and returns a valid Tag. If parsing |
| // fails, it returns an error and any part of the tag that could be parsed. |
| // If parsing succeeded but an unknown value was found, it returns |
| // ValueError. The Tag returned in this case is just stripped of the unknown |
| // value. All other values are preserved. It accepts tags in the BCP 47 format |
| // and extensions to this standard defined in |
| // http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. |
| // The resulting tag is canonicalized using the canonicalization type c. |
| func (c CanonType) Parse(s string) (t Tag, err error) { |
| // TODO: consider supporting old-style locale key-value pairs. |
| if s == "" { |
| return und, errSyntax |
| } |
| if len(s) <= maxAltTaglen { |
| b := [maxAltTaglen]byte{} |
| for i, c := range s { |
| // Generating invalid UTF-8 is okay as it won't match. |
| if 'A' <= c && c <= 'Z' { |
| c += 'a' - 'A' |
| } else if c == '_' { |
| c = '-' |
| } |
| b[i] = byte(c) |
| } |
| if t, ok := grandfathered(b); ok { |
| return t, nil |
| } |
| } |
| scan := makeScannerString(s) |
| t, err = parse(&scan, s) |
| t, changed := t.canonicalize(c) |
| if changed { |
| t.remakeString() |
| } |
| return t, err |
| } |
| |
| func parse(scan *scanner, s string) (t Tag, err error) { |
| t = und |
| var end int |
| if n := len(scan.token); n <= 1 { |
| scan.toLower(0, len(scan.b)) |
| if n == 0 || scan.token[0] != 'x' { |
| return t, errSyntax |
| } |
| end = parseExtensions(scan) |
| } else if n >= 4 { |
| return und, errSyntax |
| } else { // the usual case |
| t, end = parseTag(scan) |
| if n := len(scan.token); n == 1 { |
| t.pExt = uint16(end) |
| end = parseExtensions(scan) |
| } else if end < len(scan.b) { |
| scan.setError(errSyntax) |
| scan.b = scan.b[:end] |
| } |
| } |
| if int(t.pVariant) < len(scan.b) { |
| if end < len(s) { |
| s = s[:end] |
| } |
| if len(s) > 0 && tag.Compare(s, scan.b) == 0 { |
| t.str = s |
| } else { |
| t.str = string(scan.b) |
| } |
| } else { |
| t.pVariant, t.pExt = 0, 0 |
| } |
| return t, scan.err |
| } |
| |
| // parseTag parses language, script, region and variants. |
| // It returns a Tag and the end position in the input that was parsed. |
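| // For example, per BCP 47 a primary language followed by an extlang, such as |
| // "zh-cmn", is reduced to the extlang form "cmn". |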
| func parseTag(scan *scanner) (t Tag, end int) { |
| var e error |
| // TODO: set an error if an unknown lang, script or region is encountered. |
| t.lang, e = getLangID(scan.token) |
| scan.setError(e) |
| scan.replace(t.lang.String()) |
| langStart := scan.start |
| end = scan.scan() |
| for len(scan.token) == 3 && isAlpha(scan.token[0]) { |
| // From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent |
| // to a tag of the form <extlang>. |
| lang, e := getLangID(scan.token) |
| if lang != 0 { |
| t.lang = lang |
| copy(scan.b[langStart:], lang.String()) |
| scan.b[langStart+3] = '-' |
| scan.start = langStart + 4 |
| } |
| scan.gobble(e) |
| end = scan.scan() |
| } |
| if len(scan.token) == 4 && isAlpha(scan.token[0]) { |
| t.script, e = getScriptID(script, scan.token) |
| if t.script == 0 { |
| scan.gobble(e) |
| } |
| end = scan.scan() |
| } |
| if n := len(scan.token); n >= 2 && n <= 3 { |
| t.region, e = getRegionID(scan.token) |
| if t.region == 0 { |
| scan.gobble(e) |
| } else { |
| scan.replace(t.region.String()) |
| } |
| end = scan.scan() |
| } |
| scan.toLower(scan.start, len(scan.b)) |
| t.pVariant = byte(end) |
| end = parseVariants(scan, end, t) |
| t.pExt = uint16(end) |
| return t, end |
| } |
| |
| var separator = []byte{'-'} |
| |
| // parseVariants scans tokens as long as each token is a valid variant string. |
| // Duplicate variants are removed. |
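| // For example (illustrative), a duplicated variant as in "de-1901-1901" is |
| // reduced to a single "1901" subtag. |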
| func parseVariants(scan *scanner, end int, t Tag) int { |
| start := scan.start |
| varIDBuf := [4]uint8{} |
| variantBuf := [4][]byte{} |
| varID := varIDBuf[:0] |
| variant := variantBuf[:0] |
| last := -1 |
| needSort := false |
| for ; len(scan.token) >= 4; scan.scan() { |
| // TODO: measure the impact of needing this conversion and redesign |
| // the data structure if there is an issue. |
| v, ok := variantIndex[string(scan.token)] |
| if !ok { |
| // unknown variant |
| // TODO: allow user-defined variants? |
| scan.gobble(mkErrInvalid(scan.token)) |
| continue |
| } |
| varID = append(varID, v) |
| variant = append(variant, scan.token) |
| if !needSort { |
| if last < int(v) { |
| last = int(v) |
| } else { |
| needSort = true |
| // There are no legal combinations of more than 7 variants |
| // (and this is by no means a useful sequence). |
| const maxVariants = 8 |
| if len(varID) > maxVariants { |
| break |
| } |
| } |
| } |
| end = scan.end |
| } |
| if needSort { |
| sort.Sort(variantsSort{varID, variant}) |
| k, l := 0, -1 |
| for i, v := range varID { |
| w := int(v) |
| if l == w { |
| // Remove duplicates. |
| continue |
| } |
| varID[k] = varID[i] |
| variant[k] = variant[i] |
| k++ |
| l = w |
| } |
| if str := bytes.Join(variant[:k], separator); len(str) == 0 { |
| end = start - 1 |
| } else { |
| scan.resizeRange(start, end, len(str)) |
| copy(scan.b[scan.start:], str) |
| end = scan.end |
| } |
| } |
| return end |
| } |
| |
| type variantsSort struct { |
| i []uint8 |
| v [][]byte |
| } |
| |
| func (s variantsSort) Len() int { |
| return len(s.i) |
| } |
| |
| func (s variantsSort) Swap(i, j int) { |
| s.i[i], s.i[j] = s.i[j], s.i[i] |
| s.v[i], s.v[j] = s.v[j], s.v[i] |
| } |
| |
| func (s variantsSort) Less(i, j int) bool { |
| return s.i[i] < s.i[j] |
| } |
| |
| type bytesSort [][]byte |
| |
| func (b bytesSort) Len() int { |
| return len(b) |
| } |
| |
| func (b bytesSort) Swap(i, j int) { |
| b[i], b[j] = b[j], b[i] |
| } |
| |
| func (b bytesSort) Less(i, j int) bool { |
| return bytes.Compare(b[i], b[j]) == -1 |
| } |
| |
| // parseExtensions parses and normalizes the extensions in the buffer. |
| // It returns the last position of scan.b that is part of any extension. |
| // It also trims scan.b to remove excess parts accordingly. |
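| // Extensions are sorted by singleton, with any private use ('x') extension |
| // kept last; for example, "en-u-co-phonebk-t-en" would be normalized to |
| // "en-t-en-u-co-phonebk". |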
| func parseExtensions(scan *scanner) int { |
| start := scan.start |
| exts := [][]byte{} |
| private := []byte{} |
| end := scan.end |
| for len(scan.token) == 1 { |
| extStart := scan.start |
| ext := scan.token[0] |
| end = parseExtension(scan) |
| extension := scan.b[extStart:end] |
| if len(extension) < 3 || (ext != 'x' && len(extension) < 4) { |
| scan.setError(errSyntax) |
| end = extStart |
| continue |
| } else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) { |
| scan.b = scan.b[:end] |
| return end |
| } else if ext == 'x' { |
| private = extension |
| break |
| } |
| exts = append(exts, extension) |
| } |
| sort.Sort(bytesSort(exts)) |
| if len(private) > 0 { |
| exts = append(exts, private) |
| } |
| scan.b = scan.b[:start] |
| if len(exts) > 0 { |
| scan.b = append(scan.b, bytes.Join(exts, separator)...) |
| } else if start > 0 { |
| // Strip trailing '-'. |
| scan.b = scan.b[:start-1] |
| } |
| return end |
| } |
| |
| // parseExtension parses a single extension and returns the position of |
| // the extension end. |
| func parseExtension(scan *scanner) int { |
| start, end := scan.start, scan.end |
| switch scan.token[0] { |
| case 'u': |
| attrStart := end |
| scan.scan() |
| for last := []byte{}; len(scan.token) > 2; scan.scan() { |
| if bytes.Compare(scan.token, last) != -1 { |
| // Attributes are unsorted. Start over from scratch. |
| p := attrStart + 1 |
| scan.next = p |
| attrs := [][]byte{} |
| for scan.scan(); len(scan.token) > 2; scan.scan() { |
| attrs = append(attrs, scan.token) |
| end = scan.end |
| } |
| sort.Sort(bytesSort(attrs)) |
| copy(scan.b[p:], bytes.Join(attrs, separator)) |
| break |
| } |
| last = scan.token |
| end = scan.end |
| } |
| var last, key []byte |
| for attrEnd := end; len(scan.token) == 2; last = key { |
| key = scan.token |
| keyEnd := scan.end |
| end = scan.acceptMinSize(3) |
| // TODO: check key value validity |
| if keyEnd == end || bytes.Compare(key, last) != 1 { |
| // We have an invalid key or the keys are not sorted. |
| // Start scanning keys from scratch and reorder. |
| p := attrEnd + 1 |
| scan.next = p |
| keys := [][]byte{} |
| for scan.scan(); len(scan.token) == 2; { |
| keyStart, keyEnd := scan.start, scan.end |
| end = scan.acceptMinSize(3) |
| if keyEnd != end { |
| keys = append(keys, scan.b[keyStart:end]) |
| } else { |
| scan.setError(errSyntax) |
| end = keyStart |
| } |
| } |
| sort.Sort(bytesSort(keys)) |
| reordered := bytes.Join(keys, separator) |
| if e := p + len(reordered); e < end { |
| scan.deleteRange(e, end) |
| end = e |
| } |
| copy(scan.b[p:], bytes.Join(keys, separator)) |
| break |
| } |
| } |
| case 't': |
| scan.scan() |
| if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { |
| _, end = parseTag(scan) |
| scan.toLower(start, end) |
| } |
| for len(scan.token) == 2 && !isAlpha(scan.token[1]) { |
| end = scan.acceptMinSize(3) |
| } |
| case 'x': |
| end = scan.acceptMinSize(1) |
| default: |
| end = scan.acceptMinSize(2) |
| } |
| return end |
| } |
| |
| // Compose creates a Tag from individual parts, which may be of type Tag, Base, |
| // Script, Region, Variant, []Variant, Extension, []Extension or error. If a |
| // Base, Script or Region or slice of type Variant or Extension is passed more |
| // than once, the latter will overwrite the former. Variants and Extensions are |
| // accumulated, but if two extensions of the same type are passed, the latter |
| // will replace the former. A Tag overwrites all former values and typically |
| // only makes sense as the first argument. The resulting tag is returned after |
| // canonicalizing using the Default CanonType. If one or more errors are |
| // encountered, one of the errors is returned. |
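| // |
| // A minimal sketch, assuming the ParseBase and ParseRegion helpers defined |
| // elsewhere in this package: |
| // |
| //	b, _ := ParseBase("en") |
| //	r, _ := ParseRegion("US") |
| //	t, _ := Compose(b, r) // composes the tag "en-US" |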
| func Compose(part ...interface{}) (t Tag, err error) { |
| return Default.Compose(part...) |
| } |
| |
| // Compose creates a Tag from individual parts, which may be of type Tag, Base, |
| // Script, Region, Variant, []Variant, Extension, []Extension or error. If a |
| // Base, Script or Region or slice of type Variant or Extension is passed more |
| // than once, the latter will overwrite the former. Variants and Extensions are |
| // accumulated, but if two extensions of the same type are passed, the latter |
| // will replace the former. A Tag overwrites all former values and typically |
| // only makes sense as the first argument. The resulting tag is returned after |
| // canonicalizing using CanonType c. If one or more errors are encountered, |
| // one of the errors is returned. |
| func (c CanonType) Compose(part ...interface{}) (t Tag, err error) { |
| var b builder |
| if err = b.update(part...); err != nil { |
| return und, err |
| } |
| t, _ = b.tag.canonicalize(c) |
| |
| if len(b.ext) > 0 || len(b.variant) > 0 { |
| sort.Sort(sortVariant(b.variant)) |
| sort.Strings(b.ext) |
| if b.private != "" { |
| b.ext = append(b.ext, b.private) |
| } |
| n := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...) |
| buf := make([]byte, n) |
| p := t.genCoreBytes(buf) |
| t.pVariant = byte(p) |
| p += appendTokens(buf[p:], b.variant...) |
| t.pExt = uint16(p) |
| p += appendTokens(buf[p:], b.ext...) |
| t.str = string(buf[:p]) |
| } else if b.private != "" { |
| t.str = b.private |
| t.remakeString() |
| } |
| return |
| } |
| |
| type builder struct { |
| tag Tag |
| |
| private string // the x extension |
| ext []string |
| variant []string |
| |
| err error |
| } |
| |
| func (b *builder) addExt(e string) { |
| if e == "" { |
| return |
| } |
| if e[0] == 'x' { |
| b.private = e |
| } else { |
| b.ext = append(b.ext, e) |
| } |
| } |
| |
| var errInvalidArgument = errors.New("invalid Extension or Variant") |
| |
| func (b *builder) update(part ...interface{}) (err error) { |
| replace := func(l *[]string, s string, eq func(a, b string) bool) bool { |
| if s == "" { |
| b.err = errInvalidArgument |
| return true |
| } |
| for i, v := range *l { |
| if eq(v, s) { |
| (*l)[i] = s |
| return true |
| } |
| } |
| return false |
| } |
| for _, x := range part { |
| switch v := x.(type) { |
| case Tag: |
| b.tag.lang = v.lang |
| b.tag.region = v.region |
| b.tag.script = v.script |
| if v.str != "" { |
| b.variant = nil |
| for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; { |
| x, s = nextToken(s) |
| b.variant = append(b.variant, x) |
| } |
| b.ext, b.private = nil, "" |
| for i, e := int(v.pExt), ""; i < len(v.str); { |
| i, e = getExtension(v.str, i) |
| b.addExt(e) |
| } |
| } |
| case Base: |
| b.tag.lang = v.langID |
| case Script: |
| b.tag.script = v.scriptID |
| case Region: |
| b.tag.region = v.regionID |
| case Variant: |
| if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) { |
| b.variant = append(b.variant, v.variant) |
| } |
| case Extension: |
| if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) { |
| b.addExt(v.s) |
| } |
| case []Variant: |
| b.variant = nil |
| for _, x := range v { |
| b.update(x) |
| } |
| case []Extension: |
| b.ext, b.private = nil, "" |
| for _, e := range v { |
| b.update(e) |
| } |
| // TODO: support parsing of raw strings based on morphology or just extensions? |
| case error: |
| err = v |
| } |
| } |
| return |
| } |
| |
| func tokenLen(token ...string) (n int) { |
| for _, t := range token { |
| n += len(t) + 1 |
| } |
| return |
| } |
| |
| func appendTokens(b []byte, token ...string) int { |
| p := 0 |
| for _, t := range token { |
| b[p] = '-' |
| copy(b[p+1:], t) |
| p += 1 + len(t) |
| } |
| return p |
| } |
| |
| type sortVariant []string |
| |
| func (s sortVariant) Len() int { |
| return len(s) |
| } |
| |
| func (s sortVariant) Swap(i, j int) { |
| s[j], s[i] = s[i], s[j] |
| } |
| |
| func (s sortVariant) Less(i, j int) bool { |
| return variantIndex[s[i]] < variantIndex[s[j]] |
| } |
| |
| func findExt(list []string, x byte) int { |
| for i, e := range list { |
| if e[0] == x { |
| return i |
| } |
| } |
| return -1 |
| } |
| |
| // getExtension returns the end position of the extension that starts at |
| // position p in s, along with the extension itself (singleton and body). |
| func getExtension(s string, p int) (end int, ext string) { |
| if s[p] == '-' { |
| p++ |
| } |
| if s[p] == 'x' { |
| return len(s), s[p:] |
| } |
| end = nextExtension(s, p) |
| return end, s[p:end] |
| } |
| |
| // nextExtension finds the next extension within the string, searching |
| // for the -<char>- pattern from position p. |
| // In the vast majority of cases, language tags will have at most |
| // one extension, and extensions tend to be small. |
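| // For example, in "en-US-u-nu-latn" a search starting at p == 0 stops at the |
| // '-' preceding the "u" singleton. |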
| func nextExtension(s string, p int) int { |
| for n := len(s) - 3; p < n; { |
| if s[p] == '-' { |
| if s[p+2] == '-' { |
| return p |
| } |
| p += 3 |
| } else { |
| p++ |
| } |
| } |
| return len(s) |
| } |
| |
| var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") |
| |
| // ParseAcceptLanguage parses the contents of an Accept-Language header as |
| // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and |
| // a list of corresponding quality weights. It is more permissive than RFC 2616 |
| // and may return non-nil slices even if the input is not valid. |
| // The Tags will be sorted by highest weight first and then by first occurrence. |
| // Tags with a weight of zero will be dropped. An error will be returned if the |
| // input could not be parsed. |
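| // |
| // For example (weights shown are approximate): |
| // |
| //	tags, weights, err := ParseAcceptLanguage("da, en-GB;q=0.8, en;q=0.7") |
| //	// tags    == []Tag{da, en-GB, en} |
| //	// weights == []float32{1.0, 0.8, 0.7} |
| //	// err     == nil |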
| func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) { |
| var entry string |
| for s != "" { |
| if entry, s = split(s, ','); entry == "" { |
| continue |
| } |
| |
| entry, weight := split(entry, ';') |
| |
| // Scan the language. |
| t, err := Parse(entry) |
| if err != nil { |
| id, ok := acceptFallback[entry] |
| if !ok { |
| return nil, nil, err |
| } |
| t = Tag{lang: id} |
| } |
| |
| // Scan the optional weight. |
| w := 1.0 |
| if weight != "" { |
| weight = consume(weight, 'q') |
| weight = consume(weight, '=') |
| // consume returns the empty string when a token could not be |
| // consumed, resulting in an error for ParseFloat. |
| if w, err = strconv.ParseFloat(weight, 32); err != nil { |
| return nil, nil, errInvalidWeight |
| } |
| // Drop tags with a quality weight of 0. |
| if w <= 0 { |
| continue |
| } |
| } |
| |
| tag = append(tag, t) |
| q = append(q, float32(w)) |
| } |
| sortStable(&tagSort{tag, q}) |
| return tag, q, nil |
| } |
| |
| // consume removes a leading token c from s and returns the result or the empty |
| // string if there is no such token. |
| func consume(s string, c byte) string { |
| if s == "" || s[0] != c { |
| return "" |
| } |
| return strings.TrimSpace(s[1:]) |
| } |
| |
| func split(s string, c byte) (head, tail string) { |
| if i := strings.IndexByte(s, c); i >= 0 { |
| return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:]) |
| } |
| return strings.TrimSpace(s), "" |
| } |
| |
| // acceptFallback is a hack mapping to deal with a small number of cases that |
| // occur in Accept-Language (with reasonable frequency). |
| var acceptFallback = map[string]langID{ |
| "english": _en, |
| "deutsch": _de, |
| "italian": _it, |
| "french": _fr, |
| "*": _mul, // defined in the spec to match all languages. |
| } |
| |
| type tagSort struct { |
| tag []Tag |
| q []float32 |
| } |
| |
| func (s *tagSort) Len() int { |
| return len(s.q) |
| } |
| |
| func (s *tagSort) Less(i, j int) bool { |
| return s.q[i] > s.q[j] |
| } |
| |
| func (s *tagSort) Swap(i, j int) { |
| s.tag[i], s.tag[j] = s.tag[j], s.tag[i] |
| s.q[i], s.q[j] = s.q[j], s.q[i] |
| } |