William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 1 | // Copyright 2015 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Package colltab contains functionality related to collation tables. |
| 6 | // It is only to be used by the collate and search packages. |
| 7 | package colltab // import "golang.org/x/text/internal/colltab" |
| 8 | |
| 9 | import ( |
| 10 | "sort" |
| 11 | |
| 12 | "golang.org/x/text/language" |
| 13 | ) |
| 14 | |
| 15 | // MatchLang finds the index of t in tags, using a matching algorithm used for |
| 16 | // collation and search. tags[0] must be language.Und, the remaining tags should |
| 17 | // be sorted alphabetically. |
| 18 | // |
| 19 | // Language matching for collation and search is different from the matching |
| 20 | // defined by language.Matcher: the (inferred) base language must be an exact |
| 21 | // match for the relevant fields. For example, "gsw" should not match "de". |
| 22 | // Also the parent relation is different, as a parent may have a different |
| 23 | // script. So usually the parent of zh-Hant is und, whereas for MatchLang it is |
| 24 | // zh. |
| 25 | func MatchLang(t language.Tag, tags []language.Tag) int { |
| 26 | // Canonicalize the values, including collapsing macro languages. |
| 27 | t, _ = language.All.Canonicalize(t) |
| 28 | |
| 29 | base, conf := t.Base() |
| 30 | // Estimate the base language, but only use high-confidence values. |
| 31 | if conf < language.High { |
| 32 | // The root locale supports "search" and "standard". We assume that any |
| 33 | // implementation will only use one of both. |
| 34 | return 0 |
| 35 | } |
| 36 | |
| 37 | // Maximize base and script and normalize the tag. |
| 38 | if _, s, r := t.Raw(); (r != language.Region{}) { |
| 39 | p, _ := language.Raw.Compose(base, s, r) |
| 40 | // Taking the parent forces the script to be maximized. |
| 41 | p = p.Parent() |
| 42 | // Add back region and extensions. |
| 43 | t, _ = language.Raw.Compose(p, r, t.Extensions()) |
| 44 | } else { |
| 45 | // Set the maximized base language. |
| 46 | t, _ = language.Raw.Compose(base, s, t.Extensions()) |
| 47 | } |
| 48 | |
| 49 | // Find start index of the language tag. |
| 50 | start := 1 + sort.Search(len(tags)-1, func(i int) bool { |
| 51 | b, _, _ := tags[i+1].Raw() |
| 52 | return base.String() <= b.String() |
| 53 | }) |
| 54 | if start < len(tags) { |
| 55 | if b, _, _ := tags[start].Raw(); b != base { |
| 56 | return 0 |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | // Besides the base language, script and region, only the collation type and |
| 61 | // the custom variant defined in the 'u' extension are used to distinguish a |
| 62 | // locale. |
| 63 | // Strip all variants and extensions and add back the custom variant. |
| 64 | tdef, _ := language.Raw.Compose(t.Raw()) |
| 65 | tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va")) |
| 66 | |
| 67 | // First search for a specialized collation type, if present. |
| 68 | try := []language.Tag{tdef} |
| 69 | if co := t.TypeForKey("co"); co != "" { |
| 70 | tco, _ := tdef.SetTypeForKey("co", co) |
| 71 | try = []language.Tag{tco, tdef} |
| 72 | } |
| 73 | |
| 74 | for _, tx := range try { |
| 75 | for ; tx != language.Und; tx = parent(tx) { |
| 76 | for i, t := range tags[start:] { |
| 77 | if b, _, _ := t.Raw(); b != base { |
| 78 | break |
| 79 | } |
| 80 | if tx == t { |
| 81 | return start + i |
| 82 | } |
| 83 | } |
| 84 | } |
| 85 | } |
| 86 | return 0 |
| 87 | } |
| 88 | |
| 89 | // parent computes the structural parent. This means inheritance may change |
| 90 | // script. So, unlike the CLDR parent, parent(zh-Hant) == zh. |
| 91 | func parent(t language.Tag) language.Tag { |
| 92 | if t.TypeForKey("va") != "" { |
| 93 | t, _ = t.SetTypeForKey("va", "") |
| 94 | return t |
| 95 | } |
| 96 | result := language.Und |
| 97 | if b, s, r := t.Raw(); (r != language.Region{}) { |
| 98 | result, _ = language.Raw.Compose(b, s, t.Extensions()) |
| 99 | } else if (s != language.Script{}) { |
| 100 | result, _ = language.Raw.Compose(b, t.Extensions()) |
| 101 | } else if (b != language.Base{}) { |
| 102 | result, _ = language.Raw.Compose(t.Extensions()) |
| 103 | } |
| 104 | return result |
| 105 | } |