khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 1 | // Copyright 2015 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // +build ignore |
| 6 | |
| 7 | package main |
| 8 | |
| 9 | // This file generates derivative tables based on the language package itself. |
| 10 | |
| 11 | import ( |
| 12 | "bytes" |
| 13 | "flag" |
| 14 | "fmt" |
| 15 | "io/ioutil" |
| 16 | "log" |
| 17 | "reflect" |
| 18 | "sort" |
| 19 | "strings" |
| 20 | |
| 21 | "golang.org/x/text/internal/gen" |
| 22 | "golang.org/x/text/language" |
| 23 | "golang.org/x/text/unicode/cldr" |
| 24 | ) |
| 25 | |
// test is the -test command-line flag (default false). Its usage text
// describes it as testing the existing tables.
var test = flag.Bool(
	"test",
	false,
	"test existing tables; can be used to compare web data with package data.",
)

// draft is the -draft command-line flag (default "contributed"). Its usage
// text describes it as the minimal draft requirement.
var draft = flag.String(
	"draft",
	"contributed",
	`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`,
)
| 34 | |
| 35 | func main() { |
| 36 | gen.Init() |
| 37 | |
| 38 | // Read the CLDR zip file. |
| 39 | r := gen.OpenCLDRCoreZip() |
| 40 | defer r.Close() |
| 41 | |
| 42 | d := &cldr.Decoder{} |
| 43 | data, err := d.DecodeZip(r) |
| 44 | if err != nil { |
| 45 | log.Fatalf("DecodeZip: %v", err) |
| 46 | } |
| 47 | |
| 48 | w := gen.NewCodeWriter() |
| 49 | defer func() { |
| 50 | buf := &bytes.Buffer{} |
| 51 | |
| 52 | if _, err = w.WriteGo(buf, "language", ""); err != nil { |
| 53 | log.Fatalf("Error formatting file index.go: %v", err) |
| 54 | } |
| 55 | |
| 56 | // Since we're generating a table for our own package we need to rewrite |
| 57 | // doing the equivalent of go fmt -r 'language.b -> b'. Using |
| 58 | // bytes.Replace will do. |
| 59 | out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1) |
| 60 | if err := ioutil.WriteFile("index.go", out, 0600); err != nil { |
| 61 | log.Fatalf("Could not create file index.go: %v", err) |
| 62 | } |
| 63 | }() |
| 64 | |
| 65 | m := map[language.Tag]bool{} |
| 66 | for _, lang := range data.Locales() { |
| 67 | // We include all locales unconditionally to be consistent with en_US. |
| 68 | // We want en_US, even though it has no data associated with it. |
| 69 | |
| 70 | // TODO: put any of the languages for which no data exists at the end |
| 71 | // of the index. This allows all components based on ICU to use that |
| 72 | // as the cutoff point. |
| 73 | // if x := data.RawLDML(lang); false || |
| 74 | // x.LocaleDisplayNames != nil || |
| 75 | // x.Characters != nil || |
| 76 | // x.Delimiters != nil || |
| 77 | // x.Measurement != nil || |
| 78 | // x.Dates != nil || |
| 79 | // x.Numbers != nil || |
| 80 | // x.Units != nil || |
| 81 | // x.ListPatterns != nil || |
| 82 | // x.Collations != nil || |
| 83 | // x.Segmentations != nil || |
| 84 | // x.Rbnf != nil || |
| 85 | // x.Annotations != nil || |
| 86 | // x.Metadata != nil { |
| 87 | |
| 88 | // TODO: support POSIX natively, albeit non-standard. |
| 89 | tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1)) |
| 90 | m[tag] = true |
| 91 | // } |
| 92 | } |
| 93 | // Include locales for plural rules, which uses a different structure. |
| 94 | for _, plurals := range data.Supplemental().Plurals { |
| 95 | for _, rules := range plurals.PluralRules { |
| 96 | for _, lang := range strings.Split(rules.Locales, " ") { |
| 97 | m[language.Make(lang)] = true |
| 98 | } |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | var core, special []language.Tag |
| 103 | |
| 104 | for t := range m { |
| 105 | if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" { |
| 106 | log.Fatalf("Unexpected extension %v in %v", x, t) |
| 107 | } |
| 108 | if len(t.Variants()) == 0 && len(t.Extensions()) == 0 { |
| 109 | core = append(core, t) |
| 110 | } else { |
| 111 | special = append(special, t) |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | w.WriteComment(` |
| 116 | NumCompactTags is the number of common tags. The maximum tag is |
| 117 | NumCompactTags-1.`) |
| 118 | w.WriteConst("NumCompactTags", len(core)+len(special)) |
| 119 | |
| 120 | sort.Sort(byAlpha(special)) |
| 121 | w.WriteVar("specialTags", special) |
| 122 | |
| 123 | // TODO: order by frequency? |
| 124 | sort.Sort(byAlpha(core)) |
| 125 | |
| 126 | // Size computations are just an estimate. |
| 127 | w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size()) |
| 128 | w.Size += len(core) * 6 // size of uint32 and uint16 |
| 129 | |
| 130 | fmt.Fprintln(w) |
| 131 | fmt.Fprintln(w, "var coreTags = map[uint32]uint16{") |
| 132 | fmt.Fprintln(w, "0x0: 0, // und") |
| 133 | i := len(special) + 1 // Und and special tags already written. |
| 134 | for _, t := range core { |
| 135 | if t == language.Und { |
| 136 | continue |
| 137 | } |
| 138 | fmt.Fprint(w.Hash, t, i) |
| 139 | b, s, r := t.Raw() |
| 140 | fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n", |
| 141 | getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number |
| 142 | getIndex(s, 2), |
| 143 | getIndex(r, 3), |
| 144 | i, t) |
| 145 | i++ |
| 146 | } |
| 147 | fmt.Fprintln(w, "}") |
| 148 | } |
| 149 | |
// getIndex formats the subtag value x with %#v and extracts its hexadecimal
// index as a string of exactly n nibbles (hex digits), left-padded with 0s.
//
// x is expected to format as Type{typeID:0x00}, that is, a struct wrapping a
// single unsigned integer. getIndex panics with a descriptive message if no
// such index can be found in the formatted value or if the index needs more
// than n nibbles.
func getIndex(x interface{}, n int) string {
	s := fmt.Sprintf("%#v", x) // s is of form Type{typeID:0x00}

	start := strings.Index(s, "0x")
	if start < 0 || !strings.HasSuffix(s, "}") {
		panic(fmt.Sprintf("getIndex: cannot extract index from %s", s))
	}

	// Everything between the "0x" prefix and the closing brace.
	digits := s[start+2 : len(s)-1]
	if len(digits) > n {
		panic(fmt.Sprintf("getIndex: index 0x%s of %T does not fit in %d nibbles", digits, x, n))
	}
	return strings.Repeat("0", n-len(digits)) + digits
}
| 157 | |
| 158 | type byAlpha []language.Tag |
| 159 | |
| 160 | func (a byAlpha) Len() int { return len(a) } |
| 161 | func (a byAlpha) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
| 162 | func (a byAlpha) Less(i, j int) bool { return a[i].String() < a[j].String() } |