blob: 5ca9bccac52be980884d74327bb4d209c517f391 [file] [log] [blame]
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5// +build ignore
6
7package main
8
9// This file generates derivative tables based on the language package itself.
10
11import (
12 "bytes"
13 "flag"
14 "fmt"
15 "io/ioutil"
16 "log"
17 "reflect"
18 "sort"
19 "strings"
20
21 "golang.org/x/text/internal/gen"
22 "golang.org/x/text/language"
23 "golang.org/x/text/unicode/cldr"
24)
25
// test is the -test flag (default false). Its usage text describes checking
// the existing tables against fresh data.
// NOTE(review): not read anywhere in the visible part of this file.
var test = flag.Bool(
	"test",
	false,
	"test existing tables; can be used to compare web data with package data.",
)

// draft is the -draft flag (default "contributed"), naming the minimal CLDR
// draft level to accept.
// NOTE(review): not read anywhere in the visible part of this file.
var draft = flag.String(
	"draft",
	"contributed",
	`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`,
)
34
35func main() {
36 gen.Init()
37
38 // Read the CLDR zip file.
39 r := gen.OpenCLDRCoreZip()
40 defer r.Close()
41
42 d := &cldr.Decoder{}
43 data, err := d.DecodeZip(r)
44 if err != nil {
45 log.Fatalf("DecodeZip: %v", err)
46 }
47
48 w := gen.NewCodeWriter()
49 defer func() {
50 buf := &bytes.Buffer{}
51
52 if _, err = w.WriteGo(buf, "language", ""); err != nil {
53 log.Fatalf("Error formatting file index.go: %v", err)
54 }
55
56 // Since we're generating a table for our own package we need to rewrite
57 // doing the equivalent of go fmt -r 'language.b -> b'. Using
58 // bytes.Replace will do.
59 out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1)
60 if err := ioutil.WriteFile("index.go", out, 0600); err != nil {
61 log.Fatalf("Could not create file index.go: %v", err)
62 }
63 }()
64
65 m := map[language.Tag]bool{}
66 for _, lang := range data.Locales() {
67 // We include all locales unconditionally to be consistent with en_US.
68 // We want en_US, even though it has no data associated with it.
69
70 // TODO: put any of the languages for which no data exists at the end
71 // of the index. This allows all components based on ICU to use that
72 // as the cutoff point.
73 // if x := data.RawLDML(lang); false ||
74 // x.LocaleDisplayNames != nil ||
75 // x.Characters != nil ||
76 // x.Delimiters != nil ||
77 // x.Measurement != nil ||
78 // x.Dates != nil ||
79 // x.Numbers != nil ||
80 // x.Units != nil ||
81 // x.ListPatterns != nil ||
82 // x.Collations != nil ||
83 // x.Segmentations != nil ||
84 // x.Rbnf != nil ||
85 // x.Annotations != nil ||
86 // x.Metadata != nil {
87
88 // TODO: support POSIX natively, albeit non-standard.
89 tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
90 m[tag] = true
91 // }
92 }
93 // Include locales for plural rules, which uses a different structure.
94 for _, plurals := range data.Supplemental().Plurals {
95 for _, rules := range plurals.PluralRules {
96 for _, lang := range strings.Split(rules.Locales, " ") {
97 m[language.Make(lang)] = true
98 }
99 }
100 }
101
102 var core, special []language.Tag
103
104 for t := range m {
105 if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
106 log.Fatalf("Unexpected extension %v in %v", x, t)
107 }
108 if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
109 core = append(core, t)
110 } else {
111 special = append(special, t)
112 }
113 }
114
115 w.WriteComment(`
116 NumCompactTags is the number of common tags. The maximum tag is
117 NumCompactTags-1.`)
118 w.WriteConst("NumCompactTags", len(core)+len(special))
119
120 sort.Sort(byAlpha(special))
121 w.WriteVar("specialTags", special)
122
123 // TODO: order by frequency?
124 sort.Sort(byAlpha(core))
125
126 // Size computations are just an estimate.
127 w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size())
128 w.Size += len(core) * 6 // size of uint32 and uint16
129
130 fmt.Fprintln(w)
131 fmt.Fprintln(w, "var coreTags = map[uint32]uint16{")
132 fmt.Fprintln(w, "0x0: 0, // und")
133 i := len(special) + 1 // Und and special tags already written.
134 for _, t := range core {
135 if t == language.Und {
136 continue
137 }
138 fmt.Fprint(w.Hash, t, i)
139 b, s, r := t.Raw()
140 fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n",
141 getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number
142 getIndex(s, 2),
143 getIndex(r, 3),
144 i, t)
145 i++
146 }
147 fmt.Fprintln(w, "}")
148}
149
// getIndex formats x with %#v, which is expected to yield something of the
// form Type{typeID: 0x00}, and returns the hexadecimal digits that follow the
// first "0x", dropping the final character of the formatted value (the
// closing brace). The digits are left-padded with '0' to a width of n
// nibbles. A digit string longer than n makes strings.Repeat panic.
func getIndex(x interface{}, n int) string {
	repr := fmt.Sprintf("%#v", x)
	start := strings.Index(repr, "0x") + len("0x")
	digits := repr[start : len(repr)-1]
	padding := strings.Repeat("0", n-len(digits))
	return padding + digits
}
157
158type byAlpha []language.Tag
159
160func (a byAlpha) Len() int { return len(a) }
161func (a byAlpha) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
162func (a byAlpha) Less(i, j int) bool { return a[i].String() < a[j].String() }