// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7// Language tag table generator.
8// Data read from the web.
9
10package main
11
12import (
13 "bufio"
14 "flag"
15 "fmt"
16 "io"
17 "io/ioutil"
18 "log"
19 "math"
20 "reflect"
21 "regexp"
22 "sort"
23 "strconv"
24 "strings"
25
26 "golang.org/x/text/internal/gen"
27 "golang.org/x/text/internal/tag"
28 "golang.org/x/text/unicode/cldr"
29)
30
31var (
32 test = flag.Bool("test",
33 false,
34 "test existing tables; can be used to compare web data with package data.")
35 outputFile = flag.String("output",
36 "tables.go",
37 "output file for generated tables")
38)
39
40var comment = []string{
41 `
42lang holds an alphabetically sorted list of ISO-639 language identifiers.
43All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag.
44For 2-byte language identifiers, the two successive bytes have the following meaning:
  - if the first letters of the 2- and 3-letter ISO codes are the same:
    the second and third letter of the 3-letter ISO code.
  - otherwise: a 0 and the index into altLangISO3, shifted right by 2 bits.
48For 3-byte language identifiers the 4th byte is 0.`,
49 `
50langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
51in lookup tables. The language ids for these language codes are derived directly
52from the letters and are not consecutive.`,
53 `
54altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
55to 2-letter language codes that cannot be derived using the method described above.
56Each 3-letter code is followed by its 1-byte langID.`,
57 `
58altLangIndex is used to convert indexes in altLangISO3 to langIDs.`,
59 `
60AliasMap maps langIDs to their suggested replacements.`,
61 `
62script is an alphabetically sorted list of ISO 15924 codes. The index
63of the script in the string, divided by 4, is the internal scriptID.`,
64 `
65isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
66for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
67the UN.M49 codes used for groups.)`,
68 `
69regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
Each 2-letter code is followed by two bytes with the following meaning:
  - [A-Z]{2}: the first letter of the 2-letter code plus these two
72 letters form the 3-letter ISO code.
73 - 0, n: index into altRegionISO3.`,
74 `
75regionTypes defines the status of a region for various standards.`,
76 `
77m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
78codes indicating collections of regions.`,
79 `
80m49Index gives indexes into fromM49 based on the three most significant bits
of a 10-bit UN.M49 code. To search a UN.M49 code in fromM49, search in
  fromM49[m49Index[msb3(code)]:m49Index[msb3(code)+1]]
83for an entry where the first 7 bits match the 7 lsb of the UN.M49 code.
84The region code is stored in the 9 lsb of the indexed value.`,
85 `
86fromM49 contains entries to map UN.M49 codes to regions. See m49Index for details.`,
87 `
88altRegionISO3 holds a list of 3-letter region codes that cannot be
89mapped to 2-letter codes using the default algorithm. This is a short list.`,
90 `
91altRegionIDs holds a list of regionIDs the positions of which match those
92of the 3-letter ISO codes in altRegionISO3.`,
93 `
94variantNumSpecialized is the number of specialized variants in variants.`,
95 `
96suppressScript is an index from langID to the dominant script for that language,
97if it exists. If a script is given, it should be suppressed from the language tag.`,
98 `
99likelyLang is a lookup table, indexed by langID, for the most likely
100scripts and regions given incomplete information. If more entries exist for a
101given language, region and script are the index and size respectively
102of the list in likelyLangList.`,
103 `
likelyLangList holds lists of information associated with likelyLang.`,
105 `
106likelyRegion is a lookup table, indexed by regionID, for the most likely
107languages and scripts given incomplete information. If more entries exist
108for a given regionID, lang and script are the index and size respectively
109of the list in likelyRegionList.
110TODO: exclude containers and user-definable regions from the list.`,
111 `
likelyRegionList holds lists of information associated with likelyRegion.`,
113 `
114likelyScript is a lookup table, indexed by scriptID, for the most likely
115languages and regions given a script.`,
116 `
117nRegionGroups is the number of region groups.`,
118 `
119regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
120where each set holds all groupings that are directly connected in a region
121containment graph.`,
122 `
123regionInclusionBits is an array of bit vectors where every vector represents
124a set of region groupings. These sets are used to compute the distance
125between two regions for the purpose of language matching.`,
126 `
127regionInclusionNext marks, for each entry in regionInclusionBits, the set of
128all groups that are reachable from the groups set in the respective entry.`,
129}
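// For illustration of the lang encoding above: a 2-letter code such as "en"
// (3-letter form "eng") shares its first letter with the 3-letter form and is
// stored as "en"+"ng"; "sh" (3-letter form "hbs") does not, so it is stored as
// "sh" followed by a 0 byte and the index of "hbs" in altLangISO3. 3-letter
// codes get a single trailing 0 byte.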
130
131// TODO: consider changing some of these structures to tries. This can reduce
132// memory, but may increase the need for memory allocations. This could be
133// mitigated if we can piggyback on language tags for common cases.
134
135func failOnError(e error) {
136 if e != nil {
137 log.Panic(e)
138 }
139}
140
141type setType int
142
143const (
144 Indexed setType = 1 + iota // all elements must be of same size
145 Linear
146)
147
148type stringSet struct {
149 s []string
150 sorted, frozen bool
151
152 // We often need to update values after the creation of an index is completed.
153 // We include a convenience map for keeping track of this.
154 update map[string]string
155 typ setType // used for checking.
156}
157
158func (ss *stringSet) clone() stringSet {
159 c := *ss
160 c.s = append([]string(nil), c.s...)
161 return c
162}
163
164func (ss *stringSet) setType(t setType) {
165 if ss.typ != t && ss.typ != 0 {
166 log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ)
167 }
168}
169
170// parse parses a whitespace-separated string and initializes ss with its
171// components.
172func (ss *stringSet) parse(s string) {
173 scan := bufio.NewScanner(strings.NewReader(s))
174 scan.Split(bufio.ScanWords)
175 for scan.Scan() {
176 ss.add(scan.Text())
177 }
178}
179
180func (ss *stringSet) assertChangeable() {
181 if ss.frozen {
182 log.Panic("attempt to modify a frozen stringSet")
183 }
184}
185
186func (ss *stringSet) add(s string) {
187 ss.assertChangeable()
188 ss.s = append(ss.s, s)
189 ss.sorted = ss.frozen
190}
191
192func (ss *stringSet) freeze() {
193 ss.compact()
194 ss.frozen = true
195}
196
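// compact sorts the strings in the set and removes duplicates in place.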
197func (ss *stringSet) compact() {
198 if ss.sorted {
199 return
200 }
201 a := ss.s
202 sort.Strings(a)
203 k := 0
204 for i := 1; i < len(a); i++ {
205 if a[k] != a[i] {
206 a[k+1] = a[i]
207 k++
208 }
209 }
210 ss.s = a[:k+1]
211 ss.sorted = ss.frozen
212}
213
214type funcSorter struct {
215 fn func(a, b string) bool
216 sort.StringSlice
217}
218
219func (s funcSorter) Less(i, j int) bool {
220 return s.fn(s.StringSlice[i], s.StringSlice[j])
221}
222
223func (ss *stringSet) sortFunc(f func(a, b string) bool) {
224 ss.compact()
225 sort.Sort(funcSorter{f, sort.StringSlice(ss.s)})
226}
227
228func (ss *stringSet) remove(s string) {
229 ss.assertChangeable()
230 if i, ok := ss.find(s); ok {
231 copy(ss.s[i:], ss.s[i+1:])
232 ss.s = ss.s[:len(ss.s)-1]
233 }
234}
235
236func (ss *stringSet) replace(ol, nu string) {
237 ss.s[ss.index(ol)] = nu
238 ss.sorted = ss.frozen
239}
240
241func (ss *stringSet) index(s string) int {
242 ss.setType(Indexed)
243 i, ok := ss.find(s)
244 if !ok {
245 if i < len(ss.s) {
246 log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[i])
247 }
248 log.Panicf("find: item %q is not in list", s)
249
250 }
251 return i
252}
253
254func (ss *stringSet) find(s string) (int, bool) {
255 ss.compact()
256 i := sort.SearchStrings(ss.s, s)
257 return i, i != len(ss.s) && ss.s[i] == s
258}
259
260func (ss *stringSet) slice() []string {
261 ss.compact()
262 return ss.s
263}
264
265func (ss *stringSet) updateLater(v, key string) {
266 if ss.update == nil {
267 ss.update = map[string]string{}
268 }
269 ss.update[v] = key
270}
271
// join joins the strings in the set into a single string, ensuring that all
// entries are of the same length, and appends a sentinel entry of 0xff bytes.
273func (ss *stringSet) join() string {
274 ss.setType(Indexed)
275 n := len(ss.s[0])
276 for _, s := range ss.s {
277 if len(s) != n {
278 log.Panicf("join: not all entries are of the same length: %q", s)
279 }
280 }
281 ss.s = append(ss.s, strings.Repeat("\xff", n))
282 return strings.Join(ss.s, "")
283}
284
285// ianaEntry holds information for an entry in the IANA Language Subtag Repository.
286// All types use the same entry.
287// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various
288// fields.
289type ianaEntry struct {
290 typ string
291 description []string
292 scope string
293 added string
294 preferred string
295 deprecated string
296 suppressScript string
297 macro string
298 prefix []string
299}
300
301type builder struct {
302 w *gen.CodeWriter
303 hw io.Writer // MultiWriter for w and w.Hash
304 data *cldr.CLDR
305 supp *cldr.SupplementalData
306
307 // indices
308 locale stringSet // common locales
309 lang stringSet // canonical language ids (2 or 3 letter ISO codes) with data
310 langNoIndex stringSet // 3-letter ISO codes with no associated data
311 script stringSet // 4-letter ISO codes
312 region stringSet // 2-letter ISO or 3-digit UN M49 codes
	variant stringSet // 4-8 character alphanumeric variant codes.
314
315 // Region codes that are groups with their corresponding group IDs.
316 groups map[int]index
317
318 // langInfo
319 registry map[string]*ianaEntry
320}
321
322type index uint
323
324func newBuilder(w *gen.CodeWriter) *builder {
325 r := gen.OpenCLDRCoreZip()
326 defer r.Close()
327 d := &cldr.Decoder{}
328 data, err := d.DecodeZip(r)
329 failOnError(err)
330 b := builder{
331 w: w,
332 hw: io.MultiWriter(w, w.Hash),
333 data: data,
334 supp: data.Supplemental(),
335 }
336 b.parseRegistry()
337 return &b
338}
339
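// parseRegistry reads the IANA Language Subtag Registry and populates
// b.registry, expanding subtag ranges written as "a..b" (for example the
// private use range qaa..qtz) into individual entries.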
340func (b *builder) parseRegistry() {
341 r := gen.OpenIANAFile("assignments/language-subtag-registry")
342 defer r.Close()
343 b.registry = make(map[string]*ianaEntry)
344
345 scan := bufio.NewScanner(r)
346 scan.Split(bufio.ScanWords)
347 var record *ianaEntry
348 for more := scan.Scan(); more; {
349 key := scan.Text()
350 more = scan.Scan()
351 value := scan.Text()
352 switch key {
353 case "Type:":
354 record = &ianaEntry{typ: value}
355 case "Subtag:", "Tag:":
356 if s := strings.SplitN(value, "..", 2); len(s) > 1 {
357 for a := s[0]; a <= s[1]; a = inc(a) {
358 b.addToRegistry(a, record)
359 }
360 } else {
361 b.addToRegistry(value, record)
362 }
363 case "Suppress-Script:":
364 record.suppressScript = value
365 case "Added:":
366 record.added = value
367 case "Deprecated:":
368 record.deprecated = value
369 case "Macrolanguage:":
370 record.macro = value
371 case "Preferred-Value:":
372 record.preferred = value
373 case "Prefix:":
374 record.prefix = append(record.prefix, value)
375 case "Scope:":
376 record.scope = value
377 case "Description:":
378 buf := []byte(value)
379 for more = scan.Scan(); more; more = scan.Scan() {
380 b := scan.Bytes()
381 if b[0] == '%' || b[len(b)-1] == ':' {
382 break
383 }
384 buf = append(buf, ' ')
385 buf = append(buf, b...)
386 }
387 record.description = append(record.description, string(buf))
388 continue
389 default:
390 continue
391 }
392 more = scan.Scan()
393 }
394 if scan.Err() != nil {
395 log.Panic(scan.Err())
396 }
397}
398
399func (b *builder) addToRegistry(key string, entry *ianaEntry) {
400 if info, ok := b.registry[key]; ok {
401 if info.typ != "language" || entry.typ != "extlang" {
402 log.Fatalf("parseRegistry: tag %q already exists", key)
403 }
404 } else {
405 b.registry[key] = entry
406 }
407}
408
409var commentIndex = make(map[string]string)
410
411func init() {
412 for _, s := range comment {
413 key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0])
414 commentIndex[key] = s
415 }
416}
417
418func (b *builder) comment(name string) {
419 if s := commentIndex[name]; len(s) > 0 {
420 b.w.WriteComment(s)
421 } else {
422 fmt.Fprintln(b.w)
423 }
424}
425
426func (b *builder) pf(f string, x ...interface{}) {
427 fmt.Fprintf(b.hw, f, x...)
428 fmt.Fprint(b.hw, "\n")
429}
430
431func (b *builder) p(x ...interface{}) {
432 fmt.Fprintln(b.hw, x...)
433}
434
435func (b *builder) addSize(s int) {
436 b.w.Size += s
437 b.pf("// Size: %d bytes", s)
438}
439
440func (b *builder) writeConst(name string, x interface{}) {
441 b.comment(name)
442 b.w.WriteConst(name, x)
443}
444
445// writeConsts computes f(v) for all v in values and writes the results
446// as constants named _v to a single constant block.
447func (b *builder) writeConsts(f func(string) int, values ...string) {
448 b.pf("const (")
449 for _, v := range values {
450 b.pf("\t_%s = %v", v, f(v))
451 }
452 b.pf(")")
453}
454
455// writeType writes the type of the given value, which must be a struct.
456func (b *builder) writeType(value interface{}) {
457 b.comment(reflect.TypeOf(value).Name())
458 b.w.WriteType(value)
459}
460
461func (b *builder) writeSlice(name string, ss interface{}) {
462 b.writeSliceAddSize(name, 0, ss)
463}
464
465func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) {
466 b.comment(name)
467 b.w.Size += extraSize
468 v := reflect.ValueOf(ss)
469 t := v.Type().Elem()
470 b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len())
471
472 fmt.Fprintf(b.w, "var %s = ", name)
473 b.w.WriteArray(ss)
474 b.p()
475}
476
477type FromTo struct {
478 From, To uint16
479}
480
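// writeSortedMap writes the set as a slice of FromTo pairs, mapping index(s)
// to index(ss.update[s]) for each s in ss, sorted by the From index.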
481func (b *builder) writeSortedMap(name string, ss *stringSet, index func(s string) uint16) {
482 ss.sortFunc(func(a, b string) bool {
483 return index(a) < index(b)
484 })
485 m := []FromTo{}
486 for _, s := range ss.s {
487 m = append(m, FromTo{index(s), index(ss.update[s])})
488 }
489 b.writeSlice(name, m)
490}
491
492const base = 'z' - 'a' + 1
493
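// strToInt interprets s as a base-26 number with digits 'a'..'z'; for example,
// strToInt("ab") == 1 and strToInt("ba") == 26. intToStr below performs the
// inverse mapping.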
494func strToInt(s string) uint {
495 v := uint(0)
496 for i := 0; i < len(s); i++ {
497 v *= base
498 v += uint(s[i] - 'a')
499 }
500 return v
501}
502
// intToStr converts the given integer back to the original ASCII string passed
// to strToInt. len(s) must match the length of that original string.
505func intToStr(v uint, s []byte) {
506 for i := len(s) - 1; i >= 0; i-- {
507 s[i] = byte(v%base) + 'a'
508 v /= base
509 }
510}
511
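// writeBitVector writes a bit vector with one bit for every base-26 string of
// the same length as the entries of ss; for the 3-letter codes of langNoIndex
// this is 26*26*26 = 17576 bits (2197 bytes). The bit at position strToInt(s)
// is set for each s in ss.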
512func (b *builder) writeBitVector(name string, ss []string) {
513 vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8)))
514 for _, s := range ss {
515 v := strToInt(s)
516 vec[v/8] |= 1 << (v % 8)
517 }
518 b.writeSlice(name, vec)
519}
520
521// TODO: convert this type into a list or two-stage trie.
522func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) {
523 b.comment(name)
524 v := reflect.ValueOf(m)
525 sz := v.Len() * (2 + int(v.Type().Key().Size()))
526 for _, k := range m {
527 sz += len(k)
528 }
529 b.addSize(sz)
530 keys := []string{}
531 b.pf(`var %s = map[string]uint16{`, name)
532 for k := range m {
533 keys = append(keys, k)
534 }
535 sort.Strings(keys)
536 for _, k := range keys {
537 b.pf("\t%q: %v,", k, f(m[k]))
538 }
539 b.p("}")
540}
541
542func (b *builder) writeMap(name string, m interface{}) {
543 b.comment(name)
544 v := reflect.ValueOf(m)
545 sz := v.Len() * (2 + int(v.Type().Key().Size()) + int(v.Type().Elem().Size()))
546 b.addSize(sz)
547 f := strings.FieldsFunc(fmt.Sprintf("%#v", m), func(r rune) bool {
548 return strings.IndexRune("{}, ", r) != -1
549 })
550 sort.Strings(f[1:])
551 b.pf(`var %s = %s{`, name, f[0])
552 for _, kv := range f[1:] {
553 b.pf("\t%s,", kv)
554 }
555 b.p("}")
556}
557
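// langIndex returns the compact index of language s: 0 for "und", the position
// of s in b.lang if it has its own entry, and otherwise its base-26 value
// offset by len(b.lang.s) (the langNoIndexOffset constant written later).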
558func (b *builder) langIndex(s string) uint16 {
559 if s == "und" {
560 return 0
561 }
562 if i, ok := b.lang.find(s); ok {
563 return uint16(i)
564 }
565 return uint16(strToInt(s)) + uint16(len(b.lang.s))
566}
567
568// inc advances the string to its lexicographical successor.
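// For example, inc("aa") == "ab" and inc("az") == "ba"; the original casing is
// preserved, so inc("AZ") == "BA".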
569func inc(s string) string {
570 const maxTagLength = 4
571 var buf [maxTagLength]byte
572 intToStr(strToInt(strings.ToLower(s))+1, buf[:len(s)])
573 for i := 0; i < len(s); i++ {
574 if s[i] <= 'Z' {
575 buf[i] -= 'a' - 'A'
576 }
577 }
578 return string(buf[:len(s)])
579}
580
581func (b *builder) parseIndices() {
582 meta := b.supp.Metadata
583
584 for k, v := range b.registry {
585 var ss *stringSet
586 switch v.typ {
587 case "language":
588 if len(k) == 2 || v.suppressScript != "" || v.scope == "special" {
589 b.lang.add(k)
590 continue
591 } else {
592 ss = &b.langNoIndex
593 }
594 case "region":
595 ss = &b.region
596 case "script":
597 ss = &b.script
598 case "variant":
599 ss = &b.variant
600 default:
601 continue
602 }
603 ss.add(k)
604 }
605 // Include any language for which there is data.
606 for _, lang := range b.data.Locales() {
607 if x := b.data.RawLDML(lang); false ||
608 x.LocaleDisplayNames != nil ||
609 x.Characters != nil ||
610 x.Delimiters != nil ||
611 x.Measurement != nil ||
612 x.Dates != nil ||
613 x.Numbers != nil ||
614 x.Units != nil ||
615 x.ListPatterns != nil ||
616 x.Collations != nil ||
617 x.Segmentations != nil ||
618 x.Rbnf != nil ||
619 x.Annotations != nil ||
620 x.Metadata != nil {
621
622 from := strings.Split(lang, "_")
623 if lang := from[0]; lang != "root" {
624 b.lang.add(lang)
625 }
626 }
627 }
628 // Include locales for plural rules, which uses a different structure.
629 for _, plurals := range b.data.Supplemental().Plurals {
630 for _, rules := range plurals.PluralRules {
631 for _, lang := range strings.Split(rules.Locales, " ") {
632 if lang = strings.Split(lang, "_")[0]; lang != "root" {
633 b.lang.add(lang)
634 }
635 }
636 }
637 }
638 // Include languages in likely subtags.
639 for _, m := range b.supp.LikelySubtags.LikelySubtag {
640 from := strings.Split(m.From, "_")
641 b.lang.add(from[0])
642 }
643 // Include ISO-639 alpha-3 bibliographic entries.
644 for _, a := range meta.Alias.LanguageAlias {
645 if a.Reason == "bibliographic" {
646 b.langNoIndex.add(a.Type)
647 }
648 }
649 // Include regions in territoryAlias (not all are in the IANA registry!)
650 for _, reg := range b.supp.Metadata.Alias.TerritoryAlias {
651 if len(reg.Type) == 2 {
652 b.region.add(reg.Type)
653 }
654 }
655
656 for _, s := range b.lang.s {
657 if len(s) == 3 {
658 b.langNoIndex.remove(s)
659 }
660 }
661 b.writeConst("NumLanguages", len(b.lang.slice())+len(b.langNoIndex.slice()))
662 b.writeConst("NumScripts", len(b.script.slice()))
663 b.writeConst("NumRegions", len(b.region.slice()))
664
665 // Add dummy codes at the start of each list to represent "unspecified".
666 b.lang.add("---")
667 b.script.add("----")
668 b.region.add("---")
669
670 // common locales
671 b.locale.parse(meta.DefaultContent.Locales)
672}
673
// TODO: region inclusion data will probably not be used in future matchers.
675
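// computeRegionGroups assigns a dense index to every region that acts as a
// grouping in the territory containment data: the numeric UN.M49 codes at the
// start of the region list plus the CLDR containment groups other than UN and
// EZ. At most 64 groups are supported.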
676func (b *builder) computeRegionGroups() {
677 b.groups = make(map[int]index)
678
679 // Create group indices.
680 for i := 1; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID.
681 b.groups[i] = index(len(b.groups))
682 }
683 for _, g := range b.supp.TerritoryContainment.Group {
684 // Skip UN and EURO zone as they are flattening the containment
685 // relationship.
686 if g.Type == "EZ" || g.Type == "UN" {
687 continue
688 }
689 group := b.region.index(g.Type)
690 if _, ok := b.groups[group]; !ok {
691 b.groups[group] = index(len(b.groups))
692 }
693 }
694 if len(b.groups) > 64 {
695 log.Fatalf("only 64 groups supported, found %d", len(b.groups))
696 }
697 b.writeConst("nRegionGroups", len(b.groups))
698}
699
700var langConsts = []string{
701 "af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es",
702 "et", "fa", "fi", "fil", "fr", "gu", "he", "hi", "hr", "hu", "hy", "id", "is",
703 "it", "ja", "ka", "kk", "km", "kn", "ko", "ky", "lo", "lt", "lv", "mk", "ml",
704 "mn", "mo", "mr", "ms", "mul", "my", "nb", "ne", "nl", "no", "pa", "pl", "pt",
705 "ro", "ru", "sh", "si", "sk", "sl", "sq", "sr", "sv", "sw", "ta", "te", "th",
706 "tl", "tn", "tr", "uk", "ur", "uz", "vi", "zh", "zu",
707
708 // constants for grandfathered tags (if not already defined)
709 "jbo", "ami", "bnn", "hak", "tlh", "lb", "nv", "pwn", "tao", "tay", "tsu",
710 "nn", "sfb", "vgt", "sgg", "cmn", "nan", "hsn",
711}
712
713// writeLanguage generates all tables needed for language canonicalization.
714func (b *builder) writeLanguage() {
715 meta := b.supp.Metadata
716
717 b.writeConst("nonCanonicalUnd", b.lang.index("und"))
718 b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
719 b.writeConst("langPrivateStart", b.langIndex("qaa"))
720 b.writeConst("langPrivateEnd", b.langIndex("qtz"))
721
722 // Get language codes that need to be mapped (overlong 3-letter codes,
723 // deprecated 2-letter codes, legacy and grandfathered tags.)
724 langAliasMap := stringSet{}
725 aliasTypeMap := map[string]AliasType{}
726
	// altLangISO3 collects the alternative ISO3 names that need to be mapped.
728 altLangISO3 := stringSet{}
729 // Add dummy start to avoid the use of index 0.
730 altLangISO3.add("---")
731 altLangISO3.updateLater("---", "aa")
732
733 lang := b.lang.clone()
734 for _, a := range meta.Alias.LanguageAlias {
735 if a.Replacement == "" {
736 a.Replacement = "und"
737 }
738 // TODO: support mapping to tags
739 repl := strings.SplitN(a.Replacement, "_", 2)[0]
740 if a.Reason == "overlong" {
741 if len(a.Replacement) == 2 && len(a.Type) == 3 {
742 lang.updateLater(a.Replacement, a.Type)
743 }
744 } else if len(a.Type) <= 3 {
745 switch a.Reason {
746 case "macrolanguage":
747 aliasTypeMap[a.Type] = Macro
748 case "deprecated":
749 // handled elsewhere
750 continue
751 case "bibliographic", "legacy":
752 if a.Type == "no" {
753 continue
754 }
755 aliasTypeMap[a.Type] = Legacy
756 default:
757 log.Fatalf("new %s alias: %s", a.Reason, a.Type)
758 }
759 langAliasMap.add(a.Type)
760 langAliasMap.updateLater(a.Type, repl)
761 }
762 }
763 // Manually add the mapping of "nb" (Norwegian) to its macro language.
764 // This can be removed if CLDR adopts this change.
765 langAliasMap.add("nb")
766 langAliasMap.updateLater("nb", "no")
767 aliasTypeMap["nb"] = Macro
768
769 for k, v := range b.registry {
770 // Also add deprecated values for 3-letter ISO codes, which CLDR omits.
771 if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
772 langAliasMap.add(k)
773 langAliasMap.updateLater(k, v.preferred)
774 aliasTypeMap[k] = Deprecated
775 }
776 }
777 // Fix CLDR mappings.
778 lang.updateLater("tl", "tgl")
779 lang.updateLater("sh", "hbs")
780 lang.updateLater("mo", "mol")
781 lang.updateLater("no", "nor")
782 lang.updateLater("tw", "twi")
783 lang.updateLater("nb", "nob")
784 lang.updateLater("ak", "aka")
785 lang.updateLater("bh", "bih")
786
787 // Ensure that each 2-letter code is matched with a 3-letter code.
788 for _, v := range lang.s[1:] {
789 s, ok := lang.update[v]
790 if !ok {
791 if s, ok = lang.update[langAliasMap.update[v]]; !ok {
792 continue
793 }
794 lang.update[v] = s
795 }
796 if v[0] != s[0] {
797 altLangISO3.add(s)
798 altLangISO3.updateLater(s, v)
799 }
800 }
801
802 // Complete canonicalized language tags.
803 lang.freeze()
804 for i, v := range lang.s {
805 // We can avoid these manual entries by using the IANA registry directly.
806 // Seems easier to update the list manually, as changes are rare.
807 // The panic in this loop will trigger if we miss an entry.
808 add := ""
809 if s, ok := lang.update[v]; ok {
810 if s[0] == v[0] {
811 add = s[1:]
812 } else {
813 add = string([]byte{0, byte(altLangISO3.index(s))})
814 }
815 } else if len(v) == 3 {
816 add = "\x00"
817 } else {
818 log.Panicf("no data for long form of %q", v)
819 }
820 lang.s[i] += add
821 }
822 b.writeConst("lang", tag.Index(lang.join()))
823
824 b.writeConst("langNoIndexOffset", len(b.lang.s))
825
826 // space of all valid 3-letter language identifiers.
827 b.writeBitVector("langNoIndex", b.langNoIndex.slice())
828
829 altLangIndex := []uint16{}
830 for i, s := range altLangISO3.slice() {
831 altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))})
832 if i > 0 {
833 idx := b.lang.index(altLangISO3.update[s])
834 altLangIndex = append(altLangIndex, uint16(idx))
835 }
836 }
837 b.writeConst("altLangISO3", tag.Index(altLangISO3.join()))
838 b.writeSlice("altLangIndex", altLangIndex)
839
840 b.writeSortedMap("AliasMap", &langAliasMap, b.langIndex)
841 types := make([]AliasType, len(langAliasMap.s))
842 for i, s := range langAliasMap.s {
843 types[i] = aliasTypeMap[s]
844 }
845 b.writeSlice("AliasTypes", types)
846}
847
848var scriptConsts = []string{
849 "Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
850 "Zzzz",
851}
852
853func (b *builder) writeScript() {
854 b.writeConsts(b.script.index, scriptConsts...)
855 b.writeConst("script", tag.Index(b.script.join()))
856
857 supp := make([]uint8, len(b.lang.slice()))
858 for i, v := range b.lang.slice()[1:] {
859 if sc := b.registry[v].suppressScript; sc != "" {
860 supp[i+1] = uint8(b.script.index(sc))
861 }
862 }
863 b.writeSlice("suppressScript", supp)
864
865 // There is only one deprecated script in CLDR. This value is hard-coded.
866 // We check here if the code must be updated.
867 for _, a := range b.supp.Metadata.Alias.ScriptAlias {
868 if a.Type != "Qaai" {
			log.Panicf("unexpected deprecated script %q", a.Type)
870 }
871 }
872}
873
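// parseM49 parses a numeric UN.M49 code as a 10-bit value; the empty string
// yields 0.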
874func parseM49(s string) int16 {
875 if len(s) == 0 {
876 return 0
877 }
878 v, err := strconv.ParseUint(s, 10, 10)
879 failOnError(err)
880 return int16(v)
881}
882
883var regionConsts = []string{
884 "001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
885 "ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
886}
887
888func (b *builder) writeRegion() {
889 b.writeConsts(b.region.index, regionConsts...)
890
891 isoOffset := b.region.index("AA")
892 m49map := make([]int16, len(b.region.slice()))
893 fromM49map := make(map[int16]int)
894 altRegionISO3 := ""
895 altRegionIDs := []uint16{}
896
897 b.writeConst("isoRegionOffset", isoOffset)
898
899 // 2-letter region lookup and mapping to numeric codes.
900 regionISO := b.region.clone()
901 regionISO.s = regionISO.s[isoOffset:]
902 regionISO.sorted = false
903
904 regionTypes := make([]byte, len(b.region.s))
905
906 // Is the region valid BCP 47?
907 for s, e := range b.registry {
908 if len(s) == 2 && s == strings.ToUpper(s) {
909 i := b.region.index(s)
910 for _, d := range e.description {
911 if strings.Contains(d, "Private use") {
912 regionTypes[i] = iso3166UserAssigned
913 }
914 }
915 regionTypes[i] |= bcp47Region
916 }
917 }
918
919 // Is the region a valid ccTLD?
920 r := gen.OpenIANAFile("domains/root/db")
921 defer r.Close()
922
923 buf, err := ioutil.ReadAll(r)
924 failOnError(err)
925 re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`)
926 for _, m := range re.FindAllSubmatch(buf, -1) {
927 i := b.region.index(strings.ToUpper(string(m[1])))
928 regionTypes[i] |= ccTLD
929 }
930
931 b.writeSlice("regionTypes", regionTypes)
932
933 iso3Set := make(map[string]int)
934 update := func(iso2, iso3 string) {
935 i := regionISO.index(iso2)
936 if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] {
937 regionISO.s[i] += iso3[1:]
938 iso3Set[iso3] = -1
939 } else {
940 if ok && j >= 0 {
941 regionISO.s[i] += string([]byte{0, byte(j)})
942 } else {
943 iso3Set[iso3] = len(altRegionISO3)
944 regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))})
945 altRegionISO3 += iso3
946 altRegionIDs = append(altRegionIDs, uint16(isoOffset+i))
947 }
948 }
949 }
950 for _, tc := range b.supp.CodeMappings.TerritoryCodes {
951 i := regionISO.index(tc.Type) + isoOffset
952 if d := m49map[i]; d != 0 {
953 log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d)
954 }
955 m49 := parseM49(tc.Numeric)
956 m49map[i] = m49
957 if r := fromM49map[m49]; r == 0 {
958 fromM49map[m49] = i
959 } else if r != i {
960 dep := b.registry[regionISO.s[r-isoOffset]].deprecated
961 if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) {
962 fromM49map[m49] = i
963 }
964 }
965 }
966 for _, ta := range b.supp.Metadata.Alias.TerritoryAlias {
967 if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 {
968 from := parseM49(ta.Type)
969 if r := fromM49map[from]; r == 0 {
970 fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset
971 }
972 }
973 }
974 for _, tc := range b.supp.CodeMappings.TerritoryCodes {
975 if len(tc.Alpha3) == 3 {
976 update(tc.Type, tc.Alpha3)
977 }
978 }
	// These entries are not included in territoryCodes. Mostly 3-letter variants
980 // of deleted codes and an entry for QU.
981 for _, m := range []struct{ iso2, iso3 string }{
982 {"CT", "CTE"},
983 {"DY", "DHY"},
984 {"HV", "HVO"},
985 {"JT", "JTN"},
986 {"MI", "MID"},
987 {"NH", "NHB"},
988 {"NQ", "ATN"},
989 {"PC", "PCI"},
990 {"PU", "PUS"},
991 {"PZ", "PCZ"},
992 {"RH", "RHO"},
993 {"VD", "VDR"},
994 {"WK", "WAK"},
995 // These three-letter codes are used for others as well.
996 {"FQ", "ATF"},
997 } {
998 update(m.iso2, m.iso3)
999 }
1000 for i, s := range regionISO.s {
1001 if len(s) != 4 {
1002 regionISO.s[i] = s + " "
1003 }
1004 }
1005 b.writeConst("regionISO", tag.Index(regionISO.join()))
1006 b.writeConst("altRegionISO3", altRegionISO3)
1007 b.writeSlice("altRegionIDs", altRegionIDs)
1008
1009 // Create list of deprecated regions.
1010 // TODO: consider inserting SF -> FI. Not included by CLDR, but is the only
1011 // Transitionally-reserved mapping not included.
1012 regionOldMap := stringSet{}
1013 // Include regions in territoryAlias (not all are in the IANA registry!)
1014 for _, reg := range b.supp.Metadata.Alias.TerritoryAlias {
1015 if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 {
1016 regionOldMap.add(reg.Type)
1017 regionOldMap.updateLater(reg.Type, reg.Replacement)
1018 i, _ := regionISO.find(reg.Type)
1019 j, _ := regionISO.find(reg.Replacement)
1020 if k := m49map[i+isoOffset]; k == 0 {
1021 m49map[i+isoOffset] = m49map[j+isoOffset]
1022 }
1023 }
1024 }
1025 b.writeSortedMap("regionOldMap", &regionOldMap, func(s string) uint16 {
1026 return uint16(b.region.index(s))
1027 })
1028 // 3-digit region lookup, groupings.
1029 for i := 1; i < isoOffset; i++ {
1030 m := parseM49(b.region.s[i])
1031 m49map[i] = m
1032 fromM49map[m] = i
1033 }
1034 b.writeSlice("m49", m49map)
1035
1036 const (
1037 searchBits = 7
1038 regionBits = 9
1039 )
1040 if len(m49map) >= 1<<regionBits {
1041 log.Fatalf("Maximum number of regions exceeded: %d > %d", len(m49map), 1<<regionBits)
1042 }
1043 m49Index := [9]int16{}
1044 fromM49 := []uint16{}
1045 m49 := []int{}
	for k := range fromM49map {
1047 m49 = append(m49, int(k))
1048 }
1049 sort.Ints(m49)
1050 for _, k := range m49[1:] {
1051 val := (k & (1<<searchBits - 1)) << regionBits
1052 fromM49 = append(fromM49, uint16(val|fromM49map[int16(k)]))
1053 m49Index[1:][k>>searchBits] = int16(len(fromM49))
1054 }
1055 b.writeSlice("m49Index", m49Index)
1056 b.writeSlice("fromM49", fromM49)
1057}
1058
1059const (
1060 // TODO: put these lists in regionTypes as user data? Could be used for
1061 // various optimizations and refinements and could be exposed in the API.
1062 iso3166Except = "AC CP DG EA EU FX IC SU TA UK"
1063 iso3166Trans = "AN BU CS NT TP YU ZR" // SF is not in our set of Regions.
1064 // DY and RH are actually not deleted, but indeterminately reserved.
1065 iso3166DelCLDR = "CT DD DY FQ HV JT MI NH NQ PC PU PZ RH VD WK YD"
1066)
1067
1068const (
1069 iso3166UserAssigned = 1 << iota
1070 ccTLD
1071 bcp47Region
1072)
1073
1074func find(list []string, s string) int {
1075 for i, t := range list {
1076 if t == s {
1077 return i
1078 }
1079 }
1080 return -1
1081}
1082
// writeVariant generates per-variant information and creates a map from variant
1084// name to index value. We assign index values such that sorting multiple
1085// variants by index value will result in the correct order.
1086// There are two types of variants: specialized and general. Specialized variants
1087// are only applicable to certain language or language-script pairs. Generalized
1088// variants apply to any language. Generalized variants always sort after
1089// specialized variants. We will therefore always assign a higher index value
1090// to a generalized variant than any other variant. Generalized variants are
1091// sorted alphabetically among themselves.
1092// Specialized variants may also sort after other specialized variants. Such
1093// variants will be ordered after any of the variants they may follow.
1094// We assume that if a variant x is followed by a variant y, then for any prefix
1095// p of x, p-x is a prefix of y. This allows us to order tags based on the
1096// maximum of the length of any of its prefixes.
1097// TODO: it is possible to define a set of Prefix values on variants such that
1098// a total order cannot be defined to the point that this algorithm breaks.
1099// In other words, we cannot guarantee the same order of variants for the
1100// future using the same algorithm or for non-compliant combinations of
1101// variants. For this reason, consider using simple alphabetic sorting
1102// of variants and ignore Prefix restrictions altogether.
1103func (b *builder) writeVariant() {
1104 generalized := stringSet{}
1105 specialized := stringSet{}
1106 specializedExtend := stringSet{}
1107 // Collate the variants by type and check assumptions.
1108 for _, v := range b.variant.slice() {
1109 e := b.registry[v]
1110 if len(e.prefix) == 0 {
1111 generalized.add(v)
1112 continue
1113 }
1114 c := strings.Split(e.prefix[0], "-")
1115 hasScriptOrRegion := false
1116 if len(c) > 1 {
1117 _, hasScriptOrRegion = b.script.find(c[1])
1118 if !hasScriptOrRegion {
1119 _, hasScriptOrRegion = b.region.find(c[1])
1120
1121 }
1122 }
1123 if len(c) == 1 || len(c) == 2 && hasScriptOrRegion {
1124 // Variant is preceded by a language.
1125 specialized.add(v)
1126 continue
1127 }
1128 // Variant is preceded by another variant.
1129 specializedExtend.add(v)
1130 prefix := c[0] + "-"
1131 if hasScriptOrRegion {
1132 prefix += c[1]
1133 }
1134 for _, p := range e.prefix {
1135 // Verify that the prefix minus the last element is a prefix of the
1136 // predecessor element.
1137 i := strings.LastIndex(p, "-")
1138 pred := b.registry[p[i+1:]]
1139 if find(pred.prefix, p[:i]) < 0 {
1140 log.Fatalf("prefix %q for variant %q not consistent with predecessor spec", p, v)
1141 }
1142 // The sorting used below does not work in the general case. It works
1143 // if we assume that variants that may be followed by others only have
1144 // prefixes of the same length. Verify this.
1145 count := strings.Count(p[:i], "-")
1146 for _, q := range pred.prefix {
1147 if c := strings.Count(q, "-"); c != count {
1148 log.Fatalf("variant %q preceding %q has a prefix %q of size %d; want %d", p[i+1:], v, q, c, count)
1149 }
1150 }
1151 if !strings.HasPrefix(p, prefix) {
1152 log.Fatalf("prefix %q of variant %q should start with %q", p, v, prefix)
1153 }
1154 }
1155 }
1156
1157 // Sort extended variants.
1158 a := specializedExtend.s
1159 less := func(v, w string) bool {
1160 // Sort by the maximum number of elements.
1161 maxCount := func(s string) (max int) {
1162 for _, p := range b.registry[s].prefix {
1163 if c := strings.Count(p, "-"); c > max {
1164 max = c
1165 }
1166 }
1167 return
1168 }
1169 if cv, cw := maxCount(v), maxCount(w); cv != cw {
1170 return cv < cw
1171 }
1172 // Sort by name as tie breaker.
1173 return v < w
1174 }
1175 sort.Sort(funcSorter{less, sort.StringSlice(a)})
1176 specializedExtend.frozen = true
1177
1178 // Create index from variant name to index.
1179 variantIndex := make(map[string]uint8)
1180 add := func(s []string) {
1181 for _, v := range s {
1182 variantIndex[v] = uint8(len(variantIndex))
1183 }
1184 }
1185 add(specialized.slice())
1186 add(specializedExtend.s)
1187 numSpecialized := len(variantIndex)
1188 add(generalized.slice())
1189 if n := len(variantIndex); n > 255 {
1190 log.Fatalf("maximum number of variants exceeded: was %d; want <= 255", n)
1191 }
1192 b.writeMap("variantIndex", variantIndex)
1193 b.writeConst("variantNumSpecialized", numSpecialized)
1194}
1195
1196func (b *builder) writeLanguageInfo() {
1197}
1198
1199// writeLikelyData writes tables that are used both for finding parent relations and for
1200// language matching. Each entry contains additional bits to indicate the status of the
1201// data to know when it cannot be used for parent relations.
1202func (b *builder) writeLikelyData() {
1203 const (
1204 isList = 1 << iota
1205 scriptInFrom
1206 regionInFrom
1207 )
1208 type ( // generated types
1209 likelyScriptRegion struct {
1210 region uint16
1211 script uint8
1212 flags uint8
1213 }
1214 likelyLangScript struct {
1215 lang uint16
1216 script uint8
1217 flags uint8
1218 }
1219 likelyLangRegion struct {
1220 lang uint16
1221 region uint16
1222 }
1223 // likelyTag is used for getting likely tags for group regions, where
1224 // the likely region might be a region contained in the group.
1225 likelyTag struct {
1226 lang uint16
1227 region uint16
1228 script uint8
1229 }
1230 )
1231 var ( // generated variables
1232 likelyRegionGroup = make([]likelyTag, len(b.groups))
1233 likelyLang = make([]likelyScriptRegion, len(b.lang.s))
1234 likelyRegion = make([]likelyLangScript, len(b.region.s))
1235 likelyScript = make([]likelyLangRegion, len(b.script.s))
1236 likelyLangList = []likelyScriptRegion{}
1237 likelyRegionList = []likelyLangScript{}
1238 )
1239 type fromTo struct {
1240 from, to []string
1241 }
1242 langToOther := map[int][]fromTo{}
1243 regionToOther := map[int][]fromTo{}
1244 for _, m := range b.supp.LikelySubtags.LikelySubtag {
1245 from := strings.Split(m.From, "_")
1246 to := strings.Split(m.To, "_")
1247 if len(to) != 3 {
1248 log.Fatalf("invalid number of subtags in %q: found %d, want 3", m.To, len(to))
1249 }
1250 if len(from) > 3 {
1251 log.Fatalf("invalid number of subtags: found %d, want 1-3", len(from))
1252 }
1253 if from[0] != to[0] && from[0] != "und" {
1254 log.Fatalf("unexpected language change in expansion: %s -> %s", from, to)
1255 }
1256 if len(from) == 3 {
1257 if from[2] != to[2] {
1258 log.Fatalf("unexpected region change in expansion: %s -> %s", from, to)
1259 }
1260 if from[0] != "und" {
1261 log.Fatalf("unexpected fully specified from tag: %s -> %s", from, to)
1262 }
1263 }
1264 if len(from) == 1 || from[0] != "und" {
1265 id := 0
1266 if from[0] != "und" {
1267 id = b.lang.index(from[0])
1268 }
1269 langToOther[id] = append(langToOther[id], fromTo{from, to})
1270 } else if len(from) == 2 && len(from[1]) == 4 {
1271 sid := b.script.index(from[1])
1272 likelyScript[sid].lang = uint16(b.langIndex(to[0]))
1273 likelyScript[sid].region = uint16(b.region.index(to[2]))
1274 } else {
1275 r := b.region.index(from[len(from)-1])
1276 if id, ok := b.groups[r]; ok {
1277 if from[0] != "und" {
1278 log.Fatalf("region changed unexpectedly: %s -> %s", from, to)
1279 }
1280 likelyRegionGroup[id].lang = uint16(b.langIndex(to[0]))
1281 likelyRegionGroup[id].script = uint8(b.script.index(to[1]))
1282 likelyRegionGroup[id].region = uint16(b.region.index(to[2]))
1283 } else {
1284 regionToOther[r] = append(regionToOther[r], fromTo{from, to})
1285 }
1286 }
1287 }
1288 b.writeType(likelyLangRegion{})
1289 b.writeSlice("likelyScript", likelyScript)
1290
1291 for id := range b.lang.s {
1292 list := langToOther[id]
1293 if len(list) == 1 {
1294 likelyLang[id].region = uint16(b.region.index(list[0].to[2]))
1295 likelyLang[id].script = uint8(b.script.index(list[0].to[1]))
1296 } else if len(list) > 1 {
1297 likelyLang[id].flags = isList
1298 likelyLang[id].region = uint16(len(likelyLangList))
1299 likelyLang[id].script = uint8(len(list))
1300 for _, x := range list {
1301 flags := uint8(0)
1302 if len(x.from) > 1 {
1303 if x.from[1] == x.to[2] {
1304 flags = regionInFrom
1305 } else {
1306 flags = scriptInFrom
1307 }
1308 }
1309 likelyLangList = append(likelyLangList, likelyScriptRegion{
1310 region: uint16(b.region.index(x.to[2])),
1311 script: uint8(b.script.index(x.to[1])),
1312 flags: flags,
1313 })
1314 }
1315 }
1316 }
1317 // TODO: merge suppressScript data with this table.
1318 b.writeType(likelyScriptRegion{})
1319 b.writeSlice("likelyLang", likelyLang)
1320 b.writeSlice("likelyLangList", likelyLangList)
1321
1322 for id := range b.region.s {
1323 list := regionToOther[id]
1324 if len(list) == 1 {
1325 likelyRegion[id].lang = uint16(b.langIndex(list[0].to[0]))
1326 likelyRegion[id].script = uint8(b.script.index(list[0].to[1]))
1327 if len(list[0].from) > 2 {
1328 likelyRegion[id].flags = scriptInFrom
1329 }
1330 } else if len(list) > 1 {
1331 likelyRegion[id].flags = isList
1332 likelyRegion[id].lang = uint16(len(likelyRegionList))
1333 likelyRegion[id].script = uint8(len(list))
1334 for i, x := range list {
1335 if len(x.from) == 2 && i != 0 || i > 0 && len(x.from) != 3 {
1336 log.Fatalf("unspecified script must be first in list: %v at %d", x.from, i)
1337 }
1338 x := likelyLangScript{
1339 lang: uint16(b.langIndex(x.to[0])),
1340 script: uint8(b.script.index(x.to[1])),
1341 }
1342 if len(list[0].from) > 2 {
1343 x.flags = scriptInFrom
1344 }
1345 likelyRegionList = append(likelyRegionList, x)
1346 }
1347 }
1348 }
1349 b.writeType(likelyLangScript{})
1350 b.writeSlice("likelyRegion", likelyRegion)
1351 b.writeSlice("likelyRegionList", likelyRegionList)
1352
1353 b.writeType(likelyTag{})
1354 b.writeSlice("likelyRegionGroup", likelyRegionGroup)
1355}
1356
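// writeRegionInclusionData writes the regionContainment, regionInclusion,
// regionInclusionBits, and regionInclusionNext tables used to compute the
// distance between regions during matching.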
1357func (b *builder) writeRegionInclusionData() {
1358 var (
1359 // mm holds for each group the set of groups with a distance of 1.
1360 mm = make(map[int][]index)
1361
1362 // containment holds for each group the transitive closure of
1363 // containment of other groups.
1364 containment = make(map[index][]index)
1365 )
1366 for _, g := range b.supp.TerritoryContainment.Group {
1367 // Skip UN and EURO zone as they are flattening the containment
1368 // relationship.
1369 if g.Type == "EZ" || g.Type == "UN" {
1370 continue
1371 }
1372 group := b.region.index(g.Type)
1373 groupIdx := b.groups[group]
1374 for _, mem := range strings.Split(g.Contains, " ") {
1375 r := b.region.index(mem)
1376 mm[r] = append(mm[r], groupIdx)
1377 if g, ok := b.groups[r]; ok {
1378 mm[group] = append(mm[group], g)
1379 containment[groupIdx] = append(containment[groupIdx], g)
1380 }
1381 }
1382 }
1383
1384 regionContainment := make([]uint64, len(b.groups))
1385 for _, g := range b.groups {
1386 l := containment[g]
1387
1388 // Compute the transitive closure of containment.
1389 for i := 0; i < len(l); i++ {
1390 l = append(l, containment[l[i]]...)
1391 }
1392
1393 // Compute the bitmask.
1394 regionContainment[g] = 1 << g
1395 for _, v := range l {
1396 regionContainment[g] |= 1 << v
1397 }
1398 }
1399 b.writeSlice("regionContainment", regionContainment)
1400
1401 regionInclusion := make([]uint8, len(b.region.s))
1402 bvs := make(map[uint64]index)
1403 // Make the first bitvector positions correspond with the groups.
1404 for r, i := range b.groups {
1405 bv := uint64(1 << i)
1406 for _, g := range mm[r] {
1407 bv |= 1 << g
1408 }
1409 bvs[bv] = i
1410 regionInclusion[r] = uint8(bvs[bv])
1411 }
1412 for r := 1; r < len(b.region.s); r++ {
1413 if _, ok := b.groups[r]; !ok {
1414 bv := uint64(0)
1415 for _, g := range mm[r] {
1416 bv |= 1 << g
1417 }
1418 if bv == 0 {
1419 // Pick the world for unspecified regions.
1420 bv = 1 << b.groups[b.region.index("001")]
1421 }
1422 if _, ok := bvs[bv]; !ok {
1423 bvs[bv] = index(len(bvs))
1424 }
1425 regionInclusion[r] = uint8(bvs[bv])
1426 }
1427 }
1428 b.writeSlice("regionInclusion", regionInclusion)
1429 regionInclusionBits := make([]uint64, len(bvs))
1430 for k, v := range bvs {
1431 regionInclusionBits[v] = uint64(k)
1432 }
1433 // Add bit vectors for increasingly large distances until a fixed point is reached.
1434 regionInclusionNext := []uint8{}
1435 for i := 0; i < len(regionInclusionBits); i++ {
1436 bits := regionInclusionBits[i]
1437 next := bits
1438 for i := uint(0); i < uint(len(b.groups)); i++ {
1439 if bits&(1<<i) != 0 {
1440 next |= regionInclusionBits[i]
1441 }
1442 }
1443 if _, ok := bvs[next]; !ok {
1444 bvs[next] = index(len(bvs))
1445 regionInclusionBits = append(regionInclusionBits, next)
1446 }
1447 regionInclusionNext = append(regionInclusionNext, uint8(bvs[next]))
1448 }
1449 b.writeSlice("regionInclusionBits", regionInclusionBits)
1450 b.writeSlice("regionInclusionNext", regionInclusionNext)
1451}
1452
1453type parentRel struct {
1454 lang uint16
1455 script uint8
1456 maxScript uint8
1457 toRegion uint16
1458 fromRegion []uint16
1459}
1460
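// writeParents writes the parents table derived from the CLDR parentLocales
// data, skipping entries whose parent is root.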
1461func (b *builder) writeParents() {
1462 b.writeType(parentRel{})
1463
1464 parents := []parentRel{}
1465
1466 // Construct parent overrides.
1467 n := 0
1468 for _, p := range b.data.Supplemental().ParentLocales.ParentLocale {
1469 // Skipping non-standard scripts to root is implemented using addTags.
1470 if p.Parent == "root" {
1471 continue
1472 }
1473
1474 sub := strings.Split(p.Parent, "_")
1475 parent := parentRel{lang: b.langIndex(sub[0])}
1476 if len(sub) == 2 {
1477 // TODO: check that all undefined scripts are indeed Latn in these
1478 // cases.
1479 parent.maxScript = uint8(b.script.index("Latn"))
1480 parent.toRegion = uint16(b.region.index(sub[1]))
1481 } else {
1482 parent.script = uint8(b.script.index(sub[1]))
1483 parent.maxScript = parent.script
1484 parent.toRegion = uint16(b.region.index(sub[2]))
1485 }
1486 for _, c := range strings.Split(p.Locales, " ") {
1487 region := b.region.index(c[strings.LastIndex(c, "_")+1:])
1488 parent.fromRegion = append(parent.fromRegion, uint16(region))
1489 }
1490 parents = append(parents, parent)
1491 n += len(parent.fromRegion)
1492 }
1493 b.writeSliceAddSize("parents", n*2, parents)
1494}
1495
1496func main() {
1497 gen.Init()
1498
1499 gen.Repackage("gen_common.go", "common.go", "language")
1500
1501 w := gen.NewCodeWriter()
1502 defer w.WriteGoFile("tables.go", "language")
1503
1504 fmt.Fprintln(w, `import "golang.org/x/text/internal/tag"`)
1505
1506 b := newBuilder(w)
1507 gen.WriteCLDRVersion(w)
1508
1509 b.parseIndices()
1510 b.writeType(FromTo{})
1511 b.writeLanguage()
1512 b.writeScript()
1513 b.writeRegion()
1514 b.writeVariant()
1515 // TODO: b.writeLocale()
1516 b.computeRegionGroups()
1517 b.writeLikelyData()
1518 b.writeRegionInclusionData()
1519 b.writeParents()
1520}