Blame - vendor/golang.org/x/text/language/language.go - voltha-go

blob: b65e213ff86f08804cf14ee0fa515c1857834071 [file] [log] [blame]

khenaidoo	ac63710	2019-01-14 15:44:34 -0500	[diff] [blame]	1	// Copyright 2013 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	//go:generate go run gen.go gen_common.go -output tables.go
				6	//go:generate go run gen_index.go
				7
				8	package language
				9
				10	// TODO: Remove above NOTE after:
				11	// - verifying that tables are dropped correctly (most notably matcher tables).
				12
				13	import (
				14	"errors"
				15	"fmt"
				16	"strings"
				17	)
				18
				19	const (
				20	// maxCoreSize is the maximum size of a BCP 47 tag without variants and
				21	// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
				22	maxCoreSize = 12
				23
				24	// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
				25	// is large enough to hold at least 99% of the BCP 47 tags.
				26	max99thPercentileSize = 32
				27
				28	// maxSimpleUExtensionSize is the maximum size of a -u extension with one
				29	// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
				30	maxSimpleUExtensionSize = 14
				31	)
				32
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
//
// The core subtags (language, script, region) are stored as compact IDs. The
// textual form in str is kept only when the tag carries variants or
// extensions; pVariant and pExt are byte offsets into str.
type Tag struct {
	lang   langID
	region regionID
	// TODO: we will soon run out of positions for script. Idea: instead of
	// storing lang, region, and script codes, store only the compact index and
	// have a lookup table from this code to its expansion. This greatly speeds
	// up table lookup and speeds up common variant cases.
	// This will also immediately free up 3 extra bytes. Also, the pVariant
	// field can now be moved to the lookup table, as the compact index uniquely
	// determines the offset of a possible variant.
	script   scriptID
	pVariant byte   // offset in str, includes preceding '-'
	pExt     uint16 // offset of first extension, includes preceding '-'

	// str is the string representation of the Tag. It will only be used if the
	// tag has variants or extensions.
	str string
}
				54
				55	// Make is a convenience wrapper for Parse that omits the error.
				56	// In case of an error, a sensible default is returned.
				57	func Make(s string) Tag {
				58	return Default.Make(s)
				59	}
				60
				61	// Make is a convenience wrapper for c.Parse that omits the error.
				62	// In case of an error, a sensible default is returned.
				63	func (c CanonType) Make(s string) Tag {
				64	t, _ := c.Parse(s)
				65	return t
				66	}
				67
				68	// Raw returns the raw base language, script and region, without making an
				69	// attempt to infer their values.
				70	func (t Tag) Raw() (b Base, s Script, r Region) {
				71	return Base{t.lang}, Script{t.script}, Region{t.region}
				72	}
				73
				74	// equalTags compares language, script and region subtags only.
				75	func (t Tag) equalTags(a Tag) bool {
				76	return t.lang == a.lang && t.script == a.script && t.region == a.region
				77	}
				78
				79	// IsRoot returns true if t is equal to language "und".
				80	func (t Tag) IsRoot() bool {
				81	if int(t.pVariant) < len(t.str) {
				82	return false
				83	}
				84	return t.equalTags(und)
				85	}
				86
				87	// private reports whether the Tag consists solely of a private use tag.
				88	func (t Tag) private() bool {
				89	return t.str != "" && t.pVariant == 0
				90	}
				91
				92	// CanonType can be used to enable or disable various types of canonicalization.
				93	type CanonType int
				94
				95	const (
				96	// Replace deprecated base languages with their preferred replacements.
				97	DeprecatedBase CanonType = 1 << iota
				98	// Replace deprecated scripts with their preferred replacements.
				99	DeprecatedScript
				100	// Replace deprecated regions with their preferred replacements.
				101	DeprecatedRegion
				102	// Remove redundant scripts.
				103	SuppressScript
				104	// Normalize legacy encodings. This includes legacy languages defined in
				105	// CLDR as well as bibliographic codes defined in ISO-639.
				106	Legacy
				107	// Map the dominant language of a macro language group to the macro language
				108	// subtag. For example cmn -> zh.
				109	Macro
				110	// The CLDR flag should be used if full compatibility with CLDR is required.
				111	// There are a few cases where language.Tag may differ from CLDR. To follow all
				112	// of CLDR's suggestions, use All\|CLDR.
				113	CLDR
				114
				115	// Raw can be used to Compose or Parse without Canonicalization.
				116	Raw CanonType = 0
				117
				118	// Replace all deprecated tags with their preferred replacements.
				119	Deprecated = DeprecatedBase \| DeprecatedScript \| DeprecatedRegion
				120
				121	// All canonicalizations recommended by BCP 47.
				122	BCP47 = Deprecated \| SuppressScript
				123
				124	// All canonicalizations.
				125	All = BCP47 \| Legacy \| Macro
				126
				127	// Default is the canonicalization used by Parse, Make and Compose. To
				128	// preserve as much information as possible, canonicalizations that remove
				129	// potentially valuable information are not included. The Matcher is
				130	// designed to recognize similar tags that would be the same if
				131	// they were canonicalized using All.
				132	Default = Deprecated \| Legacy
				133
				134	canonLang = DeprecatedBase \| Legacy \| Macro
				135
				136	// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
				137	)
				138
				139	// canonicalize returns the canonicalized equivalent of the tag and
				140	// whether there was any change.
				141	func (t Tag) canonicalize(c CanonType) (Tag, bool) {
				142	if c == Raw {
				143	return t, false
				144	}
				145	changed := false
				146	if c&SuppressScript != 0 {
				147	if t.lang < langNoIndexOffset && uint8(t.script) == suppressScript[t.lang] {
				148	t.script = 0
				149	changed = true
				150	}
				151	}
				152	if c&canonLang != 0 {
				153	for {
				154	if l, aliasType := normLang(t.lang); l != t.lang {
				155	switch aliasType {
				156	case langLegacy:
				157	if c&Legacy != 0 {
				158	if t.lang == _sh && t.script == 0 {
				159	t.script = _Latn
				160	}
				161	t.lang = l
				162	changed = true
				163	}
				164	case langMacro:
				165	if c&Macro != 0 {
				166	// We deviate here from CLDR. The mapping "nb" -> "no"
				167	// qualifies as a typical Macro language mapping. However,
				168	// for legacy reasons, CLDR maps "no", the macro language
				169	// code for Norwegian, to the dominant variant "nb". This
				170	// change is currently under consideration for CLDR as well.
				171	// See http://unicode.org/cldr/trac/ticket/2698 and also
				172	// http://unicode.org/cldr/trac/ticket/1790 for some of the
				173	// practical implications. TODO: this check could be removed
				174	// if CLDR adopts this change.
				175	if c&CLDR == 0 \|\| t.lang != _nb {
				176	changed = true
				177	t.lang = l
				178	}
				179	}
				180	case langDeprecated:
				181	if c&DeprecatedBase != 0 {
				182	if t.lang == _mo && t.region == 0 {
				183	t.region = _MD
				184	}
				185	t.lang = l
				186	changed = true
				187	// Other canonicalization types may still apply.
				188	continue
				189	}
				190	}
				191	} else if c&Legacy != 0 && t.lang == _no && c&CLDR != 0 {
				192	t.lang = _nb
				193	changed = true
				194	}
				195	break
				196	}
				197	}
				198	if c&DeprecatedScript != 0 {
				199	if t.script == _Qaai {
				200	changed = true
				201	t.script = _Zinh
				202	}
				203	}
				204	if c&DeprecatedRegion != 0 {
				205	if r := normRegion(t.region); r != 0 {
				206	changed = true
				207	t.region = r
				208	}
				209	}
				210	return t, changed
				211	}
				212
				213	// Canonicalize returns the canonicalized equivalent of the tag.
				214	func (c CanonType) Canonicalize(t Tag) (Tag, error) {
				215	t, changed := t.canonicalize(c)
				216	if changed {
				217	t.remakeString()
				218	}
				219	return t, nil
				220	}
				221
				222	// Confidence indicates the level of certainty for a given return value.
				223	// For example, Serbian may be written in Cyrillic or Latin script.
				224	// The confidence level indicates whether a value was explicitly specified,
				225	// whether it is typically the only possible value, or whether there is
				226	// an ambiguity.
				227	type Confidence int
				228
				229	const (
				230	No Confidence = iota // full confidence that there was no match
				231	Low // most likely value picked out of a set of alternatives
				232	High // value is generally assumed to be the correct match
				233	Exact // exact match or explicitly specified value
				234	)
				235
				236	var confName = []string{"No", "Low", "High", "Exact"}
				237
				238	func (c Confidence) String() string {
				239	return confName[c]
				240	}
				241
				242	// remakeString is used to update t.str in case lang, script or region changed.
				243	// It is assumed that pExt and pVariant still point to the start of the
				244	// respective parts.
				245	func (t *Tag) remakeString() {
				246	if t.str == "" {
				247	return
				248	}
				249	extra := t.str[t.pVariant:]
				250	if t.pVariant > 0 {
				251	extra = extra[1:]
				252	}
				253	if t.equalTags(und) && strings.HasPrefix(extra, "x-") {
				254	t.str = extra
				255	t.pVariant = 0
				256	t.pExt = 0
				257	return
				258	}
				259	var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
				260	b := buf[:t.genCoreBytes(buf[:])]
				261	if extra != "" {
				262	diff := len(b) - int(t.pVariant)
				263	b = append(b, '-')
				264	b = append(b, extra...)
				265	t.pVariant = uint8(int(t.pVariant) + diff)
				266	t.pExt = uint16(int(t.pExt) + diff)
				267	} else {
				268	t.pVariant = uint8(len(b))
				269	t.pExt = uint16(len(b))
				270	}
				271	t.str = string(b)
				272	}
				273
				274	// genCoreBytes writes a string for the base languages, script and region tags
				275	// to the given buffer and returns the number of bytes written. It will never
				276	// write more than maxCoreSize bytes.
				277	func (t *Tag) genCoreBytes(buf []byte) int {
				278	n := t.lang.stringToBuf(buf[:])
				279	if t.script != 0 {
				280	n += copy(buf[n:], "-")
				281	n += copy(buf[n:], t.script.String())
				282	}
				283	if t.region != 0 {
				284	n += copy(buf[n:], "-")
				285	n += copy(buf[n:], t.region.String())
				286	}
				287	return n
				288	}
				289
				290	// String returns the canonical string representation of the language tag.
				291	func (t Tag) String() string {
				292	if t.str != "" {
				293	return t.str
				294	}
				295	if t.script == 0 && t.region == 0 {
				296	return t.lang.String()
				297	}
				298	buf := [maxCoreSize]byte{}
				299	return string(buf[:t.genCoreBytes(buf[:])])
				300	}
				301
				302	// MarshalText implements encoding.TextMarshaler.
				303	func (t Tag) MarshalText() (text []byte, err error) {
				304	if t.str != "" {
				305	text = append(text, t.str...)
				306	} else if t.script == 0 && t.region == 0 {
				307	text = append(text, t.lang.String()...)
				308	} else {
				309	buf := [maxCoreSize]byte{}
				310	text = buf[:t.genCoreBytes(buf[:])]
				311	}
				312	return text, nil
				313	}
				314
				315	// UnmarshalText implements encoding.TextUnmarshaler.
				316	func (t *Tag) UnmarshalText(text []byte) error {
				317	tag, err := Raw.Parse(string(text))
				318	*t = tag
				319	return err
				320	}
				321
				322	// Base returns the base language of the language tag. If the base language is
				323	// unspecified, an attempt will be made to infer it from the context.
				324	// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
				325	func (t Tag) Base() (Base, Confidence) {
				326	if t.lang != 0 {
				327	return Base{t.lang}, Exact
				328	}
				329	c := High
				330	if t.script == 0 && !(Region{t.region}).IsCountry() {
				331	c = Low
				332	}
				333	if tag, err := addTags(t); err == nil && tag.lang != 0 {
				334	return Base{tag.lang}, c
				335	}
				336	return Base{0}, No
				337	}
				338
				339	// Script infers the script for the language tag. If it was not explicitly given, it will infer
				340	// a most likely candidate.
				341	// If more than one script is commonly used for a language, the most likely one
				342	// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
				343	// for Serbian.
				344	// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
				345	// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
				346	// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
				347	// See http://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
				348	// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
				349	// Note that an inferred script is never guaranteed to be the correct one. Latin is
				350	// almost exclusively used for Afrikaans, but Arabic has been used for some texts
				351	// in the past. Also, the script that is commonly used may change over time.
				352	// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
				353	func (t Tag) Script() (Script, Confidence) {
				354	if t.script != 0 {
				355	return Script{t.script}, Exact
				356	}
				357	sc, c := scriptID(_Zzzz), No
				358	if t.lang < langNoIndexOffset {
				359	if scr := scriptID(suppressScript[t.lang]); scr != 0 {
				360	// Note: it is not always the case that a language with a suppress
				361	// script value is only written in one script (e.g. kk, ms, pa).
				362	if t.region == 0 {
				363	return Script{scriptID(scr)}, High
				364	}
				365	sc, c = scr, High
				366	}
				367	}
				368	if tag, err := addTags(t); err == nil {
				369	if tag.script != sc {
				370	sc, c = tag.script, Low
				371	}
				372	} else {
				373	t, _ = (Deprecated \| Macro).Canonicalize(t)
				374	if tag, err := addTags(t); err == nil && tag.script != sc {
				375	sc, c = tag.script, Low
				376	}
				377	}
				378	return Script{sc}, c
				379	}
				380
				381	// Region returns the region for the language tag. If it was not explicitly given, it will
				382	// infer a most likely candidate from the context.
				383	// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
				384	func (t Tag) Region() (Region, Confidence) {
				385	if t.region != 0 {
				386	return Region{t.region}, Exact
				387	}
				388	if t, err := addTags(t); err == nil {
				389	return Region{t.region}, Low // TODO: differentiate between high and low.
				390	}
				391	t, _ = (Deprecated \| Macro).Canonicalize(t)
				392	if tag, err := addTags(t); err == nil {
				393	return Region{tag.region}, Low
				394	}
				395	return Region{_ZZ}, No // TODO: return world instead of undetermined?
				396	}
				397
				398	// Variant returns the variants specified explicitly for this language tag.
				399	// or nil if no variant was specified.
				400	func (t Tag) Variants() []Variant {
				401	v := []Variant{}
				402	if int(t.pVariant) < int(t.pExt) {
				403	for x, str := "", t.str[t.pVariant:t.pExt]; str != ""; {
				404	x, str = nextToken(str)
				405	v = append(v, Variant{x})
				406	}
				407	}
				408	return v
				409	}
				410
				411	// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
				412	// specific language are substituted with fields from the parent language.
				413	// The parent for a language may change for newer versions of CLDR.
				414	func (t Tag) Parent() Tag {
				415	if t.str != "" {
				416	// Strip the variants and extensions.
				417	t, _ = Raw.Compose(t.Raw())
				418	if t.region == 0 && t.script != 0 && t.lang != 0 {
				419	base, _ := addTags(Tag{lang: t.lang})
				420	if base.script == t.script {
				421	return Tag{lang: t.lang}
				422	}
				423	}
				424	return t
				425	}
				426	if t.lang != 0 {
				427	if t.region != 0 {
				428	maxScript := t.script
				429	if maxScript == 0 {
				430	max, _ := addTags(t)
				431	maxScript = max.script
				432	}
				433
				434	for i := range parents {
				435	if langID(parents[i].lang) == t.lang && scriptID(parents[i].maxScript) == maxScript {
				436	for _, r := range parents[i].fromRegion {
				437	if regionID(r) == t.region {
				438	return Tag{
				439	lang: t.lang,
				440	script: scriptID(parents[i].script),
				441	region: regionID(parents[i].toRegion),
				442	}
				443	}
				444	}
				445	}
				446	}
				447
				448	// Strip the script if it is the default one.
				449	base, _ := addTags(Tag{lang: t.lang})
				450	if base.script != maxScript {
				451	return Tag{lang: t.lang, script: maxScript}
				452	}
				453	return Tag{lang: t.lang}
				454	} else if t.script != 0 {
				455	// The parent for an base-script pair with a non-default script is
				456	// "und" instead of the base language.
				457	base, _ := addTags(Tag{lang: t.lang})
				458	if base.script != t.script {
				459	return und
				460	}
				461	return Tag{lang: t.lang}
				462	}
				463	}
				464	return und
				465	}
				466
				467	// returns token t and the rest of the string.
				468	func nextToken(s string) (t, tail string) {
				469	p := strings.Index(s[1:], "-")
				470	if p == -1 {
				471	return s[1:], ""
				472	}
				473	p++
				474	return s[1:p], s[p:]
				475	}
				476
// Extension is a single BCP 47 extension.
type Extension struct {
	// s is the text of the extension; its first byte is the extension type.
	s string
}

// String returns the string representation of the extension, including the
// type tag.
func (e Extension) String() string {
	return e.s
}
				487
				488	// ParseExtension parses s as an extension and returns it on success.
				489	func ParseExtension(s string) (e Extension, err error) {
				490	scan := makeScannerString(s)
				491	var end int
				492	if n := len(scan.token); n != 1 {
				493	return Extension{}, errSyntax
				494	}
				495	scan.toLower(0, len(scan.b))
				496	end = parseExtension(&scan)
				497	if end != len(s) {
				498	return Extension{}, errSyntax
				499	}
				500	return Extension{string(scan.b)}, nil
				501	}
				502
				503	// Type returns the one-byte extension type of e. It returns 0 for the zero
				504	// exception.
				505	func (e Extension) Type() byte {
				506	if e.s == "" {
				507	return 0
				508	}
				509	return e.s[0]
				510	}
				511
// Tokens returns the list of tokens of e, obtained by splitting its string
// form on '-'. The first token is therefore the extension type.
func (e Extension) Tokens() []string {
	return strings.Split(e.s, "-")
}
				516
				517	// Extension returns the extension of type x for tag t. It will return
				518	// false for ok if t does not have the requested extension. The returned
				519	// extension will be invalid in this case.
				520	func (t Tag) Extension(x byte) (ext Extension, ok bool) {
				521	for i := int(t.pExt); i < len(t.str)-1; {
				522	var ext string
				523	i, ext = getExtension(t.str, i)
				524	if ext[0] == x {
				525	return Extension{ext}, true
				526	}
				527	}
				528	return Extension{}, false
				529	}
				530
				531	// Extensions returns all extensions of t.
				532	func (t Tag) Extensions() []Extension {
				533	e := []Extension{}
				534	for i := int(t.pExt); i < len(t.str)-1; {
				535	var ext string
				536	i, ext = getExtension(t.str, i)
				537	e = append(e, Extension{ext})
				538	}
				539	return e
				540	}
				541
				542	// TypeForKey returns the type associated with the given key, where key and type
				543	// are of the allowed values defined for the Unicode locale extension ('u') in
				544	// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
				545	// TypeForKey will traverse the inheritance chain to get the correct value.
				546	func (t Tag) TypeForKey(key string) string {
				547	if start, end, _ := t.findTypeForKey(key); end != start {
				548	return t.str[start:end]
				549	}
				550	return ""
				551	}
				552
				553	var (
				554	errPrivateUse = errors.New("cannot set a key on a private use tag")
				555	errInvalidArguments = errors.New("invalid key or type")
				556	)
				557
				558	// SetTypeForKey returns a new Tag with the key set to type, where key and type
				559	// are of the allowed values defined for the Unicode locale extension ('u') in
				560	// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
				561	// An empty value removes an existing pair with the same key.
				562	func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
				563	if t.private() {
				564	return t, errPrivateUse
				565	}
				566	if len(key) != 2 {
				567	return t, errInvalidArguments
				568	}
				569
				570	// Remove the setting if value is "".
				571	if value == "" {
				572	start, end, _ := t.findTypeForKey(key)
				573	if start != end {
				574	// Remove key tag and leading '-'.
				575	start -= 4
				576
				577	// Remove a possible empty extension.
				578	if (end == len(t.str) \|\| t.str[end+2] == '-') && t.str[start-2] == '-' {
				579	start -= 2
				580	}
				581	if start == int(t.pVariant) && end == len(t.str) {
				582	t.str = ""
				583	t.pVariant, t.pExt = 0, 0
				584	} else {
				585	t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
				586	}
				587	}
				588	return t, nil
				589	}
				590
				591	if len(value) < 3 \|\| len(value) > 8 {
				592	return t, errInvalidArguments
				593	}
				594
				595	var (
				596	buf [maxCoreSize + maxSimpleUExtensionSize]byte
				597	uStart int // start of the -u extension.
				598	)
				599
				600	// Generate the tag string if needed.
				601	if t.str == "" {
				602	uStart = t.genCoreBytes(buf[:])
				603	buf[uStart] = '-'
				604	uStart++
				605	}
				606
				607	// Create new key-type pair and parse it to verify.
				608	b := buf[uStart:]
				609	copy(b, "u-")
				610	copy(b[2:], key)
				611	b[4] = '-'
				612	b = b[:5+copy(b[5:], value)]
				613	scan := makeScanner(b)
				614	if parseExtensions(&scan); scan.err != nil {
				615	return t, scan.err
				616	}
				617
				618	// Assemble the replacement string.
				619	if t.str == "" {
				620	t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
				621	t.str = string(buf[:uStart+len(b)])
				622	} else {
				623	s := t.str
				624	start, end, hasExt := t.findTypeForKey(key)
				625	if start == end {
				626	if hasExt {
				627	b = b[2:]
				628	}
				629	t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
				630	} else {
				631	t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
				632	}
				633	}
				634	return t, nil
				635	}
				636
				637	// findKeyAndType returns the start and end position for the type corresponding
				638	// to key or the point at which to insert the key-value pair if the type
				639	// wasn't found. The hasExt return value reports whether an -u extension was present.
				640	// Note: the extensions are typically very small and are likely to contain
				641	// only one key-type pair.
				642	func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
				643	p := int(t.pExt)
				644	if len(key) != 2 \|\| p == len(t.str) \|\| p == 0 {
				645	return p, p, false
				646	}
				647	s := t.str
				648
				649	// Find the correct extension.
				650	for p++; s[p] != 'u'; p++ {
				651	if s[p] > 'u' {
				652	p--
				653	return p, p, false
				654	}
				655	if p = nextExtension(s, p); p == len(s) {
				656	return len(s), len(s), false
				657	}
				658	}
				659	// Proceed to the hyphen following the extension name.
				660	p++
				661
				662	// curKey is the key currently being processed.
				663	curKey := ""
				664
				665	// Iterate over keys until we get the end of a section.
				666	for {
				667	// p points to the hyphen preceding the current token.
				668	if p3 := p + 3; s[p3] == '-' {
				669	// Found a key.
				670	// Check whether we just processed the key that was requested.
				671	if curKey == key {
				672	return start, p, true
				673	}
				674	// Set to the next key and continue scanning type tokens.
				675	curKey = s[p+1 : p3]
				676	if curKey > key {
				677	return p, p, true
				678	}
				679	// Start of the type token sequence.
				680	start = p + 4
				681	// A type is at least 3 characters long.
				682	p += 7 // 4 + 3
				683	} else {
				684	// Attribute or type, which is at least 3 characters long.
				685	p += 4
				686	}
				687	// p points past the third character of a type or attribute.
				688	max := p + 5 // maximum length of token plus hyphen.
				689	if len(s) < max {
				690	max = len(s)
				691	}
				692	for ; p < max && s[p] != '-'; p++ {
				693	}
				694	// Bail if we have exhausted all tokens or if the next token starts
				695	// a new extension.
				696	if p == len(s) \|\| s[p+2] == '-' {
				697	if curKey == key {
				698	return start, p, true
				699	}
				700	return p, p, true
				701	}
				702	}
				703	}
				704
				705	// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
				706	// for which data exists in the text repository. The index will change over time
				707	// and should not be stored in persistent storage. Extensions, except for the
				708	// 'va' type of the 'u' extension, are ignored. It will return 0, false if no
				709	// compact tag exists, where 0 is the index for the root language (Und).
				710	func CompactIndex(t Tag) (index int, ok bool) {
				711	// TODO: perhaps give more frequent tags a lower index.
				712	// TODO: we could make the indexes stable. This will excluded some
				713	// possibilities for optimization, so don't do this quite yet.
				714	b, s, r := t.Raw()
				715	if len(t.str) > 0 {
				716	if strings.HasPrefix(t.str, "x-") {
				717	// We have no entries for user-defined tags.
				718	return 0, false
				719	}
				720	if uint16(t.pVariant) != t.pExt {
				721	// There are no tags with variants and an u-va type.
				722	if t.TypeForKey("va") != "" {
				723	return 0, false
				724	}
				725	t, _ = Raw.Compose(b, s, r, t.Variants())
				726	} else if _, ok := t.Extension('u'); ok {
				727	// Strip all but the 'va' entry.
				728	variant := t.TypeForKey("va")
				729	t, _ = Raw.Compose(b, s, r)
				730	t, _ = t.SetTypeForKey("va", variant)
				731	}
				732	if len(t.str) > 0 {
				733	// We have some variants.
				734	for i, s := range specialTags {
				735	if s == t {
				736	return i + 1, true
				737	}
				738	}
				739	return 0, false
				740	}
				741	}
				742	// No variants specified: just compare core components.
				743	// The key has the form lllssrrr, where l, s, and r are nibbles for
				744	// respectively the langID, scriptID, and regionID.
				745	key := uint32(b.langID) << (8 + 12)
				746	key \|= uint32(s.scriptID) << 12
				747	key \|= uint32(r.regionID)
				748	x, ok := coreTags[key]
				749	return int(x), ok
				750	}
				751
// Base is an ISO 639 language code, used for encoding the base language
// of a language tag.
type Base struct {
	// langID is the compact identifier of the language; its methods are
	// promoted to Base through embedding.
	langID
}
				757
				758	// ParseBase parses a 2- or 3-letter ISO 639 code.
				759	// It returns a ValueError if s is a well-formed but unknown language identifier
				760	// or another error if another error occurred.
				761	func ParseBase(s string) (Base, error) {
				762	if n := len(s); n < 2 \|\| 3 < n {
				763	return Base{}, errSyntax
				764	}
				765	var buf [3]byte
				766	l, err := getLangID(buf[:copy(buf[:], s)])
				767	return Base{l}, err
				768	}
				769
// Script is a 4-letter ISO 15924 code for representing scripts.
// It is idiomatically represented in title case.
type Script struct {
	// scriptID is the compact identifier of the script; its methods are
	// promoted to Script through embedding.
	scriptID
}
				775
				776	// ParseScript parses a 4-letter ISO 15924 code.
				777	// It returns a ValueError if s is a well-formed but unknown script identifier
				778	// or another error if another error occurred.
				779	func ParseScript(s string) (Script, error) {
				780	if len(s) != 4 {
				781	return Script{}, errSyntax
				782	}
				783	var buf [4]byte
				784	sc, err := getScriptID(script, buf[:copy(buf[:], s)])
				785	return Script{sc}, err
				786	}
				787
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
type Region struct {
	// regionID is the compact identifier of the region; its methods are
	// promoted to Region through embedding.
	regionID
}
				792
				793	// EncodeM49 returns the Region for the given UN M.49 code.
				794	// It returns an error if r is not a valid code.
				795	func EncodeM49(r int) (Region, error) {
				796	rid, err := getRegionM49(r)
				797	return Region{rid}, err
				798	}
				799
				800	// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
				801	// It returns a ValueError if s is a well-formed but unknown region identifier
				802	// or another error if another error occurred.
				803	func ParseRegion(s string) (Region, error) {
				804	if n := len(s); n < 2 \|\| 3 < n {
				805	return Region{}, errSyntax
				806	}
				807	var buf [3]byte
				808	r, err := getRegionID(buf[:copy(buf[:], s)])
				809	return Region{r}, err
				810	}
				811
				812	// IsCountry returns whether this region is a country or autonomous area. This
				813	// includes non-standard definitions from CLDR.
				814	func (r Region) IsCountry() bool {
				815	if r.regionID == 0 \|\| r.IsGroup() \|\| r.IsPrivateUse() && r.regionID != _XK {
				816	return false
				817	}
				818	return true
				819	}
				820
				821	// IsGroup returns whether this region defines a collection of regions. This
				822	// includes non-standard definitions from CLDR.
				823	func (r Region) IsGroup() bool {
				824	if r.regionID == 0 {
				825	return false
				826	}
				827	return int(regionInclusion[r.regionID]) < len(regionContainment)
				828	}
				829
// Contains returns whether Region c is contained by Region r. It returns true
// if c == r. The check is delegated to the underlying region IDs.
func (r Region) Contains(c Region) bool {
	return r.regionID.contains(c.regionID)
}
				835
				836	func (r regionID) contains(c regionID) bool {
				837	if r == c {
				838	return true
				839	}
				840	g := regionInclusion[r]
				841	if g >= nRegionGroups {
				842	return false
				843	}
				844	m := regionContainment[g]
				845
				846	d := regionInclusion[c]
				847	b := regionInclusionBits[d]
				848
				849	// A contained country may belong to multiple disjoint groups. Matching any
				850	// of these indicates containment. If the contained region is a group, it
				851	// must strictly be a subset.
				852	if d >= nRegionGroups {
				853	return b&m != 0
				854	}
				855	return b&^m == 0
				856	}
				857
				858	var errNoTLD = errors.New("language: region is not a valid ccTLD")
				859
				860	// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
				861	// In all other cases it returns either the region itself or an error.
				862	//
				863	// This method may return an error for a region for which there exists a
				864	// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
				865	// region will already be canonicalized it was obtained from a Tag that was
				866	// obtained using any of the default methods.
				867	func (r Region) TLD() (Region, error) {
				868	// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
				869	// difference between ISO 3166-1 and IANA ccTLD.
				870	if r.regionID == _GB {
				871	r = Region{_UK}
				872	}
				873	if (r.typ() & ccTLD) == 0 {
				874	return Region{}, errNoTLD
				875	}
				876	return r, nil
				877	}
				878
				879	// Canonicalize returns the region or a possible replacement if the region is
				880	// deprecated. It will not return a replacement for deprecated regions that
				881	// are split into multiple regions.
				882	func (r Region) Canonicalize() Region {
				883	if cr := normRegion(r.regionID); cr != 0 {
				884	return Region{cr}
				885	}
				886	return r
				887	}
				888
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
	// variant is the text of the variant subtag.
	variant string
}
				893
				894	// ParseVariant parses and returns a Variant. An error is returned if s is not
				895	// a valid variant.
				896	func ParseVariant(s string) (Variant, error) {
				897	s = strings.ToLower(s)
				898	if _, ok := variantIndex[s]; ok {
				899	return Variant{s}, nil
				900	}
				901	return Variant{}, mkErrInvalid([]byte(s))
				902	}
				903
// String returns the string representation of the variant, which is the
// stored subtag text.
func (v Variant) String() string {
	return v.variant
}