Blame - vendor/golang.org/x/text/unicode/cldr/collate.go - ofagent-go

blob: 27c5bac9aa74093b0b68aeb887b0d43c354717d8 [file] [log] [blame]

Don Newton	98fd881	2019-09-23 15:15:02 -0400	[diff] [blame^]	1	// Copyright 2013 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	package cldr
				6
				7	import (
				8	"bufio"
				9	"encoding/xml"
				10	"errors"
				11	"fmt"
				12	"strconv"
				13	"strings"
				14	"unicode"
				15	"unicode/utf8"
				16	)
				17
				18	// RuleProcessor can be passed to Collator's Process method, which
				19	// parses the rules and calls the respective method for each rule found.
				20	type RuleProcessor interface {
				21	Reset(anchor string, before int) error
				22	Insert(level int, str, context, extend string) error
				23	Index(id string)
				24	}
				25
				26	const (
				27	// cldrIndex is a Unicode-reserved sentinel value used to mark the start
				28	// of a grouping within an index.
				29	// We ignore any rule that starts with this rune.
				30	// See https://unicode.org/reports/tr35/#Collation_Elements for details.
				31	cldrIndex = "\uFDD0"
				32
				33	// specialAnchor is the format in which to represent logical reset positions,
				34	// such as "first tertiary ignorable".
				35	specialAnchor = "<%s/>"
				36	)
				37
				38	// Process parses the rules for the tailorings of this collation
				39	// and calls the respective methods of p for each rule found.
				40	func (c Collation) Process(p RuleProcessor) (err error) {
				41	if len(c.Cr) > 0 {
				42	if len(c.Cr) > 1 {
				43	return fmt.Errorf("multiple cr elements, want 0 or 1")
				44	}
				45	return processRules(p, c.Cr[0].Data())
				46	}
				47	if c.Rules.Any != nil {
				48	return c.processXML(p)
				49	}
				50	return errors.New("no tailoring data")
				51	}
				52
				53	// processRules parses rules in the Collation Rule Syntax defined in
				54	// https://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings.
				55	func processRules(p RuleProcessor, s string) (err error) {
				56	chk := func(s string, e error) string {
				57	if err == nil {
				58	err = e
				59	}
				60	return s
				61	}
				62	i := 0 // Save the line number for use after the loop.
				63	scanner := bufio.NewScanner(strings.NewReader(s))
				64	for ; scanner.Scan() && err == nil; i++ {
				65	for s := skipSpace(scanner.Text()); s != "" && s[0] != '#'; s = skipSpace(s) {
				66	level := 5
				67	var ch byte
				68	switch ch, s = s[0], s[1:]; ch {
				69	case '&': // followed by <anchor> or '[' <key> ']'
				70	if s = skipSpace(s); consume(&s, '[') {
				71	s = chk(parseSpecialAnchor(p, s))
				72	} else {
				73	s = chk(parseAnchor(p, 0, s))
				74	}
				75	case '<': // sort relation '<'{1,4}, optionally followed by '*'.
				76	for level = 1; consume(&s, '<'); level++ {
				77	}
				78	if level > 4 {
				79	err = fmt.Errorf("level %d > 4", level)
				80	}
				81	fallthrough
				82	case '=': // identity relation, optionally followed by *.
				83	if consume(&s, '*') {
				84	s = chk(parseSequence(p, level, s))
				85	} else {
				86	s = chk(parseOrder(p, level, s))
				87	}
				88	default:
				89	chk("", fmt.Errorf("illegal operator %q", ch))
				90	break
				91	}
				92	}
				93	}
				94	if chk("", scanner.Err()); err != nil {
				95	return fmt.Errorf("%d: %v", i, err)
				96	}
				97	return nil
				98	}
				99
				100	// parseSpecialAnchor parses the anchor syntax which is either of the form
				101	// ['before' <level>] <anchor>
				102	// or
				103	// [<label>]
				104	// The starting should already be consumed.
				105	func parseSpecialAnchor(p RuleProcessor, s string) (tail string, err error) {
				106	i := strings.IndexByte(s, ']')
				107	if i == -1 {
				108	return "", errors.New("unmatched bracket")
				109	}
				110	a := strings.TrimSpace(s[:i])
				111	s = s[i+1:]
				112	if strings.HasPrefix(a, "before ") {
				113	l, err := strconv.ParseUint(skipSpace(a[len("before "):]), 10, 3)
				114	if err != nil {
				115	return s, err
				116	}
				117	return parseAnchor(p, int(l), s)
				118	}
				119	return s, p.Reset(fmt.Sprintf(specialAnchor, a), 0)
				120	}
				121
				122	func parseAnchor(p RuleProcessor, level int, s string) (tail string, err error) {
				123	anchor, s, err := scanString(s)
				124	if err != nil {
				125	return s, err
				126	}
				127	return s, p.Reset(anchor, level)
				128	}
				129
				130	func parseOrder(p RuleProcessor, level int, s string) (tail string, err error) {
				131	var value, context, extend string
				132	if value, s, err = scanString(s); err != nil {
				133	return s, err
				134	}
				135	if strings.HasPrefix(value, cldrIndex) {
				136	p.Index(value[len(cldrIndex):])
				137	return
				138	}
				139	if consume(&s, '\|') {
				140	if context, s, err = scanString(s); err != nil {
				141	return s, errors.New("missing string after context")
				142	}
				143	}
				144	if consume(&s, '/') {
				145	if extend, s, err = scanString(s); err != nil {
				146	return s, errors.New("missing string after extension")
				147	}
				148	}
				149	return s, p.Insert(level, value, context, extend)
				150	}
				151
				152	// scanString scans a single input string.
				153	func scanString(s string) (str, tail string, err error) {
				154	if s = skipSpace(s); s == "" {
				155	return s, s, errors.New("missing string")
				156	}
				157	buf := [16]byte{} // small but enough to hold most cases.
				158	value := buf[:0]
				159	for s != "" {
				160	if consume(&s, '\'') {
				161	i := strings.IndexByte(s, '\'')
				162	if i == -1 {
				163	return "", "", errors.New(`unmatched single quote`)
				164	}
				165	if i == 0 {
				166	value = append(value, '\'')
				167	} else {
				168	value = append(value, s[:i]...)
				169	}
				170	s = s[i+1:]
				171	continue
				172	}
				173	r, sz := utf8.DecodeRuneInString(s)
				174	if unicode.IsSpace(r) \|\| strings.ContainsRune("&<=#", r) {
				175	break
				176	}
				177	value = append(value, s[:sz]...)
				178	s = s[sz:]
				179	}
				180	return string(value), skipSpace(s), nil
				181	}
				182
				183	func parseSequence(p RuleProcessor, level int, s string) (tail string, err error) {
				184	if s = skipSpace(s); s == "" {
				185	return s, errors.New("empty sequence")
				186	}
				187	last := rune(0)
				188	for s != "" {
				189	r, sz := utf8.DecodeRuneInString(s)
				190	s = s[sz:]
				191
				192	if r == '-' {
				193	// We have a range. The first element was already written.
				194	if last == 0 {
				195	return s, errors.New("range without starter value")
				196	}
				197	r, sz = utf8.DecodeRuneInString(s)
				198	s = s[sz:]
				199	if r == utf8.RuneError \|\| r < last {
				200	return s, fmt.Errorf("invalid range %q-%q", last, r)
				201	}
				202	for i := last + 1; i <= r; i++ {
				203	if err := p.Insert(level, string(i), "", ""); err != nil {
				204	return s, err
				205	}
				206	}
				207	last = 0
				208	continue
				209	}
				210
				211	if unicode.IsSpace(r) \|\| unicode.IsPunct(r) {
				212	break
				213	}
				214
				215	// normal case
				216	if err := p.Insert(level, string(r), "", ""); err != nil {
				217	return s, err
				218	}
				219	last = r
				220	}
				221	return s, nil
				222	}
				223
				224	func skipSpace(s string) string {
				225	return strings.TrimLeftFunc(s, unicode.IsSpace)
				226	}
				227
				228	// consumes returns whether the next byte is ch. If so, it gobbles it by
				229	// updating s.
				230	func consume(s *string, ch byte) (ok bool) {
				231	if s == "" \|\| (s)[0] != ch {
				232	return false
				233	}
				234	s = (s)[1:]
				235	return true
				236	}
				237
				238	// The following code parses Collation rules of CLDR version 24 and before.
				239
				240	var lmap = map[byte]int{
				241	'p': 1,
				242	's': 2,
				243	't': 3,
				244	'i': 5,
				245	}
				246
				247	type rulesElem struct {
				248	Rules struct {
				249	Common
				250	Any []*struct {
				251	XMLName xml.Name
				252	rule
				253	} `xml:",any"`
				254	} `xml:"rules"`
				255	}
				256
				257	type rule struct {
				258	Value string `xml:",chardata"`
				259	Before string `xml:"before,attr"`
				260	Any []*struct {
				261	XMLName xml.Name
				262	rule
				263	} `xml:",any"`
				264	}
				265
				266	var emptyValueError = errors.New("cldr: empty rule value")
				267
				268	func (r *rule) value() (string, error) {
				269	// Convert hexadecimal Unicode codepoint notation to a string.
				270	s := charRe.ReplaceAllStringFunc(r.Value, replaceUnicode)
				271	r.Value = s
				272	if s == "" {
				273	if len(r.Any) != 1 {
				274	return "", emptyValueError
				275	}
				276	r.Value = fmt.Sprintf(specialAnchor, r.Any[0].XMLName.Local)
				277	r.Any = nil
				278	} else if len(r.Any) != 0 {
				279	return "", fmt.Errorf("cldr: XML elements found in collation rule: %v", r.Any)
				280	}
				281	return r.Value, nil
				282	}
				283
				284	func (r rule) process(p RuleProcessor, name, context, extend string) error {
				285	v, err := r.value()
				286	if err != nil {
				287	return err
				288	}
				289	switch name {
				290	case "p", "s", "t", "i":
				291	if strings.HasPrefix(v, cldrIndex) {
				292	p.Index(v[len(cldrIndex):])
				293	return nil
				294	}
				295	if err := p.Insert(lmap[name[0]], v, context, extend); err != nil {
				296	return err
				297	}
				298	case "pc", "sc", "tc", "ic":
				299	level := lmap[name[0]]
				300	for _, s := range v {
				301	if err := p.Insert(level, string(s), context, extend); err != nil {
				302	return err
				303	}
				304	}
				305	default:
				306	return fmt.Errorf("cldr: unsupported tag: %q", name)
				307	}
				308	return nil
				309	}
				310
				311	// processXML parses the format of CLDR versions 24 and older.
				312	func (c Collation) processXML(p RuleProcessor) (err error) {
				313	// Collation is generated and defined in xml.go.
				314	var v string
				315	for _, r := range c.Rules.Any {
				316	switch r.XMLName.Local {
				317	case "reset":
				318	level := 0
				319	switch r.Before {
				320	case "primary", "1":
				321	level = 1
				322	case "secondary", "2":
				323	level = 2
				324	case "tertiary", "3":
				325	level = 3
				326	case "":
				327	default:
				328	return fmt.Errorf("cldr: unknown level %q", r.Before)
				329	}
				330	v, err = r.value()
				331	if err == nil {
				332	err = p.Reset(v, level)
				333	}
				334	case "x":
				335	var context, extend string
				336	for _, r1 := range r.Any {
				337	v, err = r1.value()
				338	switch r1.XMLName.Local {
				339	case "context":
				340	context = v
				341	case "extend":
				342	extend = v
				343	}
				344	}
				345	for _, r1 := range r.Any {
				346	if t := r1.XMLName.Local; t == "context" \|\| t == "extend" {
				347	continue
				348	}
				349	r1.rule.process(p, r1.XMLName.Local, context, extend)
				350	}
				351	default:
				352	err = r.rule.process(p, r.XMLName.Local, "", "")
				353	}
				354	if err != nil {
				355	return err
				356	}
				357	}
				358	return nil
				359	}