Blame - vendor/golang.org/x/text/internal/ucd/ucd.go - ofagent-go

blob: 0879bc84c87a2018a2eba7d08d95530d0ab8bde8 [file] [log] [blame]

Don Newton	98fd881	2019-09-23 15:15:02 -0400	[diff] [blame^]	1	// Copyright 2014 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	// Package ucd provides a parser for Unicode Character Database files, the
				6	// format of which is defined in https://www.unicode.org/reports/tr44/. See
				7	// https://www.unicode.org/Public/UCD/latest/ucd/ for example files.
				8	//
				9	// It currently does not support substitutions of missing fields.
				10	package ucd // import "golang.org/x/text/internal/ucd"
				11
				12	import (
				13	"bufio"
				14	"errors"
				15	"fmt"
				16	"io"
				17	"log"
				18	"regexp"
				19	"strconv"
				20	"strings"
				21	)
				22
				23	// UnicodeData.txt fields.
				24	const (
				25	CodePoint = iota
				26	Name
				27	GeneralCategory
				28	CanonicalCombiningClass
				29	BidiClass
				30	DecompMapping
				31	DecimalValue
				32	DigitValue
				33	NumericValue
				34	BidiMirrored
				35	Unicode1Name
				36	ISOComment
				37	SimpleUppercaseMapping
				38	SimpleLowercaseMapping
				39	SimpleTitlecaseMapping
				40	)
				41
				42	// Parse calls f for each entry in the given reader of a UCD file. It will close
				43	// the reader upon return. It will call log.Fatal if any error occurred.
				44	//
				45	// This implements the most common usage pattern of using Parser.
				46	func Parse(r io.ReadCloser, f func(p *Parser)) {
				47	defer r.Close()
				48
				49	p := New(r)
				50	for p.Next() {
				51	f(p)
				52	}
				53	if err := p.Err(); err != nil {
				54	r.Close() // os.Exit will cause defers not to be called.
				55	log.Fatal(err)
				56	}
				57	}
				58
				59	// An Option is used to configure a Parser.
				60	type Option func(p *Parser)
				61
				62	func keepRanges(p *Parser) {
				63	p.keepRanges = true
				64	}
				65
				66	var (
				67	// KeepRanges prevents the expansion of ranges. The raw ranges can be
				68	// obtained by calling Range(0) on the parser.
				69	KeepRanges Option = keepRanges
				70	)
				71
				72	// The Part option register a handler for lines starting with a '@'. The text
				73	// after a '@' is available as the first field. Comments are handled as usual.
				74	func Part(f func(p *Parser)) Option {
				75	return func(p *Parser) {
				76	p.partHandler = f
				77	}
				78	}
				79
				80	// The CommentHandler option passes comments that are on a line by itself to
				81	// a given handler.
				82	func CommentHandler(f func(s string)) Option {
				83	return func(p *Parser) {
				84	p.commentHandler = f
				85	}
				86	}
				87
				88	// A Parser parses Unicode Character Database (UCD) files.
				89	type Parser struct {
				90	scanner *bufio.Scanner
				91
				92	keepRanges bool // Don't expand rune ranges in field 0.
				93
				94	err error
				95	comment string
				96	field []string
				97	// parsedRange is needed in case Range(0) is called more than once for one
				98	// field. In some cases this requires scanning ahead.
				99	line int
				100	parsedRange bool
				101	rangeStart, rangeEnd rune
				102
				103	partHandler func(p *Parser)
				104	commentHandler func(s string)
				105	}
				106
				107	func (p *Parser) setError(err error, msg string) {
				108	if p.err == nil && err != nil {
				109	if msg == "" {
				110	p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err)
				111	} else {
				112	p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err)
				113	}
				114	}
				115	}
				116
				117	func (p *Parser) getField(i int) string {
				118	if i >= len(p.field) {
				119	return ""
				120	}
				121	return p.field[i]
				122	}
				123
				124	// Err returns a non-nil error if any error occurred during parsing.
				125	func (p *Parser) Err() error {
				126	return p.err
				127	}
				128
				129	// New returns a Parser for the given Reader.
				130	func New(r io.Reader, o ...Option) *Parser {
				131	p := &Parser{
				132	scanner: bufio.NewScanner(r),
				133	}
				134	for _, f := range o {
				135	f(p)
				136	}
				137	return p
				138	}
				139
				140	// Next parses the next line in the file. It returns true if a line was parsed
				141	// and false if it reached the end of the file.
				142	func (p *Parser) Next() bool {
				143	if !p.keepRanges && p.rangeStart < p.rangeEnd {
				144	p.rangeStart++
				145	return true
				146	}
				147	p.comment = ""
				148	p.field = p.field[:0]
				149	p.parsedRange = false
				150
				151	for p.scanner.Scan() && p.err == nil {
				152	p.line++
				153	s := p.scanner.Text()
				154	if s == "" {
				155	continue
				156	}
				157	if s[0] == '#' {
				158	if p.commentHandler != nil {
				159	p.commentHandler(strings.TrimSpace(s[1:]))
				160	}
				161	continue
				162	}
				163
				164	// Parse line
				165	if i := strings.IndexByte(s, '#'); i != -1 {
				166	p.comment = strings.TrimSpace(s[i+1:])
				167	s = s[:i]
				168	}
				169	if s[0] == '@' {
				170	if p.partHandler != nil {
				171	p.field = append(p.field, strings.TrimSpace(s[1:]))
				172	p.partHandler(p)
				173	p.field = p.field[:0]
				174	}
				175	p.comment = ""
				176	continue
				177	}
				178	for {
				179	i := strings.IndexByte(s, ';')
				180	if i == -1 {
				181	p.field = append(p.field, strings.TrimSpace(s))
				182	break
				183	}
				184	p.field = append(p.field, strings.TrimSpace(s[:i]))
				185	s = s[i+1:]
				186	}
				187	if !p.keepRanges {
				188	p.rangeStart, p.rangeEnd = p.getRange(0)
				189	}
				190	return true
				191	}
				192	p.setError(p.scanner.Err(), "scanner failed")
				193	return false
				194	}
				195
				196	func parseRune(b string) (rune, error) {
				197	if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
				198	b = b[2:]
				199	}
				200	x, err := strconv.ParseUint(b, 16, 32)
				201	return rune(x), err
				202	}
				203
				204	func (p *Parser) parseRune(s string) rune {
				205	x, err := parseRune(s)
				206	p.setError(err, "failed to parse rune")
				207	return x
				208	}
				209
				210	// Rune parses and returns field i as a rune.
				211	func (p *Parser) Rune(i int) rune {
				212	if i > 0 \|\| p.keepRanges {
				213	return p.parseRune(p.getField(i))
				214	}
				215	return p.rangeStart
				216	}
				217
				218	// Runes interprets and returns field i as a sequence of runes.
				219	func (p *Parser) Runes(i int) (runes []rune) {
				220	add := func(s string) {
				221	if s = strings.TrimSpace(s); len(s) > 0 {
				222	runes = append(runes, p.parseRune(s))
				223	}
				224	}
				225	for b := p.getField(i); ; {
				226	i := strings.IndexByte(b, ' ')
				227	if i == -1 {
				228	add(b)
				229	break
				230	}
				231	add(b[:i])
				232	b = b[i+1:]
				233	}
				234	return
				235	}
				236
				237	var (
				238	errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")
				239
				240	// reRange matches one line of a legacy rune range.
				241	reRange = regexp.MustCompile("^([0-9A-F]);<([^,]), ([^>])>(.)$")
				242	)
				243
				244	// Range parses and returns field i as a rune range. A range is inclusive at
				245	// both ends. If the field only has one rune, first and last will be identical.
				246	// It supports the legacy format for ranges used in UnicodeData.txt.
				247	func (p *Parser) Range(i int) (first, last rune) {
				248	if !p.keepRanges {
				249	return p.rangeStart, p.rangeStart
				250	}
				251	return p.getRange(i)
				252	}
				253
				254	func (p *Parser) getRange(i int) (first, last rune) {
				255	b := p.getField(i)
				256	if k := strings.Index(b, ".."); k != -1 {
				257	return p.parseRune(b[:k]), p.parseRune(b[k+2:])
				258	}
				259	// The first field may not be a rune, in which case we may ignore any error
				260	// and set the range as 0..0.
				261	x, err := parseRune(b)
				262	if err != nil {
				263	// Disable range parsing henceforth. This ensures that an error will be
				264	// returned if the user subsequently will try to parse this field as
				265	// a Rune.
				266	p.keepRanges = true
				267	}
				268	// Special case for UnicodeData that was retained for backwards compatibility.
				269	if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") {
				270	if p.parsedRange {
				271	return p.rangeStart, p.rangeEnd
				272	}
				273	mf := reRange.FindStringSubmatch(p.scanner.Text())
				274	p.line++
				275	if mf == nil \|\| !p.scanner.Scan() {
				276	p.setError(errIncorrectLegacyRange, "")
				277	return x, x
				278	}
				279	// Using Bytes would be more efficient here, but Text is a lot easier
				280	// and this is not a frequent case.
				281	ml := reRange.FindStringSubmatch(p.scanner.Text())
				282	if ml == nil \|\| mf[2] != ml[2] \|\| ml[3] != "Last" \|\| mf[4] != ml[4] {
				283	p.setError(errIncorrectLegacyRange, "")
				284	return x, x
				285	}
				286	p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Text()[:len(ml[1])])
				287	p.parsedRange = true
				288	return p.rangeStart, p.rangeEnd
				289	}
				290	return x, x
				291	}
				292
				293	// bools recognizes all valid UCD boolean values.
				294	var bools = map[string]bool{
				295	"": false,
				296	"N": false,
				297	"No": false,
				298	"F": false,
				299	"False": false,
				300	"Y": true,
				301	"Yes": true,
				302	"T": true,
				303	"True": true,
				304	}
				305
				306	// Bool parses and returns field i as a boolean value.
				307	func (p *Parser) Bool(i int) bool {
				308	f := p.getField(i)
				309	for s, v := range bools {
				310	if f == s {
				311	return v
				312	}
				313	}
				314	p.setError(strconv.ErrSyntax, "error parsing bool")
				315	return false
				316	}
				317
				318	// Int parses and returns field i as an integer value.
				319	func (p *Parser) Int(i int) int {
				320	x, err := strconv.ParseInt(string(p.getField(i)), 10, 64)
				321	p.setError(err, "error parsing int")
				322	return int(x)
				323	}
				324
				325	// Uint parses and returns field i as an unsigned integer value.
				326	func (p *Parser) Uint(i int) uint {
				327	x, err := strconv.ParseUint(string(p.getField(i)), 10, 64)
				328	p.setError(err, "error parsing uint")
				329	return uint(x)
				330	}
				331
				332	// Float parses and returns field i as a decimal value.
				333	func (p *Parser) Float(i int) float64 {
				334	x, err := strconv.ParseFloat(string(p.getField(i)), 64)
				335	p.setError(err, "error parsing float")
				336	return x
				337	}
				338
				339	// String parses and returns field i as a string value.
				340	func (p *Parser) String(i int) string {
				341	return string(p.getField(i))
				342	}
				343
				344	// Strings parses and returns field i as a space-separated list of strings.
				345	func (p *Parser) Strings(i int) []string {
				346	ss := strings.Split(string(p.getField(i)), " ")
				347	for i, s := range ss {
				348	ss[i] = strings.TrimSpace(s)
				349	}
				350	return ss
				351	}
				352
				353	// Comment returns the comments for the current line.
				354	func (p *Parser) Comment() string {
				355	return string(p.comment)
				356	}
				357
				358	var errUndefinedEnum = errors.New("ucd: undefined enum value")
				359
				360	// Enum interprets and returns field i as a value that must be one of the values
				361	// in enum.
				362	func (p *Parser) Enum(i int, enum ...string) string {
				363	f := p.getField(i)
				364	for _, s := range enum {
				365	if f == s {
				366	return s
				367	}
				368	}
				369	p.setError(errUndefinedEnum, "error parsing enum")
				370	return ""
				371	}