Blame - vendor/golang.org/x/text/internal/colltab/numeric.go - ofagent-go

blob: 53b819cc3d5f36475db3796434255082d20df4cc [file] [log] [blame]

Don Newton	98fd881	2019-09-23 15:15:02 -0400	[diff] [blame^]	1	// Copyright 2014 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	package colltab
				6
				7	import (
				8	"unicode"
				9	"unicode/utf8"
				10	)
				11
				12	// NewNumericWeighter wraps w to replace individual digits to sort based on their
				13	// numeric value.
				14	//
				15	// Weighter w must have a free primary weight after the primary weight for 9.
				16	// If this is not the case, numeric value will sort at the same primary level
				17	// as the first primary sorting after 9.
				18	func NewNumericWeighter(w Weighter) Weighter {
				19	getElem := func(s string) Elem {
				20	elems, _ := w.AppendNextString(nil, s)
				21	return elems[0]
				22	}
				23	nine := getElem("9")
				24
				25	// Numbers should order before zero, but the DUCET has no room for this.
				26	// TODO: move before zero once we use fractional collation elements.
				27	ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0)
				28
				29	return &numericWeighter{
				30	Weighter: w,
				31
				32	// We assume that w sorts digits of different kinds in order of numeric
				33	// value and that the tertiary weight order is preserved.
				34	//
				35	// TODO: evaluate whether it is worth basing the ranges on the Elem
				36	// encoding itself once the move to fractional weights is complete.
				37	zero: getElem("0"),
				38	zeroSpecialLo: getElem("０"), // U+FF10 FULLWIDTH DIGIT ZERO
				39	zeroSpecialHi: getElem("₀"), // U+2080 SUBSCRIPT ZERO
				40	nine: nine,
				41	nineSpecialHi: getElem("₉"), // U+2089 SUBSCRIPT NINE
				42	numberStart: ns,
				43	}
				44	}
				45
				46	// A numericWeighter translates a stream of digits into a stream of weights
				47	// representing the numeric value.
				48	type numericWeighter struct {
				49	Weighter
				50
				51	// The Elems below all demarcate boundaries of specific ranges. With the
				52	// current element encoding digits are in two ranges: normal (default
				53	// tertiary value) and special. For most languages, digits have collation
				54	// elements in the normal range.
				55	//
				56	// Note: the range tests are very specific for the element encoding used by
				57	// this implementation. The tests in collate_test.go are designed to fail
				58	// if this code is not updated when an encoding has changed.
				59
				60	zero Elem // normal digit zero
				61	zeroSpecialLo Elem // special digit zero, low tertiary value
				62	zeroSpecialHi Elem // special digit zero, high tertiary value
				63	nine Elem // normal digit nine
				64	nineSpecialHi Elem // special digit nine
				65	numberStart Elem
				66	}
				67
				68	// AppendNext calls the namesake of the underlying weigher, but replaces single
				69	// digits with weights representing their value.
				70	func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
				71	ce, n = nw.Weighter.AppendNext(buf, s)
				72	nc := numberConverter{
				73	elems: buf,
				74	w: nw,
				75	b: s,
				76	}
				77	isZero, ok := nc.checkNextDigit(ce)
				78	if !ok {
				79	return ce, n
				80	}
				81	// ce might have been grown already, so take it instead of buf.
				82	nc.init(ce, len(buf), isZero)
				83	for n < len(s) {
				84	ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
				85	nc.b = s
				86	n += sz
				87	if !nc.update(ce) {
				88	break
				89	}
				90	}
				91	return nc.result(), n
				92	}
				93
				94	// AppendNextString calls the namesake of the underlying weigher, but replaces
				95	// single digits with weights representing their value.
				96	func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) {
				97	ce, n = nw.Weighter.AppendNextString(buf, s)
				98	nc := numberConverter{
				99	elems: buf,
				100	w: nw,
				101	s: s,
				102	}
				103	isZero, ok := nc.checkNextDigit(ce)
				104	if !ok {
				105	return ce, n
				106	}
				107	nc.init(ce, len(buf), isZero)
				108	for n < len(s) {
				109	ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
				110	nc.s = s
				111	n += sz
				112	if !nc.update(ce) {
				113	break
				114	}
				115	}
				116	return nc.result(), n
				117	}
				118
				119	type numberConverter struct {
				120	w *numericWeighter
				121
				122	elems []Elem
				123	nDigits int
				124	lenIndex int
				125
				126	s string // set if the input was of type string
				127	b []byte // set if the input was of type []byte
				128	}
				129
				130	// init completes initialization of a numberConverter and prepares it for adding
				131	// more digits. elems is assumed to have a digit starting at oldLen.
				132	func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
				133	// Insert a marker indicating the start of a number and a placeholder
				134	// for the number of digits.
				135	if isZero {
				136	elems = append(elems[:oldLen], nc.w.numberStart, 0)
				137	} else {
				138	elems = append(elems, 0, 0)
				139	copy(elems[oldLen+2:], elems[oldLen:])
				140	elems[oldLen] = nc.w.numberStart
				141	elems[oldLen+1] = 0
				142
				143	nc.nDigits = 1
				144	}
				145	nc.elems = elems
				146	nc.lenIndex = oldLen + 1
				147	}
				148
				149	// checkNextDigit reports whether bufNew adds a single digit relative to the old
				150	// buffer. If it does, it also reports whether this digit is zero.
				151	func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) {
				152	if len(nc.elems) >= len(bufNew) {
				153	return false, false
				154	}
				155	e := bufNew[len(nc.elems)]
				156	if e < nc.w.zeroSpecialLo \|\| nc.w.nine < e {
				157	// Not a number.
				158	return false, false
				159	}
				160	if e < nc.w.zero {
				161	if e > nc.w.nineSpecialHi {
				162	// Not a number.
				163	return false, false
				164	}
				165	if !nc.isDigit() {
				166	return false, false
				167	}
				168	isZero = e <= nc.w.zeroSpecialHi
				169	} else {
				170	// This is the common case if we encounter a digit.
				171	isZero = e == nc.w.zero
				172	}
				173	// Test the remaining added collation elements have a zero primary value.
				174	if n := len(bufNew) - len(nc.elems); n > 1 {
				175	for i := len(nc.elems) + 1; i < len(bufNew); i++ {
				176	if bufNew[i].Primary() != 0 {
				177	return false, false
				178	}
				179	}
				180	// In some rare cases, collation elements will encode runes in
				181	// unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371)
				182	// are not in Nd. Also some digits that clearly belong in unicode.No,
				183	// like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have
				184	// collation elements indistinguishable from normal digits.
				185	// Unfortunately, this means we need to make this check for nearly all
				186	// non-Latin digits.
				187	//
				188	// TODO: check the performance impact and find something better if it is
				189	// an issue.
				190	if !nc.isDigit() {
				191	return false, false
				192	}
				193	}
				194	return isZero, true
				195	}
				196
				197	func (nc *numberConverter) isDigit() bool {
				198	if nc.b != nil {
				199	r, _ := utf8.DecodeRune(nc.b)
				200	return unicode.In(r, unicode.Nd)
				201	}
				202	r, _ := utf8.DecodeRuneInString(nc.s)
				203	return unicode.In(r, unicode.Nd)
				204	}
				205
				206	// We currently support a maximum of about 2M digits (the number of primary
				207	// values). Such numbers will compare correctly against small numbers, but their
				208	// comparison against other large numbers is undefined.
				209	//
				210	// TODO: define a proper fallback, such as comparing large numbers textually or
				211	// actually allowing numbers of unlimited length.
				212	//
				213	// TODO: cap this to a lower number (like 100) and maybe allow a larger number
				214	// in an option?
				215	const maxDigits = 1<<maxPrimaryBits - 1
				216
				217	func (nc *numberConverter) update(elems []Elem) bool {
				218	isZero, ok := nc.checkNextDigit(elems)
				219	if nc.nDigits == 0 && isZero {
				220	return true
				221	}
				222	nc.elems = elems
				223	if !ok {
				224	return false
				225	}
				226	nc.nDigits++
				227	return nc.nDigits < maxDigits
				228	}
				229
				230	// result fills in the length element for the digit sequence and returns the
				231	// completed collation elements.
				232	func (nc *numberConverter) result() []Elem {
				233	e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0)
				234	nc.elems[nc.lenIndex] = e
				235	return nc.elems
				236	}