Blame - vendor/golang.org/x/text/internal/colltab/iter.go - ofagent-go

blob: c1b1ba81ef0e012e62e36bc9a029578609d293fc [file] [log] [blame]

Don Newton	98fd881	2019-09-23 15:15:02 -0400	[diff] [blame^]	1	// Copyright 2015 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	package colltab
				6
				7	// An Iter incrementally converts chunks of the input text to collation
				8	// elements, while ensuring that the collation elements are in normalized order
				9	// (that is, they are in the order as if the input text were normalized first).
				10	type Iter struct {
				11	Weighter Weighter
				12	Elems []Elem
				13	// N is the number of elements in Elems that will not be reordered on
				14	// subsequent iterations, N <= len(Elems).
				15	N int
				16
				17	bytes []byte
				18	str string
				19	// Because the Elems buffer may contain collation elements that are needed
				20	// for look-ahead, we need two positions in the text (bytes or str): one for
				21	// the end position in the text for the current iteration and one for the
				22	// start of the next call to appendNext.
				23	pEnd int // end position in text corresponding to N.
				24	pNext int // pEnd <= pNext.
				25	}
				26
				27	// Reset sets the position in the current input text to p and discards any
				28	// results obtained so far.
				29	func (i *Iter) Reset(p int) {
				30	i.Elems = i.Elems[:0]
				31	i.N = 0
				32	i.pEnd = p
				33	i.pNext = p
				34	}
				35
				36	// Len returns the length of the input text.
				37	func (i *Iter) Len() int {
				38	if i.bytes != nil {
				39	return len(i.bytes)
				40	}
				41	return len(i.str)
				42	}
				43
				44	// Discard removes the collation elements up to N.
				45	func (i *Iter) Discard() {
				46	// TODO: change this such that only modifiers following starters will have
				47	// to be copied.
				48	i.Elems = i.Elems[:copy(i.Elems, i.Elems[i.N:])]
				49	i.N = 0
				50	}
				51
				52	// End returns the end position of the input text for which Next has returned
				53	// results.
				54	func (i *Iter) End() int {
				55	return i.pEnd
				56	}
				57
				58	// SetInput resets i to input s.
				59	func (i *Iter) SetInput(s []byte) {
				60	i.bytes = s
				61	i.str = ""
				62	i.Reset(0)
				63	}
				64
				65	// SetInputString resets i to input s.
				66	func (i *Iter) SetInputString(s string) {
				67	i.str = s
				68	i.bytes = nil
				69	i.Reset(0)
				70	}
				71
				72	func (i *Iter) done() bool {
				73	return i.pNext >= len(i.str) && i.pNext >= len(i.bytes)
				74	}
				75
				76	func (i *Iter) appendNext() bool {
				77	if i.done() {
				78	return false
				79	}
				80	var sz int
				81	if i.bytes == nil {
				82	i.Elems, sz = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:])
				83	} else {
				84	i.Elems, sz = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:])
				85	}
				86	if sz == 0 {
				87	sz = 1
				88	}
				89	i.pNext += sz
				90	return true
				91	}
				92
				93	// Next appends Elems to the internal array. On each iteration, it will either
				94	// add starters or modifiers. In the majority of cases, an Elem with a primary
				95	// value > 0 will have a CCC of 0. The CCC values of collation elements are also
				96	// used to detect if the input string was not normalized and to adjust the
				97	// result accordingly.
				98	func (i *Iter) Next() bool {
				99	if i.N == len(i.Elems) && !i.appendNext() {
				100	return false
				101	}
				102
				103	// Check if the current segment starts with a starter.
				104	prevCCC := i.Elems[len(i.Elems)-1].CCC()
				105	if prevCCC == 0 {
				106	i.N = len(i.Elems)
				107	i.pEnd = i.pNext
				108	return true
				109	} else if i.Elems[i.N].CCC() == 0 {
				110	// set i.N to only cover part of i.Elems for which prevCCC == 0 and
				111	// use rest for the next call to next.
				112	for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ {
				113	}
				114	i.pEnd = i.pNext
				115	return true
				116	}
				117
				118	// The current (partial) segment starts with modifiers. We need to collect
				119	// all successive modifiers to ensure that they are normalized.
				120	for {
				121	p := len(i.Elems)
				122	i.pEnd = i.pNext
				123	if !i.appendNext() {
				124	break
				125	}
				126
				127	if ccc := i.Elems[p].CCC(); ccc == 0 \|\| len(i.Elems)-i.N > maxCombiningCharacters {
				128	// Leave the starter for the next iteration. This ensures that we
				129	// do not return sequences of collation elements that cross two
				130	// segments.
				131	//
				132	// TODO: handle large number of combining characters by fully
				133	// normalizing the input segment before iteration. This ensures
				134	// results are consistent across the text repo.
				135	i.N = p
				136	return true
				137	} else if ccc < prevCCC {
				138	i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC.
				139	} else {
				140	prevCCC = ccc
				141	}
				142	}
				143
				144	done := len(i.Elems) != i.N
				145	i.N = len(i.Elems)
				146	return done
				147	}
				148
				149	// nextNoNorm is the same as next, but does not "normalize" the collation
				150	// elements.
				151	func (i *Iter) nextNoNorm() bool {
				152	// TODO: remove this function. Using this instead of next does not seem
				153	// to improve performance in any significant way. We retain this until
				154	// later for evaluation purposes.
				155	if i.done() {
				156	return false
				157	}
				158	i.appendNext()
				159	i.N = len(i.Elems)
				160	return true
				161	}
				162
				163	const maxCombiningCharacters = 30
				164
				165	// doNorm reorders the collation elements in i.Elems.
				166	// It assumes that blocks of collation elements added with appendNext
				167	// either start and end with the same CCC or start with CCC == 0.
				168	// This allows for a single insertion point for the entire block.
				169	// The correctness of this assumption is verified in builder.go.
				170	func (i *Iter) doNorm(p int, ccc uint8) {
				171	n := len(i.Elems)
				172	k := p
				173	for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- {
				174	}
				175	i.Elems = append(i.Elems, i.Elems[p:k]...)
				176	copy(i.Elems[p:], i.Elems[k:])
				177	i.Elems = i.Elems[:n]
				178	}