blob: ea2a0803efe95f68ef5edf9c7c89775b52846959 [file] [log] [blame]
Don Newton98fd8812019-09-23 15:15:02 -04001// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package rangetable
6
7import (
8 "unicode"
9)
10
11// atEnd is used to mark a completed iteration.
12const atEnd = unicode.MaxRune + 1
13
14// Merge returns a new RangeTable that is the union of the given tables.
15// It can also be used to compact user-created RangeTables. The entries in
16// R16 and R32 for any given RangeTable should be sorted and non-overlapping.
17//
18// A lookup in the resulting table can be several times faster than using In
19// directly on the ranges. Merge is an expensive operation, however, and only
20// makes sense if one intends to use the result for more than a couple of
21// hundred lookups.
22func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable {
23 rt := &unicode.RangeTable{}
24 if len(ranges) == 0 {
25 return rt
26 }
27
28 iter := tablesIter(make([]tableIndex, len(ranges)))
29
30 for i, t := range ranges {
31 iter[i] = tableIndex{t, 0, atEnd}
32 if len(t.R16) > 0 {
33 iter[i].next = rune(t.R16[0].Lo)
34 }
35 }
36
37 if r0 := iter.next16(); r0.Stride != 0 {
38 for {
39 r1 := iter.next16()
40 if r1.Stride == 0 {
41 rt.R16 = append(rt.R16, r0)
42 break
43 }
44 stride := r1.Lo - r0.Hi
45 if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
46 // Fully merge the next range into the previous one.
47 r0.Hi, r0.Stride = r1.Hi, stride
48 continue
49 } else if stride == r0.Stride {
50 // Move the first element of r1 to r0. This may eliminate an
51 // entry.
52 r0.Hi = r1.Lo
53 r0.Stride = stride
54 r1.Lo = r1.Lo + r1.Stride
55 if r1.Lo > r1.Hi {
56 continue
57 }
58 }
59 rt.R16 = append(rt.R16, r0)
60 r0 = r1
61 }
62 }
63
64 for i, t := range ranges {
65 iter[i] = tableIndex{t, 0, atEnd}
66 if len(t.R32) > 0 {
67 iter[i].next = rune(t.R32[0].Lo)
68 }
69 }
70
71 if r0 := iter.next32(); r0.Stride != 0 {
72 for {
73 r1 := iter.next32()
74 if r1.Stride == 0 {
75 rt.R32 = append(rt.R32, r0)
76 break
77 }
78 stride := r1.Lo - r0.Hi
79 if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
80 // Fully merge the next range into the previous one.
81 r0.Hi, r0.Stride = r1.Hi, stride
82 continue
83 } else if stride == r0.Stride {
84 // Move the first element of r1 to r0. This may eliminate an
85 // entry.
86 r0.Hi = r1.Lo
87 r1.Lo = r1.Lo + r1.Stride
88 if r1.Lo > r1.Hi {
89 continue
90 }
91 }
92 rt.R32 = append(rt.R32, r0)
93 r0 = r1
94 }
95 }
96
97 for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ {
98 rt.LatinOffset = i + 1
99 }
100
101 return rt
102}
103
104type tableIndex struct {
105 t *unicode.RangeTable
106 p uint32
107 next rune
108}
109
110type tablesIter []tableIndex
111
112// sortIter does an insertion sort using the next field of tableIndex. Insertion
113// sort is a good sorting algorithm for this case.
114func sortIter(t []tableIndex) {
115 for i := range t {
116 for j := i; j > 0 && t[j-1].next > t[j].next; j-- {
117 t[j], t[j-1] = t[j-1], t[j]
118 }
119 }
120}
121
122// next16 finds the ranged to be added to the table. If ranges overlap between
123// multiple tables it clips the result to a non-overlapping range if the
124// elements are not fully subsumed. It returns a zero range if there are no more
125// ranges.
126func (ti tablesIter) next16() unicode.Range16 {
127 sortIter(ti)
128
129 t0 := ti[0]
130 if t0.next == atEnd {
131 return unicode.Range16{}
132 }
133 r0 := t0.t.R16[t0.p]
134 r0.Lo = uint16(t0.next)
135
136 // We restrict the Hi of the current range if it overlaps with another range.
137 for i := range ti {
138 tn := ti[i]
139 // Since our tableIndices are sorted by next, we can break if the there
140 // is no overlap. The first value of a next range can always be merged
141 // into the current one, so we can break in case of equality as well.
142 if rune(r0.Hi) <= tn.next {
143 break
144 }
145 rn := tn.t.R16[tn.p]
146 rn.Lo = uint16(tn.next)
147
148 // Limit r0.Hi based on next ranges in list, but allow it to overlap
149 // with ranges as long as it subsumes it.
150 m := (rn.Lo - r0.Lo) % r0.Stride
151 if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
152 // Overlap, take the min of the two Hi values: for simplicity's sake
153 // we only process one range at a time.
154 if r0.Hi > rn.Hi {
155 r0.Hi = rn.Hi
156 }
157 } else {
158 // Not a compatible stride. Set to the last possible value before
159 // rn.Lo, but ensure there is at least one value.
160 if x := rn.Lo - m; r0.Lo <= x {
161 r0.Hi = x
162 }
163 break
164 }
165 }
166
167 // Update the next values for each table.
168 for i := range ti {
169 tn := &ti[i]
170 if rune(r0.Hi) < tn.next {
171 break
172 }
173 rn := tn.t.R16[tn.p]
174 stride := rune(rn.Stride)
175 tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
176 if rune(rn.Hi) < tn.next {
177 if tn.p++; int(tn.p) == len(tn.t.R16) {
178 tn.next = atEnd
179 } else {
180 tn.next = rune(tn.t.R16[tn.p].Lo)
181 }
182 }
183 }
184
185 if r0.Lo == r0.Hi {
186 r0.Stride = 1
187 }
188
189 return r0
190}
191
192// next32 finds the ranged to be added to the table. If ranges overlap between
193// multiple tables it clips the result to a non-overlapping range if the
194// elements are not fully subsumed. It returns a zero range if there are no more
195// ranges.
196func (ti tablesIter) next32() unicode.Range32 {
197 sortIter(ti)
198
199 t0 := ti[0]
200 if t0.next == atEnd {
201 return unicode.Range32{}
202 }
203 r0 := t0.t.R32[t0.p]
204 r0.Lo = uint32(t0.next)
205
206 // We restrict the Hi of the current range if it overlaps with another range.
207 for i := range ti {
208 tn := ti[i]
209 // Since our tableIndices are sorted by next, we can break if the there
210 // is no overlap. The first value of a next range can always be merged
211 // into the current one, so we can break in case of equality as well.
212 if rune(r0.Hi) <= tn.next {
213 break
214 }
215 rn := tn.t.R32[tn.p]
216 rn.Lo = uint32(tn.next)
217
218 // Limit r0.Hi based on next ranges in list, but allow it to overlap
219 // with ranges as long as it subsumes it.
220 m := (rn.Lo - r0.Lo) % r0.Stride
221 if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
222 // Overlap, take the min of the two Hi values: for simplicity's sake
223 // we only process one range at a time.
224 if r0.Hi > rn.Hi {
225 r0.Hi = rn.Hi
226 }
227 } else {
228 // Not a compatible stride. Set to the last possible value before
229 // rn.Lo, but ensure there is at least one value.
230 if x := rn.Lo - m; r0.Lo <= x {
231 r0.Hi = x
232 }
233 break
234 }
235 }
236
237 // Update the next values for each table.
238 for i := range ti {
239 tn := &ti[i]
240 if rune(r0.Hi) < tn.next {
241 break
242 }
243 rn := tn.t.R32[tn.p]
244 stride := rune(rn.Stride)
245 tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
246 if rune(rn.Hi) < tn.next {
247 if tn.p++; int(tn.p) == len(tn.t.R32) {
248 tn.next = atEnd
249 } else {
250 tn.next = rune(tn.t.R32[tn.p].Lo)
251 }
252 }
253 }
254
255 if r0.Lo == r0.Hi {
256 r0.Stride = 1
257 }
258
259 return r0
260}