William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 1 | // Copyright 2015 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package rangetable |
| 6 | |
| 7 | import ( |
| 8 | "unicode" |
| 9 | ) |
| 10 | |
| 11 | // atEnd is used to mark a completed iteration. |
| 12 | const atEnd = unicode.MaxRune + 1 |
| 13 | |
| 14 | // Merge returns a new RangeTable that is the union of the given tables. |
| 15 | // It can also be used to compact user-created RangeTables. The entries in |
| 16 | // R16 and R32 for any given RangeTable should be sorted and non-overlapping. |
| 17 | // |
| 18 | // A lookup in the resulting table can be several times faster than using In |
| 19 | // directly on the ranges. Merge is an expensive operation, however, and only |
| 20 | // makes sense if one intends to use the result for more than a couple of |
| 21 | // hundred lookups. |
| 22 | func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable { |
| 23 | rt := &unicode.RangeTable{} |
| 24 | if len(ranges) == 0 { |
| 25 | return rt |
| 26 | } |
| 27 | |
| 28 | iter := tablesIter(make([]tableIndex, len(ranges))) |
| 29 | |
| 30 | for i, t := range ranges { |
| 31 | iter[i] = tableIndex{t, 0, atEnd} |
| 32 | if len(t.R16) > 0 { |
| 33 | iter[i].next = rune(t.R16[0].Lo) |
| 34 | } |
| 35 | } |
| 36 | |
| 37 | if r0 := iter.next16(); r0.Stride != 0 { |
| 38 | for { |
| 39 | r1 := iter.next16() |
| 40 | if r1.Stride == 0 { |
| 41 | rt.R16 = append(rt.R16, r0) |
| 42 | break |
| 43 | } |
| 44 | stride := r1.Lo - r0.Hi |
| 45 | if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) { |
| 46 | // Fully merge the next range into the previous one. |
| 47 | r0.Hi, r0.Stride = r1.Hi, stride |
| 48 | continue |
| 49 | } else if stride == r0.Stride { |
| 50 | // Move the first element of r1 to r0. This may eliminate an |
| 51 | // entry. |
| 52 | r0.Hi = r1.Lo |
| 53 | r0.Stride = stride |
| 54 | r1.Lo = r1.Lo + r1.Stride |
| 55 | if r1.Lo > r1.Hi { |
| 56 | continue |
| 57 | } |
| 58 | } |
| 59 | rt.R16 = append(rt.R16, r0) |
| 60 | r0 = r1 |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | for i, t := range ranges { |
| 65 | iter[i] = tableIndex{t, 0, atEnd} |
| 66 | if len(t.R32) > 0 { |
| 67 | iter[i].next = rune(t.R32[0].Lo) |
| 68 | } |
| 69 | } |
| 70 | |
| 71 | if r0 := iter.next32(); r0.Stride != 0 { |
| 72 | for { |
| 73 | r1 := iter.next32() |
| 74 | if r1.Stride == 0 { |
| 75 | rt.R32 = append(rt.R32, r0) |
| 76 | break |
| 77 | } |
| 78 | stride := r1.Lo - r0.Hi |
| 79 | if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) { |
| 80 | // Fully merge the next range into the previous one. |
| 81 | r0.Hi, r0.Stride = r1.Hi, stride |
| 82 | continue |
| 83 | } else if stride == r0.Stride { |
| 84 | // Move the first element of r1 to r0. This may eliminate an |
| 85 | // entry. |
| 86 | r0.Hi = r1.Lo |
| 87 | r1.Lo = r1.Lo + r1.Stride |
| 88 | if r1.Lo > r1.Hi { |
| 89 | continue |
| 90 | } |
| 91 | } |
| 92 | rt.R32 = append(rt.R32, r0) |
| 93 | r0 = r1 |
| 94 | } |
| 95 | } |
| 96 | |
| 97 | for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ { |
| 98 | rt.LatinOffset = i + 1 |
| 99 | } |
| 100 | |
| 101 | return rt |
| 102 | } |
| 103 | |
| 104 | type tableIndex struct { |
| 105 | t *unicode.RangeTable |
| 106 | p uint32 |
| 107 | next rune |
| 108 | } |
| 109 | |
| 110 | type tablesIter []tableIndex |
| 111 | |
| 112 | // sortIter does an insertion sort using the next field of tableIndex. Insertion |
| 113 | // sort is a good sorting algorithm for this case. |
| 114 | func sortIter(t []tableIndex) { |
| 115 | for i := range t { |
| 116 | for j := i; j > 0 && t[j-1].next > t[j].next; j-- { |
| 117 | t[j], t[j-1] = t[j-1], t[j] |
| 118 | } |
| 119 | } |
| 120 | } |
| 121 | |
| 122 | // next16 finds the ranged to be added to the table. If ranges overlap between |
| 123 | // multiple tables it clips the result to a non-overlapping range if the |
| 124 | // elements are not fully subsumed. It returns a zero range if there are no more |
| 125 | // ranges. |
| 126 | func (ti tablesIter) next16() unicode.Range16 { |
| 127 | sortIter(ti) |
| 128 | |
| 129 | t0 := ti[0] |
| 130 | if t0.next == atEnd { |
| 131 | return unicode.Range16{} |
| 132 | } |
| 133 | r0 := t0.t.R16[t0.p] |
| 134 | r0.Lo = uint16(t0.next) |
| 135 | |
| 136 | // We restrict the Hi of the current range if it overlaps with another range. |
| 137 | for i := range ti { |
| 138 | tn := ti[i] |
| 139 | // Since our tableIndices are sorted by next, we can break if the there |
| 140 | // is no overlap. The first value of a next range can always be merged |
| 141 | // into the current one, so we can break in case of equality as well. |
| 142 | if rune(r0.Hi) <= tn.next { |
| 143 | break |
| 144 | } |
| 145 | rn := tn.t.R16[tn.p] |
| 146 | rn.Lo = uint16(tn.next) |
| 147 | |
| 148 | // Limit r0.Hi based on next ranges in list, but allow it to overlap |
| 149 | // with ranges as long as it subsumes it. |
| 150 | m := (rn.Lo - r0.Lo) % r0.Stride |
| 151 | if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) { |
| 152 | // Overlap, take the min of the two Hi values: for simplicity's sake |
| 153 | // we only process one range at a time. |
| 154 | if r0.Hi > rn.Hi { |
| 155 | r0.Hi = rn.Hi |
| 156 | } |
| 157 | } else { |
| 158 | // Not a compatible stride. Set to the last possible value before |
| 159 | // rn.Lo, but ensure there is at least one value. |
| 160 | if x := rn.Lo - m; r0.Lo <= x { |
| 161 | r0.Hi = x |
| 162 | } |
| 163 | break |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | // Update the next values for each table. |
| 168 | for i := range ti { |
| 169 | tn := &ti[i] |
| 170 | if rune(r0.Hi) < tn.next { |
| 171 | break |
| 172 | } |
| 173 | rn := tn.t.R16[tn.p] |
| 174 | stride := rune(rn.Stride) |
| 175 | tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride)) |
| 176 | if rune(rn.Hi) < tn.next { |
| 177 | if tn.p++; int(tn.p) == len(tn.t.R16) { |
| 178 | tn.next = atEnd |
| 179 | } else { |
| 180 | tn.next = rune(tn.t.R16[tn.p].Lo) |
| 181 | } |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | if r0.Lo == r0.Hi { |
| 186 | r0.Stride = 1 |
| 187 | } |
| 188 | |
| 189 | return r0 |
| 190 | } |
| 191 | |
| 192 | // next32 finds the ranged to be added to the table. If ranges overlap between |
| 193 | // multiple tables it clips the result to a non-overlapping range if the |
| 194 | // elements are not fully subsumed. It returns a zero range if there are no more |
| 195 | // ranges. |
| 196 | func (ti tablesIter) next32() unicode.Range32 { |
| 197 | sortIter(ti) |
| 198 | |
| 199 | t0 := ti[0] |
| 200 | if t0.next == atEnd { |
| 201 | return unicode.Range32{} |
| 202 | } |
| 203 | r0 := t0.t.R32[t0.p] |
| 204 | r0.Lo = uint32(t0.next) |
| 205 | |
| 206 | // We restrict the Hi of the current range if it overlaps with another range. |
| 207 | for i := range ti { |
| 208 | tn := ti[i] |
| 209 | // Since our tableIndices are sorted by next, we can break if the there |
| 210 | // is no overlap. The first value of a next range can always be merged |
| 211 | // into the current one, so we can break in case of equality as well. |
| 212 | if rune(r0.Hi) <= tn.next { |
| 213 | break |
| 214 | } |
| 215 | rn := tn.t.R32[tn.p] |
| 216 | rn.Lo = uint32(tn.next) |
| 217 | |
| 218 | // Limit r0.Hi based on next ranges in list, but allow it to overlap |
| 219 | // with ranges as long as it subsumes it. |
| 220 | m := (rn.Lo - r0.Lo) % r0.Stride |
| 221 | if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) { |
| 222 | // Overlap, take the min of the two Hi values: for simplicity's sake |
| 223 | // we only process one range at a time. |
| 224 | if r0.Hi > rn.Hi { |
| 225 | r0.Hi = rn.Hi |
| 226 | } |
| 227 | } else { |
| 228 | // Not a compatible stride. Set to the last possible value before |
| 229 | // rn.Lo, but ensure there is at least one value. |
| 230 | if x := rn.Lo - m; r0.Lo <= x { |
| 231 | r0.Hi = x |
| 232 | } |
| 233 | break |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | // Update the next values for each table. |
| 238 | for i := range ti { |
| 239 | tn := &ti[i] |
| 240 | if rune(r0.Hi) < tn.next { |
| 241 | break |
| 242 | } |
| 243 | rn := tn.t.R32[tn.p] |
| 244 | stride := rune(rn.Stride) |
| 245 | tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride)) |
| 246 | if rune(rn.Hi) < tn.next { |
| 247 | if tn.p++; int(tn.p) == len(tn.t.R32) { |
| 248 | tn.next = atEnd |
| 249 | } else { |
| 250 | tn.next = rune(tn.t.R32[tn.p].Lo) |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | if r0.Lo == r0.Hi { |
| 256 | r0.Stride = 1 |
| 257 | } |
| 258 | |
| 259 | return r0 |
| 260 | } |