Matteo Scandolo | a6a3aee | 2019-11-26 13:30:14 -0700 | [diff] [blame] | 1 | // Copyright 2011 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Note: the file data_test.go that is generated should not be checked in. |
| 6 | //go:generate go run maketables.go triegen.go |
| 7 | //go:generate go test -tags test |
| 8 | |
| 9 | // Package norm contains types and functions for normalizing Unicode strings. |
| 10 | package norm // import "golang.org/x/text/unicode/norm" |
| 11 | |
| 12 | import ( |
| 13 | "unicode/utf8" |
| 14 | |
| 15 | "golang.org/x/text/transform" |
| 16 | ) |
| 17 | |
| 18 | // A Form denotes a canonical representation of Unicode code points. |
| 19 | // The Unicode-defined normalization and equivalence forms are: |
| 20 | // |
| 21 | // NFC Unicode Normalization Form C |
| 22 | // NFD Unicode Normalization Form D |
| 23 | // NFKC Unicode Normalization Form KC |
| 24 | // NFKD Unicode Normalization Form KD |
| 25 | // |
| 26 | // For a Form f, this documentation uses the notation f(x) to mean |
| 27 | // the bytes or string x converted to the given form. |
| 28 | // A position n in x is called a boundary if conversion to the form can |
| 29 | // proceed independently on both sides: |
| 30 | // f(x) == append(f(x[0:n]), f(x[n:])...) |
| 31 | // |
Pragya Arya | 324337e | 2020-02-20 14:35:08 +0530 | [diff] [blame] | 32 | // References: https://unicode.org/reports/tr15/ and |
| 33 | // https://unicode.org/notes/tn5/. |
Matteo Scandolo | a6a3aee | 2019-11-26 13:30:14 -0700 | [diff] [blame] | 34 | type Form int |
| 35 | |
| 36 | const ( |
| 37 | NFC Form = iota |
| 38 | NFD |
| 39 | NFKC |
| 40 | NFKD |
| 41 | ) |
| 42 | |
| 43 | // Bytes returns f(b). May return b if f(b) = b. |
| 44 | func (f Form) Bytes(b []byte) []byte { |
| 45 | src := inputBytes(b) |
| 46 | ft := formTable[f] |
| 47 | n, ok := ft.quickSpan(src, 0, len(b), true) |
| 48 | if ok { |
| 49 | return b |
| 50 | } |
| 51 | out := make([]byte, n, len(b)) |
| 52 | copy(out, b[0:n]) |
| 53 | rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush} |
| 54 | return doAppendInner(&rb, n) |
| 55 | } |
| 56 | |
| 57 | // String returns f(s). |
| 58 | func (f Form) String(s string) string { |
| 59 | src := inputString(s) |
| 60 | ft := formTable[f] |
| 61 | n, ok := ft.quickSpan(src, 0, len(s), true) |
| 62 | if ok { |
| 63 | return s |
| 64 | } |
| 65 | out := make([]byte, n, len(s)) |
| 66 | copy(out, s[0:n]) |
| 67 | rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush} |
| 68 | return string(doAppendInner(&rb, n)) |
| 69 | } |
| 70 | |
| 71 | // IsNormal returns true if b == f(b). |
| 72 | func (f Form) IsNormal(b []byte) bool { |
| 73 | src := inputBytes(b) |
| 74 | ft := formTable[f] |
| 75 | bp, ok := ft.quickSpan(src, 0, len(b), true) |
| 76 | if ok { |
| 77 | return true |
| 78 | } |
| 79 | rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)} |
| 80 | rb.setFlusher(nil, cmpNormalBytes) |
| 81 | for bp < len(b) { |
| 82 | rb.out = b[bp:] |
| 83 | if bp = decomposeSegment(&rb, bp, true); bp < 0 { |
| 84 | return false |
| 85 | } |
| 86 | bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true) |
| 87 | } |
| 88 | return true |
| 89 | } |
| 90 | |
| 91 | func cmpNormalBytes(rb *reorderBuffer) bool { |
| 92 | b := rb.out |
| 93 | for i := 0; i < rb.nrune; i++ { |
| 94 | info := rb.rune[i] |
| 95 | if int(info.size) > len(b) { |
| 96 | return false |
| 97 | } |
| 98 | p := info.pos |
| 99 | pe := p + info.size |
| 100 | for ; p < pe; p++ { |
| 101 | if b[0] != rb.byte[p] { |
| 102 | return false |
| 103 | } |
| 104 | b = b[1:] |
| 105 | } |
| 106 | } |
| 107 | return true |
| 108 | } |
| 109 | |
| 110 | // IsNormalString returns true if s == f(s). |
| 111 | func (f Form) IsNormalString(s string) bool { |
| 112 | src := inputString(s) |
| 113 | ft := formTable[f] |
| 114 | bp, ok := ft.quickSpan(src, 0, len(s), true) |
| 115 | if ok { |
| 116 | return true |
| 117 | } |
| 118 | rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)} |
| 119 | rb.setFlusher(nil, func(rb *reorderBuffer) bool { |
| 120 | for i := 0; i < rb.nrune; i++ { |
| 121 | info := rb.rune[i] |
| 122 | if bp+int(info.size) > len(s) { |
| 123 | return false |
| 124 | } |
| 125 | p := info.pos |
| 126 | pe := p + info.size |
| 127 | for ; p < pe; p++ { |
| 128 | if s[bp] != rb.byte[p] { |
| 129 | return false |
| 130 | } |
| 131 | bp++ |
| 132 | } |
| 133 | } |
| 134 | return true |
| 135 | }) |
| 136 | for bp < len(s) { |
| 137 | if bp = decomposeSegment(&rb, bp, true); bp < 0 { |
| 138 | return false |
| 139 | } |
| 140 | bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true) |
| 141 | } |
| 142 | return true |
| 143 | } |
| 144 | |
| 145 | // patchTail fixes a case where a rune may be incorrectly normalized |
| 146 | // if it is followed by illegal continuation bytes. It returns the |
| 147 | // patched buffer and whether the decomposition is still in progress. |
| 148 | func patchTail(rb *reorderBuffer) bool { |
| 149 | info, p := lastRuneStart(&rb.f, rb.out) |
| 150 | if p == -1 || info.size == 0 { |
| 151 | return true |
| 152 | } |
| 153 | end := p + int(info.size) |
| 154 | extra := len(rb.out) - end |
| 155 | if extra > 0 { |
| 156 | // Potentially allocating memory. However, this only |
| 157 | // happens with ill-formed UTF-8. |
| 158 | x := make([]byte, 0) |
| 159 | x = append(x, rb.out[len(rb.out)-extra:]...) |
| 160 | rb.out = rb.out[:end] |
| 161 | decomposeToLastBoundary(rb) |
| 162 | rb.doFlush() |
| 163 | rb.out = append(rb.out, x...) |
| 164 | return false |
| 165 | } |
| 166 | buf := rb.out[p:] |
| 167 | rb.out = rb.out[:p] |
| 168 | decomposeToLastBoundary(rb) |
| 169 | if s := rb.ss.next(info); s == ssStarter { |
| 170 | rb.doFlush() |
| 171 | rb.ss.first(info) |
| 172 | } else if s == ssOverflow { |
| 173 | rb.doFlush() |
| 174 | rb.insertCGJ() |
| 175 | rb.ss = 0 |
| 176 | } |
| 177 | rb.insertUnsafe(inputBytes(buf), 0, info) |
| 178 | return true |
| 179 | } |
| 180 | |
| 181 | func appendQuick(rb *reorderBuffer, i int) int { |
| 182 | if rb.nsrc == i { |
| 183 | return i |
| 184 | } |
| 185 | end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true) |
| 186 | rb.out = rb.src.appendSlice(rb.out, i, end) |
| 187 | return end |
| 188 | } |
| 189 | |
| 190 | // Append returns f(append(out, b...)). |
| 191 | // The buffer out must be nil, empty, or equal to f(out). |
| 192 | func (f Form) Append(out []byte, src ...byte) []byte { |
| 193 | return f.doAppend(out, inputBytes(src), len(src)) |
| 194 | } |
| 195 | |
| 196 | func (f Form) doAppend(out []byte, src input, n int) []byte { |
| 197 | if n == 0 { |
| 198 | return out |
| 199 | } |
| 200 | ft := formTable[f] |
| 201 | // Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer. |
| 202 | if len(out) == 0 { |
| 203 | p, _ := ft.quickSpan(src, 0, n, true) |
| 204 | out = src.appendSlice(out, 0, p) |
| 205 | if p == n { |
| 206 | return out |
| 207 | } |
| 208 | rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush} |
| 209 | return doAppendInner(&rb, p) |
| 210 | } |
| 211 | rb := reorderBuffer{f: *ft, src: src, nsrc: n} |
| 212 | return doAppend(&rb, out, 0) |
| 213 | } |
| 214 | |
| 215 | func doAppend(rb *reorderBuffer, out []byte, p int) []byte { |
| 216 | rb.setFlusher(out, appendFlush) |
| 217 | src, n := rb.src, rb.nsrc |
| 218 | doMerge := len(out) > 0 |
| 219 | if q := src.skipContinuationBytes(p); q > p { |
| 220 | // Move leading non-starters to destination. |
| 221 | rb.out = src.appendSlice(rb.out, p, q) |
| 222 | p = q |
| 223 | doMerge = patchTail(rb) |
| 224 | } |
| 225 | fd := &rb.f |
| 226 | if doMerge { |
| 227 | var info Properties |
| 228 | if p < n { |
| 229 | info = fd.info(src, p) |
| 230 | if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 { |
| 231 | if p == 0 { |
| 232 | decomposeToLastBoundary(rb) |
| 233 | } |
| 234 | p = decomposeSegment(rb, p, true) |
| 235 | } |
| 236 | } |
| 237 | if info.size == 0 { |
| 238 | rb.doFlush() |
| 239 | // Append incomplete UTF-8 encoding. |
| 240 | return src.appendSlice(rb.out, p, n) |
| 241 | } |
| 242 | if rb.nrune > 0 { |
| 243 | return doAppendInner(rb, p) |
| 244 | } |
| 245 | } |
| 246 | p = appendQuick(rb, p) |
| 247 | return doAppendInner(rb, p) |
| 248 | } |
| 249 | |
| 250 | func doAppendInner(rb *reorderBuffer, p int) []byte { |
| 251 | for n := rb.nsrc; p < n; { |
| 252 | p = decomposeSegment(rb, p, true) |
| 253 | p = appendQuick(rb, p) |
| 254 | } |
| 255 | return rb.out |
| 256 | } |
| 257 | |
| 258 | // AppendString returns f(append(out, []byte(s))). |
| 259 | // The buffer out must be nil, empty, or equal to f(out). |
| 260 | func (f Form) AppendString(out []byte, src string) []byte { |
| 261 | return f.doAppend(out, inputString(src), len(src)) |
| 262 | } |
| 263 | |
| 264 | // QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]). |
| 265 | // It is not guaranteed to return the largest such n. |
| 266 | func (f Form) QuickSpan(b []byte) int { |
| 267 | n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true) |
| 268 | return n |
| 269 | } |
| 270 | |
| 271 | // Span implements transform.SpanningTransformer. It returns a boundary n such |
| 272 | // that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n. |
| 273 | func (f Form) Span(b []byte, atEOF bool) (n int, err error) { |
| 274 | n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF) |
| 275 | if n < len(b) { |
| 276 | if !ok { |
| 277 | err = transform.ErrEndOfSpan |
| 278 | } else { |
| 279 | err = transform.ErrShortSrc |
| 280 | } |
| 281 | } |
| 282 | return n, err |
| 283 | } |
| 284 | |
| 285 | // SpanString returns a boundary n such that s[0:n] == f(s[0:n]). |
| 286 | // It is not guaranteed to return the largest such n. |
| 287 | func (f Form) SpanString(s string, atEOF bool) (n int, err error) { |
| 288 | n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF) |
| 289 | if n < len(s) { |
| 290 | if !ok { |
| 291 | err = transform.ErrEndOfSpan |
| 292 | } else { |
| 293 | err = transform.ErrShortSrc |
| 294 | } |
| 295 | } |
| 296 | return n, err |
| 297 | } |
| 298 | |
| 299 | // quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and |
| 300 | // whether any non-normalized parts were found. If atEOF is false, n will |
| 301 | // not point past the last segment if this segment might be become |
| 302 | // non-normalized by appending other runes. |
| 303 | func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) { |
| 304 | var lastCC uint8 |
| 305 | ss := streamSafe(0) |
| 306 | lastSegStart := i |
| 307 | for n = end; i < n; { |
| 308 | if j := src.skipASCII(i, n); i != j { |
| 309 | i = j |
| 310 | lastSegStart = i - 1 |
| 311 | lastCC = 0 |
| 312 | ss = 0 |
| 313 | continue |
| 314 | } |
| 315 | info := f.info(src, i) |
| 316 | if info.size == 0 { |
| 317 | if atEOF { |
| 318 | // include incomplete runes |
| 319 | return n, true |
| 320 | } |
| 321 | return lastSegStart, true |
| 322 | } |
| 323 | // This block needs to be before the next, because it is possible to |
| 324 | // have an overflow for runes that are starters (e.g. with U+FF9E). |
| 325 | switch ss.next(info) { |
| 326 | case ssStarter: |
| 327 | lastSegStart = i |
| 328 | case ssOverflow: |
| 329 | return lastSegStart, false |
| 330 | case ssSuccess: |
| 331 | if lastCC > info.ccc { |
| 332 | return lastSegStart, false |
| 333 | } |
| 334 | } |
| 335 | if f.composing { |
| 336 | if !info.isYesC() { |
| 337 | break |
| 338 | } |
| 339 | } else { |
| 340 | if !info.isYesD() { |
| 341 | break |
| 342 | } |
| 343 | } |
| 344 | lastCC = info.ccc |
| 345 | i += int(info.size) |
| 346 | } |
| 347 | if i == n { |
| 348 | if !atEOF { |
| 349 | n = lastSegStart |
| 350 | } |
| 351 | return n, true |
| 352 | } |
| 353 | return lastSegStart, false |
| 354 | } |
| 355 | |
| 356 | // QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]). |
| 357 | // It is not guaranteed to return the largest such n. |
| 358 | func (f Form) QuickSpanString(s string) int { |
| 359 | n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true) |
| 360 | return n |
| 361 | } |
| 362 | |
| 363 | // FirstBoundary returns the position i of the first boundary in b |
| 364 | // or -1 if b contains no boundary. |
| 365 | func (f Form) FirstBoundary(b []byte) int { |
| 366 | return f.firstBoundary(inputBytes(b), len(b)) |
| 367 | } |
| 368 | |
| 369 | func (f Form) firstBoundary(src input, nsrc int) int { |
| 370 | i := src.skipContinuationBytes(0) |
| 371 | if i >= nsrc { |
| 372 | return -1 |
| 373 | } |
| 374 | fd := formTable[f] |
| 375 | ss := streamSafe(0) |
| 376 | // We should call ss.first here, but we can't as the first rune is |
| 377 | // skipped already. This means FirstBoundary can't really determine |
| 378 | // CGJ insertion points correctly. Luckily it doesn't have to. |
| 379 | for { |
| 380 | info := fd.info(src, i) |
| 381 | if info.size == 0 { |
| 382 | return -1 |
| 383 | } |
| 384 | if s := ss.next(info); s != ssSuccess { |
| 385 | return i |
| 386 | } |
| 387 | i += int(info.size) |
| 388 | if i >= nsrc { |
| 389 | if !info.BoundaryAfter() && !ss.isMax() { |
| 390 | return -1 |
| 391 | } |
| 392 | return nsrc |
| 393 | } |
| 394 | } |
| 395 | } |
| 396 | |
| 397 | // FirstBoundaryInString returns the position i of the first boundary in s |
| 398 | // or -1 if s contains no boundary. |
| 399 | func (f Form) FirstBoundaryInString(s string) int { |
| 400 | return f.firstBoundary(inputString(s), len(s)) |
| 401 | } |
| 402 | |
| 403 | // NextBoundary reports the index of the boundary between the first and next |
| 404 | // segment in b or -1 if atEOF is false and there are not enough bytes to |
| 405 | // determine this boundary. |
| 406 | func (f Form) NextBoundary(b []byte, atEOF bool) int { |
| 407 | return f.nextBoundary(inputBytes(b), len(b), atEOF) |
| 408 | } |
| 409 | |
| 410 | // NextBoundaryInString reports the index of the boundary between the first and |
| 411 | // next segment in b or -1 if atEOF is false and there are not enough bytes to |
| 412 | // determine this boundary. |
| 413 | func (f Form) NextBoundaryInString(s string, atEOF bool) int { |
| 414 | return f.nextBoundary(inputString(s), len(s), atEOF) |
| 415 | } |
| 416 | |
| 417 | func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int { |
| 418 | if nsrc == 0 { |
| 419 | if atEOF { |
| 420 | return 0 |
| 421 | } |
| 422 | return -1 |
| 423 | } |
| 424 | fd := formTable[f] |
| 425 | info := fd.info(src, 0) |
| 426 | if info.size == 0 { |
| 427 | if atEOF { |
| 428 | return 1 |
| 429 | } |
| 430 | return -1 |
| 431 | } |
| 432 | ss := streamSafe(0) |
| 433 | ss.first(info) |
| 434 | |
| 435 | for i := int(info.size); i < nsrc; i += int(info.size) { |
| 436 | info = fd.info(src, i) |
| 437 | if info.size == 0 { |
| 438 | if atEOF { |
| 439 | return i |
| 440 | } |
| 441 | return -1 |
| 442 | } |
| 443 | // TODO: Using streamSafe to determine the boundary isn't the same as |
| 444 | // using BoundaryBefore. Determine which should be used. |
| 445 | if s := ss.next(info); s != ssSuccess { |
| 446 | return i |
| 447 | } |
| 448 | } |
| 449 | if !atEOF && !info.BoundaryAfter() && !ss.isMax() { |
| 450 | return -1 |
| 451 | } |
| 452 | return nsrc |
| 453 | } |
| 454 | |
| 455 | // LastBoundary returns the position i of the last boundary in b |
| 456 | // or -1 if b contains no boundary. |
| 457 | func (f Form) LastBoundary(b []byte) int { |
| 458 | return lastBoundary(formTable[f], b) |
| 459 | } |
| 460 | |
| 461 | func lastBoundary(fd *formInfo, b []byte) int { |
| 462 | i := len(b) |
| 463 | info, p := lastRuneStart(fd, b) |
| 464 | if p == -1 { |
| 465 | return -1 |
| 466 | } |
| 467 | if info.size == 0 { // ends with incomplete rune |
| 468 | if p == 0 { // starts with incomplete rune |
| 469 | return -1 |
| 470 | } |
| 471 | i = p |
| 472 | info, p = lastRuneStart(fd, b[:i]) |
| 473 | if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter |
| 474 | return i |
| 475 | } |
| 476 | } |
| 477 | if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8 |
| 478 | return i |
| 479 | } |
| 480 | if info.BoundaryAfter() { |
| 481 | return i |
| 482 | } |
| 483 | ss := streamSafe(0) |
| 484 | v := ss.backwards(info) |
| 485 | for i = p; i >= 0 && v != ssStarter; i = p { |
| 486 | info, p = lastRuneStart(fd, b[:i]) |
| 487 | if v = ss.backwards(info); v == ssOverflow { |
| 488 | break |
| 489 | } |
| 490 | if p+int(info.size) != i { |
| 491 | if p == -1 { // no boundary found |
| 492 | return -1 |
| 493 | } |
| 494 | return i // boundary after an illegal UTF-8 encoding |
| 495 | } |
| 496 | } |
| 497 | return i |
| 498 | } |
| 499 | |
| 500 | // decomposeSegment scans the first segment in src into rb. It inserts 0x034f |
| 501 | // (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters |
| 502 | // and returns the number of bytes consumed from src or iShortDst or iShortSrc. |
| 503 | func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int { |
| 504 | // Force one character to be consumed. |
| 505 | info := rb.f.info(rb.src, sp) |
| 506 | if info.size == 0 { |
| 507 | return 0 |
| 508 | } |
| 509 | if s := rb.ss.next(info); s == ssStarter { |
| 510 | // TODO: this could be removed if we don't support merging. |
| 511 | if rb.nrune > 0 { |
| 512 | goto end |
| 513 | } |
| 514 | } else if s == ssOverflow { |
| 515 | rb.insertCGJ() |
| 516 | goto end |
| 517 | } |
| 518 | if err := rb.insertFlush(rb.src, sp, info); err != iSuccess { |
| 519 | return int(err) |
| 520 | } |
| 521 | for { |
| 522 | sp += int(info.size) |
| 523 | if sp >= rb.nsrc { |
| 524 | if !atEOF && !info.BoundaryAfter() { |
| 525 | return int(iShortSrc) |
| 526 | } |
| 527 | break |
| 528 | } |
| 529 | info = rb.f.info(rb.src, sp) |
| 530 | if info.size == 0 { |
| 531 | if !atEOF { |
| 532 | return int(iShortSrc) |
| 533 | } |
| 534 | break |
| 535 | } |
| 536 | if s := rb.ss.next(info); s == ssStarter { |
| 537 | break |
| 538 | } else if s == ssOverflow { |
| 539 | rb.insertCGJ() |
| 540 | break |
| 541 | } |
| 542 | if err := rb.insertFlush(rb.src, sp, info); err != iSuccess { |
| 543 | return int(err) |
| 544 | } |
| 545 | } |
| 546 | end: |
| 547 | if !rb.doFlush() { |
| 548 | return int(iShortDst) |
| 549 | } |
| 550 | return sp |
| 551 | } |
| 552 | |
| 553 | // lastRuneStart returns the runeInfo and position of the last |
| 554 | // rune in buf or the zero runeInfo and -1 if no rune was found. |
| 555 | func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) { |
| 556 | p := len(buf) - 1 |
| 557 | for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- { |
| 558 | } |
| 559 | if p < 0 { |
| 560 | return Properties{}, -1 |
| 561 | } |
| 562 | return fd.info(inputBytes(buf), p), p |
| 563 | } |
| 564 | |
| 565 | // decomposeToLastBoundary finds an open segment at the end of the buffer |
| 566 | // and scans it into rb. Returns the buffer minus the last segment. |
| 567 | func decomposeToLastBoundary(rb *reorderBuffer) { |
| 568 | fd := &rb.f |
| 569 | info, i := lastRuneStart(fd, rb.out) |
| 570 | if int(info.size) != len(rb.out)-i { |
| 571 | // illegal trailing continuation bytes |
| 572 | return |
| 573 | } |
| 574 | if info.BoundaryAfter() { |
| 575 | return |
| 576 | } |
| 577 | var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order |
| 578 | padd := 0 |
| 579 | ss := streamSafe(0) |
| 580 | p := len(rb.out) |
| 581 | for { |
| 582 | add[padd] = info |
| 583 | v := ss.backwards(info) |
| 584 | if v == ssOverflow { |
| 585 | // Note that if we have an overflow, it the string we are appending to |
| 586 | // is not correctly normalized. In this case the behavior is undefined. |
| 587 | break |
| 588 | } |
| 589 | padd++ |
| 590 | p -= int(info.size) |
| 591 | if v == ssStarter || p < 0 { |
| 592 | break |
| 593 | } |
| 594 | info, i = lastRuneStart(fd, rb.out[:p]) |
| 595 | if int(info.size) != p-i { |
| 596 | break |
| 597 | } |
| 598 | } |
| 599 | rb.ss = ss |
| 600 | // Copy bytes for insertion as we may need to overwrite rb.out. |
| 601 | var buf [maxBufferSize * utf8.UTFMax]byte |
| 602 | cp := buf[:copy(buf[:], rb.out[p:])] |
| 603 | rb.out = rb.out[:p] |
| 604 | for padd--; padd >= 0; padd-- { |
| 605 | info = add[padd] |
| 606 | rb.insertUnsafe(inputBytes(cp), 0, info) |
| 607 | cp = cp[info.size:] |
| 608 | } |
| 609 | } |