David K. Bainbridge | 215e024 | 2017-09-05 23:18:24 -0700 | [diff] [blame] | 1 | // Copyright 2011 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package norm |
| 6 | |
| 7 | import ( |
| 8 | "strings" |
| 9 | "testing" |
| 10 | ) |
| 11 | |
| 12 | func doIterNorm(f Form, s string) []byte { |
| 13 | acc := []byte{} |
| 14 | i := Iter{} |
| 15 | i.InitString(f, s) |
| 16 | for !i.Done() { |
| 17 | acc = append(acc, i.Next()...) |
| 18 | } |
| 19 | return acc |
| 20 | } |
| 21 | |
| 22 | func TestIterNext(t *testing.T) { |
| 23 | runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte { |
| 24 | return doIterNorm(f, string(append(out, s...))) |
| 25 | }) |
| 26 | } |
| 27 | |
// SegmentTest pairs an input string with the segments an Iter is expected to
// return for it.
type SegmentTest struct {
	// in is the string the iterator is initialized with.
	in string
	// out lists the expected segments in order. An empty string entry means
	// the iterator is expected to report Done at that position.
	out []string
}
| 32 | |
| 33 | var segmentTests = []SegmentTest{ |
| 34 | {"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}}, |
| 35 | {rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")}, |
| 36 | {rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")}, |
| 37 | {rep('a', segSize) + "\u0300aa", |
| 38 | append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")}, |
| 39 | |
| 40 | // U+0f73 is NOT treated as a starter as it is a modifier |
| 41 | {"a" + grave(29) + "\u0f73", []string{"a" + grave(29), cgj + "\u0f73"}}, |
| 42 | {"a\u0f73", []string{"a\u0f73"}}, |
| 43 | |
| 44 | // U+ff9e is treated as a non-starter. |
| 45 | // TODO: should we? Note that this will only affect iteration, as whether |
| 46 | // or not we do so does not affect the normalization output and will either |
| 47 | // way result in consistent iteration output. |
| 48 | {"a" + grave(30) + "\uff9e", []string{"a" + grave(30), cgj + "\uff9e"}}, |
| 49 | {"a\uff9e", []string{"a\uff9e"}}, |
| 50 | } |
| 51 | |
| 52 | var segmentTestsK = []SegmentTest{ |
| 53 | {"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}}, |
| 54 | // last segment of multi-segment decomposition needs normalization |
| 55 | {"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}}, |
| 56 | {"\u320E", []string{"\x28", "\uAC00", "\x29"}}, |
| 57 | |
| 58 | // last segment should be copied to start of buffer. |
| 59 | {"\ufdfa", []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645", ""}}, |
| 60 | {"\ufdfa" + grave(30), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), ""}}, |
| 61 | {"\uFDFA" + grave(64), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), cgj + grave(30), cgj + grave(4), ""}}, |
| 62 | |
| 63 | // Hangul and Jamo are grouped together. |
| 64 | {"\uAC00", []string{"\u1100\u1161", ""}}, |
| 65 | {"\uAC01", []string{"\u1100\u1161\u11A8", ""}}, |
| 66 | {"\u1100\u1161", []string{"\u1100\u1161", ""}}, |
| 67 | } |
| 68 | |
| 69 | // Note that, by design, segmentation is equal for composing and decomposing forms. |
| 70 | func TestIterSegmentation(t *testing.T) { |
| 71 | segmentTest(t, "SegmentTestD", NFD, segmentTests) |
| 72 | segmentTest(t, "SegmentTestC", NFC, segmentTests) |
| 73 | segmentTest(t, "SegmentTestKD", NFKD, segmentTestsK) |
| 74 | segmentTest(t, "SegmentTestKC", NFKC, segmentTestsK) |
| 75 | } |
| 76 | |
| 77 | func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) { |
| 78 | iter := Iter{} |
| 79 | for i, tt := range tests { |
| 80 | iter.InitString(f, tt.in) |
| 81 | for j, seg := range tt.out { |
| 82 | if seg == "" { |
| 83 | if !iter.Done() { |
| 84 | res := string(iter.Next()) |
| 85 | t.Errorf(`%s:%d:%d: expected Done()==true, found segment %+q`, name, i, j, res) |
| 86 | } |
| 87 | continue |
| 88 | } |
| 89 | if iter.Done() { |
| 90 | t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j) |
| 91 | } |
| 92 | seg = f.String(seg) |
| 93 | if res := string(iter.Next()); res != seg { |
| 94 | t.Errorf(`%s:%d:%d" segment was %+q (%d); want %+q (%d)`, name, i, j, pc(res), len(res), pc(seg), len(seg)) |
| 95 | } |
| 96 | } |
| 97 | } |
| 98 | } |