blob: 4f837372ca2ff30a0d26aab8fcfbdde6e2c047d1 [file] [log] [blame]
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

5package norm
6
7import (
8 "bytes"
9 "flag"
10 "fmt"
11 "io"
12 "log"
13 "strings"
14 "testing"
15 "unicode/utf8"
16
17 "golang.org/x/text/internal/testtext"
18 "golang.org/x/text/transform"
19)
20
var (
	// testn selects a single test case by index in runAppendTests;
	// the default of -1 runs all cases.
	testn = flag.Int("testn", -1, "specific test number to run or -1 for all")
)
24
// pc replaces any rune r that is repeated n times, for n > 1, with r{n}.
func pc(s string) []byte {
	var out bytes.Buffer
	out.Grow(len(s))
	i := 0
	for i < len(s) {
		r, sz := utf8.DecodeRuneInString(s[i:])
		// Count the run length of the rune starting at i.
		count := 0
		if sz == 1 {
			// Count raw bytes so runs of invalid UTF-8 are compacted too.
			for j := i; j < len(s) && s[j] == s[i]; j++ {
				count++
			}
		} else {
			for _, r2 := range s[i:] {
				if r2 != r {
					break
				}
				count++
			}
		}
		out.WriteString(s[i : i+sz])
		if count > 1 {
			fmt.Fprintf(&out, "{%d}", count)
		}
		i += sz * count
	}
	return out.Bytes()
}
51
// pidx finds the index from which two strings start to differ, plus context.
// It returns the index and ellipsis if the index is greater than 0.
func pidx(a, b string) (i int, prefix string) {
	limit := len(a)
	if len(b) < limit {
		limit = len(b)
	}
	for i < limit && a[i] == b[i] {
		i++
	}
	if i < 8 {
		return 0, ""
	}
	i -= 3 // ensure taking at least one full rune before the difference.
	for lo := i - 7; i > lo; i-- {
		if utf8.RuneStart(a[i]) {
			break
		}
	}
	return i, "..."
}
65
// PositionTest is a shared test case for functions that report a byte
// position in their input and, optionally, produce buffered output.
type PositionTest struct {
	input  string
	pos    int    // expected position result
	buffer string // expected contents of reorderBuffer, if applicable
}

// positionFunc runs the function under test on s, using rb as scratch
// state, and returns the reported position and any produced bytes.
type positionFunc func(rb *reorderBuffer, s string) (int, []byte)
73
74func runPosTests(t *testing.T, name string, f Form, fn positionFunc, tests []PositionTest) {
75 rb := reorderBuffer{}
76 rb.init(f, nil)
77 for i, test := range tests {
78 rb.reset()
79 rb.src = inputString(test.input)
80 rb.nsrc = len(test.input)
81 pos, out := fn(&rb, test.input)
82 if pos != test.pos {
83 t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
84 }
85 if outs := string(out); outs != test.buffer {
86 k, pfx := pidx(outs, test.buffer)
87 t.Errorf("%s:%d: buffer \nwas %s%+q; \nwant %s%+q", name, i, pfx, pc(outs[k:]), pfx, pc(test.buffer[k:]))
88 }
89 }
90}
91
// grave returns a string of n COMBINING GRAVE ACCENT (U+0300) runes.
func grave(n int) string {
	return strings.Repeat("\u0300", n)
}
95
// rep returns the rune r repeated n times as a string.
func rep(r rune, n int) string {
	var sb strings.Builder
	for i := 0; i < n; i++ {
		sb.WriteRune(r)
	}
	return sb.String()
}
99
// segSize is the maximum byte size of a segment buffer.
const segSize = maxByteBufferSize

// cgj is shorthand for the COMBINING GRAPHEME JOINER, which the
// normalizer inserts when a segment's non-starter limit is exceeded.
var cgj = GraphemeJoiner
103
// decomposeSegmentTests drives decomposeSegment via decomposeSegmentF:
// pos is the expected return position and buffer the flushed NFD output.
// NOTE: cases are index-addressed by failure messages; do not reorder.
var decomposeSegmentTests = []PositionTest{
	// illegal runes
	{"\xC2", 0, ""},
	{"\xC0", 1, "\xC0"},
	{"\u00E0\x80", 2, "\u0061\u0300"},
	// starter
	{"a", 1, "a"},
	{"ab", 1, "a"},
	// starter + composing
	{"a\u0300", 3, "a\u0300"},
	{"a\u0300b", 3, "a\u0300"},
	// with decomposition
	{"\u00C0", 2, "A\u0300"},
	{"\u00C0b", 2, "A\u0300"},
	// long
	{grave(31), 60, grave(30) + cgj},
	{"a" + grave(31), 61, "a" + grave(30) + cgj},

	// Stability tests: see http://www.unicode.org/review/pr-29.html.
	// U+0300 COMBINING GRAVE ACCENT;Mn;230;NSM;;;;;N;NON-SPACING GRAVE;;;;
	// U+0B47 ORIYA VOWEL SIGN E;Mc;0;L;;;;;N;;;;;
	// U+0B3E ORIYA VOWEL SIGN AA;Mc;0;L;;;;;N;;;;;
	// U+1100 HANGUL CHOSEONG KIYEOK;Lo;0;L;;;;;N;;;;;
	// U+1161 HANGUL JUNGSEONG A;Lo;0;L;;;;;N;;;;;
	{"\u0B47\u0300\u0B3E", 8, "\u0B47\u0300\u0B3E"},
	{"\u1100\u0300\u1161", 8, "\u1100\u0300\u1161"},
	{"\u0B47\u0B3E", 6, "\u0B47\u0B3E"},
	{"\u1100\u1161", 6, "\u1100\u1161"},

	// U+04DA MALAYALAM VOWEL SIGN O;Mc;0;L;0D46 0D3E;;;;N;;;;;
	// Sequence of decomposing characters that are starters and modifiers.
	{"\u0d4a" + strings.Repeat("\u0d3e", 31), 90, "\u0d46" + strings.Repeat("\u0d3e", 30) + cgj},

	{grave(30), 60, grave(30)},
	// U+FF9E is a starter, but decomposes to U+3099, which is not.
	{grave(30) + "\uff9e", 60, grave(30) + cgj},
	// ends with incomplete UTF-8 encoding
	{"\xCC", 0, ""},
	{"\u0300\xCC", 2, "\u0300"},
}
144
// decomposeSegmentF decomposes the first segment of s under NFD,
// flushing into rb.out, and returns decomposeSegment's position result.
func decomposeSegmentF(rb *reorderBuffer, s string) (int, []byte) {
	rb.initString(NFD, s)
	rb.setFlusher(nil, appendFlush)
	p := decomposeSegment(rb, 0, true)
	return p, rb.out
}
151
// TestDecomposeSegment runs decomposeSegmentTests through decomposeSegmentF.
func TestDecomposeSegment(t *testing.T) {
	runPosTests(t, "TestDecomposeSegment", NFC, decomposeSegmentF, decomposeSegmentTests)
}
155
// firstBoundaryTests: pos is the expected FirstBoundary result; -1 means
// no boundary was found. buffer is unused for these tests.
var firstBoundaryTests = []PositionTest{
	// no boundary
	{"", -1, ""},
	{"\u0300", -1, ""},
	{"\x80\x80", -1, ""},
	// illegal runes
	{"\xff", 0, ""},
	{"\u0300\xff", 2, ""},
	{"\u0300\xc0\x80\x80", 2, ""},
	// boundaries
	{"a", 0, ""},
	{"\u0300a", 2, ""},
	// Hangul
	{"\u1103\u1161", 0, ""},
	{"\u110B\u1173\u11B7", 0, ""},
	{"\u1161\u110B\u1173\u11B7", 3, ""},
	{"\u1173\u11B7\u1103\u1161", 6, ""},
	// too many combining characters.
	{grave(maxNonStarters - 1), -1, ""},
	{grave(maxNonStarters), 60, ""},
	{grave(maxNonStarters + 1), 60, ""},
}
178
// firstBoundaryF adapts Form.FirstBoundary to the positionFunc signature.
func firstBoundaryF(rb *reorderBuffer, s string) (int, []byte) {
	return rb.f.form.FirstBoundary([]byte(s)), nil
}

// firstBoundaryStringF adapts Form.FirstBoundaryInString to positionFunc.
func firstBoundaryStringF(rb *reorderBuffer, s string) (int, []byte) {
	return rb.f.form.FirstBoundaryInString(s), nil
}
186
// TestFirstBoundary runs firstBoundaryTests against both the byte and
// string variants of FirstBoundary.
func TestFirstBoundary(t *testing.T) {
	runPosTests(t, "TestFirstBoundary", NFC, firstBoundaryF, firstBoundaryTests)
	runPosTests(t, "TestFirstBoundaryInString", NFC, firstBoundaryStringF, firstBoundaryTests)
}
191
// TestNextBoundary checks NextBoundary and NextBoundaryInString for NFC.
// want is the expected boundary position; -1 means more input is needed,
// which can only be reported when atEOF is false.
func TestNextBoundary(t *testing.T) {
	testCases := []struct {
		input string
		atEOF bool
		want  int
	}{
		// no boundary
		{"", true, 0},
		{"", false, -1},
		{"\u0300", true, 2},
		{"\u0300", false, -1},
		{"\x80\x80", true, 1},
		{"\x80\x80", false, 1},
		// illegal runes
		{"\xff", false, 1},
		{"\u0300\xff", false, 2},
		{"\u0300\xc0\x80\x80", false, 2},
		{"\xc2\x80\x80", false, 2},
		{"\xc2", false, -1},
		{"\xc2", true, 1},
		{"a\u0300\xc2", false, -1},
		{"a\u0300\xc2", true, 3},
		// boundaries
		{"a", true, 1},
		{"a", false, -1},
		{"aa", false, 1},
		{"\u0300", true, 2},
		{"\u0300", false, -1},
		{"\u0300a", false, 2},
		// Hangul
		{"\u1103\u1161", true, 6},
		{"\u1103\u1161", false, -1},
		{"\u110B\u1173\u11B7", false, -1},
		{"\u110B\u1173\u11B7\u110B\u1173\u11B7", false, 9},
		{"\u1161\u110B\u1173\u11B7", false, 3},
		{"\u1173\u11B7\u1103\u1161", false, 6},
		// too many combining characters.
		{grave(maxNonStarters - 1), false, -1},
		{grave(maxNonStarters), false, 60},
		{grave(maxNonStarters + 1), false, 60},
	}

	for _, tc := range testCases {
		if got := NFC.NextBoundary([]byte(tc.input), tc.atEOF); got != tc.want {
			t.Errorf("NextBoundary(%+q, %v) = %d; want %d", tc.input, tc.atEOF, got, tc.want)
		}
		if got := NFC.NextBoundaryInString(tc.input, tc.atEOF); got != tc.want {
			t.Errorf("NextBoundaryInString(%+q, %v) = %d; want %d", tc.input, tc.atEOF, got, tc.want)
		}
	}
}
243
// decomposeToLastTests drives decomposeToLastBoundary via decomposeToLast:
// pos is the byte count left in rb.out (the last boundary) and buffer the
// decomposition flushed from the reorder buffer.
var decomposeToLastTests = []PositionTest{
	// ends with inert character
	{"Hello!", 6, ""},
	{"\u0632", 2, ""},
	{"a\u0301\u0635", 5, ""},
	// ends with non-inert starter
	{"a", 0, "a"},
	{"a\u0301a", 3, "a"},
	{"a\u0301\u03B9", 3, "\u03B9"},
	{"a\u0327", 0, "a\u0327"},
	// illegal runes
	{"\xFF", 1, ""},
	{"aa\xFF", 3, ""},
	{"\xC0\x80\x80", 3, ""},
	{"\xCC\x80\x80", 3, ""},
	// ends with incomplete UTF-8 encoding
	{"a\xCC", 2, ""},
	// ends with combining characters
	{"\u0300\u0301", 0, "\u0300\u0301"},
	{"a\u0300\u0301", 0, "a\u0300\u0301"},
	{"a\u0301\u0308", 0, "a\u0301\u0308"},
	{"a\u0308\u0301", 0, "a\u0308\u0301"},
	{"aaaa\u0300\u0301", 3, "a\u0300\u0301"},
	{"\u0300a\u0300\u0301", 2, "a\u0300\u0301"},
	{"\u00C0", 0, "A\u0300"},
	{"a\u00C0", 1, "A\u0300"},
	// decomposing
	{"a\u0300\u00E0", 3, "a\u0300"},
	// multisegment decompositions (flushes leading segments)
	{"a\u0300\uFDC0", 7, "\u064A"},
	{"\uFDC0" + grave(29), 4, "\u064A" + grave(29)},
	{"\uFDC0" + grave(30), 4, "\u064A" + grave(30)},
	{"\uFDC0" + grave(31), 5, grave(30)},
	{"\uFDFA" + grave(14), 31, "\u0645" + grave(14)},
	// Overflow
	{"\u00E0" + grave(29), 0, "a" + grave(30)},
	{"\u00E0" + grave(30), 2, grave(30)},
	// Hangul
	{"a\u1103", 1, "\u1103"},
	{"a\u110B", 1, "\u110B"},
	{"a\u110B\u1173", 1, "\u110B\u1173"},
	// See comment in composition.go:compBoundaryAfter.
	{"a\u110B\u1173\u11B7", 1, "\u110B\u1173\u11B7"},
	{"a\uC73C", 1, "\u110B\u1173"},
	{"다음", 3, "\u110B\u1173\u11B7"},
	{"다", 0, "\u1103\u1161"},
	{"\u1103\u1161\u110B\u1173\u11B7", 6, "\u110B\u1173\u11B7"},
	{"\u110B\u1173\u11B7\u1103\u1161", 9, "\u1103\u1161"},
	{"다음음", 6, "\u110B\u1173\u11B7"},
	{"음다다", 6, "\u1103\u1161"},
	// maximized buffer
	{"a" + grave(30), 0, "a" + grave(30)},
	// Buffer overflow
	{"a" + grave(31), 3, grave(30)},
	// weird UTF-8
	{"a\u0300\u11B7", 0, "a\u0300\u11B7"},
}
301
// decomposeToLast seeds rb's flush buffer with s, runs
// decomposeToLastBoundary, and returns the bytes remaining in rb.out
// (the position of the last boundary) plus the flushed decomposition.
func decomposeToLast(rb *reorderBuffer, s string) (int, []byte) {
	rb.setFlusher([]byte(s), appendFlush)
	decomposeToLastBoundary(rb)
	buf := rb.flush(nil)
	return len(rb.out), buf
}
308
// TestDecomposeToLastBoundary runs decomposeToLastTests under NFKC.
func TestDecomposeToLastBoundary(t *testing.T) {
	runPosTests(t, "TestDecomposeToLastBoundary", NFKC, decomposeToLast, decomposeToLastTests)
}
312
// lastBoundaryTests: pos is the expected LastBoundary result; -1 means
// no boundary exists in the input. buffer is unused.
var lastBoundaryTests = []PositionTest{
	// ends with inert character
	{"Hello!", 6, ""},
	{"\u0632", 2, ""},
	// ends with non-inert starter
	{"a", 0, ""},
	// illegal runes
	{"\xff", 1, ""},
	{"aa\xff", 3, ""},
	{"a\xff\u0300", 1, ""}, // TODO: should probably be 2.
	{"\xc0\x80\x80", 3, ""},
	{"\xc0\x80\x80\u0300", 3, ""},
	// ends with incomplete UTF-8 encoding
	{"\xCC", -1, ""},
	{"\xE0\x80", -1, ""},
	{"\xF0\x80\x80", -1, ""},
	{"a\xCC", 0, ""},
	{"\x80\xCC", 1, ""},
	{"\xCC\xCC", 1, ""},
	// ends with combining characters
	{"a\u0300\u0301", 0, ""},
	{"aaaa\u0300\u0301", 3, ""},
	{"\u0300a\u0300\u0301", 2, ""},
	{"\u00C2", 0, ""},
	{"a\u00C2", 1, ""},
	// decomposition may recombine
	{"\u0226", 0, ""},
	// no boundary
	{"", -1, ""},
	{"\u0300\u0301", -1, ""},
	{"\u0300", -1, ""},
	{"\x80\x80", -1, ""},
	{"\x80\x80\u0301", -1, ""},
	// Hangul
	{"다음", 3, ""},
	{"다", 0, ""},
	{"\u1103\u1161\u110B\u1173\u11B7", 6, ""},
	{"\u110B\u1173\u11B7\u1103\u1161", 9, ""},
	// too many combining characters.
	{grave(maxNonStarters - 1), -1, ""},
	// May still be preceded with a non-starter.
	{grave(maxNonStarters), -1, ""},
	// May still need to insert a cgj after the last combiner.
	{grave(maxNonStarters + 1), 2, ""},
	{grave(maxNonStarters + 2), 4, ""},

	{"a" + grave(maxNonStarters-1), 0, ""},
	{"a" + grave(maxNonStarters), 0, ""},
	// May still need to insert a cgj after the last combiner.
	{"a" + grave(maxNonStarters+1), 3, ""},
	{"a" + grave(maxNonStarters+2), 5, ""},
}
365
// lastBoundaryF adapts Form.LastBoundary to the positionFunc signature.
func lastBoundaryF(rb *reorderBuffer, s string) (int, []byte) {
	return rb.f.form.LastBoundary([]byte(s)), nil
}

// TestLastBoundary runs lastBoundaryTests against NFC.
func TestLastBoundary(t *testing.T) {
	runPosTests(t, "TestLastBoundary", NFC, lastBoundaryF, lastBoundaryTests)
}
373
// spanTest describes one Span/SpanString call: n is the expected number
// of bytes spanned and err the expected error (nil when the whole input
// is in normal form).
type spanTest struct {
	input string
	atEOF bool
	n     int
	err   error
}
380
// quickSpanTests hold form-independent cases; TestSpan runs them under
// both NFD and NFC.
var quickSpanTests = []spanTest{
	{"", true, 0, nil},
	// starters
	{"a", true, 1, nil},
	{"abc", true, 3, nil},
	{"\u043Eb", true, 3, nil},
	// incomplete last rune.
	{"\xCC", true, 1, nil},
	{"\xCC", false, 0, transform.ErrShortSrc},
	{"a\xCC", true, 2, nil},
	{"a\xCC", false, 0, transform.ErrShortSrc}, // TODO: could be 1 for NFD
	// incorrectly ordered combining characters
	{"\u0300\u0316", true, 0, transform.ErrEndOfSpan},
	{"\u0300\u0316", false, 0, transform.ErrEndOfSpan},
	{"\u0300\u0316cd", true, 0, transform.ErrEndOfSpan},
	{"\u0300\u0316cd", false, 0, transform.ErrEndOfSpan},
	// have a maximum number of combining characters.
	{rep(0x035D, 30) + "\u035B", true, 0, transform.ErrEndOfSpan},
	{"a" + rep(0x035D, 30) + "\u035B", true, 0, transform.ErrEndOfSpan},
	{"Ɵ" + rep(0x035D, 30) + "\u035B", true, 0, transform.ErrEndOfSpan},
	{"aa" + rep(0x035D, 30) + "\u035B", true, 1, transform.ErrEndOfSpan},
	{rep(0x035D, 30) + cgj + "\u035B", true, 64, nil},
	{"a" + rep(0x035D, 30) + cgj + "\u035B", true, 65, nil},
	{"Ɵ" + rep(0x035D, 30) + cgj + "\u035B", true, 66, nil},
	{"aa" + rep(0x035D, 30) + cgj + "\u035B", true, 66, nil},

	{"a" + rep(0x035D, 30) + cgj + "\u035B", false, 61, transform.ErrShortSrc},
	{"Ɵ" + rep(0x035D, 30) + cgj + "\u035B", false, 62, transform.ErrShortSrc},
	{"aa" + rep(0x035D, 30) + cgj + "\u035B", false, 62, transform.ErrShortSrc},
}
411
// quickSpanNFDTests hold NFD-specific Span cases.
var quickSpanNFDTests = []spanTest{
	// needs decomposing
	{"\u00C0", true, 0, transform.ErrEndOfSpan},
	{"abc\u00C0", true, 3, transform.ErrEndOfSpan},
	// correctly ordered combining characters
	{"\u0300", true, 2, nil},
	{"ab\u0300", true, 4, nil},
	{"ab\u0300cd", true, 6, nil},
	{"\u0300cd", true, 4, nil},
	{"\u0316\u0300", true, 4, nil},
	{"ab\u0316\u0300", true, 6, nil},
	{"ab\u0316\u0300cd", true, 8, nil},
	{"ab\u0316\u0300\u00C0", true, 6, transform.ErrEndOfSpan},
	{"\u0316\u0300cd", true, 6, nil},
	{"\u043E\u0308b", true, 5, nil},
	// incorrectly ordered combining characters
	{"ab\u0300\u0316", true, 1, transform.ErrEndOfSpan}, // TODO: we could skip 'b' as well.
	{"ab\u0300\u0316cd", true, 1, transform.ErrEndOfSpan},
	// Hangul
	{"같은", true, 0, transform.ErrEndOfSpan},
}
433
// quickSpanNFCTests hold NFC-specific Span cases.
var quickSpanNFCTests = []spanTest{
	// okay composed
	{"\u00C0", true, 2, nil},
	{"abc\u00C0", true, 5, nil},
	// correctly ordered combining characters
	// TODO: b may combine with modifiers, which is why this fails. We could
	// make a more precise test that actually checks whether the last
	// characters combine. Probably not worth it.
	{"ab\u0300", true, 1, transform.ErrEndOfSpan},
	{"ab\u0300cd", true, 1, transform.ErrEndOfSpan},
	{"ab\u0316\u0300", true, 1, transform.ErrEndOfSpan},
	{"ab\u0316\u0300cd", true, 1, transform.ErrEndOfSpan},
	{"\u00C0\u035D", true, 4, nil},
	// we do not special case leading combining characters
	{"\u0300cd", true, 0, transform.ErrEndOfSpan},
	{"\u0300", true, 0, transform.ErrEndOfSpan},
	{"\u0316\u0300", true, 0, transform.ErrEndOfSpan},
	{"\u0316\u0300cd", true, 0, transform.ErrEndOfSpan},
	// incorrectly ordered combining characters
	{"ab\u0300\u0316", true, 1, transform.ErrEndOfSpan},
	{"ab\u0300\u0316cd", true, 1, transform.ErrEndOfSpan},
	// Hangul
	{"같은", true, 6, nil},
	{"같은", false, 3, transform.ErrShortSrc},
	// We return the start of the violating segment in case of overflow.
	{grave(30) + "\uff9e", true, 0, transform.ErrEndOfSpan},
	{grave(30), true, 0, transform.ErrEndOfSpan},
}
462
// runSpanTests checks both Span (on bytes) and SpanString for every
// case; the String variant is skipped when the Bytes variant fails so a
// single root cause is reported once.
func runSpanTests(t *testing.T, name string, f Form, testCases []spanTest) {
	for i, tc := range testCases {
		s := fmt.Sprintf("Bytes/%s/%d=%+q/atEOF=%v", name, i, pc(tc.input), tc.atEOF)
		ok := testtext.Run(t, s, func(t *testing.T) {
			n, err := f.Span([]byte(tc.input), tc.atEOF)
			if n != tc.n || err != tc.err {
				t.Errorf("\n got %d, %v;\nwant %d, %v", n, err, tc.n, tc.err)
			}
		})
		if !ok {
			continue // Don't do the String variant if the Bytes variant failed.
		}
		s = fmt.Sprintf("String/%s/%d=%+q/atEOF=%v", name, i, pc(tc.input), tc.atEOF)
		testtext.Run(t, s, func(t *testing.T) {
			n, err := f.SpanString(tc.input, tc.atEOF)
			if n != tc.n || err != tc.err {
				t.Errorf("\n got %d, %v;\nwant %d, %v", n, err, tc.n, tc.err)
			}
		})
	}
}
484
// TestSpan runs the form-independent quickSpanTests under both NFD and
// NFC, plus each form's specific table.
func TestSpan(t *testing.T) {
	runSpanTests(t, "NFD", NFD, quickSpanTests)
	runSpanTests(t, "NFD", NFD, quickSpanNFDTests)
	runSpanTests(t, "NFC", NFC, quickSpanTests)
	runSpanTests(t, "NFC", NFC, quickSpanNFCTests)
}
491
// isNormalTests encode IsNormal's boolean result in pos: 1 means the
// input is in normal form, 0 means it is not. Applies to all forms.
var isNormalTests = []PositionTest{
	{"", 1, ""},
	// illegal runes
	{"\xff", 1, ""},
	// starters
	{"a", 1, ""},
	{"abc", 1, ""},
	{"\u043Eb", 1, ""},
	// incorrectly ordered combining characters
	{"\u0300\u0316", 0, ""},
	{"ab\u0300\u0316", 0, ""},
	{"ab\u0300\u0316cd", 0, ""},
	{"\u0300\u0316cd", 0, ""},
}
// isNormalNFDTests hold decomposed-form-specific IsNormal cases
// (1 = normal, 0 = not).
var isNormalNFDTests = []PositionTest{
	// needs decomposing
	{"\u00C0", 0, ""},
	{"abc\u00C0", 0, ""},
	// correctly ordered combining characters
	{"\u0300", 1, ""},
	{"ab\u0300", 1, ""},
	{"ab\u0300cd", 1, ""},
	{"\u0300cd", 1, ""},
	{"\u0316\u0300", 1, ""},
	{"ab\u0316\u0300", 1, ""},
	{"ab\u0316\u0300cd", 1, ""},
	{"\u0316\u0300cd", 1, ""},
	{"\u043E\u0308b", 1, ""},
	// Hangul
	{"같은", 0, ""},
}
// isNormalNFCTests hold composed-form-specific IsNormal cases
// (1 = normal, 0 = not).
var isNormalNFCTests = []PositionTest{
	// okay composed
	{"\u00C0", 1, ""},
	{"abc\u00C0", 1, ""},
	// need reordering
	{"a\u0300", 0, ""},
	{"a\u0300cd", 0, ""},
	{"a\u0316\u0300", 0, ""},
	{"a\u0316\u0300cd", 0, ""},
	// correctly ordered combining characters
	{"ab\u0300", 1, ""},
	{"ab\u0300cd", 1, ""},
	{"ab\u0316\u0300", 1, ""},
	{"ab\u0316\u0300cd", 1, ""},
	{"\u00C0\u035D", 1, ""},
	{"\u0300", 1, ""},
	{"\u0316\u0300cd", 1, ""},
	// Hangul
	{"같은", 1, ""},
}
543
// isNormalNFKXTests apply only to the compatibility forms (NFKC/NFKD).
var isNormalNFKXTests = []PositionTest{
	// Special case.
	{"\u00BC", 0, ""},
}
548
549func isNormalF(rb *reorderBuffer, s string) (int, []byte) {
550 if rb.f.form.IsNormal([]byte(s)) {
551 return 1, nil
552 }
553 return 0, nil
554}
555
556func isNormalStringF(rb *reorderBuffer, s string) (int, []byte) {
557 if rb.f.form.IsNormalString(s) {
558 return 1, nil
559 }
560 return 0, nil
561}
562
// TestIsNormal checks IsNormal for all four forms, combining the shared
// table with form-specific and compatibility-specific tables.
func TestIsNormal(t *testing.T) {
	runPosTests(t, "TestIsNormalNFD1", NFD, isNormalF, isNormalTests)
	runPosTests(t, "TestIsNormalNFD2", NFD, isNormalF, isNormalNFDTests)
	runPosTests(t, "TestIsNormalNFC1", NFC, isNormalF, isNormalTests)
	runPosTests(t, "TestIsNormalNFC2", NFC, isNormalF, isNormalNFCTests)
	runPosTests(t, "TestIsNormalNFKD1", NFKD, isNormalF, isNormalTests)
	runPosTests(t, "TestIsNormalNFKD2", NFKD, isNormalF, isNormalNFDTests)
	runPosTests(t, "TestIsNormalNFKD3", NFKD, isNormalF, isNormalNFKXTests)
	runPosTests(t, "TestIsNormalNFKC1", NFKC, isNormalF, isNormalTests)
	runPosTests(t, "TestIsNormalNFKC2", NFKC, isNormalF, isNormalNFCTests)
	runPosTests(t, "TestIsNormalNFKC3", NFKC, isNormalF, isNormalNFKXTests)
}
575
// TestIsNormalString checks the string variant for NFD and NFC.
func TestIsNormalString(t *testing.T) {
	runPosTests(t, "TestIsNormalNFD1", NFD, isNormalStringF, isNormalTests)
	runPosTests(t, "TestIsNormalNFD2", NFD, isNormalStringF, isNormalNFDTests)
	runPosTests(t, "TestIsNormalNFC1", NFC, isNormalStringF, isNormalTests)
	runPosTests(t, "TestIsNormalNFC2", NFC, isNormalStringF, isNormalNFCTests)
}
582
// AppendTest describes one append operation: out is the expected result
// of appending the normalization of right to (already normalized) left.
type AppendTest struct {
	left  string
	right string
	out   string
}

// appendFunc appends the normalization (in form f) of s to out.
type appendFunc func(f Form, out []byte, s string) []byte

// fstr names the forms; indexed by Form.
var fstr = []string{"NFC", "NFD", "NFKC", "NFKD"}
592
// runNormTests runs fn against each form's test table (normTests is
// indexed by Form, covering NFC through NFKD).
func runNormTests(t *testing.T, name string, fn appendFunc) {
	for f := NFC; f <= NFKD; f++ {
		runAppendTests(t, name, f, fn, normTests[f])
	}
}
598
// runAppendTests runs each AppendTest through fn for form f and then
// bootstraps a cross-check: for every other form g, appending right to
// the g-normalized left must match g.String of the concatenation.
func runAppendTests(t *testing.T, name string, f Form, fn appendFunc, tests []AppendTest) {
	for i, test := range tests {
		t.Run(fmt.Sprintf("%s/%d", fstr[f], i), func(t *testing.T) {
			id := pc(test.left + test.right)
			// Honor the -testn flag: skip every case but the selected index.
			if *testn >= 0 && i != *testn {
				return
			}
			t.Run("fn", func(t *testing.T) {
				out := []byte(test.left)
				have := string(fn(f, out, test.right))
				if len(have) != len(test.out) {
					t.Errorf("%+q: length is %d; want %d (%+q vs %+q)", id, len(have), len(test.out), pc(have), pc(test.out))
				}
				if have != test.out {
					k, pf := pidx(have, test.out)
					t.Errorf("%+q:\nwas %s%+q; \nwant %s%+q", id, pf, pc(have[k:]), pf, pc(test.out[k:]))
				}
			})

			// Bootstrap by normalizing input. Ensures that the various variants
			// behave the same.
			for g := NFC; g <= NFKD; g++ {
				if f == g {
					continue
				}
				t.Run(fstr[g], func(t *testing.T) {
					want := g.String(test.left + test.right)
					have := string(fn(g, g.AppendString(nil, test.left), test.right))
					if len(have) != len(want) {
						t.Errorf("%+q: length is %d; want %d (%+q vs %+q)", id, len(have), len(want), pc(have), pc(want))
					}
					if have != want {
						k, pf := pidx(have, want)
						t.Errorf("%+q:\nwas %s%+q; \nwant %s%+q", id, pf, pc(have[k:]), pf, pc(want[k:]))
					}
				})
			}
		})
	}
}
639
// normTests is indexed by Form; the order must match the NFC, NFD,
// NFKC, NFKD constant values.
var normTests = [][]AppendTest{
	appendTestsNFC,
	appendTestsNFD,
	appendTestsNFKC,
	appendTestsNFKD,
}
646
// appendTestsNFC holds NFC-specific append cases; cases are
// index-addressed by -testn and failure messages, so do not reorder.
var appendTestsNFC = []AppendTest{
	{"", ascii, ascii},
	{"", txt_all, txt_all},
	{"\uff9e", grave(30), "\uff9e" + grave(29) + cgj + grave(1)},
	{grave(30), "\uff9e", grave(30) + cgj + "\uff9e"},

	// Tests designed for Iter.
	{ // ordering of non-composing combining characters
		"",
		"\u0305\u0316",
		"\u0316\u0305",
	},
	{ // segment overflow
		"",
		"a" + rep(0x0305, maxNonStarters+4) + "\u0316",
		"a" + rep(0x0305, maxNonStarters) + cgj + "\u0316" + rep(0x305, 4),
	},

	{ // Combine across non-blocking non-starters.
		// U+0327 COMBINING CEDILLA;Mn;202;NSM;;;;;N;NON-SPACING CEDILLA;;;;
		// U+0325 COMBINING RING BELOW;Mn;220;NSM;;;;;N;NON-SPACING RING BELOW;;;;
		"", "a\u0327\u0325", "\u1e01\u0327",
	},

	{ // Jamo V+T does not combine.
		"",
		"\u1161\u11a8",
		"\u1161\u11a8",
	},

	// Stability tests: see http://www.unicode.org/review/pr-29.html.
	{"", "\u0b47\u0300\u0b3e", "\u0b47\u0300\u0b3e"},
	{"", "\u1100\u0300\u1161", "\u1100\u0300\u1161"},
	{"", "\u0b47\u0b3e", "\u0b4b"},
	{"", "\u1100\u1161", "\uac00"},

	// U+04DA MALAYALAM VOWEL SIGN O;Mc;0;L;0D46 0D3E;;;;N;;;;;
	{ // 0d4a starts a new segment.
		"",
		"\u0d4a" + strings.Repeat("\u0d3e", 15) + "\u0d4a" + strings.Repeat("\u0d3e", 15),
		"\u0d4a" + strings.Repeat("\u0d3e", 15) + "\u0d4a" + strings.Repeat("\u0d3e", 15),
	},

	{ // Split combining characters.
		// TODO: don't insert CGJ before starters.
		"",
		"\u0d46" + strings.Repeat("\u0d3e", 31),
		"\u0d4a" + strings.Repeat("\u0d3e", 29) + cgj + "\u0d3e",
	},

	{ // Split combining characters.
		"",
		"\u0d4a" + strings.Repeat("\u0d3e", 30),
		"\u0d4a" + strings.Repeat("\u0d3e", 29) + cgj + "\u0d3e",
	},

	{ // https://golang.org/issues/20079
		"",
		"\xeb\u0344",
		"\xeb\u0308\u0301",
	},

	{ // https://golang.org/issues/20079
		"",
		"\uac00" + strings.Repeat("\u0300", 30),
		"\uac00" + strings.Repeat("\u0300", 29) + "\u034f\u0300",
	},

	{ // https://golang.org/issues/20079
		"",
		"\xeb" + strings.Repeat("\u0300", 31),
		"\xeb" + strings.Repeat("\u0300", 30) + "\u034f\u0300",
	},
}
721
// appendTestsNFD is currently empty; NFD behavior is exercised
// indirectly through the cross-form bootstrap in runAppendTests.
var appendTestsNFD = []AppendTest{
	// TODO: Move some of the tests here.
}
725
// appendTestsNFKC holds NFKC-specific append cases, including buffer
// splits, malformed UTF-8, and combining-character overflow handling.
// Cases are index-addressed by -testn and failures; do not reorder.
var appendTestsNFKC = []AppendTest{
	// empty buffers
	{"", "", ""},
	{"a", "", "a"},
	{"", "a", "a"},
	{"", "\u0041\u0307\u0304", "\u01E0"},
	// segment split across buffers
	{"", "a\u0300b", "\u00E0b"},
	{"a", "\u0300b", "\u00E0b"},
	{"a", "\u0300\u0316", "\u00E0\u0316"},
	{"a", "\u0316\u0300", "\u00E0\u0316"},
	{"a", "\u0300a\u0300", "\u00E0\u00E0"},
	{"a", "\u0300a\u0300a\u0300", "\u00E0\u00E0\u00E0"},
	{"a", "\u0300aaa\u0300aaa\u0300", "\u00E0aa\u00E0aa\u00E0"},
	{"a\u0300", "\u0327", "\u00E0\u0327"},
	{"a\u0327", "\u0300", "\u00E0\u0327"},
	{"a\u0316", "\u0300", "\u00E0\u0316"},
	{"\u0041\u0307", "\u0304", "\u01E0"},
	// Hangul
	{"", "\u110B\u1173", "\uC73C"},
	{"", "\u1103\u1161", "\uB2E4"},
	{"", "\u110B\u1173\u11B7", "\uC74C"},
	{"", "\u320E", "\x28\uAC00\x29"},
	{"", "\x28\u1100\u1161\x29", "\x28\uAC00\x29"},
	{"\u1103", "\u1161", "\uB2E4"},
	{"\u110B", "\u1173\u11B7", "\uC74C"},
	{"\u110B\u1173", "\u11B7", "\uC74C"},
	{"\uC73C", "\u11B7", "\uC74C"},
	// UTF-8 encoding split across buffers
	{"a\xCC", "\x80", "\u00E0"},
	{"a\xCC", "\x80b", "\u00E0b"},
	{"a\xCC", "\x80a\u0300", "\u00E0\u00E0"},
	{"a\xCC", "\x80\x80", "\u00E0\x80"},
	{"a\xCC", "\x80\xCC", "\u00E0\xCC"},
	{"a\u0316\xCC", "\x80a\u0316\u0300", "\u00E0\u0316\u00E0\u0316"},
	// ending in incomplete UTF-8 encoding
	{"", "\xCC", "\xCC"},
	{"a", "\xCC", "a\xCC"},
	{"a", "b\xCC", "ab\xCC"},
	{"\u0226", "\xCC", "\u0226\xCC"},
	// illegal runes
	{"", "\x80", "\x80"},
	{"", "\x80\x80\x80", "\x80\x80\x80"},
	{"", "\xCC\x80\x80\x80", "\xCC\x80\x80\x80"},
	{"", "a\x80", "a\x80"},
	{"", "a\x80\x80\x80", "a\x80\x80\x80"},
	{"", "a\x80\x80\x80\x80\x80\x80", "a\x80\x80\x80\x80\x80\x80"},
	{"a", "\x80\x80\x80", "a\x80\x80\x80"},
	// overflow
	{"", strings.Repeat("\x80", 33), strings.Repeat("\x80", 33)},
	{strings.Repeat("\x80", 33), "", strings.Repeat("\x80", 33)},
	{strings.Repeat("\x80", 33), strings.Repeat("\x80", 33), strings.Repeat("\x80", 66)},
	// overflow of combining characters
	{"", grave(34), grave(30) + cgj + grave(4)},
	{"", grave(36), grave(30) + cgj + grave(6)},
	{grave(29), grave(5), grave(30) + cgj + grave(4)},
	{grave(30), grave(4), grave(30) + cgj + grave(4)},
	{grave(30), grave(3), grave(30) + cgj + grave(3)},
	{grave(30) + "\xCC", "\x80", grave(30) + cgj + grave(1)},
	{"", "\uFDFA" + grave(14), "\u0635\u0644\u0649 \u0627\u0644\u0644\u0647 \u0639\u0644\u064a\u0647 \u0648\u0633\u0644\u0645" + grave(14)},
	{"", "\uFDFA" + grave(28) + "\u0316", "\u0635\u0644\u0649 \u0627\u0644\u0644\u0647 \u0639\u0644\u064a\u0647 \u0648\u0633\u0644\u0645\u0316" + grave(28)},
	// - First rune has a trailing non-starter.
	{"\u00d5", grave(30), "\u00d5" + grave(29) + cgj + grave(1)},
	// - U+FF9E decomposes into a non-starter in compatibility mode. A CGJ must be
	//   inserted even when FF9E starts a new segment.
	{"\uff9e", grave(30), "\u3099" + grave(29) + cgj + grave(1)},
	{grave(30), "\uff9e", grave(30) + cgj + "\u3099"},
	// - Many non-starter decompositions in a row causing overflow.
	{"", rep(0x340, 31), rep(0x300, 30) + cgj + "\u0300"},
	{"", rep(0xFF9E, 31), rep(0x3099, 30) + cgj + "\u3099"},

	{"", "\u0644\u0625" + rep(0x300, 31), "\u0644\u0625" + rep(0x300, 29) + cgj + "\u0300\u0300"},
	{"", "\ufef9" + rep(0x300, 31), "\u0644\u0625" + rep(0x300, 29) + cgj + rep(0x0300, 2)},
	{"", "\ufef9" + rep(0x300, 31), "\u0644\u0625" + rep(0x300, 29) + cgj + rep(0x0300, 2)},

	// U+0F81 TIBETAN VOWEL SIGN REVERSED II splits into two modifiers.
	{"", "\u0f7f" + rep(0xf71, 29) + "\u0f81", "\u0f7f" + rep(0xf71, 29) + cgj + "\u0f71\u0f80"},
	{"", "\u0f7f" + rep(0xf71, 28) + "\u0f81", "\u0f7f" + rep(0xf71, 29) + "\u0f80"},
	{"", "\u0f7f" + rep(0xf81, 16), "\u0f7f" + rep(0xf71, 15) + rep(0xf80, 15) + cgj + "\u0f71\u0f80"},

	// weird UTF-8
	{"\u00E0\xE1", "\x86", "\u00E0\xE1\x86"},
	{"a\u0300\u11B7", "\u0300", "\u00E0\u11B7\u0300"},
	{"a\u0300\u11B7\u0300", "\u0300", "\u00E0\u11B7\u0300\u0300"},
	{"\u0300", "\xF8\x80\x80\x80\x80\u0300", "\u0300\xF8\x80\x80\x80\x80\u0300"},
	{"\u0300", "\xFC\x80\x80\x80\x80\x80\u0300", "\u0300\xFC\x80\x80\x80\x80\x80\u0300"},
	{"\xF8\x80\x80\x80\x80\u0300", "\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},
	{"\xFC\x80\x80\x80\x80\x80\u0300", "\u0300", "\xFC\x80\x80\x80\x80\x80\u0300\u0300"},
	{"\xF8\x80\x80\x80", "\x80\u0300\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},

	{"", strings.Repeat("a\u0316\u0300", 6), strings.Repeat("\u00E0\u0316", 6)},
	// large input.
	{"", strings.Repeat("a\u0300\u0316", 31), strings.Repeat("\u00E0\u0316", 31)},
	{"", strings.Repeat("a\u0300\u0316", 4000), strings.Repeat("\u00E0\u0316", 4000)},
	{"", strings.Repeat("\x80\x80", 4000), strings.Repeat("\x80\x80", 4000)},
	{"", "\u0041\u0307\u0304", "\u01E0"},
}
823
// appendTestsNFKD holds NFKD-specific append cases, focusing on
// segment-overflow and CGJ insertion behavior. Do not reorder.
var appendTestsNFKD = []AppendTest{
	{"", "a" + grave(64), "a" + grave(30) + cgj + grave(30) + cgj + grave(4)},

	{ // segment overflow on unchanged character
		"",
		"a" + grave(64) + "\u0316",
		"a" + grave(30) + cgj + grave(30) + cgj + "\u0316" + grave(4),
	},
	{ // segment overflow on unchanged character + start value
		"",
		"a" + grave(98) + "\u0316",
		"a" + grave(30) + cgj + grave(30) + cgj + grave(30) + cgj + "\u0316" + grave(8),
	},
	{ // segment overflow on decomposition. (U+0340 decomposes to U+0300.)
		"",
		"a" + grave(59) + "\u0340",
		"a" + grave(30) + cgj + grave(30),
	},
	{ // segment overflow on non-starter decomposition
		"",
		"a" + grave(33) + "\u0340" + grave(30) + "\u0320",
		"a" + grave(30) + cgj + grave(30) + cgj + "\u0320" + grave(4),
	},
	{ // start value after ASCII overflow
		"",
		rep('a', segSize) + grave(32) + "\u0320",
		rep('a', segSize) + grave(30) + cgj + "\u0320" + grave(2),
	},
	{ // Jamo overflow
		"",
		"\u1100\u1161" + grave(30) + "\u0320" + grave(2),
		"\u1100\u1161" + grave(29) + cgj + "\u0320" + grave(3),
	},
	{ // Hangul
		"",
		"\uac00",
		"\u1100\u1161",
	},
	{ // Hangul overflow
		"",
		"\uac00" + grave(32) + "\u0320",
		"\u1100\u1161" + grave(29) + cgj + "\u0320" + grave(3),
	},
	{ // Hangul overflow in Hangul mode.
		"",
		"\uac00\uac00" + grave(32) + "\u0320",
		"\u1100\u1161\u1100\u1161" + grave(29) + cgj + "\u0320" + grave(3),
	},
	{ // Hangul overflow in Hangul mode.
		"",
		strings.Repeat("\uac00", 3) + grave(32) + "\u0320",
		strings.Repeat("\u1100\u1161", 3) + grave(29) + cgj + "\u0320" + grave(3),
	},
	{ // start value after cc=0
		"",
		"您您" + grave(34) + "\u0320",
		"您您" + grave(30) + cgj + "\u0320" + grave(4),
	},
	{ // start value after normalization
		"",
		"\u0300\u0320a" + grave(34) + "\u0320",
		"\u0320\u0300a" + grave(30) + cgj + "\u0320" + grave(4),
	},
	{
		// U+0F81 TIBETAN VOWEL SIGN REVERSED II splits into two modifiers.
		"",
		"a\u0f7f" + rep(0xf71, 29) + "\u0f81",
		"a\u0f7f" + rep(0xf71, 29) + cgj + "\u0f71\u0f80",
	},
}
894
// TestAppend checks Form.Append against the per-form tables.
func TestAppend(t *testing.T) {
	runNormTests(t, "Append", func(f Form, out []byte, s string) []byte {
		return f.Append(out, []byte(s)...)
	})
}
900
// TestAppendString checks Form.AppendString against the per-form tables.
func TestAppendString(t *testing.T) {
	runNormTests(t, "AppendString", func(f Form, out []byte, s string) []byte {
		return f.AppendString(out, s)
	})
}
906
907func TestBytes(t *testing.T) {
908 runNormTests(t, "Bytes", func(f Form, out []byte, s string) []byte {
909 buf := []byte{}
910 buf = append(buf, out...)
911 buf = append(buf, s...)
912 return f.Bytes(buf)
913 })
914}
915
916func TestString(t *testing.T) {
917 runNormTests(t, "String", func(f Form, out []byte, s string) []byte {
918 outs := string(out) + s
919 return []byte(f.String(outs))
920 })
921}
922
// TestLinking verifies that the normalization tables can be dropped by
// the linker: a program that only references MaxSegmentSize must be
// substantially smaller than one that calls NFC.String.
func TestLinking(t *testing.T) {
	const prog = `
	package main
	import "fmt"
	import "golang.org/x/text/unicode/norm"
	func main() { fmt.Println(norm.%s) }
	`
	baseline, errB := testtext.CodeSize(fmt.Sprintf(prog, "MaxSegmentSize"))
	withTables, errT := testtext.CodeSize(fmt.Sprintf(prog, `NFC.String("")`))
	if errB != nil || errT != nil {
		t.Skipf("code size failed: %v and %v", errB, errT)
	}
	// Tables are at least 50K
	if d := withTables - baseline; d < 50*1024 {
		t.Errorf("tables appear not to be dropped: %d - %d = %d",
			withTables, baseline, d)
	}
}
941
// appendBench returns a closure that appends the normalization of in to
// a pre-sized buffer; the result is deliberately discarded — only the
// normalization work is being measured.
func appendBench(f Form, in []byte) func() {
	buf := make([]byte, 0, 4*len(in))
	return func() {
		f.Append(buf, in...)
	}
}
948
949func bytesBench(f Form, in []byte) func() {
950 return func() {
951 f.Bytes(in)
952 }
953}
954
955func iterBench(f Form, in []byte) func() {
956 iter := Iter{}
957 return func() {
958 iter.Init(f, in)
959 for !iter.Done() {
960 iter.Next()
961 }
962 }
963}
964
965func transformBench(f Form, in []byte) func() {
966 buf := make([]byte, 4*len(in))
967 return func() {
968 if _, n, err := f.Transform(buf, in, true); err != nil || len(in) != n {
969 log.Panic(n, len(in), err)
970 }
971 }
972}
973
974func readerBench(f Form, in []byte) func() {
975 buf := make([]byte, 4*len(in))
976 return func() {
977 r := f.Reader(bytes.NewReader(in))
978 var err error
979 for err == nil {
980 _, err = r.Read(buf)
981 }
982 if err != io.EOF {
983 panic("")
984 }
985 }
986}
987
988func writerBench(f Form, in []byte) func() {
989 buf := make([]byte, 0, 4*len(in))
990 return func() {
991 r := f.Writer(bytes.NewBuffer(buf))
992 if _, err := r.Write(in); err != nil {
993 panic("")
994 }
995 }
996}
997
998func appendBenchmarks(bm []func(), f Form, in []byte) []func() {
999 bm = append(bm, appendBench(f, in))
1000 bm = append(bm, iterBench(f, in))
1001 bm = append(bm, transformBench(f, in))
1002 bm = append(bm, readerBench(f, in))
1003 bm = append(bm, writerBench(f, in))
1004 return bm
1005}
1006
1007func doFormBenchmark(b *testing.B, inf, f Form, s string) {
1008 b.StopTimer()
1009 in := inf.Bytes([]byte(s))
1010 bm := appendBenchmarks(nil, f, in)
1011 b.SetBytes(int64(len(in) * len(bm)))
1012 b.StartTimer()
1013 for i := 0; i < b.N; i++ {
1014 for _, fn := range bm {
1015 fn()
1016 }
1017 }
1018}
1019
1020func doSingle(b *testing.B, f func(Form, []byte) func(), s []byte) {
1021 b.StopTimer()
1022 fn := f(NFC, s)
1023 b.SetBytes(int64(len(s)))
1024 b.StartTimer()
1025 for i := 0; i < b.N; i++ {
1026 fn()
1027 }
1028}
1029
var (
	// smallNoChange contains a precomposed ö and so is unchanged by NFC;
	// smallChange contains o followed by U+0308 and must be recomposed.
	smallNoChange = []byte("nörmalization")
	smallChange   = []byte("No\u0308rmalization")
	// ascii is a large input that normalization leaves untouched.
	ascii = strings.Repeat("There is nothing to change here! ", 500)
)
1035
1036func lowerBench(f Form, in []byte) func() {
1037 // Use package strings instead of bytes as it doesn't allocate memory
1038 // if there aren't any changes.
1039 s := string(in)
1040 return func() {
1041 strings.ToLower(s)
1042 }
1043}
1044
// BenchmarkLowerCaseNoChange measures strings.ToLower on input it
// leaves unchanged.
func BenchmarkLowerCaseNoChange(b *testing.B) {
	doSingle(b, lowerBench, smallNoChange)
}

// BenchmarkLowerCaseChange measures strings.ToLower on input it must
// rewrite.
func BenchmarkLowerCaseChange(b *testing.B) {
	doSingle(b, lowerBench, smallChange)
}
1051
1052func quickSpanBench(f Form, in []byte) func() {
1053 return func() {
1054 f.QuickSpan(in)
1055 }
1056}
1057
// BenchmarkQuickSpanChangeNFC measures QuickSpan on small input.
// NOTE(review): despite the "Change" in the name, this uses
// smallNoChange — confirm this is intentional.
func BenchmarkQuickSpanChangeNFC(b *testing.B) {
	doSingle(b, quickSpanBench, smallNoChange)
}

// The Bytes benchmarks measure Form.Bytes on input that is already
// normalized (NoChange) or needs rewriting (Change).
func BenchmarkBytesNoChangeNFC(b *testing.B) {
	doSingle(b, bytesBench, smallNoChange)
}
func BenchmarkBytesChangeNFC(b *testing.B) {
	doSingle(b, bytesBench, smallChange)
}

// The Append benchmarks measure Form.Append on the same inputs, plus a
// large mixed-script text.
func BenchmarkAppendNoChangeNFC(b *testing.B) {
	doSingle(b, appendBench, smallNoChange)
}
func BenchmarkAppendChangeNFC(b *testing.B) {
	doSingle(b, appendBench, smallChange)
}
func BenchmarkAppendLargeNFC(b *testing.B) {
	doSingle(b, appendBench, txt_all_bytes)
}

// The Iter benchmarks measure segment iteration with Iter.
func BenchmarkIterNoChangeNFC(b *testing.B) {
	doSingle(b, iterBench, smallNoChange)
}
func BenchmarkIterChangeNFC(b *testing.B) {
	doSingle(b, iterBench, smallChange)
}
func BenchmarkIterLargeNFC(b *testing.B) {
	doSingle(b, iterBench, txt_all_bytes)
}

// The Transform benchmarks measure the transform.Transformer interface.
func BenchmarkTransformNoChangeNFC(b *testing.B) {
	doSingle(b, transformBench, smallNoChange)
}
func BenchmarkTransformChangeNFC(b *testing.B) {
	doSingle(b, transformBench, smallChange)
}
func BenchmarkTransformLargeNFC(b *testing.B) {
	doSingle(b, transformBench, txt_all_bytes)
}
1098
// The NormalizeAscii benchmarks normalize ASCII-only input into each of
// the four forms.
func BenchmarkNormalizeAsciiNFC(b *testing.B) {
	doFormBenchmark(b, NFC, NFC, ascii)
}
func BenchmarkNormalizeAsciiNFD(b *testing.B) {
	doFormBenchmark(b, NFC, NFD, ascii)
}
func BenchmarkNormalizeAsciiNFKC(b *testing.B) {
	doFormBenchmark(b, NFC, NFKC, ascii)
}
func BenchmarkNormalizeAsciiNFKD(b *testing.B) {
	doFormBenchmark(b, NFC, NFKD, ascii)
}

// The NormalizeX2Y benchmarks convert mixed-script sample text from
// form X to form Y.
func BenchmarkNormalizeNFC2NFC(b *testing.B) {
	doFormBenchmark(b, NFC, NFC, txt_all)
}
func BenchmarkNormalizeNFC2NFD(b *testing.B) {
	doFormBenchmark(b, NFC, NFD, txt_all)
}
func BenchmarkNormalizeNFD2NFC(b *testing.B) {
	doFormBenchmark(b, NFD, NFC, txt_all)
}
func BenchmarkNormalizeNFD2NFD(b *testing.B) {
	doFormBenchmark(b, NFD, NFD, txt_all)
}

// Hangul is often special-cased, so we test it separately.
func BenchmarkNormalizeHangulNFC2NFC(b *testing.B) {
	doFormBenchmark(b, NFC, NFC, txt_kr)
}
func BenchmarkNormalizeHangulNFC2NFD(b *testing.B) {
	doFormBenchmark(b, NFC, NFD, txt_kr)
}
func BenchmarkNormalizeHangulNFD2NFC(b *testing.B) {
	doFormBenchmark(b, NFD, NFC, txt_kr)
}
func BenchmarkNormalizeHangulNFD2NFD(b *testing.B) {
	doFormBenchmark(b, NFD, NFD, txt_kr)
}
1138
// forms lists all normalization forms exercised by the text benchmarks.
var forms = []Form{NFC, NFD, NFKC, NFKD}
1140
1141func doTextBenchmark(b *testing.B, s string) {
1142 b.StopTimer()
1143 in := []byte(s)
1144 bm := []func(){}
1145 for _, f := range forms {
1146 bm = appendBenchmarks(bm, f, in)
1147 }
1148 b.SetBytes(int64(len(s) * len(bm)))
1149 b.StartTimer()
1150 for i := 0; i < b.N; i++ {
1151 for _, f := range bm {
1152 f()
1153 }
1154 }
1155}
1156
// The benchmarks below run the full API for all four forms over sample
// texts covering different scripts and UTF-8 encoding lengths.
func BenchmarkCanonicalOrdering(b *testing.B) {
	doTextBenchmark(b, txt_canon)
}
func BenchmarkExtendedLatin(b *testing.B) {
	doTextBenchmark(b, txt_vn)
}
func BenchmarkMiscTwoByteUtf8(b *testing.B) {
	doTextBenchmark(b, twoByteUtf8)
}
func BenchmarkMiscThreeByteUtf8(b *testing.B) {
	doTextBenchmark(b, threeByteUtf8)
}
func BenchmarkHangul(b *testing.B) {
	doTextBenchmark(b, txt_kr)
}
func BenchmarkJapanese(b *testing.B) {
	doTextBenchmark(b, txt_jp)
}
func BenchmarkChinese(b *testing.B) {
	doTextBenchmark(b, txt_cn)
}

// BenchmarkOverflow exercises an input consisting of an extremely long
// run of combining characters.
func BenchmarkOverflow(b *testing.B) {
	doTextBenchmark(b, overflow)
}
1181
// overflow is 4096 copies of a combining mark (U+035D) followed by one
// more (U+035B) — presumably far exceeding the reorder buffer's
// capacity; used by BenchmarkOverflow.
var overflow = string(bytes.Repeat([]byte("\u035D"), 4096)) + "\u035B"
1183
// Tests sampled from the Canonical ordering tests (Part 2) of
// http://unicode.org/Public/UNIDATA/NormalizationTest.txt
// NOTE(review): this is a raw (backquoted) string, so the \uXXXX
// sequences are literal backslash text, not decoded runes — confirm the
// consumer expands them (or that benchmarking the escaped form is
// intended).
const txt_canon = `\u0061\u0315\u0300\u05AE\u0300\u0062 \u0061\u0300\u0315\u0300\u05AE\u0062
\u0061\u0302\u0315\u0300\u05AE\u0062 \u0061\u0307\u0315\u0300\u05AE\u0062
\u0061\u0315\u0300\u05AE\u030A\u0062 \u0061\u059A\u0316\u302A\u031C\u0062
\u0061\u032E\u059A\u0316\u302A\u0062 \u0061\u0338\u093C\u0334\u0062
\u0061\u059A\u0316\u302A\u0339 \u0061\u0341\u0315\u0300\u05AE\u0062
\u0061\u0348\u059A\u0316\u302A\u0062 \u0061\u0361\u0345\u035D\u035C\u0062
\u0061\u0366\u0315\u0300\u05AE\u0062 \u0061\u0315\u0300\u05AE\u0486\u0062
\u0061\u05A4\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0613\u0062
\u0061\u0315\u0300\u05AE\u0615\u0062 \u0061\u0617\u0315\u0300\u05AE\u0062
\u0061\u0619\u0618\u064D\u064E\u0062 \u0061\u0315\u0300\u05AE\u0654\u0062
\u0061\u0315\u0300\u05AE\u06DC\u0062 \u0061\u0733\u0315\u0300\u05AE\u0062
\u0061\u0744\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0745\u0062
\u0061\u09CD\u05B0\u094D\u3099\u0062 \u0061\u0E38\u0E48\u0E38\u0C56\u0062
\u0061\u0EB8\u0E48\u0E38\u0E49\u0062 \u0061\u0F72\u0F71\u0EC8\u0F71\u0062
\u0061\u1039\u05B0\u094D\u3099\u0062 \u0061\u05B0\u094D\u3099\u1A60\u0062
\u0061\u3099\u093C\u0334\u1BE6\u0062 \u0061\u3099\u093C\u0334\u1C37\u0062
\u0061\u1CD9\u059A\u0316\u302A\u0062 \u0061\u2DED\u0315\u0300\u05AE\u0062
\u0061\u2DEF\u0315\u0300\u05AE\u0062 \u0061\u302D\u302E\u059A\u0316\u0062`
1204
// txt_vn is Vietnamese sample text: Latin script with many diacritics.
// Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/
const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả.
Nếu bạn sử dụng, chuyển đổi, hoặc xây dựng dự án từ
nội dung được chia sẻ này, bạn phải áp dụng giấy phép này hoặc
một giấy phép khác có các điều khoản tương tự như giấy phép này
cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào
trên đây cũng có thể được miễn bỏ nếu bạn được sự cho phép của
người sở hữu bản quyền. Phạm vi công chúng — Khi tác phẩm hoặc
bất kỳ chương nào của tác phẩm đã trong vùng dành cho công
chúng theo quy định của pháp luật thì tình trạng của nó không
bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`
1216
// txt_ru is Cyrillic (plus some Greek) sample text.
// Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
const txt_ru = `При обязательном соблюдении следующих условий:
Attribution — Вы должны атрибутировать произведение (указывать
автора и источник) в порядке, предусмотренном автором или
лицензиаром (но только так, чтобы никоим образом не подразумевалось,
что они поддерживают вас или использование вами данного произведения).
Υπό τις ακόλουθες προϋποθέσεις:`
1224
// txt_gr is Greek sample text.
// Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/
const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με τον
τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια
(χωρίς όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή
τη χρήση του έργου από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε,
τροποποιήσετε ή δημιουργήσετε περαιτέρω βασισμένοι στο έργο θα
μπορείτε να διανέμετε το έργο που θα προκύψει μόνο με την ίδια ή
παρόμοια άδεια.`
1233
// txt_ar is Arabic sample text.
// Taken from http://creativecommons.org/licenses/by-sa/3.0/deed.ar
const txt_ar = `بموجب الشروط التالية نسب المصنف — يجب عليك أن
تنسب العمل بالطريقة التي تحددها المؤلف أو المرخص (ولكن ليس بأي حال من
الأحوال أن توحي وتقترح بتحول أو استخدامك للعمل).
المشاركة على قدم المساواة — إذا كنت يعدل ، والتغيير ، أو الاستفادة
من هذا العمل ، قد ينتج عن توزيع العمل إلا في ظل تشابه او تطابق فى واحد
لهذا الترخيص.`
1241
// txt_il is Hebrew sample text.
// Taken from http://creativecommons.org/licenses/by-sa/1.0/il/
const txt_il = `בכפוף לתנאים הבאים: ייחוס — עליך לייחס את היצירה (לתת קרדיט) באופן
המצויין על-ידי היוצר או מעניק הרישיון (אך לא בשום אופן המרמז על כך
שהם תומכים בך או בשימוש שלך ביצירה). שיתוף זהה — אם תחליט/י לשנות,
לעבד או ליצור יצירה נגזרת בהסתמך על יצירה זו, תוכל/י להפיץ את יצירתך
החדשה רק תחת אותו הרישיון או רישיון דומה לרישיון זה.`

// twoByteUtf8 combines samples whose characters mostly encode as two
// bytes in UTF-8.
const twoByteUtf8 = txt_ru + txt_gr + txt_ar + txt_il
1250
// txt_kr is Korean (Hangul) sample text.
// Taken from http://creativecommons.org/licenses/by-sa/2.0/kr/
const txt_kr = `다음과 같은 조건을 따라야 합니다: 저작자표시
(Attribution) — 저작자나 이용허락자가 정한 방법으로 저작물의
원저작자를 표시하여야 합니다(그러나 원저작자가 이용자나 이용자의
이용을 보증하거나 추천한다는 의미로 표시해서는 안됩니다).
동일조건변경허락 — 이 저작물을 이용하여 만든 이차적 저작물에는 본
라이선스와 동일한 라이선스를 적용해야 합니다.`
1258
// txt_th is Thai sample text.
// Taken from http://creativecommons.org/licenses/by-sa/3.0/th/
const txt_th = `ภายใต้เงื่อนไข ดังต่อไปนี้ : แสดงที่มา — คุณต้องแสดงที่
มาของงานดังกล่าว ตามรูปแบบที่ผู้สร้างสรรค์หรือผู้อนุญาตกำหนด (แต่
ไม่ใช่ในลักษณะที่ว่า พวกเขาสนับสนุนคุณหรือสนับสนุนการที่
คุณนำงานไปใช้) อนุญาตแบบเดียวกัน — หากคุณดัดแปลง เปลี่ยนรูป หรื
อต่อเติมงานนี้ คุณต้องใช้สัญญาอนุญาตแบบเดียวกันหรือแบบที่เหมื
อนกับสัญญาอนุญาตที่ใช้กับงานนี้เท่านั้น`

// threeByteUtf8 holds text whose characters mostly encode as three
// bytes in UTF-8.
const threeByteUtf8 = txt_th
1268
// txt_jp is Japanese sample text.
// Taken from http://creativecommons.org/licenses/by-sa/2.0/jp/
const txt_jp = `あなたの従うべき条件は以下の通りです。
表示 — あなたは原著作者のクレジットを表示しなければなりません。
継承 — もしあなたがこの作品を改変、変形または加工した場合、
あなたはその結果生じた作品をこの作品と同一の許諾条件の下でのみ
頒布することができます。`
1275
// txt_cn is Chinese sample text.
// http://creativecommons.org/licenses/by-sa/2.5/cn/
const txt_cn = `您可以自由: 复制、发行、展览、表演、放映、
广播或通过信息网络传播本作品 创作演绎作品
对本作品进行商业性使用 惟须遵守下列条件:
署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。
相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作,
您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。`

// txt_cjk and txt_all aggregate the samples above; txt_all covers all
// scripts and UTF-8 encoding lengths used by the benchmarks.
const txt_cjk = txt_cn + txt_jp + txt_kr
const txt_all = txt_vn + twoByteUtf8 + threeByteUtf8 + txt_cjk

// txt_all_bytes is txt_all as a byte slice, for the []byte-based APIs.
var txt_all_bytes = []byte(txt_all)