blob: a0eaa0d23be9d8a6b43f97ad4bf8321753816493 [file] [log] [blame]
Don Newton98fd8812019-09-23 15:15:02 -04001// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// The trie in this file is used to associate the first full character in an
6// UTF-8 string to a collation element. All but the last byte in a UTF-8 byte
7// sequence are used to lookup offsets in the index table to be used for the
8// next byte. The last byte is used to index into a table of collation elements.
9// For a full description, see go.text/collate/build/trie.go.
10
11package colltab
12
13const blockSize = 64
14
15type Trie struct {
16 Index0 []uint16 // index for first byte (0xC0-0xFF)
17 Values0 []uint32 // index for first byte (0x00-0x7F)
18 Index []uint16
19 Values []uint32
20}
21
22const (
23 t1 = 0x00 // 0000 0000
24 tx = 0x80 // 1000 0000
25 t2 = 0xC0 // 1100 0000
26 t3 = 0xE0 // 1110 0000
27 t4 = 0xF0 // 1111 0000
28 t5 = 0xF8 // 1111 1000
29 t6 = 0xFC // 1111 1100
30 te = 0xFE // 1111 1110
31)
32
33func (t *Trie) lookupValue(n uint16, b byte) Elem {
34 return Elem(t.Values[int(n)<<6+int(b)])
35}
36
37// lookup returns the trie value for the first UTF-8 encoding in s and
38// the width in bytes of this encoding. The size will be 0 if s does not
39// hold enough bytes to complete the encoding. len(s) must be greater than 0.
40func (t *Trie) lookup(s []byte) (v Elem, sz int) {
41 c0 := s[0]
42 switch {
43 case c0 < tx:
44 return Elem(t.Values0[c0]), 1
45 case c0 < t2:
46 return 0, 1
47 case c0 < t3:
48 if len(s) < 2 {
49 return 0, 0
50 }
51 i := t.Index0[c0]
52 c1 := s[1]
53 if c1 < tx || t2 <= c1 {
54 return 0, 1
55 }
56 return t.lookupValue(i, c1), 2
57 case c0 < t4:
58 if len(s) < 3 {
59 return 0, 0
60 }
61 i := t.Index0[c0]
62 c1 := s[1]
63 if c1 < tx || t2 <= c1 {
64 return 0, 1
65 }
66 o := int(i)<<6 + int(c1)
67 i = t.Index[o]
68 c2 := s[2]
69 if c2 < tx || t2 <= c2 {
70 return 0, 2
71 }
72 return t.lookupValue(i, c2), 3
73 case c0 < t5:
74 if len(s) < 4 {
75 return 0, 0
76 }
77 i := t.Index0[c0]
78 c1 := s[1]
79 if c1 < tx || t2 <= c1 {
80 return 0, 1
81 }
82 o := int(i)<<6 + int(c1)
83 i = t.Index[o]
84 c2 := s[2]
85 if c2 < tx || t2 <= c2 {
86 return 0, 2
87 }
88 o = int(i)<<6 + int(c2)
89 i = t.Index[o]
90 c3 := s[3]
91 if c3 < tx || t2 <= c3 {
92 return 0, 3
93 }
94 return t.lookupValue(i, c3), 4
95 }
96 // Illegal rune
97 return 0, 1
98}
99
100// The body of lookupString is a verbatim copy of that of lookup.
101func (t *Trie) lookupString(s string) (v Elem, sz int) {
102 c0 := s[0]
103 switch {
104 case c0 < tx:
105 return Elem(t.Values0[c0]), 1
106 case c0 < t2:
107 return 0, 1
108 case c0 < t3:
109 if len(s) < 2 {
110 return 0, 0
111 }
112 i := t.Index0[c0]
113 c1 := s[1]
114 if c1 < tx || t2 <= c1 {
115 return 0, 1
116 }
117 return t.lookupValue(i, c1), 2
118 case c0 < t4:
119 if len(s) < 3 {
120 return 0, 0
121 }
122 i := t.Index0[c0]
123 c1 := s[1]
124 if c1 < tx || t2 <= c1 {
125 return 0, 1
126 }
127 o := int(i)<<6 + int(c1)
128 i = t.Index[o]
129 c2 := s[2]
130 if c2 < tx || t2 <= c2 {
131 return 0, 2
132 }
133 return t.lookupValue(i, c2), 3
134 case c0 < t5:
135 if len(s) < 4 {
136 return 0, 0
137 }
138 i := t.Index0[c0]
139 c1 := s[1]
140 if c1 < tx || t2 <= c1 {
141 return 0, 1
142 }
143 o := int(i)<<6 + int(c1)
144 i = t.Index[o]
145 c2 := s[2]
146 if c2 < tx || t2 <= c2 {
147 return 0, 2
148 }
149 o = int(i)<<6 + int(c2)
150 i = t.Index[o]
151 c3 := s[3]
152 if c3 < tx || t2 <= c3 {
153 return 0, 3
154 }
155 return t.lookupValue(i, c3), 4
156 }
157 // Illegal rune
158 return 0, 1
159}