blob: 54c2ba0b3a2d9716669b2bc4f99c4f0a95da2233 [file] [log] [blame]
Zack Williamse940c7a2019-08-21 14:25:39 -07001package jsoniter
2
3import (
4 "unicode/utf8"
5)
6
// htmlSafeSet is indexed by ASCII byte value. An entry is true when that
// byte may be copied verbatim into a JSON string that will be embedded
// inside HTML <script> tags, and false when it has to be escaped first.
//
// The false entries are the ASCII control characters (0-31), the double
// quote ("), the backslash ("\"), the HTML tag delimiters ("<" and ">"),
// and the ampersand ("&"). Every other ASCII byte, DEL (0x7f) included,
// is true.
var htmlSafeSet = func() [utf8.RuneSelf]bool {
	var table [utf8.RuneSelf]bool
	// Bytes below 0x20 keep the zero value (false); everything from the
	// space character up to the end of the ASCII range starts out safe.
	for c := 0x20; c < utf8.RuneSelf; c++ {
		table[c] = true
	}
	// Knock out the printable characters that still need escaping.
	for _, c := range []byte{'"', '&', '<', '>', '\\'} {
		table[c] = false
	}
	return table
}()
112
// safeSet is indexed by ASCII byte value. An entry is true when that byte
// may be copied verbatim into a JSON string, and false when it has to be
// escaped first.
//
// The false entries are the ASCII control characters (0-31), the double
// quote ("), and the backslash ("\"). Every other ASCII byte, DEL (0x7f)
// included, is true.
var safeSet = func() [utf8.RuneSelf]bool {
	var table [utf8.RuneSelf]bool
	// Bytes below 0x20 keep the zero value (false); everything from the
	// space character up to the end of the ASCII range starts out safe.
	for c := 0x20; c < utf8.RuneSelf; c++ {
		table[c] = true
	}
	// The two printable characters that JSON itself requires escaping.
	table['"'] = false
	table['\\'] = false
	return table
}()
217
// hex maps a nibble value (0-15) to its lowercase hexadecimal digit. It is
// indexed with b>>4 and b&0xF when emitting \u00XX escape sequences.
// Declared as a constant because the table is never meant to be modified.
const hex = "0123456789abcdef"
219
220// WriteStringWithHTMLEscaped write string to stream with html special characters escaped
221func (stream *Stream) WriteStringWithHTMLEscaped(s string) {
222 valLen := len(s)
223 stream.buf = append(stream.buf, '"')
224 // write string, the fast path, without utf8 and escape support
225 i := 0
226 for ; i < valLen; i++ {
227 c := s[i]
228 if c < utf8.RuneSelf && htmlSafeSet[c] {
229 stream.buf = append(stream.buf, c)
230 } else {
231 break
232 }
233 }
234 if i == valLen {
235 stream.buf = append(stream.buf, '"')
236 return
237 }
238 writeStringSlowPathWithHTMLEscaped(stream, i, s, valLen)
239}
240
241func writeStringSlowPathWithHTMLEscaped(stream *Stream, i int, s string, valLen int) {
242 start := i
243 // for the remaining parts, we process them char by char
244 for i < valLen {
245 if b := s[i]; b < utf8.RuneSelf {
246 if htmlSafeSet[b] {
247 i++
248 continue
249 }
250 if start < i {
251 stream.WriteRaw(s[start:i])
252 }
253 switch b {
254 case '\\', '"':
255 stream.writeTwoBytes('\\', b)
256 case '\n':
257 stream.writeTwoBytes('\\', 'n')
258 case '\r':
259 stream.writeTwoBytes('\\', 'r')
260 case '\t':
261 stream.writeTwoBytes('\\', 't')
262 default:
263 // This encodes bytes < 0x20 except for \t, \n and \r.
264 // If escapeHTML is set, it also escapes <, >, and &
265 // because they can lead to security holes when
266 // user-controlled strings are rendered into JSON
267 // and served to some browsers.
268 stream.WriteRaw(`\u00`)
269 stream.writeTwoBytes(hex[b>>4], hex[b&0xF])
270 }
271 i++
272 start = i
273 continue
274 }
275 c, size := utf8.DecodeRuneInString(s[i:])
276 if c == utf8.RuneError && size == 1 {
277 if start < i {
278 stream.WriteRaw(s[start:i])
279 }
280 stream.WriteRaw(`\ufffd`)
281 i++
282 start = i
283 continue
284 }
285 // U+2028 is LINE SEPARATOR.
286 // U+2029 is PARAGRAPH SEPARATOR.
287 // They are both technically valid characters in JSON strings,
288 // but don't work in JSONP, which has to be evaluated as JavaScript,
289 // and can lead to security holes there. It is valid JSON to
290 // escape them, so we do so unconditionally.
291 // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
292 if c == '\u2028' || c == '\u2029' {
293 if start < i {
294 stream.WriteRaw(s[start:i])
295 }
296 stream.WriteRaw(`\u202`)
297 stream.writeByte(hex[c&0xF])
298 i += size
299 start = i
300 continue
301 }
302 i += size
303 }
304 if start < len(s) {
305 stream.WriteRaw(s[start:])
306 }
307 stream.writeByte('"')
308}
309
310// WriteString write string to stream without html escape
311func (stream *Stream) WriteString(s string) {
312 valLen := len(s)
313 stream.buf = append(stream.buf, '"')
314 // write string, the fast path, without utf8 and escape support
315 i := 0
316 for ; i < valLen; i++ {
317 c := s[i]
318 if c > 31 && c != '"' && c != '\\' {
319 stream.buf = append(stream.buf, c)
320 } else {
321 break
322 }
323 }
324 if i == valLen {
325 stream.buf = append(stream.buf, '"')
326 return
327 }
328 writeStringSlowPath(stream, i, s, valLen)
329}
330
331func writeStringSlowPath(stream *Stream, i int, s string, valLen int) {
332 start := i
333 // for the remaining parts, we process them char by char
334 for i < valLen {
335 if b := s[i]; b < utf8.RuneSelf {
336 if safeSet[b] {
337 i++
338 continue
339 }
340 if start < i {
341 stream.WriteRaw(s[start:i])
342 }
343 switch b {
344 case '\\', '"':
345 stream.writeTwoBytes('\\', b)
346 case '\n':
347 stream.writeTwoBytes('\\', 'n')
348 case '\r':
349 stream.writeTwoBytes('\\', 'r')
350 case '\t':
351 stream.writeTwoBytes('\\', 't')
352 default:
353 // This encodes bytes < 0x20 except for \t, \n and \r.
354 // If escapeHTML is set, it also escapes <, >, and &
355 // because they can lead to security holes when
356 // user-controlled strings are rendered into JSON
357 // and served to some browsers.
358 stream.WriteRaw(`\u00`)
359 stream.writeTwoBytes(hex[b>>4], hex[b&0xF])
360 }
361 i++
362 start = i
363 continue
364 }
365 i++
366 continue
367 }
368 if start < len(s) {
369 stream.WriteRaw(s[start:])
370 }
371 stream.writeByte('"')
372}