Matteo Scandolo | a428586 | 2020-12-01 18:10:10 -0800 | [diff] [blame] | 1 | package jsoniter |
| 2 | |
| 3 | import ( |
| 4 | "unicode/utf8" |
| 5 | ) |
| 6 | |
// htmlSafeSet reports, for each ASCII code point used as an index, whether
// that character may appear verbatim inside a JSON string that is embedded
// in an HTML <script> element.
//
// Entries are false for the ASCII control characters (0-31), the double
// quote ("), the backslash ("\"), the HTML tag delimiters ("<" and ">") and
// the ampersand ("&"); every other entry, including DEL (0x7f), is true.
var htmlSafeSet = func() [utf8.RuneSelf]bool {
	var set [utf8.RuneSelf]bool
	// Everything from space upward is safe by default; control characters
	// keep the zero value (false).
	for c := ' '; c < utf8.RuneSelf; c++ {
		set[c] = true
	}
	// Carve out the characters that must always be escaped.
	for _, c := range `"&<>\` {
		set[c] = false
	}
	return set
}()
| 112 | |
// safeSet reports, for each ASCII code point used as an index, whether that
// character may appear verbatim inside a JSON string.
//
// Entries are false for the ASCII control characters (0-31), the double
// quote (") and the backslash ("\"); every other entry, including DEL
// (0x7f), is true.
var safeSet = func() [utf8.RuneSelf]bool {
	var set [utf8.RuneSelf]bool
	// Everything from space upward is safe by default; control characters
	// keep the zero value (false).
	for c := ' '; c < utf8.RuneSelf; c++ {
		set[c] = true
	}
	// Carve out the two characters JSON always requires to be escaped.
	for _, c := range `"\` {
		set[c] = false
	}
	return set
}()
| 217 | |
// hex maps a nibble value (0-15) to its lowercase hexadecimal digit. It is
// indexed when emitting \u00XX and \u202X escape sequences. Declared as a
// constant so the table cannot be reassigned at runtime.
const hex = "0123456789abcdef"
| 219 | |
| 220 | // WriteStringWithHTMLEscaped write string to stream with html special characters escaped |
| 221 | func (stream *Stream) WriteStringWithHTMLEscaped(s string) { |
| 222 | valLen := len(s) |
| 223 | stream.buf = append(stream.buf, '"') |
| 224 | // write string, the fast path, without utf8 and escape support |
| 225 | i := 0 |
| 226 | for ; i < valLen; i++ { |
| 227 | c := s[i] |
| 228 | if c < utf8.RuneSelf && htmlSafeSet[c] { |
| 229 | stream.buf = append(stream.buf, c) |
| 230 | } else { |
| 231 | break |
| 232 | } |
| 233 | } |
| 234 | if i == valLen { |
| 235 | stream.buf = append(stream.buf, '"') |
| 236 | return |
| 237 | } |
| 238 | writeStringSlowPathWithHTMLEscaped(stream, i, s, valLen) |
| 239 | } |
| 240 | |
| 241 | func writeStringSlowPathWithHTMLEscaped(stream *Stream, i int, s string, valLen int) { |
| 242 | start := i |
| 243 | // for the remaining parts, we process them char by char |
| 244 | for i < valLen { |
| 245 | if b := s[i]; b < utf8.RuneSelf { |
| 246 | if htmlSafeSet[b] { |
| 247 | i++ |
| 248 | continue |
| 249 | } |
| 250 | if start < i { |
| 251 | stream.WriteRaw(s[start:i]) |
| 252 | } |
| 253 | switch b { |
| 254 | case '\\', '"': |
| 255 | stream.writeTwoBytes('\\', b) |
| 256 | case '\n': |
| 257 | stream.writeTwoBytes('\\', 'n') |
| 258 | case '\r': |
| 259 | stream.writeTwoBytes('\\', 'r') |
| 260 | case '\t': |
| 261 | stream.writeTwoBytes('\\', 't') |
| 262 | default: |
| 263 | // This encodes bytes < 0x20 except for \t, \n and \r. |
| 264 | // If escapeHTML is set, it also escapes <, >, and & |
| 265 | // because they can lead to security holes when |
| 266 | // user-controlled strings are rendered into JSON |
| 267 | // and served to some browsers. |
| 268 | stream.WriteRaw(`\u00`) |
| 269 | stream.writeTwoBytes(hex[b>>4], hex[b&0xF]) |
| 270 | } |
| 271 | i++ |
| 272 | start = i |
| 273 | continue |
| 274 | } |
| 275 | c, size := utf8.DecodeRuneInString(s[i:]) |
| 276 | if c == utf8.RuneError && size == 1 { |
| 277 | if start < i { |
| 278 | stream.WriteRaw(s[start:i]) |
| 279 | } |
| 280 | stream.WriteRaw(`\ufffd`) |
| 281 | i++ |
| 282 | start = i |
| 283 | continue |
| 284 | } |
| 285 | // U+2028 is LINE SEPARATOR. |
| 286 | // U+2029 is PARAGRAPH SEPARATOR. |
| 287 | // They are both technically valid characters in JSON strings, |
| 288 | // but don't work in JSONP, which has to be evaluated as JavaScript, |
| 289 | // and can lead to security holes there. It is valid JSON to |
| 290 | // escape them, so we do so unconditionally. |
| 291 | // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. |
| 292 | if c == '\u2028' || c == '\u2029' { |
| 293 | if start < i { |
| 294 | stream.WriteRaw(s[start:i]) |
| 295 | } |
| 296 | stream.WriteRaw(`\u202`) |
| 297 | stream.writeByte(hex[c&0xF]) |
| 298 | i += size |
| 299 | start = i |
| 300 | continue |
| 301 | } |
| 302 | i += size |
| 303 | } |
| 304 | if start < len(s) { |
| 305 | stream.WriteRaw(s[start:]) |
| 306 | } |
| 307 | stream.writeByte('"') |
| 308 | } |
| 309 | |
| 310 | // WriteString write string to stream without html escape |
| 311 | func (stream *Stream) WriteString(s string) { |
| 312 | valLen := len(s) |
| 313 | stream.buf = append(stream.buf, '"') |
| 314 | // write string, the fast path, without utf8 and escape support |
| 315 | i := 0 |
| 316 | for ; i < valLen; i++ { |
| 317 | c := s[i] |
| 318 | if c > 31 && c != '"' && c != '\\' { |
| 319 | stream.buf = append(stream.buf, c) |
| 320 | } else { |
| 321 | break |
| 322 | } |
| 323 | } |
| 324 | if i == valLen { |
| 325 | stream.buf = append(stream.buf, '"') |
| 326 | return |
| 327 | } |
| 328 | writeStringSlowPath(stream, i, s, valLen) |
| 329 | } |
| 330 | |
| 331 | func writeStringSlowPath(stream *Stream, i int, s string, valLen int) { |
| 332 | start := i |
| 333 | // for the remaining parts, we process them char by char |
| 334 | for i < valLen { |
| 335 | if b := s[i]; b < utf8.RuneSelf { |
| 336 | if safeSet[b] { |
| 337 | i++ |
| 338 | continue |
| 339 | } |
| 340 | if start < i { |
| 341 | stream.WriteRaw(s[start:i]) |
| 342 | } |
| 343 | switch b { |
| 344 | case '\\', '"': |
| 345 | stream.writeTwoBytes('\\', b) |
| 346 | case '\n': |
| 347 | stream.writeTwoBytes('\\', 'n') |
| 348 | case '\r': |
| 349 | stream.writeTwoBytes('\\', 'r') |
| 350 | case '\t': |
| 351 | stream.writeTwoBytes('\\', 't') |
| 352 | default: |
| 353 | // This encodes bytes < 0x20 except for \t, \n and \r. |
| 354 | // If escapeHTML is set, it also escapes <, >, and & |
| 355 | // because they can lead to security holes when |
| 356 | // user-controlled strings are rendered into JSON |
| 357 | // and served to some browsers. |
| 358 | stream.WriteRaw(`\u00`) |
| 359 | stream.writeTwoBytes(hex[b>>4], hex[b&0xF]) |
| 360 | } |
| 361 | i++ |
| 362 | start = i |
| 363 | continue |
| 364 | } |
| 365 | i++ |
| 366 | continue |
| 367 | } |
| 368 | if start < len(s) { |
| 369 | stream.WriteRaw(s[start:]) |
| 370 | } |
| 371 | stream.writeByte('"') |
| 372 | } |