blob: da289ccce6e23976e1edac56765cf935fd3b00c3 [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package text
6
7import (
8 "math"
9 "math/bits"
10 "strconv"
11 "strings"
12 "unicode/utf8"
13
14 "google.golang.org/protobuf/internal/detrand"
15 "google.golang.org/protobuf/internal/errors"
16)
17
// encType identifies the kind of construct the Encoder wrote most recently.
// Each named kind occupies its own bit so that several kinds can be matched
// with a single mask.
type encType uint8

// The zero encType is intentionally left unnamed: it is the value a fresh
// encoderState carries before any construct has been written.
const (
	name encType = 1 << iota
	scalar
	messageOpen
	messageClose
)
28
29// Encoder provides methods to write out textproto constructs and values. The user is
30// responsible for producing valid sequences of constructs and values.
31type Encoder struct {
32 encoderState
33
34 indent string
35 delims [2]byte
36 outputASCII bool
37}
38
39type encoderState struct {
40 lastType encType
41 indents []byte
42 out []byte
43}
44
45// NewEncoder returns an Encoder.
46//
47// If indent is a non-empty string, it causes every entry in a List or Message
48// to be preceded by the indent and trailed by a newline.
49//
50// If delims is not the zero value, it controls the delimiter characters used
51// for messages (e.g., "{}" vs "<>").
52//
53// If outputASCII is true, strings will be serialized in such a way that
54// multi-byte UTF-8 sequences are escaped. This property ensures that the
55// overall output is ASCII (as opposed to UTF-8).
56func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
57 e := &Encoder{}
58 if len(indent) > 0 {
59 if strings.Trim(indent, " \t") != "" {
60 return nil, errors.New("indent may only be composed of space and tab characters")
61 }
62 e.indent = indent
63 }
64 switch delims {
65 case [2]byte{0, 0}:
66 e.delims = [2]byte{'{', '}'}
67 case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
68 e.delims = delims
69 default:
70 return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
71 }
72 e.outputASCII = outputASCII
73
74 return e, nil
75}
76
77// Bytes returns the content of the written bytes.
78func (e *Encoder) Bytes() []byte {
79 return e.out
80}
81
82// StartMessage writes out the '{' or '<' symbol.
83func (e *Encoder) StartMessage() {
84 e.prepareNext(messageOpen)
85 e.out = append(e.out, e.delims[0])
86}
87
88// EndMessage writes out the '}' or '>' symbol.
89func (e *Encoder) EndMessage() {
90 e.prepareNext(messageClose)
91 e.out = append(e.out, e.delims[1])
92}
93
94// WriteName writes out the field name and the separator ':'.
95func (e *Encoder) WriteName(s string) {
96 e.prepareNext(name)
97 e.out = append(e.out, s...)
98 e.out = append(e.out, ':')
99}
100
101// WriteBool writes out the given boolean value.
102func (e *Encoder) WriteBool(b bool) {
103 if b {
104 e.WriteLiteral("true")
105 } else {
106 e.WriteLiteral("false")
107 }
108}
109
110// WriteString writes out the given string value.
111func (e *Encoder) WriteString(s string) {
112 e.prepareNext(scalar)
113 e.out = appendString(e.out, s, e.outputASCII)
114}
115
116func appendString(out []byte, in string, outputASCII bool) []byte {
117 out = append(out, '"')
118 i := indexNeedEscapeInString(in)
119 in, out = in[i:], append(out, in[:i]...)
120 for len(in) > 0 {
121 switch r, n := utf8.DecodeRuneInString(in); {
122 case r == utf8.RuneError && n == 1:
123 // We do not report invalid UTF-8 because strings in the text format
124 // are used to represent both the proto string and bytes type.
125 r = rune(in[0])
126 fallthrough
127 case r < ' ' || r == '"' || r == '\\' || r == 0x7f:
128 out = append(out, '\\')
129 switch r {
130 case '"', '\\':
131 out = append(out, byte(r))
132 case '\n':
133 out = append(out, 'n')
134 case '\r':
135 out = append(out, 'r')
136 case '\t':
137 out = append(out, 't')
138 default:
139 out = append(out, 'x')
140 out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
141 out = strconv.AppendUint(out, uint64(r), 16)
142 }
143 in = in[n:]
144 case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f):
145 out = append(out, '\\')
146 if r <= math.MaxUint16 {
147 out = append(out, 'u')
148 out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
149 out = strconv.AppendUint(out, uint64(r), 16)
150 } else {
151 out = append(out, 'U')
152 out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
153 out = strconv.AppendUint(out, uint64(r), 16)
154 }
155 in = in[n:]
156 default:
157 i := indexNeedEscapeInString(in[n:])
158 in, out = in[n+i:], append(out, in[:n+i]...)
159 }
160 }
161 out = append(out, '"')
162 return out
163}
164
// indexNeedEscapeInString returns the index of the first byte in s that may
// need escaping: a control character, a double or single quote, a backslash,
// DEL, or any non-ASCII byte. If there is no such byte it returns len(s).
func indexNeedEscapeInString(s string) int {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c < ' ', c >= 0x7f, c == '"', c == '\'', c == '\\':
			return i
		}
	}
	return len(s)
}
175
176// WriteFloat writes out the given float value for given bitSize.
177func (e *Encoder) WriteFloat(n float64, bitSize int) {
178 e.prepareNext(scalar)
179 e.out = appendFloat(e.out, n, bitSize)
180}
181
// appendFloat appends the text form of n to out and returns the extended
// slice. NaN and the infinities are written as "nan", "inf" and "-inf";
// every other value is formatted by strconv in 'g' format with the fewest
// digits that identify it at the given bitSize.
func appendFloat(out []byte, n float64, bitSize int) []byte {
	if math.IsNaN(n) {
		return append(out, "nan"...)
	}
	if math.IsInf(n, 0) {
		if n < 0 {
			return append(out, "-inf"...)
		}
		return append(out, "inf"...)
	}
	return strconv.AppendFloat(out, n, 'g', -1, bitSize)
}
194
195// WriteInt writes out the given signed integer value.
196func (e *Encoder) WriteInt(n int64) {
197 e.prepareNext(scalar)
198 e.out = append(e.out, strconv.FormatInt(n, 10)...)
199}
200
201// WriteUint writes out the given unsigned integer value.
202func (e *Encoder) WriteUint(n uint64) {
203 e.prepareNext(scalar)
204 e.out = append(e.out, strconv.FormatUint(n, 10)...)
205}
206
207// WriteLiteral writes out the given string as a literal value without quotes.
208// This is used for writing enum literal strings.
209func (e *Encoder) WriteLiteral(s string) {
210 e.prepareNext(scalar)
211 e.out = append(e.out, s...)
212}
213
214// prepareNext adds possible space and indentation for the next value based
215// on last encType and indent option. It also updates e.lastType to next.
216func (e *Encoder) prepareNext(next encType) {
217 defer func() {
218 e.lastType = next
219 }()
220
221 // Single line.
222 if len(e.indent) == 0 {
223 // Add space after each field before the next one.
224 if e.lastType&(scalar|messageClose) != 0 && next == name {
225 e.out = append(e.out, ' ')
226 // Add a random extra space to make output unstable.
227 if detrand.Bool() {
228 e.out = append(e.out, ' ')
229 }
230 }
231 return
232 }
233
234 // Multi-line.
235 switch {
236 case e.lastType == name:
237 e.out = append(e.out, ' ')
238 // Add a random extra space after name: to make output unstable.
239 if detrand.Bool() {
240 e.out = append(e.out, ' ')
241 }
242
243 case e.lastType == messageOpen && next != messageClose:
244 e.indents = append(e.indents, e.indent...)
245 e.out = append(e.out, '\n')
246 e.out = append(e.out, e.indents...)
247
248 case e.lastType&(scalar|messageClose) != 0:
249 if next == messageClose {
250 e.indents = e.indents[:len(e.indents)-len(e.indent)]
251 }
252 e.out = append(e.out, '\n')
253 e.out = append(e.out, e.indents...)
254 }
255}
256
257// Snapshot returns the current snapshot for use in Reset.
258func (e *Encoder) Snapshot() encoderState {
259 return e.encoderState
260}
261
262// Reset resets the Encoder to the given encoderState from a Snapshot.
263func (e *Encoder) Reset(es encoderState) {
264 e.encoderState = es
265}
266
267// AppendString appends the escaped form of the input string to b.
268func AppendString(b []byte, s string) []byte {
269 return appendString(b, s, false)
270}