blob: c4ba1c598fb138e8485514930411b0a850230f84 [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package text
6
7import (
8 "math"
9 "math/bits"
10 "strconv"
11 "strings"
12 "unicode/utf8"
13
14 "google.golang.org/protobuf/internal/detrand"
15 "google.golang.org/protobuf/internal/errors"
16)
17
// encType identifies the kind of construct the encoder wrote most recently.
// Each kind occupies its own bit so that several kinds can be tested at once
// with a mask; the zero value means nothing has been written yet.
type encType uint8

const (
	name         encType = 1 << iota // a field name followed by ':'
	scalar                           // a scalar or literal value
	messageOpen                      // an opening message delimiter
	messageClose                     // a closing message delimiter
)
28
29// Encoder provides methods to write out textproto constructs and values. The user is
30// responsible for producing valid sequences of constructs and values.
31type Encoder struct {
32 encoderState
33
34 indent string
35 newline string // set to "\n" if len(indent) > 0
36 delims [2]byte
37 outputASCII bool
38}
39
40type encoderState struct {
41 lastType encType
42 indents []byte
43 out []byte
44}
45
46// NewEncoder returns an Encoder.
47//
48// If indent is a non-empty string, it causes every entry in a List or Message
49// to be preceded by the indent and trailed by a newline.
50//
51// If delims is not the zero value, it controls the delimiter characters used
52// for messages (e.g., "{}" vs "<>").
53//
54// If outputASCII is true, strings will be serialized in such a way that
55// multi-byte UTF-8 sequences are escaped. This property ensures that the
56// overall output is ASCII (as opposed to UTF-8).
57func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
58 e := &Encoder{}
59 if len(indent) > 0 {
60 if strings.Trim(indent, " \t") != "" {
61 return nil, errors.New("indent may only be composed of space and tab characters")
62 }
63 e.indent = indent
64 e.newline = "\n"
65 }
66 switch delims {
67 case [2]byte{0, 0}:
68 e.delims = [2]byte{'{', '}'}
69 case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
70 e.delims = delims
71 default:
72 return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
73 }
74 e.outputASCII = outputASCII
75
76 return e, nil
77}
78
79// Bytes returns the content of the written bytes.
80func (e *Encoder) Bytes() []byte {
81 return e.out
82}
83
84// StartMessage writes out the '{' or '<' symbol.
85func (e *Encoder) StartMessage() {
86 e.prepareNext(messageOpen)
87 e.out = append(e.out, e.delims[0])
88}
89
90// EndMessage writes out the '}' or '>' symbol.
91func (e *Encoder) EndMessage() {
92 e.prepareNext(messageClose)
93 e.out = append(e.out, e.delims[1])
94}
95
96// WriteName writes out the field name and the separator ':'.
97func (e *Encoder) WriteName(s string) {
98 e.prepareNext(name)
99 e.out = append(e.out, s...)
100 e.out = append(e.out, ':')
101}
102
103// WriteBool writes out the given boolean value.
104func (e *Encoder) WriteBool(b bool) {
105 if b {
106 e.WriteLiteral("true")
107 } else {
108 e.WriteLiteral("false")
109 }
110}
111
112// WriteString writes out the given string value.
113func (e *Encoder) WriteString(s string) {
114 e.prepareNext(scalar)
115 e.out = appendString(e.out, s, e.outputASCII)
116}
117
118func appendString(out []byte, in string, outputASCII bool) []byte {
119 out = append(out, '"')
120 i := indexNeedEscapeInString(in)
121 in, out = in[i:], append(out, in[:i]...)
122 for len(in) > 0 {
123 switch r, n := utf8.DecodeRuneInString(in); {
124 case r == utf8.RuneError && n == 1:
125 // We do not report invalid UTF-8 because strings in the text format
126 // are used to represent both the proto string and bytes type.
127 r = rune(in[0])
128 fallthrough
129 case r < ' ' || r == '"' || r == '\\':
130 out = append(out, '\\')
131 switch r {
132 case '"', '\\':
133 out = append(out, byte(r))
134 case '\n':
135 out = append(out, 'n')
136 case '\r':
137 out = append(out, 'r')
138 case '\t':
139 out = append(out, 't')
140 default:
141 out = append(out, 'x')
142 out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
143 out = strconv.AppendUint(out, uint64(r), 16)
144 }
145 in = in[n:]
146 case outputASCII && r >= utf8.RuneSelf:
147 out = append(out, '\\')
148 if r <= math.MaxUint16 {
149 out = append(out, 'u')
150 out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
151 out = strconv.AppendUint(out, uint64(r), 16)
152 } else {
153 out = append(out, 'U')
154 out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
155 out = strconv.AppendUint(out, uint64(r), 16)
156 }
157 in = in[n:]
158 default:
159 i := indexNeedEscapeInString(in[n:])
160 in, out = in[n+i:], append(out, in[:n+i]...)
161 }
162 }
163 out = append(out, '"')
164 return out
165}
166
// indexNeedEscapeInString reports the offset of the first byte in s at which
// the fast copy path has to stop: a byte below space, a double or single
// quote, a backslash, or any non-ASCII byte. It returns len(s) when s
// contains no such byte.
func indexNeedEscapeInString(s string) int {
	for i, c := range []byte(s) {
		switch {
		case c < ' ', c >= utf8.RuneSelf:
			return i
		case c == '"', c == '\'', c == '\\':
			return i
		}
	}
	return len(s)
}
177
178// WriteFloat writes out the given float value for given bitSize.
179func (e *Encoder) WriteFloat(n float64, bitSize int) {
180 e.prepareNext(scalar)
181 e.out = appendFloat(e.out, n, bitSize)
182}
183
// appendFloat appends the text-format representation of n to out and returns
// the extended slice. NaN and the infinities are written as "nan", "inf" and
// "-inf"; every other value uses the shortest 'g' formatting that round-trips
// at the given bitSize.
func appendFloat(out []byte, n float64, bitSize int) []byte {
	if math.IsNaN(n) {
		return append(out, "nan"...)
	}
	if math.IsInf(n, 0) {
		if n < 0 {
			return append(out, "-inf"...)
		}
		return append(out, "inf"...)
	}
	return strconv.AppendFloat(out, n, 'g', -1, bitSize)
}
196
197// WriteInt writes out the given signed integer value.
198func (e *Encoder) WriteInt(n int64) {
199 e.prepareNext(scalar)
200 e.out = append(e.out, strconv.FormatInt(n, 10)...)
201}
202
203// WriteUint writes out the given unsigned integer value.
204func (e *Encoder) WriteUint(n uint64) {
205 e.prepareNext(scalar)
206 e.out = append(e.out, strconv.FormatUint(n, 10)...)
207}
208
209// WriteLiteral writes out the given string as a literal value without quotes.
210// This is used for writing enum literal strings.
211func (e *Encoder) WriteLiteral(s string) {
212 e.prepareNext(scalar)
213 e.out = append(e.out, s...)
214}
215
216// prepareNext adds possible space and indentation for the next value based
217// on last encType and indent option. It also updates e.lastType to next.
218func (e *Encoder) prepareNext(next encType) {
219 defer func() {
220 e.lastType = next
221 }()
222
223 // Single line.
224 if len(e.indent) == 0 {
225 // Add space after each field before the next one.
226 if e.lastType&(scalar|messageClose) != 0 && next == name {
227 e.out = append(e.out, ' ')
228 // Add a random extra space to make output unstable.
229 if detrand.Bool() {
230 e.out = append(e.out, ' ')
231 }
232 }
233 return
234 }
235
236 // Multi-line.
237 switch {
238 case e.lastType == name:
239 e.out = append(e.out, ' ')
240 // Add a random extra space after name: to make output unstable.
241 if detrand.Bool() {
242 e.out = append(e.out, ' ')
243 }
244
245 case e.lastType == messageOpen && next != messageClose:
246 e.indents = append(e.indents, e.indent...)
247 e.out = append(e.out, '\n')
248 e.out = append(e.out, e.indents...)
249
250 case e.lastType&(scalar|messageClose) != 0:
251 if next == messageClose {
252 e.indents = e.indents[:len(e.indents)-len(e.indent)]
253 }
254 e.out = append(e.out, '\n')
255 e.out = append(e.out, e.indents...)
256 }
257}
258
259// Snapshot returns the current snapshot for use in Reset.
260func (e *Encoder) Snapshot() encoderState {
261 return e.encoderState
262}
263
264// Reset resets the Encoder to the given encoderState from a Snapshot.
265func (e *Encoder) Reset(es encoderState) {
266 e.encoderState = es
267}