// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package protowire parses and formats the raw wire encoding.
// See https://developers.google.com/protocol-buffers/docs/encoding.
//
// For marshaling and unmarshaling entire protobuf messages,
// use the "google.golang.org/protobuf/proto" package instead.
10package protowire
11
12import (
13 "io"
14 "math"
15 "math/bits"
16
17 "google.golang.org/protobuf/internal/errors"
18)
19
// Number is a field number: the numeric identifier that, together with a
// wire Type, makes up the tag in front of every encoded field.
type Number int32
22
23const (
24 MinValidNumber Number = 1
25 FirstReservedNumber Number = 19000
26 LastReservedNumber Number = 19999
27 MaxValidNumber Number = 1<<29 - 1
28)
29
30// IsValid reports whether the field number is semantically valid.
31//
32// Note that while numbers within the reserved range are semantically invalid,
33// they are syntactically valid in the wire format.
34// Implementations may treat records with reserved field numbers as unknown.
35func (n Number) IsValid() bool {
36 return MinValidNumber <= n && n < FirstReservedNumber || LastReservedNumber < n && n <= MaxValidNumber
37}
38
// Type is a wire type: the low three bits of an encoded tag, which select
// how the value that follows the tag is laid out.
type Type int8
41
42const (
43 VarintType Type = 0
44 Fixed32Type Type = 5
45 Fixed64Type Type = 1
46 BytesType Type = 2
47 StartGroupType Type = 3
48 EndGroupType Type = 4
49)
50
// Negative error codes returned in place of a length by the Consume
// functions. They count down from -1; ParseError maps each one to an error.
const (
	// errCodeTruncated: the input ended before the value was complete.
	errCodeTruncated = -(iota + 1)
	// errCodeFieldNumber: the tag carried an invalid field number.
	errCodeFieldNumber
	// errCodeOverflow: a varint did not fit in 64 bits.
	errCodeOverflow
	// errCodeReserved: the tag carried a wire type this package cannot parse.
	errCodeReserved
	// errCodeEndGroup: an end group marker was unexpected or mismatched.
	errCodeEndGroup
)
59
60var (
61 errFieldNumber = errors.New("invalid field number")
62 errOverflow = errors.New("variable length integer overflow")
63 errReserved = errors.New("cannot parse reserved wire type")
64 errEndGroup = errors.New("mismatching end group marker")
65 errParse = errors.New("parse error")
66)
67
68// ParseError converts an error code into an error value.
69// This returns nil if n is a non-negative number.
70func ParseError(n int) error {
71 if n >= 0 {
72 return nil
73 }
74 switch n {
75 case errCodeTruncated:
76 return io.ErrUnexpectedEOF
77 case errCodeFieldNumber:
78 return errFieldNumber
79 case errCodeOverflow:
80 return errOverflow
81 case errCodeReserved:
82 return errReserved
83 case errCodeEndGroup:
84 return errEndGroup
85 default:
86 return errParse
87 }
88}
89
90// ConsumeField parses an entire field record (both tag and value) and returns
91// the field number, the wire type, and the total length.
92// This returns a negative length upon an error (see ParseError).
93//
94// The total length includes the tag header and the end group marker (if the
95// field is a group).
96func ConsumeField(b []byte) (Number, Type, int) {
97 num, typ, n := ConsumeTag(b)
98 if n < 0 {
99 return 0, 0, n // forward error code
100 }
101 m := ConsumeFieldValue(num, typ, b[n:])
102 if m < 0 {
103 return 0, 0, m // forward error code
104 }
105 return num, typ, n + m
106}
107
108// ConsumeFieldValue parses a field value and returns its length.
109// This assumes that the field Number and wire Type have already been parsed.
110// This returns a negative length upon an error (see ParseError).
111//
112// When parsing a group, the length includes the end group marker and
113// the end group is verified to match the starting field number.
114func ConsumeFieldValue(num Number, typ Type, b []byte) (n int) {
115 switch typ {
116 case VarintType:
117 _, n = ConsumeVarint(b)
118 return n
119 case Fixed32Type:
120 _, n = ConsumeFixed32(b)
121 return n
122 case Fixed64Type:
123 _, n = ConsumeFixed64(b)
124 return n
125 case BytesType:
126 _, n = ConsumeBytes(b)
127 return n
128 case StartGroupType:
129 n0 := len(b)
130 for {
131 num2, typ2, n := ConsumeTag(b)
132 if n < 0 {
133 return n // forward error code
134 }
135 b = b[n:]
136 if typ2 == EndGroupType {
137 if num != num2 {
138 return errCodeEndGroup
139 }
140 return n0 - len(b)
141 }
142
143 n = ConsumeFieldValue(num2, typ2, b)
144 if n < 0 {
145 return n // forward error code
146 }
147 b = b[n:]
148 }
149 case EndGroupType:
150 return errCodeEndGroup
151 default:
152 return errCodeReserved
153 }
154}
155
156// AppendTag encodes num and typ as a varint-encoded tag and appends it to b.
157func AppendTag(b []byte, num Number, typ Type) []byte {
158 return AppendVarint(b, EncodeTag(num, typ))
159}
160
161// ConsumeTag parses b as a varint-encoded tag, reporting its length.
162// This returns a negative length upon an error (see ParseError).
163func ConsumeTag(b []byte) (Number, Type, int) {
164 v, n := ConsumeVarint(b)
165 if n < 0 {
166 return 0, 0, n // forward error code
167 }
168 num, typ := DecodeTag(v)
169 if num < MinValidNumber {
170 return 0, 0, errCodeFieldNumber
171 }
172 return num, typ, n
173}
174
175func SizeTag(num Number) int {
176 return SizeVarint(EncodeTag(num, 0)) // wire type has no effect on size
177}
178
// AppendVarint appends v to b as a varint-encoded uint64 and returns the
// extended slice. The encoding is 1 to 10 bytes long: seven payload bits per
// byte, least significant group first, with the high bit set on every byte
// except the last.
func AppendVarint(b []byte, v uint64) []byte {
	const maxLen = 10 // ceil(64 / 7)

	// Build the encoding in a scratch array so the bytes are added to b with
	// a single append.
	var enc [maxLen]byte
	i := 0
	for v >= 0x80 {
		enc[i] = byte(v) | 0x80 // low seven bits plus continuation flag
		v >>= 7
		i++
	}
	enc[i] = byte(v) // final group, continuation flag clear
	return append(b, enc[:i+1]...)
}
259
260// ConsumeVarint parses b as a varint-encoded uint64, reporting its length.
261// This returns a negative length upon an error (see ParseError).
262func ConsumeVarint(b []byte) (v uint64, n int) {
263 var y uint64
264 if len(b) <= 0 {
265 return 0, errCodeTruncated
266 }
267 v = uint64(b[0])
268 if v < 0x80 {
269 return v, 1
270 }
271 v -= 0x80
272
273 if len(b) <= 1 {
274 return 0, errCodeTruncated
275 }
276 y = uint64(b[1])
277 v += y << 7
278 if y < 0x80 {
279 return v, 2
280 }
281 v -= 0x80 << 7
282
283 if len(b) <= 2 {
284 return 0, errCodeTruncated
285 }
286 y = uint64(b[2])
287 v += y << 14
288 if y < 0x80 {
289 return v, 3
290 }
291 v -= 0x80 << 14
292
293 if len(b) <= 3 {
294 return 0, errCodeTruncated
295 }
296 y = uint64(b[3])
297 v += y << 21
298 if y < 0x80 {
299 return v, 4
300 }
301 v -= 0x80 << 21
302
303 if len(b) <= 4 {
304 return 0, errCodeTruncated
305 }
306 y = uint64(b[4])
307 v += y << 28
308 if y < 0x80 {
309 return v, 5
310 }
311 v -= 0x80 << 28
312
313 if len(b) <= 5 {
314 return 0, errCodeTruncated
315 }
316 y = uint64(b[5])
317 v += y << 35
318 if y < 0x80 {
319 return v, 6
320 }
321 v -= 0x80 << 35
322
323 if len(b) <= 6 {
324 return 0, errCodeTruncated
325 }
326 y = uint64(b[6])
327 v += y << 42
328 if y < 0x80 {
329 return v, 7
330 }
331 v -= 0x80 << 42
332
333 if len(b) <= 7 {
334 return 0, errCodeTruncated
335 }
336 y = uint64(b[7])
337 v += y << 49
338 if y < 0x80 {
339 return v, 8
340 }
341 v -= 0x80 << 49
342
343 if len(b) <= 8 {
344 return 0, errCodeTruncated
345 }
346 y = uint64(b[8])
347 v += y << 56
348 if y < 0x80 {
349 return v, 9
350 }
351 v -= 0x80 << 56
352
353 if len(b) <= 9 {
354 return 0, errCodeTruncated
355 }
356 y = uint64(b[9])
357 v += y << 63
358 if y < 2 {
359 return v, 10
360 }
361 return 0, errCodeOverflow
362}
363
// SizeVarint returns the number of bytes needed to varint-encode v.
// The size is guaranteed to be within 1 and 10, inclusive.
func SizeVarint(v uint64) int {
	// Each encoded byte carries seven bits of payload. OR-ing in the low
	// bit makes zero count as one significant bit, so it still takes a byte.
	significant := bits.Len64(v | 1)
	return 1 + (significant-1)/7
}
371
// AppendFixed32 appends the four bytes of v to b, least significant byte
// first, and returns the extended slice.
func AppendFixed32(b []byte, v uint32) []byte {
	var le [4]byte
	for i := range le {
		le[i] = byte(v >> (8 * uint(i)))
	}
	return append(b, le[:]...)
}
380
381// ConsumeFixed32 parses b as a little-endian uint32, reporting its length.
382// This returns a negative length upon an error (see ParseError).
383func ConsumeFixed32(b []byte) (v uint32, n int) {
384 if len(b) < 4 {
385 return 0, errCodeTruncated
386 }
387 v = uint32(b[0])<<0 | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
388 return v, 4
389}
390
// SizeFixed32 returns the encoded size of a fixed32 value, which does not
// depend on the value and is always 4.
func SizeFixed32() int {
	const fixed32Len = 4
	return fixed32Len
}
395
// AppendFixed64 appends the eight bytes of v to b, least significant byte
// first, and returns the extended slice.
func AppendFixed64(b []byte, v uint64) []byte {
	var le [8]byte
	for i := range le {
		le[i] = byte(v >> (8 * uint(i)))
	}
	return append(b, le[:]...)
}
408
409// ConsumeFixed64 parses b as a little-endian uint64, reporting its length.
410// This returns a negative length upon an error (see ParseError).
411func ConsumeFixed64(b []byte) (v uint64, n int) {
412 if len(b) < 8 {
413 return 0, errCodeTruncated
414 }
415 v = uint64(b[0])<<0 | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
416 return v, 8
417}
418
// SizeFixed64 returns the encoded size of a fixed64 value, which does not
// depend on the value and is always 8.
func SizeFixed64() int {
	const fixed64Len = 8
	return fixed64Len
}
423
424// AppendBytes appends v to b as a length-prefixed bytes value.
425func AppendBytes(b []byte, v []byte) []byte {
426 return append(AppendVarint(b, uint64(len(v))), v...)
427}
428
429// ConsumeBytes parses b as a length-prefixed bytes value, reporting its length.
430// This returns a negative length upon an error (see ParseError).
431func ConsumeBytes(b []byte) (v []byte, n int) {
432 m, n := ConsumeVarint(b)
433 if n < 0 {
434 return nil, n // forward error code
435 }
436 if m > uint64(len(b[n:])) {
437 return nil, errCodeTruncated
438 }
439 return b[n:][:m], n + int(m)
440}
441
442// SizeBytes returns the encoded size of a length-prefixed bytes value,
443// given only the length.
444func SizeBytes(n int) int {
445 return SizeVarint(uint64(n)) + n
446}
447
448// AppendString appends v to b as a length-prefixed bytes value.
449func AppendString(b []byte, v string) []byte {
450 return append(AppendVarint(b, uint64(len(v))), v...)
451}
452
453// ConsumeString parses b as a length-prefixed bytes value, reporting its length.
454// This returns a negative length upon an error (see ParseError).
455func ConsumeString(b []byte) (v string, n int) {
456 bb, n := ConsumeBytes(b)
457 return string(bb), n
458}
459
460// AppendGroup appends v to b as group value, with a trailing end group marker.
461// The value v must not contain the end marker.
462func AppendGroup(b []byte, num Number, v []byte) []byte {
463 return AppendVarint(append(b, v...), EncodeTag(num, EndGroupType))
464}
465
466// ConsumeGroup parses b as a group value until the trailing end group marker,
467// and verifies that the end marker matches the provided num. The value v
468// does not contain the end marker, while the length does contain the end marker.
469// This returns a negative length upon an error (see ParseError).
470func ConsumeGroup(num Number, b []byte) (v []byte, n int) {
471 n = ConsumeFieldValue(num, StartGroupType, b)
472 if n < 0 {
473 return nil, n // forward error code
474 }
475 b = b[:n]
476
477 // Truncate off end group marker, but need to handle denormalized varints.
478 // Assuming end marker is never 0 (which is always the case since
479 // EndGroupType is non-zero), we can truncate all trailing bytes where the
480 // lower 7 bits are all zero (implying that the varint is denormalized).
481 for len(b) > 0 && b[len(b)-1]&0x7f == 0 {
482 b = b[:len(b)-1]
483 }
484 b = b[:len(b)-SizeTag(num)]
485 return b, n
486}
487
488// SizeGroup returns the encoded size of a group, given only the length.
489func SizeGroup(num Number, n int) int {
490 return n + SizeTag(num)
491}
492
493// DecodeTag decodes the field Number and wire Type from its unified form.
494// The Number is -1 if the decoded field number overflows int32.
495// Other than overflow, this does not check for field number validity.
496func DecodeTag(x uint64) (Number, Type) {
497 // NOTE: MessageSet allows for larger field numbers than normal.
498 if x>>3 > uint64(math.MaxInt32) {
499 return -1, 0
500 }
501 return Number(x >> 3), Type(x & 7)
502}
503
504// EncodeTag encodes the field Number and wire Type into its unified form.
505func EncodeTag(num Number, typ Type) uint64 {
506 return uint64(num)<<3 | uint64(typ&7)
507}
508
// DecodeZigZag decodes a zig-zag-encoded uint64 as an int64.
//
//	Input:  {…,  5,  3,  1,  0,  2,  4,  6, …}
//	Output: {…, -3, -2, -1,  0, +1, +2, +3, …}
func DecodeZigZag(x uint64) int64 {
	magnitude := int64(x >> 1)
	// The low bit is the sign: spread it into an all-ones (-1) or all-zeros
	// mask so that XOR flips every bit for negative values.
	signMask := -int64(x & 1)
	return magnitude ^ signMask
}
515
// EncodeZigZag encodes an int64 as a zig-zag-encoded uint64.
//
//	Input:  {…, -3, -2, -1,  0, +1, +2, +3, …}
//	Output: {…,  5,  3,  1,  0,  2,  4,  6, …}
func EncodeZigZag(x int64) uint64 {
	doubled := uint64(x) << 1
	// The arithmetic shift yields all ones for negative x and all zeros
	// otherwise.
	signMask := uint64(x >> 63)
	return doubled ^ signMask
}
522
// DecodeBool decodes a uint64 as a bool: zero is false, anything else true.
//
//	Input:  {    0,    1,    2, …}
//	Output: {false, true, true, …}
func DecodeBool(x uint64) bool {
	if x == 0 {
		return false
	}
	return true
}
529
// EncodeBool encodes a bool as a uint64: 1 for true, 0 for false.
//
//	Input:  {false, true}
//	Output: {    0,    1}
func EncodeBool(x bool) uint64 {
	var encoded uint64
	if x {
		encoded = 1
	}
	return encoded
}
538}