// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5// Package strs provides string manipulation functionality specific to protobuf.
6package strs
7
8import (
9 "go/token"
10 "strings"
11 "unicode"
12 "unicode/utf8"
13
14 "google.golang.org/protobuf/internal/flags"
15 "google.golang.org/protobuf/reflect/protoreflect"
16)
17
18// EnforceUTF8 reports whether to enforce strict UTF-8 validation.
19func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
20 if flags.ProtoLegacy {
21 if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
22 return fd.EnforceUTF8()
23 }
24 }
25 return fd.Syntax() == protoreflect.Proto3
26}
27
28// GoCamelCase camel-cases a protobuf name for use as a Go identifier.
29//
30// If there is an interior underscore followed by a lower case letter,
31// drop the underscore and convert the letter to upper case.
32func GoCamelCase(s string) string {
33 // Invariant: if the next letter is lower case, it must be converted
34 // to upper case.
35 // That is, we process a word at a time, where words are marked by _ or
36 // upper case letter. Digits are treated as words.
37 var b []byte
38 for i := 0; i < len(s); i++ {
39 c := s[i]
40 switch {
41 case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
42 // Skip over '.' in ".{{lowercase}}".
43 case c == '.':
44 b = append(b, '_') // convert '.' to '_'
45 case c == '_' && (i == 0 || s[i-1] == '.'):
46 // Convert initial '_' to ensure we start with a capital letter.
47 // Do the same for '_' after '.' to match historic behavior.
48 b = append(b, 'X') // convert '_' to 'X'
49 case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
50 // Skip over '_' in "_{{lowercase}}".
51 case isASCIIDigit(c):
52 b = append(b, c)
53 default:
54 // Assume we have a letter now - if not, it's a bogus identifier.
55 // The next word is a sequence of characters that must start upper case.
56 if isASCIILower(c) {
57 c -= 'a' - 'A' // convert lowercase to uppercase
58 }
59 b = append(b, c)
60
61 // Accept lower case sequence that follows.
62 for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
63 b = append(b, s[i+1])
64 }
65 }
66 }
67 return string(b)
68}
69
// GoSanitized converts a string to a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit is
// replaced by '_'. The result is then prefixed with '_' if it is a Go keyword
// or if its first rune is not a Unicode letter (which includes the empty
// string and strings that already begin with '_').
func GoSanitized(s string) string {
	keep := func(r rune) rune {
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			return '_'
		}
		return r
	}
	clean := strings.Map(keep, s)

	first, _ := utf8.DecodeRuneInString(clean)
	if unicode.IsLetter(first) && !token.Lookup(clean).IsKeyword() {
		return clean
	}
	return "_" + clean
}
89
90// JSONCamelCase converts a snake_case identifier to a camelCase identifier,
91// according to the protobuf JSON specification.
92func JSONCamelCase(s string) string {
93 var b []byte
94 var wasUnderscore bool
95 for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
96 c := s[i]
97 if c != '_' {
98 if wasUnderscore && isASCIILower(c) {
99 c -= 'a' - 'A' // convert to uppercase
100 }
101 b = append(b, c)
102 }
103 wasUnderscore = c == '_'
104 }
105 return string(b)
106}
107
108// JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
109// according to the protobuf JSON specification.
110func JSONSnakeCase(s string) string {
111 var b []byte
112 for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
113 c := s[i]
114 if isASCIIUpper(c) {
115 b = append(b, '_')
116 c += 'a' - 'A' // convert to lowercase
117 }
118 b = append(b, c)
119 }
120 return string(b)
121}
122
// MapEntryName derives the name of the map entry message given the field name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
//
// Underscores are dropped, the first byte and every byte that follows an
// underscore is upper-cased, and "Entry" is appended to the result.
//
// The name is processed byte by byte and only ASCII letters are case
// converted. Any other byte is copied unchanged, so multi-byte UTF-8
// sequences in s are preserved rather than being truncated to a single byte.
func MapEntryName(s string) string {
	const suffix = "Entry"
	b := make([]byte, 0, len(s)+len(suffix))
	upperNext := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			b = append(b, c)
		}
	}
	b = append(b, suffix...)
	return string(b)
}
142
// EnumValueName derives the camel-cased enum value name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
//
// Underscores are dropped, the first byte and every byte that follows an
// underscore is upper-cased, and every other byte is lower-cased.
//
// The name is processed byte by byte and only ASCII letters are case
// converted. Any other byte is copied unchanged, so multi-byte UTF-8
// sequences in s are preserved rather than being truncated to a single byte.
func EnumValueName(s string) string {
	b := make([]byte, 0, len(s))
	upperNext := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A' // convert to lowercase
			}
			b = append(b, c)
		}
	}
	return string(b)
}
162
// TrimEnumPrefix trims the enum name prefix from an enum value name,
// where the prefix is all lowercase without underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Underscores in s are skipped while matching, and each remaining byte of s
// is compared against prefix after lower-casing ASCII letters only. Bytes
// outside the ASCII letter range must match exactly; they are never
// reinterpreted as code points, so a multi-byte UTF-8 sequence in s matches
// the same sequence in prefix.
//
// The original s is returned unchanged when the prefix does not match or
// when trimming would leave an empty string. Otherwise the remainder is
// returned with any leading underscores removed.
func TrimEnumPrefix(s, prefix string) string {
	s0 := s // original input
	for len(s) > 0 && len(prefix) > 0 {
		c := s[0]
		if c == '_' {
			s = s[1:]
			continue
		}
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A' // convert to lowercase
		}
		if c != prefix[0] {
			return s0 // no prefix match
		}
		s, prefix = s[1:], prefix[1:]
	}
	if len(prefix) > 0 {
		return s0 // no prefix match
	}
	s = strings.TrimLeft(s, "_")
	if len(s) == 0 {
		return s0 // avoid returning empty string
	}
	return s
}
187
// isASCIILower reports whether c is an ASCII lower-case letter ('a'..'z').
func isASCIILower(c byte) bool {
	return c >= 'a' && c <= 'z'
}
// isASCIIUpper reports whether c is an ASCII upper-case letter ('A'..'Z').
func isASCIIUpper(c byte) bool {
	return c >= 'A' && c <= 'Z'
}
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'..'9').
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}