Matteo Scandolo | a428586 | 2020-12-01 18:10:10 -0800 | [diff] [blame] | 1 | // Copyright 2019 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Package strs provides string manipulation functionality specific to protobuf. |
| 6 | package strs |
| 7 | |
| 8 | import ( |
| 9 | "go/token" |
| 10 | "strings" |
| 11 | "unicode" |
| 12 | "unicode/utf8" |
| 13 | |
| 14 | "google.golang.org/protobuf/internal/flags" |
| 15 | "google.golang.org/protobuf/reflect/protoreflect" |
| 16 | ) |
| 17 | |
| 18 | // EnforceUTF8 reports whether to enforce strict UTF-8 validation. |
| 19 | func EnforceUTF8(fd protoreflect.FieldDescriptor) bool { |
| 20 | if flags.ProtoLegacy { |
| 21 | if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok { |
| 22 | return fd.EnforceUTF8() |
| 23 | } |
| 24 | } |
| 25 | return fd.Syntax() == protoreflect.Proto3 |
| 26 | } |
| 27 | |
| 28 | // GoCamelCase camel-cases a protobuf name for use as a Go identifier. |
| 29 | // |
| 30 | // If there is an interior underscore followed by a lower case letter, |
| 31 | // drop the underscore and convert the letter to upper case. |
| 32 | func GoCamelCase(s string) string { |
| 33 | // Invariant: if the next letter is lower case, it must be converted |
| 34 | // to upper case. |
| 35 | // That is, we process a word at a time, where words are marked by _ or |
| 36 | // upper case letter. Digits are treated as words. |
| 37 | var b []byte |
| 38 | for i := 0; i < len(s); i++ { |
| 39 | c := s[i] |
| 40 | switch { |
| 41 | case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]): |
| 42 | // Skip over '.' in ".{{lowercase}}". |
| 43 | case c == '.': |
| 44 | b = append(b, '_') // convert '.' to '_' |
| 45 | case c == '_' && (i == 0 || s[i-1] == '.'): |
| 46 | // Convert initial '_' to ensure we start with a capital letter. |
| 47 | // Do the same for '_' after '.' to match historic behavior. |
| 48 | b = append(b, 'X') // convert '_' to 'X' |
| 49 | case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]): |
| 50 | // Skip over '_' in "_{{lowercase}}". |
| 51 | case isASCIIDigit(c): |
| 52 | b = append(b, c) |
| 53 | default: |
| 54 | // Assume we have a letter now - if not, it's a bogus identifier. |
| 55 | // The next word is a sequence of characters that must start upper case. |
| 56 | if isASCIILower(c) { |
| 57 | c -= 'a' - 'A' // convert lowercase to uppercase |
| 58 | } |
| 59 | b = append(b, c) |
| 60 | |
| 61 | // Accept lower case sequence that follows. |
| 62 | for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ { |
| 63 | b = append(b, s[i+1]) |
| 64 | } |
| 65 | } |
| 66 | } |
| 67 | return string(b) |
| 68 | } |
| 69 | |
// GoSanitized converts a string to a valid Go identifier.
//
// Every rune for which neither unicode.IsLetter nor unicode.IsDigit holds
// is replaced by '_'. The result is additionally prefixed with '_' when it
// is a Go keyword or when its first rune is not a letter (which includes
// the empty string).
func GoSanitized(s string) string {
	// Rewrite the input rune by rune, keeping only letters and digits.
	var sb strings.Builder
	sb.Grow(len(s))
	for _, r := range s {
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			r = '_'
		}
		sb.WriteRune(r)
	}
	ident := sb.String()

	// The identifier is usable as is only if it starts with a letter and
	// does not collide with a keyword.
	first, _ := utf8.DecodeRuneInString(ident)
	if unicode.IsLetter(first) && !token.Lookup(ident).IsKeyword() {
		return ident
	}
	return "_" + ident
}
| 89 | |
// JSONCamelCase converts a snake_case identifier to a camelCase identifier,
// according to the protobuf JSON specification.
//
// Underscores are removed, and an ASCII lower-case letter that directly
// follows an underscore is upper-cased. All other bytes are copied as is.
func JSONCamelCase(s string) string {
	var out []byte
	afterUnderscore := false
	for _, ch := range []byte(s) { // proto identifiers are always ASCII
		if ch == '_' {
			afterUnderscore = true
			continue
		}
		if afterUnderscore && 'a' <= ch && ch <= 'z' {
			ch -= 'a' - 'A' // convert to uppercase
		}
		out = append(out, ch)
		afterUnderscore = false
	}
	return string(out)
}
| 107 | |
// JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
// according to the protobuf JSON specification.
//
// Each ASCII upper-case letter is replaced by an underscore followed by its
// lower-case form; every other byte is copied unchanged.
func JSONSnakeCase(s string) string {
	var out []byte
	for _, ch := range []byte(s) { // proto identifiers are always ASCII
		if 'A' <= ch && ch <= 'Z' {
			out = append(out, '_', ch+('a'-'A'))
			continue
		}
		out = append(out, ch)
	}
	return string(out)
}
| 122 | |
// MapEntryName derives the name of the map entry message given the field name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
//
// Underscores are dropped, the first byte and every byte that follows an
// underscore is upper-cased if it is an ASCII lower-case letter, and "Entry"
// is appended to the result.
//
// The input is processed byte by byte with ASCII-only case conversion, so
// bytes outside the ASCII range are copied through unchanged rather than
// being truncated to a single byte.
func MapEntryName(s string) string {
	b := make([]byte, 0, len(s)+len("Entry"))
	upperNext := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			b = append(b, c)
		}
	}
	b = append(b, "Entry"...)
	return string(b)
}
| 142 | |
// EnumValueName derives the camel-cased enum value name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
//
// Underscores are dropped. The first byte and every byte that follows an
// underscore is upper-cased; all remaining bytes are lower-cased.
//
// Case conversion is ASCII-only and the input is processed byte by byte, so
// bytes outside the ASCII range are copied through unchanged rather than
// being case-folded as runes and truncated to a single byte.
func EnumValueName(s string) string {
	b := make([]byte, 0, len(s))
	upperNext := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == '_' {
			upperNext = true
			continue
		}
		switch {
		case upperNext && 'a' <= c && c <= 'z':
			c -= 'a' - 'A' // convert to uppercase
		case !upperNext && 'A' <= c && c <= 'Z':
			c += 'a' - 'A' // convert to lowercase
		}
		b = append(b, c)
		upperNext = false
	}
	return string(b)
}
| 162 | |
// TrimEnumPrefix trims the enum name prefix from an enum value name,
// where the prefix is all lowercase without underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Underscores in s are skipped while matching, and each remaining byte of s
// is compared against the prefix after ASCII lower-casing. The original s is
// returned when the prefix does not match completely, or when nothing but
// underscores would remain after trimming.
//
// Only ASCII letters are case-folded: a raw byte is never reinterpreted as a
// Unicode code point, so non-ASCII bytes match only themselves.
func TrimEnumPrefix(s, prefix string) string {
	s0 := s // original input
	for len(s) > 0 && len(prefix) > 0 {
		c := s[0]
		if c == '_' {
			s = s[1:]
			continue
		}
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A' // convert to lowercase
		}
		if c != prefix[0] {
			return s0 // no prefix match
		}
		s, prefix = s[1:], prefix[1:]
	}
	if len(prefix) > 0 {
		return s0 // no prefix match
	}
	s = strings.TrimLeft(s, "_")
	if len(s) == 0 {
		return s0 // avoid returning empty string
	}
	return s
}
| 187 | |
// isASCIILower reports whether c is an ASCII lower-case letter ('a'..'z').
func isASCIILower(c byte) bool {
	// Unsigned wrap-around sends every byte below 'a' past the upper bound.
	return c-'a' <= 'z'-'a'
}
// isASCIIUpper reports whether c is an ASCII upper-case letter ('A'..'Z').
func isASCIIUpper(c byte) bool {
	// Unsigned wrap-around sends every byte below 'A' past the upper bound.
	return c-'A' <= 'Z'-'A'
}
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'..'9').
func isASCIIDigit(c byte) bool {
	// Unsigned wrap-around sends every byte below '0' past the upper bound.
	return c-'0' <= '9'-'0'
}