Pragya Arya | 324337e | 2020-02-20 14:35:08 +0530 | [diff] [blame] | 1 | // Copyright 2019, The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE.md file. |
| 4 | |
| 5 | package cmp |
| 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "fmt" |
| 10 | "reflect" |
| 11 | "strings" |
| 12 | "unicode" |
| 13 | "unicode/utf8" |
| 14 | |
| 15 | "github.com/google/go-cmp/cmp/internal/diff" |
| 16 | ) |
| 17 | |
| 18 | // CanFormatDiffSlice reports whether we support custom formatting for nodes |
| 19 | // that are slices of primitive kinds or strings. |
| 20 | func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool { |
| 21 | switch { |
| 22 | case opts.DiffMode != diffUnknown: |
| 23 | return false // Must be formatting in diff mode |
| 24 | case v.NumDiff == 0: |
| 25 | return false // No differences detected |
| 26 | case v.NumIgnored+v.NumCompared+v.NumTransformed > 0: |
| 27 | // TODO: Handle the case where someone uses bytes.Equal on a large slice. |
| 28 | return false // Some custom option was used to determined equality |
| 29 | case !v.ValueX.IsValid() || !v.ValueY.IsValid(): |
| 30 | return false // Both values must be valid |
| 31 | } |
| 32 | |
| 33 | switch t := v.Type; t.Kind() { |
| 34 | case reflect.String: |
| 35 | case reflect.Array, reflect.Slice: |
| 36 | // Only slices of primitive types have specialized handling. |
| 37 | switch t.Elem().Kind() { |
| 38 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, |
| 39 | reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, |
| 40 | reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: |
| 41 | default: |
| 42 | return false |
| 43 | } |
| 44 | |
| 45 | // If a sufficient number of elements already differ, |
| 46 | // use specialized formatting even if length requirement is not met. |
| 47 | if v.NumDiff > v.NumSame { |
| 48 | return true |
| 49 | } |
| 50 | default: |
| 51 | return false |
| 52 | } |
| 53 | |
| 54 | // Use specialized string diffing for longer slices or strings. |
| 55 | const minLength = 64 |
| 56 | return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength |
| 57 | } |
| 58 | |
| 59 | // FormatDiffSlice prints a diff for the slices (or strings) represented by v. |
| 60 | // This provides custom-tailored logic to make printing of differences in |
| 61 | // textual strings and slices of primitive kinds more readable. |
| 62 | func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { |
| 63 | assert(opts.DiffMode == diffUnknown) |
| 64 | t, vx, vy := v.Type, v.ValueX, v.ValueY |
| 65 | |
| 66 | // Auto-detect the type of the data. |
| 67 | var isLinedText, isText, isBinary bool |
| 68 | var sx, sy string |
| 69 | switch { |
| 70 | case t.Kind() == reflect.String: |
| 71 | sx, sy = vx.String(), vy.String() |
| 72 | isText = true // Initial estimate, verify later |
| 73 | case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)): |
| 74 | sx, sy = string(vx.Bytes()), string(vy.Bytes()) |
| 75 | isBinary = true // Initial estimate, verify later |
| 76 | case t.Kind() == reflect.Array: |
| 77 | // Arrays need to be addressable for slice operations to work. |
| 78 | vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem() |
| 79 | vx2.Set(vx) |
| 80 | vy2.Set(vy) |
| 81 | vx, vy = vx2, vy2 |
| 82 | } |
| 83 | if isText || isBinary { |
| 84 | var numLines, lastLineIdx, maxLineLen int |
| 85 | isBinary = false |
| 86 | for i, r := range sx + sy { |
| 87 | if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError { |
| 88 | isBinary = true |
| 89 | break |
| 90 | } |
| 91 | if r == '\n' { |
| 92 | if maxLineLen < i-lastLineIdx { |
| 93 | maxLineLen = i - lastLineIdx |
| 94 | } |
| 95 | lastLineIdx = i + 1 |
| 96 | numLines++ |
| 97 | } |
| 98 | } |
| 99 | isText = !isBinary |
| 100 | isLinedText = isText && numLines >= 4 && maxLineLen <= 256 |
| 101 | } |
| 102 | |
| 103 | // Format the string into printable records. |
| 104 | var list textList |
| 105 | var delim string |
| 106 | switch { |
| 107 | // If the text appears to be multi-lined text, |
| 108 | // then perform differencing across individual lines. |
| 109 | case isLinedText: |
| 110 | ssx := strings.Split(sx, "\n") |
| 111 | ssy := strings.Split(sy, "\n") |
| 112 | list = opts.formatDiffSlice( |
| 113 | reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line", |
| 114 | func(v reflect.Value, d diffMode) textRecord { |
| 115 | s := formatString(v.Index(0).String()) |
| 116 | return textRecord{Diff: d, Value: textLine(s)} |
| 117 | }, |
| 118 | ) |
| 119 | delim = "\n" |
| 120 | // If the text appears to be single-lined text, |
| 121 | // then perform differencing in approximately fixed-sized chunks. |
| 122 | // The output is printed as quoted strings. |
| 123 | case isText: |
| 124 | list = opts.formatDiffSlice( |
| 125 | reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte", |
| 126 | func(v reflect.Value, d diffMode) textRecord { |
| 127 | s := formatString(v.String()) |
| 128 | return textRecord{Diff: d, Value: textLine(s)} |
| 129 | }, |
| 130 | ) |
| 131 | delim = "" |
| 132 | // If the text appears to be binary data, |
| 133 | // then perform differencing in approximately fixed-sized chunks. |
| 134 | // The output is inspired by hexdump. |
| 135 | case isBinary: |
| 136 | list = opts.formatDiffSlice( |
| 137 | reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte", |
| 138 | func(v reflect.Value, d diffMode) textRecord { |
| 139 | var ss []string |
| 140 | for i := 0; i < v.Len(); i++ { |
| 141 | ss = append(ss, formatHex(v.Index(i).Uint())) |
| 142 | } |
| 143 | s := strings.Join(ss, ", ") |
| 144 | comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String()))) |
| 145 | return textRecord{Diff: d, Value: textLine(s), Comment: comment} |
| 146 | }, |
| 147 | ) |
| 148 | // For all other slices of primitive types, |
| 149 | // then perform differencing in approximately fixed-sized chunks. |
| 150 | // The size of each chunk depends on the width of the element kind. |
| 151 | default: |
| 152 | var chunkSize int |
| 153 | if t.Elem().Kind() == reflect.Bool { |
| 154 | chunkSize = 16 |
| 155 | } else { |
| 156 | switch t.Elem().Bits() { |
| 157 | case 8: |
| 158 | chunkSize = 16 |
| 159 | case 16: |
| 160 | chunkSize = 12 |
| 161 | case 32: |
| 162 | chunkSize = 8 |
| 163 | default: |
| 164 | chunkSize = 8 |
| 165 | } |
| 166 | } |
| 167 | list = opts.formatDiffSlice( |
| 168 | vx, vy, chunkSize, t.Elem().Kind().String(), |
| 169 | func(v reflect.Value, d diffMode) textRecord { |
| 170 | var ss []string |
| 171 | for i := 0; i < v.Len(); i++ { |
| 172 | switch t.Elem().Kind() { |
| 173 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: |
| 174 | ss = append(ss, fmt.Sprint(v.Index(i).Int())) |
| 175 | case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: |
| 176 | ss = append(ss, formatHex(v.Index(i).Uint())) |
| 177 | case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: |
| 178 | ss = append(ss, fmt.Sprint(v.Index(i).Interface())) |
| 179 | } |
| 180 | } |
| 181 | s := strings.Join(ss, ", ") |
| 182 | return textRecord{Diff: d, Value: textLine(s)} |
| 183 | }, |
| 184 | ) |
| 185 | } |
| 186 | |
| 187 | // Wrap the output with appropriate type information. |
| 188 | var out textNode = textWrap{"{", list, "}"} |
| 189 | if !isText { |
| 190 | // The "{...}" byte-sequence literal is not valid Go syntax for strings. |
| 191 | // Emit the type for extra clarity (e.g. "string{...}"). |
| 192 | if t.Kind() == reflect.String { |
| 193 | opts = opts.WithTypeMode(emitType) |
| 194 | } |
| 195 | return opts.FormatType(t, out) |
| 196 | } |
| 197 | switch t.Kind() { |
| 198 | case reflect.String: |
| 199 | out = textWrap{"strings.Join(", out, fmt.Sprintf(", %q)", delim)} |
| 200 | if t != reflect.TypeOf(string("")) { |
| 201 | out = opts.FormatType(t, out) |
| 202 | } |
| 203 | case reflect.Slice: |
| 204 | out = textWrap{"bytes.Join(", out, fmt.Sprintf(", %q)", delim)} |
| 205 | if t != reflect.TypeOf([]byte(nil)) { |
| 206 | out = opts.FormatType(t, out) |
| 207 | } |
| 208 | } |
| 209 | return out |
| 210 | } |
| 211 | |
| 212 | // formatASCII formats s as an ASCII string. |
| 213 | // This is useful for printing binary strings in a semi-legible way. |
| 214 | func formatASCII(s string) string { |
| 215 | b := bytes.Repeat([]byte{'.'}, len(s)) |
| 216 | for i := 0; i < len(s); i++ { |
| 217 | if ' ' <= s[i] && s[i] <= '~' { |
| 218 | b[i] = s[i] |
| 219 | } |
| 220 | } |
| 221 | return string(b) |
| 222 | } |
| 223 | |
| 224 | func (opts formatOptions) formatDiffSlice( |
| 225 | vx, vy reflect.Value, chunkSize int, name string, |
| 226 | makeRec func(reflect.Value, diffMode) textRecord, |
| 227 | ) (list textList) { |
| 228 | es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result { |
| 229 | return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface()) |
| 230 | }) |
| 231 | |
| 232 | appendChunks := func(v reflect.Value, d diffMode) int { |
| 233 | n0 := v.Len() |
| 234 | for v.Len() > 0 { |
| 235 | n := chunkSize |
| 236 | if n > v.Len() { |
| 237 | n = v.Len() |
| 238 | } |
| 239 | list = append(list, makeRec(v.Slice(0, n), d)) |
| 240 | v = v.Slice(n, v.Len()) |
| 241 | } |
| 242 | return n0 - v.Len() |
| 243 | } |
| 244 | |
| 245 | groups := coalesceAdjacentEdits(name, es) |
| 246 | groups = coalesceInterveningIdentical(groups, chunkSize/4) |
| 247 | for i, ds := range groups { |
| 248 | // Print equal. |
| 249 | if ds.NumDiff() == 0 { |
| 250 | // Compute the number of leading and trailing equal bytes to print. |
| 251 | var numLo, numHi int |
| 252 | numEqual := ds.NumIgnored + ds.NumIdentical |
| 253 | for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 { |
| 254 | numLo++ |
| 255 | } |
| 256 | for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 { |
| 257 | numHi++ |
| 258 | } |
| 259 | if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 { |
| 260 | numHi = numEqual - numLo // Avoid pointless coalescing of single equal row |
| 261 | } |
| 262 | |
| 263 | // Print the equal bytes. |
| 264 | appendChunks(vx.Slice(0, numLo), diffIdentical) |
| 265 | if numEqual > numLo+numHi { |
| 266 | ds.NumIdentical -= numLo + numHi |
| 267 | list.AppendEllipsis(ds) |
| 268 | } |
| 269 | appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical) |
| 270 | vx = vx.Slice(numEqual, vx.Len()) |
| 271 | vy = vy.Slice(numEqual, vy.Len()) |
| 272 | continue |
| 273 | } |
| 274 | |
| 275 | // Print unequal. |
| 276 | nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved) |
| 277 | vx = vx.Slice(nx, vx.Len()) |
| 278 | ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted) |
| 279 | vy = vy.Slice(ny, vy.Len()) |
| 280 | } |
| 281 | assert(vx.Len() == 0 && vy.Len() == 0) |
| 282 | return list |
| 283 | } |
| 284 | |
| 285 | // coalesceAdjacentEdits coalesces the list of edits into groups of adjacent |
| 286 | // equal or unequal counts. |
| 287 | func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) { |
| 288 | var prevCase int // Arbitrary index into which case last occurred |
| 289 | lastStats := func(i int) *diffStats { |
| 290 | if prevCase != i { |
| 291 | groups = append(groups, diffStats{Name: name}) |
| 292 | prevCase = i |
| 293 | } |
| 294 | return &groups[len(groups)-1] |
| 295 | } |
| 296 | for _, e := range es { |
| 297 | switch e { |
| 298 | case diff.Identity: |
| 299 | lastStats(1).NumIdentical++ |
| 300 | case diff.UniqueX: |
| 301 | lastStats(2).NumRemoved++ |
| 302 | case diff.UniqueY: |
| 303 | lastStats(2).NumInserted++ |
| 304 | case diff.Modified: |
| 305 | lastStats(2).NumModified++ |
| 306 | } |
| 307 | } |
| 308 | return groups |
| 309 | } |
| 310 | |
| 311 | // coalesceInterveningIdentical coalesces sufficiently short (<= windowSize) |
| 312 | // equal groups into adjacent unequal groups that currently result in a |
| 313 | // dual inserted/removed printout. This acts as a high-pass filter to smooth |
| 314 | // out high-frequency changes within the windowSize. |
| 315 | func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats { |
| 316 | groups, groupsOrig := groups[:0], groups |
| 317 | for i, ds := range groupsOrig { |
| 318 | if len(groups) >= 2 && ds.NumDiff() > 0 { |
| 319 | prev := &groups[len(groups)-2] // Unequal group |
| 320 | curr := &groups[len(groups)-1] // Equal group |
| 321 | next := &groupsOrig[i] // Unequal group |
| 322 | hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0 |
| 323 | hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0 |
| 324 | if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize { |
| 325 | *prev = prev.Append(*curr).Append(*next) |
| 326 | groups = groups[:len(groups)-1] // Truncate off equal group |
| 327 | continue |
| 328 | } |
| 329 | } |
| 330 | groups = append(groups, ds) |
| 331 | } |
| 332 | return groups |
| 333 | } |