blob: 168f92f3c1221fce6a6397fa5fa7ca67e9a87fd1 [file] [log] [blame]
Pragya Arya324337e2020-02-20 14:35:08 +05301// Copyright 2019, The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
David K. Bainbridgec415efe2021-08-19 13:05:21 +00003// license that can be found in the LICENSE file.
Pragya Arya324337e2020-02-20 14:35:08 +05304
5package cmp
6
7import (
8 "bytes"
9 "fmt"
10 "reflect"
David K. Bainbridgec415efe2021-08-19 13:05:21 +000011 "strconv"
Pragya Arya324337e2020-02-20 14:35:08 +053012 "strings"
13 "unicode"
14 "unicode/utf8"
15
16 "github.com/google/go-cmp/cmp/internal/diff"
17)
18
19// CanFormatDiffSlice reports whether we support custom formatting for nodes
20// that are slices of primitive kinds or strings.
21func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
22 switch {
23 case opts.DiffMode != diffUnknown:
24 return false // Must be formatting in diff mode
25 case v.NumDiff == 0:
26 return false // No differences detected
Pragya Arya324337e2020-02-20 14:35:08 +053027 case !v.ValueX.IsValid() || !v.ValueY.IsValid():
28 return false // Both values must be valid
David K. Bainbridgec415efe2021-08-19 13:05:21 +000029 case v.NumIgnored > 0:
30 return false // Some ignore option was used
31 case v.NumTransformed > 0:
32 return false // Some transform option was used
33 case v.NumCompared > 1:
34 return false // More than one comparison was used
35 case v.NumCompared == 1 && v.Type.Name() != "":
36 // The need for cmp to check applicability of options on every element
37 // in a slice is a significant performance detriment for large []byte.
38 // The workaround is to specify Comparer(bytes.Equal),
39 // which enables cmp to compare []byte more efficiently.
40 // If they differ, we still want to provide batched diffing.
41 // The logic disallows named types since they tend to have their own
42 // String method, with nicer formatting than what this provides.
43 return false
Pragya Arya324337e2020-02-20 14:35:08 +053044 }
45
David K. Bainbridgec415efe2021-08-19 13:05:21 +000046 // Check whether this is an interface with the same concrete types.
47 t := v.Type
48 vx, vy := v.ValueX, v.ValueY
49 if t.Kind() == reflect.Interface && !vx.IsNil() && !vy.IsNil() && vx.Elem().Type() == vy.Elem().Type() {
50 vx, vy = vx.Elem(), vy.Elem()
51 t = vx.Type()
52 }
53
54 // Check whether we provide specialized diffing for this type.
55 switch t.Kind() {
Pragya Arya324337e2020-02-20 14:35:08 +053056 case reflect.String:
57 case reflect.Array, reflect.Slice:
58 // Only slices of primitive types have specialized handling.
59 switch t.Elem().Kind() {
60 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
61 reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
62 reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
63 default:
64 return false
65 }
66
David K. Bainbridgec415efe2021-08-19 13:05:21 +000067 // Both slice values have to be non-empty.
68 if t.Kind() == reflect.Slice && (vx.Len() == 0 || vy.Len() == 0) {
69 return false
70 }
71
Pragya Arya324337e2020-02-20 14:35:08 +053072 // If a sufficient number of elements already differ,
73 // use specialized formatting even if length requirement is not met.
74 if v.NumDiff > v.NumSame {
75 return true
76 }
77 default:
78 return false
79 }
80
81 // Use specialized string diffing for longer slices or strings.
82 const minLength = 64
David K. Bainbridgec415efe2021-08-19 13:05:21 +000083 return vx.Len() >= minLength && vy.Len() >= minLength
Pragya Arya324337e2020-02-20 14:35:08 +053084}
85
86// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
87// This provides custom-tailored logic to make printing of differences in
88// textual strings and slices of primitive kinds more readable.
89func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
90 assert(opts.DiffMode == diffUnknown)
91 t, vx, vy := v.Type, v.ValueX, v.ValueY
David K. Bainbridgec415efe2021-08-19 13:05:21 +000092 if t.Kind() == reflect.Interface {
93 vx, vy = vx.Elem(), vy.Elem()
94 t = vx.Type()
95 opts = opts.WithTypeMode(emitType)
96 }
Pragya Arya324337e2020-02-20 14:35:08 +053097
98 // Auto-detect the type of the data.
99 var isLinedText, isText, isBinary bool
100 var sx, sy string
101 switch {
102 case t.Kind() == reflect.String:
103 sx, sy = vx.String(), vy.String()
104 isText = true // Initial estimate, verify later
105 case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
106 sx, sy = string(vx.Bytes()), string(vy.Bytes())
107 isBinary = true // Initial estimate, verify later
108 case t.Kind() == reflect.Array:
109 // Arrays need to be addressable for slice operations to work.
110 vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
111 vx2.Set(vx)
112 vy2.Set(vy)
113 vx, vy = vx2, vy2
114 }
115 if isText || isBinary {
116 var numLines, lastLineIdx, maxLineLen int
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000117 isBinary = !utf8.ValidString(sx) || !utf8.ValidString(sy)
Pragya Arya324337e2020-02-20 14:35:08 +0530118 for i, r := range sx + sy {
119 if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
120 isBinary = true
121 break
122 }
123 if r == '\n' {
124 if maxLineLen < i-lastLineIdx {
125 maxLineLen = i - lastLineIdx
126 }
127 lastLineIdx = i + 1
128 numLines++
129 }
130 }
131 isText = !isBinary
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000132 isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
Pragya Arya324337e2020-02-20 14:35:08 +0530133 }
134
135 // Format the string into printable records.
136 var list textList
137 var delim string
138 switch {
139 // If the text appears to be multi-lined text,
140 // then perform differencing across individual lines.
141 case isLinedText:
142 ssx := strings.Split(sx, "\n")
143 ssy := strings.Split(sy, "\n")
144 list = opts.formatDiffSlice(
145 reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
146 func(v reflect.Value, d diffMode) textRecord {
147 s := formatString(v.Index(0).String())
148 return textRecord{Diff: d, Value: textLine(s)}
149 },
150 )
151 delim = "\n"
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000152
153 // If possible, use a custom triple-quote (""") syntax for printing
154 // differences in a string literal. This format is more readable,
155 // but has edge-cases where differences are visually indistinguishable.
156 // This format is avoided under the following conditions:
157 // • A line starts with `"""`
158 // • A line starts with "..."
159 // • A line contains non-printable characters
160 // • Adjacent different lines differ only by whitespace
161 //
162 // For example:
163 // """
164 // ... // 3 identical lines
165 // foo
166 // bar
167 // - baz
168 // + BAZ
169 // """
170 isTripleQuoted := true
171 prevRemoveLines := map[string]bool{}
172 prevInsertLines := map[string]bool{}
173 var list2 textList
174 list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
175 for _, r := range list {
176 if !r.Value.Equal(textEllipsis) {
177 line, _ := strconv.Unquote(string(r.Value.(textLine)))
178 line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support
179 normLine := strings.Map(func(r rune) rune {
180 if unicode.IsSpace(r) {
181 return -1 // drop whitespace to avoid visually indistinguishable output
182 }
183 return r
184 }, line)
185 isPrintable := func(r rune) bool {
186 return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable
187 }
188 isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == ""
189 switch r.Diff {
190 case diffRemoved:
191 isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine]
192 prevRemoveLines[normLine] = true
193 case diffInserted:
194 isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine]
195 prevInsertLines[normLine] = true
196 }
197 if !isTripleQuoted {
198 break
199 }
200 r.Value = textLine(line)
201 r.ElideComma = true
202 }
203 if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group
204 prevRemoveLines = map[string]bool{}
205 prevInsertLines = map[string]bool{}
206 }
207 list2 = append(list2, r)
208 }
209 if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 {
210 list2 = list2[:len(list2)-1] // elide single empty line at the end
211 }
212 list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
213 if isTripleQuoted {
214 var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"}
215 switch t.Kind() {
216 case reflect.String:
217 if t != reflect.TypeOf(string("")) {
218 out = opts.FormatType(t, out)
219 }
220 case reflect.Slice:
221 // Always emit type for slices since the triple-quote syntax
222 // looks like a string (not a slice).
223 opts = opts.WithTypeMode(emitType)
224 out = opts.FormatType(t, out)
225 }
226 return out
227 }
228
Pragya Arya324337e2020-02-20 14:35:08 +0530229 // If the text appears to be single-lined text,
230 // then perform differencing in approximately fixed-sized chunks.
231 // The output is printed as quoted strings.
232 case isText:
233 list = opts.formatDiffSlice(
234 reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
235 func(v reflect.Value, d diffMode) textRecord {
236 s := formatString(v.String())
237 return textRecord{Diff: d, Value: textLine(s)}
238 },
239 )
240 delim = ""
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000241
Pragya Arya324337e2020-02-20 14:35:08 +0530242 // If the text appears to be binary data,
243 // then perform differencing in approximately fixed-sized chunks.
244 // The output is inspired by hexdump.
245 case isBinary:
246 list = opts.formatDiffSlice(
247 reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
248 func(v reflect.Value, d diffMode) textRecord {
249 var ss []string
250 for i := 0; i < v.Len(); i++ {
251 ss = append(ss, formatHex(v.Index(i).Uint()))
252 }
253 s := strings.Join(ss, ", ")
254 comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
255 return textRecord{Diff: d, Value: textLine(s), Comment: comment}
256 },
257 )
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000258
Pragya Arya324337e2020-02-20 14:35:08 +0530259 // For all other slices of primitive types,
260 // then perform differencing in approximately fixed-sized chunks.
261 // The size of each chunk depends on the width of the element kind.
262 default:
263 var chunkSize int
264 if t.Elem().Kind() == reflect.Bool {
265 chunkSize = 16
266 } else {
267 switch t.Elem().Bits() {
268 case 8:
269 chunkSize = 16
270 case 16:
271 chunkSize = 12
272 case 32:
273 chunkSize = 8
274 default:
275 chunkSize = 8
276 }
277 }
278 list = opts.formatDiffSlice(
279 vx, vy, chunkSize, t.Elem().Kind().String(),
280 func(v reflect.Value, d diffMode) textRecord {
281 var ss []string
282 for i := 0; i < v.Len(); i++ {
283 switch t.Elem().Kind() {
284 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
285 ss = append(ss, fmt.Sprint(v.Index(i).Int()))
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000286 case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64:
287 ss = append(ss, fmt.Sprint(v.Index(i).Uint()))
288 case reflect.Uint8, reflect.Uintptr:
Pragya Arya324337e2020-02-20 14:35:08 +0530289 ss = append(ss, formatHex(v.Index(i).Uint()))
290 case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
291 ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
292 }
293 }
294 s := strings.Join(ss, ", ")
295 return textRecord{Diff: d, Value: textLine(s)}
296 },
297 )
298 }
299
300 // Wrap the output with appropriate type information.
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000301 var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}
Pragya Arya324337e2020-02-20 14:35:08 +0530302 if !isText {
303 // The "{...}" byte-sequence literal is not valid Go syntax for strings.
304 // Emit the type for extra clarity (e.g. "string{...}").
305 if t.Kind() == reflect.String {
306 opts = opts.WithTypeMode(emitType)
307 }
308 return opts.FormatType(t, out)
309 }
310 switch t.Kind() {
311 case reflect.String:
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000312 out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
Pragya Arya324337e2020-02-20 14:35:08 +0530313 if t != reflect.TypeOf(string("")) {
314 out = opts.FormatType(t, out)
315 }
316 case reflect.Slice:
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000317 out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
Pragya Arya324337e2020-02-20 14:35:08 +0530318 if t != reflect.TypeOf([]byte(nil)) {
319 out = opts.FormatType(t, out)
320 }
321 }
322 return out
323}
324
325// formatASCII formats s as an ASCII string.
326// This is useful for printing binary strings in a semi-legible way.
327func formatASCII(s string) string {
328 b := bytes.Repeat([]byte{'.'}, len(s))
329 for i := 0; i < len(s); i++ {
330 if ' ' <= s[i] && s[i] <= '~' {
331 b[i] = s[i]
332 }
333 }
334 return string(b)
335}
336
337func (opts formatOptions) formatDiffSlice(
338 vx, vy reflect.Value, chunkSize int, name string,
339 makeRec func(reflect.Value, diffMode) textRecord,
340) (list textList) {
341 es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
342 return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
343 })
344
345 appendChunks := func(v reflect.Value, d diffMode) int {
346 n0 := v.Len()
347 for v.Len() > 0 {
348 n := chunkSize
349 if n > v.Len() {
350 n = v.Len()
351 }
352 list = append(list, makeRec(v.Slice(0, n), d))
353 v = v.Slice(n, v.Len())
354 }
355 return n0 - v.Len()
356 }
357
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000358 var numDiffs int
359 maxLen := -1
360 if opts.LimitVerbosity {
361 maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc...
362 opts.VerbosityLevel--
363 }
364
Pragya Arya324337e2020-02-20 14:35:08 +0530365 groups := coalesceAdjacentEdits(name, es)
366 groups = coalesceInterveningIdentical(groups, chunkSize/4)
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000367 maxGroup := diffStats{Name: name}
Pragya Arya324337e2020-02-20 14:35:08 +0530368 for i, ds := range groups {
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000369 if maxLen >= 0 && numDiffs >= maxLen {
370 maxGroup = maxGroup.Append(ds)
371 continue
372 }
373
Pragya Arya324337e2020-02-20 14:35:08 +0530374 // Print equal.
375 if ds.NumDiff() == 0 {
376 // Compute the number of leading and trailing equal bytes to print.
377 var numLo, numHi int
378 numEqual := ds.NumIgnored + ds.NumIdentical
379 for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
380 numLo++
381 }
382 for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
383 numHi++
384 }
385 if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
386 numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
387 }
388
389 // Print the equal bytes.
390 appendChunks(vx.Slice(0, numLo), diffIdentical)
391 if numEqual > numLo+numHi {
392 ds.NumIdentical -= numLo + numHi
393 list.AppendEllipsis(ds)
394 }
395 appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
396 vx = vx.Slice(numEqual, vx.Len())
397 vy = vy.Slice(numEqual, vy.Len())
398 continue
399 }
400
401 // Print unequal.
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000402 len0 := len(list)
Pragya Arya324337e2020-02-20 14:35:08 +0530403 nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
404 vx = vx.Slice(nx, vx.Len())
405 ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
406 vy = vy.Slice(ny, vy.Len())
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000407 numDiffs += len(list) - len0
Pragya Arya324337e2020-02-20 14:35:08 +0530408 }
David K. Bainbridgec415efe2021-08-19 13:05:21 +0000409 if maxGroup.IsZero() {
410 assert(vx.Len() == 0 && vy.Len() == 0)
411 } else {
412 list.AppendEllipsis(maxGroup)
413 }
Pragya Arya324337e2020-02-20 14:35:08 +0530414 return list
415}
416
417// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
418// equal or unequal counts.
419func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
420 var prevCase int // Arbitrary index into which case last occurred
421 lastStats := func(i int) *diffStats {
422 if prevCase != i {
423 groups = append(groups, diffStats{Name: name})
424 prevCase = i
425 }
426 return &groups[len(groups)-1]
427 }
428 for _, e := range es {
429 switch e {
430 case diff.Identity:
431 lastStats(1).NumIdentical++
432 case diff.UniqueX:
433 lastStats(2).NumRemoved++
434 case diff.UniqueY:
435 lastStats(2).NumInserted++
436 case diff.Modified:
437 lastStats(2).NumModified++
438 }
439 }
440 return groups
441}
442
443// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
444// equal groups into adjacent unequal groups that currently result in a
445// dual inserted/removed printout. This acts as a high-pass filter to smooth
446// out high-frequency changes within the windowSize.
447func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
448 groups, groupsOrig := groups[:0], groups
449 for i, ds := range groupsOrig {
450 if len(groups) >= 2 && ds.NumDiff() > 0 {
451 prev := &groups[len(groups)-2] // Unequal group
452 curr := &groups[len(groups)-1] // Equal group
453 next := &groupsOrig[i] // Unequal group
454 hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
455 hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
456 if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
457 *prev = prev.Append(*curr).Append(*next)
458 groups = groups[:len(groups)-1] // Truncate off equal group
459 continue
460 }
461 }
462 groups = append(groups, ds)
463 }
464 return groups
465}