1// Copyright 2019, The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package cmp 6 7import ( 8 "bytes" 9 "fmt" 10 "math" 11 "reflect" 12 "strconv" 13 "strings" 14 "unicode" 15 "unicode/utf8" 16 17 "github.com/google/go-cmp/cmp/internal/diff" 18) 19 20// CanFormatDiffSlice reports whether we support custom formatting for nodes 21// that are slices of primitive kinds or strings. 22func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool { 23 switch { 24 case opts.DiffMode != diffUnknown: 25 return false // Must be formatting in diff mode 26 case v.NumDiff == 0: 27 return false // No differences detected 28 case !v.ValueX.IsValid() || !v.ValueY.IsValid(): 29 return false // Both values must be valid 30 case v.NumIgnored > 0: 31 return false // Some ignore option was used 32 case v.NumTransformed > 0: 33 return false // Some transform option was used 34 case v.NumCompared > 1: 35 return false // More than one comparison was used 36 case v.NumCompared == 1 && v.Type.Name() != "": 37 // The need for cmp to check applicability of options on every element 38 // in a slice is a significant performance detriment for large []byte. 39 // The workaround is to specify Comparer(bytes.Equal), 40 // which enables cmp to compare []byte more efficiently. 41 // If they differ, we still want to provide batched diffing. 42 // The logic disallows named types since they tend to have their own 43 // String method, with nicer formatting than what this provides. 44 return false 45 } 46 47 // Check whether this is an interface with the same concrete types. 48 t := v.Type 49 vx, vy := v.ValueX, v.ValueY 50 if t.Kind() == reflect.Interface && !vx.IsNil() && !vy.IsNil() && vx.Elem().Type() == vy.Elem().Type() { 51 vx, vy = vx.Elem(), vy.Elem() 52 t = vx.Type() 53 } 54 55 // Check whether we provide specialized diffing for this type. 56 switch t.Kind() { 57 case reflect.String: 58 case reflect.Array, reflect.Slice: 59 // Only slices of primitive types have specialized handling. 60 switch t.Elem().Kind() { 61 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, 62 reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, 63 reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: 64 default: 65 return false 66 } 67 68 // Both slice values have to be non-empty. 69 if t.Kind() == reflect.Slice && (vx.Len() == 0 || vy.Len() == 0) { 70 return false 71 } 72 73 // If a sufficient number of elements already differ, 74 // use specialized formatting even if length requirement is not met. 75 if v.NumDiff > v.NumSame { 76 return true 77 } 78 default: 79 return false 80 } 81 82 // Use specialized string diffing for longer slices or strings. 83 const minLength = 32 84 return vx.Len() >= minLength && vy.Len() >= minLength 85} 86 87// FormatDiffSlice prints a diff for the slices (or strings) represented by v. 88// This provides custom-tailored logic to make printing of differences in 89// textual strings and slices of primitive kinds more readable. 90func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode { 91 assert(opts.DiffMode == diffUnknown) 92 t, vx, vy := v.Type, v.ValueX, v.ValueY 93 if t.Kind() == reflect.Interface { 94 vx, vy = vx.Elem(), vy.Elem() 95 t = vx.Type() 96 opts = opts.WithTypeMode(emitType) 97 } 98 99 // Auto-detect the type of the data. 100 var sx, sy string 101 var ssx, ssy []string 102 var isString, isMostlyText, isPureLinedText, isBinary bool 103 switch { 104 case t.Kind() == reflect.String: 105 sx, sy = vx.String(), vy.String() 106 isString = true 107 case t.Kind() == reflect.Slice && t.Elem() == byteType: 108 sx, sy = string(vx.Bytes()), string(vy.Bytes()) 109 isString = true 110 case t.Kind() == reflect.Array: 111 // Arrays need to be addressable for slice operations to work. 112 vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem() 113 vx2.Set(vx) 114 vy2.Set(vy) 115 vx, vy = vx2, vy2 116 } 117 if isString { 118 var numTotalRunes, numValidRunes, numLines, lastLineIdx, maxLineLen int 119 for i, r := range sx + sy { 120 numTotalRunes++ 121 if (unicode.IsPrint(r) || unicode.IsSpace(r)) && r != utf8.RuneError { 122 numValidRunes++ 123 } 124 if r == '\n' { 125 if maxLineLen < i-lastLineIdx { 126 maxLineLen = i - lastLineIdx 127 } 128 lastLineIdx = i + 1 129 numLines++ 130 } 131 } 132 isPureText := numValidRunes == numTotalRunes 133 isMostlyText = float64(numValidRunes) > math.Floor(0.90*float64(numTotalRunes)) 134 isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024 135 isBinary = !isMostlyText 136 137 // Avoid diffing by lines if it produces a significantly more complex 138 // edit script than diffing by bytes. 139 if isPureLinedText { 140 ssx = strings.Split(sx, "\n") 141 ssy = strings.Split(sy, "\n") 142 esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result { 143 return diff.BoolResult(ssx[ix] == ssy[iy]) 144 }) 145 esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result { 146 return diff.BoolResult(sx[ix] == sy[iy]) 147 }) 148 efficiencyLines := float64(esLines.Dist()) / float64(len(esLines)) 149 efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes)) 150 quotedLength := len(strconv.Quote(sx + sy)) 151 unquotedLength := len(sx) + len(sy) 152 escapeExpansionRatio := float64(quotedLength) / float64(unquotedLength) 153 isPureLinedText = efficiencyLines < 4*efficiencyBytes || escapeExpansionRatio > 1.1 154 } 155 } 156 157 // Format the string into printable records. 158 var list textList 159 var delim string 160 switch { 161 // If the text appears to be multi-lined text, 162 // then perform differencing across individual lines. 163 case isPureLinedText: 164 list = opts.formatDiffSlice( 165 reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line", 166 func(v reflect.Value, d diffMode) textRecord { 167 s := formatString(v.Index(0).String()) 168 return textRecord{Diff: d, Value: textLine(s)} 169 }, 170 ) 171 delim = "\n" 172 173 // If possible, use a custom triple-quote (""") syntax for printing 174 // differences in a string literal. This format is more readable, 175 // but has edge-cases where differences are visually indistinguishable. 176 // This format is avoided under the following conditions: 177 // - A line starts with `"""` 178 // - A line starts with "..." 179 // - A line contains non-printable characters 180 // - Adjacent different lines differ only by whitespace 181 // 182 // For example: 183 // 184 // """ 185 // ... // 3 identical lines 186 // foo 187 // bar 188 // - baz 189 // + BAZ 190 // """ 191 isTripleQuoted := true 192 prevRemoveLines := map[string]bool{} 193 prevInsertLines := map[string]bool{} 194 var list2 textList 195 list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true}) 196 for _, r := range list { 197 if !r.Value.Equal(textEllipsis) { 198 line, _ := strconv.Unquote(string(r.Value.(textLine))) 199 line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support 200 normLine := strings.Map(func(r rune) rune { 201 if unicode.IsSpace(r) { 202 return -1 // drop whitespace to avoid visually indistinguishable output 203 } 204 return r 205 }, line) 206 isPrintable := func(r rune) bool { 207 return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable 208 } 209 isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == "" 210 switch r.Diff { 211 case diffRemoved: 212 isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine] 213 prevRemoveLines[normLine] = true 214 case diffInserted: 215 isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine] 216 prevInsertLines[normLine] = true 217 } 218 if !isTripleQuoted { 219 break 220 } 221 r.Value = textLine(line) 222 r.ElideComma = true 223 } 224 if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group 225 prevRemoveLines = map[string]bool{} 226 prevInsertLines = map[string]bool{} 227 } 228 list2 = append(list2, r) 229 } 230 if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 { 231 list2 = list2[:len(list2)-1] // elide single empty line at the end 232 } 233 list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true}) 234 if isTripleQuoted { 235 var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"} 236 switch t.Kind() { 237 case reflect.String: 238 if t != stringType { 239 out = opts.FormatType(t, out) 240 } 241 case reflect.Slice: 242 // Always emit type for slices since the triple-quote syntax 243 // looks like a string (not a slice). 244 opts = opts.WithTypeMode(emitType) 245 out = opts.FormatType(t, out) 246 } 247 return out 248 } 249 250 // If the text appears to be single-lined text, 251 // then perform differencing in approximately fixed-sized chunks. 252 // The output is printed as quoted strings. 253 case isMostlyText: 254 list = opts.formatDiffSlice( 255 reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte", 256 func(v reflect.Value, d diffMode) textRecord { 257 s := formatString(v.String()) 258 return textRecord{Diff: d, Value: textLine(s)} 259 }, 260 ) 261 262 // If the text appears to be binary data, 263 // then perform differencing in approximately fixed-sized chunks. 264 // The output is inspired by hexdump. 265 case isBinary: 266 list = opts.formatDiffSlice( 267 reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte", 268 func(v reflect.Value, d diffMode) textRecord { 269 var ss []string 270 for i := 0; i < v.Len(); i++ { 271 ss = append(ss, formatHex(v.Index(i).Uint())) 272 } 273 s := strings.Join(ss, ", ") 274 comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String()))) 275 return textRecord{Diff: d, Value: textLine(s), Comment: comment} 276 }, 277 ) 278 279 // For all other slices of primitive types, 280 // then perform differencing in approximately fixed-sized chunks. 281 // The size of each chunk depends on the width of the element kind. 282 default: 283 var chunkSize int 284 if t.Elem().Kind() == reflect.Bool { 285 chunkSize = 16 286 } else { 287 switch t.Elem().Bits() { 288 case 8: 289 chunkSize = 16 290 case 16: 291 chunkSize = 12 292 case 32: 293 chunkSize = 8 294 default: 295 chunkSize = 8 296 } 297 } 298 list = opts.formatDiffSlice( 299 vx, vy, chunkSize, t.Elem().Kind().String(), 300 func(v reflect.Value, d diffMode) textRecord { 301 var ss []string 302 for i := 0; i < v.Len(); i++ { 303 switch t.Elem().Kind() { 304 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 305 ss = append(ss, fmt.Sprint(v.Index(i).Int())) 306 case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64: 307 ss = append(ss, fmt.Sprint(v.Index(i).Uint())) 308 case reflect.Uint8, reflect.Uintptr: 309 ss = append(ss, formatHex(v.Index(i).Uint())) 310 case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: 311 ss = append(ss, fmt.Sprint(v.Index(i).Interface())) 312 } 313 } 314 s := strings.Join(ss, ", ") 315 return textRecord{Diff: d, Value: textLine(s)} 316 }, 317 ) 318 } 319 320 // Wrap the output with appropriate type information. 321 var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"} 322 if !isMostlyText { 323 // The "{...}" byte-sequence literal is not valid Go syntax for strings. 324 // Emit the type for extra clarity (e.g. "string{...}"). 325 if t.Kind() == reflect.String { 326 opts = opts.WithTypeMode(emitType) 327 } 328 return opts.FormatType(t, out) 329 } 330 switch t.Kind() { 331 case reflect.String: 332 out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)} 333 if t != stringType { 334 out = opts.FormatType(t, out) 335 } 336 case reflect.Slice: 337 out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)} 338 if t != bytesType { 339 out = opts.FormatType(t, out) 340 } 341 } 342 return out 343} 344 345// formatASCII formats s as an ASCII string. 346// This is useful for printing binary strings in a semi-legible way. 347func formatASCII(s string) string { 348 b := bytes.Repeat([]byte{'.'}, len(s)) 349 for i := 0; i < len(s); i++ { 350 if ' ' <= s[i] && s[i] <= '~' { 351 b[i] = s[i] 352 } 353 } 354 return string(b) 355} 356 357func (opts formatOptions) formatDiffSlice( 358 vx, vy reflect.Value, chunkSize int, name string, 359 makeRec func(reflect.Value, diffMode) textRecord, 360) (list textList) { 361 eq := func(ix, iy int) bool { 362 return vx.Index(ix).Interface() == vy.Index(iy).Interface() 363 } 364 es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result { 365 return diff.BoolResult(eq(ix, iy)) 366 }) 367 368 appendChunks := func(v reflect.Value, d diffMode) int { 369 n0 := v.Len() 370 for v.Len() > 0 { 371 n := chunkSize 372 if n > v.Len() { 373 n = v.Len() 374 } 375 list = append(list, makeRec(v.Slice(0, n), d)) 376 v = v.Slice(n, v.Len()) 377 } 378 return n0 - v.Len() 379 } 380 381 var numDiffs int 382 maxLen := -1 383 if opts.LimitVerbosity { 384 maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc... 385 opts.VerbosityLevel-- 386 } 387 388 groups := coalesceAdjacentEdits(name, es) 389 groups = coalesceInterveningIdentical(groups, chunkSize/4) 390 groups = cleanupSurroundingIdentical(groups, eq) 391 maxGroup := diffStats{Name: name} 392 for i, ds := range groups { 393 if maxLen >= 0 && numDiffs >= maxLen { 394 maxGroup = maxGroup.Append(ds) 395 continue 396 } 397 398 // Print equal. 399 if ds.NumDiff() == 0 { 400 // Compute the number of leading and trailing equal bytes to print. 401 var numLo, numHi int 402 numEqual := ds.NumIgnored + ds.NumIdentical 403 for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 { 404 numLo++ 405 } 406 for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 { 407 numHi++ 408 } 409 if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 { 410 numHi = numEqual - numLo // Avoid pointless coalescing of single equal row 411 } 412 413 // Print the equal bytes. 414 appendChunks(vx.Slice(0, numLo), diffIdentical) 415 if numEqual > numLo+numHi { 416 ds.NumIdentical -= numLo + numHi 417 list.AppendEllipsis(ds) 418 } 419 appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical) 420 vx = vx.Slice(numEqual, vx.Len()) 421 vy = vy.Slice(numEqual, vy.Len()) 422 continue 423 } 424 425 // Print unequal. 426 len0 := len(list) 427 nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved) 428 vx = vx.Slice(nx, vx.Len()) 429 ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted) 430 vy = vy.Slice(ny, vy.Len()) 431 numDiffs += len(list) - len0 432 } 433 if maxGroup.IsZero() { 434 assert(vx.Len() == 0 && vy.Len() == 0) 435 } else { 436 list.AppendEllipsis(maxGroup) 437 } 438 return list 439} 440 441// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent 442// equal or unequal counts. 443// 444// Example: 445// 446// Input: "..XXY...Y" 447// Output: [ 448// {NumIdentical: 2}, 449// {NumRemoved: 2, NumInserted 1}, 450// {NumIdentical: 3}, 451// {NumInserted: 1}, 452// ] 453func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) { 454 var prevMode byte 455 lastStats := func(mode byte) *diffStats { 456 if prevMode != mode { 457 groups = append(groups, diffStats{Name: name}) 458 prevMode = mode 459 } 460 return &groups[len(groups)-1] 461 } 462 for _, e := range es { 463 switch e { 464 case diff.Identity: 465 lastStats('=').NumIdentical++ 466 case diff.UniqueX: 467 lastStats('!').NumRemoved++ 468 case diff.UniqueY: 469 lastStats('!').NumInserted++ 470 case diff.Modified: 471 lastStats('!').NumModified++ 472 } 473 } 474 return groups 475} 476 477// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize) 478// equal groups into adjacent unequal groups that currently result in a 479// dual inserted/removed printout. This acts as a high-pass filter to smooth 480// out high-frequency changes within the windowSize. 481// 482// Example: 483// 484// WindowSize: 16, 485// Input: [ 486// {NumIdentical: 61}, // group 0 487// {NumRemoved: 3, NumInserted: 1}, // group 1 488// {NumIdentical: 6}, // ├── coalesce 489// {NumInserted: 2}, // ├── coalesce 490// {NumIdentical: 1}, // ├── coalesce 491// {NumRemoved: 9}, // └── coalesce 492// {NumIdentical: 64}, // group 2 493// {NumRemoved: 3, NumInserted: 1}, // group 3 494// {NumIdentical: 6}, // ├── coalesce 495// {NumInserted: 2}, // ├── coalesce 496// {NumIdentical: 1}, // ├── coalesce 497// {NumRemoved: 7}, // ├── coalesce 498// {NumIdentical: 1}, // ├── coalesce 499// {NumRemoved: 2}, // └── coalesce 500// {NumIdentical: 63}, // group 4 501// ] 502// Output: [ 503// {NumIdentical: 61}, 504// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, 505// {NumIdentical: 64}, 506// {NumIdentical: 8, NumRemoved: 12, NumInserted: 3}, 507// {NumIdentical: 63}, 508// ] 509func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats { 510 groups, groupsOrig := groups[:0], groups 511 for i, ds := range groupsOrig { 512 if len(groups) >= 2 && ds.NumDiff() > 0 { 513 prev := &groups[len(groups)-2] // Unequal group 514 curr := &groups[len(groups)-1] // Equal group 515 next := &groupsOrig[i] // Unequal group 516 hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0 517 hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0 518 if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize { 519 *prev = prev.Append(*curr).Append(*next) 520 groups = groups[:len(groups)-1] // Truncate off equal group 521 continue 522 } 523 } 524 groups = append(groups, ds) 525 } 526 return groups 527} 528 529// cleanupSurroundingIdentical scans through all unequal groups, and 530// moves any leading sequence of equal elements to the preceding equal group and 531// moves and trailing sequence of equal elements to the succeeding equal group. 532// 533// This is necessary since coalesceInterveningIdentical may coalesce edit groups 534// together such that leading/trailing spans of equal elements becomes possible. 535// Note that this can occur even with an optimal diffing algorithm. 536// 537// Example: 538// 539// Input: [ 540// {NumIdentical: 61}, 541// {NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements 542// {NumIdentical: 67}, 543// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, // assume 10 trailing identical elements 544// {NumIdentical: 54}, 545// ] 546// Output: [ 547// {NumIdentical: 64}, // incremented by 3 548// {NumRemoved: 9}, 549// {NumIdentical: 67}, 550// {NumRemoved: 9}, 551// {NumIdentical: 64}, // incremented by 10 552// ] 553func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats { 554 var ix, iy int // indexes into sequence x and y 555 for i, ds := range groups { 556 // Handle equal group. 557 if ds.NumDiff() == 0 { 558 ix += ds.NumIdentical 559 iy += ds.NumIdentical 560 continue 561 } 562 563 // Handle unequal group. 564 nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified 565 ny := ds.NumIdentical + ds.NumInserted + ds.NumModified 566 var numLeadingIdentical, numTrailingIdentical int 567 for j := 0; j < nx && j < ny && eq(ix+j, iy+j); j++ { 568 numLeadingIdentical++ 569 } 570 for j := 0; j < nx && j < ny && eq(ix+nx-1-j, iy+ny-1-j); j++ { 571 numTrailingIdentical++ 572 } 573 if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 { 574 if numLeadingIdentical > 0 { 575 // Remove leading identical span from this group and 576 // insert it into the preceding group. 577 if i-1 >= 0 { 578 groups[i-1].NumIdentical += numLeadingIdentical 579 } else { 580 // No preceding group exists, so prepend a new group, 581 // but do so after we finish iterating over all groups. 582 defer func() { 583 groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...) 584 }() 585 } 586 // Increment indexes since the preceding group would have handled this. 587 ix += numLeadingIdentical 588 iy += numLeadingIdentical 589 } 590 if numTrailingIdentical > 0 { 591 // Remove trailing identical span from this group and 592 // insert it into the succeeding group. 593 if i+1 < len(groups) { 594 groups[i+1].NumIdentical += numTrailingIdentical 595 } else { 596 // No succeeding group exists, so append a new group, 597 // but do so after we finish iterating over all groups. 598 defer func() { 599 groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical}) 600 }() 601 } 602 // Do not increment indexes since the succeeding group will handle this. 603 } 604 605 // Update this group since some identical elements were removed. 606 nx -= numIdentical 607 ny -= numIdentical 608 groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny} 609 } 610 ix += nx 611 iy += ny 612 } 613 return groups 614} 615