1// Copyright 2017, The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package diff 6 7import ( 8 "fmt" 9 "math/rand" 10 "strings" 11 "testing" 12 "unicode" 13) 14 15func TestDifference(t *testing.T) { 16 tests := []struct { 17 // Before passing x and y to Difference, we strip all spaces so that 18 // they can be used by the test author to indicate a missing symbol 19 // in one of the lists. 20 x, y string 21 want string // '|' separated list of possible outputs 22 }{{ 23 x: "", 24 y: "", 25 want: "", 26 }, { 27 x: "#", 28 y: "#", 29 want: ".", 30 }, { 31 x: "##", 32 y: "# ", 33 want: ".X|X.", 34 }, { 35 x: "a#", 36 y: "A ", 37 want: "MX", 38 }, { 39 x: "#a", 40 y: " A", 41 want: "XM", 42 }, { 43 x: "# ", 44 y: "##", 45 want: ".Y|Y.", 46 }, { 47 x: " #", 48 y: "@#", 49 want: "Y.", 50 }, { 51 x: "@#", 52 y: " #", 53 want: "X.", 54 }, { 55 x: "##########0123456789", 56 y: " 0123456789", 57 want: "XXXXXXXXXX..........", 58 }, { 59 x: " 0123456789", 60 y: "##########0123456789", 61 want: "YYYYYYYYYY..........", 62 }, { 63 x: "#####0123456789#####", 64 y: " 0123456789 ", 65 want: "XXXXX..........XXXXX", 66 }, { 67 x: " 0123456789 ", 68 y: "#####0123456789#####", 69 want: "YYYYY..........YYYYY", 70 }, { 71 x: "01234##########56789", 72 y: "01234 56789", 73 want: ".....XXXXXXXXXX.....", 74 }, { 75 x: "01234 56789", 76 y: "01234##########56789", 77 want: ".....YYYYYYYYYY.....", 78 }, { 79 x: "0123456789##########", 80 y: "0123456789 ", 81 want: "..........XXXXXXXXXX", 82 }, { 83 x: "0123456789 ", 84 y: "0123456789##########", 85 want: "..........YYYYYYYYYY", 86 }, { 87 x: "abcdefghij0123456789", 88 y: "ABCDEFGHIJ0123456789", 89 want: "MMMMMMMMMM..........", 90 }, { 91 x: "ABCDEFGHIJ0123456789", 92 y: "abcdefghij0123456789", 93 want: "MMMMMMMMMM..........", 94 }, { 95 x: "01234abcdefghij56789", 96 y: "01234ABCDEFGHIJ56789", 97 want: ".....MMMMMMMMMM.....", 98 }, { 99 x: "01234ABCDEFGHIJ56789", 100 y: "01234abcdefghij56789", 101 want: ".....MMMMMMMMMM.....", 102 }, { 103 x: "0123456789abcdefghij", 104 y: "0123456789ABCDEFGHIJ", 105 want: "..........MMMMMMMMMM", 106 }, { 107 x: "0123456789ABCDEFGHIJ", 108 y: "0123456789abcdefghij", 109 want: "..........MMMMMMMMMM", 110 }, { 111 x: "ABCDEFGHIJ0123456789 ", 112 y: " 0123456789abcdefghij", 113 want: "XXXXXXXXXX..........YYYYYYYYYY", 114 }, { 115 x: " 0123456789abcdefghij", 116 y: "ABCDEFGHIJ0123456789 ", 117 want: "YYYYYYYYYY..........XXXXXXXXXX", 118 }, { 119 x: "ABCDE0123456789 FGHIJ", 120 y: " 0123456789abcdefghij", 121 want: "XXXXX..........YYYYYMMMMM", 122 }, { 123 x: " 0123456789abcdefghij", 124 y: "ABCDE0123456789 FGHIJ", 125 want: "YYYYY..........XXXXXMMMMM", 126 }, { 127 x: "ABCDE01234F G H I J 56789 ", 128 y: " 01234 a b c d e56789fghij", 129 want: "XXXXX.....XYXYXYXYXY.....YYYYY", 130 }, { 131 x: " 01234a b c d e 56789fghij", 132 y: "ABCDE01234 F G H I J56789 ", 133 want: "YYYYY.....XYXYXYXYXY.....XXXXX", 134 }, { 135 x: "FGHIJ01234ABCDE56789 ", 136 y: " 01234abcde56789fghij", 137 want: "XXXXX.....MMMMM.....YYYYY", 138 }, { 139 x: " 01234abcde56789fghij", 140 y: "FGHIJ01234ABCDE56789 ", 141 want: "YYYYY.....MMMMM.....XXXXX", 142 }, { 143 x: "ABCAB BA ", 144 y: " C BABAC", 145 want: "XX.X.Y..Y|XX.Y.X..Y", 146 }, { 147 x: "# #### ###", 148 y: "#y####yy###", 149 want: ".Y....YY...", 150 }, { 151 x: "# #### # ##x#x", 152 y: "#y####y y## # ", 153 want: ".Y....YXY..X.X", 154 }, { 155 x: "###z#z###### x #", 156 y: "#y##Z#Z###### yy#", 157 want: ".Y..M.M......XYY.", 158 }, { 159 x: "0 12z3x 456789 x x 0", 160 y: "0y12Z3 y456789y y y0", 161 want: ".Y..M.XY......YXYXY.|.Y..M.XY......XYXYY.", 162 }, { 163 x: "0 2 4 6 8 ..................abXXcdEXF.ghXi", 164 y: " 1 3 5 7 9..................AB CDE F.GH I", 165 want: "XYXYXYXYXY..................MMXXMM.X..MMXM", 166 }, { 167 x: "I HG.F EDC BA..................9 7 5 3 1 ", 168 y: "iXhg.FXEdcXXba.................. 8 6 4 2 0", 169 want: "MYMM..Y.MMYYMM..................XYXYXYXYXY", 170 }, { 171 x: "x1234", 172 y: " 1234", 173 want: "X....", 174 }, { 175 x: "x123x4", 176 y: " 123 4", 177 want: "X...X.", 178 }, { 179 x: "x1234x56", 180 y: " 1234 ", 181 want: "X....XXX", 182 }, { 183 x: "x1234xxx56", 184 y: " 1234 56", 185 want: "X....XXX..", 186 }, { 187 x: ".1234...ab", 188 y: " 1234 AB", 189 want: "X....XXXMM", 190 }, { 191 x: "x1234xxab.", 192 y: " 1234 AB ", 193 want: "X....XXMMX", 194 }, { 195 x: " 0123456789", 196 y: "9012345678 ", 197 want: "Y.........X", 198 }, { 199 x: " 0123456789", 200 y: "8901234567 ", 201 want: "YY........XX", 202 }, { 203 x: " 0123456789", 204 y: "7890123456 ", 205 want: "YYY.......XXX", 206 }, { 207 x: " 0123456789", 208 y: "6789012345 ", 209 want: "YYYY......XXXX", 210 }, { 211 x: "0123456789 ", 212 y: " 5678901234", 213 want: "XXXXX.....YYYYY|YYYYY.....XXXXX", 214 }, { 215 x: "0123456789 ", 216 y: " 4567890123", 217 want: "XXXX......YYYY", 218 }, { 219 x: "0123456789 ", 220 y: " 3456789012", 221 want: "XXX.......YYY", 222 }, { 223 x: "0123456789 ", 224 y: " 2345678901", 225 want: "XX........YY", 226 }, { 227 x: "0123456789 ", 228 y: " 1234567890", 229 want: "X.........Y", 230 }, { 231 x: "0 1 2 3 45 6 7 8 9 ", 232 y: " 9 8 7 6 54 3 2 1 0", 233 want: "XYXYXYXYX.YXYXYXYXY", 234 }, { 235 x: "0 1 2345678 9 ", 236 y: " 6 72 5 819034", 237 want: "XYXY.XX.XX.Y.YYY", 238 }, { 239 x: "F B Q M O I G T L N72X90 E 4S P 651HKRJU DA 83CVZW", 240 y: " 5 W H XO10R9IV K ZLCTAJ8P3N SEQM4 7 2G6 UBD F ", 241 want: "XYXYXYXY.YYYY.YXYXY.YYYYYYY.XXXXXY.YY.XYXYY.XXXXXX.Y.XYXXXXXX", 242 }} 243 244 for _, tt := range tests { 245 t.Run("", func(t *testing.T) { 246 x := strings.Replace(tt.x, " ", "", -1) 247 y := strings.Replace(tt.y, " ", "", -1) 248 es := testStrings(t, x, y) 249 var want string 250 got := es.String() 251 for _, want = range strings.Split(tt.want, "|") { 252 if got == want { 253 return 254 } 255 } 256 t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, want) 257 }) 258 } 259} 260 261func TestDifferenceFuzz(t *testing.T) { 262 tests := []struct{ px, py, pm float32 }{ 263 {px: 0.0, py: 0.0, pm: 0.1}, 264 {px: 0.0, py: 0.1, pm: 0.0}, 265 {px: 0.1, py: 0.0, pm: 0.0}, 266 {px: 0.0, py: 0.1, pm: 0.1}, 267 {px: 0.1, py: 0.0, pm: 0.1}, 268 {px: 0.2, py: 0.2, pm: 0.2}, 269 {px: 0.3, py: 0.1, pm: 0.2}, 270 {px: 0.1, py: 0.3, pm: 0.2}, 271 {px: 0.2, py: 0.2, pm: 0.2}, 272 {px: 0.3, py: 0.3, pm: 0.3}, 273 {px: 0.1, py: 0.1, pm: 0.5}, 274 {px: 0.4, py: 0.1, pm: 0.5}, 275 {px: 0.3, py: 0.2, pm: 0.5}, 276 {px: 0.2, py: 0.3, pm: 0.5}, 277 {px: 0.1, py: 0.4, pm: 0.5}, 278 } 279 280 for i, tt := range tests { 281 t.Run(fmt.Sprintf("P%d", i), func(t *testing.T) { 282 // Sweep from 1B to 1KiB. 283 for n := 1; n <= 1024; n <<= 1 { 284 t.Run(fmt.Sprintf("N%d", n), func(t *testing.T) { 285 for j := 0; j < 10; j++ { 286 x, y := generateStrings(n, tt.px, tt.py, tt.pm, int64(j)) 287 testStrings(t, x, y) 288 } 289 }) 290 } 291 }) 292 } 293} 294 295func BenchmarkDifference(b *testing.B) { 296 for n := 1 << 10; n <= 1<<20; n <<= 2 { 297 b.Run(fmt.Sprintf("N%d", n), func(b *testing.B) { 298 x, y := generateStrings(n, 0.05, 0.05, 0.10, 0) 299 b.ReportAllocs() 300 b.SetBytes(int64(len(x) + len(y))) 301 for i := 0; i < b.N; i++ { 302 Difference(len(x), len(y), func(ix, iy int) Result { 303 return compareByte(x[ix], y[iy]) 304 }) 305 } 306 }) 307 } 308} 309 310func generateStrings(n int, px, py, pm float32, seed int64) (string, string) { 311 if px+py+pm > 1.0 { 312 panic("invalid probabilities") 313 } 314 py += px 315 pm += py 316 317 b := make([]byte, n) 318 r := rand.New(rand.NewSource(seed)) 319 r.Read(b) 320 321 var x, y []byte 322 for len(b) > 0 { 323 switch p := r.Float32(); { 324 case p < px: // UniqueX 325 x = append(x, b[0]) 326 case p < py: // UniqueY 327 y = append(y, b[0]) 328 case p < pm: // Modified 329 x = append(x, 'A'+(b[0]%26)) 330 y = append(y, 'a'+(b[0]%26)) 331 default: // Identity 332 x = append(x, b[0]) 333 y = append(y, b[0]) 334 } 335 b = b[1:] 336 } 337 return string(x), string(y) 338} 339 340func testStrings(t *testing.T, x, y string) EditScript { 341 es := Difference(len(x), len(y), func(ix, iy int) Result { 342 return compareByte(x[ix], y[iy]) 343 }) 344 if es.LenX() != len(x) { 345 t.Errorf("es.LenX = %d, want %d", es.LenX(), len(x)) 346 } 347 if es.LenY() != len(y) { 348 t.Errorf("es.LenY = %d, want %d", es.LenY(), len(y)) 349 } 350 if !validateScript(x, y, es) { 351 t.Errorf("invalid edit script: %v", es) 352 } 353 return es 354} 355 356func validateScript(x, y string, es EditScript) bool { 357 var bx, by []byte 358 for _, e := range es { 359 switch e { 360 case Identity: 361 if !compareByte(x[len(bx)], y[len(by)]).Equal() { 362 return false 363 } 364 bx = append(bx, x[len(bx)]) 365 by = append(by, y[len(by)]) 366 case UniqueX: 367 bx = append(bx, x[len(bx)]) 368 case UniqueY: 369 by = append(by, y[len(by)]) 370 case Modified: 371 if !compareByte(x[len(bx)], y[len(by)]).Similar() { 372 return false 373 } 374 bx = append(bx, x[len(bx)]) 375 by = append(by, y[len(by)]) 376 } 377 } 378 return string(bx) == x && string(by) == y 379} 380 381// compareByte returns a Result where the result is Equal if x == y, 382// similar if x and y differ only in casing, and different otherwise. 383func compareByte(x, y byte) (r Result) { 384 switch { 385 case x == y: 386 return equalResult // Identity 387 case unicode.ToUpper(rune(x)) == unicode.ToUpper(rune(y)): 388 return similarResult // Modified 389 default: 390 return differentResult // UniqueX or UniqueY 391 } 392} 393 394var ( 395 equalResult = Result{NumDiff: 0} 396 similarResult = Result{NumDiff: 1} 397 differentResult = Result{NumDiff: 2} 398) 399 400func TestResult(t *testing.T) { 401 tests := []struct { 402 result Result 403 wantEqual bool 404 wantSimilar bool 405 }{ 406 // equalResult is equal since NumDiff == 0, by definition of Equal method. 407 {equalResult, true, true}, 408 // similarResult is similar since it is a binary result where only one 409 // element was compared (i.e., Either NumSame==1 or NumDiff==1). 410 {similarResult, false, true}, 411 // differentResult is different since there are enough differences that 412 // it isn't even considered similar. 413 {differentResult, false, false}, 414 415 // Zero value is always equal. 416 {Result{NumSame: 0, NumDiff: 0}, true, true}, 417 418 // Binary comparisons (where NumSame+NumDiff == 1) are always similar. 419 {Result{NumSame: 1, NumDiff: 0}, true, true}, 420 {Result{NumSame: 0, NumDiff: 1}, false, true}, 421 422 // More complex ratios. The exact ratio for similarity may change, 423 // and may require updates to these test cases. 424 {Result{NumSame: 1, NumDiff: 1}, false, true}, 425 {Result{NumSame: 1, NumDiff: 2}, false, true}, 426 {Result{NumSame: 1, NumDiff: 3}, false, false}, 427 {Result{NumSame: 2, NumDiff: 1}, false, true}, 428 {Result{NumSame: 2, NumDiff: 2}, false, true}, 429 {Result{NumSame: 2, NumDiff: 3}, false, true}, 430 {Result{NumSame: 3, NumDiff: 1}, false, true}, 431 {Result{NumSame: 3, NumDiff: 2}, false, true}, 432 {Result{NumSame: 3, NumDiff: 3}, false, true}, 433 {Result{NumSame: 1000, NumDiff: 0}, true, true}, 434 {Result{NumSame: 1000, NumDiff: 1}, false, true}, 435 {Result{NumSame: 1000, NumDiff: 2}, false, true}, 436 {Result{NumSame: 0, NumDiff: 1000}, false, false}, 437 {Result{NumSame: 1, NumDiff: 1000}, false, false}, 438 {Result{NumSame: 2, NumDiff: 1000}, false, false}, 439 } 440 441 for _, tt := range tests { 442 if got := tt.result.Equal(); got != tt.wantEqual { 443 t.Errorf("%#v.Equal() = %v, want %v", tt.result, got, tt.wantEqual) 444 } 445 if got := tt.result.Similar(); got != tt.wantSimilar { 446 t.Errorf("%#v.Similar() = %v, want %v", tt.result, got, tt.wantSimilar) 447 } 448 } 449} 450