1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package regexp 6 7import ( 8 "reflect" 9 "regexp/syntax" 10 "slices" 11 "strings" 12 "testing" 13 "unicode/utf8" 14) 15 16var goodRe = []string{ 17 ``, 18 `.`, 19 `^.$`, 20 `a`, 21 `a*`, 22 `a+`, 23 `a?`, 24 `a|b`, 25 `a*|b*`, 26 `(a*|b)(c*|d)`, 27 `[a-z]`, 28 `[a-abc-c\-\]\[]`, 29 `[a-z]+`, 30 `[abc]`, 31 `[^1234]`, 32 `[^\n]`, 33 `\!\\`, 34} 35 36type stringError struct { 37 re string 38 err string 39} 40 41var badRe = []stringError{ 42 {`*`, "missing argument to repetition operator: `*`"}, 43 {`+`, "missing argument to repetition operator: `+`"}, 44 {`?`, "missing argument to repetition operator: `?`"}, 45 {`(abc`, "missing closing ): `(abc`"}, 46 {`abc)`, "unexpected ): `abc)`"}, 47 {`x[a-z`, "missing closing ]: `[a-z`"}, 48 {`[z-a]`, "invalid character class range: `z-a`"}, 49 {`abc\`, "trailing backslash at end of expression"}, 50 {`a**`, "invalid nested repetition operator: `**`"}, 51 {`a*+`, "invalid nested repetition operator: `*+`"}, 52 {`\x`, "invalid escape sequence: `\\x`"}, 53 {strings.Repeat(`\pL`, 27000), "expression too large"}, 54} 55 56func compileTest(t *testing.T, expr string, error string) *Regexp { 57 re, err := Compile(expr) 58 if error == "" && err != nil { 59 t.Error("compiling `", expr, "`; unexpected error: ", err.Error()) 60 } 61 if error != "" && err == nil { 62 t.Error("compiling `", expr, "`; missing error") 63 } else if error != "" && !strings.Contains(err.Error(), error) { 64 t.Error("compiling `", expr, "`; wrong error: ", err.Error(), "; want ", error) 65 } 66 return re 67} 68 69func TestGoodCompile(t *testing.T) { 70 for i := 0; i < len(goodRe); i++ { 71 compileTest(t, goodRe[i], "") 72 } 73} 74 75func TestBadCompile(t *testing.T) { 76 for i := 0; i < len(badRe); i++ { 77 compileTest(t, badRe[i].re, badRe[i].err) 78 } 79} 80 81func matchTest(t *testing.T, test *FindTest) { 82 re := compileTest(t, test.pat, "") 83 if re == nil { 84 return 85 } 86 m := re.MatchString(test.text) 87 if m != (len(test.matches) > 0) { 88 t.Errorf("MatchString failure on %s: %t should be %t", test, m, len(test.matches) > 0) 89 } 90 // now try bytes 91 m = re.Match([]byte(test.text)) 92 if m != (len(test.matches) > 0) { 93 t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0) 94 } 95} 96 97func TestMatch(t *testing.T) { 98 for _, test := range findTests { 99 matchTest(t, &test) 100 } 101} 102 103func matchFunctionTest(t *testing.T, test *FindTest) { 104 m, err := MatchString(test.pat, test.text) 105 if err == nil { 106 return 107 } 108 if m != (len(test.matches) > 0) { 109 t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0) 110 } 111} 112 113func TestMatchFunction(t *testing.T) { 114 for _, test := range findTests { 115 matchFunctionTest(t, &test) 116 } 117} 118 119func copyMatchTest(t *testing.T, test *FindTest) { 120 re := compileTest(t, test.pat, "") 121 if re == nil { 122 return 123 } 124 m1 := re.MatchString(test.text) 125 m2 := re.Copy().MatchString(test.text) 126 if m1 != m2 { 127 t.Errorf("Copied Regexp match failure on %s: original gave %t; copy gave %t; should be %t", 128 test, m1, m2, len(test.matches) > 0) 129 } 130} 131 132func TestCopyMatch(t *testing.T) { 133 for _, test := range findTests { 134 copyMatchTest(t, &test) 135 } 136} 137 138type ReplaceTest struct { 139 pattern, replacement, input, output string 140} 141 142var replaceTests = []ReplaceTest{ 143 // Test empty input and/or replacement, with pattern that matches the empty string. 144 {"", "", "", ""}, 145 {"", "x", "", "x"}, 146 {"", "", "abc", "abc"}, 147 {"", "x", "abc", "xaxbxcx"}, 148 149 // Test empty input and/or replacement, with pattern that does not match the empty string. 150 {"b", "", "", ""}, 151 {"b", "x", "", ""}, 152 {"b", "", "abc", "ac"}, 153 {"b", "x", "abc", "axc"}, 154 {"y", "", "", ""}, 155 {"y", "x", "", ""}, 156 {"y", "", "abc", "abc"}, 157 {"y", "x", "abc", "abc"}, 158 159 // Multibyte characters -- verify that we don't try to match in the middle 160 // of a character. 161 {"[a-c]*", "x", "\u65e5", "x\u65e5x"}, 162 {"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"}, 163 164 // Start and end of a string. 165 {"^[a-c]*", "x", "abcdabc", "xdabc"}, 166 {"[a-c]*$", "x", "abcdabc", "abcdx"}, 167 {"^[a-c]*$", "x", "abcdabc", "abcdabc"}, 168 {"^[a-c]*", "x", "abc", "x"}, 169 {"[a-c]*$", "x", "abc", "x"}, 170 {"^[a-c]*$", "x", "abc", "x"}, 171 {"^[a-c]*", "x", "dabce", "xdabce"}, 172 {"[a-c]*$", "x", "dabce", "dabcex"}, 173 {"^[a-c]*$", "x", "dabce", "dabce"}, 174 {"^[a-c]*", "x", "", "x"}, 175 {"[a-c]*$", "x", "", "x"}, 176 {"^[a-c]*$", "x", "", "x"}, 177 178 {"^[a-c]+", "x", "abcdabc", "xdabc"}, 179 {"[a-c]+$", "x", "abcdabc", "abcdx"}, 180 {"^[a-c]+$", "x", "abcdabc", "abcdabc"}, 181 {"^[a-c]+", "x", "abc", "x"}, 182 {"[a-c]+$", "x", "abc", "x"}, 183 {"^[a-c]+$", "x", "abc", "x"}, 184 {"^[a-c]+", "x", "dabce", "dabce"}, 185 {"[a-c]+$", "x", "dabce", "dabce"}, 186 {"^[a-c]+$", "x", "dabce", "dabce"}, 187 {"^[a-c]+", "x", "", ""}, 188 {"[a-c]+$", "x", "", ""}, 189 {"^[a-c]+$", "x", "", ""}, 190 191 // Other cases. 192 {"abc", "def", "abcdefg", "defdefg"}, 193 {"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"}, 194 {"abc", "", "abcdabc", "d"}, 195 {"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"}, 196 {"abc", "d", "", ""}, 197 {"abc", "d", "abc", "d"}, 198 {".+", "x", "abc", "x"}, 199 {"[a-c]*", "x", "def", "xdxexfx"}, 200 {"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"}, 201 {"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"}, 202 203 // Substitutions 204 {"a+", "($0)", "banana", "b(a)n(a)n(a)"}, 205 {"a+", "(${0})", "banana", "b(a)n(a)n(a)"}, 206 {"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"}, 207 {"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"}, 208 {"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, world"}, 209 {"hello, (.+)", "goodbye, $1x", "hello, world", "goodbye, "}, 210 {"hello, (.+)", "goodbye, ${1}x", "hello, world", "goodbye, worldx"}, 211 {"hello, (.+)", "<$0><$1><$2><$3>", "hello, world", "<hello, world><world><><>"}, 212 {"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, world!"}, 213 {"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, world"}, 214 {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "hihihi"}, 215 {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "byebyebye"}, 216 {"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", ""}, 217 {"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "hiyz"}, 218 {"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $x"}, 219 {"a+", "${oops", "aaa", "${oops"}, 220 {"a+", "$$", "aaa", "$"}, 221 {"a+", "$", "aaa", "$"}, 222 223 // Substitution when subexpression isn't found 224 {"(x)?", "$1", "123", "123"}, 225 {"abc", "$1", "123", "123"}, 226 227 // Substitutions involving a (x){0} 228 {"(a)(b){0}(c)", ".$1|$3.", "xacxacx", "x.a|c.x.a|c.x"}, 229 {"(a)(((b))){0}c", ".$1.", "xacxacx", "x.a.x.a.x"}, 230 {"((a(b){0}){3}){5}(h)", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"}, 231 {"((a(b){0}){3}){5}h", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"}, 232} 233 234var replaceLiteralTests = []ReplaceTest{ 235 // Substitutions 236 {"a+", "($0)", "banana", "b($0)n($0)n($0)"}, 237 {"a+", "(${0})", "banana", "b(${0})n(${0})n(${0})"}, 238 {"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"}, 239 {"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"}, 240 {"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, ${1}"}, 241 {"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, $noun!"}, 242 {"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, ${noun}"}, 243 {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "$x$x$x"}, 244 {"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "$x$x$x"}, 245 {"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", "$xyz"}, 246 {"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "${x}yz"}, 247 {"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $$x"}, 248 {"a+", "${oops", "aaa", "${oops"}, 249 {"a+", "$$", "aaa", "$$"}, 250 {"a+", "$", "aaa", "$"}, 251} 252 253type ReplaceFuncTest struct { 254 pattern string 255 replacement func(string) string 256 input, output string 257} 258 259var replaceFuncTests = []ReplaceFuncTest{ 260 {"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"}, 261 {"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"}, 262 {"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"}, 263} 264 265func TestReplaceAll(t *testing.T) { 266 for _, tc := range replaceTests { 267 re, err := Compile(tc.pattern) 268 if err != nil { 269 t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) 270 continue 271 } 272 actual := re.ReplaceAllString(tc.input, tc.replacement) 273 if actual != tc.output { 274 t.Errorf("%q.ReplaceAllString(%q,%q) = %q; want %q", 275 tc.pattern, tc.input, tc.replacement, actual, tc.output) 276 } 277 // now try bytes 278 actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement))) 279 if actual != tc.output { 280 t.Errorf("%q.ReplaceAll(%q,%q) = %q; want %q", 281 tc.pattern, tc.input, tc.replacement, actual, tc.output) 282 } 283 } 284} 285 286func TestReplaceAllLiteral(t *testing.T) { 287 // Run ReplaceAll tests that do not have $ expansions. 288 for _, tc := range replaceTests { 289 if strings.Contains(tc.replacement, "$") { 290 continue 291 } 292 re, err := Compile(tc.pattern) 293 if err != nil { 294 t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) 295 continue 296 } 297 actual := re.ReplaceAllLiteralString(tc.input, tc.replacement) 298 if actual != tc.output { 299 t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q", 300 tc.pattern, tc.input, tc.replacement, actual, tc.output) 301 } 302 // now try bytes 303 actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement))) 304 if actual != tc.output { 305 t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q", 306 tc.pattern, tc.input, tc.replacement, actual, tc.output) 307 } 308 } 309 310 // Run literal-specific tests. 311 for _, tc := range replaceLiteralTests { 312 re, err := Compile(tc.pattern) 313 if err != nil { 314 t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) 315 continue 316 } 317 actual := re.ReplaceAllLiteralString(tc.input, tc.replacement) 318 if actual != tc.output { 319 t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q", 320 tc.pattern, tc.input, tc.replacement, actual, tc.output) 321 } 322 // now try bytes 323 actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement))) 324 if actual != tc.output { 325 t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q", 326 tc.pattern, tc.input, tc.replacement, actual, tc.output) 327 } 328 } 329} 330 331func TestReplaceAllFunc(t *testing.T) { 332 for _, tc := range replaceFuncTests { 333 re, err := Compile(tc.pattern) 334 if err != nil { 335 t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) 336 continue 337 } 338 actual := re.ReplaceAllStringFunc(tc.input, tc.replacement) 339 if actual != tc.output { 340 t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q", 341 tc.pattern, tc.input, actual, tc.output) 342 } 343 // now try bytes 344 actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return []byte(tc.replacement(string(s))) })) 345 if actual != tc.output { 346 t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q", 347 tc.pattern, tc.input, actual, tc.output) 348 } 349 } 350} 351 352type MetaTest struct { 353 pattern, output, literal string 354 isLiteral bool 355} 356 357var metaTests = []MetaTest{ 358 {``, ``, ``, true}, 359 {`foo`, `foo`, `foo`, true}, 360 {`日本語+`, `日本語\+`, `日本語`, false}, 361 {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator 362 {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators 363 {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false}, 364} 365 366var literalPrefixTests = []MetaTest{ 367 // See golang.org/issue/11175. 368 // output is unused. 369 {`^0^0$`, ``, `0`, false}, 370 {`^0^`, ``, ``, false}, 371 {`^0$`, ``, `0`, true}, 372 {`$0^`, ``, ``, false}, 373 {`$0$`, ``, ``, false}, 374 {`^^0$$`, ``, ``, false}, 375 {`^$^$`, ``, ``, false}, 376 {`$$0^^`, ``, ``, false}, 377 {`a\x{fffd}b`, ``, `a`, false}, 378 {`\x{fffd}b`, ``, ``, false}, 379 {"\ufffd", ``, ``, false}, 380} 381 382func TestQuoteMeta(t *testing.T) { 383 for _, tc := range metaTests { 384 // Verify that QuoteMeta returns the expected string. 385 quoted := QuoteMeta(tc.pattern) 386 if quoted != tc.output { 387 t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`", 388 tc.pattern, quoted, tc.output) 389 continue 390 } 391 392 // Verify that the quoted string is in fact treated as expected 393 // by Compile -- i.e. that it matches the original, unquoted string. 394 if tc.pattern != "" { 395 re, err := Compile(quoted) 396 if err != nil { 397 t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err) 398 continue 399 } 400 src := "abc" + tc.pattern + "def" 401 repl := "xyz" 402 replaced := re.ReplaceAllString(src, repl) 403 expected := "abcxyzdef" 404 if replaced != expected { 405 t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`", 406 tc.pattern, src, repl, replaced, expected) 407 } 408 } 409 } 410} 411 412func TestLiteralPrefix(t *testing.T) { 413 for _, tc := range append(metaTests, literalPrefixTests...) { 414 // Literal method needs to scan the pattern. 415 re := MustCompile(tc.pattern) 416 str, complete := re.LiteralPrefix() 417 if complete != tc.isLiteral { 418 t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral) 419 } 420 if str != tc.literal { 421 t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal) 422 } 423 } 424} 425 426type subexpIndex struct { 427 name string 428 index int 429} 430 431type subexpCase struct { 432 input string 433 num int 434 names []string 435 indices []subexpIndex 436} 437 438var emptySubexpIndices = []subexpIndex{{"", -1}, {"missing", -1}} 439 440var subexpCases = []subexpCase{ 441 {``, 0, nil, emptySubexpIndices}, 442 {`.*`, 0, nil, emptySubexpIndices}, 443 {`abba`, 0, nil, emptySubexpIndices}, 444 {`ab(b)a`, 1, []string{"", ""}, emptySubexpIndices}, 445 {`ab(.*)a`, 1, []string{"", ""}, emptySubexpIndices}, 446 {`(.*)ab(.*)a`, 2, []string{"", "", ""}, emptySubexpIndices}, 447 {`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices}, 448 {`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}, emptySubexpIndices}, 449 {`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices}, 450 {`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices}, 451 {`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}, []subexpIndex{{"", -1}, {"missing", -1}, {"foo", 1}, {"bar", 2}}}, 452} 453 454func TestSubexp(t *testing.T) { 455 for _, c := range subexpCases { 456 re := MustCompile(c.input) 457 n := re.NumSubexp() 458 if n != c.num { 459 t.Errorf("%q: NumSubexp = %d, want %d", c.input, n, c.num) 460 continue 461 } 462 names := re.SubexpNames() 463 if len(names) != 1+n { 464 t.Errorf("%q: len(SubexpNames) = %d, want %d", c.input, len(names), n) 465 continue 466 } 467 if c.names != nil { 468 for i := 0; i < 1+n; i++ { 469 if names[i] != c.names[i] { 470 t.Errorf("%q: SubexpNames[%d] = %q, want %q", c.input, i, names[i], c.names[i]) 471 } 472 } 473 } 474 for _, subexp := range c.indices { 475 index := re.SubexpIndex(subexp.name) 476 if index != subexp.index { 477 t.Errorf("%q: SubexpIndex(%q) = %d, want %d", c.input, subexp.name, index, subexp.index) 478 } 479 } 480 } 481} 482 483var splitTests = []struct { 484 s string 485 r string 486 n int 487 out []string 488}{ 489 {"foo:and:bar", ":", -1, []string{"foo", "and", "bar"}}, 490 {"foo:and:bar", ":", 1, []string{"foo:and:bar"}}, 491 {"foo:and:bar", ":", 2, []string{"foo", "and:bar"}}, 492 {"foo:and:bar", "foo", -1, []string{"", ":and:bar"}}, 493 {"foo:and:bar", "bar", -1, []string{"foo:and:", ""}}, 494 {"foo:and:bar", "baz", -1, []string{"foo:and:bar"}}, 495 {"baabaab", "a", -1, []string{"b", "", "b", "", "b"}}, 496 {"baabaab", "a*", -1, []string{"b", "b", "b"}}, 497 {"baabaab", "ba*", -1, []string{"", "", "", ""}}, 498 {"foobar", "f*b*", -1, []string{"", "o", "o", "a", "r"}}, 499 {"foobar", "f+.*b+", -1, []string{"", "ar"}}, 500 {"foobooboar", "o{2}", -1, []string{"f", "b", "boar"}}, 501 {"a,b,c,d,e,f", ",", 3, []string{"a", "b", "c,d,e,f"}}, 502 {"a,b,c,d,e,f", ",", 0, nil}, 503 {",", ",", -1, []string{"", ""}}, 504 {",,,", ",", -1, []string{"", "", "", ""}}, 505 {"", ",", -1, []string{""}}, 506 {"", ".*", -1, []string{""}}, 507 {"", ".+", -1, []string{""}}, 508 {"", "", -1, []string{}}, 509 {"foobar", "", -1, []string{"f", "o", "o", "b", "a", "r"}}, 510 {"abaabaccadaaae", "a*", 5, []string{"", "b", "b", "c", "cadaaae"}}, 511 {":x:y:z:", ":", -1, []string{"", "x", "y", "z", ""}}, 512} 513 514func TestSplit(t *testing.T) { 515 for i, test := range splitTests { 516 re, err := Compile(test.r) 517 if err != nil { 518 t.Errorf("#%d: %q: compile error: %s", i, test.r, err.Error()) 519 continue 520 } 521 522 split := re.Split(test.s, test.n) 523 if !slices.Equal(split, test.out) { 524 t.Errorf("#%d: %q: got %q; want %q", i, test.r, split, test.out) 525 } 526 527 if QuoteMeta(test.r) == test.r { 528 strsplit := strings.SplitN(test.s, test.r, test.n) 529 if !slices.Equal(split, strsplit) { 530 t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", i, test.s, test.r, test.n, split, strsplit) 531 } 532 } 533 } 534} 535 536// The following sequence of Match calls used to panic. See issue #12980. 537func TestParseAndCompile(t *testing.T) { 538 expr := "a$" 539 s := "a\nb" 540 541 for i, tc := range []struct { 542 reFlags syntax.Flags 543 expMatch bool 544 }{ 545 {syntax.Perl | syntax.OneLine, false}, 546 {syntax.Perl &^ syntax.OneLine, true}, 547 } { 548 parsed, err := syntax.Parse(expr, tc.reFlags) 549 if err != nil { 550 t.Fatalf("%d: parse: %v", i, err) 551 } 552 re, err := Compile(parsed.String()) 553 if err != nil { 554 t.Fatalf("%d: compile: %v", i, err) 555 } 556 if match := re.MatchString(s); match != tc.expMatch { 557 t.Errorf("%d: %q.MatchString(%q)=%t; expected=%t", i, re, s, match, tc.expMatch) 558 } 559 } 560} 561 562// Check that one-pass cutoff does trigger. 563func TestOnePassCutoff(t *testing.T) { 564 re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl) 565 if err != nil { 566 t.Fatalf("parse: %v", err) 567 } 568 p, err := syntax.Compile(re.Simplify()) 569 if err != nil { 570 t.Fatalf("compile: %v", err) 571 } 572 if compileOnePass(p) != nil { 573 t.Fatalf("makeOnePass succeeded; wanted nil") 574 } 575} 576 577// Check that the same machine can be used with the standard matcher 578// and then the backtracker when there are no captures. 579func TestSwitchBacktrack(t *testing.T) { 580 re := MustCompile(`a|b`) 581 long := make([]byte, maxBacktrackVector+1) 582 583 // The following sequence of Match calls used to panic. See issue #10319. 584 re.Match(long) // triggers standard matcher 585 re.Match(long[:1]) // triggers backtracker 586} 587 588func BenchmarkFind(b *testing.B) { 589 b.StopTimer() 590 re := MustCompile("a+b+") 591 wantSubs := "aaabb" 592 s := []byte("acbb" + wantSubs + "dd") 593 b.StartTimer() 594 b.ReportAllocs() 595 for i := 0; i < b.N; i++ { 596 subs := re.Find(s) 597 if string(subs) != wantSubs { 598 b.Fatalf("Find(%q) = %q; want %q", s, subs, wantSubs) 599 } 600 } 601} 602 603func BenchmarkFindAllNoMatches(b *testing.B) { 604 re := MustCompile("a+b+") 605 s := []byte("acddee") 606 b.ReportAllocs() 607 b.ResetTimer() 608 for i := 0; i < b.N; i++ { 609 all := re.FindAll(s, -1) 610 if all != nil { 611 b.Fatalf("FindAll(%q) = %q; want nil", s, all) 612 } 613 } 614} 615 616func BenchmarkFindString(b *testing.B) { 617 b.StopTimer() 618 re := MustCompile("a+b+") 619 wantSubs := "aaabb" 620 s := "acbb" + wantSubs + "dd" 621 b.StartTimer() 622 b.ReportAllocs() 623 for i := 0; i < b.N; i++ { 624 subs := re.FindString(s) 625 if subs != wantSubs { 626 b.Fatalf("FindString(%q) = %q; want %q", s, subs, wantSubs) 627 } 628 } 629} 630 631func BenchmarkFindSubmatch(b *testing.B) { 632 b.StopTimer() 633 re := MustCompile("a(a+b+)b") 634 wantSubs := "aaabb" 635 s := []byte("acbb" + wantSubs + "dd") 636 b.StartTimer() 637 b.ReportAllocs() 638 for i := 0; i < b.N; i++ { 639 subs := re.FindSubmatch(s) 640 if string(subs[0]) != wantSubs { 641 b.Fatalf("FindSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs) 642 } 643 if string(subs[1]) != "aab" { 644 b.Fatalf("FindSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab") 645 } 646 } 647} 648 649func BenchmarkFindStringSubmatch(b *testing.B) { 650 b.StopTimer() 651 re := MustCompile("a(a+b+)b") 652 wantSubs := "aaabb" 653 s := "acbb" + wantSubs + "dd" 654 b.StartTimer() 655 b.ReportAllocs() 656 for i := 0; i < b.N; i++ { 657 subs := re.FindStringSubmatch(s) 658 if subs[0] != wantSubs { 659 b.Fatalf("FindStringSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs) 660 } 661 if subs[1] != "aab" { 662 b.Fatalf("FindStringSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab") 663 } 664 } 665} 666 667func BenchmarkLiteral(b *testing.B) { 668 x := strings.Repeat("x", 50) + "y" 669 b.StopTimer() 670 re := MustCompile("y") 671 b.StartTimer() 672 for i := 0; i < b.N; i++ { 673 if !re.MatchString(x) { 674 b.Fatalf("no match!") 675 } 676 } 677} 678 679func BenchmarkNotLiteral(b *testing.B) { 680 x := strings.Repeat("x", 50) + "y" 681 b.StopTimer() 682 re := MustCompile(".y") 683 b.StartTimer() 684 for i := 0; i < b.N; i++ { 685 if !re.MatchString(x) { 686 b.Fatalf("no match!") 687 } 688 } 689} 690 691func BenchmarkMatchClass(b *testing.B) { 692 b.StopTimer() 693 x := strings.Repeat("xxxx", 20) + "w" 694 re := MustCompile("[abcdw]") 695 b.StartTimer() 696 for i := 0; i < b.N; i++ { 697 if !re.MatchString(x) { 698 b.Fatalf("no match!") 699 } 700 } 701} 702 703func BenchmarkMatchClass_InRange(b *testing.B) { 704 b.StopTimer() 705 // 'b' is between 'a' and 'c', so the charclass 706 // range checking is no help here. 707 x := strings.Repeat("bbbb", 20) + "c" 708 re := MustCompile("[ac]") 709 b.StartTimer() 710 for i := 0; i < b.N; i++ { 711 if !re.MatchString(x) { 712 b.Fatalf("no match!") 713 } 714 } 715} 716 717func BenchmarkReplaceAll(b *testing.B) { 718 x := "abcdefghijklmnopqrstuvwxyz" 719 b.StopTimer() 720 re := MustCompile("[cjrw]") 721 b.StartTimer() 722 for i := 0; i < b.N; i++ { 723 re.ReplaceAllString(x, "") 724 } 725} 726 727func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) { 728 b.StopTimer() 729 x := []byte("abcdefghijklmnopqrstuvwxyz") 730 re := MustCompile("^zbc(d|e)") 731 b.StartTimer() 732 for i := 0; i < b.N; i++ { 733 re.Match(x) 734 } 735} 736 737func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) { 738 b.StopTimer() 739 x := []byte("abcdefghijklmnopqrstuvwxyz") 740 for i := 0; i < 15; i++ { 741 x = append(x, x...) 742 } 743 re := MustCompile("^zbc(d|e)") 744 b.StartTimer() 745 for i := 0; i < b.N; i++ { 746 re.Match(x) 747 } 748} 749 750func BenchmarkAnchoredShortMatch(b *testing.B) { 751 b.StopTimer() 752 x := []byte("abcdefghijklmnopqrstuvwxyz") 753 re := MustCompile("^.bc(d|e)") 754 b.StartTimer() 755 for i := 0; i < b.N; i++ { 756 re.Match(x) 757 } 758} 759 760func BenchmarkAnchoredLongMatch(b *testing.B) { 761 b.StopTimer() 762 x := []byte("abcdefghijklmnopqrstuvwxyz") 763 for i := 0; i < 15; i++ { 764 x = append(x, x...) 765 } 766 re := MustCompile("^.bc(d|e)") 767 b.StartTimer() 768 for i := 0; i < b.N; i++ { 769 re.Match(x) 770 } 771} 772 773func BenchmarkOnePassShortA(b *testing.B) { 774 b.StopTimer() 775 x := []byte("abcddddddeeeededd") 776 re := MustCompile("^.bc(d|e)*$") 777 b.StartTimer() 778 for i := 0; i < b.N; i++ { 779 re.Match(x) 780 } 781} 782 783func BenchmarkNotOnePassShortA(b *testing.B) { 784 b.StopTimer() 785 x := []byte("abcddddddeeeededd") 786 re := MustCompile(".bc(d|e)*$") 787 b.StartTimer() 788 for i := 0; i < b.N; i++ { 789 re.Match(x) 790 } 791} 792 793func BenchmarkOnePassShortB(b *testing.B) { 794 b.StopTimer() 795 x := []byte("abcddddddeeeededd") 796 re := MustCompile("^.bc(?:d|e)*$") 797 b.StartTimer() 798 for i := 0; i < b.N; i++ { 799 re.Match(x) 800 } 801} 802 803func BenchmarkNotOnePassShortB(b *testing.B) { 804 b.StopTimer() 805 x := []byte("abcddddddeeeededd") 806 re := MustCompile(".bc(?:d|e)*$") 807 b.StartTimer() 808 for i := 0; i < b.N; i++ { 809 re.Match(x) 810 } 811} 812 813func BenchmarkOnePassLongPrefix(b *testing.B) { 814 b.StopTimer() 815 x := []byte("abcdefghijklmnopqrstuvwxyz") 816 re := MustCompile("^abcdefghijklmnopqrstuvwxyz.*$") 817 b.StartTimer() 818 for i := 0; i < b.N; i++ { 819 re.Match(x) 820 } 821} 822 823func BenchmarkOnePassLongNotPrefix(b *testing.B) { 824 b.StopTimer() 825 x := []byte("abcdefghijklmnopqrstuvwxyz") 826 re := MustCompile("^.bcdefghijklmnopqrstuvwxyz.*$") 827 b.StartTimer() 828 for i := 0; i < b.N; i++ { 829 re.Match(x) 830 } 831} 832 833func BenchmarkMatchParallelShared(b *testing.B) { 834 x := []byte("this is a long line that contains foo bar baz") 835 re := MustCompile("foo (ba+r)? baz") 836 b.ResetTimer() 837 b.RunParallel(func(pb *testing.PB) { 838 for pb.Next() { 839 re.Match(x) 840 } 841 }) 842} 843 844func BenchmarkMatchParallelCopied(b *testing.B) { 845 x := []byte("this is a long line that contains foo bar baz") 846 re := MustCompile("foo (ba+r)? baz") 847 b.ResetTimer() 848 b.RunParallel(func(pb *testing.PB) { 849 re := re.Copy() 850 for pb.Next() { 851 re.Match(x) 852 } 853 }) 854} 855 856var sink string 857 858func BenchmarkQuoteMetaAll(b *testing.B) { 859 specials := make([]byte, 0) 860 for i := byte(0); i < utf8.RuneSelf; i++ { 861 if special(i) { 862 specials = append(specials, i) 863 } 864 } 865 s := string(specials) 866 b.SetBytes(int64(len(s))) 867 b.ResetTimer() 868 for i := 0; i < b.N; i++ { 869 sink = QuoteMeta(s) 870 } 871} 872 873func BenchmarkQuoteMetaNone(b *testing.B) { 874 s := "abcdefghijklmnopqrstuvwxyz" 875 b.SetBytes(int64(len(s))) 876 b.ResetTimer() 877 for i := 0; i < b.N; i++ { 878 sink = QuoteMeta(s) 879 } 880} 881 882var compileBenchData = []struct{ name, re string }{ 883 {"Onepass", `^a.[l-nA-Cg-j]?e$`}, 884 {"Medium", `^((a|b|[d-z0-9])*(日){4,5}.)+$`}, 885 {"Hard", strings.Repeat(`((abc)*|`, 50) + strings.Repeat(`)`, 50)}, 886} 887 888func BenchmarkCompile(b *testing.B) { 889 for _, data := range compileBenchData { 890 b.Run(data.name, func(b *testing.B) { 891 b.ReportAllocs() 892 for i := 0; i < b.N; i++ { 893 if _, err := Compile(data.re); err != nil { 894 b.Fatal(err) 895 } 896 } 897 }) 898 } 899} 900 901func TestDeepEqual(t *testing.T) { 902 re1 := MustCompile("a.*b.*c.*d") 903 re2 := MustCompile("a.*b.*c.*d") 904 if !reflect.DeepEqual(re1, re2) { // has always been true, since Go 1. 905 t.Errorf("DeepEqual(re1, re2) = false, want true") 906 } 907 908 re1.MatchString("abcdefghijklmn") 909 if !reflect.DeepEqual(re1, re2) { 910 t.Errorf("DeepEqual(re1, re2) = false, want true") 911 } 912 913 re2.MatchString("abcdefghijklmn") 914 if !reflect.DeepEqual(re1, re2) { 915 t.Errorf("DeepEqual(re1, re2) = false, want true") 916 } 917 918 re2.MatchString(strings.Repeat("abcdefghijklmn", 100)) 919 if !reflect.DeepEqual(re1, re2) { 920 t.Errorf("DeepEqual(re1, re2) = false, want true") 921 } 922} 923 924var minInputLenTests = []struct { 925 Regexp string 926 min int 927}{ 928 {``, 0}, 929 {`a`, 1}, 930 {`aa`, 2}, 931 {`(aa)a`, 3}, 932 {`(?:aa)a`, 3}, 933 {`a?a`, 1}, 934 {`(aaa)|(aa)`, 2}, 935 {`(aa)+a`, 3}, 936 {`(aa)*a`, 1}, 937 {`(aa){3,5}`, 6}, 938 {`[a-z]`, 1}, 939 {`日`, 3}, 940} 941 942func TestMinInputLen(t *testing.T) { 943 for _, tt := range minInputLenTests { 944 re, _ := syntax.Parse(tt.Regexp, syntax.Perl) 945 m := minInputLen(re) 946 if m != tt.min { 947 t.Errorf("regexp %#q has minInputLen %d, should be %d", tt.Regexp, m, tt.min) 948 } 949 } 950} 951 952func TestUnmarshalText(t *testing.T) { 953 unmarshaled := new(Regexp) 954 for i := range goodRe { 955 re := compileTest(t, goodRe[i], "") 956 marshaled, err := re.MarshalText() 957 if err != nil { 958 t.Errorf("regexp %#q failed to marshal: %s", re, err) 959 continue 960 } 961 if err := unmarshaled.UnmarshalText(marshaled); err != nil { 962 t.Errorf("regexp %#q failed to unmarshal: %s", re, err) 963 continue 964 } 965 if unmarshaled.String() != goodRe[i] { 966 t.Errorf("UnmarshalText returned unexpected value: %s", unmarshaled.String()) 967 } 968 } 969 t.Run("invalid pattern", func(t *testing.T) { 970 re := new(Regexp) 971 err := re.UnmarshalText([]byte(`\`)) 972 if err == nil { 973 t.Error("unexpected success") 974 } 975 }) 976} 977