1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package regexp
6
7import (
8	"reflect"
9	"regexp/syntax"
10	"slices"
11	"strings"
12	"testing"
13	"unicode/utf8"
14)
15
16var goodRe = []string{
17	``,
18	`.`,
19	`^.$`,
20	`a`,
21	`a*`,
22	`a+`,
23	`a?`,
24	`a|b`,
25	`a*|b*`,
26	`(a*|b)(c*|d)`,
27	`[a-z]`,
28	`[a-abc-c\-\]\[]`,
29	`[a-z]+`,
30	`[abc]`,
31	`[^1234]`,
32	`[^\n]`,
33	`\!\\`,
34}
35
36type stringError struct {
37	re  string
38	err string
39}
40
41var badRe = []stringError{
42	{`*`, "missing argument to repetition operator: `*`"},
43	{`+`, "missing argument to repetition operator: `+`"},
44	{`?`, "missing argument to repetition operator: `?`"},
45	{`(abc`, "missing closing ): `(abc`"},
46	{`abc)`, "unexpected ): `abc)`"},
47	{`x[a-z`, "missing closing ]: `[a-z`"},
48	{`[z-a]`, "invalid character class range: `z-a`"},
49	{`abc\`, "trailing backslash at end of expression"},
50	{`a**`, "invalid nested repetition operator: `**`"},
51	{`a*+`, "invalid nested repetition operator: `*+`"},
52	{`\x`, "invalid escape sequence: `\\x`"},
53	{strings.Repeat(`\pL`, 27000), "expression too large"},
54}
55
56func compileTest(t *testing.T, expr string, error string) *Regexp {
57	re, err := Compile(expr)
58	if error == "" && err != nil {
59		t.Error("compiling `", expr, "`; unexpected error: ", err.Error())
60	}
61	if error != "" && err == nil {
62		t.Error("compiling `", expr, "`; missing error")
63	} else if error != "" && !strings.Contains(err.Error(), error) {
64		t.Error("compiling `", expr, "`; wrong error: ", err.Error(), "; want ", error)
65	}
66	return re
67}
68
69func TestGoodCompile(t *testing.T) {
70	for i := 0; i < len(goodRe); i++ {
71		compileTest(t, goodRe[i], "")
72	}
73}
74
75func TestBadCompile(t *testing.T) {
76	for i := 0; i < len(badRe); i++ {
77		compileTest(t, badRe[i].re, badRe[i].err)
78	}
79}
80
81func matchTest(t *testing.T, test *FindTest) {
82	re := compileTest(t, test.pat, "")
83	if re == nil {
84		return
85	}
86	m := re.MatchString(test.text)
87	if m != (len(test.matches) > 0) {
88		t.Errorf("MatchString failure on %s: %t should be %t", test, m, len(test.matches) > 0)
89	}
90	// now try bytes
91	m = re.Match([]byte(test.text))
92	if m != (len(test.matches) > 0) {
93		t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
94	}
95}
96
97func TestMatch(t *testing.T) {
98	for _, test := range findTests {
99		matchTest(t, &test)
100	}
101}
102
103func matchFunctionTest(t *testing.T, test *FindTest) {
104	m, err := MatchString(test.pat, test.text)
105	if err == nil {
106		return
107	}
108	if m != (len(test.matches) > 0) {
109		t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0)
110	}
111}
112
113func TestMatchFunction(t *testing.T) {
114	for _, test := range findTests {
115		matchFunctionTest(t, &test)
116	}
117}
118
119func copyMatchTest(t *testing.T, test *FindTest) {
120	re := compileTest(t, test.pat, "")
121	if re == nil {
122		return
123	}
124	m1 := re.MatchString(test.text)
125	m2 := re.Copy().MatchString(test.text)
126	if m1 != m2 {
127		t.Errorf("Copied Regexp match failure on %s: original gave %t; copy gave %t; should be %t",
128			test, m1, m2, len(test.matches) > 0)
129	}
130}
131
132func TestCopyMatch(t *testing.T) {
133	for _, test := range findTests {
134		copyMatchTest(t, &test)
135	}
136}
137
138type ReplaceTest struct {
139	pattern, replacement, input, output string
140}
141
142var replaceTests = []ReplaceTest{
143	// Test empty input and/or replacement, with pattern that matches the empty string.
144	{"", "", "", ""},
145	{"", "x", "", "x"},
146	{"", "", "abc", "abc"},
147	{"", "x", "abc", "xaxbxcx"},
148
149	// Test empty input and/or replacement, with pattern that does not match the empty string.
150	{"b", "", "", ""},
151	{"b", "x", "", ""},
152	{"b", "", "abc", "ac"},
153	{"b", "x", "abc", "axc"},
154	{"y", "", "", ""},
155	{"y", "x", "", ""},
156	{"y", "", "abc", "abc"},
157	{"y", "x", "abc", "abc"},
158
159	// Multibyte characters -- verify that we don't try to match in the middle
160	// of a character.
161	{"[a-c]*", "x", "\u65e5", "x\u65e5x"},
162	{"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"},
163
164	// Start and end of a string.
165	{"^[a-c]*", "x", "abcdabc", "xdabc"},
166	{"[a-c]*$", "x", "abcdabc", "abcdx"},
167	{"^[a-c]*$", "x", "abcdabc", "abcdabc"},
168	{"^[a-c]*", "x", "abc", "x"},
169	{"[a-c]*$", "x", "abc", "x"},
170	{"^[a-c]*$", "x", "abc", "x"},
171	{"^[a-c]*", "x", "dabce", "xdabce"},
172	{"[a-c]*$", "x", "dabce", "dabcex"},
173	{"^[a-c]*$", "x", "dabce", "dabce"},
174	{"^[a-c]*", "x", "", "x"},
175	{"[a-c]*$", "x", "", "x"},
176	{"^[a-c]*$", "x", "", "x"},
177
178	{"^[a-c]+", "x", "abcdabc", "xdabc"},
179	{"[a-c]+$", "x", "abcdabc", "abcdx"},
180	{"^[a-c]+$", "x", "abcdabc", "abcdabc"},
181	{"^[a-c]+", "x", "abc", "x"},
182	{"[a-c]+$", "x", "abc", "x"},
183	{"^[a-c]+$", "x", "abc", "x"},
184	{"^[a-c]+", "x", "dabce", "dabce"},
185	{"[a-c]+$", "x", "dabce", "dabce"},
186	{"^[a-c]+$", "x", "dabce", "dabce"},
187	{"^[a-c]+", "x", "", ""},
188	{"[a-c]+$", "x", "", ""},
189	{"^[a-c]+$", "x", "", ""},
190
191	// Other cases.
192	{"abc", "def", "abcdefg", "defdefg"},
193	{"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"},
194	{"abc", "", "abcdabc", "d"},
195	{"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"},
196	{"abc", "d", "", ""},
197	{"abc", "d", "abc", "d"},
198	{".+", "x", "abc", "x"},
199	{"[a-c]*", "x", "def", "xdxexfx"},
200	{"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"},
201	{"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"},
202
203	// Substitutions
204	{"a+", "($0)", "banana", "b(a)n(a)n(a)"},
205	{"a+", "(${0})", "banana", "b(a)n(a)n(a)"},
206	{"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
207	{"a+", "(${0})$0", "banana", "b(a)an(a)an(a)a"},
208	{"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, world"},
209	{"hello, (.+)", "goodbye, $1x", "hello, world", "goodbye, "},
210	{"hello, (.+)", "goodbye, ${1}x", "hello, world", "goodbye, worldx"},
211	{"hello, (.+)", "<$0><$1><$2><$3>", "hello, world", "<hello, world><world><><>"},
212	{"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, world!"},
213	{"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, world"},
214	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "hihihi"},
215	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "byebyebye"},
216	{"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", ""},
217	{"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "hiyz"},
218	{"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $x"},
219	{"a+", "${oops", "aaa", "${oops"},
220	{"a+", "$$", "aaa", "$"},
221	{"a+", "$", "aaa", "$"},
222
223	// Substitution when subexpression isn't found
224	{"(x)?", "$1", "123", "123"},
225	{"abc", "$1", "123", "123"},
226
227	// Substitutions involving a (x){0}
228	{"(a)(b){0}(c)", ".$1|$3.", "xacxacx", "x.a|c.x.a|c.x"},
229	{"(a)(((b))){0}c", ".$1.", "xacxacx", "x.a.x.a.x"},
230	{"((a(b){0}){3}){5}(h)", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"},
231	{"((a(b){0}){3}){5}h", "y caramb$2", "say aaaaaaaaaaaaaaaah", "say ay caramba"},
232}
233
234var replaceLiteralTests = []ReplaceTest{
235	// Substitutions
236	{"a+", "($0)", "banana", "b($0)n($0)n($0)"},
237	{"a+", "(${0})", "banana", "b(${0})n(${0})n(${0})"},
238	{"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
239	{"a+", "(${0})$0", "banana", "b(${0})$0n(${0})$0n(${0})$0"},
240	{"hello, (.+)", "goodbye, ${1}", "hello, world", "goodbye, ${1}"},
241	{"hello, (?P<noun>.+)", "goodbye, $noun!", "hello, world", "goodbye, $noun!"},
242	{"hello, (?P<noun>.+)", "goodbye, ${noun}", "hello, world", "goodbye, ${noun}"},
243	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "hi", "$x$x$x"},
244	{"(?P<x>hi)|(?P<x>bye)", "$x$x$x", "bye", "$x$x$x"},
245	{"(?P<x>hi)|(?P<x>bye)", "$xyz", "hi", "$xyz"},
246	{"(?P<x>hi)|(?P<x>bye)", "${x}yz", "hi", "${x}yz"},
247	{"(?P<x>hi)|(?P<x>bye)", "hello $$x", "hi", "hello $$x"},
248	{"a+", "${oops", "aaa", "${oops"},
249	{"a+", "$$", "aaa", "$$"},
250	{"a+", "$", "aaa", "$"},
251}
252
253type ReplaceFuncTest struct {
254	pattern       string
255	replacement   func(string) string
256	input, output string
257}
258
259var replaceFuncTests = []ReplaceFuncTest{
260	{"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"},
261	{"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"},
262	{"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"},
263}
264
265func TestReplaceAll(t *testing.T) {
266	for _, tc := range replaceTests {
267		re, err := Compile(tc.pattern)
268		if err != nil {
269			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
270			continue
271		}
272		actual := re.ReplaceAllString(tc.input, tc.replacement)
273		if actual != tc.output {
274			t.Errorf("%q.ReplaceAllString(%q,%q) = %q; want %q",
275				tc.pattern, tc.input, tc.replacement, actual, tc.output)
276		}
277		// now try bytes
278		actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement)))
279		if actual != tc.output {
280			t.Errorf("%q.ReplaceAll(%q,%q) = %q; want %q",
281				tc.pattern, tc.input, tc.replacement, actual, tc.output)
282		}
283	}
284}
285
286func TestReplaceAllLiteral(t *testing.T) {
287	// Run ReplaceAll tests that do not have $ expansions.
288	for _, tc := range replaceTests {
289		if strings.Contains(tc.replacement, "$") {
290			continue
291		}
292		re, err := Compile(tc.pattern)
293		if err != nil {
294			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
295			continue
296		}
297		actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
298		if actual != tc.output {
299			t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
300				tc.pattern, tc.input, tc.replacement, actual, tc.output)
301		}
302		// now try bytes
303		actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
304		if actual != tc.output {
305			t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
306				tc.pattern, tc.input, tc.replacement, actual, tc.output)
307		}
308	}
309
310	// Run literal-specific tests.
311	for _, tc := range replaceLiteralTests {
312		re, err := Compile(tc.pattern)
313		if err != nil {
314			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
315			continue
316		}
317		actual := re.ReplaceAllLiteralString(tc.input, tc.replacement)
318		if actual != tc.output {
319			t.Errorf("%q.ReplaceAllLiteralString(%q,%q) = %q; want %q",
320				tc.pattern, tc.input, tc.replacement, actual, tc.output)
321		}
322		// now try bytes
323		actual = string(re.ReplaceAllLiteral([]byte(tc.input), []byte(tc.replacement)))
324		if actual != tc.output {
325			t.Errorf("%q.ReplaceAllLiteral(%q,%q) = %q; want %q",
326				tc.pattern, tc.input, tc.replacement, actual, tc.output)
327		}
328	}
329}
330
331func TestReplaceAllFunc(t *testing.T) {
332	for _, tc := range replaceFuncTests {
333		re, err := Compile(tc.pattern)
334		if err != nil {
335			t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err)
336			continue
337		}
338		actual := re.ReplaceAllStringFunc(tc.input, tc.replacement)
339		if actual != tc.output {
340			t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q",
341				tc.pattern, tc.input, actual, tc.output)
342		}
343		// now try bytes
344		actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return []byte(tc.replacement(string(s))) }))
345		if actual != tc.output {
346			t.Errorf("%q.ReplaceFunc(%q,fn) = %q; want %q",
347				tc.pattern, tc.input, actual, tc.output)
348		}
349	}
350}
351
352type MetaTest struct {
353	pattern, output, literal string
354	isLiteral                bool
355}
356
357var metaTests = []MetaTest{
358	{``, ``, ``, true},
359	{`foo`, `foo`, `foo`, true},
360	{`日本語+`, `日本語\+`, `日本語`, false},
361	{`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
362	{`foo.\$`, `foo\.\\\$`, `foo`, false},     // has escaped operators and real operators
363	{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
364}
365
366var literalPrefixTests = []MetaTest{
367	// See golang.org/issue/11175.
368	// output is unused.
369	{`^0^0$`, ``, `0`, false},
370	{`^0^`, ``, ``, false},
371	{`^0$`, ``, `0`, true},
372	{`$0^`, ``, ``, false},
373	{`$0$`, ``, ``, false},
374	{`^^0$$`, ``, ``, false},
375	{`^$^$`, ``, ``, false},
376	{`$$0^^`, ``, ``, false},
377	{`a\x{fffd}b`, ``, `a`, false},
378	{`\x{fffd}b`, ``, ``, false},
379	{"\ufffd", ``, ``, false},
380}
381
382func TestQuoteMeta(t *testing.T) {
383	for _, tc := range metaTests {
384		// Verify that QuoteMeta returns the expected string.
385		quoted := QuoteMeta(tc.pattern)
386		if quoted != tc.output {
387			t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`",
388				tc.pattern, quoted, tc.output)
389			continue
390		}
391
392		// Verify that the quoted string is in fact treated as expected
393		// by Compile -- i.e. that it matches the original, unquoted string.
394		if tc.pattern != "" {
395			re, err := Compile(quoted)
396			if err != nil {
397				t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err)
398				continue
399			}
400			src := "abc" + tc.pattern + "def"
401			repl := "xyz"
402			replaced := re.ReplaceAllString(src, repl)
403			expected := "abcxyzdef"
404			if replaced != expected {
405				t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`",
406					tc.pattern, src, repl, replaced, expected)
407			}
408		}
409	}
410}
411
412func TestLiteralPrefix(t *testing.T) {
413	for _, tc := range append(metaTests, literalPrefixTests...) {
414		// Literal method needs to scan the pattern.
415		re := MustCompile(tc.pattern)
416		str, complete := re.LiteralPrefix()
417		if complete != tc.isLiteral {
418			t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral)
419		}
420		if str != tc.literal {
421			t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal)
422		}
423	}
424}
425
426type subexpIndex struct {
427	name  string
428	index int
429}
430
431type subexpCase struct {
432	input   string
433	num     int
434	names   []string
435	indices []subexpIndex
436}
437
438var emptySubexpIndices = []subexpIndex{{"", -1}, {"missing", -1}}
439
440var subexpCases = []subexpCase{
441	{``, 0, nil, emptySubexpIndices},
442	{`.*`, 0, nil, emptySubexpIndices},
443	{`abba`, 0, nil, emptySubexpIndices},
444	{`ab(b)a`, 1, []string{"", ""}, emptySubexpIndices},
445	{`ab(.*)a`, 1, []string{"", ""}, emptySubexpIndices},
446	{`(.*)ab(.*)a`, 2, []string{"", "", ""}, emptySubexpIndices},
447	{`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
448	{`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}, emptySubexpIndices},
449	{`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
450	{`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}, emptySubexpIndices},
451	{`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}, []subexpIndex{{"", -1}, {"missing", -1}, {"foo", 1}, {"bar", 2}}},
452}
453
454func TestSubexp(t *testing.T) {
455	for _, c := range subexpCases {
456		re := MustCompile(c.input)
457		n := re.NumSubexp()
458		if n != c.num {
459			t.Errorf("%q: NumSubexp = %d, want %d", c.input, n, c.num)
460			continue
461		}
462		names := re.SubexpNames()
463		if len(names) != 1+n {
464			t.Errorf("%q: len(SubexpNames) = %d, want %d", c.input, len(names), n)
465			continue
466		}
467		if c.names != nil {
468			for i := 0; i < 1+n; i++ {
469				if names[i] != c.names[i] {
470					t.Errorf("%q: SubexpNames[%d] = %q, want %q", c.input, i, names[i], c.names[i])
471				}
472			}
473		}
474		for _, subexp := range c.indices {
475			index := re.SubexpIndex(subexp.name)
476			if index != subexp.index {
477				t.Errorf("%q: SubexpIndex(%q) = %d, want %d", c.input, subexp.name, index, subexp.index)
478			}
479		}
480	}
481}
482
483var splitTests = []struct {
484	s   string
485	r   string
486	n   int
487	out []string
488}{
489	{"foo:and:bar", ":", -1, []string{"foo", "and", "bar"}},
490	{"foo:and:bar", ":", 1, []string{"foo:and:bar"}},
491	{"foo:and:bar", ":", 2, []string{"foo", "and:bar"}},
492	{"foo:and:bar", "foo", -1, []string{"", ":and:bar"}},
493	{"foo:and:bar", "bar", -1, []string{"foo:and:", ""}},
494	{"foo:and:bar", "baz", -1, []string{"foo:and:bar"}},
495	{"baabaab", "a", -1, []string{"b", "", "b", "", "b"}},
496	{"baabaab", "a*", -1, []string{"b", "b", "b"}},
497	{"baabaab", "ba*", -1, []string{"", "", "", ""}},
498	{"foobar", "f*b*", -1, []string{"", "o", "o", "a", "r"}},
499	{"foobar", "f+.*b+", -1, []string{"", "ar"}},
500	{"foobooboar", "o{2}", -1, []string{"f", "b", "boar"}},
501	{"a,b,c,d,e,f", ",", 3, []string{"a", "b", "c,d,e,f"}},
502	{"a,b,c,d,e,f", ",", 0, nil},
503	{",", ",", -1, []string{"", ""}},
504	{",,,", ",", -1, []string{"", "", "", ""}},
505	{"", ",", -1, []string{""}},
506	{"", ".*", -1, []string{""}},
507	{"", ".+", -1, []string{""}},
508	{"", "", -1, []string{}},
509	{"foobar", "", -1, []string{"f", "o", "o", "b", "a", "r"}},
510	{"abaabaccadaaae", "a*", 5, []string{"", "b", "b", "c", "cadaaae"}},
511	{":x:y:z:", ":", -1, []string{"", "x", "y", "z", ""}},
512}
513
514func TestSplit(t *testing.T) {
515	for i, test := range splitTests {
516		re, err := Compile(test.r)
517		if err != nil {
518			t.Errorf("#%d: %q: compile error: %s", i, test.r, err.Error())
519			continue
520		}
521
522		split := re.Split(test.s, test.n)
523		if !slices.Equal(split, test.out) {
524			t.Errorf("#%d: %q: got %q; want %q", i, test.r, split, test.out)
525		}
526
527		if QuoteMeta(test.r) == test.r {
528			strsplit := strings.SplitN(test.s, test.r, test.n)
529			if !slices.Equal(split, strsplit) {
530				t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", i, test.s, test.r, test.n, split, strsplit)
531			}
532		}
533	}
534}
535
536// The following sequence of Match calls used to panic. See issue #12980.
537func TestParseAndCompile(t *testing.T) {
538	expr := "a$"
539	s := "a\nb"
540
541	for i, tc := range []struct {
542		reFlags  syntax.Flags
543		expMatch bool
544	}{
545		{syntax.Perl | syntax.OneLine, false},
546		{syntax.Perl &^ syntax.OneLine, true},
547	} {
548		parsed, err := syntax.Parse(expr, tc.reFlags)
549		if err != nil {
550			t.Fatalf("%d: parse: %v", i, err)
551		}
552		re, err := Compile(parsed.String())
553		if err != nil {
554			t.Fatalf("%d: compile: %v", i, err)
555		}
556		if match := re.MatchString(s); match != tc.expMatch {
557			t.Errorf("%d: %q.MatchString(%q)=%t; expected=%t", i, re, s, match, tc.expMatch)
558		}
559	}
560}
561
562// Check that one-pass cutoff does trigger.
563func TestOnePassCutoff(t *testing.T) {
564	re, err := syntax.Parse(`^x{1,1000}y{1,1000}$`, syntax.Perl)
565	if err != nil {
566		t.Fatalf("parse: %v", err)
567	}
568	p, err := syntax.Compile(re.Simplify())
569	if err != nil {
570		t.Fatalf("compile: %v", err)
571	}
572	if compileOnePass(p) != nil {
573		t.Fatalf("makeOnePass succeeded; wanted nil")
574	}
575}
576
577// Check that the same machine can be used with the standard matcher
578// and then the backtracker when there are no captures.
579func TestSwitchBacktrack(t *testing.T) {
580	re := MustCompile(`a|b`)
581	long := make([]byte, maxBacktrackVector+1)
582
583	// The following sequence of Match calls used to panic. See issue #10319.
584	re.Match(long)     // triggers standard matcher
585	re.Match(long[:1]) // triggers backtracker
586}
587
588func BenchmarkFind(b *testing.B) {
589	b.StopTimer()
590	re := MustCompile("a+b+")
591	wantSubs := "aaabb"
592	s := []byte("acbb" + wantSubs + "dd")
593	b.StartTimer()
594	b.ReportAllocs()
595	for i := 0; i < b.N; i++ {
596		subs := re.Find(s)
597		if string(subs) != wantSubs {
598			b.Fatalf("Find(%q) = %q; want %q", s, subs, wantSubs)
599		}
600	}
601}
602
603func BenchmarkFindAllNoMatches(b *testing.B) {
604	re := MustCompile("a+b+")
605	s := []byte("acddee")
606	b.ReportAllocs()
607	b.ResetTimer()
608	for i := 0; i < b.N; i++ {
609		all := re.FindAll(s, -1)
610		if all != nil {
611			b.Fatalf("FindAll(%q) = %q; want nil", s, all)
612		}
613	}
614}
615
616func BenchmarkFindString(b *testing.B) {
617	b.StopTimer()
618	re := MustCompile("a+b+")
619	wantSubs := "aaabb"
620	s := "acbb" + wantSubs + "dd"
621	b.StartTimer()
622	b.ReportAllocs()
623	for i := 0; i < b.N; i++ {
624		subs := re.FindString(s)
625		if subs != wantSubs {
626			b.Fatalf("FindString(%q) = %q; want %q", s, subs, wantSubs)
627		}
628	}
629}
630
631func BenchmarkFindSubmatch(b *testing.B) {
632	b.StopTimer()
633	re := MustCompile("a(a+b+)b")
634	wantSubs := "aaabb"
635	s := []byte("acbb" + wantSubs + "dd")
636	b.StartTimer()
637	b.ReportAllocs()
638	for i := 0; i < b.N; i++ {
639		subs := re.FindSubmatch(s)
640		if string(subs[0]) != wantSubs {
641			b.Fatalf("FindSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
642		}
643		if string(subs[1]) != "aab" {
644			b.Fatalf("FindSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
645		}
646	}
647}
648
649func BenchmarkFindStringSubmatch(b *testing.B) {
650	b.StopTimer()
651	re := MustCompile("a(a+b+)b")
652	wantSubs := "aaabb"
653	s := "acbb" + wantSubs + "dd"
654	b.StartTimer()
655	b.ReportAllocs()
656	for i := 0; i < b.N; i++ {
657		subs := re.FindStringSubmatch(s)
658		if subs[0] != wantSubs {
659			b.Fatalf("FindStringSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
660		}
661		if subs[1] != "aab" {
662			b.Fatalf("FindStringSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
663		}
664	}
665}
666
667func BenchmarkLiteral(b *testing.B) {
668	x := strings.Repeat("x", 50) + "y"
669	b.StopTimer()
670	re := MustCompile("y")
671	b.StartTimer()
672	for i := 0; i < b.N; i++ {
673		if !re.MatchString(x) {
674			b.Fatalf("no match!")
675		}
676	}
677}
678
679func BenchmarkNotLiteral(b *testing.B) {
680	x := strings.Repeat("x", 50) + "y"
681	b.StopTimer()
682	re := MustCompile(".y")
683	b.StartTimer()
684	for i := 0; i < b.N; i++ {
685		if !re.MatchString(x) {
686			b.Fatalf("no match!")
687		}
688	}
689}
690
691func BenchmarkMatchClass(b *testing.B) {
692	b.StopTimer()
693	x := strings.Repeat("xxxx", 20) + "w"
694	re := MustCompile("[abcdw]")
695	b.StartTimer()
696	for i := 0; i < b.N; i++ {
697		if !re.MatchString(x) {
698			b.Fatalf("no match!")
699		}
700	}
701}
702
703func BenchmarkMatchClass_InRange(b *testing.B) {
704	b.StopTimer()
705	// 'b' is between 'a' and 'c', so the charclass
706	// range checking is no help here.
707	x := strings.Repeat("bbbb", 20) + "c"
708	re := MustCompile("[ac]")
709	b.StartTimer()
710	for i := 0; i < b.N; i++ {
711		if !re.MatchString(x) {
712			b.Fatalf("no match!")
713		}
714	}
715}
716
717func BenchmarkReplaceAll(b *testing.B) {
718	x := "abcdefghijklmnopqrstuvwxyz"
719	b.StopTimer()
720	re := MustCompile("[cjrw]")
721	b.StartTimer()
722	for i := 0; i < b.N; i++ {
723		re.ReplaceAllString(x, "")
724	}
725}
726
727func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
728	b.StopTimer()
729	x := []byte("abcdefghijklmnopqrstuvwxyz")
730	re := MustCompile("^zbc(d|e)")
731	b.StartTimer()
732	for i := 0; i < b.N; i++ {
733		re.Match(x)
734	}
735}
736
737func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
738	b.StopTimer()
739	x := []byte("abcdefghijklmnopqrstuvwxyz")
740	for i := 0; i < 15; i++ {
741		x = append(x, x...)
742	}
743	re := MustCompile("^zbc(d|e)")
744	b.StartTimer()
745	for i := 0; i < b.N; i++ {
746		re.Match(x)
747	}
748}
749
750func BenchmarkAnchoredShortMatch(b *testing.B) {
751	b.StopTimer()
752	x := []byte("abcdefghijklmnopqrstuvwxyz")
753	re := MustCompile("^.bc(d|e)")
754	b.StartTimer()
755	for i := 0; i < b.N; i++ {
756		re.Match(x)
757	}
758}
759
760func BenchmarkAnchoredLongMatch(b *testing.B) {
761	b.StopTimer()
762	x := []byte("abcdefghijklmnopqrstuvwxyz")
763	for i := 0; i < 15; i++ {
764		x = append(x, x...)
765	}
766	re := MustCompile("^.bc(d|e)")
767	b.StartTimer()
768	for i := 0; i < b.N; i++ {
769		re.Match(x)
770	}
771}
772
773func BenchmarkOnePassShortA(b *testing.B) {
774	b.StopTimer()
775	x := []byte("abcddddddeeeededd")
776	re := MustCompile("^.bc(d|e)*$")
777	b.StartTimer()
778	for i := 0; i < b.N; i++ {
779		re.Match(x)
780	}
781}
782
783func BenchmarkNotOnePassShortA(b *testing.B) {
784	b.StopTimer()
785	x := []byte("abcddddddeeeededd")
786	re := MustCompile(".bc(d|e)*$")
787	b.StartTimer()
788	for i := 0; i < b.N; i++ {
789		re.Match(x)
790	}
791}
792
793func BenchmarkOnePassShortB(b *testing.B) {
794	b.StopTimer()
795	x := []byte("abcddddddeeeededd")
796	re := MustCompile("^.bc(?:d|e)*$")
797	b.StartTimer()
798	for i := 0; i < b.N; i++ {
799		re.Match(x)
800	}
801}
802
803func BenchmarkNotOnePassShortB(b *testing.B) {
804	b.StopTimer()
805	x := []byte("abcddddddeeeededd")
806	re := MustCompile(".bc(?:d|e)*$")
807	b.StartTimer()
808	for i := 0; i < b.N; i++ {
809		re.Match(x)
810	}
811}
812
813func BenchmarkOnePassLongPrefix(b *testing.B) {
814	b.StopTimer()
815	x := []byte("abcdefghijklmnopqrstuvwxyz")
816	re := MustCompile("^abcdefghijklmnopqrstuvwxyz.*$")
817	b.StartTimer()
818	for i := 0; i < b.N; i++ {
819		re.Match(x)
820	}
821}
822
823func BenchmarkOnePassLongNotPrefix(b *testing.B) {
824	b.StopTimer()
825	x := []byte("abcdefghijklmnopqrstuvwxyz")
826	re := MustCompile("^.bcdefghijklmnopqrstuvwxyz.*$")
827	b.StartTimer()
828	for i := 0; i < b.N; i++ {
829		re.Match(x)
830	}
831}
832
833func BenchmarkMatchParallelShared(b *testing.B) {
834	x := []byte("this is a long line that contains foo bar baz")
835	re := MustCompile("foo (ba+r)? baz")
836	b.ResetTimer()
837	b.RunParallel(func(pb *testing.PB) {
838		for pb.Next() {
839			re.Match(x)
840		}
841	})
842}
843
844func BenchmarkMatchParallelCopied(b *testing.B) {
845	x := []byte("this is a long line that contains foo bar baz")
846	re := MustCompile("foo (ba+r)? baz")
847	b.ResetTimer()
848	b.RunParallel(func(pb *testing.PB) {
849		re := re.Copy()
850		for pb.Next() {
851			re.Match(x)
852		}
853	})
854}
855
856var sink string
857
858func BenchmarkQuoteMetaAll(b *testing.B) {
859	specials := make([]byte, 0)
860	for i := byte(0); i < utf8.RuneSelf; i++ {
861		if special(i) {
862			specials = append(specials, i)
863		}
864	}
865	s := string(specials)
866	b.SetBytes(int64(len(s)))
867	b.ResetTimer()
868	for i := 0; i < b.N; i++ {
869		sink = QuoteMeta(s)
870	}
871}
872
873func BenchmarkQuoteMetaNone(b *testing.B) {
874	s := "abcdefghijklmnopqrstuvwxyz"
875	b.SetBytes(int64(len(s)))
876	b.ResetTimer()
877	for i := 0; i < b.N; i++ {
878		sink = QuoteMeta(s)
879	}
880}
881
882var compileBenchData = []struct{ name, re string }{
883	{"Onepass", `^a.[l-nA-Cg-j]?e$`},
884	{"Medium", `^((a|b|[d-z0-9])*(日){4,5}.)+$`},
885	{"Hard", strings.Repeat(`((abc)*|`, 50) + strings.Repeat(`)`, 50)},
886}
887
888func BenchmarkCompile(b *testing.B) {
889	for _, data := range compileBenchData {
890		b.Run(data.name, func(b *testing.B) {
891			b.ReportAllocs()
892			for i := 0; i < b.N; i++ {
893				if _, err := Compile(data.re); err != nil {
894					b.Fatal(err)
895				}
896			}
897		})
898	}
899}
900
901func TestDeepEqual(t *testing.T) {
902	re1 := MustCompile("a.*b.*c.*d")
903	re2 := MustCompile("a.*b.*c.*d")
904	if !reflect.DeepEqual(re1, re2) { // has always been true, since Go 1.
905		t.Errorf("DeepEqual(re1, re2) = false, want true")
906	}
907
908	re1.MatchString("abcdefghijklmn")
909	if !reflect.DeepEqual(re1, re2) {
910		t.Errorf("DeepEqual(re1, re2) = false, want true")
911	}
912
913	re2.MatchString("abcdefghijklmn")
914	if !reflect.DeepEqual(re1, re2) {
915		t.Errorf("DeepEqual(re1, re2) = false, want true")
916	}
917
918	re2.MatchString(strings.Repeat("abcdefghijklmn", 100))
919	if !reflect.DeepEqual(re1, re2) {
920		t.Errorf("DeepEqual(re1, re2) = false, want true")
921	}
922}
923
924var minInputLenTests = []struct {
925	Regexp string
926	min    int
927}{
928	{``, 0},
929	{`a`, 1},
930	{`aa`, 2},
931	{`(aa)a`, 3},
932	{`(?:aa)a`, 3},
933	{`a?a`, 1},
934	{`(aaa)|(aa)`, 2},
935	{`(aa)+a`, 3},
936	{`(aa)*a`, 1},
937	{`(aa){3,5}`, 6},
938	{`[a-z]`, 1},
939	{`日`, 3},
940}
941
942func TestMinInputLen(t *testing.T) {
943	for _, tt := range minInputLenTests {
944		re, _ := syntax.Parse(tt.Regexp, syntax.Perl)
945		m := minInputLen(re)
946		if m != tt.min {
947			t.Errorf("regexp %#q has minInputLen %d, should be %d", tt.Regexp, m, tt.min)
948		}
949	}
950}
951
952func TestUnmarshalText(t *testing.T) {
953	unmarshaled := new(Regexp)
954	for i := range goodRe {
955		re := compileTest(t, goodRe[i], "")
956		marshaled, err := re.MarshalText()
957		if err != nil {
958			t.Errorf("regexp %#q failed to marshal: %s", re, err)
959			continue
960		}
961		if err := unmarshaled.UnmarshalText(marshaled); err != nil {
962			t.Errorf("regexp %#q failed to unmarshal: %s", re, err)
963			continue
964		}
965		if unmarshaled.String() != goodRe[i] {
966			t.Errorf("UnmarshalText returned unexpected value: %s", unmarshaled.String())
967		}
968	}
969	t.Run("invalid pattern", func(t *testing.T) {
970		re := new(Regexp)
971		err := re.UnmarshalText([]byte(`\`))
972		if err == nil {
973			t.Error("unexpected success")
974		}
975	})
976}
977