1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package regexp
6
7import (
8	"fmt"
9	"strings"
10	"testing"
11)
12
13// For each pattern/text pair, what is the expected output of each function?
14// We can derive the textual results from the indexed results, the non-submatch
15// results from the submatched results, the single results from the 'all' results,
16// and the byte results from the string results. Therefore the table includes
17// only the FindAllStringSubmatchIndex result.
18type FindTest struct {
19	pat     string
20	text    string
21	matches [][]int
22}
23
24func (t FindTest) String() string {
25	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
26}
27
28var findTests = []FindTest{
29	{``, ``, build(1, 0, 0)},
30	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
31	{`a+`, "baaab", build(1, 1, 4)},
32	{"abcd..", "abcdef", build(1, 0, 6)},
33	{`a`, "a", build(1, 0, 1)},
34	{`x`, "y", nil},
35	{`b`, "abc", build(1, 1, 2)},
36	{`.`, "a", build(1, 0, 1)},
37	{`.*`, "abcdef", build(1, 0, 6)},
38	{`^`, "abcde", build(1, 0, 0)},
39	{`$`, "abcde", build(1, 5, 5)},
40	{`^abcd$`, "abcd", build(1, 0, 4)},
41	{`^bcd'`, "abcdef", nil},
42	{`^abcd$`, "abcde", nil},
43	{`a+`, "baaab", build(1, 1, 4)},
44	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
45	{`[a-z]+`, "abcd", build(1, 0, 4)},
46	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
47	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
48	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
49	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
50	{`日本語+`, "日本語", build(1, 0, 9)},
51	{`日本語+`, "日本語語語語", build(1, 0, 18)},
52	{`()`, "", build(1, 0, 0, 0, 0)},
53	{`(a)`, "a", build(1, 0, 1, 0, 1)},
54	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
55	{`(.*)`, "", build(1, 0, 0, 0, 0)},
56	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
57	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
58	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
59	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
60	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
61	{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
62	{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
63
64	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
65	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
66	{`[.]`, ".", build(1, 0, 1)},
67	{`/$`, "/abc/", build(1, 4, 5)},
68	{`/$`, "/abc", nil},
69
70	// multiple matches
71	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
72	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
73	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
74	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
75	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
76
77	// fixed bugs
78	{`ab$`, "cab", build(1, 1, 3)},
79	{`axxb$`, "axxcb", nil},
80	{`data`, "daXY data", build(1, 5, 9)},
81	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
82	{`zx+`, "zzx", build(1, 1, 3)},
83	{`ab$`, "abcab", build(1, 3, 5)},
84	{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
85	{`(?:.|(?:.a))`, "", nil},
86	{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
87	{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
88	{`(a){0}`, "", build(1, 0, 0, -1, -1)},
89	{`(?-s)(?:(?:^).)`, "\n", nil},
90	{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
91	{`(?:(?:^).)`, "\n", nil},
92	{`\b`, "x", build(2, 0, 0, 1, 1)},
93	{`\b`, "xx", build(2, 0, 0, 2, 2)},
94	{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
95	{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
96	{`\B`, "x", nil},
97	{`\B`, "xx", build(1, 1, 1)},
98	{`\B`, "x y", nil},
99	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
100	{`(|a)*`, "aa", build(3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2)},
101
102	// RE2 tests
103	{`[^\S\s]`, "abcd", nil},
104	{`[^\S[:space:]]`, "abcd", nil},
105	{`[^\D\d]`, "abcd", nil},
106	{`[^\D[:digit:]]`, "abcd", nil},
107	{`(?i)\W`, "x", nil},
108	{`(?i)\W`, "k", nil},
109	{`(?i)\W`, "s", nil},
110
111	// can backslash-escape any punctuation
112	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
113		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
114	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
115		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
116	{"\\`", "`", build(1, 0, 1)},
117	{"[\\`]+", "`", build(1, 0, 1)},
118
119	{"\ufffd", "\xff", build(1, 0, 1)},
120	{"\ufffd", "hello\xffworld", build(1, 5, 6)},
121	{`.*`, "hello\xffworld", build(1, 0, 11)},
122	{`\x{fffd}`, "\xc2\x00", build(1, 0, 1)},
123	{"[\ufffd]", "\xff", build(1, 0, 1)},
124	{`[\x{fffd}]`, "\xc2\x00", build(1, 0, 1)},
125
126	// long set of matches (longer than startSize)
127	{
128		".",
129		"qwertyuiopasdfghjklzxcvbnm1234567890",
130		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
131			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
132			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
133			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
134	},
135}
136
137// build is a helper to construct a [][]int by extracting n sequences from x.
138// This represents n matches with len(x)/n submatches each.
139func build(n int, x ...int) [][]int {
140	ret := make([][]int, n)
141	runLength := len(x) / n
142	j := 0
143	for i := range ret {
144		ret[i] = make([]int, runLength)
145		copy(ret[i], x[j:])
146		j += runLength
147		if j > len(x) {
148			panic("invalid build entry")
149		}
150	}
151	return ret
152}
153
154// First the simple cases.
155
156func TestFind(t *testing.T) {
157	for _, test := range findTests {
158		re := MustCompile(test.pat)
159		if re.String() != test.pat {
160			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
161		}
162		result := re.Find([]byte(test.text))
163		switch {
164		case len(test.matches) == 0 && len(result) == 0:
165			// ok
166		case test.matches == nil && result != nil:
167			t.Errorf("expected no match; got one: %s", test)
168		case test.matches != nil && result == nil:
169			t.Errorf("expected match; got none: %s", test)
170		case test.matches != nil && result != nil:
171			expect := test.text[test.matches[0][0]:test.matches[0][1]]
172			if len(result) != cap(result) {
173				t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test)
174			}
175			if expect != string(result) {
176				t.Errorf("expected %q got %q: %s", expect, result, test)
177			}
178		}
179	}
180}
181
182func TestFindString(t *testing.T) {
183	for _, test := range findTests {
184		result := MustCompile(test.pat).FindString(test.text)
185		switch {
186		case len(test.matches) == 0 && len(result) == 0:
187			// ok
188		case test.matches == nil && result != "":
189			t.Errorf("expected no match; got one: %s", test)
190		case test.matches != nil && result == "":
191			// Tricky because an empty result has two meanings: no match or empty match.
192			if test.matches[0][0] != test.matches[0][1] {
193				t.Errorf("expected match; got none: %s", test)
194			}
195		case test.matches != nil && result != "":
196			expect := test.text[test.matches[0][0]:test.matches[0][1]]
197			if expect != result {
198				t.Errorf("expected %q got %q: %s", expect, result, test)
199			}
200		}
201	}
202}
203
204func testFindIndex(test *FindTest, result []int, t *testing.T) {
205	switch {
206	case len(test.matches) == 0 && len(result) == 0:
207		// ok
208	case test.matches == nil && result != nil:
209		t.Errorf("expected no match; got one: %s", test)
210	case test.matches != nil && result == nil:
211		t.Errorf("expected match; got none: %s", test)
212	case test.matches != nil && result != nil:
213		expect := test.matches[0]
214		if expect[0] != result[0] || expect[1] != result[1] {
215			t.Errorf("expected %v got %v: %s", expect, result, test)
216		}
217	}
218}
219
220func TestFindIndex(t *testing.T) {
221	for _, test := range findTests {
222		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
223	}
224}
225
226func TestFindStringIndex(t *testing.T) {
227	for _, test := range findTests {
228		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
229	}
230}
231
232func TestFindReaderIndex(t *testing.T) {
233	for _, test := range findTests {
234		testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
235	}
236}
237
238// Now come the simple All cases.
239
240func TestFindAll(t *testing.T) {
241	for _, test := range findTests {
242		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
243		switch {
244		case test.matches == nil && result == nil:
245			// ok
246		case test.matches == nil && result != nil:
247			t.Errorf("expected no match; got one: %s", test)
248		case test.matches != nil && result == nil:
249			t.Fatalf("expected match; got none: %s", test)
250		case test.matches != nil && result != nil:
251			if len(test.matches) != len(result) {
252				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
253				continue
254			}
255			for k, e := range test.matches {
256				got := result[k]
257				if len(got) != cap(got) {
258					t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test)
259				}
260				expect := test.text[e[0]:e[1]]
261				if expect != string(got) {
262					t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test)
263				}
264			}
265		}
266	}
267}
268
269func TestFindAllString(t *testing.T) {
270	for _, test := range findTests {
271		result := MustCompile(test.pat).FindAllString(test.text, -1)
272		switch {
273		case test.matches == nil && result == nil:
274			// ok
275		case test.matches == nil && result != nil:
276			t.Errorf("expected no match; got one: %s", test)
277		case test.matches != nil && result == nil:
278			t.Errorf("expected match; got none: %s", test)
279		case test.matches != nil && result != nil:
280			if len(test.matches) != len(result) {
281				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
282				continue
283			}
284			for k, e := range test.matches {
285				expect := test.text[e[0]:e[1]]
286				if expect != result[k] {
287					t.Errorf("expected %q got %q: %s", expect, result, test)
288				}
289			}
290		}
291	}
292}
293
294func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
295	switch {
296	case test.matches == nil && result == nil:
297		// ok
298	case test.matches == nil && result != nil:
299		t.Errorf("expected no match; got one: %s", test)
300	case test.matches != nil && result == nil:
301		t.Errorf("expected match; got none: %s", test)
302	case test.matches != nil && result != nil:
303		if len(test.matches) != len(result) {
304			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
305			return
306		}
307		for k, e := range test.matches {
308			if e[0] != result[k][0] || e[1] != result[k][1] {
309				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
310			}
311		}
312	}
313}
314
315func TestFindAllIndex(t *testing.T) {
316	for _, test := range findTests {
317		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
318	}
319}
320
321func TestFindAllStringIndex(t *testing.T) {
322	for _, test := range findTests {
323		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
324	}
325}
326
327// Now come the Submatch cases.
328
329func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
330	if len(submatches) != len(result)*2 {
331		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
332		return
333	}
334	for k := 0; k < len(submatches); k += 2 {
335		if submatches[k] == -1 {
336			if result[k/2] != nil {
337				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
338			}
339			continue
340		}
341		got := result[k/2]
342		if len(got) != cap(got) {
343			t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test)
344			return
345		}
346		expect := test.text[submatches[k]:submatches[k+1]]
347		if expect != string(got) {
348			t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test)
349			return
350		}
351	}
352}
353
354func TestFindSubmatch(t *testing.T) {
355	for _, test := range findTests {
356		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
357		switch {
358		case test.matches == nil && result == nil:
359			// ok
360		case test.matches == nil && result != nil:
361			t.Errorf("expected no match; got one: %s", test)
362		case test.matches != nil && result == nil:
363			t.Errorf("expected match; got none: %s", test)
364		case test.matches != nil && result != nil:
365			testSubmatchBytes(&test, 0, test.matches[0], result, t)
366		}
367	}
368}
369
370func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
371	if len(submatches) != len(result)*2 {
372		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
373		return
374	}
375	for k := 0; k < len(submatches); k += 2 {
376		if submatches[k] == -1 {
377			if result[k/2] != "" {
378				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
379			}
380			continue
381		}
382		expect := test.text[submatches[k]:submatches[k+1]]
383		if expect != result[k/2] {
384			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
385			return
386		}
387	}
388}
389
390func TestFindStringSubmatch(t *testing.T) {
391	for _, test := range findTests {
392		result := MustCompile(test.pat).FindStringSubmatch(test.text)
393		switch {
394		case test.matches == nil && result == nil:
395			// ok
396		case test.matches == nil && result != nil:
397			t.Errorf("expected no match; got one: %s", test)
398		case test.matches != nil && result == nil:
399			t.Errorf("expected match; got none: %s", test)
400		case test.matches != nil && result != nil:
401			testSubmatchString(&test, 0, test.matches[0], result, t)
402		}
403	}
404}
405
406func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
407	if len(expect) != len(result) {
408		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
409		return
410	}
411	for k, e := range expect {
412		if e != result[k] {
413			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
414		}
415	}
416}
417
418func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
419	switch {
420	case test.matches == nil && result == nil:
421		// ok
422	case test.matches == nil && result != nil:
423		t.Errorf("expected no match; got one: %s", test)
424	case test.matches != nil && result == nil:
425		t.Errorf("expected match; got none: %s", test)
426	case test.matches != nil && result != nil:
427		testSubmatchIndices(test, 0, test.matches[0], result, t)
428	}
429}
430
431func TestFindSubmatchIndex(t *testing.T) {
432	for _, test := range findTests {
433		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
434	}
435}
436
437func TestFindStringSubmatchIndex(t *testing.T) {
438	for _, test := range findTests {
439		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
440	}
441}
442
443func TestFindReaderSubmatchIndex(t *testing.T) {
444	for _, test := range findTests {
445		testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
446	}
447}
448
449// Now come the monster AllSubmatch cases.
450
451func TestFindAllSubmatch(t *testing.T) {
452	for _, test := range findTests {
453		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
454		switch {
455		case test.matches == nil && result == nil:
456			// ok
457		case test.matches == nil && result != nil:
458			t.Errorf("expected no match; got one: %s", test)
459		case test.matches != nil && result == nil:
460			t.Errorf("expected match; got none: %s", test)
461		case len(test.matches) != len(result):
462			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
463		case test.matches != nil && result != nil:
464			for k, match := range test.matches {
465				testSubmatchBytes(&test, k, match, result[k], t)
466			}
467		}
468	}
469}
470
471func TestFindAllStringSubmatch(t *testing.T) {
472	for _, test := range findTests {
473		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
474		switch {
475		case test.matches == nil && result == nil:
476			// ok
477		case test.matches == nil && result != nil:
478			t.Errorf("expected no match; got one: %s", test)
479		case test.matches != nil && result == nil:
480			t.Errorf("expected match; got none: %s", test)
481		case len(test.matches) != len(result):
482			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
483		case test.matches != nil && result != nil:
484			for k, match := range test.matches {
485				testSubmatchString(&test, k, match, result[k], t)
486			}
487		}
488	}
489}
490
491func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
492	switch {
493	case test.matches == nil && result == nil:
494		// ok
495	case test.matches == nil && result != nil:
496		t.Errorf("expected no match; got one: %s", test)
497	case test.matches != nil && result == nil:
498		t.Errorf("expected match; got none: %s", test)
499	case len(test.matches) != len(result):
500		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
501	case test.matches != nil && result != nil:
502		for k, match := range test.matches {
503			testSubmatchIndices(test, k, match, result[k], t)
504		}
505	}
506}
507
508func TestFindAllSubmatchIndex(t *testing.T) {
509	for _, test := range findTests {
510		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
511	}
512}
513
514func TestFindAllStringSubmatchIndex(t *testing.T) {
515	for _, test := range findTests {
516		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
517	}
518}
519