1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package xml
6
7import (
8	"bytes"
9	"fmt"
10	"io"
11	"reflect"
12	"strings"
13	"testing"
14	"unicode/utf8"
15)
16
17type toks struct {
18	earlyEOF bool
19	t        []Token
20}
21
22func (t *toks) Token() (Token, error) {
23	if len(t.t) == 0 {
24		return nil, io.EOF
25	}
26	var tok Token
27	tok, t.t = t.t[0], t.t[1:]
28	if t.earlyEOF && len(t.t) == 0 {
29		return tok, io.EOF
30	}
31	return tok, nil
32}
33
34func TestDecodeEOF(t *testing.T) {
35	start := StartElement{Name: Name{Local: "test"}}
36	tests := []struct {
37		name   string
38		tokens []Token
39		ok     bool
40	}{
41		{
42			name: "OK",
43			tokens: []Token{
44				start,
45				start.End(),
46			},
47			ok: true,
48		},
49		{
50			name: "Malformed",
51			tokens: []Token{
52				start,
53				StartElement{Name: Name{Local: "bad"}},
54				start.End(),
55			},
56			ok: false,
57		},
58	}
59	for _, tc := range tests {
60		for _, eof := range []bool{true, false} {
61			name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
62			t.Run(name, func(t *testing.T) {
63				d := NewTokenDecoder(&toks{
64					earlyEOF: eof,
65					t:        tc.tokens,
66				})
67				err := d.Decode(&struct {
68					XMLName Name `xml:"test"`
69				}{})
70				if tc.ok && err != nil {
71					t.Fatalf("d.Decode: expected nil error, got %v", err)
72				}
73				if _, ok := err.(*SyntaxError); !tc.ok && !ok {
74					t.Errorf("d.Decode: expected syntax error, got %v", err)
75				}
76			})
77		}
78	}
79}
80
81type toksNil struct {
82	returnEOF bool
83	t         []Token
84}
85
86func (t *toksNil) Token() (Token, error) {
87	if len(t.t) == 0 {
88		if !t.returnEOF {
89			// Return nil, nil before returning an EOF. It's legal, but
90			// discouraged.
91			t.returnEOF = true
92			return nil, nil
93		}
94		return nil, io.EOF
95	}
96	var tok Token
97	tok, t.t = t.t[0], t.t[1:]
98	return tok, nil
99}
100
101func TestDecodeNilToken(t *testing.T) {
102	for _, strict := range []bool{true, false} {
103		name := fmt.Sprintf("Strict=%v", strict)
104		t.Run(name, func(t *testing.T) {
105			start := StartElement{Name: Name{Local: "test"}}
106			bad := StartElement{Name: Name{Local: "bad"}}
107			d := NewTokenDecoder(&toksNil{
108				// Malformed
109				t: []Token{start, bad, start.End()},
110			})
111			d.Strict = strict
112			err := d.Decode(&struct {
113				XMLName Name `xml:"test"`
114			}{})
115			if _, ok := err.(*SyntaxError); !ok {
116				t.Errorf("d.Decode: expected syntax error, got %v", err)
117			}
118		})
119	}
120}
121
// testInput is the XML document tokenized by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, namespace
// declarations, predefined, numeric and custom entities, self-closing
// elements, a CDATA section and a trailing comment with no final newline.
122const testInput = `
123<?xml version="1.0" encoding="UTF-8"?>
124<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
125  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
126<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
127	"\r\n\t" + `  >
128  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
129  <query>&何; &is-it;</query>
130  <goodbye />
131  <outer foo:attr="value" xmlns:tag="ns4">
132    <inner/>
133  </outer>
134  <tag:name>
135    <![CDATA[Some text here.]]>
136  </tag:name>
137</body><!-- missing final newline -->`
138
// testEntity maps the custom entity names referenced in testInput's <query>
// element to their replacement text. TestRawToken and TestToken install it
// as the decoder's Entity map.
139var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
140
// rawTokens is the token sequence TestRawToken expects RawToken to produce
// for testInput. Name.Space holds the literal prefix as written in the input
// (for example "xmlns" or "tag"), not a namespace URI.
141var rawTokens = []Token{
142	CharData("\n"),
143	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
144	CharData("\n"),
145	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
146  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
147	CharData("\n"),
148	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
149	CharData("\n  "),
150	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
151	CharData("World <>'\" 白鵬翔"),
152	EndElement{Name{"", "hello"}},
153	CharData("\n  "),
154	StartElement{Name{"", "query"}, []Attr{}},
155	CharData("What is it?"),
156	EndElement{Name{"", "query"}},
157	CharData("\n  "),
158	StartElement{Name{"", "goodbye"}, []Attr{}},
159	EndElement{Name{"", "goodbye"}},
160	CharData("\n  "),
161	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
162	CharData("\n    "),
163	StartElement{Name{"", "inner"}, []Attr{}},
164	EndElement{Name{"", "inner"}},
165	CharData("\n  "),
166	EndElement{Name{"", "outer"}},
167	CharData("\n  "),
168	StartElement{Name{"tag", "name"}, []Attr{}},
169	CharData("\n    "),
170	CharData("Some text here."),
171	CharData("\n  "),
172	EndElement{Name{"tag", "name"}},
173	CharData("\n"),
174	EndElement{Name{"", "body"}},
175	Comment(" missing final newline "),
176}
177
// cookedTokens is the token sequence TestToken expects Token to produce for
// testInput. Unlike rawTokens, element and attribute prefixes appear as the
// namespace values declared in the input ("ns1", "ns2", "ns3"), while the
// xmlns declarations themselves keep their literal names.
178var cookedTokens = []Token{
179	CharData("\n"),
180	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
181	CharData("\n"),
182	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
183  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
184	CharData("\n"),
185	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
186	CharData("\n  "),
187	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
188	CharData("World <>'\" 白鵬翔"),
189	EndElement{Name{"ns2", "hello"}},
190	CharData("\n  "),
191	StartElement{Name{"ns2", "query"}, []Attr{}},
192	CharData("What is it?"),
193	EndElement{Name{"ns2", "query"}},
194	CharData("\n  "),
195	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
196	EndElement{Name{"ns2", "goodbye"}},
197	CharData("\n  "),
198	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
199	CharData("\n    "),
200	StartElement{Name{"ns2", "inner"}, []Attr{}},
201	EndElement{Name{"ns2", "inner"}},
202	CharData("\n  "),
203	EndElement{Name{"ns2", "outer"}},
204	CharData("\n  "),
205	StartElement{Name{"ns3", "name"}, []Attr{}},
206	CharData("\n    "),
207	CharData("Some text here."),
208	CharData("\n  "),
209	EndElement{Name{"ns3", "name"}},
210	CharData("\n"),
211	EndElement{Name{"ns2", "body"}},
212	Comment(" missing final newline "),
213}
214
// testInputAltEncoding is a document that declares the made-up encoding
// "x-testing-uppercase". It is used by TestRawTokenAltEncoding and
// TestRawTokenAltEncodingNoConverter.
215const testInputAltEncoding = `
216<?xml version="1.0" encoding="x-testing-uppercase"?>
217<TAG>VALUE</TAG>`
218
// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding expects
// for testInputAltEncoding: the element name and text after the XML
// declaration are lower-cased, the declaration itself is not.
219var rawTokensAltEncoding = []Token{
220	CharData("\n"),
221	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
222	CharData("\n"),
223	StartElement{Name{"", "tag"}, []Attr{}},
224	CharData("value"),
225	EndElement{Name{"", "tag"}},
226}
227
// xmlInput lists malformed documents for TestSyntax. Reading tokens from each
// entry must eventually fail with a *SyntaxError.
228var xmlInput = []string{
229	// unexpected EOF cases
230	"<",
231	"<t",
232	"<t ",
233	"<t/",
234	"<!",
235	"<!-",
236	"<!--",
237	"<!--c-",
238	"<!--c--",
239	"<!d",
240	"<t></",
241	"<t></t",
242	"<?",
243	"<?p",
244	"<t a",
245	"<t a=",
246	"<t a='",
247	"<t a=''",
248	"<t/><![",
249	"<t/><![C",
250	"<t/><![CDATA[d",
251	"<t/><![CDATA[d]",
252	"<t/><![CDATA[d]]",
253
254	// other Syntax errors
255	"<>",
256	"<t/a",
257	"<0 />",
258	"<?0 >",
259	//	"<!0 >",	// let the Token() caller handle
260	"</0>",
261	"<t 0=''>",
262	"<t a='&'>",
263	"<t a='<'>",
264	"<t>&nbspc;</t>",
265	"<t a>",
266	"<t a=>",
267	"<t a=v>",
268	//	"<![CDATA[d]]>",	// let the Token() caller handle
269	"<t></e>",
270	"<t></>",
271	"<t></t!",
272	"<t>cdata]]></t>",
273}
274
275func TestRawToken(t *testing.T) {
276	d := NewDecoder(strings.NewReader(testInput))
277	d.Entity = testEntity
278	testRawToken(t, d, testInput, rawTokens)
279}
280
// nonStrictInput holds elements whose text contains malformed or unknown
// entity references. TestNonStrictRawToken tokenizes it with Strict disabled.
281const nonStrictInput = `
282<tag>non&entity</tag>
283<tag>&unknown;entity</tag>
284<tag>&#123</tag>
285<tag>&#zzz;</tag>
286<tag>&なまえ3;</tag>
287<tag>&lt-gt;</tag>
288<tag>&;</tag>
289<tag>&0a;</tag>
290`
291
// nonStrictTokens is the token sequence TestNonStrictRawToken expects for
// nonStrictInput: every malformed entity reference is kept verbatim in the
// character data.
292var nonStrictTokens = []Token{
293	CharData("\n"),
294	StartElement{Name{"", "tag"}, []Attr{}},
295	CharData("non&entity"),
296	EndElement{Name{"", "tag"}},
297	CharData("\n"),
298	StartElement{Name{"", "tag"}, []Attr{}},
299	CharData("&unknown;entity"),
300	EndElement{Name{"", "tag"}},
301	CharData("\n"),
302	StartElement{Name{"", "tag"}, []Attr{}},
303	CharData("&#123"),
304	EndElement{Name{"", "tag"}},
305	CharData("\n"),
306	StartElement{Name{"", "tag"}, []Attr{}},
307	CharData("&#zzz;"),
308	EndElement{Name{"", "tag"}},
309	CharData("\n"),
310	StartElement{Name{"", "tag"}, []Attr{}},
311	CharData("&なまえ3;"),
312	EndElement{Name{"", "tag"}},
313	CharData("\n"),
314	StartElement{Name{"", "tag"}, []Attr{}},
315	CharData("&lt-gt;"),
316	EndElement{Name{"", "tag"}},
317	CharData("\n"),
318	StartElement{Name{"", "tag"}, []Attr{}},
319	CharData("&;"),
320	EndElement{Name{"", "tag"}},
321	CharData("\n"),
322	StartElement{Name{"", "tag"}, []Attr{}},
323	CharData("&0a;"),
324	EndElement{Name{"", "tag"}},
325	CharData("\n"),
326}
327
328func TestNonStrictRawToken(t *testing.T) {
329	d := NewDecoder(strings.NewReader(nonStrictInput))
330	d.Strict = false
331	testRawToken(t, d, nonStrictInput, nonStrictTokens)
332}
333
// downCaser wraps an io.ByteReader and lower-cases ASCII letters as they are
// read. It stands in for a charset converter in TestRawTokenAltEncoding.
type downCaser struct {
	t *testing.T
	r io.ByteReader
}

// ReadByte reads one byte from the wrapped reader, mapping 'A'..'Z' to
// 'a'..'z' and passing every other byte (and any error) through unchanged.
func (d *downCaser) ReadByte() (c byte, err error) {
	b, readErr := d.r.ReadByte()
	if 'A' <= b && b <= 'Z' {
		b = b - 'A' + 'a'
	}
	return b, readErr
}

// Read fails the test unconditionally: callers are expected to consume a
// downCaser through ReadByte only.
func (d *downCaser) Read(p []byte) (int, error) {
	d.t.Fatalf("unexpected Read call on downCaser reader")
	panic("unreachable")
}
351
352func TestRawTokenAltEncoding(t *testing.T) {
353	d := NewDecoder(strings.NewReader(testInputAltEncoding))
354	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
355		if charset != "x-testing-uppercase" {
356			t.Fatalf("unexpected charset %q", charset)
357		}
358		return &downCaser{t, input.(io.ByteReader)}, nil
359	}
360	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
361}
362
363func TestRawTokenAltEncodingNoConverter(t *testing.T) {
364	d := NewDecoder(strings.NewReader(testInputAltEncoding))
365	token, err := d.RawToken()
366	if token == nil {
367		t.Fatalf("expected a token on first RawToken call")
368	}
369	if err != nil {
370		t.Fatal(err)
371	}
372	token, err = d.RawToken()
373	if token != nil {
374		t.Errorf("expected a nil token; got %#v", token)
375	}
376	if err == nil {
377		t.Fatalf("expected an error on second RawToken call")
378	}
379	const encoding = "x-testing-uppercase"
380	if !strings.Contains(err.Error(), encoding) {
381		t.Errorf("expected error to contain %q; got error: %v",
382			encoding, err)
383	}
384}
385
386func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
387	lastEnd := int64(0)
388	for i, want := range rawTokens {
389		start := d.InputOffset()
390		have, err := d.RawToken()
391		end := d.InputOffset()
392		if err != nil {
393			t.Fatalf("token %d: unexpected error: %s", i, err)
394		}
395		if !reflect.DeepEqual(have, want) {
396			var shave, swant string
397			if _, ok := have.(CharData); ok {
398				shave = fmt.Sprintf("CharData(%q)", have)
399			} else {
400				shave = fmt.Sprintf("%#v", have)
401			}
402			if _, ok := want.(CharData); ok {
403				swant = fmt.Sprintf("CharData(%q)", want)
404			} else {
405				swant = fmt.Sprintf("%#v", want)
406			}
407			t.Errorf("token %d = %s, want %s", i, shave, swant)
408		}
409
410		// Check that InputOffset returned actual token.
411		switch {
412		case start < lastEnd:
413			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
414		case start >= end:
415			// Special case: EndElement can be synthesized.
416			if start == end && end == lastEnd {
417				break
418			}
419			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
420		case end > int64(len(raw)):
421			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
422		default:
423			text := raw[start:end]
424			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
425				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
426			}
427		}
428		lastEnd = end
429	}
430}
431
432// Ensure that directives (specifically !DOCTYPE) include the complete
433// text of any nested directives, noting that < and > do not change
434// nesting depth if they are in single or double quotes.
435
// nestedDirectivesInput is the document tokenized by TestNestedDirectives.
// Each line is a DOCTYPE with one nested ENTITY declaration whose quoted
// value contains '<', '>' or the other kind of quote.
436var nestedDirectivesInput = `
437<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
438<!DOCTYPE [<!ENTITY xlt ">">]>
439<!DOCTYPE [<!ENTITY xlt "<">]>
440<!DOCTYPE [<!ENTITY xlt '>'>]>
441<!DOCTYPE [<!ENTITY xlt '<'>]>
442<!DOCTYPE [<!ENTITY xlt '">'>]>
443<!DOCTYPE [<!ENTITY xlt "'<">]>
444`
445
// nestedDirectivesTokens is the token sequence TestNestedDirectives expects
// for nestedDirectivesInput: one Directive per DOCTYPE line, containing the
// nested declaration in full, separated by newline CharData.
446var nestedDirectivesTokens = []Token{
447	CharData("\n"),
448	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
449	CharData("\n"),
450	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
451	CharData("\n"),
452	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
453	CharData("\n"),
454	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
455	CharData("\n"),
456	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
457	CharData("\n"),
458	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
459	CharData("\n"),
460	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
461	CharData("\n"),
462}
463
464func TestNestedDirectives(t *testing.T) {
465	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
466
467	for i, want := range nestedDirectivesTokens {
468		have, err := d.Token()
469		if err != nil {
470			t.Fatalf("token %d: unexpected error: %s", i, err)
471		}
472		if !reflect.DeepEqual(have, want) {
473			t.Errorf("token %d = %#v want %#v", i, have, want)
474		}
475	}
476}
477
478func TestToken(t *testing.T) {
479	d := NewDecoder(strings.NewReader(testInput))
480	d.Entity = testEntity
481
482	for i, want := range cookedTokens {
483		have, err := d.Token()
484		if err != nil {
485			t.Fatalf("token %d: unexpected error: %s", i, err)
486		}
487		if !reflect.DeepEqual(have, want) {
488			t.Errorf("token %d = %#v want %#v", i, have, want)
489		}
490	}
491}
492
493func TestSyntax(t *testing.T) {
494	for i := range xmlInput {
495		d := NewDecoder(strings.NewReader(xmlInput[i]))
496		var err error
497		for _, err = d.Token(); err == nil; _, err = d.Token() {
498		}
499		if _, ok := err.(*SyntaxError); !ok {
500			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
501		}
502	}
503}
504
505func TestInputLinePos(t *testing.T) {
506	testInput := `<root>
507<?pi
508 ?>  <elt
509att
510=
511"val">
512<![CDATA[
513]]><!--
514
515--></elt>
516</root>`
517	linePos := [][]int{
518		{1, 7},
519		{2, 1},
520		{3, 4},
521		{3, 6},
522		{6, 7},
523		{7, 1},
524		{8, 4},
525		{10, 4},
526		{10, 10},
527		{11, 1},
528		{11, 8},
529	}
530	dec := NewDecoder(strings.NewReader(testInput))
531	for _, want := range linePos {
532		if _, err := dec.Token(); err != nil {
533			t.Errorf("Unexpected error: %v", err)
534			continue
535		}
536
537		gotLine, gotCol := dec.InputPos()
538		if gotLine != want[0] || gotCol != want[1] {
539			t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1])
540		}
541	}
542}
543
// allScalars is the decoding target for TestAllScalars. It has one field per
// scalar kind under test; each field name matches an element name in
// testScalarsInput.
type allScalars struct {
	True1  bool
	True2  bool
	False1 bool
	False2 bool
	Int    int
	Int8   int8
	Int16  int16
	Int32  int32
	Int64  int64
	// Uint is unsigned so that decoding into the plain uint kind is covered
	// alongside the sized unsigned types below.
	Uint      uint
	Uint8     uint8
	Uint16    uint16
	Uint32    uint32
	Uint64    uint64
	Uintptr   uintptr
	Float32   float32
	Float64   float64
	String    string
	PtrString *string
}
565
// all holds the values TestAllScalars expects to find in an allScalars after
// unmarshaling testScalarsInput.
566var all = allScalars{
567	True1:     true,
568	True2:     true,
569	False1:    false,
570	False2:    false,
571	Int:       1,
572	Int8:      -2,
573	Int16:     3,
574	Int32:     -4,
575	Int64:     5,
576	Uint:      6,
577	Uint8:     7,
578	Uint16:    8,
579	Uint32:    9,
580	Uint64:    10,
581	Uintptr:   11,
582	Float32:   13.0,
583	Float64:   14.0,
584	String:    "15",
585	PtrString: &sixteen,
586}
587
// sixteen is the string that all.PtrString points to.
588var sixteen = "16"
589
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars. Booleans appear both as true/false and as 1/0. The <Float>
// element has no matching field in allScalars.
590const testScalarsInput = `<allscalars>
591	<True1>true</True1>
592	<True2>1</True2>
593	<False1>false</False1>
594	<False2>0</False2>
595	<Int>1</Int>
596	<Int8>-2</Int8>
597	<Int16>3</Int16>
598	<Int32>-4</Int32>
599	<Int64>5</Int64>
600	<Uint>6</Uint>
601	<Uint8>7</Uint8>
602	<Uint16>8</Uint16>
603	<Uint32>9</Uint32>
604	<Uint64>10</Uint64>
605	<Uintptr>11</Uintptr>
606	<Float>12.0</Float>
607	<Float32>13.0</Float32>
608	<Float64>14.0</Float64>
609	<String>15</String>
610	<PtrString>16</PtrString>
611</allscalars>`
612
613func TestAllScalars(t *testing.T) {
614	var a allScalars
615	err := Unmarshal([]byte(testScalarsInput), &a)
616
617	if err != nil {
618		t.Fatal(err)
619	}
620	if !reflect.DeepEqual(a, all) {
621		t.Errorf("have %+v want %+v", a, all)
622	}
623}
624
// item is the decoding target used by TestIssue569.
625type item struct {
626	FieldA string
627}
628
629func TestIssue569(t *testing.T) {
630	data := `<item><FieldA>abcd</FieldA></item>`
631	var i item
632	err := Unmarshal([]byte(data), &i)
633
634	if err != nil || i.FieldA != "abcd" {
635		t.Fatal("Expecting abcd")
636	}
637}
638
639func TestUnquotedAttrs(t *testing.T) {
640	data := "<tag attr=azAZ09:-_\t>"
641	d := NewDecoder(strings.NewReader(data))
642	d.Strict = false
643	token, err := d.Token()
644	if _, ok := err.(*SyntaxError); ok {
645		t.Errorf("Unexpected error: %v", err)
646	}
647	if token.(StartElement).Name.Local != "tag" {
648		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
649	}
650	attr := token.(StartElement).Attr[0]
651	if attr.Value != "azAZ09:-_" {
652		t.Errorf("Unexpected attribute value: %v", attr.Value)
653	}
654	if attr.Name.Local != "attr" {
655		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
656	}
657}
658
659func TestValuelessAttrs(t *testing.T) {
660	tests := [][3]string{
661		{"<p nowrap>", "p", "nowrap"},
662		{"<p nowrap >", "p", "nowrap"},
663		{"<input checked/>", "input", "checked"},
664		{"<input checked />", "input", "checked"},
665	}
666	for _, test := range tests {
667		d := NewDecoder(strings.NewReader(test[0]))
668		d.Strict = false
669		token, err := d.Token()
670		if _, ok := err.(*SyntaxError); ok {
671			t.Errorf("Unexpected error: %v", err)
672		}
673		if token.(StartElement).Name.Local != test[1] {
674			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
675		}
676		attr := token.(StartElement).Attr[0]
677		if attr.Value != test[2] {
678			t.Errorf("Unexpected attribute value: %v", attr.Value)
679		}
680		if attr.Name.Local != test[2] {
681			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
682		}
683	}
684}
685
686func TestCopyTokenCharData(t *testing.T) {
687	data := []byte("same data")
688	var tok1 Token = CharData(data)
689	tok2 := CopyToken(tok1)
690	if !reflect.DeepEqual(tok1, tok2) {
691		t.Error("CopyToken(CharData) != CharData")
692	}
693	data[1] = 'o'
694	if reflect.DeepEqual(tok1, tok2) {
695		t.Error("CopyToken(CharData) uses same buffer.")
696	}
697}
698
699func TestCopyTokenStartElement(t *testing.T) {
700	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
701	var tok1 Token = elt
702	tok2 := CopyToken(tok1)
703	if tok1.(StartElement).Attr[0].Value != "en" {
704		t.Error("CopyToken overwrote Attr[0]")
705	}
706	if !reflect.DeepEqual(tok1, tok2) {
707		t.Error("CopyToken(StartElement) != StartElement")
708	}
709	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
710	if reflect.DeepEqual(tok1, tok2) {
711		t.Error("CopyToken(CharData) uses same buffer.")
712	}
713}
714
715func TestCopyTokenComment(t *testing.T) {
716	data := []byte("<!-- some comment -->")
717	var tok1 Token = Comment(data)
718	tok2 := CopyToken(tok1)
719	if !reflect.DeepEqual(tok1, tok2) {
720		t.Error("CopyToken(Comment) != Comment")
721	}
722	data[1] = 'o'
723	if reflect.DeepEqual(tok1, tok2) {
724		t.Error("CopyToken(Comment) uses same buffer.")
725	}
726}
727
728func TestSyntaxErrorLineNum(t *testing.T) {
729	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
730	d := NewDecoder(strings.NewReader(testInput))
731	var err error
732	for _, err = d.Token(); err == nil; _, err = d.Token() {
733	}
734	synerr, ok := err.(*SyntaxError)
735	if !ok {
736		t.Error("Expected SyntaxError.")
737	}
738	if synerr.Line != 3 {
739		t.Error("SyntaxError didn't have correct line number.")
740	}
741}
742
743func TestTrailingRawToken(t *testing.T) {
744	input := `<FOO></FOO>  `
745	d := NewDecoder(strings.NewReader(input))
746	var err error
747	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
748	}
749	if err != io.EOF {
750		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
751	}
752}
753
754func TestTrailingToken(t *testing.T) {
755	input := `<FOO></FOO>  `
756	d := NewDecoder(strings.NewReader(input))
757	var err error
758	for _, err = d.Token(); err == nil; _, err = d.Token() {
759	}
760	if err != io.EOF {
761		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
762	}
763}
764
765func TestEntityInsideCDATA(t *testing.T) {
766	input := `<test><![CDATA[ &val=foo ]]></test>`
767	d := NewDecoder(strings.NewReader(input))
768	var err error
769	for _, err = d.Token(); err == nil; _, err = d.Token() {
770	}
771	if err != io.EOF {
772		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
773	}
774}
775
// characterTests pairs an input document (in) with the SyntaxError.Msg that
// TestDisallowedCharacters expects when tokenizing it (err).
776var characterTests = []struct {
777	in  string
778	err string
779}{
780	{"\x12<doc/>", "illegal character code U+0012"},
781	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
782	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
783	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
784	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
785	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
786	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
787	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
788	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
789}
790
791func TestDisallowedCharacters(t *testing.T) {
792
793	for i, tt := range characterTests {
794		d := NewDecoder(strings.NewReader(tt.in))
795		var err error
796
797		for err == nil {
798			_, err = d.Token()
799		}
800		synerr, ok := err.(*SyntaxError)
801		if !ok {
802			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
803		}
804		if synerr.Msg != tt.err {
805			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
806		}
807	}
808}
809
810func TestIsInCharacterRange(t *testing.T) {
811	invalid := []rune{
812		utf8.MaxRune + 1,
813		0xD800, // surrogate min
814		0xDFFF, // surrogate max
815		-1,
816	}
817	for _, r := range invalid {
818		if isInCharacterRange(r) {
819			t.Errorf("rune %U considered valid", r)
820		}
821	}
822}
823
// procInstTests pairs processing-instruction content (input) with the values
// TestProcInstEncoding expects from procInst: expect[0] for "version" and
// expect[1] for "encoding".
824var procInstTests = []struct {
825	input  string
826	expect [2]string
827}{
828	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
829	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
830	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
831	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
832	{`encoding="FOO" `, [2]string{"", "FOO"}},
833	{`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
834	{`version= encoding=`, [2]string{"", ""}},
835	{`encoding="version=1.0"`, [2]string{"", "version=1.0"}},
836	{``, [2]string{"", ""}},
837	// TODO: what's the right approach to handle these nested cases?
838	{`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}},
839	{`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}},
840}
841
842func TestProcInstEncoding(t *testing.T) {
843	for _, test := range procInstTests {
844		if got := procInst("version", test.input); got != test.expect[0] {
845			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
846		}
847		if got := procInst("encoding", test.input); got != test.expect[1] {
848			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
849		}
850	}
851}
852
853// Ensure that directives with comments include the complete
854// text of any nested directives.
855
// directivesWithCommentsInput is the document tokenized by
// TestDirectivesWithComments. Each DOCTYPE contains one or more comments next
// to a nested ENTITY declaration; the last line also contains sequences such
// as <!-> and <!> that begin like a comment but are not one.
856var directivesWithCommentsInput = `
857<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
858<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
859<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
860`
861
// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects for directivesWithCommentsInput. In the
// expected directives each comment from the input appears as a single space,
// while the nested ENTITY declarations are kept in full.
862var directivesWithCommentsTokens = []Token{
863	CharData("\n"),
864	Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
865	CharData("\n"),
866	Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
867	CharData("\n"),
868	Directive(`DOCTYPE <!-> <!>       [<!ENTITY go "Golang"> ]`),
869	CharData("\n"),
870}
871
872func TestDirectivesWithComments(t *testing.T) {
873	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
874
875	for i, want := range directivesWithCommentsTokens {
876		have, err := d.Token()
877		if err != nil {
878			t.Fatalf("token %d: unexpected error: %s", i, err)
879		}
880		if !reflect.DeepEqual(have, want) {
881			t.Errorf("token %d = %#v want %#v", i, have, want)
882		}
883	}
884}
885
// errWriter is a writer that never accepts data: every Write call fails.
type errWriter struct{}

// Write reports zero bytes written and an "unwritable" error, whatever p is.
func (errWriter) Write(p []byte) (n int, err error) {
	return 0, fmt.Errorf("unwritable")
}
890
891func TestEscapeTextIOErrors(t *testing.T) {
892	expectErr := "unwritable"
893	err := EscapeText(errWriter{}, []byte{'A'})
894
895	if err == nil || err.Error() != expectErr {
896		t.Errorf("have %v, want %v", err, expectErr)
897	}
898}
899
900func TestEscapeTextInvalidChar(t *testing.T) {
901	input := []byte("A \x00 terminated string.")
902	expected := "A \uFFFD terminated string."
903
904	buff := new(strings.Builder)
905	if err := EscapeText(buff, input); err != nil {
906		t.Fatalf("have %v, want nil", err)
907	}
908	text := buff.String()
909
910	if text != expected {
911		t.Errorf("have %v, want %v", text, expected)
912	}
913}
914
915func TestIssue5880(t *testing.T) {
916	type T []byte
917	data, err := Marshal(T{192, 168, 0, 1})
918	if err != nil {
919		t.Errorf("Marshal error: %v", err)
920	}
921	if !utf8.Valid(data) {
922		t.Errorf("Marshal generated invalid UTF-8: %x", data)
923	}
924}
925
926func TestIssue8535(t *testing.T) {
927
928	type ExampleConflict struct {
929		XMLName  Name   `xml:"example"`
930		Link     string `xml:"link"`
931		AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space
932	}
933	testCase := `<example>
934			<title>Example</title>
935			<link>http://example.com/default</link> <!-- not assigned -->
936			<link>http://example.com/home</link> <!-- not assigned -->
937			<ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link>
938		</example>`
939
940	var dest ExampleConflict
941	d := NewDecoder(strings.NewReader(testCase))
942	if err := d.Decode(&dest); err != nil {
943		t.Fatal(err)
944	}
945}
946
947func TestEncodeXMLNS(t *testing.T) {
948	testCases := []struct {
949		f    func() ([]byte, error)
950		want string
951		ok   bool
952	}{
953		{encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
954		{encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true},
955		{encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
956		{encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false},
957	}
958
959	for i, tc := range testCases {
960		if b, err := tc.f(); err == nil {
961			if got, want := string(b), tc.want; got != want {
962				t.Errorf("%d: got %s, want %s \n", i, got, want)
963			}
964		} else {
965			t.Errorf("%d: marshal failed with %s", i, err)
966		}
967	}
968}
969
970func encodeXMLNS1() ([]byte, error) {
971
972	type T struct {
973		XMLName Name   `xml:"Test"`
974		Ns      string `xml:"xmlns,attr"`
975		Body    string
976	}
977
978	s := &T{Ns: "http://example.com/ns", Body: "hello world"}
979	return Marshal(s)
980}
981
982func encodeXMLNS2() ([]byte, error) {
983
984	type Test struct {
985		Body string `xml:"http://example.com/ns body"`
986	}
987
988	s := &Test{Body: "hello world"}
989	return Marshal(s)
990}
991
992func encodeXMLNS3() ([]byte, error) {
993
994	type Test struct {
995		XMLName Name `xml:"http://example.com/ns Test"`
996		Body    string
997	}
998
999	//s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing
1000	// as documentation states
1001	s := &Test{Body: "hello world"}
1002	return Marshal(s)
1003}
1004
1005func encodeXMLNS4() ([]byte, error) {
1006
1007	type Test struct {
1008		Ns   string `xml:"xmlns,attr"`
1009		Body string
1010	}
1011
1012	s := &Test{Ns: "http://example.com/ns", Body: "hello world"}
1013	return Marshal(s)
1014}
1015
1016func TestIssue11405(t *testing.T) {
1017	testCases := []string{
1018		"<root>",
1019		"<root><foo>",
1020		"<root><foo></foo>",
1021	}
1022	for _, tc := range testCases {
1023		d := NewDecoder(strings.NewReader(tc))
1024		var err error
1025		for {
1026			_, err = d.Token()
1027			if err != nil {
1028				break
1029			}
1030		}
1031		if _, ok := err.(*SyntaxError); !ok {
1032			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
1033		}
1034	}
1035}
1036
1037func TestIssue12417(t *testing.T) {
1038	testCases := []struct {
1039		s  string
1040		ok bool
1041	}{
1042		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
1043		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
1044		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
1045		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
1046	}
1047	for _, tc := range testCases {
1048		d := NewDecoder(strings.NewReader(tc.s))
1049		var err error
1050		for {
1051			_, err = d.Token()
1052			if err != nil {
1053				if err == io.EOF {
1054					err = nil
1055				}
1056				break
1057			}
1058		}
1059		if err != nil && tc.ok {
1060			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
1061			continue
1062		}
1063		if err == nil && !tc.ok {
1064			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
1065		}
1066	}
1067}
1068
1069func TestIssue7113(t *testing.T) {
1070	type C struct {
1071		XMLName Name `xml:""` // Sets empty namespace
1072	}
1073
1074	type D struct {
1075		XMLName Name `xml:"d"`
1076	}
1077
1078	type A struct {
1079		XMLName Name `xml:""`
1080		C       C    `xml:""`
1081		D       D
1082	}
1083
1084	var a A
1085	structSpace := "b"
1086	xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>`
1087	t.Log(xmlTest)
1088	err := Unmarshal([]byte(xmlTest), &a)
1089	if err != nil {
1090		t.Fatal(err)
1091	}
1092
1093	if a.XMLName.Space != structSpace {
1094		t.Errorf("overidding with empty namespace: unmarshaling, got %s, want %s\n", a.XMLName.Space, structSpace)
1095	}
1096	if len(a.C.XMLName.Space) != 0 {
1097		t.Fatalf("overidding with empty namespace: unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
1098	}
1099
1100	var b []byte
1101	b, err = Marshal(&a)
1102	if err != nil {
1103		t.Fatal(err)
1104	}
1105	if len(a.C.XMLName.Space) != 0 {
1106		t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
1107	}
1108	if string(b) != xmlTest {
1109		t.Fatalf("overidding with empty namespace: marshaling, got %s, want %s\n", b, xmlTest)
1110	}
1111	var c A
1112	err = Unmarshal(b, &c)
1113	if err != nil {
1114		t.Fatalf("second Unmarshal failed: %s", err)
1115	}
1116	if c.XMLName.Space != "b" {
1117		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace)
1118	}
1119	if len(c.C.XMLName.Space) != 0 {
1120		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
1121	}
1122}
1123
// TestIssue20396 feeds Unmarshal element names that contain an extra ':', a
// '=' or a '&' and checks that each is rejected with the expected syntax
// error, while the well-formed prefixed names are accepted.
func TestIssue20396(t *testing.T) {
	const (
		elemNameMsg = "XML syntax error on line 1: expected element name after <"
		attrNameMsg = "XML syntax error on line 1: expected attribute name in element"
	)
	elemErr := UnmarshalError(elemNameMsg)
	attrErr := UnmarshalError(attrNameMsg)

	cases := []struct {
		doc     string
		wantErr error
	}{
		{`<a:te:st xmlns:a="abcd"/>`, elemErr}, // Issue 20396
		{`<a:te=st xmlns:a="abcd"/>`, attrErr},
		{`<a:te&st xmlns:a="abcd"/>`, attrErr},
		{`<a:test xmlns:a="abcd"/>`, nil},
		{`<a:te:st xmlns:a="abcd">1</a:te:st>`, elemErr},
		{`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrErr},
		{`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrErr},
		{`<a:test xmlns:a="abcd">1</a:test>`, nil},
	}

	var dest string
	for _, tc := range cases {
		err := Unmarshal([]byte(tc.doc), &dest)
		switch {
		case err == tc.wantErr:
			// Identical values (including nil on both sides): nothing to report.
		case err == nil:
			t.Errorf("%s: Unexpected success, want %v", tc.doc, tc.wantErr)
		case tc.wantErr == nil:
			t.Errorf("%s: Unexpected error, got %v", tc.doc, err)
		case err.Error() != tc.wantErr.Error():
			// The concrete error types may differ; only the text is compared.
			t.Errorf("%s: got %v, want %v", tc.doc, err, tc.wantErr)
		}
	}
}
1157
// TestIssue20685 tokenizes documents whose closing tag uses a different
// prefix (or name) than the opening tag and checks that only the document
// with matching tags is read to the end without an error.
func TestIssue20685(t *testing.T) {
	cases := []struct {
		doc   string
		valid bool
	}{
		{`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
		{`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
		{`<x:book xmlns:x="abcd">one</y:book>`, false},
		{`<x:book>one</y:book>`, false},
		{`<xbook>one</ybook>`, false},
	}
	for _, tc := range cases {
		dec := NewDecoder(strings.NewReader(tc.doc))

		// Drain the decoder; the first non-nil error ends the loop.
		var err error
		for err == nil {
			_, err = dec.Token()
		}
		// Reaching the end of the input is the success case.
		if err == io.EOF {
			err = nil
		}

		switch {
		case tc.valid && err != nil:
			t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.doc, err)
		case !tc.valid && err == nil:
			t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.doc)
		}
	}
}
1192
1193func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
1194	return func(src TokenReader) TokenReader {
1195		return mapper{
1196			t: src,
1197			f: mapping,
1198		}
1199	}
1200}
1201
// mapper is a TokenReader that applies f to every token read from t.
type mapper struct {
	t TokenReader       // underlying token source
	f func(Token) Token // transformation applied to each token
}

// Token reads the next token from the underlying reader and returns it
// transformed by f.
//
// A TokenReader is allowed to hand back its last token together with a
// non-nil error (for example io.EOF). In that case the mapped token is
// returned along with the error instead of being dropped. When the source
// reports an error without a token, the error is returned unchanged and f is
// not called.
func (m mapper) Token() (Token, error) {
	tok, err := m.t.Token()
	if err != nil && tok == nil {
		return nil, err
	}
	return m.f(tok), err
}
1214
// TestNewTokenDecoderIdempotent checks that handing an existing *Decoder to
// NewTokenDecoder yields that very Decoder rather than a new wrapper.
func TestNewTokenDecoderIdempotent(t *testing.T) {
	inner := NewDecoder(strings.NewReader(`<br>`))
	if outer := NewTokenDecoder(inner); outer != inner {
		t.Error("NewTokenDecoder did not detect underlying Decoder")
	}
}
1222
1223func TestWrapDecoder(t *testing.T) {
1224	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
1225	m := tokenMap(func(t Token) Token {
1226		switch tok := t.(type) {
1227		case StartElement:
1228			if tok.Name.Local == "quote" {
1229				tok.Name.Local = "blocking"
1230				return tok
1231			}
1232		case EndElement:
1233			if tok.Name.Local == "quote" {
1234				tok.Name.Local = "blocking"
1235				return tok
1236			}
1237		}
1238		return t
1239	})
1240
1241	d = NewTokenDecoder(m(d))
1242
1243	o := struct {
1244		XMLName  Name   `xml:"blocking"`
1245		Chardata string `xml:",chardata"`
1246	}{}
1247
1248	if err := d.Decode(&o); err != nil {
1249		t.Fatal("Got unexpected error while decoding:", err)
1250	}
1251
1252	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
1253		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
1254	}
1255}
1256
// tokReader is a TokenReader that never reports an error or end of input.
type tokReader struct{}

// Token always returns a zero-valued StartElement and a nil error.
func (tokReader) Token() (Token, error) {
	var start StartElement
	return start, nil
}
1262
// Failure is an empty type with a do-nothing UnmarshalXML method.
type Failure struct{}

// UnmarshalXML ignores both arguments, reads nothing from the decoder and
// always returns nil.
func (Failure) UnmarshalXML(_ *Decoder, _ StartElement) error {
	return nil
}
1268
1269func TestTokenUnmarshaler(t *testing.T) {
1270	defer func() {
1271		if r := recover(); r != nil {
1272			t.Error("Unexpected panic using custom token unmarshaler")
1273		}
1274	}()
1275
1276	d := NewTokenDecoder(tokReader{})
1277	d.Decode(&Failure{})
1278}
1279
// testRoundTrip tokenizes input, re-encodes every token with an Encoder, then
// tokenizes the encoder's output and requires it to produce exactly the same
// token sequence. Any decoding or encoding error, an extra token, a differing
// token or a missing token fails the test immediately.
func testRoundTrip(t *testing.T, input string) {
	var (
		want []Token
		out  bytes.Buffer
	)
	enc := NewEncoder(&out)
	src := NewDecoder(strings.NewReader(input))
	for {
		tok, err := src.Token()
		if err != nil {
			if err == io.EOF {
				break
			}
			t.Fatalf("invalid input: %v", err)
		}
		if err := enc.EncodeToken(tok); err != nil {
			t.Fatalf("failed to re-encode input: %v", err)
		}
		// Keep a copy of the token for the comparison pass below.
		want = append(want, CopyToken(tok))
	}
	if err := enc.Flush(); err != nil {
		t.Fatal(err)
	}

	// next is the index of the recorded token the next decoded one must match.
	next := 0
	again := NewDecoder(&out)
	for {
		got, err := again.Token()
		if err != nil {
			if err == io.EOF {
				break
			}
			t.Fatalf("failed to decode output: %v", err)
		}
		if next == len(want) {
			t.Fatalf("unexpected token: %#v", got)
		}
		if exp := want[next]; !reflect.DeepEqual(exp, got) {
			t.Fatalf("token mismatch: %#v vs %#v", exp, got)
		}
		next++
	}
	if next < len(want) {
		t.Fatalf("lost tokens: %#v", want[next:])
	}
}
1324
1325func TestRoundTrip(t *testing.T) {
1326	tests := map[string]string{
1327		"trailing colon":         `<foo abc:="x"></foo>`,
1328		"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
1329	}
1330	for name, input := range tests {
1331		t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
1332	}
1333}
1334
// TestParseErrors tokenizes each source until the first error. Entries with
// a non-empty wantErr must fail with an error whose text contains that
// substring; entries with an empty wantErr must be read to io.EOF.
func TestParseErrors(t *testing.T) {
	const header = `<?xml version="1.0" encoding="UTF-8"?>`
	withDefaultHeader := func(body string) string {
		return header + body
	}
	cases := []struct {
		src     string
		wantErr string
	}{
		{withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
		{withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
		{withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
		{withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
		{withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
		{withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
		{withDefaultHeader(`<zzz:foo xmlns:zzz="http://example.com"><bar>baz</bar></foo>`),
			`element <foo> in space zzz closed by </foo> in space ""`},
		{withDefaultHeader("\xf1"), `invalid UTF-8`},

		// Header-related errors.
		{`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},

		// Cases below are for "no errors".
		{withDefaultHeader(`<?ok?>`), ``},
		{withDefaultHeader(`<?ok version="ok"?>`), ``},
	}

	for _, tc := range cases {
		dec := NewDecoder(strings.NewReader(tc.src))

		// Read tokens until the decoder reports something; err is non-nil
		// once this loop ends.
		var err error
		for err == nil {
			_, err = dec.Token()
		}

		switch {
		case tc.wantErr == "":
			if err != io.EOF {
				t.Errorf("parse %s: have %q error, expected none", tc.src, err)
			}
		case err == io.EOF:
			t.Errorf("parse %s: unexpected EOF", tc.src)
		case !strings.Contains(err.Error(), tc.wantErr):
			t.Errorf("parse %s: can't find %q error substring\nerror: %q", tc.src, tc.wantErr, err)
		}
	}
}
1387
// testInputHTMLAutoClose is the document shared by BenchmarkHTMLAutoClose and
// TestHTMLAutoClose. After the XML header it contains br elements written
// unclosed, self-closed and explicitly closed, in lower, upper and mixed
// case, plus one span element with an attribute and text.
const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>
<br>
<br/><br/>
<br><br>
<br></br>
<BR>
<BR/><BR/>
<Br></Br>
<BR><span id="test">abc</span><br/><br/>`
1397
1398func BenchmarkHTMLAutoClose(b *testing.B) {
1399	b.RunParallel(func(p *testing.PB) {
1400		for p.Next() {
1401			d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
1402			d.Strict = false
1403			d.AutoClose = HTMLAutoClose
1404			d.Entity = HTMLEntity
1405			for {
1406				_, err := d.Token()
1407				if err != nil {
1408					if err == io.EOF {
1409						break
1410					}
1411					b.Fatalf("unexpected error: %v", err)
1412				}
1413			}
1414		}
1415	})
1416}
1417
1418func TestHTMLAutoClose(t *testing.T) {
1419	wantTokens := []Token{
1420		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
1421		CharData("\n"),
1422		StartElement{Name{"", "br"}, []Attr{}},
1423		EndElement{Name{"", "br"}},
1424		CharData("\n"),
1425		StartElement{Name{"", "br"}, []Attr{}},
1426		EndElement{Name{"", "br"}},
1427		StartElement{Name{"", "br"}, []Attr{}},
1428		EndElement{Name{"", "br"}},
1429		CharData("\n"),
1430		StartElement{Name{"", "br"}, []Attr{}},
1431		EndElement{Name{"", "br"}},
1432		StartElement{Name{"", "br"}, []Attr{}},
1433		EndElement{Name{"", "br"}},
1434		CharData("\n"),
1435		StartElement{Name{"", "br"}, []Attr{}},
1436		EndElement{Name{"", "br"}},
1437		CharData("\n"),
1438		StartElement{Name{"", "BR"}, []Attr{}},
1439		EndElement{Name{"", "BR"}},
1440		CharData("\n"),
1441		StartElement{Name{"", "BR"}, []Attr{}},
1442		EndElement{Name{"", "BR"}},
1443		StartElement{Name{"", "BR"}, []Attr{}},
1444		EndElement{Name{"", "BR"}},
1445		CharData("\n"),
1446		StartElement{Name{"", "Br"}, []Attr{}},
1447		EndElement{Name{"", "Br"}},
1448		CharData("\n"),
1449		StartElement{Name{"", "BR"}, []Attr{}},
1450		EndElement{Name{"", "BR"}},
1451		StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
1452		CharData("abc"),
1453		EndElement{Name{"", "span"}},
1454		StartElement{Name{"", "br"}, []Attr{}},
1455		EndElement{Name{"", "br"}},
1456		StartElement{Name{"", "br"}, []Attr{}},
1457		EndElement{Name{"", "br"}},
1458	}
1459
1460	d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
1461	d.Strict = false
1462	d.AutoClose = HTMLAutoClose
1463	d.Entity = HTMLEntity
1464	var haveTokens []Token
1465	for {
1466		tok, err := d.Token()
1467		if err != nil {
1468			if err == io.EOF {
1469				break
1470			}
1471			t.Fatalf("unexpected error: %v", err)
1472		}
1473		haveTokens = append(haveTokens, CopyToken(tok))
1474	}
1475	if len(haveTokens) != len(wantTokens) {
1476		t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
1477	}
1478	for i, want := range wantTokens {
1479		if i >= len(haveTokens) {
1480			t.Errorf("token[%d] expected %#v, have no token", i, want)
1481		} else {
1482			have := haveTokens[i]
1483			if !reflect.DeepEqual(have, want) {
1484				t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
1485			}
1486		}
1487	}
1488}
1489