1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package utf16_test
6
7import (
8	"internal/testenv"
9	"reflect"
10	"testing"
11	"unicode"
12	. "unicode/utf16"
13)
14
15// Validate the constants redefined from unicode.
16func TestConstants(t *testing.T) {
17	if MaxRune != unicode.MaxRune {
18		t.Errorf("utf16.maxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune)
19	}
20	if ReplacementChar != unicode.ReplacementChar {
21		t.Errorf("utf16.replacementChar is wrong: %x should be %x", ReplacementChar, unicode.ReplacementChar)
22	}
23}
24
25func TestRuneLen(t *testing.T) {
26	for _, tt := range []struct {
27		r      rune
28		length int
29	}{
30		{0, 1},
31		{Surr1 - 1, 1},
32		{Surr3, 1},
33		{SurrSelf - 1, 1},
34		{SurrSelf, 2},
35		{MaxRune, 2},
36		{MaxRune + 1, -1},
37		{-1, -1},
38	} {
39		if length := RuneLen(tt.r); length != tt.length {
40			t.Errorf("RuneLen(%#U) = %d, want %d", tt.r, length, tt.length)
41		}
42	}
43}
44
// encodeTest pairs a sequence of runes with the UTF-16 code units the
// encoding functions are expected to produce for it.
type encodeTest struct {
	in  []rune   // runes handed to the encoder
	out []uint16 // expected UTF-16 code units
}

// encodeTests is the table shared by TestEncode, TestAppendRune and
// TestEncodeRune.
var encodeTests = []encodeTest{
	{
		in:  []rune{1, 2, 3, 4},
		out: []uint16{1, 2, 3, 4},
	},
	{
		// One single-unit rune followed by runes that each take two units.
		in: []rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
		out: []uint16{
			0xffff,
			0xd800, 0xdc00,
			0xd800, 0xdc01,
			0xd808, 0xdf45,
			0xdbff, 0xdfff,
		},
	},
	{
		// Inputs expected to come out as 0xfffd: 0xd800, 0xdfff, 0x110000 and -1.
		in:  []rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1},
		out: []uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd},
	},
}
57
58func TestEncode(t *testing.T) {
59	for _, tt := range encodeTests {
60		out := Encode(tt.in)
61		if !reflect.DeepEqual(out, tt.out) {
62			t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out)
63		}
64	}
65}
66
67func TestAppendRune(t *testing.T) {
68	for _, tt := range encodeTests {
69		var out []uint16
70		for _, u := range tt.in {
71			out = AppendRune(out, u)
72		}
73		if !reflect.DeepEqual(out, tt.out) {
74			t.Errorf("AppendRune(%x) = %x; want %x", tt.in, out, tt.out)
75		}
76	}
77}
78
79func TestEncodeRune(t *testing.T) {
80	for i, tt := range encodeTests {
81		j := 0
82		for _, r := range tt.in {
83			r1, r2 := EncodeRune(r)
84			if r < 0x10000 || r > unicode.MaxRune {
85				if j >= len(tt.out) {
86					t.Errorf("#%d: ran out of tt.out", i)
87					break
88				}
89				if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar {
90					t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2)
91				}
92				j++
93			} else {
94				if j+1 >= len(tt.out) {
95					t.Errorf("#%d: ran out of tt.out", i)
96					break
97				}
98				if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) {
99					t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1])
100				}
101				j += 2
102				dec := DecodeRune(r1, r2)
103				if dec != r {
104					t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r)
105				}
106			}
107		}
108		if j != len(tt.out) {
109			t.Errorf("#%d: EncodeRune didn't generate enough output", i)
110		}
111	}
112}
113
// decodeTest pairs a sequence of UTF-16 code units with the runes Decode is
// expected to return for it.
type decodeTest struct {
	in  []uint16 // UTF-16 code units handed to the decoder
	out []rune   // expected runes
}

// decodeTests is the table shared by TestDecode and TestAllocationsDecode.
var decodeTests = []decodeTest{
	{
		in:  []uint16{1, 2, 3, 4},
		out: []rune{1, 2, 3, 4},
	},
	{
		in: []uint16{
			0xffff,
			0xd800, 0xdc00,
			0xd800, 0xdc01,
			0xd808, 0xdf45,
			0xdbff, 0xdfff,
		},
		out: []rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
	},
	{
		// 0xd800 followed by 'a': expected to decode as 0xfffd, then 'a'.
		in:  []uint16{0xd800, 'a'},
		out: []rune{0xfffd, 'a'},
	},
	{
		// 0xdfff on its own: expected to decode as 0xfffd.
		in:  []uint16{0xdfff},
		out: []rune{0xfffd},
	},
}
126
127func TestAllocationsDecode(t *testing.T) {
128	testenv.SkipIfOptimizationOff(t)
129
130	for _, tt := range decodeTests {
131		allocs := testing.AllocsPerRun(10, func() {
132			out := Decode(tt.in)
133			if out == nil {
134				t.Errorf("Decode(%x) = nil", tt.in)
135			}
136		})
137		if allocs > 0 {
138			t.Errorf("Decode allocated %v times", allocs)
139		}
140	}
141}
142
143func TestDecode(t *testing.T) {
144	for _, tt := range decodeTests {
145		out := Decode(tt.in)
146		if !reflect.DeepEqual(out, tt.out) {
147			t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out)
148		}
149	}
150}
151
// decodeRuneTests lists pairs of code units (r1, r2) together with the rune
// DecodeRune is expected to return for them.
var decodeRuneTests = []struct {
	r1, r2 rune
	want   rune
}{
	{r1: 0xd800, r2: 0xdc00, want: 0x10000},
	{r1: 0xd800, r2: 0xdc01, want: 0x10001},
	{r1: 0xd808, r2: 0xdf45, want: 0x12345},
	{r1: 0xdbff, r2: 0xdfff, want: 0x10ffff},
	// Illegal pair: the replacement rune is expected instead.
	{r1: 0xd800, r2: 'a', want: 0xfffd},
}
162
163func TestDecodeRune(t *testing.T) {
164	for i, tt := range decodeRuneTests {
165		got := DecodeRune(tt.r1, tt.r2)
166		if got != tt.want {
167			t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want)
168		}
169	}
170}
171
// surrogateTests lists runes together with the result IsSurrogate is expected
// to report for each of them.
var surrogateTests = []struct {
	r    rune
	want bool
}{
	// Sample code points taken from https://en.wikipedia.org/wiki/UTF-16
	{r: '\u007A', want: false},     // LATIN SMALL LETTER Z
	{r: '\u6C34', want: false},     // CJK UNIFIED IDEOGRAPH-6C34 (water)
	{r: '\uFEFF', want: false},     // Byte Order Mark
	{r: '\U00010000', want: false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point)
	{r: '\U0001D11E', want: false}, // MUSICAL SYMBOL G CLEF
	{r: '\U0010FFFD', want: false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point)

	// Values at the edges of the surrogate range.
	{r: rune(0xd7ff), want: false}, // surr1-1
	{r: rune(0xd800), want: true},  // surr1
	{r: rune(0xdc00), want: true},  // surr2
	{r: rune(0xe000), want: false}, // surr3
	{r: rune(0xdfff), want: true},  // surr3-1
}
190
191func TestIsSurrogate(t *testing.T) {
192	for i, tt := range surrogateTests {
193		got := IsSurrogate(tt.r)
194		if got != tt.want {
195			t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want)
196		}
197	}
198}
199
// BenchmarkDecodeValidASCII measures Decode on a short ASCII-only input.
func BenchmarkDecodeValidASCII(b *testing.B) {
	// UTF-16 code units spelling "hello world".
	input := []uint16{'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'}
	for iter := 0; iter < b.N; iter++ {
		Decode(input)
	}
}
207
// BenchmarkDecodeValidJapaneseChars measures Decode on input made only of
// single-unit (non-surrogate) Japanese characters.
func BenchmarkDecodeValidJapaneseChars(b *testing.B) {
	// UTF-16 code units spelling "日本語日本語日本語".
	input := []uint16{
		0x65e5, 0x672c, 0x8a9e,
		0x65e5, 0x672c, 0x8a9e,
		0x65e5, 0x672c, 0x8a9e,
	}
	for iter := 0; iter < b.N; iter++ {
		Decode(input)
	}
}
215
// BenchmarkDecodeRune measures DecodeRune on five surrogate pairs. The pairs
// are produced with EncodeRune before the timer is reset, so only the
// decoding loop is timed.
func BenchmarkDecodeRune(b *testing.B) {
	// U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS.
	// Written as escapes so the literals survive any re-encoding of the file.
	runes := []rune{'\U0001D4D0', '\U0001D4D1', '\U0001D4D2', '\U0001D4D3', '\U0001D4D4'}

	// rs holds the pairs back to back: rs[2*i] and rs[2*i+1] encode runes[i].
	rs := make([]rune, 2*len(runes))
	for i, u := range runes {
		rs[2*i], rs[2*i+1] = EncodeRune(u)
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for j := 0; j < len(rs); j += 2 {
			DecodeRune(rs[j], rs[j+1])
		}
	}
}
230
// BenchmarkEncodeValidASCII measures Encode on a short ASCII-only input.
func BenchmarkEncodeValidASCII(b *testing.B) {
	input := []rune("hello")
	for iter := 0; iter < b.N; iter++ {
		Encode(input)
	}
}
237
// BenchmarkEncodeValidJapaneseChars measures Encode on three Japanese
// characters, each of which fits in a single UTF-16 code unit.
func BenchmarkEncodeValidJapaneseChars(b *testing.B) {
	input := []rune("日本語")
	for iter := 0; iter < b.N; iter++ {
		Encode(input)
	}
}
244
// BenchmarkAppendRuneValidASCII measures AppendRune on ASCII runes. The
// destination slice is allocated once, with room for two code units per
// rune, and truncated after every pass so its storage is reused.
func BenchmarkAppendRuneValidASCII(b *testing.B) {
	input := []rune("hello")
	buf := make([]uint16, 0, 2*len(input))
	for iter := 0; iter < b.N; iter++ {
		for k := range input {
			buf = AppendRune(buf, input[k])
		}
		buf = buf[:0]
	}
}
255
// BenchmarkAppendRuneValidJapaneseChars measures AppendRune on three Japanese
// characters. The destination slice is allocated once, with room for two code
// units per rune, and truncated after every pass so its storage is reused.
func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) {
	input := []rune("日本語")
	buf := make([]uint16, 0, 2*len(input))
	for iter := 0; iter < b.N; iter++ {
		for k := range input {
			buf = AppendRune(buf, input[k])
		}
		buf = buf[:0]
	}
}
266
// BenchmarkEncodeRune measures EncodeRune on five runes that lie above
// U+FFFF.
func BenchmarkEncodeRune(b *testing.B) {
	for i := 0; i < b.N; i++ {
		// U+1D4D0 to U+1D4D4, written as escapes so the literals survive any
		// re-encoding of the file.
		for _, u := range []rune{'\U0001D4D0', '\U0001D4D1', '\U0001D4D2', '\U0001D4D3', '\U0001D4D4'} {
			EncodeRune(u)
		}
	}
}
274