xref: /aosp_15_r20/external/licenseclassifier/v2/classifier_test.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
// Copyright 2020 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*46c4c49dSIbrahim Kanouche
15*46c4c49dSIbrahim Kanouchepackage classifier
16*46c4c49dSIbrahim Kanouche
17*46c4c49dSIbrahim Kanoucheimport (
18*46c4c49dSIbrahim Kanouche	"bytes"
19*46c4c49dSIbrahim Kanouche	"errors"
20*46c4c49dSIbrahim Kanouche	"io/ioutil"
21*46c4c49dSIbrahim Kanouche	"log"
22*46c4c49dSIbrahim Kanouche	"os"
23*46c4c49dSIbrahim Kanouche	"path"
24*46c4c49dSIbrahim Kanouche	"path/filepath"
25*46c4c49dSIbrahim Kanouche	"sort"
26*46c4c49dSIbrahim Kanouche	"strings"
27*46c4c49dSIbrahim Kanouche	"testing"
28*46c4c49dSIbrahim Kanouche	"testing/iotest"
29*46c4c49dSIbrahim Kanouche
30*46c4c49dSIbrahim Kanouche	"github.com/davecgh/go-spew/spew"
31*46c4c49dSIbrahim Kanouche	"github.com/google/go-cmp/cmp"
32*46c4c49dSIbrahim Kanouche)
33*46c4c49dSIbrahim Kanouche
// scenario is one parsed scenario file: the license names the classifier is
// expected to report, and the content that is handed to the classifier.
type scenario struct {
	// expected holds the comma-separated names taken from the EXPECTED: line.
	expected []string
	// data is everything in the file after the EXPECTED: line.
	data []byte
}
38*46c4c49dSIbrahim Kanouche
var (
	// defaultThreshold is the value passed to NewClassifier by the tests.
	defaultThreshold = .8
	// baseLicenses is the directory the test classifier loads licenses from.
	baseLicenses = "assets"
)
41*46c4c49dSIbrahim Kanouche
42*46c4c49dSIbrahim Kanouchefunc classifier() (*Classifier, error) {
43*46c4c49dSIbrahim Kanouche	c := NewClassifier(defaultThreshold)
44*46c4c49dSIbrahim Kanouche	return c, c.LoadLicenses(path.Join(baseLicenses))
45*46c4c49dSIbrahim Kanouche}
46*46c4c49dSIbrahim Kanouche
// getScenarioFilenames walks the "scenarios" directory and returns the path of
// every non-directory entry whose path does not end in "md". The first error
// seen during the walk stops it and is returned alongside the paths collected
// so far.
func getScenarioFilenames() ([]string, error) {
	const root = "scenarios"

	var found []string
	collect := func(name string, fi os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			// fi may be nil here, so the error has to be handled first.
			return walkErr
		case strings.HasSuffix(name, "md"), fi.IsDir():
			return nil
		}
		found = append(found, name)
		return nil
	}

	walkErr := filepath.Walk(path.Join(root), collect)
	return found, walkErr
}
63*46c4c49dSIbrahim Kanouche
64*46c4c49dSIbrahim Kanouchefunc TestMatchScenarios(t *testing.T) {
65*46c4c49dSIbrahim Kanouche	c, err := classifier()
66*46c4c49dSIbrahim Kanouche	if err != nil {
67*46c4c49dSIbrahim Kanouche		t.Fatalf("couldn't instantiate standard test classifier: %v", err)
68*46c4c49dSIbrahim Kanouche	}
69*46c4c49dSIbrahim Kanouche
70*46c4c49dSIbrahim Kanouche	files, err := getScenarioFilenames()
71*46c4c49dSIbrahim Kanouche	if err != nil {
72*46c4c49dSIbrahim Kanouche		t.Fatalf("encountered error walking scenarios directory: %v", err)
73*46c4c49dSIbrahim Kanouche	}
74*46c4c49dSIbrahim Kanouche
75*46c4c49dSIbrahim Kanouche	for _, f := range files {
76*46c4c49dSIbrahim Kanouche		s := readScenario(f)
77*46c4c49dSIbrahim Kanouche
78*46c4c49dSIbrahim Kanouche		m := c.Match(s.data)
79*46c4c49dSIbrahim Kanouche		checkMatches(t, m.Matches, f, s.expected)
80*46c4c49dSIbrahim Kanouche	}
81*46c4c49dSIbrahim Kanouche}
82*46c4c49dSIbrahim Kanouche
83*46c4c49dSIbrahim Kanouchefunc readScenario(path string) *scenario {
84*46c4c49dSIbrahim Kanouche	var s scenario
85*46c4c49dSIbrahim Kanouche	b, err := ioutil.ReadFile(path)
86*46c4c49dSIbrahim Kanouche	if err != nil {
87*46c4c49dSIbrahim Kanouche		log.Fatalf("Couldn't read scenario %s: %v", path, err)
88*46c4c49dSIbrahim Kanouche	}
89*46c4c49dSIbrahim Kanouche
90*46c4c49dSIbrahim Kanouche	// A scenario consists of any number of comment lines, which are ignored, then a line of the form
91*46c4c49dSIbrahim Kanouche	// EXPECTED: A,B,C
92*46c4c49dSIbrahim Kanouche	//
93*46c4c49dSIbrahim Kanouche	// or EXPECTED:<EOL>
94*46c4c49dSIbrahim Kanouche	// where A,B,C is a comma-separated list of expected licenses.
95*46c4c49dSIbrahim Kanouche	lines := strings.SplitN(string(b), "EXPECTED:", 2)
96*46c4c49dSIbrahim Kanouche	// The first part of lines is description, which we ignore. We then split on a linefeed to get the
97*46c4c49dSIbrahim Kanouche	// list of licenses and the rest of the data content.
98*46c4c49dSIbrahim Kanouche	lines = strings.SplitN(lines[1], "\n", 2)
99*46c4c49dSIbrahim Kanouche	if lines[0] != "" {
100*46c4c49dSIbrahim Kanouche		s.expected = strings.Split(lines[0], ",")
101*46c4c49dSIbrahim Kanouche	} else {
102*46c4c49dSIbrahim Kanouche		s.expected = []string{}
103*46c4c49dSIbrahim Kanouche	}
104*46c4c49dSIbrahim Kanouche	s.data = []byte(lines[1])
105*46c4c49dSIbrahim Kanouche	return &s
106*46c4c49dSIbrahim Kanouche}
107*46c4c49dSIbrahim Kanouche
108*46c4c49dSIbrahim Kanouchefunc TestContainsAndOverlaps(t *testing.T) {
109*46c4c49dSIbrahim Kanouche	tests := []struct {
110*46c4c49dSIbrahim Kanouche		name     string
111*46c4c49dSIbrahim Kanouche		a, b     *Match
112*46c4c49dSIbrahim Kanouche		contains bool
113*46c4c49dSIbrahim Kanouche		overlaps bool
114*46c4c49dSIbrahim Kanouche	}{
115*46c4c49dSIbrahim Kanouche		{
116*46c4c49dSIbrahim Kanouche			name: "no intersection",
117*46c4c49dSIbrahim Kanouche			a: &Match{
118*46c4c49dSIbrahim Kanouche				StartLine: 1,
119*46c4c49dSIbrahim Kanouche				EndLine:   3,
120*46c4c49dSIbrahim Kanouche			},
121*46c4c49dSIbrahim Kanouche			b: &Match{
122*46c4c49dSIbrahim Kanouche				StartLine: 4,
123*46c4c49dSIbrahim Kanouche				EndLine:   5,
124*46c4c49dSIbrahim Kanouche			},
125*46c4c49dSIbrahim Kanouche			contains: false,
126*46c4c49dSIbrahim Kanouche			overlaps: false,
127*46c4c49dSIbrahim Kanouche		},
128*46c4c49dSIbrahim Kanouche		{
129*46c4c49dSIbrahim Kanouche			name: "overlap at end",
130*46c4c49dSIbrahim Kanouche			a: &Match{
131*46c4c49dSIbrahim Kanouche				StartLine: 4,
132*46c4c49dSIbrahim Kanouche				EndLine:   10,
133*46c4c49dSIbrahim Kanouche			},
134*46c4c49dSIbrahim Kanouche			b: &Match{
135*46c4c49dSIbrahim Kanouche				StartLine: 1,
136*46c4c49dSIbrahim Kanouche				EndLine:   5,
137*46c4c49dSIbrahim Kanouche			},
138*46c4c49dSIbrahim Kanouche			contains: false,
139*46c4c49dSIbrahim Kanouche			overlaps: true,
140*46c4c49dSIbrahim Kanouche		},
141*46c4c49dSIbrahim Kanouche		{
142*46c4c49dSIbrahim Kanouche			name: "overlap at end",
143*46c4c49dSIbrahim Kanouche			a: &Match{
144*46c4c49dSIbrahim Kanouche				StartLine: 1,
145*46c4c49dSIbrahim Kanouche				EndLine:   10,
146*46c4c49dSIbrahim Kanouche			},
147*46c4c49dSIbrahim Kanouche			b: &Match{
148*46c4c49dSIbrahim Kanouche				StartLine: 4,
149*46c4c49dSIbrahim Kanouche				EndLine:   12,
150*46c4c49dSIbrahim Kanouche			},
151*46c4c49dSIbrahim Kanouche			contains: false,
152*46c4c49dSIbrahim Kanouche			overlaps: true,
153*46c4c49dSIbrahim Kanouche		},
154*46c4c49dSIbrahim Kanouche		{
155*46c4c49dSIbrahim Kanouche			name: "contains",
156*46c4c49dSIbrahim Kanouche			a: &Match{
157*46c4c49dSIbrahim Kanouche				StartLine: 1,
158*46c4c49dSIbrahim Kanouche				EndLine:   10,
159*46c4c49dSIbrahim Kanouche			},
160*46c4c49dSIbrahim Kanouche			b: &Match{
161*46c4c49dSIbrahim Kanouche				StartLine: 4,
162*46c4c49dSIbrahim Kanouche				EndLine:   7,
163*46c4c49dSIbrahim Kanouche			},
164*46c4c49dSIbrahim Kanouche			contains: true,
165*46c4c49dSIbrahim Kanouche			overlaps: false,
166*46c4c49dSIbrahim Kanouche		},
167*46c4c49dSIbrahim Kanouche	}
168*46c4c49dSIbrahim Kanouche
169*46c4c49dSIbrahim Kanouche	for _, test := range tests {
170*46c4c49dSIbrahim Kanouche		t.Run(test.name, func(t *testing.T) {
171*46c4c49dSIbrahim Kanouche			if got := contains(test.a, test.b); got != test.contains {
172*46c4c49dSIbrahim Kanouche				t.Errorf("contains: got %v want %v", got, test.contains)
173*46c4c49dSIbrahim Kanouche			}
174*46c4c49dSIbrahim Kanouche			if got := overlaps(test.a, test.b); got != test.overlaps {
175*46c4c49dSIbrahim Kanouche				t.Errorf("overlaps: got %v want %v", got, test.overlaps)
176*46c4c49dSIbrahim Kanouche			}
177*46c4c49dSIbrahim Kanouche		})
178*46c4c49dSIbrahim Kanouche	}
179*46c4c49dSIbrahim Kanouche}
180*46c4c49dSIbrahim Kanouche
// TestLicName lists license asset filenames together with the license name
// each one is expected to map to.
//
// NOTE(review): the subtest body is empty, so nothing is asserted and the
// expected field is never read. The function under test is not visible in
// this file; wire it in before relying on this test.
func TestLicName(t *testing.T) {
	type licNameCase struct {
		name     string
		expected string
	}
	cases := []licNameCase{
		// The filename for a license
		{name: "GPL-2.0.txt", expected: "GPL-2.0"},
		// The filename for a header reference to a license
		{name: "GPL-2.0.header.txt", expected: "GPL-2.0"},
		// The filename for a variant header reference to a license
		{name: "GPL-2.0.header_a.txt", expected: "GPL-2.0"},
		// The filename for a variant license body
		{name: "Apache-2.0_no_toc.txt", expected: "Apache-2.0"},
	}

	for i := range cases {
		t.Run(cases[i].name, func(t *testing.T) {
		})
	}
}
214*46c4c49dSIbrahim Kanouche
215*46c4c49dSIbrahim Kanouchefunc TestMatchFrom(t *testing.T) {
216*46c4c49dSIbrahim Kanouche	tr := iotest.TimeoutReader(strings.NewReader("some data"))
217*46c4c49dSIbrahim Kanouche	c, err := classifier()
218*46c4c49dSIbrahim Kanouche	if err != nil {
219*46c4c49dSIbrahim Kanouche		t.Fatalf("couldn't instantiate standard Google classifier: %v", err)
220*46c4c49dSIbrahim Kanouche	}
221*46c4c49dSIbrahim Kanouche
222*46c4c49dSIbrahim Kanouche	_, err = c.MatchFrom(tr)
223*46c4c49dSIbrahim Kanouche	if !errors.Is(err, iotest.ErrTimeout) {
224*46c4c49dSIbrahim Kanouche		t.Errorf("got %v want %v", err, iotest.ErrTimeout)
225*46c4c49dSIbrahim Kanouche	}
226*46c4c49dSIbrahim Kanouche
227*46c4c49dSIbrahim Kanouche	files, err := getScenarioFilenames()
228*46c4c49dSIbrahim Kanouche
229*46c4c49dSIbrahim Kanouche	if err != nil {
230*46c4c49dSIbrahim Kanouche		t.Fatalf("encountered error walking scenarios directory: %v", err)
231*46c4c49dSIbrahim Kanouche	}
232*46c4c49dSIbrahim Kanouche
233*46c4c49dSIbrahim Kanouche	for _, f := range files {
234*46c4c49dSIbrahim Kanouche		s := readScenario(f)
235*46c4c49dSIbrahim Kanouche		r := bytes.NewReader(s.data)
236*46c4c49dSIbrahim Kanouche		m, err := c.MatchFrom(r)
237*46c4c49dSIbrahim Kanouche		if err != nil {
238*46c4c49dSIbrahim Kanouche			t.Errorf("unexpected error: %v", err)
239*46c4c49dSIbrahim Kanouche		}
240*46c4c49dSIbrahim Kanouche		checkMatches(t, m.Matches, f, s.expected)
241*46c4c49dSIbrahim Kanouche	}
242*46c4c49dSIbrahim Kanouche}
243*46c4c49dSIbrahim Kanouche
244*46c4c49dSIbrahim Kanouche// checkMatches diffs the resulting matches against the expected content and
245*46c4c49dSIbrahim Kanouche// sets test results.
246*46c4c49dSIbrahim Kanouchefunc checkMatches(t *testing.T, m Matches, f string, e []string) {
247*46c4c49dSIbrahim Kanouche	found := make(map[string]bool)
248*46c4c49dSIbrahim Kanouche	// Uniquify the licenses found
249*46c4c49dSIbrahim Kanouche	for _, l := range m {
250*46c4c49dSIbrahim Kanouche		found[l.Name] = true
251*46c4c49dSIbrahim Kanouche	}
252*46c4c49dSIbrahim Kanouche
253*46c4c49dSIbrahim Kanouche	var names []string
254*46c4c49dSIbrahim Kanouche	for l := range found {
255*46c4c49dSIbrahim Kanouche		names = append(names, l)
256*46c4c49dSIbrahim Kanouche	}
257*46c4c49dSIbrahim Kanouche	sort.Strings(names)
258*46c4c49dSIbrahim Kanouche
259*46c4c49dSIbrahim Kanouche	if len(names) != len(e) {
260*46c4c49dSIbrahim Kanouche		t.Errorf("Match(%q) number matches: %v, want %v: %v", f, len(names), len(e), spew.Sdump(m))
261*46c4c49dSIbrahim Kanouche		return
262*46c4c49dSIbrahim Kanouche	}
263*46c4c49dSIbrahim Kanouche
264*46c4c49dSIbrahim Kanouche	for i := 0; i < len(names); i++ {
265*46c4c49dSIbrahim Kanouche		w := strings.TrimSpace(e[i])
266*46c4c49dSIbrahim Kanouche		if got, want := names[i], w; got != want {
267*46c4c49dSIbrahim Kanouche			t.Errorf("Match(%q) = %q, want %q", f, got, want)
268*46c4c49dSIbrahim Kanouche		}
269*46c4c49dSIbrahim Kanouche	}
270*46c4c49dSIbrahim Kanouche}
271*46c4c49dSIbrahim Kanouche
272*46c4c49dSIbrahim Kanouchefunc TestLicenseName(t *testing.T) {
273*46c4c49dSIbrahim Kanouche	tests := []struct {
274*46c4c49dSIbrahim Kanouche		input string
275*46c4c49dSIbrahim Kanouche		want  string
276*46c4c49dSIbrahim Kanouche	}{
277*46c4c49dSIbrahim Kanouche		{
278*46c4c49dSIbrahim Kanouche			input: "License/example/file.txt",
279*46c4c49dSIbrahim Kanouche			want:  "example",
280*46c4c49dSIbrahim Kanouche		},
281*46c4c49dSIbrahim Kanouche		{
282*46c4c49dSIbrahim Kanouche			input: "License/example/a.txt",
283*46c4c49dSIbrahim Kanouche			want:  "example",
284*46c4c49dSIbrahim Kanouche		},
285*46c4c49dSIbrahim Kanouche		{
286*46c4c49dSIbrahim Kanouche			input: "Header/example/header.txt",
287*46c4c49dSIbrahim Kanouche			want:  "example",
288*46c4c49dSIbrahim Kanouche		},
289*46c4c49dSIbrahim Kanouche		{
290*46c4c49dSIbrahim Kanouche			input: "Header/example/a.txt",
291*46c4c49dSIbrahim Kanouche			want:  "example",
292*46c4c49dSIbrahim Kanouche		},
293*46c4c49dSIbrahim Kanouche	}
294*46c4c49dSIbrahim Kanouche
295*46c4c49dSIbrahim Kanouche	for _, tt := range tests {
296*46c4c49dSIbrahim Kanouche		t.Run(tt.input, func(t *testing.T) {
297*46c4c49dSIbrahim Kanouche			got := LicenseName(tt.input)
298*46c4c49dSIbrahim Kanouche			if diff := cmp.Diff(tt.want, got); diff != "" {
299*46c4c49dSIbrahim Kanouche				t.Errorf("Unexpected result; diff %v", diff)
300*46c4c49dSIbrahim Kanouche			}
301*46c4c49dSIbrahim Kanouche		})
302*46c4c49dSIbrahim Kanouche	}
303*46c4c49dSIbrahim Kanouche}
304*46c4c49dSIbrahim Kanouche
305*46c4c49dSIbrahim Kanouchefunc TestNormalize(t *testing.T) {
306*46c4c49dSIbrahim Kanouche	tests := []struct {
307*46c4c49dSIbrahim Kanouche		input string
308*46c4c49dSIbrahim Kanouche		want  string
309*46c4c49dSIbrahim Kanouche	}{
310*46c4c49dSIbrahim Kanouche		{
311*46c4c49dSIbrahim Kanouche			input: "Words  With   Extra Spaces are flattened out, preserving case",
312*46c4c49dSIbrahim Kanouche			want:  "Words With Extra Spaces are flattened out preserving case",
313*46c4c49dSIbrahim Kanouche		},
314*46c4c49dSIbrahim Kanouche		{
315*46c4c49dSIbrahim Kanouche			input: "",
316*46c4c49dSIbrahim Kanouche			want:  "",
317*46c4c49dSIbrahim Kanouche		},
318*46c4c49dSIbrahim Kanouche		{
319*46c4c49dSIbrahim Kanouche			input: "   License  ",
320*46c4c49dSIbrahim Kanouche			want:  "License",
321*46c4c49dSIbrahim Kanouche		},
322*46c4c49dSIbrahim Kanouche		{
323*46c4c49dSIbrahim Kanouche			// This tests that the line breaks in the input text are properly
324*46c4c49dSIbrahim Kanouche			// preserved, which is important for visual diffing.
325*46c4c49dSIbrahim Kanouche			input: `Preserving
326*46c4c49dSIbrahim Kanoucheline
327*46c4c49dSIbrahim Kanouche
328*46c4c49dSIbrahim Kanouchebreaks is important`,
329*46c4c49dSIbrahim Kanouche			want: `Preserving
330*46c4c49dSIbrahim Kanoucheline
331*46c4c49dSIbrahim Kanouche
332*46c4c49dSIbrahim Kanouchebreaks is important`,
333*46c4c49dSIbrahim Kanouche		},
334*46c4c49dSIbrahim Kanouche		{
335*46c4c49dSIbrahim Kanouche			// This tests that soft EOL functionality doesn't affect normalized output
336*46c4c49dSIbrahim Kanouche			input: `This is a sentence looking construct. This is another sentence. What happens?`,
337*46c4c49dSIbrahim Kanouche			want:  `This is a sentence looking construct This is another sentence What happens`,
338*46c4c49dSIbrahim Kanouche		},
339*46c4c49dSIbrahim Kanouche		{
340*46c4c49dSIbrahim Kanouche			input: `header
341*46c4c49dSIbrahim Kanouche........................ This is oddly formatted`,
342*46c4c49dSIbrahim Kanouche			want: `header
343*46c4c49dSIbrahim KanoucheThis is oddly formatted`,
344*46c4c49dSIbrahim Kanouche		},
345*46c4c49dSIbrahim Kanouche		{
346*46c4c49dSIbrahim Kanouche			input: `baseball basket-
347*46c4c49dSIbrahim Kanoucheball football`,
348*46c4c49dSIbrahim Kanouche			want: "baseball basketball\nfootball",
349*46c4c49dSIbrahim Kanouche		},
350*46c4c49dSIbrahim Kanouche	}
351*46c4c49dSIbrahim Kanouche	for _, tt := range tests {
352*46c4c49dSIbrahim Kanouche		t.Run(tt.input, func(t *testing.T) {
353*46c4c49dSIbrahim Kanouche			c, err := classifier()
354*46c4c49dSIbrahim Kanouche			if err != nil {
355*46c4c49dSIbrahim Kanouche				t.Fatalf("couldn't instantiate standard Google classifier: %v", err)
356*46c4c49dSIbrahim Kanouche			}
357*46c4c49dSIbrahim Kanouche
358*46c4c49dSIbrahim Kanouche			got := c.Normalize([]byte(tt.input))
359*46c4c49dSIbrahim Kanouche			if diff := cmp.Diff(tt.want, string(got)); diff != "" {
360*46c4c49dSIbrahim Kanouche				t.Errorf("Unexpected result; diff %v", diff)
361*46c4c49dSIbrahim Kanouche			}
362*46c4c49dSIbrahim Kanouche		})
363*46c4c49dSIbrahim Kanouche	}
364*46c4c49dSIbrahim Kanouche
365*46c4c49dSIbrahim Kanouche}
366