xref: /aosp_15_r20/external/licenseclassifier/v2/classifier_test.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1// Copyright 2020 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package classifier
16
17import (
18	"bytes"
19	"errors"
20	"io/ioutil"
21	"log"
22	"os"
23	"path"
24	"path/filepath"
25	"sort"
26	"strings"
27	"testing"
28	"testing/iotest"
29
30	"github.com/davecgh/go-spew/spew"
31	"github.com/google/go-cmp/cmp"
32)
33
// scenario is the parsed form of one scenario file: the license names the
// classifier is expected to report, and the text to classify.
type scenario struct {
	// expected lists the license names given on the file's EXPECTED: line.
	expected []string

	// data is the content handed to the classifier.
	data []byte
}
38
var (
	// defaultThreshold is the threshold passed to NewClassifier by the
	// classifier helper used throughout these tests.
	defaultThreshold = .8

	// baseLicenses is the directory the test classifier loads licenses from.
	baseLicenses = "assets"
)
41
42func classifier() (*Classifier, error) {
43	c := NewClassifier(defaultThreshold)
44	return c, c.LoadLicenses(path.Join(baseLicenses))
45}
46
// getScenarioFilenames walks the "scenarios" directory (relative to the
// working directory) and returns the path of every regular entry found,
// skipping directories and any path ending in "md". Whatever was collected
// before a walk error is returned alongside that error.
func getScenarioFilenames() ([]string, error) {
	root := path.Join("scenarios")

	var found []string
	collect := func(p string, fi os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			// fi may be unusable when the walk reports an error, so bail out first.
			return walkErr
		case fi.IsDir(), strings.HasSuffix(p, "md"):
			return nil
		}
		found = append(found, p)
		return nil
	}

	walkErr := filepath.Walk(root, collect)
	return found, walkErr
}
63
64func TestMatchScenarios(t *testing.T) {
65	c, err := classifier()
66	if err != nil {
67		t.Fatalf("couldn't instantiate standard test classifier: %v", err)
68	}
69
70	files, err := getScenarioFilenames()
71	if err != nil {
72		t.Fatalf("encountered error walking scenarios directory: %v", err)
73	}
74
75	for _, f := range files {
76		s := readScenario(f)
77
78		m := c.Match(s.data)
79		checkMatches(t, m.Matches, f, s.expected)
80	}
81}
82
83func readScenario(path string) *scenario {
84	var s scenario
85	b, err := ioutil.ReadFile(path)
86	if err != nil {
87		log.Fatalf("Couldn't read scenario %s: %v", path, err)
88	}
89
90	// A scenario consists of any number of comment lines, which are ignored, then a line of the form
91	// EXPECTED: A,B,C
92	//
93	// or EXPECTED:<EOL>
94	// where A,B,C is a comma-separated list of expected licenses.
95	lines := strings.SplitN(string(b), "EXPECTED:", 2)
96	// The first part of lines is description, which we ignore. We then split on a linefeed to get the
97	// list of licenses and the rest of the data content.
98	lines = strings.SplitN(lines[1], "\n", 2)
99	if lines[0] != "" {
100		s.expected = strings.Split(lines[0], ",")
101	} else {
102		s.expected = []string{}
103	}
104	s.data = []byte(lines[1])
105	return &s
106}
107
108func TestContainsAndOverlaps(t *testing.T) {
109	tests := []struct {
110		name     string
111		a, b     *Match
112		contains bool
113		overlaps bool
114	}{
115		{
116			name: "no intersection",
117			a: &Match{
118				StartLine: 1,
119				EndLine:   3,
120			},
121			b: &Match{
122				StartLine: 4,
123				EndLine:   5,
124			},
125			contains: false,
126			overlaps: false,
127		},
128		{
129			name: "overlap at end",
130			a: &Match{
131				StartLine: 4,
132				EndLine:   10,
133			},
134			b: &Match{
135				StartLine: 1,
136				EndLine:   5,
137			},
138			contains: false,
139			overlaps: true,
140		},
141		{
142			name: "overlap at end",
143			a: &Match{
144				StartLine: 1,
145				EndLine:   10,
146			},
147			b: &Match{
148				StartLine: 4,
149				EndLine:   12,
150			},
151			contains: false,
152			overlaps: true,
153		},
154		{
155			name: "contains",
156			a: &Match{
157				StartLine: 1,
158				EndLine:   10,
159			},
160			b: &Match{
161				StartLine: 4,
162				EndLine:   7,
163			},
164			contains: true,
165			overlaps: false,
166		},
167	}
168
169	for _, test := range tests {
170		t.Run(test.name, func(t *testing.T) {
171			if got := contains(test.a, test.b); got != test.contains {
172				t.Errorf("contains: got %v want %v", got, test.contains)
173			}
174			if got := overlaps(test.a, test.b); got != test.overlaps {
175				t.Errorf("overlaps: got %v want %v", got, test.overlaps)
176			}
177		})
178	}
179}
180
// TestLicName holds a table mapping license asset filenames to the license
// name each is expected to produce.
//
// NOTE(review): the subtests never call any function, so nothing is verified
// here. Presumably the filename-to-name helper this table was written for is
// gone or was renamed — TODO confirm, then either wire the assertion back in or
// delete the test. Until then each case is reported as skipped rather than as a
// pass, so the gap shows up in test output.
func TestLicName(t *testing.T) {
	tests := []struct {
		name     string
		expected string
	}{
		{
			// The filename for a license
			name:     "GPL-2.0.txt",
			expected: "GPL-2.0",
		},
		{
			// The filename for a header reference to a license
			name:     "GPL-2.0.header.txt",
			expected: "GPL-2.0",
		},
		{
			// The filename for a variant header reference to a license
			name:     "GPL-2.0.header_a.txt",
			expected: "GPL-2.0",
		},
		{
			// The filename for a variant license body
			name:     "Apache-2.0_no_toc.txt",
			expected: "Apache-2.0",
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			t.Skipf("no assertion implemented: %q is expected to map to %q", test.name, test.expected)
		})
	}
}
214
215func TestMatchFrom(t *testing.T) {
216	tr := iotest.TimeoutReader(strings.NewReader("some data"))
217	c, err := classifier()
218	if err != nil {
219		t.Fatalf("couldn't instantiate standard Google classifier: %v", err)
220	}
221
222	_, err = c.MatchFrom(tr)
223	if !errors.Is(err, iotest.ErrTimeout) {
224		t.Errorf("got %v want %v", err, iotest.ErrTimeout)
225	}
226
227	files, err := getScenarioFilenames()
228
229	if err != nil {
230		t.Fatalf("encountered error walking scenarios directory: %v", err)
231	}
232
233	for _, f := range files {
234		s := readScenario(f)
235		r := bytes.NewReader(s.data)
236		m, err := c.MatchFrom(r)
237		if err != nil {
238			t.Errorf("unexpected error: %v", err)
239		}
240		checkMatches(t, m.Matches, f, s.expected)
241	}
242}
243
244// checkMatches diffs the resulting matches against the expected content and
245// sets test results.
246func checkMatches(t *testing.T, m Matches, f string, e []string) {
247	found := make(map[string]bool)
248	// Uniquify the licenses found
249	for _, l := range m {
250		found[l.Name] = true
251	}
252
253	var names []string
254	for l := range found {
255		names = append(names, l)
256	}
257	sort.Strings(names)
258
259	if len(names) != len(e) {
260		t.Errorf("Match(%q) number matches: %v, want %v: %v", f, len(names), len(e), spew.Sdump(m))
261		return
262	}
263
264	for i := 0; i < len(names); i++ {
265		w := strings.TrimSpace(e[i])
266		if got, want := names[i], w; got != want {
267			t.Errorf("Match(%q) = %q, want %q", f, got, want)
268		}
269	}
270}
271
272func TestLicenseName(t *testing.T) {
273	tests := []struct {
274		input string
275		want  string
276	}{
277		{
278			input: "License/example/file.txt",
279			want:  "example",
280		},
281		{
282			input: "License/example/a.txt",
283			want:  "example",
284		},
285		{
286			input: "Header/example/header.txt",
287			want:  "example",
288		},
289		{
290			input: "Header/example/a.txt",
291			want:  "example",
292		},
293	}
294
295	for _, tt := range tests {
296		t.Run(tt.input, func(t *testing.T) {
297			got := LicenseName(tt.input)
298			if diff := cmp.Diff(tt.want, got); diff != "" {
299				t.Errorf("Unexpected result; diff %v", diff)
300			}
301		})
302	}
303}
304
305func TestNormalize(t *testing.T) {
306	tests := []struct {
307		input string
308		want  string
309	}{
310		{
311			input: "Words  With   Extra Spaces are flattened out, preserving case",
312			want:  "Words With Extra Spaces are flattened out preserving case",
313		},
314		{
315			input: "",
316			want:  "",
317		},
318		{
319			input: "   License  ",
320			want:  "License",
321		},
322		{
323			// This tests that the line breaks in the input text are properly
324			// preserved, which is important for visual diffing.
325			input: `Preserving
326line
327
328breaks is important`,
329			want: `Preserving
330line
331
332breaks is important`,
333		},
334		{
335			// This tests that soft EOL functionality doesn't affect normalized output
336			input: `This is a sentence looking construct. This is another sentence. What happens?`,
337			want:  `This is a sentence looking construct This is another sentence What happens`,
338		},
339		{
340			input: `header
341........................ This is oddly formatted`,
342			want: `header
343This is oddly formatted`,
344		},
345		{
346			input: `baseball basket-
347ball football`,
348			want: "baseball basketball\nfootball",
349		},
350	}
351	for _, tt := range tests {
352		t.Run(tt.input, func(t *testing.T) {
353			c, err := classifier()
354			if err != nil {
355				t.Fatalf("couldn't instantiate standard Google classifier: %v", err)
356			}
357
358			got := c.Normalize([]byte(tt.input))
359			if diff := cmp.Diff(tt.want, string(got)); diff != "" {
360				t.Errorf("Unexpected result; diff %v", diff)
361			}
362		})
363	}
364
365}
366