1// Copyright 2020 Google Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package classifier 16 17import ( 18 "bytes" 19 "errors" 20 "io/ioutil" 21 "log" 22 "os" 23 "path" 24 "path/filepath" 25 "sort" 26 "strings" 27 "testing" 28 "testing/iotest" 29 30 "github.com/davecgh/go-spew/spew" 31 "github.com/google/go-cmp/cmp" 32) 33 34type scenario struct { 35 expected []string 36 data []byte 37} 38 39var defaultThreshold = .8 40var baseLicenses = "assets" 41 42func classifier() (*Classifier, error) { 43 c := NewClassifier(defaultThreshold) 44 return c, c.LoadLicenses(path.Join(baseLicenses)) 45} 46 47func getScenarioFilenames() ([]string, error) { 48 scenarios := "scenarios" 49 var files []string 50 err := filepath.Walk(path.Join(scenarios), func(path string, info os.FileInfo, err error) error { 51 if err != nil { 52 return err 53 } 54 if strings.HasSuffix(path, "md") || info.IsDir() { 55 return nil 56 } 57 files = append(files, path) 58 return nil 59 }) 60 61 return files, err 62} 63 64func TestMatchScenarios(t *testing.T) { 65 c, err := classifier() 66 if err != nil { 67 t.Fatalf("couldn't instantiate standard test classifier: %v", err) 68 } 69 70 files, err := getScenarioFilenames() 71 if err != nil { 72 t.Fatalf("encountered error walking scenarios directory: %v", err) 73 } 74 75 for _, f := range files { 76 s := readScenario(f) 77 78 m := c.Match(s.data) 79 checkMatches(t, m.Matches, f, s.expected) 80 } 81} 82 83func readScenario(path string) *scenario { 84 var s scenario 85 b, err := ioutil.ReadFile(path) 86 if err != nil { 87 log.Fatalf("Couldn't read scenario %s: %v", path, err) 88 } 89 90 // A scenario consists of any number of comment lines, which are ignored, then a line of the form 91 // EXPECTED: A,B,C 92 // 93 // or EXPECTED:<EOL> 94 // where A,B,C is a comma-separated list of expected licenses. 95 lines := strings.SplitN(string(b), "EXPECTED:", 2) 96 // The first part of lines is description, which we ignore. We then split on a linefeed to get the 97 // list of licenses and the rest of the data content. 98 lines = strings.SplitN(lines[1], "\n", 2) 99 if lines[0] != "" { 100 s.expected = strings.Split(lines[0], ",") 101 } else { 102 s.expected = []string{} 103 } 104 s.data = []byte(lines[1]) 105 return &s 106} 107 108func TestContainsAndOverlaps(t *testing.T) { 109 tests := []struct { 110 name string 111 a, b *Match 112 contains bool 113 overlaps bool 114 }{ 115 { 116 name: "no intersection", 117 a: &Match{ 118 StartLine: 1, 119 EndLine: 3, 120 }, 121 b: &Match{ 122 StartLine: 4, 123 EndLine: 5, 124 }, 125 contains: false, 126 overlaps: false, 127 }, 128 { 129 name: "overlap at end", 130 a: &Match{ 131 StartLine: 4, 132 EndLine: 10, 133 }, 134 b: &Match{ 135 StartLine: 1, 136 EndLine: 5, 137 }, 138 contains: false, 139 overlaps: true, 140 }, 141 { 142 name: "overlap at end", 143 a: &Match{ 144 StartLine: 1, 145 EndLine: 10, 146 }, 147 b: &Match{ 148 StartLine: 4, 149 EndLine: 12, 150 }, 151 contains: false, 152 overlaps: true, 153 }, 154 { 155 name: "contains", 156 a: &Match{ 157 StartLine: 1, 158 EndLine: 10, 159 }, 160 b: &Match{ 161 StartLine: 4, 162 EndLine: 7, 163 }, 164 contains: true, 165 overlaps: false, 166 }, 167 } 168 169 for _, test := range tests { 170 t.Run(test.name, func(t *testing.T) { 171 if got := contains(test.a, test.b); got != test.contains { 172 t.Errorf("contains: got %v want %v", got, test.contains) 173 } 174 if got := overlaps(test.a, test.b); got != test.overlaps { 175 t.Errorf("overlaps: got %v want %v", got, test.overlaps) 176 } 177 }) 178 } 179} 180 181func TestLicName(t *testing.T) { 182 tests := []struct { 183 name string 184 expected string 185 }{ 186 { 187 // The filename for a license 188 name: "GPL-2.0.txt", 189 expected: "GPL-2.0", 190 }, 191 { 192 // The filename for a header reference to a license 193 name: "GPL-2.0.header.txt", 194 expected: "GPL-2.0", 195 }, 196 { 197 // The filename for a variant header reference to a license 198 name: "GPL-2.0.header_a.txt", 199 expected: "GPL-2.0", 200 }, 201 { 202 // The filename for a variant license body 203 name: "Apache-2.0_no_toc.txt", 204 expected: "Apache-2.0", 205 }, 206 } 207 208 for _, test := range tests { 209 t.Run(test.name, func(t *testing.T) { 210 211 }) 212 } 213} 214 215func TestMatchFrom(t *testing.T) { 216 tr := iotest.TimeoutReader(strings.NewReader("some data")) 217 c, err := classifier() 218 if err != nil { 219 t.Fatalf("couldn't instantiate standard Google classifier: %v", err) 220 } 221 222 _, err = c.MatchFrom(tr) 223 if !errors.Is(err, iotest.ErrTimeout) { 224 t.Errorf("got %v want %v", err, iotest.ErrTimeout) 225 } 226 227 files, err := getScenarioFilenames() 228 229 if err != nil { 230 t.Fatalf("encountered error walking scenarios directory: %v", err) 231 } 232 233 for _, f := range files { 234 s := readScenario(f) 235 r := bytes.NewReader(s.data) 236 m, err := c.MatchFrom(r) 237 if err != nil { 238 t.Errorf("unexpected error: %v", err) 239 } 240 checkMatches(t, m.Matches, f, s.expected) 241 } 242} 243 244// checkMatches diffs the resulting matches against the expected content and 245// sets test results. 246func checkMatches(t *testing.T, m Matches, f string, e []string) { 247 found := make(map[string]bool) 248 // Uniquify the licenses found 249 for _, l := range m { 250 found[l.Name] = true 251 } 252 253 var names []string 254 for l := range found { 255 names = append(names, l) 256 } 257 sort.Strings(names) 258 259 if len(names) != len(e) { 260 t.Errorf("Match(%q) number matches: %v, want %v: %v", f, len(names), len(e), spew.Sdump(m)) 261 return 262 } 263 264 for i := 0; i < len(names); i++ { 265 w := strings.TrimSpace(e[i]) 266 if got, want := names[i], w; got != want { 267 t.Errorf("Match(%q) = %q, want %q", f, got, want) 268 } 269 } 270} 271 272func TestLicenseName(t *testing.T) { 273 tests := []struct { 274 input string 275 want string 276 }{ 277 { 278 input: "License/example/file.txt", 279 want: "example", 280 }, 281 { 282 input: "License/example/a.txt", 283 want: "example", 284 }, 285 { 286 input: "Header/example/header.txt", 287 want: "example", 288 }, 289 { 290 input: "Header/example/a.txt", 291 want: "example", 292 }, 293 } 294 295 for _, tt := range tests { 296 t.Run(tt.input, func(t *testing.T) { 297 got := LicenseName(tt.input) 298 if diff := cmp.Diff(tt.want, got); diff != "" { 299 t.Errorf("Unexpected result; diff %v", diff) 300 } 301 }) 302 } 303} 304 305func TestNormalize(t *testing.T) { 306 tests := []struct { 307 input string 308 want string 309 }{ 310 { 311 input: "Words With Extra Spaces are flattened out, preserving case", 312 want: "Words With Extra Spaces are flattened out preserving case", 313 }, 314 { 315 input: "", 316 want: "", 317 }, 318 { 319 input: " License ", 320 want: "License", 321 }, 322 { 323 // This tests that the line breaks in the input text are properly 324 // preserved, which is important for visual diffing. 325 input: `Preserving 326line 327 328breaks is important`, 329 want: `Preserving 330line 331 332breaks is important`, 333 }, 334 { 335 // This tests that soft EOL functionality doesn't affect normalized output 336 input: `This is a sentence looking construct. This is another sentence. What happens?`, 337 want: `This is a sentence looking construct This is another sentence What happens`, 338 }, 339 { 340 input: `header 341........................ This is oddly formatted`, 342 want: `header 343This is oddly formatted`, 344 }, 345 { 346 input: `baseball basket- 347ball football`, 348 want: "baseball basketball\nfootball", 349 }, 350 } 351 for _, tt := range tests { 352 t.Run(tt.input, func(t *testing.T) { 353 c, err := classifier() 354 if err != nil { 355 t.Fatalf("couldn't instantiate standard Google classifier: %v", err) 356 } 357 358 got := c.Normalize([]byte(tt.input)) 359 if diff := cmp.Diff(tt.want, string(got)); diff != "" { 360 t.Errorf("Unexpected result; diff %v", diff) 361 } 362 }) 363 } 364 365} 366