// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package backend contains the necessary functions to classify a license.
package backend

import (
	"context"
	"fmt"
	"io/ioutil"
	"log"
	"sync"
	"time"

	//gc "google3/devtools/compliance/common/licenseclassifier/classifier"

	classifier "github.com/google/licenseclassifier/v2"
	"github.com/google/licenseclassifier/v2/assets"
	"github.com/google/licenseclassifier/v2/tools/identify_license/results"
)

// ClassifierInterface is the interface each backend must implement.
type ClassifierInterface interface {
	// Close presumably lets a backend release whatever it holds; the
	// ClassifierBackend implementation in this file has an empty body.
	Close()
	// SetTraceConfiguration hands a classifier trace configuration to the
	// backend. ClassifierBackend currently ignores it.
	SetTraceConfiguration(tc *classifier.TraceConfiguration)
	// ClassifyLicenses classifies every file named in filenames and returns
	// the errors encountered along the way. numTasks bounds how many files are
	// processed concurrently. In ClassifierBackend, headers=false drops
	// matches whose MatchType is "Header".
	ClassifyLicenses(numTasks int, filenames []string, headers bool) []error
	// ClassifyLicensesWithContext behaves like ClassifyLicenses but also
	// returns, with the context's error, once ctx is done.
	ClassifyLicensesWithContext(ctx context.Context, numTasks int, filenames []string, headers bool) []error
	// GetResults returns the matches accumulated by earlier classify calls.
	GetResults() results.LicenseTypes
}

// ClassifierBackend is an object that handles classifying a license.
type ClassifierBackend struct {
	// results accumulates one entry per reported match; classifyLicense
	// appends to it from several goroutines while holding mu.
	results results.LicenseTypes
	// mu guards results.
	mu sync.Mutex
	// classifier performs the actual matching; New populates it from
	// assets.DefaultClassifier.
	classifier *classifier.Classifier
}

// New creates a new backend working on the local filesystem.
50func New() (*ClassifierBackend, error) { 51 _, err := assets.ReadLicenseDir() 52 if err != nil { 53 return nil, err 54 } 55 lc, err := assets.DefaultClassifier() 56 if err != nil { 57 return nil, err 58 } 59 return &ClassifierBackend{classifier: lc}, nil 60} 61 62// Close does nothing here since there's nothing to close. 63func (b *ClassifierBackend) Close() { 64} 65 66// SetTraceConfiguration injects the supplied trace configuration 67func (b *ClassifierBackend) SetTraceConfiguration(tc *classifier.TraceConfiguration) { 68 //b.classifier.SetTraceConfiguration((*gc.TraceConfiguration)(tc)) 69} 70 71// ClassifyLicenses runs the license classifier over the given file. 72func (b *ClassifierBackend) ClassifyLicenses(numTasks int, filenames []string, headers bool) (errors []error) { 73 // Create a pool from which tasks can later be started. We use a pool because the OS limits 74 // the number of files that can be open at any one time. 75 task := make(chan bool, numTasks) 76 for i := 0; i < numTasks; i++ { 77 task <- true 78 } 79 80 errs := make(chan error, len(filenames)) 81 82 var wg sync.WaitGroup 83 analyze := func(filename string) { 84 defer func() { 85 wg.Done() 86 task <- true 87 }() 88 if err := b.classifyLicense(filename, headers); err != nil { 89 errs <- err 90 } 91 } 92 93 for _, filename := range filenames { 94 wg.Add(1) 95 <-task 96 go analyze(filename) 97 } 98 go func() { 99 wg.Wait() 100 close(task) 101 close(errs) 102 }() 103 104 for err := range errs { 105 errors = append(errors, err) 106 } 107 return errors 108} 109 110// ClassifyLicensesWithContext runs the license classifier over the given file; ensure that it will respect the timeout in the provided context. 
111func (b *ClassifierBackend) ClassifyLicensesWithContext(ctx context.Context, numTasks int, filenames []string, headers bool) (errors []error) { 112 done := make(chan bool) 113 go func() { 114 errors = b.ClassifyLicenses(numTasks, filenames, headers) 115 done <- true 116 }() 117 select { 118 case <-ctx.Done(): 119 err := ctx.Err() 120 errors = append(errors, err) 121 return errors 122 case <-done: 123 return errors 124 } 125} 126 127// classifyLicense is called by a Go-function to perform the actual 128// classification of a license. 129func (b *ClassifierBackend) classifyLicense(filename string, headers bool) error { 130 contents, err := ioutil.ReadFile(filename) 131 if err != nil { 132 return fmt.Errorf("unable to read %q: %v", filename, err) 133 } 134 135 matchLoop := func(contents []byte) { 136 for _, m := range b.classifier.Match(contents).Matches { 137 // If not looking for headers, skip them 138 if !headers && m.MatchType == "Header" { 139 continue 140 } 141 142 b.mu.Lock() 143 b.results = append(b.results, &results.LicenseType{ 144 Filename: filename, 145 MatchType: m.MatchType, 146 Name: m.Name, 147 Variant: m.Variant, 148 Confidence: m.Confidence, 149 StartLine: m.StartLine, 150 EndLine: m.EndLine, 151 }) 152 b.mu.Unlock() 153 } 154 } 155 156 log.Printf("Classifying license(s): %s", filename) 157 start := time.Now() 158 matchLoop(contents) 159 log.Printf("Finished Classifying License %q: %v", filename, time.Since(start)) 160 return nil 161} 162 163// GetResults returns the results of the classifications. 164func (b *ClassifierBackend) GetResults() results.LicenseTypes { 165 return b.results 166} 167