1// Copyright 2017 Google Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package backend contains the necessary functions to classify a license. 16package backend 17 18import ( 19 "context" 20 "fmt" 21 "io/ioutil" 22 "log" 23 "sync" 24 "time" 25 26 "github.com/google/licenseclassifier" 27 "github.com/google/licenseclassifier/commentparser" 28 "github.com/google/licenseclassifier/commentparser/language" 29 "github.com/google/licenseclassifier/tools/identify_license/results" 30) 31 32// ClassifierInterface is the interface each backend must implement. 33type ClassifierInterface interface { 34 Close() 35 ClassifyLicenses(filenames []string, headers bool) []error 36 ClassifyLicensesWithContext(ctx context.Context, filenames []string, headers bool) []error 37 GetResults() results.LicenseTypes 38} 39 40// ClassifierBackend is an object that handles classifying a license. 41type ClassifierBackend struct { 42 results results.LicenseTypes 43 mu sync.Mutex 44 classifier *licenseclassifier.License 45} 46 47// New creates a new backend working on the local filesystem. 48func New(threshold float64, forbiddenOnly bool) (*ClassifierBackend, error) { 49 var lc *licenseclassifier.License 50 var err error 51 if forbiddenOnly { 52 lc, err = licenseclassifier.NewWithForbiddenLicenses(threshold) 53 } else { 54 lc, err = licenseclassifier.New(threshold) 55 } 56 if err != nil { 57 return nil, err 58 } 59 return &ClassifierBackend{classifier: lc}, nil 60} 61 62// Close does nothing here since there's nothing to close. 63func (b *ClassifierBackend) Close() { 64} 65 66// ClassifyLicenses runs the license classifier over the given file. 67func (b *ClassifierBackend) ClassifyLicenses(filenames []string, headers bool) (errors []error) { 68 return b.ClassifyLicensesWithContext(context.Background(), filenames, headers) 69} 70 71// ClassifyLicensesWithContext runs the license classifier over the given file; 72// ensure that it will respect the timeout and cancelation in the provided context. 73func (b *ClassifierBackend) ClassifyLicensesWithContext(ctx context.Context, filenames []string, headers bool) (errors []error) { 74 75 files := make(chan string, len(filenames)) 76 for _, f := range filenames { 77 files <- f 78 } 79 close(files) 80 errs := make(chan error, len(filenames)) 81 82 var wg sync.WaitGroup 83 84 // Create a pool from which tasks can later be started. We use a pool because the OS limits 85 // the number of files that can be open at any one time. 86 const numTasks = 1000 87 wg.Add(numTasks) 88 89 for i := 0; i < numTasks; i++ { 90 go func() { 91 // Ensure that however this function terminates, the wait group 92 // is unblocked 93 defer wg.Done() 94 95 for { 96 filename := <-files 97 98 // no file? we're done 99 if filename == "" { 100 break 101 } 102 103 // If the context is done, record that the file was not 104 // classified due to the context's termination. 105 if err := ctx.Err(); err != nil { 106 errs <- fmt.Errorf("file %s not classified due to context completion: %v", filename, err) 107 continue 108 } 109 110 if err := b.classifyLicense(filename, headers); err != nil { 111 errs <- err 112 } 113 } 114 }() 115 } 116 117 wg.Wait() 118 close(errs) 119 120 for err := range errs { 121 errors = append(errors, err) 122 } 123 return errors 124} 125 126// classifyLicense is called by a Go-function to perform the actual 127// classification of a license. 128func (b *ClassifierBackend) classifyLicense(filename string, headers bool) error { 129 contents, err := ioutil.ReadFile(filename) 130 if err != nil { 131 return fmt.Errorf("unable to read %q: %v", filename, err) 132 } 133 134 matchLoop := func(contents string) { 135 for _, m := range b.classifier.MultipleMatch(contents, headers) { 136 b.mu.Lock() 137 b.results = append(b.results, &results.LicenseType{ 138 Filename: filename, 139 Name: m.Name, 140 Confidence: m.Confidence, 141 Offset: m.Offset, 142 Extent: m.Extent, 143 }) 144 b.mu.Unlock() 145 } 146 } 147 148 log.Printf("Classifying license(s): %s", filename) 149 start := time.Now() 150 if lang := language.ClassifyLanguage(filename); lang == language.Unknown { 151 matchLoop(string(contents)) 152 } else { 153 log.Printf("detected language: %v", lang) 154 comments := commentparser.Parse(contents, lang) 155 for ch := range comments.ChunkIterator() { 156 matchLoop(ch.String()) 157 } 158 } 159 log.Printf("Finished Classifying License %q: %v", filename, time.Since(start)) 160 return nil 161} 162 163// GetResults returns the results of the classifications. 164func (b *ClassifierBackend) GetResults() results.LicenseTypes { 165 return b.results 166} 167