xref: /aosp_15_r20/external/licenseclassifier/tools/identify_license/backend/backend.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1// Copyright 2017 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package backend contains the necessary functions to classify a license.
16package backend
17
18import (
19	"context"
20	"fmt"
21	"io/ioutil"
22	"log"
23	"sync"
24	"time"
25
26	"github.com/google/licenseclassifier"
27	"github.com/google/licenseclassifier/commentparser"
28	"github.com/google/licenseclassifier/commentparser/language"
29	"github.com/google/licenseclassifier/tools/identify_license/results"
30)
31
32// ClassifierInterface is the interface each backend must implement.
33type ClassifierInterface interface {
34	Close()
35	ClassifyLicenses(filenames []string, headers bool) []error
36	ClassifyLicensesWithContext(ctx context.Context, filenames []string, headers bool) []error
37	GetResults() results.LicenseTypes
38}
39
40// ClassifierBackend is an object that handles classifying a license.
41type ClassifierBackend struct {
42	results    results.LicenseTypes
43	mu         sync.Mutex
44	classifier *licenseclassifier.License
45}
46
47// New creates a new backend working on the local filesystem.
48func New(threshold float64, forbiddenOnly bool) (*ClassifierBackend, error) {
49	var lc *licenseclassifier.License
50	var err error
51	if forbiddenOnly {
52		lc, err = licenseclassifier.NewWithForbiddenLicenses(threshold)
53	} else {
54		lc, err = licenseclassifier.New(threshold)
55	}
56	if err != nil {
57		return nil, err
58	}
59	return &ClassifierBackend{classifier: lc}, nil
60}
61
62// Close does nothing here since there's nothing to close.
63func (b *ClassifierBackend) Close() {
64}
65
66// ClassifyLicenses runs the license classifier over the given file.
67func (b *ClassifierBackend) ClassifyLicenses(filenames []string, headers bool) (errors []error) {
68	return b.ClassifyLicensesWithContext(context.Background(), filenames, headers)
69}
70
71// ClassifyLicensesWithContext runs the license classifier over the given file;
72// ensure that it will respect the timeout and cancelation in the provided context.
73func (b *ClassifierBackend) ClassifyLicensesWithContext(ctx context.Context, filenames []string, headers bool) (errors []error) {
74
75	files := make(chan string, len(filenames))
76	for _, f := range filenames {
77		files <- f
78	}
79	close(files)
80	errs := make(chan error, len(filenames))
81
82	var wg sync.WaitGroup
83
84	// Create a pool from which tasks can later be started. We use a pool because the OS limits
85	// the number of files that can be open at any one time.
86	const numTasks = 1000
87	wg.Add(numTasks)
88
89	for i := 0; i < numTasks; i++ {
90		go func() {
91			// Ensure that however this function terminates, the wait group
92			// is unblocked
93			defer wg.Done()
94
95			for {
96				filename := <-files
97
98				// no file? we're done
99				if filename == "" {
100					break
101				}
102
103				// If the context is done, record that the file was not
104				// classified due to the context's termination.
105				if err := ctx.Err(); err != nil {
106					errs <- fmt.Errorf("file %s not classified due to context completion: %v", filename, err)
107					continue
108				}
109
110				if err := b.classifyLicense(filename, headers); err != nil {
111					errs <- err
112				}
113			}
114		}()
115	}
116
117	wg.Wait()
118	close(errs)
119
120	for err := range errs {
121		errors = append(errors, err)
122	}
123	return errors
124}
125
126// classifyLicense is called by a Go-function to perform the actual
127// classification of a license.
128func (b *ClassifierBackend) classifyLicense(filename string, headers bool) error {
129	contents, err := ioutil.ReadFile(filename)
130	if err != nil {
131		return fmt.Errorf("unable to read %q: %v", filename, err)
132	}
133
134	matchLoop := func(contents string) {
135		for _, m := range b.classifier.MultipleMatch(contents, headers) {
136			b.mu.Lock()
137			b.results = append(b.results, &results.LicenseType{
138				Filename:   filename,
139				Name:       m.Name,
140				Confidence: m.Confidence,
141				Offset:     m.Offset,
142				Extent:     m.Extent,
143			})
144			b.mu.Unlock()
145		}
146	}
147
148	log.Printf("Classifying license(s): %s", filename)
149	start := time.Now()
150	if lang := language.ClassifyLanguage(filename); lang == language.Unknown {
151		matchLoop(string(contents))
152	} else {
153		log.Printf("detected language: %v", lang)
154		comments := commentparser.Parse(contents, lang)
155		for ch := range comments.ChunkIterator() {
156			matchLoop(ch.String())
157		}
158	}
159	log.Printf("Finished Classifying License %q: %v", filename, time.Since(start))
160	return nil
161}
162
163// GetResults returns the results of the classifications.
164func (b *ClassifierBackend) GetResults() results.LicenseTypes {
165	return b.results
166}
167