xref: /aosp_15_r20/external/licenseclassifier/v2/tools/identify_license/backend/backend.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1// Copyright 2017 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package backend contains the necessary functions to classify a license.
16package backend
17
18import (
19	"context"
20	"fmt"
21	"io/ioutil"
22	"log"
23	"sync"
24	"time"
25
26	//gc "google3/devtools/compliance/common/licenseclassifier/classifier"
27
28	classifier "github.com/google/licenseclassifier/v2"
29	"github.com/google/licenseclassifier/v2/assets"
30	"github.com/google/licenseclassifier/v2/tools/identify_license/results"
31)
32
// ClassifierInterface is the interface each backend must implement.
// ClassifierBackend in this file provides every method listed here.
type ClassifierInterface interface {
	// Close is the backend's shutdown hook.
	Close()
	// SetTraceConfiguration hands a trace configuration to the backend.
	SetTraceConfiguration(tc *classifier.TraceConfiguration)
	// ClassifyLicenses classifies the files named in filenames and returns
	// the errors encountered. In ClassifierBackend, numTasks bounds how many
	// files are classified at the same time and headers selects whether
	// matches whose MatchType is "Header" are recorded.
	ClassifyLicenses(numTasks int, filenames []string, headers bool) []error
	// ClassifyLicensesWithContext is ClassifyLicenses with a context. In
	// ClassifierBackend, the call returns with ctx.Err() in the result if ctx
	// is done before classification finishes.
	ClassifyLicensesWithContext(ctx context.Context, numTasks int, filenames []string, headers bool) []error
	// GetResults returns the matches the backend has recorded.
	GetResults() results.LicenseTypes
}
41
// ClassifierBackend is an object that handles classifying a license.
type ClassifierBackend struct {
	// results accumulates one entry per match recorded by classifyLicense.
	results results.LicenseTypes
	// mu is held by classifyLicense while it appends to results.
	mu sync.Mutex
	// classifier is the classifier whose Match method is run over each
	// file's contents; New sets it to assets.DefaultClassifier().
	classifier *classifier.Classifier
}
48
49// New creates a new backend working on the local filesystem.
50func New() (*ClassifierBackend, error) {
51	_, err := assets.ReadLicenseDir()
52	if err != nil {
53		return nil, err
54	}
55	lc, err := assets.DefaultClassifier()
56	if err != nil {
57		return nil, err
58	}
59	return &ClassifierBackend{classifier: lc}, nil
60}
61
// Close is a no-op: this backend holds nothing that needs to be released.
// The method is present because ClassifierInterface declares it.
func (b *ClassifierBackend) Close() {
}
65
// SetTraceConfiguration is currently a no-op. The call that would forward tc
// to the underlying classifier is commented out below, so tc is ignored.
func (b *ClassifierBackend) SetTraceConfiguration(tc *classifier.TraceConfiguration) {
	//b.classifier.SetTraceConfiguration((*gc.TraceConfiguration)(tc))
}
70
71// ClassifyLicenses runs the license classifier over the given file.
72func (b *ClassifierBackend) ClassifyLicenses(numTasks int, filenames []string, headers bool) (errors []error) {
73	// Create a pool from which tasks can later be started. We use a pool because the OS limits
74	// the number of files that can be open at any one time.
75	task := make(chan bool, numTasks)
76	for i := 0; i < numTasks; i++ {
77		task <- true
78	}
79
80	errs := make(chan error, len(filenames))
81
82	var wg sync.WaitGroup
83	analyze := func(filename string) {
84		defer func() {
85			wg.Done()
86			task <- true
87		}()
88		if err := b.classifyLicense(filename, headers); err != nil {
89			errs <- err
90		}
91	}
92
93	for _, filename := range filenames {
94		wg.Add(1)
95		<-task
96		go analyze(filename)
97	}
98	go func() {
99		wg.Wait()
100		close(task)
101		close(errs)
102	}()
103
104	for err := range errs {
105		errors = append(errors, err)
106	}
107	return errors
108}
109
110// ClassifyLicensesWithContext runs the license classifier over the given file; ensure that it will respect the timeout in the provided context.
111func (b *ClassifierBackend) ClassifyLicensesWithContext(ctx context.Context, numTasks int, filenames []string, headers bool) (errors []error) {
112	done := make(chan bool)
113	go func() {
114		errors = b.ClassifyLicenses(numTasks, filenames, headers)
115		done <- true
116	}()
117	select {
118	case <-ctx.Done():
119		err := ctx.Err()
120		errors = append(errors, err)
121		return errors
122	case <-done:
123		return errors
124	}
125}
126
127// classifyLicense is called by a Go-function to perform the actual
128// classification of a license.
129func (b *ClassifierBackend) classifyLicense(filename string, headers bool) error {
130	contents, err := ioutil.ReadFile(filename)
131	if err != nil {
132		return fmt.Errorf("unable to read %q: %v", filename, err)
133	}
134
135	matchLoop := func(contents []byte) {
136		for _, m := range b.classifier.Match(contents).Matches {
137			// If not looking for headers, skip them
138			if !headers && m.MatchType == "Header" {
139				continue
140			}
141
142			b.mu.Lock()
143			b.results = append(b.results, &results.LicenseType{
144				Filename:   filename,
145				MatchType:  m.MatchType,
146				Name:       m.Name,
147				Variant:    m.Variant,
148				Confidence: m.Confidence,
149				StartLine:  m.StartLine,
150				EndLine:    m.EndLine,
151			})
152			b.mu.Unlock()
153		}
154	}
155
156	log.Printf("Classifying license(s): %s", filename)
157	start := time.Now()
158	matchLoop(contents)
159	log.Printf("Finished Classifying License %q: %v", filename, time.Since(start))
160	return nil
161}
162
163// GetResults returns the results of the classifications.
164func (b *ClassifierBackend) GetResults() results.LicenseTypes {
165	return b.results
166}
167