xref: /aosp_15_r20/external/licenseclassifier/tools/license_serializer/license_serializer.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1*46c4c49dSIbrahim Kanouche// Copyright 2017 Google Inc.
2*46c4c49dSIbrahim Kanouche//
3*46c4c49dSIbrahim Kanouche// Licensed under the Apache License, Version 2.0 (the "License");
4*46c4c49dSIbrahim Kanouche// you may not use this file except in compliance with the License.
5*46c4c49dSIbrahim Kanouche// You may obtain a copy of the License at
6*46c4c49dSIbrahim Kanouche//
7*46c4c49dSIbrahim Kanouche//     http://www.apache.org/licenses/LICENSE-2.0
8*46c4c49dSIbrahim Kanouche//
9*46c4c49dSIbrahim Kanouche// Unless required by applicable law or agreed to in writing, software
10*46c4c49dSIbrahim Kanouche// distributed under the License is distributed on an "AS IS" BASIS,
11*46c4c49dSIbrahim Kanouche// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*46c4c49dSIbrahim Kanouche// See the License for the specific language governing permissions and
13*46c4c49dSIbrahim Kanouche// limitations under the License.
14*46c4c49dSIbrahim Kanouche
15*46c4c49dSIbrahim Kanouche// The license_serializer program normalizes and serializes the known
16*46c4c49dSIbrahim Kanouche// licenseclassifier licenses into a compressed archive. The hash values for
17*46c4c49dSIbrahim Kanouche// the licenses are calculated and added to the archive. These can then be used
18*46c4c49dSIbrahim Kanouche// to determine where in unknown text is a good offset to run through the
19*46c4c49dSIbrahim Kanouche// Levenshtein Distance algorithm.
20*46c4c49dSIbrahim Kanouchepackage main
21*46c4c49dSIbrahim Kanouche
22*46c4c49dSIbrahim Kanoucheimport (
23*46c4c49dSIbrahim Kanouche	"flag"
24*46c4c49dSIbrahim Kanouche	"fmt"
25*46c4c49dSIbrahim Kanouche	"log"
26*46c4c49dSIbrahim Kanouche	"os"
27*46c4c49dSIbrahim Kanouche	"path/filepath"
28*46c4c49dSIbrahim Kanouche	"strings"
29*46c4c49dSIbrahim Kanouche
30*46c4c49dSIbrahim Kanouche	"github.com/google/licenseclassifier"
31*46c4c49dSIbrahim Kanouche	"github.com/google/licenseclassifier/serializer"
32*46c4c49dSIbrahim Kanouche)
33*46c4c49dSIbrahim Kanouche
// forbiddenOnly is the -forbidden flag. When set, main selects the forbidden
// license archive name and only serializes licenses whose type is "FORBIDDEN".
var forbiddenOnly = flag.Bool("forbidden", false, "serialize only forbidden licenses")

// outputDir is the -output flag: the directory the archive file is created
// in. It defaults to the empty string, which main passes to filepath.Join
// unchanged.
var outputDir = flag.String("output", "", "output directory")
38*46c4c49dSIbrahim Kanouche
// init installs a custom flag.Usage that prints a short description of the
// program, named after the base name of os.Args[0], followed by the defaults
// of all registered flags.
func init() {
	flag.Usage = func() {
		// Write the header to the flag package's configured output instead of
		// hard-coding os.Stderr, so it always ends up in the same place as
		// the option list printed by flag.PrintDefaults below.
		fmt.Fprintf(flag.CommandLine.Output(), `Usage: %s [OPTIONS]

Calculate the hash values for files and serialize them into a database.
See go/license-classifier

Options:
`, filepath.Base(os.Args[0]))
		flag.PrintDefaults()
	}
}
51*46c4c49dSIbrahim Kanouche
52*46c4c49dSIbrahim Kanouchefunc main() {
53*46c4c49dSIbrahim Kanouche	flag.Parse()
54*46c4c49dSIbrahim Kanouche
55*46c4c49dSIbrahim Kanouche	archiveName := licenseclassifier.LicenseArchive
56*46c4c49dSIbrahim Kanouche	if *forbiddenOnly {
57*46c4c49dSIbrahim Kanouche		archiveName = licenseclassifier.ForbiddenLicenseArchive
58*46c4c49dSIbrahim Kanouche	}
59*46c4c49dSIbrahim Kanouche
60*46c4c49dSIbrahim Kanouche	fn := filepath.Join(*outputDir, archiveName)
61*46c4c49dSIbrahim Kanouche	out, err := os.Create(fn)
62*46c4c49dSIbrahim Kanouche	if err != nil {
63*46c4c49dSIbrahim Kanouche		log.Fatalf("error: cannot create file %q: %v", fn, err)
64*46c4c49dSIbrahim Kanouche	}
65*46c4c49dSIbrahim Kanouche	defer out.Close()
66*46c4c49dSIbrahim Kanouche
67*46c4c49dSIbrahim Kanouche	lics, err := licenseclassifier.ReadLicenseDir()
68*46c4c49dSIbrahim Kanouche	if err != nil {
69*46c4c49dSIbrahim Kanouche		log.Fatalf("error: cannot read licenses directory: %v", err)
70*46c4c49dSIbrahim Kanouche	}
71*46c4c49dSIbrahim Kanouche
72*46c4c49dSIbrahim Kanouche	var licenses []string
73*46c4c49dSIbrahim Kanouche	for _, lic := range lics {
74*46c4c49dSIbrahim Kanouche		if !*forbiddenOnly || licenseclassifier.LicenseType(strings.TrimSuffix(lic.Name(), ".txt")) == "FORBIDDEN" {
75*46c4c49dSIbrahim Kanouche			licenses = append(licenses, lic.Name())
76*46c4c49dSIbrahim Kanouche		}
77*46c4c49dSIbrahim Kanouche	}
78*46c4c49dSIbrahim Kanouche
79*46c4c49dSIbrahim Kanouche	if err := serializer.ArchiveLicenses(licenses, out); err != nil {
80*46c4c49dSIbrahim Kanouche		log.Fatalf("error: cannot create database: %v", err)
81*46c4c49dSIbrahim Kanouche	}
82*46c4c49dSIbrahim Kanouche}
83