// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// The license_serializer program normalizes and serializes the known
// licenseclassifier licenses into a compressed archive. The hash values for
// the licenses are calculated and added to the archive. These can then be used
// to determine where in unknown text is a good offset to run through the
// Levenshtein Distance algorithm.
20*46c4c49dSIbrahim Kanouchepackage main 21*46c4c49dSIbrahim Kanouche 22*46c4c49dSIbrahim Kanoucheimport ( 23*46c4c49dSIbrahim Kanouche "flag" 24*46c4c49dSIbrahim Kanouche "fmt" 25*46c4c49dSIbrahim Kanouche "log" 26*46c4c49dSIbrahim Kanouche "os" 27*46c4c49dSIbrahim Kanouche "path/filepath" 28*46c4c49dSIbrahim Kanouche "strings" 29*46c4c49dSIbrahim Kanouche 30*46c4c49dSIbrahim Kanouche "github.com/google/licenseclassifier" 31*46c4c49dSIbrahim Kanouche "github.com/google/licenseclassifier/serializer" 32*46c4c49dSIbrahim Kanouche) 33*46c4c49dSIbrahim Kanouche 34*46c4c49dSIbrahim Kanouchevar ( 35*46c4c49dSIbrahim Kanouche forbiddenOnly = flag.Bool("forbidden", false, "serialize only forbidden licenses") 36*46c4c49dSIbrahim Kanouche outputDir = flag.String("output", "", "output directory") 37*46c4c49dSIbrahim Kanouche) 38*46c4c49dSIbrahim Kanouche 39*46c4c49dSIbrahim Kanouchefunc init() { 40*46c4c49dSIbrahim Kanouche flag.Usage = func() { 41*46c4c49dSIbrahim Kanouche fmt.Fprintf(os.Stderr, `Usage: %s [OPTIONS] 42*46c4c49dSIbrahim Kanouche 43*46c4c49dSIbrahim KanoucheCalculate the hash values for files and serialize them into a database. 
44*46c4c49dSIbrahim KanoucheSee go/license-classifier 45*46c4c49dSIbrahim Kanouche 46*46c4c49dSIbrahim KanoucheOptions: 47*46c4c49dSIbrahim Kanouche`, filepath.Base(os.Args[0])) 48*46c4c49dSIbrahim Kanouche flag.PrintDefaults() 49*46c4c49dSIbrahim Kanouche } 50*46c4c49dSIbrahim Kanouche} 51*46c4c49dSIbrahim Kanouche 52*46c4c49dSIbrahim Kanouchefunc main() { 53*46c4c49dSIbrahim Kanouche flag.Parse() 54*46c4c49dSIbrahim Kanouche 55*46c4c49dSIbrahim Kanouche archiveName := licenseclassifier.LicenseArchive 56*46c4c49dSIbrahim Kanouche if *forbiddenOnly { 57*46c4c49dSIbrahim Kanouche archiveName = licenseclassifier.ForbiddenLicenseArchive 58*46c4c49dSIbrahim Kanouche } 59*46c4c49dSIbrahim Kanouche 60*46c4c49dSIbrahim Kanouche fn := filepath.Join(*outputDir, archiveName) 61*46c4c49dSIbrahim Kanouche out, err := os.Create(fn) 62*46c4c49dSIbrahim Kanouche if err != nil { 63*46c4c49dSIbrahim Kanouche log.Fatalf("error: cannot create file %q: %v", fn, err) 64*46c4c49dSIbrahim Kanouche } 65*46c4c49dSIbrahim Kanouche defer out.Close() 66*46c4c49dSIbrahim Kanouche 67*46c4c49dSIbrahim Kanouche lics, err := licenseclassifier.ReadLicenseDir() 68*46c4c49dSIbrahim Kanouche if err != nil { 69*46c4c49dSIbrahim Kanouche log.Fatalf("error: cannot read licenses directory: %v", err) 70*46c4c49dSIbrahim Kanouche } 71*46c4c49dSIbrahim Kanouche 72*46c4c49dSIbrahim Kanouche var licenses []string 73*46c4c49dSIbrahim Kanouche for _, lic := range lics { 74*46c4c49dSIbrahim Kanouche if !*forbiddenOnly || licenseclassifier.LicenseType(strings.TrimSuffix(lic.Name(), ".txt")) == "FORBIDDEN" { 75*46c4c49dSIbrahim Kanouche licenses = append(licenses, lic.Name()) 76*46c4c49dSIbrahim Kanouche } 77*46c4c49dSIbrahim Kanouche } 78*46c4c49dSIbrahim Kanouche 79*46c4c49dSIbrahim Kanouche if err := serializer.ArchiveLicenses(licenses, out); err != nil { 80*46c4c49dSIbrahim Kanouche log.Fatalf("error: cannot create database: %v", err) 81*46c4c49dSIbrahim Kanouche } 82*46c4c49dSIbrahim Kanouche} 83