1*46c4c49dSIbrahim Kanouche// Copyright 2017 Google Inc. 2*46c4c49dSIbrahim Kanouche// 3*46c4c49dSIbrahim Kanouche// Licensed under the Apache License, Version 2.0 (the "License"); 4*46c4c49dSIbrahim Kanouche// you may not use this file except in compliance with the License. 5*46c4c49dSIbrahim Kanouche// You may obtain a copy of the License at 6*46c4c49dSIbrahim Kanouche// 7*46c4c49dSIbrahim Kanouche// http://www.apache.org/licenses/LICENSE-2.0 8*46c4c49dSIbrahim Kanouche// 9*46c4c49dSIbrahim Kanouche// Unless required by applicable law or agreed to in writing, software 10*46c4c49dSIbrahim Kanouche// distributed under the License is distributed on an "AS IS" BASIS, 11*46c4c49dSIbrahim Kanouche// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12*46c4c49dSIbrahim Kanouche// See the License for the specific language governing permissions and 13*46c4c49dSIbrahim Kanouche// limitations under the License. 14*46c4c49dSIbrahim Kanouchepackage serializer 15*46c4c49dSIbrahim Kanouche 16*46c4c49dSIbrahim Kanoucheimport ( 17*46c4c49dSIbrahim Kanouche "archive/tar" 18*46c4c49dSIbrahim Kanouche "bytes" 19*46c4c49dSIbrahim Kanouche "compress/gzip" 20*46c4c49dSIbrahim Kanouche "fmt" 21*46c4c49dSIbrahim Kanouche "io" 22*46c4c49dSIbrahim Kanouche "log" 23*46c4c49dSIbrahim Kanouche "os" 24*46c4c49dSIbrahim Kanouche "reflect" 25*46c4c49dSIbrahim Kanouche "sort" 26*46c4c49dSIbrahim Kanouche "strings" 27*46c4c49dSIbrahim Kanouche "testing" 28*46c4c49dSIbrahim Kanouche 29*46c4c49dSIbrahim Kanouche "github.com/google/licenseclassifier" 30*46c4c49dSIbrahim Kanouche "github.com/google/licenseclassifier/stringclassifier/searchset" 31*46c4c49dSIbrahim Kanouche) 32*46c4c49dSIbrahim Kanouche 33*46c4c49dSIbrahim Kanouchevar ( 34*46c4c49dSIbrahim Kanouche apache20Header, mit []byte 35*46c4c49dSIbrahim Kanouche normApache, normMIT string 36*46c4c49dSIbrahim Kanouche) 37*46c4c49dSIbrahim Kanouche 38*46c4c49dSIbrahim Kanouchefunc TestMain(m *testing.M) { 39*46c4c49dSIbrahim Kanouche var err error 40*46c4c49dSIbrahim Kanouche apache20Header, err = licenseclassifier.ReadLicenseFile("Apache-2.0.header.txt") 41*46c4c49dSIbrahim Kanouche if err != nil { 42*46c4c49dSIbrahim Kanouche log.Fatalf("error reading contents of Apache-2.0.header.txt: %v", err) 43*46c4c49dSIbrahim Kanouche } 44*46c4c49dSIbrahim Kanouche normApache = normalize(string(apache20Header)) 45*46c4c49dSIbrahim Kanouche 46*46c4c49dSIbrahim Kanouche mit, err = licenseclassifier.ReadLicenseFile("MIT.txt") 47*46c4c49dSIbrahim Kanouche if err != nil { 48*46c4c49dSIbrahim Kanouche log.Fatalf("error reading contents of MIT.txt: %v", err) 49*46c4c49dSIbrahim Kanouche } 50*46c4c49dSIbrahim Kanouche normMIT = normalize(string(mit)) 51*46c4c49dSIbrahim Kanouche 52*46c4c49dSIbrahim Kanouche os.Exit(m.Run()) 53*46c4c49dSIbrahim Kanouche} 54*46c4c49dSIbrahim Kanouche 55*46c4c49dSIbrahim Kanouchetype entry struct { 56*46c4c49dSIbrahim Kanouche name string 57*46c4c49dSIbrahim Kanouche size int64 58*46c4c49dSIbrahim Kanouche contents string 59*46c4c49dSIbrahim Kanouche} 60*46c4c49dSIbrahim Kanouche 61*46c4c49dSIbrahim Kanouchefunc TestSerializer_ArchiveLicense(t *testing.T) { 62*46c4c49dSIbrahim Kanouche tests := []struct { 63*46c4c49dSIbrahim Kanouche description string 64*46c4c49dSIbrahim Kanouche licenses []string 65*46c4c49dSIbrahim Kanouche want []entry 66*46c4c49dSIbrahim Kanouche }{ 67*46c4c49dSIbrahim Kanouche { 68*46c4c49dSIbrahim Kanouche description: "Archiving Apache 2.0 header", 69*46c4c49dSIbrahim Kanouche licenses: []string{"Apache-2.0.header.txt"}, 70*46c4c49dSIbrahim Kanouche want: []entry{ 71*46c4c49dSIbrahim Kanouche { 72*46c4c49dSIbrahim Kanouche name: "Apache-2.0.header.txt", 73*46c4c49dSIbrahim Kanouche size: int64(len(normApache)), 74*46c4c49dSIbrahim Kanouche contents: normApache, 75*46c4c49dSIbrahim Kanouche }, 76*46c4c49dSIbrahim Kanouche }, 77*46c4c49dSIbrahim Kanouche }, 78*46c4c49dSIbrahim Kanouche { 79*46c4c49dSIbrahim Kanouche description: "Archiving Apache 2.0 header + MIT", 80*46c4c49dSIbrahim Kanouche licenses: []string{"Apache-2.0.header.txt", "MIT.txt"}, 81*46c4c49dSIbrahim Kanouche want: []entry{ 82*46c4c49dSIbrahim Kanouche { 83*46c4c49dSIbrahim Kanouche name: "Apache-2.0.header.txt", 84*46c4c49dSIbrahim Kanouche size: int64(len(normApache)), 85*46c4c49dSIbrahim Kanouche contents: normApache, 86*46c4c49dSIbrahim Kanouche }, 87*46c4c49dSIbrahim Kanouche { 88*46c4c49dSIbrahim Kanouche name: "MIT.txt", 89*46c4c49dSIbrahim Kanouche size: int64(len(normMIT)), 90*46c4c49dSIbrahim Kanouche contents: normMIT, 91*46c4c49dSIbrahim Kanouche }, 92*46c4c49dSIbrahim Kanouche }, 93*46c4c49dSIbrahim Kanouche }, 94*46c4c49dSIbrahim Kanouche } 95*46c4c49dSIbrahim Kanouche 96*46c4c49dSIbrahim Kanouche for _, tt := range tests { 97*46c4c49dSIbrahim Kanouche var writer bytes.Buffer 98*46c4c49dSIbrahim Kanouche if err := ArchiveLicenses(tt.licenses, &writer); err != nil { 99*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q): cannot archive license: %v", tt.description, err) 100*46c4c49dSIbrahim Kanouche continue 101*46c4c49dSIbrahim Kanouche } 102*46c4c49dSIbrahim Kanouche 103*46c4c49dSIbrahim Kanouche reader := bytes.NewReader(writer.Bytes()) 104*46c4c49dSIbrahim Kanouche gr, err := gzip.NewReader(reader) 105*46c4c49dSIbrahim Kanouche if err != nil { 106*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q): cannot create gzip reader: %v", tt.description, err) 107*46c4c49dSIbrahim Kanouche continue 108*46c4c49dSIbrahim Kanouche } 109*46c4c49dSIbrahim Kanouche 110*46c4c49dSIbrahim Kanouche tr := tar.NewReader(gr) 111*46c4c49dSIbrahim Kanouche for i := 0; ; i++ { 112*46c4c49dSIbrahim Kanouche hdr, err := tr.Next() 113*46c4c49dSIbrahim Kanouche if err == io.EOF { 114*46c4c49dSIbrahim Kanouche break 115*46c4c49dSIbrahim Kanouche } 116*46c4c49dSIbrahim Kanouche if err != nil { 117*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q): cannot read header: %v", tt.description, err) 118*46c4c49dSIbrahim Kanouche break 119*46c4c49dSIbrahim Kanouche } 120*46c4c49dSIbrahim Kanouche 121*46c4c49dSIbrahim Kanouche if i >= len(tt.want)+1 { 122*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q): too many files in tar, %d want %d", tt.description, i, len(tt.want)) 123*46c4c49dSIbrahim Kanouche break 124*46c4c49dSIbrahim Kanouche } 125*46c4c49dSIbrahim Kanouche 126*46c4c49dSIbrahim Kanouche if hdr.Name != tt.want[i].name { 127*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q) = %+v, want %+v", tt.description, hdr.Name, tt.want[i].name) 128*46c4c49dSIbrahim Kanouche } 129*46c4c49dSIbrahim Kanouche if hdr.Size != tt.want[i].size { 130*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q) = %v, want %v", tt.description, hdr.Size, tt.want[i].size) 131*46c4c49dSIbrahim Kanouche } 132*46c4c49dSIbrahim Kanouche 133*46c4c49dSIbrahim Kanouche var b bytes.Buffer 134*46c4c49dSIbrahim Kanouche if _, err = io.Copy(&b, tr); err != nil { 135*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q): cannot read contents: %v", tt.description, err) 136*46c4c49dSIbrahim Kanouche break 137*46c4c49dSIbrahim Kanouche } 138*46c4c49dSIbrahim Kanouche 139*46c4c49dSIbrahim Kanouche if got, want := b.String(), tt.want[i].contents; got != want { 140*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q) = got\n%s\nwant:\n%s", tt.description, got, want) 141*46c4c49dSIbrahim Kanouche } 142*46c4c49dSIbrahim Kanouche 143*46c4c49dSIbrahim Kanouche hdr, err = tr.Next() 144*46c4c49dSIbrahim Kanouche if err != nil { 145*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q): no hash file found in archive: %v", tt.description, err) 146*46c4c49dSIbrahim Kanouche break 147*46c4c49dSIbrahim Kanouche } 148*46c4c49dSIbrahim Kanouche 149*46c4c49dSIbrahim Kanouche if hdr.Name != strings.TrimSuffix(tt.want[i].name, "txt")+"hash" { 150*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q) = %+v, want %+v", tt.description, hdr.Name, strings.TrimSuffix(tt.want[i].name, "txt")+"hash") 151*46c4c49dSIbrahim Kanouche } 152*46c4c49dSIbrahim Kanouche 153*46c4c49dSIbrahim Kanouche b.Reset() 154*46c4c49dSIbrahim Kanouche if _, err = io.Copy(&b, tr); err != nil { 155*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q): cannot read contents: %v", tt.description, err) 156*46c4c49dSIbrahim Kanouche break 157*46c4c49dSIbrahim Kanouche } 158*46c4c49dSIbrahim Kanouche 159*46c4c49dSIbrahim Kanouche var got searchset.SearchSet 160*46c4c49dSIbrahim Kanouche if err := searchset.Deserialize(&b, &got); err != nil { 161*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q): cannot deserialize search set: %v", tt.description, err) 162*46c4c49dSIbrahim Kanouche break 163*46c4c49dSIbrahim Kanouche } 164*46c4c49dSIbrahim Kanouche 165*46c4c49dSIbrahim Kanouche want := searchset.New(tt.want[i].contents, searchset.DefaultGranularity) 166*46c4c49dSIbrahim Kanouche if err := compareSearchSets(want, &got); err != nil { 167*46c4c49dSIbrahim Kanouche t.Errorf("ArchiveLicenses(%q): search sets not equal: %v", tt.description, err) 168*46c4c49dSIbrahim Kanouche break 169*46c4c49dSIbrahim Kanouche } 170*46c4c49dSIbrahim Kanouche } 171*46c4c49dSIbrahim Kanouche } 172*46c4c49dSIbrahim Kanouche} 173*46c4c49dSIbrahim Kanouche 174*46c4c49dSIbrahim Kanouchetype sortUInt32 []uint32 175*46c4c49dSIbrahim Kanouche 176*46c4c49dSIbrahim Kanouchefunc (s sortUInt32) Len() int { return len(s) } 177*46c4c49dSIbrahim Kanouchefunc (s sortUInt32) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 178*46c4c49dSIbrahim Kanouchefunc (s sortUInt32) Less(i, j int) bool { return s[i] < s[j] } 179*46c4c49dSIbrahim Kanouche 180*46c4c49dSIbrahim Kanouchefunc compareSearchSets(x, y *searchset.SearchSet) error { 181*46c4c49dSIbrahim Kanouche // Check to see that the tokens are equal. 182*46c4c49dSIbrahim Kanouche if len(x.Tokens) != len(y.Tokens) { 183*46c4c49dSIbrahim Kanouche return fmt.Errorf("Lengths differ = %d vs %d", len(x.Tokens), len(y.Tokens)) 184*46c4c49dSIbrahim Kanouche } 185*46c4c49dSIbrahim Kanouche for i := 0; i < len(x.Tokens); i++ { 186*46c4c49dSIbrahim Kanouche if x.Tokens[i].Text != y.Tokens[i].Text { 187*46c4c49dSIbrahim Kanouche return fmt.Errorf("Token values at %d differ = %q vs %q", i, x.Tokens[i].Text, y.Tokens[i].Text) 188*46c4c49dSIbrahim Kanouche } 189*46c4c49dSIbrahim Kanouche if x.Tokens[i].Offset != y.Tokens[i].Offset { 190*46c4c49dSIbrahim Kanouche return fmt.Errorf("Token offsets at %d differ = %d vs %d", i, x.Tokens[i].Offset, y.Tokens[i].Offset) 191*46c4c49dSIbrahim Kanouche } 192*46c4c49dSIbrahim Kanouche } 193*46c4c49dSIbrahim Kanouche 194*46c4c49dSIbrahim Kanouche // Now check that the hash maps are equal. 195*46c4c49dSIbrahim Kanouche var xKeys []uint32 196*46c4c49dSIbrahim Kanouche for k := range x.Hashes { 197*46c4c49dSIbrahim Kanouche xKeys = append(xKeys, k) 198*46c4c49dSIbrahim Kanouche } 199*46c4c49dSIbrahim Kanouche var yKeys []uint32 200*46c4c49dSIbrahim Kanouche for k := range y.Hashes { 201*46c4c49dSIbrahim Kanouche yKeys = append(yKeys, k) 202*46c4c49dSIbrahim Kanouche } 203*46c4c49dSIbrahim Kanouche 204*46c4c49dSIbrahim Kanouche if len(xKeys) != len(yKeys) { 205*46c4c49dSIbrahim Kanouche return fmt.Errorf("Lengths of hashes differ = %d vs %d", len(xKeys), len(yKeys)) 206*46c4c49dSIbrahim Kanouche } 207*46c4c49dSIbrahim Kanouche 208*46c4c49dSIbrahim Kanouche sort.Sort(sortUInt32(xKeys)) 209*46c4c49dSIbrahim Kanouche sort.Sort(sortUInt32(yKeys)) 210*46c4c49dSIbrahim Kanouche 211*46c4c49dSIbrahim Kanouche for i := 0; i < len(xKeys); i++ { 212*46c4c49dSIbrahim Kanouche if xKeys[i] != yKeys[i] { 213*46c4c49dSIbrahim Kanouche return fmt.Errorf("Hash keys differ = %d vs %d", xKeys[i], yKeys[i]) 214*46c4c49dSIbrahim Kanouche } 215*46c4c49dSIbrahim Kanouche if !reflect.DeepEqual(x.Hashes[xKeys[i]], y.Hashes[yKeys[i]]) { 216*46c4c49dSIbrahim Kanouche return fmt.Errorf("Hash values differ = %v vs %v", x.Hashes[xKeys[i]], y.Hashes[yKeys[i]]) 217*46c4c49dSIbrahim Kanouche } 218*46c4c49dSIbrahim Kanouche } 219*46c4c49dSIbrahim Kanouche 220*46c4c49dSIbrahim Kanouche return nil 221*46c4c49dSIbrahim Kanouche} 222*46c4c49dSIbrahim Kanouche 223*46c4c49dSIbrahim Kanouchefunc normalize(s string) string { 224*46c4c49dSIbrahim Kanouche for _, n := range licenseclassifier.Normalizers { 225*46c4c49dSIbrahim Kanouche s = n(s) 226*46c4c49dSIbrahim Kanouche } 227*46c4c49dSIbrahim Kanouche return s 228*46c4c49dSIbrahim Kanouche} 229